[PATCH, rs6000] ELFv2 ABI 4/8: Struct passing calling convention changes

Mon Nov 11 14:50:00 GMT 2013

Hello,

this patch implements the new struct calling convention for ELFv2.

With the new ABI, so-called "homogeneous" aggregates, i.e. struct, arrays,
or unions that (recursively) contain only elements of the same floating-
point or vector type are passed as if those elements were passed as
separate arguments.  (This is done for up to 8 such elements.)

After the refactoring of the rs6000_function_arg / rs6000_arg_partial_bytes
logic that was done in a prior patch, implementing support for
homogenous aggregates is now mostly straightforward.  Note that
rs6000_psave_function_arg can now get called for BLKmode arguments,
and has to handle them using a PARALLEL.

Bye,
Ulrich


ChangeLog:

2013-11-11  Ulrich Weigand  <Ulrich.Weigand@de.ibm.com>
	    Michael Gschwind  <mkg@us.ibm.com>

	* config/rs6000/rs6000.h (AGGR_ARG_NUM_REG): Define.
	* config/rs6000/rs6000.c (rs6000_aggregate_candidate): New function.
	(rs6000_discover_homogeneous_aggregate): Likewise.
	(rs6000_function_arg_boundary): Handle homogeneous aggregates.
	(rs6000_function_arg_advance_1): Likewise.
	(rs6000_function_arg): Likewise.
	(rs6000_arg_partial_bytes): Likewise.
	(rs6000_psave_function_arg): Handle BLKmode arguments.

Index: gcc/gcc/config/rs6000/rs6000.h
===================================================================

--- gcc.orig/gcc/config/rs6000/rs6000.h
+++ gcc/gcc/config/rs6000/rs6000.h
@@ -1641,6 +1641,9 @@ extern enum reg_class rs6000_constraints
 #define ALTIVEC_ARG_MAX_REG (ALTIVEC_ARG_MIN_REG + 11)
 #define ALTIVEC_ARG_NUM_REG (ALTIVEC_ARG_MAX_REG - ALTIVEC_ARG_MIN_REG + 1)
 
+/* Maximum number of registers per ELFv2 homogeneous aggregate argument.  */
+#define AGGR_ARG_NUM_REG 8
+
 /* Return registers */
 #define GP_ARG_RETURN GP_ARG_MIN_REG
 #define FP_ARG_RETURN FP_ARG_MIN_REG
Index: gcc/gcc/config/rs6000/rs6000.c
===================================================================
--- gcc.orig/gcc/config/rs6000/rs6000.c
+++ gcc/gcc/config/rs6000/rs6000.c
@@ -8460,6 +8460,219 @@ rs6000_member_type_forces_blk (const_tre
    && TARGET_ALTIVEC_ABI					\
    && (NAMED))
 
+/* Walk down the type tree of TYPE counting consecutive base elements.
+   If *MODEP is VOIDmode, then set it to the first valid floating point
+   or vector type.  If a non-floating point or vector type is found, or
+   if a floating point or vector type that doesn't match a non-VOIDmode
+   *MODEP is found, then return -1, otherwise return the count in the
+   sub-tree.  */
+
+static int
+rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
+{
+  enum machine_mode mode;
+  HOST_WIDE_INT size;
+
+  switch (TREE_CODE (type))
+    {
+    case REAL_TYPE:
+      mode = TYPE_MODE (type);
+      if (!SCALAR_FLOAT_MODE_P (mode))
+	return -1;
+
+      if (*modep == VOIDmode)
+	*modep = mode;
+
+      if (*modep == mode)
+	return 1;
+
+      break;
+
+    case COMPLEX_TYPE:
+      mode = TYPE_MODE (TREE_TYPE (type));
+      if (!SCALAR_FLOAT_MODE_P (mode))
+	return -1;
+
+      if (*modep == VOIDmode)
+	*modep = mode;
+
+      if (*modep == mode)
+	return 2;
+
+      break;
+
+    case VECTOR_TYPE:
+      if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
+	return -1;
+
+      /* Use V4SImode as representative of all 128-bit vector types.  */
+      size = int_size_in_bytes (type);
+      switch (size)
+	{
+	case 16:
+	  mode = V4SImode;
+	  break;
+	default:
+	  return -1;
+	}
+
+      if (*modep == VOIDmode)
+	*modep = mode;
+
+      /* Vector modes are considered to be opaque: two vectors are
+	 equivalent for the purposes of being homogeneous aggregates
+	 if they are the same size.  */
+      if (*modep == mode)
+	return 1;
+
+      break;
+
+    case ARRAY_TYPE:
+      {
+	int count;
+	tree index = TYPE_DOMAIN (type);
+
+	/* Can't handle incomplete types.  */
+	if (!COMPLETE_TYPE_P (type))
+	  return -1;
+
+	count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
+	if (count == -1
+	    || !index
+	    || !TYPE_MAX_VALUE (index)
+	    || !host_integerp (TYPE_MAX_VALUE (index), 1)
+	    || !TYPE_MIN_VALUE (index)
+	    || !host_integerp (TYPE_MIN_VALUE (index), 1)
+	    || count < 0)
+	  return -1;
+
+	count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
+		      - tree_low_cst (TYPE_MIN_VALUE (index), 1));
+
+	/* There must be no padding.  */
+	if (!host_integerp (TYPE_SIZE (type), 1)
+	    || (tree_low_cst (TYPE_SIZE (type), 1)
+		!= count * GET_MODE_BITSIZE (*modep)))
+	  return -1;
+
+	return count;
+      }
+
+    case RECORD_TYPE:
+      {
+	int count = 0;
+	int sub_count;
+	tree field;
+
+	/* Can't handle incomplete types.  */
+	if (!COMPLETE_TYPE_P (type))
+	  return -1;
+
+	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+	  {
+	    if (TREE_CODE (field) != FIELD_DECL)
+	      continue;
+
+	    sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
+	    if (sub_count < 0)
+	      return -1;
+	    count += sub_count;
+	  }
+
+	/* There must be no padding.  */
+	if (!host_integerp (TYPE_SIZE (type), 1)
+	    || (tree_low_cst (TYPE_SIZE (type), 1)
+		!= count * GET_MODE_BITSIZE (*modep)))
+	  return -1;
+
+	return count;
+      }
+
+    case UNION_TYPE:
+    case QUAL_UNION_TYPE:
+      {
+	/* These aren't very interesting except in a degenerate case.  */
+	int count = 0;
+	int sub_count;
+	tree field;
+
+	/* Can't handle incomplete types.  */
+	if (!COMPLETE_TYPE_P (type))
+	  return -1;
+
+	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+	  {
+	    if (TREE_CODE (field) != FIELD_DECL)
+	      continue;
+
+	    sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
+	    if (sub_count < 0)
+	      return -1;
+	    count = count > sub_count ? count : sub_count;
+	  }
+
+	/* There must be no padding.  */
+	if (!host_integerp (TYPE_SIZE (type), 1)
+	    || (tree_low_cst (TYPE_SIZE (type), 1)
+		!= count * GET_MODE_BITSIZE (*modep)))
+	  return -1;
+
+	return count;
+      }
+
+    default:
+      break;
+    }
+
+  return -1;
+}
+
+/* If an argument, whose type is described by TYPE and MODE, is a homogeneous
+   float or vector aggregate that shall be passed in FP/vector registers
+   according to the ELFv2 ABI, return the homogeneous element mode in
+   *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
+
+   Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE.  */
+
+static bool
+rs6000_discover_homogeneous_aggregate (enum machine_mode mode, const_tree type,
+				       enum machine_mode *elt_mode,
+				       int *n_elts)
+{
+  /* Note that we do not accept complex types at the top level as
+     homogeneous aggregates; these types are handled via the
+     targetm.calls.split_complex_arg mechanism.  Complex types
+     can be elements of homogeneous aggregates, however.  */
+  if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
+    {
+      enum machine_mode field_mode = VOIDmode;
+      int field_count = rs6000_aggregate_candidate (type, &field_mode);
+
+      if (field_count > 0)
+	{
+	  int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)?
+			(GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
+
+	  /* The ELFv2 ABI allows homogeneous aggregates to occupy
+	     up to AGGR_ARG_NUM_REG registers.  */
+	  if (field_count * n_regs <= AGGR_ARG_NUM_REG)
+	    {
+	      if (elt_mode)
+		*elt_mode = field_mode;
+	      if (n_elts)
+		*n_elts = field_count;
+	      return true;
+	    }
+	}
+    }
+
+  if (elt_mode)
+    *elt_mode = mode;
+  if (n_elts)
+    *n_elts = 1;
+  return false;
+}
+
 /* Return a nonzero value to say to return the function value in
    memory, just as large structures are always returned.  TYPE will be
    the data type of the value, and FNTYPE will be the type of the
@@ -8760,6 +8973,11 @@ function_arg_padding (enum machine_mode 
 static unsigned int
 rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
 {
+  enum machine_mode elt_mode;
+  int n_elts;
+
+  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
+
   if (DEFAULT_ABI == ABI_V4
       && (GET_MODE_SIZE (mode) == 8
 	  || (TARGET_HARD_FLOAT
@@ -8771,7 +8989,7 @@ rs6000_function_arg_boundary (enum machi
 	       && int_size_in_bytes (type) >= 8
 	       && int_size_in_bytes (type) < 16))
     return 64;
-  else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
+  else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
 	   || (type && TREE_CODE (type) == VECTOR_TYPE
 	       && int_size_in_bytes (type) >= 16))
     return 128;
@@ -8982,6 +9200,11 @@ static void
 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
 			       const_tree type, bool named, int depth)
 {
+  enum machine_mode elt_mode;
+  int n_elts;
+
+  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
+
   /* Only tick off an argument if we're not recursing.  */
   if (depth == 0)
     cum->nargs_prototype--;
@@ -9002,15 +9225,16 @@ rs6000_function_arg_advance_1 (CUMULATIV
 #endif
 
   if (TARGET_ALTIVEC_ABI
-      && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
+      && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
 	  || (type && TREE_CODE (type) == VECTOR_TYPE
 	      && int_size_in_bytes (type) == 16)))
     {
       bool stack = false;
 
-      if (USE_ALTIVEC_FOR_ARG_P (cum, mode, named))
+      if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
 	{
-	  cum->vregno++;
+	  cum->vregno += n_elts;
+
 	  if (!TARGET_ALTIVEC)
 	    error ("cannot pass argument in vector register because"
 		   " altivec instructions are disabled, use -maltivec"
@@ -9164,15 +9388,15 @@ rs6000_function_arg_advance_1 (CUMULATIV
 
       cum->words = align_words + n_words;
 
-      if (SCALAR_FLOAT_MODE_P (mode)
+      if (SCALAR_FLOAT_MODE_P (elt_mode)
 	  && TARGET_HARD_FLOAT && TARGET_FPRS)
 	{
 	  /* _Decimal128 must be passed in an even/odd float register pair.
 	     This assumes that the register number is odd when fregno is
 	     odd.  */
-	  if (mode == TDmode && (cum->fregno % 2) == 1)
+	  if (elt_mode == TDmode && (cum->fregno % 2) == 1)
 	    cum->fregno++;
-	  cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
+	  cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
 	}
 
       if (TARGET_DEBUG_ARG)
@@ -9543,6 +9767,7 @@ rs6000_psave_function_arg (enum machine_
       int n_words = rs6000_arg_size (mode, type);
 
       if (align_words + n_words > GP_ARG_NUM_REG
+	  || mode == BLKmode
 	  || (TARGET_32BIT && TARGET_POWERPC64))
 	{
 	  /* If this is partially on the stack, then we only
@@ -9638,6 +9863,8 @@ rs6000_function_arg (cumulative_args_t c
 {
   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   enum rs6000_abi abi = DEFAULT_ABI;
+  enum machine_mode elt_mode;
+  int n_elts;
 
   /* Return a marker to indicate whether CR1 needs to set or clear the
      bit that V.4 uses to say fp args were passed in registers.
@@ -9664,6 +9891,8 @@ rs6000_function_arg (cumulative_args_t c
       return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
     }
 
+  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
+
   if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
     {
       rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
@@ -9672,11 +9901,11 @@ rs6000_function_arg (cumulative_args_t c
       /* Else fall through to usual handling.  */
     }
 
-  if (USE_ALTIVEC_FOR_ARG_P (cum, mode, named))
+  if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
     {
-      rtx rvec[GP_ARG_NUM_REG + 1];
-      rtx r;
-      int k = 0;
+      rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
+      rtx r, off;
+      int i, k = 0;
 
       /* Do we also need to pass this argument in the parameter
 	 save area?  */
@@ -9687,8 +9916,12 @@ rs6000_function_arg (cumulative_args_t c
 	}
 
       /* Describe where this argument goes in the vector registers.  */
-      r = gen_rtx_REG (mode, cum->vregno);
-      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
+      for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
+	{
+	  r = gen_rtx_REG (elt_mode, cum->vregno + i);
+	  off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
+	  rvec[k++] =  gen_rtx_EXPR_LIST (VOIDmode, r, off);
+	}
 
       return rs6000_finish_function_arg (mode, rvec, k);
     }
@@ -9790,15 +10023,15 @@ rs6000_function_arg (cumulative_args_t c
 
       /* _Decimal128 must be passed in an even/odd float register pair.
 	 This assumes that the register number is odd when fregno is odd.  */
-      if (mode == TDmode && (cum->fregno % 2) == 1)
+      if (elt_mode == TDmode && (cum->fregno % 2) == 1)
 	cum->fregno++;
 
-      if (USE_FP_FOR_ARG_P (cum, mode))
+      if (USE_FP_FOR_ARG_P (cum, elt_mode))
 	{
-	  rtx rvec[GP_ARG_NUM_REG + 1];
-	  rtx r;
-	  int k = 0;
-	  unsigned long n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
+	  rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
+	  rtx r, off;
+	  int i, k = 0;
+	  unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
 
 	  /* Do we also need to pass this argument in the parameter
 	     save area?  */
@@ -9809,19 +10042,23 @@ rs6000_function_arg (cumulative_args_t c
 	    k = rs6000_psave_function_arg (mode, type, align_words, rvec);
 
 	  /* Describe where this argument goes in the fprs.  */
-
-	  /* Check if the argument is split over registers and memory.
-	     This can only ever happen for long double or _Decimal128;
-	     complex types are handled via split_complex_arg.  */
-	  enum machine_mode fmode = mode;
-	  if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
+	  for (i = 0; i < n_elts
+		      && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
 	    {
-	      gcc_assert (fmode == TFmode || fmode == TDmode);
-	      fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
-	    }
+	      /* Check if the argument is split over registers and memory.
+		 This can only ever happen for long double or _Decimal128;
+		 complex types are handled via split_complex_arg.  */
+	      enum machine_mode fmode = elt_mode;
+	      if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
+		{
+		  gcc_assert (fmode == TFmode || fmode == TDmode);
+		  fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
+		}
 
-	  r = gen_rtx_REG (fmode, cum->fregno);
-	  rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
+	      r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
+	      off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
+	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
+	    }
 
 	  return rs6000_finish_function_arg (mode, rvec, k);
 	}
@@ -9854,11 +10091,15 @@ rs6000_arg_partial_bytes (cumulative_arg
   bool passed_in_gprs = true;
   int ret = 0;
   int align_words;
+  enum machine_mode elt_mode;
+  int n_elts;
+
+  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
 
   if (DEFAULT_ABI == ABI_V4)
     return 0;
 
-  if (USE_ALTIVEC_FOR_ARG_P (cum, mode, named))
+  if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
     {
       /* If we are passing this arg in the fixed parameter save area
          (gprs or memory) as well as VRs, we do not use the partial
@@ -9867,8 +10108,10 @@ rs6000_arg_partial_bytes (cumulative_arg
       if (TARGET_64BIT && ! cum->prototype)
 	return 0;
 
-      /* Otherwise, we pass in VRs only.  No partial copy possible.  */
+      /* Otherwise, we pass in VRs only.  Check for partial copies.  */
       passed_in_gprs = false;
+      if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
+	ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
     }
 
   /* In this complicated case we just disable the partial_nregs code.  */
@@ -9877,9 +10120,9 @@ rs6000_arg_partial_bytes (cumulative_arg
 
   align_words = rs6000_parm_start (mode, type, cum->words);
 
-  if (USE_FP_FOR_ARG_P (cum, mode))
+  if (USE_FP_FOR_ARG_P (cum, elt_mode))
     {
-      unsigned long n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
+      unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
 
       /* If we are passing this arg in the fixed parameter save area
          (gprs or memory) as well as FPRs, we do not use the partial
@@ -9894,8 +10137,9 @@ rs6000_arg_partial_bytes (cumulative_arg
 
       /* Otherwise, we pass in FPRs only.  Check for partial copies.  */
       passed_in_gprs = false;
-      if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
-	ret = (FP_ARG_MAX_REG + 1 - cum->fregno) * 8;
+      if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
+	ret = ((FP_ARG_MAX_REG + 1 - cum->fregno)
+	       * MIN (8, GET_MODE_SIZE (elt_mode)));
     }
 
   if (passed_in_gprs
-- 
  Dr. Ulrich Weigand
  GNU/Linux compilers and toolchain
  Ulrich.Weigand@de.ibm.com