This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

function parms in regs, patch 3 of 3


This patch defines a new macro, BLOCK_REG_PADDING, and uses it to
determine what to do with odd-sized struct pieces in regs.  I wanted
to make use of FUNCTION_ARG_PADDING in its default definition, so
that meant passing TYPE to emit_group_{load,store}.  Anyway, what
this does is allow powerpc64 to pad structs upward, downward and
every which way to our heart's content, in registers, without using
the PARALLELs invented for Irix support.  All controlled in one place,
rs6000.c:function_arg_padding.  Hmm.  It might be a good idea to make
the default PAD_VARARGS_DOWN use FUNCTION_ARG_PADDING too.

The rs6000 code has been tested in three configurations:
  (1) AGGREGATE_PADDING_FIXED == 0,
  (2) AGGREGATE_PADDING_FIXED == 1 and AGGREGATES_PAD_UPWARD_ALWAYS == 0,
  (3) AGGREGATE_PADDING_FIXED == 1 and AGGREGATES_PAD_UPWARD_ALWAYS == 1,
with and without MUST_PASS_IN_STACK redefined.  All seems well.
Of course, redefining MUST_PASS_IN_STACK means that code compiled
with previous versions of gcc might not be compatible with a new
gcc, but I think we need to suffer the pain now so that we're
compliant with the ABI.

	* expr.h (struct locate_and_pad_arg_data): Add where_pad.
	(BLOCK_REG_PADDING): Define.
	(emit_group_load, emit_group_store): Adjust declarations.
	* expr.c (emit_group_load): Add "type" param, and use
	BLOCK_REG_PADDING to determine need for a shift.  Optimize non-
	aligned accesses if !SLOW_UNALIGNED_ACCESS.
	(emit_group_store): Likewise.
	(emit_push_insn, expand_assignment, store_expr, expand_expr): Adjust
	emit_group_load and emit_group_store calls.
	* calls.c (store_unaligned_arguments_into_pseudos): Tidy.  Use
	BLOCK_REG_PADDING to determine whether we need endian_correction.
	(load_register_parameters): Localize vars.  Handle shifting of
	small values to the correct end of regs.  Adjust emit_group_load
	call.
	(expand_call, emit_library_call_value_1): Adjust emit_group_load
	and emit_group_store calls.
	* function.c (assign_parms): Set mem alignment for stack slots.
	Adjust emit_group_store call.  Store values at the "wrong" end
	of regs to the stack.  Use BLOCK_REG_PADDING.
	(locate_and_pad_parm): Save where_pad.
	(expand_function_end): Adjust emit_group_load call.
	* stmt.c (expand_value_return): Adjust emit_group_load call.
	* Makefile.in (calls.o): Depend on $(OPTABS_H).

	* config/rs6000/linux64.h (TARGET_BIG_ENDIAN): Redefine as 1.
	(FIXED_R13): Delete.
	(AGGREGATE_PADDING_FIXED): Define.
	(MUST_PASS_IN_STACK): Define.
	* config/rs6000/rs6000.h (struct rs6000_args): Remove orig_nargs.
	(PAD_VARARGS_DOWN): Define in terms of FUNCTION_ARG_PADDING.
	* config/rs6000/rs6000.c (init_cumulative_args): Don't set orig_nargs.
	(function_arg_padding): !AGGREGATE_PADDING_FIXED compatibility code.
	Act on AGGREGATES_PAD_UPWARD_ALWAYS.

-- 
Alan Modra
IBM OzLabs - Linux Technology Centre

diff -urp gcc2/gcc/expr.h gcc3/gcc/expr.h
--- gcc2/gcc/expr.h	2003-04-24 15:31:56.000000000 +0930
+++ gcc3/gcc/expr.h	2003-04-24 21:07:17.000000000 +0930
@@ -93,6 +93,8 @@ struct locate_and_pad_arg_data
   /* The amount that the stack pointer needs to be adjusted to
      force alignment for the next argument.  */
   struct args_size alignment_pad;
+  /* Which way we should pad this arg.  */
+  enum direction where_pad;
 };
 #endif
 
@@ -150,6 +152,14 @@ do {							\
       ? downward : upward))
 #endif
 
+/* Specify padding for the last element of a block move between
+   registers and memory.  FIRST is non-zero if this is the only
+   element.  */
+#ifndef BLOCK_REG_PADDING
+#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \
+  (!(FIRST) ? upward : FUNCTION_ARG_PADDING (MODE, TYPE))
+#endif
+
 /* Supply a default definition for FUNCTION_ARG_BOUNDARY.  Normally, we let
    FUNCTION_ARG_PADDING, which also pads the length, handle any needed
    alignment.  */
@@ -416,19 +426,21 @@ extern void move_block_from_reg PARAMS (
 /* Generate a non-consecutive group of registers represented by a PARALLEL.  */
 extern rtx gen_group_rtx PARAMS ((rtx));
 
+#ifdef TREE_CODE
 /* Load a BLKmode value into non-consecutive registers represented by a
    PARALLEL.  */
-extern void emit_group_load PARAMS ((rtx, rtx, int));
+extern void emit_group_load PARAMS ((rtx, rtx, tree, int));
+#endif
 
 /* Move a non-consecutive group of registers represented by a PARALLEL into
    a non-consecutive group of registers represented by a PARALLEL.  */
 extern void emit_group_move PARAMS ((rtx, rtx));
 
+#ifdef TREE_CODE
 /* Store a BLKmode value from non-consecutive registers represented by a
    PARALLEL.  */
-extern void emit_group_store PARAMS ((rtx, rtx, int));
+extern void emit_group_store PARAMS ((rtx, rtx, tree, int));
 
-#ifdef TREE_CODE
 /* Copy BLKmode object from a set of registers.  */
 extern rtx copy_blkmode_from_reg PARAMS ((rtx, rtx, tree));
 #endif
diff -urp gcc2/gcc/expr.c gcc3/gcc/expr.c
--- gcc2/gcc/expr.c	2003-04-24 14:46:04.000000000 +0930
+++ gcc3/gcc/expr.c	2003-04-24 21:07:17.000000000 +0930
@@ -2212,19 +2212,15 @@ gen_group_rtx (orig)
   return gen_rtx_PARALLEL (GET_MODE (orig), gen_rtvec_v (length, tmps));
 }
 
-/* Emit code to move a block SRC to a block DST, where DST is non-consecutive
-   registers represented by a PARALLEL.  SSIZE represents the total size of
-   block SRC in bytes, or -1 if not known.  */
-/* ??? If SSIZE % UNITS_PER_WORD != 0, we make the blatant assumption that
-   the balance will be in what would be the low-order memory addresses, i.e.
-   left justified for big endian, right justified for little endian.  This
-   happens to be true for the targets currently using this support.  If this
-   ever changes, a new target macro along the lines of FUNCTION_ARG_PADDING
-   would be needed.  */
+/* Emit code to move a block ORIG_SRC of type TYPE to a block DST,
+   where DST is non-consecutive registers represented by a PARALLEL.
+   SSIZE represents the total size of block ORIG_SRC in bytes, or -1
+   if not known.  */ 
 
 void
-emit_group_load (dst, orig_src, ssize)
+emit_group_load (dst, orig_src, type, ssize)
      rtx dst, orig_src;
+     tree type;
      int ssize;
 {
   rtx *tmps, src;
@@ -2253,7 +2249,11 @@ emit_group_load (dst, orig_src, ssize)
       /* Handle trailing fragments that run over the size of the struct.  */
       if (ssize >= 0 && bytepos + (HOST_WIDE_INT) bytelen > ssize)
 	{
-	  shift = (bytelen - (ssize - bytepos)) * BITS_PER_UNIT;
+	  /* Arrange to shift the fragment to where it belongs.
+	     extract_bit_field loads to the lsb of the reg.  */
+	  if (BLOCK_REG_PADDING (GET_MODE (orig_src), type, i == start)
+	      == (BYTES_BIG_ENDIAN ? upward : downward))
+	    shift = (bytelen - (ssize - bytepos)) * BITS_PER_UNIT;
 	  bytelen = ssize - bytepos;
 	  if (bytelen <= 0)
 	    abort ();
@@ -2278,7 +2278,8 @@ emit_group_load (dst, orig_src, ssize)
 
       /* Optimize the access just a bit.  */
       if (GET_CODE (src) == MEM
-	  && MEM_ALIGN (src) >= GET_MODE_ALIGNMENT (mode)
+	  && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (src))
+	      || MEM_ALIGN (src) >= GET_MODE_ALIGNMENT (mode))
 	  && bytepos * BITS_PER_UNIT % GET_MODE_ALIGNMENT (mode) == 0
 	  && bytelen == GET_MODE_SIZE (mode))
 	{
@@ -2321,7 +2322,7 @@ emit_group_load (dst, orig_src, ssize)
 				     bytepos * BITS_PER_UNIT, 1, NULL_RTX,
 				     mode, mode, ssize);
 
-      if (BYTES_BIG_ENDIAN && shift)
+      if (shift)
 	expand_binop (mode, ashl_optab, tmps[i], GEN_INT (shift),
 		      tmps[i], 0, OPTAB_WIDEN);
     }
@@ -2353,13 +2354,16 @@ emit_group_move (dst, src)
 		    XEXP (XVECEXP (src, 0, i), 0));
 }
 
-/* Emit code to move a block SRC to a block DST, where SRC is non-consecutive
-   registers represented by a PARALLEL.  SSIZE represents the total size of
-   block DST, or -1 if not known.  */
+/* Emit code to move a block SRC to a block ORIG_DST of type TYPE,
+   where SRC is non-consecutive registers represented by a PARALLEL.
+   SSIZE represents the total size of block ORIG_DST, or -1 if not
+   known.  */
 
 void
-emit_group_store (orig_dst, src, ssize)
-     rtx orig_dst, src;
+emit_group_store (orig_dst, src, type, ssize)
+     rtx orig_dst;
+     rtx src;
+     tree type ATTRIBUTE_UNUSED;
      int ssize;
 {
   rtx *tmps, dst;
@@ -2404,8 +2408,8 @@ emit_group_store (orig_dst, src, ssize)
 	 the temporary.  */
 
       temp = assign_stack_temp (GET_MODE (dst), ssize, 0);
-      emit_group_store (temp, src, ssize);
-      emit_group_load (dst, temp, ssize);
+      emit_group_store (temp, src, type, ssize);
+      emit_group_load (dst, temp, type, ssize);
       return;
     }
   else if (GET_CODE (dst) != MEM && GET_CODE (dst) != CONCAT)
@@ -2426,7 +2430,10 @@ emit_group_store (orig_dst, src, ssize)
       /* Handle trailing fragments that run over the size of the struct.  */
       if (ssize >= 0 && bytepos + (HOST_WIDE_INT) bytelen > ssize)
 	{
-	  if (BYTES_BIG_ENDIAN)
+	  /* store_bit_field always takes its value from the lsb.
+	     Move the fragment to the lsb if it's not already there.  */
+	  if (BLOCK_REG_PADDING (GET_MODE (orig_dst), type, i == start)
+	      == (BYTES_BIG_ENDIAN ? upward : downward))
 	    {
 	      int shift = (bytelen - (ssize - bytepos)) * BITS_PER_UNIT;
 	      expand_binop (mode, ashr_optab, tmps[i], GEN_INT (shift),
@@ -2459,7 +2466,8 @@ emit_group_store (orig_dst, src, ssize)
 
       /* Optimize the access just a bit.  */
       if (GET_CODE (dest) == MEM
-	  && MEM_ALIGN (dest) >= GET_MODE_ALIGNMENT (mode)
+	  && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (dest))
+	      || MEM_ALIGN (dest) >= GET_MODE_ALIGNMENT (mode))
 	  && bytepos * BITS_PER_UNIT % GET_MODE_ALIGNMENT (mode) == 0
 	  && bytelen == GET_MODE_SIZE (mode))
 	emit_move_insn (adjust_address (dest, mode, bytepos), tmps[i]);
@@ -3991,7 +3999,7 @@ emit_push_insn (x, mode, type, size, ali
       /* Handle calls that pass values in multiple non-contiguous locations.
 	 The Irix 6 ABI has examples of this.  */
       if (GET_CODE (reg) == PARALLEL)
-	emit_group_load (reg, x, -1);  /* ??? size? */
+	emit_group_load (reg, x, type, -1);
       else
 	move_block_to_reg (REGNO (reg), x, partial, mode);
     }
@@ -4194,7 +4202,8 @@ expand_assignment (to, from, want_value,
       /* Handle calls that return values in multiple non-contiguous locations.
 	 The Irix 6 ABI has examples of this.  */
       if (GET_CODE (to_rtx) == PARALLEL)
-	emit_group_load (to_rtx, value, int_size_in_bytes (TREE_TYPE (from)));
+	emit_group_load (to_rtx, value, TREE_TYPE (from),
+			 int_size_in_bytes (TREE_TYPE (from)));
       else if (GET_MODE (to_rtx) == BLKmode)
 	emit_block_move (to_rtx, value, expr_size (from), BLOCK_OP_NORMAL);
       else
@@ -4228,7 +4237,8 @@ expand_assignment (to, from, want_value,
       temp = expand_expr (from, 0, GET_MODE (to_rtx), 0);
 
       if (GET_CODE (to_rtx) == PARALLEL)
-	emit_group_load (to_rtx, temp, int_size_in_bytes (TREE_TYPE (from)));
+	emit_group_load (to_rtx, temp, TREE_TYPE (from),
+			 int_size_in_bytes (TREE_TYPE (from)));
       else
 	emit_move_insn (to_rtx, temp);
 
@@ -4641,7 +4651,8 @@ store_expr (exp, target, want_value)
       /* Handle calls that return values in multiple non-contiguous locations.
 	 The Irix 6 ABI has examples of this.  */
       else if (GET_CODE (target) == PARALLEL)
-	emit_group_load (target, temp, int_size_in_bytes (TREE_TYPE (exp)));
+	emit_group_load (target, temp, TREE_TYPE (exp),
+			 int_size_in_bytes (TREE_TYPE (exp)));
       else if (GET_MODE (temp) == BLKmode)
 	emit_block_move (target, temp, expr_size (exp),
 			 (want_value & 2
@@ -9182,7 +9193,7 @@ expand_expr (exp, target, tmode, modifie
 		    /* Handle calls that pass values in multiple
 		       non-contiguous locations.  The Irix 6 ABI has examples
 		       of this.  */
-		    emit_group_store (memloc, op0,
+		    emit_group_store (memloc, op0, inner_type,
 				      int_size_in_bytes (inner_type));
 		  else
 		    emit_move_insn (memloc, op0);
diff -urp gcc2/gcc/calls.c gcc3/gcc/calls.c
--- gcc2/gcc/calls.c	2003-04-24 11:25:45.000000000 +0930
+++ gcc3/gcc/calls.c	2003-04-24 21:07:17.000000000 +0930
@@ -27,6 +27,7 @@ Software Foundation, 59 Temple Place - S
 #include "tree.h"
 #include "flags.h"
 #include "expr.h"
+#include "optabs.h"
 #include "libfuncs.h"
 #include "function.h"
 #include "regs.h"
@@ -1015,22 +1016,22 @@ store_unaligned_arguments_into_pseudos (
 	    < (unsigned int) MIN (BIGGEST_ALIGNMENT, BITS_PER_WORD)))
       {
 	int bytes = int_size_in_bytes (TREE_TYPE (args[i].tree_value));
-	int big_endian_correction = 0;
-
-	args[i].n_aligned_regs
-	  = args[i].partial ? args[i].partial
-	    : (bytes + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD;
+	int nregs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+	int endian_correction = 0;
 
+	args[i].n_aligned_regs = args[i].partial ? args[i].partial : nregs;
 	args[i].aligned_regs = (rtx *) xmalloc (sizeof (rtx)
 						* args[i].n_aligned_regs);
 
-	/* Structures smaller than a word are aligned to the least
-	   significant byte (to the right).  On a BYTES_BIG_ENDIAN machine,
+	/* Structures smaller than a word are normally aligned to the
+	   least significant byte.  On a BYTES_BIG_ENDIAN machine,
 	   this means we must skip the empty high order bytes when
 	   calculating the bit offset.  */
-	if (BYTES_BIG_ENDIAN
-	    && bytes < UNITS_PER_WORD)
-	  big_endian_correction = (BITS_PER_WORD  - (bytes * BITS_PER_UNIT));
+	if (bytes < UNITS_PER_WORD
+	    && (BLOCK_REG_PADDING (args[i].mode,
+				   TREE_TYPE (args[i].tree_value), 1)
+		== downward))
+	  endian_correction = BITS_PER_WORD - bytes * BITS_PER_UNIT;
 
 	for (j = 0; j < args[i].n_aligned_regs; j++)
 	  {
@@ -1039,6 +1040,8 @@ store_unaligned_arguments_into_pseudos (
 	    int bitsize = MIN (bytes * BITS_PER_UNIT, BITS_PER_WORD);
 
 	    args[i].aligned_regs[j] = reg;
+	    word = extract_bit_field (word, bitsize, 0, 1, NULL_RTX,
+				      word_mode, word_mode, BITS_PER_WORD);
 
 	    /* There is no need to restrict this code to loading items
 	       in TYPE_ALIGN sized hunks.  The bitfield instructions can
@@ -1054,11 +1057,8 @@ store_unaligned_arguments_into_pseudos (
 	    emit_move_insn (reg, const0_rtx);
 
 	    bytes -= bitsize / BITS_PER_UNIT;
-	    store_bit_field (reg, bitsize, big_endian_correction, word_mode,
-			     extract_bit_field (word, bitsize, 0, 1, NULL_RTX,
-						word_mode, word_mode,
-						BITS_PER_WORD),
-			     BITS_PER_WORD);
+	    store_bit_field (reg, bitsize, endian_correction, word_mode,
+			     word, BITS_PER_WORD);
 	  }
       }
 }
@@ -1689,34 +1689,45 @@ load_register_parameters (args, num_actu
     {
       rtx reg = ((flags & ECF_SIBCALL)
 		 ? args[i].tail_call_reg : args[i].reg);
-      int partial = args[i].partial;
-      int nregs;
-
       if (reg)
 	{
+	  int partial = args[i].partial;
+	  int nregs;
+	  int size = 0;
 	  rtx before_arg = get_last_insn ();
 	  /* Set to non-negative if must move a word at a time, even if just
 	     one word (e.g, partial == 1 && mode == DFmode).  Set to -1 if
 	     we just use a normal move insn.  This value can be zero if the
 	     argument is a zero size structure with no fields.  */
-	  nregs = (partial ? partial
-		   : (TYPE_MODE (TREE_TYPE (args[i].tree_value)) == BLKmode
-		      ? ((int_size_in_bytes (TREE_TYPE (args[i].tree_value))
-			  + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)
-		      : -1));
+	  nregs = -1;
+	  if (partial)
+	    nregs = partial;
+	  else if (TYPE_MODE (TREE_TYPE (args[i].tree_value)) == BLKmode)
+	    {
+	      size = int_size_in_bytes (TREE_TYPE (args[i].tree_value));
+	      nregs = (size + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD;
+	    }
+	  else
+	    size = GET_MODE_SIZE (args[i].mode);
 
 	  /* Handle calls that pass values in multiple non-contiguous
 	     locations.  The Irix 6 ABI has examples of this.  */
 
 	  if (GET_CODE (reg) == PARALLEL)
-	    emit_group_load (reg, args[i].value,
-			     int_size_in_bytes (TREE_TYPE (args[i].tree_value)));
+	    {
+	      tree type = TREE_TYPE (args[i].tree_value);
+	      emit_group_load (reg, args[i].value, type,
+			       int_size_in_bytes (type));
+	    }
 
 	  /* If simple case, just do move.  If normal partial, store_one_arg
 	     has already loaded the register for us.  In all other cases,
 	     load the register(s) from memory.  */
 
-	  else if (nregs == -1)
+	  else if (nregs == -1
+		   && !(size < UNITS_PER_WORD
+			&& (args[i].locate.where_pad
+			    == (BYTES_BIG_ENDIAN ? upward : downward))))
 	    emit_move_insn (reg, args[i].value);
 
 	  /* If we have pre-computed the values to put in the registers in
@@ -1728,9 +1739,42 @@ load_register_parameters (args, num_actu
 			      args[i].aligned_regs[j]);
 
 	  else if (partial == 0 || args[i].pass_on_stack)
-	    move_block_to_reg (REGNO (reg),
-			       validize_mem (args[i].value), nregs,
-			       args[i].mode);
+	    {
+	      rtx mem = validize_mem (args[i].value);
+
+	      /* Handle case where we have a value that needs shifting
+		 up to the msb.  eg. a QImode value and we're padding
+		 upward on a BYTES_BIG_ENDIAN machine.  */
+	      if (nregs == -1)
+		{
+		  rtx ri = gen_rtx_REG (word_mode, REGNO (reg));
+		  rtx x;
+		  int shift = (UNITS_PER_WORD - size) * BITS_PER_UNIT;
+		  x = expand_binop (word_mode, ashl_optab, mem,
+				    GEN_INT (shift), ri, 1, OPTAB_WIDEN);
+		  if (x != ri)
+		    emit_move_insn (ri, x);
+		}
+
+	      /* Handle a BLKmode that needs shifting.  */
+	      else if (nregs == 1 && size < UNITS_PER_WORD
+		       && args[i].locate.where_pad == downward)
+		{
+		  rtx tem = operand_subword_force (mem, 0, args[i].mode);
+		  rtx ri = gen_rtx_REG (word_mode, REGNO (reg));
+		  rtx x = gen_reg_rtx (word_mode);
+		  int shift = (UNITS_PER_WORD - size) * BITS_PER_UNIT;
+		  optab dir = BYTES_BIG_ENDIAN ? lshr_optab : ashl_optab;
+
+		  emit_move_insn (x, tem);
+		  x = expand_binop (word_mode, dir, x, GEN_INT (shift),
+				    ri, 1, OPTAB_WIDEN);
+		  if (x != ri)
+		    emit_move_insn (ri, x);
+		}
+	      else
+		move_block_to_reg (REGNO (reg), mem, nregs, args[i].mode);
+	    }
 
 	  /* When a parameter is a block, and perhaps in other cases, it is
 	     possible that it did a load from an argument slot that was
@@ -3225,7 +3269,7 @@ expand_call (exp, target, ignore)
 	    }
 
 	  if (! rtx_equal_p (target, valreg))
-	    emit_group_store (target, valreg,
+	    emit_group_store (target, valreg, TREE_TYPE (exp),
 			      int_size_in_bytes (TREE_TYPE (exp)));
 
 	  /* We can not support sibling calls for this case.  */
@@ -3976,7 +4020,7 @@ emit_library_call_value_1 (retval, orgfu
       /* Handle calls that pass values in multiple non-contiguous
 	 locations.  The PA64 has examples of this for library calls.  */
       if (reg != 0 && GET_CODE (reg) == PARALLEL)
-	emit_group_load (reg, val, GET_MODE_SIZE (GET_MODE (val)));
+	emit_group_load (reg, val, NULL_TREE, GET_MODE_SIZE (GET_MODE (val)));
       else if (reg != 0 && partial == 0)
 	emit_move_insn (reg, val);
 
@@ -4080,7 +4124,7 @@ emit_library_call_value_1 (retval, orgfu
 	  if (GET_CODE (valreg) == PARALLEL)
 	    {
 	      temp = gen_reg_rtx (outmode);
-	      emit_group_store (temp, valreg, outmode);
+	      emit_group_store (temp, valreg, NULL_TREE, outmode);
 	      valreg = temp;
 	    }
 
@@ -4123,7 +4167,7 @@ emit_library_call_value_1 (retval, orgfu
 	{
 	  if (value == 0)
 	    value = gen_reg_rtx (outmode);
-	  emit_group_store (value, valreg, outmode);
+	  emit_group_store (value, valreg, NULL_TREE, outmode);
 	}
       else if (value != 0)
 	emit_move_insn (value, valreg);
diff -urp gcc2/gcc/function.c gcc3/gcc/function.c
--- gcc2/gcc/function.c	2003-04-24 17:52:45.000000000 +0930
+++ gcc3/gcc/function.c	2003-04-24 21:07:17.000000000 +0930
@@ -4623,6 +4623,8 @@ assign_parms (fndecl)
 						  offset_rtx));
 
 	set_mem_attributes (stack_parm, parm, 1);
+	if (entry_parm && MEM_ATTRS (stack_parm)->align < PARM_BOUNDARY)
+	  set_mem_align (stack_parm, PARM_BOUNDARY);
 
 	/* Set also REG_ATTRS if parameter was passed in a register.  */
 	if (entry_parm)
@@ -4654,6 +4656,7 @@ assign_parms (fndecl)
 	     locations.  The Irix 6 ABI has examples of this.  */
 	  if (GET_CODE (entry_parm) == PARALLEL)
 	    emit_group_store (validize_mem (stack_parm), entry_parm,
+			      TREE_TYPE (parm),
 			      int_size_in_bytes (TREE_TYPE (parm)));
 
 	  else
@@ -4757,7 +4760,10 @@ assign_parms (fndecl)
 
 	 Set DECL_RTL to that place.  */
 
-      if (nominal_mode == BLKmode || GET_CODE (entry_parm) == PARALLEL)
+      if (nominal_mode == BLKmode
+	  || (locate.where_pad == (BYTES_BIG_ENDIAN ? upward : downward)
+	      && GET_MODE_SIZE (promoted_mode) < UNITS_PER_WORD)
+	  || GET_CODE (entry_parm) == PARALLEL)
 	{
 	  /* If a BLKmode arrives in registers, copy it to a stack slot.
 	     Handle calls that pass values in multiple non-contiguous
@@ -4793,7 +4799,7 @@ assign_parms (fndecl)
 	      /* Handle calls that pass values in multiple non-contiguous
 		 locations.  The Irix 6 ABI has examples of this.  */
 	      if (GET_CODE (entry_parm) == PARALLEL)
-		emit_group_store (mem, entry_parm, size);
+		emit_group_store (mem, entry_parm, TREE_TYPE (parm), size);
 
 	      /* If SIZE is that of a mode no bigger than a word, just use
 		 that mode's store operation.  */
@@ -4802,7 +4808,10 @@ assign_parms (fndecl)
 		  enum machine_mode mode
 		    = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
 
-		  if (mode != BLKmode)
+		  if (mode != BLKmode
+		      && (size == UNITS_PER_WORD
+			  || (BLOCK_REG_PADDING (mode, TREE_TYPE (parm), 1)
+			      != (BYTES_BIG_ENDIAN ? upward : downward))))
 		    {
 		      rtx reg = gen_rtx_REG (mode, REGNO (entry_parm));
 		      emit_move_insn (change_address (mem, mode, 0), reg);
@@ -4813,7 +4822,8 @@ assign_parms (fndecl)
 		     to memory.  Note that the previous test doesn't
 		     handle all cases (e.g. SIZE == 3).  */
 		  else if (size != UNITS_PER_WORD
-			   && BYTES_BIG_ENDIAN)
+			   && (BLOCK_REG_PADDING (mode, TREE_TYPE (parm), 1)
+			       == downward))
 		    {
 		      rtx tem, x;
 		      int by = (UNITS_PER_WORD - size) * BITS_PER_UNIT;
@@ -5411,6 +5421,7 @@ locate_and_pad_parm (passed_mode, type, 
     = type ? size_in_bytes (type) : size_int (GET_MODE_SIZE (passed_mode));
   where_pad = FUNCTION_ARG_PADDING (passed_mode, type);
   boundary = FUNCTION_ARG_BOUNDARY (passed_mode, type);
+  locate->where_pad = where_pad;
 
 #ifdef ARGS_GROW_DOWNWARD
   locate->slot_offset.constant = -initial_offset_ptr->constant;
@@ -7142,6 +7153,7 @@ expand_function_end (filename, line, end
 		emit_group_move (real_decl_rtl, decl_rtl);
 	      else
 		emit_group_load (real_decl_rtl, decl_rtl,
+				 TREE_TYPE (decl_result),
 				 int_size_in_bytes (TREE_TYPE (decl_result)));
 	    }
 	  else
diff -urp gcc2/gcc/stmt.c gcc3/gcc/stmt.c
--- gcc2/gcc/stmt.c	2003-04-22 18:55:34.000000000 +0930
+++ gcc3/gcc/stmt.c	2003-04-24 21:07:18.000000000 +0930
@@ -3008,7 +3008,7 @@ expand_value_return (val)
 	val = convert_modes (mode, old_mode, val, unsignedp);
 #endif
       if (GET_CODE (return_reg) == PARALLEL)
-	emit_group_load (return_reg, val, int_size_in_bytes (type));
+	emit_group_load (return_reg, val, type, int_size_in_bytes (type));
       else
 	emit_move_insn (return_reg, val);
     }
diff -urp gcc2/gcc/Makefile.in gcc3/gcc/Makefile.in
--- gcc2/gcc/Makefile.in	2003-04-24 15:47:39.000000000 +0930
+++ gcc3/gcc/Makefile.in	2003-04-24 21:14:33.000000000 +0930
@@ -1531,7 +1531,7 @@ builtins.o : builtins.c $(CONFIG_H) $(SY
    $(RECOG_H) output.h typeclass.h hard-reg-set.h toplev.h hard-reg-set.h \
    except.h $(TM_P_H) $(PREDICT_H) libfuncs.h real.h langhooks.h
 calls.o : calls.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) flags.h \
-   $(EXPR_H) langhooks.h $(TARGET_H) \
+   $(EXPR_H) $(OPTABS_H) langhooks.h $(TARGET_H) \
    libfuncs.h $(REGS_H) toplev.h output.h function.h $(TIMEVAR_H) $(TM_P_H) cgraph.h except.h
 expmed.o : expmed.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) \
    flags.h insn-config.h $(EXPR_H) $(OPTABS_H) $(RECOG_H) real.h \
diff -urp gcc2/gcc/config/rs6000/linux64.h gcc3/gcc/config/rs6000/linux64.h
--- gcc2/gcc/config/rs6000/linux64.h	2003-04-22 18:55:34.000000000 +0930
+++ gcc3/gcc/config/rs6000/linux64.h	2003-04-24 21:07:17.000000000 +0930
@@ -49,6 +49,10 @@
 #undef	TARGET_64BIT
 #define	TARGET_64BIT		1
 
+/* And we're always big-endian.  */
+#undef	TARGET_BIG_ENDIAN
+#define TARGET_BIG_ENDIAN 1
+
 /* 64-bit PowerPC Linux always has a TOC.  */
 #undef  TARGET_NO_TOC
 #define TARGET_NO_TOC		0
@@ -143,8 +147,25 @@
 #undef  JUMP_TABLES_IN_TEXT_SECTION
 #define JUMP_TABLES_IN_TEXT_SECTION 1
 
-/* 64-bit PowerPC Linux always has GPR13 fixed.  */
-#define FIXED_R13		1
+/* The linux ppc64 ABI isn't explicit on whether aggregates smaller
+   than a doubleword should be padded upward or downward.  You could
+   reasonably assume that they follow the normal rules for structure
+   layout treating the parameter area as any other block of memory,
+   then map the reg param area to registers.  ie. pad upward.
+   Setting both of the following defines results in this behaviour.
+   Setting just the first one will result in aggregates that fit in a
+   doubleword being padded downward, and others being padded upward.
+   Not a bad idea as this results in struct { int x; } being passed
+   the same way as an int.  */
+#define AGGREGATE_PADDING_FIXED 1
+/* #define AGGREGATES_PAD_UPWARD_ALWAYS 1 */
+
+/* We don't want anything in the reg parm area being passed on the
+   stack.  */
+#define MUST_PASS_IN_STACK(MODE, TYPE)				\
+  ((TYPE) != 0							\
+   && (TREE_CODE (TYPE_SIZE (TYPE)) != INTEGER_CST		\
+       || TREE_ADDRESSABLE (TYPE)))
 
 /* __throw will restore its own return address to be the same as the
    return address of the function that the throw is being made to.
diff -urp gcc2/gcc/config/rs6000/rs6000.h gcc3/gcc/config/rs6000/rs6000.h
--- gcc2/gcc/config/rs6000/rs6000.h	2003-04-23 11:29:04.000000000 +0930
+++ gcc3/gcc/config/rs6000/rs6000.h	2003-04-24 21:07:17.000000000 +0930
@@ -1701,7 +1701,6 @@ typedef struct rs6000_args
   int fregno;			/* next available FP register */
   int vregno;			/* next available AltiVec register */
   int nargs_prototype;		/* # args left in the current prototype */
-  int orig_nargs;		/* Original value of nargs_prototype */
   int prototype;		/* Whether a prototype was defined */
   int call_cookie;		/* Do special things for this call */
   int sysv_gregno;		/* next available GP register */
@@ -1832,13 +1831,8 @@ typedef struct rs6000_args
 #define EXPAND_BUILTIN_VA_ARG(valist, type) \
   rs6000_va_arg (valist, type)
 
-/* For AIX, the rule is that structures are passed left-aligned in
-   their stack slot.  However, GCC does not presently do this:
-   structures which are the same size as integer types are passed
-   right-aligned, as if they were in fact integers.  This only
-   matters for structures of size 1 or 2, or 4 when TARGET_64BIT.
-   ABI_V4 does not use std_expand_builtin_va_arg.  */
-#define PAD_VARARGS_DOWN (TYPE_MODE (type) != BLKmode)
+#define PAD_VARARGS_DOWN \
+   (FUNCTION_ARG_PADDING (TYPE_MODE (type), type) == downward)
 
 /* Define this macro to be a nonzero value if the location where a function
    argument is passed depends on whether or not it is a named argument.  */
diff -urp gcc2/gcc/config/rs6000/rs6000.c gcc3/gcc/config/rs6000/rs6000.c
--- gcc2/gcc/config/rs6000/rs6000.c	2003-04-24 14:46:04.000000000 +0930
+++ gcc3/gcc/config/rs6000/rs6000.c	2003-04-24 21:43:35.000000000 +0930
@@ -3132,8 +3132,6 @@ init_cumulative_args (cum, fntype, libna
   else
     cum->nargs_prototype = 0;
 
-  cum->orig_nargs = cum->nargs_prototype;
-
   /* Check for a longcall attribute.  */
   if (fntype
       && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
@@ -3172,8 +3170,37 @@ function_arg_padding (mode, type)
      enum machine_mode mode;
      tree type;
 {
+#if !AGGREGATE_PADDING_FIXED
+  /* GCC used to pass structures of the same size as integer types as
+     if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
+     ie. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
+     passed padded downward, except that -mstrict-align further
+     muddied the water in that multi-component structures of 2 and 4
+     bytes in size were passed padded upward.
+
+     The following arranges for best compatibility with previous
+     versions of gcc, but removes the -mstrict-align dependency.  */
+  if (BYTES_BIG_ENDIAN)
+    {
+      HOST_WIDE_INT size = 0;
+
+      if (mode == BLKmode)
+	{
+	  if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
+	    size = int_size_in_bytes (type);
+	}
+      else
+	size = GET_MODE_SIZE (mode);
+
+      if (size == 1 || size == 2 || size == 4)
+	return downward;
+    }
+  return upward;
+#else
+#if AGGREGATES_PAD_UPWARD_ALWAYS
   if (type != 0 && AGGREGATE_TYPE_P (type))
     return upward;
+#endif
 
   /* This is the default definition.  */
   return (! BYTES_BIG_ENDIAN
@@ -3183,6 +3210,7 @@ function_arg_padding (mode, type)
                  && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
               : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
              ? downward : upward));
+#endif
 }
 
 /* If defined, a C expression that gives the alignment boundary, in bits,


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]