This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] PR target/18916; Function arg passing mem align fixes.


This patch fixes g++.dg/ext/altivec-3.C on powerpc64-linux, which started
failing when rs6000.c:expand_block_move first used altivec insns.

The underlying problem is that various places in gcc's function arg
passing machinery set alignment of rtx mems from the type of the arg,
*not* the actual alignment of the arg, which may be less.  Tests of
alignment in expand_block_move thus are not testing the actual
alignment, which may not be suitable for altivec.

	PR target/18916
	* builtins.c (std_gimplify_va_arg_expr): Adjust alignment of *ap.
	* expr.h (struct locate_and_pad_arg_data): Add "boundary".
	* function.c (locate_and_pad_parm): Set new field.
	(assign_parm_find_stack_rtl): Use it instead of FUNCTION_ARG_BOUNDARY.
	Tweak where_pad test to include "none".  Always set mem align for
	stack_parm.
	* calls.c (compute_argument_addresses): Override alignment of stack
	arg calculated from its type with the alignment given by
	FUNCTION_ARG_BOUNDARY.
	(store_one_arg): Likewise.

Bootstrapped and regression tested powerpc-linux, powerpc64-linux and
i686-linux.  By inspection of assembly code, this patch also fixes
PR18916.  OK to apply?

It occurred to me while developing this patch that in the case of fixed
args we can probably calculate the actual alignment from the offset and
known stack alignment.  This could allow us to set a more strict
alignment than that given by FUNCTION_ARG_BOUNDARY, which in turn might
allow wider reg moves to be used in setting up args.  Perhaps I'll look
at that for 4.1.

diff -urp -xCVS -x'*~' -x'.#*' gcc-virgin/gcc/builtins.c gcc-current/gcc/builtins.c
--- gcc-virgin/gcc/builtins.c	2004-12-18 20:17:40.000000000 +1030
+++ gcc-current/gcc/builtins.c	2005-01-01 12:47:47.000452601 +1030
@@ -3928,6 +3928,18 @@ std_gimplify_va_arg_expr (tree valist, t
 		  build2 (BIT_AND_EXPR, TREE_TYPE (valist), valist_tmp, t));
       gimplify_and_add (t, pre_p);
     }
+  else
+    boundary = align;
+
+  /* If the actual alignment is less than the alignment of the type,
+     adjust the type accordingly so that we don't assume strict alignment
+     when dereferencing the pointer.  */
+  boundary *= BITS_PER_UNIT;
+  if (boundary < TYPE_ALIGN (type))
+    {
+      type = build_variant_type_copy (type);
+      TYPE_ALIGN (type) = boundary;
+    }
 
   /* Compute the rounded size of the type.  */
   type_size = size_in_bytes (type);
diff -urp -xCVS -x'*~' -x'.#*' gcc-virgin/gcc/expr.h gcc-current/gcc/expr.h
--- gcc-virgin/gcc/expr.h	2004-12-31 08:11:22.000000000 +1030
+++ gcc-current/gcc/expr.h	2004-12-31 21:25:07.000000000 +1030
@@ -116,6 +116,8 @@ struct locate_and_pad_arg_data
   struct args_size alignment_pad;
   /* Which way we should pad this arg.  */
   enum direction where_pad;
+  /* slot_offset is at least this aligned.  */
+  unsigned int boundary;
 };
 
 /* Add the value of the tree INC to the `struct args_size' TO.  */
diff -urp -xCVS -x'*~' -x'.#*' gcc-virgin/gcc/function.c gcc-current/gcc/function.c
--- gcc-virgin/gcc/function.c	2004-12-31 08:11:22.000000000 +1030
+++ gcc-current/gcc/function.c	2005-01-01 11:22:23.235835612 +1030
@@ -2403,22 +2403,21 @@ assign_parm_find_stack_rtl (tree parm, s
 
   set_mem_attributes (stack_parm, parm, 1);
 
-  boundary = FUNCTION_ARG_BOUNDARY (data->promoted_mode, data->passed_type);
-  align = 0;
+  boundary = data->locate.boundary;
+  align = BITS_PER_UNIT;
 
   /* If we're padding upward, we know that the alignment of the slot
      is FUNCTION_ARG_BOUNDARY.  If we're using slot_offset, we're
      intentionally forcing upward padding.  Otherwise we have to come
      up with a guess at the alignment based on OFFSET_RTX.  */
-  if (data->locate.where_pad == upward || data->entry_parm)
+  if (data->locate.where_pad != downward || data->entry_parm)
     align = boundary;
   else if (GET_CODE (offset_rtx) == CONST_INT)
     {
       align = INTVAL (offset_rtx) * BITS_PER_UNIT | boundary;
       align = align & -align;
     }
-  if (align > 0)
-    set_mem_align (stack_parm, align);
+  set_mem_align (stack_parm, align);
 
   if (data->entry_parm)
     set_reg_attrs_for_parm (data->entry_parm, stack_parm);
@@ -3409,6 +3408,7 @@ locate_and_pad_parm (enum machine_mode p
   where_pad = FUNCTION_ARG_PADDING (passed_mode, type);
   boundary = FUNCTION_ARG_BOUNDARY (passed_mode, type);
   locate->where_pad = where_pad;
+  locate->boundary = boundary;
 
 #ifdef ARGS_GROW_DOWNWARD
   locate->slot_offset.constant = -initial_offset_ptr->constant;
diff -urp -xCVS -x'*~' -x'.#*' gcc-virgin/gcc/calls.c gcc-current/gcc/calls.c
--- gcc-virgin/gcc/calls.c	2004-12-31 08:11:22.000000000 +1030
+++ gcc-current/gcc/calls.c	2005-01-01 11:24:23.110005404 +1030
@@ -1357,6 +1357,7 @@ compute_argument_addresses (struct arg_d
 	  rtx offset = ARGS_SIZE_RTX (args[i].locate.offset);
 	  rtx slot_offset = ARGS_SIZE_RTX (args[i].locate.slot_offset);
 	  rtx addr;
+	  unsigned int align, boundary;
 
 	  /* Skip this parm if it will not be passed on the stack.  */
 	  if (! args[i].pass_on_stack && args[i].reg != 0)
@@ -1369,9 +1370,18 @@ compute_argument_addresses (struct arg_d
 
 	  addr = plus_constant (addr, arg_offset);
 	  args[i].stack = gen_rtx_MEM (args[i].mode, addr);
-	  set_mem_align (args[i].stack, PARM_BOUNDARY);
 	  set_mem_attributes (args[i].stack,
 			      TREE_TYPE (args[i].tree_value), 1);
+	  align = BITS_PER_UNIT;
+	  boundary = args[i].locate.boundary;
+	  if (args[i].locate.where_pad != downward)
+	    align = boundary;
+	  else if (GET_CODE (offset) == CONST_INT)
+	    {
+	      align = INTVAL (offset) * BITS_PER_UNIT | boundary;
+	      align = align & -align;
+	    }
+	  set_mem_align (args[i].stack, align);
 
 	  if (GET_CODE (slot_offset) == CONST_INT)
 	    addr = plus_constant (arg_reg, INTVAL (slot_offset));
@@ -1380,9 +1390,9 @@ compute_argument_addresses (struct arg_d
 
 	  addr = plus_constant (addr, arg_offset);
 	  args[i].stack_slot = gen_rtx_MEM (args[i].mode, addr);
-	  set_mem_align (args[i].stack_slot, PARM_BOUNDARY);
 	  set_mem_attributes (args[i].stack_slot,
 			      TREE_TYPE (args[i].tree_value), 1);
+	  set_mem_align (args[i].stack_slot, args[i].locate.boundary);
 
 	  /* Function incoming arguments may overlap with sibling call
 	     outgoing arguments and we cannot allow reordering of reads
@@ -4119,9 +4129,7 @@ store_one_arg (struct arg_data *arg, rtx
 				  NULL_RTX, TYPE_MODE (sizetype), 0);
 	}
 
-      /* Some types will require stricter alignment, which will be
-	 provided for elsewhere in argument layout.  */
-      parm_align = MAX (PARM_BOUNDARY, TYPE_ALIGN (TREE_TYPE (pval)));
+      parm_align = arg->locate.boundary;
 
       /* When an argument is padded down, the block is aligned to
 	 PARM_BOUNDARY, but the actual argument isn't.  */

-- 
Alan Modra
IBM OzLabs - Linux Technology Centre


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]