This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [2/6] stack branch merge to trunk -- Collect alignment info


On Tue, Jun 10, 2008 at 02:37:50PM -0700, H.J. Lu wrote:
> Hi,
> 
> This patch collects alignment information.  If a backend doesn't
> support stack alignment, there will be no change. The only
> non-trivial change is in reload1.c. We move setting
> frame_pointer_needed from init_elim_table in reload1.c
> to compute_regsets in global.c since it can't change
> once it is computed in compute_regsets first.  Bernd, Ulrich,
> can you review it?
> 
> Thanks.

I updated the middle end patch not to use DRAP for eh_return.  Ian,
Bernd, Ulrich, do you have any comments?

Thanks.


H.J.
---
2008-06-13  Joey Ye  <joey.ye@intel.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	* builtins.c (expand_builtin_setjmp_receiver): Replace
	virtual_incoming_args_rtx with
	crtl->args.internal_arg_pointer.
	(expand_builtin_apply_args_1): Likewise.
	(expand_builtin_longjmp): Need DRAP for stack alignment.
	(expand_builtin_apply): Likewise.

	* caller-save.c (setup_save_areas): Call assign_stack_local_1
	instead of assign_stack_local to allow alignment reduction.

	* calls.c (emit_call_1): Need DRAP for stack alignment if
	return pops.
	(expand_call): Replace virtual_incoming_args_rtx with
	crtl->args.internal_arg_pointer.
	* stmt.c (expand_nl_goto_receiver): Likewise.

	* cfgexpand.c (get_decl_align_unit): Estimate stack variable
	alignment and store to stack_alignment_estimated and
	max_used_stack_slot_alignment.
	(expand_one_var): Likewise.
	(expand_stack_alignment): New function.
	(tree_expand_cfg): Initialize max_used_stack_slot_alignment
	and stack_alignment_estimated fields in rtl_data.  Call
	expand_stack_alignment at end.

	* defaults.h (INCOMING_STACK_BOUNDARY): New.
	(MAX_STACK_ALIGNMENT): Likewise.
	(MAX_SUPPORTED_STACK_ALIGNMENT): Likewise.
	(SUPPORTS_STACK_ALIGNMENT): Likewise.

	* emit-rtl.c (gen_reg_rtx): Update the stack alignment estimate
	when generating virtual registers.

	* function.c (assign_stack_local): Renamed to ...
	(assign_stack_local_1): This.  Add a parameter to indicate
	if it is OK to reduce alignment.
	(assign_stack_local): Use it.
	(instantiate_new_reg): Instantiate virtual incoming args rtx
	to vDRAP if stack realignment and DRAP is needed.
	(assign_parms): Collect parameter/return type alignment and
	contribute to stack_alignment_estimated.
	(locate_and_pad_parm): Likewise.
	(get_arg_pointer_save_area): Replace virtual_incoming_args_rtx
	with crtl->args.internal_arg_pointer.

	* global.c (compute_regsets): Set frame_pointer_needed here.
	Support stack alignment.

	* reload1.c (update_eliminables): Stack realign needs frame
	pointer.
	(init_elim_table): Don't set frame_pointer_needed here.

	* rtl.h (assign_stack_local_1): Declare new function.

	* target-def.h (TARGET_UPDATE_STACK_BOUNDARY): New.
	(TARGET_GET_DRAP_RTX): Likewise.
	(TARGET_CALLS): Add TARGET_UPDATE_STACK_BOUNDARY and
	TARGET_GET_DRAP_RTX.

	* target.h (gcc_target): Add update_stack_boundary and
	get_drap_rtx.

	* tree-vectorizer.c (vect_can_force_dr_alignment_p): Replace
	STACK_BOUNDARY with MAX_STACK_ALIGNMENT.

diff -x doc -x config -x testsuite -x ChangeLog.stackalign \
		-x function.h -x dwarf2out.c -x REVISION -x .svn \
	 	-upr ../../gcc/gcc gcc/gcc
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/builtins.c gcc/gcc/builtins.c
--- ../../gcc/gcc/builtins.c	2008-06-13 06:35:11.000000000 -0700
+++ gcc/gcc/builtins.c	2008-06-13 06:29:29.000000000 -0700
@@ -743,7 +743,7 @@ expand_builtin_setjmp_receiver (rtx rece
 	{
 	  /* Now restore our arg pointer from the address at which it
 	     was saved in our stack frame.  */
-	  emit_move_insn (virtual_incoming_args_rtx,
+	  emit_move_insn (crtl->args.internal_arg_pointer,
 			  copy_to_reg (get_arg_pointer_save_area ()));
 	}
     }
@@ -778,6 +778,11 @@ expand_builtin_longjmp (rtx buf_addr, rt
   rtx fp, lab, stack, insn, last;
   enum machine_mode sa_mode = STACK_SAVEAREA_MODE (SAVE_NONLOCAL);
 
+  /* DRAP is needed for stack realignment if longjmp is expanded in the
+     current function.  */
+  if (SUPPORTS_STACK_ALIGNMENT)
+    crtl->need_drap = true;
+
   if (setjmp_alias_set == -1)
     setjmp_alias_set = new_alias_set ();
 
@@ -1342,7 +1347,7 @@ expand_builtin_apply_args_1 (void)
       }
 
   /* Save the arg pointer to the block.  */
-  tem = copy_to_reg (virtual_incoming_args_rtx);
+  tem = copy_to_reg (crtl->args.internal_arg_pointer);
 #ifdef STACK_GROWS_DOWNWARD
   /* We need the pointer as the caller actually passed them to us, not
      as we might have pretended they were passed.  Make sure it's a valid
@@ -1450,6 +1455,14 @@ expand_builtin_apply (rtx function, rtx 
   /* Allocate a block of memory onto the stack and copy the memory
      arguments to the outgoing arguments address.  */
   allocate_dynamic_stack_space (argsize, 0, BITS_PER_UNIT);
+
+  /* Set DRAP flag to true, even though allocate_dynamic_stack_space
+     may have already set current_function_calls_alloca to true.
+     current_function_calls_alloca won't be set if argsize is zero,
+     so we have to guarantee need_drap is true here.  */
+  if (SUPPORTS_STACK_ALIGNMENT)
+    crtl->need_drap = true;
+
   dest = virtual_outgoing_args_rtx;
 #ifndef STACK_GROWS_DOWNWARD
   if (GET_CODE (argsize) == CONST_INT)
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/caller-save.c gcc/gcc/caller-save.c
--- ../../gcc/gcc/caller-save.c	2008-05-26 14:04:10.000000000 -0700
+++ gcc/gcc/caller-save.c	2008-06-08 13:35:06.000000000 -0700
@@ -356,10 +356,16 @@ setup_save_areas (void)
 	if (! do_save)
 	  continue;
 
-	/* We have found an acceptable mode to store in.  */
+	/* We have found an acceptable mode to store in.  Since hard
+	   register is always saved in the widest mode available,
+	   the mode may be wider than necessary, it is OK to reduce
+	   the alignment of spill space.  We will verify that it is
+	   equal to or greater than required when we restore and save
+	   the hard register in insert_restore and insert_save.  */
 	regno_save_mem[i][j]
-	  = assign_stack_local (regno_save_mode[i][j],
-				GET_MODE_SIZE (regno_save_mode[i][j]), 0);
+	  = assign_stack_local_1 (regno_save_mode[i][j],
+				GET_MODE_SIZE (regno_save_mode[i][j]),
+				0, true);
 
 	/* Setup single word save area just in case...  */
 	for (k = 0; k < j; k++)
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/calls.c gcc/gcc/calls.c
--- ../../gcc/gcc/calls.c	2008-05-15 08:33:48.000000000 -0700
+++ gcc/gcc/calls.c	2008-05-28 21:01:40.000000000 -0700
@@ -417,6 +417,10 @@ emit_call_1 (rtx funexp, tree fntree, tr
       rounded_stack_size -= n_popped;
       rounded_stack_size_rtx = GEN_INT (rounded_stack_size);
       stack_pointer_delta -= n_popped;
+
+      /* If the return pops arguments, stack realign must use DRAP.  */
+      if (SUPPORTS_STACK_ALIGNMENT)
+        crtl->need_drap = true;
     }
 
   if (!ACCUMULATE_OUTGOING_ARGS)
@@ -2390,7 +2394,7 @@ expand_call (tree exp, rtx target, int i
 	 incoming argument block.  */
       if (pass == 0)
 	{
-	  argblock = virtual_incoming_args_rtx;
+	  argblock = crtl->args.internal_arg_pointer;
 	  argblock
 #ifdef STACK_GROWS_DOWNWARD
 	    = plus_constant (argblock, crtl->args.pretend_args_size);
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/cfgexpand.c gcc/gcc/cfgexpand.c
--- ../../gcc/gcc/cfgexpand.c	2008-06-06 15:10:06.000000000 -0700
+++ gcc/gcc/cfgexpand.c	2008-06-13 06:29:29.000000000 -0700
@@ -157,10 +157,25 @@ get_decl_align_unit (tree decl)
 
   align = DECL_ALIGN (decl);
   align = LOCAL_ALIGNMENT (TREE_TYPE (decl), align);
-  if (align > PREFERRED_STACK_BOUNDARY)
-    align = PREFERRED_STACK_BOUNDARY;
+
+  if (align > MAX_SUPPORTED_STACK_ALIGNMENT)
+    align = MAX_SUPPORTED_STACK_ALIGNMENT;
+
+  if (SUPPORTS_STACK_ALIGNMENT)
+    {
+      if (crtl->stack_alignment_estimated < align)
+	{
+	  gcc_assert(!crtl->stack_realign_processed);
+          crtl->stack_alignment_estimated = align;
+	}
+    }
+
+  /* stack_alignment_needed > PREFERRED_STACK_BOUNDARY is permitted.
+     So here we only make sure stack_alignment_needed >= align.  */
   if (crtl->stack_alignment_needed < align)
     crtl->stack_alignment_needed = align;
+  if (crtl->max_used_stack_slot_alignment < crtl->stack_alignment_needed)
+    crtl->max_used_stack_slot_alignment = crtl->stack_alignment_needed;
 
   return align / BITS_PER_UNIT;
 }
@@ -739,6 +754,31 @@ defer_stack_allocation (tree var, bool t
 static HOST_WIDE_INT
 expand_one_var (tree var, bool toplevel, bool really_expand)
 {
+  if (SUPPORTS_STACK_ALIGNMENT
+      && TREE_TYPE (var) != error_mark_node
+      && TREE_CODE (var) == VAR_DECL)
+    {
+      unsigned int align;
+
+      /* Because we don't know if VAR will be in register or on stack,
+	 we conservatively assume it will be on stack even if VAR is
+	 eventually put into register after RA pass.  For non-automatic
+	 variables, which won't be on stack, we collect alignment of
+	 type and ignore user specified alignment.  */
+      if (TREE_STATIC (var) || DECL_EXTERNAL (var))
+	align = TYPE_ALIGN (TREE_TYPE (var));
+      else
+	align = DECL_ALIGN (var);
+
+      if (crtl->stack_alignment_estimated < align)
+        {
+          /* stack_alignment_estimated shouldn't change after the stack
+             realign decision has been made.  */
+          gcc_assert(!crtl->stack_realign_processed);
+	  crtl->stack_alignment_estimated = align;
+	}
+    }
+
   if (TREE_CODE (var) != VAR_DECL)
     ;
   else if (DECL_EXTERNAL (var))
@@ -1823,6 +1863,66 @@ discover_nonconstant_array_refs (void)
     }
 }
 
+/* This function sets crtl->args.internal_arg_pointer to a virtual
+   register if DRAP is needed.  Local register allocator will replace
+   virtual_incoming_args_rtx with the virtual register.  */
+
+static void
+expand_stack_alignment (void)
+{
+  rtx drap_rtx;
+  unsigned int preferred_stack_boundary;
+
+  if (! SUPPORTS_STACK_ALIGNMENT)
+    return;
+  
+  if (cfun->calls_alloca
+      || cfun->has_nonlocal_label
+      || crtl->has_nonlocal_goto)
+    crtl->need_drap = true;
+
+  gcc_assert (crtl->stack_alignment_needed
+	      <= crtl->stack_alignment_estimated);
+
+  /* Update stack boundary if needed.  */
+  if (targetm.calls.update_stack_boundary)
+    targetm.calls.update_stack_boundary (); 
+
+  /* Update crtl->stack_alignment_estimated and use it later to align
+     stack.  We check PREFERRED_STACK_BOUNDARY if there may be non-call
+     exceptions since callgraph doesn't collect incoming stack alignment
+     in this case.  */
+  if (flag_non_call_exceptions
+      && PREFERRED_STACK_BOUNDARY > crtl->preferred_stack_boundary)
+    preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
+  else
+    preferred_stack_boundary = crtl->preferred_stack_boundary;
+  if (preferred_stack_boundary > crtl->stack_alignment_estimated)
+    crtl->stack_alignment_estimated = preferred_stack_boundary;
+  if (preferred_stack_boundary > crtl->stack_alignment_needed)
+    crtl->stack_alignment_needed = preferred_stack_boundary;
+
+  crtl->stack_realign_needed
+    = INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
+
+  crtl->stack_realign_processed = true;
+
+  /* Target has to redefine TARGET_GET_DRAP_RTX to support stack
+     alignment.  */
+  gcc_assert (targetm.calls.get_drap_rtx != NULL);
+  drap_rtx = targetm.calls.get_drap_rtx (); 
+
+  /* Do nothing if NULL is returned, which means DRAP is not needed.  */
+  if (NULL != drap_rtx)
+    {
+      crtl->args.internal_arg_pointer = drap_rtx;
+
+      /* Call fixup_tail_calls to clean up REG_EQUIV note if DRAP is
+         needed. */
+      fixup_tail_calls ();
+    }
+}
+
 /* Translate the intermediate representation contained in the CFG
    from GIMPLE trees to RTL.
 
@@ -1859,6 +1959,8 @@ tree_expand_cfg (void)
 
   targetm.expand_to_rtl_hook ();
   crtl->stack_alignment_needed = STACK_BOUNDARY;
+  crtl->max_used_stack_slot_alignment = STACK_BOUNDARY;
+  crtl->stack_alignment_estimated = STACK_BOUNDARY;
   crtl->preferred_stack_boundary = STACK_BOUNDARY;
   cfun->cfg->max_jumptable_ents = 0;
 
@@ -1930,6 +2032,9 @@ tree_expand_cfg (void)
   sbitmap_free (blocks);
 
   compact_blocks ();
+
+  expand_stack_alignment ();
+
 #ifdef ENABLE_CHECKING
   verify_flow_info ();
 #endif
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/defaults.h gcc/gcc/defaults.h
--- ../../gcc/gcc/defaults.h	2008-05-27 17:15:37.000000000 -0700
+++ gcc/gcc/defaults.h	2008-06-08 13:33:28.000000000 -0700
@@ -557,6 +557,12 @@ along with GCC; see the file COPYING3.  
 #define PREFERRED_STACK_BOUNDARY STACK_BOUNDARY
 #endif
 
+/* Set INCOMING_STACK_BOUNDARY to PREFERRED_STACK_BOUNDARY if it is not
+   defined.  */
+#ifndef INCOMING_STACK_BOUNDARY
+#define INCOMING_STACK_BOUNDARY PREFERRED_STACK_BOUNDARY
+#endif
+
 #ifndef TARGET_DEFAULT_PACK_STRUCT
 #define TARGET_DEFAULT_PACK_STRUCT 0
 #endif
@@ -944,6 +950,21 @@ along with GCC; see the file COPYING3.  
 #define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 0
 #endif
 
+/* MAX_STACK_ALIGNMENT is the maximum stack alignment guaranteed by
+   the backend.  MAX_SUPPORTED_STACK_ALIGNMENT is the maximum best
+   effort stack alignment supported by the backend.  If the backend
+   supports stack alignment, MAX_SUPPORTED_STACK_ALIGNMENT and
+   MAX_STACK_ALIGNMENT are the same.  Otherwise, the incoming stack
+   boundary will limit the maximum guaranteed stack alignment.  */
+#ifdef MAX_STACK_ALIGNMENT
+#define MAX_SUPPORTED_STACK_ALIGNMENT MAX_STACK_ALIGNMENT
+#else
+#define MAX_STACK_ALIGNMENT STACK_BOUNDARY
+#define MAX_SUPPORTED_STACK_ALIGNMENT PREFERRED_STACK_BOUNDARY
+#endif
+
+#define SUPPORTS_STACK_ALIGNMENT (MAX_STACK_ALIGNMENT > STACK_BOUNDARY)
+
 #ifndef LOCAL_ALIGNMENT
 #define LOCAL_ALIGNMENT(TYPE, ALIGNMENT) ALIGNMENT
 #endif
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/emit-rtl.c gcc/gcc/emit-rtl.c
--- ../../gcc/gcc/emit-rtl.c	2008-06-06 15:10:06.000000000 -0700
+++ gcc/gcc/emit-rtl.c	2008-06-08 13:33:31.000000000 -0700
@@ -864,9 +864,18 @@ rtx
 gen_reg_rtx (enum machine_mode mode)
 {
   rtx val;
+  unsigned int align = GET_MODE_ALIGNMENT (mode);
 
   gcc_assert (can_create_pseudo_p ());
 
+  /* If a virtual register with bigger mode alignment is generated,
+     increase stack alignment estimation because it might be spilled
+     to stack later.  */
+  if (SUPPORTS_STACK_ALIGNMENT 
+      && crtl->stack_alignment_estimated < align
+      && !crtl->stack_realign_processed)
+    crtl->stack_alignment_estimated = align;
+
   if (generating_concat_p
       && (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
 	  || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT))
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/function.c gcc/gcc/function.c
--- ../../gcc/gcc/function.c	2008-06-06 15:10:06.000000000 -0700
+++ gcc/gcc/function.c	2008-06-08 13:35:06.000000000 -0700
@@ -350,10 +350,14 @@ get_stack_local_alignment (tree type, en
    -2 means use BITS_PER_UNIT,
    positive specifies alignment boundary in bits.
 
+   If REDUCE_ALIGNMENT_OK is true, it is OK to reduce alignment.
+
    We do not round to stack_boundary here.  */
 
 rtx
-assign_stack_local (enum machine_mode mode, HOST_WIDE_INT size, int align)
+assign_stack_local_1 (enum machine_mode mode, HOST_WIDE_INT size,
+		      int align,
+		      bool reduce_alignment_ok ATTRIBUTE_UNUSED)
 {
   rtx x, addr;
   int bigend_correction = 0;
@@ -375,17 +379,52 @@ assign_stack_local (enum machine_mode mo
   else
     alignment = align / BITS_PER_UNIT;
 
+  alignment_in_bits = alignment * BITS_PER_UNIT;
+
   if (FRAME_GROWS_DOWNWARD)
     frame_offset -= size;
 
-  /* Ignore alignment we can't do with expected alignment of the boundary.  */
-  if (alignment * BITS_PER_UNIT > PREFERRED_STACK_BOUNDARY)
-    alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
+  /* Ignore alignment if it exceeds MAX_SUPPORTED_STACK_ALIGNMENT.  */
+  if (alignment_in_bits >= MAX_SUPPORTED_STACK_ALIGNMENT)
+    {
+      alignment_in_bits = MAX_SUPPORTED_STACK_ALIGNMENT;
+      alignment = alignment_in_bits / BITS_PER_UNIT;
+    }
 
-  alignment_in_bits = alignment * BITS_PER_UNIT;
+  if (SUPPORTS_STACK_ALIGNMENT)
+    {
+      if (crtl->stack_alignment_estimated < alignment_in_bits)
+	{
+          if (!crtl->stack_realign_processed)
+	    crtl->stack_alignment_estimated = alignment_in_bits;
+          else
+	    {
+	      /* If stack is realigned and stack alignment value
+		 hasn't been finalized, it is OK not to increase
+		 stack_alignment_estimated.  The bigger alignment
+		 requirement is recorded in stack_alignment_needed
+		 below.  */
+	      gcc_assert (!crtl->stack_realign_finalized);
+	      if (!crtl->stack_realign_needed)
+		{
+		  /* It is OK to reduce the alignment as long as the
+		     requested size is 0 or the estimated stack
+		     alignment >= mode alignment.  */
+		  gcc_assert (reduce_alignment_ok
+		              || size == 0
+			      || (crtl->stack_alignment_estimated
+				  >= GET_MODE_ALIGNMENT (mode)));
+		  alignment_in_bits = crtl->stack_alignment_estimated;
+		  alignment = alignment_in_bits / BITS_PER_UNIT;
+		}
+	    }
+	}
+    }
 
   if (crtl->stack_alignment_needed < alignment_in_bits)
     crtl->stack_alignment_needed = alignment_in_bits;
+  if (crtl->max_used_stack_slot_alignment < crtl->stack_alignment_needed)
+    crtl->max_used_stack_slot_alignment = crtl->stack_alignment_needed;
 
   /* Calculate how many bytes the start of local variables is off from
      stack alignment.  */
@@ -449,6 +488,14 @@ assign_stack_local (enum machine_mode mo
 
   return x;
 }
+
+/* Wrap up assign_stack_local_1 with last parameter as false.  */
+
+rtx
+assign_stack_local (enum machine_mode mode, HOST_WIDE_INT size, int align)
+{
+  return assign_stack_local_1 (mode, size, align, false);
+}
 
 /* Removes temporary slot TEMP from LIST.  */
 
@@ -1167,7 +1214,17 @@ instantiate_new_reg (rtx x, HOST_WIDE_IN
   HOST_WIDE_INT offset;
 
   if (x == virtual_incoming_args_rtx)
-    new = arg_pointer_rtx, offset = in_arg_offset;
+    {
+      /* Replace virtual_incoming_args_rtx with internal arg pointer here.  */
+      if (crtl->args.internal_arg_pointer != virtual_incoming_args_rtx)
+        {
+          gcc_assert (stack_realign_drap);
+          new = crtl->args.internal_arg_pointer;
+          offset = 0;
+        }
+      else
+        new = arg_pointer_rtx, offset = in_arg_offset;
+    }
   else if (x == virtual_stack_vars_rtx)
     new = frame_pointer_rtx, offset = var_offset;
   else if (x == virtual_stack_dynamic_rtx)
@@ -2969,6 +3026,20 @@ assign_parms (tree fndecl)
 	  continue;
 	}
 
+      /* Estimate stack alignment from parameter alignment.  */
+      if (SUPPORTS_STACK_ALIGNMENT)
+        {
+          unsigned int align = FUNCTION_ARG_BOUNDARY (data.promoted_mode,
+						      data.passed_type);
+	  if (TYPE_ALIGN (data.nominal_type) > align)
+	    align = TYPE_ALIGN (data.passed_type);
+	  if (crtl->stack_alignment_estimated < align)
+	    {
+	      gcc_assert (!crtl->stack_realign_processed);
+	      crtl->stack_alignment_estimated = align;
+	    }
+	}
+	
       if (cfun->stdarg && !TREE_CHAIN (parm))
 	assign_parms_setup_varargs (&all, &data, false);
 
@@ -3006,6 +3077,28 @@ assign_parms (tree fndecl)
      now that all parameters have been copied out of hard registers.  */
   emit_insn (all.first_conversion_insn);
 
+  /* Estimate reload stack alignment from scalar return mode.  */
+  if (SUPPORTS_STACK_ALIGNMENT)
+    {
+      if (DECL_RESULT (fndecl))
+	{
+	  tree type = TREE_TYPE (DECL_RESULT (fndecl));
+	  enum machine_mode mode = TYPE_MODE (type);
+
+	  if (mode != BLKmode
+	      && mode != VOIDmode
+	      && !AGGREGATE_TYPE_P (type))
+	    {
+	      unsigned int align = GET_MODE_ALIGNMENT (mode);
+	      if (crtl->stack_alignment_estimated < align)
+		{
+		  gcc_assert (!crtl->stack_realign_processed);
+		  crtl->stack_alignment_estimated = align;
+		}
+	    }
+	} 
+    }
+
   /* If we are receiving a struct value address as the first argument, set up
      the RTL for the function result. As this might require code to convert
      the transmitted address to Pmode, we do this here to ensure that possible
@@ -3283,12 +3376,39 @@ locate_and_pad_parm (enum machine_mode p
   locate->where_pad = where_pad;
   locate->boundary = boundary;
 
-  /* Remember if the outgoing parameter requires extra alignment on the
-     calling function side.  */
-  if (boundary > PREFERRED_STACK_BOUNDARY)
-    boundary = PREFERRED_STACK_BOUNDARY;
+  /* We can't exceed MAX_SUPPORTED_STACK_ALIGNMENT when we remember
+     if the outgoing parameter requires extra alignment on the calling
+     function side.  */
+  if (boundary >= MAX_SUPPORTED_STACK_ALIGNMENT)
+    boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
+
+  if (SUPPORTS_STACK_ALIGNMENT)
+    {
+      /* stack_alignment_estimated can't change after stack has been
+	 realigned.  */
+      if (crtl->stack_alignment_estimated < boundary)
+        {
+          if (!crtl->stack_realign_processed)
+	    crtl->stack_alignment_estimated = boundary;
+	  else
+	    {
+	      /* If stack is realigned and stack alignment value
+		 hasn't been finalized, it is OK not to increase
+		 stack_alignment_estimated.  The bigger alignment
+		 requirement is recorded in stack_alignment_needed
+		 below.  */
+	      gcc_assert (!crtl->stack_realign_finalized
+			  && crtl->stack_realign_needed);
+	    }
+	}
+    }
+
   if (crtl->stack_alignment_needed < boundary)
     crtl->stack_alignment_needed = boundary;
+  if (crtl->max_used_stack_slot_alignment < crtl->stack_alignment_needed)
+    crtl->max_used_stack_slot_alignment = crtl->stack_alignment_needed;
+  if (crtl->preferred_stack_boundary < boundary)
+    crtl->preferred_stack_boundary = boundary;
 
 #ifdef ARGS_GROW_DOWNWARD
   locate->slot_offset.constant = -initial_offset_ptr->constant;
@@ -4624,7 +4744,8 @@ get_arg_pointer_save_area (void)
 	 generated stack slot may not be a valid memory address, so we
 	 have to check it and fix it if necessary.  */
       start_sequence ();
-      emit_move_insn (validize_mem (ret), virtual_incoming_args_rtx);
+      emit_move_insn (validize_mem (ret),
+                      crtl->args.internal_arg_pointer);
       seq = get_insns ();
       end_sequence ();
 
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/global.c gcc/gcc/global.c
--- ../../gcc/gcc/global.c	2008-05-15 08:33:48.000000000 -0700
+++ gcc/gcc/global.c	2008-05-28 21:01:40.000000000 -0700
@@ -206,7 +206,9 @@ static void build_insn_chain (void);
 
    This will normally be called with ELIM_SET as the file static
    variable eliminable_regset, and NO_GLOBAL_SET as the file static
-   variable NO_GLOBAL_ALLOC_REGS.  */
+   variable NO_GLOBAL_ALLOC_REGS.
+
+   It also initializes global flag frame_pointer_needed.  */
 
 static void
 compute_regsets (HARD_REG_SET *elim_set, 
@@ -222,10 +224,19 @@ compute_regsets (HARD_REG_SET *elim_set,
   static const struct {const int from, to; } eliminables[] = ELIMINABLE_REGS;
   size_t i;
 #endif
+
+  /* FIXME: If EXIT_IGNORE_STACK is set, we will not save and restore
+     sp for alloca.  So we can't eliminate the frame pointer in that
+     case.  At some point, we should improve this by emitting the
+     sp-adjusting insns for this case.  */
   int need_fp
     = (! flag_omit_frame_pointer
        || (cfun->calls_alloca && EXIT_IGNORE_STACK)
-       || FRAME_POINTER_REQUIRED);
+       || FRAME_POINTER_REQUIRED
+       || crtl->accesses_prior_frames
+       || crtl->stack_realign_needed);
+
+  frame_pointer_needed = need_fp;
 
   max_regno = max_reg_num ();
   compact_blocks ();
@@ -246,7 +257,10 @@ compute_regsets (HARD_REG_SET *elim_set,
     {
       bool cannot_elim
 	= (! CAN_ELIMINATE (eliminables[i].from, eliminables[i].to)
-	   || (eliminables[i].to == STACK_POINTER_REGNUM && need_fp));
+	   || (eliminables[i].to == STACK_POINTER_REGNUM
+	       && need_fp 
+	       && (! SUPPORTS_STACK_ALIGNMENT
+		   || ! stack_realign_fp)));
 
       if (!regs_asm_clobbered[eliminables[i].from])
 	{
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/reload1.c gcc/gcc/reload1.c
--- ../../gcc/gcc/reload1.c	2008-06-02 10:41:08.000000000 -0700
+++ gcc/gcc/reload1.c	2008-06-01 19:30:52.000000000 -0700
@@ -3667,8 +3667,11 @@ update_eliminables (HARD_REG_SET *pset)
   frame_pointer_needed = 1;
   for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
     {
-      if (ep->can_eliminate && ep->from == FRAME_POINTER_REGNUM
-	  && ep->to != HARD_FRAME_POINTER_REGNUM)
+      if (ep->can_eliminate
+	  && ep->from == FRAME_POINTER_REGNUM
+	  && ep->to != HARD_FRAME_POINTER_REGNUM
+	  && (! SUPPORTS_STACK_ALIGNMENT
+	      || ! crtl->stack_realign_needed))
 	frame_pointer_needed = 0;
 
       if (! ep->can_eliminate && ep->can_eliminate_previous)
@@ -3699,7 +3702,9 @@ elimination_target_reg_p (rtx x)
   return false;
 }
 
-/* Initialize the table of registers to eliminate.  */
+/* Initialize the table of registers to eliminate.
+   Pre-condition: global flag frame_pointer_needed has been set before
+   calling this function.  */
 
 static void
 init_elim_table (void)
@@ -3712,19 +3717,6 @@ init_elim_table (void)
   if (!reg_eliminate)
     reg_eliminate = xcalloc (sizeof (struct elim_table), NUM_ELIMINABLE_REGS);
 
-  /* Does this function require a frame pointer?  */
-
-  frame_pointer_needed = (! flag_omit_frame_pointer
-			  /* ?? If EXIT_IGNORE_STACK is set, we will not save
-			     and restore sp for alloca.  So we can't eliminate
-			     the frame pointer in that case.  At some point,
-			     we should improve this by emitting the
-			     sp-adjusting insns for this case.  */
-			  || (cfun->calls_alloca
-			      && EXIT_IGNORE_STACK)
-			  || crtl->accesses_prior_frames
-			  || FRAME_POINTER_REQUIRED);
-
   num_eliminable = 0;
 
 #ifdef ELIMINABLE_REGS
@@ -3735,7 +3727,10 @@ init_elim_table (void)
       ep->to = ep1->to;
       ep->can_eliminate = ep->can_eliminate_previous
 	= (CAN_ELIMINATE (ep->from, ep->to)
-	   && ! (ep->to == STACK_POINTER_REGNUM && frame_pointer_needed));
+	   && ! (ep->to == STACK_POINTER_REGNUM
+		 && frame_pointer_needed 
+		 && (! SUPPORTS_STACK_ALIGNMENT
+		     || ! stack_realign_fp)));
     }
 #else
   reg_eliminate[0].from = reg_eliminate_1[0].from;
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/rtl.h gcc/gcc/rtl.h
--- ../../gcc/gcc/rtl.h	2008-06-02 10:41:08.000000000 -0700
+++ gcc/gcc/rtl.h	2008-06-01 17:27:56.000000000 -0700
@@ -1572,6 +1572,7 @@ extern rtx simplify_subtraction (rtx);
 
 /* In function.c  */
 extern rtx assign_stack_local (enum machine_mode, HOST_WIDE_INT, int);
+extern rtx assign_stack_local_1 (enum machine_mode, HOST_WIDE_INT, int, bool);
 extern rtx assign_stack_temp (enum machine_mode, HOST_WIDE_INT, int);
 extern rtx assign_stack_temp_for_type (enum machine_mode,
 				       HOST_WIDE_INT, int, tree);
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/stmt.c gcc/gcc/stmt.c
--- ../../gcc/gcc/stmt.c	2008-06-06 15:10:07.000000000 -0700
+++ gcc/gcc/stmt.c	2008-06-08 13:33:32.000000000 -0700
@@ -1819,7 +1819,7 @@ expand_nl_goto_receiver (void)
 	{
 	  /* Now restore our arg pointer from the address at which it
 	     was saved in our stack frame.  */
-	  emit_move_insn (virtual_incoming_args_rtx,
+	  emit_move_insn (crtl->args.internal_arg_pointer,
 			  copy_to_reg (get_arg_pointer_save_area ()));
 	}
     }
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/target-def.h gcc/gcc/target-def.h
--- ../../gcc/gcc/target-def.h	2008-06-03 14:48:15.000000000 -0700
+++ gcc/gcc/target-def.h	2008-06-03 13:28:35.000000000 -0700
@@ -566,6 +566,8 @@
 
 #define TARGET_FUNCTION_VALUE default_function_value
 #define TARGET_INTERNAL_ARG_POINTER default_internal_arg_pointer
+#define TARGET_UPDATE_STACK_BOUNDARY NULL
+#define TARGET_GET_DRAP_RTX NULL
 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS hook_bool_void_true
 
 #define TARGET_CALLS {						\
@@ -587,6 +589,8 @@
    TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN,			\
    TARGET_FUNCTION_VALUE,					\
    TARGET_INTERNAL_ARG_POINTER,					\
+   TARGET_UPDATE_STACK_BOUNDARY,				\
+   TARGET_GET_DRAP_RTX,						\
    TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS				\
    }
 
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/target.h gcc/gcc/target.h
--- ../../gcc/gcc/target.h	2008-06-06 15:10:06.000000000 -0700
+++ gcc/gcc/target.h	2008-06-08 13:33:28.000000000 -0700
@@ -831,6 +831,13 @@ struct gcc_target
        current function.  */
     rtx (*internal_arg_pointer) (void);
 
+    /* Update the current function stack boundary if needed.  */
+    void (*update_stack_boundary) (void);
+
+    /* Handle stack alignment and return an rtx for Dynamic Realign
+       Argument Pointer if necessary.  */
+    rtx (*get_drap_rtx) (void);
+
     /* Return true if all function parameters should be spilled to the
        stack.  */
     bool (*allocate_stack_slots_for_args) (void);
diff -x doc -x config -x testsuite -x ChangeLog.stackalign -x function.h -x dwarf2out.c -x REVISION -x .svn -upr ../../gcc/gcc/tree-vectorizer.c gcc/gcc/tree-vectorizer.c
--- ../../gcc/gcc/tree-vectorizer.c	2008-06-06 15:10:06.000000000 -0700
+++ gcc/gcc/tree-vectorizer.c	2008-06-08 13:33:30.000000000 -0700
@@ -1788,9 +1788,7 @@ vect_can_force_dr_alignment_p (const_tre
   if (TREE_STATIC (decl))
     return (alignment <= MAX_OFILE_ALIGNMENT);
   else
-    /* This used to be PREFERRED_STACK_BOUNDARY, however, that is not 100%
-       correct until someone implements forced stack alignment.  */
-    return (alignment <= STACK_BOUNDARY); 
+    return (alignment <= MAX_STACK_ALIGNMENT);
 }
 
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]