This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

new_ia32 branch: stack alignment


This patch adds the stack alignment changes to the new_ia32 branch.
OK to install?

Bernd

	Merge stack alignment patches into the new_ia32 branch:
	* i386.c (ix86_preferred_stack_boundary,
	ix86_preferred_stack_boundary_string): New variables.
	(override_options): Handle -mpreferred-stack-boundary.
	(ix86_compute_frame_size): New function.
	(ix86_expand_prologue): Use it.
	(ix86_expand_epilogue): Likewise.
	
	* i386.h (ix86_preferred_stack_boundary,
	ix86_preferred_stack_boundary_string): Declare new variables.
	(TARGET_OPTIONS): New option -mpreferred-stack-boundary.
	Add documentation for -mbranch-cost.
	(PREFERRED_STACK_BOUNDARY): New macro.
	(LOCAL_ALIGNMENT): New macro.
	(INITIAL_ELIMINATION_OFFSET): Use ix86_compute_frame_size.
	
Index: config/i386/i386.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.81.2.13
diff -u -p -r1.81.2.13 i386.c
--- config/i386/i386.c	1999/07/20 05:36:41	1.81.2.13
+++ config/i386/i386.c	1999/07/25 19:29:40
@@ -196,6 +196,12 @@ const char *ix86_align_loops_string;
 /* Power of two alignment for non-loop jumps. */
 const char *ix86_align_jumps_string;
 
+/* -mpreferred-stack-boundary= argument: log2 of the boundary in bytes.  */
+const char *ix86_preferred_stack_boundary_string;
+
+/* Preferred alignment for stack boundary in bits.  */
+int ix86_preferred_stack_boundary;
+
 /* Values 1-5: see jump.c */
 int ix86_branch_cost;
 const char *ix86_branch_cost_string;
@@ -399,6 +405,17 @@ override_options ()
 	       ix86_align_funcs, MAX_CODE_ALIGN);
     }
 
+  /* Validate -mpreferred-stack-boundary= value, or provide default.
+     The default of 128 bits is for Pentium III's SSE __m128.  */
+  ix86_preferred_stack_boundary = 128;
+  if (ix86_preferred_stack_boundary_string)
+    {
+      int i = atoi (ix86_preferred_stack_boundary_string);
+      if (i < 2 || i > 31)
+	fatal ("-mpreferred_stack_boundary=%d is not between 2 and 31", i);
+      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
+    }
+
   /* Validate -mbranch-cost= value, or provide default. */
   ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
   if (ix86_branch_cost_string)
@@ -1434,7 +1451,7 @@ ix86_expand_prologue ()
   int limit;
   int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
 				  || current_function_uses_const_pool);
-  long tsize = get_frame_size ();
+  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0);
   rtx insn;
 
   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
@@ -1509,33 +1526,26 @@ void
 ix86_expand_epilogue ()
 {
   register int regno;
-  register int nregs, limit;
-  int offset;
+  register int limit;
+  int nregs;
   int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
 				  || current_function_uses_const_pool);
   int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
-  long tsize = get_frame_size ();
-
-  /* Compute the number of registers to pop */
+  HOST_WIDE_INT offset;
+  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs);
 
-  limit = (frame_pointer_needed ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
-
-  nregs = 0;
-
-  for (regno = limit - 1; regno >= 0; regno--)
-    if ((regs_ever_live[regno] && ! call_used_regs[regno])
-	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
-      nregs++;
-
   /* SP is often unreliable so we may have to go off the frame pointer. */
 
-  offset = - tsize - (nregs * UNITS_PER_WORD);
+  offset = -(tsize + nregs * UNITS_PER_WORD);
 
   /* If we're only restoring one register and sp is not valid then
      using a move instruction to restore the register since it's
      less work than reloading sp and popping the register.  Otherwise,
      restore sp (if necessary) and pop the registers. */
 
+  limit = (frame_pointer_needed
+	   ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
+
   if (nregs > 1 || sp_valid)
     {
       if ( !sp_valid )
@@ -2292,6 +2302,67 @@ legitimize_address (x, oldx, mode)
    and subtraction are the only arithmetic that may appear in these
    expressions.  FILE is the stdio stream to write to, X is the rtx, and
    CODE is the operand print code from the output string.  */
+
+/* Return SIZE, the size of local storage, plus whatever padding is
+   needed to maintain the preferred stack alignment.  If NREGS_ON_STACK
+   is non-null, store the number of registers saved below the locals.  */
+
+HOST_WIDE_INT
+ix86_compute_frame_size (size, nregs_on_stack)
+     HOST_WIDE_INT size;
+     int *nregs_on_stack;
+{
+  int limit;
+  int nregs;
+  int regno;
+  int padding;
+  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
+				  || current_function_uses_const_pool);
+  HOST_WIDE_INT total_size;
+
+  limit = frame_pointer_needed
+	  ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM;
+
+  nregs = 0;  /* Live call-saved regs below LIMIT, plus PIC reg if used.  */
+
+  for (regno = limit - 1; regno >= 0; regno--)
+    if ((regs_ever_live[regno] && ! call_used_regs[regno])
+	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
+      nregs++;
+
+  padding = 0;
+  total_size = size + (nregs * UNITS_PER_WORD);
+
+#ifdef PREFERRED_STACK_BOUNDARY
+  {
+    int offset;
+    int preferred_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
+
+    offset = 4;  /* Saved PC.  */
+    if (frame_pointer_needed)
+      offset += UNITS_PER_WORD;  /* Previous frame pointer.  */
+
+    total_size += offset;
+    
+    padding = ((total_size + preferred_alignment - 1)
+	       & -preferred_alignment) - total_size;  /* Round up.  */
+
+    if (padding < (((offset + preferred_alignment - 1)
+		    & -preferred_alignment) - offset))
+      padding += preferred_alignment;
+
+    /* Don't bother aligning the stack of a leaf function
+       which doesn't allocate any stack slots.  */
+    if (size == 0 && current_function_is_leaf)
+      padding = 0;
+  }
+#endif
+
+  if (nregs_on_stack)
+    *nregs_on_stack = nregs;
+
+  return size + padding;
+}
 
 static void
 output_pic_addr_const (file, x, code)
Index: config/i386/i386.h
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.61.2.8
diff -u -p -r1.61.2.8 i386.h
--- config/i386/i386.h	1999/07/20 05:36:42	1.61.2.8
+++ config/i386/i386.h	1999/07/25 19:29:41
@@ -265,8 +265,11 @@ extern int ix86_arch;
     "Jump targets are aligned to this power of 2" },		\
   { "align-functions=",	&ix86_align_funcs_string,		\
     "Function starts are aligned to this power of 2" },		\
+  { "preferred-stack-boundary=",				\
+    &ix86_preferred_stack_boundary_string,			\
+    "Attempt to keep stack aligned to this power of 2" },	\
   { "branch-cost=",	&ix86_branch_cost_string,		\
-    0 /* undocumented */ },					\
+    "Branches are this expensive (1-5, arbitrary units)" },	\
   SUBTARGET_OPTIONS						\
 }
 
@@ -409,6 +412,10 @@ extern int ix86_arch;
 /* Boundary (in *bits*) on which stack pointer should be aligned.  */
 #define STACK_BOUNDARY 32
 
+/* Boundary (in *bits*) on which the stack pointer prefers to be
+   aligned; the compiler cannot rely on having this alignment.  */
+#define PREFERRED_STACK_BOUNDARY ix86_preferred_stack_boundary
+
 /* Allocation boundary for the code of a function. */
 #define FUNCTION_BOUNDARY \
    (1 << ((ix86_align_funcs >= 0 ? ix86_align_funcs : -ix86_align_funcs) + 3))
@@ -540,6 +547,46 @@ extern int ix86_arch;
 	: (ALIGN))							\
     : (ALIGN))
 
+/* If defined, a C expression to compute the alignment for a local
+   variable.  TYPE is the data type, and ALIGN is the alignment that
+   the object would ordinarily have.  The value of this macro is used
+   instead of that alignment to align the object.  If this macro is
+   not defined, then ALIGN is used.  One use of this macro is to
+   increase alignment of medium-size data to make it all fit in fewer
+   cache lines.  Here DFmode/DCmode data is raised to at least 64 bits
+   and XFmode/XCmode data to at least 128 bits; arrays are judged by
+   their element mode, records and unions by their first field.  */
+
+#define LOCAL_ALIGNMENT(TYPE, ALIGN)					\
+  (TREE_CODE (TYPE) == ARRAY_TYPE					\
+    ? ((TYPE_MODE (TREE_TYPE (TYPE)) == DFmode && (ALIGN) < 64)		\
+	? 64								\
+   	: (TYPE_MODE (TREE_TYPE (TYPE)) == XFmode && (ALIGN) < 128)	\
+	? 128								\
+	: (ALIGN))							\
+    : TREE_CODE (TYPE) == COMPLEX_TYPE					\
+    ? ((TYPE_MODE (TYPE) == DCmode && (ALIGN) < 64)			\
+	? 64								\
+   	: (TYPE_MODE (TYPE) == XCmode && (ALIGN) < 128)			\
+	? 128								\
+	: (ALIGN))							\
+    : ((TREE_CODE (TYPE) == RECORD_TYPE					\
+	|| TREE_CODE (TYPE) == UNION_TYPE				\
+	|| TREE_CODE (TYPE) == QUAL_UNION_TYPE)				\
+	&& TYPE_FIELDS (TYPE))						\
+    ? ((DECL_MODE (TYPE_FIELDS (TYPE)) == DFmode && (ALIGN) < 64)	\
+	? 64								\
+	: (DECL_MODE (TYPE_FIELDS (TYPE)) == XFmode && (ALIGN) < 128)	\
+	? 128								\
+	: (ALIGN))							\
+    : TREE_CODE (TYPE) == REAL_TYPE					\
+    ? ((TYPE_MODE (TYPE) == DFmode && (ALIGN) < 64)			\
+	? 64								\
+   	: (TYPE_MODE (TYPE) == XFmode && (ALIGN) < 128)			\
+	? 128								\
+	: (ALIGN))							\
+    : (ALIGN))
+
 /* Set this non-zero if move instructions will actually fail to work
    when given unaligned data.  */
 #define STRICT_ALIGNMENT 0
@@ -1385,20 +1432,23 @@ do {								\
     (OFFSET) = 8;	/* Skip saved PC and previous frame pointer */	\
   else									\
     {									\
-      int regno;							\
-      int offset = 0;							\
+      int nregs;							\
+      int offset;							\
+      int preferred_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT; \
+      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),	\
+						     &nregs);		\
 									\
-      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)		\
-	if ((regs_ever_live[regno] && ! call_used_regs[regno])		\
-	    || ((current_function_uses_pic_offset_table			\
-		 || current_function_uses_const_pool)			\
-		&& flag_pic && regno == PIC_OFFSET_TABLE_REGNUM))	\
-	  offset += 4;							\
+      (OFFSET) = (tsize + nregs * UNITS_PER_WORD);			\
 									\
-      (OFFSET) = offset + get_frame_size ();				\
+      offset = 4;							\
+      if (frame_pointer_needed)						\
+	offset += UNITS_PER_WORD;					\
 									\
-      if ((FROM) == ARG_POINTER_REGNUM && (TO) == STACK_POINTER_REGNUM)	\
-	(OFFSET) += 4;	/* Skip saved PC */				\
+      if ((FROM) == ARG_POINTER_REGNUM)					\
+	(OFFSET) += offset;						\
+      else								\
+	(OFFSET) -= ((offset + preferred_alignment - 1)			\
+		     & -preferred_alignment) - offset;			\
     }									\
 }
 
@@ -2513,11 +2563,13 @@ extern const char *ix86_regparm_string;	
 extern const char *ix86_align_loops_string;	/* power of two alignment for loops */
 extern const char *ix86_align_jumps_string;	/* power of two alignment for non-loop jumps */
 extern const char *ix86_align_funcs_string;	/* power of two alignment for functions */
+extern const char *ix86_preferred_stack_boundary_string;/* power of two alignment for stack boundary */
 extern const char *ix86_branch_cost_string;	/* values 1-5: see jump.c */
 extern int ix86_regparm;			/* ix86_regparm_string as a number */
 extern int ix86_align_loops;			/* power of two alignment for loops */
 extern int ix86_align_jumps;			/* power of two alignment for non-loop jumps */
 extern int ix86_align_funcs;			/* power of two alignment for functions */
+extern int ix86_preferred_stack_boundary;	/* preferred stack boundary alignment in bits */
 extern int ix86_branch_cost;			/* values 1-5: see jump.c */
 extern const char * const hi_reg_name[];	/* names for 16 bit regs */
 extern const char * const qi_reg_name[];	/* names for 8 bit regs (low) */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]