This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

[PATCH] double alignment (finally..)



Ok, here's the third version of the stack/argument double-alignment, updated
for 0621. Now that the necessary copyright assignments are made, are there
any new objections to the patch? -mstack-align-double doesn't cause new
regressions on 0621.

If it's OK, I'd be happy to check it in.

Wed Jun 24 14:17:59 CEST 1998  Marc Lehmann  <pcg@goof.com>

	* i386.c (ix86_sp_offset, ix86_frame_size): New functions.
	(ix86_prologue, ix86_epilogue):	use ix86_frame_size () instead
	of get_frame_size ().
	* i386.h (MASK_STACK_ALIGN_DOUBLE, MASK_ARG_ALIGN_DOUBLE,
	TARGET_STACK_ALIGN_DOUBLE, TARGET_ARG_ALIGN_DOUBLE):
	new defines.
	(TARGET_SWITCHES): new switches -m[no-]stack-align-double,
	-m[no-]arg-align-double, -mall-align-double.
	(FUNCTION_ARG_BOUNDARY): align DFmode and XFmode arguments
	to 64 bit when -marg-align-double is requested.
	(STACK_BOUNDARY, BIGGEST_ALIGNMENT): set to 64 if double-alignment
	requested.
	(INITIAL_ELIMINATION_OFFSET): use ix86_sp_offset () instead
	of get_frame_size ().

Index: gcc/config/i386/i386.c
===================================================================
diff -u -p -u -r1.1.1.19 i386.c
--- i386.c	1998/06/21 20:41:57	1.1.1.19
+++ i386.c	1998/06/24 12:05:03
@@ -2017,6 +2017,74 @@ load_pic_register (do_rtl)
     emit_insn (gen_blockage ());
 }
 
+/* Calculate initial elimination offsets for fp/ap pointer elimination.  */
+int
+ix86_sp_offset (for_arg)
+     int for_arg;
+{
+  int regno;
+  int offset = 0;
+  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
+				  || current_function_uses_const_pool);
+  int tsize = get_frame_size ();
+  
+  for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
+    if ((regs_ever_live[regno] && ! call_used_regs[regno])
+	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
+      offset += 4;
+
+  if (! TARGET_STACK_ALIGN_DOUBLE
+      || (tsize == 0 && (offset % 8) == 4))
+    return tsize + offset + (for_arg ? 4 : 0);
+
+  if (((tsize + offset) % 8) == 0)
+    return tsize + offset + (for_arg ? 8 : 0);
+  
+  return tsize + offset + (for_arg ? 12 : 4);
+}
+
+/* Calculate the size of the frame for this function.  This may be larger
+   than what get_frame_size reports if we need to ensure alignment of
+   doubles.  */
+static int
+ix86_frame_size ()
+{
+  int regno;
+  int offset = 0;
+  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
+				  || current_function_uses_const_pool);
+  int limit = (frame_pointer_needed ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
+  int tsize = get_frame_size ();
+
+  if (! TARGET_STACK_ALIGN_DOUBLE)
+    return tsize;
+
+  for (regno = 0; regno < limit; regno++)
+    if ((regs_ever_live[regno] && ! call_used_regs[regno])
+	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
+      offset += 4;
+  
+  /* If we need a frame pointer, we adjust the frame size if necessary */
+  if (frame_pointer_needed)
+    { 
+      if ((tsize % 8) == 4)
+	return tsize + 4;
+      else
+	return tsize;
+    }
+  
+  /* Otherwise, it gets complicated.  */
+  
+  /* There's one case where we don't have to do anything.  */
+  if (tsize == 0 && (offset % 8) == 4)
+    return tsize;
+  
+  if (((tsize + offset) % 8) == 0)
+    return tsize + 4;
+  
+  return tsize + 8;
+}
+
 static void
 ix86_prologue (do_rtl)
      int do_rtl;
@@ -2026,7 +2094,7 @@ ix86_prologue (do_rtl)
   rtx xops[4];
   int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
 				  || current_function_uses_const_pool);
-  long tsize = get_frame_size ();
+  long tsize = ix86_frame_size ();
   rtx insn;
   int cfa_offset = INCOMING_FRAME_SP_OFFSET, cfa_store_offset = cfa_offset;
   
@@ -2240,7 +2308,7 @@ ix86_epilogue (do_rtl)
   rtx xops[3];
   int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
 				  || current_function_uses_const_pool);
-  long tsize = get_frame_size ();
+  long tsize = ix86_frame_size ();
 
   /* Compute the number of registers to pop */
 
Index: gcc/config/i386/i386.h
===================================================================
RCS file: /home/cvsroot/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.1.1.20
diff -u -p -u -r1.1.1.20 i386.h
--- i386.h	1998/06/21 20:41:58	1.1.1.20
+++ i386.h	1998/06/24 12:06:50
@@ -96,6 +96,8 @@ extern int target_flags;
 #define MASK_DEBUG_ARG		000020000000	/* Debug function_arg */   
 #define MASK_SCHEDULE_PROLOGUE  000040000000    /* Emit prologue as rtl */
 #define MASK_STACK_PROBE	000100000000	/* Enable stack probing */
+#define MASK_STACK_ALIGN_DOUBLE	000200000000	/* Align doubles on the stack */
+#define MASK_ARG_ALIGN_DOUBLE	000400000000	/* Align doubles in argument slots */
 
 /* Use the floating point instructions */
 #define TARGET_80387 (target_flags & MASK_80387)
@@ -110,6 +112,16 @@ extern int target_flags;
    faster code on the pentium.  */
 #define TARGET_ALIGN_DOUBLE (target_flags & MASK_ALIGN_DOUBLE)
 
+/* Align doubles to a two word boundary on the stack.  This option by itself
+   does not break binary compatibility.  It is implied by
+   TARGET_ALIGN_DOUBLE.  */
+#define TARGET_STACK_ALIGN_DOUBLE (target_flags & MASK_STACK_ALIGN_DOUBLE)
+
+/* Align doubles to a two word boundary on the stack even if used
+   as arguments. This option by itself will break the ABI in much the same way
+   TARGET_ALIGN_DOUBLE does.  */
+#define TARGET_ARG_ALIGN_DOUBLE (target_flags & MASK_ARG_ALIGN_DOUBLE)
+
 /* Put uninitialized locals into bss, not data.
    Meaningful only on svr3.  */
 #define TARGET_SVR3_SHLIB (target_flags & MASK_SVR3_SHLIB)
@@ -185,6 +197,13 @@ extern int target_flags;
   { "no-rtd",			-MASK_RTD },				\
   { "align-double",		 MASK_ALIGN_DOUBLE },			\
   { "no-align-double",		-MASK_ALIGN_DOUBLE },			\
+  { "stack-align-double",	 MASK_STACK_ALIGN_DOUBLE },		\
+  { "no-stack-align-double",	-MASK_STACK_ALIGN_DOUBLE },		\
+  { "arg-align-double",		 MASK_ARG_ALIGN_DOUBLE },		\
+  { "no-arg-align-double",	-MASK_ARG_ALIGN_DOUBLE },		\
+  { "all-align-double",		 MASK_ALIGN_DOUBLE			\
+				 | MASK_STACK_ALIGN_DOUBLE		\
+				 | MASK_ARG_ALIGN_DOUBLE},		\
   { "svr3-shlib",		 MASK_SVR3_SHLIB },			\
   { "no-svr3-shlib",		-MASK_SVR3_SHLIB },			\
   { "ieee-fp",			 MASK_IEEE_FP },			\
@@ -397,8 +416,14 @@ extern int ix86_arch;
 /* Allocation boundary (in *bits*) for storing arguments in argument list.  */
 #define PARM_BOUNDARY 32
 
+/* Aligning doubles to a two-word boundary is faster on pentiums
+   and pentium pros. Unfortunately, it breaks the ABI, so
+   do so only when requested via -marg-align-double.  */
+#define FUNCTION_ARG_BOUNDARY(MODE,TYPE) \
+  ((TARGET_ARG_ALIGN_DOUBLE && (MODE == DFmode || MODE == XFmode)) ? 64 : 32)
+
 /* Boundary (in *bits*) on which stack pointer should be aligned.  */
-#define STACK_BOUNDARY 32
+#define STACK_BOUNDARY (TARGET_STACK_ALIGN_DOUBLE || TARGET_ARG_ALIGN_DOUBLE ? 64 : 32)
 
 /* Allocation boundary (in *bits*) for the code of a function.
    For i486, we get better performance by aligning to a cache
@@ -417,8 +442,12 @@ extern int ix86_arch;
    The published ABIs say that doubles should be aligned on word
    boundaries, but the Pentium gets better performance with them
    aligned on 64 bit boundaries. */
-#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32)
+#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE || TARGET_STACK_ALIGN_DOUBLE \
+			   || TARGET_ARG_ALIGN_DOUBLE ? 64 : 32)
 
+/* Biggest alignment any structure field can require in bits.  */
+#define BIGGEST_FIELD_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32)
+
 /* If defined, a C expression to compute the alignment given to a
    constant that is being placed in memory.  CONSTANT is the constant
    and ALIGN is the alignment that the object would ordinarily have.
@@ -1579,22 +1608,8 @@ do {						\
   if ((FROM) == ARG_POINTER_REGNUM && (TO) == FRAME_POINTER_REGNUM)	\
     (OFFSET) = 8;	/* Skip saved PC and previous frame pointer */	\
   else									\
-    {									\
-      int regno;							\
-      int offset = 0;							\
-									\
-      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)		\
-	if ((regs_ever_live[regno] && ! call_used_regs[regno])		\
-	    || ((current_function_uses_pic_offset_table			\
-		 || current_function_uses_const_pool)			\
-		&& flag_pic && regno == PIC_OFFSET_TABLE_REGNUM))	\
-	  offset += 4;							\
-									\
-      (OFFSET) = offset + get_frame_size ();				\
-									\
-      if ((FROM) == ARG_POINTER_REGNUM && (TO) == STACK_POINTER_REGNUM)	\
-	(OFFSET) += 4;	/* Skip saved PC */				\
-    }									\
+    (OFFSET) = ix86_sp_offset ((FROM) == ARG_POINTER_REGNUM		\
+                               && (TO) == STACK_POINTER_REGNUM);	\
 }
 
 /* Addressing modes, and classification of registers for them.  */
@@ -2773,6 +2788,7 @@ extern char *output_int_conditional_move
 extern char *output_fp_conditional_move ();
 extern int ix86_can_use_return_insn_p ();
+extern int ix86_sp_offset ();
 
 #ifdef NOTYET
 extern struct rtx_def *copy_all_rtx ();
 extern void rewrite_address ();
	


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]