This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
[PATCH] double alignment (finally..)
- To: egcs at cygnus dot com
- Subject: [PATCH] double alignment (finally..)
- From: Marc Lehmann <pcg at goof dot com>
- Date: Wed, 24 Jun 1998 14:21:56 +0200
Ok, here's the third version of the stack/argument double-alignment, updated
for 0621. Now that the necessary copyright assignments are made, are there
any new objections to the patch? -mstack-align-double doesn't cause new
regressions on 0621.
If it's OK, I'd be happy to check it in.
Wed Jun 24 14:17:59 CEST 1998 Marc Lehmann <pcg@goof.com>
* i386.c (ix86_sp_offset, ix86_frame_size): New functions.
(ix86_prologue, ix86_epilogue): use ix86_frame_size () instead
of get_frame_size ().
* i386.h (MASK_STACK_ALIGN_DOUBLE, MASK_ARG_ALIGN_DOUBLE,
TARGET_STACK_ALIGN_DOUBLE, TARGET_ARG_ALIGN_DOUBLE):
new defines.
(TARGET_SWITCHES): new switches -m[no-]stack-align-double,
-m[no-]arg-align-double, -mall-align-double.
(FUNCTION_ARG_BOUNDARY): align DFmode and XFmode arguments
to 64 bit when -marg-align-double is requested.
(STACK_BOUNDARY, BIGGEST_ALIGNMENT): set to 64 if double-alignment
requested.
(INITIAL_ELIMINATION_OFFSET): use ix86_sp_offset () instead
of get_frame_size ().
Index: gcc/config/i386/i386.c
===================================================================
diff -u -p -u -r1.1.1.19 i386.c
--- i386.c 1998/06/21 20:41:57 1.1.1.19
+++ i386.c 1998/06/24 12:05:03
@@ -2017,6 +2017,74 @@ load_pic_register (do_rtl)
emit_insn (gen_blockage ());
}
+/* Calculate initial elimination offsets for fp/ap pointer elimination. */
+int
+ix86_sp_offset (for_arg)
+ int for_arg;
+{
+ int regno;
+ int offset = 0;
+ int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
+ || current_function_uses_const_pool);
+ int tsize = get_frame_size ();
+
+ for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
+ if ((regs_ever_live[regno] && ! call_used_regs[regno])
+ || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
+ offset += 4;
+
+ if (! TARGET_STACK_ALIGN_DOUBLE
+ || (tsize == 0 && (offset % 8) == 4))
+ return tsize + offset + (for_arg ? 4 : 0);
+
+ if (((tsize + offset) % 8) == 0)
+ return tsize + offset + (for_arg ? 8 : 0);
+
+ return tsize + offset + (for_arg ? 12 : 4);
+}
+
+/* Calculate the size of the frame for this function. This may be larger
+ than what get_frame_size reports if we need to ensure alignment of
+ doubles. */
+static int
+ix86_frame_size ()
+{
+ int regno;
+ int offset = 0;
+ int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
+ || current_function_uses_const_pool);
+ int limit = (frame_pointer_needed ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
+ int tsize = get_frame_size ();
+
+ if (! TARGET_STACK_ALIGN_DOUBLE)
+ return tsize;
+
+ for (regno = 0; regno < limit; regno++)
+ if ((regs_ever_live[regno] && ! call_used_regs[regno])
+ || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
+ offset += 4;
+
+ /* If we need a frame pointer, we adjust the frame size if necessary */
+ if (frame_pointer_needed)
+ {
+ if ((tsize % 8) == 4)
+ return tsize + 4;
+ else
+ return tsize;
+ }
+
+ /* Otherwise, it gets complicated. */
+
+ /* There's one case where we don't have to do anything. */
+ if (tsize == 0 && (offset % 8) == 4)
+ return tsize;
+
+ if (((tsize + offset) % 8) == 0)
+ return tsize + 4;
+
+ return tsize + 8;
+}
+
static void
ix86_prologue (do_rtl)
int do_rtl;
@@ -2026,7 +2094,7 @@ ix86_prologue (do_rtl)
rtx xops[4];
int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
|| current_function_uses_const_pool);
- long tsize = get_frame_size ();
+ long tsize = ix86_frame_size ();
rtx insn;
int cfa_offset = INCOMING_FRAME_SP_OFFSET, cfa_store_offset = cfa_offset;
@@ -2240,7 +2308,7 @@ ix86_epilogue (do_rtl)
rtx xops[3];
int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
|| current_function_uses_const_pool);
- long tsize = get_frame_size ();
+ long tsize = ix86_frame_size ();
/* Compute the number of registers to pop */
Index: gcc/config/i386/i386.h
===================================================================
RCS file: /home/cvsroot/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.1.1.20
diff -u -p -u -r1.1.1.20 i386.h
--- i386.h 1998/06/21 20:41:58 1.1.1.20
+++ i386.h 1998/06/24 12:06:50
@@ -96,6 +96,8 @@ extern int target_flags;
#define MASK_DEBUG_ARG 000020000000 /* Debug function_arg */
#define MASK_SCHEDULE_PROLOGUE 000040000000 /* Emit prologue as rtl */
#define MASK_STACK_PROBE 000100000000 /* Enable stack probing */
+#define MASK_STACK_ALIGN_DOUBLE 000200000000 /* Align doubles on the stack */
+#define MASK_ARG_ALIGN_DOUBLE 000400000000 /* Align doubles in argument slots */
/* Use the floating point instructions */
#define TARGET_80387 (target_flags & MASK_80387)
@@ -110,6 +112,16 @@ extern int target_flags;
faster code on the pentium. */
#define TARGET_ALIGN_DOUBLE (target_flags & MASK_ALIGN_DOUBLE)
+/* Align doubles to a two word boundary on the stack. This option by itself
+ does not break binary compatibility. It is implied by
+ TARGET_ALIGN_DOUBLE. */
+#define TARGET_STACK_ALIGN_DOUBLE (target_flags & MASK_STACK_ALIGN_DOUBLE)
+
+/* Align doubles to a two word boundary on the stack even if used
+ as arguments. This option by itself will break the ABI in much the same way
+ TARGET_ALIGN_DOUBLE does. */
+#define TARGET_ARG_ALIGN_DOUBLE (target_flags & MASK_ARG_ALIGN_DOUBLE)
+
/* Put uninitialized locals into bss, not data.
Meaningful only on svr3. */
#define TARGET_SVR3_SHLIB (target_flags & MASK_SVR3_SHLIB)
@@ -185,6 +197,13 @@ extern int target_flags;
{ "no-rtd", -MASK_RTD }, \
{ "align-double", MASK_ALIGN_DOUBLE }, \
{ "no-align-double", -MASK_ALIGN_DOUBLE }, \
+ { "stack-align-double", MASK_STACK_ALIGN_DOUBLE }, \
+ { "no-stack-align-double", -MASK_STACK_ALIGN_DOUBLE }, \
+ { "arg-align-double", MASK_ARG_ALIGN_DOUBLE }, \
+ { "no-arg-align-double", -MASK_ARG_ALIGN_DOUBLE }, \
+ { "all-align-double", MASK_ALIGN_DOUBLE \
+ | MASK_STACK_ALIGN_DOUBLE \
+ | MASK_ARG_ALIGN_DOUBLE}, \
{ "svr3-shlib", MASK_SVR3_SHLIB }, \
{ "no-svr3-shlib", -MASK_SVR3_SHLIB }, \
{ "ieee-fp", MASK_IEEE_FP }, \
@@ -397,8 +416,14 @@ extern int ix86_arch;
/* Allocation boundary (in *bits*) for storing arguments in argument list. */
#define PARM_BOUNDARY 32
+/* Aligning doubles to a two-word boundary is faster on pentiums
+ and pentium pros. Unfortunately, it breaks the ABI, so
+ do this only when requested via -marg-align-double. */
+#define FUNCTION_ARG_BOUNDARY(MODE,TYPE) \
+ ((TARGET_ARG_ALIGN_DOUBLE && (MODE == DFmode || MODE == XFmode)) ? 64 : 32)
+
/* Boundary (in *bits*) on which stack pointer should be aligned. */
-#define STACK_BOUNDARY 32
+#define STACK_BOUNDARY (TARGET_STACK_ALIGN_DOUBLE || TARGET_ARG_ALIGN_DOUBLE ? 64 : 32)
/* Allocation boundary (in *bits*) for the code of a function.
For i486, we get better performance by aligning to a cache
@@ -417,8 +442,12 @@ extern int ix86_arch;
The published ABIs say that doubles should be aligned on word
boundaries, but the Pentium gets better performance with them
aligned on 64 bit boundaries. */
-#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32)
+#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE || TARGET_STACK_ALIGN_DOUBLE \
+ || TARGET_ARG_ALIGN_DOUBLE ? 64 : 32)
+/* Biggest alignment any structure field can require in bits. */
+#define BIGGEST_FIELD_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32)
+
/* If defined, a C expression to compute the alignment given to a
constant that is being placed in memory. CONSTANT is the constant
and ALIGN is the alignment that the object would ordinarily have.
@@ -1579,22 +1608,8 @@ do { \
if ((FROM) == ARG_POINTER_REGNUM && (TO) == FRAME_POINTER_REGNUM) \
(OFFSET) = 8; /* Skip saved PC and previous frame pointer */ \
else \
- { \
- int regno; \
- int offset = 0; \
- \
- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) \
- if ((regs_ever_live[regno] && ! call_used_regs[regno]) \
- || ((current_function_uses_pic_offset_table \
- || current_function_uses_const_pool) \
- && flag_pic && regno == PIC_OFFSET_TABLE_REGNUM)) \
- offset += 4; \
- \
- (OFFSET) = offset + get_frame_size (); \
- \
- if ((FROM) == ARG_POINTER_REGNUM && (TO) == STACK_POINTER_REGNUM) \
- (OFFSET) += 4; /* Skip saved PC */ \
- } \
+ (OFFSET) = ix86_sp_offset ((FROM) == ARG_POINTER_REGNUM \
+ && (TO) == STACK_POINTER_REGNUM); \
}
/* Addressing modes, and classification of registers for them. */
@@ -2773,6 +2788,7 @@ extern char *output_int_conditional_move
extern char *output_fp_conditional_move ();
extern int ix86_can_use_return_insn_p ();
+extern int ix86_sp_offset ();
#ifdef NOTYET
extern struct rtx_def *copy_all_rtx ();
extern void rewrite_address ();