This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
new_ia32 branch: stack alignment
- To: gcc-patches at gcc dot gnu dot org
- Subject: new_ia32 branch: stack alignment
- From: Bernd Schmidt <bernds at cygnus dot co dot uk>
- Date: Sun, 25 Jul 1999 20:33:58 +0100 (BST)
This patch adds the stack alignment changes to the new_ia32 branch.
OK to install?
Bernd
Merge stack alignment patches into the new_ia32 branch:
* i386.c (ix86_preferred_stack_boundary,
ix86_preferred_stack_boundary_string): New variables.
(override_options): Handle -mpreferred-stack-boundary.
(ix86_compute_frame_size): New function.
(ix86_expand_prologue): Use it.
(ix86_expand_epilogue): Likewise.
* i386.h (ix86_preferred_stack_boundary,
ix86_preferred_stack_boundary_string): Declare new variables.
(TARGET_OPTIONS): New option -mpreferred-stack-boundary.
Add documentation for -mbranch-cost.
(PREFERRED_STACK_BOUNDARY): New macro.
(LOCAL_ALIGNMENT): New macro.
(INITIAL_ELIMINATION_OFFSET): Use ix86_compute_frame_size.
Index: config/i386/i386.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.81.2.13
diff -u -p -r1.81.2.13 i386.c
--- config/i386/i386.c 1999/07/20 05:36:41 1.81.2.13
+++ config/i386/i386.c 1999/07/25 19:29:40
@@ -196,6 +196,12 @@ const char *ix86_align_loops_string;
/* Power of two alignment for non-loop jumps. */
const char *ix86_align_jumps_string;
+/* Power of two alignment for stack boundary in bytes. */
+const char *ix86_preferred_stack_boundary_string;
+
+/* Preferred alignment for stack boundary in bits. */
+int ix86_preferred_stack_boundary;
+
/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;
@@ -399,6 +405,17 @@ override_options ()
ix86_align_funcs, MAX_CODE_ALIGN);
}
+ /* Validate -mpreferred-stack-boundary= value, or provide default.
+ The default of 128 bits is for Pentium III's SSE __m128. */
+ ix86_preferred_stack_boundary = 128;
+ if (ix86_preferred_stack_boundary_string)
+ {
+ int i = atoi (ix86_preferred_stack_boundary_string);
+ if (i < 2 || i > 31)
+ fatal ("-mpreferred_stack_boundary=%d is not between 2 and 31", i);
+ ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
+ }
+
/* Validate -mbranch-cost= value, or provide default. */
ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
if (ix86_branch_cost_string)
@@ -1434,7 +1451,7 @@ ix86_expand_prologue ()
int limit;
int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
|| current_function_uses_const_pool);
- long tsize = get_frame_size ();
+ HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0);
rtx insn;
/* Note: AT&T enter does NOT have reversed args. Enter is probably
@@ -1509,33 +1526,26 @@ void
ix86_expand_epilogue ()
{
register int regno;
- register int nregs, limit;
- int offset;
+ register int limit;
+ int nregs;
int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
|| current_function_uses_const_pool);
int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
- long tsize = get_frame_size ();
-
- /* Compute the number of registers to pop */
+ HOST_WIDE_INT offset;
+ HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs);
- limit = (frame_pointer_needed ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
-
- nregs = 0;
-
- for (regno = limit - 1; regno >= 0; regno--)
- if ((regs_ever_live[regno] && ! call_used_regs[regno])
- || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
- nregs++;
-
/* SP is often unreliable so we may have to go off the frame pointer. */
- offset = - tsize - (nregs * UNITS_PER_WORD);
+ offset = -(tsize + nregs * UNITS_PER_WORD);
/* If we're only restoring one register and sp is not valid then
using a move instruction to restore the register since it's
less work than reloading sp and popping the register. Otherwise,
restore sp (if necessary) and pop the registers. */
+ limit = (frame_pointer_needed
+ ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
+
if (nregs > 1 || sp_valid)
{
if ( !sp_valid )
@@ -2292,6 +2302,67 @@ legitimize_address (x, oldx, mode)
and subtraction are the only arithmetic that may appear in these
expressions. FILE is the stdio stream to write to, X is the rtx, and
CODE is the operand print code from the output string. */
+
+/* Compute the size of local storage taking into consideration the
+ desired stack alignment which is to be maintained. Also determine
+ the number of registers saved below the local storage.
+
+ SIZE is the unpadded size of the local storage; the callers in this
+ patch pass get_frame_size (). NREGS_ON_STACK may be a null pointer;
+ when it is not, it receives the number of registers counted below,
+ i.e. every register numbered below the frame pointer (or below the
+ stack pointer when no frame pointer is needed) that is ever live and
+ not call-used, plus the PIC offset table register when it is in use.
+
+ The value returned is SIZE plus the padding computed from
+ PREFERRED_STACK_BOUNDARY; when that macro is not defined the padding
+ is zero and SIZE is returned unchanged. */
+
+HOST_WIDE_INT
+ix86_compute_frame_size (size, nregs_on_stack)
+ HOST_WIDE_INT size;
+ int *nregs_on_stack;
+{
+ int limit;
+ int nregs;
+ int regno;
+ int padding;
+ int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
+ || current_function_uses_const_pool);
+ HOST_WIDE_INT total_size;
+
+ limit = frame_pointer_needed
+ ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM; /* only registers numbered below this are counted */
+
+ nregs = 0;
+
+ for (regno = limit - 1; regno >= 0; regno--)
+ if ((regs_ever_live[regno] && ! call_used_regs[regno])
+ || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
+ nregs++;
+
+ padding = 0;
+ total_size = size + (nregs * UNITS_PER_WORD); /* local storage plus the counted registers */
+
+#ifdef PREFERRED_STACK_BOUNDARY
+ {
+ int offset;
+ int preferred_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT; /* bits to bytes */
+
+ offset = 4; /* NOTE(review): presumably the 4-byte saved return address -- confirm */
+ if (frame_pointer_needed)
+ offset += UNITS_PER_WORD; /* NOTE(review): presumably the saved frame pointer -- confirm */
+
+ total_size += offset;
+
+ padding = ((total_size + preferred_alignment - 1)
+ & -preferred_alignment) - total_size; /* distance from total_size up to the next multiple of preferred_alignment */
+
+ if (padding < (((offset + preferred_alignment - 1)
+ & -preferred_alignment) - offset))
+ padding += preferred_alignment; /* padding was smaller than what rounding OFFSET alone up would need */
+
+ /* Don't bother aligning the stack of a leaf function
+ which doesn't allocate any stack slots. */
+ if (size == 0 && current_function_is_leaf)
+ padding = 0;
+ }
+#endif
+
+ if (nregs_on_stack)
+ *nregs_on_stack = nregs; /* the count is optional; a null pointer skips it */
+
+ return size + padding;
+}
static void
output_pic_addr_const (file, x, code)
Index: config/i386/i386.h
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.61.2.8
diff -u -p -r1.61.2.8 i386.h
--- config/i386/i386.h 1999/07/20 05:36:42 1.61.2.8
+++ config/i386/i386.h 1999/07/25 19:29:41
@@ -265,8 +265,11 @@ extern int ix86_arch;
"Jump targets are aligned to this power of 2" }, \
{ "align-functions=", &ix86_align_funcs_string, \
"Function starts are aligned to this power of 2" }, \
+ { "preferred-stack-boundary=", \
+ &ix86_preferred_stack_boundary_string, \
+ "Attempt to keep stack aligned to this power of 2" }, \
{ "branch-cost=", &ix86_branch_cost_string, \
- 0 /* undocumented */ }, \
+ "Branches are this expensive (1-5, arbitrary units)" }, \
SUBTARGET_OPTIONS \
}
@@ -409,6 +412,10 @@ extern int ix86_arch;
/* Boundary (in *bits*) on which stack pointer should be aligned. */
#define STACK_BOUNDARY 32
+/* Boundary (in *bits*) on which the stack pointer prefers to be
+ aligned; the compiler cannot rely on having this alignment. */
+#define PREFERRED_STACK_BOUNDARY ix86_preferred_stack_boundary
+
/* Allocation boundary for the code of a function. */
#define FUNCTION_BOUNDARY \
(1 << ((ix86_align_funcs >= 0 ? ix86_align_funcs : -ix86_align_funcs) + 3))
@@ -540,6 +547,46 @@ extern int ix86_arch;
: (ALIGN)) \
: (ALIGN))
+/* If defined, a C expression to compute the alignment for a local
+ variable. TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines.
+
+ As defined here the result is either ALIGN, 64 or 128:
+ - ARRAY_TYPE: decided by the mode of the element type; DFmode
+ yields 64 when ALIGN is below 64, XFmode yields 128 when ALIGN
+ is below 128.
+ - COMPLEX_TYPE: the same rule applied to DCmode (64) and XCmode
+ (128).
+ - RECORD_TYPE, UNION_TYPE or QUAL_UNION_TYPE with a non-null
+ TYPE_FIELDS: decided by the DECL_MODE of the field TYPE_FIELDS
+ returns (presumably the first field -- confirm), DFmode giving
+ 64 and XFmode giving 128.
+ - REAL_TYPE: DFmode gives 64, XFmode gives 128.
+ In every other case, and whenever ALIGN is already at least the
+ listed value, ALIGN is returned unchanged. TYPE and ALIGN are
+ evaluated more than once, so arguments with side effects are
+ unsafe. NOTE(review): ALIGN is presumably measured in bits, like
+ the other boundary macros in this header -- confirm. */
+
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ ? ((TYPE_MODE (TREE_TYPE (TYPE)) == DFmode && (ALIGN) < 64) \
+ ? 64 \
+ : (TYPE_MODE (TREE_TYPE (TYPE)) == XFmode && (ALIGN) < 128) \
+ ? 128 \
+ : (ALIGN)) \
+ : TREE_CODE (TYPE) == COMPLEX_TYPE \
+ ? ((TYPE_MODE (TYPE) == DCmode && (ALIGN) < 64) \
+ ? 64 \
+ : (TYPE_MODE (TYPE) == XCmode && (ALIGN) < 128) \
+ ? 128 \
+ : (ALIGN)) \
+ : ((TREE_CODE (TYPE) == RECORD_TYPE \
+ || TREE_CODE (TYPE) == UNION_TYPE \
+ || TREE_CODE (TYPE) == QUAL_UNION_TYPE) \
+ && TYPE_FIELDS (TYPE)) \
+ ? ((DECL_MODE (TYPE_FIELDS (TYPE)) == DFmode && (ALIGN) < 64) \
+ ? 64 \
+ : (DECL_MODE (TYPE_FIELDS (TYPE)) == XFmode && (ALIGN) < 128) \
+ ? 128 \
+ : (ALIGN)) \
+ : TREE_CODE (TYPE) == REAL_TYPE \
+ ? ((TYPE_MODE (TYPE) == DFmode && (ALIGN) < 64) \
+ ? 64 \
+ : (TYPE_MODE (TYPE) == XFmode && (ALIGN) < 128) \
+ ? 128 \
+ : (ALIGN)) \
+ : (ALIGN))
+
/* Set this non-zero if move instructions will actually fail to work
when given unaligned data. */
#define STRICT_ALIGNMENT 0
@@ -1385,20 +1432,23 @@ do { \
(OFFSET) = 8; /* Skip saved PC and previous frame pointer */ \
else \
{ \
- int regno; \
- int offset = 0; \
+ int nregs; \
+ int offset; \
+ int preferred_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT; \
+ HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), \
+ &nregs); \
\
- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) \
- if ((regs_ever_live[regno] && ! call_used_regs[regno]) \
- || ((current_function_uses_pic_offset_table \
- || current_function_uses_const_pool) \
- && flag_pic && regno == PIC_OFFSET_TABLE_REGNUM)) \
- offset += 4; \
+ (OFFSET) = (tsize + nregs * UNITS_PER_WORD); \
\
- (OFFSET) = offset + get_frame_size (); \
+ offset = 4; \
+ if (frame_pointer_needed) \
+ offset += UNITS_PER_WORD; \
\
- if ((FROM) == ARG_POINTER_REGNUM && (TO) == STACK_POINTER_REGNUM) \
- (OFFSET) += 4; /* Skip saved PC */ \
+ if ((FROM) == ARG_POINTER_REGNUM) \
+ (OFFSET) += offset; \
+ else \
+ (OFFSET) -= ((offset + preferred_alignment - 1) \
+ & -preferred_alignment) - offset; \
} \
}
@@ -2513,11 +2563,13 @@ extern const char *ix86_regparm_string;
extern const char *ix86_align_loops_string; /* power of two alignment for loops */
extern const char *ix86_align_jumps_string; /* power of two alignment for non-loop jumps */
extern const char *ix86_align_funcs_string; /* power of two alignment for functions */
+extern const char *ix86_preferred_stack_boundary_string;/* power of two alignment for stack boundary */
extern const char *ix86_branch_cost_string; /* values 1-5: see jump.c */
extern int ix86_regparm; /* ix86_regparm_string as a number */
extern int ix86_align_loops; /* power of two alignment for loops */
extern int ix86_align_jumps; /* power of two alignment for non-loop jumps */
extern int ix86_align_funcs; /* power of two alignment for functions */
+extern int ix86_preferred_stack_boundary; /* preferred stack boundary alignment in bits */
extern int ix86_branch_cost; /* values 1-5: see jump.c */
extern const char * const hi_reg_name[]; /* names for 16 bit regs */
extern const char * const qi_reg_name[]; /* names for 8 bit regs (low) */