This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
PATCH: PR target/40838: gcc shouldn't assume that the stack is aligned
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: ubizjak at gmail dot com
- Date: Thu, 6 Aug 2009 14:42:16 -0700
- Subject: PATCH: PR target/40838: gcc shouldn't assume that the stack is aligned
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
Hi,
In 32-bit mode, the incoming stack may not be 16-byte aligned. This patch
assumes the incoming stack is 4-byte aligned and realigns the stack if any
SSE variable is placed on the stack. Any comments?
Thanks.
H.J.
---
gcc/
2009-08-06 H.J. Lu <hongjiu.lu@intel.com>
PR target/40838
* config/i386/i386.c (ix86_update_stack_boundary): Use
STACK_BOUNDARY if use_stack_boundary_for_incoming_stack_boundary
is set.
(VALID_SSE_VECTOR_MODE): New.
(ix86_minimum_alignment): In 32bit, set
use_stack_boundary_for_incoming_stack_boundary if any SSE
variables are put on stack.
* config/i386/i386.h (machine_function): Add
use_stack_boundary_for_incoming_stack_boundary.
gcc/testsuite/
2009-08-06 H.J. Lu <hongjiu.lu@intel.com>
PR target/40838
* gcc.target/i386/incoming-6.c: New.
* gcc.target/i386/incoming-7.c: Likewise.
* gcc.target/i386/incoming-8.c: Likewise.
* gcc.target/i386/incoming-9.c: Likewise.
Index: gcc/testsuite/gcc.target/i386/incoming-7.c
===================================================================
--- gcc/testsuite/gcc.target/i386/incoming-7.c (revision 0)
+++ gcc/testsuite/gcc.target/i386/incoming-7.c (revision 0)
@@ -0,0 +1,16 @@
+/* PR target/40838 */
+/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */
+/* { dg-options "-w -O2 -msse2 -mpreferred-stack-boundary=4" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+extern v4si y(v4si, v4si, v4si, v4si, v4si);
+
+extern v4si s1, s2;
+
+v4si x(void)
+{
+ return y(s1, s2, s1, s2, s2);
+}
+
+/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
Index: gcc/testsuite/gcc.target/i386/incoming-9.c
===================================================================
--- gcc/testsuite/gcc.target/i386/incoming-9.c (revision 0)
+++ gcc/testsuite/gcc.target/i386/incoming-9.c (revision 0)
@@ -0,0 +1,18 @@
+/* PR target/40838 */
+/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */
+/* { dg-options "-w -O3 -mno-sse -mpreferred-stack-boundary=4" } */
+
+float
+foo (float f)
+{
+ float array[128];
+ float x;
+ int i;
+ for (i = 0; i < sizeof(array) / sizeof(*array); i++)
+ array[i] = f;
+ for (i = 0; i < sizeof(array) / sizeof(*array); i++)
+ x += array[i];
+ return x;
+}
+
+/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
Index: gcc/testsuite/gcc.target/i386/incoming-6.c
===================================================================
--- gcc/testsuite/gcc.target/i386/incoming-6.c (revision 0)
+++ gcc/testsuite/gcc.target/i386/incoming-6.c (revision 0)
@@ -0,0 +1,17 @@
+/* PR target/40838 */
+/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */
+/* { dg-options "-w -O2 -msse2 -mpreferred-stack-boundary=4" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+extern v4si y(v4si *s3);
+
+extern v4si s1, s2;
+
+v4si x(void)
+{
+ v4si s3 = s1 + s2;
+ return y(&s3);
+}
+
+/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
Index: gcc/testsuite/gcc.target/i386/incoming-8.c
===================================================================
--- gcc/testsuite/gcc.target/i386/incoming-8.c (revision 0)
+++ gcc/testsuite/gcc.target/i386/incoming-8.c (revision 0)
@@ -0,0 +1,18 @@
+/* PR target/40838 */
+/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */
+/* { dg-options "-w -O3 -msse2 -mpreferred-stack-boundary=4" } */
+
+float
+foo (float f)
+{
+ float array[128];
+ float x;
+ int i;
+ for (i = 0; i < sizeof(array) / sizeof(*array); i++)
+ array[i] = f;
+ for (i = 0; i < sizeof(array) / sizeof(*array); i++)
+ x += array[i];
+ return x;
+}
+
+/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h (revision 150532)
+++ gcc/config/i386/i386.h (working copy)
@@ -2389,6 +2389,8 @@ struct GTY(()) machine_function {
/* This value is used for amd64 targets and specifies the current abi
to be used. MS_ABI means ms abi. Otherwise SYSV_ABI means sysv abi. */
enum calling_abi call_abi;
+ /* Use STACK_BOUNDARY for incoming stack boundary. */
+ int use_stack_boundary_for_incoming_stack_boundary;
struct machine_cfa_state cfa;
};
#endif
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c (revision 150532)
+++ gcc/config/i386/i386.c (working copy)
@@ -8230,11 +8230,19 @@ find_drap_reg (void)
static void
ix86_update_stack_boundary (void)
{
+ /* Should we use STACK_BOUNDARY for incoming stack boundary? */
+ unsigned int incoming_stack_boundary;
+
+ if (cfun->machine->use_stack_boundary_for_incoming_stack_boundary)
+ incoming_stack_boundary = STACK_BOUNDARY;
+ else
+ incoming_stack_boundary = ix86_default_incoming_stack_boundary;
+
/* Prefer the one specified at command line. */
ix86_incoming_stack_boundary
= (ix86_user_incoming_stack_boundary
? ix86_user_incoming_stack_boundary
- : ix86_default_incoming_stack_boundary);
+ : incoming_stack_boundary);
/* Incoming stack alignment can be changed on individual functions
via force_align_arg_pointer attribute. We use the smallest
@@ -20069,6 +20077,10 @@ ix86_local_alignment (tree exp, enum mac
return align;
}
+#define VALID_SSE_VECTOR_MODE(MODE) \
+ ((MODE) == V4SFmode || (MODE) == V4SImode || (MODE) == V2DFmode \
+ || (MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DImode)
+
/* Compute the minimum required alignment for dynamic stack realignment
purposes for a local variable, parameter or a stack slot. EXP is
the data type or decl itself, MODE is its mode and ALIGN is the
@@ -20080,7 +20092,7 @@ ix86_minimum_alignment (tree exp, enum m
{
tree type, decl;
- if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
+ if (TARGET_64BIT)
return align;
if (exp && DECL_P (exp))
@@ -20094,6 +20106,15 @@ ix86_minimum_alignment (tree exp, enum m
decl = NULL;
}
+ /* In 32bit, use STACK_BOUNDARY for incoming stack boundary if any
+ SSE variables are put on stack. */
+ if (VALID_SSE_VECTOR_MODE (mode)
+ || (type && VALID_SSE_VECTOR_MODE (TYPE_MODE (type))))
+ cfun->machine->use_stack_boundary_for_incoming_stack_boundary = 1;
+
+ if (align != 64 || ix86_preferred_stack_boundary >= 64)
+ return align;
+
/* Don't do dynamic stack realignment for long long objects with
-mpreferred-stack-boundary=2. */
if ((mode == DImode || (type && TYPE_MODE (type) == DImode))