This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
PATCH: PR middle-end/37009: No need to align stack when incoming stack is aligned
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Joey Ye <joey dot ye at intel dot com>, Xuepeng Guo <xuepeng dot guo at intel dot com>
- Date: Sat, 2 Aug 2008 17:52:59 -0700
- Subject: PATCH: PR middle-end/37009: No need to align stack when incoming stack is aligned
The caller has to align the callee's incoming stack frame to at least
the largest alignment required by any parameter in the parameter
block. The callee therefore does not need to realign the stack to
reach that alignment itself. This patch adds a new field,
parm_stack_boundary, to rtl_data to track the largest alignment
of the parameters in the parameter block. It is initialized to 0.
Joey, Xuepeng, what do you think?
I am testing it on Linux/ia32, Linux/ia64 and Linux/x86-64.
Thanks.
H.J.
---
gcc/
2008-08-02 H.J. Lu <hongjiu.lu@intel.com>
PR middle-end/37009
* cfgexpand.c (expand_stack_alignment): Check parm_stack_boundary
for incoming stack boundary.
* function.c (assign_parm_find_entry_rtl): Update
parm_stack_boundary.
* function.h (rtl_data): Add parm_stack_boundary.
* config/i386/i386.c (ix86_finalize_stack_realign_flags): Check
parm_stack_boundary for incoming stack boundary.
gcc/testsuite/
2008-08-02 H.J. Lu <hongjiu.lu@intel.com>
PR middle-end/37009
* gcc.dg/torture/stackalign/alloca-2.c: New.
* gcc.dg/torture/stackalign/alloca-3.c: Likewise.
* gcc.dg/torture/stackalign/vararg-3.c: Likewise.
* gcc.target/i386/incoming-1.c: Likewise.
* gcc.target/i386/incoming-2.c: Likewise.
* gcc.target/i386/incoming-3.c: Likewise.
* gcc.target/i386/incoming-4.c: Likewise.
--- gcc/cfgexpand.c.parm 2008-08-01 19:48:30.000000000 -0700
+++ gcc/cfgexpand.c 2008-08-02 17:36:28.000000000 -0700
@@ -2184,7 +2184,7 @@ static void
expand_stack_alignment (void)
{
rtx drap_rtx;
- unsigned int preferred_stack_boundary;
+ unsigned int preferred_stack_boundary, incoming_stack_boundary;
if (! SUPPORTS_STACK_ALIGNMENT)
return;
@@ -2215,8 +2215,15 @@ expand_stack_alignment (void)
if (preferred_stack_boundary > crtl->stack_alignment_needed)
crtl->stack_alignment_needed = preferred_stack_boundary;
+ /* The incoming stack frame has to be aligned at least at
+ parm_stack_boundary. */
+ if (crtl->parm_stack_boundary > INCOMING_STACK_BOUNDARY)
+ incoming_stack_boundary = crtl->parm_stack_boundary;
+ else
+ incoming_stack_boundary = INCOMING_STACK_BOUNDARY;
+
crtl->stack_realign_needed
- = INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
+ = incoming_stack_boundary < crtl->stack_alignment_estimated;
crtl->stack_realign_tried = crtl->stack_realign_needed;
crtl->stack_realign_processed = true;
--- gcc/config/i386/i386.c.parm 2008-08-02 16:39:24.000000000 -0700
+++ gcc/config/i386/i386.c 2008-08-02 17:36:28.000000000 -0700
@@ -7613,7 +7613,10 @@ ix86_finalize_stack_realign_flags (void)
{
/* Check if stack realign is really needed after reload, and
stores result in cfun */
- unsigned int stack_realign = (ix86_incoming_stack_boundary
+ unsigned int incoming_stack_boundary
+ = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
+ ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
+ unsigned int stack_realign = (incoming_stack_boundary
< (current_function_is_leaf
? crtl->max_used_stack_slot_alignment
: crtl->stack_alignment_needed));
--- gcc/function.c.parm 2008-08-01 19:48:30.000000000 -0700
+++ gcc/function.c 2008-08-02 17:36:28.000000000 -0700
@@ -2261,6 +2261,11 @@ assign_parm_find_entry_rtl (struct assig
entry_parm ? data->partial : 0, current_function_decl,
&all->stack_args_size, &data->locate);
+ /* Update parm_stack_boundary if this parameter is passed in the
+ stack. */
+ if (!in_regs && crtl->parm_stack_boundary < data->locate.boundary)
+ crtl->parm_stack_boundary = data->locate.boundary;
+
/* Adjust offsets to include the pretend args. */
pretend_bytes = all->extra_pretend_bytes - pretend_bytes;
data->locate.slot_offset.constant += pretend_bytes;
--- gcc/function.h.parm 2008-08-01 19:48:30.000000000 -0700
+++ gcc/function.h 2008-08-02 17:36:28.000000000 -0700
@@ -339,6 +339,9 @@ struct rtl_data GTY(())
to call other functions. */
unsigned int preferred_stack_boundary;
+ /* The minimum alignment of parameter stack. */
+ unsigned int parm_stack_boundary;
+
/* The largest alignment of slot allocated on the stack. */
unsigned int max_used_stack_slot_alignment;
--- gcc/testsuite/gcc.dg/torture/stackalign/alloca-2.c.parm 2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.dg/torture/stackalign/alloca-2.c 2008-08-02 17:36:28.000000000 -0700
@@ -0,0 +1,56 @@
+/* PR middle-end/37009 */
+/* { dg-do run { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-msse2" } */
+
+#include <emmintrin.h>
+#include "cpuid.h"
+#include "check.h"
+
+#ifndef ALIGNMENT
+#define ALIGNMENT 16
+#endif
+
+typedef int aligned __attribute__((aligned(ALIGNMENT)));
+
+void
+bar (char *p, int size)
+{
+ __builtin_strncpy (p, "good", size);
+}
+
+void
+__attribute__ ((noinline))
+foo (__m128 x, __m128 y ,__m128 z , int size)
+{
+ char *p = __builtin_alloca (size + 1);
+ aligned i;
+
+ bar (p, size);
+ if (__builtin_strncmp (p, "good", size) != 0)
+ {
+#ifdef DEBUG
+ p[size] = '\0';
+ printf ("Failed: %s != good\n", p);
+#endif
+ abort ();
+ }
+
+ if (check_int (&i, __alignof__(i)) != i)
+ abort ();
+}
+
+int
+main (void)
+{
+ __m128 x = { 1.0 };
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ /* Run SSE2 test only if host has SSE2 support. */
+ if (edx & bit_SSE2)
+ foo (x, x, x, 5);
+
+ return 0;
+}
--- gcc/testsuite/gcc.dg/torture/stackalign/alloca-3.c.parm 2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.dg/torture/stackalign/alloca-3.c 2008-08-02 17:36:28.000000000 -0700
@@ -0,0 +1,56 @@
+/* PR middle-end/37009 */
+/* { dg-do run { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-msse2" } */
+
+#include <emmintrin.h>
+#include "cpuid.h"
+#include "check.h"
+
+#ifndef ALIGNMENT
+#define ALIGNMENT 16
+#endif
+
+typedef int aligned __attribute__((aligned(ALIGNMENT)));
+
+void
+bar (char *p, int size)
+{
+ __builtin_strncpy (p, "good", size);
+}
+
+void
+__attribute__ ((noinline))
+foo (__m128 x, __m128 y ,__m128 z ,__m128 a, int size)
+{
+ char *p = __builtin_alloca (size + 1);
+ aligned i;
+
+ bar (p, size);
+ if (__builtin_strncmp (p, "good", size) != 0)
+ {
+#ifdef DEBUG
+ p[size] = '\0';
+ printf ("Failed: %s != good\n", p);
+#endif
+ abort ();
+ }
+
+ if (check_int (&i, __alignof__(i)) != i)
+ abort ();
+}
+
+int
+main (void)
+{
+ __m128 x = { 1.0 };
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ /* Run SSE2 test only if host has SSE2 support. */
+ if (edx & bit_SSE2)
+ foo (x, x, x, x, 5);
+
+ return 0;
+}
--- gcc/testsuite/gcc.dg/torture/stackalign/vararg-3.c.parm 2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.dg/torture/stackalign/vararg-3.c 2008-08-02 17:36:28.000000000 -0700
@@ -0,0 +1,84 @@
+/* PR middle-end/37009 */
+/* { dg-do run { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-msse2" } */
+
+#include <stdarg.h>
+#include <emmintrin.h>
+#include "cpuid.h"
+#include "check.h"
+
+#ifndef ALIGNMENT
+#define ALIGNMENT 16
+#endif
+
+typedef int aligned __attribute__((aligned(ALIGNMENT)));
+
+void
+bar (char *p, int size)
+{
+ __builtin_strncpy (p, "good", size);
+}
+
+__m128 a = { 1.0 };
+
+void
+test (va_list arg)
+{
+ char *p;
+ aligned i;
+ int size;
+ double x;
+ __m128 e;
+
+ size = va_arg (arg, int);
+ if (size != 5)
+ abort ();
+
+ p = __builtin_alloca (size + 1);
+
+ x = va_arg (arg, double);
+ if (x != 5.0)
+ abort ();
+
+ bar (p, size);
+ if (__builtin_strncmp (p, "good", size) != 0)
+ {
+#ifdef DEBUG
+ p[size] = '\0';
+ printf ("Failed: %s != good\n", p);
+#endif
+ abort ();
+ }
+
+ if (check_int (&i, __alignof__(i)) != i)
+ abort ();
+
+ e = va_arg (arg, __m128);
+ if (__builtin_memcmp (&e, &a, sizeof (e)))
+ abort ();
+}
+
+void
+foo (const char *fmt, ...)
+{
+ va_list arg;
+ va_start (arg, fmt);
+ test (arg);
+ va_end (arg);
+}
+
+int
+main (void)
+{
+ __m128 x = { 1.0 };
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ /* Run SSE2 test only if host has SSE2 support. */
+ if (edx & bit_SSE2)
+ foo ("foo", 5, 5.0, x);
+
+ return 0;
+}
--- gcc/testsuite/gcc.target/i386/incoming-1.c.parm 2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/incoming-1.c 2008-08-02 17:43:39.000000000 -0700
@@ -0,0 +1,19 @@
+/* PR middle-end/37009 */
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-w -msse2 -mpreferred-stack-boundary=2" } */
+
+#include <emmintrin.h>
+
+extern void bar (int *);
+
+int
+foo(__m128 x, __m128 y, __m128 z, int size)
+{
+ int __attribute((aligned(16))) xxx;
+
+ xxx = 2;
+ bar (&xxx);
+ return size;
+}
+
+/* { dg-final { scan-assembler "and\[l\]\[ \t\]" } } */
--- gcc/testsuite/gcc.target/i386/incoming-2.c.parm 2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/incoming-2.c 2008-08-02 17:43:43.000000000 -0700
@@ -0,0 +1,19 @@
+/* PR middle-end/37009 */
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-w -msse2 -mpreferred-stack-boundary=2" } */
+
+#include <emmintrin.h>
+
+extern void bar (int *);
+
+int
+foo(__m128 x, __m128 y, __m128 z, __m128 a, int size)
+{
+ int __attribute((aligned(16))) xxx;
+
+ xxx = 2;
+ bar (&xxx);
+ return size;
+}
+
+/* { dg-final { scan-assembler-not "and\[l\]\[ \t\]" } } */
--- gcc/testsuite/gcc.target/i386/incoming-3.c.parm 2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/incoming-3.c 2008-08-02 17:43:48.000000000 -0700
@@ -0,0 +1,19 @@
+/* PR middle-end/37009 */
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-w -msse2 -mpreferred-stack-boundary=2" } */
+
+#include <emmintrin.h>
+
+extern void bar (int *);
+
+int
+foo(__m128 y, int size, ...)
+{
+ int __attribute((aligned(16))) xxx;
+
+ xxx = 2;
+ bar (&xxx);
+ return size;
+}
+
+/* { dg-final { scan-assembler-not "and\[l\]\[ \t\]" } } */
--- gcc/testsuite/gcc.target/i386/incoming-4.c.parm 2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/incoming-4.c 2008-08-02 17:43:52.000000000 -0700
@@ -0,0 +1,20 @@
+/* PR middle-end/37009 */
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-w -msse2 -mpreferred-stack-boundary=2" } */
+
+#include <stdarg.h>
+#include <emmintrin.h>
+
+extern void bar (int *);
+
+__m128
+foo(va_list arg)
+{
+ int __attribute((aligned(16))) xxx;
+
+ xxx = 2;
+ bar (&xxx);
+ return va_arg (arg, __m128);
+}
+
+/* { dg-final { scan-assembler "and\[l\]\[ \t\]" } } */