This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PATCH: PR middle-end/37009: No need to align stack when incoming stack is aligned


The caller has to align the stack frame for the callee to at least
the largest alignment of any parameter in the parameter block, so
there is no need for the callee to realign the stack up to that
alignment.  This patch adds a new field, parm_stack_boundary, to
rtl_data to track the largest alignment of the parameters in the
parameter block. It is initialized to 0. Joey, Xuepeng, what do
you think?

I am testing it on Linux/ia32, Linux/ia64 and Linux/x86-64.

Thanks.


H.J.
---
gcc/

2008-08-02  H.J. Lu  <hongjiu.lu@intel.com>

	PR middle-end/37009
	* cfgexpand.c (expand_stack_alignment): Check parm_stack_boundary
	for incoming stack boundary.

	* function.c (assign_parm_find_entry_rtl): Update
	parm_stack_boundary.

	* function.h (rtl_data): Add parm_stack_boundary.

	* config/i386/i386.c (ix86_finalize_stack_realign_flags): Check
	parm_stack_boundary for incoming stack boundary.

gcc/testsuite/

2008-08-02  H.J. Lu  <hongjiu.lu@intel.com>

	PR middle-end/37009
	* gcc.dg/torture/stackalign/alloca-2.c: New.
	* gcc.dg/torture/stackalign/alloca-3.c: Likewise.
	* gcc.dg/torture/stackalign/vararg-3.c: Likewise.
	* gcc.target/i386/incoming-1.c: Likewise.
	* gcc.target/i386/incoming-2.c: Likewise.
	* gcc.target/i386/incoming-3.c: Likewise.
	* gcc.target/i386/incoming-4.c: Likewise.

--- gcc/cfgexpand.c.parm	2008-08-01 19:48:30.000000000 -0700
+++ gcc/cfgexpand.c	2008-08-02 17:36:28.000000000 -0700
@@ -2184,7 +2184,7 @@ static void
 expand_stack_alignment (void)
 {
   rtx drap_rtx;
-  unsigned int preferred_stack_boundary;
+  unsigned int preferred_stack_boundary, incoming_stack_boundary;
 
   if (! SUPPORTS_STACK_ALIGNMENT)
     return;
@@ -2215,8 +2215,15 @@ expand_stack_alignment (void)
   if (preferred_stack_boundary > crtl->stack_alignment_needed)
     crtl->stack_alignment_needed = preferred_stack_boundary;
 
+  /* The caller guarantees that the incoming stack frame is aligned to
+     at least parm_stack_boundary.  */
+  if (crtl->parm_stack_boundary > INCOMING_STACK_BOUNDARY)
+    incoming_stack_boundary = crtl->parm_stack_boundary;
+  else
+    incoming_stack_boundary = INCOMING_STACK_BOUNDARY;
+
   crtl->stack_realign_needed
-    = INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
+    = incoming_stack_boundary < crtl->stack_alignment_estimated;
   crtl->stack_realign_tried = crtl->stack_realign_needed;
 
   crtl->stack_realign_processed = true;
--- gcc/config/i386/i386.c.parm	2008-08-02 16:39:24.000000000 -0700
+++ gcc/config/i386/i386.c	2008-08-02 17:36:28.000000000 -0700
@@ -7613,7 +7613,10 @@ ix86_finalize_stack_realign_flags (void)
 {
   /* Check if stack realign is really needed after reload, and 
      stores result in cfun */
-  unsigned int stack_realign = (ix86_incoming_stack_boundary
+  unsigned int incoming_stack_boundary
+    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
+       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
+  unsigned int stack_realign = (incoming_stack_boundary
 				< (current_function_is_leaf
 				   ? crtl->max_used_stack_slot_alignment
 				   : crtl->stack_alignment_needed));
--- gcc/function.c.parm	2008-08-01 19:48:30.000000000 -0700
+++ gcc/function.c	2008-08-02 17:36:28.000000000 -0700
@@ -2261,6 +2261,11 @@ assign_parm_find_entry_rtl (struct assig
 		       entry_parm ? data->partial : 0, current_function_decl,
 		       &all->stack_args_size, &data->locate);
 
+  /* Update parm_stack_boundary if this parameter is passed on the
+     stack.  */
+  if (!in_regs && crtl->parm_stack_boundary < data->locate.boundary)
+    crtl->parm_stack_boundary = data->locate.boundary;
+
   /* Adjust offsets to include the pretend args.  */
   pretend_bytes = all->extra_pretend_bytes - pretend_bytes;
   data->locate.slot_offset.constant += pretend_bytes;
--- gcc/function.h.parm	2008-08-01 19:48:30.000000000 -0700
+++ gcc/function.h	2008-08-02 17:36:28.000000000 -0700
@@ -339,6 +339,9 @@ struct rtl_data GTY(())
      to call other functions.  */
   unsigned int preferred_stack_boundary;
 
+  /* The minimum alignment of parameter stack.  */
+  unsigned int parm_stack_boundary;
+
   /* The largest alignment of slot allocated on the stack.  */
   unsigned int max_used_stack_slot_alignment;
 
--- gcc/testsuite/gcc.dg/torture/stackalign/alloca-2.c.parm	2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.dg/torture/stackalign/alloca-2.c	2008-08-02 17:36:28.000000000 -0700
@@ -0,0 +1,56 @@
+/* PR middle-end/37009 */
+/* { dg-do run { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-msse2" } */
+
+#include <emmintrin.h>
+#include "cpuid.h"
+#include "check.h"
+
+#ifndef ALIGNMENT
+#define ALIGNMENT	16
+#endif
+
+typedef int aligned __attribute__((aligned(ALIGNMENT)));
+
+void
+bar (char *p, int size)
+{
+  __builtin_strncpy (p, "good", size);
+}
+
+void
+__attribute__ ((noinline))
+foo (__m128 x, __m128 y ,__m128 z , int size)
+{
+  char *p = __builtin_alloca (size + 1);
+  aligned i;
+
+  bar (p, size);
+  if (__builtin_strncmp (p, "good", size) != 0)
+    {
+#ifdef DEBUG
+      p[size] = '\0';
+      printf ("Failed: %s != good\n", p);
+#endif
+      abort ();
+    }
+
+  if (check_int (&i,  __alignof__(i)) != i)
+    abort ();
+}
+
+int
+main (void)
+{
+  __m128 x = { 1.0 };
+  unsigned int eax, ebx, ecx, edx;
+ 
+  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+    return 0;
+
+  /* Run SSE2 test only if host has SSE2 support.  */
+  if (edx & bit_SSE2)
+    foo (x, x, x, 5);
+
+  return 0;
+}
--- gcc/testsuite/gcc.dg/torture/stackalign/alloca-3.c.parm	2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.dg/torture/stackalign/alloca-3.c	2008-08-02 17:36:28.000000000 -0700
@@ -0,0 +1,56 @@
+/* PR middle-end/37009 */
+/* { dg-do run { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-msse2" } */
+
+#include <emmintrin.h>
+#include "cpuid.h"
+#include "check.h"
+
+#ifndef ALIGNMENT
+#define ALIGNMENT	16
+#endif
+
+typedef int aligned __attribute__((aligned(ALIGNMENT)));
+
+void
+bar (char *p, int size)
+{
+  __builtin_strncpy (p, "good", size);
+}
+
+void
+__attribute__ ((noinline))
+foo (__m128 x, __m128 y ,__m128 z ,__m128 a, int size)
+{
+  char *p = __builtin_alloca (size + 1);
+  aligned i;
+
+  bar (p, size);
+  if (__builtin_strncmp (p, "good", size) != 0)
+    {
+#ifdef DEBUG
+      p[size] = '\0';
+      printf ("Failed: %s != good\n", p);
+#endif
+      abort ();
+    }
+
+  if (check_int (&i,  __alignof__(i)) != i)
+    abort ();
+}
+
+int
+main (void)
+{
+  __m128 x = { 1.0 };
+  unsigned int eax, ebx, ecx, edx;
+ 
+  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+    return 0;
+
+  /* Run SSE2 test only if host has SSE2 support.  */
+  if (edx & bit_SSE2)
+    foo (x, x, x, x, 5);
+
+  return 0;
+}
--- gcc/testsuite/gcc.dg/torture/stackalign/vararg-3.c.parm	2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.dg/torture/stackalign/vararg-3.c	2008-08-02 17:36:28.000000000 -0700
@@ -0,0 +1,84 @@
+/* PR middle-end/37009 */
+/* { dg-do run { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-msse2" } */
+
+#include <stdarg.h>
+#include <emmintrin.h>
+#include "cpuid.h"
+#include "check.h"
+
+#ifndef ALIGNMENT
+#define ALIGNMENT	16
+#endif
+
+typedef int aligned __attribute__((aligned(ALIGNMENT)));
+
+void
+bar (char *p, int size)
+{
+  __builtin_strncpy (p, "good", size);
+}
+
+__m128 a = { 1.0 };
+
+void
+test (va_list arg)
+{
+  char *p;
+  aligned i;
+  int size;
+  double x;
+  __m128 e;
+
+  size = va_arg (arg, int);
+  if (size != 5)
+    abort ();
+
+  p = __builtin_alloca (size + 1);
+
+  x = va_arg (arg, double);
+  if (x != 5.0)
+    abort ();
+
+  bar (p, size);
+  if (__builtin_strncmp (p, "good", size) != 0)
+    {
+#ifdef DEBUG
+      p[size] = '\0';
+      printf ("Failed: %s != good\n", p);
+#endif
+      abort ();
+    }
+
+  if (check_int (&i,  __alignof__(i)) != i)
+    abort ();
+
+  e = va_arg (arg, __m128);
+  if (__builtin_memcmp (&e, &a, sizeof (e)))
+    abort ();
+}
+
+void
+foo (const char *fmt, ...)
+{
+  va_list arg;
+  va_start (arg, fmt);
+  test (arg);
+  va_end (arg);
+}
+
+int
+main (void)
+{
+  __m128 x = { 1.0 };
+  unsigned int eax, ebx, ecx, edx;
+ 
+  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+    return 0;
+
+  /* Run SSE2 test only if host has SSE2 support.  */
+  if (edx & bit_SSE2)
+    foo ("foo", 5, 5.0, x);
+
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/incoming-1.c.parm	2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/incoming-1.c	2008-08-02 17:43:39.000000000 -0700
@@ -0,0 +1,19 @@
+/* PR middle-end/37009 */
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-w -msse2 -mpreferred-stack-boundary=2" } */
+
+#include <emmintrin.h>
+
+extern void bar (int *);
+
+int
+foo(__m128 x, __m128 y, __m128 z, int size)
+{
+  int __attribute((aligned(16))) xxx;
+
+  xxx = 2;
+  bar (&xxx);
+  return size;
+}
+
+/* { dg-final { scan-assembler "and\[l\]\[ \t\]" } } */
--- gcc/testsuite/gcc.target/i386/incoming-2.c.parm	2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/incoming-2.c	2008-08-02 17:43:43.000000000 -0700
@@ -0,0 +1,19 @@
+/* PR middle-end/37009 */
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-w -msse2 -mpreferred-stack-boundary=2" } */
+
+#include <emmintrin.h>
+
+extern void bar (int *);
+
+int
+foo(__m128 x, __m128 y, __m128 z, __m128 a, int size)
+{
+  int __attribute((aligned(16))) xxx;
+
+  xxx = 2;
+  bar (&xxx);
+  return size;
+}
+
+/* { dg-final { scan-assembler-not "and\[l\]\[ \t\]" } } */
--- gcc/testsuite/gcc.target/i386/incoming-3.c.parm	2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/incoming-3.c	2008-08-02 17:43:48.000000000 -0700
@@ -0,0 +1,19 @@
+/* PR middle-end/37009 */
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-w -msse2 -mpreferred-stack-boundary=2" } */
+
+#include <emmintrin.h>
+
+extern void bar (int *);
+
+int
+foo(__m128 y, int size, ...)
+{
+  int __attribute((aligned(16))) xxx;
+
+  xxx = 2;
+  bar (&xxx);
+  return size;
+}
+
+/* { dg-final { scan-assembler-not "and\[l\]\[ \t\]" } } */
--- gcc/testsuite/gcc.target/i386/incoming-4.c.parm	2008-08-02 17:36:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/incoming-4.c	2008-08-02 17:43:52.000000000 -0700
@@ -0,0 +1,20 @@
+/* PR middle-end/37009 */
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-w -msse2 -mpreferred-stack-boundary=2" } */
+
+#include <stdarg.h>
+#include <emmintrin.h>
+
+extern void bar (int *);
+
+__m128
+foo(va_list arg) 
+{
+  int __attribute((aligned(16))) xxx;
+
+  xxx = 2;
+  bar (&xxx);
+  return va_arg (arg, __m128);
+}
+
+/* { dg-final { scan-assembler "and\[l\]\[ \t\]" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]