This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Improve i?86/x86_64 prologue_and_epilogue for leaf functions (PR target/59501)


Hi!

Honza recently changed the i?86 backend, so that it often doesn't
do -maccumulate-outgoing-args by default on x86_64.
Unfortunately, on some of the testcases included here this regressed
the generated code quite a bit.  As AVX vectors are used, the dynamic
realignment code needs to assume e.g. that some of them will need to be
spilled, and for -mno-accumulate-outgoing-args the code needs to set
need_drap early as well.  But when emitting the prologue/epilogue,
if need_drap is set, we don't perform the optimization for leaf functions
which have zero size stack frame, thus we end up uselessly doing
dynamic stack realignment, setting up DRAP that nothing uses and later on
restoring everything back.

This patch improves it: if the DRAP register isn't live at the start of
the entry bb's successor and we aren't going to realign the stack, we don't
need DRAP at all.  And even if we do need the DRAP register, that can't be
the sole reason for doing stack realignment, because the prologue code is
able to set up DRAP even without dynamic stack realignment.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2013-12-20  Jakub Jelinek  <jakub@redhat.com>

	PR target/59501
	* config/i386/i386.c (ix86_save_reg): Don't return true for drap_reg
	if !crtl->stack_realign_needed.
	(ix86_finalize_stack_realign_flags): If drap_reg isn't live on entry
	and stack_realign_needed will be false, clear drap_reg and need_drap.
	Optimize leaf functions that don't need stack frame even if
	crtl->need_drap.

	* gcc.target/i386/pr59501-1.c: New test.
	* gcc.target/i386/pr59501-1a.c: New test.
	* gcc.target/i386/pr59501-2.c: New test.
	* gcc.target/i386/pr59501-2a.c: New test.
	* gcc.target/i386/pr59501-3.c: New test.
	* gcc.target/i386/pr59501-3a.c: New test.
	* gcc.target/i386/pr59501-4.c: New test.
	* gcc.target/i386/pr59501-4a.c: New test.
	* gcc.target/i386/pr59501-5.c: New test.
	* gcc.target/i386/pr59501-6.c: New test.

--- gcc/config/i386/i386.c.jj	2013-12-19 13:35:23.000000000 +0100
+++ gcc/config/i386/i386.c	2013-12-20 11:44:14.389310804 +0100
@@ -9235,7 +9235,9 @@ ix86_save_reg (unsigned int regno, bool
 	}
     }
 
-  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
+  if (crtl->drap_reg
+      && regno == REGNO (crtl->drap_reg)
+      && crtl->stack_realign_needed)
     return true;
 
   return (df_regs_ever_live_p (regno)
@@ -10473,12 +10475,23 @@ ix86_finalize_stack_realign_flags (void)
       return;
     }
 
+  /* If drap has been set, but it actually isn't live at the start
+     of the function and !stack_realign, there is no reason to set it up.  */
+  if (crtl->drap_reg && !stack_realign)
+    {
+      basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
+      if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
+	{
+	  crtl->drap_reg = NULL_RTX;
+	  crtl->need_drap = false;
+	}
+    }
+
   /* If the only reason for frame_pointer_needed is that we conservatively
      assumed stack realignment might be needed, but in the end nothing that
      needed the stack alignment had been spilled, clear frame_pointer_needed
      and say we don't need stack realignment.  */
   if (stack_realign
-      && !crtl->need_drap
       && frame_pointer_needed
       && crtl->is_leaf
       && flag_omit_frame_pointer
@@ -10516,6 +10529,18 @@ ix86_finalize_stack_realign_flags (void)
 	      }
 	}
 
+      /* If drap has been set, but it actually isn't live at the start
+	 of the function, there is no reason to set it up.  */
+      if (crtl->drap_reg)
+	{
+	  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
+	  if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
+	    {
+	      crtl->drap_reg = NULL_RTX;
+	      crtl->need_drap = false;
+	    }
+	}
+
       frame_pointer_needed = false;
       stack_realign = false;
       crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
--- gcc/testsuite/gcc.target/i386/pr59501-2.c.jj	2013-12-20 12:02:08.754662741 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-2.c	2013-12-20 12:02:04.665668734 +0100
@@ -0,0 +1,5 @@
+/* PR target/59501 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
+
+#include "pr59501-1.c"
--- gcc/testsuite/gcc.target/i386/pr59501-1.c.jj	2013-12-20 12:01:44.253781613 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-1.c	2013-12-20 12:12:26.715391613 +0100
@@ -0,0 +1,30 @@
+/* PR target/59501 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx -mno-accumulate-outgoing-args" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include CHECK_H
+
+typedef double V __attribute__ ((vector_size (32)));
+
+__attribute__((noinline, noclone)) V
+foo (double *x, unsigned *y)
+{
+  V r = { x[y[0]], x[y[1]], x[y[2]], x[y[3]] };
+  return r;
+}
+
+static void
+TEST (void)
+{
+  double a[16];
+  unsigned b[4] = { 5, 0, 15, 7 };
+  int i;
+  for (i = 0; i < 16; i++)
+    a[i] = 0.5 + i;
+  V v = foo (a, b);
+  if (v[0] != 5.5 || v[1] != 0.5 || v[2] != 15.5 || v[3] != 7.5)
+    __builtin_abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr59501-4a.c.jj	2013-12-20 12:19:20.603212859 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-4a.c	2013-12-20 12:23:33.647881672 +0100
@@ -0,0 +1,8 @@
+/* PR target/59501 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
+
+#include "pr59501-3a.c"
+
+/* Verify no dynamic realignment is performed.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" { xfail *-*-* } } } */
--- gcc/testsuite/gcc.target/i386/pr59501-3.c.jj	2013-12-20 12:02:44.644462041 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-3.c	2013-12-20 12:13:06.834181801 +0100
@@ -0,0 +1,30 @@
+/* PR target/59501 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx -mno-accumulate-outgoing-args" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include CHECK_H
+
+typedef double V __attribute__ ((vector_size (32)));
+
+__attribute__((noinline, noclone)) V
+foo (double *x, int a, int b, int c, int d, int e, int f, unsigned *y)
+{
+  V r = { x[y[0]], x[y[1]], x[y[2]], x[y[3]] };
+  return r;
+}
+
+static void
+TEST (void)
+{
+  double a[16];
+  unsigned b[4] = { 5, 0, 15, 7 };
+  int i;
+  for (i = 0; i < 16; i++)
+    a[i] = 0.5 + i;
+  V v = foo (a, 0, 0, 0, 0, 0, 0, b);
+  if (v[0] != 5.5 || v[1] != 0.5 || v[2] != 15.5 || v[3] != 7.5)
+    __builtin_abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr59501-3a.c.jj	2013-12-20 12:18:41.313420496 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-3a.c	2013-12-20 12:22:15.257292900 +0100
@@ -0,0 +1,15 @@
+/* PR target/59501 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx -mno-accumulate-outgoing-args" } */
+
+typedef double V __attribute__ ((vector_size (32)));
+
+V
+foo (double *x, int a, int b, int c, int d, int e, int f, unsigned *y)
+{
+  V r = { x[y[0]], x[y[1]], x[y[2]], x[y[3]] };
+  return r;
+}
+
+/* Verify no dynamic realignment is performed.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
--- gcc/testsuite/gcc.target/i386/pr59501-1a.c.jj	2013-12-20 12:15:16.890495826 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-1a.c	2013-12-20 12:22:02.820358481 +0100
@@ -0,0 +1,17 @@
+/* PR target/59501 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx -mno-accumulate-outgoing-args" } */
+
+typedef double V __attribute__ ((vector_size (32)));
+
+V
+foo (double *x, unsigned *y)
+{
+  V r = { x[y[0]], x[y[1]], x[y[2]], x[y[3]] };
+  return r;
+}
+
+/* Verify no dynamic realignment is performed.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
+/* And DRAP isn't needed either.  */
+/* { dg-final { scan-assembler-not "r10" } } */
--- gcc/testsuite/gcc.target/i386/pr59501-6.c.jj	2013-12-20 12:08:21.574682265 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-6.c	2013-12-20 12:08:32.966622139 +0100
@@ -0,0 +1,5 @@
+/* PR target/59501 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
+
+#include "pr59501-5.c"
--- gcc/testsuite/gcc.target/i386/pr59501-5.c.jj	2013-12-20 12:06:53.276148649 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-5.c	2013-12-20 12:13:38.012014216 +0100
@@ -0,0 +1,39 @@
+/* PR target/59501 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx -mno-accumulate-outgoing-args" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include CHECK_H
+
+typedef double V __attribute__ ((vector_size (32)));
+
+__attribute__((noinline, noclone)) void
+bar (char *p)
+{
+  p[0] = 1;
+  p[37] = 2;
+  asm volatile ("" : : "r" (p) : "memory");
+}
+
+__attribute__((noinline, noclone)) V
+foo (double *x, int a, int b, int c, int d, int e, int f, unsigned *y)
+{
+  bar (__builtin_alloca (a + b + c + d + e + f));
+  V r = { x[y[0]], x[y[1]], x[y[2]], x[y[3]] };
+  return r;
+}
+
+static void
+TEST (void)
+{
+  double a[16];
+  unsigned b[4] = { 5, 0, 15, 7 };
+  int i;
+  for (i = 0; i < 16; i++)
+    a[i] = 0.5 + i;
+  V v = foo (a, 0, 30, 0, 0, 8, 0, b);
+  if (v[0] != 5.5 || v[1] != 0.5 || v[2] != 15.5 || v[3] != 7.5)  
+    __builtin_abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr59501-4.c.jj	2013-12-20 12:03:16.159292616 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-4.c	2013-12-20 12:06:24.651298808 +0100
@@ -0,0 +1,5 @@
+/* PR target/59501 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
+
+#include "pr59501-3.c"
--- gcc/testsuite/gcc.target/i386/pr59501-2a.c.jj	2013-12-20 12:18:11.371578107 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-2a.c	2013-12-20 12:22:07.988329149 +0100
@@ -0,0 +1,10 @@
+/* PR target/59501 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
+
+#include "pr59501-1a.c"
+
+/* Verify no dynamic realignment is performed.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
+/* And DRAP isn't needed either.  */
+/* { dg-final { scan-assembler-not "r10" } } */

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]