PATCH: Don't allocate space for SSE reg in reg save area if SSE is disabled

Jakub Jelinek jakub@redhat.com
Tue Sep 2 18:30:00 GMT 2008


On Mon, Sep 01, 2008 at 05:49:32PM +0200, Jan Hubicka wrote:
> This is OK, thanks!

Unfortunately that patch had a bug (and I'm certain the older patch too), which
caused va-arg-12.c to FAIL.  The problem is that the if (ix86_varargs_fpr_size)
check was used to protect even the setting of the *.fp_offset field in
ix86_va_start, but ix86_varargs_fpr_size is 0 even when there are
some floating point va_arg uses and SSE is enabled - cum->sse_nregs
is 0 if there are 8 or more normal arguments in SSE registers.
In this case we don't need to set up reg_save_area pointer, nor save any
registers in there, but fp_offset still has to be initialized to 176
to tell va_arg (ap, double) etc. that it should use solely the overflow
area.  Fixed in the patch below which I've bootstrapped/regtested on
x86_64-linux and additionally did compat.exp and struct-layout-1.exp
testing with ALT_CC_UNDER_TEST and ALT_CXX_UNDER_TEST.

> > -  /* Indicate to allocate space on the stack for varargs save area.  */
> > -  ix86_save_varrargs_registers = 1;
> > +  /* FPR size of varargs save area.  We don't need it if we don't pass
> > +     anything in SSE registers.  */
> > +  if (cum->sse_nregs && cfun->va_list_fpr_size)
> > +    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
> 
> I would probably prefer to have else ix86_varargs_fpr_size = 0; as it
> at least made me run across the patch to see where else this is
> modified. ;)

This is changed in the patch below too.  Ok for trunk?

2008-09-02  H.J. Lu  <hongjiu.lu@intel.com>
	    Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (X86_64_VARARGS_SIZE): Removed.
	(setup_incoming_varargs_64): Assume cum != NULL. Set/check
	ix86_varargs_gpr_size and ix86_varargs_fpr_size.  Use
	ix86_varargs_gpr_size instead of X86_64_REGPARM_MAX.
	Don't set ix86_save_varrargs_registers.
	(ix86_setup_incoming_varargs): Assume cum != NULL.
	(ix86_va_start): Additionally require TARGET_SSE before
	initializing fp_offset.  When setting up the reg_save_area pointer,
	check ix86_varargs_gpr_size and ix86_varargs_fpr_size instead of
	cfun->va_list_gpr_size and cfun->va_list_fpr_size, respectively.
	Subtract 8*X86_64_REGPARM_MAX from frame pointer if
	ix86_varargs_gpr_size == 0.
	(ix86_compute_frame_layout): Updated.

	* config/i386/i386.h (ix86_save_varrargs_registers): Removed.
	(ix86_varargs_gpr_size): Define.
	(ix86_varargs_fpr_size): Likewise.
	(machine_function): Remove save_varrargs_registers.
	Add varargs_gpr_size and varargs_fpr_size.

	* gcc.target/i386/amd64-abi-3.c: New test.
	* gcc.target/i386/amd64-abi-4.c: Likewise.
	* gcc.target/i386/amd64-abi-5.c: Likewise.
	* gcc.target/i386/amd64-abi-6.c: Likewise.

--- gcc/config/i386/i386.c.jj	2008-09-01 21:07:04.000000000 +0200
+++ gcc/config/i386/i386.c	2008-09-02 11:57:20.000000000 +0200
@@ -1631,9 +1631,6 @@ rtx ix86_compare_op0 = NULL_RTX;
 rtx ix86_compare_op1 = NULL_RTX;
 rtx ix86_compare_emitted = NULL_RTX;
 
-/* Size of the register save area.  */
-#define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
-
 /* Define the structure for the machine field in struct function.  */
 
 struct stack_local_entry GTY(())
@@ -6312,14 +6309,24 @@ setup_incoming_varargs_64 (CUMULATIVE_AR
   int i;
   int regparm = ix86_regparm;
 
-  if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
+  if (cum->call_abi != DEFAULT_ABI)
     regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
 
-  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
-    return;
+  /* GPR size of varargs save area.  */
+  if (cfun->va_list_gpr_size)
+    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
+  else
+    ix86_varargs_gpr_size = 0;
+
+  /* FPR size of varargs save area.  We don't need it if we don't pass
+     anything in SSE registers.  */
+  if (cum->sse_nregs && cfun->va_list_fpr_size)
+    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
+  else
+    ix86_varargs_fpr_size = 0;
 
-  /* Indicate to allocate space on the stack for varargs save area.  */
-  ix86_save_varrargs_registers = 1;
+  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
+    return;
 
   save_area = frame_pointer_rtx;
   set = get_varargs_alias_set ();
@@ -6337,7 +6344,7 @@ setup_incoming_varargs_64 (CUMULATIVE_AR
 					x86_64_int_parameter_registers[i]));
     }
 
-  if (cum->sse_nregs && cfun->va_list_fpr_size)
+  if (ix86_varargs_fpr_size)
     {
       /* Now emit code to save SSE registers.  The AX parameter contains number
 	 of SSE parameter registers used to call this function.  We use
@@ -6382,7 +6389,7 @@ setup_incoming_varargs_64 (CUMULATIVE_AR
       tmp_reg = gen_reg_rtx (Pmode);
       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
 			      plus_constant (save_area,
-					     8 * X86_64_REGPARM_MAX + 127)));
+					     ix86_varargs_gpr_size + 127)));
       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
       MEM_NOTRAP_P (mem) = 1;
       set_mem_alias_set (mem, set);
@@ -6438,7 +6445,7 @@ ix86_setup_incoming_varargs (CUMULATIVE_
   if (stdarg_p (fntype))
     function_arg_advance (&next_cum, mode, type, 1);
 
-  if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+  if (cum->call_abi == MS_ABI)
     setup_incoming_varargs_ms_64 (&next_cum);
   else
     setup_incoming_varargs_64 (&next_cum);
@@ -6501,7 +6508,7 @@ ix86_va_start (tree valist, rtx nextarg)
       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
     }
 
-  if (cfun->va_list_fpr_size)
+  if (TARGET_SSE && cfun->va_list_fpr_size)
     {
       type = TREE_TYPE (fpr);
       t = build2 (MODIFY_EXPR, type, fpr,
@@ -6520,12 +6527,15 @@ ix86_va_start (tree valist, rtx nextarg)
   TREE_SIDE_EFFECTS (t) = 1;
   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
 
-  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
+  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
     {
       /* Find the register save area.
 	 Prologue of the function save it right above stack frame.  */
       type = TREE_TYPE (sav);
       t = make_tree (type, frame_pointer_rtx);
+      if (!ix86_varargs_gpr_size)
+	t = build2 (POINTER_PLUS_EXPR, type, t,
+		    size_int (-8 * X86_64_REGPARM_MAX));
       t = build2 (MODIFY_EXPR, type, sav, t);
       TREE_SIDE_EFFECTS (t) = 1;
       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
@@ -7500,13 +7510,8 @@ ix86_compute_frame_layout (struct ix86_f
   offset += frame->nregs * UNITS_PER_WORD;
 
   /* Va-arg area */
-  if (ix86_save_varrargs_registers)
-    {
-      offset += X86_64_VARARGS_SIZE;
-      frame->va_arg_size = X86_64_VARARGS_SIZE;
-    }
-  else
-    frame->va_arg_size = 0;
+  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
+  offset += frame->va_arg_size;
 
   /* Align start of frame for local function.  */
   frame->padding1 = ((offset + stack_alignment_needed - 1)
--- gcc/config/i386/i386.h.jj	2008-09-01 21:07:10.000000000 +0200
+++ gcc/config/i386/i386.h	2008-09-01 21:20:53.000000000 +0200
@@ -2390,7 +2390,8 @@ struct machine_function GTY(())
 {
   struct stack_local_entry *stack_locals;
   const char *some_ld_name;
-  int save_varrargs_registers;
+  int varargs_gpr_size;
+  int varargs_fpr_size;
   int accesses_prev_frame;
   int optimize_mode_switching[MAX_386_ENTITIES];
   int needs_cld;
@@ -2416,7 +2417,8 @@ struct machine_function GTY(())
 };
 
 #define ix86_stack_locals (cfun->machine->stack_locals)
-#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
+#define ix86_varargs_gpr_size (cfun->machine->varargs_gpr_size)
+#define ix86_varargs_fpr_size (cfun->machine->varargs_fpr_size)
 #define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
 #define ix86_current_function_needs_cld (cfun->machine->needs_cld)
 #define ix86_tls_descriptor_calls_expanded_in_cfun \
--- gcc/testsuite/gcc.target/i386/amd64-abi-3.c.jj	2008-09-01 21:20:53.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/amd64-abi-3.c	2008-09-01 21:20:53.000000000 +0200
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mno-sse" } */
+/* { dg-final { scan-assembler "subq\[\\t \]*\\\$88,\[\\t \]*%rsp" } } */
+/* { dg-final { scan-assembler-not "subq\[\\t \]*\\\$216,\[\\t \]*%rsp" } } */
+
+#include <stdarg.h>
+
+void foo (va_list va_arglist);
+
+void
+test (int a1, ...)
+{
+  va_list va_arglist;
+  va_start (va_arglist, a1);
+  foo (va_arglist);
+  va_end (va_arglist);
+}
--- gcc/testsuite/gcc.target/i386/amd64-abi-4.c.jj	2008-09-01 21:20:53.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/amd64-abi-4.c	2008-09-01 21:20:53.000000000 +0200
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mno-sse" } */
+
+#include <stdarg.h>
+#include <assert.h>
+
+int n1 = 30;
+int n2 = 324;
+void *n3 = (void *) &n2;
+int n4 = 407;
+
+int e1;
+int e2;
+void *e3;
+int e4;
+
+static void
+__attribute__((noinline))
+foo (va_list va_arglist)
+{
+  e2 = va_arg (va_arglist, int);
+  e3 = va_arg (va_arglist, void *);
+  e4 = va_arg (va_arglist, int);
+}
+
+static void
+__attribute__((noinline))
+test (int a1, ...)
+{
+  e1 = a1;
+  va_list va_arglist;
+  va_start (va_arglist, a1);
+  foo (va_arglist);
+  va_end (va_arglist);
+}
+
+int
+main ()
+{
+  test (n1, n2, n3, n4);
+  assert (n1 == e1);
+  assert (n2 == e2);
+  assert (n3 == e3);
+  assert (n4 == e4);
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/amd64-abi-5.c.jj	2008-09-01 21:20:53.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/amd64-abi-5.c	2008-09-01 21:20:53.000000000 +0200
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+
+#include <stdarg.h>
+#include <assert.h>
+
+int n1 = 30;
+double n2 = 324;
+double n3 = 39494.94;
+double n4 = 407;
+double n5 = 32.304;
+double n6 = 394.14;
+double n7 = 4.07;
+double n8 = 32.4;
+double n9 = 314.194;
+double n10 = 0.1407;
+
+int e1;
+double e2;
+double e3;
+double e4;
+double e5;
+double e6;
+double e7;
+double e8;
+double e9;
+double e10;
+
+static void
+__attribute__((noinline))
+test (int a1, ...)
+{
+  e1 = a1;
+  va_list va_arglist;
+  va_start (va_arglist, a1);
+  e2 = va_arg (va_arglist, double);
+  e3 = va_arg (va_arglist, double);
+  e4 = va_arg (va_arglist, double);
+  e5 = va_arg (va_arglist, double);
+  e6 = va_arg (va_arglist, double);
+  e7 = va_arg (va_arglist, double);
+  e8 = va_arg (va_arglist, double);
+  e9 = va_arg (va_arglist, double);
+  e10 = va_arg (va_arglist, double);
+  va_end (va_arglist);
+}
+
+int
+main ()
+{
+  test (n1, n2, n3, n4, n5, n6, n7, n8, n9, n10);
+  assert (n1 == e1);
+  assert (n2 == e2);
+  assert (n3 == e3);
+  assert (n4 == e4);
+  assert (n5 == e5);
+  assert (n6 == e6);
+  assert (n7 == e7);
+  assert (n8 == e8);
+  assert (n9 == e9);
+  assert (n10 == e10);
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/amd64-abi-6.c.jj	2008-09-01 21:20:53.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/amd64-abi-6.c	2008-09-01 21:20:53.000000000 +0200
@@ -0,0 +1,71 @@
+/* { dg-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+
+#include <stdarg.h>
+#include <assert.h>
+
+int n1 = 30;
+double n2 = 324;
+double n3 = 39494.94;
+double n4 = 407;
+double n5 = 32.304;
+double n6 = 394.14;
+double n7 = 4.07;
+double n8 = 32.4;
+double n9 = 314.194;
+double n10 = 0.1407;
+
+int e1;
+double e2;
+double e3;
+double e4;
+double e5;
+double e6;
+double e7;
+double e8;
+double e9;
+double e10;
+
+static void
+__attribute__((noinline))
+foo (va_list va_arglist)
+{
+  e2 = va_arg (va_arglist, double);
+  e3 = va_arg (va_arglist, double);
+  e4 = va_arg (va_arglist, double);
+  e5 = va_arg (va_arglist, double);
+  e6 = va_arg (va_arglist, double);
+  e7 = va_arg (va_arglist, double);
+  e8 = va_arg (va_arglist, double);
+  e9 = va_arg (va_arglist, double);
+  e10 = va_arg (va_arglist, double);
+}
+
+static void
+__attribute__((noinline))
+test (int a1, ...)
+{
+  va_list va_arglist;
+  e1 = a1;
+  va_start (va_arglist, a1);
+  foo (va_arglist);
+  va_end (va_arglist);
+}
+
+int
+main ()
+{
+  test (n1, n2, n3, n4, n5, n6, n7, n8, n9, n10);
+  assert (n1 == e1);
+  assert (n2 == e2);
+  assert (n3 == e3);
+  assert (n4 == e4);
+  assert (n5 == e5);
+  assert (n6 == e6);
+  assert (n7 == e7);
+  assert (n8 == e8);
+  assert (n9 == e9);
+  assert (n10 == e10);
+  return 0;
+}


	Jakub



More information about the Gcc-patches mailing list