This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: PATCH: Don't allocate space for SSE reg in reg save area if SSE is disabled
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: "H.J. Lu" <hjl dot tools at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org, ubizjak at gmail dot com, torvalds at linux-foundation dot org, jakub at redhat dot com, jh at suse dot cz
- Date: Thu, 28 Aug 2008 10:17:35 -0700
- Subject: Re: PATCH: Don't allocate space for SSE reg in reg save area if SSE is disabled
- References: <20080828165745.GA26488@lucon.org> <20080828170449.GA1738@lucon.org>
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
On Thu, Aug 28, 2008 at 10:04:49AM -0700, H.J. Lu wrote:
> On Thu, Aug 28, 2008 at 09:57:45AM -0700, H.J. Lu wrote:
> >
> > We are allocating space for SSE registers in the register save area even
> > if SSE is disabled. -mno-sse is used by the Linux kernel. Here is a patch
> > to avoid it. OK for trunk and 4.3?
> >
>
> Here is the updated patch with some optimization. OK for trunk and
> 4.3?
>
> Thanks.
>
>
If I set X86_64_SSE_REGPARM_MAX to 0 for -mno-sse, we no longer get the
warnings when double is used. Here is the updated patch.
H.J.
----
gcc/
2008-08-28 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (X86_64_VARARGS_SIZE): Removed.
(setup_incoming_varargs_64): Remove ix86_save_varrargs_registers.
Set/check ix86_varrargs_gpr_size and ix86_varrargs_fpr_size.
(ix86_va_start): Check ix86_varrargs_gpr_size and
ix86_varrargs_fpr_size instead of cfun->va_list_gpr_size and
cfun->va_list_fpr_size, respectively.
(ix86_compute_frame_layout): Updated.
* config/i386/i386.h (machine_function): Remove
save_varrargs_registers. Add varrargs_gpr_size and
varrargs_fpr_size.
(ix86_save_varrargs_registers): Removed.
(ix86_varrargs_gpr_size): New.
(ix86_varrargs_fpr_size): Likewise.
gcc/testsuite/
2008-08-28 H.J. Lu <hongjiu.lu@intel.com>
* gcc.target/i386/amd64-abi-3.c: New.
* gcc.target/i386/amd64-abi-4.c: Likewise.
--- gcc/config/i386/i386.c.sse 2008-08-27 11:33:00.000000000 -0700
+++ gcc/config/i386/i386.c 2008-08-28 10:11:50.000000000 -0700
@@ -1631,9 +1631,6 @@ rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;
-/* Size of the register save area. */
-#define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
-
/* Define the structure for the machine field in struct function. */
struct stack_local_entry GTY(())
@@ -6169,11 +6166,16 @@ setup_incoming_varargs_64 (CUMULATIVE_AR
if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
- if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
- return;
+ /* GPR size of varargs save area. */
+ if (cfun->va_list_gpr_size)
+ ix86_varrargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
- /* Indicate to allocate space on the stack for varargs save area. */
- ix86_save_varrargs_registers = 1;
+ /* FPR size of varargs save area. */
+ if (cum->sse_nregs && cfun->va_list_fpr_size)
+ ix86_varrargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
+
+ if (! ix86_varrargs_gpr_size && ! ix86_varrargs_fpr_size)
+ return;
save_area = frame_pointer_rtx;
set = get_varargs_alias_set ();
@@ -6191,7 +6193,7 @@ setup_incoming_varargs_64 (CUMULATIVE_AR
x86_64_int_parameter_registers[i]));
}
- if (cum->sse_nregs && cfun->va_list_fpr_size)
+ if (ix86_varrargs_fpr_size)
{
/* Now emit code to save SSE registers. The AX parameter contains number
of SSE parameter registers used to call this function. We use
@@ -6336,7 +6338,7 @@ ix86_va_start (tree valist, rtx nextarg)
n_gpr = crtl->args.info.regno;
n_fpr = crtl->args.info.sse_regno;
- if (cfun->va_list_gpr_size)
+ if (ix86_varrargs_gpr_size)
{
type = TREE_TYPE (gpr);
t = build2 (MODIFY_EXPR, type,
@@ -6345,7 +6347,7 @@ ix86_va_start (tree valist, rtx nextarg)
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
- if (cfun->va_list_fpr_size)
+ if (ix86_varrargs_fpr_size)
{
type = TREE_TYPE (fpr);
t = build2 (MODIFY_EXPR, type, fpr,
@@ -6364,7 +6366,7 @@ ix86_va_start (tree valist, rtx nextarg)
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
- if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
+ if (ix86_varrargs_gpr_size || ix86_varrargs_fpr_size)
{
/* Find the register save area.
Prologue of the function save it right above stack frame. */
@@ -7296,13 +7298,8 @@ ix86_compute_frame_layout (struct ix86_f
offset += frame->nregs * UNITS_PER_WORD;
/* Va-arg area */
- if (ix86_save_varrargs_registers)
- {
- offset += X86_64_VARARGS_SIZE;
- frame->va_arg_size = X86_64_VARARGS_SIZE;
- }
- else
- frame->va_arg_size = 0;
+ frame->va_arg_size = ix86_varrargs_gpr_size + ix86_varrargs_fpr_size;
+ offset += frame->va_arg_size;
/* Align start of frame for local function. */
frame->padding1 = ((offset + stack_alignment_needed - 1)
--- gcc/config/i386/i386.h.sse 2008-08-28 08:51:15.000000000 -0700
+++ gcc/config/i386/i386.h 2008-08-28 10:11:10.000000000 -0700
@@ -2338,7 +2338,8 @@ struct machine_function GTY(())
{
struct stack_local_entry *stack_locals;
const char *some_ld_name;
- int save_varrargs_registers;
+ int varrargs_gpr_size;
+ int varrargs_fpr_size;
int accesses_prev_frame;
int optimize_mode_switching[MAX_386_ENTITIES];
int needs_cld;
@@ -2364,7 +2365,8 @@ struct machine_function GTY(())
};
#define ix86_stack_locals (cfun->machine->stack_locals)
-#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
+#define ix86_varrargs_gpr_size (cfun->machine->varrargs_gpr_size)
+#define ix86_varrargs_fpr_size (cfun->machine->varrargs_fpr_size)
#define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
#define ix86_current_function_needs_cld (cfun->machine->needs_cld)
#define ix86_tls_descriptor_calls_expanded_in_cfun \
--- gcc/testsuite/gcc.target/i386/amd64-abi-3.c.sse 2008-08-28 09:47:19.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/amd64-abi-3.c 2008-08-28 09:43:50.000000000 -0700
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mno-sse" } */
+/* { dg-final { scan-assembler "subq\[\\t \]*\\\$88,\[\\t \]*%rsp" } } */
+/* { dg-final { scan-assembler-not "subq\[\\t \]*\\\$126,\[\\t \]*%rsp" } } */
+
+#include <stdarg.h>
+
+void foo (va_list va_arglist);
+
+void
+test (int a1, ...)
+{
+ va_list va_arglist;
+ va_start (va_arglist, a1);
+ foo (va_arglist);
+ va_end (va_arglist);
+}
--- gcc/testsuite/gcc.target/i386/amd64-abi-4.c.sse 2008-08-28 09:47:16.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/amd64-abi-4.c 2008-08-28 09:46:43.000000000 -0700
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mno-sse" } */
+
+#include <stdarg.h>
+#include <assert.h>
+
+int n1 = 30;
+int n2 = 324;
+void *n3 = (void *) &n2;
+int n4 = 407;
+
+int e1;
+int e2;
+void *e3;
+int e4;
+
+static void
+__attribute__((noinline))
+foo (va_list va_arglist)
+{
+ e2 = va_arg (va_arglist, int);
+ e3 = va_arg (va_arglist, void *);
+ e4 = va_arg (va_arglist, int);
+}
+
+static void
+__attribute__((noinline))
+test (int a1, ...)
+{
+ e1 = a1;
+ va_list va_arglist;
+ va_start (va_arglist, a1);
+ foo (va_arglist);
+ va_end (va_arglist);
+}
+
+int
+main ()
+{
+ test (n1, n2, n3, n4);
+ assert (n1 == e1);
+ assert (n2 == e2);
+ assert (n3 == e3);
+ assert (n4 == e4);
+ return 0;
+}