This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, i386] Optionally use %xmm0 to return float and/or double values (take 3)
Here is my patch with the times for a povray benchmark run.
SSE-related compiler options:
-msse -msse2 -mfpmath=sse -march=pentium4 -mtune=pentium4
With the patch:
Total Scene Processing Times
Parse Time: 0 hours 0 minutes 1 seconds (1 seconds)
Photon Time: 0 hours 0 minutes 38 seconds (38 seconds)
Render Time: 0 hours 31 minutes 46 seconds (1906 seconds)
Total Time: 0 hours 32 minutes 25 seconds (1945 seconds)
Without the patch:
Total Scene Processing Times
Parse Time: 0 hours 0 minutes 1 seconds (1 seconds)
Photon Time: 0 hours 0 minutes 38 seconds (38 seconds)
Render Time: 0 hours 32 minutes 4 seconds (1924 seconds)
Total Time: 0 hours 32 minutes 43 seconds (1963 seconds)
So povray is 1% faster with the patch.
Paolo
gcc:
2005-01-25 Paolo Bonzini <bonzini@gnu.org>
* config/i386/i386-protos.h (ix86_function_value): Accept two
arguments, like the target macro.
* config/i386/i386.h (FUNCTION_VALUE): Pass both arguments.
* config/i386/i386.c (ix86_function_value): Accept the second
argument of the target macro.
(ix86_function_ok_for_sibcall): Pass a function pointer to
ix86_function_value.
(ix86_function_value, ix86_libcall_value) [!TARGET_64BIT]: Adjust
call to ix86_value_regno.
(ix86_value_regno): Add support for returning floating point values
in SSE registers.
gcc/testsuite:
2005-01-25 Paolo Bonzini <bonzini@gnu.org>
* gcc.dg/i386-ssefn-1.c, gcc.dg/i386-ssefn-2.c, gcc.dg/i386-ssefn-3.c,
gcc.dg/i386-ssefn-4.c: New.
Index: i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.128
diff -u -p -r1.128 i386-protos.h
--- i386-protos.h 20 Jan 2005 18:20:42 -0000 1.128
+++ i386-protos.h 26 Jan 2005 14:48:16 -0000
@@ -199,7 +199,7 @@ extern void init_cumulative_args (CUMULA
extern rtx function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int);
extern void function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
tree, int);
-extern rtx ix86_function_value (tree);
+extern rtx ix86_function_value (tree, tree);
#endif
#endif
Index: i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.790
diff -u -p -r1.790 i386.c
--- i386.c 25 Jan 2005 18:43:55 -0000 1.790
+++ i386.c 26 Jan 2005 14:48:19 -0000
@@ -913,7 +913,7 @@ const struct attribute_spec ix86_attribu
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
-static int ix86_value_regno (enum machine_mode);
+static int ix86_value_regno (enum machine_mode, tree);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
@@ -1643,19 +1643,27 @@ const struct attribute_spec ix86_attribu
static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
+ tree func;
+
/* If we are generating position-independent code, we cannot sibcall
optimize any indirect call, or a direct call to a global function,
as the PLT requires %ebx be live. */
if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
return false;
+ if (decl)
+ func = decl;
+ else
+ func = NULL;
+
/* If we are returning floats on the 80387 register stack, we cannot
make a sibcall from a function that doesn't return a float to a
function that does or, conversely, from a function that does return
a float to a function that doesn't; the necessary stack adjustment
would not be executed. */
- if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
- != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
+ if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func))
+ != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
+ cfun->decl)))
return false;
/* If this call is indirect, we'll need to be able to use a call-clobbered
@@ -3031,7 +3039,7 @@ ix86_function_value_regno_p (int regno)
If the precise function being called is known, FUNC is its FUNCTION_DECL;
otherwise, FUNC is 0. */
rtx
-ix86_function_value (tree valtype)
+ix86_function_value (tree valtype, tree func)
{
enum machine_mode natmode = type_natural_mode (valtype);
@@ -3047,7 +3055,7 @@ ix86_function_value (tree valtype)
return ret;
}
else
- return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode));
+ return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func));
}
/* Return false iff type is returned in memory. */
@@ -3149,23 +3157,35 @@ ix86_libcall_value (enum machine_mode mo
}
}
else
- return gen_rtx_REG (mode, ix86_value_regno (mode));
+ return gen_rtx_REG (mode, ix86_value_regno (mode, NULL));
}
/* Given a mode, return the register to use for a return value. */
static int
-ix86_value_regno (enum machine_mode mode)
+ix86_value_regno (enum machine_mode mode, tree func)
{
- /* Floating point return values in %st(0). */
- if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
- return FIRST_FLOAT_REG;
+ gcc_assert (!TARGET_64BIT);
+
/* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
we prevent this case when sse is not available. */
if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
return FIRST_SSE_REG;
- /* Everything else in %eax. */
- return 0;
+
+ /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
+ if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
+ return 0;
+
+ /* Floating point return values in %st(0), except for functions that cgraph
+ marks local when SSE math is enabled; those return in %xmm0. */
+ if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ {
+ struct cgraph_local_info *i = cgraph_local_info (func);
+ if (i && i->local)
+ return FIRST_SSE_REG;
+ }
+
+ return FIRST_FLOAT_REG;
}
/* Create the va_list data type. */
Index: i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.419
diff -u -p -r1.419 i386.h
--- i386.h 20 Jan 2005 06:47:25 -0000 1.419
+++ i386.h 26 Jan 2005 14:48:19 -0000
@@ -1705,7 +1705,7 @@ enum reg_class
If the precise function being called is known, FUNC is its FUNCTION_DECL;
otherwise, FUNC is 0. */
#define FUNCTION_VALUE(VALTYPE, FUNC) \
- ix86_function_value (VALTYPE)
+ ix86_function_value (VALTYPE, FUNC)
#define FUNCTION_VALUE_REGNO_P(N) \
ix86_function_value_regno_p (N)
Index: testsuite/gcc.dg/i386-ssefn-1.c
===================================================================
RCS file: testsuite/gcc.dg/i386-ssefn-1.c
diff -N testsuite/gcc.dg/i386-ssefn-1.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ testsuite/gcc.dg/i386-ssefn-1.c 26 Jan 2005 14:48:52 -0000
@@ -0,0 +1,30 @@
+/* Test argument passing with SSE and local functions
+ Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do compile { target i?86-*-* } } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler "mulss" } } */
+/* { dg-final { scan-assembler-not "movsd" } } */
+/* { dg-final { scan-assembler-not "mulsd" } } */
+/* { dg-options "-O2 -msse -mfpmath=sse -fno-inline" } */
+
+static float xs (void)
+{
+ return 3.14159265;
+}
+
+float ys (float a)
+{
+ return xs () * a;
+}
+
+static double xd (void)
+{
+ return 3.1415926535;
+}
+
+double yd (double a)
+{
+ return xd () * a;
+}
+
Index: testsuite/gcc.dg/i386-ssefn-2.c
===================================================================
RCS file: testsuite/gcc.dg/i386-ssefn-2.c
diff -N testsuite/gcc.dg/i386-ssefn-2.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ testsuite/gcc.dg/i386-ssefn-2.c 26 Jan 2005 14:48:52 -0000
@@ -0,0 +1,30 @@
+/* Test argument passing with SSE2 and local functions
+ Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do compile { target i?86-*-* } } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler "mulss" } } */
+/* { dg-final { scan-assembler "movsd" } } */
+/* { dg-final { scan-assembler "mulsd" } } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse -fno-inline" } */
+
+static float xs (void)
+{
+ return 3.14159265;
+}
+
+float ys (float a)
+{
+ return xs () * a;
+}
+
+static double xd (void)
+{
+ return 3.1415926535;
+}
+
+double yd (double a)
+{
+ return xd () * a;
+}
+
Index: testsuite/gcc.dg/i386-ssefn-3.c
===================================================================
RCS file: testsuite/gcc.dg/i386-ssefn-3.c
diff -N testsuite/gcc.dg/i386-ssefn-3.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ testsuite/gcc.dg/i386-ssefn-3.c 26 Jan 2005 14:48:52 -0000
@@ -0,0 +1,42 @@
+/* Execution test for argument passing with SSE and local functions
+ Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do run { target i?86-*-* } } */
+/* { dg-options "-O2 -msse -mfpmath=sse" } */
+#include <assert.h>
+#include "i386-cpuid.h"
+
+static float xs (void)
+{
+ return 3.14159265;
+}
+
+float ys (float a)
+{
+ return xs () * a;
+}
+
+static double xd (void)
+{
+ return 3.1415926535;
+}
+
+double yd (double a)
+{
+ return xd () * a;
+}
+
+int main()
+{
+ unsigned long cpu_facilities;
+
+ cpu_facilities = i386_cpuid ();
+
+ if (cpu_facilities & bit_SSE)
+ {
+ assert (ys (1) == xs ());
+ assert (ys (2) == xs () * 2);
+ assert (yd (1) == xd ());
+ assert (yd (2) == xd () * 2);
+ }
+}
Index: testsuite/gcc.dg/i386-ssefn-4.c
===================================================================
RCS file: testsuite/gcc.dg/i386-ssefn-4.c
diff -N testsuite/gcc.dg/i386-ssefn-4.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ testsuite/gcc.dg/i386-ssefn-4.c 26 Jan 2005 14:48:52 -0000
@@ -0,0 +1,42 @@
+/* Execution test for argument passing with SSE2 and local functions
+ Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do run { target i?86-*-* } } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse" } */
+#include <assert.h>
+#include "i386-cpuid.h"
+
+static float xs (void)
+{
+ return 3.14159265;
+}
+
+float ys (float a)
+{
+ return xs () * a;
+}
+
+static double xd (void)
+{
+ return 3.1415926535;
+}
+
+double yd (double a)
+{
+ return xd () * a;
+}
+
+int main()
+{
+ unsigned long cpu_facilities;
+
+ cpu_facilities = i386_cpuid ();
+
+ if (cpu_facilities & bit_SSE2)
+ {
+ assert (ys (1) == xs ());
+ assert (ys (2) == xs () * 2);
+ assert (yd (1) == xd ());
+ assert (yd (2) == xd () * 2);
+ }
+}