This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, i386] Optionally use %xmm0 to return float and/or double values (take 2)


However, tomorrow (unless Uros beats me to it :-) I'll benchmark povray: a much simpler patch that only does this for local functions might still provide a good bang for the buck.

Here's the simpler patch, bootstrapped/regtested C/C++ on i686-pc-linux-gnu (a Pentium 4). Like in the previous patch, the special calling convention is used as long as the ISA supports it (e.g. for floats on SSE processors, or for doubles on SSE2), but this time only for local functions and with -mfpmath=sse.


Again, the code does not apply to x86-64 platforms (for which -mfpmath=sse is already the default); on 32-bit targets it takes effect only when -mfpmath=sse is given explicitly on the command line.

I'll provide benchmark numbers tomorrow; in the meantime, is this ok for mainline?

Paolo
gcc:
2005-01-25  Paolo Bonzini  <bonzini@gnu.org>

	* config/i386/i386-protos.h (ix86_function_value): Accept two
	arguments, like the target macro.
	* config/i386/i386.h (FUNCTION_VALUE): Pass both arguments.
	* config/i386/i386.c (ix86_function_value): Accept the second
	argument of the target macro.
	(ix86_function_ok_for_sibcall): Pass a function pointer to
	ix86_function_value.
	(ix86_function_value, ix86_libcall_value) [!TARGET_64BIT]: Adjust
	call to ix86_value_regno.
	(ix86_value_regno): Add support for returning floating point values
	in SSE registers.

gcc/testsuite:
2005-01-25  Paolo Bonzini  <bonzini@gnu.org>

	* gcc.dg/i386-ssefn-1.c, gcc.dg/i386-ssefn-2.c, gcc.dg/i386-ssefn-3.c,
	gcc.dg/i386-ssefn-4.c: New.

Index: config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.128
diff -u -p -r1.128 i386-protos.h
--- config/i386/i386-protos.h	20 Jan 2005 18:20:42 -0000	1.128
+++ config/i386/i386-protos.h	25 Jan 2005 14:17:21 -0000
@@ -199,7 +199,7 @@ extern void init_cumulative_args (CUMULA
 extern rtx function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int);
 extern void function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
 				  tree, int);
-extern rtx ix86_function_value (tree);
+extern rtx ix86_function_value (tree, tree);
 #endif
 
 #endif
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.789
diff -u -p -r1.789 i386.c
--- config/i386/i386.c	22 Jan 2005 23:07:53 -0000	1.789
+++ config/i386/i386.c	25 Jan 2005 14:17:23 -0000
@@ -908,7 +908,7 @@ static int ix86_function_regparm (tree, 
 static bool ix86_function_ok_for_sibcall (tree, tree);
 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
-static int ix86_value_regno (enum machine_mode);
+static int ix86_value_regno (enum machine_mode, tree);
 static bool contains_128bit_aligned_vector_p (tree);
 static rtx ix86_struct_value_rtx (tree, int);
 static bool ix86_ms_bitfield_layout_p (tree);
@@ -1639,19 +1652,33 @@ const struct attribute_spec ix86_attribu
 static bool
 ix86_function_ok_for_sibcall (tree decl, tree exp)
 {
+  tree func;
+
   /* If we are generating position-independent code, we cannot sibcall
      optimize any indirect call, or a direct call to a global function,
      as the PLT requires %ebx be live.  */
   if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
     return false;
 
+  if (decl)
+    func = decl;
+  else
+    {
+      /* We're looking at the CALL_EXPR, we need the type of the function.  */
+      func = TREE_OPERAND (exp, 0);		/* pointer expression */
+      func = TREE_TYPE (func);			/* pointer type */
+
+      /* TREE_TYPE (func) is a function type.  */
+    }
+
   /* If we are returning floats on the 80387 register stack, we cannot
      make a sibcall from a function that doesn't return a float to a
      function that does or, conversely, from a function that does return
      a float to a function that doesn't; the necessary stack adjustment
      would not be executed.  */
-  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
-      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
+  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func))
+      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
+					   TREE_TYPE (cfun->decl))))
     return false;
 
   /* If this call is indirect, we'll need to be able to use a call-clobbered
@@ -1659,12 +1686,7 @@ ix86_function_ok_for_sibcall (tree decl,
      such registers are not used for passing parameters.  */
   if (!decl && !TARGET_64BIT)
     {
-      tree type;
-
-      /* We're looking at the CALL_EXPR, we need the type of the function.  */
-      type = TREE_OPERAND (exp, 0);		/* pointer expression */
-      type = TREE_TYPE (type);			/* pointer type */
-      type = TREE_TYPE (type);			/* function type */
+      tree type = TREE_TYPE (func);
 
       if (ix86_function_regparm (type, NULL) >= 3)
 	{
@@ -3027,7 +3103,7 @@ ix86_function_value_regno_p (int regno)
    If the precise function being called is known, FUNC is its FUNCTION_DECL;
    otherwise, FUNC is 0.  */
 rtx
-ix86_function_value (tree valtype)
+ix86_function_value (tree valtype, tree func)
 {
   enum machine_mode natmode = type_natural_mode (valtype);
 
@@ -3043,7 +3119,7 @@ ix86_function_value (tree valtype)
       return ret;
     }
   else
-    return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode));
+    return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func));
 }
 
 /* Return false iff type is returned in memory.  */
@@ -3145,23 +3221,35 @@ ix86_libcall_value (enum machine_mode mo
 	}
     }
   else
-    return gen_rtx_REG (mode, ix86_value_regno (mode));
+    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL));
 }
 
 /* Given a mode, return the register to use for a return value.  */
 
 static int
-ix86_value_regno (enum machine_mode mode)
+ix86_value_regno (enum machine_mode mode, tree func)
 {
-  /* Floating point return values in %st(0).  */
-  if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
-    return FIRST_FLOAT_REG;
+  gcc_assert (!TARGET_64BIT);
+
   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
      we prevent this case when sse is not available.  */
   if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
     return FIRST_SSE_REG;
-  /* Everything else in %eax.  */
-  return 0;
+
+  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
+  if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
+    return 0;
+
+  /* Floating point return values in %st(0), except for local functions when
+     SSE math is enabled.  */
+  if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+    {
+      struct cgraph_local_info *i = cgraph_local_info (func);
+      if (i && i->local)
+        return FIRST_SSE_REG;
+    }
+
+  return FIRST_FLOAT_REG;
 }
 
 /* Create the va_list data type.  */
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.419
diff -u -p -r1.419 i386.h
--- config/i386/i386.h	20 Jan 2005 06:47:25 -0000	1.419
+++ config/i386/i386.h	25 Jan 2005 14:17:24 -0000
@@ -1705,7 +1716,7 @@ enum reg_class
    If the precise function being called is known, FUNC is its FUNCTION_DECL;
    otherwise, FUNC is 0.  */
 #define FUNCTION_VALUE(VALTYPE, FUNC)  \
-   ix86_function_value (VALTYPE)
+   ix86_function_value (VALTYPE, FUNC)
 
 #define FUNCTION_VALUE_REGNO_P(N) \
   ix86_function_value_regno_p (N)
Index: testsuite/gcc.dg/i386-ssefn-1.c
===================================================================
RCS file: testsuite/gcc.dg/i386-ssefn-1.c
diff -N testsuite/gcc.dg/i386-ssefn-1.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ testsuite/gcc.dg/i386-ssefn-1.c	25 Jan 2005 14:05:21 -0000
@@ -0,0 +1,30 @@
+/* Test argument passing with SSE and local functions
+   Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do compile { target i?86-*-* } } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler "mulss" } } */
+/* { dg-final { scan-assembler-not "movsd" } } */
+/* { dg-final { scan-assembler-not "mulsd" } } */
+/* { dg-options "-O2 -msse -mfpmath=sse -fno-inline" } */
+
+static float xs (void)
+{
+  return 3.14159265;
+}
+
+float ys (float a)
+{
+  return xs () * a;
+}
+
+static double xd (void)
+{
+  return 3.1415926535;
+}
+
+double yd (double a)
+{
+  return xd () * a;
+}
+
Index: testsuite/gcc.dg/i386-ssefn-2.c
===================================================================
RCS file: testsuite/gcc.dg/i386-ssefn-2.c
diff -N testsuite/gcc.dg/i386-ssefn-2.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ testsuite/gcc.dg/i386-ssefn-2.c	25 Jan 2005 14:05:21 -0000
@@ -0,0 +1,30 @@
+/* Test argument passing with SSE2 and local functions
+   Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do compile { target i?86-*-* } } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler "mulss" } } */
+/* { dg-final { scan-assembler "movsd" } } */
+/* { dg-final { scan-assembler "mulsd" } } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse -fno-inline" } */
+
+static float xs (void)
+{
+  return 3.14159265;
+}
+
+float ys (float a)
+{
+  return xs () * a;
+}
+
+static double xd (void)
+{
+  return 3.1415926535;
+}
+
+double yd (double a)
+{
+  return xd () * a;
+}
+
Index: testsuite/gcc.dg/i386-ssefn-3.c
===================================================================
RCS file: testsuite/gcc.dg/i386-ssefn-3.c
diff -N testsuite/gcc.dg/i386-ssefn-3.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ testsuite/gcc.dg/i386-ssefn-3.c	25 Jan 2005 14:05:21 -0000
@@ -0,0 +1,48 @@
+/* Execution test for argument passing with SSE and local functions
+   Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do run { target i?86-*-* } } */
+/* { dg-options "-O2 -msse -mfpmath=sse" } */
+#include <assert.h>
+#include "i386-cpuid.h"
+
+static float xs (void)
+{
+  return 3.14159265;
+}
+
+float ys (float a)
+{
+  return xs () * a;
+}
+
+static double xd (void)
+{
+  return 3.1415926535;
+}
+
+double yd (double a)
+{
+  return xd () * a;
+}
+
+int main()
+{
+  unsigned long cpu_facilities;
+
+  cpu_facilities = i386_cpuid ();
+
+  /* Skip the checks unless i386_cpuid reports the SSE feature bit.  */
+  if (cpu_facilities & bit_SSE)
+    {
+      assert (ys (1) == xs ());
+      assert (ys (2) == xs () * 2);
+      assert (yd (1) == xd ());
+      assert (yd (2) == xd () * 2);
+    }
+
+  /* Return success explicitly: in C89, falling off the end of main
+     leaves the exit status undefined, and this is an execution test
+     whose result is its exit status.  */
+  return 0;
+}
Index: testsuite/gcc.dg/i386-ssefn-4.c
===================================================================
RCS file: testsuite/gcc.dg/i386-ssefn-4.c
diff -N testsuite/gcc.dg/i386-ssefn-4.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ testsuite/gcc.dg/i386-ssefn-4.c	25 Jan 2005 14:05:21 -0000
@@ -0,0 +1,48 @@
+/* Execution test for argument passing with SSE2 and local functions
+   Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do run { target i?86-*-* } } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse" } */
+#include <assert.h>
+#include "i386-cpuid.h"
+
+static float xs (void)
+{
+  return 3.14159265;
+}
+
+float ys (float a)
+{
+  return xs () * a;
+}
+
+static double xd (void)
+{
+  return 3.1415926535;
+}
+
+double yd (double a)
+{
+  return xd () * a;
+}
+
+int main()
+{
+  unsigned long cpu_facilities;
+
+  cpu_facilities = i386_cpuid ();
+
+  /* Skip the checks unless i386_cpuid reports the SSE2 feature bit.  */
+  if (cpu_facilities & bit_SSE2)
+    {
+      assert (ys (1) == xs ());
+      assert (ys (2) == xs () * 2);
+      assert (yd (1) == xd ());
+      assert (yd (2) == xd () * 2);
+    }
+
+  /* Return success explicitly: in C89, falling off the end of main
+     leaves the exit status undefined, and this is an execution test
+     whose result is its exit status.  */
+  return 0;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]