[PATCH] Change i386 ABI for local functions to use SSE2 regs if -msse2

Jakub Jelinek jakub@redhat.com
Fri Apr 1 10:10:00 GMT 2005


Hi!

For details see
http://gcc.gnu.org/ml/gcc-patches/2005-01/msg01891.html
http://gcc.gnu.org/ml/gcc-patches/2005-01/msg01994.html

This patch changes the ABI for local functions to pass floating point values
in SSE registers where possible.
I got a ~1.34% improvement on the povray benchmark with this.

In the thread following those mails, Andreas raised the question of
gdb inferior calls to those functions, but the current transparent
regparm(3) for static functions whose address is not taken and which
are not marked with attribute used already makes gdb inferior calls
to such functions impossible, so IMHO this is not something that
should prevent this optimization.

Bootstrapped/regtested on i386-redhat-linux, additionally regtested
with RUNTESTFLAGS="--target_board=unix/-march=pentium4"
and RUNTESTFLAGS="--target_board=unix/-march=pentium4/-mfpmath=sse"
(for those two runs I ignored the failed scan-assembler tests, as they
rely on -march=i386 or another non-pentium4 arch).

Ok for HEAD?

2005-04-01  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (init_cumulative_args): For -m32 -mfpmath=sse
	and local functions, set sse_nregs to 8 and float_in_sse.
	(function_arg_advance, function_arg): If float_in_sse, pass
	SFmode and DFmode arguments in SSE registers.
	* config/i386/i386.h (CUMULATIVE_ARGS): Add float_in_sse field.

	* config/i386/i386.c (ix86_value_regno): Only optimize local functions
	if -funit-at-a-time.

2005-01-25  Paolo Bonzini  <bonzini@gnu.org>

	* config/i386/i386-protos.h (ix86_function_value): Accept two
	arguments, like the target macro.
	* config/i386/i386.h (FUNCTION_VALUE): Pass both arguments.
	* config/i386/i386.c (ix86_function_value): Accept the second
	argument of the target macro.
	(ix86_function_ok_for_sibcall): Pass a function pointer to
	ix86_function_value.
	(ix86_function_value, ix86_libcall_value) [!TARGET_64BIT]: Adjust
	call to ix86_value_regno.
	(ix86_value_regno): Add support for returning floating point values
	in SSE registers.

2005-01-25  Paolo Bonzini  <bonzini@gnu.org>

	* gcc.dg/i386-ssefn-1.c, gcc.dg/i386-ssefn-2.c, gcc.dg/i386-ssefn-3.c,
	gcc.dg/i386-ssefn-4.c: New.

--- gcc/config/i386/i386-protos.h	2 Feb 2005 00:30:25 -0000	1.131
+++ gcc/config/i386/i386-protos.h	28 Feb 2005 12:13:57 -0000	1.131.8.1
@@ -204,7 +204,7 @@ extern void init_cumulative_args (CUMULA
 extern rtx function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int);
 extern void function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
 				  tree, int);
-extern rtx ix86_function_value (tree);
+extern rtx ix86_function_value (tree, tree);
 #endif
 
 #endif
--- gcc/config/i386/i386.h	28 Feb 2005 12:13:58 -0000	1.421.8.1
+++ gcc/config/i386/i386.h	28 Feb 2005 12:16:32 -0000	1.421.8.2
@@ -1696,7 +1696,7 @@
    If the precise function being called is known, FUNC is its FUNCTION_DECL;
    otherwise, FUNC is 0.  */
 #define FUNCTION_VALUE(VALTYPE, FUNC)  \
-   ix86_function_value (VALTYPE)
+   ix86_function_value (VALTYPE, FUNC)
 
 #define FUNCTION_VALUE_REGNO_P(N) \
   ix86_function_value_regno_p (N)
@@ -1736,6 +1736,8 @@
   int mmx_nregs;		/* # mmx registers available for passing */
   int mmx_regno;		/* next available mmx register number */
   int maybe_vaarg;		/* true for calls to possibly vardic fncts.  */
+  int float_in_sse;		/* true if in 32-bit mode SFmode/DFmode should
+				   be passed in SSE registers.  */
 } CUMULATIVE_ARGS;
 
 /* Initialize a variable CUM of type CUMULATIVE_ARGS
--- gcc/config/i386/i386.c	28 Feb 2005 12:13:59 -0000	1.795.8.2
+++ gcc/config/i386/i386.c	28 Feb 2005 12:16:24 -0000	1.795.8.3
@@ -913,7 +913,7 @@
 static bool ix86_function_ok_for_sibcall (tree, tree);
 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
-static int ix86_value_regno (enum machine_mode);
+static int ix86_value_regno (enum machine_mode, tree);
 static bool contains_128bit_aligned_vector_p (tree);
 static rtx ix86_struct_value_rtx (tree, int);
 static bool ix86_ms_bitfield_layout_p (tree);
@@ -1643,19 +1643,27 @@
 static bool
 ix86_function_ok_for_sibcall (tree decl, tree exp)
 {
+  tree func;
+
   /* If we are generating position-independent code, we cannot sibcall
      optimize any indirect call, or a direct call to a global function,
      as the PLT requires %ebx be live.  */
   if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
     return false;
 
+  if (decl)
+    func = decl;
+  else
+    func = NULL;
+
   /* If we are returning floats on the 80387 register stack, we cannot
      make a sibcall from a function that doesn't return a float to a
      function that does or, conversely, from a function that does return
      a float to a function that doesn't; the necessary stack adjustment
      would not be executed.  */
-  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
-      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
+  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func))
+      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
+					   cfun->decl)))
     return false;
 
   /* If this call is indirect, we'll need to be able to use a call-clobbered
@@ -2035,7 +2043,22 @@
     }
   if ((!fntype && !libname)
       || (fntype && !TYPE_ARG_TYPES (fntype)))
-    cum->maybe_vaarg = 1;
+    cum->maybe_vaarg = true;
+
+  /* For local functions, pass SFmode (and DFmode for SSE2) arguments
+     in SSE registers even for 32-bit mode and not just 3, but up to
+     8 SSE arguments in registers.  */
+  if (!TARGET_64BIT && !cum->maybe_vaarg && !cum->fastcall
+      && cum->sse_nregs == SSE_REGPARM_MAX && fndecl
+      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
+    {
+      struct cgraph_local_info *i = cgraph_local_info (fndecl);
+      if (i && i->local)
+	{
+	  cum->sse_nregs = 8;
+	  cum->float_in_sse = true;
+	}
+    }
 
   if (TARGET_DEBUG_ARG)
     fprintf (stderr, ", nregs=%d )\n", cum->nregs);
@@ -2726,6 +2749,14 @@
 	    }
 	  break;
 
+	case DFmode:
+	  if (!TARGET_SSE2)
+	    break;
+	case SFmode:
+	  if (!cum->float_in_sse)
+	    break;
+	  /* FALLTHRU */
+
 	case TImode:
 	case V16QImode:
 	case V8HImode:
@@ -2847,6 +2878,13 @@
 	    ret = gen_rtx_REG (mode, regno);
 	  }
 	break;
+      case DFmode:
+	if (!TARGET_SSE2)
+	  break;
+      case SFmode:
+	if (!cum->float_in_sse)
+	  break;
+	/* FALLTHRU */
       case TImode:
       case V16QImode:
       case V8HImode:
@@ -3038,7 +3076,7 @@
    If the precise function being called is known, FUNC is its FUNCTION_DECL;
    otherwise, FUNC is 0.  */
 rtx
-ix86_function_value (tree valtype)
+ix86_function_value (tree valtype, tree func)
 {
   enum machine_mode natmode = type_natural_mode (valtype);
 
@@ -3054,7 +3092,7 @@
       return ret;
     }
   else
-    return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode));
+    return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func));
 }
 
 /* Return false iff type is returned in memory.  */
@@ -3156,23 +3194,36 @@
 	}
     }
   else
-    return gen_rtx_REG (mode, ix86_value_regno (mode));
+    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL));
 }
 
 /* Given a mode, return the register to use for a return value.  */
 
 static int
-ix86_value_regno (enum machine_mode mode)
+ix86_value_regno (enum machine_mode mode, tree func)
 {
-  /* Floating point return values in %st(0).  */
-  if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
-    return FIRST_FLOAT_REG;
+  gcc_assert (!TARGET_64BIT);
+
   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
      we prevent this case when sse is not available.  */
   if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
     return FIRST_SSE_REG;
-  /* Everything else in %eax.  */
-  return 0;
+
+  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
+  if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
+    return 0;
+
+  /* Floating point return values in %st(0), except for local functions when
+     SSE math is enabled.  */
+  if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
+      && flag_unit_at_a_time)
+    {
+      struct cgraph_local_info *i = cgraph_local_info (func);
+      if (i && i->local)
+	return FIRST_SSE_REG;
+    }
+
+  return FIRST_FLOAT_REG;
 }
 
 /* Create the va_list data type.  */
--- gcc/testsuite/gcc.dg/i386-ssefn-4.c 1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/i386-ssefn-4.c	28 Feb 2005 12:14:04 -0000	1.1.2.1
@@ -0,0 +1,43 @@
+/* Execution test for argument passing with SSE2 and local functions
+   Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do run { target i?86-*-* } } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse" } */
+#include <assert.h>
+#include "i386-cpuid.h"
+
+static float xs (void)
+{
+  return 3.14159265;
+}
+
+float ys (float a)
+{
+  return xs () * a;
+}
+
+static double xd (void)
+{
+  return 3.1415926535;
+}
+
+double yd (double a)
+{
+  return xd () * a;
+}
+
+int main()
+{
+  unsigned long cpu_facilities;
+
+  cpu_facilities = i386_cpuid ();
+
+  if (cpu_facilities & bit_SSE2)
+    {
+      assert (ys (1) == xs ());
+      assert (ys (2) == xs () * 2);
+      assert (yd (1) == xd ());
+      assert (yd (2) == xd () * 2);
+    }
+  return 0;
+}
--- gcc/testsuite/gcc.dg/i386-ssefn-2.c 1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/i386-ssefn-2.c	28 Feb 2005 12:14:04 -0000	1.1.2.1
@@ -0,0 +1,30 @@
+/* Test argument passing with SSE2 and local functions
+   Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do compile { target i?86-*-* } } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler "mulss" } } */
+/* { dg-final { scan-assembler "movsd" } } */
+/* { dg-final { scan-assembler "mulsd" } } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse -fno-inline" } */
+
+static float xs (void)
+{
+  return 3.14159265;
+}
+
+float ys (float a)
+{
+  return xs () * a;
+}
+
+static double xd (void)
+{
+  return 3.1415926535;
+}
+
+double yd (double a)
+{
+  return xd () * a;
+}
+
--- gcc/testsuite/gcc.dg/i386-ssefn-1.c 1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/i386-ssefn-1.c	28 Feb 2005 12:14:04 -0000	1.1.2.1
@@ -0,0 +1,30 @@
+/* Test argument passing with SSE and local functions
+   Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do compile { target i?86-*-* } } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler "mulss" } } */
+/* { dg-final { scan-assembler-not "movsd" } } */
+/* { dg-final { scan-assembler-not "mulsd" } } */
+/* { dg-options "-O2 -msse -mfpmath=sse -fno-inline" } */
+
+static float xs (void)
+{
+  return 3.14159265;
+}
+
+float ys (float a)
+{
+  return xs () * a;
+}
+
+static double xd (void)
+{
+  return 3.1415926535;
+}
+
+double yd (double a)
+{
+  return xd () * a;
+}
+
--- gcc/testsuite/gcc.dg/i386-ssefn-3.c 1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/i386-ssefn-3.c	28 Feb 2005 12:14:04 -0000	1.1.2.1
@@ -0,0 +1,43 @@
+/* Execution test for argument passing with SSE and local functions
+   Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do run { target i?86-*-* } } */
+/* { dg-options "-O2 -msse -mfpmath=sse" } */
+#include <assert.h>
+#include "i386-cpuid.h"
+
+static float xs (void)
+{
+  return 3.14159265;
+}
+
+float ys (float a)
+{
+  return xs () * a;
+}
+
+static double xd (void)
+{
+  return 3.1415926535;
+}
+
+double yd (double a)
+{
+  return xd () * a;
+}
+
+int main()
+{
+  unsigned long cpu_facilities;
+
+  cpu_facilities = i386_cpuid ();
+
+  if (cpu_facilities & bit_SSE)
+    {
+      assert (ys (1) == xs ());
+      assert (ys (2) == xs () * 2);
+      assert (yd (1) == xd ());
+      assert (yd (2) == xd () * 2);
+    }
+  return 0;
+}

	Jakub



More information about the Gcc-patches mailing list