This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, i386] Optionally use %xmm0 to return float and/or double values (take 3)


On Thu, Jan 27, 2005 at 09:40:14AM +0100, Paolo Bonzini wrote:
> >>+  if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
> >>+    {
> >>+      struct cgraph_local_info *i = cgraph_local_info (func);
> >>+      if (k && i && i->local)
> >
> >
> >k is not defined here.  If I s/k && //, I get these numbers with
> >povray 3.6.1 benchmark:
> 
> Oops... in my tests k was a global initialized to getenv("SSE_RETURN") 
> to avoid installing multiple cc1's.  I thought I had removed all traces 
> of it.
> 
> But I'm happy that you can reproduce my results; and I agree that at -O2 
> (more precisely without -finline-functions) the result ought to be better.

BTW, with the following patch on top of your patch I get
  Parse Time:    0 hours  0 minutes  1 seconds (1 seconds)
  Photon Time:   0 hours  0 minutes 43 seconds (43 seconds)
  Render Time:   0 hours 31 minutes 11 seconds (1871 seconds)
  Total Time:    0 hours 31 minutes 55 seconds (1915 seconds)
which means a 1.34% improvement over CVS, while your patch alone gave 0.62%
(each timing is the smaller of two runs, and in all cases the difference
between the two runs was <= 3 seconds).
Binary size decreased as well (size output: text, data, bss, dec, hex, file):
2118372   31840  193108 2343320  23c198 obj1/unix/povray
2216571   32640  193136 2442347  25446b obj2/unix/povray
2215787   32640  193136 2441563  25415b obj3/unix/povray
2213007   32640  193136 2438783  25367f obj4/unix/povray
(obj1 is 3.4.3-RH, obj2 is stock CVS HEAD, obj3 is HEAD+your patch, obj4
is HEAD+your patch+patch below).

2005-01-27  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (init_cumulative_args): For -m32 -mfpmath=sse
	and local functions, set sse_nregs to 8 and float_in_sse.
	(function_arg_advance, function_arg): If float_in_sse, pass
	SFmode and DFmode arguments in SSE registers.
	* config/i386/i386.h (CUMULATIVE_ARGS): Add float_in_sse field.

	* config/i386/i386.c (ix86_value_regno): Only optimize local functions
	if -funit-at-a-time.

--- gcc/config/i386/i386.c.jj	2005-01-27 08:02:09.000000000 +0100
+++ gcc/config/i386/i386.c	2005-01-27 11:18:23.194763303 +0100
@@ -2036,7 +2036,22 @@ init_cumulative_args (CUMULATIVE_ARGS *c
     }
   if ((!fntype && !libname)
       || (fntype && !TYPE_ARG_TYPES (fntype)))
-    cum->maybe_vaarg = 1;
+    cum->maybe_vaarg = true;
+
+  /* For local functions, pass SFmode (and DFmode for SSE2) arguments
+     in SSE registers even for 32-bit mode and not just 3, but up to
+     8 SSE arguments in registers.  */
+  if (!TARGET_64BIT && !cum->maybe_vaarg && !cum->fastcall
+      && cum->sse_nregs == SSE_REGPARM_MAX && fndecl
+      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
+    {
+      struct cgraph_local_info *i = cgraph_local_info (fndecl);
+      if (i && i->local)
+        {
+          cum->sse_nregs = 8;
+          cum->float_in_sse = true;
+        }
+    }
 
   if (TARGET_DEBUG_ARG)
     fprintf (stderr, ", nregs=%d )\n", cum->nregs);
@@ -2727,6 +2742,14 @@ function_arg_advance (CUMULATIVE_ARGS *c
 	    }
 	  break;
 
+        case DFmode:
+          if (!TARGET_SSE2)
+            break;
+        case SFmode:
+          if (!cum->float_in_sse)
+            break;
+          /* FALLTHRU */
+
 	case TImode:
 	case V16QImode:
 	case V8HImode:
@@ -2848,6 +2871,13 @@ function_arg (CUMULATIVE_ARGS *cum, enum
 	    ret = gen_rtx_REG (mode, regno);
 	  }
 	break;
+      case DFmode:
+        if (!TARGET_SSE2)
+          break;
+      case SFmode:
+        if (!cum->float_in_sse)
+          break;
+        /* FALLTHRU */
       case TImode:
       case V16QImode:
       case V8HImode:
@@ -3178,7 +3208,8 @@ ix86_value_regno (enum machine_mode mode
 
   /* Floating point return values in %st(0), except for local functions when
      SSE math is enabled.  */
-  if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+  if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
+      && flag_unit_at_a_time)
     {
       struct cgraph_local_info *i = cgraph_local_info (func);
       if (i && i->local)
--- gcc/config/i386/i386.h.jj	2005-01-27 07:45:15.000000000 +0100
+++ gcc/config/i386/i386.h	2005-01-27 11:09:58.999469382 +0100
@@ -1745,6 +1745,8 @@ typedef struct ix86_args {
   int mmx_nregs;		/* # mmx registers available for passing */
   int mmx_regno;		/* next available mmx register number */
   int maybe_vaarg;		/* true for calls to possibly vardic fncts.  */
+  int float_in_sse;		/* true if in 32-bit mode SFmode/DFmode should
+				   be passed in SSE registers.  */
 } CUMULATIVE_ARGS;
 
 /* Initialize a variable CUM of type CUMULATIVE_ARGS


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]