This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC PATCH] (( fpregparm (X) )) attribute for i386 floating point


Hello!

Attached to this message, please find a patch which implements the 'fpregparm' function attribute and -mfpregparm=X option handling for the i386 target. The fpregparm attribute and option are handled like the regparm attribute, and pass FP arguments to functions on the FP stack. The patch is tested, both with and without va_arg(), and produces code as expected. The fpregparm attribute can be combined with the regparm attribute, and -mfpregparm=X can be combined with -mregparm=X.
There is a small problem in reg-stack.c: the convert_regs_entry() function assumes that FP registers are not initialized at function entry and fills the FP stack with dummy NaNs. I have disabled this code for now, but there should instead be a test for whether the caller has initialized the registers to some value.


The Whetstone benchmark shows a 1.6% overall speedup. For floating point, the N2 loop is ~2.3% faster and the N6 loop is ~5.5% faster:

gcc-35 -O2 -ffast-math -mfpregparm=7:

N1 floating point     -1.12441420555114746       590.769              0.468
N2 floating point     -1.12241148948669434       481.433              4.020
N3 if then else        1.00000000000000000                 327.560    4.550
N4 fixed point        12.00000000000000000                 490.909    9.240
N5 sin,cos etc.        0.49907428026199341                  52.663   22.750
N6 floating point      0.99999988079071045       290.152             26.770
N7 assignments         3.00000000000000000                 423.745    6.280
N8 exp,sqrt etc.       0.75095528364181519                  19.367   27.660

MWIPS 1415.400 101.738

gcc-35 -O2 -ffast-math -mfpregparm=0:

N1 floating point     -1.12441420555114746       590.296              0.462
N2 floating point     -1.12241148948669434       470.201              4.060
N3 if then else        1.00000000000000000                 323.102    4.550
N4 fixed point        12.00000000000000000                 490.599    9.120
N5 sin,cos etc.        0.49907428026199341                  52.523   22.500
N6 floating point      0.99999988079071045       275.896             27.770
N7 assignments         3.00000000000000000                 424.055    6.190
N8 exp,sqrt etc.       0.75095528364181519                  19.362   27.290

MWIPS 1393.341 101.942


This testcase (integers are there just to test integer registers passing with regparm...):


/* Test case combining both attributes: fpregparm (7) requests that up to
   seven floating-point arguments be passed in FP stack registers, and
   regparm (3) requests that up to three integer arguments be passed in
   integer registers.  The int parameters are interleaved with the doubles
   only to exercise integer register passing alongside FP register passing.
   Returns the sum of all thirteen arguments as a double.  */
__attribute__ (( fpregparm (7) ))
__attribute__ (( regparm (3) ))
    double test (double a, double b, double c,
                 int d, double e, int f,
                 double g, double h, int i,
                 int j, double k, double l,
                 double m) {
 /* Summed from the last parameter to the first.  */
 return m + l + k + j + i + h + g + f + e + d + c + b + a;
}

produces the following asm code:

test:
       pushl   %ebp
       movl %esp, %ebp
       fldl 20(%ebp)
       faddl   12(%ebp)
       pushl   %ecx
       faddp   %st, %st(7)
       fildl   8(%ebp)
       faddp   %st, %st(7)
       fildl   (%esp)
       movl %edx, (%esp)
       faddp   %st, %st(7)
       fxch %st(6)
       faddp   %st, %st(5)
       fxch %st(4)
       faddp   %st, %st(3)
       fildl   (%esp)
       movl %eax, (%esp)
       faddp   %st, %st(3)
       fxch %st(2)
       faddp   %st, %st(1)
       fildl   (%esp)
       addl $4, %esp
       popl %ebp
       faddp   %st, %st(1)
       faddp   %st, %st(1)
       faddp   %st, %st(1)
       faddp   %st, %st(1)
       ret


Index: reg-stack.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/reg-stack.c,v
retrieving revision 1.146
diff -u -r1.146 reg-stack.c
--- reg-stack.c	6 Apr 2004 19:34:09 -0000	1.146
+++ reg-stack.c	13 Apr 2004 08:55:34 -0000
@@ -2520,12 +2520,16 @@
 	    rtx init;
 
 	    bi->stack_in.reg[++top] = reg;
+#if 0
+	    /* FIXME: If fpregparm != 0, this code inserts dummy NaN loads
+	       at the beginning of function. DISABLE FOR NOW!  */
 
 	    init = gen_rtx_SET (VOIDmode,
 				FP_MODE_REG (FIRST_STACK_REG, SFmode),
 				nan);
 	    insert_insn_on_edge (init, e);
 	    inserted = 1;
+#endif
 	  }
 
       bi->stack_in.top = top;
Index: config/i386/i386.h
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.384
diff -u -r1.384 i386.h
--- config/i386/i386.h	9 Apr 2004 19:57:45 -0000	1.384
+++ config/i386/i386.h	13 Apr 2004 08:55:34 -0000
@@ -468,6 +468,8 @@
     N_("Generate code for given CPU"), 0},			\
   { "regparm=",		&ix86_regparm_string,			\
     N_("Number of registers used to pass integer arguments"), 0},\
+  { "fpregparm=",	&ix86_fp_regparm_string,			\
+    N_("Number of registers used to pass floating point arguments"), 0},\
   { "align-loops=",	&ix86_align_loops_string,		\
     N_("Loop code aligned to this power of 2"), 0},		\
   { "align-jumps=",	&ix86_align_jumps_string,		\
@@ -1728,12 +1730,15 @@
   int words;			/* # words passed so far */
   int nregs;			/* # registers available for passing */
   int regno;			/* next available register number */
-  int fastcall;		/* fastcall calling convention is used */
+  int fastcall;			/* fastcall calling convention is used */
+  int fp_words;			/* # fp words passed so far */
+  int fp_nregs;			/* # fp registers available for passing */
+  int fp_regno;			/* next available fp register number */
   int sse_words;		/* # sse words passed so far */
   int sse_nregs;		/* # sse registers available for passing */
+  int sse_regno;		/* next available sse register number */
   int warn_sse;			/* True when we want to warn about SSE ABI.  */
   int warn_mmx;			/* True when we want to warn about MMX ABI.  */
-  int sse_regno;		/* next available sse register number */
   int mmx_words;		/* # mmx words passed so far */
   int mmx_nregs;		/* # mmx registers available for passing */
   int mmx_regno;		/* next available mmx register number */
@@ -2502,10 +2507,17 @@
 /* Max number of args passed in registers.  If this is more than 3, we will
    have problems with ebx (register #4), since it is a caller save register and
    is also used as the pic register in ELF.  So for now, don't allow more than
-   3 registers to be passed in registers.  */
+   3 args to be passed in registers.  */
 
 #define REGPARM_MAX (TARGET_64BIT ? 6 : 3)
 
+/* Max number of floating point args passed in floating point stack slots.
+   Some instructions require additional stack slot as temporary storage space
+   for internal calculations. To be safe, do not allow args to fill up all
+   stack slots.  */
+
+#define FP_REGPARM_MAX (TARGET_64BIT ? 0: (TARGET_80387 ? 7 : 0))
+
 #define SSE_REGPARM_MAX (TARGET_64BIT ? 8 : (TARGET_SSE ? 3 : 0))
 
 #define MMX_REGPARM_MAX (TARGET_64BIT ? 0 : (TARGET_MMX ? 3 : 0))
@@ -3010,6 +3022,9 @@
 
 extern int ix86_regparm;
 extern const char *ix86_regparm_string;
+
+extern int ix86_fp_regparm;
+extern const char *ix86_fp_regparm_string;
 
 extern int ix86_preferred_stack_boundary;
 extern const char *ix86_preferred_stack_boundary_string;
Index: config/i386/i386.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.662
diff -u -r1.662 i386.c
--- config/i386/i386.c	12 Apr 2004 23:23:14 -0000	1.662
+++ config/i386/i386.c	13 Apr 2004 08:55:37 -0000
@@ -799,12 +799,18 @@
 /* # of registers to use to pass arguments.  */
 const char *ix86_regparm_string;
 
+/* # of floating-point registers to use to pass arguments.  */
+const char *ix86_fp_regparm_string;
+
 /* true if sse prefetch instruction is not NOOP.  */
 int x86_prefetch_sse;
 
 /* ix86_regparm_string as a number */
 int ix86_regparm;
 
+/* ix86_fp_regparm_string as a number */
+int ix86_fp_regparm;
+
 /* Alignment to use for loops and jumps:  */
 
 /* Power of two alignment for loops.  */
@@ -913,10 +919,12 @@
 static void ix86_compute_frame_layout (struct ix86_frame *);
 static int ix86_comp_type_attributes (tree, tree);
 static int ix86_function_regparm (tree, tree);
+static int ix86_function_fp_regparm (tree);
 const struct attribute_spec ix86_attribute_table[];
 static bool ix86_function_ok_for_sibcall (tree, tree);
 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
+static tree ix86_handle_fp_regparm_attribute (tree *, tree, tree, int, bool *);
 static int ix86_value_regno (enum machine_mode);
 static bool contains_128bit_aligned_vector_p (tree);
 static bool ix86_ms_bitfield_layout_p (tree);
@@ -1330,6 +1338,19 @@
    if (TARGET_64BIT)
      ix86_regparm = REGPARM_MAX;
 
+  /* Validate -mfpregparm= value.  */
+  if (ix86_fp_regparm_string)
+    {
+      i = atoi (ix86_fp_regparm_string);
+      if (i < 0 || i > FP_REGPARM_MAX)
+	error ("-mfpregparm=%d is not between 0 and %d", i, FP_REGPARM_MAX);
+      else
+	ix86_fp_regparm = i;
+    }
+  else
+    if (TARGET_64BIT)
+      ix86_fp_regparm = FP_REGPARM_MAX;
+
   /* If the user has provided any of the -malign-* options,
      warn and use that value only if -falign-* is not set.
      Remove this code in GCC 3.2 or later.  */
@@ -1569,6 +1590,9 @@
   /* Regparm attribute specifies how many integer arguments are to be
      passed in registers.  */
   { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
+  /* Fpregparm attribute specifies how many floating point arguments are
+     to be passed in registers.  */
+  { "fpregparm",   1, 1, false, true,  true,  ix86_handle_fp_regparm_attribute },
 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
   { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
   { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
@@ -1714,6 +1738,43 @@
   return NULL_TREE;
 }
 
+/* Handle a "fpregparm" attribute;
+   arguments as in struct attribute_spec.handler.  */
+static tree
+ix86_handle_fp_regparm_attribute (tree *node, tree name, tree args,
+				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+  if (TREE_CODE (*node) != FUNCTION_TYPE
+      && TREE_CODE (*node) != METHOD_TYPE
+      && TREE_CODE (*node) != FIELD_DECL
+      && TREE_CODE (*node) != TYPE_DECL)
+    {
+      warning ("`%s' attribute only applies to functions",
+	       IDENTIFIER_POINTER (name));
+      *no_add_attrs = true;
+    }
+  else
+    {
+      tree cst;
+
+      cst = TREE_VALUE (args);
+      if (TREE_CODE (cst) != INTEGER_CST)
+	{
+	  warning ("`%s' attribute requires an integer constant argument",
+		   IDENTIFIER_POINTER (name));
+	  *no_add_attrs = true;
+	}
+      else if (compare_tree_int (cst, FP_REGPARM_MAX) > 0)
+	{
+	  warning ("argument to `%s' attribute larger than %d",
+		   IDENTIFIER_POINTER (name), FP_REGPARM_MAX);
+	  *no_add_attrs = true;
+	}
+    }
+
+  return NULL_TREE;
+}
+
 /* Return 0 if the attributes for two types are incompatible, 1 if they
    are compatible, and 2 if they are nearly compatible (which causes a
    warning to be generated).  */
@@ -1742,7 +1803,7 @@
   return 1;
 }
 
-/* Return the regparm value for a fuctio with the indicated TYPE and DECL.
+/* Return the regparm value for a function with the indicated TYPE and DECL.
    DECL may be NULL when calling function indirectly
    or considering a libcall.  */
 
@@ -1787,6 +1848,20 @@
   return regparm;
 }
 
+/* Return the fpregparm value for a function with the indicated TYPE.  */
+
+static int
+ix86_function_fp_regparm (tree type)
+{
+  tree attr;
+
+  attr = lookup_attribute ("fpregparm", TYPE_ATTRIBUTES (type));
+  if (attr)
+    return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
+  else
+    return ix86_fp_regparm;
+}
+
 /* Return true if EAX is live at the start of the function.  Used by 
    ix86_expand_prologue to determine if we need special help before
    calling allocate_stack_worker.  */
@@ -1863,6 +1938,8 @@
   int i;
   if (!TARGET_64BIT)
     return (regno < REGPARM_MAX
+	    || (TARGET_80387 && (regno < FP_REGPARM_MAX) &&
+		FP_REGNO_P (regno) && !fixed_regs[regno])
 	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
   if (SSE_REGNO_P (regno) && TARGET_SSE)
     return true;
@@ -1905,12 +1982,17 @@
   *cum = zero_cum;
 
   /* Set up the number of registers to use for passing arguments.  */
-  if (fntype)
+  if (fntype) {
     cum->nregs = ix86_function_regparm (fntype, fndecl);
-  else
+    cum->fp_nregs = ix86_function_fp_regparm (fntype);
+  }
+  else {
     cum->nregs = ix86_regparm;
+    cum->fp_nregs = ix86_fp_regparm;
+  }
   cum->sse_nregs = SSE_REGPARM_MAX;
   cum->mmx_nregs = MMX_REGPARM_MAX;
+  cum->fp_regno = FIRST_FLOAT_REG;
   cum->warn_sse = true;
   cum->warn_mmx = true;
   cum->maybe_vaarg = false;
@@ -1931,7 +2013,7 @@
      are no variable arguments.  If there are variable arguments, then
      we won't pass anything in registers */
 
-  if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
+  if (cum->nregs || cum->fp_nregs || !TARGET_MMX || !TARGET_SSE)
     {
       for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
 	   param != 0; param = next_param)
@@ -1942,6 +2024,7 @@
 	      if (!TARGET_64BIT)
 		{
 		  cum->nregs = 0;
+		  cum->fp_nregs = 0;
 		  cum->sse_nregs = 0;
 		  cum->mmx_nregs = 0;
 		  cum->warn_sse = 0;
@@ -1957,8 +2040,8 @@
     cum->maybe_vaarg = 1;
 
   if (TARGET_DEBUG_ARG)
-    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
-
+    fprintf (stderr, ", nregs=%d, fp_nregs=%d, sse_nregs=%d )\n",
+	     cum->nregs, cum->fp_nregs, cum->sse_nregs);
   return;
 }
 
@@ -2515,8 +2598,9 @@
 
   if (TARGET_DEBUG_ARG)
     fprintf (stderr,
-	     "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
-	     words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
+	     "function_adv (sz=%d, wds=%2d, nregs=%d, fpnregs=%d ssenregs=%d, mode=%s, named=%d)\n\n",
+	     words, cum->words, cum->nregs, cum->fp_nregs, cum->sse_nregs,
+	     GET_MODE_NAME (mode), named);
   if (TARGET_64BIT)
     {
       int int_nregs, sse_nregs;
@@ -2558,7 +2642,21 @@
 	      cum->mmx_regno = 0;
 	    }
 	}
-      else
+      else if (TARGET_80387 && (mode == SFmode || mode == DFmode ||
+				mode == XFmode || mode == TFmode))
+	{
+	  cum->fp_words += words;
+	  cum->fp_nregs -= 1;
+	  cum->fp_regno += 1;
+	  if (cum->fp_nregs <= 0)
+	    {
+	      cum->fp_nregs = 0;
+	      cum->fp_regno = 0;
+	    }
+	}
+      else if (mode == DImode || mode == SImode || 
+	       mode == HImode || mode == QImode ||
+	       ((mode == BLKmode) && (bytes >= 0)))
 	{
 	  cum->words += words;
 	  cum->nregs -= words;
@@ -2685,18 +2783,26 @@
 	      ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
 	  }
 	break;
+      case SFmode:
+      case DFmode:
+      case XFmode:
+      case TFmode:
+	if (cum->fp_nregs)
+	  ret = gen_rtx_REG (mode, cum->fp_regno);
+	break;
       }
 
   if (TARGET_DEBUG_ARG)
     {
       fprintf (stderr,
-	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
-	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
+	       "function_arg (size=%d, wds=%2d, nregs=%d, fp_nregs=%d, sse_nregs=%d mode=%4s, named=%d, ",
+	       words, cum->words, cum->nregs, cum->fp_nregs, cum->sse_nregs,
+	       GET_MODE_NAME (mode), named);
 
       if (ret)
 	print_simple_rtl (stderr, ret);
       else
-	fprintf (stderr, ", stack");
+	fprintf (stderr, " stack");
 
       fprintf (stderr, " )\n");
     }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]