[RFC PATCH, i386]: Pass FP arguments in i387 registers

Uros Bizjak ubizjak@gmail.com
Thu Oct 12 15:30:00 GMT 2006


Hello!

This patch implements an i387 register passing ABI extension for FP
function arguments. It:
a) unconditionally passes XFmode arguments of local functions in i387 registers
b) passes DFmode and SFmode arguments of local functions in i387
registers for -ffast-math

c) for -m387regparm, passes XFmode arguments in i387 registers
d) for -m387regparm, passes DFmode and SFmode arguments in i387
registers if -ffast-math is selected

This patch also respects the -msseregparm setting and doesn't step on its toes.

The following testcase _illustrates_ how this patch works:

--cut here--
static double __attribute__((noinline)) test(double a, double b)
{
  return a / b;
}

double foo(double a, double b)
{
  return test(a, b);
}
--cut here--

gcc -O2 -fomit-frame-pointer -ffast-math:
(loads the FP arguments passed through memory to the global function "foo"
into x87 registers and passes them to the local function "test"):
test:
        fdivp   %st, %st(1)
        ret

foo:
        fldl    4(%esp)
        fldl    12(%esp)
        fxch    %st(1)
        jmp     test

gcc -O2 -fomit-frame-pointer -ffast-math -m387regparm:
test:
        fdivp   %st, %st(1)
        ret
foo:
        jmp     test

gcc -O2 -fomit-frame-pointer -ffast-math -mfpmath=sse -msse2:
test:
        divsd   %xmm1, %xmm0
        ret
foo:
        subl    $12, %esp
        movsd   24(%esp), %xmm1
        movsd   16(%esp), %xmm0
        call    test
        movsd   %xmm0, (%esp)
        fldl    (%esp)
        addl    $12, %esp
        ret

gcc -O2 -fomit-frame-pointer -ffast-math -mfpmath=sse -msse2 -m387regparm:
test:
        divsd   %xmm1, %xmm0
        ret
foo:
        subl    $12, %esp
        fstpl   (%esp)
        movsd   (%esp), %xmm1
        fstpl   (%esp)
        movsd   (%esp), %xmm0
        call    test
        movsd   %xmm0, (%esp)
        fldl    (%esp)
        addl    $12, %esp
        ret

etc, etc, etc... It works even for the silliest cases.

The patch was bootstrapped on i386-pc-linux-gnu and regression tested
on C, C++ and Fortran. Povray-3.6.1 was also compiled and its test run
produced correct results. FWIW, the patch removed 96 "fld"
instructions and 385 "fst" instructions from calls to local functions
without any ABI violation.

Actually, this patch is fully functional; the only FIXME deals with
error detection and subsequent edge stack correction if a "dead"
register is to be used (I never hit this problem). What remains as
WIP is just a couple of lines of documentation and a bunch of
testcases.

YMMV, but this patch is expected to bring a ~1% speedup on 32-bit x87
FP-intensive code.

2006-10-12  Uros Bizjak  <uros@kss-loka.si>

        * config/i386/i386.opt: New target option -m387regparm.

        * config/i386/i386.h (struct ix86_args): Add new fields x87_nregs,
        x87_regno and float_in_x87.  Remove mmx_words and sse_words.
        (X87_REGPARM_MAX): Define.

        * config/i386/i386.c (override_options): Error out for
        -m387regparm but no 80387 support.
        (ix86_attribute_table): Add 387regparm.
        (ix86_handle_cconv_attribute): Update comments for 387regparm.
        (ix86_comp_type_attributes): Check for mismatched 387regparm types.
        (ix86_function_387regparm): New function.
        (ix86_function_arg_regno_p): Add X87_REGPARM_MAX 80387 floating
        point registers.
        (init_cumulative_args): Initialize x87_nregs and float_in_x87
        variables.
        (function_arg_advance): Process x87_nregs and x87_regno when
        floating point argument is to be passed in 80387 register.
        (function_arg): Pass SFmode and DFmode arguments in 80387
        registers when cum->float_in_x87 and flag_unsafe_math_optimizations
        are set. Pass XFmode arguments in 80387 registers when
        cum->float_in_x87 is set.

        * reg-stack.c (convert_regs_entry): =FIXME= Disable NaN load when
        function argument is passed through 80387 register.

Uros.
-------------- next part --------------
Index: reg-stack.c
===================================================================
--- reg-stack.c	(revision 117595)
+++ reg-stack.c	(working copy)
@@ -2571,15 +2571,20 @@
       for (reg = LAST_STACK_REG; reg >= FIRST_STACK_REG; --reg)
 	if (TEST_HARD_REG_BIT (bi->stack_in.reg_set, reg))
 	  {
-	    rtx init;
+	    rtx init ATTRIBUTE_UNUSED;
 
 	    bi->stack_in.reg[++top] = reg;
 
+	    /* FIXME: This code inserts invalid NaN load when
+	       function argument is passed through x87 register.
+	       DISABLE NaN LOADS FOR NOW.  */
+#if 0
 	    init = gen_rtx_SET (VOIDmode,
 				FP_MODE_REG (FIRST_STACK_REG, SFmode),
 				not_a_num);
 	    insert_insn_on_edge (init, e);
 	    inserted = 1;
+#endif
 	  }
 
       bi->stack_in.top = top;
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 117595)
+++ config/i386/i386.h	(working copy)
@@ -1415,19 +1415,21 @@
    such as FUNCTION_ARG to determine where the next arg should go.  */
 
 typedef struct ix86_args {
-  int words;			/* # words passed so far */
   int nregs;			/* # registers available for passing */
   int regno;			/* next available register number */
+  int words;			/* # words passed so far */
   int fastcall;			/* fastcall calling convention is used */
-  int sse_words;		/* # sse words passed so far */
+  int x87_nregs;		/* # x87 registers available for passing */
+  int x87_regno;		/* # next available x87 register number */
   int sse_nregs;		/* # sse registers available for passing */
+  int sse_regno;		/* next available sse register number */
   int warn_sse;			/* True when we want to warn about SSE ABI.  */
-  int warn_mmx;			/* True when we want to warn about MMX ABI.  */
-  int sse_regno;		/* next available sse register number */
-  int mmx_words;		/* # mmx words passed so far */
   int mmx_nregs;		/* # mmx registers available for passing */
   int mmx_regno;		/* next available mmx register number */
+  int warn_mmx;			/* True when we want to warn about MMX ABI.  */
   int maybe_vaarg;		/* true for calls to possibly vardic fncts.  */
+  int float_in_x87;		/* 1 if floating point arguments should
+				   be passed in 80387 registers.  */
   int float_in_sse;		/* 1 if in 32-bit mode SFmode (2 for DFmode) should
 				   be passed in SSE registers.  Otherwise 0.  */
 } CUMULATIVE_ARGS;
@@ -1723,6 +1725,8 @@
 
 #define REGPARM_MAX (TARGET_64BIT ? 6 : 3)
 
+#define X87_REGPARM_MAX 3
+
 #define SSE_REGPARM_MAX (TARGET_64BIT ? 8 : (TARGET_SSE ? 3 : 0))
 
 #define MMX_REGPARM_MAX (TARGET_64BIT ? 0 : (TARGET_MMX ? 3 : 0))
Index: config/i386/i386.opt
===================================================================
--- config/i386/i386.opt	(revision 117595)
+++ config/i386/i386.opt	(working copy)
@@ -197,6 +197,10 @@
 Target Report Mask(SSE3)
 Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation
 
+m387regparm
+Target RejectNegative Mask(387REGPARM)
+Use x87 register passing conventions to pass floating point arguments
+
 msseregparm
 Target RejectNegative Mask(SSEREGPARM)
 Use SSE register passing conventions for SF and DF mode
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 117595)
+++ config/i386/i386.c	(working copy)
@@ -1912,6 +1912,11 @@
 	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
     }
 
+  /* Accept -m387regparm only if 80387 support is enabled.  */
+  if (TARGET_387REGPARM
+      && ! TARGET_80387)
+    error ("-m387regparm used without 80387 enabled");
+
   /* Accept -msseregparm only if at least SSE support is enabled.  */
   if (TARGET_SSEREGPARM
       && ! TARGET_SSE)
@@ -2213,6 +2218,9 @@
   /* Regparm attribute specifies how many integer arguments are to be
      passed in registers.  */
   { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
+  /* 387regparm attribute says we are passing floating point arguments
+     in 80387 registers.  */
+  { "387regparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
   /* Sseregparm attribute says we are using x86_64 calling conventions
      for FP arguments.  */
   { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
@@ -2315,8 +2323,8 @@
   return true;
 }
 
-/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
-   calling convention attributes;
+/* Handle "cdecl", "stdcall", "fastcall", "regparm", "387regparm"
+   and "sseregparm" calling convention attributes;
    arguments as in struct attribute_spec.handler.  */
 
 static tree
@@ -2381,7 +2389,8 @@
       return NULL_TREE;
     }
 
-  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
+  /* Can combine fastcall with stdcall (redundant), 387regparm
+     and sseregparm.  */
   if (is_attribute_p ("fastcall", name))
     {
       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
@@ -2398,8 +2407,8 @@
 	}
     }
 
-  /* Can combine stdcall with fastcall (redundant), regparm and
-     sseregparm.  */
+  /* Can combine stdcall with fastcall (redundant), regparm,
+     387regparm and sseregparm.  */
   else if (is_attribute_p ("stdcall", name))
     {
       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
@@ -2412,7 +2421,7 @@
 	}
     }
 
-  /* Can combine cdecl with regparm and sseregparm.  */
+  /* Can combine cdecl with regparm, 387regparm and sseregparm.  */
   else if (is_attribute_p ("cdecl", name))
     {
       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
@@ -2425,7 +2434,7 @@
 	}
     }
 
-  /* Can combine sseregparm with all attributes.  */
+  /* Can combine 387regparm and sseregparm with all attributes.  */
 
   return NULL_TREE;
 }
@@ -2450,6 +2459,11 @@
 	  != ix86_function_regparm (type2, NULL)))
     return 0;
 
+  /* Check for mismatched 387regparm types.  */
+  if (!lookup_attribute ("387regparm", TYPE_ATTRIBUTES (type1))
+      != !lookup_attribute ("387regparm", TYPE_ATTRIBUTES (type2)))
+    return 0;
+
   /* Check for mismatched sseregparm types.  */
   if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
       != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
@@ -2538,6 +2552,47 @@
   return regparm;
 }
 
+/* Return 1 if we can pass up to X87_REGPARM_MAX floating point
+   arguments in x87 registers for a function with the indicated
+   TYPE and DECL.  DECL may be NULL when calling function indirectly
+   or considering a libcall.  Otherwise return 0.  */
+
+static int
+ix86_function_387regparm (tree type, tree decl)
+{
+  /* Use x87 registers to pass floating point arguments if requested
+     by the 387regparm attribute.  */
+  if (TARGET_387REGPARM
+      || (type
+	  && lookup_attribute ("387regparm", TYPE_ATTRIBUTES (type))))
+    {
+      if (!TARGET_80387)
+	{
+	  if (decl)
+	    error ("Calling %qD with attribute 387regparm without "
+		   "80387 enabled", decl);
+	  else
+	    error ("Calling %qT with attribute 387regparm without "
+		   "80387 enabled", type);
+	  return 0;
+	}
+
+      return 1;
+    }
+
+  /* For local functions, pass up to X87_REGPARM_MAX floating point
+     arguments in x87 registers.  */
+  if (!TARGET_64BIT && decl
+      && flag_unit_at_a_time && !profile_flag)
+    {
+      struct cgraph_local_info *i = cgraph_local_info (decl);
+      if (i && i->local)
+	return 1;
+    }
+
+  return 0;
+}
+
 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
    in SSE registers for a function with the indicated TYPE and DECL.
    DECL may be NULL when calling function indirectly
@@ -2657,6 +2712,8 @@
   int i;
   if (!TARGET_64BIT)
     return (regno < REGPARM_MAX
+	    || (TARGET_80387 && FP_REGNO_P (regno)
+		&& (regno < FIRST_FLOAT_REG + X87_REGPARM_MAX))
 	    || (TARGET_MMX && MMX_REGNO_P (regno)
 		&& (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
 	    || (TARGET_SSE && SSE_REGNO_P (regno)
@@ -2720,6 +2777,8 @@
 
   /* Set up the number of registers to use for passing arguments.  */
   cum->nregs = ix86_regparm;
+  if (TARGET_80387)
+    cum->x87_nregs = X87_REGPARM_MAX;
   if (TARGET_SSE)
     cum->sse_nregs = SSE_REGPARM_MAX;
   if (TARGET_MMX)
@@ -2741,6 +2800,10 @@
 	cum->nregs = ix86_function_regparm (fntype, fndecl);
     }
 
+  /* Set up the number of 80387 registers used for passing
+     floating point arguments.  Warn for mismatching ABI.  */
+  cum->float_in_x87 = ix86_function_387regparm (fntype, fndecl);
+
   /* Set up the number of SSE registers used for passing SFmode
      and DFmode arguments.  Warn for mismatching ABI.  */
   cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
@@ -2750,7 +2813,8 @@
      are no variable arguments.  If there are variable arguments, then
      we won't pass anything in registers in 32-bit mode. */
 
-  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
+  if (cum->nregs || cum->mmx_nregs
+      || cum->x87_nregs || cum->sse_nregs)
     {
       for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
 	   param != 0; param = next_param)
@@ -2761,11 +2825,13 @@
 	      if (!TARGET_64BIT)
 		{
 		  cum->nregs = 0;
+		  cum->x87_nregs = 0;
 		  cum->sse_nregs = 0;
 		  cum->mmx_nregs = 0;
 		  cum->warn_sse = 0;
 		  cum->warn_mmx = 0;
 		  cum->fastcall = 0;
+		  cum->float_in_x87 = 0;
 		  cum->float_in_sse = 0;
 		}
 	      cum->maybe_vaarg = true;
@@ -3492,14 +3558,36 @@
 	    }
 	  break;
 
+	case SFmode:
+	  if (cum->float_in_sse > 0)
+	    goto skip_80387;
+
 	case DFmode:
-	  if (cum->float_in_sse < 2)
+	  if (cum->float_in_sse > 1)
+	    goto skip_80387;
+
+	  if (! (cum->float_in_x87
+		 && flag_unsafe_math_optimizations))
 	    break;
-	case SFmode:
-	  if (cum->float_in_sse < 1)
+
+	case XFmode:
+	  if (!cum->float_in_x87)
 	    break;
-	  /* FALLTHRU */
 
+	  if (!type || !AGGREGATE_TYPE_P (type))
+	    {
+	      cum->x87_nregs -= 1;
+	      cum->x87_regno += 1;
+	      if (cum->x87_nregs <= 0)
+		{
+		  cum->x87_nregs = 0;
+		  cum->x87_regno = 0;
+		}
+	    }
+	  break;
+
+ skip_80387:
+
 	case TImode:
 	case V16QImode:
 	case V8HImode:
@@ -3509,7 +3597,6 @@
 	case V2DFmode:
 	  if (!type || !AGGREGATE_TYPE_P (type))
 	    {
-	      cum->sse_words += words;
 	      cum->sse_nregs -= 1;
 	      cum->sse_regno += 1;
 	      if (cum->sse_nregs <= 0)
@@ -3526,7 +3613,6 @@
 	case V2SFmode:
 	  if (!type || !AGGREGATE_TYPE_P (type))
 	    {
-	      cum->mmx_words += words;
 	      cum->mmx_nregs -= 1;
 	      cum->mmx_regno += 1;
 	      if (cum->mmx_nregs <= 0)
@@ -3591,7 +3677,6 @@
   else
     switch (mode)
       {
-	/* For now, pass fp/complex values on the stack.  */
       default:
 	break;
 
@@ -3621,13 +3706,30 @@
 	    ret = gen_rtx_REG (mode, regno);
 	  }
 	break;
-      case DFmode:
-	if (cum->float_in_sse < 2)
+
+	case SFmode:
+	  if (cum->float_in_sse > 1)
+	    goto skip_80387;
+
+	case DFmode:
+	  if (cum->float_in_sse > 2)
+	    goto skip_80387;
+
+	  if (! (cum->float_in_x87
+		 && flag_unsafe_math_optimizations))
+	    break;
+
+	case XFmode:
+	  if (!cum->float_in_x87)
+	    break;
+
+	  if (!type || !AGGREGATE_TYPE_P (type))
+	    if (cum->x87_nregs)
+	      ret = gen_rtx_REG (mode, cum->x87_regno + FIRST_FLOAT_REG);
 	  break;
-      case SFmode:
-	if (cum->float_in_sse < 1)
-	  break;
-	/* FALLTHRU */
+
+ skip_80387:
+
       case TImode:
       case V16QImode:
       case V8HImode:


More information about the Gcc-patches mailing list