This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[RFC PATCH] (( fpregparm (X) )) attribute for i386 floating point
- From: Uros Bizjak <uros at kss-loka dot si>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 13 Apr 2004 11:38:00 +0200
- Subject: [RFC PATCH] (( fpregparm (X) )) attribute for i386 floating point
Hello!
Attached to this message, please find a patch which implements the
'fpregparm' function attribute and -mfpregparm=X handling for the i386
target. The fpregparm attribute and option are handled like the regparm
attribute, and pass FP arguments to functions on the FP stack. The patch
has been tested, both with and without va_arg(), and produces code as
expected. The fpregparm attribute can be combined with the regparm
attribute, and -mfpregparm=X can be combined with -mregparm=X.
There is a small problem in reg-stack.c. The convert_regs_entry() function
assumes that FP registers are not initialized at function entry and fills
the FP stack with dummy NaNs. I have disabled this code for now, but there
should instead be a test for whether the caller has initialized the
registers to some value.
The Whetstone benchmark shows a 1.6% overall speedup. For floating point, the
N2 loop is ~2.3% faster and the N6 loop is ~5.5% faster:
gcc-35 -O2 -ffast-math -mfpregparm=7:
N1 floating point -1.12441420555114746 590.769 0.468
N2 floating point -1.12241148948669434 481.433 4.020
N3 if then else 1.00000000000000000 327.560 4.550
N4 fixed point 12.00000000000000000 490.909 9.240
N5 sin,cos etc. 0.49907428026199341 52.663 22.750
N6 floating point 0.99999988079071045 290.152 26.770
N7 assignments 3.00000000000000000 423.745 6.280
N8 exp,sqrt etc. 0.75095528364181519 19.367 27.660
MWIPS 1415.400 101.738
gcc-35 -O2 -ffast-math -mfpregparm=0:
N1 floating point -1.12441420555114746 590.296 0.462
N2 floating point -1.12241148948669434 470.201 4.060
N3 if then else 1.00000000000000000 323.102 4.550
N4 fixed point 12.00000000000000000 490.599 9.120
N5 sin,cos etc. 0.49907428026199341 52.523 22.500
N6 floating point 0.99999988079071045 275.896 27.770
N7 assignments 3.00000000000000000 424.055 6.190
N8 exp,sqrt etc. 0.75095528364181519 19.362 27.290
MWIPS 1393.341 101.942
This testcase (integers are there just to test integer registers passing
with regparm...):
/* Test case from this message: returns the sum of all thirteen arguments.
   The int arguments (d, f, i, j) are interleaved with the doubles so that
   integer register passing via regparm (3) is exercised together with
   fpregparm (7) on the same function.  */
__attribute__ (( fpregparm (7) ))
__attribute__ (( regparm (3) ))
double test (double a, double b, double c,
int d, double e, int f,
double g, double h, int i,
int j, double k, double l,
double m) {
return m + l + k + j + i + h + g + f + e + d + c + b + a;
}
produces the following asm code:
test:
pushl %ebp
movl %esp, %ebp
fldl 20(%ebp)
faddl 12(%ebp)
pushl %ecx
faddp %st, %st(7)
fildl 8(%ebp)
faddp %st, %st(7)
fildl (%esp)
movl %edx, (%esp)
faddp %st, %st(7)
fxch %st(6)
faddp %st, %st(5)
fxch %st(4)
faddp %st, %st(3)
fildl (%esp)
movl %eax, (%esp)
faddp %st, %st(3)
fxch %st(2)
faddp %st, %st(1)
fildl (%esp)
addl $4, %esp
popl %ebp
faddp %st, %st(1)
faddp %st, %st(1)
faddp %st, %st(1)
faddp %st, %st(1)
ret
Index: reg-stack.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/reg-stack.c,v
retrieving revision 1.146
diff -u -r1.146 reg-stack.c
--- reg-stack.c 6 Apr 2004 19:34:09 -0000 1.146
+++ reg-stack.c 13 Apr 2004 08:55:34 -0000
@@ -2520,12 +2520,16 @@
rtx init;
bi->stack_in.reg[++top] = reg;
+#if 0
+ /* FIXME: If fpregparm != 0, this code inserts dummy NaN loads
+ at the beginning of function. DISABLE FOR NOW! */
init = gen_rtx_SET (VOIDmode,
FP_MODE_REG (FIRST_STACK_REG, SFmode),
nan);
insert_insn_on_edge (init, e);
inserted = 1;
+#endif
}
bi->stack_in.top = top;
Index: config/i386/i386.h
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.384
diff -u -r1.384 i386.h
--- config/i386/i386.h 9 Apr 2004 19:57:45 -0000 1.384
+++ config/i386/i386.h 13 Apr 2004 08:55:34 -0000
@@ -468,6 +468,8 @@
N_("Generate code for given CPU"), 0}, \
{ "regparm=", &ix86_regparm_string, \
N_("Number of registers used to pass integer arguments"), 0},\
+ { "fpregparm=", &ix86_fp_regparm_string, \
+ N_("Number of registers used to pass floating point arguments"), 0},\
{ "align-loops=", &ix86_align_loops_string, \
N_("Loop code aligned to this power of 2"), 0}, \
{ "align-jumps=", &ix86_align_jumps_string, \
@@ -1728,12 +1730,15 @@
int words; /* # words passed so far */
int nregs; /* # registers available for passing */
int regno; /* next available register number */
- int fastcall; /* fastcall calling convention is used */
+ int fastcall; /* fastcall calling convention is used */
+ int fp_words; /* # fp words passed so far */
+ int fp_nregs; /* # fp registers available for passing */
+ int fp_regno; /* next available fp register number */
int sse_words; /* # sse words passed so far */
int sse_nregs; /* # sse registers available for passing */
+ int sse_regno; /* next available sse register number */
int warn_sse; /* True when we want to warn about SSE ABI. */
int warn_mmx; /* True when we want to warn about MMX ABI. */
- int sse_regno; /* next available sse register number */
int mmx_words; /* # mmx words passed so far */
int mmx_nregs; /* # mmx registers available for passing */
int mmx_regno; /* next available mmx register number */
@@ -2502,10 +2507,17 @@
/* Max number of args passed in registers. If this is more than 3, we will
have problems with ebx (register #4), since it is a caller save register and
is also used as the pic register in ELF. So for now, don't allow more than
- 3 registers to be passed in registers. */
+ 3 args to be passed in registers. */
#define REGPARM_MAX (TARGET_64BIT ? 6 : 3)
+/* Max number of floating point args passed in floating point stack slots.
+ Some instructions require an additional stack slot as temporary storage
+ space for internal calculations. To be safe, do not allow args to fill up
+ all stack slots. */
+
+#define FP_REGPARM_MAX (TARGET_64BIT ? 0: (TARGET_80387 ? 7 : 0))
+
#define SSE_REGPARM_MAX (TARGET_64BIT ? 8 : (TARGET_SSE ? 3 : 0))
#define MMX_REGPARM_MAX (TARGET_64BIT ? 0 : (TARGET_MMX ? 3 : 0))
@@ -3010,6 +3022,9 @@
extern int ix86_regparm;
extern const char *ix86_regparm_string;
+
+extern int ix86_fp_regparm;
+extern const char *ix86_fp_regparm_string;
extern int ix86_preferred_stack_boundary;
extern const char *ix86_preferred_stack_boundary_string;
Index: config/i386/i386.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.662
diff -u -r1.662 i386.c
--- config/i386/i386.c 12 Apr 2004 23:23:14 -0000 1.662
+++ config/i386/i386.c 13 Apr 2004 08:55:37 -0000
@@ -799,12 +799,18 @@
/* # of registers to use to pass arguments. */
const char *ix86_regparm_string;
+/* # of floating-point registers to use to pass arguments. */
+const char *ix86_fp_regparm_string;
+
/* true if sse prefetch instruction is not NOOP. */
int x86_prefetch_sse;
/* ix86_regparm_string as a number */
int ix86_regparm;
+/* ix86_fp_regparm_string as a number */
+int ix86_fp_regparm;
+
/* Alignment to use for loops and jumps: */
/* Power of two alignment for loops. */
@@ -913,10 +919,12 @@
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
+static int ix86_function_fp_regparm (tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
+static tree ix86_handle_fp_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static bool ix86_ms_bitfield_layout_p (tree);
@@ -1330,6 +1338,19 @@
if (TARGET_64BIT)
ix86_regparm = REGPARM_MAX;
+ /* Validate -mfpregparm= value. */
+ if (ix86_fp_regparm_string)
+ {
+ i = atoi (ix86_fp_regparm_string);
+ if (i < 0 || i > FP_REGPARM_MAX)
+ error ("-mfpregparm=%d is not between 0 and %d", i, FP_REGPARM_MAX);
+ else
+ ix86_fp_regparm = i;
+ }
+ else
+ if (TARGET_64BIT)
+ ix86_fp_regparm = FP_REGPARM_MAX;
+
/* If the user has provided any of the -malign-* options,
warn and use that value only if -falign-* is not set.
Remove this code in GCC 3.2 or later. */
@@ -1569,6 +1590,9 @@
/* Regparm attribute specifies how many integer arguments are to be
passed in registers. */
{ "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
+ /* Fpregparm attribute specifies how many floating point arguments are
+ to be passed in registers. */
+ { "fpregparm", 1, 1, false, true, true, ix86_handle_fp_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
{ "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
{ "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
@@ -1714,6 +1738,43 @@
return NULL_TREE;
}
+/* Handle a "fpregparm" attribute;
+ arguments as in struct attribute_spec.handler.
+
+ A warning is issued and *NO_ADD_ATTRS is set to true when *NODE is not a
+ FUNCTION_TYPE, METHOD_TYPE, FIELD_DECL or TYPE_DECL, when the attribute
+ argument is not an INTEGER_CST, or when the argument compares greater
+ than FP_REGPARM_MAX. NULL_TREE is returned in every case.
+
+ NOTE(review): only the upper bound is checked here; confirm whether a
+ negative constant needs to be rejected as well. */
+static tree
+ix86_handle_fp_regparm_attribute (tree *node, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning ("`%s' attribute only applies to functions",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+ else
+ {
+ tree cst;
+
+ cst = TREE_VALUE (args);
+ if (TREE_CODE (cst) != INTEGER_CST)
+ {
+ warning ("`%s' attribute requires an integer constant argument",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+ else if (compare_tree_int (cst, FP_REGPARM_MAX) > 0)
+ {
+ warning ("argument to `%s' attribute larger than %d",
+ IDENTIFIER_POINTER (name), FP_REGPARM_MAX);
+ *no_add_attrs = true;
+ }
+ }
+
+ return NULL_TREE;
+}
+
+
/* Return 0 if the attributes for two types are incompatible, 1 if they
are compatible, and 2 if they are nearly compatible (which causes a
warning to be generated). */
@@ -1742,7 +1803,7 @@
return 1;
}
-/* Return the regparm value for a fuctio with the indicated TYPE and DECL.
+/* Return the regparm value for a function with the indicated TYPE and DECL.
DECL may be NULL when calling function indirectly
or considering a libcall. */
@@ -1787,6 +1848,20 @@
return regparm;
}
+/* Return the fpregparm value for a function with the indicated TYPE:
+ the integer argument of the "fpregparm" attribute when one is found in
+ TYPE_ATTRIBUTES (TYPE), otherwise the value of ix86_fp_regparm. */
+
+static int
+ix86_function_fp_regparm (tree type)
+{
+ tree attr;
+
+ attr = lookup_attribute ("fpregparm", TYPE_ATTRIBUTES (type));
+ if (attr)
+ return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
+ else
+ return ix86_fp_regparm;
+}
+
/* Return true if EAX is live at the start of the function. Used by
ix86_expand_prologue to determine if we need special help before
calling allocate_stack_worker. */
@@ -1863,6 +1938,8 @@
int i;
if (!TARGET_64BIT)
return (regno < REGPARM_MAX
+ || (TARGET_80387 && (regno < FP_REGPARM_MAX) &&
+ FP_REGNO_P (regno) && !fixed_regs[regno])
|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
if (SSE_REGNO_P (regno) && TARGET_SSE)
return true;
@@ -1905,12 +1982,17 @@
*cum = zero_cum;
/* Set up the number of registers to use for passing arguments. */
- if (fntype)
+ if (fntype) {
cum->nregs = ix86_function_regparm (fntype, fndecl);
- else
+ cum->fp_nregs = ix86_function_fp_regparm (fntype);
+ }
+ else {
cum->nregs = ix86_regparm;
+ cum->fp_nregs = ix86_fp_regparm;
+ }
cum->sse_nregs = SSE_REGPARM_MAX;
cum->mmx_nregs = MMX_REGPARM_MAX;
+ cum->fp_regno = FIRST_FLOAT_REG;
cum->warn_sse = true;
cum->warn_mmx = true;
cum->maybe_vaarg = false;
@@ -1931,7 +2013,7 @@
are no variable arguments. If there are variable arguments, then
we won't pass anything in registers */
- if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
+ if (cum->nregs || cum->fp_nregs || !TARGET_MMX || !TARGET_SSE)
{
for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
param != 0; param = next_param)
@@ -1942,6 +2024,7 @@
if (!TARGET_64BIT)
{
cum->nregs = 0;
+ cum->fp_nregs = 0;
cum->sse_nregs = 0;
cum->mmx_nregs = 0;
cum->warn_sse = 0;
@@ -1957,8 +2040,8 @@
cum->maybe_vaarg = 1;
if (TARGET_DEBUG_ARG)
- fprintf (stderr, ", nregs=%d )\n", cum->nregs);
-
+ fprintf (stderr, ", nregs=%d, fp_nregs=%d, sse_nregs=%d )\n",
+ cum->nregs, cum->fp_nregs, cum->sse_nregs);
return;
}
@@ -2515,8 +2598,9 @@
if (TARGET_DEBUG_ARG)
fprintf (stderr,
- "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
- words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
+ "function_adv (sz=%d, wds=%2d, nregs=%d, fpnregs=%d ssenregs=%d, mode=%s, named=%d)\n\n",
+ words, cum->words, cum->nregs, cum->fp_nregs, cum->sse_nregs,
+ GET_MODE_NAME (mode), named);
if (TARGET_64BIT)
{
int int_nregs, sse_nregs;
@@ -2558,7 +2642,21 @@
cum->mmx_regno = 0;
}
}
- else
+ else if (TARGET_80387 && (mode == SFmode || mode == DFmode ||
+ mode == XFmode || mode == TFmode))
+ {
+ cum->fp_words += words;
+ cum->fp_nregs -= 1;
+ cum->fp_regno += 1;
+ if (cum->fp_nregs <= 0)
+ {
+ cum->fp_nregs = 0;
+ cum->fp_regno = 0;
+ }
+ }
+ else if (mode == DImode || mode == SImode ||
+ mode == HImode || mode == QImode ||
+ ((mode == BLKmode) && (bytes >= 0)))
{
cum->words += words;
cum->nregs -= words;
@@ -2685,18 +2783,26 @@
ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
}
break;
+ case SFmode:
+ case DFmode:
+ case XFmode:
+ case TFmode:
+ if (cum->fp_nregs)
+ ret = gen_rtx_REG (mode, cum->fp_regno);
+ break;
}
if (TARGET_DEBUG_ARG)
{
fprintf (stderr,
- "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
- words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
+ "function_arg (size=%d, wds=%2d, nregs=%d, fp_nregs=%d, sse_nregs=%d mode=%4s, named=%d, ",
+ words, cum->words, cum->nregs, cum->fp_nregs, cum->sse_nregs,
+ GET_MODE_NAME (mode), named);
if (ret)
print_simple_rtl (stderr, ret);
else
- fprintf (stderr, ", stack");
+ fprintf (stderr, " stack");
fprintf (stderr, " )\n");
}