This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[RFC] PR/19653: fld used for reloads in sse mode
- From: Paolo Bonzini <paolo dot bonzini at lu dot unisi dot ch>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, Michael Matz <matz at suse dot de>, Richard Guenther <richard dot guenther at gmail dot com>
- Date: Thu, 14 Jul 2005 10:35:16 +0200
- Subject: [RFC] PR/19653: fld used for reloads in sse mode
This patch is enough to remove all fld's from Richard Guenther's
libsse2, on top of the regclass patch at
http://gcc.gnu.org/ml/gcc-patches/2005-07/msg00914.html and after
removing from the MD the `#' hints for regclass. These can be removed with
sed -i 's/#[^",\][^",]*\([",]\)/\1/g' i386.md
All three patches are necessary.
---
Luckily, no reload surgery is needed; most of the changes are localized
to the i386 target.
The patch is composed of several parts:
1) it modifies PREFERRED_RELOAD_CLASS to return NO_REGS for
floating-point constants if math will be done in SSE registers: this
mimics what is done for SSE vector constants.
2) it modifies PREFERRED_RELOAD_CLASS to return a subclass of SSE_REGS
for floating-point rtx's that are not constant, if math will be done in
SSE registers.
3) it defines a PREFERRED_OUTPUT_RELOAD_CLASS macro that constrains
registers to the correct class, depending on the -mfpmath setting.
4) it makes find_reloads obey PREFERRED_OUTPUT_RELOAD_CLASS to compute
the badness of an alternative.
Finally, this patch also includes a fix to push_reload, so that it does not
obey PREFERRED_*_RELOAD_CLASS when that returns NO_REGS but NO_REGS is not a
valid class for the reload (it is only accepted for optional output reloads). I
think it is correct, because ix86_preferred_reload_class is very liberal
in returning NO_REGS. It was enough to move
if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
return NO_REGS;
if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
return NO_REGS;
near the beginning of ix86_preferred_reload_class, to trigger the
assertion near line 1300 of reload.c.
Do you think this is the right approach?
Paolo
2005-07-14 Paolo Bonzini <bonzini@gnu.org>
* config/i386/i386.c (ix86_preferred_reload_class): Force
using SSE registers (and return NO_REGS for floating-point
constants) if math is done with SSE.
(ix86_preferred_output_reload_class): New.
* config/i386/i386-protos.h (ix86_preferred_output_reload_class): New.
* config/i386/i386.h (PREFERRED_OUTPUT_RELOAD_CLASS): New.
* reload.c (find_reloads): Take PREFERRED_OUTPUT_RELOAD_CLASS
into account.
(push_reload): Allow PREFERRED_*_RELOAD_CLASS to liberally
return NO_REGS.
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.841
diff -c -r1.841 i386.c
*** config/i386/i386.c 11 Jul 2005 09:35:16 -0000 1.841
--- config/i386/i386.c 14 Jul 2005 08:07:55 -0000
***************
*** 15315,15329 ****
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
/* We're only allowed to return a subclass of CLASS. Many of the
following checks fail for NO_REGS, so eliminate that early. */
if (class == NO_REGS)
return NO_REGS;
/* All classes can load zeros. */
! if (x == CONST0_RTX (GET_MODE (x)))
return class;
/* Floating-point constants need more complex checks. */
if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
{
--- 15315,15353 ----
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
+ enum machine_mode mode = GET_MODE (x);
+ bool is_sse_math_mode;
+
/* We're only allowed to return a subclass of CLASS. Many of the
following checks fail for NO_REGS, so eliminate that early. */
if (class == NO_REGS)
return NO_REGS;
/* All classes can load zeros. */
! if (x == CONST0_RTX (mode))
return class;
+ /* Let reload make the decision if we are loading: a) a vector constant into
+ an MMX or SSE register b) a floating-point constant into an SSE register
+ that will be used for math. This is because there are no MMX/SSE
+ load-from-constant instructions. */
+
+ is_sse_math_mode =
+ TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode);
+
+ if (CONSTANT_P (x))
+ {
+ if (MAYBE_MMX_CLASS_P (class))
+ return NO_REGS;
+ if (MAYBE_SSE_CLASS_P (class)
+ && (VECTOR_MODE_P (mode) || mode == TImode || is_sse_math_mode))
+ return NO_REGS;
+ }
+
+ /* Prefer SSE regs if we only use them from math. */
+ if (is_sse_math_mode)
+ return reg_class_subset_p (SSE_REGS, class) ? SSE_REGS : NO_REGS;
+
/* Floating-point constants need more complex checks. */
if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
{
***************
*** 15335,15342 ****
zero above. We only want to wind up preferring 80387 registers if
we plan on doing computation with them. */
if (TARGET_80387
! && (TARGET_MIX_SSE_I387
! || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
&& standard_80387_constant_p (x))
{
/* Limit class to non-sse. */
--- 15359,15365 ----
zero above. We only want to wind up preferring 80387 registers if
we plan on doing computation with them. */
if (TARGET_80387
! && TARGET_MIX_SSE_I387
&& standard_80387_constant_p (x))
{
/* Limit class to non-sse. */
***************
*** 15352,15361 ****
return NO_REGS;
}
- if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
- return NO_REGS;
- if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
- return NO_REGS;
/* Generally when we see PLUS here, it's the function invariant
(plus soft-fp const_int). Which can only be computed into general
--- 15375,15380 ----
***************
*** 15376,15381 ****
--- 15395,15437 ----
return class;
}
+
+ /* Discourage putting floating-point values in SSE registers unless
+ SSE math is being used, and likewise for the 387 registers. */
+ enum reg_class
+ ix86_preferred_output_reload_class (rtx x, enum reg_class class)
+ {
+ enum reg_class desired_class;
+
+ if (class == NO_REGS)
+ return NO_REGS;
+
+ if (!FLOAT_MODE_P (GET_MODE (x)) || TARGET_MIX_SSE_I387)
+ return class;
+
+ /* Restrict the output reload class to the register bank that
+ we are doing math on. */
+ if (TARGET_80387
+ && (TARGET_MIX_SSE_I387
+ || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x)))))
+ {
+ if (class == FP_TOP_SSE_REGS)
+ desired_class = FP_TOP_REG;
+ else if (class == FP_SECOND_SSE_REGS)
+ desired_class = FP_SECOND_REG;
+ else
+ desired_class = FLOAT_REGS;
+ }
+ else
+ desired_class = SSE_REGS;
+
+ /* If we would like not to return a subset of CLASS, fail. If reload
+ knows better, it will override our choice. */
+ if (!reg_class_subset_p (desired_class, class))
+ return NO_REGS;
+ else
+ return desired_class;
+ }
/* If we are copying between general and FP registers, we need a memory
location. The same is true for SSE and MMX registers.
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.440
diff -c -r1.440 i386.h
*** config/i386/i386.h 26 Jun 2005 05:18:34 -0000 1.440
--- config/i386/i386.h 14 Jul 2005 08:07:55 -0000
***************
*** 1294,1299 ****
--- 1294,1305 ----
#define PREFERRED_RELOAD_CLASS(X, CLASS) \
ix86_preferred_reload_class ((X), (CLASS))
+ /* Discourage putting floating-point values in SSE registers unless
+ SSE math is being used, and likewise for the 387 registers. */
+
+ #define PREFERRED_OUTPUT_RELOAD_CLASS(X, CLASS) \
+ ix86_preferred_output_reload_class ((X), (CLASS))
+
/* If we are copying between general and FP registers, we need a memory
location. The same is true for SSE and MMX registers. */
#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
Index: config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.143
diff -c -r1.143 i386-protos.h
*** config/i386/i386-protos.h 29 Jun 2005 17:27:16 -0000 1.143
--- config/i386/i386-protos.h 14 Jul 2005 08:07:55 -0000
***************
*** 188,193 ****
--- 188,194 ----
extern bool ix86_cannot_change_mode_class (enum machine_mode,
enum machine_mode, enum reg_class);
extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class);
+ extern enum reg_class ix86_preferred_output_reload_class (rtx, enum reg_class);
extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
extern int ix86_mode_needed (int, rtx);
extern void emit_i387_cw_initialization (int);
Index: reload.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/reload.c,v
retrieving revision 1.273
diff -c -r1.273 reload.c
*** reload.c 25 Jun 2005 02:00:53 -0000 1.273
--- reload.c 14 Jul 2005 08:07:56 -0000
***************
*** 1231,1245 ****
/* Narrow down the class of register wanted if that is
desirable on this machine for efficiency. */
! if (in != 0)
! class = PREFERRED_RELOAD_CLASS (in, class);
/* Output reloads may need analogous treatment, different in detail. */
#ifdef PREFERRED_OUTPUT_RELOAD_CLASS
! if (out != 0)
! class = PREFERRED_OUTPUT_RELOAD_CLASS (out, class);
#endif
/* Make sure we use a class that can handle the actual pseudo
inside any subreg. For example, on the 386, QImode regs
can appear within SImode subregs. Although GENERAL_REGS
--- 1231,1254 ----
/* Narrow down the class of register wanted if that is
desirable on this machine for efficiency. */
! {
! enum reg_class preferred_class = class;
!
! if (in != 0)
! preferred_class = PREFERRED_RELOAD_CLASS (in, class);
/* Output reloads may need analogous treatment, different in detail. */
#ifdef PREFERRED_OUTPUT_RELOAD_CLASS
! if (out != 0)
! preferred_class = PREFERRED_OUTPUT_RELOAD_CLASS (out, preferred_class);
#endif
+ /* Discard what the target said if we cannot do it. */
+ if (preferred_class != NO_REGS
+ || (optional && type == RELOAD_FOR_OUTPUT))
+ class = preferred_class;
+ }
+
/* Make sure we use a class that can handle the actual pseudo
inside any subreg. For example, on the 386, QImode regs
can appear within SImode subregs. Although GENERAL_REGS
***************
*** 3443,3457 ****
/* If we can't reload this value at all, reject this
alternative. Note that we could also lose due to
! LIMIT_RELOAD_RELOAD_CLASS, but we don't check that
here. */
if (! CONSTANT_P (operand)
! && (enum reg_class) this_alternative[i] != NO_REGS
! && (PREFERRED_RELOAD_CLASS (operand,
! (enum reg_class) this_alternative[i])
! == NO_REGS))
! bad = 1;
/* Alternative loses if it requires a type of reload not
permitted for this insn. We can always reload SCRATCH
--- 3452,3477 ----
/* If we can't reload this value at all, reject this
alternative. Note that we could also lose due to
! LIMIT_RELOAD_CLASS, but we don't check that
here. */
if (! CONSTANT_P (operand)
! && (enum reg_class) this_alternative[i] != NO_REGS)
! {
! if (PREFERRED_RELOAD_CLASS
! (operand, (enum reg_class) this_alternative[i])
! == NO_REGS)
! bad = 1;
!
! #ifdef PREFERRED_OUTPUT_RELOAD_CLASS
! if (operand_type[i] == RELOAD_FOR_OUTPUT
! && PREFERRED_OUTPUT_RELOAD_CLASS
! (operand, (enum reg_class) this_alternative[i])
! == NO_REGS)
! bad = 1;
! #endif
! }
!
/* Alternative loses if it requires a type of reload not
permitted for this insn. We can always reload SCRATCH