[power7-meissner] Fix 31343 regression, add new register class
Michael Meissner
meissner@linux.vnet.ibm.com
Thu Feb 19 01:30:00 GMT 2009
This patch fixes regressions in compling VMX files like pr31343, which is the
post reload CSE failing due to insn not found. What is happening the
load/stores match before the general move pattern, and they have a predicate
that constrains it to altivec/VSX registers. If the general move pattern
occurs first, it eventually sees GPRs are valid, and tries to use them. I
think this is a bug, but for now restoring the old code allows for forward
progress.
I also started adding a new register class (VSX_REGS) that I might need for
movdf, and moving some of the RELOAD macros into function for easier
debugging.
2009-02-18 Michael Meissner <meissner@linux.vnet.ibm.com>
* doc/md.texi (Machine Constraints): Document 'j' and 'w'
constraints.
* config/rs6000/constraints.md ("w" constraint): Add new register
constraint for VSX_REGS.
* config/rs6000/rs6000-protos.h (rs6000_preferred_reload_class):
New function declaration.
(rs6000_secondary_memory_needed): Ditto.
(rs6000_cannot_change_mode_class): Ditto.
* config/rs6000/rs6000.c (rs6000_legitimate_offset_address_p):
Allow V2DI mode.
(rs6000_preferred_reload_class): Move here from
PREFERRED_RELOAD_CLASS macro in rs6000.h. Wire easy vector
constants to the vector registers.
(rs6000_secondary_memory_needed): Move to function instead of
being a macro.
(rs6000_cannot_change_mode_class): Ditto.
* config/rs6000/vsx.md (VSr): Use w register call instead of fv.
(lx_<mode>_vsx): Add load/store insns before the general move,
which allows post-reload cse to not move things to GPRs.
(stx_<mode>_vsx): Ditto.
(mov<move>_vsx): Use <VSr> for zeroing register.
* config/rs6000/rs6000.h (STACK_BOUNDARY): -mvsx enables 128 bit
alignment.
(LOCAL_ALIGNMENT): Ditto.
(STARTING_FRAME_OFFSET): Ditto.
(STACK_DYNAMIC_OFFSET): Ditto.
(EPILOGUE_USES): Ditto.
(UNITS_PER_SIMD_WORD): Add -mvsx support.
(enum reg_class): Add VSX_REGS.
(REG_CLASS_NAMES): Ditto.
(REG_CLASS_CONTENTS): Ditto.
(PREFERRED_RELOAD_CLASS): Move to a function.
(SECONDARY_MEMORY_NEEDED): Ditto.
(CANNOT_CHANGE_MODE_CLASS): Ditto.
* config/rs6000/altivec.md (altivec_lvx_<mode>): Restore previous
code, that fixes PR 31343.
(altivec_stvx_<mode>): Ditto.
(mov<mode>_altivec): Add j constraint for zeroing vector register.
Index: gcc/doc/md.texi
===================================================================
--- gcc/doc/md.texi (revision 144172)
+++ gcc/doc/md.texi (working copy)
@@ -1904,7 +1904,10 @@ Address base register
Floating point register
@item v
-Vector register
+Altivec vector register
+
+@item w
+VSX vector register (floating point and altivec)
@item h
@samp{MQ}, @samp{CTR}, or @samp{LINK} register
@@ -1990,6 +1993,9 @@ AND masks that can be performed by two r
@item W
Vector constant that does not require memory
+@item j
+Vector constant that is all zeros.
+
@end table
@item Intel 386---@file{config/i386/constraints.md}
Index: gcc/config/rs6000/constraints.md
===================================================================
--- gcc/config/rs6000/constraints.md (revision 144172)
+++ gcc/config/rs6000/constraints.md (working copy)
@@ -38,6 +38,9 @@
(define_register_constraint "l" "LINK_REGS"
"@internal")
+(define_register_constraint "w" "TARGET_VSX ? VSX_REGS : NO_REGS"
+ "@internal")
+
(define_register_constraint "v" "ALTIVEC_REGS"
"@internal")
Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h (revision 144172)
+++ gcc/config/rs6000/rs6000-protos.h (working copy)
@@ -64,9 +64,14 @@ extern int insvdi_rshift_rlwimi_p (rtx,
extern int registers_ok_for_quad_peep (rtx, rtx);
extern int mems_ok_for_quad_peep (rtx, rtx);
extern bool gpr_or_gpr_p (rtx, rtx);
+extern enum reg_class rs6000_preferred_reload_class(rtx, enum reg_class);
extern enum reg_class rs6000_secondary_reload_class (enum reg_class,
enum machine_mode, rtx);
-
+extern bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
+ enum machine_mode);
+extern bool rs6000_cannot_change_mode_class (enum machine_mode,
+ enum machine_mode,
+ enum reg_class);
extern int paired_emit_vector_cond_expr (rtx, rtx, rtx,
rtx, rtx, rtx);
extern void paired_expand_vector_move (rtx operands[]);
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 144172)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -3759,6 +3759,7 @@ rs6000_legitimate_offset_address_p (enum
case V4SFmode:
case V4SImode:
case V2DFmode:
+ case V2DImode:
/* AltiVec/VSX vector modes. Only reg+reg addressing is valid and
constant offset zero should not occur due to canonicalization. */
return false;
@@ -11697,6 +11698,80 @@ rs6000_instantiate_decls (void)
instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
}
+/* Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class.
+
+ On the RS/6000, we have to return NO_REGS when we want to reload a
+ floating-point CONST_DOUBLE to force it to be copied to memory.
+
+ We also don't want to reload integer values into floating-point
+ registers if we can at all help it. In fact, this can
+ cause reload to die, if it tries to generate a reload of CTR
+ into a FP register and discovers it doesn't have the memory location
+ required.
+
+ ??? Would it be a good idea to have reload do the converse, that is
+ try to reload floating modes into FP registers if possible?
+ */
+
+enum reg_class
+rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (TARGET_VSX && VSX_VECTOR_MODE (mode) && x == CONST0_RTX (mode)
+ && (rclass == FLOAT_REGS || rclass == VSX_REGS
+ || rclass == ALTIVEC_REGS))
+ return rclass;
+
+ if (TARGET_ALTIVEC && ALTIVEC_VECTOR_MODE (mode) && rclass == ALTIVEC_REGS
+ && easy_vector_constant (x, mode))
+ return rclass;
+
+ if (CONSTANT_P (x) && reg_classes_intersect_p (rclass, FLOAT_REGS))
+ return NO_REGS;
+
+ if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
+ return GENERAL_REGS;
+
+ return rclass;
+}
+
+/* If we are copying between FP or AltiVec registers and anything
+ else, we need a memory location. The exception is when we are
+ targeting ppc64 and the move to/from fpr to gpr instructions
+ are available.*/
+
+bool
+rs6000_secondary_memory_needed (enum reg_class class1,
+ enum reg_class class2,
+ enum machine_mode mode)
+{
+ if (class1 == class2)
+ return false;
+
+ if (class1 == FLOAT_REGS
+ && (!TARGET_MFPGPR || !TARGET_POWERPC64
+ || ((mode != DFmode)
+ && (mode != DDmode)
+ && (mode != DImode))))
+ return true;
+
+ if (class2 == FLOAT_REGS
+ && (!TARGET_MFPGPR || !TARGET_POWERPC64
+ || ((mode != DFmode)
+ && (mode != DDmode)
+ && (mode != DImode))))
+ return true;
+
+ if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
+ return true;
+
+ return false;
+}
+
/* Return the register class of a scratch register needed to copy IN into
or out of a register in RCLASS in MODE. If it can be done directly,
NO_REGS is returned. */
@@ -11772,6 +11847,28 @@ rs6000_secondary_reload_class (enum reg_
/* Otherwise, we need GENERAL_REGS. */
return GENERAL_REGS;
}
+
+/* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
+
+bool
+rs6000_cannot_change_mode_class (enum machine_mode from,
+ enum machine_mode to,
+ enum reg_class rclass)
+{
+ return (GET_MODE_SIZE (from) != GET_MODE_SIZE (to)
+ ? ((GET_MODE_SIZE (from) < 8 || GET_MODE_SIZE (to) < 8
+ || TARGET_IEEEQUAD)
+ && reg_classes_intersect_p (FLOAT_REGS, rclass))
+ : (((TARGET_E500_DOUBLE
+ && ((((to) == DFmode) + ((from) == DFmode)) == 1
+ || (((to) == TFmode) + ((from) == TFmode)) == 1
+ || (((to) == DDmode) + ((from) == DDmode)) == 1
+ || (((to) == TDmode) + ((from) == TDmode)) == 1
+ || (((to) == DImode) + ((from) == DImode)) == 1))
+ || (TARGET_SPE
+ && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1))
+ && reg_classes_intersect_p (GENERAL_REGS, rclass)));
+}
/* Given a comparison operation, return the bit number in CCR to test. We
know this is a valid comparison.
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md (revision 144172)
+++ gcc/config/rs6000/vsx.md (working copy)
@@ -46,13 +46,29 @@
(define_mode_attr VSr [(V16QI "v")
(V8HI "v")
(V4SI "v")
- (V4SF "fv")
+ (V4SF "w")
(V2DI "v")
- (V2DF "fv")])
+ (V2DF "w")])
-;; VSX move instructions
+;; Generic LX load instruction.
+(define_insn "*lx_<mode>_vsx"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>")
+ (match_operand:VSX_L 1 "memory_operand" "Z"))]
+ "TARGET_VSX"
+ "lx<VSm>x %x0,%y1"
+ [(set_attr "type" "vecload")])
+
+;; Generic STVX store instruction.
+(define_insn "*stx_<mode>_vsx"
+ [(set (match_operand:VSX_L 0 "memory_operand" "=Z")
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>"))]
+ "TARGET_VSX"
+ "stx<VSm>x %x1,%y0"
+ [(set_attr "type" "vecstore")])
+
+;; VSX moves
(define_insn "*mov<mode>_vsx"
- [(set (match_operand:VSX_L 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,o,r,r,fv,v")
+ [(set (match_operand:VSX_L 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,o,r,r,<VSr>,v")
(match_operand:VSX_L 1 "input_operand" "<VSr>,Z,<VSr>,r,o,r,j,W"))]
"TARGET_VSX
&& (register_operand (operands[0], <MODE>mode)
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h (revision 144172)
+++ gcc/config/rs6000/rs6000.h (working copy)
@@ -610,8 +610,9 @@ extern int rs6000_xilinx_fpu;
#define PARM_BOUNDARY (TARGET_32BIT ? 32 : 64)
/* Boundary (in *bits*) on which stack pointer should be aligned. */
-#define STACK_BOUNDARY \
- ((TARGET_32BIT && !TARGET_ALTIVEC && !TARGET_ALTIVEC_ABI) ? 64 : 128)
+#define STACK_BOUNDARY \
+ ((TARGET_32BIT && !TARGET_ALTIVEC && !TARGET_ALTIVEC_ABI && !TARGET_VSX) \
+ ? 64 : 128)
/* Allocation boundary (in *bits*) for the code of a function. */
#define FUNCTION_BOUNDARY 32
@@ -623,10 +624,11 @@ extern int rs6000_xilinx_fpu;
local store. TYPE is the data type, and ALIGN is the alignment
that the object would ordinarily have. */
#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
- ((TARGET_ALTIVEC && TREE_CODE (TYPE) == VECTOR_TYPE) ? 128 : \
+ (((TARGET_ALTIVEC || TARGET_VSX) \
+ && TREE_CODE (TYPE) == VECTOR_TYPE) ? 128 : \
(TARGET_E500_DOUBLE \
- && TYPE_MODE (TYPE) == DFmode) ? 64 : \
- ((TARGET_SPE && TREE_CODE (TYPE) == VECTOR_TYPE \
+ && TYPE_MODE (TYPE) == DFmode) ? 64 : \
+ ((TARGET_SPE && TREE_CODE (TYPE) == VECTOR_TYPE \
&& SPE_VECTOR_MODE (TYPE_MODE (TYPE))) || (TARGET_PAIRED_FLOAT \
&& TREE_CODE (TYPE) == VECTOR_TYPE \
&& PAIRED_VECTOR_MODE (TYPE_MODE (TYPE)))) ? 64 : ALIGN)
@@ -971,10 +973,12 @@ extern int rs6000_xilinx_fpu;
#define PAIRED_VECTOR_MODE(MODE) \
((MODE) == V2SFmode)
-#define UNITS_PER_SIMD_WORD(MODE) \
- (TARGET_ALTIVEC ? UNITS_PER_ALTIVEC_WORD \
- : (TARGET_SPE ? UNITS_PER_SPE_WORD : (TARGET_PAIRED_FLOAT ? \
- UNITS_PER_PAIRED_WORD : UNITS_PER_WORD)))
+#define UNITS_PER_SIMD_WORD(MODE) \
+ (TARGET_VSX ? UNITS_PER_VSX_WORD \
+ : (TARGET_ALTIVEC ? UNITS_PER_ALTIVEC_WORD \
+ : (TARGET_SPE ? UNITS_PER_SPE_WORD \
+ : (TARGET_PAIRED_FLOAT ? UNITS_PER_PAIRED_WORD \
+ : UNITS_PER_WORD))))
/* Value is TRUE if hard register REGNO can hold a value of
machine-mode MODE. */
@@ -1093,9 +1097,10 @@ extern int rs6000_xilinx_fpu;
For any two classes, it is very desirable that there be another
class that represents their union. */
-/* The RS/6000 has three types of registers, fixed-point, floating-point,
- and condition registers, plus three special registers, MQ, CTR, and the
- link register. AltiVec adds a vector register class.
+/* The RS/6000 has three types of registers, fixed-point, floating-point, and
+ condition registers, plus three special registers, MQ, CTR, and the link
+ register. AltiVec adds a vector register class. VSX registers overlap the
+ FPR registers and the Altivec registers.
However, r0 is special in that it cannot be used as a base register.
So make a class for registers valid as base registers.
@@ -1110,6 +1115,7 @@ enum reg_class
GENERAL_REGS,
FLOAT_REGS,
ALTIVEC_REGS,
+ VSX_REGS,
VRSAVE_REGS,
VSCR_REGS,
SPE_ACC_REGS,
@@ -1140,6 +1146,7 @@ enum reg_class
"GENERAL_REGS", \
"FLOAT_REGS", \
"ALTIVEC_REGS", \
+ "VSX_REGS", \
"VRSAVE_REGS", \
"VSCR_REGS", \
"SPE_ACC_REGS", \
@@ -1169,6 +1176,7 @@ enum reg_class
{ 0xffffffff, 0x00000000, 0x00000008, 0x00020000 }, /* GENERAL_REGS */ \
{ 0x00000000, 0xffffffff, 0x00000000, 0x00000000 }, /* FLOAT_REGS */ \
{ 0x00000000, 0x00000000, 0xffffe000, 0x00001fff }, /* ALTIVEC_REGS */ \
+ { 0x00000000, 0xffffffff, 0xffffe000, 0x00001fff }, /* VSX_REGS */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00002000 }, /* VRSAVE_REGS */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00004000 }, /* VSCR_REGS */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00008000 }, /* SPE_ACC_REGS */ \
@@ -1216,8 +1224,8 @@ enum reg_class
: (REGNO) == CR0_REGNO ? CR0_REGS \
: CR_REGNO_P (REGNO) ? CR_REGS \
: (REGNO) == MQ_REGNO ? MQ_REGS \
- : (REGNO) == LR_REGNO ? LINK_REGS \
- : (REGNO) == CTR_REGNO ? CTR_REGS \
+ : (REGNO) == LR_REGNO ? LINK_REGS \
+ : (REGNO) == CTR_REGNO ? CTR_REGS \
: (REGNO) == ARG_POINTER_REGNUM ? BASE_REGS \
: (REGNO) == XER_REGNO ? XER_REGS \
: (REGNO) == VRSAVE_REGNO ? VRSAVE_REGS \
@@ -1250,13 +1258,7 @@ enum reg_class
*/
#define PREFERRED_RELOAD_CLASS(X,CLASS) \
- ((CONSTANT_P (X) \
- && reg_classes_intersect_p ((CLASS), FLOAT_REGS)) \
- ? NO_REGS \
- : (GET_MODE_CLASS (GET_MODE (X)) == MODE_INT \
- && (CLASS) == NON_SPECIAL_REGS) \
- ? GENERAL_REGS \
- : (CLASS))
+ rs6000_preferred_reload_class (X, CLASS)
/* Return the register class of a scratch register needed to copy IN into
or out of a register in CLASS in MODE. If it can be done directly,
@@ -1271,18 +1273,7 @@ enum reg_class
are available.*/
#define SECONDARY_MEMORY_NEEDED(CLASS1,CLASS2,MODE) \
- ((CLASS1) != (CLASS2) && (((CLASS1) == FLOAT_REGS \
- && (!TARGET_MFPGPR || !TARGET_POWERPC64 \
- || ((MODE != DFmode) \
- && (MODE != DDmode) \
- && (MODE != DImode)))) \
- || ((CLASS2) == FLOAT_REGS \
- && (!TARGET_MFPGPR || !TARGET_POWERPC64 \
- || ((MODE != DFmode) \
- && (MODE != DDmode) \
- && (MODE != DImode)))) \
- || (CLASS1) == ALTIVEC_REGS \
- || (CLASS2) == ALTIVEC_REGS))
+ rs6000_secondary_memory_needed (CLASS1, CLASS2, MODE)
/* For cpus that cannot load/store SDmode values from the 64-bit
FP registers without using a full 64-bit load/store, we need
@@ -1302,19 +1293,7 @@ enum reg_class
/* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
- (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
- ? ((GET_MODE_SIZE (FROM) < 8 || GET_MODE_SIZE (TO) < 8 \
- || TARGET_IEEEQUAD) \
- && reg_classes_intersect_p (FLOAT_REGS, CLASS)) \
- : (((TARGET_E500_DOUBLE \
- && ((((TO) == DFmode) + ((FROM) == DFmode)) == 1 \
- || (((TO) == TFmode) + ((FROM) == TFmode)) == 1 \
- || (((TO) == DDmode) + ((FROM) == DDmode)) == 1 \
- || (((TO) == TDmode) + ((FROM) == TDmode)) == 1 \
- || (((TO) == DImode) + ((FROM) == DImode)) == 1)) \
- || (TARGET_SPE \
- && (SPE_VECTOR_MODE (FROM) + SPE_VECTOR_MODE (TO)) == 1)) \
- && reg_classes_intersect_p (GENERAL_REGS, CLASS)))
+ rs6000_cannot_change_mode_class (FROM, TO, CLASS)
/* Stack layout; function entry, exit and calling. */
@@ -1375,8 +1354,8 @@ extern enum rs6000_abi rs6000_current_ab
#define STARTING_FRAME_OFFSET \
(FRAME_GROWS_DOWNWARD \
? 0 \
- : (RS6000_ALIGN (crtl->outgoing_args_size, \
- TARGET_ALTIVEC ? 16 : 8) \
+ : (RS6000_ALIGN (crtl->outgoing_args_size, \
+ (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8) \
+ RS6000_SAVE_AREA))
/* Offset from the stack pointer register to an item dynamically
@@ -1386,8 +1365,8 @@ extern enum rs6000_abi rs6000_current_ab
length of the outgoing arguments. The default is correct for most
machines. See `function.c' for details. */
#define STACK_DYNAMIC_OFFSET(FUNDECL) \
- (RS6000_ALIGN (crtl->outgoing_args_size, \
- TARGET_ALTIVEC ? 16 : 8) \
+ (RS6000_ALIGN (crtl->outgoing_args_size, \
+ (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8) \
+ (STACK_POINTER_OFFSET))
/* If we generate an insn to push BYTES bytes,
@@ -1637,7 +1616,7 @@ typedef struct rs6000_args
#define EPILOGUE_USES(REGNO) \
((reload_completed && (REGNO) == LR_REGNO) \
|| (TARGET_ALTIVEC && (REGNO) == VRSAVE_REGNO) \
- || (crtl->calls_eh_return \
+ || (crtl->calls_eh_return \
&& TARGET_AIX \
&& (REGNO) == 2))
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md (revision 144172)
+++ gcc/config/rs6000/altivec.md (working copy)
@@ -182,10 +182,26 @@
(define_mode_attr VI_char [(V4SI "w") (V8HI "h") (V16QI "b")])
-;; Altivec move instructions, prefer VSX if we have it
-(define_insn "*mov_altivec_<mode>"
- [(set (match_operand:VM 0 "nonimmediate_operand" "=Z,v,v,o,r,r,v")
- (match_operand:VM 1 "input_operand" "v,Z,v,r,o,r,W"))]
+;; Generic LVX load instruction.
+(define_insn "altivec_lvx_<mode>"
+ [(set (match_operand:V 0 "altivec_register_operand" "=v")
+ (match_operand:V 1 "memory_operand" "Z"))]
+ "TARGET_ALTIVEC"
+ "lvx %0,%y1"
+ [(set_attr "type" "vecload")])
+
+;; Generic STVX store instruction.
+(define_insn "altivec_stvx_<mode>"
+ [(set (match_operand:V 0 "memory_operand" "=Z")
+ (match_operand:V 1 "altivec_register_operand" "v"))]
+ "TARGET_ALTIVEC && !TARGET_VSX"
+ "stvx %1,%y0"
+ [(set_attr "type" "vecstore")])
+
+;; Vector move instructions.
+(define_insn "*mov<mode>_altivec"
+ [(set (match_operand:V 0 "nonimmediate_operand" "=Z,v,v,o,r,r,v,v")
+ (match_operand:V 1 "input_operand" "v,Z,v,r,o,r,j,W"))]
"TARGET_ALTIVEC && !TARGET_VSX
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
@@ -198,11 +214,12 @@
case 3: return "#";
case 4: return "#";
case 5: return "#";
- case 6: return output_vec_const_move (operands);
+ case 6: return "vxor %0,%0,%0";
+ case 7: return output_vec_const_move (operands);
default: gcc_unreachable ();
}
}
- [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
+ [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*")])
(define_split
[(set (match_operand:VM 0 "altivec_register_operand" "")
--
Michael Meissner, IBM
4 Technology Place Drive, MS 2203A, Westford, MA, 01886, USA
meissner@linux.vnet.ibm.com
More information about the Gcc-patches
mailing list