This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [Patch, RTL] Eliminate redundant vec_select moves.
- From: Kirill Yukhin <kirill dot yukhin at gmail dot com>
- To: "H.J. Lu" <hjl dot tools at gmail dot com>
- Cc: Tejas Belagod <tbelagod at arm dot com>, "Yukhin, Kirill" <kirill dot yukhin at intel dot com>, Jeff Law <law at redhat dot com>, Bill Schmidt <wschmidt at linux dot vnet dot ibm dot com>, "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>, Richard Sandiford <rdsandiford at googlemail dot com>, Uros Bizjak <ubizjak at gmail dot com>, Richard Henderson <rth at redhat dot com>, Jakub Jelinek <jakub at redhat dot com>
- Date: Tue, 10 Dec 2013 17:50:50 +0300
- Subject: Re: [Patch, RTL] Eliminate redundant vec_select moves.
- Authentication-results: sourceware.org; auth=none
- References: <529F666F dot 4000507 at redhat dot com> <CAMe9rOo+2LnE=T9y7bmoxfWov+T4WDizTmpU5jFhpYe_xadgXA at mail dot gmail dot com> <52A07CF6 dot 6010003 at arm dot com> <CAMe9rOpZ41Qe-PqoqyJaVaYPSQfQXSkXPJeUQa23v2=0UabSXA at mail dot gmail dot com> <20131205134000 dot GG44339 at msticlxl57 dot ims dot intel dot com> <20131209064909 dot GA21317 at msticlxl57 dot ims dot intel dot com> <52A593B1 dot 6080406 at arm dot com> <CAMe9rOod87YRhu5vYfHUvDEtG_7_VJHafmUUGc=2Sj9q92SAtQ at mail dot gmail dot com> <CAMe9rOrbyJku55xx0RNFaathvRPSJXwZ5g6ad5v9q+NGPdg9tg at mail dot gmail dot com> <CAMe9rOoCz-9QM8-zMsPkxKnzJ2=M8D9LYKuRFAjwKKP4EU4acg at mail dot gmail dot com>
Hello,
On 09 Dec 14:08, H.J. Lu wrote:
> There are no regressions on Linux/x86-64 with -m32 and -m64.
> Can you check if it improves code quality on x86?
That is exactly what I was talking about. However, I wasn't sure
that we could change an already defined target hook (one that is
used throughout the ports).
Anyway, this patch does not work for the given test, because combining
these insns is blocked:
(insn 2 4 3 2 (set (reg/v:V4SF 85 [ x ])
(reg:V4SF 21 xmm0 [ x ]))
(expr_list:REG_DEAD (reg:V4SF 21 xmm0 [ x ])
(nil)))
(insn 6 3 11 2 (set (reg:SF 86 [ D.1819 ])
(vec_select:SF (reg/v:V4SF 85 [ x ])
(parallel [
(const_int 0 [0])
])))
(expr_list:REG_DEAD (reg/v:V4SF 85 [ x ])
(nil)))
(insn 11 6 14 2 (set (reg/i:SF 21 xmm0)
(reg:SF 86 [ D.1819 ]))
(expr_list:REG_DEAD (reg:SF 86 [ D.1819 ])
(nil)))
This is because XMM0 is SSE_FIRST_REG, which is likely_spilled_p.
I suspect that is correct, since it is the return value register.
Anyway, we may change the test so that the VEC_SELECT won't contain
XMM0 and will be successfully combined with its input and output,
resulting in this pattern:
(insn 9 8 10 2 (set (reg:SF 22 xmm1)
(vec_select:SF (reg:V4SF 22 xmm1 [ y ])
(parallel [
(const_int 0 [0])
])))
(nil))
With the patch, this insn is deleted as a no-op move.
Attached are the patch and the updated test.
Note: It still does not work for 32-bit x86 because of the different
parameter passing. I could not come up with a solution :)
For x86_64 everything works fine.
Note 2: Even without the patch, such VEC_SELECTs are removed during the
split2 pass by the following split (sse.md):
(define_split
[(set (match_operand:DF 0 "register_operand")
(vec_select:DF
(match_operand:V2DF 1 "nonimmediate_operand")
(parallel [(const_int 0)])))]
"TARGET_SSE2 && reload_completed"
[(set (match_dup 0) (match_dup 1))]
But I believe that the earlier we get rid of redundant code, the better.
--
Thanks, K
---
gcc/combine.c | 2 ++
gcc/config/aarch64/aarch64.h | 6 +++---
gcc/config/alpha/alpha.h | 2 +-
gcc/config/arm/arm.h | 8 ++++----
gcc/config/i386/i386-protos.h | 4 +++-
gcc/config/i386/i386.c | 12 ++++++------
gcc/config/i386/i386.h | 7 ++++---
gcc/config/i386/i386.md | 1 +
gcc/config/ia64/ia64.h | 2 +-
gcc/config/m32c/m32c.h | 2 +-
gcc/config/mep/mep.h | 2 +-
gcc/config/mips/mips.h | 2 +-
gcc/config/msp430/msp430.h | 10 +++++-----
gcc/config/pa/pa32-regs.h | 2 +-
gcc/config/pa/pa64-regs.h | 2 +-
gcc/config/pdp11/pdp11.h | 2 +-
gcc/config/rs6000/rs6000.h | 2 +-
gcc/config/s390/s390.h | 2 +-
gcc/config/score/score.h | 4 ++--
gcc/config/sh/sh.h | 2 +-
gcc/config/sparc/sparc.h | 2 +-
gcc/config/spu/spu.h | 2 +-
gcc/emit-rtl.c | 2 +-
gcc/hard-reg-set.h | 6 +++---
gcc/postreload.c | 4 ++++
gcc/recog.c | 3 ++-
gcc/regcprop.c | 4 +++-
gcc/reginfo.c | 1 +
gcc/reload.c | 10 +++++++---
gcc/reload1.c | 10 +++++++---
gcc/rtlanal.c | 2 +-
gcc/testsuite/gcc.dg/vect/vect-nop-move.c | 22 +++++++++++++++++-----
32 files changed, 89 insertions(+), 55 deletions(-)
diff --git a/gcc/combine.c b/gcc/combine.c
index c7eb5e5..4575b16 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -5084,6 +5084,7 @@ subst (rtx x, rtx from, rtx to, int in_dest, int in_cond, int unique_copy)
&& REGNO (to) < FIRST_PSEUDO_REGISTER
&& REG_CANNOT_CHANGE_MODE_P (REGNO (to),
GET_MODE (to),
+ SUBREG_BYTE (x),
GET_MODE (x)))
return gen_rtx_CLOBBER (VOIDmode, const0_rtx);
#endif
@@ -6450,6 +6451,7 @@ simplify_set (rtx x)
&& ! (REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER
&& REG_CANNOT_CHANGE_MODE_P (REGNO (dest),
GET_MODE (SUBREG_REG (src)),
+ SUBREG_BYTE (src),
GET_MODE (src)))
#endif
&& (REG_P (dest)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index cead022..5b3bead 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -820,9 +820,9 @@ do { \
/* VFP registers may only be accessed in the mode they
were set. */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
- (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
- ? reg_classes_intersect_p (FP_REGS, (CLASS)) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
+ (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
+ ? reg_classes_intersect_p (FP_REGS, (CLASS)) \
: 0)
diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h
index 2e7c078..fbdcb2d 100644
--- a/gcc/config/alpha/alpha.h
+++ b/gcc/config/alpha/alpha.h
@@ -541,7 +541,7 @@ enum reg_class {
/* Return the class of registers that cannot change mode from FROM to TO. */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
(GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
? reg_classes_intersect_p (FLOAT_REGS, CLASS) : 0)
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 8b8b80e..18341af 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1247,10 +1247,10 @@ enum reg_class
In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
VFP registers in little-endian order. We can't describe that accurately to
GCC, so avoid taking subregs of such values. */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
- (TARGET_VFP && TARGET_BIG_END \
- && (GET_MODE_SIZE (FROM) > UNITS_PER_WORD \
- || GET_MODE_SIZE (TO) > UNITS_PER_WORD) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
+ (TARGET_VFP && TARGET_BIG_END \
+ && (GET_MODE_SIZE (FROM) > UNITS_PER_WORD \
+ || GET_MODE_SIZE (TO) > UNITS_PER_WORD) \
&& reg_classes_intersect_p (VFP_REGS, (CLASS)))
/* The class value for index registers, and the one for base regs. */
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 73feef2..0cbb9ae 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -167,7 +167,9 @@ extern bool ix86_modes_tieable_p (enum machine_mode, enum machine_mode);
extern bool ix86_secondary_memory_needed (enum reg_class, enum reg_class,
enum machine_mode, int);
extern bool ix86_cannot_change_mode_class (enum machine_mode,
- enum machine_mode, enum reg_class);
+ unsigned int,
+ enum machine_mode,
+ enum reg_class);
extern int ix86_mode_needed (int, rtx);
extern int ix86_mode_after (int, int, rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 382f8fb..3e5332d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -34988,10 +34988,12 @@ ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
}
/* Return true if the registers in CLASS cannot represent the change from
- modes FROM to TO. */
+ modes FROM at offset OFFSET to TO. */
bool
-ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
+ix86_cannot_change_mode_class (enum machine_mode from,
+ unsigned int offset,
+ enum machine_mode to,
enum reg_class regclass)
{
if (from == to)
@@ -35012,10 +35014,8 @@ ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
return true;
/* Vector registers do not support subreg with nonzero offsets, which
- are otherwise valid for integer registers. Since we can't see
- whether we have a nonzero offset from here, prohibit all
- nonparadoxical subregs changing size. */
- if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
+ are otherwise valid for integer registers. */
+ if (offset != 0 && GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
return true;
}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index db81aea..692fbcf 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1522,10 +1522,11 @@ enum reg_class
? mode_for_size (32, GET_MODE_CLASS (MODE), 0) \
: MODE)
-/* Return a class of registers that cannot change FROM mode to TO mode. */
+/* Return a class of registers that cannot change FROM mode to TO mode
+ with OFFSET. */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
- ix86_cannot_change_mode_class (FROM, TO, CLASS)
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
+ ix86_cannot_change_mode_class (FROM, OFFSET, TO, CLASS)
/* Stack layout; function entry, exit and calling. */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 7138868..c461e36 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3095,6 +3095,7 @@
{
case MODE_DI:
return "movq\t{%1, %0|%0, %1}";
+ case MODE_SF:
case MODE_SI:
return "movd\t{%1, %0|%0, %1}";
diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h
index ae9027c..05455af 100644
--- a/gcc/config/ia64/ia64.h
+++ b/gcc/config/ia64/ia64.h
@@ -856,7 +856,7 @@ enum reg_class
In FP regs, we can't change FP values to integer values and vice versa,
but we can change e.g. DImode to SImode, and V2SFmode into DImode. */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
(reg_classes_intersect_p (CLASS, BR_REGS) \
? (FROM) != (TO) \
: (SCALAR_FLOAT_MODE_P (FROM) != SCALAR_FLOAT_MODE_P (TO) \
diff --git a/gcc/config/m32c/m32c.h b/gcc/config/m32c/m32c.h
index 3ceb093..497a743 100644
--- a/gcc/config/m32c/m32c.h
+++ b/gcc/config/m32c/m32c.h
@@ -415,7 +415,7 @@ enum reg_class
#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
-#define CANNOT_CHANGE_MODE_CLASS(F,T,C) m32c_cannot_change_mode_class(F,T,C)
+#define CANNOT_CHANGE_MODE_CLASS(F,O,T,C) m32c_cannot_change_mode_class(F,T,C)
/* STACK AND CALLING */
diff --git a/gcc/config/mep/mep.h b/gcc/config/mep/mep.h
index 023d73c..4beac52 100644
--- a/gcc/config/mep/mep.h
+++ b/gcc/config/mep/mep.h
@@ -321,7 +321,7 @@ extern char mep_leaf_registers[];
#define MODES_TIEABLE_P(MODE1, MODE2) 1
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
mep_cannot_change_mode_class (FROM, TO, CLASS)
enum reg_class
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 021419c..003ee12 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -2104,7 +2104,7 @@ enum reg_class
#define CLASS_MAX_NREGS(CLASS, MODE) mips_class_max_nregs (CLASS, MODE)
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
mips_cannot_change_mode_class (FROM, TO, CLASS)
/* Stack layout; function entry, exit and calling. */
diff --git a/gcc/config/msp430/msp430.h b/gcc/config/msp430/msp430.h
index 953c638..441bc21 100644
--- a/gcc/config/msp430/msp430.h
+++ b/gcc/config/msp430/msp430.h
@@ -394,11 +394,11 @@ typedef struct
((TARGET_LARGE && ((NREGS) <= 2)) ? PSImode : choose_hard_reg_mode ((REGNO), (NREGS), false))
/* Also stop GCC from thinking that it can eliminate (SUBREG:PSI (SI)). */
-#define CANNOT_CHANGE_MODE_CLASS(FROM,TO,CLASS) \
- ( ((TO) == PSImode && (FROM) == SImode) \
- || ((TO) == SImode && (FROM) == PSImode) \
- || ((TO) == DImode && (FROM) == PSImode) \
- || ((TO) == PSImode && (FROM) == DImode) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM,OFFSET,TO,CLASS) \
+ ( ((TO) == PSImode && (FROM) == SImode) \
+ || ((TO) == SImode && (FROM) == PSImode) \
+ || ((TO) == DImode && (FROM) == PSImode) \
+ || ((TO) == PSImode && (FROM) == DImode) \
)
#define ACCUMULATE_OUTGOING_ARGS 1
diff --git a/gcc/config/pa/pa32-regs.h b/gcc/config/pa/pa32-regs.h
index 098e9ba..e053978 100644
--- a/gcc/config/pa/pa32-regs.h
+++ b/gcc/config/pa/pa32-regs.h
@@ -296,7 +296,7 @@ enum reg_class { NO_REGS, R1_REGS, GENERAL_REGS, FPUPPER_REGS, FP_REGS,
/* Defines invalid mode changes. */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
pa_cannot_change_mode_class (FROM, TO, CLASS)
/* Return the class number of the smallest class containing
diff --git a/gcc/config/pa/pa64-regs.h b/gcc/config/pa/pa64-regs.h
index 002520a..df6ca4d 100644
--- a/gcc/config/pa/pa64-regs.h
+++ b/gcc/config/pa/pa64-regs.h
@@ -232,7 +232,7 @@ enum reg_class { NO_REGS, R1_REGS, GENERAL_REGS, FPUPPER_REGS, FP_REGS,
/* Defines invalid mode changes. */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
pa_cannot_change_mode_class (FROM, TO, CLASS)
/* Return the class number of the smallest class containing
diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h
index d4bc19a..492bf36 100644
--- a/gcc/config/pdp11/pdp11.h
+++ b/gcc/config/pdp11/pdp11.h
@@ -282,7 +282,7 @@ enum reg_class { NO_REGS, MUL_REGS, GENERAL_REGS, LOAD_FPU_REGS, NO_LOAD_FPU_REG
1 \
)
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
pdp11_cannot_change_mode_class (FROM, TO, CLASS)
/* Stack layout; function entry, exit and calling. */
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index eb59235..4807d63 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1505,7 +1505,7 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
/* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
rs6000_cannot_change_mode_class_ptr (FROM, TO, CLASS)
/* Stack layout; function entry, exit and calling. */
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index bca18fe..e38ca1f 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -419,7 +419,7 @@ enum processor_flags
cannot use SUBREGs to switch between modes in FP registers.
Likewise for access registers, since they have only half the
word size on 64-bit. */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
(GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
? ((reg_classes_intersect_p (FP_REGS, CLASS) \
&& (GET_MODE_SIZE (FROM) < 8 || GET_MODE_SIZE (TO) < 8)) \
diff --git a/gcc/config/score/score.h b/gcc/config/score/score.h
index ca73401..8df1056 100644
--- a/gcc/config/score/score.h
+++ b/gcc/config/score/score.h
@@ -414,8 +414,8 @@ enum reg_class
#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
score_secondary_reload_class (CLASS, MODE, X)
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
- (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
+ (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
? reg_classes_intersect_p (HI_REG, (CLASS)) : 0)
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index 9f07012..b35ce58 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -1149,7 +1149,7 @@ extern enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
operand of a SUBREG that changes the mode of the object illegally.
??? We need to renumber the internal numbers for the frnn registers
when in little endian in order to allow mode size changes. */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
sh_cannot_change_mode_class (FROM, TO, CLASS)
/* Stack layout; function entry, exit and calling. */
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index d96c1b6..40e1e59 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -912,7 +912,7 @@ extern enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
Likewise for SFmode, since word-mode paradoxical subregs are
problematic on big-endian architectures. */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
(TARGET_ARCH64 \
&& GET_MODE_SIZE (FROM) == 4 \
&& GET_MODE_SIZE (TO) != 4 \
diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h
index 64a2ba0..0e77250 100644
--- a/gcc/config/spu/spu.h
+++ b/gcc/config/spu/spu.h
@@ -226,7 +226,7 @@ enum reg_class {
/* GCC assumes that modes are in the lowpart of a register, which is
only true for SPU. */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+#define CANNOT_CHANGE_MODE_CLASS(FROM, OFFSET, TO, CLASS) \
((GET_MODE_SIZE (FROM) > 4 || GET_MODE_SIZE (TO) > 4) \
&& (GET_MODE_SIZE (FROM) < 16 || GET_MODE_SIZE (TO) < 16) \
&& GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO))
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index d7fa3a5..b8e3dfd 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -748,7 +748,7 @@ validate_subreg (enum machine_mode omode, enum machine_mode imode,
if ((COMPLEX_MODE_P (imode) || VECTOR_MODE_P (imode))
&& GET_MODE_INNER (imode) == omode)
;
- else if (REG_CANNOT_CHANGE_MODE_P (regno, imode, omode))
+ else if (REG_CANNOT_CHANGE_MODE_P (regno, imode, offset, omode))
return false;
#endif
diff --git a/gcc/hard-reg-set.h b/gcc/hard-reg-set.h
index 09a09c5..5140339 100644
--- a/gcc/hard-reg-set.h
+++ b/gcc/hard-reg-set.h
@@ -716,9 +716,9 @@ extern struct target_hard_regs *this_target_hard_regs;
extern const char * reg_class_names[];
-/* Given a hard REGN a FROM mode and a TO mode, return nonzero if
+/* Given a hard REGN a FROM mode at OFFSET and a TO mode, return nonzero if
REGN cannot change modes between the specified modes. */
-#define REG_CANNOT_CHANGE_MODE_P(REGN, FROM, TO) \
- CANNOT_CHANGE_MODE_CLASS (FROM, TO, REGNO_REG_CLASS (REGN))
+#define REG_CANNOT_CHANGE_MODE_P(REGN, FROM, OFFSET, TO) \
+ CANNOT_CHANGE_MODE_CLASS (FROM, OFFSET, TO, REGNO_REG_CLASS (REGN))
#endif /* ! GCC_HARD_REG_SET_H */
diff --git a/gcc/postreload.c b/gcc/postreload.c
index b0c6342..6ecb7c9 100644
--- a/gcc/postreload.c
+++ b/gcc/postreload.c
@@ -349,6 +349,8 @@ reload_cse_simplify_set (rtx set, rtx insn)
&& extend_op != UNKNOWN
#ifdef CANNOT_CHANGE_MODE_CLASS
&& !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SET_DEST (set)),
+ (GET_CODE (SET_DEST (set)) == SUBREG
+ ? SUBREG_BYTE (SET_DEST (set)) : 0),
word_mode,
REGNO_REG_CLASS (REGNO (SET_DEST (set))))
#endif
@@ -459,6 +461,8 @@ reload_cse_simplify_operands (rtx insn, rtx testreg)
it cannot have been used in word_mode. */
else if (REG_P (SET_DEST (set))
&& CANNOT_CHANGE_MODE_CLASS (GET_MODE (SET_DEST (set)),
+ (GET_CODE (SET_DEST (set)) == SUBREG
+ ? SUBREG_BYTE (SET_DEST (set)) : 0),
word_mode,
REGNO_REG_CLASS (REGNO (SET_DEST (set)))))
; /* Continue ordinary processing. */
diff --git a/gcc/recog.c b/gcc/recog.c
index 7f59756..85e13d3 100644
--- a/gcc/recog.c
+++ b/gcc/recog.c
@@ -1069,7 +1069,8 @@ register_operand (rtx op, enum machine_mode mode)
#ifdef CANNOT_CHANGE_MODE_CLASS
if (REG_P (sub)
&& REGNO (sub) < FIRST_PSEUDO_REGISTER
- && REG_CANNOT_CHANGE_MODE_P (REGNO (sub), GET_MODE (sub), mode)
+ && REG_CANNOT_CHANGE_MODE_P (REGNO (sub), GET_MODE (sub),
+ SUBREG_BYTE (op), mode)
&& GET_MODE_CLASS (GET_MODE (sub)) != MODE_COMPLEX_INT
&& GET_MODE_CLASS (GET_MODE (sub)) != MODE_COMPLEX_FLOAT
/* LRA can generate some invalid SUBREGS just for matched
diff --git a/gcc/regcprop.c b/gcc/regcprop.c
index 9b52a63..8afcc5e 100644
--- a/gcc/regcprop.c
+++ b/gcc/regcprop.c
@@ -389,7 +389,9 @@ mode_change_ok (enum machine_mode orig_mode, enum machine_mode new_mode,
return false;
#ifdef CANNOT_CHANGE_MODE_CLASS
- return !REG_CANNOT_CHANGE_MODE_P (regno, orig_mode, new_mode);
+ return !REG_CANNOT_CHANGE_MODE_P (regno, orig_mode,
+ (MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT),
+ new_mode);
#endif
return true;
diff --git a/gcc/reginfo.c b/gcc/reginfo.c
index db66a09..5dd652d 100644
--- a/gcc/reginfo.c
+++ b/gcc/reginfo.c
@@ -1222,6 +1222,7 @@ record_subregs_of_mode (rtx subreg, bitmap subregs_of_mode)
if (!bitmap_bit_p (invalid_mode_changes,
regno * N_REG_CLASSES + rclass)
&& CANNOT_CHANGE_MODE_CLASS (PSEUDO_REGNO_MODE (regno),
+ (MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT),
mode, (enum reg_class) rclass))
bitmap_set_bit (invalid_mode_changes,
regno * N_REG_CLASSES + rclass);
diff --git a/gcc/reload.c b/gcc/reload.c
index 96619f6..487d4d4 100644
--- a/gcc/reload.c
+++ b/gcc/reload.c
@@ -1064,7 +1064,8 @@ push_reload (rtx in, rtx out, rtx *inloc, rtx *outloc,
if (in != 0 && GET_CODE (in) == SUBREG
&& (subreg_lowpart_p (in) || strict_low)
#ifdef CANNOT_CHANGE_MODE_CLASS
- && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (in)), inmode, rclass)
+ && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (in)),
+ SUBREG_BYTE (in), inmode, rclass)
#endif
&& contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (in))]
&& (CONSTANT_P (SUBREG_REG (in))
@@ -1113,7 +1114,8 @@ push_reload (rtx in, rtx out, rtx *inloc, rtx *outloc,
|| (REG_P (SUBREG_REG (in))
&& REGNO (SUBREG_REG (in)) < FIRST_PSEUDO_REGISTER
&& REG_CANNOT_CHANGE_MODE_P
- (REGNO (SUBREG_REG (in)), GET_MODE (SUBREG_REG (in)), inmode))
+ (REGNO (SUBREG_REG (in)), GET_MODE (SUBREG_REG (in)),
+ SUBREG_BYTE (in), inmode))
#endif
))
{
@@ -1174,7 +1176,8 @@ push_reload (rtx in, rtx out, rtx *inloc, rtx *outloc,
if (out != 0 && GET_CODE (out) == SUBREG
&& (subreg_lowpart_p (out) || strict_low)
#ifdef CANNOT_CHANGE_MODE_CLASS
- && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (out)), outmode, rclass)
+ && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (out)),
+ SUBREG_BYTE (out), outmode, rclass)
#endif
&& contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (out))]
&& (CONSTANT_P (SUBREG_REG (out))
@@ -1209,6 +1212,7 @@ push_reload (rtx in, rtx out, rtx *inloc, rtx *outloc,
&& REGNO (SUBREG_REG (out)) < FIRST_PSEUDO_REGISTER
&& REG_CANNOT_CHANGE_MODE_P (REGNO (SUBREG_REG (out)),
GET_MODE (SUBREG_REG (out)),
+ SUBREG_BYTE (out),
outmode))
#endif
))
diff --git a/gcc/reload1.c b/gcc/reload1.c
index 6864ec1..17b2c61 100644
--- a/gcc/reload1.c
+++ b/gcc/reload1.c
@@ -6609,7 +6609,7 @@ choose_reload_regs (struct insn_chain *chain)
mode MODE. */
&& !REG_CANNOT_CHANGE_MODE_P (REGNO (reg_last_reload_reg[regno]),
GET_MODE (reg_last_reload_reg[regno]),
- mode)
+ byte, mode)
#endif
)
{
@@ -8080,8 +8080,12 @@ inherit_piecemeal_p (int dest ATTRIBUTE_UNUSED,
enum machine_mode mode ATTRIBUTE_UNUSED)
{
#ifdef CANNOT_CHANGE_MODE_CLASS
- return (!REG_CANNOT_CHANGE_MODE_P (dest, mode, reg_raw_mode[dest])
- && !REG_CANNOT_CHANGE_MODE_P (src, mode, reg_raw_mode[src]));
+ return (!REG_CANNOT_CHANGE_MODE_P (dest, mode,
+ (MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT),
+ reg_raw_mode[dest])
+ && !REG_CANNOT_CHANGE_MODE_P (src, mode,
+ (MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT),
+ reg_raw_mode[src]));
#else
return true;
#endif
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 38f9e36..9687110 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -3533,7 +3533,7 @@ simplify_subreg_regno (unsigned int xregno, enum machine_mode xmode,
/* Give the backend a chance to disallow the mode change. */
if (GET_MODE_CLASS (xmode) != MODE_COMPLEX_INT
&& GET_MODE_CLASS (xmode) != MODE_COMPLEX_FLOAT
- && REG_CANNOT_CHANGE_MODE_P (xregno, xmode, ymode)
+ && REG_CANNOT_CHANGE_MODE_P (xregno, xmode, offset, ymode)
/* We can use mode change in LRA for some transformations. */
&& ! lra_in_progress)
return -1;
diff --git a/gcc/testsuite/gcc.dg/vect/vect-nop-move.c b/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
index 1941933..76c07bd 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
@@ -1,4 +1,4 @@
-/* { dg-do run } */
+/* { dg-do run } */
/* { dg-require-effective-target vect_float } */
/* { dg-options "-O3 -fdump-rtl-combine-details" } */
@@ -16,9 +16,15 @@ foo32x4_be (float32x4_t x)
}
NOINLINE float
-foo32x4_le (float32x4_t x)
+bar_2 (float a, float b)
{
- return x[0];
+ return a;
+}
+
+NOINLINE float
+foo32x4_le (float32x4_t x, float32x4_t y)
+{
+ return bar_2 (x[0], y[0]);
}
NOINLINE float
@@ -30,12 +36,18 @@ bar (float a)
NOINLINE float
foo32x2_be (float32x2_t x)
{
+#ifdef __i386__
+ __builtin_ia32_emms ();
+#endif
return bar (x[1]);
}
NOINLINE float
foo32x2_le (float32x2_t x)
{
+#ifdef __i386__
+ __builtin_ia32_emms ();
+#endif
return bar (x[0]);
}
@@ -48,7 +60,7 @@ main()
if (foo32x4_be (a) != 3.0f)
abort ();
- if (foo32x4_le (a) != 0.0f)
+ if (foo32x4_le (a, a) != 0.0f)
abort ();
if (foo32x2_be (b) != 1.0f)
@@ -60,5 +72,5 @@ main()
return 0;
}
-/* { dg-final { scan-rtl-dump "deleting noop move" "combine" { target aarch64*-*-* } } } */
+/* { dg-final { scan-rtl-dump "deleting noop move" "combine" { target aarch64*-*-* x86_64-*-*} } } */
/* { dg-final { cleanup-rtl-dump "combine" } } */
- References:
- Re: [Patch, RTL] Eliminate redundant vec_select moves.
- Re: [Patch, RTL] Eliminate redundant vec_select moves.
- Re: [Patch, RTL] Eliminate redundant vec_select moves.
- Re: [Patch, RTL] Eliminate redundant vec_select moves.
- Re: [Patch, RTL] Eliminate redundant vec_select moves.
- Re: [Patch, RTL] Eliminate redundant vec_select moves.
- Re: [Patch, RTL] Eliminate redundant vec_select moves.
- Re: [Patch, RTL] Eliminate redundant vec_select moves.
- Re: [Patch, RTL] Eliminate redundant vec_select moves.
- Re: [Patch, RTL] Eliminate redundant vec_select moves.