This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [RS6000] PR89271, gcc.target/powerpc/vsx-simode2.c
- From: Alan Modra <amodra at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Segher Boessenkool <segher at kernel dot crashing dot org>
- Date: Wed, 8 May 2019 15:02:48 +0930
- Subject: Re: [RS6000] PR89271, gcc.target/powerpc/vsx-simode2.c
- References: <20190327134006.GA3194@bubble.grove.modra.org>
This is https://gcc.gnu.org/ml/gcc-patches/2019-03/msg01299.html with
the fixes Segher requested, plus a few more:
- delete PREFERRED_RELOAD_CLASS changes
- adjust for recent register renumbering
- use defines rather than hard coding register numbers
- flip altivec/float test when dealing with moves within vsx regs,
so that the altivec hard reg count is preferred over the fp hard reg
count when both reg types are possible.
- use 2 for power9 direct move cost, and remove more '?'s from insns.
- use reg_class_subset_p in the test for slow LR/CTR moves
Bootstrapped and regression tested powerpc64le-linux. OK for mainline?
PR target/89271
* config/rs6000/rs6000.h (enum reg_class, REG_CLASS_NAMES),
(REG_CLASS_CONTENTS): Add GEN_OR_VSX_REGS class.
* config/rs6000/rs6000.c (rs6000_register_move_cost): Correct
cost for general <-> vsx when direct moves are available.
Cost union classes at minimal cost for any reg in the class.
Correct calculation for moves between vsx, float, and altivec.
Don't return a low cost for moves between special regs. Don't
use hard coded register numbers.
(TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS): Define.
(rs6000_ira_change_pseudo_allocno_class): New function.
* config/rs6000/rs6000.md (movsi_internal1, mov<mode>_internal),
(movdi_internal32, movdi_internal64): Remove '*' from vsx register
alternatives.
(movsi_internal1): Don't disparage vector alternatives.
(mov<mode>_internal): Likewise, excepting alternative that
will be split.
* config/rs6000/vsx.md (vsx_splat_<mode>_reg): Don't disparage
we <- b alternative.
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 5d5765d89b2..e7c63c263ae 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1729,6 +1729,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
+#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
+#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
+ rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
@@ -34648,22 +34651,54 @@ rs6000_register_move_cost (machine_mode mode,
reg_class_t from, reg_class_t to)
{
int ret;
+ reg_class_t rclass;
if (TARGET_DEBUG_COST)
dbg_cost_ctrl++;
+ /* If we have VSX, we can easily move between FPR or Altivec registers,
+ otherwise we can only easily move within classes.
+ Do this first so we give best-case answers for union classes
+ containing both gprs and vsx regs. */
+ HARD_REG_SET to_vsx, from_vsx;
+ COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]);
+ AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]);
+ COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]);
+ AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]);
+ if (!hard_reg_set_empty_p (to_vsx)
+ && !hard_reg_set_empty_p (from_vsx)
+ && (TARGET_VSX
+ || hard_reg_set_intersect_p (to_vsx, from_vsx)))
+ {
+ int reg = FIRST_FPR_REGNO;
+ if (TARGET_VSX
+ || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
+ && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
+ reg = FIRST_ALTIVEC_REGNO;
+ ret = 2 * hard_regno_nregs (reg, mode);
+ }
+
/* Moves from/to GENERAL_REGS. */
- if (reg_classes_intersect_p (to, GENERAL_REGS)
- || reg_classes_intersect_p (from, GENERAL_REGS))
+ else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
+ || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
{
- reg_class_t rclass = from;
-
- if (! reg_classes_intersect_p (to, GENERAL_REGS))
- rclass = to;
-
if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
- ret = (rs6000_memory_move_cost (mode, rclass, false)
- + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
+ {
+ if (TARGET_DIRECT_MOVE)
+ {
+ if (rs6000_tune != PROCESSOR_POWER9)
+ ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
+ else
+ ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
+ /* SFmode requires a conversion when moving between gprs
+ and vsx. */
+ if (mode == SFmode)
+ ret += 2;
+ }
+ else
+ ret = (rs6000_memory_move_cost (mode, rclass, false)
+ + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
+ }
/* It's more expensive to move CR_REGS than CR0_REGS because of the
shift. */
@@ -34676,24 +34711,14 @@ rs6000_register_move_cost (machine_mode mode,
|| rs6000_tune == PROCESSOR_POWER7
|| rs6000_tune == PROCESSOR_POWER8
|| rs6000_tune == PROCESSOR_POWER9)
- && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
- ret = 6 * hard_regno_nregs (0, mode);
+ && reg_class_subset_p (rclass, SPECIAL_REGS))
+ ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
else
/* A move will cost one instruction per GPR moved. */
- ret = 2 * hard_regno_nregs (0, mode);
+ ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
}
- /* If we have VSX, we can easily move between FPR or Altivec registers. */
- else if (VECTOR_MEM_VSX_P (mode)
- && reg_classes_intersect_p (to, VSX_REGS)
- && reg_classes_intersect_p (from, VSX_REGS))
- ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
-
- /* Moving between two similar registers is just one instruction. */
- else if (reg_classes_intersect_p (to, from))
- ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
-
/* Everything else has to go through GENERAL_REGS. */
else
ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
@@ -34746,6 +34771,64 @@ rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
return ret;
}
+/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
+
+ The register allocator chooses GEN_OR_VSX_REGS for the allocno
+ class if GENERAL_REGS and VSX_REGS cost is lower than the memory
+ cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
+ move cost between GENERAL_REGS and VSX_REGS low.
+
+ It might seem reasonable to use a union class. After all, if usage
+ of vsr is low and gpr high, it might make sense to spill gpr to vsr
+ rather than memory. However, in cases where register pressure of
+ both is high, like the cactus_adm spec test, allowing
+ GEN_OR_VSX_REGS as the allocno class results in bad decisions in
+ the first scheduling pass. This is partly due to an allocno of
+ GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
+ class, which gives too high a pressure for GENERAL_REGS and too low
+ for VSX_REGS. So, force a choice of the subclass here.
+
+ The best class is also the union if GENERAL_REGS and VSX_REGS have
+ the same cost. In that case we do use GEN_OR_VSX_REGS as the
+ allocno class, since trying to narrow down the class by regno mode
+ is prone to error. For example, SImode is allowed in VSX regs and
+ in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
+ it would be wrong to choose an allocno of GENERAL_REGS based on
+ SImode. Allocno classes other than GEN_OR_VSX_REGS are returned as is. */
+
+static reg_class_t
+rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
+ reg_class_t allocno_class,
+ reg_class_t best_class)
+{
+ switch (allocno_class)
+ {
+ case GEN_OR_VSX_REGS:
+ /* best_class must be a subset of allocno_class. */
+ gcc_checking_assert (best_class == GEN_OR_VSX_REGS
+ || best_class == GEN_OR_FLOAT_REGS
+ || best_class == VSX_REGS
+ || best_class == ALTIVEC_REGS
+ || best_class == FLOAT_REGS
+ || best_class == GENERAL_REGS
+ || best_class == BASE_REGS);
+ /* Use best_class, but widen BASE_REGS to GENERAL_REGS and, with VSX,
+ FLOAT_REGS or ALTIVEC_REGS to VSX_REGS: copying from the wider class
+ to best_class is cheap. This mimics IRA choice of allocno class. */
+ if (best_class == BASE_REGS)
+ return GENERAL_REGS;
+ if (TARGET_VSX
+ && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
+ return VSX_REGS;
+ return best_class;
+
+ default:
+ break;
+ }
+
+ return allocno_class;
+}
+
/* Returns a code for a target-specific builtin that implements
reciprocal of the function, or NULL_TREE if not available. */
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 14a9e199bc8..30a72dd5e55 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1141,6 +1141,7 @@ enum reg_class
VRSAVE_REGS,
VSCR_REGS,
GEN_OR_FLOAT_REGS,
+ GEN_OR_VSX_REGS,
LINK_REGS,
CTR_REGS,
LINK_OR_CTR_REGS,
@@ -1169,6 +1170,7 @@ enum reg_class
"VRSAVE_REGS", \
"VSCR_REGS", \
"GEN_OR_FLOAT_REGS", \
+ "GEN_OR_VSX_REGS", \
"LINK_REGS", \
"CTR_REGS", \
"LINK_OR_CTR_REGS", \
@@ -1205,6 +1207,8 @@ enum reg_class
{ 0x00000000, 0x00000000, 0x00000000, 0x00002000 }, \
/* GEN_OR_FLOAT_REGS. */ \
{ 0xffffffff, 0xffffffff, 0x00000000, 0x00004008 }, \
+ /* GEN_OR_VSX_REGS. */ \
+ { 0xffffffff, 0xffffffff, 0xffffffff, 0x00004008 }, \
/* LINK_REGS. */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00000001 }, \
/* CTR_REGS. */ \
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 411d7f0d352..8da7aba4080 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6830,10 +6830,10 @@ (define_insn "movsi_low"
;; MF%1 MT%0 NOP
(define_insn "*movsi_internal1"
[(set (match_operand:SI 0 "nonimmediate_operand"
- "=r, r, r, ?*wI, ?*wH,
- m, ?Z, ?Z, r, r,
- r, ?*wIwH, ?*wJwK, ?*wJwK, ?*wu,
- ?*wJwK, ?*wH, ?*wK, ?*wIwH, ?r,
+ "=r, r, r, wI, wH,
+ m, Z, Z, r, r,
+ r, wIwH, wJwK, wJwK, wu,
+ wJwK, wH, wK, wIwH, r,
r, *h, *h")
(match_operand:SI 1 "input_operand"
@@ -7104,13 +7104,13 @@ (define_expand "mov<mode>"
;; MTVSRWZ MF%1 MT%1 NOP
(define_insn "*mov<mode>_internal"
[(set (match_operand:QHI 0 "nonimmediate_operand"
- "=r, r, ?*wJwK, m, Z, r,
- ?*wJwK, ?*wJwK, ?*wJwK, ?*wK, ?*wK, r,
- ?*wJwK, r, *c*l, *h")
+ "=r, r, wJwK, m, Z, r,
+ wJwK, wJwK, wJwK, wK, ?wK, r,
+ wJwK, r, *c*l, *h")
(match_operand:QHI 1 "input_operand"
"r, m, Z, r, wJwK, i,
- wJwK, O, wM, wB, wS, ?*wJwK,
+ wJwK, O, wM, wB, wS, wJwK,
r, *h, r, 0"))]
"gpc_reg_operand (operands[0], <MODE>mode)
@@ -8671,8 +8671,8 @@ (define_insn "*movdi_internal32"
[(set (match_operand:DI 0 "nonimmediate_operand"
"=Y, r, r, m, ^d, ^d,
r, wY, Z, ^wb, $wv, ^wi,
- *wo, *wo, *wv, *wi, *wi, *wv,
- *wv")
+ wo, wo, wv, wi, wi, wv,
+ wv")
(match_operand:DI 1 "input_operand"
"r, Y, r, ^d, m, ^d,
@@ -8751,9 +8751,9 @@ (define_insn "*movdi_internal64"
[(set (match_operand:DI 0 "nonimmediate_operand"
"=YZ, r, r, r, r, r,
m, ^d, ^d, wY, Z, $wb,
- $wv, ^wi, *wo, *wo, *wv, *wi,
- *wi, *wv, *wv, r, *h, *h,
- ?*r, ?*wg, ?*r, ?*wj")
+ $wv, ^wi, wo, wo, wv, wi,
+ wi, wv, wv, r, *h, *h,
+ ?r, ?wg, ?r, ?wj")
(match_operand:DI 1 "input_operand"
"r, YZ, r, I, L, nF,
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 607c0cd33f2..80434d10247 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4106,7 +4106,7 @@ (define_expand "vsx_splat_<mode>"
})
(define_insn "vsx_splat_<mode>_reg"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,we")
(vec_duplicate:VSX_D
(match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
--
Alan Modra
Australia Development Lab, IBM