[gcc(refs/users/meissner/heads/work087)] Add options to control load/store vector pair generation.
Michael Meissner
meissner@gcc.gnu.org
Wed Apr 27 15:45:16 GMT 2022
https://gcc.gnu.org/g:2efc7b6bee0b21960d902fc13f989d6ee320f867
commit 2efc7b6bee0b21960d902fc13f989d6ee320f867
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Wed Apr 27 11:38:26 2022 -0400
Add options to control load/store vector pair generation.
This patch adds options to allow disabling generating either the load
vector pair instructions (lxvp, lxvpx, plxvp) or the store vector pair
instructions (stxvp, stxvpx, pstxvp).
2022-04-27 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/mma.md (movoo): Add support to suppress load/store
vector pair instructions.
(movxo): Likewise.
* config/rs6000/rs6000.cc (rs6000_setup_reg_addr_masks): Disable
indexed loads for vector pair if either lxvp/stxvp are disabled.
(rs6000_split_multireg_move): Do not split vector quad to vector
pair if lxvp/stxvp is disabled.
* config/rs6000/rs6000.md (isa attribute): Add lxvp and stxvp
attributes.
(enabled attribute): Add lxvp/stxvp support.
* config/rs6000/rs6000.opt (-mload-vector-pair): New option.
(-mstore-vector-pair): New option.
gcc/testsuite/
* gcc.target/powerpc/p10-load-vector-pair-1.c: New test.
* gcc.target/powerpc/p10-load-vector-pair-2.c: New test.
* gcc.target/powerpc/p10-store-vector-pair-1.c: New test.
* gcc.target/powerpc/p10-store-vector-pair-2.c: New test.
Diff:
---
gcc/config/rs6000/mma.md | 39 ++++++----
gcc/config/rs6000/rs6000.cc | 10 ++-
gcc/config/rs6000/rs6000.md | 13 +++-
gcc/config/rs6000/rs6000.opt | 8 +++
.../gcc.target/powerpc/p10-load-vector-pair-1.c | 82 ++++++++++++++++++++++
.../gcc.target/powerpc/p10-load-vector-pair-2.c | 81 +++++++++++++++++++++
.../gcc.target/powerpc/p10-store-vector-pair-1.c | 82 ++++++++++++++++++++++
.../gcc.target/powerpc/p10-store-vector-pair-2.c | 81 +++++++++++++++++++++
8 files changed, 378 insertions(+), 18 deletions(-)
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 907c9d6d516..a9f3b736fdd 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -274,26 +274,35 @@
DONE;
})
+;; With the -mno-load-vector-pair and -mno-store-vector-pair options, we might
+;; have to split lxvp into 2 lxv instructions, and/or stxvp into 2 stxv
+;; instructions.
(define_insn_and_split "*movoo"
- [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
- (match_operand:OO 1 "input_operand" "m,wa,wa"))]
+ [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,wa,m, o, wa")
+ (match_operand:OO 1 "input_operand" "m, o, wa,wa,wa"))]
"TARGET_MMA
&& (gpc_reg_operand (operands[0], OOmode)
|| gpc_reg_operand (operands[1], OOmode))"
"@
lxvp%X1 %x0,%1
+ #
stxvp%X0 %x1,%0
+ #
#"
"&& reload_completed
- && (!MEM_P (operands[0]) && !MEM_P (operands[1]))"
+ && ((MEM_P (operands[1]) && !TARGET_LOAD_VECTOR_PAIR)
+ || (MEM_P (operands[0]) && !TARGET_STORE_VECTOR_PAIR)
+ || (!MEM_P (operands[0]) && !MEM_P (operands[1])))"
[(const_int 0)]
{
rs6000_split_multireg_move (operands[0], operands[1]);
DONE;
}
- [(set_attr "type" "vecload,vecstore,veclogical")
+ [(set_attr "type" "vecload,vecload,vecstore,vecstore,veclogical")
(set_attr "size" "256")
- (set_attr "length" "*,*,8")])
+ (set_attr "length" "*,*,*,*,8")
+ (set_attr "max_prefixed_insns" "*,2,*,2,*")
+ (set_attr "isa" "lxvp,*,stxvp,*,*")])
;; Vector quad support. XOmode can only live in FPRs.
@@ -306,25 +315,27 @@
DONE;
})
+;; With the -mno-load-vector-pair and -mno-store-vector-pair options, we might
+;; have to split the loads into 4 lxv instructions instead of 2 lxvp
+;; instructions, and/or the stores into 4 stxv instructions instead of 2 stxvp
+;; instructions.
(define_insn_and_split "*movxo"
- [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d")
- (match_operand:XO 1 "input_operand" "m,d,d"))]
+ [(set (match_operand:XO 0 "nonimmediate_operand" "=d,d,m,o,d")
+ (match_operand:XO 1 "input_operand" "m,o,d,d,d"))]
"TARGET_MMA
&& (gpc_reg_operand (operands[0], XOmode)
|| gpc_reg_operand (operands[1], XOmode))"
- "@
- #
- #
- #"
+ "#"
"&& reload_completed"
[(const_int 0)]
{
rs6000_split_multireg_move (operands[0], operands[1]);
DONE;
}
- [(set_attr "type" "vecload,vecstore,veclogical")
- (set_attr "length" "*,*,16")
- (set_attr "max_prefixed_insns" "2,2,*")])
+ [(set_attr "type" "vecload,vecload,vecstore,vecstore,veclogical")
+ (set_attr "length" "*,*,*,*,16")
+ (set_attr "max_prefixed_insns" "2,4,2,4,*")
+ (set_attr "isa" "lxvp,*,stxvp,*,*")])
(define_expand "vsx_assemble_pair"
[(match_operand:OO 0 "vsx_register_operand")
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index f07e57cafb2..3a689e0fdfa 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -2716,7 +2716,8 @@ rs6000_setup_reg_addr_masks (void)
/* Vector pairs can do both indexed and offset loads if the
instructions are enabled, otherwise they can only do offset loads
since it will be broken into two vector moves. Vector quads can
- only do offset loads. */
+ only do offset loads. If either stxvp or lxvp is disabled, we
+ can't do indexed addressing. */
else if ((addr_mask != 0) && TARGET_MMA
&& (m2 == OOmode || m2 == XOmode))
{
@@ -2724,7 +2725,9 @@ rs6000_setup_reg_addr_masks (void)
if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
{
addr_mask |= RELOAD_REG_QUAD_OFFSET;
- if (m2 == OOmode)
+ if (m2 == OOmode
+ && TARGET_LOAD_VECTOR_PAIR
+ && TARGET_STORE_VECTOR_PAIR)
addr_mask |= RELOAD_REG_INDEXED;
}
}
@@ -26968,7 +26971,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* If we have a vector quad register for MMA, and this is a load or store,
see if we can use vector paired load/stores. */
if (mode == XOmode && TARGET_MMA
- && (MEM_P (dst) || MEM_P (src)))
+ && ((MEM_P (dst) && TARGET_STORE_VECTOR_PAIR)
+ || (MEM_P (src) && TARGET_LOAD_VECTOR_PAIR)))
{
reg_mode = OOmode;
nregs /= 2;
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 64049a6e521..90a11366266 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -354,7 +354,7 @@
(const (symbol_ref "(enum attr_cpu) rs6000_tune")))
;; The ISA we implement.
-(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10"
+(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10,lxvp,stxvp"
(const_string "any"))
;; Is this alternative enabled for the current CPU/ISA/etc.?
@@ -402,6 +402,17 @@
(and (eq_attr "isa" "p10")
(match_test "TARGET_POWER10"))
(const_int 1)
+
+ (and (eq_attr "isa" "lxvp")
+ (match_test "TARGET_POWER10")
+ (match_test "TARGET_LOAD_VECTOR_PAIR"))
+ (const_int 1)
+
+ (and (eq_attr "isa" "stxvp")
+ (match_test "TARGET_POWER10")
+ (match_test "TARGET_STORE_VECTOR_PAIR"))
+ (const_int 1)
+
] (const_int 0)))
;; If this instruction is microcoded on the CELL processor
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 6c4caf4c9ee..766f8f1591c 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -624,6 +624,14 @@ mieee128-constant
Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
Generate (do not generate) code that uses the LXVKQ instruction.
+; Generate (do not generate) code that uses the load vector pair instructions.
+mload-vector-pair
+Target Undocumented Var(TARGET_LOAD_VECTOR_PAIR) Init(1) Save
+
+; Generate (do not generate) code that uses the store vector pair instructions.
+mstore-vector-pair
+Target Undocumented Var(TARGET_STORE_VECTOR_PAIR) Init(1) Save
+
-param=rs6000-density-pct-threshold=
Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-1.c b/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-1.c
new file mode 100644
index 00000000000..d1f5790d238
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-1.c
@@ -0,0 +1,82 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mload-vector-pair -mmma" } */
+
+/* Test if we generate load vector pair instructions if the user uses the
+ -mload-vector-pair option. */
+static __vector_quad sq;
+static __vector_pair sp;
+
+void
+load_store_pair (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* lxvp, stxvp. */
+}
+
+void
+load_store_pair_1 (__vector_pair *p, __vector_pair *q)
+{
+ p[1] = q[1]; /* lxvp, stxvp. */
+}
+
+void
+load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q)
+{
+ p[0x10000] = q[0x10000]; /* plxvp, pstxvp. */
+}
+
+void
+load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n)
+{
+ p[n] = q[n]; /* lxvpx, stxvpx. */
+}
+
+void
+load_pair_static (__vector_pair *p)
+{
+ *p = sp; /* plxvp, stxvp. */
+}
+
+void
+store_pair_static (__vector_pair *p)
+{
+ sp = *p; /* lxvp, pstxvp. */
+}
+
+void
+load_store_quad (__vector_quad *p, __vector_quad *q)
+{
+ *p = *q; /* 2x lxvp, 2x stxvp. */
+}
+
+void
+load_store_quad_1 (__vector_quad *p, __vector_quad *q)
+{
+ p[1] = q[1]; /* 2x lxvp, 2x stxvp. */
+}
+
+void
+load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q)
+{
+ p[0x10000] = q[0x10000]; /* 2x plxvp, 2x pstxvp. */
+}
+
+void
+load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n)
+{
+ p[n] = q[n]; /* 2x lxvp, 2x stxvp. */
+}
+
+void
+load_quad_static (__vector_quad *p)
+{
+ *p = sq; /* 2x plxvp, 2x stxvp. */
+}
+
+void
+store_quad_static (__vector_quad *p)
+{
+ sq = *p; /* 2x lxvp, 2x pstxvp. */
+}
+
+/* { dg-final { scan-assembler {\mp?lxvpx?\M} } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-2.c b/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-2.c
new file mode 100644
index 00000000000..54f2e16314f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-2.c
@@ -0,0 +1,81 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mno-load-vector-pair -mmma" } */
+
+/* Test if we do not generate load vector pair instructions if the user uses
+ the -mno-load-vector-pair option. */
+static __vector_quad sq;
+static __vector_pair sp;
+
+void
+load_store_pair (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* 2x lxv, stxvp. */
+}
+
+void
+load_store_pair_1 (__vector_pair *p, __vector_pair *q)
+{
+ p[1] = q[1]; /* 2x lxv, stxvp. */
+}
+
+void
+load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q)
+{
+ p[0x10000] = q[0x10000]; /* 2x plxv, pstxvp. */
+}
+
+void
+load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n)
+{
+ p[n] = q[n]; /* 2x lxv, stxvp. */
+}
+
+void
+load_pair_static (__vector_pair *p)
+{
+ *p = sp; /* 2x plxv, stxvp. */
+}
+
+void
+store_pair_static (__vector_pair *p)
+{
+ sp = *p; /* 2x lxv, pstxvp. */
+}
+
+void
+load_store_quad (__vector_quad *p, __vector_quad *q)
+{
+ *p = *q; /* 4x lxv, 2x stxvp. */
+}
+
+void
+load_store_quad_1 (__vector_quad *p, __vector_quad *q)
+{
+ p[1] = q[1]; /* 4x lxv, 2x stxvp. */
+}
+
+void
+load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q)
+{
+ p[0x10000] = q[0x10000]; /* 4x plxv, 2x pstxvp. */
+}
+
+void
+load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n)
+{
+ p[n] = q[n]; /* 4x lxv, 2x stxvp. */
+}
+
+void
+load_quad_static (__vector_quad *p)
+{
+ *p = sq; /* 4x plxv, 2x stxvp. */
+}
+
+void
+store_quad_static (__vector_quad *p)
+{
+ sq = *p; /* 4x lxv, 2x pstxvp. */
+}
+
+/* { dg-final { scan-assembler-not {\mp?lxvpx?\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c
new file mode 100644
index 00000000000..c1a36bf5fff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c
@@ -0,0 +1,82 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mstore-vector-pair -mmma" } */
+
+/* Test if we generate store vector pair instructions if the user uses the
+ -mstore-vector-pair option. */
+static __vector_quad sq;
+static __vector_pair sp;
+
+void
+load_store_pair (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* lxvp, stxvp. */
+}
+
+void
+load_store_pair_1 (__vector_pair *p, __vector_pair *q)
+{
+ p[1] = q[1]; /* lxvp, stxvp. */
+}
+
+void
+load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q)
+{
+ p[0x10000] = q[0x10000]; /* plxvp, pstxvp. */
+}
+
+void
+load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n)
+{
+ p[n] = q[n]; /* lxvpx, stxvpx. */
+}
+
+void
+load_pair_static (__vector_pair *p)
+{
+ *p = sp; /* plxvp, stxvp. */
+}
+
+void
+store_pair_static (__vector_pair *p)
+{
+ sp = *p; /* lxvp, pstxvp. */
+}
+
+void
+load_store_quad (__vector_quad *p, __vector_quad *q)
+{
+ *p = *q; /* 2x lxvp, 2x stxvp. */
+}
+
+void
+load_store_quad_1 (__vector_quad *p, __vector_quad *q)
+{
+ p[1] = q[1]; /* 2x lxvp, 2x stxvp. */
+}
+
+void
+load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q)
+{
+ p[0x10000] = q[0x10000]; /* 2x plxvp, 2x pstxvp. */
+}
+
+void
+load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n)
+{
+ p[n] = q[n]; /* 2x lxvp, 2x stxvp. */
+}
+
+void
+load_quad_static (__vector_quad *p)
+{
+ *p = sq; /* 2x plxvp, 2x stxvp. */
+}
+
+void
+store_quad_static (__vector_quad *p)
+{
+ sq = *p; /* 2x lxvp, 2x pstxvp. */
+}
+
+/* { dg-final { scan-assembler {\mp?stxvpx?\M} } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c
new file mode 100644
index 00000000000..b8c3bdbfd89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c
@@ -0,0 +1,81 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mno-store-vector-pair -mmma" } */
+
+/* Test if we do not generate store vector pair instructions if the user uses
+ the -mno-store-vector-pair option. */
+static __vector_quad sq;
+static __vector_pair sp;
+
+void
+load_store_pair (__vector_pair *p, __vector_pair *q)
+{
+ *p = *q; /* lxvp, 2x stxv. */
+}
+
+void
+load_store_pair_1 (__vector_pair *p, __vector_pair *q)
+{
+ p[1] = q[1]; /* lxvp, 2x stxv. */
+}
+
+void
+load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q)
+{
+ p[0x10000] = q[0x10000]; /* plxvp, 2x pstxv. */
+}
+
+void
+load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n)
+{
+ p[n] = q[n]; /* lxvp, 2x stxv. */
+}
+
+void
+load_pair_static (__vector_pair *p)
+{
+ *p = sp; /* plxvp, 2x stxv. */
+}
+
+void
+store_pair_static (__vector_pair *p)
+{
+ sp = *p; /* lxvp, 2x pstxv. */
+}
+
+void
+load_store_quad (__vector_quad *p, __vector_quad *q)
+{
+ *p = *q; /* 2x lxvp, 4x stxv. */
+}
+
+void
+load_store_quad_1 (__vector_quad *p, __vector_quad *q)
+{
+ p[1] = q[1]; /* 2x lxvp, 4x stxv. */
+}
+
+void
+load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q)
+{
+ p[0x10000] = q[0x10000]; /* 2x plxvp, 4x pstxv. */
+}
+
+void
+load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n)
+{
+ p[n] = q[n]; /* 2x lxvp, 4x stxv. */
+}
+
+void
+load_quad_static (__vector_quad *p)
+{
+ *p = sq; /* 2x plxvp, 4x stxv. */
+}
+
+void
+store_quad_static (__vector_quad *p)
+{
+ sq = *p; /* 2x lxvp, 4x pstxv. */
+}
+
+/* { dg-final { scan-assembler-not {\mp?stxvpx?\M} } } */
More information about the Gcc-cvs
mailing list