[gcc(refs/users/meissner/heads/work043)] Load V2DF vector constants with XXSPLTIDP
Michael Meissner
meissner@gcc.gnu.org
Tue Mar 30 20:28:57 GMT 2021
https://gcc.gnu.org/g:0f1494f41ae272ef267c0de2a5b9382b2ca0aacd
commit 0f1494f41ae272ef267c0de2a5b9382b2ca0aacd
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Mar 30 16:28:19 2021 -0400
Load V2DF vector constants with XXSPLTIDP
This patch adds support for loading a V2DF vector constant in which both
elements are the same DF value, provided that value can be loaded with the
XXSPLTIDP instruction.
gcc/
2021-03-30 Michael Meissner <meissner@linux.ibm.com>
* config/rs6000/predicates.md (xxspltidp_operand): Add V2DF
support.
* config/rs6000/rs6000.c (rs6000_expand_vector_init): If we can,
load up a V2DF vector constant with VEC_DUPLICATE.
* config/rs6000/vsx.md (V2DF constant splitter): New splitter.
(vsx_mov<mode>_64bit): Support loading V2DF constants via the
XXSPLTIDP instruction.
(*vsx_mov<mode>_32bit): Support loading V2DF constants via the
XXSPLTIDP instruction.
(*vsx_splat_v2df_const): New insn.
Diff:
---
gcc/config/rs6000/predicates.md | 42 ++++++++++++++++-----
gcc/config/rs6000/rs6000.c | 9 +++++
gcc/config/rs6000/vsx.md | 81 ++++++++++++++++++++++++++++++++++-------
3 files changed, 109 insertions(+), 23 deletions(-)
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 089a2597ce7..b5b17a8b7c6 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -565,25 +565,47 @@
(ior (match_operand 0 "vsx_register_operand")
(match_operand 0 "reg_or_logical_cint_operand")))
-;; Return 1 if operand is a SF/DF CONST_DOUBLE that can be loaded via the ISA
-;; 3.1 XXSPLTIDP instruction. This function has to check, if the immediate
-;; specifies a single-precision denormal value (i.e., bits 1:8 equal to 0 and
-;; bits 9:31 not equal to 0), since the result is undefined in the hardware.
+;; Return 1 if operand is a SF/DF CONST_DOUBLE, or a V2DF CONST_VECTOR or
+;; VEC_DUPLICATE of such a constant, that can be loaded via the ISA 3.1
+;; XXSPLTIDP instruction. This predicate has to check whether the immediate
+;; specifies a single-precision denormal value (i.e., bits 1:8 equal to 0 and
+;; bits 9:31 not equal to 0), since the result is undefined in the hardware.
(define_predicate "xxspltidp_operand"
- (match_code "const_double")
+ (match_code "const_double,const_vector,vec_duplicate")
{
long value;
+ rtx element;
if (!TARGET_POWER10 || !TARGET_VSX)
return 0;
- if (mode != SFmode && mode != DFmode)
+ if (mode == V2DFmode)
+ {
+ /* Handle VEC_DUPLICATE and CONST_VECTOR. */
+ if (GET_CODE (op) == VEC_DUPLICATE)
+ element = XEXP (op, 0);
+
+ else if (GET_CODE (op) == CONST_VECTOR)
+ {
+ element = CONST_VECTOR_ELT (op, 0);
+ if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, 1)))
+ return 0;
+ }
+
+ else
+ return 0;
+ }
+
+ else if (mode == SFmode || mode == DFmode)
+ element = op;
+
+ else
return 0;
- if (!CONST_DOUBLE_P (op))
+ if (!CONST_DOUBLE_P (element))
return 0;
- const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+ const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (element);
if (!exact_real_truncate (SFmode, rv))
return 0;
@@ -690,7 +712,9 @@
if (zero_constant (op, mode) || all_ones_constant (op, mode))
return true;
- if (TARGET_POWER10 && xxspltiw_constant_p (op, mode, &constant))
+ if (TARGET_POWER10
+ && (xxspltiw_constant_p (op, mode, &constant)
+ || xxspltidp_operand (op, mode)))
return true;
if (TARGET_P9_VECTOR
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 9d5fe4f2bd4..7ce1990b883 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6694,6 +6694,15 @@ rs6000_expand_vector_init (rtx target, rtx vals)
return;
}
+ /* Generate XXSPLTIDP if we can. */
+ if (TARGET_POWER10 && all_same && mode == V2DFmode
+ && xxspltidp_operand (XVECEXP (vals, 0, 0), DFmode))
+ {
+ rtx dup = gen_rtx_VEC_DUPLICATE (mode, XVECEXP (vals, 0, 0));
+ emit_insn (gen_rtx_SET (target, dup));
+ return;
+ }
+
rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
if ((int_vector_p || TARGET_VSX) && all_const_zero)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4404407d330..5acd12d7fb4 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1158,6 +1158,20 @@
[(set_attr "type" "vecperm")
(set_attr "length" "8")])
+;; Split V2DF vector constants that can be loaded with XXSPLTIDP.
+;; The xxspltidp_operand predicate matches VEC_DUPLICATE or CONST_VECTOR, but
+;; only the CONST_VECTOR case needs to be split. An all-zero vector is not
+;; split because the move pattern already handles setting the vector to 0.
+(define_split
+ [(set (match_operand:V2DF 0 "vsx_register_operand")
+ (match_operand:V2DF 1 "xxspltidp_operand"))]
+ "TARGET_POWER10 && TARGET_VSX && GET_CODE (operands[1]) == CONST_VECTOR
+ && operands[1] != CONST0_RTX (V2DFmode)"
+ [(set (match_dup 0)
+ (vec_duplicate:V2DF (match_dup 2)))]
+{
+ operands[2] = CONST_VECTOR_ELT (operands[1], 0);
+})
;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTISB
;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
@@ -1167,17 +1181,17 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
-;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
+;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) XXSPLTIDP
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- ?wa, v, <??r>, wZ, v")
+ ?wa, v, <??r>, wZ, v, wa")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- ?jwM, W, <nW>, v, wZ"))]
+ ?jwM, W, <nW>, v, wZ, eF"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
&& (register_operand (operands[0], <MODE>mode)
@@ -1188,36 +1202,44 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecsimple, *, *, vecstore, vecload")
+ vecsimple, *, *, vecstore, vecload, vecperm")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *, 5, 2, *, *")
+ *, 5, 2, *, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *, *, *, *, *")
+ *, *, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *, 20, 8, *, *")
+ *, 20, 8, *, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- <VSisa>, *, *, *, *")])
+ <VSisa>, *, *, *, *, p10")
+ (set_attr "prefixed"
+ "*, *, *, *, *, *,
+ *, *, *, *, *, *,
+ *, *, *, *, *, yes")
+ (set_attr "prefixed_prepend_p"
+ "*, *, *, *, *, *,
+ *, *, *, *, *, *,
+ *, *, *, *, *, no")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
-;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
+;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const XXSPLTIDP
;; LVX (VMX) STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
- wa, v, ?wa, v, <??r>,
+ wa, v, ?wa, v, <??r>, wa,
wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
- wE, jwM, ?jwM, W, <nW>,
+ wE, jwM, ?jwM, W, <nW>, eF,
v, wZ"))]
"!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1228,15 +1250,23 @@
}
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
- vecsimple, vecsimple, vecsimple, *, *,
+ vecsimple, vecsimple, vecsimple, *, *, vecperm,
vecstore, vecload")
(set_attr "length"
"*, *, *, 16, 16, 16,
- *, *, *, 20, 16,
+ *, *, *, 20, 16, *,
*, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
- p9v, *, <VSisa>, *, *,
+ p9v, *, <VSisa>, *, *, p10,
+ *, *")
+ (set_attr "prefixed"
+ "*, *, *, *, *, *,
+ *, *, *, *, *, yes,
+ *, *")
+ (set_attr "prefixed_prepend_p"
+ "*, *, *, *, *, *,
+ *, *, *, *, *, no,
*, *")])
;; Explicit load/store expanders for the builtin functions
@@ -4471,6 +4501,9 @@
rtx op1 = operands[1];
if (MEM_P (op1))
operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
+ else if (TARGET_POWER10 && <MODE>mode == V2DFmode
+ && xxspltidp_operand (op1, <MODE>mode))
+ ;
else if (!REG_P (op1))
op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
})
@@ -4493,6 +4526,26 @@
"lxvdsx %x0,%y1"
[(set_attr "type" "vecload")])
+;; Load V2DFmode constant via the ISA 3.1 XXSPLTIDP instruction
+(define_insn "*vsx_splat_v2df_const"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
+ (vec_duplicate:V2DF (match_operand:DF 1 "xxspltidp_operand" "j,eF")))]
+ "TARGET_POWER10"
+{
+ rtx op1 = operands[1];
+ if (op1 == CONST0_RTX (DFmode))
+ return "xxspltib %x0,0";
+
+ long value;
+ const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op1);
+ REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
+ operands[2] = GEN_INT (value);
+ return "xxspltidp %x0,%2";
+}
+ [(set_attr "type" "vecperm")
+ (set_attr "prefixed" "*,yes")
+ (set_attr "prefixed_prepend_p" "*,no")])
+
;; V4SI splat support
(define_insn "vsx_splat_v4si"
[(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
More information about the Gcc-cvs
mailing list