[gcc(refs/users/meissner/heads/work164-vpair)] Add vector pair init and splat.
Michael Meissner
meissner@gcc.gnu.org
Tue Apr 9 05:15:35 GMT 2024
https://gcc.gnu.org/g:aa0d0f245a6d4a763b754944463a9c1800394e35
commit aa0d0f245a6d4a763b754944463a9c1800394e35
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Apr 9 01:08:44 2024 -0400
Add vector pair init and splat.
2024-04-09 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New
built-in function.
(__builtin_vpair_f32_splat): Likewise.
(__builtin_vpair_f64_splat): Likewise.
* config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec.
(UNSPEC_VPAIR_SPLAT): Likewise.
(VPAIR_SPLAT_VMODE): New mode iterator.
(VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute.
(vpair_splat_name): Likewise.
(vpair_zero): New insn.
(vpair_splat_<vpair_splat_name>): New define_expand.
(vpair_splat_<vpair_splat_name>_internal): New insns.
gcc/testsuite/
* gcc.target/powerpc/vector-pair-5.c: New test.
* gcc.target/powerpc/vector-pair-6.c: Likewise.
Diff:
---
gcc/config/rs6000/rs6000-builtins.def | 10 +++
gcc/config/rs6000/vector-pair.md | 102 ++++++++++++++++++++++-
gcc/doc/extend.texi | 9 ++
gcc/testsuite/gcc.target/powerpc/vector-pair-5.c | 56 +++++++++++++
gcc/testsuite/gcc.target/powerpc/vector-pair-6.c | 56 +++++++++++++
5 files changed, 232 insertions(+), 1 deletion(-)
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 4362cbb8fc7..b757a8630ff 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -4132,6 +4132,10 @@
void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
STXVP nothing {mma,pair}
+;; Vector pair built-in functions.
+ v256 __builtin_vpair_zero ();
+ VPAIR_ZERO vpair_zero {mma}
+
;; Vector pair built-in functions with float elements
v256 __builtin_vpair_f32_abs (v256);
VPAIR_F32_ABS vpair_abs_v8sf2 {mma}
@@ -4169,6 +4173,9 @@
v256 __builtin_vpair_f32_nfms (v256, v256, v256);
VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma}
+ v256 __builtin_vpair_f32_splat (float);
+ VPAIR_F32_SPLAT vpair_splat_v8sf {mma}
+
v256 __builtin_vpair_f32_sub (v256, v256);
VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
@@ -4209,5 +4216,8 @@
v256 __builtin_vpair_f64_nfms (v256, v256, v256);
VPAIR_F64_NFMS vpair_nfms_v4df4 {mma}
+ v256 __builtin_vpair_f64_splat (double);
+ VPAIR_F64_SPLAT vpair_splat_v4df {mma}
+
v256 __builtin_vpair_f64_sub (v256, v256);
VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index 73ae46e6d40..39b419c6814 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -38,7 +38,9 @@
UNSPEC_VPAIR_NEG
UNSPEC_VPAIR_PLUS
UNSPEC_VPAIR_SMAX
- UNSPEC_VPAIR_SMIN])
+ UNSPEC_VPAIR_SMIN
+ UNSPEC_VPAIR_ZERO
+ UNSPEC_VPAIR_SPLAT])
;; Vector pair element ID that defines the scaler element within the vector pair.
(define_c_enum "vpair_element"
@@ -98,6 +100,104 @@
;; Map the scalar element ID into the appropriate insn type for divide.
(define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT "vecfdiv")
(VPAIR_ELEMENT_DOUBLE "vecdiv")])
+
+;; Mode iterator for the vector modes that we provide splat operations for.
+(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF])
+
+;; Map element mode to 128-bit vector mode for splat operations
+(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF")
+ (DF "V2DF")])
+
+;; Map either element mode or vector mode into the name for the splat insn.
+(define_mode_attr vpair_splat_name [(SF "v8sf")
+ (DF "v4df")
+ (V4SF "v8sf")
+ (V2DF "v4df")])
+
+;; Initialize a vector pair to 0
+(define_insn_and_split "vpair_zero"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+ (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 1) (match_dup 3))
+ (set (match_dup 2) (match_dup 3))]
+{
+ rtx op0 = operands[0];
+
+ operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0);
+ operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16);
+ operands[3] = CONST0_RTX (V2DFmode);
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "vecperm")])
+
+;; Create a vector pair with a value splat'ed (duplicated) to all of the
+;; elements.
+(define_expand "vpair_splat_<vpair_splat_name>"
+ [(use (match_operand:OO 0 "vsx_register_operand"))
+ (use (match_operand:SFDF 1 "input_operand"))]
+ "TARGET_MMA"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ machine_mode element_mode = <MODE>mode;
+
+ if (op1 == CONST0_RTX (element_mode))
+ {
+ emit_insn (gen_vpair_zero (op0));
+ DONE;
+ }
+
+ machine_mode vector_mode = <VPAIR_SPLAT_ELEMENT_TO_VMODE>mode;
+ rtx vec = gen_reg_rtx (vector_mode);
+ unsigned num_elements = GET_MODE_NUNITS (vector_mode);
+ rtvec elements = rtvec_alloc (num_elements);
+ for (size_t i = 0; i < num_elements; i++)
+ RTVEC_ELT (elements, i) = copy_rtx (op1);
+
+ rs6000_expand_vector_init (vec, gen_rtx_PARALLEL (vector_mode, elements));
+ emit_insn (gen_vpair_splat_<vpair_splat_name>_internal (op0, vec));
+ DONE;
+})
+
+;; Inner splat support. Operand 1 is the vector splat created above. Allow
+;; operand 1 to overlap with the output registers to eliminate one move
+;; instruction.
+(define_insn_and_split "vpair_splat_<vpair_splat_name>_internal"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+ (unspec:OO
+ [(match_operand:VPAIR_SPLAT_VMODE 1 "vsx_register_operand" "0,wa")]
+ UNSPEC_VPAIR_SPLAT))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op0_a = simplify_gen_subreg (<MODE>mode, op0, OOmode, 0);
+ rtx op0_b = simplify_gen_subreg (<MODE>mode, op0, OOmode, 16);
+ rtx op1 = operands[1];
+ unsigned op1_regno = reg_or_subregno (op1);
+
+ /* Check if the input is one of the output registers. */
+ if (op1_regno == reg_or_subregno (op0_a))
+ emit_move_insn (op0_b, op1);
+
+ else if (op1_regno == reg_or_subregno (op0_b))
+ emit_move_insn (op0_a, op1);
+
+ else
+ {
+ emit_move_insn (op0_a, op1);
+ emit_move_insn (op0_b, op1);
+ }
+
+ DONE;
+}
+ [(set_attr "length" "*,8")
+ (set_attr "type" "vecmove")])
;; Vector pair unary operations. The last argument in the UNSPEC is a
;; CONST_INT which identifies what the scalar element is.
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7494e3bcc6e..d20bef9b967 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -23915,6 +23915,13 @@ The @code{nfma} built-in is a combination of @code{neg} of the
The @code{nfms} built-in is a combination of @code{neg} of the
@code{fms} built-in.
+The following built-in function is independent of the type of the
+underlying vector:
+
+@smallexample
+__vector_pair __builtin_vpair_zero ();
+@end smallexample
+
The following built-in functions operate on pairs of
@code{vector float} values:
@@ -23935,6 +23942,7 @@ __vector_pair __builtin_vpair_f32_nfma (__vector_pair, __vector_pair,
__vector_pair);
__vector_pair __builtin_vpair_f32_nfms (__vector_pair, __vector_pair,
__vector_pair);
+__vector_pair __builtin_vpair_f32_splat (float);
__vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair);
@end smallexample
@@ -23958,6 +23966,7 @@ __vector_pair __builtin_vpair_f64_nfma (__vector_pair, __vector_pair,
__vector_pair);
__vector_pair __builtin_vpair_f64_nfms (__vector_pair, __vector_pair,
__vector_pair);
+__vector_pair __builtin_vpair_f64_splat (double);
__vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair);
@end smallexample
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c
new file mode 100644
index 00000000000..9b645e626e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector builtin code generates the expected instructions for
+ the vector pair zero and splat built-in functions when the vector pair
+ contains doubles. */
+
+void
+test_zero (__vector_pair *p)
+{
+ /* 2 xxspltib/xxlxor. */
+ *p = __builtin_vpair_zero ();
+}
+
+void
+test_splat_zero (__vector_pair *p)
+{
+ /* 2 xxspltib/xxlxor. */
+ *p = __builtin_vpair_f64_splat (0.0);
+}
+
+void
+test_splat_one (__vector_pair *p)
+{
+ /* xxspltidp, xxlor. */
+ *p = __builtin_vpair_f64_splat (1.0);
+}
+
+void
+test_splat_pi (__vector_pair *p)
+{
+ /* plxv, xxlor (note, we cannot use xxspltidp). */
+ *p = __builtin_vpair_f64_splat (3.1415926535);
+}
+
+void
+test_splat_arg (__vector_pair *p, double x)
+{
+ /* xxpermdi, xxlor. */
+ *p = __builtin_vpair_f64_splat (x);
+}
+
+void
+test_splat_mem (__vector_pair *p, double *q)
+{
+ /* lxvdsx, xxlor. */
+ *p = __builtin_vpair_f64_splat (*q);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvdsx\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mp?lxvx?\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M|\mxxlxor\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c
new file mode 100644
index 00000000000..5ec53d4bfc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector builtin code generates the expected instructions for
+ the vector pair zero and splat built-in functions when the vector pair
+ contains floats. */
+
+void
+test_zero (__vector_pair *p)
+{
+ /* 2 xxspltib/xxlxor. */
+ *p = __builtin_vpair_zero ();
+}
+
+void
+test_splat_zero (__vector_pair *p)
+{
+ /* 2 xxspltib/xxlxor. */
+ *p = __builtin_vpair_f32_splat (0.0f);
+}
+
+void
+test_splat_one (__vector_pair *p)
+{
+ /* xxspltiw, xxlor. */
+ *p = __builtin_vpair_f32_splat (1.0f);
+}
+
+void
+test_splat_pi (__vector_pair *p)
+{
+ /* xxspltiw, xxlor. */
+ *p = __builtin_vpair_f32_splat (3.1415926535f);
+}
+
+void
+test_splat_arg (__vector_pair *p, float x)
+{
+ /* xscvdpspn, xxspltw, xxlor. */
+ *p = __builtin_vpair_f32_splat (x);
+}
+
+void
+test_splat_mem (__vector_pair *p, float *q)
+{
+ /* lxvwsx, xxlor. */
+ *p = __builtin_vpair_f32_splat (*q);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvwsx\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxscvdpspn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M|\mxxlxor\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxspltiw\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxspltw\M} 1 } } */
More information about the Gcc-cvs
mailing list