[PATCH] rs6000: Expand vec_insert in expander instead of gimple [PR79251]
Xiong Hu Luo
luoxhu@linux.ibm.com
Mon Aug 31 09:06:47 GMT 2020
vec_insert accepts 3 arguments: arg0 is the input vector, arg1 is the value
to be inserted, and arg2 is the position in arg0 at which to insert arg1.
This patch adds __builtin_vec_insert_v4si[v4sf,v2di,v2df,v8hi,v16qi] so that
vec_insert is not expanded too early in the gimple stage when arg2 is
variable, which avoids generating store-hit-load instructions.
For Power9 V4SI:
addi 9,1,-16
rldic 6,6,2,60
stxv 34,-16(1)
stwx 5,9,6
lxv 34,-16(1)
=>
addis 9,2,.LC0@toc@ha
addi 9,9,.LC0@toc@l
mtvsrwz 33,5
lxv 32,0(9)
sradi 9,6,2
addze 9,9
sldi 9,9,2
subf 9,9,6
subfic 9,9,3
sldi 9,9,2
subfic 9,9,20
lvsl 13,0,9
xxperm 33,33,45
xxperm 32,32,45
xxsel 34,34,33,32
Although the instruction count increases from 5 to 15, performance improves
by 60% in typical cases.
gcc/ChangeLog:
* config/rs6000/altivec.md (altivec_lvsl_reg_<mode>2): Extend to
SDI mode.
* config/rs6000/rs6000-builtin.def (BU_VSX_X): Add support
macros for vec_insert built-in functions.
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
Generate built-in calls for vec_insert.
* config/rs6000/rs6000-call.c (altivec_expand_vec_insert_builtin):
New function.
(altivec_expand_builtin): Add case entry for
VSX_BUILTIN_VEC_INSERT_V16QI, VSX_BUILTIN_VEC_INSERT_V8HI,
VSX_BUILTIN_VEC_INSERT_V4SF, VSX_BUILTIN_VEC_INSERT_V4SI,
VSX_BUILTIN_VEC_INSERT_V2DF, VSX_BUILTIN_VEC_INSERT_V2DI.
(altivec_init_builtins): Define the __builtin_vec_insert_* functions.
* config/rs6000/rs6000-protos.h (rs6000_expand_vector_insert):
New declaration.
* config/rs6000/rs6000.c (rs6000_expand_vector_insert):
New function.
* config/rs6000/rs6000.md (FQHS): New mode iterator.
(FD): New mode iterator.
(p8_mtvsrwz_v16qi<mode>2): New define_insn.
(p8_mtvsrd_v16qi<mode>2): New define_insn.
* config/rs6000/vsx.md: Call gen_altivec_lvsl_reg_di2.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/pr79251.c: New test.
---
gcc/config/rs6000/altivec.md | 4 +-
gcc/config/rs6000/rs6000-builtin.def | 6 +
gcc/config/rs6000/rs6000-c.c | 61 +++++++++
gcc/config/rs6000/rs6000-call.c | 74 +++++++++++
gcc/config/rs6000/rs6000-protos.h | 1 +
gcc/config/rs6000/rs6000.c | 146 +++++++++++++++++++++
gcc/config/rs6000/rs6000.md | 19 +++
gcc/config/rs6000/vsx.md | 2 +-
gcc/testsuite/gcc.target/powerpc/pr79251.c | 23 ++++
9 files changed, 333 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.c
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 0a2e634d6b0..66b636059a6 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2772,10 +2772,10 @@
DONE;
})
-(define_insn "altivec_lvsl_reg"
+(define_insn "altivec_lvsl_reg_<mode>2"
[(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
(unspec:V16QI
- [(match_operand:DI 1 "gpc_reg_operand" "b")]
+ [(match_operand:SDI 1 "gpc_reg_operand" "b")]
UNSPEC_LVSL_REG))]
"TARGET_ALTIVEC"
"lvsl %0,0,%1"
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index f9f0fece549..d095b365c14 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2047,6 +2047,12 @@ BU_VSX_X (VEC_INIT_V2DI, "vec_init_v2di", CONST)
BU_VSX_X (VEC_SET_V1TI, "vec_set_v1ti", CONST)
BU_VSX_X (VEC_SET_V2DF, "vec_set_v2df", CONST)
BU_VSX_X (VEC_SET_V2DI, "vec_set_v2di", CONST)
+BU_VSX_X (VEC_INSERT_V16QI, "vec_insert_v16qi", CONST)
+BU_VSX_X (VEC_INSERT_V8HI, "vec_insert_v8hi", CONST)
+BU_VSX_X (VEC_INSERT_V4SI, "vec_insert_v4si", CONST)
+BU_VSX_X (VEC_INSERT_V4SF, "vec_insert_v4sf", CONST)
+BU_VSX_X (VEC_INSERT_V2DI, "vec_insert_v2di", CONST)
+BU_VSX_X (VEC_INSERT_V2DF, "vec_insert_v2df", CONST)
BU_VSX_X (VEC_EXT_V1TI, "vec_ext_v1ti", CONST)
BU_VSX_X (VEC_EXT_V2DF, "vec_ext_v2df", CONST)
BU_VSX_X (VEC_EXT_V2DI, "vec_ext_v2di", CONST)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 2fad3d94706..03b00738a5e 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -1563,6 +1563,67 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
return build_call_expr (call, 3, arg1, arg0, arg2);
}
+ else if (VECTOR_MEM_VSX_P (mode))
+ {
+ tree call = NULL_TREE;
+
+ arg2 = fold_for_warn (arg2);
+
+ /* If the second argument is variable, we can optimize it if we are
+ generating 64-bit code on a machine with direct move. */
+ if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT)
+ {
+ switch (mode)
+ {
+ default:
+ break;
+
+ case E_V2DImode:
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V2DI];
+ break;
+
+ case E_V2DFmode:
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V2DF];
+ break;
+
+ case E_V4SFmode:
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V4SF];
+ break;
+
+ case E_V4SImode:
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V4SI];
+ break;
+
+ case E_V8HImode:
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V8HI];
+ break;
+
+ case E_V16QImode:
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V16QI];
+ break;
+ }
+ }
+
+ if (call)
+ {
+ if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
+ arg2 = build_int_cst (TREE_TYPE (arg2), 0);
+ else
+ arg2 = build_binary_op (
+ loc, BIT_AND_EXPR, arg2,
+ build_int_cst (TREE_TYPE (arg2),
+ TYPE_VECTOR_SUBPARTS (arg1_type) - 1),
+ 0);
+ tree result
+ = build_call_expr (call, 3, arg1,
+ convert (TREE_TYPE (arg1_type), arg0),
+ convert (integer_type_node, arg2));
+ /* Coerce the result to vector element type. May be no-op. */
+ result = fold_convert (TREE_TYPE (arg1), result);
+ return result;
+ }
+ }
+
/* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */
arg1_inner_type = TREE_TYPE (arg1_type);
if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index e39cfcf672b..339e9ae87e3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -10660,6 +10660,40 @@ altivec_expand_vec_set_builtin (tree exp)
return op0;
}
+/* Expand vec_insert builtin. */
+static rtx
+altivec_expand_vec_insert_builtin (tree exp, rtx target)
+{
+ machine_mode tmode, mode1, mode2;
+ tree arg0, arg1, arg2;
+ rtx op0 = NULL_RTX, op1, op2;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ tmode = TYPE_MODE (TREE_TYPE (arg0));
+ mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
+ mode2 = TYPE_MODE ((TREE_TYPE (arg2)));
+ gcc_assert (VECTOR_MODE_P (tmode));
+
+ op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
+ op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
+ op2 = expand_expr (arg2, NULL_RTX, mode2, EXPAND_NORMAL);
+
+ if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
+ op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
+
+ op0 = force_reg (tmode, op0);
+ op1 = force_reg (mode1, op1);
+ op2 = force_reg (mode2, op2);
+
+ target = gen_reg_rtx (V16QImode);
+ rs6000_expand_vector_insert (target, op0, op1, op2);
+
+ return target;
+}
+
/* Expand vec_ext builtin. */
static rtx
altivec_expand_vec_ext_builtin (tree exp, rtx target)
@@ -10922,6 +10956,14 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case VSX_BUILTIN_VEC_SET_V1TI:
return altivec_expand_vec_set_builtin (exp);
+ case VSX_BUILTIN_VEC_INSERT_V16QI:
+ case VSX_BUILTIN_VEC_INSERT_V8HI:
+ case VSX_BUILTIN_VEC_INSERT_V4SF:
+ case VSX_BUILTIN_VEC_INSERT_V4SI:
+ case VSX_BUILTIN_VEC_INSERT_V2DF:
+ case VSX_BUILTIN_VEC_INSERT_V2DI:
+ return altivec_expand_vec_insert_builtin (exp, target);
+
case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
@@ -13681,6 +13723,38 @@ altivec_init_builtins (void)
integer_type_node, NULL_TREE);
def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
+ /* Access to the vec_insert patterns. */
+ ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
+ intQI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_vec_insert_v16qi", ftype,
+ VSX_BUILTIN_VEC_INSERT_V16QI);
+
+ ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
+ intHI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_vec_insert_v8hi", ftype, VSX_BUILTIN_VEC_INSERT_V8HI);
+
+ ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
+ integer_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_vec_insert_v4si", ftype, VSX_BUILTIN_VEC_INSERT_V4SI);
+
+ ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
+ float_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_vec_insert_v4sf", ftype, VSX_BUILTIN_VEC_INSERT_V4SF);
+
+ ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ intDI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_vec_insert_v2di", ftype, VSX_BUILTIN_VEC_INSERT_V2DI);
+
+ ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
+ double_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_vec_insert_v2df", ftype, VSX_BUILTIN_VEC_INSERT_V2DF);
+
/* Access to the vec_extract patterns. */
ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
integer_type_node, NULL_TREE);
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 28e859f4381..78b5b31d79f 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -58,6 +58,7 @@ extern bool rs6000_split_128bit_ok_p (rtx []);
extern void rs6000_expand_float128_convert (rtx, rtx, bool);
extern void rs6000_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
+extern void rs6000_expand_vector_insert (rtx, rtx, rtx, rtx);
extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index fe93cf6ff2b..afa845f3dff 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6788,6 +6788,152 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
emit_insn (gen_rtx_SET (target, x));
}
+/* Insert VAL into VEC at variable index IDX; the result is put in TARGET. */
+
+void
+rs6000_expand_vector_insert (rtx target, rtx vec, rtx val, rtx idx)
+{
+ machine_mode mode = GET_MODE (vec);
+
+ if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (idx))
+ gcc_unreachable ();
+ else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)
+ && TARGET_DIRECT_MOVE_64BIT)
+ {
+ gcc_assert (GET_MODE (idx) == E_SImode);
+ machine_mode inner_mode = GET_MODE (val);
+ HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
+
+ rtx tmp = gen_reg_rtx (GET_MODE (idx));
+ if (GET_MODE_SIZE (inner_mode) == 8)
+ {
+ if (!BYTES_BIG_ENDIAN)
+ {
+ /* idx = 1 - idx. */
+ emit_insn (gen_subsi3 (tmp, GEN_INT (1), idx));
+ /* idx = idx * 8. */
+ emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (3)));
+ /* idx = 16 - idx. */
+ emit_insn (gen_subsi3 (tmp, GEN_INT (16), tmp));
+ }
+ else
+ {
+ emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (3)));
+ emit_insn (gen_subsi3 (tmp, GEN_INT (16), tmp));
+ }
+ }
+ else if (GET_MODE_SIZE (inner_mode) == 4)
+ {
+ if (!BYTES_BIG_ENDIAN)
+ {
+ /* idx = 3 - idx. */
+ emit_insn (gen_subsi3 (tmp, GEN_INT (3), idx));
+ /* idx = idx * 4. */
+ emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (2)));
+ /* idx = 20 - idx. */
+ emit_insn (gen_subsi3 (tmp, GEN_INT (20), tmp));
+ }
+ else
+ {
+ emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (2)));
+ emit_insn (gen_subsi3 (tmp, GEN_INT (20), tmp));
+ }
+ }
+ else if (GET_MODE_SIZE (inner_mode) == 2)
+ {
+ if (!BYTES_BIG_ENDIAN)
+ {
+ /* idx = 7 - idx. */
+ emit_insn (gen_subsi3 (tmp, GEN_INT (7), idx));
+ /* idx = idx * 2. */
+ emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (1)));
+ /* idx = 22 - idx. */
+ emit_insn (gen_subsi3 (tmp, GEN_INT (22), tmp));
+ }
+ else
+ {
+ emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (1)));
+ emit_insn (gen_subsi3 (tmp, GEN_INT (22), tmp));
+ }
+ }
+ else if (GET_MODE_SIZE (inner_mode) == 1)
+ if (!BYTES_BIG_ENDIAN)
+ emit_insn (gen_addsi3 (tmp, idx, GEN_INT (8)));
+ else
+ emit_insn (gen_subsi3 (tmp, GEN_INT (23), idx));
+ else
+ gcc_unreachable ();
+
+ /* lxv vs32, mask.
+ DImode: 0xffffffffffffffff0000000000000000
+ SImode: 0x00000000ffffffff0000000000000000
+ HImode: 0x000000000000ffff0000000000000000.
+ QImode: 0x00000000000000ff0000000000000000. */
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtx mask_v2di = gen_reg_rtx (V2DImode);
+ rtvec v = rtvec_alloc (2);
+ if (!BYTES_BIG_ENDIAN)
+ {
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
+ }
+ else
+ {
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
+ }
+ emit_insn (
+ gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
+ rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
+ emit_insn (gen_rtx_SET (mask, sub_mask));
+
+ /* mtvsrd[wz] f0,val. */
+ rtx val_v16qi = gen_reg_rtx (V16QImode);
+ switch (inner_mode)
+ {
+ default:
+ gcc_unreachable ();
+ break;
+ case E_QImode:
+ emit_insn (gen_p8_mtvsrwz_v16qiqi2 (val_v16qi, val));
+ break;
+ case E_HImode:
+ emit_insn (gen_p8_mtvsrwz_v16qihi2 (val_v16qi, val));
+ break;
+ case E_SImode:
+ emit_insn (gen_p8_mtvsrwz_v16qisi2 (val_v16qi, val));
+ break;
+ case E_SFmode:
+ emit_insn (gen_p8_mtvsrwz_v16qisf2 (val_v16qi, val));
+ break;
+ case E_DImode:
+ emit_insn (gen_p8_mtvsrd_v16qidi2 (val_v16qi, val));
+ break;
+ case E_DFmode:
+ emit_insn (gen_p8_mtvsrd_v16qidf2 (val_v16qi, val));
+ break;
+ }
+
+ /* lvsl v1,0,idx. */
+ rtx pcv = gen_reg_rtx (V16QImode);
+ emit_insn (gen_altivec_lvsl_reg_si2 (pcv, tmp));
+
+ /* xxperm vs0,vs0,vs33. */
+ /* xxperm vs32,vs32,vs33. */
+ rtx val_perm = gen_reg_rtx (V16QImode);
+ rtx mask_perm = gen_reg_rtx (V16QImode);
+ emit_insn (
+ gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
+ emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
+
+ rtx sub_target = simplify_gen_subreg (V16QImode, vec, mode, 0);
+ emit_insn (gen_rtx_SET (target, sub_target));
+
+ /* xxsel vs34,vs34,vs0,vs32. */
+ emit_insn (gen_vector_select_v16qi (target, target, val_perm, mask_perm));
+ }
+}
+
/* Extract field ELT from VEC into TARGET. */
void
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 43b620ae1c0..b02fda836d4 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8713,6 +8713,25 @@
"mtvsrwz %x0,%1"
[(set_attr "type" "mftgpr")])
+(define_mode_iterator FQHS [SF QI HI SI])
+(define_mode_iterator FD [DF DI])
+
+(define_insn "p8_mtvsrwz_v16qi<mode>2"
+ [(set (match_operand:V16QI 0 "register_operand" "=wa")
+ (unspec:V16QI [(match_operand:FQHS 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRWZ))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrwz %x0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_mtvsrd_v16qi<mode>2"
+ [(set (match_operand:V16QI 0 "register_operand" "=wa")
+ (unspec:V16QI [(match_operand:FD 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRD))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrd %x0,%1"
+ [(set_attr "type" "mftgpr")])
+
(define_insn_and_split "reload_fpr_from_gpr<mode>"
[(set (match_operand:FMOVE64X 0 "register_operand" "=d")
(unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")]
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index dd750210758..7e82690d12d 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5349,7 +5349,7 @@
rtx rtx_vtmp = gen_reg_rtx (V16QImode);
rtx tmp = gen_reg_rtx (DImode);
- emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
+ emit_insn (gen_altivec_lvsl_reg_di2 (shift_mask, operands[2]));
emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.c b/gcc/testsuite/gcc.target/powerpc/pr79251.c
new file mode 100644
index 00000000000..877659a0146
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr79251.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -maltivec" } */
+
+#include <stddef.h>
+#include <altivec.h>
+
+#define TYPE int
+
+__attribute__ ((noinline))
+vector TYPE test (vector TYPE v, TYPE i, size_t n)
+{
+ vector TYPE v1 = v;
+ v1 = vec_insert (i, v, n);
+
+ return v1;
+}
+
+/* { dg-final { scan-assembler-not {\mstxw\M} } } */
+/* { dg-final { scan-assembler-times {\mlvsl\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxxperm\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxsel\M} 1 } } */
--
2.27.0.90.geebb51ba8c
More information about the Gcc-patches
mailing list