[PATCH] rs6000: Expand vec_insert in expander instead of gimple [PR79251]

Xiong Hu Luo luoxhu@linux.ibm.com
Mon Aug 31 09:06:47 GMT 2020


vec_insert accepts 3 arguments: arg0 is the value to be inserted, arg1 is
the input vector, and arg2 is the position in arg1 at which to insert arg0.
This patch adds __builtin_vec_insert_v4si[v4sf,v2di,v2df,v8hi,v16qi] so that
vec_insert is not expanded too early at the gimple stage when arg2 is
variable, which avoids generating store-hit-load instructions.

For Power9 V4SI:
	addi 9,1,-16
	rldic 6,6,2,60
	stxv 34,-16(1)
	stwx 5,9,6
	lxv 34,-16(1)
=>
	addis 9,2,.LC0@toc@ha
	addi 9,9,.LC0@toc@l
	mtvsrwz 33,5
	lxv 32,0(9)
	sradi 9,6,2
	addze 9,9
	sldi 9,9,2
	subf 9,9,6
	subfic 9,9,3
	sldi 9,9,2
	subfic 9,9,20
	lvsl 13,0,9
	xxperm 33,33,45
	xxperm 32,32,45
	xxsel 34,34,33,32

Though the instruction count increases from 5 to 15, performance improves
by 60% in typical cases.

gcc/ChangeLog:

	* config/rs6000/altivec.md (altivec_lvsl_reg_<mode>2): Extend to
	SDI mode.
	* config/rs6000/rs6000-builtin.def (BU_VSX_X): Add support
	macros for vec_insert built-in functions.
	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
	Generate built-in calls for vec_insert.
	* config/rs6000/rs6000-call.c (altivec_expand_vec_insert_builtin):
	New function.
	(altivec_expand_builtin): Add case entry for
	VSX_BUILTIN_VEC_INSERT_V16QI, VSX_BUILTIN_VEC_INSERT_V8HI,
	VSX_BUILTIN_VEC_INSERT_V4SF,  VSX_BUILTIN_VEC_INSERT_V4SI,
	VSX_BUILTIN_VEC_INSERT_V2DF,  VSX_BUILTIN_VEC_INSERT_V2DI.
	(altivec_init_builtins): Define the __builtin_vec_insert_* built-in
	functions.
	* config/rs6000/rs6000-protos.h (rs6000_expand_vector_insert):
	New declaration.
	* config/rs6000/rs6000.c (rs6000_expand_vector_insert):
	New function.
	* config/rs6000/rs6000.md (FQHS): New mode iterator.
	(FD): New mode iterator.
	(p8_mtvsrwz_v16qi<mode>2): New define_insn.
	(p8_mtvsrd_v16qi<mode>2): New define_insn.
	* config/rs6000/vsx.md: Call gen_altivec_lvsl_reg_di2.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr79251.c: New test.
---
 gcc/config/rs6000/altivec.md               |   4 +-
 gcc/config/rs6000/rs6000-builtin.def       |   6 +
 gcc/config/rs6000/rs6000-c.c               |  61 +++++++++
 gcc/config/rs6000/rs6000-call.c            |  74 +++++++++++
 gcc/config/rs6000/rs6000-protos.h          |   1 +
 gcc/config/rs6000/rs6000.c                 | 146 +++++++++++++++++++++
 gcc/config/rs6000/rs6000.md                |  19 +++
 gcc/config/rs6000/vsx.md                   |   2 +-
 gcc/testsuite/gcc.target/powerpc/pr79251.c |  23 ++++
 9 files changed, 333 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.c

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 0a2e634d6b0..66b636059a6 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2772,10 +2772,10 @@
   DONE;
 })
 
-(define_insn "altivec_lvsl_reg"
+(define_insn "altivec_lvsl_reg_<mode>2"
   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
 	(unspec:V16QI
-	[(match_operand:DI 1 "gpc_reg_operand" "b")]
+	[(match_operand:SDI 1 "gpc_reg_operand" "b")]
 	UNSPEC_LVSL_REG))]
   "TARGET_ALTIVEC"
   "lvsl %0,0,%1"
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index f9f0fece549..d095b365c14 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2047,6 +2047,12 @@ BU_VSX_X (VEC_INIT_V2DI,      "vec_init_v2di",	CONST)
 BU_VSX_X (VEC_SET_V1TI,	      "vec_set_v1ti",	CONST)
 BU_VSX_X (VEC_SET_V2DF,	      "vec_set_v2df",	CONST)
 BU_VSX_X (VEC_SET_V2DI,	      "vec_set_v2di",	CONST)
+BU_VSX_X (VEC_INSERT_V16QI,	      "vec_insert_v16qi",	CONST)
+BU_VSX_X (VEC_INSERT_V8HI,	      "vec_insert_v8hi",	CONST)
+BU_VSX_X (VEC_INSERT_V4SI,	      "vec_insert_v4si",	CONST)
+BU_VSX_X (VEC_INSERT_V4SF,	      "vec_insert_v4sf",	CONST)
+BU_VSX_X (VEC_INSERT_V2DI,	      "vec_insert_v2di",	CONST)
+BU_VSX_X (VEC_INSERT_V2DF,	      "vec_insert_v2df",	CONST)
 BU_VSX_X (VEC_EXT_V1TI,	      "vec_ext_v1ti",	CONST)
 BU_VSX_X (VEC_EXT_V2DF,	      "vec_ext_v2df",	CONST)
 BU_VSX_X (VEC_EXT_V2DI,	      "vec_ext_v2di",	CONST)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 2fad3d94706..03b00738a5e 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -1563,6 +1563,67 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	  return build_call_expr (call, 3, arg1, arg0, arg2);
 	}
 
+      else if (VECTOR_MEM_VSX_P (mode))
+	{
+	  tree call = NULL_TREE;
+
+	  arg2 = fold_for_warn (arg2);
+
+	  /* If the position argument is variable, we can optimize it if we are
+	     generating 64-bit code on a machine with direct move.  */
+	  if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT)
+	    {
+	      switch (mode)
+		{
+		default:
+		  break;
+
+		case E_V2DImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V2DI];
+		  break;
+
+		case E_V2DFmode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V2DF];
+		  break;
+
+		case E_V4SFmode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V4SF];
+		  break;
+
+		case E_V4SImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V4SI];
+		  break;
+
+		case E_V8HImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V8HI];
+		  break;
+
+		case E_V16QImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V16QI];
+		  break;
+		}
+	    }
+
+	  if (call)
+	    {
+	      if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
+		arg2 = build_int_cst (TREE_TYPE (arg2), 0);
+	      else
+		arg2 = build_binary_op (
+		  loc, BIT_AND_EXPR, arg2,
+		  build_int_cst (TREE_TYPE (arg2),
+				 TYPE_VECTOR_SUBPARTS (arg1_type) - 1),
+		  0);
+	      tree result
+		= build_call_expr (call, 3, arg1,
+				   convert (TREE_TYPE (arg1_type), arg0),
+				   convert (integer_type_node, arg2));
+	      /* Coerce the result to the vector type of arg1.  May be no-op.  */
+	      result = fold_convert (TREE_TYPE (arg1), result);
+	      return result;
+	    }
+	}
+
       /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */
       arg1_inner_type = TREE_TYPE (arg1_type);
       if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index e39cfcf672b..339e9ae87e3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -10660,6 +10660,40 @@ altivec_expand_vec_set_builtin (tree exp)
   return op0;
 }
 
+/* Expand the vec_insert builtin call EXP via rs6000_expand_vector_insert.  */
+static rtx
+altivec_expand_vec_insert_builtin (tree exp, rtx target)
+{
+  machine_mode tmode, mode1, mode2;
+  tree arg0, arg1, arg2;
+  rtx op0 = NULL_RTX, op1, op2;
+
+  arg0 = CALL_EXPR_ARG (exp, 0);
+  arg1 = CALL_EXPR_ARG (exp, 1);
+  arg2 = CALL_EXPR_ARG (exp, 2);
+
+  tmode = TYPE_MODE (TREE_TYPE (arg0));  /* Vector mode.  */
+  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));  /* Element mode.  */
+  mode2 = TYPE_MODE ((TREE_TYPE (arg2)));  /* Index mode.  */
+  gcc_assert (VECTOR_MODE_P (tmode));
+
+  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
+  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
+  op2 = expand_expr (arg2, NULL_RTX, mode2, EXPAND_NORMAL);
+
+  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
+    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
+
+  op0 = force_reg (tmode, op0);
+  op1 = force_reg (mode1, op1);
+  op2 = force_reg (mode2, op2);
+
+  target = gen_reg_rtx (V16QImode);  /* Incoming TARGET is not used.  */
+  rs6000_expand_vector_insert (target, op0, op1, op2);
+
+  return target;
+}
+
 /* Expand vec_ext builtin.  */
 static rtx
 altivec_expand_vec_ext_builtin (tree exp, rtx target)
@@ -10922,6 +10956,14 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
     case VSX_BUILTIN_VEC_SET_V1TI:
       return altivec_expand_vec_set_builtin (exp);
 
+    case VSX_BUILTIN_VEC_INSERT_V16QI:
+    case VSX_BUILTIN_VEC_INSERT_V8HI:
+    case VSX_BUILTIN_VEC_INSERT_V4SF:
+    case VSX_BUILTIN_VEC_INSERT_V4SI:
+    case VSX_BUILTIN_VEC_INSERT_V2DF:
+    case VSX_BUILTIN_VEC_INSERT_V2DI:
+      return altivec_expand_vec_insert_builtin (exp, target);
+
     case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
     case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
     case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
@@ -13681,6 +13723,38 @@ altivec_init_builtins (void)
 				    integer_type_node, NULL_TREE);
   def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
 
+  /* Access to the vec_insert patterns.  */
+  ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
+				    intQI_type_node,
+				    integer_type_node, NULL_TREE);
+  def_builtin ("__builtin_vec_insert_v16qi", ftype,
+	       VSX_BUILTIN_VEC_INSERT_V16QI);
+
+  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
+				    intHI_type_node,
+				    integer_type_node, NULL_TREE);
+  def_builtin ("__builtin_vec_insert_v8hi", ftype, VSX_BUILTIN_VEC_INSERT_V8HI);
+
+  ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
+				    integer_type_node,
+				    integer_type_node, NULL_TREE);
+  def_builtin ("__builtin_vec_insert_v4si", ftype, VSX_BUILTIN_VEC_INSERT_V4SI);
+
+  ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
+				    float_type_node,
+				    integer_type_node, NULL_TREE);
+  def_builtin ("__builtin_vec_insert_v4sf", ftype, VSX_BUILTIN_VEC_INSERT_V4SF);
+
+  ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
+				    intDI_type_node,
+				    integer_type_node, NULL_TREE);
+  def_builtin ("__builtin_vec_insert_v2di", ftype, VSX_BUILTIN_VEC_INSERT_V2DI);
+
+  ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
+				    double_type_node,
+				    integer_type_node, NULL_TREE);
+  def_builtin ("__builtin_vec_insert_v2df", ftype, VSX_BUILTIN_VEC_INSERT_V2DF);
+
   /* Access to the vec_extract patterns.  */
   ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
 				    integer_type_node, NULL_TREE);
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 28e859f4381..78b5b31d79f 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -58,6 +58,7 @@ extern bool rs6000_split_128bit_ok_p (rtx []);
 extern void rs6000_expand_float128_convert (rtx, rtx, bool);
 extern void rs6000_expand_vector_init (rtx, rtx);
 extern void rs6000_expand_vector_set (rtx, rtx, int);
+extern void rs6000_expand_vector_insert (rtx, rtx, rtx, rtx);
 extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
 extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
 extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index fe93cf6ff2b..afa845f3dff 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6788,6 +6788,152 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
   emit_insn (gen_rtx_SET (target, x));
 }
 
+/* Insert VAL into VEC at variable index IDX; the result goes in TARGET.  */
+
+void
+rs6000_expand_vector_insert (rtx target, rtx vec, rtx val, rtx idx)
+{
+  machine_mode mode = GET_MODE (vec);
+
+  if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (idx))
+    gcc_unreachable ();
+  else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)
+	   && TARGET_DIRECT_MOVE_64BIT)
+    {
+      gcc_assert (GET_MODE (idx) == E_SImode);
+      machine_mode inner_mode = GET_MODE (val);
+      HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
+
+      rtx tmp = gen_reg_rtx (GET_MODE (idx));
+      if (GET_MODE_SIZE (inner_mode) == 8)
+	{
+	  if (!BYTES_BIG_ENDIAN)
+	    {
+	      /* tmp = 1 - idx.  */
+	      emit_insn (gen_subsi3 (tmp, GEN_INT (1), idx));
+	      /* tmp = tmp * 8.  */
+	      emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (3)));
+	      /* tmp = 16 - tmp.  */
+	      emit_insn (gen_subsi3 (tmp, GEN_INT (16), tmp));
+	    }
+	  else
+	    {
+	      emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (3)));
+	      emit_insn (gen_subsi3 (tmp, GEN_INT (16), tmp));
+	    }
+	}
+      else if (GET_MODE_SIZE (inner_mode) == 4)
+	{
+	  if (!BYTES_BIG_ENDIAN)
+	    {
+	      /* tmp = 3 - idx.  */
+	      emit_insn (gen_subsi3 (tmp, GEN_INT (3), idx));
+	      /* tmp = tmp * 4.  */
+	      emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (2)));
+	      /* tmp = 20 - tmp.  */
+	      emit_insn (gen_subsi3 (tmp, GEN_INT (20), tmp));
+	    }
+	  else
+	    {
+	      emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (2)));
+	      emit_insn (gen_subsi3 (tmp, GEN_INT (20), tmp));
+	    }
+	}
+      else if (GET_MODE_SIZE (inner_mode) == 2)
+	{
+	  if (!BYTES_BIG_ENDIAN)
+	    {
+	      /* tmp = 7 - idx.  */
+	      emit_insn (gen_subsi3 (tmp, GEN_INT (7), idx));
+	      /* tmp = tmp * 2.  */
+	      emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (1)));
+	      /* tmp = 22 - tmp.  */
+	      emit_insn (gen_subsi3 (tmp, GEN_INT (22), tmp));
+	    }
+	  else
+	    {
+	      emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (1)));
+	      emit_insn (gen_subsi3 (tmp, GEN_INT (22), tmp));
+	    }
+	}
+      else if (GET_MODE_SIZE (inner_mode) == 1)
+	if (!BYTES_BIG_ENDIAN)
+	  emit_insn (gen_addsi3 (tmp, idx, GEN_INT (8)));
+	else
+	  emit_insn (gen_subsi3 (tmp, GEN_INT (23), idx));
+      else
+	gcc_unreachable ();
+
+      /* lxv vs32, mask (ones covering a single element of INNER_MODE):
+	  DImode: 0xffffffffffffffff0000000000000000
+	  SImode: 0x00000000ffffffff0000000000000000
+	  HImode: 0x000000000000ffff0000000000000000
+	  QImode: 0x00000000000000ff0000000000000000.  */
+      rtx mask = gen_reg_rtx (V16QImode);
+      rtx mask_v2di = gen_reg_rtx (V2DImode);
+      rtvec v = rtvec_alloc (2);
+      if (!BYTES_BIG_ENDIAN)
+	{
+	  RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
+	  RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
+	}
+      else
+	{
+	  RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
+	  RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
+	}
+      emit_insn (
+	gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
+      rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
+      emit_insn (gen_rtx_SET (mask, sub_mask));
+
+      /* mtvsrd[wz] f0,val: copy VAL into a V16QImode register.  */
+      rtx val_v16qi = gen_reg_rtx (V16QImode);
+      switch (inner_mode)
+	{
+	default:
+	  gcc_unreachable ();
+	  break;
+	case E_QImode:
+	  emit_insn (gen_p8_mtvsrwz_v16qiqi2 (val_v16qi, val));
+	  break;
+	case E_HImode:
+	  emit_insn (gen_p8_mtvsrwz_v16qihi2 (val_v16qi, val));
+	  break;
+	case E_SImode:
+	  emit_insn (gen_p8_mtvsrwz_v16qisi2 (val_v16qi, val));
+	  break;
+	case E_SFmode:
+	  emit_insn (gen_p8_mtvsrwz_v16qisf2 (val_v16qi, val));
+	  break;
+	case E_DImode:
+	  emit_insn (gen_p8_mtvsrd_v16qidi2 (val_v16qi, val));
+	  break;
+	case E_DFmode:
+	  emit_insn (gen_p8_mtvsrd_v16qidf2 (val_v16qi, val));
+	  break;
+	}
+
+      /* lvsl v1,0,tmp: build the permute control vector from TMP.  */
+      rtx pcv = gen_reg_rtx (V16QImode);
+      emit_insn (gen_altivec_lvsl_reg_si2 (pcv, tmp));
+
+      /* xxperm vs0,vs0,vs33 and xxperm vs32,vs32,vs33: permute the value
+	 and the mask with the same control vector.  */
+      rtx val_perm = gen_reg_rtx (V16QImode);
+      rtx mask_perm = gen_reg_rtx (V16QImode);
+      emit_insn (
+	gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
+      emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
+
+      rtx sub_target = simplify_gen_subreg (V16QImode, vec, mode, 0);
+      emit_insn (gen_rtx_SET (target, sub_target));
+
+      /* xxsel vs34,vs34,vs0,vs32: merge VAL_PERM into TARGET under the mask.  */
+      emit_insn (gen_vector_select_v16qi (target, target, val_perm, mask_perm));
+    }
+}
+
 /* Extract field ELT from VEC into TARGET.  */
 
 void
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 43b620ae1c0..b02fda836d4 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8713,6 +8713,25 @@
   "mtvsrwz %x0,%1"
   [(set_attr "type" "mftgpr")])
 
+(define_mode_iterator FQHS [SF QI HI SI])
+(define_mode_iterator FD [DF DI])
+
+(define_insn "p8_mtvsrwz_v16qi<mode>2"
+  [(set (match_operand:V16QI 0 "register_operand" "=wa")
+	(unspec:V16QI [(match_operand:FQHS 1 "register_operand" "r")]
+		   UNSPEC_P8V_MTVSRWZ))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "mtvsrwz %x0,%1"
+  [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_mtvsrd_v16qi<mode>2"
+  [(set (match_operand:V16QI 0 "register_operand" "=wa")
+	(unspec:V16QI [(match_operand:FD 1 "register_operand" "r")]
+		   UNSPEC_P8V_MTVSRD))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "mtvsrd %x0,%1"
+  [(set_attr "type" "mftgpr")])
+
 (define_insn_and_split "reload_fpr_from_gpr<mode>"
   [(set (match_operand:FMOVE64X 0 "register_operand" "=d")
 	(unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")]
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index dd750210758..7e82690d12d 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5349,7 +5349,7 @@
   rtx rtx_vtmp = gen_reg_rtx (V16QImode);
   rtx tmp = gen_reg_rtx (DImode);
 
-  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
+  emit_insn (gen_altivec_lvsl_reg_di2 (shift_mask, operands[2]));
   emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
   emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
   emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.c b/gcc/testsuite/gcc.target/powerpc/pr79251.c
new file mode 100644
index 00000000000..877659a0146
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr79251.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -maltivec" } */
+
+#include <stddef.h>
+#include <altivec.h>
+
+#define TYPE int
+/* PR79251: vec_insert with a variable index should not use a stack store.  */
+__attribute__ ((noinline))
+vector TYPE test (vector TYPE v, TYPE i, size_t n)
+{
+  vector TYPE v1 = v;
+  v1 = vec_insert (i, v, n);
+
+  return v1;
+}
+
+/* { dg-final { scan-assembler-not {\mstwx\M} } } */
+/* { dg-final { scan-assembler-times {\mlvsl\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxxperm\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxsel\M} 1 } } */
-- 
2.27.0.90.geebb51ba8c



More information about the Gcc-patches mailing list