[PATCH] rs6000: Expand vec_insert in expander instead of gimple [PR79251]

Richard Biener richard.guenther@gmail.com
Mon Aug 31 12:43:32 GMT 2020


On Mon, Aug 31, 2020 at 11:09 AM Xiong Hu Luo via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> vec_insert accepts 3 arguments: arg0 is the input vector, arg1 is the value
> to be inserted, and arg2 is the position at which arg1 is inserted into arg0.
> This patch adds __builtin_vec_insert_v4si[v4sf,v2di,v2df,v8hi,v16qi] so that
> vec_insert is not expanded too early in the gimple stage when arg2 is
> variable, which avoids generating store-hit-load instructions.
>
> For Power9 V4SI:
>         addi 9,1,-16
>         rldic 6,6,2,60
>         stxv 34,-16(1)
>         stwx 5,9,6
>         lxv 34,-16(1)
> =>
>         addis 9,2,.LC0@toc@ha
>         addi 9,9,.LC0@toc@l
>         mtvsrwz 33,5
>         lxv 32,0(9)
>         sradi 9,6,2
>         addze 9,9
>         sldi 9,9,2
>         subf 9,9,6
>         subfic 9,9,3
>         sldi 9,9,2
>         subfic 9,9,20
>         lvsl 13,0,9
>         xxperm 33,33,45
>         xxperm 32,32,45
>         xxsel 34,34,33,32
>
> Though the instruction count increases from 5 to 15, performance improves
> by 60% in typical cases.

Not sure if it is already possible but maybe use internal functions for
those purely internal builtins instead?  That makes it possible
to overload with a single IFN.

Richard.

> gcc/ChangeLog:
>
>         * config/rs6000/altivec.md (altivec_lvsl_reg_<mode>2): Extend to
>         SDI mode.
>         * config/rs6000/rs6000-builtin.def (BU_VSX_X): Add support
>         macros for vec_insert built-in functions.
>         * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
>         Generate built-in calls for vec_insert.
>         * config/rs6000/rs6000-call.c (altivec_expand_vec_insert_builtin):
>         New function.
>         (altivec_expand_builtin): Add case entry for
>         VSX_BUILTIN_VEC_INSERT_V16QI, VSX_BUILTIN_VEC_INSERT_V8HI,
>         VSX_BUILTIN_VEC_INSERT_V4SF,  VSX_BUILTIN_VEC_INSERT_V4SI,
>         VSX_BUILTIN_VEC_INSERT_V2DF,  VSX_BUILTIN_VEC_INSERT_V2DI.
>         (altivec_init_builtins): Define the vec_insert built-in functions.
>         * config/rs6000/rs6000-protos.h (rs6000_expand_vector_insert):
>         New declaration.
>         * config/rs6000/rs6000.c (rs6000_expand_vector_insert):
>         New function.
>         * config/rs6000/rs6000.md (FQHS): New mode iterator.
>         (FD): New mode iterator.
>         (p8_mtvsrwz_v16qi<mode>2): New define_insn.
>         (p8_mtvsrd_v16qi<mode>2): New define_insn.
>         * config/rs6000/vsx.md: Call gen_altivec_lvsl_reg_di2.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/powerpc/pr79251.c: New test.
> ---
>  gcc/config/rs6000/altivec.md               |   4 +-
>  gcc/config/rs6000/rs6000-builtin.def       |   6 +
>  gcc/config/rs6000/rs6000-c.c               |  61 +++++++++
>  gcc/config/rs6000/rs6000-call.c            |  74 +++++++++++
>  gcc/config/rs6000/rs6000-protos.h          |   1 +
>  gcc/config/rs6000/rs6000.c                 | 146 +++++++++++++++++++++
>  gcc/config/rs6000/rs6000.md                |  19 +++
>  gcc/config/rs6000/vsx.md                   |   2 +-
>  gcc/testsuite/gcc.target/powerpc/pr79251.c |  23 ++++
>  9 files changed, 333 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.c
>
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 0a2e634d6b0..66b636059a6 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -2772,10 +2772,10 @@
>    DONE;
>  })
>
> -(define_insn "altivec_lvsl_reg"
> +(define_insn "altivec_lvsl_reg_<mode>2"
>    [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
>         (unspec:V16QI
> -       [(match_operand:DI 1 "gpc_reg_operand" "b")]
> +       [(match_operand:SDI 1 "gpc_reg_operand" "b")]
>         UNSPEC_LVSL_REG))]
>    "TARGET_ALTIVEC"
>    "lvsl %0,0,%1"
> diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
> index f9f0fece549..d095b365c14 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -2047,6 +2047,12 @@ BU_VSX_X (VEC_INIT_V2DI,      "vec_init_v2di",   CONST)
>  BU_VSX_X (VEC_SET_V1TI,              "vec_set_v1ti",   CONST)
>  BU_VSX_X (VEC_SET_V2DF,              "vec_set_v2df",   CONST)
>  BU_VSX_X (VEC_SET_V2DI,              "vec_set_v2di",   CONST)
> +BU_VSX_X (VEC_INSERT_V16QI,          "vec_insert_v16qi",       CONST)
> +BU_VSX_X (VEC_INSERT_V8HI,           "vec_insert_v8hi",        CONST)
> +BU_VSX_X (VEC_INSERT_V4SI,           "vec_insert_v4si",        CONST)
> +BU_VSX_X (VEC_INSERT_V4SF,           "vec_insert_v4sf",        CONST)
> +BU_VSX_X (VEC_INSERT_V2DI,           "vec_insert_v2di",        CONST)
> +BU_VSX_X (VEC_INSERT_V2DF,           "vec_insert_v2df",        CONST)
>  BU_VSX_X (VEC_EXT_V1TI,              "vec_ext_v1ti",   CONST)
>  BU_VSX_X (VEC_EXT_V2DF,              "vec_ext_v2df",   CONST)
>  BU_VSX_X (VEC_EXT_V2DI,              "vec_ext_v2di",   CONST)
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index 2fad3d94706..03b00738a5e 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -1563,6 +1563,67 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
>           return build_call_expr (call, 3, arg1, arg0, arg2);
>         }
>
> +      else if (VECTOR_MEM_VSX_P (mode))
> +       {
> +         tree call = NULL_TREE;
> +
> +         arg2 = fold_for_warn (arg2);
> +
> +         /* If the second argument is variable, we can optimize it if we are
> +            generating 64-bit code on a machine with direct move.  */
> +         if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT)
> +           {
> +             switch (mode)
> +               {
> +               default:
> +                 break;
> +
> +               case E_V2DImode:
> +                 call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V2DI];
> +                 break;
> +
> +               case E_V2DFmode:
> +                 call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V2DF];
> +                 break;
> +
> +               case E_V4SFmode:
> +                 call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V4SF];
> +                 break;
> +
> +               case E_V4SImode:
> +                 call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V4SI];
> +                 break;
> +
> +               case E_V8HImode:
> +                 call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V8HI];
> +                 break;
> +
> +               case E_V16QImode:
> +                 call = rs6000_builtin_decls[VSX_BUILTIN_VEC_INSERT_V16QI];
> +                 break;
> +               }
> +           }
> +
> +         if (call)
> +           {
> +             if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
> +               arg2 = build_int_cst (TREE_TYPE (arg2), 0);
> +             else
> +               arg2 = build_binary_op (
> +                 loc, BIT_AND_EXPR, arg2,
> +                 build_int_cst (TREE_TYPE (arg2),
> +                                TYPE_VECTOR_SUBPARTS (arg1_type) - 1),
> +                 0);
> +             tree result
> +               = build_call_expr (call, 3, arg1,
> +                                  convert (TREE_TYPE (arg1_type), arg0),
> +                                  convert (integer_type_node, arg2));
> +             /* Coerce the result to vector element type.  May be no-op.  */
> +             result = fold_convert (TREE_TYPE (arg1), result);
> +             return result;
> +           }
> +       }
> +
>        /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */
>        arg1_inner_type = TREE_TYPE (arg1_type);
>        if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index e39cfcf672b..339e9ae87e3 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -10660,6 +10660,40 @@ altivec_expand_vec_set_builtin (tree exp)
>    return op0;
>  }
>
> +/* Expand vec_insert builtin.  */
> +static rtx
> +altivec_expand_vec_insert_builtin (tree exp, rtx target)
> +{
> +  machine_mode tmode, mode1, mode2;
> +  tree arg0, arg1, arg2;
> +  rtx op0 = NULL_RTX, op1, op2;
> +
> +  arg0 = CALL_EXPR_ARG (exp, 0);
> +  arg1 = CALL_EXPR_ARG (exp, 1);
> +  arg2 = CALL_EXPR_ARG (exp, 2);
> +
> +  tmode = TYPE_MODE (TREE_TYPE (arg0));
> +  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
> +  mode2 = TYPE_MODE ((TREE_TYPE (arg2)));
> +  gcc_assert (VECTOR_MODE_P (tmode));
> +
> +  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
> +  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
> +  op2 = expand_expr (arg2, NULL_RTX, mode2, EXPAND_NORMAL);
> +
> +  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
> +    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
> +
> +  op0 = force_reg (tmode, op0);
> +  op1 = force_reg (mode1, op1);
> +  op2 = force_reg (mode2, op2);
> +
> +  target = gen_reg_rtx (V16QImode);
> +  rs6000_expand_vector_insert (target, op0, op1, op2);
> +
> +  return target;
> +}
> +
>  /* Expand vec_ext builtin.  */
>  static rtx
>  altivec_expand_vec_ext_builtin (tree exp, rtx target)
> @@ -10922,6 +10956,14 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
>      case VSX_BUILTIN_VEC_SET_V1TI:
>        return altivec_expand_vec_set_builtin (exp);
>
> +    case VSX_BUILTIN_VEC_INSERT_V16QI:
> +    case VSX_BUILTIN_VEC_INSERT_V8HI:
> +    case VSX_BUILTIN_VEC_INSERT_V4SF:
> +    case VSX_BUILTIN_VEC_INSERT_V4SI:
> +    case VSX_BUILTIN_VEC_INSERT_V2DF:
> +    case VSX_BUILTIN_VEC_INSERT_V2DI:
> +      return altivec_expand_vec_insert_builtin (exp, target);
> +
>      case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
>      case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
>      case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
> @@ -13681,6 +13723,38 @@ altivec_init_builtins (void)
>                                     integer_type_node, NULL_TREE);
>    def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
>
> +  /* Access to the vec_insert patterns.  */
> +  ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
> +                                   intQI_type_node,
> +                                   integer_type_node, NULL_TREE);
> +  def_builtin ("__builtin_vec_insert_v16qi", ftype,
> +              VSX_BUILTIN_VEC_INSERT_V16QI);
> +
> +  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
> +                                   intHI_type_node,
> +                                   integer_type_node, NULL_TREE);
> +  def_builtin ("__builtin_vec_insert_v8hi", ftype, VSX_BUILTIN_VEC_INSERT_V8HI);
> +
> +  ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
> +                                   integer_type_node,
> +                                   integer_type_node, NULL_TREE);
> +  def_builtin ("__builtin_vec_insert_v4si", ftype, VSX_BUILTIN_VEC_INSERT_V4SI);
> +
> +  ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
> +                                   float_type_node,
> +                                   integer_type_node, NULL_TREE);
> +  def_builtin ("__builtin_vec_insert_v4sf", ftype, VSX_BUILTIN_VEC_INSERT_V4SF);
> +
> +  ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
> +                                   intDI_type_node,
> +                                   integer_type_node, NULL_TREE);
> +  def_builtin ("__builtin_vec_insert_v2di", ftype, VSX_BUILTIN_VEC_INSERT_V2DI);
> +
> +  ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
> +                                   double_type_node,
> +                                   integer_type_node, NULL_TREE);
> +  def_builtin ("__builtin_vec_insert_v2df", ftype, VSX_BUILTIN_VEC_INSERT_V2DF);
> +
>    /* Access to the vec_extract patterns.  */
>    ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
>                                     integer_type_node, NULL_TREE);
> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> index 28e859f4381..78b5b31d79f 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -58,6 +58,7 @@ extern bool rs6000_split_128bit_ok_p (rtx []);
>  extern void rs6000_expand_float128_convert (rtx, rtx, bool);
>  extern void rs6000_expand_vector_init (rtx, rtx);
>  extern void rs6000_expand_vector_set (rtx, rtx, int);
> +extern void rs6000_expand_vector_insert (rtx, rtx, rtx, rtx);
>  extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
>  extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
>  extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index fe93cf6ff2b..afa845f3dff 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -6788,6 +6788,152 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
>    emit_insn (gen_rtx_SET (target, x));
>  }
>
> +/* Insert value from VEC into idx of TARGET.  */
> +
> +void
> +rs6000_expand_vector_insert (rtx target, rtx vec, rtx val, rtx idx)
> +{
> +  machine_mode mode = GET_MODE (vec);
> +
> +  if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (idx))
> +      gcc_unreachable ();
> +  else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)
> +          && TARGET_DIRECT_MOVE_64BIT)
> +    {
> +      gcc_assert (GET_MODE (idx) == E_SImode);
> +      machine_mode inner_mode = GET_MODE (val);
> +      HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
> +
> +      rtx tmp = gen_reg_rtx (GET_MODE (idx));
> +      if (GET_MODE_SIZE (inner_mode) == 8)
> +       {
> +         if (!BYTES_BIG_ENDIAN)
> +           {
> +             /*  idx = 1 - idx.  */
> +             emit_insn (gen_subsi3 (tmp, GEN_INT (1), idx));
> +             /*  idx = idx * 8.  */
> +             emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (3)));
> +             /*  idx = 16 - idx.  */
> +             emit_insn (gen_subsi3 (tmp, GEN_INT (16), tmp));
> +           }
> +         else
> +           {
> +             emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (3)));
> +             emit_insn (gen_subsi3 (tmp, GEN_INT (16), tmp));
> +           }
> +       }
> +      else if (GET_MODE_SIZE (inner_mode) == 4)
> +       {
> +         if (!BYTES_BIG_ENDIAN)
> +           {
> +             /*  idx = 3 - idx.  */
> +             emit_insn (gen_subsi3 (tmp, GEN_INT (3), idx));
> +             /*  idx = idx * 4.  */
> +             emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (2)));
> +             /*  idx = 20 - idx.  */
> +             emit_insn (gen_subsi3 (tmp, GEN_INT (20), tmp));
> +           }
> +         else
> +         {
> +             emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (2)));
> +             emit_insn (gen_subsi3 (tmp, GEN_INT (20), tmp));
> +         }
> +       }
> +      else if (GET_MODE_SIZE (inner_mode) == 2)
> +       {
> +         if (!BYTES_BIG_ENDIAN)
> +           {
> +             /*  idx = 7 - idx.  */
> +             emit_insn (gen_subsi3 (tmp, GEN_INT (7), idx));
> +             /*  idx = idx * 2.  */
> +             emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (1)));
> +             /*  idx = 22 - idx.  */
> +             emit_insn (gen_subsi3 (tmp, GEN_INT (22), tmp));
> +           }
> +         else
> +           {
> +             emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (1)));
> +             emit_insn (gen_subsi3 (tmp, GEN_INT (22), idx));
> +           }
> +       }
> +      else if (GET_MODE_SIZE (inner_mode) == 1)
> +       if (!BYTES_BIG_ENDIAN)
> +         emit_insn (gen_addsi3 (tmp, idx, GEN_INT (8)));
> +       else
> +         emit_insn (gen_subsi3 (tmp, GEN_INT (23), idx));
> +      else
> +       gcc_unreachable ();
> +
> +      /*  lxv vs32, mask.
> +         DImode: 0xffffffffffffffff0000000000000000
> +         SImode: 0x00000000ffffffff0000000000000000
> +         HImode: 0x000000000000ffff0000000000000000.
> +         QImode: 0x00000000000000ff0000000000000000.  */
> +      rtx mask = gen_reg_rtx (V16QImode);
> +      rtx mask_v2di = gen_reg_rtx (V2DImode);
> +      rtvec v = rtvec_alloc (2);
> +      if (!BYTES_BIG_ENDIAN)
> +       {
> +         RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
> +         RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
> +       }
> +      else
> +      {
> +         RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
> +         RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
> +       }
> +      emit_insn (
> +       gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
> +      rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
> +      emit_insn (gen_rtx_SET (mask, sub_mask));
> +
> +      /*  mtvsrd[wz] f0,val.  */
> +      rtx val_v16qi = gen_reg_rtx (V16QImode);
> +      switch (inner_mode)
> +       {
> +       default:
> +         gcc_unreachable ();
> +         break;
> +       case E_QImode:
> +         emit_insn (gen_p8_mtvsrwz_v16qiqi2 (val_v16qi, val));
> +         break;
> +       case E_HImode:
> +         emit_insn (gen_p8_mtvsrwz_v16qihi2 (val_v16qi, val));
> +         break;
> +       case E_SImode:
> +         emit_insn (gen_p8_mtvsrwz_v16qisi2 (val_v16qi, val));
> +         break;
> +       case E_SFmode:
> +         emit_insn (gen_p8_mtvsrwz_v16qisf2 (val_v16qi, val));
> +         break;
> +       case E_DImode:
> +         emit_insn (gen_p8_mtvsrd_v16qidi2 (val_v16qi, val));
> +         break;
> +       case E_DFmode:
> +         emit_insn (gen_p8_mtvsrd_v16qidf2 (val_v16qi, val));
> +         break;
> +       }
> +
> +      /*  lvsl    v1,0,idx.  */
> +      rtx pcv = gen_reg_rtx (V16QImode);
> +      emit_insn (gen_altivec_lvsl_reg_si2 (pcv, tmp));
> +
> +      /*  xxperm  vs0,vs0,vs33.  */
> +      /*  xxperm  vs32,vs32,vs33.  */
> +      rtx val_perm = gen_reg_rtx (V16QImode);
> +      rtx mask_perm = gen_reg_rtx (V16QImode);
> +      emit_insn (
> +       gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
> +      emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
> +
> +      rtx sub_target = simplify_gen_subreg (V16QImode, vec, mode, 0);
> +      emit_insn (gen_rtx_SET (target, sub_target));
> +
> +      /*  xxsel   vs34,vs34,vs0,vs32.  */
> +      emit_insn (gen_vector_select_v16qi (target, target, val_perm, mask_perm));
> +    }
> +}
> +
>  /* Extract field ELT from VEC into TARGET.  */
>
>  void
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 43b620ae1c0..b02fda836d4 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -8713,6 +8713,25 @@
>    "mtvsrwz %x0,%1"
>    [(set_attr "type" "mftgpr")])
>
> +(define_mode_iterator FQHS [SF QI HI SI])
> +(define_mode_iterator FD [DF DI])
> +
> +(define_insn "p8_mtvsrwz_v16qi<mode>2"
> +  [(set (match_operand:V16QI 0 "register_operand" "=wa")
> +       (unspec:V16QI [(match_operand:FQHS 1 "register_operand" "r")]
> +                  UNSPEC_P8V_MTVSRWZ))]
> +  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
> +  "mtvsrwz %x0,%1"
> +  [(set_attr "type" "mftgpr")])
> +
> +(define_insn "p8_mtvsrd_v16qi<mode>2"
> +  [(set (match_operand:V16QI 0 "register_operand" "=wa")
> +       (unspec:V16QI [(match_operand:FD 1 "register_operand" "r")]
> +                  UNSPEC_P8V_MTVSRD))]
> +  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
> +  "mtvsrd %x0,%1"
> +  [(set_attr "type" "mftgpr")])
> +
>  (define_insn_and_split "reload_fpr_from_gpr<mode>"
>    [(set (match_operand:FMOVE64X 0 "register_operand" "=d")
>         (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")]
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index dd750210758..7e82690d12d 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -5349,7 +5349,7 @@
>    rtx rtx_vtmp = gen_reg_rtx (V16QImode);
>    rtx tmp = gen_reg_rtx (DImode);
>
> -  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
> +  emit_insn (gen_altivec_lvsl_reg_di2 (shift_mask, operands[2]));
>    emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
>    emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
>    emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.c b/gcc/testsuite/gcc.target/powerpc/pr79251.c
> new file mode 100644
> index 00000000000..877659a0146
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p9vector_ok } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power9 -maltivec" } */
> +
> +#include <stddef.h>
> +#include <altivec.h>
> +
> +#define TYPE int
> +
> +__attribute__ ((noinline))
> +vector TYPE test (vector TYPE v, TYPE i, size_t n)
> +{
> +  vector TYPE v1 = v;
> +  v1 = vec_insert (i, v, n);
> +
> +  return v1;
> +}
> +
> +/* { dg-final { scan-assembler-not {\mstxw\M} } } */
> +/* { dg-final { scan-assembler-times {\mlvsl\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mxxperm\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mxxsel\M} 1 } } */
> --
> 2.27.0.90.geebb51ba8c
>


More information about the Gcc-patches mailing list