[PATCH 3/4] rs6000: Enable vec_insert for P8 with rs6000_expand_vector_set_var_p8

Xionghu Luo luoxhu@linux.ibm.com
Thu Dec 10 03:32:09 GMT 2020


Ping^2. Thanks.

On 2020/12/3 22:16, Xionghu Luo via Gcc-patches wrote:
> Ping. Thanks.
> 
> 
> On 2020/11/27 09:04, Xionghu Luo via Gcc-patches wrote:
>> Hi Segher,
>> Thanks for the approval of [PATCH 1/4] and [PATCH 2/4].  What's your
>> opinion of this [PATCH 3/4] for P8, please?  xxinsertw has only existed
>> since ISA v3.0, so we had to implement it another way.
>>
>>
>> Xionghu
>>
>>
>> On 2020/10/10 16:08, Xionghu Luo wrote:
>>> gcc/ChangeLog:
>>>
>>> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
>>>
>>>     * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
>>>     Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later
>>>     platforms.
>>>     * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update
>>>     to call different path for P8 and P9.
>>>     (rs6000_expand_vector_set_var_p9): New function.
>>>     (rs6000_expand_vector_set_var_p8): New function.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> 2020-10-10  Xionghu Luo  <luoxhu@linux.ibm.com>
>>>
>>>     * gcc.target/powerpc/pr79251.p8.c: New test.
>>> ---
>>>    gcc/config/rs6000/rs6000-c.c                  |  27 +++-
>>>    gcc/config/rs6000/rs6000.c                    | 117 +++++++++++++++++-
>>>    gcc/testsuite/gcc.target/powerpc/pr79251.p8.c |  17 +++
>>>    3 files changed, 155 insertions(+), 6 deletions(-)
>>>    create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>>
>>> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
>>> index 5551a21d738..4bea8001ec6 100644
>>> --- a/gcc/config/rs6000/rs6000-c.c
>>> +++ b/gcc/config/rs6000/rs6000-c.c
>>> @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin 
>>> (location_t loc, tree fndecl,
>>>          SET_EXPR_LOCATION (stmt, loc);
>>>          stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
>>>        }
>>> -      stmt = build_array_ref (loc, stmt, arg2);
>>> -      stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
>>> -              convert (TREE_TYPE (stmt), arg0));
>>> -      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>>> +
>>> +      if (TARGET_P8_VECTOR)
>>> +    {
>>> +      stmt = build_array_ref (loc, stmt, arg2);
>>> +      stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
>>> +                  convert (TREE_TYPE (stmt), arg0));
>>> +      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>>> +    }
>>> +      else
>>> +    {
>>> +      tree arg1_inner_type;
>>> +      tree innerptrtype;
>>> +      arg1_inner_type = TREE_TYPE (arg1_type);
>>> +      innerptrtype = build_pointer_type (arg1_inner_type);
>>> +
>>> +      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
>>> +      stmt = convert (innerptrtype, stmt);
>>> +      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
>>> +      stmt = build_indirect_ref (loc, stmt, RO_NULL);
>>> +      stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
>>> +             convert (TREE_TYPE (stmt), arg0));
>>> +      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>>> +    }
>>>          return stmt;
>>>        }
>>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>>> index 96f76c7a74c..33ca839cb28 100644
>>> --- a/gcc/config/rs6000/rs6000.c
>>> +++ b/gcc/config/rs6000/rs6000.c
>>> @@ -6806,10 +6806,10 @@ rs6000_expand_vector_set (rtx target, rtx 
>>> val, rtx elt_rtx)
>>>    }
>>>    /* Insert VAL into IDX of TARGET, VAL size is same of the vector 
>>> element, IDX
>>> -   is variable and also counts by vector element size.  */
>>> +   is variable and also counts by vector element size for p9 and 
>>> above.  */
>>>    void
>>> -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
>>> +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
>>>    {
>>>      machine_mode mode = GET_MODE (target);
>>> @@ -6852,6 +6852,119 @@ rs6000_expand_vector_set_var (rtx target, rtx 
>>> val, rtx idx)
>>>      emit_insn (perml);
>>>    }
>>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector 
>>> element, IDX
>>> +   is variable and also counts by vector element size for p8.  */
>>> +
>>> +void
>>> +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx)
>>> +{
>>> +  machine_mode mode = GET_MODE (target);
>>> +
>>> +  gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
>>> +
>>> +  gcc_assert (GET_MODE (idx) == E_SImode);
>>> +
>>> +  machine_mode inner_mode = GET_MODE (val);
>>> +  HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
>>> +
>>> +  rtx tmp = gen_reg_rtx (GET_MODE (idx));
>>> +  int width = GET_MODE_SIZE (inner_mode);
>>> +
>>> +  gcc_assert (width >= 1 && width <= 4);
>>> +
>>> +  if (!BYTES_BIG_ENDIAN)
>>> +    {
>>> +      /*  idx = idx * width.  */
>>> +      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
>>> +      /*  idx = idx + 8.  */
>>> +      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8)));
>>> +    }
>>> +  else
>>> +    {
>>> +      emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
>>> +      emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp));
>>> +    }
>>> +
>>> +  /*  lxv vs33, mask.
>>> +      DImode: 0xffffffffffffffff0000000000000000
>>> +      SImode: 0x00000000ffffffff0000000000000000
>>> +      HImode: 0x000000000000ffff0000000000000000.
>>> +      QImode: 0x00000000000000ff0000000000000000.  */
>>> +  rtx mask = gen_reg_rtx (V16QImode);
>>> +  rtx mask_v2di = gen_reg_rtx (V2DImode);
>>> +  rtvec v = rtvec_alloc (2);
>>> +  if (!BYTES_BIG_ENDIAN)
>>> +    {
>>> +      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
>>> +      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
>>> +    }
>>> +  else
>>> +    {
>>> +      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
>>> +      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
>>> +    }
>>> +  emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL 
>>> (V2DImode, v)));
>>> +  rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, 
>>> V2DImode, 0);
>>> +  emit_insn (gen_rtx_SET (mask, sub_mask));
>>> +
>>> +  /*  mtvsrd[wz] f0,tmp_val.  */
>>> +  rtx tmp_val = gen_reg_rtx (SImode);
>>> +  if (inner_mode == E_SFmode)
>>> +    emit_insn (gen_movsi_from_sf (tmp_val, val));
>>> +  else
>>> +    tmp_val = force_reg (SImode, val);
>>> +
>>> +  rtx val_v16qi = gen_reg_rtx (V16QImode);
>>> +  rtx val_v2di = gen_reg_rtx (V2DImode);
>>> +  rtvec vec_val = rtvec_alloc (2);
>>> +  if (!BYTES_BIG_ENDIAN)
>>> +  {
>>> +    RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
>>> +    RTVEC_ELT (vec_val, 1) = tmp_val;
>>> +  }
>>> +  else
>>> +  {
>>> +    RTVEC_ELT (vec_val, 0) = tmp_val;
>>> +    RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
>>> +  }
>>> +  emit_insn (
>>> +    gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, 
>>> vec_val)));
>>> +  rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
>>> +  emit_insn (gen_rtx_SET (val_v16qi, sub_val));
>>> +
>>> +  /*  lvsl    13,0,idx.  */
>>> +  tmp = convert_modes (DImode, SImode, tmp, 1);
>>> +  rtx pcv = gen_reg_rtx (V16QImode);
>>> +  emit_insn (gen_altivec_lvsl_reg (pcv, tmp));
>>> +
>>> +  /*  vperm 1,1,1,13.  */
>>> +  /*  vperm 0,0,0,13.  */
>>> +  rtx val_perm = gen_reg_rtx (V16QImode);
>>> +  rtx mask_perm = gen_reg_rtx (V16QImode);
>>> +  emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, 
>>> val_v16qi, pcv));
>>> +  emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
>>> +
>>> +  rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
>>> +
>>> +  /*  xxsel 34,34,32,33.  */
>>> +  emit_insn (
>>> +    gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, 
>>> mask_perm));
>>> +}
>>> +
>>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector 
>>> element, IDX
>>> +   is variable and also counts by vector element size.  */
>>> +
>>> +void
>>> +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx)
>>> +{
>>> +  machine_mode mode = GET_MODE (target);
>>> +  machine_mode inner_mode = GET_MODE_INNER (mode);
>>> +  if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8)
>>> +    rs6000_expand_vector_set_var_p9 (target, val, idx);
>>> +  else
>>> +    rs6000_expand_vector_set_var_p8 (target, val, idx);
>>> +}
>>> +
>>>    /* Extract field ELT from VEC into TARGET.  */
>>>    void
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c 
>>> b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>> new file mode 100644
>>> index 00000000000..06da47b7758
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c
>>> @@ -0,0 +1,17 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target powerpc_p8vector_ok } */
>>> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */
>>> +
>>> +#include <stddef.h>
>>> +#include <altivec.h>
>>> +#include "pr79251.h"
>>> +
>>> +TEST_VEC_INSERT_ALL (test)
>>> +
>>> +/* { dg-final { scan-assembler-not {\mstxw\M} } } */
>>> +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */
>>> +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */
>>> +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */
>>> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */
>>> +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */
>>> +
>>>
>>
> 

-- 
Thanks,
Xionghu


More information about the Gcc-patches mailing list