[PATCH] rs6000: Add vec_unpacku_{hi,lo}_v4si

Bill Schmidt wschmidt@linux.ibm.com
Fri Aug 6 13:10:05 GMT 2021


Hi Kewen,

On 8/4/21 9:06 PM, Kewen.Lin wrote:
> Hi,
>
> The existing vec_unpacku_{hi,lo} expanders support emulated
> unsigned unpacking for short and char but lack support for int.
> This patch adds support for vec_unpacku_{hi,lo}_v4si.
>
> Meanwhile, the current implementation uses a vector permutation,
> which requires one extra customized constant vector as the
> permutation control vector (pcv).  It's better to use vector
> merge high/low with a zero constant vector, to save space in
> the constant area as well as the cost of initializing the pcv
> in the prologue.  This patch updates it to use vector merging
> and simplifies it with iterators.
>
> Bootstrapped & regtested on powerpc64le-linux-gnu P9 and
> powerpc64-linux-gnu P8.
>
> btw, the loop in unpack-vectorize-2.c doesn't get vectorized
> without this patch; unpack-vectorize-[13]* are there to verify
> that the vector merging and simplification work as expected.
>
> Is it ok for trunk?
>
> BR,
> Kewen
> -----
> gcc/ChangeLog:
>
> 	* config/rs6000/altivec.md (vec_unpacku_hi_v16qi): Remove.
> 	(vec_unpacku_hi_v8hi): Likewise.
> 	(vec_unpacku_lo_v16qi): Likewise.
> 	(vec_unpacku_lo_v8hi): Likewise.
> 	(vec_unpacku_hi_<VP_small_lc>): New define_expand.
> 	(vec_unpacku_lo_<VP_small_lc>): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/powerpc/unpack-vectorize-1.c: New test.
> 	* gcc.target/powerpc/unpack-vectorize-1.h: New test.
> 	* gcc.target/powerpc/unpack-vectorize-2.c: New test.
> 	* gcc.target/powerpc/unpack-vectorize-2.h: New test.
> 	* gcc.target/powerpc/unpack-vectorize-3.c: New test.
> 	* gcc.target/powerpc/unpack-vectorize-3.h: New test.
> 	* gcc.target/powerpc/unpack-vectorize-run-1.c: New test.
> 	* gcc.target/powerpc/unpack-vectorize-run-2.c: New test.
> 	* gcc.target/powerpc/unpack-vectorize-run-3.c: New test.
> 	* gcc.target/powerpc/unpack-vectorize.h: New test.
> ---
>  gcc/config/rs6000/altivec.md                  | 158 ++++--------------
>  .../gcc.target/powerpc/unpack-vectorize-1.c   |  18 ++
>  .../gcc.target/powerpc/unpack-vectorize-1.h   |  14 ++
>  .../gcc.target/powerpc/unpack-vectorize-2.c   |  12 ++
>  .../gcc.target/powerpc/unpack-vectorize-2.h   |   7 +
>  .../gcc.target/powerpc/unpack-vectorize-3.c   |  11 ++
>  .../gcc.target/powerpc/unpack-vectorize-3.h   |   7 +
>  .../powerpc/unpack-vectorize-run-1.c          |  24 +++
>  .../powerpc/unpack-vectorize-run-2.c          |  16 ++
>  .../powerpc/unpack-vectorize-run-3.c          |  16 ++
>  .../gcc.target/powerpc/unpack-vectorize.h     |  42 +++++
>  11 files changed, 196 insertions(+), 129 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.h
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.h
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.h
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-1.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-2.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-3.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize.h
>
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index d70c17e6bc2..0e8b66cd6a5 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -134,10 +134,8 @@ (define_c_enum "unspec"
>     UNSPEC_VMULWLUH
>     UNSPEC_VMULWHSH
>     UNSPEC_VMULWLSH
> -   UNSPEC_VUPKHUB
> -   UNSPEC_VUPKHUH
> -   UNSPEC_VUPKLUB
> -   UNSPEC_VUPKLUH
> +   UNSPEC_VUPKHUBHW
> +   UNSPEC_VUPKLUBHW


Up to you, but maybe just name these UNSPEC_VUPKHU and UNSPEC_VUPKLU, so 
the names stay accurate if we later extend this to other types.  Fine 
either way.

>     UNSPEC_VPERMSI
>     UNSPEC_VPERMHI
>     UNSPEC_INTERHI
> @@ -3885,143 +3883,45 @@ (define_insn "xxeval"
>     [(set_attr "type" "vecsimple")
>      (set_attr "prefixed" "yes")])
>
> -(define_expand "vec_unpacku_hi_v16qi"
> -  [(set (match_operand:V8HI 0 "register_operand" "=v")
> -        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
> -                     UNSPEC_VUPKHUB))]
> -  "TARGET_ALTIVEC"
> -{
> -  rtx vzero = gen_reg_rtx (V8HImode);
> -  rtx mask = gen_reg_rtx (V16QImode);
> -  rtvec v = rtvec_alloc (16);
> -  bool be = BYTES_BIG_ENDIAN;
> -
> -  emit_insn (gen_altivec_vspltish (vzero, const0_rtx));
> -
> -  RTVEC_ELT (v,  0) = gen_rtx_CONST_INT (QImode, be ? 16 :  7);
> -  RTVEC_ELT (v,  1) = gen_rtx_CONST_INT (QImode, be ?  0 : 16);
> -  RTVEC_ELT (v,  2) = gen_rtx_CONST_INT (QImode, be ? 16 :  6);
> -  RTVEC_ELT (v,  3) = gen_rtx_CONST_INT (QImode, be ?  1 : 16);
> -  RTVEC_ELT (v,  4) = gen_rtx_CONST_INT (QImode, be ? 16 :  5);
> -  RTVEC_ELT (v,  5) = gen_rtx_CONST_INT (QImode, be ?  2 : 16);
> -  RTVEC_ELT (v,  6) = gen_rtx_CONST_INT (QImode, be ? 16 :  4);
> -  RTVEC_ELT (v,  7) = gen_rtx_CONST_INT (QImode, be ?  3 : 16);
> -  RTVEC_ELT (v,  8) = gen_rtx_CONST_INT (QImode, be ? 16 :  3);
> -  RTVEC_ELT (v,  9) = gen_rtx_CONST_INT (QImode, be ?  4 : 16);
> -  RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 :  2);
> -  RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ?  5 : 16);
> -  RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 :  1);
> -  RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ?  6 : 16);
> -  RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 :  0);
> -  RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ?  7 : 16);
> -
> -  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, 
> v)));
> -  emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, 
> mask));
> -  DONE;
> -})
> -
> -(define_expand "vec_unpacku_hi_v8hi"
> -  [(set (match_operand:V4SI 0 "register_operand" "=v")
> -        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
> -                     UNSPEC_VUPKHUH))]
> +(define_expand "vec_unpacku_hi_<VP_small_lc>"
> +  [(set (match_operand:VP 0 "register_operand" "=v")
> +        (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
> +         UNSPEC_VUPKHUBHW))]
>    "TARGET_ALTIVEC"
>  {
> -  rtx vzero = gen_reg_rtx (V4SImode);
> -  rtx mask = gen_reg_rtx (V16QImode);
> -  rtvec v = rtvec_alloc (16);
> -  bool be = BYTES_BIG_ENDIAN;
> +  rtx vzero = gen_reg_rtx (<VP_small>mode);
> +  emit_insn (gen_altivec_vspltis<VU_char> (vzero, const0_rtx));
>
> -  emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
> -
> -  RTVEC_ELT (v,  0) = gen_rtx_CONST_INT (QImode, be ? 16 :  7);
> -  RTVEC_ELT (v,  1) = gen_rtx_CONST_INT (QImode, be ? 17 :  6);
> -  RTVEC_ELT (v,  2) = gen_rtx_CONST_INT (QImode, be ?  0 : 17);
> -  RTVEC_ELT (v,  3) = gen_rtx_CONST_INT (QImode, be ?  1 : 16);
> -  RTVEC_ELT (v,  4) = gen_rtx_CONST_INT (QImode, be ? 16 :  5);
> -  RTVEC_ELT (v,  5) = gen_rtx_CONST_INT (QImode, be ? 17 :  4);
> -  RTVEC_ELT (v,  6) = gen_rtx_CONST_INT (QImode, be ?  2 : 17);
> -  RTVEC_ELT (v,  7) = gen_rtx_CONST_INT (QImode, be ?  3 : 16);
> -  RTVEC_ELT (v,  8) = gen_rtx_CONST_INT (QImode, be ? 16 :  3);
> -  RTVEC_ELT (v,  9) = gen_rtx_CONST_INT (QImode, be ? 17 :  2);
> -  RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ?  4 : 17);
> -  RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ?  5 : 16);
> -  RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 :  1);
> -  RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 :  0);
> -  RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ?  6 : 17);
> -  RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ?  7 : 16);
> -
> -  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, 
> v)));
> -  emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
> -  DONE;
> -})
> +  rtx res = gen_reg_rtx (<VP_small>mode);
> +  rtx op1 = operands[1];
>
> -(define_expand "vec_unpacku_lo_v16qi"
> -  [(set (match_operand:V8HI 0 "register_operand" "=v")
> -        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
> -                     UNSPEC_VUPKLUB))]
> -  "TARGET_ALTIVEC"
> -{
> -  rtx vzero = gen_reg_rtx (V8HImode);
> -  rtx mask = gen_reg_rtx (V16QImode);
> -  rtvec v = rtvec_alloc (16);
> -  bool be = BYTES_BIG_ENDIAN;
> -
> -  emit_insn (gen_altivec_vspltish (vzero, const0_rtx));
> -
> -  RTVEC_ELT (v,  0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15);
> -  RTVEC_ELT (v,  1) = gen_rtx_CONST_INT (QImode, be ?  8 : 16);
> -  RTVEC_ELT (v,  2) = gen_rtx_CONST_INT (QImode, be ? 16 : 14);
> -  RTVEC_ELT (v,  3) = gen_rtx_CONST_INT (QImode, be ?  9 : 16);
> -  RTVEC_ELT (v,  4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13);
> -  RTVEC_ELT (v,  5) = gen_rtx_CONST_INT (QImode, be ? 10 : 16);
> -  RTVEC_ELT (v,  6) = gen_rtx_CONST_INT (QImode, be ? 16 : 12);
> -  RTVEC_ELT (v,  7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16);
> -  RTVEC_ELT (v,  8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11);
> -  RTVEC_ELT (v,  9) = gen_rtx_CONST_INT (QImode, be ? 12 : 16);
> -  RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 10);
> -  RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16);
> -  RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 :  9);
> -  RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 14 : 16);
> -  RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 :  8);
> -  RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
> +  if (BYTES_BIG_ENDIAN)
> +    emit_insn (gen_altivec_vmrgh<VU_char> (res, vzero, op1));
> +  else
> +    emit_insn (gen_altivec_vmrgl<VU_char> (res, op1, vzero));
>
> -  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, 
> v)));
> -  emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, 
> mask));
> +  emit_insn (gen_move_insn (operands[0], gen_lowpart (<MODE>mode, res)));
>    DONE;
>  })
>
> -(define_expand "vec_unpacku_lo_v8hi"
> -  [(set (match_operand:V4SI 0 "register_operand" "=v")
> -        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
> -                     UNSPEC_VUPKLUH))]
> +(define_expand "vec_unpacku_lo_<VP_small_lc>"
> +  [(set (match_operand:VP 0 "register_operand" "=v")
> +        (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
> +         UNSPEC_VUPKLUBHW))]
>    "TARGET_ALTIVEC"
>  {
> -  rtx vzero = gen_reg_rtx (V4SImode);
> -  rtx mask = gen_reg_rtx (V16QImode);
> -  rtvec v = rtvec_alloc (16);
> -  bool be = BYTES_BIG_ENDIAN;
> +  rtx vzero = gen_reg_rtx (<VP_small>mode);
> +  emit_insn (gen_altivec_vspltis<VU_char> (vzero, const0_rtx));
>
> -  emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
> -
> -  RTVEC_ELT (v,  0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15);
> -  RTVEC_ELT (v,  1) = gen_rtx_CONST_INT (QImode, be ? 17 : 14);
> -  RTVEC_ELT (v,  2) = gen_rtx_CONST_INT (QImode, be ?  8 : 17);
> -  RTVEC_ELT (v,  3) = gen_rtx_CONST_INT (QImode, be ?  9 : 16);
> -  RTVEC_ELT (v,  4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13);
> -  RTVEC_ELT (v,  5) = gen_rtx_CONST_INT (QImode, be ? 17 : 12);
> -  RTVEC_ELT (v,  6) = gen_rtx_CONST_INT (QImode, be ? 10 : 17);
> -  RTVEC_ELT (v,  7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16);
> -  RTVEC_ELT (v,  8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11);
> -  RTVEC_ELT (v,  9) = gen_rtx_CONST_INT (QImode, be ? 17 : 10);
> -  RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 12 : 17);
> -  RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16);
> -  RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 :  9);
> -  RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 :  8);
> -  RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 14 : 17);
> -  RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
> +  rtx res = gen_reg_rtx (<VP_small>mode);
> +  rtx op1 = operands[1];
>
> -  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, 
> v)));
> -  emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
> +  if (BYTES_BIG_ENDIAN)
> +    emit_insn (gen_altivec_vmrgl<VU_char> (res, vzero, op1));
> +  else
> +    emit_insn (gen_altivec_vmrgh<VU_char> (res, op1, vzero));
> +
> +  emit_insn (gen_move_insn (operands[0], gen_lowpart (<MODE>mode, res)));
>    DONE;
>  })
>
> diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.c 
> b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.c
> new file mode 100644
> index 00000000000..2621d753baa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_altivec_ok } */


I guess powerpc_altivec_ok is fine.  I was initially concerned since 
unpack-vectorize.h mentions vector long long, but the types aren't 
actually used here.  OK.

> +/* { dg-options "-maltivec -O2 -ftree-vectorize -fno-vect-cost-model 
> -fdump-tree-vect-details" } */
> +
> +/* Test if unpack vectorization succeeds for type signed/unsigned
> +   short and char.  */
> +
> +#include "unpack-vectorize-1.h"
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
> +/* { dg-final { scan-assembler {\mvupkhsb\M} } } */
> +/* { dg-final { scan-assembler {\mvupklsb\M} } } */
> +/* { dg-final { scan-assembler {\mvupkhsh\M} } } */
> +/* { dg-final { scan-assembler {\mvupklsh\M} } } */
> +/* { dg-final { scan-assembler {\mvmrghb\M} } } */
> +/* { dg-final { scan-assembler {\mvmrglb\M} } } */
> +/* { dg-final { scan-assembler {\mvmrghh\M} } } */
> +/* { dg-final { scan-assembler {\mvmrglh\M} } } */


I suggest you consider using scan-assembler-times with a count of 1 to 
make the tests more robust, here and in the other tests.

Otherwise the patch looks fine to me.  Recommend maintainers approve 
with or without changes.

Thanks for the improvements!
Bill

> diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.h 
> b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.h
> new file mode 100644
> index 00000000000..1cb89aba392
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.h
> @@ -0,0 +1,14 @@
> +#include "unpack-vectorize.h"
> +
> +DEF_ARR (si)
> +DEF_ARR (ui)
> +DEF_ARR (sh)
> +DEF_ARR (uh)
> +DEF_ARR (sc)
> +DEF_ARR (uc)
> +
> +TEST1 (sh, si)
> +TEST1 (uh, ui)
> +TEST1 (sc, sh)
> +TEST1 (uc, uh)
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.c 
> b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.c
> new file mode 100644
> index 00000000000..3e7e97da43c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-mdejagnu-cpu=power7 -O2 -ftree-vectorize 
> -fno-vect-cost-model -fdump-tree-vect-details" } */
> +
> +/* Test if unsigned int unpack vectorization succeeds.  V2DImode is
> +   supported since Power7 so guard it under Power7 and up.  */
> +
> +#include "unpack-vectorize-2.h"
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> +/* { dg-final { scan-assembler {\mxxmrghw\M} } } */
> +/* { dg-final { scan-assembler {\mxxmrglw\M} } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.h 
> b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.h
> new file mode 100644
> index 00000000000..e199229e6f7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.h
> @@ -0,0 +1,7 @@
> +#include "unpack-vectorize.h"
> +
> +DEF_ARR (ui)
> +DEF_ARR (ull)
> +
> +TEST1 (ui, ull)
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.c 
> b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.c
> new file mode 100644
> index 00000000000..a246e7e26b6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-options "-mdejagnu-cpu=power8 -O2 -ftree-vectorize 
> -fno-vect-cost-model -fdump-tree-vect-details" } */
> +
> +/* Test if signed int unpack vectorization succeeds.  */
> +
> +#include "unpack-vectorize-3.h"
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> +/* { dg-final { scan-assembler {\mvupkhsw\M} } } */
> +/* { dg-final { scan-assembler {\mvupklsw\M} } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.h 
> b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.h
> new file mode 100644
> index 00000000000..6a5191d28a7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.h
> @@ -0,0 +1,7 @@
> +#include "unpack-vectorize.h"
> +
> +DEF_ARR (si)
> +DEF_ARR (sll)
> +
> +TEST1 (si, sll)
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-1.c 
> b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-1.c
> new file mode 100644
> index 00000000000..51f0e67524f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-1.c
> @@ -0,0 +1,24 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target vmx_hw } */
> +/* { dg-options "-maltivec -O2 -ftree-vectorize -fno-vect-cost-model" 
> } */
> +
> +#include "unpack-vectorize-1.h"
> +
> +/* Test if unpack vectorization cases on signed/unsigned short and char
> +   run successfully.  */
> +
> +CHECK1 (sh, si)
> +CHECK1 (uh, ui)
> +CHECK1 (sc, sh)
> +CHECK1 (uc, uh)
> +
> +int
> +main ()
> +{
> +  check1_sh_si ();
> +  check1_uh_ui ();
> +  check1_sc_sh ();
> +  check1_uc_uh ();
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-2.c 
> b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-2.c
> new file mode 100644
> index 00000000000..6d243602bbf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-2.c
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target vsx_hw } */
> +/* { dg-options "-mdejagnu-cpu=power7 -O2 -ftree-vectorize 
> -fno-vect-cost-model" } */
> +
> +#include "unpack-vectorize-2.h"
> +
> +/* Test if unpack vectorization cases on unsigned int run 
> successfully.  */
> +
> +CHECK1 (ui, ull)
> +
> +int
> +main ()
> +{
> +  check1_ui_ull ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-3.c 
> b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-3.c
> new file mode 100644
> index 00000000000..fec33c46abc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-3.c
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target p8vector_hw } */
> +/* { dg-options "-mdejagnu-cpu=power8 -O2 -ftree-vectorize 
> -fno-vect-cost-model" } */
> +
> +#include "unpack-vectorize-3.h"
> +
> +/* Test if unpack vectorization cases on signed int run successfully.  */
> +
> +CHECK1 (si, sll)
> +
> +int
> +main ()
> +{
> +  check1_si_sll ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize.h 
> b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize.h
> new file mode 100644
> index 00000000000..11fa7d4aa6f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize.h
> @@ -0,0 +1,42 @@
> +typedef signed long long sll;
> +typedef unsigned long long ull;
> +typedef signed int si;
> +typedef unsigned int ui;
> +typedef signed short sh;
> +typedef unsigned short uh;
> +typedef signed char sc;
> +typedef unsigned char uc;
> +
> +#ifndef ALIGN
> +#define ALIGN 32
> +#endif
> +
> +#define ALIGN_ATTR __attribute__((__aligned__(ALIGN)))
> +
> +#define N 128
> +
> +#define DEF_ARR(TYPE) \
> +  TYPE TYPE##_a[N] 
> ALIGN_ATTR;                                                \
> +  TYPE TYPE##_b[N] 
> ALIGN_ATTR;                                                \
> +  TYPE TYPE##_c[N] ALIGN_ATTR;
> +
> +#define TEST1(NTYPE, 
> WTYPE)                                                    \
> +  __attribute__((noipa)) void test1_##NTYPE##_##WTYPE() 
> {                      \
> +    for (int i = 0; i < N; 
> i++)                                                \
> +      WTYPE##_c[i] = NTYPE##_a[i] + 
> NTYPE##_b[i];                              \
> +  }
> +
> +#define CHECK1(NTYPE, 
> WTYPE)                                                   \
> +  __attribute__((noipa, optimize(0))) void check1_##NTYPE##_##WTYPE() 
> {        \
> +    for (int i = 0; i < N; i++) 
> {                                              \
> +      NTYPE##_a[i] = 2 * i * sizeof(NTYPE) + 
> 10;                               \
> +      NTYPE##_b[i] = 7 * i * sizeof(NTYPE) / 5 - 
> 10;                           \
> + } \
> + test1_##NTYPE##_##WTYPE(); \
> +    for (int i = 0; i < N; i++) 
> {                                              \
> +      WTYPE exp = NTYPE##_a[i] + 
> NTYPE##_b[i];                                 \
> +      if (WTYPE##_c[i] != 
> exp)                                                 \
> + __builtin_abort(); \
> + } \
> +  }
> +
> -- 
> 2.17.1
>



More information about the Gcc-patches mailing list