[PATCH][Arm] Implement vector average patterns in aarch32

Kyrill Tkachov kyrylo.tkachov@foss.arm.com
Tue Jul 2 09:18:00 GMT 2019


Hi Iain,

On 6/28/19 1:56 PM, Iain Apreotesei wrote:
> gcc/ChangeLog:
>
> 2019-06-28  Iain Apreotesei  <iain.apreotesei@arm.com>
>
>      * config/arm/iterators.md (VRHADD, VHADD): Add, update int_iterators.
>      (u) new int_attr.
>      * config/arm/neon.md (<u>avg<mode>3_floor, <u>avg<mode>3_ceil)
>      (neon_vhadd<sup><mode>, neon_vrhadd<sup><mode>): Add new patterns.
>
> gcc/testsuite/ChangeLog:
>
> 2019-06-28  Iain Apreotesei  <iain.apreotesei@arm.com>
>
>      * gcc.target/arm/vect_vhadd_1.c: New test.
>      * gcc.target/arm/vect_vhadd_1.h: New test.
>      * gcc.target/arm/vect_vrhadd_1.c: New test.
>
Thanks for the patch.

> Change-Id: Ief4009984ca9974993530b582bd9ba431e42c3ed
> ---
>   gcc/config/arm/iterators.md                  |  9 +++++--
>   gcc/config/arm/neon.md                       | 32 +++++++++++++++++++++---
>   gcc/testsuite/gcc.target/arm/vect_vhadd_1.c  | 22 +++++++++++++++++
>   gcc/testsuite/gcc.target/arm/vect_vhadd_1.h  | 37 ++++++++++++++++++++++++++++
>   gcc/testsuite/gcc.target/arm/vect_vrhadd_1.c | 22 +++++++++++++++++
>   5 files changed, 117 insertions(+), 5 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.target/arm/vect_vhadd_1.c
>   create mode 100644 gcc/testsuite/gcc.target/arm/vect_vhadd_1.h
>   create mode 100644 gcc/testsuite/gcc.target/arm/vect_vrhadd_1.c
>
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index c33e572..43ecc60 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -308,8 +308,8 @@
>
>   (define_int_iterator VADDW [UNSPEC_VADDW_S UNSPEC_VADDW_U])
>
> -(define_int_iterator VHADD [UNSPEC_VRHADD_S UNSPEC_VRHADD_U
> -                UNSPEC_VHADD_S UNSPEC_VHADD_U])
> +(define_int_iterator VHADD[UNSPEC_VHADD_S UNSPEC_VHADD_U])
> +(define_int_iterator VRHADD[UNSPEC_VRHADD_S UNSPEC_VRHADD_U])
>
>   (define_int_iterator VQADD [UNSPEC_VQADD_S UNSPEC_VQADD_U])
>
> @@ -818,6 +818,11 @@
>
>   ;; Mapping between vector UNSPEC operations and the signed ('s'),
>   ;; unsigned ('u'), poly ('p') or float ('f') nature of their data type.
> +
> +(define_int_attr u[
> +            (UNSPEC_VHADD_S "") (UNSPEC_VHADD_U "u")
> +            (UNSPEC_VRHADD_S "") (UNSPEC_VRHADD_U "u")])
> +
>   (define_int_attr sup [
>     (UNSPEC_VADDL_S "s") (UNSPEC_VADDL_U "u")
>     (UNSPEC_VADDW_S "s") (UNSPEC_VADDW_U "u")
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index f9d7ba3..1127bdb 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -2179,15 +2179,41 @@
>     [(set_attr "type" "neon_add_widen")]
>   )
>
> +(define_expand "<u>avg<mode>3_floor"
> +  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
> +    (unspec:VDQIW[(match_operand:VDQIW 1 "s_register_operand" "w")
> +              (match_operand:VDQIW 2 "s_register_operand" "w")]
> +             VHADD))]
> +  "TARGET_NEON"
> +)
> +
> +(define_expand "<u>avg<mode>3_ceil"
> +  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
> +    (unspec:VDQIW[(match_operand:VDQIW 1 "s_register_operand" "w")
> +              (match_operand:VDQIW 2 "s_register_operand" "w")]
> +             VRHADD))]
> +  "TARGET_NEON"
> +)
> +
>   ; vhadd and vrhadd.
>
> -(define_insn "neon_v<r>hadd<sup><mode>"
> +(define_insn "neon_vhadd<sup><mode>"
>     [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
> -        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
> +    (unspec:VDQIW[(match_operand:VDQIW 1 "s_register_operand" "w")
>                  (match_operand:VDQIW 2 "s_register_operand" "w")]
>                 VHADD))]
>     "TARGET_NEON"
> - "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
> +  "vhadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
> +  [(set_attr "type" "neon_add_halve_q")]
> +)
> +
> +(define_insn "neon_vrhadd<sup><mode>"
> +  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
> +    (unspec:VDQIW[(match_operand:VDQIW 1 "s_register_operand" "w")
> +               (match_operand:VDQIW 2 "s_register_operand" "w")]
> +              VRHADD))]
> +  "TARGET_NEON"
> +  "vrhadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
>     [(set_attr "type" "neon_add_halve_q")]
>   )
>

Why are you splitting the neon_v<r>hadd<sup><mode> pattern here?

Wouldn't just defining the expanders be enough to get the codegen we
want?

Thanks,

Kyrill


> diff --git a/gcc/testsuite/gcc.target/arm/vect_vhadd_1.c b/gcc/testsuite/gcc.target/arm/vect_vhadd_1.c
> new file mode 100644
> index 0000000..946171c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/vect_vhadd_1.c
> @@ -0,0 +1,22 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +/* { dg-add-options arm_neon } */
> +
> +#include "vect_vhadd_1.h"
> +
> +#define BIAS 0
> +
> +FOR_EACH_SIGNED_TYPE (DEF_FUNC)
> +
> +int
> +main (void)
> +{
> +  FOR_EACH_SIGNED_TYPE (TEST_FUNC);
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler {\tvhadd\.s[0-9]+} } } */
> +/* { dg-final { scan-assembler {\tvhadd\.s[0-9]+} } } */
> +/* { dg-final { scan-assembler {\tvhadd\.s[0-9]+} } } */
> +/* { dg-final { scan-assembler-not {\tvrhadd\.s[0-9]+} } } */
> diff --git a/gcc/testsuite/gcc.target/arm/vect_vhadd_1.h b/gcc/testsuite/gcc.target/arm/vect_vhadd_1.h
> new file mode 100644
> index 0000000..e093b42
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/vect_vhadd_1.h
> @@ -0,0 +1,37 @@
> +#include <stdint.h>
> +
> +#define N 100
> +
> +#define DEF_FUNC(TYPE, B1, B2, C1, C2)\
> +void __attribute__ ((noipa)) \
> +f_##TYPE (TYPE *restrict a, TYPE *restrict b, TYPE *restrict c)\
> +{ \
> +    for (int i = 0; i < N; ++i) \
> +        a[i] = (b[i] + c[i] + BIAS) >> 1;\
> +}
> +
> +#define TEST_FUNC(TYPE, B1, B2, C1, C2)\
> +{ \
> +    TYPE a[N], b[N], c[N]; \
> +    for (TYPE i = 0; i < N; ++i)\
> +    { \
> +        b[i] = B1 + i * B2; \
> +        c[i] = C1 + i * C2; \
> +    } \
> +    f_##TYPE (a, b, c);  \
> +    for (TYPE i = 0; i < N; ++i)\
> +        if (a[i] != ((B1 + C1 + BIAS + i * (B2 + C2)) >> 1)) \
> +        __builtin_abort (); \
> +}
> +
> +#define FOR_EACH_SIGNED_TYPE(T) \
> +    T (int8_t, -124, 2, -40, 1) \
> +    T (int16_t, -32000, 510, -10000, 257) \
> +    T (int32_t, -2000000000, 131072, -3277000, 65537) \
> +    T (int64_t, -44, 100, -10000, 99)
> +
> +#define FOR_EACH_UNSIGNED_TYPE(T) \
> +    T (uint8_t, 4, 2, 40, 1) \
> +    T (uint16_t, 12, 510, 10000, 257) \
> +    T (uint32_t, 20, 131072, 3277000, 65537) \
> +    T (uint64_t, 90, 100, 10000, 99)
> diff --git a/gcc/testsuite/gcc.target/arm/vect_vrhadd_1.c b/gcc/testsuite/gcc.target/arm/vect_vrhadd_1.c
> new file mode 100644
> index 0000000..f6d67c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/vect_vrhadd_1.c
> @@ -0,0 +1,22 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +/* { dg-add-options arm_neon } */
> +
> +#include "vect_vhadd_1.h"
> +
> +#define BIAS 1
> +
> +FOR_EACH_SIGNED_TYPE (DEF_FUNC)
> +
> +int
> +main (void)
> +{
> +  FOR_EACH_SIGNED_TYPE (TEST_FUNC);
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler {\tvrhadd\.s[0-9]+} } } */
> +/* { dg-final { scan-assembler {\tvrhadd\.s[0-9]+} } } */
> +/* { dg-final { scan-assembler {\tvrhadd\.s[0-9]+} } } */
> +/* { dg-final { scan-assembler-not {\tvhadd\.s[0-9]+} } } */
>
> -- 
> 1.8.3
>
>
>
>



More information about the Gcc-patches mailing list