[PATCH][Arm] Implement vector average patterns in aarch32

Tue Jul 2 09:18:00 GMT 2019

Hi Iain,

On 6/28/19 1:56 PM, Iain Apreotesei wrote:
> gcc/ChangeLog:
>
> 2019-06-28  Iain Apreotesei  <iain.apreotesei@arm.com>
>
>      * config/arm/iterators.md (VRHADD, VHADD): Add, update int_iterators.
>      (u) new int_attr.
>      * config/arm/neon.md (<u>avg<mode>3_floor, <u>avg<mode>3_ceil)
>      (neon_vhadd<sup><mode>, neon_vrhadd<sup><mode>): Add new patterns.
>
> gcc/testsuite/ChangeLog:
>
> 2019-06-28  Iain Apreotesei  <iain.apreotesei@arm.com>
>
>      * gcc.target/arm/vect_vhadd_1.c: New test.
>      * gcc.target/arm/vect_vhadd_1.h: New test.
>      * gcc.target/arm/vect_vrhadd_1.c: New test.
>
Thanks for the patch.

> Change-Id: Ief4009984ca9974993530b582bd9ba431e42c3ed
> ---
>   gcc/config/arm/iterators.md                  |  9 +++++--
>   gcc/config/arm/neon.md                       | 32 +++++++++++++++++++++---
>   gcc/testsuite/gcc.target/arm/vect_vhadd_1.c  | 22 +++++++++++++++++
>   gcc/testsuite/gcc.target/arm/vect_vhadd_1.h  | 37 ++++++++++++++++++++++++++++
>   gcc/testsuite/gcc.target/arm/vect_vrhadd_1.c | 22 +++++++++++++++++
>   5 files changed, 117 insertions(+), 5 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.target/arm/vect_vhadd_1.c
>   create mode 100644 gcc/testsuite/gcc.target/arm/vect_vhadd_1.h
>   create mode 100644 gcc/testsuite/gcc.target/arm/vect_vrhadd_1.c
>
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index c33e572..43ecc60 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -308,8 +308,8 @@
>
>   (define_int_iterator VADDW [UNSPEC_VADDW_S UNSPEC_VADDW_U])
>
> -(define_int_iterator VHADD [UNSPEC_VRHADD_S UNSPEC_VRHADD_U
> -                UNSPEC_VHADD_S UNSPEC_VHADD_U])
> +(define_int_iterator VHADD[UNSPEC_VHADD_S UNSPEC_VHADD_U])
> +(define_int_iterator VRHADD[UNSPEC_VRHADD_S UNSPEC_VRHADD_U])
>
>   (define_int_iterator VQADD [UNSPEC_VQADD_S UNSPEC_VQADD_U])
>
> @@ -818,6 +818,11 @@
>
>   ;; Mapping between vector UNSPEC operations and the signed ('s'),
>   ;; unsigned ('u'), poly ('p') or float ('f') nature of their data type.
> +
> +(define_int_attr u[
> +            (UNSPEC_VHADD_S "") (UNSPEC_VHADD_U "u")
> +            (UNSPEC_VRHADD_S "") (UNSPEC_VRHADD_U "u")])
> +
>   (define_int_attr sup [
>     (UNSPEC_VADDL_S "s") (UNSPEC_VADDL_U "u")
>     (UNSPEC_VADDW_S "s") (UNSPEC_VADDW_U "u")
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index f9d7ba3..1127bdb 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -2179,15 +2179,41 @@
>     [(set_attr "type" "neon_add_widen")]
>   )
>
> +(define_expand "<u>avg<mode>3_floor"
> +  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
> +    (unspec:VDQIW[(match_operand:VDQIW 1 "s_register_operand" "w")
> +              (match_operand:VDQIW 2 "s_register_operand" "w")]
> +             VHADD))]
> +  "TARGET_NEON"
> +)
> +
> +(define_expand "<u>avg<mode>3_ceil"
> +  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
> +    (unspec:VDQIW[(match_operand:VDQIW 1 "s_register_operand" "w")
> +              (match_operand:VDQIW 2 "s_register_operand" "w")]
> +             VRHADD))]
> +  "TARGET_NEON"
> +)
> +
>   ; vhadd and vrhadd.
>
> -(define_insn "neon_v<r>hadd<sup><mode>"
> +(define_insn "neon_vhadd<sup><mode>"
>     [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
> -        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
> +    (unspec:VDQIW[(match_operand:VDQIW 1 "s_register_operand" "w")
>                  (match_operand:VDQIW 2 "s_register_operand" "w")]
>                 VHADD))]
>     "TARGET_NEON"
> - "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
> +  "vhadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
> +  [(set_attr "type" "neon_add_halve_q")]
> +)
> +
> +(define_insn "neon_vrhadd<sup><mode>"
> +  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
> +    (unspec:VDQIW[(match_operand:VDQIW 1 "s_register_operand" "w")
> +               (match_operand:VDQIW 2 "s_register_operand" "w")]
> +              VRHADD))]
> +  "TARGET_NEON"
> +  "vrhadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
>     [(set_attr "type" "neon_add_halve_q")]
>   )
>

Why are you splitting the neon_v<r>hadd<sup><mode> pattern here?

It seems to me that just defining the expanders should be enough to get
the codegen we want.

Thanks,

Kyrill

> diff --git a/gcc/testsuite/gcc.target/arm/vect_vhadd_1.c
> b/gcc/testsuite/gcc.target/arm/vect_vhadd_1.c
> new file mode 100644
> index 0000000..946171c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/vect_vhadd_1.c
> @@ -0,0 +1,22 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +/* { dg-add-options arm_neon } */
> +
> +#include "vect_vhadd_1.h"
> +
> +#define BIAS 0
> +
> +FOR_EACH_SIGNED_TYPE (DEF_FUNC)
> +
> +int
> +main (void)
> +{
> +  FOR_EACH_SIGNED_TYPE (TEST_FUNC);
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler {\tvhadd\.s[0-9]+} } } */
> +/* { dg-final { scan-assembler {\tvhadd\.s[0-9]+} } } */
> +/* { dg-final { scan-assembler {\tvhadd\.s[0-9]+} } } */
> +/* { dg-final { scan-assembler-not {\tvrhadd\.s[0-9]+} } } */
> diff --git a/gcc/testsuite/gcc.target/arm/vect_vhadd_1.h
> b/gcc/testsuite/gcc.target/arm/vect_vhadd_1.h
> new file mode 100644
> index 0000000..e093b42
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/vect_vhadd_1.h
> @@ -0,0 +1,37 @@
> +#include <stdint.h>
> +
> +#define N 100
> +
> +#define DEF_FUNC(TYPE, B1, B2, C1, C2)\
> +void __attribute__ ((noipa)) \
> +f_##TYPE (TYPE *restrict a, TYPE *restrict b, TYPE *restrict c)\
> +{ \
> +    for (int i = 0; i < N; ++i) \
> +        a[i] = (b[i] + c[i] + BIAS) >> 1;\
> +}
> +
> +#define TEST_FUNC(TYPE, B1, B2, C1, C2)\
> +{ \
> +    TYPE a[N], b[N], c[N]; \
> +    for (TYPE i = 0; i < N; ++i)\
> +    { \
> +        b[i] = B1 + i * B2; \
> +        c[i] = C1 + i * C2; \
> +    } \
> +    f_##TYPE (a, b, c);  \
> +    for (TYPE i = 0; i < N; ++i)\
> +        if (a[i] != ((B1 + C1 + BIAS + i * (B2 + C2)) >> 1)) \
> +        __builtin_abort (); \
> +}
> +
> +#define FOR_EACH_SIGNED_TYPE(T) \
> +    T (int8_t, -124, 2, -40, 1) \
> +    T (int16_t, -32000, 510, -10000, 257) \
> +    T (int32_t, -2000000000, 131072, -3277000, 65537) \
> +    T (int64_t, -44, 100, -10000, 99)
> +
> +#define FOR_EACH_UNSIGNED_TYPE(T) \
> +    T (uint8_t, 4, 2, 40, 1) \
> +    T (uint16_t, 12, 510, 10000, 257) \
> +    T (uint32_t, 20, 131072, 3277000, 65537) \
> +    T (uint64_t, 90, 100, 10000, 99)
> diff --git a/gcc/testsuite/gcc.target/arm/vect_vrhadd_1.c
> b/gcc/testsuite/gcc.target/arm/vect_vrhadd_1.c
> new file mode 100644
> index 0000000..f6d67c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/vect_vrhadd_1.c
> @@ -0,0 +1,22 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +/* { dg-add-options arm_neon } */
> +
> +#include "vect_vhadd_1.h"
> +
> +#define BIAS 1
> +
> +FOR_EACH_SIGNED_TYPE (DEF_FUNC)
> +
> +int
> +main (void)
> +{
> +  FOR_EACH_SIGNED_TYPE (TEST_FUNC);
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler {\tvrhadd\.s[0-9]+} } } */
> +/* { dg-final { scan-assembler {\tvrhadd\.s[0-9]+} } } */
> +/* { dg-final { scan-assembler {\tvrhadd\.s[0-9]+} } } */
> +/* { dg-final { scan-assembler-not {\tvhadd\.s[0-9]+} } } */
>
> -- 
> 1.8.3
>
>
>
>