This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] [ARM] Fix widen-sum pattern in neon.md.



On 05/03/15 13:34, Xingxing Pan wrote:
Hi,
Hi Xingxing,
Thanks for improving this! Some comments inline.


The expanding of widen-sum pattern always fails. The vectorizer expects
the operands to have the same size, while the current implementation of
widen-sum pattern dose not conform to this.

This patch implements the widen-sum pattern with vpadal. Change the
vaddw pattern to anonymous. Add widen-sum test cases for neon.

How has this been tested? Bootstrap and testsuite?

-- Regards, Xingxing

fix-widen-sum.patch


commit 62637f371a3329ff56644526bc5dbf9356cbdd6c
Author: Xingxing Pan<xxingpan@marvell.com>
Date:   Wed Feb 25 16:44:25 2015 +0800

     Fix widen-sum pattern in neon.md.

     2015-03-05  Xingxing Pan<xxingpan@marvell.com>
config/arm/
         * iterators.md:
         (VWSD): New define_mode_iterator.
         (V_widen_sum_d): New define_mode_attr.
         * neon.md
         (widen_ssum<mode>3): Redefined.
         (widen_usum<mode>3): Ditto.
         (neon_svaddw<mode>3): New anonymous define_insn.
         (neon_uvaddw<mode>3): Ditto.

Please use proper ChangeLog format:
    * config/arm/iterators.md (VWSD): New.

and so on. Separate ChangeLog for the testsuite.

         testsuite/gcc.target/arm/neon/
         * vect-widen-sum-char2short-s-d.c: New file.
         * vect-widen-sum-char2short-s.c: Ditto.
         * vect-widen-sum-char2short-u-d.c: Ditto.
         * vect-widen-sum-char2short-u.c: Ditto.
         * vect-widen-sum-short2int-s-d.c: Ditto.
         * vect-widen-sum-short2int-s.c: Ditto.
         * vect-widen-sum-short2int-u-d.c: Ditto.
         * vect-widen-sum-short2int-u.c: Ditto.
         testsuite/lib/
         * target-supports.exp:
         (check_effective_target_vect_widen_sum_hi_to_si_pattern): Return 1 for ARM NEON.
         (check_effective_target_vect_widen_sum_hi_to_si): Ditto.
         (check_effective_target_vect_widen_sum_qi_to_hi): Ditto.

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index f7f8ab7..4ba5901 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -95,6 +95,9 @@
  ;; Widenable modes.
  (define_mode_iterator VW [V8QI V4HI V2SI])
+;; Widenable modes. Used by widen sum.
+(define_mode_iterator VWSD [V8QI V4HI V16QI V8HI])

Two spaces after full stop in comment.

+
  ;; Narrowable modes.
  (define_mode_iterator VN [V8HI V4SI V2DI])
@@ -558,6 +561,11 @@
  ;; Widen. Result is half the number of elements, but widened to double-width.
  (define_mode_attr V_unpack   [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
+;; Widen. Result is half the number of elements, but widened to double-width.
+;; Used by widen sum.
Likewise.

+(define_mode_attr V_widen_sum_d [(V8QI "V4HI") (V4HI "V2SI")
+                                 (V16QI "V8HI") (V8HI "V4SI")])
+
  ;; Conditions to be used in extend<mode>di patterns.
  (define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")])
  (define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 63c327e..6cac36d 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1174,7 +1174,31 @@
;; Widening operations -(define_insn "widen_ssum<mode>3"
+(define_expand "widen_usum<mode>3"
+ [(match_operand:<V_widen_sum_d> 0 "s_register_operand" "")
+  (match_operand:VWSD 1 "s_register_operand" "")
+  (match_operand:<V_widen_sum_d> 2 "s_register_operand" "")]
+  "TARGET_NEON"
+  {
+    emit_move_insn(operands[0], operands[2]);
+    emit_insn (gen_neon_vpadalu<mode> (operands[0], operands[0], operands[1]));
+    DONE;
+  }
+)

Is the move from operands[2] to operands[0] necessary?
Can you take advantage of the fact that neon_vpadal<sup><mode> has
"0" in it's constraint, thus making register-allocation tie the operands to the same register?

+
+(define_expand "widen_ssum<mode>3"
+ [(match_operand:<V_widen_sum_d> 0 "s_register_operand" "")
+  (match_operand:VWSD 1 "s_register_operand" "")
+  (match_operand:<V_widen_sum_d> 2 "s_register_operand" "")]
+  "TARGET_NEON"
+  {
+    emit_move_insn(operands[0], operands[2]);
+    emit_insn (gen_neon_vpadals<mode> (operands[0], operands[0], operands[1]));
+    DONE;
+  }
+)
+
+(define_insn "*neon_svaddw<mode>3"
    [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
  	(plus:<V_widen> (sign_extend:<V_widen>
  			  (match_operand:VW 1 "s_register_operand" "%w"))
@@ -1184,7 +1208,7 @@
    [(set_attr "type" "neon_add_widen")]
  )
-(define_insn "widen_usum<mode>3"
+(define_insn "*neon_uvaddw<mode>3"
    [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
  	(plus:<V_widen> (zero_extend:<V_widen>
  			  (match_operand:VW 1 "s_register_operand" "%w"))
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c
new file mode 100644
index 0000000..c81c325
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>

Do you need this stdlib.h include? If not, please remove it, we try to avoid including system headers in tests if we can. To get the definition of abort () you can just do declare extern void abort (void); at the top of the file. Same in the other tests.

Cheers,
Kyrill


+
+typedef signed char STYPE1;
+typedef signed short STYPE2;
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+  int i;
+  STYPE2 sum = 0;
+  STYPE2 check_sum = 0;
+
+  /* widenning sum: sum chars into short.
+
+     Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern
+     is currently not detected because of this patch:
+
+     2005-12-26  Kazu Hirata<kazu@codesourcery.com>
+        PR tree-optimization/25125
+   */
+
+  for (i = 0; i < N; i++)
+    {
+      sdata[i] = i*2;
+      check_sum += sdata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += sdata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  ssum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c
new file mode 100644
index 0000000..de53f5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef signed char STYPE1;
+typedef signed short STYPE2;
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+  int i;
+  STYPE2 sum = 0;
+  STYPE2 check_sum = 0;
+
+  /* widenning sum: sum chars into short.
+
+     Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern
+     is currently not detected because of this patch:
+
+     2005-12-26  Kazu Hirata<kazu@codesourcery.com>
+        PR tree-optimization/25125
+   */
+
+  for (i = 0; i < N; i++)
+    {
+      sdata[i] = i*2;
+      check_sum += sdata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += sdata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  ssum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c
new file mode 100644
index 0000000..bfa17d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef unsigned char UTYPE1;
+typedef unsigned short UTYPE2;
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+  int i;
+  UTYPE2 sum = 0;
+  UTYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      udata[i] = i*2;
+      check_sum += udata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += udata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  usum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c
new file mode 100644
index 0000000..38d8179
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef unsigned char UTYPE1;
+typedef unsigned short UTYPE2;
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+  int i;
+  UTYPE2 sum = 0;
+  UTYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      udata[i] = i*2;
+      check_sum += udata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += udata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  usum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c
new file mode 100644
index 0000000..7aef5d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target arm_neon } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef signed short STYPE1;
+typedef signed int STYPE2;
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+  int i;
+  STYPE2 sum = 0;
+  STYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      sdata[i] = i*2;
+      check_sum += sdata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += sdata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  ssum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c
new file mode 100644
index 0000000..914ad09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef signed short STYPE1;
+typedef signed int STYPE2;
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+  int i;
+  STYPE2 sum = 0;
+  STYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      sdata[i] = i*2;
+      check_sum += sdata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += sdata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  ssum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c
new file mode 100644
index 0000000..6f4a29b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef unsigned short UTYPE1;
+typedef unsigned int UTYPE2;
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+  int i;
+  UTYPE2 sum = 0;
+  UTYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      udata[i] = i*2;
+      check_sum += udata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += udata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  usum ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c
new file mode 100644
index 0000000..194b260
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef unsigned short UTYPE1;
+typedef unsigned int UTYPE2;
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+  int i;
+  UTYPE2 sum = 0;
+  UTYPE2 check_sum = 0;
+
+  for (i = 0; i < N; i++)
+    {
+      udata[i] = i*2;
+      check_sum += udata[i];
+      /* Avoid vectorization.  */
+      if (y)
+	abort ();
+    }
+
+  /* widenning sum: sum chars into int.  */
+  for (i = 0; i < N; i++)
+    {
+      sum += udata[i];
+    }
+
+  /* check results:  */
+  if (sum != check_sum)
+    abort ();
+
+  return 0;
+}
+
+int
+main (void)
+{
+  usum ();
+  return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 6b957de..eaccb83 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3767,6 +3767,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
      } else {
          set et_vect_widen_sum_hi_to_si_pattern_saved 0
          if { [istarget powerpc*-*-*]
+             || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
               || [istarget ia64-*-*] } {
              set et_vect_widen_sum_hi_to_si_pattern_saved 1
          }
@@ -3790,7 +3791,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si { } {
      } else {
          set et_vect_widen_sum_hi_to_si_saved [check_effective_target_vect_unpack]
          if { [istarget powerpc*-*-*]
-	     || [istarget ia64-*-*] } {
+             || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
+             || [istarget ia64-*-*] } {
              set et_vect_widen_sum_hi_to_si_saved 1
          }
      }
@@ -3813,7 +3815,7 @@ proc check_effective_target_vect_widen_sum_qi_to_hi { } {
      } else {
          set et_vect_widen_sum_qi_to_hi_saved 0
  	if { [check_effective_target_vect_unpack]
-	     || [check_effective_target_arm_neon_ok]
+	     || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
  	     || [istarget ia64-*-*] } {
              set et_vect_widen_sum_qi_to_hi_saved 1
  	}



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]