Tweak gcc.dg/vect/bb-slp-4[01].c (PR92366)

Richard Biener richard.guenther@gmail.com
Thu Nov 14 19:03:00 GMT 2019


On November 14, 2019 7:10:10 PM GMT+01:00, Richard Sandiford <richard.sandiford@arm.com> wrote:
>gcc.dg/vect/bb-slp-40.c was failing on some targets because the
>explicit dg-options overrode things like -maltivec.  This patch
>uses dg-additional-options instead.
>
>Also, it seems safer not to require exactly 1 instance of each message,
>since that depends on the target vector length.
>
>gcc.dg/vect/bb-slp-41.c contained invariant constructors that are
>vectorised on AArch64 (foo) and constructors that aren't (bar).
>This meant that the number of times we print "Found vectorizable
>constructor" depended on how many vector sizes we try, since we'd
>print it for each failed attempt.
>
>In foo, we create invariant { b[0], ... } and { b[1], ... },
>and the test is making sure that the two separate invariant vectors
>can be fed from the same vector load at b.  This is a different case
>from bb-slp-40.c, where the constructors are naturally separate.
>(The expected count is 4 rather than 2 because we can vectorise the
>epilogue too.)
>
>However, due to limitations in the loop vectoriser, we still do the
>addition of { b[0], ... } and { b[1], ... } in the loop.  Hopefully
>that'll be fixed at some point, so this patch adds an alternative test
>that directly needs 4 separate invariant constructors.  E.g. with Joel's
>SLP optimisation, the new test generates:
>
>        ldr     q4, [x1]
>        dup     v7.4s, v4.s[0]
>        dup     v6.4s, v4.s[1]
>        dup     v5.4s, v4.s[2]
>        dup     v4.4s, v4.s[3]
>
>instead of the somewhat bizarre:
>
>        ldp     s6, s5, [x1, 4]
>        ldr     s4, [x1, 12]
>        ld1r    {v7.4s}, [x1]
>        dup     v6.4s, v6.s[0]
>        dup     v5.4s, v5.s[0]
>        dup     v4.4s, v4.s[0]
>
>The patch then disables vectorisation of the original foo in
>bb-slp-41.c, so that we get the same correctness testing
>for bar but don't need to test for specific counts.
>
>Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64-linux-gnu.
>OK to install?

Ok. 

Richard. 

>Richard
>
>
>2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>
>
>gcc/testsuite/
>	PR testsuite/92366
>	* gcc.dg/vect/bb-slp-40.c: Use dg-additional-options instead
>	of dg-options.  Remove expected counts.
>	* gcc.dg/vect/bb-slp-41.c: Remove dg-options and explicit
>	dg-do run.  Suppress vectorization of foo.
>	* gcc.dg/vect/bb-slp-42.c: New test.
>
>Index: gcc/testsuite/gcc.dg/vect/bb-slp-40.c
>===================================================================
>--- gcc/testsuite/gcc.dg/vect/bb-slp-40.c	2019-11-04 21:13:57.363758109
>+0000
>+++ gcc/testsuite/gcc.dg/vect/bb-slp-40.c	2019-11-14 18:08:36.323546916
>+0000
>@@ -1,5 +1,5 @@
> /* { dg-do compile } */
>-/* { dg-options "-O3 -fdump-tree-slp-all" } */
>+/* { dg-additional-options "-fvect-cost-model=dynamic" } */
> /* { dg-require-effective-target vect_int } */
> 
> char g_d[1024], g_s1[1024], g_s2[1024];
>@@ -30,5 +30,5 @@ void foo(void)
> }
> 
> /* See that we vectorize an SLP instance.  */
>-/* { dg-final { scan-tree-dump-times "Found vectorizable constructor"
>1 "slp1" } } */
>-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1
>"slp1" } } */
>+/* { dg-final { scan-tree-dump "Found vectorizable constructor" "slp1"
>} } */
>+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" }
>} */
>Index: gcc/testsuite/gcc.dg/vect/bb-slp-41.c
>===================================================================
>--- gcc/testsuite/gcc.dg/vect/bb-slp-41.c	2019-11-04 21:13:57.363758109
>+0000
>+++ gcc/testsuite/gcc.dg/vect/bb-slp-41.c	2019-11-14 18:08:36.323546916
>+0000
>@@ -1,10 +1,9 @@
>-/* { dg-do run } */
>-/* { dg-options "-O3 -fdump-tree-slp-all -fno-vect-cost-model" } */
> /* { dg-require-effective-target vect_int } */
> 
> #define ARR_SIZE 1000
> 
>-void foo (int *a, int *b)
>+void __attribute__((optimize (0)))
>+foo (int *a, int *b)
> {
>   int i;
>   for (i = 0; i < (ARR_SIZE - 2); ++i)
>@@ -56,6 +55,4 @@ int main ()
>   return 0;
> 
> }
>-/* See that we vectorize an SLP instance.  */
>-/* { dg-final { scan-tree-dump-times "Found vectorizable constructor"
>12 "slp1" } } */
>-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4
>"slp1" } } */
>+/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP"
>"slp1" } } */
>Index: gcc/testsuite/gcc.dg/vect/bb-slp-42.c
>===================================================================
>--- /dev/null	2019-09-17 11:41:18.176664108 +0100
>+++ gcc/testsuite/gcc.dg/vect/bb-slp-42.c	2019-11-14 18:08:36.323546916
>+0000
>@@ -0,0 +1,49 @@
>+/* { dg-require-effective-target vect_int } */
>+/* { dg-require-effective-target vect_perm } */
>+
>+#include "tree-vect.h"
>+
>+#define ARR_SIZE 1024
>+
>+void __attribute__((noipa))
>+foo (int a[][ARR_SIZE], int *b)
>+{
>+  int i;
>+  for (i = 0; i < ARR_SIZE; ++i)
>+    {
>+      a[0][i] += b[0];
>+      a[1][i] += b[1];
>+      a[2][i] += b[2];
>+      a[3][i] += b[3];
>+    }
>+}
>+
>+int
>+main ()
>+{
>+  int a[4][ARR_SIZE];
>+  int b[4];
>+
>+  check_vect ();
>+
>+  for (int i = 0; i < 4; ++i)
>+    {
>+      b[i] = 20 * i;
>+      for (int j = 0; j < ARR_SIZE; ++j)
>+	a[i][j] = (i + 1) * ARR_SIZE - j;
>+    }
>+
>+  foo (a, b);
>+
>+  for (int i = 0; i < 4; ++i)
>+    for (int j = 0; j < ARR_SIZE; ++j)
>+      if (a[i][j] != (i + 1) * ARR_SIZE - j + 20 * i)
>+	__builtin_abort ();
>+
>+  return 0;
>+
>+}
>+
>+/* See that we vectorize an SLP instance.  */
>+/* { dg-final { scan-tree-dump "Found vectorizable constructor" "slp1"
>{ target { ! vect_fully_masked } } } } */
>+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4
>"slp1" { target { ! vect_fully_masked } } } } */



More information about the Gcc-patches mailing list