This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Fix PEELING_FOR_NITERS calculation (PR 87288)


On Thu, Sep 20, 2018 at 1:44 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> PEELING_FOR_GAPS now means "peel one iteration for the epilogue",
> in much the same way that PEELING_FOR_ALIGNMENT > 0 means
> "peel that number of iterations for the prologue".  We weren't
> taking this into account when deciding whether we needed to peel
> further scalar iterations beyond the iterations for "gaps" and
> "alignment".
>
> Only the first test failed before the patch.  The other two
> are just for completeness.
>
> Tested on aarch64-linux-gnu (with and without SVE), aarch64_be-elf
> and x86_64-linux-gnu.  OK to install?

OK.

Richard.

> Richard
>
>
> 2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>
>
> gcc/
>         PR tree-optimization/87288
>         * tree-vect-loop.c (vect_analyze_loop_2): Take PEELING_FOR_GAPS
>         into account when determining PEELING_FOR_NITER.
>
> gcc/testsuite/
>         PR tree-optimization/87288
>         * gcc.dg/vect/pr87288-1.c: New test.
>         * gcc.dg/vect/pr87288-2.c: Likewise.
>         * gcc.dg/vect/pr87288-3.c: Likewise.
>
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c        2018-09-20 12:39:14.541555902 +0100
> +++ gcc/tree-vect-loop.c        2018-09-20 12:39:19.013518199 +0100
> @@ -2074,14 +2074,22 @@ vect_analyze_loop_2 (loop_vec_info loop_
>      /* The main loop handles all iterations.  */
>      LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
>    else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> -          && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
> +          && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
>      {
> -      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo)
> -                      - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo),
> +      /* Work out the (constant) number of iterations that need to be
> +        peeled for reasons other than niters.  */
> +      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
> +      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
> +       peel_niter += 1;
> +      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
>                        LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
>         LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
>      }
>    else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
> +          /* ??? When peeling for gaps but not alignment, we could
> +             try to check whether the (variable) niters is known to be
> +             VF * N + 1.  That's something of a niche case though.  */
> +          || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
>            || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
>            || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
>                 < (unsigned) exact_log2 (const_vf))
> Index: gcc/testsuite/gcc.dg/vect/pr87288-1.c
> ===================================================================
> --- /dev/null   2018-09-14 11:16:31.122530289 +0100
> +++ gcc/testsuite/gcc.dg/vect/pr87288-1.c       2018-09-20 12:39:19.009518233 +0100
> @@ -0,0 +1,49 @@
> +#include "tree-vect.h"
> +
> +#define N (VECTOR_BITS / 32)
> +#define MAX_COUNT 4
> +
> +void __attribute__ ((noipa))
> +run (int *restrict a, int *restrict b, int count)
> +{
> +  for (int i = 0; i < count * N; ++i)
> +    {
> +      a[i * 2] = b[i * 2] + count;
> +      a[i * 2 + 1] = count;
> +    }
> +}
> +
> +void __attribute__ ((noipa))
> +check (int *restrict a, int count)
> +{
> +  for (int i = 0; i < count * N; ++i)
> +    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
> +      __builtin_abort ();
> +  if (a[count * 2 * N] != 999)
> +    __builtin_abort ();
> +}
> +
> +int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
> +
> +int
> +main (void)
> +{
> +  check_vect ();
> +
> +  for (int i = 0; i < N * MAX_COUNT; ++i)
> +    {
> +      b[i * 2] = i * 41;
> +      asm volatile ("" ::: "memory");
> +    }
> +
> +  for (int i = 0; i <= MAX_COUNT; ++i)
> +    {
> +      a[i * 2 * N] = 999;
> +      run (a, b, i);
> +      check (a, i);
> +    }
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {LOOP VECTORIZED} 1 "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
> Index: gcc/testsuite/gcc.dg/vect/pr87288-2.c
> ===================================================================
> --- /dev/null   2018-09-14 11:16:31.122530289 +0100
> +++ gcc/testsuite/gcc.dg/vect/pr87288-2.c       2018-09-20 12:39:19.009518233 +0100
> @@ -0,0 +1,64 @@
> +#include "tree-vect.h"
> +
> +#define N (VECTOR_BITS / 32)
> +#define MAX_COUNT 4
> +
> +#define RUN_COUNT(COUNT)                               \
> +  void __attribute__ ((noipa))                         \
> +  run_##COUNT (int *restrict a, int *restrict b)       \
> +  {                                                    \
> +    for (int i = 0; i < N * COUNT; ++i)                        \
> +      {                                                        \
> +       a[i * 2] = b[i * 2] + COUNT;                    \
> +       a[i * 2 + 1] = COUNT;                           \
> +      }                                                        \
> +  }
> +
> +RUN_COUNT (1)
> +RUN_COUNT (2)
> +RUN_COUNT (3)
> +RUN_COUNT (4)
> +
> +void __attribute__ ((noipa))
> +check (int *restrict a, int count)
> +{
> +  for (int i = 0; i < count * N; ++i)
> +    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
> +      __builtin_abort ();
> +  if (a[count * 2 * N] != 999)
> +    __builtin_abort ();
> +}
> +
> +int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
> +
> +int
> +main (void)
> +{
> +  check_vect ();
> +
> +  for (int i = 0; i < N * MAX_COUNT; ++i)
> +    {
> +      b[i * 2] = i * 41;
> +      asm volatile ("" ::: "memory");
> +    }
> +
> +  a[N * 2] = 999;
> +  run_1 (a, b);
> +  check (a, 1);
> +
> +  a[N * 4] = 999;
> +  run_2 (a, b);
> +  check (a, 2);
> +
> +  a[N * 6] = 999;
> +  run_3 (a, b);
> +  check (a, 3);
> +
> +  a[N * 8] = 999;
> +  run_4 (a, b);
> +  check (a, 4);
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
> Index: gcc/testsuite/gcc.dg/vect/pr87288-3.c
> ===================================================================
> --- /dev/null   2018-09-14 11:16:31.122530289 +0100
> +++ gcc/testsuite/gcc.dg/vect/pr87288-3.c       2018-09-20 12:39:19.009518233 +0100
> @@ -0,0 +1,64 @@
> +#include "tree-vect.h"
> +
> +#define N (VECTOR_BITS / 32)
> +#define MAX_COUNT 4
> +
> +#define RUN_COUNT(COUNT)                               \
> +  void __attribute__ ((noipa))                         \
> +  run_##COUNT (int *restrict a, int *restrict b)       \
> +  {                                                    \
> +    for (int i = 0; i < N * COUNT + 1; ++i)            \
> +      {                                                        \
> +       a[i * 2] = b[i * 2] + COUNT;                    \
> +       a[i * 2 + 1] = COUNT;                           \
> +      }                                                        \
> +  }
> +
> +RUN_COUNT (1)
> +RUN_COUNT (2)
> +RUN_COUNT (3)
> +RUN_COUNT (4)
> +
> +void __attribute__ ((noipa))
> +check (int *restrict a, int count)
> +{
> +  for (int i = 0; i < count * N + 1; ++i)
> +    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
> +      __builtin_abort ();
> +  if (a[count * 2 * N + 2] != 999)
> +    __builtin_abort ();
> +}
> +
> +int a[N * MAX_COUNT * 2 + 3], b[N * MAX_COUNT * 2 + 2];
> +
> +int
> +main (void)
> +{
> +  check_vect ();
> +
> +  for (int i = 0; i < N * MAX_COUNT + 1; ++i)
> +    {
> +      b[i * 2] = i * 41;
> +      asm volatile ("" ::: "memory");
> +    }
> +
> +  a[N * 2 + 2] = 999;
> +  run_1 (a, b);
> +  check (a, 1);
> +
> +  a[N * 4 + 2] = 999;
> +  run_2 (a, b);
> +  check (a, 2);
> +
> +  a[N * 6 + 2] = 999;
> +  run_3 (a, b);
> +  check (a, 3);
> +
> +  a[N * 8 + 2] = 999;
> +  run_4 (a, b);
> +  check (a, 4);
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]