Bug 97821 - [9/10/11/12 Regression] wrong code with -ftree-vectorize at -O1 on x86_64-pc-linux-gnu by r6-3608
Summary: [9/10/11/12 Regression] wrong code with -ftree-vectorize at -O1 on x86_64-pc-linux-gnu by r6-3608
Status: NEW
Alias: None
Product: gcc
Classification: Unclassified
Component: tree-optimization (show other bugs)
Version: 11.0
Importance: P2 normal
Target Milestone: 9.5
Assignee: Not yet assigned to anyone
URL:
Keywords: wrong-code
Depends on:
Blocks:
 
Reported: 2020-11-13 22:17 UTC by Zhendong Su
Modified: 2021-11-09 08:27 UTC (History)
6 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2020-11-14 00:00:00


Attachments
for the testsuite (1.22 KB, text/plain)
2020-11-16 10:26 UTC, Richard Biener
Details

Note You need to log in before you can comment on or make changes to this bug.
Description Zhendong Su 2020-11-13 22:17:21 UTC
The code is valid, but it is hard to reduce, so still quite large.

[509] % gcctk -v
Using built-in specs.
COLLECT_GCC=gcctk
COLLECT_LTO_WRAPPER=/local/suz-local/software/local/gcc-trunk/libexec/gcc/x86_64-pc-linux-gnu/11.0.0/lto-wrapper
Target: x86_64-pc-linux-gnu
Configured with: ../gcc-trunk/configure --disable-bootstrap --prefix=/local/suz-local/software/local/gcc-trunk --enable-languages=c,c++ --disable-werror --enable-multilib --with-system-zlib
Thread model: posix
Supported LTO compression algorithms: zlib
gcc version 11.0.0 20201113 (experimental) [master revision 54896b10dbe:c3a97a9df4b:a514934a0565255276adaa4fbd4aa35579ec33c6] (GCC) 
[510] % 
[510] % gcctk -O1 small.c; ./a.out
00005030-170
[511] % gcctk -O1 -ftree-vectorize small.c; ./a.out
00005030-176
[512] % 
[512] % cat small.c
/* printf is declared directly here rather than by including <stdio.h>. */
int printf (const char *, ...);

/*
 * File-scope state used by aq() and main().  Objects declared without an
 * initializer have static storage duration and therefore start out as zero
 * (or as null pointers).
 */
static unsigned a, f, v;
int b, h, aa, ab, ac, ad, ae, y, z, af;
static long c, m, t, ag, ah = 3;
static signed d;
static char e, ai;
static short g, j = 1, o, w;
int *i, *s;
long long l;
static int *n;
char p;
static int q;
static int r;
static int u;
short x;
/* aj points at l, so a store through *aj updates l. */
long long *aj = &l;
static signed ak;
/* Volatile objects: each evaluation of al or am in aq() is a volatile read. */
static volatile unsigned al = 5;
static volatile short am = 1;
/* Returns the second pointer argument unchanged; the first is not used. */
int *an(int *ao, int *ap)
{
  int *selected = ap;
  return selected;
}
/*
 * Main body of the reduced wrong-code reproducer; always returns 0.
 *
 * The function has two phases: a loop driven by the file-scope counter m,
 * followed by a pair of nested loops driven by r and ag.  Control moves
 * between the phases with goto (labels ba, bb, bh, bk, bl).  Progress is
 * reported through printf, and the addresses of several block-local
 * objects (k, av, bj, be) are stored into file-scope pointers.
 *
 * This is a machine-reduced testcase: keep statement order and the many
 * otherwise-useless volatile reads exactly as they are.
 */
static int aq() {
  /* ar and at are initialized but never referenced again in this function. */
  int ar[] = {2, 2, 2, 2, 2, 2};
  short *as = &x;
  int at[] = {0, 1, 0, 1};
  int au = ab = 0;
  /* Phase 1: runs while the file-scope counter m is <= 1. */
  for (; m <= 1; m++) {
    int av = 0, k, aw = e && u, ax = aw || ag;
    int **ay = &n;
    for (; ab; ab++)
      ac = 0;
    for (; ac; ac++)
      /* Expression statement on a volatile object: evaluated only to read am. */
      am;
    u &&am;
    short az = am || a ^ w;
    unsigned bc = am & w | am || ag;
  /* Also the target of the `goto ba` in phase 2, which jumps into this loop body. */
  ba:
    aw = u;
    i = 0;
    for (; i; i++)
      b = a;
    printf("0");
    if (p) {
      printf("%ld", ag);
      continue;
    }
    if (ag) {
      printf("7");
      /* By C precedence this groups as w | (((ag < e) > c) < ax). */
      e = w | ag<e> c < ax;
    }
    if (w) {
      printf("%d", u);
      goto bb;
    }
    if (u)
      printf("%d", e);
    /* Stores the address of the block-local k in the file-scope pointer s. */
    s = &k;
    u = aw;
    t = 0;
    for (; t <= 1; t++)
      /* ay points at n and an() returns its second argument, so this sets n = &av. */
      *ay = an(&au, &av);
    e++;
  }
  /* Phase 2: neither loop has an increment expression; r and ag change in the body. */
  for (; r >= 0;)
    for (; ag <= 5;) {
      /* Only bd[0] is ever written, and bd is never read. */
      signed bd[6];
      int be = 0, bf = am % al;
      for (; be < 6; be++)
        bd[0] = 9;
      h = 0;
      for (; h <= 5; h++)
        /* as points at x and aj at l, so this assigns aa to x and then to l. */
        *aj = *as = aa;
      for (; w; w = d)
        ;
      short bg = d + j ^ e + r;
      /* The bare expressions on al and am below have no effect beyond their volatile reads. */
      al % am;
      int bi = bg & al >> am;
      am ^ al;
      am / al;
      am &al;
      al;
      am / al;
      if (c)
        if (q) {
          be = 0;
          for (; be; be++)
            z = 0;
        }
      am;
      int bj = 0;
      if (m || q) {
      /* Re-entered by `goto bh` below when ah is zero. */
      bh:
        l = ad = c;
        int bm = al || q;
        al;
        al;
        char bn = al || q;
        al;
        al;
        bm = q;
        ae = a;
      /* Re-entered by `goto bk` below when q is nonzero. */
      bk:
        ai = h || q > d;
        ag = d;
        al;
        al;
        printf("%d", q);
        /* Leaves the inner (ag) loop only; the outer (r) loop re-tests its condition. */
        if (a > 1)
          break;
        if (q)
          printf("%d", d);
        if (q) {
          printf("3");
          h = d | bm > q;
          goto bk;
        }
        /* `al && 0` is always 0 but still reads the volatile al when ai is nonzero. */
        if (!ai || al && 0) {
          printf("%d", d);
          al;
          printf("%d", a);
          goto bb;
        }
        d = al;
        printf("%lld", l);
        m = q;
        if (ak) {
          printf("%ld", c);
          ad = c & q;
        }
        if (!ah) {
          printf("%d", q);
          goto bh;
        }
      }
      if (c)
        /* Stores the address of the block-local bj in the file-scope pointer s. */
        s = &bj;
      m = q = d && c;
      r = ~(e / j & al > r);
      f |= d = al;
      v |= am;
      al / al ^ am;
      ak = am + al | al;
      am / al + al ^ am;
      j = am;
      al;
    /* Reached by fallthrough and by the `goto bb` statements in both phases. */
    bb:
      if (c)
        g++;
      a = q || e & d;
      am || al;
      am;
      am;
      am;
      al &am;
      am;
      am;
    /* Re-entered by `goto bl` below when q is nonzero. */
    bl:
      am;
      if (q) {
        printf("%d", q);
        a = q - am;
        goto bl;
      }
      am;
      printf("%d", d);
      m = q & am;
      am;
      printf("%d", a);
      if (d < -41) {
        printf("%ld", ag);
        /* Jumps back into the body of the phase 1 loop. */
        goto ba;
      }
      /*
       * Reads through the file-scope pointer n, which phase 1 may have set
       * to &av.  NOTE(review): av is local to the phase 1 loop body; confirm
       * that this read is within av's lifetime on every path that reaches
       * it -- the discussion in the report points at stack-slot reuse.
       */
      h = *n;
      printf("3");
      c = e / d;
      printf("%ld", m);
      d = d << q / ag;
      o = 2;
      /* o is a short starting at 2; the loop ends once o++ brings it back to 0. */
      for (; o; o++)
        /* Stores the address of the block-local be in the file-scope pointer i. */
        i = &be;
      x = m = e;
      printf("%d", r) && (ah = r) || (d = ak && e);
      printf("%d", ak);
      /* bf holds the value of am % al computed at the top of this iteration. */
      if (!bf) {
        printf("%d", e);
        *as = a;
        i = n;
        bi = ak / am > r;
        *n = 0;
        /* Advances n until it compares equal to a null pointer. */
        for (; n; n++)
          ;
      }
      y = bi;
    }
  return 0;
}
/*
 * Driver for the reproducer: while the file-scope counter af is below 6,
 * reset d to 8 and run aq(); afterwards print the final value of h.
 */
int main() {
  while (af < 6) {
    d = 8;
    aq();
    af++;
  }
  printf("%d\n", h);
  return 0;
}
Comment 1 H.J. Lu 2020-11-14 18:47:06 UTC
It was caused by r6-3608.
Comment 2 Richard Biener 2020-11-16 10:26:15 UTC
Created attachment 49568 [details]
for the testsuite
Comment 3 Richard Biener 2020-11-16 10:48:17 UTC
Hmm.  It takes quite some time for us to elide the dead stores to bd, ar and at
(and only ar and at are vectorized).  I guess we run into some alias-walk
limits of DCE and vectorizing the stores fixes that.   Unfortunately those
limits are hard-coded:

          if (/* Constant but quadratic for small functions.  */
              total_chain > 128 * 128
              /* Linear in the number of may-defs.  */
              && total_chain > 32 * longest_chain
              /* Linear in the number of uses.  */
              && total_chain > nr_walks * 32)
            {

this points to the known issue of stack var coalescing with CLOBBERs and
indeed -fstack-reuse=none fixes the testcase, without actually spotting
the bad coalescing.

So related to that duplicate bug we have about this issue.
Comment 4 Jakub Jelinek 2021-02-03 16:25:04 UTC
Do you know the PR number of that other bug?
Comment 5 Richard Biener 2021-02-04 09:02:12 UTC
(In reply to Jakub Jelinek from comment #4)
> Do you know the PR number of that other bug?

PR90348
Comment 6 Jakub Jelinek 2021-05-14 09:54:06 UTC
GCC 8 branch is being closed.
Comment 7 Richard Biener 2021-06-01 08:18:54 UTC
GCC 9.4 is being released, retargeting bugs to GCC 9.5.
Comment 8 Andrew Pinski 2021-11-09 07:53:30 UTC
Hmm, the trunk fails even with -fno-tree-vectorize -O1 :).