[Bug tree-optimization/69720] [4.9/5/6 Regression] wrong code at -O3 on x86_64-linux-gnu

Mon Feb 8 13:56:00 GMT 2016

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69720

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Priority|P1                          |P2
      Known to work|                            |4.6.4
   Target Milestone|6.0                         |4.9.4
            Summary|[6 Regression] wrong code   |[4.9/5/6 Regression] wrong
                   |at -O3 on x86_64-linux-gnu  |code at -O3 on
                   |                            |x86_64-linux-gnu
      Known to fail|                            |4.7.3, 4.8.5, 4.9.3, 5.3.0,
                   |                            |6.0

--- Comment #4 from Richard Biener <rguenth at gcc dot gnu.org> ---
Ok, so it looks like inner loop reduction for outer loop vectorization isn't
handled correctly if ncopies is > 1.

/* abort is declared by hand so the test case needs no headers.  */
extern void abort (void);

/* Written by foo (a[i] = i).  NOTE(review): presumably this int store next
   to the double store is what makes the vectorizer need more than one
   vector copy of the double statements (ncopies > 1) -- confirm.  */
int a[128];
/* Only the first four elements get explicit values; the remaining
   elements are zero-initialized.  */
double b[128] = { 1., 2., 3., 4. };

/* For each of the 128 elements, add 1 to b[i] thirty-two times in an
   inner loop (so b[i] ends up 32 larger) and set a[i] to i.  The
   noinline attribute asks the compiler not to inline foo into its
   caller.  */
void __attribute__((noinline)) foo()
{
  int i;
  for (i = 0; i < 128; ++i)
    {
      /* The inner-loop accumulator starts from the current b[i].  */
      double tem1 = b[i];
      for (int j = 0; j < 32; ++j)
        tem1 += 1;
      b[i] = tem1;
      a[i] = i;
    }
}

/* Run foo, then abort unless b[0..3] equal 33, 34, 35 and 36 (their
   initial values 1..4 plus the 32 added by foo).  Returns 0 when the
   check passes.  */
int main()
{
  foo ();
  if (b[0] != 33. || b[1] != 34.
      || b[2] != 35. || b[3] != 36.)
    abort ();
  return 0;
}

This is vectorized to

  <bb 4>:
  # tem1_20 = PHI <tem1_7(5), tem1_6(3)>
  # j_21 = PHI <j_8(5), 0(3)>
  # ivtmp_12 = PHI <ivtmp_1(5), 32(3)>
  # vect_tem1_7.9_26 = PHI <vect_tem1_7.9_28(5), { 0.0, 0.0 }(3)>
  # vect_tem1_7.9_29 = PHI <vect_tem1_7.9_30(5), { 0.0, 0.0 }(3)>
  vect_tem1_7.9_28 = vect_tem1_7.9_26 + vect_cst__27;
  vect_tem1_7.9_30 = vect_tem1_7.9_29 + vect_cst__27;
  tem1_7 = tem1_20 + 1.0e+0;
  j_8 = j_21 + 1;
  ivtmp_1 = ivtmp_12 - 1;
  if (ivtmp_1 != 0)
    goto <bb 5>;
  else
    goto <bb 6>;

  <bb 5>:
  goto <bb 4>;
  <bb 6>:
  # tem1_16 = PHI <tem1_7(4)>
  # vect_tem1_7.9_31 = PHI <vect_tem1_7.9_28(4)>
  # vect_tem1_7.9_32 = PHI <vect_tem1_7.9_30(4)>
  vect_tem1_7.11_33 = vect_tem1_7.9_31 + vect_tem1_6.7_23;
  MEM[(double *)vectp_b.12_34] = vect_tem1_7.11_33;
  vectp_b.12_37 = vectp_b.12_34 + 16;
  MEM[(double *)vectp_b.12_37] = vect_tem1_7.9_32;

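Note how we miss a second adjustment statement, analogous to

  vect_tem1_7.11_33 = vect_tem1_7.9_31 + vect_tem1_6.7_23;

to adjust the second inner reduction PHI: vect_tem1_7.9_32 is stored
without the initial value being added.  Already broken in GCC 4.8 and 4.7.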