double a[1024], b[1024];
void bar();
void foo(double x)
{
  double a0 = a[0];
  double a1 = a[1];
  for (int i = 0; i < 511;)
    {
      b[2*i] = a0 + x;
      b[2*i + 1] = a1 + x;
      i++;
      bar ();
      a0 = a[2*i];
      a1 = a[2*i+1];
    }
}

can be BB vectorized, but an optimal result requires the BB vectorizer to handle PHI nodes and to work across multiple BBs.
Fixed for GCC 11:

  <bb 2> [local count: 10737416]:
  _8 = {x_14(D), x_14(D)};
  vect_a0_12.8_24 = MEM <vector(2) double> [(double *)&a];

  <bb 3> [local count: 1063004409]:
  # vect_a0_26.9_23 = PHI <vect_a0_19.5_25(3), vect_a0_12.8_24(2)>
  # ivtmp.23_16 = PHI <ivtmp.23_15(3), 0(2)>
  vect__2.10_22 = _8 + vect_a0_26.9_23;
  MEM <vector(2) double> [(double *)&b + ivtmp.23_16 * 1] = vect__2.10_22;
  bar ();
  vect_a0_19.5_25 = MEM <vector(2) double> [(double *)&a + 16B + ivtmp.23_16 * 1];
  ivtmp.23_15 = ivtmp.23_16 + 16;
  if (ivtmp.23_15 != 8176)
    goto <bb 3>; [98.99%]
  else
    goto <bb 4>; [1.01%]

  <bb 4> [local count: 10737416]:
  return;