[Bug tree-optimization/92130] Missed vectorization for iteration dependent loads and simple multiplicative accumulators
witold.baryluk+gcc at gmail dot com
gcc-bugzilla@gcc.gnu.org
Wed Oct 16 19:26:00 GMT 2019
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92130
--- Comment #4 from Witold Baryluk <witold.baryluk+gcc at gmail dot com> ---
If I reduce the minimized test case even further:
only frequency update: VECTORIZED:
/*
 * Test-case variant in which only `frequency` changes between iterations.
 *
 * Accumulates eight terms of amplitude * sinf(x * frequency) and returns
 * the sum. `amplitude` is never modified here, so it keeps its initial
 * value of 1.0f for every term.
 */
static float perlin1d(float x) {
float accum = 0.0f;
float amplitude = 1.0f;
float frequency = 1.0f;
for (int i = 0; i < 8; i++) {
accum += amplitude * sinf(x * frequency);
/* Scale the frequency by a constant factor for the next term. */
frequency *= 2.131f;
}
return accum;
}
/*
 * Driver loop for the test case: stores perlin1d(i) into height_data[i]
 * for every i in [0, width).
 *
 * The int loop index is passed where perlin1d takes a float, so it is
 * converted implicitly. `scale` is not used in this body. The function is
 * marked noinline so it is not inlined into its callers.
 */
__attribute__((noinline))
static void fill_data(int width, float * __restrict__ height_data, float scale)
{
for (int i = 0; i < width; i++) {
height_data[i] = perlin1d(i);
}
}
only amplitude update: VECTORIZED:
/*
 * Test-case variant in which only `amplitude` changes between iterations.
 *
 * Accumulates eight terms of amplitude * sinf(x * frequency) and returns
 * the sum. `frequency` is never modified here, so it keeps its initial
 * value of 1.0f for every term.
 */
static float perlin1d(float x) {
float accum = 0.0f;
float amplitude = 1.0f;
float frequency = 1.0f;
for (int i = 0; i < 8; i++) {
accum += amplitude * sinf(x * frequency);
/* Scale the amplitude by a constant factor for the next term. */
amplitude *= 0.781f;
}
return accum;
}
/*
 * Driver loop for the test case: stores perlin1d(i) into height_data[i]
 * for every i in [0, width).
 *
 * The int loop index is passed where perlin1d takes a float, so it is
 * converted implicitly. `scale` is not used in this body. The function is
 * marked noinline so it is not inlined into its callers.
 */
__attribute__((noinline))
static void fill_data(int width, float * __restrict__ height_data, float scale)
{
for (int i = 0; i < width; i++) {
height_data[i] = perlin1d(i);
}
}
both frequency and amplitude update: NOT VECTORIZED:
/*
 * Test-case variant in which both `amplitude` and `frequency` change
 * between iterations.
 *
 * Accumulates eight terms of amplitude * sinf(x * frequency) and returns
 * the sum. Both multipliers are updated after each term is added.
 */
static float perlin1d(float x) {
float accum = 0.0f;
float amplitude = 1.0f;
float frequency = 1.0f;
for (int i = 0; i < 8; i++) {
accum += amplitude * sinf(x * frequency);
/* Two independent multiplicative updates per iteration. */
amplitude *= 0.781f;
frequency *= 2.131f;
}
return accum;
}
/*
 * Driver loop for the test case: stores perlin1d(i) into height_data[i]
 * for every i in [0, width).
 *
 * The int loop index is passed where perlin1d takes a float, so it is
 * converted implicitly. `scale` is not used in this body. The function is
 * marked noinline so it is not inlined into its callers.
 */
__attribute__((noinline))
static void fill_data(int width, float * __restrict__ height_data, float scale)
{
for (int i = 0; i < width; i++) {
height_data[i] = perlin1d(i);
}
}
More information about the Gcc-bugs
mailing list