[Bug tree-optimization/92130] Missed vectorization for iteration dependent loads and simple multiplicative accumulators

witold.baryluk+gcc at gmail dot com gcc-bugzilla@gcc.gnu.org
Wed Oct 16 19:26:00 GMT 2019


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92130

--- Comment #4 from Witold Baryluk <witold.baryluk+gcc at gmail dot com> ---
If I reduce the minimized test case even further:

only frequency update: VECTORIZED:

/*
 * Reduced test-case variant in which only `frequency` is updated inside
 * the loop (the report labels this variant VECTORIZED).
 *
 * Returns the sum, over 8 iterations, of amplitude * sinf(x * frequency).
 * `amplitude` stays at its initial 1.0f; `frequency` starts at 1.0f and is
 * multiplied by 2.131f after each term.
 */
static float perlin1d(float x) {
  float accum = 0.0f;
  float amplitude = 1.0f;
  float frequency = 1.0f;
  for (int i = 0; i < 8; i++) {
    accum += amplitude * sinf(x * frequency);
    /* The only loop-carried multiplicative update in this variant. */
    frequency *= 2.131f;
  }
  return accum;
}

/*
 * Stores perlin1d(i) into height_data[i] for every i in [0, width).
 * The int index i is implicitly converted to float at the call.
 * `scale` is not used in this reduced test case.
 *
 * noinline keeps this function from being inlined into its caller
 * (presumably so the loop can be examined on its own -- confirm against
 * the full test case in the bug report).
 */
__attribute__((noinline))
static void fill_data(int width, float * __restrict__ height_data, float scale)
{
  for (int i = 0; i < width; i++) {
    height_data[i] = perlin1d(i);
  }
}


only amplitude update: VECTORIZED:

/*
 * Reduced test-case variant in which only `amplitude` is updated inside
 * the loop (the report labels this variant VECTORIZED).
 *
 * Returns the sum, over 8 iterations, of amplitude * sinf(x * frequency).
 * `frequency` stays at its initial 1.0f; `amplitude` starts at 1.0f and is
 * multiplied by 0.781f after each term.
 */
static float perlin1d(float x) {
  float accum = 0.0f;
  float amplitude = 1.0f;
  float frequency = 1.0f;
  for (int i = 0; i < 8; i++) {
    accum += amplitude * sinf(x * frequency);
    /* The only loop-carried multiplicative update in this variant. */
    amplitude *= 0.781f;
  }
  return accum;
}

/*
 * Stores perlin1d(i) into height_data[i] for every i in [0, width).
 * The int index i is implicitly converted to float at the call.
 * `scale` is not used in this reduced test case.
 *
 * noinline keeps this function from being inlined into its caller
 * (presumably so the loop can be examined on its own -- confirm against
 * the full test case in the bug report).
 */
__attribute__((noinline))
static void fill_data(int width, float * __restrict__ height_data, float scale)
{
  for (int i = 0; i < width; i++) {
    height_data[i] = perlin1d(i);
  }
}

both frequency and amplitude update: NOT VECTORIZED:

/*
 * Reduced test-case variant in which both `amplitude` and `frequency` are
 * updated inside the loop (the report labels this variant NOT VECTORIZED).
 *
 * Returns the sum, over 8 iterations, of amplitude * sinf(x * frequency).
 * Both values start at 1.0f; after each term `amplitude` is multiplied by
 * 0.781f and `frequency` by 2.131f.
 */
static float perlin1d(float x) {
  float accum = 0.0f;
  float amplitude = 1.0f;
  float frequency = 1.0f;
  for (int i = 0; i < 8; i++) {
    accum += amplitude * sinf(x * frequency);
    /* Two independent loop-carried multiplicative updates. */
    amplitude *= 0.781f;
    frequency *= 2.131f;
  }
  return accum;
}

/*
 * Stores perlin1d(i) into height_data[i] for every i in [0, width).
 * The int index i is implicitly converted to float at the call.
 * `scale` is not used in this reduced test case.
 *
 * noinline keeps this function from being inlined into its caller
 * (presumably so the loop can be examined on its own -- confirm against
 * the full test case in the bug report).
 */
__attribute__((noinline))
static void fill_data(int width, float * __restrict__ height_data, float scale)
{
  for (int i = 0; i < width; i++) {
    height_data[i] = perlin1d(i);
  }
}


More information about the Gcc-bugs mailing list