[Bug middle-end/55266] vector expansion: 24 movs for 4 adds

vincenzo.innocente at cern dot ch gcc-bugzilla@gcc.gnu.org
Sun Mar 3 11:58:00 GMT 2013


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55266

--- Comment #4 from vincenzo Innocente <vincenzo.innocente at cern dot ch> 2013-03-03 11:58:24 UTC ---
I still see problems when calling inline functions.
It seems that the code needed to satisfy the "calling ABI" is generated anyhow, even after inlining.

Take the example below and compare the code generated for "dotd1" with that for "dotd2":
dotd2 has a "storm" of moves before the reduction.

c++ -std=c++11 -Ofast -march=corei7 -S conversions.cc -fabi-version=0 
The AVX version is better, except for dotd4 (actually dotd1 is left SSE-like).

// GCC vector-extension types; vector_size is given in bytes.
// float32x4_t: 16-byte vector of float (four lanes with a 4-byte float).
typedef float __attribute__( ( vector_size( 16 ) ) ) float32x4_t;
// float64x4_t: 32-byte vector of double (four lanes with an 8-byte double).
typedef double  __attribute__( ( vector_size( 32 ) ) ) float64x4_t;


// Widen a float vector to a double vector: each of the four elements of f
// is read by subscript and used to brace-initialise the result.
// Used by dotd2, the variant the report says shows the extra moves.
inline 
float64x4_t convert(float32x4_t f) {
  return float64x4_t{f[0],f[1],f[2],f[3]};
}

// Dot product of two float vectors, accumulated in a float:
// returns x[0]*y[0] + x[1]*y[1] + x[2]*y[2] + x[3]*y[3].
float dotf(float32x4_t x, float32x4_t y) {
  float ret=0;
  for (int i=0;i!=4;++i) ret+=x[i]*y[i];
  return ret;
}

// Dot product of two double vectors, accumulated in a double.
// Declared inline; called by dotd2 and dotd4 so the report can compare
// the helper-call form against the hand-inlined reductions.
inline
double dotd(float64x4_t x, float64x4_t y) {
  double ret=0;
  for (int i=0;i!=4;++i) ret+=x[i]*y[i];
  return ret;
}



// Variant 1: everything written out by hand, no helper calls.
// The float inputs are widened element by element in a loop into local
// double vectors, then reduced in a second loop. The report uses this
// function as the reference to compare dotd2 against.
float dotd1(float32x4_t x, float32x4_t y) {
  float64x4_t dx,dy;
  for (int i=0;i!=4;++i) {
    dx[i]=x[i]; dy[i]=y[i];
  }
  double ret=0;
  for (int i=0;i!=4;++i) ret+=dx[i]*dy[i];
  // The double accumulator is narrowed to float by the return type.
  return ret;
}

// Variant 2: same computation as dotd1, but expressed through the inline
// helpers convert() (widening) and dotd() (reduction). This is the variant
// the report says produces a "storm" of moves before the reduction.
float dotd2(float32x4_t x, float32x4_t y) {
  float64x4_t dx=convert(x);
  float64x4_t dy=convert(y);
  // dotd() returns double; the result is narrowed to float on return.
  return dotd(dx,dy);
}


// Variant 3: widening done with brace initialisation directly in this
// function (the same expression convert() uses, but written in place),
// followed by a hand-written reduction loop.
float dotd3(float32x4_t x, float32x4_t y) {
  float64x4_t dx{x[0],x[1],x[2],x[3]};
  float64x4_t dy{y[0],y[1],y[2],y[3]};
  double ret=0;
  for (int i=0;i!=4;++i) ret+=dx[i]*dy[i];
  // The double accumulator is narrowed to float by the return type.
  return ret;
}

// Variant 4: widening done with the element-wise loop (as in dotd1), but
// the reduction goes through the inline helper dotd(). The report singles
// this variant out when discussing the AVX build.
float dotd4(float32x4_t x, float32x4_t y) {
  float64x4_t dx,dy;
  for (int i=0;i!=4;++i) {
    dx[i]=x[i]; dy[i]=y[i];
  }
  // dotd() returns double; the result is narrowed to float on return.
  return dotd(dx,dy);
}



More information about the Gcc-bugs mailing list