This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug target/83479] Register spilling in AVX code


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83479

--- Comment #5 from Daniel Fruzynski <bugzilla@poradnik-webmastera.com> ---
Here is a valid AVX version as well; it also spills a bit. Compiled with
"-O3 -march=haswell -Wall -Werror".

[code]
#include "immintrin.h"

// Reproducer for the register-spilling report: a fully unrolled, four-pass
// update over a 5x4 block of doubles, kept as 4-lane AVX vectors (one
// __m256d per row).
//
// data:    5 rows of 4 doubles. Every row is read with _mm256_load_pd (the
//          aligned load), so each row must start on a 32-byte boundary.
// returns: lane 0 of row 0 (v1) after the last pass.
//
// Each pass picks a "last row" R (vLastRow) and, for every row k it still
// updates, broadcasts lane k of R into all four lanes (vLastCol) and
// rewrites the row lane-wise as
//     row_k = (row_k - R * bcast(R[k])) * sqrt(R) * sqrt(bcast(R[k]))
// Pass 4 takes R from data[4]; passes 3, 2 and 1 take R from the v4, v3 and
// v2 produced by the preceding pass, so each pass updates one row fewer.
//
// Lane indexing (vLastRow[0]) and the -, * operators on __m256d are GNU
// vector extensions, not part of the intrinsics API.
//
// NOTE(review): the statement sequence is the test case itself; reshaping it
// may change the compiler's register allocation and hide the reported spills.
double test(const double data[5][4])
{
  __m256d vLastRow, vLastCol, vSqrtRow, vSqrtCol;

  // Rows 0..3 of the input; v1..v4 are updated in place by the passes below.
  __m256d v1 = _mm256_load_pd (&data[0][0]);
  __m256d v2 = _mm256_load_pd (&data[1][0]);
  __m256d v3 = _mm256_load_pd (&data[2][0]);
  __m256d v4 = _mm256_load_pd (&data[3][0]);

  // Pass 4: last row is data[4]; updates v1..v4.
  vLastRow = _mm256_load_pd (&data[4][0]);
  vSqrtRow = _mm256_sqrt_pd(vLastRow);

  // Lane k of the last row is broadcast and paired with row k (v1 <-> lane 0).
  vLastCol = _mm256_set1_pd(vLastRow[0]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[1]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[2]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[3]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v4 = (v4 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // Pass 3: last row is the v4 just computed; updates v1..v3.
  vLastRow = v4;
  vSqrtRow = _mm256_sqrt_pd(vLastRow);

  vLastCol = _mm256_set1_pd(vLastRow[0]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[1]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[2]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v3 = (v3 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // Pass 2: last row is the v3 from pass 3; updates v1 and v2.
  vLastRow = v3;
  vSqrtRow = _mm256_sqrt_pd(vLastRow);

  vLastCol = _mm256_set1_pd(vLastRow[0]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;
  vLastCol = _mm256_set1_pd(vLastRow[1]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v2 = (v2 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // Pass 1: last row is the v2 from pass 2; updates v1 only.
  vLastRow = v2;
  vSqrtRow = _mm256_sqrt_pd(vLastRow);

  vLastCol = _mm256_set1_pd(vLastRow[0]);
  vSqrtCol = _mm256_sqrt_pd(vLastCol);
  v1 = (v1 - vLastRow * vLastCol) * vSqrtRow * vSqrtCol;

  // Only lane 0 of the fully updated first row is returned.
  return v1[0];
}
[/code]

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]