This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[TESTCASE] AltiVec code uses wrong rtl expression


Hija,

I extended the last testcase a bit to figure out where the code
is miscompiled, and it turns out that gcc compiles every vec_mergel
into vmrghh instead of vmrglh. I checked altivec.h as well as
rs6000.[ch] but couldn't find the culprit: in altivec.h the correct
builtin is substituted, and in rs6000.[ch] there is at least no obvious
typo, so the problem is probably somewhere deeper. I haven't the slightest
idea where, though, as the rtl output isn't really clear to me.

Attached is my testcase, which should return:
Unaltered
 1  2  3  4  5  6  7  8 
 9 10 11 12 13 14 15 16 
17 18 19 20 21 22 23 24 
25 26 27 28 29 30 31 32 
33 34 35 36 37 38 39 40 
41 42 43 44 45 46 47 48 
49 50 51 52 53 54 55 56 
57 58 59 60 61 62 63 64 

After double transposing
 1  2  3  4  5  6  7  8 
 9 10 11 12 13 14 15 16 
17 18 19 20 21 22 23 24 
25 26 27 28 29 30 31 32 
33 34 35 36 37 38 39 40 
41 42 43 44 45 46 47 48 
49 50 51 52 53 54 55 56 
57 58 59 60 61 62 63 64 

when compiled correctly (double transposition of an 8x8 matrix), but
it actually yields:

Unaltered
 1  2  3  4  5  6  7  8 
 9 10 11 12 13 14 15 16 
17 18 19 20 21 22 23 24 
25 26 27 28 29 30 31 32 
33 34 35 36 37 38 39 40 
41 42 43 44 45 46 47 48 
49 50 51 52 53 54 55 56 
57 58 59 60 61 62 63 64 

After double transposing
 1  9 17 25 33 41 49 57 
 9  1 25 17 41 33 57 49 
17 25  1  9 49 57 33 41 
25 17  9  1 57 49 41 33 
33 41 49 57  1  9 17 25 
41 33 57 49  9  1 25 17 
49 57 33 41 17 25  1  9 
57 49 41 33 25 17  9  1 

For more fun uncomment the c++-commented line which still segfaults
when compiled at -O0.

-- 
Servus,
       Daniel
#include <altivec.h>
#include <malloc.h>
#include <stdio.h>

/* Transpose an 8x8 matrix of signed shorts stored as eight row vectors.

   INPUT and OUTPUT are each indexed 0..7 and must yield
   `vector signed short' elements.  The work is done by three rounds of
   vec_mergeh/vec_mergel interleaves.  Every read of INPUT happens in the
   first round and every write to OUTPUT in the last, with the
   intermediate rows held in local temporaries.

   Both arguments are expanded several times, so they must be free of
   side effects.  The body is wrapped in do { } while (0) so that
   `Transpose (a, b);' is exactly one statement and can be used safely
   in an unbraced if/else.  */
#define Transpose(input, output) \
do \
{ \
  vector signed short a0, a1, a2, a3, a4, a5, a6, a7; \
  vector signed short b0, b1, b2, b3, b4, b5, b6, b7; \
  \
  /* Round 1: interleave row k with row k + 4.  */ \
  b0 = vec_mergeh ((input)[0], (input)[4]);  \
  b1 = vec_mergel ((input)[0], (input)[4]);  \
  b2 = vec_mergeh ((input)[1], (input)[5]);  \
  b3 = vec_mergel ((input)[1], (input)[5]);  \
  b4 = vec_mergeh ((input)[2], (input)[6]);  \
  b5 = vec_mergel ((input)[2], (input)[6]);  \
  b6 = vec_mergeh ((input)[3], (input)[7]);  \
  b7 = vec_mergel ((input)[3], (input)[7]);  \
  \
  /* Round 2: same pairing applied to the round-1 results.  */ \
  a0 = vec_mergeh (b0, b4);                  \
  a1 = vec_mergel (b0, b4);                  \
  a2 = vec_mergeh (b1, b5);                  \
  a3 = vec_mergel (b1, b5);                  \
  a4 = vec_mergeh (b2, b6);                  \
  a5 = vec_mergel (b2, b6);                  \
  a6 = vec_mergeh (b3, b7);                  \
  a7 = vec_mergel (b3, b7);                  \
  \
  /* Round 3: final interleave writes the transposed rows.  */ \
  (output)[0] = vec_mergeh (a0, a4);         \
  (output)[1] = vec_mergel (a0, a4);         \
  (output)[2] = vec_mergeh (a1, a5);         \
  (output)[3] = vec_mergel (a1, a5);         \
  (output)[4] = vec_mergeh (a2, a6);         \
  (output)[5] = vec_mergel (a2, a6);         \
  (output)[6] = vec_mergeh (a3, a7);         \
  (output)[7] = vec_mergel (a3, a7);         \
} \
while (0)
  
/* Transpose the 8x8 matrix of signed shorts at MEM twice, in place.

   MEM is accessed through a `vector signed short *', so it must cover
   eight such vectors (64 shorts) and be suitably aligned for that type.
   Two transpositions cancel out, so with a correct compiler the memory
   contents are unchanged on return.  */
void
do_something (signed short *mem)
{
  /* Left commented out on purpose: enabling this line is the extra
     reproducer mentioned in the accompanying report.  */
  //const vector signed short zeros = (vector signed short) {0,0,0,0,0,0,0};
  vector signed short *vec = (vector signed short *) mem;
  vector signed short v1[8], v2[8];
  unsigned int row;

  for (row = 0; row < 8; row++)
    v1[row] = vec[row];

  Transpose (v1, v2);
  Transpose (v2, v1);

  /* The second call writes its result into v1, so that is what must be
     stored back; v2 only holds the single (intermediate) transpose.  */
  for (row = 0; row < 8; row++)
    vec[row] = v1[row];
}

/* Store the sequence 1, 2, ..., 64 into the 64 signed shorts starting
   at MEM.  */
void
fill (signed short *mem)
{
  int pos;

  for (pos = 0; pos < 64; pos++)
    mem[pos] = (signed short) (pos + 1);
}

/* Check that the 64 signed shorts at MEM hold 1, 2, ..., 64 in order and
   print one line on stdout for every position whose value differs.  */
void
compare (signed short *mem)
{
  int pos;

  for (pos = 0; pos < 64; pos++)
  {
    int expected = pos + 1;

    if (mem[pos] != expected)
      printf ("%dth component is %d\n", expected, mem[pos]);
  }
}

/* Print the 64 signed shorts at MEM on stdout as 8 rows of 8 values.
   Each value is formatted with "%2d " and each row ends in a newline.  */
void
print (signed short *mem)
{
  signed short *const end = mem + 64;

  while (mem != end)
  {
    signed short *const row_end = mem + 8;

    for (; mem != row_end; mem++)
      printf ("%2d ", *mem);

    printf ("\n");
  }
}

/* Test driver: allocate an 8x8 matrix of signed shorts, fill it with
   1..64, print it, run do_something on it and print it again.

   Returns 0 on success and 1 if the matrix could not be allocated.  */
int
main (void)
{
  enum { MATRIX_ELEMS = 64, VECTOR_ALIGN = 16 };

  /* memalign takes (alignment, size).  The buffer has to hold all 64
     shorts (128 bytes) and is aligned to 16 bytes for vector access.  */
  signed short *mem = memalign (VECTOR_ALIGN, MATRIX_ELEMS * sizeof *mem);

  if (!mem)
  {
    fprintf (stderr, "memalign failed\n");
    return 1;
  }

  fill (mem);

  printf ("Unaltered\n");
  print (mem);

  do_something (mem);

  printf ("\nAfter double transposing\n");
  print (mem);

  free (mem);

  return 0;
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]