This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
[TESTCASE] AltiVec code uses wrong rtl expression
- From: Daniel Egger <degger at fhm dot edu>
- To: Aldy Hernandez <aldyh at redhat dot com>
- Cc: GCC Developer Mailinglist <gcc at gcc dot gnu dot org>
- Date: 26 Feb 2002 00:12:14 +0100
- Subject: [TESTCASE] AltiVec code uses wrong rtl expression
Hija,
I extended the last testcase a bit to figure out where the code
is miscompiled, and it turns out that gcc compiles all vec_mergel
into vmrghh instead of vmrglh. I checked altivec.h as well as
rs6000.[ch] but couldn't find the culprit; in altivec.h the correct
builtin is substituted, and in rs6000.[ch] there is at least no obvious
typo, so it's probably somewhere deeper, but I haven't the slightest idea
where, as the rtl output isn't really clear to me.
Attached is my testcase, which should return:
Unaltered
1 2 3 4 5 6 7 8
9 10 11 12 13 14 15 16
17 18 19 20 21 22 23 24
25 26 27 28 29 30 31 32
33 34 35 36 37 38 39 40
41 42 43 44 45 46 47 48
49 50 51 52 53 54 55 56
57 58 59 60 61 62 63 64
After double transposing
1 2 3 4 5 6 7 8
9 10 11 12 13 14 15 16
17 18 19 20 21 22 23 24
25 26 27 28 29 30 31 32
33 34 35 36 37 38 39 40
41 42 43 44 45 46 47 48
49 50 51 52 53 54 55 56
57 58 59 60 61 62 63 64
when compiled correctly (double transposition of an 8x8 matrix) but
it yields:
Unaltered
1 2 3 4 5 6 7 8
9 10 11 12 13 14 15 16
17 18 19 20 21 22 23 24
25 26 27 28 29 30 31 32
33 34 35 36 37 38 39 40
41 42 43 44 45 46 47 48
49 50 51 52 53 54 55 56
57 58 59 60 61 62 63 64
After double transposing
1 9 17 25 33 41 49 57
9 1 25 17 41 33 57 49
17 25 1 9 49 57 33 41
25 17 9 1 57 49 41 33
33 41 49 57 1 9 17 25
41 33 57 49 9 1 25 17
49 57 33 41 17 25 1 9
57 49 41 33 25 17 9 1
For more fun uncomment the c++-commented line which still segfaults
when compiled at -O0.
--
Servus,
Daniel
#include <altivec.h>
#include <malloc.h>
#include <stdio.h>
/* Transpose an 8x8 matrix of signed shorts.

   INPUT and OUTPUT are each indexed from 0 to 7 and every element is
   used as a `vector signed short' (one matrix row of eight shorts).
   The transposition is done with three rounds of vec_mergeh/vec_mergel
   interleaves; all eight INPUT rows are read before any OUTPUT row is
   written.

   The expansion is wrapped in do { ... } while (0) so that
   `Transpose (a, b);' is a single statement, safe in an unbraced
   if/else.  Both arguments are evaluated several times, so they must
   not have side effects.  The temporaries a0..a7 and b0..b7 are local
   to the expansion and will shadow caller variables with those names.  */
#define Transpose(input, output) \
  do \
    { \
      vector signed short a0, a1, a2, a3, a4, a5, a6, a7; \
      vector signed short b0, b1, b2, b3, b4, b5, b6, b7; \
      \
      /* Round 1: interleave row k with row k + 4.  */ \
      b0 = vec_mergeh ((input)[0], (input)[4]); \
      b1 = vec_mergel ((input)[0], (input)[4]); \
      b2 = vec_mergeh ((input)[1], (input)[5]); \
      b3 = vec_mergel ((input)[1], (input)[5]); \
      b4 = vec_mergeh ((input)[2], (input)[6]); \
      b5 = vec_mergel ((input)[2], (input)[6]); \
      b6 = vec_mergeh ((input)[3], (input)[7]); \
      b7 = vec_mergel ((input)[3], (input)[7]); \
      \
      /* Round 2: interleave the round-1 results the same way.  */ \
      a0 = vec_mergeh (b0, b4); \
      a1 = vec_mergel (b0, b4); \
      a2 = vec_mergeh (b1, b5); \
      a3 = vec_mergel (b1, b5); \
      a4 = vec_mergeh (b2, b6); \
      a5 = vec_mergel (b2, b6); \
      a6 = vec_mergeh (b3, b7); \
      a7 = vec_mergel (b3, b7); \
      \
      /* Round 3: the final interleave produces the output rows.  */ \
      (output)[0] = vec_mergeh (a0, a4); \
      (output)[1] = vec_mergel (a0, a4); \
      (output)[2] = vec_mergeh (a1, a5); \
      (output)[3] = vec_mergel (a1, a5); \
      (output)[4] = vec_mergeh (a2, a6); \
      (output)[5] = vec_mergel (a2, a6); \
      (output)[6] = vec_mergeh (a3, a7); \
      (output)[7] = vec_mergel (a3, a7); \
    } \
  while (0)
/* Transpose the 8x8 matrix of signed shorts stored at MEM twice and
   write the result back to MEM.

   MEM is reinterpreted as eight `vector signed short' rows, so it must
   point to at least 64 shorts.  NOTE(review): it presumably also has to
   be aligned suitably for vector loads and stores; confirm against the
   caller's allocation.

   A double transposition is the identity, so with a correct compiler
   the contents of MEM are unchanged on return.  */
void
do_something (signed short *mem)
{
  /* Kept disabled on purpose: the accompanying report says that
     enabling this line makes the testcase segfault at -O0.  */
  //const vector signed short zeros = (vector signed short) {0,0,0,0,0,0,0};
  vector signed short *vec = (vector signed short *) mem;
  vector signed short v1[8], v2[8];
  int row;

  for (row = 0; row < 8; row++)
    v1[row] = vec[row];

  Transpose (v1, v2);   /* v2 = transpose of the input.  */
  Transpose (v2, v1);   /* v1 = transpose of v2, i.e. the input again.  */

  /* Store v1, the double-transposed matrix.  The original stored v2,
     which only holds the result of the first transposition.  */
  for (row = 0; row < 8; row++)
    vec[row] = v1[row];
}
/* Store the values 1, 2, ..., 64 in ascending order into the 64
   consecutive signed shorts starting at MEM.  */
void
fill (signed short *mem)
{
  int slot;

  for (slot = 0; slot < 64; slot++)
    mem[slot] = (signed short) (slot + 1);
}
/* Check that the 64 signed shorts starting at MEM hold 1, 2, ..., 64
   in order.  For every position whose value differs, print the expected
   value (which doubles as the 1-based position) and the value actually
   found.  MEM is only read.  */
void
compare (signed short *mem)
{
  signed short expected = 1;
  signed short *cursor = mem;

  while (expected <= 64)
    {
      if (*cursor != expected)
        printf ("%dth component is %d\n", expected, *cursor);
      cursor++;
      expected++;
    }
}
/* Print the 64 signed shorts starting at MEM as an 8x8 grid: each value
   is formatted with "%2d " and a newline is emitted after every eighth
   value.  MEM is only read.  */
void
print (signed short *mem)
{
  unsigned int idx;

  for (idx = 0; idx < 64; idx++)
    {
      printf ("%2d ", mem[idx]);
      /* End of a row of eight.  */
      if (idx % 8 == 7)
        printf ("\n");
    }
}
/* Test driver: allocate an 8x8 matrix of signed shorts, fill it with
   1..64, print it, run the double transposition on it and print it
   again.  Returns 0 on success and 1 if the allocation fails.  */
int
main (void)
{
  /* memalign takes (alignment, size).  The matrix needs 64 shorts
     (128 bytes) on a 16-byte boundary.  The original call had the two
     arguments swapped, allocating only 16 bytes, so fill () wrote past
     the end of the block.  */
  void *mem = memalign (16, 64 * sizeof (signed short));

  if (!mem)
    {
      fprintf (stderr, "memalign failed\n");
      return 1;
    }

  fill (mem);
  printf ("Unaltered\n");
  print (mem);
  do_something (mem);
  printf ("\nAfter double transposing\n");
  print (mem);
  free (mem);
  return 0;
}