This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug c/23570] New: Internal compiler error
- From: "chen at sys dot wakayama-u dot ac dot jp" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: 26 Aug 2005 02:26:09 -0000
- Subject: [Bug c/23570] New: Internal compiler error
- Reply-to: gcc-bugzilla at gcc dot gnu dot org
The compiler reports an internal compiler error when I compile my program with
-O2.
Compiling the same program with -O1 works fine.
% gcc -O2 -msse2 a.c
a.c: In function 'ludcompf':
a.c:505: internal compiler error: in merge_assigned_reloads, at reload1.c:6091
Please submit a full bug report,
with preprocessed source if appropriate.
See <URL:http://gcc.gnu.org/bugs.html> for instructions.
gcc -v
Using built-in specs.
Target: i686-pc-linux-gnu
Configured with: ../gcc-4.0.2/configure --prefix=/usr --libexecdir=/usr/lib
--enable-shared --enable-threads=posix --enable-__cxa_atexit
--enable-clocale=gnu --enable-libada
--enable-languages=c,ada,c++,f95,java,objc,treelang
Thread model: posix
gcc version 4.0.2 20050825 (prerelease)
/* a.c */
/* Library declarations written out by hand; no header is included in this
   listing.  fabs/__fabs are declared but not referenced in the code shown
   below. */
extern int printf (__const char *__restrict __format, ...);
extern double fabs (double __x) __attribute__ ((__nothrow__)) __attribute__
((__const__)); extern double __fabs (double __x) __attribute__ ((__nothrow__))
__attribute__ ((__const__));
/* 16-byte vectors of float (four lanes).  __v4sf is the type the wrappers
   below cast to when calling builtins; __m128 is the type they expose. */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
typedef float __m128 __attribute__ ((__vector_size__ (16)));
static __inline __m128
_mm_setzero_ps (void)
{
return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
}
/* Forwards both operands (as __v4sf) to __builtin_ia32_maxps and returns
   the result as __m128.  Used in this file to keep a running per-lane
   maximum. */
static __inline __m128
_mm_max_ps (__m128 __A, __m128 __B)
{
  __v4sf lhs = (__v4sf)__A;
  __v4sf rhs = (__v4sf)__B;

  return (__m128) __builtin_ia32_maxps (lhs, rhs);
}
/* Forwards both operands (as __v4sf) to __builtin_ia32_cmpeqps.  The result
   is cast to __m128 directly because callers treat it as a bit mask. */
static __inline __m128
_mm_cmpeq_ps (__m128 __A, __m128 __B)
{
  __v4sf lhs = (__v4sf)__A;
  __v4sf rhs = (__v4sf)__B;

  return (__m128) __builtin_ia32_cmpeqps (lhs, rhs);
}
static __inline __m128
_mm_set1_ps (float __F)
{
return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
}
/* Passes both operands unchanged to __builtin_ia32_andps.  Used in this
   file to apply a bit mask to a float vector (see se_absps). */
static __inline __m128
_mm_and_ps (__m128 __A, __m128 __B)
{
  __m128 masked = __builtin_ia32_andps (__A, __B);

  return masked;
}
static __inline __m128
_mm_loadu_ps (float const *__P)
{
return (__m128) __builtin_ia32_loadups (__P);
}
static __inline __m128
_mm_setr_ps (float __Z, float __Y, float __X, float __W)
{
return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
}
/* Writes __A (as __v4sf) to __P through __builtin_ia32_storeups. */
static __inline void
_mm_storeu_ps (float *__P, __m128 __A)
{
  __v4sf lanes = (__v4sf)__A;

  __builtin_ia32_storeups (__P, lanes);
}
/* Forwards both operands (as __v4sf) to __builtin_ia32_addps and returns
   the result as __m128. */
static __inline __m128
_mm_add_ps (__m128 __A, __m128 __B)
{
  __v4sf lhs = (__v4sf)__A;
  __v4sf rhs = (__v4sf)__B;

  return (__m128)__builtin_ia32_addps (lhs, rhs);
}
/* Forwards both operands (as __v4sf) to __builtin_ia32_subps and returns
   the result as __m128. */
static __inline __m128
_mm_sub_ps (__m128 __A, __m128 __B)
{
  __v4sf lhs = (__v4sf)__A;
  __v4sf rhs = (__v4sf)__B;

  return (__m128)__builtin_ia32_subps (lhs, rhs);
}
/* Forwards both operands (as __v4sf) to __builtin_ia32_mulps and returns
   the result as __m128. */
static __inline __m128
_mm_mul_ps (__m128 __A, __m128 __B)
{
  __v4sf lhs = (__v4sf)__A;
  __v4sf rhs = (__v4sf)__B;

  return (__m128)__builtin_ia32_mulps (lhs, rhs);
}
/* 16-byte vector types: two doubles, two long longs, four ints. */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef int __v4si __attribute__ ((__vector_size__ (16)));
/* Types exposed by the wrappers below: __m128i is the two-long-long vector,
   __m128d the two-double vector. */
typedef __v2di __m128i;
typedef __v2df __m128d;
static __inline __m128d
_mm_set1_pd (double __F)
{
return __extension__ (__m128d){ __F, __F };
}
static __inline __m128d
_mm_setr_pd (double __W, double __X)
{
return __extension__ (__m128d){ __W, __X };
}
static __inline __m128d
_mm_loadu_pd (double const *__P)
{
return __builtin_ia32_loadupd (__P);
}
/* Writes __A to __P through __builtin_ia32_storeupd. */
static __inline void
_mm_storeu_pd (double *__P, __m128d __A)
{
  __m128d value = __A;

  __builtin_ia32_storeupd (__P, value);
}
static __inline __m128d
_mm_set_sd (double __F)
{
return __extension__ (__m128d){ __F, 0 };
}
/* Reads one double from __P and returns it in lane 0 of a __m128d; lane 1
   is zero (see _mm_set_sd). */
static __inline __m128d
_mm_load_sd (double const *__P)
{
  const double low = *__P;

  return _mm_set_sd (low);
}
/* Passes both operands unchanged to __builtin_ia32_andpd.  Used in this
   file to apply a bit mask to a double vector (see se2_abssd/se2_abspd). */
static __inline __m128d
_mm_and_pd (__m128d __A, __m128d __B)
{
  __m128d masked = __builtin_ia32_andpd (__A, __B);

  return masked;
}
/* ANDs `a` with a constant mask whose only clear bit is bit 31 of 32-bit
   word 1.  On the little-endian x86 target named in this report that is
   the sign bit of the low double, so the low lane becomes its absolute
   value and the high lane is passed through unchanged. */
static __inline __m128d se2_abssd(__m128d a)
{
    static const union {
        __m128d m;
        unsigned int i[4];
    } low_sign_mask = {
        .i = { 0xffffffffU, 0x7fffffffU, 0xffffffffU, 0xffffffffU }
    };

    return _mm_and_pd(a, low_sign_mask.m);
}
/* Forwards both operands (as __v2df) to __builtin_ia32_addpd and returns
   the result as __m128d. */
static __inline __m128d
_mm_add_pd (__m128d __A, __m128d __B)
{
  __v2df lhs = (__v2df)__A;
  __v2df rhs = (__v2df)__B;

  return (__m128d)__builtin_ia32_addpd (lhs, rhs);
}
/* Forwards both operands (as __v2df) to __builtin_ia32_subpd and returns
   the result as __m128d. */
static __inline __m128d
_mm_sub_pd (__m128d __A, __m128d __B)
{
  __v2df lhs = (__v2df)__A;
  __v2df rhs = (__v2df)__B;

  return (__m128d)__builtin_ia32_subpd (lhs, rhs);
}
/* Forwards both operands (as __v2df) to __builtin_ia32_mulpd and returns
   the result as __m128d. */
static __inline __m128d
_mm_mul_pd (__m128d __A, __m128d __B)
{
  __v2df lhs = (__v2df)__A;
  __v2df rhs = (__v2df)__B;

  return (__m128d)__builtin_ia32_mulpd (lhs, rhs);
}
/* Forwards both operands (as __v2df) to __builtin_ia32_mulsd and returns
   the result as __m128d. */
static __inline __m128d
_mm_mul_sd (__m128d __A, __m128d __B)
{
  __v2df lhs = (__v2df)__A;
  __v2df rhs = (__v2df)__B;

  return (__m128d)__builtin_ia32_mulsd (lhs, rhs);
}
/* Forwards both operands (as __v2df) to __builtin_ia32_maxpd and returns
   the result as __m128d.  Used in this file to keep a running per-lane
   maximum. */
static __inline __m128d
_mm_max_pd (__m128d __A, __m128d __B)
{
  __v2df lhs = (__v2df)__A;
  __v2df rhs = (__v2df)__B;

  return (__m128d)__builtin_ia32_maxpd (lhs, rhs);
}
/* Forwards both operands (as __v2df) to __builtin_ia32_unpckhpd.  The one
   caller in this file passes the same vector twice to bring its high lane
   down to lane 0. */
static __inline __m128d
_mm_unpackhi_pd (__m128d __A, __m128d __B)
{
  __v2df lhs = (__v2df)__A;
  __v2df rhs = (__v2df)__B;

  return (__m128d)__builtin_ia32_unpckhpd (lhs, rhs);
}
/* Forwards both operands (as __v2df) to __builtin_ia32_cmpeqpd.  The result
   is cast to __m128d directly because callers treat it as a bit mask. */
static __inline __m128d
_mm_cmpeq_pd (__m128d __A, __m128d __B)
{
  __v2df lhs = (__v2df)__A;
  __v2df rhs = (__v2df)__B;

  return (__m128d)__builtin_ia32_cmpeqpd (lhs, rhs);
}
/* Returns the int produced by __builtin_ia32_comisdlt for the two operands.
   The caller in this file uses it as "lane 0 of __A is less than lane 0
   of __B". */
static __inline int
_mm_comilt_sd (__m128d __A, __m128d __B)
{
  __v2df lhs = (__v2df)__A;
  __v2df rhs = (__v2df)__B;
  int is_less = __builtin_ia32_comisdlt (lhs, rhs);

  return is_less;
}
/* Forwards both operands (as four-int vectors) to __builtin_ia32_paddd128
   and returns the result as __m128i.  Used in this file to advance the
   per-lane index counters. */
static __inline __m128i
_mm_add_epi32 (__m128i __A, __m128i __B)
{
  __v4si lhs = (__v4si)__A;
  __v4si rhs = (__v4si)__B;

  return (__m128i)__builtin_ia32_paddd128 (lhs, rhs);
}
/* Forwards both operands (as __v2di) to __builtin_ia32_pand128 and returns
   the result as __m128i. */
static __inline __m128i
_mm_and_si128 (__m128i __A, __m128i __B)
{
  __v2di lhs = (__v2di)__A;
  __v2di rhs = (__v2di)__B;

  return (__m128i)__builtin_ia32_pand128 (lhs, rhs);
}
/* Forwards both operands (as __v2di) to __builtin_ia32_pandn128.  Callers
   in this file pass a comparison mask as __A and use the result as "the
   bits of __B where the mask is clear". */
static __inline __m128i
_mm_andnot_si128 (__m128i __A, __m128i __B)
{
  __v2di mask = (__v2di)__A;
  __v2di bits = (__v2di)__B;

  return (__m128i)__builtin_ia32_pandn128 (mask, bits);
}
/* Forwards both operands (as __v2di) to __builtin_ia32_por128 and returns
   the result as __m128i. */
static __inline __m128i
_mm_or_si128 (__m128i __A, __m128i __B)
{
  __v2di lhs = (__v2di)__A;
  __v2di rhs = (__v2di)__B;

  return (__m128i)__builtin_ia32_por128 (lhs, rhs);
}
/*
 * 16-byte-aligned overlay of one 128-bit vector value.  The same 16 bytes
 * can be read as any of the three vector types or as a plain array of
 * 64-, 32-, 16- or 8-bit integers, doubles or floats.  The code in this
 * file uses it to build constant index vectors (.si[] -> .xmmi) and to
 * read individual lanes back out (.xmmi/.xmm -> .si[]/.sf[]).
 *
 * Every member is exactly 16 bytes so that the union has the size of a
 * single vector.
 */
typedef union {
    __m128 xmm;                 /* four floats, as a vector */
    __m128i xmmi;               /* integer vector */
    __m128d xmmd;               /* two doubles, as a vector */
    long long di[2];
    unsigned long long udi[2];  /* two 64-bit lanes, matching di[] */
    int si[4];
    unsigned int usi[4];
    short hi[8];
    unsigned short uhi[8];
    signed char qi[16];
    unsigned char uqi[16];
    double df[2];
    float sf[4];
} __attribute__ ((aligned(16))) um128;
/* ANDs `a` with a constant mask of four 0x7fffffff words, i.e. clears
   bit 31 (the float sign bit) of every lane, giving the lane-wise absolute
   value. */
static __inline __m128 se_absps(__m128 a)
{
    static const union {
        __m128 m;
        unsigned int i[4];
    } sign_mask = {
        .i = { 0x7fffffffU, 0x7fffffffU, 0x7fffffffU, 0x7fffffffU }
    };

    return _mm_and_ps(a, sign_mask.m);
}
/* ANDs `a` with a constant mask that clears bit 31 of 32-bit words 1 and 3.
   On the little-endian x86 target named in this report those are the sign
   bits of the low and high double, so both lanes become their absolute
   value. */
static __inline __m128d se2_abspd(__m128d a)
{
    static const union {
        __m128d m;
        unsigned int i[4];
    } u = {
        .i[0] = 0xffffffffUL, .i[1] = 0x7fffffffUL,
        .i[2] = 0xffffffffUL, .i[3] = 0x7fffffffUL
    };
    /* The mask must be a double vector: it is read from a __m128d member
       and passed to _mm_and_pd, which takes __m128d operands. */
    __m128d msk = u.m;

    return _mm_and_pd(a, msk);
}
/* Exchanges prow[n1] and prow[n2].  Nothing changes when n1 == n2. */
static void swap_index(int *prow, int n1, int n2)
{
    const int saved = prow[n1];

    prow[n1] = prow[n2];
    prow[n2] = saved;
}
/*
 * Scans the n doubles v[0], v[step], ..., v[(n-1)*step] and returns the
 * position (0 .. n-1) of one with the largest absolute value.
 *
 * Side effect: first prints all n values to stdout on one line.
 * Returns 0 without scanning when n <= 1.
 *
 * Two elements are handled per step: lane 0 of the vectors tracks the
 * even positions and lane 1 the odd positions.  `mm` holds the running
 * per-lane maximum of |v|, `mi` the positions of the pair currently being
 * examined, and `mim` the position recorded for each lane's maximum.
 * The comments below assume the builtins behind the _mm_* wrappers have
 * their usual SSE2 meaning.
 */
static int sse2_max_abs_index(double *v, int step, int n)
{
__m128d m1, mm;
__m128i mi1, mim, mi, msk;
um128 u;
double *v2end;
int step2, n2;
/* Starting positions: 0 in the low int of lane 0, 1 in the low int of lane 1. */
static const um128 i0i1 = {
.si[0]=0, .si[1]=0, .si[2]=1, .si[3] = 0
};
/* Per-step increment: each lane's position advances by 2. */
static const um128 i1i1 = {
.si[0]=2, .si[1]=0, .si[2]=2, .si[3] = 0
};
/* Dump of the scanned values; runs even when n <= 1. */
for (n2 = 0; n2 < n; ++n2) printf("%f ", v[step * n2]); printf("\n");
if (n <= 1) return 0;
step2 = step + step;
/* End of the part of the input that can be consumed two at a time. */
v2end = v + (n / 2) * step2;
/* Seed the running maximum with the first pair. */
mm = se2_abspd(_mm_setr_pd(v[0], v[step]));
v += step2;
mi1 = i1i1.xmmi;
mim = mi = i0i1.xmmi;
while (v < v2end) {
mi = _mm_add_epi32(mi, mi1);
m1 = se2_abspd(_mm_setr_pd(v[0], v[step]));
v += step2;
mm = _mm_max_pd(mm, m1);
/* msk is set in each lane where the new value equals the updated maximum,
   i.e. the new value is >= the previous maximum of that lane. */
msk = (__m128i)_mm_cmpeq_pd(m1, mm);
/* Select: take mi where msk is set, keep the old mim elsewhere. */
mim = _mm_or_si128(_mm_and_si128(msk, mi), _mm_andnot_si128(msk, mim));
}
if (n & 1) {
/* Odd n: the last element goes into lane 0 only; lane 1 of m1 is zero
   (see _mm_load_sd) and se2_abssd leaves that lane untouched. */
mi = _mm_add_epi32(mi, mi1);
m1 = se2_abssd(_mm_load_sd(v));
mm = _mm_max_pd(mm, m1);
msk = (__m128i)_mm_cmpeq_pd(m1, mm);
mim = _mm_or_si128(_mm_and_si128(msk, mi), _mm_andnot_si128(msk, mim));
}
/* Bring lane 1's maximum down to lane 0 so the two lane maxima can be
   compared as scalars. */
m1 = _mm_unpackhi_pd(mm, mm);
u.xmmi = mim;
/* Lane 1 wins only when its maximum is strictly larger than lane 0's;
   si[2] / si[0] are the low ints of lane 1 / lane 0 of mim. */
if (_mm_comilt_sd(mm, m1))
return u.si[2];
return u.si[0];
}
/*
 * dst[j] += k * src[j] for j = 0 .. n-1.
 *
 * Elements are processed two at a time with vector loads and stores; an
 * odd trailing element is handled with scalar arithmetic.
 */
static void sse2_add_row(double *dst, double *src, double k, int n)
{
    const __m128d scale = _mm_set1_pd(k);
    int pairs;

    for (pairs = n / 2; pairs > 0; --pairs) {
        const __m128d scaled = _mm_mul_pd(_mm_loadu_pd(src), scale);
        const __m128d sum = _mm_add_pd(_mm_loadu_pd(dst), scaled);

        _mm_storeu_pd(dst, sum);
        src += 2;
        dst += 2;
    }

    if (n % 2 != 0)
        *dst += k * *src;
}
/*
 * Exchanges r1[j] and r2[j] for j = 0 .. n-1.
 *
 * Elements are exchanged two at a time with vector loads and stores; an
 * odd trailing element is exchanged as a scalar.
 */
static void sse2_swap_row(double *r1, double *r2, int n)
{
    int pairs;

    for (pairs = n / 2; pairs > 0; --pairs) {
        const __m128d from_r1 = _mm_loadu_pd(r1);
        const __m128d from_r2 = _mm_loadu_pd(r2);

        _mm_storeu_pd(r1, from_r2);
        _mm_storeu_pd(r2, from_r1);
        r1 += 2;
        r2 += 2;
    }

    if (n % 2 != 0) {
        const double saved = *r1;

        *r1 = *r2;
        *r2 = saved;
    }
}
/*
 * Scans the n floats v[0], v[step], ..., v[(n-1)*step] and returns the
 * position (0 .. n-1) of one with the largest absolute value.
 * Returns 0 without scanning when n <= 1.
 *
 * Four elements are handled per step, one per vector lane.  `mm` holds the
 * running per-lane maximum of |v|, `mi` the positions of the group
 * currently being examined, and `mim` the position recorded for each
 * lane's maximum.  The comments below assume the builtins behind the
 * _mm_* wrappers have their usual SSE/SSE2 meaning.
 */
static int sse_max_abs_indexf(float *v, int step, int n)
{
__m128 m1, mm;
__m128i mi1, mim, mi, msk;
um128 u, ui;
float *v4end, t;
int n4, step2, step3, step4;
/* Positions of the first group of four. */
static const um128 i0123 = {
.si[0]=0, .si[1]=1, .si[2]=2, .si[3]=3
};
/* Per-group increment: every lane's position advances by 4. */
static const um128 i1111 = {
.si[0]=4, .si[1]=4, .si[2]=4, .si[3]=4
};
if (n <= 1) return 0;
/* Number of elements that can be consumed in full groups of four. */
n4 = (n / 4) * 4;
mi1 = i1111.xmmi;
mim = mi = i0123.xmmi;
/* With fewer than four elements the maximum starts at zero and only the
   tail code below runs. */
mm = _mm_setzero_ps();
if (n4 > 0) {
step2 = step + step;
step3 = step2 + step;
step4 = step2 + step2;
v4end = v + n4 * step;
/* Seed the running maximum with the first group. */
mm = se_absps(_mm_setr_ps(v[0], v[step], v[step2], v[step3]));
v += step4;
mi = _mm_add_epi32(mi, mi1);
while (v < v4end) {
m1 = se_absps(_mm_setr_ps(v[0], v[step], v[step2], v[step3]));
mm = _mm_max_ps(mm, m1);
/* msk is set in each lane where the new value equals the updated maximum,
   i.e. the new value is >= the previous maximum of that lane. */
msk = (__m128i)_mm_cmpeq_ps(m1, mm);
/* Select: take mi where msk is set, keep the old mim elsewhere. */
mim = _mm_or_si128(_mm_and_si128(msk, mi), _mm_andnot_si128(msk, mim));
v += step4;
mi = _mm_add_epi32(mi, mi1);
}
}
/* From here on n4 is the number of leftover elements (0..3). */
n4 = n - n4;
if (n4) {
int i;
/* Gather the leftovers into a zeroed vector so unused lanes hold 0. */
u.xmm = _mm_setzero_ps();
for (i = 0; i < n4; ++i) {
u.sf[i] = v[0];
v += step;
}
m1 = se_absps(u.xmm);
mm = _mm_max_ps(mm, m1);
msk = (__m128i)_mm_cmpeq_ps(m1, mm);
mim = _mm_or_si128(_mm_and_si128(msk, mi), _mm_andnot_si128(msk, mim));
}
/* Reduce the four lane maxima with scalar code: the lowest-numbered lane
   holding the largest value wins, and that lane's recorded position is
   returned. */
ui.xmmi = mim;
u.xmm = mm;
t = u.sf[0];
n = 0;
if (u.sf[1] > t) { t = u.sf[1]; n = 1; }
if (u.sf[2] > t) { t = u.sf[2]; n = 2; }
if (u.sf[3] > t) { t = u.sf[3]; n = 3; }
return ui.si[n];
}
static void sse_add_rowf(float *dst, float *src, float k, int n)
{
int n4 = (n / 4) * 4;
int i;
float *dst4end = dst + n4;
__m128 mk = _mm_set1_ps(k);
while (dst < dst4end) {
__m128 s = _mm_loadu_ps(src);
__m128 d = _mm_loadu_ps(dst);
s = _mm_mul_ps(s, mk);
d = _mm_add_ps(d, s);
_mm_storeu_ps(dst, d);
src += 4;
dst += 4;
}
n4 = n - n4;
for (i = 0; i < n4; ++i) {
dst[i] += k * src[i];
}
}
static void sse_swap_rowf(float *r1, float *r2, int n)
{
int i;
int n4 = (n / 4) * 4;
float *r14end = r1 + n4;
while (r1 < r14end) {
__m128 v1 = _mm_loadu_ps(r1);
__m128 v2 = _mm_loadu_ps(r2);
_mm_storeu_ps(r1, v2);
_mm_storeu_ps(r2, v1);
r1 += 4;
r2 += 4;
}
r14end = r1 + n - n4;
while (r1 < r14end) {
float t = *r1;
*r1 = *r2;
*r2 = t;
r1++;
r2++;
}
}
/*
 * In-place row-pivoted elimination on an n x n matrix of doubles.
 *
 *   m     first element of the matrix; consecutive rows are nw elements
 *         apart
 *   nw    row stride in elements; also the number of elements exchanged
 *         when two rows are swapped
 *   prow  n-element array; set to 0..n-1 and then permuted in step with
 *         the row swaps
 *   n     matrix dimension
 *
 * For each column the row at or below the diagonal with the largest
 * absolute entry is swapped into the diagonal position.  Each row below is
 * then reduced: its multiplier is stored in place of the eliminated entry
 * and the remaining entries have multiplier * pivot-row subtracted.
 * The pivot is not checked for zero.
 *
 * Prints a banner line to stdout (and sse2_max_abs_index prints each
 * scanned column).  Returns 0 after an even number of row swaps, 1 after
 * an odd number.
 */
int
ludcompd(double *m, int nw, int *prow, int n)
{
    int parity = 0;
    int col;
    double *pivot_row = m;

    for (col = 0; col < n; ++col)
        prow[col] = col;

    printf("ludcompd(): SSE2 code is used.\n");

    for (col = 0; col < n - 1; ++col) {
        /* Offset, in rows below the diagonal, of the chosen pivot. */
        const int best = sse2_max_abs_index(pivot_row + col, nw, n - col);
        double pivot;
        double *row;
        int r;

        if (best != 0) {
            sse2_swap_row(pivot_row, pivot_row + best * nw, nw);
            swap_index(prow, col, col + best);
            parity = 1 - parity;
        }

        pivot = pivot_row[col];
        row = pivot_row + nw;
        for (r = col + 1; r < n; ++r) {
            const double factor = row[col] / pivot;

            row[col] = factor;
            sse2_add_row(row + col + 1, pivot_row + col + 1, -factor,
                         n - col - 1);
            row += nw;
        }

        pivot_row += nw;
    }

    return parity;
}
/*
 * In-place row-pivoted elimination on an n x n matrix of floats.
 *
 *   m     first element of the matrix; consecutive rows are nw elements
 *         apart
 *   nw    row stride in elements; also the number of elements exchanged
 *         when two rows are swapped
 *   prow  n-element array; set to 0..n-1 and then permuted in step with
 *         the row swaps
 *   n     matrix dimension
 *
 * For each column the row at or below the diagonal with the largest
 * absolute entry is swapped into the diagonal position.  Each row below is
 * then reduced: its multiplier is stored in place of the eliminated entry
 * and the remaining entries have multiplier * pivot-row subtracted.
 * The pivot is not checked for zero.
 *
 * Prints a banner line to stdout.  Returns 0 after an even number of row
 * swaps, 1 after an odd number.
 */
int
ludcompf(float *m, int nw, int *prow, int n)
{
    int parity = 0;
    int col;
    float *pivot_row = m;

    for (col = 0; col < n; ++col)
        prow[col] = col;

    printf("ludcompf(): SSE2 code is used.\n");

    for (col = 0; col < n - 1; ++col) {
        /* Offset, in rows below the diagonal, of the chosen pivot. */
        const int best = sse_max_abs_indexf(pivot_row + col, nw, n - col);
        float pivot;
        float *row;
        int r;

        if (best != 0) {
            sse_swap_rowf(pivot_row, pivot_row + best * nw, nw);
            swap_index(prow, col, col + best);
            parity = 1 - parity;
        }

        pivot = pivot_row[col];
        row = pivot_row + nw;
        for (r = col + 1; r < n; ++r) {
            const float factor = row[col] / pivot;

            row[col] = factor;
            sse_add_rowf(row + col + 1, pivot_row + col + 1, -factor,
                         n - col - 1);
            row += nw;
        }

        pivot_row += nw;
    }

    return parity;
}
/* Runs ludcompd on a fixed 4x4 matrix and prints its return value, the
   resulting row permutation, and the matrix contents row by row. */
void test_ludcompd(void)
{
    static double m[4][4] = {
        { 1, 2, 3, 4 },
        { 4, 2, 1, 7 },
        { 5, 6, 10, 78 },
        { 3, 2, 1, 0 }
    };
    int p[4];
    int row;
    const int parity = ludcompd(&m[0][0], 4, p, 4);

    printf("%d\n", parity);
    printf("%d %d %d %d\n", p[0], p[1], p[2], p[3]);
    for (row = 0; row < 4; ++row) {
        printf("%1.3f %1.3f %1.3f %1.3f\n",
               m[row][0], m[row][1], m[row][2], m[row][3]);
    }
}
/* Runs ludcompf on a fixed 4x4 matrix and prints its return value, the
   resulting row permutation, and the matrix contents row by row. */
void test_ludcompf(void)
{
    static float m[4][4] = {
        { 1, 2, 3, 4 },
        { 4, 2, 1, 7 },
        { 5, 6, 10, 78 },
        { 3, 2, 1, 0 }
    };
    int p[4];
    int row;
    const int parity = ludcompf(&m[0][0], 4, p, 4);

    printf("%d\n", parity);
    printf("%d %d %d %d\n", p[0], p[1], p[2], p[3]);
    for (row = 0; row < 4; ++row) {
        printf("%1.3f %1.3f %1.3f %1.3f\n",
               m[row][0], m[row][1], m[row][2], m[row][3]);
    }
}
/* Entry point of the reproducer: runs the double-precision test, then the
   single-precision test, and exits with status 0. */
int main()
{
test_ludcompd();
test_ludcompf();
return 0;
}
--
Summary: Internal compiler error
Product: gcc
Version: 4.0.2
Status: UNCONFIRMED
Severity: normal
Priority: P2
Component: c
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: chen at sys dot wakayama-u dot ac dot jp
CC: gcc-bugs at gcc dot gnu dot org
GCC build triplet: i686-pc-linux-gnu
GCC host triplet: i686-pc-linux-gnu
GCC target triplet: i686-pc-linux-gnu
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570