egcs-1.1.2-pre1 insn scheduling bug
Bruno Haible
haible@ilog.fr
Thu Feb 25 12:29:00 GMT 1999
Hi,
Wrong code is being generated by "gcc -O2" (egcs-1.1.2-pre1) on SPARC.
Below you will find a file bug.c, followed by its function as compiled with
"gcc -O" and with "gcc -O -fschedule-insns". Instructions which are clearly
wrong are marked as such. The bad code leads to wrong results in the
multiprecision gcd routine of CLISP and CLN.
$ uname -srm
SunOS 5.6 sun4u
$ gcc -v
Reading specs from /nfs/telecom/users/haible/gnu/arch/solaris/lib/gcc-lib/sparc-sun-solaris2.6/egcs-2.91.61/specs
gcc version egcs-2.91.61 19990216 (egcs-1.1.2 pre-release-1)
$ gcc -O -S bug.c
bug.c:4: warning: call-clobbered register used for global register variable
$ gcc -O -fschedule-insns -S bug.c
bug.c:4: warning: call-clobbered register used for global register variable
Here are side-by-side diffs of the relevant portion. It looks as if two
instructions "sub %g1,%i0,%g1" and "add %i2,-1,%i2" got swapped for
some obscure reason.
---- with -O ----------------   ---- with -O -fschedule-insns --------
.LL32:                          .LL32:
    ld [%l0+8],%l1                  ld [%l0+8],%l1
    mov %i3,%o0               |     mov %l3,%o0
    call mulu32_,0                  call mulu32_,0
    mov %i2,%o1                     mov %i2,%o1
    cmp %g1,%i0                     cmp %g1,%i0
    bgu,a .LL57               |     bgu .LL57
    add %i2,-1,%i2            |     sub %g1,%i0,%g1      <== WRONG
    bne .LL58                       bne .LL58
    cmp %i2,0                       cmp %i2,0
    cmp %o0,%l1                     cmp %o0,%l1
    bleu .LL58                      bleu .LL58
    cmp %i2,0                       cmp %i2,0
    add %i2,-1,%i2            |     sub %g1,%i0,%g1      <== WRONG
.LL57:                          .LL57:
    cmp %o0,%l1                     cmp %o0,%l1
    bgeu .LL38                      bgeu .LL38
    sub %g1,%i0,%g1           |     add %i2,-1,%i2       <== WRONG
    add %g1,-1,%g1                  add %g1,-1,%g1
.LL38:                          .LL38:
================================= bug.c =====================================
/* Global register variables: each one pins a file-scope long to a fixed
   register (%g5, %g6, %g7, %g4).  None of them is referenced by the code
   of this test case; they are only declared.  */
register long STACK_reg __asm__ ("%g5");
register long mv_count_reg __asm__ ("%g6");
register long value1_reg __asm__ ("%g7");
/* This is line 4 of bug.c, the line that gcc's "call-clobbered register used
   for global register variable" warning refers to.  */
register long subr_self_reg __asm__ ("%g4");
/* Digit type of the multiprecision code.
   NOTE(review): the name implies 32 bits, which holds only where
   unsigned long is 32 bits wide -- confirm for the target.  */
typedef unsigned long uint32;
/* Signed counterpart; UDS_divide_ casts a digit to it to test the top bit.  */
typedef long sint32;
/* Descriptor of a digit sequence, used for the quotient and remainder that
   UDS_divide_ returns.  In every result UDS_divide_ stores, the digits start
   at MSDptr and LSDptr equals MSDptr + len.  */
typedef struct
{
uint32 *MSDptr;
uint32 len;
uint32 *LSDptr;
}
DS;
/* External helper routines.  Only their prototypes are given here; the
   definitions are not part of this test case, so their exact semantics
   cannot be read off this file.  */
extern uint32 addto_loop_down (const uint32 * sourceptr, uint32 * destptr, uint32 count);
extern uint32 *copy_loop_down (const uint32 * sourceptr, uint32 * destptr, uint32 count);
/* Called when the divisor has no nonzero digit; declared as never returning.  */
extern void divide_0 (void) __attribute__ ((__noreturn__));
/* UDS_divide_ reads an additional value from register %g1 immediately after
   calling this routine (presumably a second result -- confirm against the
   routine's definition).  */
extern uint32 divu_6432_3232_ (uint32 xhi, uint32 xlo, uint32 y);
extern uint32 divucopy_loop_up (uint32 digit, const uint32 * sourceptr, uint32 * destptr, uint32 len);
/* As with divu_6432_3232_, UDS_divide_ reads %g1 right after calling this
   routine (presumably the high word of the product -- confirm).  */
extern uint32 mulu32_ (uint32 arg1, uint32 arg2);
extern uint32 mulusub_loop_down (uint32 digit, const uint32 * sourceptr, uint32 * destptr, uint32 len);
extern uint32 shiftleftcopy_loop_down (const uint32 * sourceptr, uint32 * destptr, uint32 count, uint32 i);
extern uint32 shiftright_loop_up (uint32 * ptr, uint32 count, uint32 i);
/* UDS_divide_: divide the unsigned digit sequence a by the unsigned digit
   sequence b, storing descriptors of the quotient in *q_ and of the
   remainder in *r_.

   a_MSDptr, a_len, a_LSDptr   dividend: pointer to its first (most
                               significant) digit, digit count, and the
                               pointer handed to the *_loop_down helpers.
   b_MSDptr, b_len, b_LSDptr   divisor, same layout.
   roomptr                     work area in which both results are built.
                               NOTE(review): the code addresses up to
                               &roomptr[a_len + 1], so it appears to need
                               room for a_len + 1 digits -- confirm.
   q_, r_                      output descriptors (MSDptr, len, LSDptr).

   Leading zero digits of a and b are skipped first.  If b has no nonzero
   digit, divide_0 () is called, which is declared noreturn.  Three cases
   follow: a shorter than b, b a single digit, and the general case.  */
void UDS_divide_ (
uint32 * a_MSDptr,
uint32 a_len,
uint32 * a_LSDptr,
uint32 * b_MSDptr,
uint32 b_len,
uint32 * b_LSDptr,
uint32 * roomptr,
DS * q_,
DS * r_)
{
/* Skip leading zero digits of the dividend.  */
while ((a_len > 0) && (a_MSDptr[0] == 0))
{
a_MSDptr++;
a_len--;
}
/* Skip leading zero digits of the divisor; running out of digits means the
   divisor is zero.  */
while (1)
{
if (b_len == 0)
{
divide_0 ();
}
if (b_MSDptr[0] == 0)
{
b_MSDptr++;
b_len--;
}
else
break;
}
/* From here on b_MSDptr[0] != 0 and b_len > 0.  */
if (a_len < b_len)
{
/* Case 1: dividend shorter than divisor.  The quotient is empty (len 0) and
   the remainder is a copy of a placed in the work area.  */
uint32 *r_MSDptr = roomptr;
uint32 *r_LSDptr = &roomptr[a_len];
copy_loop_down (a_LSDptr, r_LSDptr, a_len);
q_->MSDptr = r_MSDptr;
q_->len = 0;
q_->LSDptr = r_MSDptr;
r_->MSDptr = r_MSDptr;
r_->len = a_len;
r_->LSDptr = r_LSDptr;
return;
}
else if (b_len == 1)
{
/* Case 2: single-digit divisor.  The quotient occupies roomptr[0..a_len-1];
   the one-digit remainder slot follows it at roomptr[a_len].  */
uint32 *q_MSDptr = roomptr;
uint32 *q_LSDptr = &roomptr[a_len];
uint32 *r_MSDptr = q_LSDptr;
uint32 *r_LSDptr = &r_MSDptr[1];
{
/* The helper's return value is used as the remainder digit.  */
uint32 rest = divucopy_loop_up (b_MSDptr[0], a_MSDptr, q_MSDptr, a_len);
uint32 r_len;
if (!(rest == 0))
{
r_MSDptr[0] = rest;
r_len = 1;
}
else
{
/* Zero remainder: represent it as an empty sequence.  */
r_MSDptr = r_LSDptr;
r_len = 0;
}
/* Drop one leading zero digit of the quotient, if present.  a_len doubles
   as the quotient length from here on.  */
if (q_MSDptr[0] == 0)
{
q_MSDptr++;
a_len--;
}
q_->MSDptr = q_MSDptr;
q_->len = a_len;
q_->LSDptr = q_LSDptr;
r_->MSDptr = r_MSDptr;
r_->len = r_len;
r_->LSDptr = r_LSDptr;
return;
}
}
else
{
/* Case 3: general case, b_len >= 2 and a_len >= b_len.  */
/* s is the left-shift count that brings the top bit of b's leading digit
   into the most significant bit position.  */
uint32 s;
{
uint32 msd = b_MSDptr[0];
if ((sint32) msd < 0)
{
/* Top bit already set: no shift needed.  */
s = 0;
goto shift_ok;
}
else
{
{
/* Bit-length computation via the exponent field of a double:
   i[0] receives 1075 << 20 = 0x43300000 and i[1] receives msd; the value
   4503599627370496.0 (2^52) is then subtracted and the exponent field of
   the result is read back from i[0].
   NOTE(review): this assumes an IEEE double whose high-order word is i[0]
   (big-endian word order) and 32-bit uint32 -- confirm for the target.  */
union
{
double f;
uint32 i[2];
}
__fi;
__fi.i[0] = (uint32) (52 + 1 + 1022) << (52 - 32);
__fi.i[1] = (msd);
__fi.f = __fi.f - (double) (4503599627370496.0L);
s = 32 - ((__fi.i[0] >> (52 - 32)) - 1022);
};
goto shift;
}
}
/* Both branches above leave through a goto, so the following test is never
   evaluated; the block is entered only through the label `shift'.  */
if (!(s == 0))
shift:
{
/* Replace b by a copy shifted left by s bits, held in stack space obtained
   from __builtin_alloca.  */
uint32 *old_b_LSDptr = b_LSDptr;
{
uint32 __need = (uint32) (b_len);
uint32 *__array = (uint32 *) __builtin_alloca (__need * sizeof (uint32));;
(void) (b_MSDptr = &__array[0]);
(void) (b_LSDptr = &__array[__need]);
};
shiftleftcopy_loop_down (old_b_LSDptr, b_LSDptr, b_len, s);
;
}
shift_ok:
{
/* Build the working remainder r in roomptr[0..a_len]: a (shifted left by s
   if s != 0) with one extra leading digit in roomptr[0].  */
uint32 *r_MSDptr = roomptr;
uint32 *r_LSDptr = &roomptr[a_len + 1];
if (s == 0)
{
copy_loop_down (a_LSDptr, r_LSDptr, a_len);
r_MSDptr[0] = 0;
}
else
{
/* The helper's return value becomes the extra leading digit.  */
r_MSDptr[0] = shiftleftcopy_loop_down (a_LSDptr, r_LSDptr, a_len, s);
}
{
/* j = a_len - b_len + 1 quotient digits are produced, one per iteration.
   Each quotient digit overwrites the leading digit of r that it consumed,
   so the quotient starts where r started (q_MSDptr).  */
uint32 j = a_len - b_len;
uint32 *r_ptr = &r_LSDptr[-(uint32) j];
uint32 *q_MSDptr = r_MSDptr;
uint32 q_len = j = j + 1;
/* The two leading digits of the (shifted) divisor.  */
uint32 b_msd = b_MSDptr[0];
uint32 b_2msd = b_MSDptr[1];
do
{
/* q_stern: trial quotient digit; c1: value used below as the high word in
   the comparison against the product b_2msd * q_stern.  */
uint32 q_stern;
uint32 c1;
if (r_MSDptr[0] < b_msd)
{
(
{
/* _r is an uninitialised local register variable bound to %g1; reading it
   picks up whatever the call just made left in that register (presumably
   the remainder of the division -- confirm against divu_6432_3232_).  */
uint32 _q = divu_6432_3232_ (r_MSDptr[0], r_MSDptr[1], b_msd);
register uint32 _r __asm__ ("%g1");
q_stern = _q;
c1 = _r;
}
);
}
else
{
/* Leading digit of r is not below b_msd: start with the largest digit.
   NOTE(review): 2L << 31 is evaluated in type long; the "gcc -O" listing in
   this report loads -1 (all bits set) at this point.  */
q_stern = (2L << ((32) - 1)) - 1;
/* If r's leading digit exceeds b_msd, or r_MSDptr[1] + b_msd wraps around,
   skip the refinement and go straight to the multiply-subtract step.  */
if ((r_MSDptr[0] > b_msd) || ((c1 = r_MSDptr[1] + b_msd) < b_msd))
{
goto subtract;
}
}
{
/* Refinement: compare the two-digit value c3hi:c3lo, taken from
   mulu32_ (b_2msd, q_stern), with c1:c2lo and lower q_stern by one or two.
   This comparison and adjustment is the code following the mulu32_ call,
   i.e. the region (label .LL32 onward) for which the report shows wrong
   instructions under -fschedule-insns.  */
uint32 c2lo = r_MSDptr[2];
uint32 c3hi;
uint32 c3lo;
(
{
c3lo = mulu32_ (b_2msd, q_stern);
{
/* As above: read %g1 as left by the call (presumably the high word of the
   product -- confirm against mulu32_).  */
register uint32 _hi __asm__ ("%g1");
c3hi = _hi;
}
}
);
if ((c3hi > c1) || ((c3hi == c1) && (c3lo > c2lo)))
{
q_stern = q_stern - 1;
/* Two-digit subtraction c3hi:c3lo -= c1:c2lo, with borrow.  */
c3hi -= c1;
if (c3lo < c2lo)
{
c3hi--;
};
c3lo -= c2lo;
if ((c3hi > b_msd) || ((c3hi == b_msd) && (c3lo > b_2msd)))
{
q_stern = q_stern - 1;
}
}
}
/* The label sits inside the if body, so `goto subtract' above enters the
   block without evaluating the q_stern test.  */
if (!(q_stern == 0))
subtract:
{
/* Multiply-subtract with q_stern; if the returned carry exceeds the leading
   digit of r, q_stern was one too large: decrement it and add b back.  */
uint32 carry = mulusub_loop_down (q_stern, b_LSDptr, r_ptr, b_len);
if (carry > r_MSDptr[0])
{
q_stern = q_stern - 1;
addto_loop_down (b_LSDptr, r_ptr, b_len);
}
}
/* Store the quotient digit over the consumed leading digit of r and advance
   both pointers by one digit.  */
*r_MSDptr++ = q_stern;
r_ptr++;
}
while (!(--j == 0));
/* Drop one leading zero digit of the quotient, if present.  */
if (q_MSDptr[0] == 0)
{
q_MSDptr++;
q_len--;
}
/* r_MSDptr now points just past the last quotient digit, which is also the
   start of the b_len remaining digits of r.  */
q_->MSDptr = q_MSDptr;
q_->len = q_len;
q_->LSDptr = r_MSDptr;
/* Undo the shift by s on the remainder.  */
if (!(s == 0))
{
shiftright_loop_up (r_MSDptr, b_len, s);
}
/* Skip leading zero digits of the remainder; b_len doubles as the
   remainder length here.  */
while ((b_len > 0) && (r_MSDptr[0] == 0))
{
r_MSDptr++;
b_len--;
}
r_->MSDptr = r_MSDptr;
r_->len = b_len;
r_->LSDptr = r_LSDptr;
return;
}
}
}
}
========================== gcc -O -S bug.c ==================================
.file "bug.c"
gcc2_compiled.:
.section ".rodata"
.align 8
.LLC0:
.uaword 0x43300000 ! ~4.50359962737049600000e15
.uaword 0x0
.section ".text"
.align 4
.global UDS_divide_
.type UDS_divide_,#function
.proc 020
UDS_divide_:
!#PROLOGUE# 0
save %sp,-120,%sp
!#PROLOGUE# 1
mov %i0,%o1
cmp %i1,0
be .LL9
ld [%fp+92],%l3
b .LL53
ld [%o1],%o0
.LL4:
be .LL9
add %o1,4,%o1
ld [%o1],%o0
.LL53:
cmp %o0,0
be,a .LL4
addcc %i1,-1,%i1
.LL9:
cmp %i4,0
bne,a .LL10
ld [%i3],%o0
call divide_0,0
nop
.LL10:
cmp %o0,0
bne .LL8
cmp %i1,%i4
add %i3,4,%i3
b .LL9
add %i4,-1,%i4
.LL8:
bgeu .LL14
sll %i1,2,%l0
add %l3,%l0,%l0
mov %i2,%o0
mov %l0,%o1
call copy_loop_down,0
mov %i1,%o2
ld [%fp+96],%o4
st %l3,[%o4]
st %g0,[%o4+4]
st %l3,[%o4+8]
ld [%fp+100],%o4
st %l3,[%o4]
st %i1,[%o4+4]
b .LL1
st %l0,[%o4+8]
.LL14:
cmp %i4,1
bne .LL16
ld [%i3],%o0
mov %l3,%l0
sll %i1,2,%l2
add %l3,%l2,%l1
mov %l1,%l5
add %l1,4,%l4
mov %l3,%o2
call divucopy_loop_up,0
mov %i1,%o3
cmp %o0,0
be .LL17
mov 1,%o1
b .LL18
st %o0,[%l3+%l2]
.LL17:
mov %l4,%l1
mov 0,%o1
.LL18:
ld [%l0],%o0
cmp %o0,0
bne .LL55
ld [%fp+96],%o4
add %l0,4,%l0
add %i1,-1,%i1
.LL55:
st %l0,[%o4]
st %i1,[%o4+4]
st %l5,[%o4+8]
ld [%fp+100],%o4
st %l1,[%o4]
st %o1,[%o4+4]
b .LL1
st %l4,[%o4+8]
.LL16:
cmp %o0,0
bge .LL21
sethi %hi(1127219200),%o4
b .LL22
mov 0,%l5
.LL21:
st %o4,[%fp-20]
ld [%fp-20],%f4
st %o0,[%fp-20]
ld [%fp-20],%f5
sethi %hi(.LLC0),%o4
ldd [%o4+%lo(.LLC0)],%f2
fsubd %f4,%f2,%f4
st %f4,[%fp-20]
ld [%fp-20],%o4
srl %o4,20,%o1
mov 1054,%o0
sub %o0,%o1,%l5
mov %i5,%o0
sll %i4,2,%o2
add %o2,7,%o1
and %o1,-8,%o1
sub %sp,%o1,%sp
add %sp,96,%i3
add %i3,%o2,%i5
mov %i5,%o1
mov %i4,%o2
call shiftleftcopy_loop_down,0
mov %l5,%o3
.LL22:
mov %l3,%l0
sll %i1,2,%o0
add %o0,4,%o0
cmp %l5,0
bne .LL26
add %l0,%o0,%l7
mov %i2,%o0
mov %l7,%o1
call copy_loop_down,0
mov %i1,%o2
b .LL27
st %g0,[%l0]
.LL26:
mov %i2,%o0
mov %l7,%o1
mov %i1,%o2
call shiftleftcopy_loop_down,0
mov %l5,%o3
st %o0,[%l0]
.LL27:
sub %i1,%i4,%i1
sll %i1,2,%o0
sub %l7,%o0,%l3
mov %l0,%l4
add %i1,1,%i1
mov %i1,%l6
ld [%i3],%l2
ld [%i3+4],%i3
.LL28:
ld [%l0],%o0
cmp %o0,%l2
bgeu .LL31
nop
ld [%l0+4],%o1
call divu_6432_3232_,0
mov %l2,%o2
mov %o0,%i2
b .LL32
mov %g1,%i0
.LL31:
bgu .LL35
mov -1,%i2
ld [%l0+4],%o0
add %l2,%o0,%i0
cmp %i0,%l2
blu .LL56
mov %i2,%o0
.LL32:
ld [%l0+8],%l1
mov %i3,%o0
call mulu32_,0
mov %i2,%o1
cmp %g1,%i0
bgu,a .LL57
add %i2,-1,%i2
bne .LL58
cmp %i2,0
cmp %o0,%l1
bleu .LL58
cmp %i2,0
add %i2,-1,%i2
.LL57:
cmp %o0,%l1
bgeu .LL38
sub %g1,%i0,%g1
add %g1,-1,%g1
.LL38:
cmp %g1,%l2
bgu .LL40
sub %o0,%l1,%o0
bne .LL58
cmp %i2,0
cmp %o0,%i3
bleu .LL58
cmp %i2,0
.LL40:
add %i2,-1,%i2
cmp %i2,0
.LL58:
be,a .LL59
st %i2,[%l0]
.LL35:
mov %i2,%o0
.LL56:
mov %i5,%o1
mov %l3,%o2
call mulusub_loop_down,0
mov %i4,%o3
ld [%l0],%o1
cmp %o0,%o1
bleu .LL41
mov %i5,%o0
add %i2,-1,%i2
mov %l3,%o1
call addto_loop_down,0
mov %i4,%o2
.LL41:
st %i2,[%l0]
.LL59:
add %l0,4,%l0
addcc %i1,-1,%i1
bne .LL28
add %l3,4,%l3
ld [%l4],%o0
cmp %o0,0
bne .LL60
ld [%fp+96],%o4
add %l4,4,%l4
add %l6,-1,%l6
.LL60:
st %l4,[%o4]
st %l6,[%o4+4]
cmp %l5,0
be .LL45
st %l0,[%o4+8]
mov %l0,%o0
mov %i4,%o1
call shiftright_loop_up,0
mov %l5,%o2
.LL45:
cmp %i4,0
be .LL61
ld [%fp+100],%o4
b .LL54
ld [%l0],%o0
.LL48:
be .LL47
add %l0,4,%l0
ld [%l0],%o0
.LL54:
cmp %o0,0
be,a .LL48
addcc %i4,-1,%i4
.LL47:
ld [%fp+100],%o4
.LL61:
st %l0,[%o4]
st %i4,[%o4+4]
st %l7,[%o4+8]
.LL1:
ret
restore
.LLfe1:
.size UDS_divide_,.LLfe1-UDS_divide_
.ident "GCC: (GNU) egcs-2.91.61 19990216 (egcs-1.1.2 pre-release-1)"
========================== gcc -O -fschedule-insns -S bug.c =================
.file "bug.c"
gcc2_compiled.:
.section ".rodata"
.align 8
.LLC0:
.uaword 0x43300000 ! ~4.50359962737049600000e15
.uaword 0x0
.section ".text"
.align 4
.global UDS_divide_
.type UDS_divide_,#function
.proc 020
UDS_divide_:
!#PROLOGUE# 0
save %sp,-120,%sp
!#PROLOGUE# 1
ld [%fp+92],%l3
cmp %i1,0
be .LL9
mov %i0,%o1
b .LL53
ld [%o1],%o0
.LL4:
be .LL9
add %o1,4,%o1
ld [%o1],%o0
.LL53:
cmp %o0,0
be,a .LL4
addcc %i1,-1,%i1
.LL9:
cmp %i4,0
bne,a .LL10
ld [%i3],%o0
call divide_0,0
nop
.LL10:
cmp %o0,0
bne .LL8
cmp %i1,%i4
add %i3,4,%i3
b .LL9
add %i4,-1,%i4
.LL8:
bgeu .LL14
sll %i1,2,%l0
add %l3,%l0,%l0
mov %i2,%o0
mov %l0,%o1
call copy_loop_down,0
mov %i1,%o2
ld [%fp+96],%o4
st %l3,[%o4+8]
st %l3,[%o4]
st %g0,[%o4+4]
ld [%fp+100],%o4
st %l0,[%o4+8]
st %l3,[%o4]
b .LL1
st %i1,[%o4+4]
.LL14:
cmp %i4,1
bne .LL16
ld [%i3],%o0
sll %i1,2,%l2
add %l3,%l2,%l1
mov %l3,%l0
mov %l3,%o2
mov %i1,%o3
mov %l1,%l4
call divucopy_loop_up,0
add %l1,4,%i0
cmp %o0,0
be .LL17
mov 1,%o1
b .LL18
st %o0,[%l3+%l2]
.LL17:
mov %i0,%l1
mov 0,%o1
.LL18:
ld [%l0],%o0
cmp %o0,0
bne .LL55
ld [%fp+96],%o4
add %l0,4,%l0
add %i1,-1,%i1
.LL55:
st %l4,[%o4+8]
st %l0,[%o4]
st %i1,[%o4+4]
ld [%fp+100],%o4
st %i0,[%o4+8]
st %l1,[%o4]
b .LL1
st %o1,[%o4+4]
.LL16:
cmp %o0,0
bge .LL21
sethi %hi(1127219200),%o4
b .LL22
mov 0,%l5
.LL21:
st %o4,[%fp-20]
ld [%fp-20],%f4
sethi %hi(.LLC0),%o4
ldd [%o4+%lo(.LLC0)],%f2
st %o0,[%fp-20]
ld [%fp-20],%f5
fsubd %f4,%f2,%f4
sll %i4,2,%o3
add %o3,7,%o0
and %o0,-8,%o0
sub %sp,%o0,%sp
mov %i5,%o0
st %f4,[%fp-20]
ld [%fp-20],%o4
srl %o4,20,%o2
mov 1054,%o1
add %sp,96,%i3
sub %o1,%o2,%l5
add %i3,%o3,%i5
mov %i5,%o1
mov %i4,%o2
call shiftleftcopy_loop_down,0
mov %l5,%o3
.LL22:
sll %i1,2,%o0
mov %l3,%l0
add %o0,4,%o0
cmp %l5,0
bne .LL26
add %l0,%o0,%l7
mov %i2,%o0
mov %l7,%o1
call copy_loop_down,0
mov %i1,%o2
b .LL27
st %g0,[%l0]
.LL26:
mov %i2,%o0
mov %l7,%o1
mov %i1,%o2
call shiftleftcopy_loop_down,0
mov %l5,%o3
st %o0,[%l0]
.LL27:
sub %i1,%i4,%i1
sll %i1,2,%o0
ld [%i3+4],%l3
add %i1,1,%i1
ld [%i3],%i3
sub %l7,%o0,%l2
mov %l0,%l4
mov %i1,%l6
.LL28:
ld [%l0],%o0
cmp %o0,%i3
bgeu .LL31
nop
ld [%l0+4],%o1
call divu_6432_3232_,0
mov %i3,%o2
mov %o0,%i2
b .LL32
mov %g1,%i0
.LL31:
bgu .LL35
mov -1,%i2
ld [%l0+4],%o0
add %i3,%o0,%i0
cmp %i0,%i3
blu .LL56
mov %i2,%o0
.LL32:
ld [%l0+8],%l1
mov %l3,%o0
call mulu32_,0
mov %i2,%o1
cmp %g1,%i0
bgu .LL57
sub %g1,%i0,%g1
bne .LL58
cmp %i2,0
cmp %o0,%l1
bleu .LL58
cmp %i2,0
sub %g1,%i0,%g1
.LL57:
cmp %o0,%l1
bgeu .LL38
add %i2,-1,%i2
add %g1,-1,%g1
.LL38:
cmp %g1,%i3
bgu .LL40
sub %o0,%l1,%o0
bne .LL58
cmp %i2,0
cmp %o0,%l3
bleu .LL58
cmp %i2,0
.LL40:
add %i2,-1,%i2
cmp %i2,0
.LL58:
be,a .LL59
st %i2,[%l0]
.LL35:
mov %i2,%o0
.LL56:
mov %i5,%o1
mov %l2,%o2
call mulusub_loop_down,0
mov %i4,%o3
ld [%l0],%o1
cmp %o0,%o1
bleu .LL41
mov %i5,%o0
add %i2,-1,%i2
mov %l2,%o1
call addto_loop_down,0
mov %i4,%o2
.LL41:
st %i2,[%l0]
.LL59:
add %l0,4,%l0
addcc %i1,-1,%i1
bne .LL28
add %l2,4,%l2
ld [%l4],%o0
cmp %o0,0
bne .LL60
ld [%fp+96],%o4
add %l4,4,%l4
add %l6,-1,%l6
.LL60:
st %l4,[%o4]
st %l6,[%o4+4]
cmp %l5,0
be .LL45
st %l0,[%o4+8]
mov %l5,%o2
mov %l0,%o0
call shiftright_loop_up,0
mov %i4,%o1
.LL45:
cmp %i4,0
be .LL61
ld [%fp+100],%o4
b .LL54
ld [%l0],%o0
.LL48:
be .LL47
add %l0,4,%l0
ld [%l0],%o0
.LL54:
cmp %o0,0
be,a .LL48
addcc %i4,-1,%i4
.LL47:
ld [%fp+100],%o4
.LL61:
st %l7,[%o4+8]
st %l0,[%o4]
st %i4,[%o4+4]
.LL1:
ret
restore
.LLfe1:
.size UDS_divide_,.LLfe1-UDS_divide_
.ident "GCC: (GNU) egcs-2.91.61 19990216 (egcs-1.1.2 pre-release-1)"
=============================================================================
Best regards,
Bruno
----------------------------------------------------------------------------
Bruno Haible email: <haible@ilog.fr>
ILOG S.A. tel: +33 1 4908 3585
9, rue de Verdun - BP 85 fax: +33 1 4908 3510
94253 Gentilly Cedex url: http://www.ilog.fr/
France running Sparc-Linux-BSD-GNU-X11-KDE
More information about the Gcc-bugs
mailing list