This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

O3 bug in gcc and egcs.


Here's a simple program outputting the value of a 4x4 determinant.
It seems to compile just fine with -O2 and less, but with -O3 gcc
produces valid but incorrect assembly code. The command line is very
simple - just 'gcc -O3 prog.c -o prog'. Here is some information on the
systems this has been tested on:

Linux 2.2.1 with egcs 1.1.1:
- uname -mrs : Linux 2.2.1 i586.
- the program outputs -0.000000 as opposed to the correct value -99.
- below some assembly code with the problem pointed out.

Linux 2.0.35 with gcc 2.7.2.3:
- uname -mrs : Linux 2.0.35 i586.
- the program outputs incorrect value.
- defining determinant3() as a macro instead seems to avoid the problem.

Digital unix V4.0 with gcc 2.7.2.1:
- uname -mrsv : OSF1 V4.0 564.32 alpha.
- running results in "floating exception".
- defining determinant3() as a macro instead seems to avoid the problem.

HP-UX with gcc 2.8.1:
- uname -mrsv : HP-UX B.11.00 A 9000/778.
- the program outputs -98.0000 as opposed to the correct value -99.
- defining determinant3() as a macro instead seems to avoid the problem.

----------------------------
#include <stdio.h>

/*
 * Return the determinant of the 3x3 matrix d.
 *
 * The value is computed by direct expansion into six triple products:
 * three are added and three are subtracted.
 */
double determinant3(double d[3][3])
{
  return (d[0][0]*d[1][1]*d[2][2] +
	  d[1][0]*d[2][1]*d[0][2] +
	  d[2][0]*d[0][1]*d[1][2] -
	  d[2][0]*d[1][1]*d[0][2] -
	  d[1][0]*d[0][1]*d[2][2] -
	  d[0][0]*d[2][1]*d[1][2]);
}

/*
 * Return the determinant of the 4x4 matrix a.
 *
 * Expands along row 0: for each column k, the 3x3 minor formed from
 * rows 1..3 and every column except k is copied into det3, and
 * a[0][k] times determinant3(det3) is accumulated with alternating sign.
 */
double determinant4(double a[4][4])
{
    int h, i, j, k;
    double det3[3][3];
    double t1, A;

    A = 0.0;
    for (k = 0; k < 4; k++) {
        /* h is the next column of det3 to fill for this minor. */
        h = 0;
        for (j = 0; j < 4; j++)
            if (j != k) {
                /* Copy column j of rows 1..3 into column h of det3. */
                for (i = 1; i < 4; i++)
                    det3[i-1][h] = a[i][j];
                h++;
            }
        t1 = a[0][k]*determinant3(det3);
        /* Terms for odd k are negated before being added. */
        if (k & 1)
	    t1 = -t1;
        A += t1;
    }

    return A;
}

/*
 * Print the determinant of a fixed 4x4 test matrix, followed by a newline.
 *
 * The determinant of this matrix is -99, so a correct build prints
 * "-99.000000".
 *
 * Declared as int main(void) and returning 0 explicitly: void main is not
 * a standard signature for a hosted program and leaves the process exit
 * status unspecified.
 */
int main(void)
{
    double a[4][4] = { { 1.0, 2.0, 3.0, 4.0 },
                       { 5.0, 2.0, 4.0, 6.0 },
                       { 9.0, 3.0, 5.0, 7.0 },
                       { 2.0, 1.0, 9.0, 8.0 } };

    printf("%f\n", determinant4(a));
    return 0;
}
----------------------------

Here is the determinant4() code egcs 1.1.1 (-O3) produces for
my i586 (with some observations.)

----------------------------
determinant4:
	pushl	%ebp
	movl	%esp,%ebp
	subl	$128,%esp
	pushl	%edi
	pushl	%esi
	fldz
	pushl	%ebx
	leal	-96(%ebp),%esi
	movl	$0,-76(%ebp)
	movl	%esi,-112(%ebp)
--->
	fldl	-72(%ebp)
	fstpl	-84(%ebp)
	fldl	-40(%ebp)
	fstpl	-92(%ebp)
	fldl	-8(%ebp)
	fstpl	-100(%ebp)
	fldl	-48(%ebp)
	fstpl	-108(%ebp)
	fldl	-16(%ebp)
	fldl	-56(%ebp)
	fldl	-64(%ebp)
	fldl	-32(%ebp)
--->
This shouldn't be here (nor should the fstpl below). The compiler apparently
wants to copy the uninitialized det3 to its own array (except for det3[2][0])
and use that for the rest of the function. Doesn't it see that det3
is modified later on?
---
	movl	8(%ebp),%esi
	fstpl	-128(%ebp)
	movl	%esi,-116(%ebp)
	.p2align 4,,7
.L21:
	xorl	%ebx,%ebx
	movl	-112(%ebp),%esi
	movl	8(%ebp),%edi
	movl	%esi,-120(%ebp)
	.p2align 4,,7
.L25:
	cmpl	-76(%ebp),%ebx
	je	.L24
	movl	$1,%ecx
	movl	-120(%ebp),%edx
	leal	32(%edi),%eax
	addl	$24,%edx
	.p2align 4,,7
.L30:
	fldl	(%eax)
	fstpl	(%edx)
	addl	$24,%edx
	addl	$32,%eax
	incl	%ecx
	cmpl	$3,%ecx
	jle	.L30
	addl	$8,-120(%ebp)
.L24:
	addl	$8,%edi
	incl	%ebx
	cmpl	$3,%ebx
	jle	.L25
---
I'll repair the code, although it would be somewhat different without
that absurd code above.

	fldl	-16(%ebp)
	fldl	-56(%ebp)
	fldl	-64(%ebp)

And the indices should be:
-84 -> -72
-92 -> -40
-100 -> -8
-108 -> -48
-128 -> -32
(from the code above)
---
	fldl	-84(%ebp)
	fmull	-92(%ebp)
	fldl	-108(%ebp)
	fmul	%st(4),%st
	fxch	%st(1)
..
..
<rest of the code deleted>
----------------------------


 - Allan Peramaki




Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]