GCC 2.95.1 AMD K6 -O2 Problem

Mon Oct 18 09:49:00 GMT 1999

Problem environment: Homebuilt system based on AMD K6/233 on Shuttle
  HOT-595 mainboard, 64MB SDRAM.  Linux kernel 2.2.12, glibc 2.1.1,
  gcc 2.95.1 configured with --prefix=/usr --enable-shared
  --enable-threads --enable-haifa, no modifications to 2.95.1 source.

The following source demonstrates a problem in K6 optimization in
gcc 2.95.1:
---------------------------------------------------------------------
int
sub2(x, y)
  int x, y;
{
   printf("sub2 %d %d\n", x, y);
   return x+y;
}
typedef int (*sub2_t) (int, int);
sub2_t jumptable[] = {sub2};
int
sub1(i, j)
  int i, j;
{
   return ((*jumptable[0])(i, j));
}
main()
{
   int i = 3, j = 6, k;
   printf("main calling sub1 %d %d\n", i, j);
   k = sub1(i, j);
   printf ("sub1 returned %d\n",k);
}
---------------------------------------------------------------------
When this program is compiled with any of these sets of options
     cc -o test4 test4.c
     cc -o test4 -O1 test4.c
     cc -o test4 -O2 test4.c
     cc -o test4 -O3 test4.c
     cc -o test4 -m386 -O2 test4.c
     cc -o test4 -mpentium -O2 test4.c
     cc -o test4 -mpentium -O3 test4.c
     cc -o test4 -mcpu=k6 -O1 test4.c
     cc -o test4 -mcpu=k6 -O3 test4.c
and run, it produces the expected output
     main calling sub1 3 6
     sub2 3 6
     sub1 returned 9
However, if it is compiled with this set of options
     cc -o test4 -mcpu=k6 -O2 test4.c
and run, it produces this unexpected output on my system:
     main calling sub1 3 6
     sub2 134513731 3
     sub1 returned 134513734
Examining the assembly output for function sub1 in the -O1 and -O2
cases of -mcpu=k6 reveals the incorrect code.  Under -O1
the code is:
	pushl %ebp
	movl %esp,%ebp
	subl $8,%esp
	addl $-8,%esp
	pushl 12(%ebp)
	pushl 8(%ebp)
	movl jumptable,%eax
	call *%eax
	leave
	ret
whereas under -O2 the code is:
	pushl %ebp
	movl %esp,%ebp
	subl $8,%esp
	addl $-8,%esp
	pushl 12(%ebp)
	pushl 8(%ebp)
	movl jumptable,%eax
	leave
	call *%eax
	ret
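Moving the leave instruction from after the call instruction to before
it ruins the stack frame: leave resets %esp to the frame base and pops
%ebp, discarding the two arguments just pushed.  The called function
therefore finds sub1's own return address and sub1's first argument
where it expects x and y, which is exactly the "sub2 134513731 3"
output shown above.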

In the case of -mcpu=k6 -O3, the same erroneous code exists for function
sub1, but the optimization of the main function results in sub1 never
being called.

I am not especially troubled by this, and have no need of a fix sent to
me.  I turned on -mcpu=k6 optimization since I found it a new feature of
gcc 2.95.1, and found no problems with it when rebuilding such things as
linux kernel 2.2.12, XFree86 3.3.5, and various other products.  This
particular problem appeared when attempting to install glibc 2.1.2; the
test example is derived from the relevant portion of
glibc-2.1.2/libio/genops.c, function __overflow.  I have gone back to
using -mpentium for now, which is quite sufficient for me.

For reference, as requested by the gcc FAQ, here is the test4.s file
produced by gcc -g -o test4 -save-temps -mcpu=k6 -O2 test4.c:
---------------------------------------------------------------------
	.file	"test4.c"
	.version	"01.01"
.stabs "/home/djk/test/",100,0,0,.Ltext0
.stabs "test4.c",100,0,0,.Ltext0
.text
.Ltext0:
	.stabs	"gcc2_compiled.", 0x3c, 0, 0, 0
.stabs "int:t(0,1)=r(0,1);0020000000000;0017777777777;",128,0,0,0
.stabs "char:t(0,2)=r(0,2);0;127;",128,0,0,0
.stabs "long int:t(0,3)=r(0,1);0020000000000;0017777777777;",128,0,0,0
.stabs "unsigned int:t(0,4)=r(0,1);0000000000000;0037777777777;",128,0,0,0
.stabs "long unsigned int:t(0,5)=r(0,1);0000000000000;0037777777777;",128,0,0,0
.stabs "long long int:t(0,6)=r(0,1);01000000000000000000000;0777777777777777777777;",128,0,0,0
.stabs "long long unsigned int:t(0,7)=r(0,1);0000000000000;01777777777777777777777;",128,0,0,0
.stabs "short int:t(0,8)=r(0,8);-32768;32767;",128,0,0,0
.stabs "short unsigned int:t(0,9)=r(0,9);0;65535;",128,0,0,0
.stabs "signed char:t(0,10)=r(0,10);-128;127;",128,0,0,0
.stabs "unsigned char:t(0,11)=r(0,11);0;255;",128,0,0,0
.stabs "float:t(0,12)=r(0,1);4;0;",128,0,0,0
.stabs "double:t(0,13)=r(0,1);8;0;",128,0,0,0
.stabs "long double:t(0,14)=r(0,1);12;0;",128,0,0,0
.stabs "complex int:t(0,15)=s8real:(0,1),0,32;imag:(0,1),32,32;;",128,0,0,0
.stabs "complex float:t(0,16)=r(0,16);4;0;",128,0,0,0
.stabs "complex double:t(0,17)=r(0,17);8;0;",128,0,0,0
.stabs "complex long double:t(0,18)=r(0,18);12;0;",128,0,0,0
.stabs "void:t(0,19)=(0,19)",128,0,0,0
.section	.rodata
.LC0:
	.string	"sub2 %d %d\n"
.text
	.align 4
.stabs "sub2:F(0,1)",36,0,3,sub2
.stabs "x:p(0,1)",160,0,3,8
.stabs "y:p(0,1)",160,0,3,12
.globl sub2
	.type	 sub2,@function
sub2:
.stabn 68,0,4,.LM1-sub2
.LM1:
.LBB2:
	pushl %ebp
	movl %esp,%ebp
	subl $16,%esp
	pushl %esi
	pushl %ebx
	movl 8(%ebp),%esi
.stabn 68,0,5,.LM2-sub2
.LM2:
	addl $-4,%esp
.stabn 68,0,4,.LM3-sub2
.LM3:
	movl 12(%ebp),%ebx
.stabn 68,0,5,.LM4-sub2
.LM4:
	pushl %ebx
	pushl %esi
	pushl $.LC0
	call printf
.stabn 68,0,6,.LM5-sub2
.LM5:
	leal (%ebx,%esi),%eax
	leal -24(%ebp),%esp
	popl %ebx
	popl %esi
	leave
	ret
.stabn 68,0,7,.LM6-sub2
.LM6:
.LBE2:
.Lfe1:
	.size	 sub2,.Lfe1-sub2
.stabs "x:r(0,1)",64,0,3,6
.stabs "y:r(0,1)",64,0,3,3
.stabn 192,0,0,.LBB2-sub2
.stabn 224,0,0,.LBE2-sub2
.Lscope0:
.stabs "",36,0,0,.Lscope0-sub2
.stabs "sub2_t:t(0,20)=(0,21)=*(0,22)=f(0,1)",128,0,8,0
.globl jumptable
.data
.stabs "jumptable:G(0,23)=ar(0,1);0;-1;(0,20)",32,0,9,0
	.align 4
	.type	 jumptable,@object
jumptable:
	.long sub2
	.size	 jumptable,4
.text
	.align 4
.stabs "sub1:F(0,1)",36,0,12,sub1
.stabs "i:p(0,1)",160,0,12,8
.stabs "j:p(0,1)",160,0,12,12
.globl sub1
	.type	 sub1,@function
sub1:
.stabn 68,0,13,.LM7-sub1
.LM7:
.stabn 68,0,14,.LM8-sub1
.LM8:
	pushl %ebp
	movl %esp,%ebp
	subl $8,%esp
	addl $-8,%esp
	pushl 12(%ebp)
	pushl 8(%ebp)
	movl jumptable,%eax
	leave
	call *%eax
	ret
.Lfe2:
	.size	 sub1,.Lfe2-sub1
.Lscope1:
.stabs "",36,0,0,.Lscope1-sub1
.section	.rodata
.LC1:
	.string	"main calling sub1 %d %d\n"
.LC2:
	.string	"sub1 returned %d\n"
.text
	.align 4
.stabs "main:F(0,1)",36,0,17,main
.globl main
	.type	 main,@function
main:
.stabn 68,0,17,.LM9-main
.LM9:
.stabn 68,0,18,.LM10-main
.LM10:
.LBB3:
.stabn 68,0,19,.LM11-main
.LM11:
	pushl %ebp
	movl %esp,%ebp
	subl $8,%esp
	addl $-4,%esp
	pushl $6
	pushl $3
	pushl $.LC1
	call printf
.stabn 68,0,20,.LM12-main
.LM12:
	addl $-8,%esp
	pushl $6
	pushl $3
	call sub1
.stabn 68,0,21,.LM13-main
.LM13:
	addl $32,%esp
	addl $-8,%esp
	pushl %eax
	pushl $.LC2
	call printf
	leave
	ret
.stabn 68,0,22,.LM14-main
.LM14:
.LBE3:
.Lfe3:
	.size	 main,.Lfe3-main
.stabs "k:r(0,1)",64,0,18,0
.stabn 192,0,0,.LBB3-main
.stabn 224,0,0,.LBE3-main
.Lscope2:
.stabs "",36,0,0,.Lscope2-main
	.text
	.stabs "",100,0,0,Letext
Letext:
	.ident	"GCC: (GNU) 2.95.1 19990816 (release)"
---------------------------------------------------------------------

David King
david.king@lmco.com