This is the mail archive of the gcc-prs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

optimization/8867: sh codegen does double booking of register


>Number:         8867
>Category:       optimization
>Synopsis:       sh codegen does double booking of register
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    unassigned
>State:          open
>Class:          wrong-code
>Submitter-Id:   net
>Arrival-Date:   Sat Dec 07 17:36:00 PST 2002
>Closed-Date:
>Last-Modified:
>Originator:     Marcus Comstedt
>Release:        3.2.1
>Organization:
>Environment:
System: SunOS continuity 5.8 Generic_108528-10 sun4m sparc SUNW,SPARCstation-10
Architecture: sun4

	
host: sparc-sun-solaris2.8
build: sparc-sun-solaris2.8
target: sh-unknown-elf
configured with: /home/marcus/gcc-3.2.1/configure --target=sh-elf --with-newlib
>Description:
	Given enough register pressure and a large enough
	stack frame, the codegen can try to use the same register
	for frame_pointer+offset and a local variable at the same
	time.
>How-To-Repeat:
	Example program (sorry, can't make it more minimal than this):

	---8<--- 321bug.c ---8<---
	
	extern void bar(int, int);
	
	extern int g1[], g2[], g3[], g4[], g5[], g6[], g7[];
	
	/*
	 * Reproducer for the reported SH miscompile at -O1.
	 *
	 * Each loop iteration reads the next index c from z (stopping at a
	 * zero entry), loads g1[c]..g7[c] into locals, makes six calls to
	 * bar() when l1 is non-zero, then adds l7 to x and subtracts it
	 * from y, leaving the loop once y is no longer positive.
	 *
	 * NOTE(review): `a' and `l2' are never used.  Presumably `a' only
	 * enlarges the stack frame and the many simultaneously live locals
	 * create register pressure -- confirm before simplifying anything,
	 * since the exact shape of this function is what triggers the bug.
	 */
	void foo(int x, int y, int *z)
	{
	  int a[16];	/* never referenced below */
	  int c;
	
	  while((c=*z++)) {
	    int l1 = g1[c], l2 = g2[c];	/* l2 is never used */
	    int l3 = g3[c], l4 = g4[c];
	    int l5 = g5[c], l6 = g6[c];
	    int l7 = g7[c];
	
	    if(l1)
	    {
	      bar(x+l5, y+l3);
	      bar(x+l3, x+l4);
	      bar(x+l3+l5, x+l4);
	      bar(x+l3, x+l4+l6);
	      bar(x+l3+l5, x+l4+l6);
	      bar(y+l3, l1+l5);
	    }
	
	    x += l7;	/* the statement whose generated code is wrong */
	    if((y -= l7)<=0)	/* l7 is still needed here */
	      break;
	  }
	}

	---8<---

	Compile with

	sh-elf-gcc -S -O1 321bug.c

	and this is the result:

	---8<--- 321bug.s ---8<---

		! Reviewer annotations: every `!' comment in this listing was added
		! afterwards and is not part of what gcc emitted.
		! Locations used below (r14 is the frame pointer):
		!   r14+64 = x      r14+68 = l1     r14+72 = l6
		!   r14+76 = l7     r14+80 = saved y+l3
		!   r11 = y         r12 = z
		!   r8 = l3, r10 = l4, r13 = l5 (r8 and r10 are reused for sums
		!   after the first call to bar)
		.file	"321bug.c"
		.text
		.text
		.align 2
		.global	_foo
		.type	_foo,@function
	_foo:
		! Prologue: push r8-r14 and pr, then reserve an 84-byte frame.
		mov.l	r8,@-r15
		mov.l	r9,@-r15
		mov.l	r10,@-r15
		mov.l	r11,@-r15
		mov.l	r12,@-r15
		mov.l	r13,@-r15
		mov.l	r14,@-r15
		sts.l	pr,@-r15
		add	#-84,r15
		mov	r15,r14		! r14 = frame pointer
		mov	#64,r0
		mov.l	r4,@(r0,r14)	! x -> r14+64
		mov	r5,r11		! y -> r11
		mov	r6,r12		! z -> r12
		mov.l	@r12+,r0	! c = *z++
		tst	r0,r0
		bt	.L1		! c == 0: skip the loop entirely
		.align 2
	.L7:
		! Loop body; r0 holds c on entry.
		shll2	r0		! r0 = c * 4 (offset used to index g1..g7)
		mov.l	.L9,r1
		mov.l	@(r0,r1),r1	! r1 = g1[c]
		mov	#64,r2
		add	r14,r2		! r2 = r14+64, base for the stack-resident locals
		mov.l	r1,@(4,r2)	! l1 -> r14+68
		mov.l	.L10,r1
		mov.l	@(r0,r1),r8	! r8 = l3 = g3[c]
		mov.l	.L11,r1
		mov.l	@(r0,r1),r10	! r10 = l4 = g4[c]
		mov.l	.L12,r1
		mov.l	@(r0,r1),r13	! r13 = l5 = g5[c]
		mov.l	.L13,r1
		mov.l	@(r0,r1),r1
		mov.l	r1,@(8,r2)	! l6 = g6[c] -> r14+72
		mov.l	.L14,r1
		mov.l	@(r0,r1),r1
		mov.l	r1,@(12,r2)	! l7 = g7[c] -> r14+76
		mov.l	@(4,r2),r1
		tst	r1,r1
		bt	.L5		! l1 == 0: skip the bar() calls
		mov.l	@(0,r2),r4
		add	r13,r4		! r4 = x + l5
		mov	r11,r1
		add	r8,r1		! r1 = y + l3
		mov.l	r1,@(16,r2)	! keep y+l3 at r14+80 for the last call
		mov.l	.L20,r2
		jsr	@r2		! bar(x+l5, y+l3)
		mov	r1,r5		! (delay slot) r5 = y + l3
		mov	#64,r0
		mov.l	@(r0,r14),r9
		add	r8,r9		! r9 = x + l3
		mov.l	@(r0,r14),r8
		add	r10,r8		! r8 = x + l4
		mov	r9,r4
		mov.l	.L20,r2
		jsr	@r2		! bar(x+l3, x+l4)
		mov	r8,r5		! (delay slot)
		mov	r9,r10
		add	r13,r10		! r10 = x + l3 + l5
		mov	r10,r4
		mov.l	.L20,r1
		jsr	@r1		! bar(x+l3+l5, x+l4)
		mov	r8,r5		! (delay slot)
		mov	#72,r0
		mov.l	@(r0,r14),r2	! r2 = l6
		add	r2,r8		! r8 = x + l4 + l6
		mov	r9,r4
		mov.l	.L20,r1
		jsr	@r1		! bar(x+l3, x+l4+l6)
		mov	r8,r5		! (delay slot)
		mov	r10,r4
		mov.l	.L20,r2
		jsr	@r2		! bar(x+l3+l5, x+l4+l6)
		mov	r8,r5		! (delay slot)
		mov	#64,r1
		add	r14,r1
		mov.l	@(4,r1),r5
		add	r13,r5		! r5 = l1 + l5
		mov.l	.L20,r2
		jsr	@r2		! bar(y+l3, l1+l5)
		mov.l	@(16,r1),r4	! (delay slot) r4 = saved y + l3
	.L5:
		! x += l7 -- the miscompiled sequence the report describes.
		mov	#64,r1
		add	r14,r1		! r1 = r14+64 (address of x)
		mov.l	@(0,r1),r2	! r2 = x
		mov.l	@(12,r1),r1	! r1 = l7; the address held in r1 is overwritten
		add	r1,r2		! r2 = x + l7
		mov.l	r2,@(0,r1)	! BUG: meant to store to x, but r1 holds l7, not r14+64
		sub	r1,r11		! y -= l7
		cmp/pl	r11		! T = (y > 0)
		bf	.L1		! y <= 0: leave the loop
		mov.l	@r12+,r0	! c = *z++
		tst	r0,r0
		bf	.L7		! c != 0: next iteration
	.L1:
		! Epilogue: drop the frame, restore pr and r14-r9, then return;
		! r8 is restored in the delay slot of rts.
		add	#84,r14
		mov	r14,r15
		lds.l	@r15+,pr
		mov.l	@r15+,r14
		mov.l	@r15+,r13
		mov.l	@r15+,r12
		mov.l	@r15+,r11
		mov.l	@r15+,r10
		mov.l	@r15+,r9
		rts	
		mov.l	@r15+,r8
	.L21:
		.align 2
		! Constant pool: addresses fetched by the `mov.l .Lnn,rN' loads above.
	.L9:
		.long	_g1
	.L10:
		.long	_g3
	.L11:
		.long	_g4
	.L12:
		.long	_g5
	.L13:
		.long	_g6
	.L14:
		.long	_g7
	.L20:
		.long	_bar
	.Lfe1:
		.size	_foo,.Lfe1-_foo
		.ident	"GCC: (GNU) 3.2.1"
	
	---8<---

	The interesting bit is the 6 instructions after
	.L5:, corresponding to the statement `x += l7'.
	x is allocated at frame_pointer+64 and l7 at
	frame_pointer+76, so x and l7 are loaded into r2 and
	r1 respectively, and the sum is calculated into r2.
	So far so good.  But the final instruction, which writes
	the result back to x, assumes that r1 still points to
	frame_pointer+64, even though r1 now holds the value of
	l7.  (l7 is still live at that point, I might add: the
	following instruction, which does `y -= l7' with y
	allocated to r11, uses it as well.)

>Fix:
	Compiling without any optimization whatsoever seems to
	prevent the problem from occurring, although I can't say
	for sure that it always will.
>Release-Note:
>Audit-Trail:
>Unformatted:


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]