This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Severe problems with vectorizing stuff in 4.0.3 HEAD


All,

I am getting a lot of test suite failures with almost all of
the vect/* tests. I am using pr18400.c from the test suite
as an example here, because it's about the smallest one I
can find. Here is what is generated at -O2:

	.file	"pr18400.c"
	.version	"01.01"
	.text
	.align 16
	.globl	sig_ill_handler
	.type	sig_ill_handler, @function
sig_ill_handler:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$20, %esp
	pushl	$0
	call	exit
	.size	sig_ill_handler, .-sig_ill_handler
	.align 16
	.globl	check_vect
	.type	check_vect, @function
check_vect:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$16, %esp
	pushl	$sig_ill_handler
	pushl	$4
	call	signal
/APP
	.byte 0xf2,0x0f,0x10,0xc0
/NO_APP
	popl	%eax
	popl	%edx
	pushl	$0
	pushl	$4
	call	signal
	addl	$16, %esp
	leave
	ret
	.size	check_vect, .-check_vect
	.section	.rodata
	.align 32
	.type	C.0.1905, @object
	.size	C.0.1905, 32
C.0.1905:
	.long	0
	.long	3
	.long	6
	.long	9
	.long	12
	.long	15
	.long	18
	.long	21
	.text
	.align 16
	.globl	main1
	.type	main1, @function
main1:
	pushl	%ebp
	movl	$8, %ecx
	movl	%esp, %ebp
	pushl	%edi
	cld
	pushl	%esi
	leal	-40(%ebp), %edi
	subl	$64, %esp
	movl	$C.0.1905, %esi
	rep
	movsl
	xorl	%edx, %edx
	leal	-40(%ebp), %esi
	leal	-72(%ebp), %ecx
	.align 16
.L6:
	leal	0(,%edx,4), %eax
	addl	$4, %edx
	cmpl	$8, %edx
*** At this point, the registers have the following values:
*** %eax = 0,  %ecx = 0x8047d84,  %edx = 4,  %ebx = 0x8047dec
*** %esi = 0x8047da4, %edi = 0x8047dc4, %ebp = 0x8047dcc
*** This is guaranteed to cause a SIGSEGV, and it does, because
*** %esi is not aligned on a 16-byte boundary. But ... see below ...
	movdqa	(%esi,%eax), %xmm0
	movdqa	%xmm0, (%ecx,%eax)
	jne	.L6
	movb	$1, %dl
	.align 16
.L8:
	movl	-4(%ecx,%edx,4), %eax
	cmpl	-4(%esi,%edx,4), %eax
	jne	.L18
	incl	%edx
	cmpl	$9, %edx
	jne	.L8
	addl	$64, %esp
	xorl	%eax, %eax
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.L18:
	call	abort
	.size	main1, .-main1
	.align 16
	.globl	main
	.type	main, @function
main:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ecx
	pushl	%ecx
	andl	$-16, %esp
	subl	$16, %esp
	call	check_vect
	leave
*** Looks like it was trying to align the stack on a 16-byte
*** boundary here. But on entry into main1(), it's doing 3
*** pushes at the beginning. Thus the offsets into the stack
*** (like the leal -40(%ebp), %edi close to the top of main1)
*** appear to be incorrectly calculated.
	jmp	main1
	.size	main, .-main
	.ident	"GCC: (GNU) 4.0.3 20051013 (prerelease)"


That's the first problem. I then compiled with -O6, and got this:

	.file	"pr18400.c"
	.version	"01.01"
	.text
	.align 16
	.globl	sig_ill_handler
	.type	sig_ill_handler, @function
sig_ill_handler:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$20, %esp
	pushl	$0
	call	exit
	.size	sig_ill_handler, .-sig_ill_handler
	.align 16
	.globl	check_vect
	.type	check_vect, @function
check_vect:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$16, %esp
	pushl	$sig_ill_handler
	pushl	$4
	call	signal
/APP
	.byte 0xf2,0x0f,0x10,0xc0
/NO_APP
	popl	%eax
	popl	%edx
	pushl	$0
	pushl	$4
	call	signal
	addl	$16, %esp
	leave
	ret
	.size	check_vect, .-check_vect
	.section	.rodata
	.align 32
	.type	C.0.1905, @object
	.size	C.0.1905, 32
C.0.1905:
	.long	0
	.long	3
	.long	6
	.long	9
	.long	12
	.long	15
	.long	18
	.long	21
	.text
	.align 16
	.globl	main1
	.type	main1, @function
main1:
	pushl	%ebp
	movl	$8, %ecx
	movl	%esp, %ebp
	pushl	%edi
	cld
	pushl	%esi
	leal	-40(%ebp), %edi
	subl	$64, %esp
	movl	$C.0.1905, %esi
	rep
	movsl
	xorl	%edx, %edx
	leal	-40(%ebp), %esi
	leal	-72(%ebp), %ecx
	.align 16
.L6:
	leal	0(,%edx,4), %eax
	addl	$4, %edx
	cmpl	$8, %edx
	movdqa	(%esi,%eax), %xmm0
	movdqa	%xmm0, (%ecx,%eax)
	jne	.L6
	movb	$1, %dl
	.align 16
.L8:
	movl	-4(%ecx,%edx,4), %eax
	cmpl	-4(%esi,%edx,4), %eax
	jne	.L18
	incl	%edx
	cmpl	$9, %edx
	jne	.L8
	addl	$64, %esp
	xorl	%eax, %eax
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.L18:
	call	abort
	.size	main1, .-main1
	.align 16
	.globl	main
	.type	main, @function
main:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	subl	$64, %esp
	andl	$-16, %esp
	subl	$24, %esp
	pushl	$sig_ill_handler
	pushl	$4
	call	signal
/APP
	.byte 0xf2,0x0f,0x10,0xc0
/NO_APP
	popl	%ecx
	popl	%esi
	pushl	$0
	pushl	$4
	call	signal
	leal	-40(%ebp), %edi
	cld
	movl	$C.0.1905, %esi
	movl	$8, %ecx
	rep
	movsl
	xorl	%edx, %edx
	leal	-40(%ebp), %esi
	leal	-72(%ebp), %ecx
	addl	$16, %esp
	.align 16
.L20:
	leal	0(,%edx,4), %eax
	addl	$4, %edx
	cmpl	$8, %edx
*** At this point the registers have:
*** %eax = 0, %esi = 0x8047da4, %ecx = 0x8047d84.
*** Again, this will cause SIGSEGV because neither %esi
*** nor %ecx are 16-byte aligned. They are both off by 4.
	movdqa	(%esi,%eax), %xmm0
	movdqa	%xmm0, (%ecx,%eax)
	jne	.L20
	movb	$1, %dl
	.align 16
.L22:
	movl	-4(%ecx,%edx,4), %eax
	cmpl	-4(%esi,%edx,4), %eax
	jne	.L31
	incl	%edx
	cmpl	$9, %edx
	jne	.L22
	leal	-8(%ebp), %esp
	xorl	%eax, %eax
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.L31:
	call	abort
	.size	main, .-main
	.ident	"GCC: (GNU) 4.0.3 20051013 (prerelease)"

Notice that GCC has decided to inline main1 into main, but the
code for main1 is left in place. Surely that's wrong?

So it seems like the offset calculations are always off by
4. I am guessing that's because of a push that isn't being taken
into account, perhaps the push of %ebp at the top of the function?

Any help at all *greatly* appreciated.

Kean


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]