This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Severe problems with vectorizing stuff in 4.0.3 HEAD

From: Kean Johnston <jkj at sco dot com>
To: gcc mailing list <gcc at gcc dot gnu dot org>
Date: Fri, 14 Oct 2005 12:11:15 -0700
Subject: Severe problems with vectorizing stuff in 4.0.3 HEAD
Reply-to: jkj at sco dot com

All,

I am getting a lot of test suite failures with almost all of
the vect/* tests. I am using pr18400.c from the test suite
as an example here, because it's about the smallest one I
can find. Here is what is generated at -O2:

	.file	"pr18400.c"
	.version	"01.01"
	.text
	.align 16
	.globl	sig_ill_handler
	.type	sig_ill_handler, @function
sig_ill_handler:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$20, %esp
	pushl	$0
	call	exit
	.size	sig_ill_handler, .-sig_ill_handler
	.align 16
	.globl	check_vect
	.type	check_vect, @function
check_vect:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$16, %esp
	pushl	$sig_ill_handler
	pushl	$4
	call	signal
/APP
	.byte 0xf2,0x0f,0x10,0xc0
/NO_APP
	popl	%eax
	popl	%edx
	pushl	$0
	pushl	$4
	call	signal
	addl	$16, %esp
	leave
	ret
	.size	check_vect, .-check_vect
	.section	.rodata
	.align 32
	.type	C.0.1905, @object
	.size	C.0.1905, 32
C.0.1905:
	.long	0
	.long	3
	.long	6
	.long	9
	.long	12
	.long	15
	.long	18
	.long	21
	.text
	.align 16
	.globl	main1
	.type	main1, @function
main1:
	pushl	%ebp
	movl	$8, %ecx
	movl	%esp, %ebp
	pushl	%edi
	cld
	pushl	%esi
	leal	-40(%ebp), %edi
	subl	$64, %esp
	movl	$C.0.1905, %esi
	rep
	movsl
	xorl	%edx, %edx
	leal	-40(%ebp), %esi
	leal	-72(%ebp), %ecx
	.align 16
.L6:
	leal	0(,%edx,4), %eax
	addl	$4, %edx
	cmpl	$8, %edx
*** At this point, the registers have the following values:
*** %eax = 0,  %ecx = 0x8047d84,  %edx = 4,  %ebx = 0x8047dec
*** %esi = 0x8047da4, %edi = 0x8047dc4, %ebp = 0x8047dcc
*** This is guaranteed to cause a SIGSEGV, and it does, because
*** %esi is not aligned on a 16-byte boundary. But ... see below ...
	movdqa	(%esi,%eax), %xmm0
	movdqa	%xmm0, (%ecx,%eax)
	jne	.L6
	movb	$1, %dl
	.align 16
.L8:
	movl	-4(%ecx,%edx,4), %eax
	cmpl	-4(%esi,%edx,4), %eax
	jne	.L18
	incl	%edx
	cmpl	$9, %edx
	jne	.L8
	addl	$64, %esp
	xorl	%eax, %eax
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.L18:
	call	abort
	.size	main1, .-main1
	.align 16
	.globl	main
	.type	main, @function
main:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ecx
	pushl	%ecx
	andl	$-16, %esp
	subl	$16, %esp
	call	check_vect
	leave
*** Looks like it was trying to align the stack on a 16-byte
*** boundary here. But on entry into main1(), it's doing 3
*** pushes at the beginning. Thus the offsets into the stack
*** (like the leal -40(%ebp), %edi close to the top of main1)
*** appear to be incorrectly calculated.
	jmp	main1
	.size	main, .-main
	.ident	"GCC: (GNU) 4.0.3 20051013 (prerelease)"


That's the first problem. I then compiled with -O6, and got this:
	.file	"pr18400.c"
	.version	"01.01"
	.text
	.align 16
	.globl	sig_ill_handler
	.type	sig_ill_handler, @function
sig_ill_handler:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$20, %esp
	pushl	$0
	call	exit
	.size	sig_ill_handler, .-sig_ill_handler
	.align 16
	.globl	check_vect
	.type	check_vect, @function
check_vect:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$16, %esp
	pushl	$sig_ill_handler
	pushl	$4
	call	signal
/APP
	.byte 0xf2,0x0f,0x10,0xc0
/NO_APP
	popl	%eax
	popl	%edx
	pushl	$0
	pushl	$4
	call	signal
	addl	$16, %esp
	leave
	ret
	.size	check_vect, .-check_vect
	.section	.rodata
	.align 32
	.type	C.0.1905, @object
	.size	C.0.1905, 32
C.0.1905:
	.long	0
	.long	3
	.long	6
	.long	9
	.long	12
	.long	15
	.long	18
	.long	21
	.text
	.align 16
	.globl	main1
	.type	main1, @function
main1:
	pushl	%ebp
	movl	$8, %ecx
	movl	%esp, %ebp
	pushl	%edi
	cld
	pushl	%esi
	leal	-40(%ebp), %edi
	subl	$64, %esp
	movl	$C.0.1905, %esi
	rep
	movsl
	xorl	%edx, %edx
	leal	-40(%ebp), %esi
	leal	-72(%ebp), %ecx
	.align 16
.L6:
	leal	0(,%edx,4), %eax
	addl	$4, %edx
	cmpl	$8, %edx
	movdqa	(%esi,%eax), %xmm0
	movdqa	%xmm0, (%ecx,%eax)
	jne	.L6
	movb	$1, %dl
	.align 16
.L8:
	movl	-4(%ecx,%edx,4), %eax
	cmpl	-4(%esi,%edx,4), %eax
	jne	.L18
	incl	%edx
	cmpl	$9, %edx
	jne	.L8
	addl	$64, %esp
	xorl	%eax, %eax
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.L18:
	call	abort
	.size	main1, .-main1
	.align 16
	.globl	main
	.type	main, @function
main:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	subl	$64, %esp
	andl	$-16, %esp
	subl	$24, %esp
	pushl	$sig_ill_handler
	pushl	$4
	call	signal
/APP
	.byte 0xf2,0x0f,0x10,0xc0
/NO_APP
	popl	%ecx
	popl	%esi
	pushl	$0
	pushl	$4
	call	signal
	leal	-40(%ebp), %edi
	cld
	movl	$C.0.1905, %esi
	movl	$8, %ecx
	rep
	movsl
	xorl	%edx, %edx
	leal	-40(%ebp), %esi
	leal	-72(%ebp), %ecx
	addl	$16, %esp
	.align 16
.L20:
	leal	0(,%edx,4), %eax
	addl	$4, %edx
	cmpl	$8, %edx
*** At this point the registers have:
*** %eax 0, %esi = 0x8047da4, %ecx = 0x8047d84.
*** Again, this will cause SIGSEGV because neither %esi
*** nor %ecx is 16-byte aligned. They are both off by 4.
	movdqa	(%esi,%eax), %xmm0
	movdqa	%xmm0, (%ecx,%eax)
	jne	.L20
	movb	$1, %dl
	.align 16
.L22:
	movl	-4(%ecx,%edx,4), %eax
	cmpl	-4(%esi,%edx,4), %eax
	jne	.L31
	incl	%edx
	cmpl	$9, %edx
	jne	.L22
	leal	-8(%ebp), %esp
	xorl	%eax, %eax
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.L31:
	call	abort
	.size	main, .-main
	.ident	"GCC: (GNU) 4.0.3 20051013 (prerelease)"

Notice that GCC has decided to inline main1 into main, but the
code for main1 is left in place. Surely that's wrong?

So it seems like the offset calculations are always off by
4. I am guessing that's because of a push that isn't being taken
into account, perhaps the push of %ebp at the top of the function?

Any help at all *greatly* appreciated.

Kean

Follow-Ups:
- Re: Severe problems with vectorizing stuff in 4.0.3 HEAD
  - From: Andrew Pinski
- Re: Severe problems with vectorizing stuff in 4.0.3 HEAD
  - From: Andrew Pinski

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]