This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Severe problems with vectorizing stuff in 4.0.3 HEAD
- From: Kean Johnston <jkj at sco dot com>
- To: gcc mailing list <gcc at gcc dot gnu dot org>
- Date: Fri, 14 Oct 2005 12:11:15 -0700
- Subject: Severe problems with vectorizing stuff in 4.0.3 HEAD
- Reply-to: jkj at sco dot com
All,
I am getting a lot of test suite failures with almost all of
the vect/* tests. I am using pr18400.c from the test suite
as an example here, because it's about the smallest one I
can find. Here is what is generated at -O2:
.file "pr18400.c"
.version "01.01"
.text
.align 16
.globl sig_ill_handler
.type sig_ill_handler, @function
sig_ill_handler:
pushl %ebp
movl %esp, %ebp
subl $20, %esp
pushl $0
call exit
.size sig_ill_handler, .-sig_ill_handler
.align 16
.globl check_vect
.type check_vect, @function
check_vect:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
pushl $sig_ill_handler
pushl $4
call signal
/APP
.byte 0xf2,0x0f,0x10,0xc0
/NO_APP
popl %eax
popl %edx
pushl $0
pushl $4
call signal
addl $16, %esp
leave
ret
.size check_vect, .-check_vect
.section .rodata
.align 32
.type C.0.1905, @object
.size C.0.1905, 32
C.0.1905:
.long 0
.long 3
.long 6
.long 9
.long 12
.long 15
.long 18
.long 21
.text
.align 16
.globl main1
.type main1, @function
main1:
pushl %ebp
movl $8, %ecx
movl %esp, %ebp
pushl %edi
cld
pushl %esi
leal -40(%ebp), %edi
subl $64, %esp
movl $C.0.1905, %esi
rep
movsl
xorl %edx, %edx
leal -40(%ebp), %esi
leal -72(%ebp), %ecx
.align 16
.L6:
leal 0(,%edx,4), %eax
addl $4, %edx
cmpl $8, %edx
*** At this point, the registers have the following values:
*** %eax = 0, %ecx = 0x8047d84, %edx = 4, %ebx = 0x8047dec
*** %esi = 0x8047da4, %edi = 0x8047dc4, %ebp = 0x8047dcc
*** This is guaranteed to cause a SIGSEGV, and it does, because
*** %esi (0x8047da4) is not aligned on a 16-byte boundary, which
*** movdqa requires of its memory operand. But ... see below ...
movdqa (%esi,%eax), %xmm0
movdqa %xmm0, (%ecx,%eax)
jne .L6
movb $1, %dl
.align 16
.L8:
movl -4(%ecx,%edx,4), %eax
cmpl -4(%esi,%edx,4), %eax
jne .L18
incl %edx
cmpl $9, %edx
jne .L8
addl $64, %esp
xorl %eax, %eax
popl %esi
popl %edi
popl %ebp
ret
.L18:
call abort
.size main1, .-main1
.align 16
.globl main
.type main, @function
main:
pushl %ebp
movl %esp, %ebp
pushl %ecx
pushl %ecx
andl $-16, %esp
subl $16, %esp
call check_vect
leave
*** Looks like it was trying to align the stack on a 16-byte
*** boundary here. But on entry into main1(), it's doing 3
*** pushes at the beginning. Thus the offsets into the stack
*** (like the leal -40(%ebp), %edi close to the top of main1)
*** appear to be incorrectly calculated.
jmp main1
.size main, .-main
.ident "GCC: (GNU) 4.0.3 20051013 (prerelease)"
That's the first problem. I then compiled with -O6, and got this:
.file "pr18400.c"
.version "01.01"
.text
.align 16
.globl sig_ill_handler
.type sig_ill_handler, @function
sig_ill_handler:
pushl %ebp
movl %esp, %ebp
subl $20, %esp
pushl $0
call exit
.size sig_ill_handler, .-sig_ill_handler
.align 16
.globl check_vect
.type check_vect, @function
check_vect:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
pushl $sig_ill_handler
pushl $4
call signal
/APP
.byte 0xf2,0x0f,0x10,0xc0
/NO_APP
popl %eax
popl %edx
pushl $0
pushl $4
call signal
addl $16, %esp
leave
ret
.size check_vect, .-check_vect
.section .rodata
.align 32
.type C.0.1905, @object
.size C.0.1905, 32
C.0.1905:
.long 0
.long 3
.long 6
.long 9
.long 12
.long 15
.long 18
.long 21
.text
.align 16
.globl main1
.type main1, @function
main1:
pushl %ebp
movl $8, %ecx
movl %esp, %ebp
pushl %edi
cld
pushl %esi
leal -40(%ebp), %edi
subl $64, %esp
movl $C.0.1905, %esi
rep
movsl
xorl %edx, %edx
leal -40(%ebp), %esi
leal -72(%ebp), %ecx
.align 16
.L6:
leal 0(,%edx,4), %eax
addl $4, %edx
cmpl $8, %edx
movdqa (%esi,%eax), %xmm0
movdqa %xmm0, (%ecx,%eax)
jne .L6
movb $1, %dl
.align 16
.L8:
movl -4(%ecx,%edx,4), %eax
cmpl -4(%esi,%edx,4), %eax
jne .L18
incl %edx
cmpl $9, %edx
jne .L8
addl $64, %esp
xorl %eax, %eax
popl %esi
popl %edi
popl %ebp
ret
.L18:
call abort
.size main1, .-main1
.align 16
.globl main
.type main, @function
main:
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
subl $64, %esp
andl $-16, %esp
subl $24, %esp
pushl $sig_ill_handler
pushl $4
call signal
/APP
.byte 0xf2,0x0f,0x10,0xc0
/NO_APP
popl %ecx
popl %esi
pushl $0
pushl $4
call signal
leal -40(%ebp), %edi
cld
movl $C.0.1905, %esi
movl $8, %ecx
rep
movsl
xorl %edx, %edx
leal -40(%ebp), %esi
leal -72(%ebp), %ecx
addl $16, %esp
.align 16
.L20:
leal 0(,%edx,4), %eax
addl $4, %edx
cmpl $8, %edx
*** At this point the registers have:
*** %eax 0, %esi = 0x8047da4, %ecx = 0x8047d84.
*** Again, this will cause SIGSEGV because neither %esi
*** nor %ecx is 16-byte aligned. They are both off by 4.
movdqa (%esi,%eax), %xmm0
movdqa %xmm0, (%ecx,%eax)
jne .L20
movb $1, %dl
.align 16
.L22:
movl -4(%ecx,%edx,4), %eax
cmpl -4(%esi,%edx,4), %eax
jne .L31
incl %edx
cmpl $9, %edx
jne .L22
leal -8(%ebp), %esp
xorl %eax, %eax
popl %esi
popl %edi
popl %ebp
ret
.L31:
call abort
.size main, .-main
.ident "GCC: (GNU) 4.0.3 20051013 (prerelease)"
Notice that GCC has decided to inline main1 into main, but the
code for main1 is left in place. Surely that's wrong?
So it seems like the offset calculations are always off by
4. I am guessing that's because of a push that isn't being taken
into account, perhaps the push of %ebp at the top of the function?
Any help at all *greatly* appreciated.
Kean