This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
Re: Bugreport on g77-2.95 19990629 (prerelease)
- To: craig at jcb-sc dot com
- Subject: Re: Bugreport on g77-2.95 19990629 (prerelease)
- From: Mathias Froehlich <frohlich at na dot uni-tuebingen dot de>
- Date: Tue, 6 Jul 1999 13:36:51 +0200 (MET DST)
- CC: egcs-bugs at egcs dot cygnus dot com
- References: <14207.52319.216353.567348@na12><19990705210152.2304.qmail@deer>
> >I have probably found a bug in g77-2.95 19990629 (prerelease)
> >(i686-pc-linux-gnu). I use a RedHat-6.0 system.
> >
> >The file sgeqpf.f from LAPACK (you can find it in the SRC
> >subdirectory of the LAPACK source tree) seems to be miscompiled by
> >this and many previous versions of egcs.
> >
> >I can observe an infinite loop while executing
> [...]
> >If you have further questions feel free to contact me.
>
> Could you please debug the program? Until you, or someone else,
> does that, it's not clear that the problem is miscompiled code --
> it might be buggy code that gets compiled "correctly", just differently,
> in a way that exposes the bug.
>
OK. I have spent some time on that bug and have tracked down which asm
line is buggy. I have fixed the asm output of
g77 -fverbose-asm -Vgcc-2.95 -march=pentiumpro -O2 -c sgeqpf.f
by hand and present the diff of that fix below. I hope this will
give you enough information to find that egcs bug.
I have appended that diff, including some comments from me, at the end
of this mail.
> The g77 docs provide further information on how to report bugs.
>
Excuse me if I have omitted some information. LAPACK is not easy
to understand, and I am not very familiar with f77. I only use this
excellent package and usually test compilers with that huge codebase.
So I thought that it would be easier for someone like you to find bugs
like that.
Regards,
Mathias Froehlich
----------------------------------------------------------------------------
###
### Original compiler output Fixed version
###
.file "sgeqpf.f" .file "sgeqpf.f"
.version "01.01" .version "01.01"
/ GNU F77 version gcc-2.95 19990629 ( / GNU F77 version gcc-2.95 19990629 (
/ options passed: -march=pentiumpro / options passed: -march=pentiumpro
/ options enabled: -fdefer-pop -fcse / options enabled: -fdefer-pop -fcse
/ -fexpensive-optimizations -fthread- / -fexpensive-optimizations -fthread-
/ -fmove-all-movables -freduce-all-gi / -fmove-all-movables -freduce-all-gi
/ -ffunction-cse -finline -fkeep-stat / -ffunction-cse -finline -fkeep-stat
/ -fpcc-struct-return -fgcse -frerun- / -fpcc-struct-return -fgcse -frerun-
/ -fschedule-insns2 -fcommon -fverbos / -fschedule-insns2 -fcommon -fverbos
/ -foptimize-register-move -fargument / -foptimize-register-move -fargument
/ -fident -m80387 -mhard-float -mno-s / -fident -m80387 -mhard-float -mno-s
/ -mschedule-prologue -mcpu=pentiumpr / -mschedule-prologue -mcpu=pentiumpr
gcc2_compiled.: gcc2_compiled.:
.section .rodata .section .rodata
.LC0: .LC0:
.byte 0x53,0x47,0x45,0x51,0 .byte 0x53,0x47,0x45,0x51,0
.LC2: .LC2:
.byte 0x54,0x72,0x61,0x6e,0 .byte 0x54,0x72,0x61,0x6e,0
.LC3: .LC3:
.byte 0x4c,0x65,0x66,0x74 .byte 0x4c,0x65,0x66,0x74
.LC4: .LC4:
.byte 0x4c,0x45,0x46,0x54 .byte 0x4c,0x45,0x46,0x54
.align 4 .align 4
.LC1: .LC1:
.long 1 .long 1
.align 4 .align 4
.LC5: .LC5:
.long 0x3d4ccccd .long 0x3d4ccccd
.text .text
.align 4 .align 4
.globl sgeqpf_ .globl sgeqpf_
.type sgeqpf_,@function .type sgeqpf_,@function
sgeqpf_: sgeqpf_:
pushl %ebp pushl %ebp
movl %esp,%ebp movl %esp,%ebp
subl $188,%esp subl $188,%esp
movl 20(%ebp),%eax movl 20(%ebp),%eax
pushl %edi pushl %edi
pushl %esi pushl %esi
pushl %ebx pushl %ebx
movl 36(%ebp),%ebx movl 36(%ebp),%ebx
movl (%eax),%ecx movl (%eax),%ecx
movl 8(%ebp),%eax movl 8(%ebp),%eax
leal 0(,%ecx,4),%edx leal 0(,%ecx,4),%edx
movl %edx,-56(%ebp) movl %edx,-56(%ebp)
movl $0,(%ebx) movl $0,(%ebx)
movl (%eax),%edx movl (%eax),%edx
testl %edx,%edx testl %edx,%edx
jge .L3 jge .L3
movl $-1,(%ebx) movl $-1,(%ebx)
jmp .L4 jmp .L4
.align 4 .align 4
.L3: .L3:
movl 12(%ebp),%ebx movl 12(%ebp),%ebx
cmpl $0,(%ebx) cmpl $0,(%ebx)
jge .L5 jge .L5
movl 36(%ebp),%eax movl 36(%ebp),%eax
movl $-2,(%eax) movl $-2,(%eax)
jmp .L4 jmp .L4
.align 4 .align 4
.L5: .L5:
movl $1,%eax movl $1,%eax
testl %edx,%edx testl %edx,%edx
cmovg %edx,%eax cmovg %edx,%eax
cmpl %eax,%ecx cmpl %eax,%ecx
jge .L4 jge .L4
movl 36(%ebp),%edx movl 36(%ebp),%edx
movl $-4,(%edx) movl $-4,(%edx)
.L4: .L4:
movl 36(%ebp),%ecx movl 36(%ebp),%ecx
movl (%ecx),%eax movl (%ecx),%eax
testl %eax,%eax testl %eax,%eax
je .L9 je .L9
addl $-4,%esp addl $-4,%esp
pushl $6 pushl $6
negl %eax negl %eax
movl %eax,-4(%ebp) movl %eax,-4(%ebp)
leal -4(%ebp),%eax leal -4(%ebp),%eax
pushl %eax pushl %eax
pushl $.LC0 pushl $.LC0
call xerbla_ call xerbla_
jmp .L2 jmp .L2
.align 4 .align 4
.L9: .L9:
### The line
###
### MN = MIN( M, N )
###
### seems to be compiled here
###
movl 8(%ebp),%ebx movl 8(%ebp),%ebx
movl 12(%ebp),%edx movl 12(%ebp),%edx
movl (%ebx),%ebx movl (%ebx),%ebx
movl (%edx),%eax movl (%edx),%eax
###
### This is the buggy line.
###
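### The memory location -68(%ebp) is read here, but nothing has been
### stored to it before this point. The compare should instead use
### %ebx, which was just loaded above (see the fixed column).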
###
cmpl %eax,-68(%ebp) | cmpl %eax,%ebx
cmovg %eax,%ebx cmovg %eax,%ebx
movl %ebx,-68(%ebp) movl %ebx,-68(%ebp)
movl $1,-64(%ebp) movl $1,-64(%ebp)
movl $1,-60(%ebp) movl $1,-60(%ebp)
decl %eax decl %eax
movl %eax,-76(%ebp) movl %eax,-76(%ebp)
js .L12 js .L12
movl 16(%ebp),%edi movl 16(%ebp),%edi
movl 24(%ebp),%ebx movl 24(%ebp),%ebx
movl %edi,-156(%ebp) movl %edi,-156(%ebp)
movl %ebx,%esi movl %ebx,%esi
.align 4 .align 4
.L14: .L14:
cmpl $0,(%ebx) cmpl $0,(%ebx)
je .L15 je .L15
movl -64(%ebp),%ecx movl -64(%ebp),%ecx
cmpl %ecx,-60(%ebp) cmpl %ecx,-60(%ebp)
je .L16 je .L16
addl $-12,%esp addl $-12,%esp
pushl $.LC1 pushl $.LC1
movl -156(%ebp),%eax movl -156(%ebp),%eax
movl 8(%ebp),%edx movl 8(%ebp),%edx
pushl %eax pushl %eax
pushl $.LC1 pushl $.LC1
pushl %edi pushl %edi
pushl %edx pushl %edx
call sswap_ call sswap_
movl (%esi),%eax movl (%esi),%eax
movl %eax,(%ebx) movl %eax,(%ebx)
movl -60(%ebp),%ecx movl -60(%ebp),%ecx
movl %ecx,(%esi) movl %ecx,(%esi)
addl $32,%esp addl $32,%esp
jmp .L17 jmp .L17
.align 4 .align 4
.L16: .L16:
movl -60(%ebp),%eax movl -60(%ebp),%eax
movl %eax,(%ebx) movl %eax,(%ebx)
.L17: .L17:
movl -56(%ebp),%edx movl -56(%ebp),%edx
addl $4,%esi addl $4,%esi
addl %edx,-156(%ebp) addl %edx,-156(%ebp)
incl -64(%ebp) incl -64(%ebp)
jmp .L13 jmp .L13
.align 4 .align 4
.L15: .L15:
movl -60(%ebp),%ecx movl -60(%ebp),%ecx
movl %ecx,(%ebx) movl %ecx,(%ebx)
.L13: .L13:
addl -56(%ebp),%edi addl -56(%ebp),%edi
addl $4,%ebx addl $4,%ebx
incl -60(%ebp) incl -60(%ebp)
decl -76(%ebp) decl -76(%ebp)
jns .L14 jns .L14
.L12: .L12:
decl -64(%ebp) decl -64(%ebp)
cmpl $0,-64(%ebp) cmpl $0,-64(%ebp)
jle .L21 jle .L21
movl 8(%ebp),%edx movl 8(%ebp),%edx
movl -64(%ebp),%ebx movl -64(%ebp),%ebx
movl %ebx,-52(%ebp) movl %ebx,-52(%ebp)
movl (%edx),%eax movl (%edx),%eax
cmpl %eax,%ebx cmpl %eax,%ebx
jle .L22 jle .L22
movl %eax,-52(%ebp) movl %eax,-52(%ebp)
.L22: .L22:
movl 36(%ebp),%ecx movl 36(%ebp),%ecx
movl 32(%ebp),%ebx movl 32(%ebp),%ebx
movl 28(%ebp),%eax movl 28(%ebp),%eax
movl 20(%ebp),%edx movl 20(%ebp),%edx
addl $-4,%esp addl $-4,%esp
pushl %ecx pushl %ecx
movl 16(%ebp),%ecx movl 16(%ebp),%ecx
pushl %ebx pushl %ebx
pushl %eax pushl %eax
movl 8(%ebp),%eax movl 8(%ebp),%eax
pushl %edx pushl %edx
pushl %ecx pushl %ecx
leal -52(%ebp),%ebx leal -52(%ebp),%ebx
pushl %ebx pushl %ebx
pushl %eax pushl %eax
call sgeqr2_ call sgeqr2_
movl 12(%ebp),%eax movl 12(%ebp),%eax
movl -52(%ebp),%edx movl -52(%ebp),%edx
addl $32,%esp addl $32,%esp
movl (%eax),%ecx movl (%eax),%ecx
cmpl %ecx,%edx cmpl %ecx,%edx
jge .L21 jge .L21
movl 36(%ebp),%eax movl 36(%ebp),%eax
addl $-8,%esp addl $-8,%esp
pushl $9 pushl $9
pushl $4 pushl $4
pushl %eax pushl %eax
movl 32(%ebp),%eax movl 32(%ebp),%eax
pushl %eax pushl %eax
movl 20(%ebp),%eax movl 20(%ebp),%eax
pushl %eax pushl %eax
movl -56(%ebp),%eax movl -56(%ebp),%eax
imull %edx,%eax imull %edx,%eax
addl 16(%ebp),%eax addl 16(%ebp),%eax
pushl %eax pushl %eax
movl 28(%ebp),%eax movl 28(%ebp),%eax
pushl %eax pushl %eax
movl 20(%ebp),%eax movl 20(%ebp),%eax
pushl %eax pushl %eax
movl 16(%ebp),%eax movl 16(%ebp),%eax
subl %edx,%ecx subl %edx,%ecx
movl 8(%ebp),%edx movl 8(%ebp),%edx
pushl %eax pushl %eax
pushl %ebx pushl %ebx
movl %ecx,-8(%ebp) movl %ecx,-8(%ebp)
leal -8(%ebp),%eax leal -8(%ebp),%eax
pushl %eax pushl %eax
pushl %edx pushl %edx
pushl $.LC2 pushl $.LC2
pushl $.LC3 pushl $.LC3
call sorm2r_ call sorm2r_
addl $64,%esp addl $64,%esp
.L21: .L21:
movl -68(%ebp),%ecx movl -68(%ebp),%ecx
cmpl %ecx,-64(%ebp) cmpl %ecx,-64(%ebp)
jge .L2 jge .L2
movl -64(%ebp),%eax movl -64(%ebp),%eax
incl %eax incl %eax
movl %eax,-60(%ebp) movl %eax,-60(%ebp)
movl 12(%ebp),%eax movl 12(%ebp),%eax
movl -60(%ebp),%edx movl -60(%ebp),%edx
movl (%eax),%ebx movl (%eax),%ebx
subl -60(%ebp),%ebx subl -60(%ebp),%ebx
movl %edx,-144(%ebp) movl %edx,-144(%ebp)
js .L26 js .L26
movl 32(%ebp),%eax movl 32(%ebp),%eax
movl %eax,-88(%ebp) movl %eax,-88(%ebp)
movl %edx,%eax movl %edx,%eax
imull -56(%ebp),%eax imull -56(%ebp),%eax
movl 16(%ebp),%edx movl 16(%ebp),%edx
leal -12(%ebp),%ecx leal -12(%ebp),%ecx
movl %ecx,-80(%ebp) movl %ecx,-80(%ebp)
movl -64(%ebp),%ecx movl -64(%ebp),%ecx
subl -56(%ebp),%edx subl -56(%ebp),%edx
sall $2,%ecx sall $2,%ecx
addl %ecx,%edx addl %ecx,%edx
leal (%edx,%eax),%edi leal (%edx,%eax),%edi
movl 32(%ebp),%edx movl 32(%ebp),%edx
leal (%edx,%ecx),%esi leal (%edx,%ecx),%esi
.align 4 .align 4
.L28: .L28:
movl 8(%ebp),%ecx movl 8(%ebp),%ecx
addl $-4,%esp addl $-4,%esp
pushl $.LC1 pushl $.LC1
pushl %edi pushl %edi
movl -80(%ebp),%edx movl -80(%ebp),%edx
movl (%ecx),%eax movl (%ecx),%eax
subl -64(%ebp),%eax subl -64(%ebp),%eax
movl %eax,-12(%ebp) movl %eax,-12(%ebp)
pushl %edx pushl %edx
call snrm2_ call snrm2_
fstps -16(%ebp) fstps -16(%ebp)
flds -16(%ebp) flds -16(%ebp)
fsts (%esi) fsts (%esi)
movl -60(%ebp),%eax movl -60(%ebp),%eax
movl 12(%ebp),%ecx movl 12(%ebp),%ecx
movl -88(%ebp),%edx movl -88(%ebp),%edx
addl $16,%esp addl $16,%esp
addl $4,%esi addl $4,%esi
addl (%ecx),%eax addl (%ecx),%eax
fstps -4(%edx,%eax,4) fstps -4(%edx,%eax,4)
addl -56(%ebp),%edi addl -56(%ebp),%edi
incl -60(%ebp) incl -60(%ebp)
decl %ebx decl %ebx
jns .L28 jns .L28
.L26: .L26:
movl -144(%ebp),%ecx movl -144(%ebp),%ecx
movl -68(%ebp),%ebx movl -68(%ebp),%ebx
movl %ecx,-60(%ebp) movl %ecx,-60(%ebp)
subl %ecx,%ebx subl %ecx,%ebx
movl %ebx,-84(%ebp) movl %ebx,-84(%ebp)
js .L2 js .L2
movl 16(%ebp),%edx movl 16(%ebp),%edx
movl 32(%ebp),%ecx movl 32(%ebp),%ecx
leal -20(%ebp),%eax leal -20(%ebp),%eax
movl %eax,-92(%ebp) movl %eax,-92(%ebp)
movl -56(%ebp),%eax movl -56(%ebp),%eax
leal -24(%ebp),%ebx leal -24(%ebp),%ebx
movl %ebx,-104(%ebp) movl %ebx,-104(%ebp)
movl -56(%ebp),%ebx movl -56(%ebp),%ebx
movl %edx,-96(%ebp) movl %edx,-96(%ebp)
movl %ecx,-100(%ebp) movl %ecx,-100(%ebp)
addl $4,%eax addl $4,%eax
movl %eax,-108(%ebp) movl %eax,-108(%ebp)
leal -28(%ebp),%edx leal -28(%ebp),%edx
movl %edx,-112(%ebp) movl %edx,-112(%ebp)
leal -32(%ebp),%ecx leal -32(%ebp),%ecx
movl %ecx,-116(%ebp) movl %ecx,-116(%ebp)
movl %ebx,-152(%ebp) movl %ebx,-152(%ebp)
negl -152(%ebp) negl -152(%ebp)
.align 4 .align 4
.L34: .L34:
addl $-4,%esp addl $-4,%esp
pushl $.LC1 pushl $.LC1
movl -60(%ebp),%ebx movl -60(%ebp),%ebx
movl 32(%ebp),%eax movl 32(%ebp),%eax
movl 12(%ebp),%edx movl 12(%ebp),%edx
decl %ebx decl %ebx
leal 0(,%ebx,4),%edi leal 0(,%ebx,4),%edi
addl %edi,%eax addl %edi,%eax
pushl %eax pushl %eax
movl -92(%ebp),%ecx movl -92(%ebp),%ecx
movl (%edx),%eax movl (%edx),%eax
subl -60(%ebp),%eax subl -60(%ebp),%eax
incl %eax incl %eax
movl %eax,-20(%ebp) movl %eax,-20(%ebp)
pushl %ecx pushl %ecx
call isamax_ call isamax_
movl -60(%ebp),%edx movl -60(%ebp),%edx
leal -1(%edx,%eax),%esi leal -1(%edx,%eax),%esi
addl $16,%esp addl $16,%esp
movl %ebx,-136(%ebp) movl %ebx,-136(%ebp)
cmpl %edx,%esi cmpl %edx,%esi
je .L35 je .L35
addl $-12,%esp addl $-12,%esp
pushl $.LC1 pushl $.LC1
movl -56(%ebp),%eax movl -56(%ebp),%eax
imull %ebx,%eax imull %ebx,%eax
addl -96(%ebp),%eax addl -96(%ebp),%eax
pushl %eax pushl %eax
pushl $.LC1 pushl $.LC1
movl -56(%ebp),%eax movl -56(%ebp),%eax
movl 8(%ebp),%ecx movl 8(%ebp),%ecx
leal -1(%esi),%ebx leal -1(%esi),%ebx
imull %ebx,%eax imull %ebx,%eax
addl -96(%ebp),%eax addl -96(%ebp),%eax
pushl %eax pushl %eax
pushl %ecx pushl %ecx
call sswap_ call sswap_
movl 24(%ebp),%eax movl 24(%ebp),%eax
movl 24(%ebp),%edx movl 24(%ebp),%edx
sall $2,%ebx sall $2,%ebx
movl (%ebx,%eax),%eax movl (%ebx,%eax),%eax
movl %eax,-64(%ebp) movl %eax,-64(%ebp)
movl (%edi,%edx),%eax movl (%edi,%edx),%eax
movl %eax,(%ebx,%edx) movl %eax,(%ebx,%edx)
movl -64(%ebp),%ecx movl -64(%ebp),%ecx
movl %ecx,(%edi,%edx) movl %ecx,(%edi,%edx)
movl -100(%ebp),%eax movl -100(%ebp),%eax
addl $32,%esp addl $32,%esp
movl 12(%ebp),%edx movl 12(%ebp),%edx
flds (%edi,%eax) flds (%edi,%eax)
fstps (%ebx,%eax) fstps (%ebx,%eax)
movl (%edx),%eax movl (%edx),%eax
movl -60(%ebp),%ecx movl -60(%ebp),%ecx
movl -100(%ebp),%ebx movl -100(%ebp),%ebx
leal -1(%esi,%eax),%edx leal -1(%esi,%eax),%edx
leal -1(%ecx,%eax),%eax leal -1(%ecx,%eax),%eax
flds (%ebx,%eax,4) flds (%ebx,%eax,4)
fstps (%ebx,%edx,4) fstps (%ebx,%edx,4)
.L35: .L35:
movl 8(%ebp),%eax movl 8(%ebp),%eax
movl (%eax),%ecx movl (%eax),%ecx
cmpl %ecx,-60(%ebp) cmpl %ecx,-60(%ebp)
jge .L36 jge .L36
movl 28(%ebp),%eax movl 28(%ebp),%eax
addl $-12,%esp addl $-12,%esp
addl %edi,%eax addl %edi,%eax
pushl %eax pushl %eax
pushl $.LC1 pushl $.LC1
movl -56(%ebp),%eax movl -56(%ebp),%eax
imull -136(%ebp),%eax imull -136(%ebp),%eax
movl -60(%ebp),%ebx movl -60(%ebp),%ebx
addl 16(%ebp),%eax addl 16(%ebp),%eax
leal (%eax,%ebx,4),%edx leal (%eax,%ebx,4),%edx
pushl %edx pushl %edx
addl %edi,%eax addl %edi,%eax
pushl %eax pushl %eax
movl -104(%ebp),%eax movl -104(%ebp),%eax
subl %ebx,%ecx subl %ebx,%ecx
incl %ecx incl %ecx
movl %ecx,-24(%ebp) movl %ecx,-24(%ebp)
pushl %eax pushl %eax
jmp .L53 jmp .L53
.align 4 .align 4
.L36: .L36:
movl 28(%ebp),%edx movl 28(%ebp),%edx
addl $-12,%esp addl $-12,%esp
leal -1(%ecx),%eax leal -1(%ecx),%eax
leal 0(,%eax,4),%ecx leal 0(,%eax,4),%ecx
addl %ecx,%edx addl %ecx,%edx
pushl %edx pushl %edx
pushl $.LC1 pushl $.LC1
imull -56(%ebp),%eax imull -56(%ebp),%eax
addl 16(%ebp),%eax addl 16(%ebp),%eax
addl %ecx,%eax addl %ecx,%eax
pushl %eax pushl %eax
pushl %eax pushl %eax
pushl $.LC1 pushl $.LC1
.L53: .L53:
call slarfg_ call slarfg_
addl $32,%esp addl $32,%esp
movl 12(%ebp),%ecx movl 12(%ebp),%ecx
movl -60(%ebp),%edx movl -60(%ebp),%edx
cmpl (%ecx),%edx cmpl (%ecx),%edx
jge .L38 jge .L38
movl -108(%ebp),%ebx movl -108(%ebp),%ebx
imull -136(%ebp),%ebx imull -136(%ebp),%ebx
movl -96(%ebp),%eax movl -96(%ebp),%eax
movl 32(%ebp),%edx movl 32(%ebp),%edx
addl $-8,%esp addl $-8,%esp
flds (%ebx,%eax) flds (%ebx,%eax)
fstps -72(%ebp) fstps -72(%ebp)
movl $1065353216,(%ebx,%eax) movl $1065353216,(%ebx,%eax)
pushl $4 pushl $4
movl (%ecx),%ecx movl (%ecx),%ecx
leal (%edx,%ecx,8),%eax leal (%edx,%ecx,8),%eax
pushl %eax pushl %eax
movl 20(%ebp),%eax movl 20(%ebp),%eax
pushl %eax pushl %eax
movl -56(%ebp),%eax movl -56(%ebp),%eax
movl -136(%ebp),%edx movl -136(%ebp),%edx
imull -60(%ebp),%eax imull -60(%ebp),%eax
addl -96(%ebp),%eax addl -96(%ebp),%eax
sall $2,%edx sall $2,%edx
addl %edx,%eax addl %edx,%eax
pushl %eax pushl %eax
movl 28(%ebp),%eax movl 28(%ebp),%eax
addl %edx,%eax addl %edx,%eax
pushl %eax pushl %eax
pushl $.LC1 pushl $.LC1
movl -56(%ebp),%eax movl -56(%ebp),%eax
imull -136(%ebp),%eax imull -136(%ebp),%eax
addl -96(%ebp),%eax addl -96(%ebp),%eax
addl %edx,%eax addl %edx,%eax
pushl %eax pushl %eax
subl -60(%ebp),%ecx subl -60(%ebp),%ecx
movl %ecx,-28(%ebp) movl %ecx,-28(%ebp)
movl 8(%ebp),%ecx movl 8(%ebp),%ecx
movl -112(%ebp),%edx movl -112(%ebp),%edx
pushl %edx pushl %edx
movl (%ecx),%eax movl (%ecx),%eax
subl -60(%ebp),%eax subl -60(%ebp),%eax
incl %eax incl %eax
movl %eax,-32(%ebp) movl %eax,-32(%ebp)
movl -116(%ebp),%eax movl -116(%ebp),%eax
pushl %eax pushl %eax
pushl $.LC4 pushl $.LC4
call slarf_ call slarf_
flds -72(%ebp) flds -72(%ebp)
movl 16(%ebp),%edx movl 16(%ebp),%edx
fstps (%ebx,%edx) fstps (%ebx,%edx)
addl $48,%esp addl $48,%esp
.L38: .L38:
movl -60(%ebp),%eax movl -60(%ebp),%eax
incl %eax incl %eax
movl %eax,%esi movl %eax,%esi
movl 12(%ebp),%eax movl 12(%ebp),%eax
movl -84(%ebp),%ecx movl -84(%ebp),%ecx
decl %ecx decl %ecx
movl (%eax),%edx movl (%eax),%edx
subl %esi,%edx subl %esi,%edx
movl %edx,-120(%ebp) movl %edx,-120(%ebp)
movl %esi,-140(%ebp) movl %esi,-140(%ebp)
movl %ecx,-148(%ebp) movl %ecx,-148(%ebp)
js .L33 js .L33
fldz fldz
movl -60(%ebp),%ebx movl -60(%ebp),%ebx
movl 32(%ebp),%edx movl 32(%ebp),%edx
movl -136(%ebp),%ecx movl -136(%ebp),%ecx
sall $2,%ebx sall $2,%ebx
movl %ebx,-124(%ebp) movl %ebx,-124(%ebp)
movl -152(%ebp),%ebx movl -152(%ebp),%ebx
movl -60(%ebp),%edi movl -60(%ebp),%edi
leal -36(%ebp),%eax leal -36(%ebp),%eax
movl %eax,-128(%ebp) movl %eax,-128(%ebp)
leal (%ebx,%ecx,4),%eax leal (%ebx,%ecx,4),%eax
movl 32(%ebp),%ebx movl 32(%ebp),%ebx
movl %edx,-132(%ebp) movl %edx,-132(%ebp)
movl %esi,%edx movl %esi,%edx
imull -56(%ebp),%edx imull -56(%ebp),%edx
imull -56(%ebp),%edi imull -56(%ebp),%edi
addl 16(%ebp),%eax addl 16(%ebp),%eax
movl %esi,%ecx movl %esi,%ecx
leal -4(%ebx,%ecx,4),%ecx leal -4(%ebx,%ecx,4),%ecx
movl %ecx,-160(%ebp) movl %ecx,-160(%ebp)
movl %ecx,%ebx movl %ecx,%ebx
addl %edx,%eax addl %edx,%eax
movl %eax,-164(%ebp) movl %eax,-164(%ebp)
.align 4 .align 4
.L42: .L42:
flds (%ebx) flds (%ebx)
fucomi %st(1),%st fucomi %st(1),%st
setne %al setne %al
setp %ah setp %ah
orb %al,%ah orb %al,%ah
je .L54 je .L54
movl -164(%ebp),%edx movl -164(%ebp),%edx
flds (%edx) flds (%edx)
fabs fabs
fdiv %st(1),%st fdiv %st(1),%st
fld1 fld1
fxch %st(1) fxch %st(1)
fmul %st(0),%st fmul %st(0),%st
fld %st(1) fld %st(1)
fsubp %st,%st(1) fsubp %st,%st(1)
fcomi %st(3),%st fcomi %st(3),%st
jae .L44 jae .L44
fstp %st(0) fstp %st(0)
fldz fldz
.L44: .L44:
fxch %st(2) fxch %st(2)
movl 12(%ebp),%edx movl 12(%ebp),%edx
movl 32(%ebp),%ecx movl 32(%ebp),%ecx
movl %esi,%eax movl %esi,%eax
addl (%edx),%eax addl (%edx),%eax
fdivs -4(%ecx,%eax,4) fdivs -4(%ecx,%eax,4)
fld %st(2) fld %st(2)
fmuls .LC5 fmuls .LC5
fxch %st(1) fxch %st(1)
fmul %st(0),%st fmul %st(0),%st
fmulp %st,%st(1) fmulp %st,%st(1)
fadd %st(1),%st fadd %st(1),%st
fucomip %st(1),%st fucomip %st(1),%st
fstp %st(0) fstp %st(0)
setne %al setne %al
setp %ah setp %ah
orb %al,%ah orb %al,%ah
jne .L45 jne .L45
fstp %st(0) fstp %st(0)
movl 8(%ebp),%ecx movl 8(%ebp),%ecx
movl (%ecx),%edx movl (%ecx),%edx
subl -60(%ebp),%edx subl -60(%ebp),%edx
testl %edx,%edx testl %edx,%edx
jle .L46 jle .L46
movl 16(%ebp),%eax movl 16(%ebp),%eax
addl $-4,%esp addl $-4,%esp
pushl $.LC1 pushl $.LC1
addl %edi,%eax addl %edi,%eax
addl -124(%ebp),%eax addl -124(%ebp),%eax
pushl %eax pushl %eax
movl -128(%ebp),%eax movl -128(%ebp),%eax
movl %edx,-36(%ebp) movl %edx,-36(%ebp)
pushl %eax pushl %eax
fstpt -176(%ebp) fstpt -176(%ebp)
call snrm2_ call snrm2_
fstps -16(%ebp) fstps -16(%ebp)
flds -16(%ebp) flds -16(%ebp)
movl -160(%ebp),%edx movl -160(%ebp),%edx
fsts (%edx) fsts (%edx)
movl 12(%ebp),%ecx movl 12(%ebp),%ecx
movl -132(%ebp),%edx movl -132(%ebp),%edx
movl %esi,%eax movl %esi,%eax
addl $16,%esp addl $16,%esp
addl (%ecx),%eax addl (%ecx),%eax
fstps -4(%edx,%eax,4) fstps -4(%edx,%eax,4)
fldt -176(%ebp) fldt -176(%ebp)
jmp .L41 jmp .L41
.align 4 .align 4
.L46: .L46:
movl $0,(%ebx) movl $0,(%ebx)
movl 12(%ebp),%ecx movl 12(%ebp),%ecx
movl 32(%ebp),%edx movl 32(%ebp),%edx
movl %esi,%eax movl %esi,%eax
addl (%ecx),%eax addl (%ecx),%eax
movl $0,-4(%edx,%eax,4) movl $0,-4(%edx,%eax,4)
jmp .L41 jmp .L41
.align 4 .align 4
.L45: .L45:
fsqrt fsqrt
fstps -16(%ebp) fstps -16(%ebp)
flds -16(%ebp) flds -16(%ebp)
fmuls (%ebx) fmuls (%ebx)
fstps (%ebx) fstps (%ebx)
jmp .L41 jmp .L41
.L54: .L54:
fstp %st(0) fstp %st(0)
.L41: .L41:
movl -56(%ebp),%eax movl -56(%ebp),%eax
addl %eax,-164(%ebp) addl %eax,-164(%ebp)
addl %eax,%edi addl %eax,%edi
addl $4,%ebx addl $4,%ebx
addl $4,-160(%ebp) addl $4,-160(%ebp)
incl %esi incl %esi
decl -120(%ebp) decl -120(%ebp)
jns .L42 jns .L42
fstp %st(0) fstp %st(0)
.L33: .L33:
movl -140(%ebp),%edx movl -140(%ebp),%edx
movl -148(%ebp),%ecx movl -148(%ebp),%ecx
movl %edx,-60(%ebp) movl %edx,-60(%ebp)
movl %ecx,-84(%ebp) movl %ecx,-84(%ebp)
testl %ecx,%ecx testl %ecx,%ecx
jge .L34 jge .L34
.L2: .L2:
leal -200(%ebp),%esp leal -200(%ebp),%esp
popl %ebx popl %ebx
popl %esi popl %esi
popl %edi popl %edi
movl %ebp,%esp movl %ebp,%esp
popl %ebp popl %ebp
ret ret
.Lfe1: .Lfe1:
.size sgeqpf_,.Lfe1-sgeqpf .size sgeqpf_,.Lfe1-sgeqpf
.ident "GCC: (GNU) gcc-2.95 .ident "GCC: (GNU) gcc-2.95
-----------------------------------------------------------------------------
--
Mathias Fr"ohlich e-mail: frohlich@na.uni-tuebingen.de
Institut f"ur Mathematik, Universit"at T"ubingen, D-72076 T"ubingen