This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Slowndown in latest snapshot
- To: egcs at cygnus dot com
- Subject: Slowndown in latest snapshot
- From: Jan Hubicka <hubicka at atrey dot karlin dot mff dot cuni dot cz>
- Date: Thu, 10 Sep 1998 14:07:11 +0200
Hi
I've noticed a slowdown in the Mandelbrot loop code, which is the basis of my XaoS
fractal browser. It seems to be caused by the new loop code in the first post-1.1
snapshot.
I tested XaoS with snapshots about two months old and didn't notice
any slowdown. Currently I have only 1.0 and the latest snapshot installed,
so here are the results:
A simplified version of the loop is:
#include <stdio.h>
#include <time.h>
/* Iteration cap for mand_calc(); main() also divides it by the elapsed time. */
static long maxiter=10000000;
/* Floating-point type used for all arithmetic in mand_calc(). */
typedef long double number_t;
/*
 * Escape-time iteration loop used as the benchmark kernel.
 *
 * Starts from z = (cre, cim) and repeatedly applies
 *     zim = 2 * zim * zre + pim
 *     zre = zre * zre - zim * zim + pre
 * (both right-hand sides use the values from before the update).
 *
 * The loop stops when maxiter iterations have run or when rp + ip, the
 * sum of the squares computed in the previous iteration, is no longer
 * below 4. On the first pass rp and ip are both 0.
 *
 * Returns the number of iterations executed (maxiter minus the remaining
 * count), converted from unsigned long to int.
 */
static int mand_calc(register number_t cre,
register number_t cim,
register number_t pre,
register number_t pim)
{
register number_t rp = 0, ip = 0;
/* Counts down from maxiter; 0 means the iteration budget is used up. */
register unsigned long iter = maxiter;
register number_t zre, zim;
zre = cre;
zim = cim;
while ((iter) && (rp + ip < 4)) {
/* ip and rp are the squares of the components before this update. */
ip = (zim * zim);
zim = (zim * zre) * 2 + pim;
rp = (zre * zre);
zre = rp - ip + pre;
iter--;
}
/* Convert the remaining count into the number of iterations performed. */
iter = maxiter - iter;
return(iter);
}
/*
 * Benchmark driver: times a single mand_calc(0,0,0,0) call with clock()
 * and prints maxiter / (t2 - t1) * CLOCKS_PER_SEC.
 *
 * With all-zero arguments zre, zim, rp and ip stay 0, so the loop only
 * ends when the iteration count is exhausted.
 *
 * NOTE(review): declared without a return type (implicit int); compilers
 * in C99-or-later mode may warn about or reject this.
 * NOTE(review): the label says "Mloops" but the value is not divided by
 * one million -- confirm the intended unit.
 */
main()
{clock_t t1,t2;
int i;
t1=clock();
i=mand_calc(0,0,0,0);
t2=clock();
printf("Mloops:%f",((double)maxiter)/(t2-t1)*CLOCKS_PER_SEC);
/* The iteration count becomes the exit status, presumably so the result is used. */
return(i);
}
The result with egcs 1.0 is:
egcc -O3 -mpentium -ffast-math -fomit-frame-pointer -ffast-math -funroll-loops
aa.c
(The loop is unrolled and the speed is 5405405.405405 loops per second.)
.file "aa.c"
.version "01.01"
/ GNU C version egcs-2.90.29 980515 (egcs-1.0.3 release) (i486-linux) compiled by GNU C version egcs-2.90.29 980515 (egcs-1.0.3 release).
/ options passed: -mcpu=pentium -mpentium -O3 -ffast-math
/ -fomit-frame-pointer -ffast-math -funroll-loops
/ options enabled: -fdefer-pop -fomit-frame-pointer -fcse-follow-jumps
/ -fcse-skip-blocks -fexpensive-optimizations -fthread-jumps
/ -fstrength-reduce -funroll-loops -fpeephole -fforce-mem -ffunction-cse
/ -finline-functions -finline -fkeep-static-consts -fcaller-saves
/ -fpcc-struct-return -frerun-cse-after-loop -frerun-loop-opt
/ -fschedule-insns2 -ffast-math -fcommon -fverbose-asm -fgnu-linker
/ -fregmove -falias-check -fargument-alias -m80387 -mhard-float
/ -mno-soft-float -mieee-fp -mfp-ret-in-387 -mschedule-prologue
/ -mcpu=pentium -march=pentium
gcc2_compiled.:
.data
.align 4
.type maxiter,@object
.size maxiter,4
maxiter:
.long 10000000
.section .rodata
.LC3:
.string "Mloops:%f"
.align 4
.LC4:
.long 0x0,0x80000000,0x4001
.align 4
.LC5:
.long 0x0,0x412e8480
.text
.align 4
.globl main
.type main,@function
# main() from the egcs-1.0.3 build. The listing contains no call to
# mand_calc; the iteration loop appears inline between the two clock() calls.
# Integer register roles: esi = first clock() result, ebx = remaining
# iteration count (later the value returned from main).
main:
# esi and ebx are saved here and restored just before ret.
pushl %esi
pushl %ebx
call clock
# esi = t1
movl %eax,%esi
fldz
# ebx = maxiter (the countdown counter)
movl maxiter,%ebx
# Replicate the 0.0 from fldz: four zero values on the x87 stack.
fld %st(0)
fld %st(1)
fld %st(2)
# maxiter == 0: skip the loop entirely.
testl %ebx,%ebx
je .L42
movl %ebx,%edx
negl %edx
# .LC4 is an 80-bit constant whose bit pattern encodes 4.0.
fldt .LC4
# edx = (-maxiter) & 3 selects the entry point into the loop copies:
# 0 -> .L9, 3 -> .L16, 2 -> .L17, 1 -> decrement ebx once, then .L17.
andl $3,%edx
je .L9
cmpl $3,%edx
jge .L16
cmpl $2,%edx
jge .L17
decl %ebx
# First copy of the loop body ahead of the main loop.
.L17:
# Add two stack values and compare the sum with the 4.0 constant.
fxch %st(4)
faddp %st,%st(3)
fxch %st(2)
fcomp %st(3)
# ah & 5 keeps the C0 and C2 condition bits of the x87 status word;
# both clear (sum above or equal to 4.0) leaves the loop via .L40.
fnstsw %ax
andb $5,%ah
je .L40
fld %st(1)
fld %st(1)
fxch %st(1)
fmul %st(3),%st
fxch %st(1)
fmul %st(2),%st
fxch %st(3)
fmulp %st,%st(2)
decl %ebx
fld %st(2)
fxch %st(2)
fadd %st(0),%st
fxch %st(2)
fsub %st(1),%st
# Four consecutive register exchanges with no arithmetic in between.
fxch %st(2)
fxch %st(1)
fxch %st(3)
fxch %st(4)
# Second copy of the loop body ahead of the main loop.
.L16:
fxch %st(4)
faddp %st,%st(3)
fxch %st(2)
fcomp %st(3)
fnstsw %ax
andb $5,%ah
je .L40
fld %st(1)
fld %st(1)
fxch %st(1)
fmul %st(3),%st
fxch %st(1)
fmul %st(2),%st
fxch %st(3)
fmulp %st,%st(2)
fld %st(2)
fxch %st(2)
fadd %st(0),%st
fxch %st(2)
fsub %st(1),%st
# Counter exhausted here: leave through .L43.
decl %ebx
jz .L43
fxch %st(2)
fxch %st(1)
fxch %st(3)
fxch %st(4)
jmp .L9
.align 4
# Back-edge target: five exchanges, then fall into .L9 (presumably
# restoring the stack layout .L9 expects -- not traced here).
.L41:
fxch %st(2)
fxch %st(4)
fxch %st(1)
fxch %st(3)
fxch %st(1)
# Main loop: four copies of the body per pass, each with its own
# compare/exit test and its own decl %ebx.
.L9:
fxch %st(4)
faddp %st,%st(3)
fxch %st(2)
fcomp %st(3)
fnstsw %ax
andb $5,%ah
je .L40
fld %st(1)
fld %st(1)
fmul %st(2),%st
fxch %st(1)
fmul %st(3),%st
fxch %st(3)
fmulp %st,%st(2)
decl %ebx
fld %st(0)
fadd %st(3),%st
fxch %st(2)
fadd %st(0),%st
fxch %st(1)
fsubp %st,%st(3)
fxch %st(1)
fcomp %st(3)
fnstsw %ax
andb $5,%ah
je .L40
fld %st(0)
fld %st(2)
fmul %st(3),%st
fxch %st(1)
fmul %st(2),%st
fxch %st(2)
fmulp %st,%st(3)
decl %ebx
fld %st(0)
fadd %st(2),%st
fxch %st(3)
fadd %st(0),%st
fxch %st(1)
fsubp %st,%st(2)
fxch %st(2)
fcomp %st(3)
fnstsw %ax
andb $5,%ah
# .L39 and .L40 label the same address.
je .L39
fld %st(1)
fld %st(1)
fmul %st(2),%st
fxch %st(1)
fmul %st(3),%st
fxch %st(3)
fmulp %st,%st(2)
decl %ebx
fld %st(0)
fadd %st(3),%st
fxch %st(2)
fadd %st(0),%st
fxch %st(1)
fsubp %st,%st(3)
fxch %st(1)
fcomp %st(3)
fnstsw %ax
andb $5,%ah
je .L40
fld %st(0)
fld %st(2)
fxch %st(1)
fmul %st(2),%st
fxch %st(1)
fmul %st(3),%st
fxch %st(2)
fmulp %st,%st(3)
fld %st(1)
fxch %st(3)
fadd %st(0),%st
fxch %st(3)
fsub %st(1),%st
# Loop back while iterations remain; otherwise fall into the exit path.
decl %ebx
jnz .L41
# Exit paths: each entry label pops one more x87 stack entry than the next.
.L43:
fstp %st(0)
.L42:
fstp %st(0)
.L39:
.L40:
fstp %st(0)
fstp %st(0)
fstp %st(0)
# ebx = maxiter - remaining count, i.e. the iterations performed.
movl maxiter,%eax
subl %ebx,%eax
movl %eax,%ebx
call clock
# edx = t2 - t1
movl %eax,%edx
subl %esi,%edx
# Pass the elapsed ticks through the stack to load them as an x87 integer.
pushl %edx
fildl (%esp)
addl $4,%esp
# st(0) = maxiter / (t2 - t1)
fidivrl maxiter
# Multiply by .LC5, a double whose bit pattern encodes 1000000.0
# (presumably CLOCKS_PER_SEC from the C source).
fldl .LC5
fmulp %st,%st(1)
# Store the double as the printf argument, then push the format string.
subl $8,%esp
fstpl (%esp)
pushl $.LC3
call printf
# Drop the 12 bytes of printf arguments.
addl $12,%esp
# Return the iteration count.
movl %ebx,%eax
popl %ebx
popl %esi
ret
.Lfe1:
.size main,.Lfe1-main
.ident "GCC: (GNU) egcs-2.90.29 980515 (egcs-1.0.3 release)"
Output of egcs-19980906 is:
(The loop is not unrolled; the speed is 4291845.493562 loops per second.)
.file "aa.c"
.version "01.01"
gcc2_compiled.:
.data
.align 4
.type maxiter,@object
.size maxiter,4
maxiter:
.long 10000000
.section .rodata
.LC3:
.string "Mloops:%f"
.align 16
.LC4:
.long 0x0,0x80000000,0x4001
.align 8
.LC5:
.long 0x0,0x412e8480
.text
.align 4
.globl main
.type main,@function
# main() from the egcs-19980906 build. The listing contains no call to
# mand_calc; a single copy of the loop body appears inline at .L10.
# Integer register roles: esi = first clock() result, ebx = remaining
# iteration count, edx = copy of maxiter kept for the final subtraction.
main:
# esi and ebx are saved here and restored just before ret.
pushl %esi
pushl %ebx
call clock
# ebx = maxiter (the countdown counter)
movl maxiter,%ebx
# esi = t1
movl %eax,%esi
# edx = maxiter, used after the loop to compute maxiter - remaining.
movl %ebx,%edx
# Two zero values on the x87 stack.
fldz
fld %st(0)
# maxiter == 0: skip the loop entirely.
testl %ebx,%ebx
je .L19
# .LC4 is an 80-bit constant whose bit pattern encodes 4.0.
fldt .LC4
jmp .L10
# Back-edge target: three exchanges, then fall into .L10.
.L17:
fxch %st(1)
fxch %st(2)
fxch %st(1)
.align 4
# Loop body (one copy).
.L10:
fld %st(1)
fld %st(3)
fmul %st(4),%st
fxch %st(3)
fmulp %st,%st(4)
fmul %st(0),%st
fld %st(2)
fxch %st(4)
fadd %st(0),%st
fxch %st(4)
fsub %st(1),%st
# The counter is decremented and tested before the compare.
decl %ebx
jz .L16
fxch %st(3)
faddp %st,%st(1)
fcomp %st(1)
# ah & 69 keeps the C0, C2 and C3 condition bits of the x87 status word;
# the loop continues only when C0 alone is set (st(0) below the operand).
fnstsw %ax
andb $69,%ah
cmpb $1,%ah
je .L17
jmp .L18
# Exit paths: each entry label pops one more x87 stack entry than the next.
.L16:
fstp %st(0)
fstp %st(0)
.L18:
fstp %st(0)
.L19:
fstp %st(0)
fstp %st(0)
# ebx = maxiter - remaining count, i.e. the iterations performed.
subl %ebx,%edx
movl %edx,%ebx
call clock
# eax = t2 - t1
subl %esi,%eax
# Pass the elapsed ticks through the stack to load them as an x87 integer.
pushl %eax
fildl (%esp)
addl $4,%esp
# st(0) = maxiter / (t2 - t1)
fidivrl maxiter
# Multiply by .LC5, a double whose bit pattern encodes 1000000.0
# (presumably CLOCKS_PER_SEC from the C source).
fldl .LC5
fmulp %st,%st(1)
# Store the double as the printf argument, then push the format string.
subl $8,%esp
fstpl (%esp)
pushl $.LC3
call printf
# Drop the 12 bytes of printf arguments.
addl $12,%esp
# Return the iteration count.
movl %ebx,%eax
popl %ebx
popl %esi
ret
.Lfe1:
.size main,.Lfe1-main
.ident "GCC: (GNU) egcs-2.92.04 19980906 (gcc2 ss-980609 experimental)"
-funroll-all-loops helps a bit. (The loop is unrolled again, in a different
way, but it is still slower: 5154639.175258 loops per second.)
.file "aa.c"
.version "01.01"
gcc2_compiled.:
.data
.align 4
.type maxiter,@object
.size maxiter,4
maxiter:
.long 10000000
.section .rodata
.LC3:
.string "Mloops:%f"
.align 16
.LC4:
.long 0x0,0x80000000,0x4001
.align 8
.LC5:
.long 0x0,0x412e8480
.text
.align 4
.globl main
.type main,@function
# main() from the egcs-19980906 build with -funroll-all-loops. The listing
# contains no call to mand_calc; four copies of the loop body appear inline
# starting at .L10.
# Integer register roles: esi = first clock() result, ebx = remaining
# iteration count, edx = copy of maxiter kept for the final subtraction.
main:
# esi and ebx are saved here and restored just before ret.
pushl %esi
pushl %ebx
call clock
# ebx = maxiter (the countdown counter)
movl maxiter,%ebx
# esi = t1
movl %eax,%esi
# edx = maxiter, used after the loop to compute maxiter - remaining.
movl %ebx,%edx
# Two zero values on the x87 stack.
fldz
fld %st(0)
# maxiter == 0: skip the loop entirely.
testl %ebx,%ebx
je .L32
# .LC4 is an 80-bit constant whose bit pattern encodes 4.0.
fldt .LC4
.align 4
# Loop head: four body copies per pass. Each copy decrements ebx and
# leaves via .L30 when it reaches zero, then runs the compare.
.L10:
fld %st(1)
fld %st(3)
fmul %st(4),%st
fxch %st(3)
fmulp %st,%st(4)
fmul %st(0),%st
fld %st(2)
fxch %st(4)
fadd %st(0),%st
fxch %st(4)
fsub %st(1),%st
decl %ebx
jz .L30
fxch %st(3)
faddp %st,%st(1)
fcomp %st(1)
# ah & 5 keeps the C0 and C2 condition bits of the x87 status word;
# both clear leaves the loop via .L31.
fnstsw %ax
andb $5,%ah
je .L31
# Second copy.
fld %st(2)
fld %st(2)
fmul %st(3),%st
fxch %st(4)
fmulp %st,%st(3)
fmul %st(0),%st
fld %st(3)
fxch %st(3)
fadd %st(0),%st
fxch %st(3)
fsub %st(1),%st
decl %ebx
jz .L30
fxch %st(4)
faddp %st,%st(1)
fcomp %st(1)
fnstsw %ax
andb $5,%ah
je .L31
# Third copy.
fld %st(1)
fld %st(3)
fmul %st(4),%st
fxch %st(3)
fmulp %st,%st(4)
fmul %st(0),%st
fld %st(2)
fxch %st(4)
fadd %st(0),%st
fxch %st(4)
fsub %st(1),%st
decl %ebx
jz .L30
fxch %st(3)
faddp %st,%st(1)
fcomp %st(1)
fnstsw %ax
andb $5,%ah
je .L31
# Fourth copy.
fld %st(2)
fld %st(2)
fmul %st(3),%st
fxch %st(4)
fmulp %st,%st(3)
fmul %st(0),%st
fld %st(3)
fxch %st(3)
fadd %st(0),%st
fxch %st(3)
fsub %st(1),%st
decl %ebx
jz .L30
fxch %st(4)
faddp %st,%st(1)
fcomp %st(1)
# The last copy tests C0, C2 and C3 (mask 69) and loops back to .L10
# only when C0 alone is set (st(0) below the operand).
fnstsw %ax
andb $69,%ah
cmpb $1,%ah
je .L10
jmp .L31
# Exit paths: each entry label pops one more x87 stack entry than the next.
.L30:
fstp %st(0)
fstp %st(0)
.L31:
fstp %st(0)
.L32:
fstp %st(0)
fstp %st(0)
# ebx = maxiter - remaining count, i.e. the iterations performed.
subl %ebx,%edx
movl %edx,%ebx
call clock
# eax = t2 - t1
subl %esi,%eax
# Pass the elapsed ticks through the stack to load them as an x87 integer.
pushl %eax
fildl (%esp)
addl $4,%esp
# st(0) = maxiter / (t2 - t1)
fidivrl maxiter
# Multiply by .LC5, a double whose bit pattern encodes 1000000.0
# (presumably CLOCKS_PER_SEC from the C source).
fldl .LC5
fmulp %st,%st(1)
# Store the double as the printf argument, then push the format string.
subl $8,%esp
fstpl (%esp)
pushl $.LC3
call printf
# Drop the 12 bytes of printf arguments.
addl $12,%esp
# Return the iteration count.
movl %ebx,%eax
popl %ebx
popl %esi
ret
.Lfe1:
.size main,.Lfe1-main
.ident "GCC: (GNU) egcs-2.92.04 19980906 (gcc2 ss-980609 experimental)"
The loop in XaoS is probably a slightly different case, because it is unrolled by hand.
I expect it is big enough to prevent egcs from unrolling it (it contains 8
copies of the main loop). Also, the number of iterations is unknown.
If you are interested, I can try to look into it as well.
Honza
--
OK. Lets make a signature file.
+-------------------------------------------------------------------------+
| Jan Hubicka (Jan Hubi\v{c}ka in TeX) hubicka@freesoft.cz |
| Czech free software foundation: http://www.freesoft.cz |
|AA project - the new way for computer graphics - http://www.ta.jcu.cz/aa |
| homepage: http://www.paru.cas.cz/~hubicka/, games koules, Xonix, fast |
| fractal zoomer XaoS, index of Czech GNU/Linux/UN*X documentation etc. |
+-------------------------------------------------------------------------+