This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Example of sloooooooooooow code
- To: gcc at gcc dot gnu dot org
- Subject: Example of sloooooooooooow code
- From: Frank Klemm <pfk at fuchs dot offl dot uni-jena dot de>
- Date: Fri, 7 Sep 2001 00:53:11 +0200
- >Received: (from pfk@localhost)by fuchs.offl.uni-jena.de (8.9.3/8.9.3/SuSE Linux 8.9.3-0.1) id AAA25023for gcc@gcc.gnu.org; Fri, 7 Sep 2001 00:53:12 +0200
This shows an example of code generated by gcc and the potential of slightly
hand-optimized code. The performance ratio is only around 1:20.
#include <stdio.h>
#include <math.h>
#include <asm/msr.h>
/* optimization of this function */
static void
convert ( int* dst, const float* src, size_t len )
{
while (len--)
*dst++ = floor (*src++);
}
/*********************************/
static float F [4096];
static int I [4096];
/*
 * Benchmark driver: reads the time-stamp counter around an empty 1000-pass
 * loop and around 1000 calls of convert() on 4096 elements, then prints the
 * difference as clocks per converted element.
 */
int
main ( void )
{
size_t i;
long long t1; /* counter value before the empty reference loop */
long long t2; /* counter value after the empty reference loop */
long long t3; /* counter value before the convert() loop */
long long t4; /* counter value after the convert() loop */
long long t; /* net clocks attributed to the convert() calls */
convert (I, F, 4096); /* one untimed call first -- presumably a warm-up */
/* NOTE(review): iopl() is not declared by the headers above and its return
   value is not checked; if the call fails, the cli below presumably faults --
   confirm. */
iopl(3);
asm ("cli"); /* interrupts off for the duration of the measurement */
rdtscll (t1);
for (i=0; i<1000; i++) {
; /* empty body: measures the loop overhead alone */
}
rdtscll (t2);
rdtscll (t3);
for (i=0; i<1000; i++) {
convert (I, F, 4096);
}
rdtscll (t4);
asm ("sti"); /* interrupts back on */
t = t4+t1-t2-t3; /* (t4 - t3) - (t2 - t1): timed loop minus empty loop */
printf ("%f clocks/convertion\n", t/4096./1000 ); /* per element, per pass */
return 0;
}
.file "test.c"
.text
.align 16
.type convert,@function
# convert(dst, src, len) as generated by gcc: for each of the len floats at
# src, round toward minus infinity and store the result as a 32-bit int at
# dst. The x87 control word is rewritten four times per element (four fldcw
# in the loop body).
convert:
pushl %ebp
movl %esp, %ebp
pushl %edi # edi, esi and ebx are saved because the body uses them
pushl %esi
pushl %ebx
subl $44, %esp # scratch space for control words and spilled values
movl 16(%ebp), %ecx # ecx = len
movl 8(%ebp), %esi # esi = dst
movl 12(%ebp), %ebx # ebx = src
decl %ecx # len-- ...
cmpl $-1, %ecx # ... and skip the loop when len was 0
je .L369
.p2align 4
.L366:
flds (%ebx) # st(0) = *src
movl %esi, %edx # edx = dst for the store at the end of the iteration
addl $4, %ebx # src++
addl $4, %esi # dst++
#APP
fnstcw -26(%ebp) # save the current control word
#NO_APP
movzwl -26(%ebp), %eax
andl $62463, %eax # 0xF3FF: clear the rounding-control bits (10-11)
orl $1024, %eax # 0x0400: rounding control = round down
movw %ax, -28(%ebp)
#APP
fldcw -28(%ebp) # switch to round-down
frndint # st(0) = floor(st(0))
fldcw -26(%ebp) # restore the saved control word
#NO_APP
fstpl -40(%ebp) # spill the floor() result to memory as a double
decl %ecx # len--; the flags of the cmpl below are used by the final jne
cmpl $-1, %ecx
fldl -40(%ebp) # reload the double for the conversion to int
fnstcw -44(%ebp) # save the control word again
movl -44(%ebp), %edi # edi = copy of the saved control word
movb $12, -43(%ebp) # high byte = 0x0C: rounding control = truncate
fldcw -44(%ebp) # switch to truncation
movl %edi, -44(%ebp) # put the original control word back into the slot
fistpl -48(%ebp) # convert to a 32-bit integer
fldcw -44(%ebp) # restore the original control word
movl -48(%ebp), %eax
movl %eax, (%edx) # store the integer at the saved dst position
jne .L366 # loop unless the decremented len reached -1
.L369:
addl $44, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.Lfe1:
.size convert,.Lfe1-convert
.section .rodata
# Format string passed to printf by main.
.LC141:
.string "%f clocks/convertion\n"
.align 8
# Double 0x3f30000000000000 = 2^-12 = 1/4096 (low word first).
.LC139:
.long 0x0,0x3f300000
.align 8
# Double 0x3f50624dd2f1a9fc, the double closest to 0.001 (low word first).
.LC140:
.long 0xd2f1a9fc,0x3f50624d
.text
.align 16
.globl main
.type main,@function
# main: calls convert once, calls iopl(3), then with interrupts disabled
# reads the time-stamp counter around an empty 1000-pass loop and around
# 1000 calls of convert(I, F, 4096); prints the scaled difference.
main:
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $28, %esp
andl $-16, %esp # align the stack pointer to 16 bytes
subl $4, %esp # padding: together with three pushes this makes 16 bytes
pushl $4096
pushl $F
pushl $I
call convert # untimed convert(I, F, 4096)
movl $3, (%esp) # reuse the top argument slot for iopl(3)
call iopl
#APP
cli
rdtsc # first counter read (edx:eax)
#NO_APP
xorl %ebx, %ebx # loop counter = 0
addl $16, %esp # drop the argument area
movl %eax, -24(%ebp) # first reading, low half
movl %edx, -20(%ebp) # first reading, high half
.p2align 4
.L373:
incl %ebx # empty reference loop, 1000 passes
cmpl $999, %ebx
jbe .L373
#APP
rdtsc # second counter read
#NO_APP
movl %eax, %esi # second reading kept in edi:esi
movl %edx, %edi
#APP
rdtsc # third counter read
#NO_APP
xorl %ebx, %ebx # loop counter = 0
movl %eax, -32(%ebp) # third reading, low half
movl %edx, -28(%ebp) # third reading, high half
.p2align 4
.L379:
subl $4, %esp # padding so each call uses a 16-byte argument area
incl %ebx
pushl $4096
pushl $F
pushl $I
call convert # timed convert(I, F, 4096)
addl $16, %esp
cmpl $999, %ebx
jbe .L379 # 1000 passes
#APP
rdtsc # fourth counter read (edx:eax)
sti
#NO_APP
addl -24(%ebp), %eax # 64-bit: fourth + first
adcl -20(%ebp), %edx
subl %esi, %eax # 64-bit: minus second
sbbl %edi, %edx
subl -32(%ebp), %eax # 64-bit: minus third
sbbl -28(%ebp), %edx
subl $4, %esp
pushl %edx
pushl %eax
fildll (%esp) # load the 64-bit difference onto the FPU stack
addl $0, %esp # no-op stack adjustment emitted by the compiler
fmull .LC139 # multiply by 1/4096
fmull .LC140 # multiply by 0.001
fstpl (%esp) # store the double as the printf argument
pushl $.LC141
call printf
leal -12(%ebp), %esp # point esp at the saved registers
xorl %eax, %eax # return value 0
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.Lfe2:
.size main,.Lfe2-main
# F and I: two file-local common blocks of 16384 bytes each, aligned to 32.
.local F
.comm F,16384,32
.local I
.comm I,16384,32
.ident "GCC: (GNU) 3.0.2 20010903 (prerelease)"
.file "test.c"
.text
.align 16
.type convert,@function
# convert(dst, src, len), hand-optimized: the control word is switched to
# round-down once before the loop and restored once after it, so each
# element costs only a load (flds) and a converting store (fistpl).
convert:
pushl %ebp
movl %esp, %ebp
pushl %edi # saved at -4(%ebp)
pushl %esi # saved at -8(%ebp)
pushl %ebx # saved at -12(%ebp)
pushl %eax # reserves -16(%ebp) as scratch for the two control words
movl 16(%ebp), %ecx # ecx = len
movl 8(%ebp), %esi # esi = dst
movl 12(%ebp), %ebx # ebx = src
test %ecx, %ecx
je .Lende # nothing to do for len == 0
fnstcw -16(%ebp) # save the current control word at -16(%ebp)
movl -16(%ebp), %eax # 32-bit load; the and below clears the upper half
andl $62463, %eax # 0xF3FF: clear the rounding-control bits (10-11)
orl $1024, %eax # 0x0400: rounding control = round down
movw %ax, -14(%ebp) # modified control word at -14(%ebp)
fldcw -14(%ebp) # switch to round-down for the whole loop
.p2align 4
.Lloop:
flds (%ebx) # st(0) = *src
addl $4, %ebx # src++
fistpl (%esi) # *dst = st(0) converted under round-down
addl $4, %esi # dst++
decl %ecx
jne .Lloop
fldcw -16(%ebp) # restore the saved control word
.Lende:
popl %eax # release the scratch slot
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.Lfe1:
.size convert,.Lfe1-convert
.section .rodata
# Format string passed to printf by main.
.LC141:
.string "%f clocks/convertion\n"
.align 8
# Double 0x3f30000000000000 = 2^-12 = 1/4096 (low word first).
.LC139:
.long 0x0,0x3f300000
.align 8
# Double 0x3f50624dd2f1a9fc, the double closest to 0.001 (low word first).
.LC140:
.long 0xd2f1a9fc,0x3f50624d
.text
.align 16
.globl main
.type main,@function
# main: calls convert once, calls iopl(3), then with interrupts disabled
# reads the time-stamp counter around an empty 1000-pass loop and around
# 1000 calls of convert(I, F, 4096); prints the scaled difference.
main:
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $28, %esp
andl $-16, %esp # align the stack pointer to 16 bytes
subl $4, %esp # padding: together with three pushes this makes 16 bytes
pushl $4096
pushl $F
pushl $I
call convert # untimed convert(I, F, 4096)
movl $3, (%esp) # reuse the top argument slot for iopl(3)
call iopl
#APP
cli
rdtsc # first counter read (edx:eax)
#NO_APP
xorl %ebx, %ebx # loop counter = 0
addl $16, %esp # drop the argument area
movl %eax, -24(%ebp) # first reading, low half
movl %edx, -20(%ebp) # first reading, high half
.p2align 4
.L373:
incl %ebx # empty reference loop, 1000 passes
cmpl $999, %ebx
jbe .L373
#APP
rdtsc # second counter read
#NO_APP
movl %eax, %esi # second reading kept in edi:esi
movl %edx, %edi
#APP
rdtsc # third counter read
#NO_APP
xorl %ebx, %ebx # loop counter = 0
movl %eax, -32(%ebp) # third reading, low half
movl %edx, -28(%ebp) # third reading, high half
.p2align 4
.L379:
subl $4, %esp # padding so each call uses a 16-byte argument area
incl %ebx
pushl $4096
pushl $F
pushl $I
call convert # timed convert(I, F, 4096)
addl $16, %esp
cmpl $999, %ebx
jbe .L379 # 1000 passes
#APP
rdtsc # fourth counter read (edx:eax)
sti
#NO_APP
addl -24(%ebp), %eax # 64-bit: fourth + first
adcl -20(%ebp), %edx
subl %esi, %eax # 64-bit: minus second
sbbl %edi, %edx
subl -32(%ebp), %eax # 64-bit: minus third
sbbl -28(%ebp), %edx
subl $4, %esp
pushl %edx
pushl %eax
fildll (%esp) # load the 64-bit difference onto the FPU stack
addl $0, %esp # no-op stack adjustment emitted by the compiler
fmull .LC139 # multiply by 1/4096
fmull .LC140 # multiply by 0.001
fstpl (%esp) # store the double as the printf argument
pushl $.LC141
call printf
leal -12(%ebp), %esp # point esp at the saved registers
xorl %eax, %eax # return value 0
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.Lfe2:
.size main,.Lfe2-main
# F and I: two file-local common blocks of 16384 bytes each, aligned to 32.
.local F
.comm F,16384,32
.local I
.comm I,16384,32
.ident "GCC: (GNU) 3.0.2 20010903 (prerelease)"
.file "test.c"
.text
.align 16
.type convert,@function
# convert(dst, src, len), hand-optimized and unrolled by two: the control
# word is switched to round-down once before the loop and restored once
# after it; elements are then converted in pairs with flds/fistpl.
# An odd len is handled by converting one leading element first. The pair
# count is re-tested after that step so that len == 1 converts its single
# element instead of skipping straight to the exit.
convert:
pushl %ebp
movl %esp, %ebp
pushl %edi # saved at -4(%ebp)
pushl %esi # saved at -8(%ebp)
pushl %ebx # saved at -12(%ebp)
pushl %eax # reserves -16(%ebp) as scratch for the two control words
movl 16(%ebp), %ecx # ecx = len
movl 8(%ebp), %esi # esi = dst
movl 12(%ebp), %ebx # ebx = src
test %ecx, %ecx
je .Lende # nothing to do for len == 0
fnstcw -16(%ebp) # save the current control word at -16(%ebp)
movl -16(%ebp), %eax # 32-bit load; the and below clears the upper half
andl $62463, %eax # 0xF3FF: clear the rounding-control bits (10-11)
orl $1024, %eax # 0x0400: rounding control = round down
movw %ax, -14(%ebp) # modified control word at -14(%ebp)
fldcw -14(%ebp) # switch to round-down for the whole loop
shrl %ecx # ecx = len / 2, carry = len & 1
jnc .Lpairs # even len: no leading single element
flds (%ebx) # odd len: convert one element first
fistpl (%esi)
addl $4, %ebx
addl $4, %esi
.Lpairs:
testl %ecx, %ecx # addl above changed the flags, so test the pair count here
jz .L2 # no pairs left (len was 1)
.p2align 4
.Lloop:
flds (%ebx) # first element of the pair
fistpl (%esi)
flds 4(%ebx) # second element of the pair
addl $8, %ebx # src += 2
fistpl 4(%esi)
addl $8, %esi # dst += 2
dec %ecx
jne .Lloop
.L2:
fldcw -16(%ebp) # restore the saved control word
.Lende:
popl %eax # release the scratch slot
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.Lfe1:
.size convert,.Lfe1-convert
.section .rodata
# Format string passed to printf by main.
.LC141:
.string "%f clocks/convertion\n"
.align 8
# Double 0x3f30000000000000 = 2^-12 = 1/4096 (low word first).
.LC139:
.long 0x0,0x3f300000
.align 8
# Double 0x3f50624dd2f1a9fc, the double closest to 0.001 (low word first).
.LC140:
.long 0xd2f1a9fc,0x3f50624d
.text
.align 16
.globl main
.type main,@function
# main: calls convert once, calls iopl(3), then with interrupts disabled
# reads the time-stamp counter around an empty 1000-pass loop and around
# 1000 calls of convert(I, F, 4096); prints the scaled difference.
main:
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $28, %esp
andl $-16, %esp # align the stack pointer to 16 bytes
subl $4, %esp # padding: together with three pushes this makes 16 bytes
pushl $4096
pushl $F
pushl $I
call convert # untimed convert(I, F, 4096)
movl $3, (%esp) # reuse the top argument slot for iopl(3)
call iopl
#APP
cli
rdtsc # first counter read (edx:eax)
#NO_APP
xorl %ebx, %ebx # loop counter = 0
addl $16, %esp # drop the argument area
movl %eax, -24(%ebp) # first reading, low half
movl %edx, -20(%ebp) # first reading, high half
.p2align 4
.L373:
incl %ebx # empty reference loop, 1000 passes
cmpl $999, %ebx
jbe .L373
#APP
rdtsc # second counter read
#NO_APP
movl %eax, %esi # second reading kept in edi:esi
movl %edx, %edi
#APP
rdtsc # third counter read
#NO_APP
xorl %ebx, %ebx # loop counter = 0
movl %eax, -32(%ebp) # third reading, low half
movl %edx, -28(%ebp) # third reading, high half
.p2align 4
.L379:
subl $4, %esp # padding so each call uses a 16-byte argument area
incl %ebx
pushl $4096
pushl $F
pushl $I
call convert # timed convert(I, F, 4096)
addl $16, %esp
cmpl $999, %ebx
jbe .L379 # 1000 passes
#APP
rdtsc # fourth counter read (edx:eax)
sti
#NO_APP
addl -24(%ebp), %eax # 64-bit: fourth + first
adcl -20(%ebp), %edx
subl %esi, %eax # 64-bit: minus second
sbbl %edi, %edx
subl -32(%ebp), %eax # 64-bit: minus third
sbbl -28(%ebp), %edx
subl $4, %esp
pushl %edx
pushl %eax
fildll (%esp) # load the 64-bit difference onto the FPU stack
addl $0, %esp # no-op stack adjustment emitted by the compiler
fmull .LC139 # multiply by 1/4096
fmull .LC140 # multiply by 0.001
fstpl (%esp) # store the double as the printf argument
pushl $.LC141
call printf
leal -12(%ebp), %esp # point esp at the saved registers
xorl %eax, %eax # return value 0
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.Lfe2:
.size main,.Lfe2-main
# F and I: two file-local common blocks of 16384 bytes each, aligned to 32.
.local F
.comm F,16384,32
.local I
.comm I,16384,32
.ident "GCC: (GNU) 3.0.2 20010903 (prerelease)"
test
test-opt
test-opt2
test: 87.268878 clocks/convertion
test-opt: 4.654123 clocks/convertion Ratio: 1 : 18.75
test-opt2: 3.995673 clocks/convertion Ratio: 1 : 21.84
-rw-r--r-- 1 pfk users 813 Sep 7 00:38 test.c Source file
-rwxr-xr-x 1 root root 2183 Sep 7 00:25 test.S Code generated by gcc-3.0.2
-rwxr-xr-x 1 root root 1954 Sep 7 00:38 test-opt.S Slightly optimized code
-rwxr-xr-x 1 root root 1954 Sep 7 00:38 test-opt2.S More heavily optimized code
-rwxr-xr-x 1 root root 12140 Sep 7 00:25 test Exec of test.c
-rwxr-xr-x 1 root root 12076 Sep 7 00:38 test-opt Exec of test-opt.S
-rwxr-xr-x 1 root root 12076 Sep 7 00:38 test-opt2 Exec of test-opt2.S
-rw-r--r-- 1 root root 73 Sep 7 00:39 test.speed This file