This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Example of sloooooooooooow code


This shows an example of code generated by gcc and the potential of slightly
hand-optimized code. The performance ratio is only around 1:20.

#include <stdio.h>
#include <math.h>
#include <asm/msr.h>


/* optimization of this function */

static void  
convert ( int* dst, const float* src, size_t len )
{
    while (len--)
        *dst++ = floor (*src++);
}

/*********************************/



/* Benchmark buffers passed to convert() by main(): F is the float input and
   I receives the converted ints.  Both have static storage and therefore
   start out zero-filled. */
static float  F [4096];
static int    I [4096];


/*
 * Benchmark driver: prints the average number of time-stamp-counter ticks
 * that convert() needs per element.
 *
 * An empty 1000-pass loop is timed first so that its cost can be subtracted
 * from the time of 1000 calls of convert() on 4096 elements.  Interrupts are
 * masked with cli/sti around both measurements.
 *
 * Returns 0 on success, or 1 when iopl() does not grant the privilege level
 * that cli/sti require.
 */
int
main ( void )
{
    size_t     i;
    long long  t1;
    long long  t2;
    long long  t3;
    long long  t4;
    long long  t;

    /* One untimed call before measuring (presumably to warm the caches). */
    convert (I, F, 4096);

    /* cli faults when the I/O privilege level was not raised, so stop with
       a diagnostic instead of crashing when iopl() fails. */
    if (iopl (3) != 0) {
        perror ("iopl");
        return 1;
    }
    asm ("cli");

    /* Baseline: the bare loop with no work in it. */
    rdtscll (t1);
    for (i=0; i<1000; i++) {
        ;
    }
    rdtscll (t2);

    /* Measurement: the same loop, now calling convert(). */
    rdtscll (t3);
    for (i=0; i<1000; i++) {
        convert (I, F, 4096);
    }
    rdtscll (t4);
    asm ("sti");

    /* (t4 - t3) - (t2 - t1): timed loop minus the empty-loop overhead. */
    t = t4+t1-t2-t3;
    printf ("%f clocks/convertion\n", t/4096./1000     );
    return 0;
}
	.file	"test.c"
	.text
	.align 16
	.type	convert,@function
# Routine convert(dst, src, len) as compiled by GCC 3.0.2 from the C source.
# For every element the loop saves, changes and restores the x87 control
# word twice: once around frndint (the floor) and once around fistpl (the
# conversion to int).
convert:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	subl	$44, %esp		# scratch space for control words and spills
	movl	16(%ebp), %ecx		# len
	movl	8(%ebp), %esi		# dst
	movl	12(%ebp), %ebx		# src
	decl	%ecx
	cmpl	$-1, %ecx
	je	.L369			# len was 0: nothing to do
	.p2align 4
.L366:
	flds	(%ebx)			# st(0) = *src
	movl	%esi, %edx		# keep the store address; %esi advances below
	addl	$4, %ebx
	addl	$4, %esi
#APP
	fnstcw -26(%ebp)		# save the current control word
#NO_APP
	movzwl	-26(%ebp), %eax
	andl	$62463, %eax		# 0xF3FF: clear rounding-control bits 10-11
	orl	$1024, %eax		# 0x0400: select round toward -infinity
	movw	%ax, -28(%ebp)
#APP
	fldcw -28(%ebp)
	frndint				# round st(0) to an integral value
	fldcw -26(%ebp)			# back to the saved control word
#NO_APP
	fstpl	-40(%ebp)		# spill the rounded value as a double ...
	decl	%ecx
	cmpl	$-1, %ecx		# sets the flags tested by jne at the loop end
	fldl	-40(%ebp)		# ... and reload it
	fnstcw	-44(%ebp)		# save the control word a second time
	movl	-44(%ebp), %edi
	movb	$12, -43(%ebp)		# high byte 0x0C: rounding control = truncate
	fldcw	-44(%ebp)
	movl	%edi, -44(%ebp)		# put the saved word back in memory
	fistpl	-48(%ebp)		# convert to a 32-bit integer
	fldcw	-44(%ebp)		# restore the control word
	movl	-48(%ebp), %eax
	movl	%eax, (%edx)		# *dst = result
	jne	.L366
.L369:
	addl	$44, %esp
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.Lfe1:
	.size	convert,.Lfe1-convert
	.section	.rodata
.LC141:
	.string	"%f clocks/convertion\n"
	.align 8
.LC139:
	.long	0x0,0x3f300000
	.align 8
.LC140:
	.long	0xd2f1a9fc,0x3f50624d
	.text
	.align 16
.globl main
	.type	main,@function
# Routine main as compiled by GCC 3.0.2: times an empty 1000-pass loop and a
# 1000-pass loop calling convert(I, F, 4096), then prints the difference
# scaled to time-stamp-counter ticks per converted element.
main:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	subl	$28, %esp
	andl	$-16, %esp		# align the stack pointer to 16 bytes
	subl	$4, %esp
	pushl	$4096
	pushl	$F
	pushl	$I
	call	convert			# untimed call: convert(I, F, 4096)
	movl	$3, (%esp)		# reuse the top argument slot for iopl(3)
	call	iopl
#APP
	cli				# mask interrupts for the measurement
	rdtsc				# t1 in %edx:%eax
#NO_APP
	xorl	%ebx, %ebx		# i = 0
	addl	$16, %esp
	movl	%eax, -24(%ebp)		# t1, low half
	movl	%edx, -20(%ebp)		# t1, high half
	.p2align 4
.L373:					# empty loop, 1000 passes
	incl	%ebx
	cmpl	$999, %ebx
	jbe	.L373
#APP
	rdtsc				# t2
#NO_APP
	movl	%eax, %esi		# t2 kept in %edi:%esi
	movl	%edx, %edi
#APP
	rdtsc				# t3
#NO_APP
	xorl	%ebx, %ebx		# i = 0
	movl	%eax, -32(%ebp)		# t3, low half
	movl	%edx, -28(%ebp)		# t3, high half
	.p2align 4
.L379:					# timed loop, 1000 calls of convert
	subl	$4, %esp
	incl	%ebx
	pushl	$4096
	pushl	$F
	pushl	$I
	call	convert
	addl	$16, %esp
	cmpl	$999, %ebx
	jbe	.L379
#APP
	rdtsc				# t4 in %edx:%eax
	sti				# interrupts back on
#NO_APP
	addl	-24(%ebp), %eax		# t = t4 + t1 ...
	adcl	-20(%ebp), %edx
	subl	%esi, %eax		# ... - t2 ...
	sbbl	%edi, %edx
	subl	-32(%ebp), %eax		# ... - t3
	sbbl	-28(%ebp), %edx
	subl	$4, %esp
	pushl	%edx
	pushl	%eax
	fildll	(%esp)			# load t as a 64-bit integer
	addl	$0, %esp
	fmull	.LC139			# .LC139 holds the double 1/4096
	fmull	.LC140			# .LC140 holds the double 0.001
	fstpl	(%esp)			# the double becomes the printf argument
	pushl	$.LC141
	call	printf
	leal	-12(%ebp), %esp		# point %esp at the saved registers
	xorl	%eax, %eax		# return 0
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.Lfe2:
	.size	main,.Lfe2-main
	.local	F
	.comm	F,16384,32
	.local	I
	.comm	I,16384,32
	.ident	"GCC: (GNU) 3.0.2 20010903 (prerelease)"
	.file	"test.c"
	.text
	.align 16
	.type	convert,@function
# Hand-optimized convert(dst, src, len): the x87 control word is switched to
# round-toward-minus-infinity once before the loop and restored once after
# it, so each element costs one load (flds) and one rounding store (fistpl).
convert:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi			# -4
	pushl	%esi			# -8
	pushl	%ebx			# -12
	pushl	%eax			# -16: reserves 4 bytes for the control words
	
	movl	16(%ebp), %ecx		# len
	movl	 8(%ebp), %esi		# dst
	movl	12(%ebp), %ebx		# src
	test	%ecx, %ecx
	je	.Lende			# len == 0: nothing to convert



	fnstcw 	-16(%ebp)		# save the current control word at -16
	movl	-16(%ebp), %eax		# 32-bit load; the mask below drops the upper half
	andl	$62463, %eax		# 0xF3FF: clear rounding-control bits 10-11
	orl	$1024, %eax		# 0x0400: select round toward -infinity
	movw	%ax, -14(%ebp)		# modified copy is kept at -14

	fldcw 	-14(%ebp)		# rounding mode now matches floor()


	.p2align 4



.Lloop:
	flds	(%ebx)			# st(0) = *src
	addl	$4, %ebx
	fistpl	(%esi)			# *dst = st(0) rounded down, then pop
	addl	$4, %esi
	decl	%ecx
	jne	.Lloop

	fldcw 	-16(%ebp)		# restore the saved control word


.Lende:
	popl	%eax			# drops the scratch slot
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	
.Lfe1:
	.size	convert,.Lfe1-convert
	.section	.rodata
.LC141:
	.string	"%f clocks/convertion\n"
	.align 8
.LC139:
	.long	0x0,0x3f300000
	.align 8
.LC140:
	.long	0xd2f1a9fc,0x3f50624d
	.text
	.align 16
.globl main
	.type	main,@function
# Routine main as compiled by GCC 3.0.2: times an empty 1000-pass loop and a
# 1000-pass loop calling convert(I, F, 4096), then prints the difference
# scaled to time-stamp-counter ticks per converted element.
main:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	subl	$28, %esp
	andl	$-16, %esp		# align the stack pointer to 16 bytes
	subl	$4, %esp
	pushl	$4096
	pushl	$F
	pushl	$I
	call	convert			# untimed call: convert(I, F, 4096)
	movl	$3, (%esp)		# reuse the top argument slot for iopl(3)
	call	iopl
#APP
	cli				# mask interrupts for the measurement
	rdtsc				# t1 in %edx:%eax
#NO_APP
	xorl	%ebx, %ebx		# i = 0
	addl	$16, %esp
	movl	%eax, -24(%ebp)		# t1, low half
	movl	%edx, -20(%ebp)		# t1, high half
	.p2align 4
.L373:					# empty loop, 1000 passes
	incl	%ebx
	cmpl	$999, %ebx
	jbe	.L373
#APP
	rdtsc				# t2
#NO_APP
	movl	%eax, %esi		# t2 kept in %edi:%esi
	movl	%edx, %edi
#APP
	rdtsc				# t3
#NO_APP
	xorl	%ebx, %ebx		# i = 0
	movl	%eax, -32(%ebp)		# t3, low half
	movl	%edx, -28(%ebp)		# t3, high half
	.p2align 4
.L379:					# timed loop, 1000 calls of convert
	subl	$4, %esp
	incl	%ebx
	pushl	$4096
	pushl	$F
	pushl	$I
	call	convert
	addl	$16, %esp
	cmpl	$999, %ebx
	jbe	.L379
#APP
	rdtsc				# t4 in %edx:%eax
	sti				# interrupts back on
#NO_APP
	addl	-24(%ebp), %eax		# t = t4 + t1 ...
	adcl	-20(%ebp), %edx
	subl	%esi, %eax		# ... - t2 ...
	sbbl	%edi, %edx
	subl	-32(%ebp), %eax		# ... - t3
	sbbl	-28(%ebp), %edx
	subl	$4, %esp
	pushl	%edx
	pushl	%eax
	fildll	(%esp)			# load t as a 64-bit integer
	addl	$0, %esp
	fmull	.LC139			# .LC139 holds the double 1/4096
	fmull	.LC140			# .LC140 holds the double 0.001
	fstpl	(%esp)			# the double becomes the printf argument
	pushl	$.LC141
	call	printf
	leal	-12(%ebp), %esp		# point %esp at the saved registers
	xorl	%eax, %eax		# return 0
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.Lfe2:
	.size	main,.Lfe2-main
	.local	F
	.comm	F,16384,32
	.local	I
	.comm	I,16384,32
	.ident	"GCC: (GNU) 3.0.2 20010903 (prerelease)"
	.file	"test.c"
	.text
	.align 16
	.type	convert,@function
# Hand-optimized convert(dst, src, len), unrolled by two.
#
# Arguments (cdecl): 8(%ebp) = dst (int *), 12(%ebp) = src (const float *),
# 16(%ebp) = len (element count).
#
# The x87 control word is switched to round-toward-minus-infinity once, so
# that a plain fistpl yields floor(x); it is restored before returning.
# An odd len is handled by converting one element ahead of the paired loop.
# The odd element is processed before the pair count is tested, so len == 1
# converts its single element and does not leave through the zero-pairs exit.
convert:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi			# -4
	pushl	%esi			# -8
	pushl	%ebx			# -12
	pushl	%eax			# -16: reserves 4 bytes for the control words

	movl	16(%ebp), %ecx		# len
	movl	 8(%ebp), %esi		# dst
	movl	12(%ebp), %ebx		# src
	test	%ecx, %ecx
	je	.Lende			# len == 0: nothing to convert

	fnstcw 	-16(%ebp)		# save the current control word at -16
	movl	-16(%ebp), %eax		# 32-bit load; the mask below drops the upper half
	andl	$62463, %eax		# 0xF3FF: clear rounding-control bits 10-11
	orl	$1024, %eax		# 0x0400: select round toward -infinity
	movw	%ax, -14(%ebp)		# modified copy is kept at -14

	fldcw 	-14(%ebp)		# rounding mode now matches floor()

	shrl	%ecx			# %ecx = number of pairs, CF = len & 1
	jnc	.Lpairs			# even length: no leading single element

	flds	(%ebx)			# odd length: convert one element first
	fistpl	(%esi)
	addl	$4, %ebx
	addl	$4, %esi

.Lpairs:
	test	%ecx, %ecx		# addl above changed the flags, so test again
	jz	.L2			# no pairs left (len was 1)

	.p2align 4
.Lloop:
	flds	(%ebx)
	fistpl	(%esi)
	flds	4(%ebx)
	addl	$8, %ebx
	fistpl	4(%esi)
	addl	$8, %esi
	dec	%ecx
	jne	.Lloop

.L2:
	fldcw 	-16(%ebp)		# restore the saved control word

.Lende:
	popl	%eax			# drops the scratch slot
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	
.Lfe1:
	.size	convert,.Lfe1-convert
	.section	.rodata
.LC141:
	.string	"%f clocks/convertion\n"
	.align 8
.LC139:
	.long	0x0,0x3f300000
	.align 8
.LC140:
	.long	0xd2f1a9fc,0x3f50624d
	.text
	.align 16
.globl main
	.type	main,@function
# Routine main as compiled by GCC 3.0.2: times an empty 1000-pass loop and a
# 1000-pass loop calling convert(I, F, 4096), then prints the difference
# scaled to time-stamp-counter ticks per converted element.
main:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	subl	$28, %esp
	andl	$-16, %esp		# align the stack pointer to 16 bytes
	subl	$4, %esp
	pushl	$4096
	pushl	$F
	pushl	$I
	call	convert			# untimed call: convert(I, F, 4096)
	movl	$3, (%esp)		# reuse the top argument slot for iopl(3)
	call	iopl
#APP
	cli				# mask interrupts for the measurement
	rdtsc				# t1 in %edx:%eax
#NO_APP
	xorl	%ebx, %ebx		# i = 0
	addl	$16, %esp
	movl	%eax, -24(%ebp)		# t1, low half
	movl	%edx, -20(%ebp)		# t1, high half
	.p2align 4
.L373:					# empty loop, 1000 passes
	incl	%ebx
	cmpl	$999, %ebx
	jbe	.L373
#APP
	rdtsc				# t2
#NO_APP
	movl	%eax, %esi		# t2 kept in %edi:%esi
	movl	%edx, %edi
#APP
	rdtsc				# t3
#NO_APP
	xorl	%ebx, %ebx		# i = 0
	movl	%eax, -32(%ebp)		# t3, low half
	movl	%edx, -28(%ebp)		# t3, high half
	.p2align 4
.L379:					# timed loop, 1000 calls of convert
	subl	$4, %esp
	incl	%ebx
	pushl	$4096
	pushl	$F
	pushl	$I
	call	convert
	addl	$16, %esp
	cmpl	$999, %ebx
	jbe	.L379
#APP
	rdtsc				# t4 in %edx:%eax
	sti				# interrupts back on
#NO_APP
	addl	-24(%ebp), %eax		# t = t4 + t1 ...
	adcl	-20(%ebp), %edx
	subl	%esi, %eax		# ... - t2 ...
	sbbl	%edi, %edx
	subl	-32(%ebp), %eax		# ... - t3
	sbbl	-28(%ebp), %edx
	subl	$4, %esp
	pushl	%edx
	pushl	%eax
	fildll	(%esp)			# load t as a 64-bit integer
	addl	$0, %esp
	fmull	.LC139			# .LC139 holds the double 1/4096
	fmull	.LC140			# .LC140 holds the double 0.001
	fstpl	(%esp)			# the double becomes the printf argument
	pushl	$.LC141
	call	printf
	leal	-12(%ebp), %esp		# point %esp at the saved registers
	xorl	%eax, %eax		# return 0
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.Lfe2:
	.size	main,.Lfe2-main
	.local	F
	.comm	F,16384,32
	.local	I
	.comm	I,16384,32
	.ident	"GCC: (GNU) 3.0.2 20010903 (prerelease)"

test

test-opt

test-opt2

test:		87.268878 clocks/convertion
test-opt:	 4.654123 clocks/convertion	Ratio: 1 : 18.75
test-opt2:	 3.995673 clocks/convertion	Ratio: 1 : 21.84



-rw-r--r--   1 pfk      users         813 Sep  7 00:38 test.c		Source file
-rwxr-xr-x   1 root     root         2183 Sep  7 00:25 test.S		Code generated by gcc-3.0.2
-rwxr-xr-x   1 root     root         1954 Sep  7 00:38 test-opt.S	Slightly hand-optimized code
-rwxr-xr-x   1 root     root         1954 Sep  7 00:38 test-opt2.S	More heavily optimized code
-rwxr-xr-x   1 root     root        12140 Sep  7 00:25 test		Exec of test.c
-rwxr-xr-x   1 root     root        12076 Sep  7 00:38 test-opt		Exec of test-opt.S
-rwxr-xr-x   1 root     root        12076 Sep  7 00:38 test-opt2		Exec of test-opt2.S
-rw-r--r--   1 root     root           73 Sep  7 00:39 test.speed	This file

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]