This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: Performance of Integer Multiplication on PIII
- To: Jan Hubicka <jh at suse dot cz>
- Subject: Re: Performance of Integer Multiplication on PIII
- From: Kevin Atkinson <kevin at atkinson dot dhs dot org>
- Date: Mon, 5 Nov 2001 14:23:45 -0500 (EST)
- Cc: Tim Prince <tprince at computer dot org>, <gcc at gcc dot gnu dot org>,<rth at cygnus dot com>
On Mon, 5 Nov 2001, Jan Hubicka wrote:
> The attached patch should fix all three problems. Your testcase still
> does use some unwound multiplies, but runs faster on celeron machines here
> in lab than the assembly one you supplied.
Ok. Here are some more results including using your code.
$ gcc-3.0.2 -O2 -march=i686 read.c read-empty.c t.c && a.out
Loop: 1.33, Code: 4.72
Clocks: 35.16
$ gcc -O2 -march=i686 read.c read-empty.c t.c && a.out
Loop: 1.32, Code: 3.59
Clocks: 26.74
$ gcc -O2 -march=i686 read.hand.s read-empty.c t.c && a.out
Loop: 1.30, Code: 1.95
Clocks: 14.53
$ gcc -O2 -march=i686 read.new.s read-empty.c t.c && a.out
Loop: 1.32, Code: 2.32
Clocks: 17.28
read.new.s is your assembly. gcc is the compiler shipped with Mandrake 8.1
and Gcc 3.0.2 should be obvious.
So, my code still does better on my machine, however the new assembly
output is certainly acceptable. Especially since you say it outperforms my
code on your machine. A few clock cycles won't make that much difference...
Thanks for your effort. Will this patch appear in the next Gcc 3.0.*
release or will it have to wait for Gcc 3.1? If there is a way of
measuring performance in Gcc test cases, may I also suggest that you
use my read.c and part of t.c as a test case to make sure you don't go
backward performance-wise.
I attached the assembly output of gcc and gcc-3.0.2 in case you want to
compare. Sorry that I don't have Gcc 2.95.* installed, perhaps someone
can run my tests on that compiler. If you do, please let me know what
machine it is on and adjust the cpu_speed for your machine. (On a Linux
system cat /proc/cpuinfo will give you an accurate number to use).
>
> Assembly now is:
>
> .globl read
> .type read,@function
> read:
> subl $8, %esp
> movl %ebx, (%esp)
> movl 12(%esp), %ebx
> movl %esi, 4(%esp)
> movl 20(%ebx), %eax
> movl 24(%ebx), %edx
> movl 12(%ebx), %esi
> imull $14406, %eax, %eax
> imull $86436, %edx, %edx
> imull $343, %esi, %esi
> movl 4(%ebx), %ecx
> addl %edx, %eax
> movl 16(%ebx), %edx
> imull $2401, %edx, %edx
> addl %edx, %esi
> leal 0(,%ecx,8), %edx
> subl %ecx, %edx
> imull $49, 8(%ebx), %ecx
> addl %ecx, %edx
> movl (%ebx), %ecx
> addl %ecx, %edx
> addl %edx, %esi
> movl 28(%ebx), %edx
> addl %esi, %eax
> movl (%esp), %ebx
> imull $518616, %edx, %edx
> movl 4(%esp), %esi
> addl $8, %esp
> addl %edx, %eax
> ret
>
.file "read.c"
.version "01.01"
gcc2_compiled.:
.text
.align 16
.globl read
.type read,@function
# ----------------------------------------------------------------------
# read -- weighted sum of eight 32-bit words.
# Syntax: GNU as, AT&T operand order (source, destination), 32-bit x86.
# This listing is compiler output; see the .ident string at the end for
# the compiler that produced it.
#
# In:    8(%ebp) (first stack argument once the frame is set up) is used
#        as a base pointer. The 32-bit words at byte offsets 0, 4, ...,
#        28 from it are read; they are called w0..w7 in the comments.
# Out:   %eax = w0 + 7*w1 + 49*w2 + 343*w3 + 2401*w4
#               + 14406*w5 + 86436*w6 + 518616*w7   (wraps at 32 bits)
# Saved: %ebp, %edi, %esi, %ebx are pushed on entry and popped before ret.
# Clobb: %ecx, %edx, flags. No memory is written apart from the pushes.
#
# NOTE(review): read.c is not part of this listing, so the C-level
# signature and the meaning of the eight words are not shown here.
#
# In this version the factors 7, 49, 343 and 2401 are expanded into
# leal/sall/addl/subl chains; 14406, 86436 and 518616 use imull.
# ----------------------------------------------------------------------
read:
pushl %ebp
movl %esp, %ebp                 # set up frame pointer
pushl %edi
movl 8(%ebp), %edi              # edi = base pointer argument
pushl %esi
pushl %ebx
movl 20(%edi), %eax             # eax = w5
movl 24(%edi), %edx             # edx = w6
movl 16(%edi), %ecx             # ecx = w4 (kept for the final +w4 below)
imull $14406, %eax, %eax        # eax = 14406*w5
imull $86436, %edx, %edx        # edx = 86436*w6
leal (%ecx,%ecx,4), %ebx        # ebx = 5*w4
addl %edx, %eax                 # eax = 14406*w5 + 86436*w6
movl 12(%edi), %edx             # edx = w3
# 343*w3 built as ((w3*9)*2 + w3)*9*2 + w3
leal (%edx,%edx,8), %esi        # esi = 9*w3
leal (%edx,%esi,2), %esi        # esi = 19*w3
leal (%esi,%esi,8), %esi        # esi = 171*w3
leal (%edx,%esi,2), %esi        # esi = 343*w3
# 2401*w4 built as ((5*w4 << 4) - 5*w4 << 5) + w4
movl %ebx, %edx                 # edx = 5*w4
sall $4, %edx                   # edx = 80*w4
subl %ebx, %edx                 # edx = 75*w4
movl 8(%edi), %ebx              # ebx = w2 (5*w4 in ebx is no longer needed)
sall $5, %edx                   # edx = 2400*w4
addl %ecx, %edx                 # edx = 2401*w4
addl %edx, %esi                 # esi = 343*w3 + 2401*w4
movl 4(%edi), %edx              # edx = w1
leal 0(,%edx,8), %ecx           # ecx = 8*w1
subl %edx, %ecx                 # ecx = 7*w1
leal (%ebx,%ebx,2), %edx        # edx = 3*w2
sall $4, %edx                   # edx = 48*w2
addl %ebx, %edx                 # edx = 49*w2
popl %ebx                       # w2 consumed; restore caller's ebx early
addl %edx, %ecx                 # ecx = 7*w1 + 49*w2
movl (%edi), %edx               # edx = w0
addl %edx, %ecx                 # ecx = w0 + 7*w1 + 49*w2
movl 28(%edi), %edx             # edx = w7 (last use of edi as pointer)
addl %ecx, %esi                 # esi = w0 + 7*w1 + 49*w2 + 343*w3 + 2401*w4
addl %esi, %eax                 # eax = every term except the w7 one
popl %esi
imull $518616, %edx, %edx       # edx = 518616*w7
popl %edi
addl %edx, %eax                 # eax = complete sum (return value)
popl %ebp
ret
.Lfe1:
.size read,.Lfe1-read
.ident "GCC: (GNU) 2.96 20000731 (Mandrake Linux 8.1 2.96-0.62mdk)"
.file "read.c"
.text
.align 16
.globl read
.type read,@function
# ----------------------------------------------------------------------
# read -- weighted sum of eight 32-bit words.
# Syntax: GNU as, AT&T operand order (source, destination), 32-bit x86.
# This listing is compiler output; see the .ident string at the end for
# the compiler that produced it.
#
# In:    8(%ebp) (first stack argument once the frame is set up) is used
#        as a base pointer. The 32-bit words at byte offsets 0, 4, ...,
#        28 from it are read; they are called w0..w7 in the comments.
# Out:   %eax = w0 + 7*w1 + 49*w2 + 343*w3 + 2401*w4
#               + 14406*w5 + 86436*w6 + 518616*w7   (wraps at 32 bits)
# Saved: %ebp, %edi, %esi, %ebx are pushed on entry and popped before ret.
# Clobb: %ecx, %edx, flags. No memory is written apart from the pushes.
#
# NOTE(review): read.c is not part of this listing, so the C-level
# signature and the meaning of the eight words are not shown here.
#
# In this version only the factor 86436 uses imull; every other factor
# (7, 49, 343, 2401, 14406, 518616) is expanded into
# movl/sall/addl/subl/leal chains.
# ----------------------------------------------------------------------
read:
pushl %ebp
movl %esp, %ebp                 # set up frame pointer
pushl %edi
pushl %esi
movl 8(%ebp), %esi              # esi = base pointer argument
pushl %ebx
# 14406*w5 built as 2 * 7 * (4*257*w5 + w5); the final *2 is folded
# into the leal that adds the w6 term.
movl 20(%esi), %edx             # edx = w5
movl %edx, %eax
sall $8, %eax                   # eax = 256*w5
addl %edx, %eax                 # eax = 257*w5
leal (%edx,%eax,4), %eax        # eax = 1029*w5
movl 16(%esi), %edx             # edx = w4 (kept for the final +w4 below)
leal 0(,%eax,8), %edi           # edi = 8232*w5
subl %eax, %edi                 # edi = 7203*w5
movl 24(%esi), %eax             # eax = w6
leal (%edx,%edx,4), %ecx        # ecx = 5*w4
imull $86436, %eax, %eax        # eax = 86436*w6
leal (%eax,%edi,2), %edi        # edi = 86436*w6 + 14406*w5
movl 12(%esi), %eax             # eax = w3
# 343*w3 built as ((w3*9)*2 + w3)*9*2 + w3
leal (%eax,%eax,8), %ebx        # ebx = 9*w3
leal (%eax,%ebx,2), %ebx        # ebx = 19*w3
leal (%ebx,%ebx,8), %ebx        # ebx = 171*w3
leal (%eax,%ebx,2), %ebx        # ebx = 343*w3
# 2401*w4 built as ((5*w4 << 4) - 5*w4 << 5) + w4
movl %ecx, %eax                 # eax = 5*w4
sall $4, %eax                   # eax = 80*w4
subl %ecx, %eax                 # eax = 75*w4
movl 8(%esi), %ecx              # ecx = w2 (5*w4 in ecx is no longer needed)
sall $5, %eax                   # eax = 2400*w4
addl %edx, %eax                 # eax = 2401*w4
addl %eax, %ebx                 # ebx = 343*w3 + 2401*w4
movl 4(%esi), %eax              # eax = w1
leal 0(,%eax,8), %edx           # edx = 8*w1
subl %eax, %edx                 # edx = 7*w1
leal (%ecx,%ecx,2), %eax        # eax = 3*w2
sall $4, %eax                   # eax = 48*w2
addl %ecx, %eax                 # eax = 49*w2
addl %eax, %edx                 # edx = 7*w1 + 49*w2
movl (%esi), %eax               # eax = w0
addl %eax, %edx                 # edx = w0 + 7*w1 + 49*w2
movl 28(%esi), %eax             # eax = w7 (last use of esi as pointer)
addl %edx, %ebx                 # ebx = w0 + 7*w1 + 49*w2 + 343*w3 + 2401*w4
addl %ebx, %edi                 # edi = every term except the w7 one
popl %ebx
# 518616*w7 built as 8 * 63 * (4*257*w7 + w7); the final *8 is folded
# into the leal that produces the return value.
movl %eax, %edx
sall $8, %edx                   # edx = 256*w7
addl %eax, %edx                 # edx = 257*w7
leal (%eax,%edx,4), %edx        # edx = 1029*w7
movl %edx, %eax
popl %esi
sall $6, %eax                   # eax = 65856*w7
subl %edx, %eax                 # eax = 64827*w7
leal (%edi,%eax,8), %eax        # eax = edi + 518616*w7 (return value)
popl %edi
popl %ebp
ret
.Lfe1:
.size read,.Lfe1-read
.ident "GCC: (GNU) 3.0.2"