This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]

Re: Performance of Integer Multiplication on PIII

To: Jan Hubicka <jh at suse dot cz>
Subject: Re: Performance of Integer Multiplication on PIII
From: Kevin Atkinson <kevin at atkinson dot dhs dot org>
Date: Mon, 5 Nov 2001 14:23:45 -0500 (EST)
Cc: Tim Prince <tprince at computer dot org>, <gcc at gcc dot gnu dot org>,<rth at cygnus dot com>

On Mon, 5 Nov 2001, Jan Hubicka wrote:

> The attached patch should fix all three problems.  Your testcase still
> does use some unwound multiplies, but runs faster on celeron machines here
> in lab than the assembly one you supplied.

Ok.  Here are some more results including using your code.

$ gcc-3.0.2 -O2 -march=i686 read.c read-empty.c t.c && a.out
Loop: 1.33, Code: 4.72
Clocks: 35.16
$ gcc -O2 -march=i686 read.c read-empty.c t.c && a.out
Loop: 1.32, Code: 3.59
Clocks: 26.74
$ gcc -O2 -march=i686 read.hand.s read-empty.c t.c && a.out
Loop: 1.30, Code: 1.95
Clocks: 14.53
$ gcc -O2 -march=i686 read.new.s read-empty.c t.c && a.out
Loop: 1.32, Code: 2.32
Clocks: 17.28

read.new.s is your assembly. gcc is the compiler shipped with Mandrake 8.1
and Gcc 3.0.2 should be obvious.

So, my code still does better on my machine, however the new assembly
output is certainly acceptable.  Especially since you say it outperforms my
code on your machine.  A few clock cycles won't make that much difference.

Thanks for your effort.  Will this patch appear in the next Gcc 3.0.*
release, or will it have to wait for Gcc 3.1?  If there is a way of
measuring performance in Gcc test cases, may I also suggest that you
use my read.c and part of t.c as a test case to make sure you don't go
backward performance-wise.

I attached the assembly output of gcc and gcc-3.0.2 in case you want to
compare.  Sorry that I don't have Gcc 2.95.* installed, perhaps someone
can run my tests on that compiler.  If you do, please let me know what
machine it is on and adjust the cpu_speed for your machine.  (On a Linux
system cat /proc/cpuinfo will give you an accurate number to use).

>
> Assembly now is:
>
> .globl read
> 	.type	read,@function
> read:	# eax = p[0] + 7*p[1] + 49*p[2] + 343*p[3] + 2401*p[4] + 14406*p[5] + 86436*p[6] + 518616*p[7]; p[i] = 32-bit word at byte offset 4*i from the pointer argument
> 	subl	$8, %esp	# reserve two stack slots for saving ebx and esi (no frame pointer)
> 	movl	%ebx, (%esp)	# save ebx
> 	movl	12(%esp), %ebx	# ebx = p, the first stack argument (8 bytes of saves + return address)
> 	movl	%esi, 4(%esp)	# save esi
> 	movl	20(%ebx), %eax	# eax = p[5]
> 	movl	24(%ebx), %edx	# edx = p[6]
> 	movl	12(%ebx), %esi	# esi = p[3]
> 	imull	$14406, %eax, %eax	# eax = 14406*p[5]
> 	imull	$86436, %edx, %edx	# edx = 86436*p[6]
> 	imull	$343, %esi, %esi	# esi = 343*p[3]
> 	movl	4(%ebx), %ecx	# ecx = p[1]
> 	addl	%edx, %eax	# eax = 14406*p[5] + 86436*p[6]
> 	movl	16(%ebx), %edx	# edx = p[4]
> 	imull	$2401, %edx, %edx	# edx = 2401*p[4]
> 	addl	%edx, %esi	# esi = 343*p[3] + 2401*p[4]
> 	leal	0(,%ecx,8), %edx	# edx = 8*p[1]
> 	subl	%ecx, %edx	# edx = 7*p[1]
> 	imull	$49, 8(%ebx), %ecx	# ecx = 49*p[2] (multiply straight from memory)
> 	addl	%ecx, %edx	# edx = 7*p[1] + 49*p[2]
> 	movl	(%ebx), %ecx	# ecx = p[0]
> 	addl	%ecx, %edx	# edx = p[0] + 7*p[1] + 49*p[2]
> 	addl	%edx, %esi	# esi = sum of the p[0]..p[4] terms
> 	movl	28(%ebx), %edx	# edx = p[7]
> 	addl	%esi, %eax	# eax = sum of the p[0]..p[6] terms
> 	movl	(%esp), %ebx	# restore ebx
> 	imull	$518616, %edx, %edx	# edx = 518616*p[7]
> 	movl	4(%esp), %esi	# restore esi
> 	addl	$8, %esp	# release the save area
> 	addl	%edx, %eax	# eax = complete sum (return value)
> 	ret
>

	.file	"read.c"
	.version	"01.01"
gcc2_compiled.:
.text
	.align 16
.globl read
	.type	 read,@function
# read -- compiler output for read.c (see the .ident string at the end).
#
# Syntax: AT&T / GAS, 32-bit x86.  The single argument is read from
# 8(%ebp), i.e. the first stack slot above the return address; it is
# used as a base pointer p.  Below, p[i] denotes the 32-bit word at
# byte offset 4*i from p.
#
# Result in eax (32-bit arithmetic):
#   p[0] + 7*p[1] + 49*p[2] + 343*p[3] + 2401*p[4]
#        + 14406*p[5] + 86436*p[6] + 518616*p[7]
#
# In this listing the multiplications by 7, 49, 343 and 2401 are
# expanded into lea/shift/add/sub sequences, while 14406, 86436 and
# 518616 use imull.
#
# ebx, esi, edi and ebp are pushed on entry and popped before ret; the
# pops are interleaved with the tail of the computation.
read:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	movl	8(%ebp), %edi		# edi = p
	pushl	%esi
	pushl	%ebx
	movl	20(%edi), %eax		# eax = p[5]
	movl	24(%edi), %edx		# edx = p[6]
	movl	16(%edi), %ecx		# ecx = p[4]
	imull	$14406, %eax, %eax	# eax = 14406*p[5]
	imull	$86436, %edx, %edx	# edx = 86436*p[6]
	leal	(%ecx,%ecx,4), %ebx	# ebx = 5*p[4]   (start of the *2401 chain)
	addl	%edx, %eax		# eax = 14406*p[5] + 86436*p[6]
	movl	12(%edi), %edx		# edx = p[3]
	# esi = 343*p[3], built as 9 -> 19 -> 171 -> 343
	leal	(%edx,%edx,8), %esi	# esi = 9*p[3]
	leal	(%edx,%esi,2), %esi	# esi = p[3] + 2*9*p[3]   = 19*p[3]
	leal	(%esi,%esi,8), %esi	# esi = 9*19*p[3]         = 171*p[3]
	leal	(%edx,%esi,2), %esi	# esi = p[3] + 2*171*p[3] = 343*p[3]
	# edx = 2401*p[4], built as 5 -> 80 -> 75 -> 2400 -> 2401
	movl	%ebx, %edx		# edx = 5*p[4]
	sall	$4, %edx		# edx = 80*p[4]
	subl	%ebx, %edx		# edx = 75*p[4]
	movl	8(%edi), %ebx		# ebx = p[2]  (5*p[4] no longer needed)
	sall	$5, %edx		# edx = 2400*p[4]
	addl	%ecx, %edx		# edx = 2401*p[4]
	addl	%edx, %esi		# esi = 343*p[3] + 2401*p[4]
	movl	4(%edi), %edx		# edx = p[1]
	leal	0(,%edx,8), %ecx	# ecx = 8*p[1]
	subl	%edx, %ecx		# ecx = 7*p[1]
	# edx = 49*p[2], built as 3 -> 48 -> 49
	leal	(%ebx,%ebx,2), %edx	# edx = 3*p[2]
	sall	$4, %edx		# edx = 48*p[2]
	addl	%ebx, %edx		# edx = 49*p[2]
	popl	%ebx			# restore ebx
	addl	%edx, %ecx		# ecx = 7*p[1] + 49*p[2]
	movl	(%edi), %edx		# edx = p[0]
	addl	%edx, %ecx		# ecx = p[0] + 7*p[1] + 49*p[2]
	movl	28(%edi), %edx		# edx = p[7]
	addl	%ecx, %esi		# esi = sum of the p[0]..p[4] terms
	addl	%esi, %eax		# eax = sum of the p[0]..p[6] terms
	popl	%esi			# restore esi
	imull	$518616, %edx, %edx	# edx = 518616*p[7]
	popl	%edi			# restore edi
	addl	%edx, %eax		# eax = complete sum (return value)
	popl	%ebp
	ret
.Lfe1:
	.size	 read,.Lfe1-read
	.ident	"GCC: (GNU) 2.96 20000731 (Mandrake Linux 8.1 2.96-0.62mdk)"

	.file	"read.c"
	.text
	.align 16
.globl read
	.type	read,@function
# read -- compiler output for read.c (see the .ident string at the end).
#
# Syntax: AT&T / GAS, 32-bit x86.  The single argument is read from
# 8(%ebp), i.e. the first stack slot above the return address; it is
# used as a base pointer p.  Below, p[i] denotes the 32-bit word at
# byte offset 4*i from p.
#
# Result in eax (32-bit arithmetic):
#   p[0] + 7*p[1] + 49*p[2] + 343*p[3] + 2401*p[4]
#        + 14406*p[5] + 86436*p[6] + 518616*p[7]
#
# In this listing only the multiplication by 86436 uses imull; every
# other constant (7, 49, 343, 2401, 14406, 518616) is expanded into
# lea/shift/add/sub sequences.
#
# ebx, esi, edi and ebp are pushed on entry and popped before ret; the
# pops are interleaved with the tail of the computation.
read:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	movl	8(%ebp), %esi		# esi = p
	pushl	%ebx
	# edi = 7203*p[5], built as 256 -> 257 -> 1029 -> 8232 -> 7203;
	# it is doubled to 14406*p[5] when combined with the p[6] term.
	movl	20(%esi), %edx		# edx = p[5]
	movl	%edx, %eax
	sall	$8, %eax		# eax = 256*p[5]
	addl	%edx, %eax		# eax = 257*p[5]
	leal	(%edx,%eax,4), %eax	# eax = p[5] + 4*257*p[5] = 1029*p[5]
	movl	16(%esi), %edx		# edx = p[4]
	leal	0(,%eax,8), %edi	# edi = 8232*p[5]
	subl	%eax, %edi		# edi = 7203*p[5]
	movl	24(%esi), %eax		# eax = p[6]
	leal	(%edx,%edx,4), %ecx	# ecx = 5*p[4]   (start of the *2401 chain)
	imull	$86436, %eax, %eax	# eax = 86436*p[6]
	leal	(%eax,%edi,2), %edi	# edi = 86436*p[6] + 2*7203*p[5] = 86436*p[6] + 14406*p[5]
	movl	12(%esi), %eax		# eax = p[3]
	# ebx = 343*p[3], built as 9 -> 19 -> 171 -> 343
	leal	(%eax,%eax,8), %ebx	# ebx = 9*p[3]
	leal	(%eax,%ebx,2), %ebx	# ebx = p[3] + 2*9*p[3]   = 19*p[3]
	leal	(%ebx,%ebx,8), %ebx	# ebx = 9*19*p[3]         = 171*p[3]
	leal	(%eax,%ebx,2), %ebx	# ebx = p[3] + 2*171*p[3] = 343*p[3]
	# eax = 2401*p[4], built as 5 -> 80 -> 75 -> 2400 -> 2401
	movl	%ecx, %eax		# eax = 5*p[4]
	sall	$4, %eax		# eax = 80*p[4]
	subl	%ecx, %eax		# eax = 75*p[4]
	movl	8(%esi), %ecx		# ecx = p[2]  (5*p[4] no longer needed)
	sall	$5, %eax		# eax = 2400*p[4]
	addl	%edx, %eax		# eax = 2401*p[4]
	addl	%eax, %ebx		# ebx = 343*p[3] + 2401*p[4]
	movl	4(%esi), %eax		# eax = p[1]
	leal	0(,%eax,8), %edx	# edx = 8*p[1]
	subl	%eax, %edx		# edx = 7*p[1]
	# eax = 49*p[2], built as 3 -> 48 -> 49
	leal	(%ecx,%ecx,2), %eax	# eax = 3*p[2]
	sall	$4, %eax		# eax = 48*p[2]
	addl	%ecx, %eax		# eax = 49*p[2]
	addl	%eax, %edx		# edx = 7*p[1] + 49*p[2]
	movl	(%esi), %eax		# eax = p[0]
	addl	%eax, %edx		# edx = p[0] + 7*p[1] + 49*p[2]
	movl	28(%esi), %eax		# eax = p[7]
	addl	%edx, %ebx		# ebx = sum of the p[0]..p[4] terms
	addl	%ebx, %edi		# edi = sum of the p[0]..p[6] terms
	popl	%ebx			# restore ebx
	# eax = 64827*p[7], built as 256 -> 257 -> 1029 -> 65856 -> 64827;
	# the final lea scales it by 8 to give 518616*p[7].
	movl	%eax, %edx
	sall	$8, %edx		# edx = 256*p[7]
	addl	%eax, %edx		# edx = 257*p[7]
	leal	(%eax,%edx,4), %edx	# edx = p[7] + 4*257*p[7] = 1029*p[7]
	movl	%edx, %eax
	popl	%esi			# restore esi (p is no longer needed)
	sall	$6, %eax		# eax = 65856*p[7]
	subl	%edx, %eax		# eax = 64827*p[7]
	leal	(%edi,%eax,8), %eax	# eax = edi + 518616*p[7] = complete sum (return value)
	popl	%edi			# restore edi
	popl	%ebp
	ret
.Lfe1:
	.size	read,.Lfe1-read
	.ident	"GCC: (GNU) 3.0.2"

Follow-Ups:
- Re: Performance of Integer Multiplication on PIII
  - From: Jan Hubicka

References:
- Re: Performance of Integer Multiplication on PIII
  - From: Jan Hubicka

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]