This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]

Re: Performance of Integer Multiplication on PIII (Results for gcc-2.95 & Athlon)

To: pete at ltoi dot iap dot physik dot tu-darmstadt dot de
Subject: Re: Performance of Integer Multiplication on PIII (Results for gcc-2.95 & Athlon)
From: Jan Hubicka <jh at suse dot cz>
Date: Mon, 5 Nov 2001 16:25:51 +0100
Cc: Jan Hubicka <jh at suse dot cz>, Kevin Atkinson <kevin at atkinson dot dhs dot org>, Tim Prince <tprince at computer dot org>, gcc at gcc dot gnu dot org
References: <20011105140852.G5501@atrey.karlin.mff.cuni.cz> <Pine.A32.4.40.0111051636570.14603-101000@ltoi.iap.physik.tu-darmstadt.de>

> 
> Hi,
Hi,
Could you please try the patch I've attached and see whether it solves the
slowdown?  It should, IMO.
> gcc-2.95.2,3 (haifa):
>  Loop: 1.01, Code: 2.57
>  Clocks: 19.15
> 
> gcc-2.95.2: (normal scheduler)
>  Loop: 1.01, Code: 3.02
>  Clocks: 22.50
> 
> gcc-3.0.2 & gcc-3.0-haifa
>  Loop: 0.90, Code: 3.35
>  Clocks: 24.96
> 
> gcc-295.2 (haifa) -O2 -march=i686 -s -o imul imul.c read_empty.c read_asm.s
>   Loop: 1.01, Code: 3.02
>   Clocks: 22.50
> 
> gcc-295.2 (haifa) -O2 -march=i686 -s -o imul imul.c read_empty.c read_iasm.s

Note that with -march=i686 the new gcc often performs worse on Athlon, but that is
mainly because it does more Athlon specific stuff.

> And now we outperform (again) the famous Intel compilers with gcc!
> 
> -ffplfj=2:= -fomit-frame-pointer -malign-loops=2 -malign-functions=2
>             -malign-jumps=2
> 
> gcc3.0.2 -O2 -march=athlon -ffplfj=2
>  Loop: 2.35, Code: 0.90
>  Clocks: 6.70

How much of this speedup is attributable to -fomit-frame-pointer and how
much to the alignment changes?


I am attaching the assembly of pent.c (w/o modification) that I get from
-march=athlon -O2 -fomit-frame-pointer on the mainline after my patch,
so you can try how it works for you.

Honza

	.file	"pent.c"
# Read-only constants referenced by main below (x86-32, GAS/AT&T syntax).
	.section	.rodata
.LC0:
	.string	"%d\n"	# printf format used for the first read() result
.LC1:
	.string	"%f\n"	# printf format used for the elapsed-time value
	.align 8	# align the floating-point constant that follows
.LC2:
	# Two 32-bit words, low word first: the IEEE-754 double 0x412E848000000000,
	# i.e. 1000000.0.  main divides the clock() difference by this value
	# (presumably CLOCKS_PER_SEC -- confirm against pent.c).
	.long	0x0,0x412e8480
	.text
	.align 2
	.p2align 4,,15	# pad to a 16-byte boundary (at most 15 bytes of padding)
.globl main
	.type	main,@function
#-----------------------------------------------------------------------
# main -- benchmark driver (32-bit x86, arguments passed on the stack).
#
# Fills an 8-word local array with 1,2,3,4,1,2,3,4, calls read() on it
# once and prints the result with "%d\n", then calls read() 67108864
# (2^26) times between two clock() calls and prints the clock difference
# divided by 1000000.0 with "%f\n".  Returns 0 in %eax.
#
# Register roles:
#   %esi = address of the local array (-56(%ebp)), kept across the calls
#   %ebx = loop counter
#   %edi = clock() value sampled before the loop
#-----------------------------------------------------------------------
main:
	pushl	%ebp
	movl	%esp, %ebp	# establish the frame pointer
	pushl	%edi	# save the three registers used below; restored in the epilogue
	pushl	%esi
	leal	-56(%ebp), %esi	# esi = &array[0]
	pushl	%ebx
	xorl	%ebx, %ebx	# loop counter = 0
	subl	$60, %esp	# reserve stack for the array and outgoing arguments
	movl	$4, -28(%ebp)	# array[7] = 4
	movl	$3, -32(%ebp)	# array[6] = 3
	andl	$-16, %esp	# round esp down to a multiple of 16; locals stay addressed via ebp
	movl	$1, -56(%ebp)	# array[0] = 1
	movl	$2, -52(%ebp)	# array[1] = 2
	movl	%esi, (%esp)	# argument 1 for read: pointer to the array
	movl	$3, -48(%ebp)	# array[2] = 3
	movl	$4, -44(%ebp)	# array[3] = 4
	movl	$1, -40(%ebp)	# array[4] = 1
	movl	$2, -36(%ebp)	# array[5] = 2
	call	read
	movl	$.LC0, (%esp)	# printf("%d\n", <result of read>)
	movl	%eax, 4(%esp)
	call	printf
	call	clock
	movl	%eax, %edi	# edi = clock() before the timed loop
	.p2align 4,,15	# align the loop head to 16 bytes
.L5:
	# Timed loop: call read(array) until ebx reaches 67108864 (2^26);
	# the return value is not used here.
	incl	%ebx
	movl	%esi, (%esp)	# argument 1: pointer to the array
	call	read
	cmpl	$67108864, %ebx
	jne	.L5
	call	clock
	subl	%edi, %eax	# eax = clock() after - clock() before
	movl	$.LC1, (%esp)	# printf argument 1: "%f\n"
	pushl	%eax	# spill the difference so the x87 unit can load it from memory
	fildl	(%esp)	# st(0) = difference, converted from 32-bit signed integer
	fdivl	.LC2	# st(0) /= 1000000.0
	addl	$4, %esp	# drop the temporary pushed above
	fstpl	4(%esp)	# printf argument 2: the quotient as an 8-byte double; pops st(0)
	call	printf
	leal	-12(%ebp), %esp	# point esp at the three saved registers (undoes the alignment)
	popl	%ebx
	popl	%esi
	popl	%edi
	xorl	%eax, %eax	# return value 0
	leave
	ret
.Lfe1:
	.size	main,.Lfe1-main
	.align 2
	.p2align 4,,15	# pad to a 16-byte boundary (at most 15 bytes of padding)
.globl read
	.type	read,@function
#-----------------------------------------------------------------------
# read -- weighted sum of eight 32-bit words (32-bit x86, stack argument).
#
# In:    4(%esp) at entry = pointer p to eight consecutive 32-bit words
# Out:   %eax = p[0] + 7*p[1] + 49*p[2] + 343*p[3] + 2401*p[4]
#               + 14406*p[5] + 86436*p[6] + 518616*p[7]
#        (computed with 32-bit arithmetic)
# Uses:  %ecx and %edx as scratch; %ebx, %esi and %edi are saved on entry
#        and restored before returning.
# The loads, multiplies and register restores are interleaved; the
# comments below track which partial sum each register holds.
#-----------------------------------------------------------------------
read:
	subl	$12, %esp	# room to save three registers
	movl	%esi, 4(%esp)	# save esi
	movl	%ebx, (%esp)	# save ebx
	movl	%edi, 8(%esp)	# save edi
	movl	16(%esp), %esi	# esi = p (12 bytes of saves + 4-byte return address)
	movl	20(%esi), %eax	# eax = p[5]
	movl	24(%esi), %edx	# edx = p[6]
	movl	12(%esi), %edi	# edi = p[3]
	movl	8(%esi), %ecx	# ecx = p[2]
	imull	$86436, %edx, %edx	# edx = 86436*p[6]
	imull	$14406, %eax, %eax	# eax = 14406*p[5]
	imull	$343, %edi, %edi	# edi = 343*p[3]
	addl	%edx, %eax	# eax = 14406*p[5] + 86436*p[6]
	movl	16(%esi), %edx	# edx = p[4]
	imull	$2401, %edx, %edx	# edx = 2401*p[4]
	addl	%edx, %edi	# edi = 343*p[3] + 2401*p[4]
	movl	4(%esi), %edx	# edx = p[1]
	leal	0(,%edx,8), %ebx	# ebx = 8*p[1]
	subl	%edx, %ebx	# ebx = 7*p[1]
	leal	(%ecx,%ecx,2), %edx	# edx = 3*p[2]
	sall	$4, %edx	# edx = 48*p[2]
	addl	%ecx, %edx	# edx = 49*p[2]
	addl	%edx, %ebx	# ebx = 7*p[1] + 49*p[2]
	movl	28(%esi), %edx	# edx = p[7]
	addl	(%esi), %ebx	# ebx = p[0] + 7*p[1] + 49*p[2]
	movl	4(%esp), %esi	# restore esi (p is not needed after this point)
	imull	$518616, %edx, %edx	# edx = 518616*p[7]
	addl	%ebx, %edi	# edi = p[0] + 7*p[1] + 49*p[2] + 343*p[3] + 2401*p[4]
	movl	(%esp), %ebx	# restore ebx
	addl	%edi, %eax	# eax = sum of the p[0]..p[6] terms
	movl	8(%esp), %edi	# restore edi
	addl	$12, %esp	# release the save area
	addl	%edx, %eax	# eax += 518616*p[7]: final result
	ret
.Lfe2:
	.size	read,.Lfe2-read
	# Compiler identification string.
	.ident	"GCC: (GNU) 3.1 20011103 (experimental)"

Follow-Ups:
- Re: Performance of Integer Multiplication on PIII (Results for gcc-2.95 & Athlon)
  - From: pete
- Re: Performance of Integer Multiplication on PIII (Results for gcc-2.95 & Athlon)
  - From: pete
- Re: Performance of Integer Multiplication on PIII (Results for gcc-2.95 & Athlon)
  - From: pete

References:
- Re: Performance of Integer Multiplication on PIII
  - From: Jan Hubicka
- Re: Performance of Integer Multiplication on PIII (Results for gcc-2.95 & Athlon)
  - From: pete

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]