This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

-fssa kicks butt on alphaev6 ieee floating point code


-fssa definitely helps IEEE floating-point register allocation on
alphaev6.  

The executive summary: with 

-mcpu=ev6 -fno-math-errno -fPIC -O2

the following test file was scheduled in the following number of clocks:

			-fssa		-fnossa
-mieee			29		60
(no ieee)		30(!)		48

The details: with this test file:

/*
 * Test case: forms the element-wise products x[i] * y[i] for i = 0..6
 * and returns the sum of the products for i = 1..6.
 *
 * NOTE(review): r0 (x[0] * y[0]) is computed but does not appear in the
 * return expression, and r7 is only ever a temporary holding y[6].
 * Whether leaving r0 out was intentional is not stated; the assembly
 * listings that follow contain no load from offset 0, so the code is
 * kept exactly as posted.
 */
double
inner (double *x, double *y)
{
  double r0, r1, r2, r3, r4, r5, r6, r7;
    /* Each line loads x[i] and y[i], then overwrites r<i> with their
       product; r<i+1> is reused as the temporary for y[i]. */
    r0 = x[0]; r1 = y[0]; r0 = r0 * r1;
    r1 = x[1]; r2 = y[1]; r1 = r1 * r2;
    r2 = x[2]; r3 = y[2]; r2 = r2 * r3;
    r3 = x[3]; r4 = y[3]; r3 = r3 * r4;
    r4 = x[4]; r5 = y[4]; r4 = r4 * r5;
    r5 = x[5]; r6 = y[5]; r5 = r5 * r6;
    r6 = x[6]; r7 = y[6]; r6 = r6 * r7;
    /* The sum is written right-nested: (r5 + r6) is the innermost
       addition and r1 is added last. */
    return (r1 + (r2 + (r3 + (r4 + (r5 + r6)))));
}

with gcc 20000315, compiled with

 gcc -mcpu=ev6 -fno-math-errno -mieee -fPIC -O2 -S test.c

With -fssa, I get

	.file	1 "test.c"
	# Compiler output for inner() as posted; per the surrounding message
	# this is the build with -fssa.  The body contains 12 ldt loads,
	# 6 multsu, 5 addtsu and no fmov register copies.
	.set noat
	.set noreorder
	.arch ev6
.text
	.align 5
	.globl inner
	.ent inner
inner:
	.eflag 48
	.frame $30,0,$26,0
$inner..ng:
	.prologue 0
	# Loads use offsets 8..48 (steps of 8) from $16 and $17, and nothing
	# is loaded from offset 0.  Presumably $16 = x and $17 = y -- confirm
	# against the Alpha calling convention.
	ldt $f11,48($16)
	ldt $f10,48($17)
	ldt $f14,40($16)
	ldt $f12,40($17)
	ldt $f22,32($17)
	ldt $f28,32($16)
	ldt $f13,16($16)
	ldt $f26,24($16)
	# Remaining loads are interleaved with the multiplies; each multsu
	# writes a register distinct from both of its inputs.
	multsu $f11,$f10,$f27
	ldt $f10,16($17)
	ldt $f11,8($17)
	multsu $f14,$f12,$f23
	ldt $f25,24($17)
	ldt $f24,8($16)
	multsu $f28,$f22,$f14
	multsu $f13,$f10,$f22
	# First addition combines the products of the offset-40 and offset-48
	# operands; every later addtsu adds one more product to the previous
	# addtsu result.
	addtsu $f23,$f27,$f12
	multsu $f26,$f25,$f15
	multsu $f24,$f11,$f13
	addtsu $f14,$f12,$f10
	addtsu $f15,$f10,$f11
	addtsu $f22,$f11,$f12
	# Final sum is written to $f0 (presumably the FP return register --
	# confirm against the Alpha calling convention).
	addtsu $f13,$f12,$f0
	ret $31,($26),1
	.end inner
	.ident	"GCC: (GNU) 2.96 20000315 (experimental)"

which is scheduled in 29 clocks; without -fssa, I get

	.file	1 "test.c"
	# Compiler output for inner() as posted; per the surrounding message
	# this is the build without -fssa.  The body contains 12 ldt loads,
	# 6 multsu, 5 addtsu and 8 fmov register copies.
	.set noat
	.set noreorder
	.arch ev6
.text
	.align 5
	.globl inner
	.ent inner
inner:
	.eflag 48
	.frame $30,0,$26,0
$inner..ng:
	.prologue 0
	# All six loads from $17 (offsets 8..48) and the first load from $16
	# are issued up front.  Presumably $16 = x and $17 = y -- confirm
	# against the Alpha calling convention.
	ldt $f22,8($17)
	ldt $f13,8($16)
	ldt $f12,16($17)
	ldt $f15,24($17)
	ldt $f14,32($17)
	ldt $f10,40($17)
	ldt $f11,48($17)
	# Every multsu below writes $f23, and an fmov copies $f23 into
	# another register before the next multsu overwrites it.
	multsu $f13,$f22,$f23
	ldt $f22,16($16)
	fmov $f23,$f13
	multsu $f22,$f12,$f23
	ldt $f12,24($16)
	fmov $f23,$f22
	multsu $f12,$f15,$f23
	ldt $f15,32($16)
	fmov $f23,$f12
	multsu $f15,$f14,$f23
	ldt $f14,40($16)
	fmov $f23,$f15
	multsu $f14,$f10,$f23
	ldt $f10,48($16)
	fmov $f23,$f14
	multsu $f10,$f11,$f23
	fmov $f23,$f10
	# Summation: five addtsu, each adding one product to the running
	# sum.  The results of the first and third addtsu are copied with
	# fmov before they are consumed.
	addtsu $f14,$f10,$f11
	fmov $f11,$f10
	addtsu $f15,$f10,$f11
	addtsu $f12,$f11,$f23
	fmov $f23,$f12
	addtsu $f22,$f12,$f10
	# Final sum is written to $f0 (presumably the FP return register --
	# confirm against the Alpha calling convention).
	addtsu $f13,$f10,$f0
	ret $31,($26),1
	.end inner
	.ident	"GCC: (GNU) 2.96 20000315 (experimental)"

which is scheduled in 60 clocks.

So this is great! I did the same tests without -mieee, and there were
no fmovs, but the -fssa code was scheduled in 30 cycles (longer than the
-mieee code!), and the code without -fssa was scheduled in 48 cycles.

So now I'll run the test suite with -fssa enabled, if I can figure out
how to do it.

Brad Lucier

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]