This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: Sun compares their compiler tools to GNU tools
> Those modest improvements will hopefully get much better
> once I figure out what has caused Alpha's slow decline
> over the past few months. Peak performance was in July,
> and we've lost about two points since then.
Well, to put up some food for thought, recently I got the following
for the second loop in the Fortran source below (left is about a month
old, right is yesterday's snapshot):
$L432: | $L206:
ldt $f10,-16($4) | s8subq $23,16,$2
ldt $f13,0($3) | s8subq $23,8,$5
> addq $2,$22,$2
> addq $5,$12,$4
> ldq $26,544($30)
> addq $2,$8,$2
> addq $4,$27,$4
> addq $10,$2,$2
> addq $26,$4,$1
> ldt $f10,0($2)
> addq $5,$22,$6
> ldt $f11,0($1)
> ldq $2,600($30)
> addq $6,$8,$7
> fnop
> fnop
> s8addq $23,$22,$3
> ldq $26,552($30)
> mult $f11,$f10,$f11
> fnop
> addq $2,$7,$1
> addq $3,$8,$3
> addq $26,$4,$2
> addq $10,$3,$3
> ldt $f10,0($3)
> ldt $f13,0($1)
> ldt $f12,0($2)
> addq $5,$14,$1
> ldq $2,560($30)
> subt $f13,$f11,$f13
> addq $1,$8,$1
> fnop
> mult $f12,$f10,$f12
> addq $10,$1,$1
> addq $5,$11,$3
> fnop
> addq $2,$4,$4
> ldt $f10,0($1)
ldt $f11,0($4) ldt $f11,0($4)
ldt $f12,0($7) | addq $5,$19,$2
mult $f13,$f10,$f13 | ldq $26,568($30)
ldt $f22,0($1) | addq $3,$16,$3
ldt $f25,0($5) | fnop
fnop | fnop
ldt $f10,0($6) | mult $f11,$f10,$f11
ldt $f26,0($8) | subt $f13,$f12,$f13
fnop | addq $2,$8,$2
fnop | addq $6,$21,$1
mult $f22,$f11,$f22 | addq $26,$3,$3
ldt $f14,0($22) | addq $10,$2,$2
ldt $f24,0($2) | ldt $f10,0($2)
fnop | ldt $f12,0($3)
mult $f25,$f10,$f25 | ldq $2,584($30)
ldt $f15,0($23) | addq $10,$1,$1
ldt $f23,0($28) | fnop
fnop | fnop
subt $f12,$f13,$f12 | mult $f12,$f10,$f12
mult $f26,$f14,$f26 | subt $f13,$f11,$f13
ldt $f10,0($24) | addq $6,$0,$6
ldt $f11,-8($4) | addq $5,$13,$5
mult $f24,$f15,$f24 | addq $2,$7,$3
ldt $f13,0($25) | ldt $f10,0($1)
addq $24,8,$24 | ldt $f15,0($3)
fnop | addq $10,$6,$6
mult $f23,$f10,$f23 | ldq $3,592($30)
addq $25,8,$25 | ldt $f11,0($6)
addq $28,8,$28 | fnop
fnop | fnop
mult $f13,$f11,$f13 | mult $f15,$f10,$f15
addq $23,8,$23 | subt $f13,$f12,$f13
addq $2,8,$2 | ldq $26,576($30)
fnop | addq $5,$28,$5
subt $f12,$f22,$f12 | addq $3,$7,$1
addq $22,8,$22 | addl $23,1,$23
addq $8,8,$8 | ldt $f14,0($1)
fnop | addq $10,$7,$7
addq $6,8,$6 | addq $26,$5,$5
addq $5,8,$5 | ldt $f10,0($7)
addq $4,8,$4 | fnop
addq $1,8,$1 | fnop
addq $7,8,$7 | mult $f14,$f11,$f14
addq $3,8,$3 | subt $f13,$f15,$f13
subt $f12,$f25,$f12 | ldt $f12,0($5)
subl $9,1,$9 | subl $20,1,$20
subt $f12,$f26,$f12 | mult $f12,$f10,$f12
subt $f12,$f24,$f12 | subt $f13,$f14,$f13
subt $f12,$f23,$f12 | subt $f13,$f12,$f11
subt $f12,$f13,$f11 <
cpys $f31,$f11,$f11 cpys $f31,$f11,$f11
cmptle $f27,$f11,$f10 | cmptle $f22,$f11,$f10
fcmovne $f10,$f11,$f27 | fcmovne $f10,$f11,$f22
bge $9,$L432 | fnop
> bge $20,$L206
[ Obviously, something's wrong with strength reduction, because the
s8subq's at the top are a result of the -1, 0 and +1 elements below
- after strength reduction these instructions should be simple
offsets in addressing modes, not separate instructions ... ]
for the following Fortran code (second loop):
c     sl3d: iterative solver for a 7-point stencil system on an
c     n1 x n2 x n3 grid.  At every interior point (index 2..n-1 in
c     each direction) the equation being relaxed is
c
c        cmnn*u(i-1,j,k) + cpnn*u(i+1,j,k)
c      + cnmn*u(i,j-1,k) + cnpn*u(i,j+1,k)
c      + cnnm*u(i,j,k-1) + cnnp*u(i,j,k+1)
c      + cnnn*u(i,j,k)   = src(i,j,k)
c
c     with all coefficients taken at (i,j,k).  The loops in this
c     routine read the boundary planes of u but never write them.
c
c     Arguments
c       itmax    upper bound on the number of sweeps
c       n1,n2,n3 extents of every array argument
c       alfa     relaxation factor for 'SOR' and 'ZEBRA': the new
c                value is alfa*(stencil solve) + (1-alfa)*(old value)
c       eps      sweeps stop as soon as err0/err >= eps, where err
c                is the mean absolute change of u over the interior
c                in the current sweep and err0 is err of sweep 1
c       method   'SOR', 'DJACO', 'JACOBI' or 'ZEBRA', all upper or
c                all lower case; anything else prints a message and
c                stops the program
c       impcv    if nonzero, unit to which "it, err/err0" is written
c                after each sweep
c       cmnn..cnnp, cnnn   stencil coefficients (see above)
c       src      right-hand side
c       u        solution, updated in place
c       u1       work array; overwritten with a copy of u on entry
c                and used by 'JACOBI' as the previous iterate
c       resi     out: max |residual| over the interior on entry
c       resf     out: max |residual| over the interior on exit
c
c     resf is assigned on every return path: when the initial
c     residual is already below the threshold it equals resi, and
c     when a sweep changes u by no more than the threshold the
c     final residual is still evaluated before returning.
      subroutine sl3d(itmax,n1,n2,n3,alfa,eps,method,impcv,
     &                cmnn,cpnn,cnmn,cnpn,cnnn,cnnm,cnnp,
     &                src,u,u1,resi,resf)
      implicit double precision (a-h,o-z)
      dimension cmnn(n1,n2,n3),cpnn(n1,n2,n3),cnmn(n1,n2,n3),
     &          cnpn(n1,n2,n3),cnnn(n1,n2,n3),src(n1,n2,n3),
     &          u(n1,n2,n3),cnnm(n1,n2,n3),cnnp(n1,n2,n3),
     &          u1(n1,n2,n3)
      character*(*) method
c     Threshold below which a residual or a mean update counts as
c     zero.  The default-real literal is kept on purpose so that the
c     converted value is the one the comparisons always used.
      parameter (small=1.0e-20)

      alfa1=1.0d0-alfa

c     Snapshot of the whole of u (boundaries included) into u1.
      do k=1,n3
        do j=1,n2
          do i=1,n1
            u1(i,j,k)=u(i,j,k)
          end do
        end do
      end do

c     Max-norm of the residual before any sweep.
      resi=0.0d0
      do k=2,n3-1
        do j=2,n2-1
          do i=2,n1-1
            res=src(i,j,k)
     &         -cmnn(i,j,k)*u(i-1,j,k)-cpnn(i,j,k)*u(i+1,j,k)
     &         -cnmn(i,j,k)*u(i,j-1,k)-cnpn(i,j,k)*u(i,j+1,k)
     &         -cnnm(i,j,k)*u(i,j,k-1)-cnnp(i,j,k)*u(i,j,k+1)
     &         -cnnn(i,j,k)*u(i,j,k)
            resi=max(resi,abs(res))
          end do
        end do
      end do

c     Nothing to do: u is untouched, so the final residual is the
c     initial one.
      if (resi.le.small) then
        resf=resi
        return
      end if

c     Number of interior points, used to average the update size.
c     Each factor is converted separately so the count is neither
c     rounded through default real nor formed as an integer product
c     that could overflow; it is identical to the former float(...)
c     value whenever that value was exact.
      pts=dble(n1-2)*dble(n2-2)*dble(n3-2)

      if (method.eq.'SOR'.or.method.eq.'sor') then
c       In-place relaxed sweep: neighbours with a smaller index
c       already carry the values computed in this sweep.
        do it=1,itmax
          err=0.0d0
          do k=2,n3-1
            do j=2,n2-1
              do i=2,n1-1
                u0=u(i,j,k)
                u(i,j,k)=(src(i,j,k)
     &            -cmnn(i,j,k)*u(i-1,j,k)-cpnn(i,j,k)*u(i+1,j,k)
     &            -cnmn(i,j,k)*u(i,j-1,k)-cnpn(i,j,k)*u(i,j+1,k)
     &            -cnnm(i,j,k)*u(i,j,k-1)-cnnp(i,j,k)*u(i,j,k+1)
     &            )/cnnn(i,j,k)*alfa+alfa1*u(i,j,k)
                err=err+abs(u(i,j,k)-u0)
              end do
            end do
          end do
          err=err/pts
          if (it.eq.1) err0=err
c         Checked before the divisions by err and err0 below.
          if (err.le.small) goto 90
          if (impcv.ne.0) write(impcv,*) it,err/err0
          if (err0/err.ge.eps) goto 90
        end do
      else if (method.eq.'DJACO'.or.method.eq.'djaco') then
c       Delegated to an external routine; method is not forwarded.
        call jaco3d(itmax,n1,n2,n3,alfa,eps,impcv,
     &              cmnn,cpnn,cnmn,cnpn,cnnn,cnnm,cnnp,
     &              src,u,u1,resi,resf)
      else if (method.eq.'JACOBI'.or.method.eq.'jacobi') then
c       Every new u is built only from the previous iterate u1;
c       alfa is not used by this branch.
        do it=1,itmax
          err=0.0d0
          do k=2,n3-1
            do j=2,n2-1
              do i=2,n1-1
                u(i,j,k)=(src(i,j,k)
     &            -cmnn(i,j,k)*u1(i-1,j,k)-cpnn(i,j,k)*u1(i+1,j,k)
     &            -cnmn(i,j,k)*u1(i,j-1,k)-cnpn(i,j,k)*u1(i,j+1,k)
     &            -cnnm(i,j,k)*u1(i,j,k-1)-cnnp(i,j,k)*u1(i,j,k+1)
     &            )/cnnn(i,j,k)
                err=err+abs(u(i,j,k)-u1(i,j,k))
              end do
            end do
          end do
          err=err/pts
          if (it.eq.1) err0=err
          if (err.le.small) goto 90
          if (impcv.ne.0) write(impcv,*) it,err/err0
          if (err0/err.ge.eps) goto 90
c         Not converged yet: the interior of u becomes the previous
c         iterate for the next sweep.
          do k=2,n3-1
            do j=2,n2-1
              do i=2,n1-1
                u1(i,j,k)=u(i,j,k)
              end do
            end do
          end do
        end do
      else if (method.eq.'ZEBRA'.or.method.eq.'zebra') then
c       Same point update as 'SOR', but each sweep is split into
c       eight passes: one per combination of starting index 2 or 3
c       in k, j and i, each pass stepping by 2 in all directions.
        do it=1,itmax
          err=0.0d0
          do kl=2,3
            do jl=2,3
              do il=2,3
                do k=kl,n3-1,2
                  do j=jl,n2-1,2
                    do i=il,n1-1,2
                      u0=u(i,j,k)
                      u(i,j,k)=(src(i,j,k)
     &                  -cmnn(i,j,k)*u(i-1,j,k)-cpnn(i,j,k)*u(i+1,j,k)
     &                  -cnmn(i,j,k)*u(i,j-1,k)-cnpn(i,j,k)*u(i,j+1,k)
     &                  -cnnm(i,j,k)*u(i,j,k-1)-cnnp(i,j,k)*u(i,j,k+1)
     &                  )/cnnn(i,j,k)*alfa+alfa1*u(i,j,k)
                      err=err+abs(u(i,j,k)-u0)
                    end do
                  end do
                end do
              end do
            end do
          end do
          err=err/pts
          if (it.eq.1) err0=err
          if (err.le.small) goto 90
          if (impcv.ne.0) write(impcv,*) it,err/err0
          if (err0/err.ge.eps) goto 90
        end do
      else
        print *,'Methode de resolution inconnue: ',method
        stop
      end if

c     Common exit: max-norm of the residual of the final u.
   90 continue
      resf=0.0d0
      do k=2,n3-1
        do j=2,n2-1
          do i=2,n1-1
            res=src(i,j,k)
     &         -cmnn(i,j,k)*u(i-1,j,k)-cpnn(i,j,k)*u(i+1,j,k)
     &         -cnmn(i,j,k)*u(i,j-1,k)-cnpn(i,j,k)*u(i,j+1,k)
     &         -cnnm(i,j,k)*u(i,j,k-1)-cnnp(i,j,k)*u(i,j,k+1)
     &         -cnnn(i,j,k)*u(i,j,k)
            resf=max(resf,abs(res))
          end do
        end do
      end do
      end
Hope this helps,
Toon.