This is the mail archive of the
fortran@gcc.gnu.org
mailing list for the GNU Fortran project.
Performance regression
- From: "Nagorny, Denis" <denis dot nagorny at intel dot com>
- To: <fortran at gcc dot gnu dot org>
- Date: Wed, 30 Mar 2005 19:38:43 +0400
- Subject: Performance regression
Hi all,
Looking through SPEC CPU2000/mgrid, I found that the performance of a
refined test case compiled by gfortran 4.0 can be 2x worse than with
g77 3.4 (see the test below). Maybe I'm mistaken in my interpretation,
but it looks like one of the computational routines helps to fill the
cache, and this must have been used by the code generated by the
previous compiler. You can see this on a P4 at the -O2 or -O3
optimization level.
Can anybody point me to the implementation of such optimizations in the
3.4 compiler (and probably in the current one, if such an optimization
exists)?
*     Driver: calls SETUP once to fill the IR and MM tables, then
*     calls MG3P NIT times on the work arrays U and R.
      PROGRAM sample
      INTEGER LM, NM, NR, NIT
      PARAMETER( LM=7 )
      PARAMETER( NM=2+2**LM)
*     NR is the length of the one-dimensional work arrays U and R.
      PARAMETER( NR = (8*(NM**3+NM**2+5*NM-23+7*LM))/7 )
      REAL*8 U(NR),R(NR)
      REAL*8 U0
      INTEGER IR(LM), MM(LM)
      INTEGER IT, N, I
      INTEGER LMI
      LMI = LM
      NIT = 40
      U0 = 0.D0
      N = 2 + 2**LMI
*     Define every element of U and R before the first call: RESID
*     and PSINV read them, and referencing undefined values is not
*     standard conforming.
      DO 10 I=1,NR
         U(I) = U0
         R(I) = U0
   10 CONTINUE
      CALL SETUP(LMI,IR,MM)
      DO 20 IT=1,NIT
         CALL MG3P(U,R,N,NR,IR,MM,LMI)
   20 CONTINUE
      STOP
      END
*************************
*     MG3P: for each level K = 2 .. LM-1, call RESID and then PSINV
*     on the parts of U and R that start at index IR(K), passing
*     MM(K) as the grid extent.
*       U, R  (in/out) work arrays of length NR
*       N     (in)     not referenced here; kept for the callers
*       NR    (in)     length of U and R
*       IR    (in)     start index of each level inside U and R
*       MM    (in)     extent handed to RESID/PSINV for each level
*       LM    (in)     number of entries in IR and MM
      SUBROUTINE MG3P(U,R,N,NR,IR,MM,LM)
      INTEGER N,NR,LM
      REAL*8 U(NR),R(NR)
      INTEGER IR(LM), MM(LM)
      INTEGER K
*     The element U(IR(K)) / R(IR(K)) is passed as the first element
*     of the N x N x N dummy arrays of RESID and PSINV.
      DO 100 K = 2, LM-1
         CALL RESID(U(IR(K)),R(IR(K)),MM(K))
         CALL PSINV(R(IR(K)),U(IR(K)),MM(K))
  100 CONTINUE
      RETURN
      END
**************************
*     PSINV: for every interior point (indices 2 .. N-1 in each
*     dimension) add to U the unweighted sum of the 27 values of R
*     in the surrounding 3x3x3 neighbourhood (the point itself, 6
*     face, 12 edge and 8 corner neighbours).
*       R  (in)     N x N x N array, only read
*       U  (in/out) N x N x N array, interior updated in place
*       N  (in)     extent of each dimension
      SUBROUTINE PSINV(R,U,N)
      INTEGER N
      REAL*8 U(N,N,N),R(N,N,N)
      INTEGER I3, I2, I1
*     Each loop ends on its own CONTINUE instead of sharing one
*     labelled assignment as terminal statement.  The innermost
*     loop runs over the first index.
      DO 630 I3=2,N-1
        DO 620 I2=2,N-1
          DO 610 I1=2,N-1
            U(I1,I2,I3)=U(I1,I2,I3)
     >        +( R(I1,  I2,  I3  ) )
     >        +( R(I1-1,I2,  I3  ) + R(I1+1,I2,  I3  )
     >         + R(I1,  I2-1,I3  ) + R(I1,  I2+1,I3  )
     >         + R(I1,  I2,  I3-1) + R(I1,  I2,  I3+1) )
     >        +( R(I1-1,I2-1,I3  ) + R(I1+1,I2-1,I3  )
     >         + R(I1-1,I2+1,I3  ) + R(I1+1,I2+1,I3  )
     >         + R(I1,  I2-1,I3-1) + R(I1,  I2+1,I3-1)
     >         + R(I1,  I2-1,I3+1) + R(I1,  I2+1,I3+1)
     >         + R(I1-1,I2,  I3-1) + R(I1-1,I2,  I3+1)
     >         + R(I1+1,I2,  I3-1) + R(I1+1,I2,  I3+1) )
     >        +( R(I1-1,I2-1,I3-1) + R(I1+1,I2-1,I3-1)
     >         + R(I1-1,I2+1,I3-1) + R(I1+1,I2+1,I3-1)
     >         + R(I1-1,I2-1,I3+1) + R(I1+1,I2-1,I3+1)
     >         + R(I1-1,I2+1,I3+1) + R(I1+1,I2+1,I3+1) )
  610     CONTINUE
  620   CONTINUE
  630 CONTINUE
      RETURN
      END
****************************
*     RESID: for every interior point (indices 2 .. N-1 in each
*     dimension) set R to minus the unweighted sum of the 27 values
*     of U in the surrounding 3x3x3 neighbourhood (the point itself,
*     6 face, 12 edge and 8 corner neighbours).
*       U  (in)     N x N x N array, only read
*       R  (in/out) N x N x N array, interior overwritten
*       N  (in)     extent of each dimension
      SUBROUTINE RESID(U,R,N)
      INTEGER N
      REAL*8 U(N,N,N),R(N,N,N)
      INTEGER I3, I2, I1
*     Each loop ends on its own CONTINUE instead of sharing one
*     labelled assignment as terminal statement.  The innermost
*     loop runs over the first index.
      DO 630 I3=2,N-1
        DO 620 I2=2,N-1
          DO 610 I1=2,N-1
            R(I1,I2,I3)=
     >        -( U(I1,  I2,  I3  ) )
     >        -( U(I1-1,I2,  I3  ) + U(I1+1,I2,  I3  )
     >         + U(I1,  I2-1,I3  ) + U(I1,  I2+1,I3  )
     >         + U(I1,  I2,  I3-1) + U(I1,  I2,  I3+1) )
     >        -( U(I1-1,I2-1,I3  ) + U(I1+1,I2-1,I3  )
     >         + U(I1-1,I2+1,I3  ) + U(I1+1,I2+1,I3  )
     >         + U(I1,  I2-1,I3-1) + U(I1,  I2+1,I3-1)
     >         + U(I1,  I2-1,I3+1) + U(I1,  I2+1,I3+1)
     >         + U(I1-1,I2,  I3-1) + U(I1-1,I2,  I3+1)
     >         + U(I1+1,I2,  I3-1) + U(I1+1,I2,  I3+1) )
     >        -( U(I1-1,I2-1,I3-1) + U(I1+1,I2-1,I3-1)
     >         + U(I1-1,I2+1,I3-1) + U(I1+1,I2+1,I3-1)
     >         + U(I1-1,I2-1,I3+1) + U(I1+1,I2-1,I3+1)
     >         + U(I1-1,I2+1,I3+1) + U(I1+1,I2+1,I3+1) )
  610     CONTINUE
  620   CONTINUE
  630 CONTINUE
      RETURN
      END
****************************
*     SETUP: fill the per-level tables MM and IR, both of length LM.
*       MM(LM) = 2 + 2**LM, then downwards MM(K) = 1 + MM(K+1)/2
*       (integer division).
*       IR(LM) = 1, then downwards IR(K) = IR(K+1) + MM(K+1)**3.
      SUBROUTINE SETUP(LM,IR,MM)
      INTEGER LM, IR(LM), MM(LM)
      INTEGER LEV
*     Seed the top level, then walk down.  Both recurrences only
*     need the entries of level LEV+1, so one descending loop fills
*     both tables.
      MM(LM) = 2+2**LM
      IR(LM) = 1
      DO 10 LEV = LM-1, 1, -1
         MM(LEV) = 1 + MM(LEV+1)/2
         IR(LEV) = IR(LEV+1) + MM(LEV+1)**3
   10 CONTINUE
      RETURN
      END
Denis