GCC Bugzilla – Attachment 16788 Details for
Bug 38306
[4.4/4.5/4.6/4.7 Regression] 15% slowdown w.r.t. 4.3 of computational kernel on some architectures
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
testcase
test.f90 (text/plain), 11.06 KB, created by
Joost VandeVondele
on 2008-11-28 16:01:58 UTC
(
hide
)
Description:
testcase
Filename:
MIME Type:
Creator:
Joost VandeVondele
Created:
2008-11-28 16:01:58 UTC
Size:
11.06 KB
patch
obsolete
! Testcase attached to GCC bug 38306 (slowdown of a computational kernel).
! The timed kernel is T_C_G0_n together with PD2VAL; their statements are kept
! exactly as in the attachment so that the generated code stays comparable.
! Only the source layout, dead setup code and unused declarations were cleaned.

!> Working-precision kind shared by the whole testcase.
MODULE kinds
  INTEGER, PARAMETER :: dp = KIND(0.0D0)
END MODULE kinds

!> Patch-wise bivariate polynomial table and its evaluation kernel.
!!
!! C0 holds one column of coefficients per patch. T_C_G0_n picks the patch
!! belonging to a point (R,T) and PD2VAL evaluates the polynomial stored there.
MODULE T_C_G0
  USE kinds, ONLY: dp
  IMPLICIT NONE
  PUBLIC :: T_C_G0_n
  ! Coefficient table: first index runs over coefficients, second over patches.
  REAL(KIND=dp), DIMENSION(:,:), ALLOCATABLE, SAVE :: C0
  ! Maximum total degree of the polynomial; (degree+1)*(degree+2)/2 = 105
  ! coefficients per derivative, which PD2VAL hard-codes.
  INTEGER, PARAMETER :: degree=13
  REAL(KIND=dp), PARAMETER :: target_error= 0.100000E-08_dp
  INTEGER, PARAMETER :: nderiv_max=21
  ! Number of derivatives the table was initialised for; -1 means "not initialised".
  INTEGER :: nderiv_init=-1
  INTEGER, SAVE :: patches=-1
CONTAINS

  !> Evaluate the tabulated function and its derivatives at the point (R,T).
  !!
  !! RES       : on return RES(1:NDERIV+1) holds the values, unless use_gamma
  !!             is set, in which case RES is left untouched.
  !! use_gamma : set to .TRUE. when R>11 and T lies below the tabulated range;
  !!             the caller then has to obtain the result by other means.
  !! R, T      : evaluation point.
  !! NDERIV    : highest derivative index wanted; INIT must have been called
  !!             with at least this value, nothing here checks it.
  !!
  !! For T above the tabulated range RES(1:NDERIV+1) is set to zero. Otherwise
  !! (R,T) is mapped to (X1,X2), a patch is selected by the nested comparisons,
  !! and X1,X2 are rescaled to the patch-local coordinates TG1,TG2 (each patch
  !! rectangle maps onto [-1,1] in both directions) before PD2VAL is called
  !! with the first element of that patch's column of C0.
  SUBROUTINE T_C_G0_n(RES,use_gamma,R,T,NDERIV)
    IMPLICIT NONE
    REAL(KIND=dp), INTENT(OUT) :: RES(*)
    LOGICAL, INTENT(OUT) :: use_gamma
    REAL(KIND=dp),INTENT(IN) :: R,T
    INTEGER, INTENT(IN) :: NDERIV
    REAL(KIND=dp) :: upper,lower,X1,X2,TG1,TG2
    use_gamma=.FALSE.
    upper=R**2 + 11.0_dp*R + 50.0_dp
    lower=R**2 - 11.0_dp*R + 0.0_dp
    IF (T>upper) THEN
      RES(1:NDERIV+1)=0.0_dp
      RETURN
    ENDIF
    IF (R>11.0_dp) THEN
      IF (T<lower) THEN
        use_gamma=.TRUE.
        RETURN
      ENDIF
      ! Large-R region: X2 shrinks with growing R, X1 is T relative to [lower,upper].
      X2=11.0_dp/R
      X1=(T-lower)/(upper-lower)
      IF (X1<= 0.500000000000000000E+00_dp) THEN
        IF (X1<= 0.250000000000000000E+00_dp) THEN
          IF (X2<= 0.500000000000000000E+00_dp) THEN
            IF (X2<= 0.250000000000000000E+00_dp) THEN
              TG1= ( 2 * X1 - 0.250000000000000000E+00_dp)* 0.400000000000000000E+01_dp
              TG2= ( 2 * X2 - 0.250000000000000000E+00_dp)* 0.400000000000000000E+01_dp
              CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,1))
            ELSE
              TG1= ( 2 * X1 - 0.250000000000000000E+00_dp)* 0.400000000000000000E+01_dp
              TG2= ( 2 * X2 - 0.750000000000000000E+00_dp)* 0.400000000000000000E+01_dp
              CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,2))
            ENDIF
          ELSE
            IF (X1<= 0.125000000000000000E+00_dp) THEN
              IF (X2<= 0.750000000000000000E+00_dp) THEN
                IF (X2<= 0.625000000000000000E+00_dp) THEN
                  TG1= ( 2 * X1 - 0.125000000000000000E+00_dp)* 0.800000000000000000E+01_dp
                  TG2= ( 2 * X2 - 0.112500000000000000E+01_dp)* 0.800000000000000000E+01_dp
                  CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,3))
                ELSE
                  TG1= ( 2 * X1 - 0.125000000000000000E+00_dp)* 0.800000000000000000E+01_dp
                  TG2= ( 2 * X2 - 0.137500000000000000E+01_dp)* 0.800000000000000000E+01_dp
                  CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,4))
                ENDIF
              ELSE
                IF (X1<= 0.625000000000000000E-01_dp) THEN
                  IF (X2<= 0.875000000000000000E+00_dp) THEN
                    TG1= ( 2 * X1 - 0.625000000000000000E-01_dp)* 0.160000000000000000E+02_dp
                    TG2= ( 2 * X2 - 0.162500000000000000E+01_dp)* 0.800000000000000000E+01_dp
                    CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,5))
                  ELSE
                    IF (X1<= 0.312500000000000000E-01_dp) THEN
                      TG1= ( 2 * X1 - 0.312500000000000000E-01_dp)* 0.320000000000000000E+02_dp
                      TG2= ( 2 * X2 - 0.187500000000000000E+01_dp)* 0.800000000000000000E+01_dp
                      CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,6))
                    ELSE
                      TG1= ( 2 * X1 - 0.937500000000000000E-01_dp)* 0.320000000000000000E+02_dp
                      TG2= ( 2 * X2 - 0.187500000000000000E+01_dp)* 0.800000000000000000E+01_dp
                      CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,7))
                    ENDIF
                  ENDIF
                ELSE
                  TG1= ( 2 * X1 - 0.187500000000000000E+00_dp)* 0.160000000000000000E+02_dp
                  TG2= ( 2 * X2 - 0.175000000000000000E+01_dp)* 0.400000000000000000E+01_dp
                  CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,8))
                ENDIF
              ENDIF
            ELSE
              IF (X1<= 0.187500000000000000E+00_dp) THEN
                TG1= ( 2 * X1 - 0.312500000000000000E+00_dp)* 0.160000000000000000E+02_dp
                TG2= ( 2 * X2 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
                CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,9))
              ELSE
                TG1= ( 2 * X1 - 0.437500000000000000E+00_dp)* 0.160000000000000000E+02_dp
                TG2= ( 2 * X2 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
                CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,10))
              ENDIF
            ENDIF
          ENDIF
        ELSE
          IF (X1<= 0.375000000000000000E+00_dp) THEN
            TG1= ( 2 * X1 - 0.625000000000000000E+00_dp)* 0.800000000000000000E+01_dp
            TG2= ( 2 * X2 - 0.100000000000000000E+01_dp)* 0.100000000000000000E+01_dp
            CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,11))
          ELSE
            TG1= ( 2 * X1 - 0.875000000000000000E+00_dp)* 0.800000000000000000E+01_dp
            TG2= ( 2 * X2 - 0.100000000000000000E+01_dp)* 0.100000000000000000E+01_dp
            CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,12))
          ENDIF
        ENDIF
      ELSE
        IF (X2<= 0.500000000000000000E+00_dp) THEN
          TG1= ( 2 * X1 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
          TG2= ( 2 * X2 - 0.500000000000000000E+00_dp)* 0.200000000000000000E+01_dp
          CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,13))
        ELSE
          TG1= ( 2 * X1 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
          TG2= ( 2 * X2 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
          CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,14))
        ENDIF
      ENDIF
    ELSE
      ! Small-R region: X2 grows with R, X1 is T relative to [0,upper].
      X2=R/11.0_dp
      upper=R**2 + 11.0_dp*R + 50.0_dp
      lower=0.0_dp
      X1=(T-lower)/(upper-lower)
      IF (X1<= 0.500000000000000000E+00_dp) THEN
        IF (X1<= 0.250000000000000000E+00_dp) THEN
          IF (X1<= 0.125000000000000000E+00_dp) THEN
            IF (X1<= 0.625000000000000000E-01_dp) THEN
              IF (X2<= 0.500000000000000000E+00_dp) THEN
                TG1= ( 2 * X1 - 0.625000000000000000E-01_dp)* 0.160000000000000000E+02_dp
                TG2= ( 2 * X2 - 0.500000000000000000E+00_dp)* 0.200000000000000000E+01_dp
                CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,15))
              ELSE
                IF (X1<= 0.312500000000000000E-01_dp) THEN
                  TG1= ( 2 * X1 - 0.312500000000000000E-01_dp)* 0.320000000000000000E+02_dp
                  TG2= ( 2 * X2 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
                  CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,16))
                ELSE
                  TG1= ( 2 * X1 - 0.937500000000000000E-01_dp)* 0.320000000000000000E+02_dp
                  TG2= ( 2 * X2 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
                  CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,17))
                ENDIF
              ENDIF
            ELSE
              IF (X2<= 0.500000000000000000E+00_dp) THEN
                TG1= ( 2 * X1 - 0.187500000000000000E+00_dp)* 0.160000000000000000E+02_dp
                TG2= ( 2 * X2 - 0.500000000000000000E+00_dp)* 0.200000000000000000E+01_dp
                CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,18))
              ELSE
                TG1= ( 2 * X1 - 0.187500000000000000E+00_dp)* 0.160000000000000000E+02_dp
                TG2= ( 2 * X2 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
                CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,19))
              ENDIF
            ENDIF
          ELSE
            IF (X2<= 0.500000000000000000E+00_dp) THEN
              TG1= ( 2 * X1 - 0.375000000000000000E+00_dp)* 0.800000000000000000E+01_dp
              TG2= ( 2 * X2 - 0.500000000000000000E+00_dp)* 0.200000000000000000E+01_dp
              CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,20))
            ELSE
              TG1= ( 2 * X1 - 0.375000000000000000E+00_dp)* 0.800000000000000000E+01_dp
              TG2= ( 2 * X2 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
              CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,21))
            ENDIF
          ENDIF
        ELSE
          IF (X2<= 0.500000000000000000E+00_dp) THEN
            TG1= ( 2 * X1 - 0.750000000000000000E+00_dp)* 0.400000000000000000E+01_dp
            TG2= ( 2 * X2 - 0.500000000000000000E+00_dp)* 0.200000000000000000E+01_dp
            CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,22))
          ELSE
            TG1= ( 2 * X1 - 0.750000000000000000E+00_dp)* 0.400000000000000000E+01_dp
            TG2= ( 2 * X2 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
            CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,23))
          ENDIF
        ENDIF
      ELSE
        IF (X2<= 0.500000000000000000E+00_dp) THEN
          TG1= ( 2 * X1 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
          TG2= ( 2 * X2 - 0.500000000000000000E+00_dp)* 0.200000000000000000E+01_dp
          CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,24))
        ELSE
          TG1= ( 2 * X1 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
          TG2= ( 2 * X2 - 0.150000000000000000E+01_dp)* 0.200000000000000000E+01_dp
          CALL PD2VAL(RES,NDERIV,TG1,TG2,C0(1,25))
        ENDIF
      ENDIF
    ENDIF
  END SUBROUTINE T_C_G0_n

  !> Allocate the coefficient table for Nder derivatives and zero-fill it.
  !!
  !! Nder  : number of derivatives that will actually be used; the routine
  !!         stops the program if it exceeds nderiv_max.
  !! iunit : kept for interface compatibility only. This reduced testcase does
  !!         not read a data file; the table is simply set to zero.
  !!
  !! A table left over from an earlier call is released first.
  SUBROUTINE INIT(Nder,iunit)
    IMPLICIT NONE
    INTEGER, INTENT(IN) :: Nder,iunit
    patches=25
    IF (Nder>nderiv_max) STOP "T_C_G0 init failed"
    nderiv_init=Nder
    IF(ALLOCATED(C0)) DEALLOCATE(C0)
    ! round up to a multiple of 32 to give some generous alignment for each C0
    ALLOCATE(C0(32*((31+(Nder+1)*(degree+1)*(degree+2)/2)/32),patches))
    C0=0.0_dp
  END SUBROUTINE INIT

  !> Release the coefficient table and mark the module as uninitialised.
  SUBROUTINE FREE()
    IF(ALLOCATED(C0)) DEALLOCATE(C0)
    nderiv_init=-1
  END SUBROUTINE FREE

  !> Evaluate NDERIV+1 bivariate polynomials of total degree <= 13 at (TG1,TG2).
  !!
  !! RES    : RES(K), K=1..NDERIV+1, receives the value of polynomial K.
  !! NDERIV : number of polynomials minus one.
  !! TG1,TG2: evaluation point in patch-local coordinates.
  !! C0     : coefficients, 105 per polynomial. This dummy hides the module
  !!          array of the same name; the caller passes the first element of a
  !!          patch column and relies on sequence association for the reshape.
  !!
  !! T1(n) and T2(n) follow the recurrence T(n) = 2*x*T(n-1) - T(n-2) with
  !! T(0)=1, T(1)=sqrt(2)*x and T(2)=2*x*T(1)-sqrt(2), i.e. sqrt(2) times the
  !! Chebyshev polynomial of the first kind for n>=1. The coefficients are
  !! stored as a triangle: 14 entries multiply T2(0), 13 multiply T2(1), and so
  !! on down to a single entry for T2(13).
  SUBROUTINE PD2VAL(RES,NDERIV,TG1,TG2,C0)
    IMPLICIT NONE
    REAL(KIND=dp), INTENT(OUT) :: res(*)
    INTEGER, INTENT(IN) :: NDERIV
    REAL(KIND=dp),INTENT(IN) :: TG1,TG2
    REAL(KIND=dp) :: T1(0:13)
    REAL(KIND=dp) :: T2(0:13)
    REAL(KIND=dp), INTENT(IN) :: C0(105,*)
    INTEGER :: K
    REAL(KIND=dp), PARAMETER :: SQRT2=1.4142135623730950488016887242096980785696718753_dp
    T1(0)=1.0_dp
    T2(0)=1.0_dp
    T1(1)=SQRT2 * TG1
    T2(1)=SQRT2 * TG2
    T1(2)= 2 * TG1 * T1(1) - SQRT2
    T2(2)= 2 * TG2 * T2(1) - SQRT2
    T1(3) = 2 * TG1 * T1(2) - T1(1)
    T2(3) = 2 * TG2 * T2(2) - T2(1)
    T1(4) = 2 * TG1 * T1(3) - T1(2)
    T2(4) = 2 * TG2 * T2(3) - T2(2)
    T1(5) = 2 * TG1 * T1(4) - T1(3)
    T2(5) = 2 * TG2 * T2(4) - T2(3)
    T1(6) = 2 * TG1 * T1(5) - T1(4)
    T2(6) = 2 * TG2 * T2(5) - T2(4)
    T1(7) = 2 * TG1 * T1(6) - T1(5)
    T2(7) = 2 * TG2 * T2(6) - T2(5)
    T1(8) = 2 * TG1 * T1(7) - T1(6)
    T2(8) = 2 * TG2 * T2(7) - T2(6)
    T1(9) = 2 * TG1 * T1(8) - T1(7)
    T2(9) = 2 * TG2 * T2(8) - T2(7)
    T1(10) = 2 * TG1 * T1(9) - T1(8)
    T2(10) = 2 * TG2 * T2(9) - T2(8)
    T1(11) = 2 * TG1 * T1(10) - T1(9)
    T2(11) = 2 * TG2 * T2(10) - T2(9)
    T1(12) = 2 * TG1 * T1(11) - T1(10)
    T2(12) = 2 * TG2 * T2(11) - T2(10)
    T1(13) = 2 * TG1 * T1(12) - T1(11)
    T2(13) = 2 * TG2 * T2(12) - T2(11)
    DO K=1,NDERIV+1
      RES(K) = 0.0_dp
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:13),C0(1:14,K))*T2(0)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:12),C0(15:27,K))*T2(1)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:11),C0(28:39,K))*T2(2)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:10),C0(40:50,K))*T2(3)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:9),C0(51:60,K))*T2(4)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:8),C0(61:69,K))*T2(5)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:7),C0(70:77,K))*T2(6)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:6),C0(78:84,K))*T2(7)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:5),C0(85:90,K))*T2(8)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:4),C0(91:95,K))*T2(9)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:3),C0(96:99,K))*T2(10)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:2),C0(100:102,K))*T2(11)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:1),C0(103:104,K))*T2(12)
      RES(K)=RES(K)+DOT_PRODUCT(T1(0:0),C0(105:105,K))*T2(13)
    ENDDO
  END SUBROUTINE PD2VAL
END MODULE T_C_G0

!> Timing driver: initialise the table for Nder derivatives, call the kernel
!! Nline times at the fixed point R=T=7 and print the CPU time spent.
PROGRAM test
  USE T_C_G0
  IMPLICIT NONE
  INTEGER, PARAMETER :: Nder=9
  REAL(KIND=dp) :: t1,t2,R,T
  REAL(KIND=dp) :: RES(0:Nder)
  INTEGER :: I,Nline
  LOGICAL :: use_gamma

  ! The unit number is not used by INIT in this reduced testcase.
  CALL INIT(Nder,12)

  Nline=5000000
  CALL CPU_TIME(t1)
  DO I=1,Nline
    ! R and T are assigned inside the loop, exactly as in the attachment.
    R=7
    T=7
    CALL T_C_G0_n(RES,use_gamma,R,T,Nder)
  ENDDO
  CALL CPU_TIME(t2)
  write(6,'(A,F8.3)') "Time for evaluation [s]: ",t2-t1

  ! Release the table outside the timed region.
  CALL FREE()
END PROGRAM test
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 38306
: 16788