This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug tree-optimization/31079] New: 300% difference between ifort/gfortran


I'm still trying to find a reduced testcase (or better source) for PR 31021,
but I'm not sure the code below is really the same issue. However, it
illustrates a rather small program with a very significant slowdown in gfortran
relative to ifort.

vondele@pcihpc13:/data/vondele/extracted_collocate/test> ifort -O2 -xT test.f90
test.f90(17) : (col. 7) remark: LOOP WAS VECTORIZED.
test.f90(20) : (col. 7) remark: LOOP WAS VECTORIZED.
test.f90(24) : (col. 4) remark: BLOCK WAS VECTORIZED.
vondele@pcihpc13:/data/vondele/extracted_collocate/test> ./a.out
   3.544221
vondele@pcihpc13:/data/vondele/extracted_collocate/test> gfortran -O3
-march=native -ftree-vectorize  -ffast-math  test.f90
vondele@pcihpc13:/data/vondele/extracted_collocate/test> ./a.out
   11.84874
vondele@pcihpc13:/data/vondele/extracted_collocate/test> gfortran -O2
-march=native -ftree-vectorize  -ffast-math  test.f90
vondele@pcihpc13:/data/vondele/extracted_collocate/test> ./a.out
   11.84474
vondele@pcihpc13:/data/vondele/extracted_collocate/test> cat test.f90
SUBROUTINE collocate_core_2_2_0_0(jg,cmax)
    IMPLICIT NONE
    integer, INTENT(IN)  :: jg,cmax
    INTEGER, PARAMETER :: wp = SELECTED_REAL_KIND ( 14, 200 )
    INTEGER, PARAMETER :: N=1000
    TYPE vec
      real(wp) :: a(2)
    END TYPE vec
    TYPE(vec) :: dpy(1000)
    TYPE(vec) ::  pxy(1000)
    real(wp) s(04)
    integer :: i

    CALL USE(dpy,pxy,s)

    DO i=1,N
       pxy(i)%a=0.0_wp
    ENDDO
    DO i=1,N
       dpy(i)%a=0.0_wp
    ENDDO


    s(01)=0.0_wp
    s(02)=0.0_wp
    s(03)=0.0_wp
    s(04)=0.0_wp

    DO i=1,N
      s(01)=s(01)+pxy(i)%a(1)*dpy(i)%a(1)
      s(02)=s(02)+pxy(i)%a(2)*dpy(i)%a(1)
      s(03)=s(03)+pxy(i)%a(1)*dpy(i)%a(2)
      s(04)=s(04)+pxy(i)%a(2)*dpy(i)%a(2)
    ENDDO

    CALL USE(dpy,pxy,s)

END SUBROUTINE

SUBROUTINE USE(a,b,c)
 INTEGER, PARAMETER :: wp = SELECTED_REAL_KIND ( 14, 200 )
 REAL(kind=wp) :: a(*),b(*),c(*)
END SUBROUTINE USE

PROGRAM TEST
    integer, parameter :: cmax=5
    integer*8 :: t1,t2,tbest
    real :: time1,time2
    jg=0
    CALL cpu_time(time1)
    tbest=huge(tbest)
    DO i=1,1000000
     ! t1=nanotime_ia32()
       CALL collocate_core_2_2_0_0(0,cmax)
     ! t2=nanotime_ia32()
     ! if(t2-t1>0 .AND. t2-t1<tbest) tbest=t2-t1
    ENDDO
    CALL cpu_time(time2)
    ! write(6,*) tbest,time2-time1
    write(6,*) time2-time1
END PROGRAM TEST


-- 
           Summary: 300% difference between ifort/gfortran
           Product: gcc
           Version: 4.3.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: jv244 at cam dot ac dot uk


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31079


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]