This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug fortran/59345] _gfortran_internal_pack on compiler generated temps


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59345

Joost VandeVondele <Joost.VandeVondele at mat dot ethz.ch> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Last reconfirmed|2013-12-22 00:00:00         |2014-12-6
                 CC|                            |Joost.VandeVondele at mat dot ethz
                   |                            |.ch
      Known to fail|                            |4.9.2, 5.0

--- Comment #2 from Joost VandeVondele <Joost.VandeVondele at mat dot ethz.ch> ---
Still happens with trunk.

In the microbenchmark below, there seems to be a 3-fold overhead due to packing.
This is similar to using an assumed-shape dummy argument as a temporary; in that
case, however, the overhead can be removed with the contiguous attribute. Could
the solution be as simple as somehow providing the 'contiguous' attribute to
compiler-generated temporaries?

> gfortran -Ofast -fno-inline t.f90
> ./a.out
 with packing:   1.8157229999999998       sec.
 without packing:  0.49092599999999997      sec. 
 assumed shape, no contiguous :   1.9047100000000006      sec. 
 assumed shape, contiguous :  0.46692899999999948      sec. 
 total calls to foo:   400000000 expected   200000000

> cat t.f90
! Test-case module for the microbenchmark: S1-S4 each hand the negation of A
! to the caller-supplied procedure foo in a different way, and the module
! procedure foo counts how often it is invoked.
MODULE M
 ! Running total of calls made to the module procedure foo below.
 INTEGER, SAVE :: count=0
CONTAINS
 ! Passes the array expression -A directly as the actual argument to foo.
 ! foo is a dummy procedure; no interface for it is declared here.
 ! The driver program reports this variant as "with packing".
 SUBROUTINE S1(A,foo)
  REAL :: A(3)
  CALL foo(-A)
 END SUBROUTINE

 ! Stores -A in the explicit-shape local array B first, then passes B to foo.
 ! The driver program reports this variant as "without packing".
 SUBROUTINE S2(A,foo)
  REAL :: A(3)
  REAL :: B(3)
  B=-A 
  CALL foo(B)
 END SUBROUTINE

 ! Stores -A in the assumed-shape dummy B (declared without CONTIGUOUS), then
 ! passes B to foo.
 ! The driver program reports this variant as "assumed shape, no contiguous".
 SUBROUTINE S3(A,B,foo)
  REAL :: A(3)
  REAL :: B(:)
  B=-A 
  CALL foo(B)
 END SUBROUTINE

 ! Same as S3, except that the assumed-shape dummy B carries the CONTIGUOUS
 ! attribute.
 ! The driver program reports this variant as "assumed shape, contiguous".
 SUBROUTINE S4(A,B,foo)
  REAL :: A(3)
  REAL, CONTIGUOUS :: B(:)
  B=-A 
  CALL foo(B)
 END SUBROUTINE

 ! Callback handed to S1-S4 by the driver: increments the module counter.
 ! The argument A is received as an explicit-shape array but is never read.
 SUBROUTINE foo(A)
  REAL :: A(3)
  count=count+1
 END SUBROUTINE
END MODULE

! Driver for the microbenchmark: runs N calls to each of S1, S2, S3 and S4,
! measures each loop with CPU_TIME, prints the elapsed CPU time per variant,
! and finally prints the observed and expected number of calls to foo.
PROGRAM TEST
   USE M
   IMPLICIT NONE
   REAL :: A(3),B(3)
   INTEGER :: i
   REAL*8 :: t1,t2,t3,t4,t5,t6,t7,t8
   INTEGER :: N
   A=0
   N=100000000

   ! Variant 1: array expression passed straight to foo.
   CALL CPU_TIME(t1)
   DO i=1,N
      CALL S1(A,foo)
   ENDDO
   CALL CPU_TIME(t2)

   ! Variant 2: explicit local array used as the temporary.
   CALL CPU_TIME(t3)
   DO i=1,N
      CALL S2(A,foo)
   ENDDO
   CALL CPU_TIME(t4)

   ! Variant 3: assumed-shape dummy (no CONTIGUOUS) used as the temporary.
   ! B is assigned inside S3 before it is passed on, so it needs no
   ! initialisation here.
   CALL CPU_TIME(t5)
   DO i=1,N
      CALL S3(A,B,foo)
   ENDDO
   CALL CPU_TIME(t6)

   ! Variant 4: assumed-shape CONTIGUOUS dummy used as the temporary.
   CALL CPU_TIME(t7)
   DO i=1,N
      CALL S4(A,B,foo)
   ENDDO
   CALL CPU_TIME(t8)

   WRITE(6,*) "with packing:", t2-t1, " sec."
   WRITE(6,*) "without packing:", t4-t3, "sec. "
   WRITE(6,*) "assumed shape, no contiguous :", t6-t5, "sec. "
   WRITE(6,*) "assumed shape, contiguous :", t8-t7, "sec. "

   ! Each of the four timed loops above calls foo exactly N times, so the
   ! expected total is 4*N (the original 2*N predates variants 3 and 4 and
   ! made a correct run look wrong). 4*N = 400000000 still fits in a default
   ! INTEGER.
   WRITE(6,*) "total calls to foo:", count, "expected", 4*N
END


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]