This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug fortran/59345] _gfortran_internal_pack on compiler generated temps
- From: "Joost.VandeVondele at mat dot ethz.ch" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Sat, 06 Dec 2014 10:05:14 +0000
- Subject: [Bug fortran/59345] _gfortran_internal_pack on compiler generated temps
- Auto-submitted: auto-generated
- References: <bug-59345-4 at http dot gcc dot gnu dot org/bugzilla/>
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59345
Joost VandeVondele <Joost.VandeVondele at mat dot ethz.ch> changed:
What |Removed |Added
----------------------------------------------------------------------------
Last reconfirmed|2013-12-22 00:00:00 |2014-12-6
CC| |Joost.VandeVondele at mat dot ethz
| |.ch
Known to fail| |4.9.2, 5.0
--- Comment #2 from Joost VandeVondele <Joost.VandeVondele at mat dot ethz.ch> ---
still happens with trunk.
In the microbenchmark below, there seems to be a more than 3-fold overhead due
to packing. This is similar to using an assumed-shape dummy argument as a
temporary, although in the latter case the overhead can be fixed with the
contiguous attribute. Could the solution be as simple as somehow providing the
'contiguous' attribute to compiler-generated temporaries?
> gfortran -Ofast -fno-inline t.f90
> ./a.out
with packing: 1.8157229999999998 sec.
without packing: 0.49092599999999997 sec.
assumed shape, no contiguous : 1.9047100000000006 sec.
assumed shape, contiguous : 0.46692899999999948 sec.
total calls to foo: 400000000 expected 200000000
> cat t.f90
! Reproducer module: four ways of handing the negated array -A to a
! callback whose dummy argument is an explicit-shape REAL array of size 3.
module M
   ! Incremented by one on every call to foo.
   integer, save :: count = 0
contains

   ! Passes the array expression -A directly as the actual argument.
   subroutine S1(A, foo)
      real :: A(3)
      call foo(-A)
   end subroutine S1

   ! Stores -A in a local explicit-shape array first, then passes that array.
   subroutine S2(A, foo)
      real :: A(3)
      real :: B(3)
      B = -A
      call foo(B)
   end subroutine S2

   ! Stores -A in the caller-supplied assumed-shape array B, then passes B.
   subroutine S3(A, B, foo)
      real :: A(3)
      real :: B(:)
      B = -A
      call foo(B)
   end subroutine S3

   ! Same as S3, except that B additionally carries the CONTIGUOUS attribute.
   subroutine S4(A, B, foo)
      real :: A(3)
      real, contiguous :: B(:)
      B = -A
      call foo(B)
   end subroutine S4

   ! Callback: ignores its argument and only bumps the module-level counter.
   subroutine foo(A)
      real :: A(3)
      count = count + 1
   end subroutine foo

end module M
! Driver: runs N calls of each of S1..S4, measures the CPU time of every
! loop with CPU_TIME, and prints the four timings followed by the number of
! times foo was invoked.
program TEST
   use M
   use, intrinsic :: iso_fortran_env, only: real64, output_unit
   implicit none

   ! Number of iterations of each timed loop.
   integer, parameter :: N = 100000000

   real :: A(3), B(3)
   integer :: i
   real(real64) :: t1, t2, t3, t4, t5, t6, t7, t8

   A = 0

   ! S1: the expression -A is passed straight to foo.
   call cpu_time(t1)
   do i = 1, N
      call S1(A, foo)
   end do
   call cpu_time(t2)

   ! S2: -A is first stored in a local explicit-shape array.
   call cpu_time(t3)
   do i = 1, N
      call S2(A, foo)
   end do
   call cpu_time(t4)

   ! S3: -A is stored in B, received as an assumed-shape dummy.
   call cpu_time(t5)
   do i = 1, N
      call S3(A, B, foo)
   end do
   call cpu_time(t6)

   ! S4: as S3, but the assumed-shape dummy is declared CONTIGUOUS.
   call cpu_time(t7)
   do i = 1, N
      call S4(A, B, foo)
   end do
   call cpu_time(t8)

   write(output_unit, *) "with packing:", t2-t1, " sec."
   write(output_unit, *) "without packing:", t4-t3, "sec. "
   write(output_unit, *) "assumed shape, no contiguous :", t6-t5, "sec. "
   write(output_unit, *) "assumed shape, contiguous :", t8-t7, "sec. "
   ! Four loops each invoke foo N times, so the expected total is 4*N
   ! (the originally posted version printed 2*N here, which did not match
   ! the actual count).
   write(output_unit, *) "total calls to foo:", count, "expected", 4*N
end program TEST