[Bug lto/45810] 40% slowdown when using LTO for a single-file program
dominiq at lps dot ens.fr
gcc-bugzilla@gcc.gnu.org
Sun Jan 23 19:38:00 GMT 2011
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45810
--- Comment #14 from Dominique d'Humieres <dominiq at lps dot ens.fr> 2011-01-23 17:04:07 UTC ---
After removing the comments, generalized_hookes_law reads
! Compute a 3x3 stress tensor from a strain tensor by way of an explicit
! 6x6 constitutive matrix applied to a 6-component strain vector.
!
!   strain_tensor : input strain; only elements (1,1), (2,2), (3,3), (2,3),
!                   (1,3) and (1,2) are read.
!   lambda, mu    : scalar material coefficients (presumably the Lame
!                   parameters -- confirm against the caller).
!   stress_tensor : 3x3 result; its lower triangle is copied from its upper
!                   triangle, so it is always symmetric.
!
! LONGreal is a kind parameter supplied by the enclosing scope.
function generalized_hookes_law (strain_tensor, lambda, mu) &
         result (stress_tensor)
!
    real (kind = LONGreal), dimension(:,:), intent(in) :: strain_tensor
    real (kind = LONGreal), intent(in) :: lambda, mu
    real (kind = LONGreal), dimension(3,3) :: stress_tensor
    real (kind = LONGreal), dimension(6) :: generalized_strain_vector, &
                                            generalized_stress_vector
    real (kind = LONGreal), dimension(6,6) :: generalized_constitutive_tensor
    integer :: i
!
!   Build the constitutive matrix: only 12 of its 36 entries are non-zero.
    generalized_constitutive_tensor(:,:) = 0.0_LONGreal
    generalized_constitutive_tensor(1,1) = lambda + 2.0_LONGreal * mu
    generalized_constitutive_tensor(1,2) = lambda
    generalized_constitutive_tensor(1,3) = lambda
    generalized_constitutive_tensor(2,1) = lambda
    generalized_constitutive_tensor(2,2) = lambda + 2.0_LONGreal * mu
    generalized_constitutive_tensor(2,3) = lambda
    generalized_constitutive_tensor(3,1) = lambda
    generalized_constitutive_tensor(3,2) = lambda
    generalized_constitutive_tensor(3,3) = lambda + 2.0_LONGreal * mu
    generalized_constitutive_tensor(4,4) = mu
    generalized_constitutive_tensor(5,5) = mu
    generalized_constitutive_tensor(6,6) = mu
!
!   Pack the strain components in the order 11, 22, 33, 23, 13, 12.
    generalized_strain_vector(1) = strain_tensor(1,1)
    generalized_strain_vector(2) = strain_tensor(2,2)
    generalized_strain_vector(3) = strain_tensor(3,3)
    generalized_strain_vector(4) = strain_tensor(2,3)
    generalized_strain_vector(5) = strain_tensor(1,3)
    generalized_strain_vector(6) = strain_tensor(1,2)
!
!   Matrix-vector product, one row of the constitutive matrix at a time.
    do i = 1, 6
        generalized_stress_vector(i) = &
            dot_product(generalized_constitutive_tensor(i,:), &
                        generalized_strain_vector(:))
    end do
!
!   Unpack the stress vector, then mirror the upper triangle into the lower.
    stress_tensor(1,1) = generalized_stress_vector(1)
    stress_tensor(2,2) = generalized_stress_vector(2)
    stress_tensor(3,3) = generalized_stress_vector(3)
    stress_tensor(2,3) = generalized_stress_vector(4)
    stress_tensor(1,3) = generalized_stress_vector(5)
    stress_tensor(1,2) = generalized_stress_vector(6)
    stress_tensor(3,2) = stress_tensor(2,3)
    stress_tensor(3,1) = stress_tensor(1,3)
    stress_tensor(2,1) = stress_tensor(1,2)
!
end function generalized_hookes_law
Note that 24 of the 36 elements of generalized_constitutive_tensor are
zero. Using that, the function can be replaced with
! Compute a 3x3 stress tensor directly from the strain tensor, without
! forming an intermediate 6x6 matrix:
!   off-diagonal (i,j) : mu * strain_tensor(i,j)
!   diagonal (i,i)     : lambda * (sum of the three diagonal strains)
!                        + 2 * mu * strain_tensor(i,i)
!
!   strain_tensor : input strain; the whole-array assignment below requires
!                   it to be conformable with the 3x3 result.
!   lambda, mu    : scalar material coefficients (presumably the Lame
!                   parameters -- confirm against the caller).
!   stress_tensor : 3x3 result. Its lower triangle comes from the lower
!                   triangle of strain_tensor, so the result is symmetric
!                   only if strain_tensor is.
!
! LONGreal is a kind parameter supplied by the enclosing scope.
function generalized_hookes_law (strain_tensor, lambda, mu) &
         result (stress_tensor)
!
    real (kind = LONGreal), dimension(:,:), intent(in) :: strain_tensor
    real (kind = LONGreal), intent(in) :: lambda, mu
    real (kind = LONGreal), dimension(3,3) :: stress_tensor
    real (kind = LONGreal) :: tmp
!
!   Start from mu * strain everywhere; the diagonal is corrected below.
    stress_tensor(:,:) = mu * strain_tensor(:,:)
!   Contribution shared by all three diagonal entries.
    tmp = lambda * (strain_tensor(1,1) + strain_tensor(2,2) + &
                    strain_tensor(3,3))
!   stress_tensor(i,i) currently holds mu * strain(i,i); doubling it and
!   adding tmp gives the diagonal entry.
    stress_tensor(1,1) = tmp + 2.0_LONGreal * stress_tensor(1,1)
    stress_tensor(2,2) = tmp + 2.0_LONGreal * stress_tensor(2,2)
    stress_tensor(3,3) = tmp + 2.0_LONGreal * stress_tensor(3,3)
!
end function generalized_hookes_law
end module perdida_m
which is inlined at -finline-limit=320.
More information about the Gcc-bugs
mailing list