[Bug lto/45810] 40% slowdown when using LTO for a single-file program

dominiq at lps dot ens.fr gcc-bugzilla@gcc.gnu.org
Sun Jan 23 19:38:00 GMT 2011


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45810

--- Comment #14 from Dominique d'Humieres <dominiq at lps dot ens.fr> 2011-01-23 17:04:07 UTC ---
After removing the comments, generalized_hookes_law reads

      function generalized_hookes_law (strain_tensor, lambda, mu) &
                                       result (stress_tensor)
!
! Computes a 3x3 stress tensor from a strain tensor and the two scalar
! coefficients lambda and mu, using a dense 6x6 matrix times 6-vector
! product.
!
!   strain_tensor : input; only elements (1,1), (2,2), (3,3), (2,3),
!                   (1,3) and (1,2) are read.
!   lambda, mu    : input scalar coefficients.
!   stress_tensor : result, 3x3; the lower triangle is copied from the
!                   upper triangle, so the result is always symmetric.
!
      real (kind = LONGreal), dimension(:,:), intent(in) :: strain_tensor
      real (kind = LONGreal), intent(in) :: lambda, mu
      real (kind = LONGreal), dimension(3,3) :: stress_tensor
      real (kind = LONGreal), dimension(6) :: generalized_strain_vector, &
                                              generalized_stress_vector
      real (kind = LONGreal), dimension(6,6) :: generalized_constitutive_tensor
      integer :: i
!
! Build the 6x6 matrix: zero everywhere except the upper-left 3x3 block
! (lambda + 2*mu on its diagonal, lambda elsewhere) and mu on the last
! three diagonal entries. Only 12 of the 36 entries are set non-trivially.
!
      generalized_constitutive_tensor(:,:) = 0.0_LONGreal
      generalized_constitutive_tensor(1,1) = lambda + 2.0_LONGreal * mu
      generalized_constitutive_tensor(1,2) = lambda
      generalized_constitutive_tensor(1,3) = lambda
      generalized_constitutive_tensor(2,1) = lambda
      generalized_constitutive_tensor(2,2) = lambda + 2.0_LONGreal * mu
      generalized_constitutive_tensor(2,3) = lambda
      generalized_constitutive_tensor(3,1) = lambda
      generalized_constitutive_tensor(3,2) = lambda
      generalized_constitutive_tensor(3,3) = lambda + 2.0_LONGreal * mu
      generalized_constitutive_tensor(4,4) = mu
      generalized_constitutive_tensor(5,5) = mu
      generalized_constitutive_tensor(6,6) = mu
!
! Pack the strain into a 6-vector in the order 11, 22, 33, 23, 13, 12.
!
      generalized_strain_vector(1) = strain_tensor(1,1)
      generalized_strain_vector(2) = strain_tensor(2,2)
      generalized_strain_vector(3) = strain_tensor(3,3)
      generalized_strain_vector(4) = strain_tensor(2,3)
      generalized_strain_vector(5) = strain_tensor(1,3)
      generalized_strain_vector(6) = strain_tensor(1,2)
!
! Matrix-vector product, one row at a time.
!
      do i = 1, 6
          generalized_stress_vector(i) = &
              dot_product(generalized_constitutive_tensor(i,:), &
                          generalized_strain_vector(:))
      end do
!
! Unpack the 6-vector into the upper triangle (same ordering as above),
! then mirror it into the lower triangle.
!
      stress_tensor(1,1) = generalized_stress_vector(1)
      stress_tensor(2,2) = generalized_stress_vector(2)
      stress_tensor(3,3) = generalized_stress_vector(3)
      stress_tensor(2,3) = generalized_stress_vector(4)
      stress_tensor(1,3) = generalized_stress_vector(5)
      stress_tensor(1,2) = generalized_stress_vector(6)
      stress_tensor(3,2) = stress_tensor(2,3)
      stress_tensor(3,1) = stress_tensor(1,3)
      stress_tensor(2,1) = stress_tensor(1,2)
!
      end function generalized_hookes_law

Note that 24 of the 36 elements of generalized_constitutive_tensor are
zero. Using that, the function can be replaced with

      function generalized_hookes_law (strain_tensor, lambda, mu) &
                                       result (stress_tensor)
!
! Closed-form evaluation of the stress tensor that avoids building a
! 6x6 matrix:
!   off-diagonal entries : mu * strain(i,j)
!   diagonal entries     : lambda * (strain(1,1) + strain(2,2) + strain(3,3))
!                          + 2 * mu * strain(i,i)
!
!   strain_tensor : input strain tensor.
!   lambda, mu    : input scalar coefficients.
!   stress_tensor : result, 3x3.
!
! NOTE(review): the whole-array assignment below requires strain_tensor
! to have shape (3,3), and each off-diagonal result entry is taken from
! the matching strain entry. This agrees with the dense 6x6 formulation,
! which reads only the upper triangle and mirrors it, only when
! strain_tensor is symmetric -- confirm against the callers.
!
      real (kind = LONGreal), dimension(:,:), intent(in) :: strain_tensor
      real (kind = LONGreal), intent(in) :: lambda, mu
      real (kind = LONGreal), dimension(3,3) :: stress_tensor
      real (kind = LONGreal) :: tmp
!
! Start every entry at mu * strain; the diagonal is then overwritten.
      stress_tensor(:,:) = mu * strain_tensor(:,:)
! lambda times the sum of the diagonal strain entries, shared by all
! three diagonal stress entries.
      tmp = lambda * (strain_tensor(1,1) + strain_tensor(2,2) + &
                      strain_tensor(3,3))
! stress_tensor(i,i) currently holds mu * strain(i,i), so doubling it
! gives the 2 * mu * strain(i,i) term.
      stress_tensor(1,1) = tmp + 2.0_LONGreal * stress_tensor(1,1)
      stress_tensor(2,2) = tmp + 2.0_LONGreal * stress_tensor(2,2)
      stress_tensor(3,3) = tmp + 2.0_LONGreal * stress_tensor(3,3)
!
      end function generalized_hookes_law

end module perdida_m

which is inlined at -finline-limit=320.



More information about the Gcc-bugs mailing list