This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: Sun compares their compiler tools to GNU tools


>  Those modest improvements will hopefully get much better
>  once I figure out what has caused Alpha's slow decline
>  over the past few months.  Peak performance was in July,
>  and we've lost about two points since then.

Well, to put up some food for thought: recently I got the following
code for the second loop in the Fortran source below (left is from a
snapshot about a month old, right is from yesterday's snapshot):

$L432:                                | $L206:
        ldt $f10,-16($4)              |         s8subq $23,16,$2
        ldt $f13,0($3)                |         s8subq $23,8,$5
                                      >         addq $2,$22,$2
                                      >         addq $5,$12,$4
                                      >         ldq $26,544($30)
                                      >         addq $2,$8,$2
                                      >         addq $4,$27,$4
                                      >         addq $10,$2,$2
                                      >         addq $26,$4,$1
                                      >         ldt $f10,0($2)
                                      >         addq $5,$22,$6
                                      >         ldt $f11,0($1)
                                      >         ldq $2,600($30)
                                      >         addq $6,$8,$7
                                      >         fnop
                                      >         fnop
                                      >         s8addq $23,$22,$3
                                      >         ldq $26,552($30)
                                      >         mult $f11,$f10,$f11
                                      >         fnop
                                      >         addq $2,$7,$1
                                      >         addq $3,$8,$3
                                      >         addq $26,$4,$2
                                      >         addq $10,$3,$3
                                      >         ldt $f10,0($3)
                                      >         ldt $f13,0($1)
                                      >         ldt $f12,0($2)
                                      >         addq $5,$14,$1
                                      >         ldq $2,560($30)
                                      >         subt $f13,$f11,$f13
                                      >         addq $1,$8,$1
                                      >         fnop
                                      >         mult $f12,$f10,$f12
                                      >         addq $10,$1,$1
                                      >         addq $5,$11,$3
                                      >         fnop
                                      >         addq $2,$4,$4
                                      >         ldt $f10,0($1)
        ldt $f11,0($4)                          ldt $f11,0($4)
        ldt $f12,0($7)                |         addq $5,$19,$2
        mult $f13,$f10,$f13           |         ldq $26,568($30)
        ldt $f22,0($1)                |         addq $3,$16,$3
        ldt $f25,0($5)                |         fnop
        fnop                          |         fnop
        ldt $f10,0($6)                |         mult $f11,$f10,$f11
        ldt $f26,0($8)                |         subt $f13,$f12,$f13
        fnop                          |         addq $2,$8,$2
        fnop                          |         addq $6,$21,$1
        mult $f22,$f11,$f22           |         addq $26,$3,$3
        ldt $f14,0($22)               |         addq $10,$2,$2
        ldt $f24,0($2)                |         ldt $f10,0($2)
        fnop                          |         ldt $f12,0($3)
        mult $f25,$f10,$f25           |         ldq $2,584($30)
        ldt $f15,0($23)               |         addq $10,$1,$1
        ldt $f23,0($28)               |         fnop
        fnop                          |         fnop
        subt $f12,$f13,$f12           |         mult $f12,$f10,$f12
        mult $f26,$f14,$f26           |         subt $f13,$f11,$f13
        ldt $f10,0($24)               |         addq $6,$0,$6
        ldt $f11,-8($4)               |         addq $5,$13,$5
        mult $f24,$f15,$f24           |         addq $2,$7,$3
        ldt $f13,0($25)               |         ldt $f10,0($1)
        addq $24,8,$24                |         ldt $f15,0($3)
        fnop                          |         addq $10,$6,$6
        mult $f23,$f10,$f23           |         ldq $3,592($30)
        addq $25,8,$25                |         ldt $f11,0($6)
        addq $28,8,$28                |         fnop
        fnop                          |         fnop
        mult $f13,$f11,$f13           |         mult $f15,$f10,$f15
        addq $23,8,$23                |         subt $f13,$f12,$f13
        addq $2,8,$2                  |         ldq $26,576($30)
        fnop                          |         addq $5,$28,$5
        subt $f12,$f22,$f12           |         addq $3,$7,$1
        addq $22,8,$22                |         addl $23,1,$23
        addq $8,8,$8                  |         ldt $f14,0($1)
        fnop                          |         addq $10,$7,$7
        addq $6,8,$6                  |         addq $26,$5,$5
        addq $5,8,$5                  |         ldt $f10,0($7)
        addq $4,8,$4                  |         fnop
        addq $1,8,$1                  |         fnop
        addq $7,8,$7                  |         mult $f14,$f11,$f14
        addq $3,8,$3                  |         subt $f13,$f15,$f13
        subt $f12,$f25,$f12           |         ldt $f12,0($5)
        subl $9,1,$9                  |         subl $20,1,$20
        subt $f12,$f26,$f12           |         mult $f12,$f10,$f12
        subt $f12,$f24,$f12           |         subt $f13,$f14,$f13
        subt $f12,$f23,$f12           |         subt $f13,$f12,$f11
        subt $f12,$f13,$f11           <
        cpys $f31,$f11,$f11                     cpys $f31,$f11,$f11
        cmptle $f27,$f11,$f10         |         cmptle $f22,$f11,$f10
        fcmovne $f10,$f11,$f27        |         fcmovne $f10,$f11,$f22
        bge $9,$L432                  |         fnop
                                      >         bge $20,$L206

[ Obviously, something's wrong with strength reduction, because the
  s8subq's at the top are a result of the -1, 0 and +1 elements below
  - after strength reduction these instructions should be simple
  offsets in addressing modes, not separate instructions ... ]

for the following Fortran code (second loop):

      subroutine sl3d(itmax,n1,n2,n3,alfa,eps,method,impcv,
     & cmnn,cpnn,cnmn,cnpn,cnnn,cnnm,cnnp,
     & src,u,u1,resi,resf)
c
c     sl3d: iteratively updates the 3-d array u on the interior points
c     (2..n1-1, 2..n2-1, 2..n3-1) using a 7-point stencil, and reports
c     the maximum absolute residual before (resi) and after (resf).
c
c     The residual evaluated at each interior point is
c        src - cmnn*u(i-1) - cpnn*u(i+1) - cnmn*u(j-1) - cnpn*u(j+1)
c            - cnnm*u(k-1) - cnnp*u(k+1) - cnnn*u(i,j,k)
c
c     Arguments (names starting with a-h,o-z are double precision by
c     the implicit statement; the others keep the default integer type):
c       itmax      maximum number of iterations
c       n1,n2,n3   array extents
c       alfa       relaxation factor used by the SOR and ZEBRA updates
c       eps        iteration stops once err0/err .ge. eps
c       method     'SOR', 'DJACO', 'JACOBI' or 'ZEBRA' (all-upper or
c                  all-lower case); anything else prints a message
c                  and stops the program
c       impcv      unit number for progress output; 0 disables it
c       cmnn,cpnn,cnmn,cnpn,cnnm,cnnp
c                  coefficients multiplying the six neighbours of u
c       cnnn       coefficient multiplying u(i,j,k) itself
c       src        right-hand side
c       u          solution array, updated in place
c       u1         work copy of u (holds previous iterate for JACOBI)
c       resi       out: max abs residual before iterating
c       resf       out: max abs residual after iterating; NOT assigned
c                  on any of the early "return" paths below
c
      implicit double precision (a-h,o-z)
      dimension cmnn(n1,n2,n3),cpnn(n1,n2,n3),cnmn(n1,n2,n3),
     & cnpn(n1,n2,n3),cnnn(n1,n2,n3),src(n1,n2,n3),u(n1,n2,n3),
     & cnnm(n1,n2,n3),cnnp(n1,n2,n3),u1(n1,n2,n3)
      character*(*) method
c     weight applied to the old value of u in the relaxed updates
      alfa1=1.0-alfa
c     first loop nest: copy all of u (boundaries included) into u1
      do k=1,n3
      do j=1,n2
      do i=1,n1
      u1(i,j,k)=u(i,j,k)
      end do
      end do
      end do
c     second loop nest: initial residual, max-norm over the interior.
c     the innermost (i) loop reads u at i-1, i and i+1.
      resi=0.0
      do k=2,n3-1
      do j=2,n2-1
      do i=2,n1-1
      res=src(i,j,k)-cmnn(i,j,k)*u(i-1,j,k)-cpnn(i,j,k)*u(i+1,j,k)
     &              -cnmn(i,j,k)*u(i,j-1,k)-cnpn(i,j,k)*u(i,j+1,k)
     &              -cnnm(i,j,k)*u(i,j,k-1)-cnnp(i,j,k)*u(i,j,k+1)
     &              -cnnn(i,j,k)*u(i,j,k)
      resi=max(resi,abs(res))
      end do
      end do
      end do
c     residual already negligible: return without touching u or resf
      if (resi.le.1.0e-20) return
c     --- SOR: sweep in place, so each update reads neighbours that may
c     already have been updated during the same sweep
      if (method.eq.'SOR'.or.method.eq.'sor') then
      do it=1,itmax
      err=0.0
      do k=2,n3-1
      do j=2,n2-1
      do i=2,n1-1
      u0=u(i,j,k)
      u(i,j,k)=(src(i,j,k)-cmnn(i,j,k)*u(i-1,j,k)-cpnn(i,j,k)*u(i+1,j,k)
     &              -cnmn(i,j,k)*u(i,j-1,k)-cnpn(i,j,k)*u(i,j+1,k)
     &              -cnnm(i,j,k)*u(i,j,k-1)-cnnp(i,j,k)*u(i,j,k+1)
     &       )/cnnn(i,j,k)*alfa+alfa1*u(i,j,k)
      err=err+abs(u(i,j,k)-u0)
      end do
      end do
      end do
c     mean absolute change per interior point for this sweep
      err=err/float((n1-2)*(n2-2)*(n3-2))
c     err0 = change measured on the first sweep (reference value)
      if (it.eq.1) err0=err
      if (err.le.1.0e-20) return
      if (impcv.ne.0) write(impcv,*) it,err/err0
c     leave the iteration loop once the stopping ratio reaches eps
      if (err0/err.ge.eps) goto 10
      end do
10    continue
c     --- DJACO: delegated to jaco3d, which is not defined in this
c     listing (its behaviour cannot be confirmed from here)
      else if (method.eq.'DJACO'.or.method.eq.'djaco') then
       call jaco3d(itmax,n1,n2,n3,alfa,eps,impcv,
     & cmnn,cpnn,cnmn,cnpn,cnnn,cnnm,cnnp,
     & src,u,u1,resi,resf)
c     --- JACOBI: neighbours are read from u1 (previous iterate) and
c     the new values are written to u; no relaxation factor is applied
      else if (method.eq.'JACOBI'.or.method.eq.'jacobi') then
      do it=1,itmax
      err=0.0
      do k=2,n3-1
      do j=2,n2-1
      do i=2,n1-1
      u(i,j,k)=(src(i,j,k)
     &              -cmnn(i,j,k)*u1(i-1,j,k)-cpnn(i,j,k)*u1(i+1,j,k)
     &              -cnmn(i,j,k)*u1(i,j-1,k)-cnpn(i,j,k)*u1(i,j+1,k)
     &              -cnnm(i,j,k)*u1(i,j,k-1)-cnnp(i,j,k)*u1(i,j,k+1)
     &       )/cnnn(i,j,k)
      err=err+abs(u(i,j,k)-u1(i,j,k))
      end do
      end do
      end do
      err=err/float((n1-2)*(n2-2)*(n3-2))
      if (it.eq.1) err0=err
      if (err.le.1.0e-20) return
      if (impcv.ne.0) write(impcv,*) it,err/err0
c     note: this exit skips the u -> u1 copy below
      if (err0/err.ge.eps) goto 20
c     save the interior of the new iterate as the next "previous" one
      do k=2,n3-1
      do j=2,n2-1
      do i=2,n1-1
      u1(i,j,k)=u(i,j,k)
      end do
      end do
      end do
      end do
20    continue
c     --- ZEBRA: same relaxed in-place update as SOR, but the interior
c     is visited as eight interleaved subsets: (kl,jl,il) select the
c     starting index (2 or 3) in each direction and the inner loops
c     advance with stride 2
      else if (method.eq.'ZEBRA'.or.method.eq.'zebra') then
      do it=1,itmax
      err=0.0
      do kl=2,3
      do jl=2,3
      do il=2,3
      do k=kl,n3-1,2
      do j=jl,n2-1,2
      do i=il,n1-1,2
      u0=u(i,j,k)
      u(i,j,k)=(src(i,j,k)-cmnn(i,j,k)*u(i-1,j,k)-cpnn(i,j,k)*u(i+1,j,k)
     &              -cnmn(i,j,k)*u(i,j-1,k)-cnpn(i,j,k)*u(i,j+1,k)
     &              -cnnm(i,j,k)*u(i,j,k-1)-cnnp(i,j,k)*u(i,j,k+1)
     &       )/cnnn(i,j,k)*alfa+alfa1*u(i,j,k)
      err=err+abs(u(i,j,k)-u0)
      end do
      end do
      end do
      end do
      end do
      end do
      err=err/float((n1-2)*(n2-2)*(n3-2))
      if (it.eq.1) err0=err
      if (err.le.1.0e-20) return
      if (impcv.ne.0) write(impcv,*) it,err/err0
      if (err0/err.ge.eps) goto 30
      end do
30    continue
      else
c     unrecognised method name; the message is French for
c     "Unknown solution method"
          print *,'Methode de resolution inconnue: ',method
          stop
      end if
c     final residual, computed the same way as resi above
      resf=0.0
      do k=2,n3-1
      do j=2,n2-1
      do i=2,n1-1
      res=src(i,j,k)-cmnn(i,j,k)*u(i-1,j,k)-cpnn(i,j,k)*u(i+1,j,k)
     &              -cnmn(i,j,k)*u(i,j-1,k)-cnpn(i,j,k)*u(i,j+1,k)
     &              -cnnm(i,j,k)*u(i,j,k-1)-cnnp(i,j,k)*u(i,j,k+1)
     &              -cnnn(i,j,k)*u(i,j,k)
      resf=max(resf,abs(res))
      end do
      end do
      end do
      end

Hope this helps,
Toon.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]