This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

RE: EGCS-19981101 EXPERIMENTAL TEST IRIX6.4 WITH WORKING G++


          -Reply


From: Jeffrey A Law <law@upchuck.cygnus.com>


  In message
<4.19981104.16.15.23.137205@cat.e-mail.com>you write:
  >
  > Aside from breaking the x86 version, the new optimizations
  > gain a little bit on R10K in many situations, but show a big loss in
  > some branching situations (Livermore Kernels 16).
  > Dr. Timothy C. Prince
Interesting.  Any chance you could analyze the problem with mips and the
livermore kernels?  I'm a little surprised that they had that kind of
effect.

jeff


I compared the results from last week's prerelease and
experimental snapshots.  While the experimental 2.92.18 equals
or exceeds the performance of the pre-release 2.91.58 in every
section of the test, recent snapshots had much better
performance in this one test, where they nearly equalled the
performance of the MipsPro 7.2 compiler.

The attached shar file shows this segment of source code, as I
have re-processed it to change the archaic arithmetic IF's and
GOTO's to g77 style.  I suspect the major difference between the
fast 2.92.16 version and the slow 2.92.18 is in the recognition of
the pointers to COMMON blocks (almost) as loop invariants.
These pointers have the g77 internal names spaces_ and
spacer_.  The named integer variables are stored in spaces,
and the named single precision floating point variables are
stored in spacer.

The loop body head label is .L206 in both cases, and it can be
seen that those 2 pointers are loaded into registers ahead of the
loop in the fast case.  In the slow case, it seems that a big delay
is introduced by the additional level of indirection which has to be
resolved before the first branch condition in the loop can be
evaluated.  The values in the zone() array are set in a totally
inscrutable way and I have no idea whether the branching
pattern responds to "prediction."

In both cases, there is a refetch of spaces_ in the middle of the
loop, apparently associated only with the incrementing of the
counter k3, but this appears not to be on the critical path.  Neither
the fast nor the slow code is holding k3 as a register variable,
but this doesn't seem to be so important.  The fast code places
this pointer redundantly in the same register where it is for the
other calculations, while the slow code is re-using a register
which is being used for other pointer calculations, but one would
think that the hardware shadow register mapping would take
care of this.

No doubt this is more than you bargained for, on a case of
doubtful practical significance.
Dr. Timothy C. Prince
Consulting Engineer
Solar Turbines, a Caterpillar Company
alternate e-mail: tprince@computer.org


# This is a shell archive.  Remove anything before this line,

# then unpack it by saving it in a file and typing "sh file".

#

# Wrapped by Prince <d28930@alpha095> on Mon Nov  9 11:09:30 1998

#

# This archive contains:

#       k16.f           k16fast.s       k16slow.s

#



# Set LANG to the empty string and put the standard system tool
# directories at the front of the command search path, then export both
# so the commands run below inherit them.
LANG=
export LANG
PATH="/bin:/usr/bin:/usr/sbin:/usr/ccs/bin:${PATH}"
export PATH



# Unpack k16.f: announce the member, then copy the heredoc payload into the
# file.  The delimiter is quoted, so the payload is taken literally with no
# shell expansion.
echo 'x - k16.f'
cat <<'@EOF' >'k16.f'

C

C***********************************************************************

C***  KERNEL 16     MONTE CARLO SEARCH LOOP

C***********************************************************************

C

      lb= ii+ii

      k2= 0

      k3= 0

      continue

      dowhile(.true.)

          do m= 1,zone(1)

              j2= (n+n)*(m-1)+1

              do k= 1,n

                  k2= k2+1

                  j4= j2+k+k

                  j5= zone(j4)

                  if(j5 >= n)then

                      if(j5 == n)then

                        exit

                        endif

                      k3= k3+1

                      if(d(j5) <  d(j5-1)*(t-d(j5-2))**2+(s-d(j5-3))**2+

     &(r-d(j5-4))**2)then

                        goto200

                        endif

                      if(d(j5) == d(j5-1)*(t-d(j5-2))**2+(s-d(j5-3))**2+

     &(r-d(j5-4))**2)then

                        exit

                        endif

                    else

                      if(j5-n+lb <  0)then

                          if(plan(j5) <  t)then

                            goto200

                            endif

                          if(plan(j5) == t)then

                            exit

                            endif

                        else

                          if(j5-n+ii <  0)then

                              if(plan(j5) <  s)then

                                goto200

                                endif

                              if(plan(j5) == s)then

                                exit

                                endif

                            else

                                if(plan(j5) <  r)then

                                  goto200

                                  endif

                                if(plan(j5) == r)then

                                  exit

                                  endif

                            endif

                        endif

                    endif

                  if(zone(j4-1) <= 0)then

                    goto200

                    endif

                enddo

              exit

200             if(zone(j4-1) == 0)then

                  exit

                  endif

            enddo

          if(test(16) <= 0)then

            exit

            endif

        enddo

@EOF
# Owner may read and write; group and others may only read.
chmod 644 'k16.f'



# Unpack k16fast.s: announce the member, then write the heredoc payload to
# the file.  sed removes one leading '@' from each line, which restores the
# assembler labels that are stored with an '@' prefix in the archive.  The
# delimiter is quoted, so the payload is taken literally.
echo 'x - k16fast.s'
sed -e 's/^@//' <<'@EOF' >'k16fast.s'

        lw      $23,.LC65

        la      $24,spaces_

        lw      $6,64($24)

        move    $16,$24

        la      $19,spacer_

        sw      $0,8($24)

        sw      $0,12($24)

        move    $5,$6

        dsll    $2,$5,2

        daddu   $2,$2,$5

        dsll    $3,$2,4

        daddu   $2,$2,$3

        dsll    $4,$2,8

        daddu   $2,$2,$4

        dsll    $3,$2,16

        daddu   $2,$2,$3

        daddu   $2,$2,$5

        dsra    $2,$2,32

        sra     $6,$6,31

        lw      $3,.LC64

        subu    $20,$2,$6

        sll     $fp,$20,1

        addu    $21,$3,24728

        addu    $22,$3,22328

@.L202:

        la      $25,ispace_

        lw      $18,8776($25)

        li      $25,1                   # 0x1

        la      $24,spaces_

        addu    $18,$18,-1

        .set    noreorder

        .set    nomacro

        bltz    $18,.L204

        sw      $25,28($24)

        .set    macro

        .set    reorder



@.L206:

        lw      $2,64($16)

        lw      $3,28($16)

        sll     $4,$2,1

        addu    $3,$3,-1

        mult    $4,$3

        mflo    $4

        #nop

        addu    $17,$2,-1

        .set    noreorder

        .set    nomacro

        bltz    $17,.L204

        addu    $7,$4,1

        .set    macro

        .set    reorder



        addu    $8,$4,3

        addu    $2,$4,2

        sll     $2,$2,2

        addu    $6,$2,$23

        sll     $3,$7,2

        addu    $4,$3,$23

@.L210:

        lw      $2,8($16)

        lw      $5,0($6)

        lw      $3,64($16)

        move    $7,$8

        addu    $2,$2,1

        sw      $2,8($16)

        slt     $2,$5,$3

        .set    noreorder

        .set    nomacro

        bne     $2,$0,.L211

        sw      $5,4($16)

        .set    macro

        .set    reorder



        .set    noreorder

        .set    nomacro

        beq     $5,$3,.L204

        addu    $2,$5,-3

        .set    macro

        .set    reorder



        sll     $2,$2,3

        addu    $2,$21,$2

        l.d     $f1,280($19)

        l.d     $f0,0($2)

        addu    $2,$5,-4

        sub.d   $f1,$f1,$f0

        sll     $2,$2,3

        addu    $2,$21,$2

        l.d     $f2,248($19)

        mul.d   $f1,$f1,$f1

        l.d     $f0,0($2)

        #nop

        sub.d   $f2,$f2,$f0

        addu    $3,$5,-2

        mul.d   $f2,$f2,$f2

        sll     $3,$3,3

        addu    $3,$21,$3

        l.d     $f3,0($3)

        addu    $3,$5,-5

        sll     $3,$3,3

        addu    $3,$21,$3

        l.d     $f0,0($3)

        mul.d   $f3,$f3,$f1

        l.d     $f1,232($19)

        #nop

        sub.d   $f1,$f1,$f0

        mul.d   $f1,$f1,$f1

        la      $24,spaces_

        add.d   $f3,$f3,$f2

        addu    $2,$5,-1

        sll     $2,$2,3

        addu    $2,$21,$2

        add.d   $f3,$f3,$f1

        l.d     $f0,0($2)

        lw      $3,12($24)

        c.lt.d  $f0,$f3

        addu    $3,$3,1

        .set    noreorder

        .set    nomacro

        bc1t    .L214

        sw      $3,12($24)

        .set    macro

        .set    reorder



        c.eq.d  $f0,$f3

        b       .L735

@.L211:

        subu    $3,$5,$3

        addu    $2,$3,$fp

        .set    noreorder

        .set    nomacro

        bgez    $2,.L217

        addu    $2,$3,$20

        .set    macro

        .set    reorder



        addu    $2,$5,-1

        sll     $2,$2,3

        addu    $2,$22,$2

        l.d     $f1,0($2)

        l.d     $f0,280($19)

        b       .L736

@.L217:

        .set    noreorder

        .set    nomacro

        bgez    $2,.L221

        addu    $2,$5,-1

        .set    macro

        .set    reorder



        sll     $2,$2,3

        addu    $2,$22,$2

        l.d     $f1,0($2)

        l.d     $f0,248($19)

        b       .L736

@.L221:

        sll     $2,$2,3

        addu    $2,$22,$2

        l.d     $f1,0($2)

        l.d     $f0,232($19)

@.L736:

        c.lt.d  $f1,$f0

        #nop

        .set    noreorder

        .set    nomacro

        bc1t    .L746

        addu    $2,$7,-2

        .set    macro

        .set    reorder



        c.eq.d  $f1,$f0

@.L735:

        bc1t    .L204

        lw      $2,0($4)

        #nop

        .set    noreorder

        .set    nomacro

        blez    $2,.L214

        addu    $4,$4,8

        .set    macro

        .set    reorder



        addu    $6,$6,8

        addu    $17,$17,-1

        .set    noreorder

        .set    nomacro

        bgez    $17,.L210

        addu    $8,$8,2

        .set    macro

        .set    reorder



        b       .L204

@.L214:

        addu    $2,$7,-2

@.L746:

        sll     $2,$2,2

        addu    $2,$2,$23

        lw      $3,0($2)

        #nop

        .set    noreorder

        .set    nomacro

        beq     $3,$0,.L204

        addu    $18,$18,-1

        .set    macro

        .set    reorder



        lw      $2,28($16)

        #nop

        addu    $2,$2,1

        .set    noreorder

        .set    nomacro

        bgez    $18,.L206

        sw      $2,28($16)

        .set    macro

        .set    reorder



@.L204:

        la      $4,.LC23

        la      $25,test_

        jal     $31,$25

        bgtz    $2,.L202

        l.d     $f22,.LC24

@EOF
# Owner may read and write; group and others may only read.
chmod 644 'k16fast.s'



# Unpack k16slow.s: announce the member, then write the heredoc payload to
# the file.  sed removes one leading '@' from each line, which restores the
# assembler labels that are stored with an '@' prefix in the archive.  The
# delimiter is quoted, so the payload is taken literally.
echo 'x - k16slow.s'
sed -e 's/^@//' <<'@EOF' >'k16slow.s'

        la      $11,spaces_

        lw      $6,64($11)

        la      $21,.LC23

        li      $20,1                   # 0x1

        sw      $0,8($11)

        sw      $0,12($11)

        move    $5,$6

        dsll    $2,$5,2

        daddu   $2,$2,$5

        dsll    $3,$2,4

        daddu   $2,$2,$3

        dsll    $4,$2,8

        daddu   $2,$2,$4

        dsll    $3,$2,16

        daddu   $2,$2,$3

        daddu   $2,$2,$5

        dsra    $2,$2,32

        sra     $6,$6,31

        subu    $16,$2,$6

        sll     $19,$16,1

@.L202:

        la      $2,ispace_

        lw      $18,8776($2)

        la      $3,spaces_

        addu    $18,$18,-1

        .set    noreorder

        .set    nomacro

        bltz    $18,.L204

        sw      $20,28($3)

        .set    macro

        .set    reorder



        move    $13,$3

        lw      $15,.LC64

        lw      $14,.LC63

@.L206:

        lw      $2,64($13)

        lw      $3,28($13)

        sll     $4,$2,1

        addu    $3,$3,-1

        mult    $4,$3

        mflo    $3

        #nop

        addu    $17,$2,-1

        .set    noreorder

        .set    nomacro

        bltz    $17,.L204

        addu    $7,$3,1

        .set    macro

        .set    reorder



        la      $12,spaces_

        addu    $6,$14,24728

        la      $8,spacer_

        addu    $11,$14,22328

        addu    $10,$3,3

        lw      $2,.LC64

        addu    $3,$3,2

        sll     $3,$3,2

        sll     $4,$7,2

        addu    $9,$3,$2

        addu    $4,$4,$2

@.L210:

        lw      $2,8($12)

        lw      $5,0($9)

        lw      $3,64($12)

        move    $7,$10

        addu    $2,$2,1

        sw      $2,8($12)

        slt     $2,$5,$3

        .set    noreorder

        .set    nomacro

        bne     $2,$0,.L211

        sw      $5,4($12)

        .set    macro

        .set    reorder



        .set    noreorder

        .set    nomacro

        beq     $5,$3,.L204

        addu    $2,$5,-3

        .set    macro

        .set    reorder



        sll     $2,$2,3

        addu    $2,$6,$2

        l.d     $f1,280($8)

        l.d     $f0,0($2)

        addu    $2,$5,-4

        sub.d   $f1,$f1,$f0

        sll     $2,$2,3

        addu    $2,$6,$2

        l.d     $f2,248($8)

        mul.d   $f1,$f1,$f1

        l.d     $f0,0($2)

        #nop

        sub.d   $f2,$f2,$f0

        addu    $3,$5,-2

        mul.d   $f2,$f2,$f2

        sll     $3,$3,3

        addu    $3,$6,$3

        l.d     $f3,0($3)

        addu    $3,$5,-5

        sll     $3,$3,3

        addu    $3,$6,$3

        l.d     $f0,0($3)

        mul.d   $f3,$f3,$f1

        l.d     $f1,232($8)

        #nop

        sub.d   $f1,$f1,$f0

        mul.d   $f1,$f1,$f1

        addu    $2,$5,-1

        add.d   $f3,$f3,$f2

        sll     $2,$2,3

        addu    $2,$6,$2

        l.d     $f0,0($2)

        add.d   $f3,$f3,$f1

        la      $2,spaces_

        lw      $3,12($2)

        c.lt.d  $f0,$f3

        addu    $3,$3,1

        .set    noreorder

        .set    nomacro

        bc1t    .L214

        sw      $3,12($2)

        .set    macro

        .set    reorder



        c.eq.d  $f0,$f3

        b       .L734

@.L211:

        subu    $3,$5,$3

        addu    $2,$3,$19

        .set    noreorder

        .set    nomacro

        bgez    $2,.L217

        addu    $2,$3,$16

        .set    macro

        .set    reorder



        addu    $2,$5,-1

        sll     $2,$2,3

        addu    $2,$11,$2

        l.d     $f1,0($2)

        l.d     $f0,280($8)

        b       .L735

@.L217:

        .set    noreorder

        .set    nomacro

        bgez    $2,.L221

        addu    $2,$5,-1

        .set    macro

        .set    reorder



        sll     $2,$2,3

        addu    $2,$11,$2

        l.d     $f1,0($2)

        l.d     $f0,248($8)

        b       .L735

@.L221:

        sll     $2,$2,3

        addu    $2,$11,$2

        l.d     $f1,0($2)

        l.d     $f0,232($8)

@.L735:

        c.lt.d  $f1,$f0

        #nop

        .set    noreorder

        .set    nomacro

        bc1t    .L746

        addu    $2,$7,-2

        .set    macro

        .set    reorder



        c.eq.d  $f1,$f0

@.L734:

        .set    noreorder

        .set    nomacro

        bc1tl   .L738

        move    $4,$21

        .set    macro

        .set    reorder



        lw      $2,0($4)

        #nop

        .set    noreorder

        .set    nomacro

        blez    $2,.L214

        addu    $4,$4,8

        .set    macro

        .set    reorder



        addu    $9,$9,8

        addu    $17,$17,-1

        .set    noreorder

        .set    nomacro

        bgez    $17,.L210

        addu    $10,$10,2

        .set    macro

        .set    reorder



        .set    noreorder

        .set    nomacro

        b       .L738

        move    $4,$21

        .set    macro

        .set    reorder



@.L214:

        addu    $2,$7,-2

@.L746:

        sll     $2,$2,2

        addu    $2,$2,$15

        lw      $3,0($2)

        #nop

        .set    noreorder

        .set    nomacro

        beq     $3,$0,.L204

        addu    $18,$18,-1

        .set    macro

        .set    reorder



        lw      $2,28($13)

        #nop

        addu    $2,$2,1

        .set    noreorder

        .set    nomacro

        bgez    $18,.L206

        sw      $2,28($13)

        .set    macro

        .set    reorder



@.L204:

        move    $4,$21

@.L738:

        la      $25,test_

        jal     $31,$25

        bgtz    $2,.L202

        l.d     $f22,.LC24

@EOF
# Owner may read and write; group and others may only read.
chmod 644 'k16slow.s'



# End of the archive: leave with status 0 so the shell never tries to
# interpret any text that follows this line.
exit 0

           To:                                              INTERNET - IBMMAIL
                                                            N4248388 - IBMMAIL


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]