This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: should MEM tracking be able to optimize this?


On 16 Nov, Dan Nicolaescu wrote:

> On sparc-sun-solaris2.8 using -O2 I get the following assembly:
> (from a gcc updated about 1 hour ago when using -O2, just the loops
> are shown)

Highly interesting. On powerpc-gnu-linux with 2.95.3 the loop is almost
the same, while the assembly before the loop is worse in your second case:

version1:

        li %r0,1024
        mtctr %r0
        lis %r9,A@ha
        la %r9,A@l(%r9)
.L8:
        lfs %f0,8192(%r9)
        lfs %f12,16384(%r9)
        fadds %f13,%f0,%f12
        fadds %f0,%f13,%f0
        stfs %f13,24576(%r9)
        fadds %f0,%f0,%f12
        stfs %f0,0(%r9)
        addi %r9,%r9,4
        bdnz .L8
        blr

version2:
        li %r0,1024
        lis %r9,f@ha
        mtctr %r0
        lis %r11,p@ha
        la %r7,f@l(%r9)
        la %r8,p@l(%r11)
        lis %r9,q@ha
        lis %r11,d@ha
        la %r9,q@l(%r9)
        la %r11,d@l(%r11)
        li %r10,0
.L8:
        lfsx %f0,%r10,%r8
        lfsx %f12,%r10,%r9
        fadds %f13,%f0,%f12
        fadds %f0,%f13,%f0
        stfsx %f13,%r10,%r11
        fadds %f0,%f0,%f12
        stfsx %f0,%r10,%r7
        addi %r10,%r10,4
        bdnz .L8
        blr

With the current CVS version, however, it's worse in both cases and
really evil for the first one:

version1:
        li %r0,1024
        lis %r9,A@ha
        mtctr %r0
        li %r7,0
        la %r8,A@l(%r9)
.L9:
        slwi %r9,%r7,2
        addi %r7,%r7,1
        addi %r0,%r9,8192
        addi %r11,%r9,16384
        lfsx %f0,%r8,%r11
        addi %r10,%r9,24576
        lfsx %f13,%r8,%r0
        fadds %f13,%f13,%f0
        stfsx %f13,%r8,%r9
        lfsx %f13,%r8,%r11
        lfsx %f0,%r8,%r0
        fadds %f0,%f0,%f13
        stfsx %f0,%r8,%r10
        lfsx %f12,%r8,%r0
        lfsx %f0,%r8,%r9
        lfsx %f13,%r8,%r11
        fadds %f0,%f0,%f12
        fadds %f0,%f0,%f13
        stfsx %f0,%r8,%r9
        bdnz .L9
        blr

version2:
        li %r0,1024
        lis %r9,f@ha
        mtctr %r0
        lis %r11,p@ha
        la %r7,f@l(%r9)
        la %r8,p@l(%r11)
        lis %r9,q@ha
        lis %r11,d@ha
        la %r9,q@l(%r9)
        la %r11,d@l(%r11)
        li %r10,0
.L9:
        slwi %r0,%r10,2
        addi %r10,%r10,1
        lfsx %f0,%r8,%r0
        lfsx %f12,%r9,%r0
        fadds %f13,%f0,%f12
        fadds %f0,%f13,%f0
        stfsx %f13,%r11,%r0
        fadds %f0,%f0,%f12
        stfsx %f0,%r7,%r0
        bdnz .L9
        blr

Please note that each loop is executed the same number of times, and thus
the amount of work differs quite a bit.

--
Servus,
       Daniel


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]