This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: should MEM tracking be able to optimize this?
- From: degger at fhm dot edu
- To: dann at godzilla dot ICS dot UCI dot EDU
- Cc: gcc at gcc dot gnu dot org, kenner at vlsi1 dot ultra dot nyu dot edu
- Date: Sat, 17 Nov 2001 01:18:45 +0100 (CET)
- Subject: Re: should MEM tracking be able to optimize this?
- Reply-to: degger at fhm dot edu
On 16 Nov, Dan Nicolaescu wrote:
> On sparc-sun-solaris2.8 using -O2 I get the following assembly:
> (from a gcc updated about 1 hour ago when using -O2, just the loops
> are shown)
Highly interesting. On powerpc-gnu-linux with 2.95.3 the loop is almost
the same, while the assembly preceding it is worse in your second case:
version1:
li %r0,1024
mtctr %r0
lis %r9,A@ha
la %r9,A@l(%r9)
.L8:
lfs %f0,8192(%r9)
lfs %f12,16384(%r9)
fadds %f13,%f0,%f12
fadds %f0,%f13,%f0
stfs %f13,24576(%r9)
fadds %f0,%f0,%f12
stfs %f0,0(%r9)
addi %r9,%r9,4
bdnz .L8
blr
version2:
li %r0,1024
lis %r9,f@ha
mtctr %r0
lis %r11,p@ha
la %r7,f@l(%r9)
la %r8,p@l(%r11)
lis %r9,q@ha
lis %r11,d@ha
la %r9,q@l(%r9)
la %r11,d@l(%r11)
li %r10,0
.L8:
lfsx %f0,%r10,%r8
lfsx %f12,%r10,%r9
fadds %f13,%f0,%f12
fadds %f0,%f13,%f0
stfsx %f13,%r10,%r11
fadds %f0,%f0,%f12
stfsx %f0,%r10,%r7
addi %r10,%r10,4
bdnz .L8
blr
With CVS, however, it's worse in both cases and really evil for the first
one:
version1:
li %r0,1024
lis %r9,A@ha
mtctr %r0
li %r7,0
la %r8,A@l(%r9)
.L9:
slwi %r9,%r7,2
addi %r7,%r7,1
addi %r0,%r9,8192
addi %r11,%r9,16384
lfsx %f0,%r8,%r11
addi %r10,%r9,24576
lfsx %f13,%r8,%r0
fadds %f13,%f13,%f0
stfsx %f13,%r8,%r9
lfsx %f13,%r8,%r11
lfsx %f0,%r8,%r0
fadds %f0,%f0,%f13
stfsx %f0,%r8,%r10
lfsx %f12,%r8,%r0
lfsx %f0,%r8,%r9
lfsx %f13,%r8,%r11
fadds %f0,%f0,%f12
fadds %f0,%f0,%f13
stfsx %f0,%r8,%r9
bdnz .L9
blr
version2:
li %r0,1024
lis %r9,f@ha
mtctr %r0
lis %r11,p@ha
la %r7,f@l(%r9)
la %r8,p@l(%r11)
lis %r9,q@ha
lis %r11,d@ha
la %r9,q@l(%r9)
la %r11,d@l(%r11)
li %r10,0
.L9:
slwi %r0,%r10,2
addi %r10,%r10,1
lfsx %f0,%r8,%r0
lfsx %f12,%r9,%r0
fadds %f13,%f0,%f12
fadds %f0,%f13,%f0
stfsx %f13,%r11,%r0
fadds %f0,%f0,%f12
stfsx %f0,%r7,%r0
bdnz .L9
blr
Please note that each loop runs the same number of iterations, and thus the
amount of work differs quite a bit.
--
Servus,
Daniel