This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: suboptimal GCC code generation for unsigned long long on sparc
- To: eggert at twinsun dot com (Paul Eggert)
- Subject: Re: suboptimal GCC code generation for unsigned long long on sparc
- From: "Philippe De Muyter" <phdm at mail dot macqel dot be>
- Date: Sun, 5 Oct 1997 01:17:40 +0200 (MET DST)
>
> C code:
>
> unsigned
> afunc( unsigned long long var1, unsigned long long var2) {
> return ((unsigned)( (var1 ^ var2) >> 48)) ;
> }
>
> Assembler code as follows:
>
> gcc (2.7.2.1)
>
> Xor %o0, %o2, %o0
> Xor %o1, %o3, %o1
> Srl %o0, 16, %g3
> Mov 0, %g2
> Retl
> Mov %g3, %o0
>
> Sun C (4.2)
>
> Xor %o0, %o2, %g1
> Retl
> Srl %g1, 16, %o0
>
What should I say, then, about m68k code generation for your test case?
This is what I get with the current snapshot:
pea (%fp)
move.l %sp,%fp
mov.l %d3,-(%sp)
mov.l %d2,-(%sp)
mov.l 8(%fp),%d0 ;# 28 movdi+1/2
mov.l 12(%fp),%d1
mov.l 16(%fp),%d2 ;# 30 movdi+1/2
mov.l 20(%fp),%d3
eor.l %d2,%d0 ;# 9 xordi3
eor.l %d3,%d1
mov.l %d0,%d1 ;# 33 movsi+1/1
clr.w %d1 ;# 10 lshrsi_16
swap.w %d1
clr.l %d0 ;# 12 movsi_const0
mov.l %d1,%d0 ;# 18 movsi+1/1
mov.l (%sp)+,%d2
mov.l (%sp)+,%d3
unlk %fp
rts
If I modify m68k.md by enhancing the lshrdi3 pattern, I still get similar code.
pea (%fp)
move.l %sp,%fp
mov.l %d3,-(%sp)
mov.l %d2,-(%sp)
mov.l 8(%fp),%d0 ;# 25 movdi+1/2
mov.l 12(%fp),%d1
mov.l 16(%fp),%d2 ;# 27 movdi+1/2
mov.l 20(%fp),%d3
eor.l %d2,%d0 ;# 9 xordi3
eor.l %d3,%d1
mov.l %d0,%d1 ;# 11 lshrdi_const
clr.w %d1
clr.l %d0
swap.w %d1
mov.l %d1,%d0 ;# 15 movsi+1/1
mov.l (%sp)+,%d2
mov.l (%sp)+,%d3
unlk %fp
rts
And even if I disable all the DImode patterns (hoping to get simplifications and
the removal of useless instructions by using only SImode operands), I now get
worse code with the same number of instructions, but using more registers.
pea (%fp)
move.l %sp,%fp
movm.l &0x3c00,-(%sp)
mov.l 8(%fp),%d4 ;# 5 movsi+1/1
mov.l 12(%fp),%d5 ;# 7 movsi+1/1
mov.l 16(%fp),%d0 ;# 10 movsi+1/1
mov.l 20(%fp),%d1 ;# 12 movsi+1/1
mov.l %d4,%d2 ;# 38 movsi+1/1
eor.l %d0,%d2 ;# 17 xorsi3_internal/1
mov.l %d5,%d3 ;# 41 movsi+1/1
eor.l %d1,%d3 ;# 18 xorsi3_internal/1
mov.l %d2,%d1 ;# 44 movsi+1/1
clr.w %d1 ;# 20 lshrsi_16
swap.w %d1
clr.l %d0 ;# 22 movsi_const0
mov.l %d1,%d0 ;# 27 movsi+1/1
movm.l (%sp)+,&0x3c
unlk %fp
rts
Here is what I would expect gcc to generate (this is basically the same as
what the Sun cc compiler generates, except that we pass the parameters
on the stack and we handle the frame pointer explicitly):
pea (%fp)
move.l %sp,%fp
mov.l 8(%fp),%d0 ;# 5 movsi+1/1
mov.l 16(%fp),%d1 ;# 10 movsi+1/1
eor.l %d1,%d0 ;# 18 xorsi3_internal/1
clr.w %d0 ;# 20 lshrsi_16
swap.w %d0
unlk %fp
rts