GCC-SH: regmove fails to optimize SImode access

Toshi Morita tm2@best.com
Fri Oct 13 17:33:00 GMT 2000


Oops, meant to say "HImode" not "SImode".

Toshi


> 
>    host: i386-linux
>  target: sh-elf
> version: CVS 10/13/2000
> options: -O2 -m4-single-only -ml
> 
> In jidctflt.i in stress-1.16, I am noticing that gcc generates fairly weak code.
> It appears regmove is not optimizing related addresses properly in this sample.
> The compiler output is on the left; what it should generate is on the right.
> 
> _jpeg_idct_float:
>         mov.l   r8,@-r15
>         add     #64,r5
>         mov.l   r9,@-r15
>         mov     r6,r8
>         mov.l   r10,@-r15
>         mov.l   r11,@-r15
>         mov.l   r12,@-r15
>         mov.l   r13,@-r15
>         mov.w   .L38,r3
>         mov.l   r14,@-r15
>         fmov.s  fr12,@-r15
>         mov.w   .L39,r0
>         fmov.s  fr13,@-r15
>         sub     r3,r15
>         mov.w   .L39,r1
>         mov     r15,r14
>         mov.l   @(12,r5),r6
>         add     r14,r0
>         mov     r14,r13
>         mov.l   r7,@(4,r0)
>         add     r14,r1
>         mov.w   .L40,r0
>         mov     #8,r5
>         mov     r6,r11
>         mov.l   r5,@(12,r1)
>         mov.l   @(r0,r4),r4
>         mov.w   .L41,r0
>         mov     r4,r2
>         mov     r14,r4
>         add     r0,r2
>         mova    .L42,r0
>         fmov.s  @r0,fr12
>         mov.l   r2,@(8,r1)
>         mov     r8,r12
> .L59:
>         mov     r8,r7                   add     #16,r8
>         add     #16,r12
>         mov     r8,r10
>         add     #32,r7
>         mov.w   @r12,r2                 mov.w   @r8,r2          ; r8 + 16
>         add     #48,r10                 add     #16,r8
>         mov.w   @r7,r1                  mov.w   @r8,r1          ; r8 + 32
>         mov     r8,r5
>         mov     r8,r9
>         add     #64,r5
>         mov     r8,r3
>         or      r1,r2                   or      r1,r2
>         mov     r8,r0                   add     #16,r8
>         mov.w   @r10,r1                 mov.w   @r8,r1          ; r8 + 48
>         add     #80,r9
>         add     #96,r3
>         or      r1,r2                   or      r1,r2
>                                         add     #16,r8
>         mov.w   @r5,r1                  mov.w   @r8,r1          ; r8 + 64
>         add     #112,r0
>         or      r1,r2                   or      r1,r2
>                                         add     #16,r8
>         mov.w   @r9,r1                  mov.w   @r8,r1          ; r8 + 80
>         or      r1,r2                   or      r1,r2
>                                         add     #16,r8
>         mov.w   @r3,r1                  mov.w   @r8,r1          ; r8 + 96
>         or      r1,r2                   or      r1,r2
>         mov.w   .L39,r1                 mov.w   .L39,r1
>         add     r14,r1                  add     r14,r1
>                                         add     #16,r8
>         mov.l   r0,@(16,r1)             mov.l   r8,@(16,r1)
>         mov.w   @r0,r1                  mov.w   @r8,r1          ; r8 + 112
>         or      r1,r2                   or      r1,r2
>         tst     r2,r2                   tst     r2,r2
>         bf.s    .L58                    bf.s    .L58
>                                         add     #-112,r8
>         mov.w   @r8+,r1                 mov.w   @r8+,r1
>         bra     .L36                    bra     .L36
>         add     #32,r4                  add     #32,r4
> 
> 
> insns:  34                              29
>  regs:  11                               4
> 
> It would be nice if the register allocator could use r0 to load the HImode values,
> which would allow the mov.w @(offset, rm),rn addressing mode to be used.
> This may be asking too much from a compiler, though.
> 
> .L59:
>         mov     r8,r7                   mov     r8,r0
>         add     #16,r12
>         mov     r8,r10
>         add     #32,r7
>         mov.w   @r12,r2                 mov.w   @(16,r8),r0
>         add     #48,r10
>         mov.w   @r7,r1                  mov.w   @r8,r1
>         mov     r8,r5
>         mov     r8,r9
>         add     #64,r5                  add     #32,r8
>         mov     r8,r3
>         or      r1,r2                   or      r0,r1
>         mov     r8,r0
>         mov.w   @r10,r1                 mov.w   @r8,r0
>         add     #80,r9
>         add     #96,r3
>         or      r1,r2                   or      r0,r1
>         mov.w   @r5,r1                  mov.w   @(16,r8),r0
>         add     #112,r0                 add     #32,r8
>         or      r1,r2                   or      r0,r1
>         mov.w   @r9,r1                  mov.w   @r8,r0
>         or      r1,r2                   or      r0,r1
>         mov.w   @r3,r1                  mov.w   @(16,r8),r0
>         or      r1,r2                   or      r0,r1
>         mov.w   .L39,r1                 mov.w   .L39,r2
>         add     r14,r1                  add     r14,r2
>         mov.l   r0,@(16,r1)             mov.l   r0,@(16,r2)
>                                         add     #32,r8
>         mov.w   @r0,r1                  mov.w   @r8,r0
>         or      r1,r2                   or      r0,r1
>         tst     r2,r2                   tst     r2,r2
>         bf.s    .L58                    bf.s    .L58
>                                         add     #-112,r8
>         mov.w   @r8+,r1                 mov.w   @r8+,r2
>         bra     .L36                    bra     .L36
>         add     #32,r4                  add     #32,r4
> 
> insns:  34                              26
> 
> Toshi
> 
> 



More information about the Gcc-bugs mailing list