[Bug middle-end/35860] [4.4/4.5/4.6/4.7 Regression] [avr] code bloat caused by -fsplit-wide-types

gjl at gcc dot gnu.org gcc-bugzilla@gcc.gnu.org
Thu Sep 22 10:29:00 GMT 2011


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35860

Georg-Johann Lay <gjl at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Keywords|                            |ra
             Status|NEW                         |WAITING
          Component|target                      |middle-end
               Host|mingw                       |
   Target Milestone|4.4.7                       |4.7.0

--- Comment #15 from Georg-Johann Lay <gjl at gcc dot gnu.org> 2011-09-22 10:24:45 UTC ---
With 4.7 trunk r179081 and the code from comment #0, compiled with
avr-gcc -mmcu=avr4 -Os -S -dp:

The output with -fno-split-wide-types is 36 bytes

udivr32_7:
/* stack size = 0 */
    ldi r30,lo8(32)     ;  10    *movqi/2    [length = 1]
    ldi r21,lo8(0)     ;  11    *movqi/1    [length = 1]
.L4:
    lsl r21     ;  15    *ashlqi3/3    [length = 1]
    sbrc r25,7     ;  58    *sbrx_and_branchsi    [length = 2]
    subi r21,lo8(-(1))     ;  19    addqi3/2    [length = 1]
.L2:
    lsl r22     ;  57    *ashlsi3_const/2    [length = 4]
    rol r23
    rol r24
    rol r25
    cp r21,r20     ;  23    *cmpqi/2    [length = 1]
    brlo .L3     ;  24    branch    [length = 1]
    sub r21,r20     ;  26    subqi3/1    [length = 1]
    ori r22,1     ;  27    iorsi3/2    [length = 1]
.L3:
    subi r30,lo8(-(-1))     ;  30    addqi3/2    [length = 1]
    brne .L4     ;  33    branch    [length = 1]
    movw r30,r18     ;  52    *movhi/1    [length = 1]
    st Z,r21     ;  35    *movqi/3    [length = 1]
/* epilogue start */
    ret     ;  55    return    [length = 1]


The output with -fsplit-wide-types is 62 bytes

udivr32_7:
    push r12     ;  61    pushqi1/1    [length = 1]
    push r13     ;  62    pushqi1/1    [length = 1]
    push r14     ;  63    pushqi1/1    [length = 1]
    push r15     ;  64    pushqi1/1    [length = 1]
/* stack size = 4 */
    movw r12,r22     ;  6    *movsi/1    [length = 2]
    movw r14,r24
    ldi r25,lo8(32)     ;  10    *movqi/2    [length = 1]
    ldi r24,lo8(0)     ;  11    *movqi/1    [length = 1]
.L4:
    lsl r24     ;  15    *ashlqi3/3    [length = 1]
    sbrc r15,7     ;  76    *sbrx_and_branchsi    [length = 2]
    subi r24,lo8(-(1))     ;  19    addqi3/2    [length = 1]
.L2:
    lsl r12     ;  75    *ashlsi3_const/2    [length = 4]
    rol r13
    rol r14
    rol r15
    cp r24,r20     ;  23    *cmpqi/2    [length = 1]
    brlo .L3     ;  24    branch    [length = 1]
    sub r24,r20     ;  26    subqi3/1    [length = 1]
    set     ;  27    iorsi3/3    [length = 2]
    bld r12,0
.L3:
    subi r25,lo8(-(-1))     ;  30    addqi3/2    [length = 1]
    brne .L4     ;  33    branch    [length = 1]
    movw r30,r18     ;  60    *movhi/1    [length = 1]
    st Z,r24     ;  35    *movqi/3    [length = 1]
    movw r22,r12     ;  73    *movhi/1    [length = 1]
    movw r24,r14     ;  74    *movhi/1    [length = 1]
/* epilogue start */
    pop r15     ;  67    popqi    [length = 1]
    pop r14     ;  68    popqi    [length = 1]
    pop r13     ;  69    popqi    [length = 1]
    pop r12     ;  70    popqi    [length = 1]
    ret     ;  71    return_from_epilogue    [length = 1]


So there is still code bloat with -fsplit-wide-types.

I don't see how the back-end can improve this situation and IMO the bloat is
caused by the register allocation which leads to the 13 additional
instructions: all push/pop and moving registers back and forth (and one for
a|=1 in a register that cannot operate with constants in insn 27).

Therefore, I added RA to the keywords, set component to "middle-end" and
changed the status to "waiting" so that someone familiar with the register
allocator can tell whether it's an RA flaw or suggest a better component/keyword.



More information about the Gcc-bugs mailing list