This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug middle-end/35860] [4.4/4.5/4.6/4.7 Regression] [avr] code bloat caused by -fsplit-wide-types
- From: "gjl at gcc dot gnu.org" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Thu, 22 Sep 2011 10:24:45 +0000
- Subject: [Bug middle-end/35860] [4.4/4.5/4.6/4.7 Regression] [avr] code bloat caused by -fsplit-wide-types
- Auto-submitted: auto-generated
- References: <bug-35860-4@http.gcc.gnu.org/bugzilla/>
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35860
Georg-Johann Lay <gjl at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Keywords| |ra
Status|NEW |WAITING
Component|target |middle-end
Host|mingw |
Target Milestone|4.4.7 |4.7.0
--- Comment #15 from Georg-Johann Lay <gjl at gcc dot gnu.org> 2011-09-22 10:24:45 UTC ---
With 4.7 trunk r179081 and the code from comment #0, compiled with
avr-gcc -mmcu=avr4 -Os -S -dp:
The output with -fno-split-wide-types is 36 bytes
udivr32_7:
/* stack size = 0 */
ldi r30,lo8(32) ; 10 *movqi/2 [length = 1]
ldi r21,lo8(0) ; 11 *movqi/1 [length = 1]
.L4:
lsl r21 ; 15 *ashlqi3/3 [length = 1]
sbrc r25,7 ; 58 *sbrx_and_branchsi [length = 2]
subi r21,lo8(-(1)) ; 19 addqi3/2 [length = 1]
.L2:
lsl r22 ; 57 *ashlsi3_const/2 [length = 4]
rol r23
rol r24
rol r25
cp r21,r20 ; 23 *cmpqi/2 [length = 1]
brlo .L3 ; 24 branch [length = 1]
sub r21,r20 ; 26 subqi3/1 [length = 1]
ori r22,1 ; 27 iorsi3/2 [length = 1]
.L3:
subi r30,lo8(-(-1)) ; 30 addqi3/2 [length = 1]
brne .L4 ; 33 branch [length = 1]
movw r30,r18 ; 52 *movhi/1 [length = 1]
st Z,r21 ; 35 *movqi/3 [length = 1]
/* epilogue start */
ret ; 55 return [length = 1]
The output with -fsplit-wide-types is 62 bytes
udivr32_7:
push r12 ; 61 pushqi1/1 [length = 1]
push r13 ; 62 pushqi1/1 [length = 1]
push r14 ; 63 pushqi1/1 [length = 1]
push r15 ; 64 pushqi1/1 [length = 1]
/* stack size = 4 */
movw r12,r22 ; 6 *movsi/1 [length = 2]
movw r14,r24
ldi r25,lo8(32) ; 10 *movqi/2 [length = 1]
ldi r24,lo8(0) ; 11 *movqi/1 [length = 1]
.L4:
lsl r24 ; 15 *ashlqi3/3 [length = 1]
sbrc r15,7 ; 76 *sbrx_and_branchsi [length = 2]
subi r24,lo8(-(1)) ; 19 addqi3/2 [length = 1]
.L2:
lsl r12 ; 75 *ashlsi3_const/2 [length = 4]
rol r13
rol r14
rol r15
cp r24,r20 ; 23 *cmpqi/2 [length = 1]
brlo .L3 ; 24 branch [length = 1]
sub r24,r20 ; 26 subqi3/1 [length = 1]
set ; 27 iorsi3/3 [length = 2]
bld r12,0
.L3:
subi r25,lo8(-(-1)) ; 30 addqi3/2 [length = 1]
brne .L4 ; 33 branch [length = 1]
movw r30,r18 ; 60 *movhi/1 [length = 1]
st Z,r24 ; 35 *movqi/3 [length = 1]
movw r22,r12 ; 73 *movhi/1 [length = 1]
movw r24,r14 ; 74 *movhi/1 [length = 1]
/* epilogue start */
pop r15 ; 67 popqi [length = 1]
pop r14 ; 68 popqi [length = 1]
pop r13 ; 69 popqi [length = 1]
pop r12 ; 70 popqi [length = 1]
ret ; 71 return_from_epilogue [length = 1]
So there is still code bloat with -fsplit-wide-types.
I don't see how the back-end can improve this situation, and IMO the bloat is
caused by the register allocation which leads to the 13 additional
instructions: all push/pop and moving registers back and forth (and one for
a|=1 in a register that cannot operate with constants in insn 27).
Therefore, I added RA to the keywords, set component to "middle-end" and
changed the status to "waiting" so that someone familiar with the register
allocator can tell whether it's an RA flaw or not, or suggest a better component/keyword.