This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [Patch,AVR]: Improve loading of 32-bit constants
Denis Chertykov wrote:
> 2011/7/6 Georg-Johann Lay <avr@gjlay.de>:
>> For loading a 32-bit constant in a register, there is room for
>> improvement:
>>
>> * SF can be handled the same way as SI and therefore the patch
>> adds a peep2 to produce a *reload_insf analogue of *reload_insi.
>>
>> * If the destination register overlaps NO_LD_REGS, values already
>> loaded into some other byte can be reused by a simple MOV.
>> This is helpful when moving values like, e.g., -2, -100 etc. because
>> all high bytes are 0xff.
>>
>> * 0.0f can be directly moved to memory.
>>
>> * The mov insns contain "!d" constraint. I see no reason to make "d"
>> expensive and discourage use of d-regs. A "*d" to hide is better
>> because it neither puts additional pressure on "d" nor
>> discourages "d".
>>
>
> I would like to have some real code examples.
>
> Denis.
Hi Denis.
Attached you will find a small C file and the asm that is generated by new
and old versions (-Os -mmcu=atmega88 -S -dp).
I took away some regs as potential clobbers (or -fno-peephole2) to
show the effect of high register pressure. But even if a clobber was
available you can see that the new version is smarter in reusing
values, e.g. note the loading of -1L to r22-r25.
Johann
/* Global register variables bound to hard registers 26, 28 and 30.
   Per the accompanying message, some registers were taken away as
   potential clobbers to show code generation under high register
   pressure. */
register int _x asm ("26");
register int _y asm ("28");
register int _z asm ("30");
/* Callees used by the test functions below; declared here, defined
   elsewhere. */
void ibar (long, long, long, long);
void fbar (long, long, float, float);
/* Passes x together with three 32-bit integer constants to ibar:
   -1, -2 and 0xff008000.  The constants are what exercise the
   constant-loading code in the generated assembly. */
void foo1 (long x)
{
ibar (-1, x, -2, 0xff008000);
}
/* Passes x twice, followed by the 32-bit constants 65537L (0x00010001)
   and 0xffff0408, to ibar. */
void foo2 (long x)
{
ibar (x, x, 65537L, 0xffff0408);
}
/* Passes x twice, followed by the float constants -3.0f and 2.0f, to
   fbar.  This is the float counterpart of the integer test cases. */
void foo3 (long x)
{
fbar (x, x, -3.0f, 2.0f);
}
; First assembly listing for oint.c.
; In this listing every constant destined for r10..r17 is built one byte
; at a time through r31.
.file "oint.c"
; Assembler symbols used by the code below.  __tmp_reg__ evaluates to 0,
; so "mov __tmp_reg__,r31" names register 0 as its destination.
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
.global __do_copy_data
.global __do_clear_bss
.text
; foo1 (first listing): sets up four 32-bit values and calls ibar.
; Register use visible below:
;   r18..r21 <- copy of the value held in r22..r25 on entry
;   r22..r25 <- -1
;   r14..r17 <- -2
;   r10..r13 <- -16744448 (bit pattern 0xff008000)
; r10..r17 are pushed on entry and popped in reverse order before ret.
.global foo1
.type foo1, @function
foo1:
push r10 ; 16 *pushqi/1 [length = 1]
push r11 ; 17 *pushqi/1 [length = 1]
push r12 ; 18 *pushqi/1 [length = 1]
push r13 ; 19 *pushqi/1 [length = 1]
push r14 ; 20 *pushqi/1 [length = 1]
push r15 ; 21 *pushqi/1 [length = 1]
push r16 ; 22 *pushqi/1 [length = 1]
push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
; Move the incoming 32-bit value out of r22..r25 before they are overwritten.
movw r18,r22 ; 2 *movsi/1 [length = 2]
movw r20,r24
; -1 is loaded with four separate ldi instructions, one per byte.
ldi r22,lo8(-1) ; 7 *movsi/5 [length = 4]
ldi r23,hi8(-1)
ldi r24,hlo8(-1)
ldi r25,hhi8(-1)
; -2 into r14..r17: r31 is saved in __tmp_reg__, used as the ldi target
; for every byte (including the bytes for r16 and r17), then restored.
; Ten instructions in total.
mov __tmp_reg__,r31 ; 9 *movsi/6 [length = 10]
ldi r31,lo8(-2)
mov r14,r31
ldi r31,hi8(-2)
mov r15,r31
ldi r31,hlo8(-2)
mov r16,r31
ldi r31,hhi8(-2)
mov r17,r31
mov r31,__tmp_reg__
; 0xff008000 into r10..r13 using the same save/ldi/mov/restore pattern.
mov __tmp_reg__,r31 ; 10 *movsi/6 [length = 10]
ldi r31,lo8(-16744448)
mov r10,r31
ldi r31,hi8(-16744448)
mov r11,r31
ldi r31,hlo8(-16744448)
mov r12,r31
ldi r31,hhi8(-16744448)
mov r13,r31
mov r31,__tmp_reg__
rcall ibar ; 11 call_insn/3 [length = 1]
/* epilogue start */
pop r17 ; 26 popqi [length = 1]
pop r16 ; 27 popqi [length = 1]
pop r15 ; 28 popqi [length = 1]
pop r14 ; 29 popqi [length = 1]
pop r13 ; 30 popqi [length = 1]
pop r12 ; 31 popqi [length = 1]
pop r11 ; 32 popqi [length = 1]
pop r10 ; 33 popqi [length = 1]
ret ; 34 return_from_epilogue [length = 1]
.size foo1, .-foo1
; foo2 (first listing): sets up the values for the ibar call and calls it.
; Register use visible below:
;   r18..r21 <- copy of the value held in r22..r25 on entry
;   r22..r25    left untouched
;   r14..r17 <- 65537 (0x00010001)
;   r10..r13 <- -64504 (bit pattern 0xffff0408)
; r10..r17 are pushed on entry and popped in reverse order before ret.
.global foo2
.type foo2, @function
foo2:
push r10 ; 16 *pushqi/1 [length = 1]
push r11 ; 17 *pushqi/1 [length = 1]
push r12 ; 18 *pushqi/1 [length = 1]
push r13 ; 19 *pushqi/1 [length = 1]
push r14 ; 20 *pushqi/1 [length = 1]
push r15 ; 21 *pushqi/1 [length = 1]
push r16 ; 22 *pushqi/1 [length = 1]
push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
; Duplicate the incoming 32-bit value into r18..r21.
movw r18,r22 ; 2 *movsi/1 [length = 2]
movw r20,r24
; 65537 into r14..r17: r31 is saved in __tmp_reg__, loaded with each byte
; in turn and copied to the destination, then restored.
mov __tmp_reg__,r31 ; 9 *movsi/6 [length = 10]
ldi r31,lo8(65537)
mov r14,r31
ldi r31,hi8(65537)
mov r15,r31
ldi r31,hlo8(65537)
mov r16,r31
ldi r31,hhi8(65537)
mov r17,r31
mov r31,__tmp_reg__
; 0xffff0408 into r10..r13 using the same pattern; the two identical
; 0xff high bytes are each loaded separately.
mov __tmp_reg__,r31 ; 10 *movsi/6 [length = 10]
ldi r31,lo8(-64504)
mov r10,r31
ldi r31,hi8(-64504)
mov r11,r31
ldi r31,hlo8(-64504)
mov r12,r31
ldi r31,hhi8(-64504)
mov r13,r31
mov r31,__tmp_reg__
rcall ibar ; 11 call_insn/3 [length = 1]
/* epilogue start */
pop r17 ; 26 popqi [length = 1]
pop r16 ; 27 popqi [length = 1]
pop r15 ; 28 popqi [length = 1]
pop r14 ; 29 popqi [length = 1]
pop r13 ; 30 popqi [length = 1]
pop r12 ; 31 popqi [length = 1]
pop r11 ; 32 popqi [length = 1]
pop r10 ; 33 popqi [length = 1]
ret ; 34 return_from_epilogue [length = 1]
.size foo2, .-foo2
; foo3 (first listing): sets up the values for the fbar call and calls it.
; Register use visible below:
;   r18..r21 <- copy of the value held in r22..r25 on entry
;   r22..r25    left untouched
;   r14..r17 <- 0xc0400000 (the -3.0f argument of the C source)
;   r10..r13 <- 0x40000000 (the 2.0f argument of the C source)
; r10..r17 are pushed on entry and popped in reverse order before ret.
.global foo3
.type foo3, @function
foo3:
push r10 ; 16 *pushqi/1 [length = 1]
push r11 ; 17 *pushqi/1 [length = 1]
push r12 ; 18 *pushqi/1 [length = 1]
push r13 ; 19 *pushqi/1 [length = 1]
push r14 ; 20 *pushqi/1 [length = 1]
push r15 ; 21 *pushqi/1 [length = 1]
push r16 ; 22 *pushqi/1 [length = 1]
push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
; Duplicate the incoming 32-bit value into r18..r21.
movw r18,r22 ; 2 *movsi/1 [length = 2]
movw r20,r24
; Float constant 0xc0400000 into r14..r17 (a *movsf insn): r31 is saved
; in __tmp_reg__, used as the ldi target for every byte, then restored.
mov __tmp_reg__,r31 ; 9 *movsf/6 [length = 10]
ldi r31,lo8(0xc0400000)
mov r14,r31
ldi r31,hi8(0xc0400000)
mov r15,r31
ldi r31,hlo8(0xc0400000)
mov r16,r31
ldi r31,hhi8(0xc0400000)
mov r17,r31
mov r31,__tmp_reg__
; Float constant 0x40000000 into r10..r13 using the same pattern, even
; though three of its four bytes are zero.
mov __tmp_reg__,r31 ; 10 *movsf/6 [length = 10]
ldi r31,lo8(0x40000000)
mov r10,r31
ldi r31,hi8(0x40000000)
mov r11,r31
ldi r31,hlo8(0x40000000)
mov r12,r31
ldi r31,hhi8(0x40000000)
mov r13,r31
mov r31,__tmp_reg__
rcall fbar ; 11 call_insn/3 [length = 1]
/* epilogue start */
pop r17 ; 26 popqi [length = 1]
pop r16 ; 27 popqi [length = 1]
pop r15 ; 28 popqi [length = 1]
pop r14 ; 29 popqi [length = 1]
pop r13 ; 30 popqi [length = 1]
pop r12 ; 31 popqi [length = 1]
pop r11 ; 32 popqi [length = 1]
pop r10 ; 33 popqi [length = 1]
ret ; 34 return_from_epilogue [length = 1]
.size foo3, .-foo3
; Second assembly listing for oint.c.
; In this listing constants are assembled with clr/dec/set/bld and
; register-to-register copies, without saving and restoring r31.
.file "oint.c"
; Assembler symbols; __tmp_reg__ evaluates to 0 and __zero_reg__ to 1.
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
.text
; foo1 (second listing): sets up four 32-bit values and calls ibar.
; Register use visible below:
;   r18..r21 <- copy of the value held in r22..r25 on entry
;   r22..r25 <- 0xffffffff (-1)
;   r14..r17 <- 0xfffffffe (-2)
;   r10..r13 <- 0xff008000
; r10..r17 are pushed on entry and popped in reverse order before ret.
.global foo1
.type foo1, @function
foo1:
push r10 ; 16 *pushqi/1 [length = 1]
push r11 ; 17 *pushqi/1 [length = 1]
push r12 ; 18 *pushqi/1 [length = 1]
push r13 ; 19 *pushqi/1 [length = 1]
push r14 ; 20 *pushqi/1 [length = 1]
push r15 ; 21 *pushqi/1 [length = 1]
push r16 ; 22 *pushqi/1 [length = 1]
push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
; Move the incoming 32-bit value out of r22..r25 before they are overwritten.
movw r18,r22 ; 2 *movsi/1 [length = 2]
movw r20,r24
; -1: load 0xff into r22 and r23, then copy that pair to r24:r25 with a
; single movw (three instructions).
ldi r22,lo8(-1) ; 7 *movsi/5 [length = 3]
ldi r23,lo8(-1)
movw r24,r22
; -2 into r14..r17: r17 briefly holds the low byte 0xfe so it can be
; copied to r14; r15 becomes 0xff via clr followed by dec; r16 and r17
; are then loaded directly with 0xff.
ldi r17,lo8(-2) ; 9 *movsi/6 [length = 6]
mov r14,r17
clr r15
dec r15
ldi r16,lo8(-1)
ldi r17,lo8(-1)
; 0xff008000 into r10..r13: r10 = 0x00; the T flag is set and copied
; into bit 7 of the cleared r11 (0x80); r12 = 0x00; r13 = 0xff via
; clr followed by dec.
clr r10 ; 10 *movsi/6 [length = 7]
set
clr r11
bld r11,7
clr r12
clr r13
dec r13
rcall ibar ; 11 *call_insn/2 [length = 1]
/* epilogue start */
pop r17 ; 26 popqi [length = 1]
pop r16 ; 27 popqi [length = 1]
pop r15 ; 28 popqi [length = 1]
pop r14 ; 29 popqi [length = 1]
pop r13 ; 30 popqi [length = 1]
pop r12 ; 31 popqi [length = 1]
pop r11 ; 32 popqi [length = 1]
pop r10 ; 33 popqi [length = 1]
ret ; 34 return_from_epilogue [length = 1]
.size foo1, .-foo1
; foo2 (second listing): sets up the values for the ibar call and calls it.
; Register use visible below:
;   r18..r21 <- copy of the value held in r22..r25 on entry
;   r22..r25    left untouched
;   r14..r17 <- 0x00010001 (65537)
;   r10..r13 <- 0xffff0408
; r10..r17 are pushed on entry and popped in reverse order before ret.
.global foo2
.type foo2, @function
foo2:
push r10 ; 16 *pushqi/1 [length = 1]
push r11 ; 17 *pushqi/1 [length = 1]
push r12 ; 18 *pushqi/1 [length = 1]
push r13 ; 19 *pushqi/1 [length = 1]
push r14 ; 20 *pushqi/1 [length = 1]
push r15 ; 21 *pushqi/1 [length = 1]
push r16 ; 22 *pushqi/1 [length = 1]
push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
; Duplicate the incoming 32-bit value into r18..r21.
movw r18,r22 ; 2 *movsi/1 [length = 2]
movw r20,r24
; 65537 into r14..r17: the low word 0x0001 is built in r15:r14 (via r17
; as the ldi target), then the identical high word is produced by
; copying r15:r14 to r17:r16 with movw.
ldi r17,lo8(1) ; 9 *movsi/6 [length = 4]
mov r14,r17
clr r15
movw r16,r14
; 0xffff0408 into r10..r13: with the T flag set, bld plants a single bit
; into each cleared register (bit 3 -> 0x08 in r10, bit 2 -> 0x04 in
; r11); r12 becomes 0xff via clr followed by dec and is copied to r13.
set ; 10 *movsi/6 [length = 8]
clr r10
bld r10,3
clr r11
bld r11,2
clr r12
dec r12
mov r13,r12
rcall ibar ; 11 *call_insn/2 [length = 1]
/* epilogue start */
pop r17 ; 26 popqi [length = 1]
pop r16 ; 27 popqi [length = 1]
pop r15 ; 28 popqi [length = 1]
pop r14 ; 29 popqi [length = 1]
pop r13 ; 30 popqi [length = 1]
pop r12 ; 31 popqi [length = 1]
pop r11 ; 32 popqi [length = 1]
pop r10 ; 33 popqi [length = 1]
ret ; 34 return_from_epilogue [length = 1]
.size foo2, .-foo2
; foo3 (second listing): sets up the values for the fbar call and calls it.
; Register use visible below:
;   r18..r21 <- copy of the value held in r22..r25 on entry
;   r22..r25    left untouched
;   r14..r17 <- 0xc0400000 (the -3.0f argument of the C source)
;   r10..r13 <- 0x40000000 (the 2.0f argument of the C source)
; r10..r17 are pushed on entry and popped in reverse order before ret.
.global foo3
.type foo3, @function
foo3:
push r10 ; 16 *pushqi/1 [length = 1]
push r11 ; 17 *pushqi/1 [length = 1]
push r12 ; 18 *pushqi/1 [length = 1]
push r13 ; 19 *pushqi/1 [length = 1]
push r14 ; 20 *pushqi/1 [length = 1]
push r15 ; 21 *pushqi/1 [length = 1]
push r16 ; 22 *pushqi/1 [length = 1]
push r17 ; 23 *pushqi/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 8 */
.L__stack_usage = 8
; Duplicate the incoming 32-bit value into r18..r21.
movw r18,r22 ; 2 *movsi/1 [length = 2]
movw r20,r24
; 0xc0400000 into r14..r17: the two zero bytes are cleared, and the two
; non-zero bytes (64 = 0x40, -64 = 0xc0) are loaded directly into r16
; and r17 with ldi.
clr r14 ; 9 *movsf/6 [length = 4]
clr r15
ldi r16,lo8(64)
ldi r17,lo8(-64)
; 0x40000000 into r10..r13: all four registers are cleared and the T
; flag is copied into bit 6 of r13, giving 0x40.
clr r10 ; 10 *movsf/6 [length = 6]
clr r11
clr r12
set
clr r13
bld r13,6
rcall fbar ; 11 *call_insn/2 [length = 1]
/* epilogue start */
pop r17 ; 26 popqi [length = 1]
pop r16 ; 27 popqi [length = 1]
pop r15 ; 28 popqi [length = 1]
pop r14 ; 29 popqi [length = 1]
pop r13 ; 30 popqi [length = 1]
pop r12 ; 31 popqi [length = 1]
pop r11 ; 32 popqi [length = 1]
pop r10 ; 33 popqi [length = 1]
ret ; 34 return_from_epilogue [length = 1]
.size foo3, .-foo3
; Identification string naming the compiler that emitted this listing.
.ident "GCC: (GNU) 4.7.0 20110704 (experimental)"