This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug other/50775] New: Register allocator sets up frame and frame pointer with low register pressure
- From: "gjl at gcc dot gnu.org" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Tue, 18 Oct 2011 11:43:55 +0000
- Subject: [Bug other/50775] New: Register allocator sets up frame and frame pointer with low register pressure
- Auto-submitted: auto-generated
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50775
Bug #: 50775
Summary: Register allocator sets up frame and frame pointer
with low register pressure
Classification: Unclassified
Product: gcc
Version: 4.7.0
Status: UNCONFIRMED
Keywords: ra
Severity: normal
Priority: P3
Component: other
AssignedTo: unassigned@gcc.gnu.org
ReportedBy: gjl@gcc.gnu.org
CC: eric.weddington@atmel.com
Target: avr
In the function below, IRA/reload sets up a frame and a frame pointer and spills
one pointer variable to the frame.
This is bad code because many hard registers are free, so there is no need to
set up a frame or to take away the frame pointer register Y. Notice that on
the hardware, it is not possible to address the frame via the stack pointer.
== Background ==
The issue is that AVR has only 3 pointer registers X, Y, and Z with the
following addressing capabilities:
*X, *X++, *--X (R27:R26, call-clobbered)
*Y, *Y++, *--Y, *(Y+const) (R29:R28, call-saved, frame pointer)
*Z, *Z++, *--Z, *(Z+const) (R31:R30, call-clobbered)
Older versions of the compiler, prior to 4.7 trunk r179993, allowed the addressing
mode *(X+const) and emulated it by emitting an instruction sequence such
as
X = X + const
r = *X
X = X - const
This was only a rare corner case with the old register allocator, but with the new
allocator this sequence is seen very often, leading to code bloat of +50% for
some real-world functions.
This is the reason why -mstrict-X has been added to the AVR backend, see
PR46278. This option disallows the fake *(X+const) addressing, but it leads to
the spills from the register allocator mentioned above and to code that is even
worse than without -mstrict-X.
== Source ==
/* Four single-byte fields; func1 below reads only a, b and c.  */
typedef struct
{
unsigned char a, b, c, d;
} s_t;
/* Test case: return the sum of the a, b and c fields of *x, *y and *z,
   accumulated in an unsigned char.  The loads are interleaved across the
   three pointers (x, y, z for each field in turn), so all three pointers
   are used from the first field through the last.  Field d is never read.  */
unsigned char func1 (s_t *x, s_t *y, s_t *z)
{
unsigned char s = 0;
/* Field a of each struct.  */
s += x->a;
s += y->a;
s += z->a;
/* Field b of each struct.  */
s += x->b;
s += y->b;
s += z->b;
/* Field c of each struct.  */
s += x->c;
s += y->c;
s += z->c;
return s;
}
== Assembler output ==
Besides the bad code from inappropriately setting up a frame, note that the frame
is 4 bytes but only 2 bytes (Y+1,Y+2) are actually used.
; func1 as emitted by avr-gcc for the C test case in "== Source ==".
; The trailing comment on each instruction is compiler-generated
; (insn number, pattern name, length); the command line uses -dp.
;
; Register usage visible in this listing:
;   r25:r24  stored to the frame at Y+1..Y+2, reloaded into Z twice
;            (insns 38 and 42)
;   r23:r22  copied into X or Z with movw before each access
;   r21:r20  copied into X or Z with movw before each access
;   r25      running sum; the final add leaves the result in r24
; NOTE(review): presumably r25:r24 / r23:r22 / r21:r20 hold x / y / z on
; entry per the avr-gcc calling convention -- confirm against the ABI.
func1:
; Save Y (r29:r28); it is used as the frame pointer below.
push r28 ; 45 pushqi1/1 [length = 1]
push r29 ; 46 pushqi1/1 [length = 1]
; SP -= 4 ; 50 *addhi3_sp_R [length = 2]
; The two "rcall ." realize the SP -= 4 above, reserving the 4-byte frame.
rcall .
rcall .
; Y = SP: copy the stack pointer into the frame pointer.
in r28,__SP_L__ ; 51 *movhi/8 [length = 2]
in r29,__SP_H__
/* prologue: function */
/* frame size = 4 */
/* stack size = 6 */
.L__stack_usage = 6
; Spill r25:r24 to the frame; only Y+1 and Y+2 of the 4 bytes are used.
std Y+2,r25 ; 2 *movhi/4 [length = 2]
std Y+1,r24
movw r26,r22 ; 37 *movhi/1 [length = 1]
ld r25,X ; 8 movqi_insn/4 [length = 1]
; First reload of the spilled pointer into Z.
ldd r30,Y+1 ; 38 *movhi/3 [length = 2]
ldd r31,Y+2
ld r24,Z ; 9 movqi_insn/4 [length = 1]
add r25,r24 ; 10 addqi3/1 [length = 1]
movw r26,r20 ; 39 *movhi/1 [length = 1]
ld r24,X ; 11 movqi_insn/4 [length = 1]
add r25,r24 ; 12 addqi3/1 [length = 1]
ldd r24,Z+1 ; 13 movqi_insn/4 [length = 1]
add r25,r24 ; 14 addqi3/1 [length = 1]
movw r30,r22 ; 40 *movhi/1 [length = 1]
ldd r24,Z+1 ; 15 movqi_insn/4 [length = 1]
add r25,r24 ; 16 addqi3/1 [length = 1]
movw r30,r20 ; 41 *movhi/1 [length = 1]
ldd r24,Z+1 ; 17 movqi_insn/4 [length = 1]
add r25,r24 ; 18 addqi3/1 [length = 1]
; Second reload of the spilled pointer into Z.
ldd r30,Y+1 ; 42 *movhi/3 [length = 2]
ldd r31,Y+2
ldd r24,Z+2 ; 19 movqi_insn/4 [length = 1]
add r25,r24 ; 20 addqi3/1 [length = 1]
movw r30,r22 ; 43 *movhi/1 [length = 1]
ldd r24,Z+2 ; 21 movqi_insn/4 [length = 1]
add r25,r24 ; 22 addqi3/1 [length = 1]
movw r30,r20 ; 44 *movhi/1 [length = 1]
ldd r24,Z+2 ; 23 movqi_insn/4 [length = 1]
; Final add: the sum ends up in r24.
add r24,r25 ; 29 addqi3/1 [length = 1]
/* epilogue start */
; SP += 4 ; 56 *addhi3_sp_R [length = 4]
; Four pops into __tmp_reg__ release the 4-byte frame; then Y is restored.
pop __tmp_reg__
pop __tmp_reg__
pop __tmp_reg__
pop __tmp_reg__
pop r29 ; 57 popqi [length = 1]
pop r28 ; 58 popqi [length = 1]
ret ; 59 return_from_epilogue [length = 1]
.size func1, .-func1
.ident "GCC: (GNU) 4.7.0 20111017 (experimental)"
== Command line ==
$ avr-gcc in.c -c -save-temps -dp -Os -mmcu=avr4 -mstrict-X -v
Same happens with -O2, -O3 or with -fira-algorithm=priority.
Target: avr
Configured with: ../../gcc.gnu.org/trunk/configure --target=avr
--prefix=/local/gnu/install/gcc-4.7 --disable-nls --disable-shared
--enable-languages=c,c++ --with-dwarf2 --disable-lto --enable-checking=yes,rtl
Thread model: single
gcc version 4.7.0 20111017 (experimental) (GCC)
GNU C (GCC) version 4.7.0 20111017 (experimental) (avr)
compiled by GNU C version 4.3.2 [gcc-4_3-branch revision 141291], GMP
version 5.0.1, MPFR version 3.0.0-p8, MPC version 0.8.2