This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug other/50775] New: Register allocator sets up frame and frame pointer with low register pressure


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50775

             Bug #: 50775
           Summary: Register allocator sets up frame and frame pointer
                    with low register pressure
    Classification: Unclassified
           Product: gcc
           Version: 4.7.0
            Status: UNCONFIRMED
          Keywords: ra
          Severity: normal
          Priority: P3
         Component: other
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: gjl@gcc.gnu.org
                CC: eric.weddington@atmel.com
            Target: avr


In the function below, IRA/reload generates a frame/frame pointer and spills
one pointer variable to the frame.

This is bad code because many hard registers are free, so there is no need to
set up a frame and thereby take away the frame pointer register Y. Note that on
this hardware it is not possible to address the frame via the stack pointer.

== Background ==

The issue is that AVR has only 3 pointer registers X, Y, and Z with the
following addressing capabilities:

 *X, *X++, *--X             (R27:R26, call-clobbered)
 *Y, *Y++, *--Y, *(Y+const) (R29:R28, call-saved, frame pointer)
 *Z, *Z++, *--Z, *(Z+const) (R31:R30, call-clobbered)

Older versions of the compiler, prior to 4.7 trunk r179993, allowed an
addressing mode *(X+const) and emulated it by emitting an instruction sequence
such as

  X = X + const
  r = *X
  X = X - const

which was only a rare corner case in the old register allocator, but in the new
allocator this sequence is seen very often leading to code bloat of +50% for
some real-world functions.

This is the reason why -mstrict-X has been added to the AVR backend, see
PR46278. This option forbids the fake *(X+const) addressing, but it leads to the
spills from the register allocator described above and to code that is even
worse than the code generated without -mstrict-X.

== Source ==

/* Test-case record of four byte-sized fields.  func1 below reads a, b and c
   only; d is never accessed. */
typedef struct
{
    unsigned char a, b, c, d;
} s_t;

/* Reproducer: returns the sum of the a, b and c fields of the three structs
   pointed to by x, y and z.  The sum is accumulated in an unsigned char, so
   it wraps.  The accesses are interleaved field by field, so each of the
   three pointers is dereferenced again after the other two have been used. */
unsigned char func1 (s_t *x, s_t *y, s_t *z)
{
    unsigned char s = 0;
    /* Field a of each struct. */
    s += x->a;
    s += y->a;
    s += z->a;

    /* Field b of each struct. */
    s += x->b;
    s += y->b;
    s += z->b;

    /* Field c of each struct. */
    s += x->c;
    s += y->c;
    s += z->c;

    return s;
}

== Assembler output ==

Besides the bad code that inappropriately sets up a frame, note that the frame
is 4 bytes but only 2 bytes (Y+1,Y+2) are actually used.

;; func1 as emitted by avr-gcc for the C reproducer.
;; Lines beginning with ';;' are reader annotations, not compiler output.
func1:
    ;; Save Y (r28/r29): it is call-saved and is used as the frame pointer.
    push r28     ;  45    pushqi1/1    [length = 1]
    push r29     ;  46    pushqi1/1    [length = 1]
    ;; Allocate the 4-byte frame; the two 'rcall .' are the emitted SP -= 4.
     ; SP -= 4     ;  50    *addhi3_sp_R    [length = 2]
    rcall .
    rcall .
    ;; Y = SP, so the frame can be addressed as Y+const.
    in r28,__SP_L__     ;  51    *movhi/8    [length = 2]
    in r29,__SP_H__
/* prologue: function */
/* frame size = 4 */
/* stack size = 6 */
.L__stack_usage = 6
    ;; Spill the pointer held in r25:r24 to Y+1/Y+2 (presumably the first
    ;; argument x -- confirm against the avr-gcc ABI).  These are the only
    ;; two frame bytes that are ever accessed.
    std Y+2,r25     ;  2    *movhi/4    [length = 2]
    std Y+1,r24
    movw r26,r22     ;  37    *movhi/1    [length = 1]
    ld r25,X     ;  8    movqi_insn/4    [length = 1]
    ;; First reload of the spilled pointer into Z.
    ldd r30,Y+1     ;  38    *movhi/3    [length = 2]
    ldd r31,Y+2
    ld r24,Z     ;  9    movqi_insn/4    [length = 1]
    add r25,r24     ;  10    addqi3/1    [length = 1]
    movw r26,r20     ;  39    *movhi/1    [length = 1]
    ld r24,X     ;  11    movqi_insn/4    [length = 1]
    add r25,r24     ;  12    addqi3/1    [length = 1]
    ldd r24,Z+1     ;  13    movqi_insn/4    [length = 1]
    add r25,r24     ;  14    addqi3/1    [length = 1]
    movw r30,r22     ;  40    *movhi/1    [length = 1]
    ldd r24,Z+1     ;  15    movqi_insn/4    [length = 1]
    add r25,r24     ;  16    addqi3/1    [length = 1]
    movw r30,r20     ;  41    *movhi/1    [length = 1]
    ldd r24,Z+1     ;  17    movqi_insn/4    [length = 1]
    add r25,r24     ;  18    addqi3/1    [length = 1]
    ;; Second reload of the spilled pointer into Z.
    ldd r30,Y+1     ;  42    *movhi/3    [length = 2]
    ldd r31,Y+2
    ldd r24,Z+2     ;  19    movqi_insn/4    [length = 1]
    add r25,r24     ;  20    addqi3/1    [length = 1]
    movw r30,r22     ;  43    *movhi/1    [length = 1]
    ldd r24,Z+2     ;  21    movqi_insn/4    [length = 1]
    add r25,r24     ;  22    addqi3/1    [length = 1]
    movw r30,r20     ;  44    *movhi/1    [length = 1]
    ldd r24,Z+2     ;  23    movqi_insn/4    [length = 1]
    ;; Final addition leaves the accumulated sum in r24.
    add r24,r25     ;  29    addqi3/1    [length = 1]
/* epilogue start */
    ;; Release the 4-byte frame with four pops into __tmp_reg__, then
    ;; restore Y in reverse order of the prologue pushes.
     ; SP += 4     ;  56    *addhi3_sp_R    [length = 4]
    pop __tmp_reg__
    pop __tmp_reg__
    pop __tmp_reg__
    pop __tmp_reg__
    pop r29     ;  57    popqi    [length = 1]
    pop r28     ;  58    popqi    [length = 1]
    ret     ;  59    return_from_epilogue    [length = 1]
    .size    func1, .-func1
    .ident    "GCC: (GNU) 4.7.0 20111017 (experimental)"

== Command line ==

$ avr-gcc in.c -c -save-temps -dp -Os -mmcu=avr4 -mstrict-X -v

Same happens with -O2, -O3 or with -fira-algorithm=priority.

Target: avr
Configured with: ../../gcc.gnu.org/trunk/configure --target=avr
--prefix=/local/gnu/install/gcc-4.7 --disable-nls --disable-shared
--enable-languages=c,c++ --with-dwarf2 --disable-lto --enable-checking=yes,rtl

Thread model: single

gcc version 4.7.0 20111017 (experimental) (GCC)
GNU C (GCC) version 4.7.0 20111017 (experimental) (avr)
        compiled by GNU C version 4.3.2 [gcc-4_3-branch revision 141291], GMP
version 5.0.1, MPFR version 3.0.0-p8, MPC version 0.8.2


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]