This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug target/60300] New: [avr] Suboptimal stack pointer manipulation for frame setup


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60300

            Bug ID: 60300
           Summary: [avr] Suboptimal stack pointer manipulation for frame
                    setup
           Product: gcc
           Version: 4.8.2
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: matthijs at stdin dot nl

For setting up the stack frame in the function prologue, gcc chooses between
either directly manipulating the stack pointer with "rcall ." and "push"
instructions, or copying it to the frame pointer, modifying that and copying it
back, depending on which is shorter.

However, when the frame size is 7 or more, gcc picks the frame-pointer
approach, even when the direct manipulation approach would be shorter.

Here's the example (lines with dashes added by me to indicate the
relevant code):

$ cat foo.c
#include <stdint.h>

void bar(uint8_t *);

void foo() {
        uint8_t x[SIZE];
        bar(x);
}

$ diff -u <(avr-gcc -DSIZE=6 -c foo.c -o - -S) <(avr-gcc -D SIZE=7 -c foo.c -o
- -S)
--- /dev/fd/63  2014-02-21 13:04:18.531142523 +0100
+++ /dev/fd/62  2014-02-21 13:04:18.535142628 +0100
@@ -10,21 +10,24 @@
 foo:
        push r28
        push r29
-       rcall .
-       rcall .
-       rcall .
        in r28,__SP_L__
        in r29,__SP_H__
+       sbiw r28,7
+       in __tmp_reg__,__SREG__
+       cli
+       out __SP_H__,r29
+       out __SREG__,__tmp_reg__
+       out __SP_L__,r28
 /* prologue: function */
-/* frame size = 6 */
-/* stack size = 8 */
-.L__stack_usage = 8
+/* frame size = 7 */
+/* stack size = 9 */
+.L__stack_usage = 9
        mov r24,r28
        mov r25,r29
        adiw r24,1
        rcall bar
 /* epilogue start */
-       adiw r28,6
+       adiw r28,7
        in __tmp_reg__,__SREG__
        cli
        out __SP_H__,r29

As you can see, for SIZE=7 it switches to a 6-instruction sequence, when a
4-instruction sequence (3x rcall + 1x push) would also suffice.


Relevant code seems to be avr_prologue_setup_frame and avr_out_addto_sp:
 - https://github.com/mirrors/gcc/blob/c2e306f5efb32b7eed856a1844487cff09aa86ac/gcc/config/avr/avr.c#L1109-L1278
 - https://github.com/mirrors/gcc/blob/c2e306f5efb32b7eed856a1844487cff09aa86ac/gcc/config/avr/avr.c#L7002-L7014

That code tries both approaches to see which one is smaller, so
presumably it gets the size of either of them wrong and thus makes the
wrong decision.



Note that for the epilogue, the compiler has the turnover point at the expected
5/6 bytes of frame size:

$ diff -u <(avr-gcc -DSIZE=5 -c foo.c -o - -S) <(avr-gcc -D SIZE=6 -c foo.c -o
- -S)
--- /dev/fd/63  2014-02-21 13:05:55.825616219 +0100
+++ /dev/fd/62  2014-02-21 13:05:55.821616121 +0100
@@ -12,23 +12,24 @@
        push r29
        rcall .
        rcall .
-       push __zero_reg__
+       rcall .
        in r28,__SP_L__
        in r29,__SP_H__
 /* prologue: function */
-/* frame size = 5 */
-/* stack size = 7 */
-.L__stack_usage = 7
+/* frame size = 6 */
+/* stack size = 8 */
+.L__stack_usage = 8
        mov r24,r28
        mov r25,r29
        adiw r24,1
        rcall bar
 /* epilogue start */
-       pop __tmp_reg__
-       pop __tmp_reg__
-       pop __tmp_reg__
-       pop __tmp_reg__
-       pop __tmp_reg__
+       adiw r28,6
+       in __tmp_reg__,__SREG__
+       cli
+       out __SP_H__,r29
+       out __SREG__,__tmp_reg__
+       out __SP_L__,r28
        pop r29
        pop r28
        ret


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]