This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [mips] fix $gp restore bug
Richard Sandiford <rdsandiford@googlemail.com> writes:
> Adam Nemet <anemet@caviumnetworks.com> writes:
>> Richard Sandiford writes:
>>> FWIW, the idea I'm toying with at the moment is:
>>>
>>> - Make every branch use pic_offset_table_rtx.
>>>
>>> - Try to detect cases where a function only needs a global pointer
>>> because of branches. In this case, emit easily-identifiable "ghost"
>>> instructions (in the .md "type" attribute sense) to set, save and
>>> restore the gp.
>>>
>>> These ghost instructions would replace the sequences we'd normally use.
>>> They would have 0 length at this stage, and in theory shouldn't interfere
>>> with things like scheduling.
>>>
>>> - At the end of md_reorg, run shorten_branches to see whether
>>> all branches are in range. This should be safe because we run
>>> dbr_schedule within mips_reorg. Nothing after mips_reorg should
>>> change the insn stream besides shorten_branches itself.
>>>
>>> - If some branches are not in range, go through the insn stream and
>>> replace all the ghost instructions with real implementations.
>>> (This will of course invalidate the results of the shorten_branches
>>> call, but the "real" shorten_branches pass will then calculate new
>>> lengths.)
>>
>> This sounds like a good plan to me, FWIW.
>
> Thanks. I tried it, and unfortunately it doesn't look like it's
> going to work. Too much rtl infrastructure expects unconditional
> jumps to be simple sets. I'm also not sure they really want a
> situation where inserting a jump can suddenly change the liveness
> of $gp.
>
> Oh well. Back to the drawing board.
So, I can't really see a better way out of this than avoiding $gp,
as discussed upthread. We just can't assume that the global pointer
is valid unless we explicitly list it as being (potentially) used.
The sequence I've used here is truly horrible though. ;(
Only lightly tested. I don't think this patch is appropriate for 4.4,
although Nathan's patch probably is.
Richard
gcc/
* config/mips/mips.c (mips_output_load_label): Avoid using $gp
for TARGET_EXPLICIT_RELOCS.
* config/mips/mips.md (length): Update the worst-case branch length
accordingly.
(jump): Likewise.
gcc/testsuite/
* gcc.target/mips/branch-2.c: New test.
* gcc.target/mips/branch-3.c: Likewise.
* gcc.target/mips/branch-4.c: Likewise.
* gcc.target/mips/branch-5.c: Likewise.
Index: gcc/config/mips/mips.c
===================================================================
--- gcc/config/mips/mips.c 2009-03-26 22:18:43.000000000 +0000
+++ gcc/config/mips/mips.c 2009-03-26 22:26:13.000000000 +0000
@@ -10157,19 +10157,34 @@ mips_adjust_insn_length (rtx insn, int l
mips_output_load_label (void)
{
if (TARGET_EXPLICIT_RELOCS)
- switch (mips_abi)
- {
- case ABI_N32:
- return "%[lw\t%@,%%got_page(%0)(%+)\n\taddiu\t%@,%@,%%got_ofst(%0)";
-
- case ABI_64:
- return "%[ld\t%@,%%got_page(%0)(%+)\n\tdaddiu\t%@,%@,%%got_ofst(%0)";
+ {
+ /* We can't rely in general on the global pointer being valid
+ at a given branch, so we use this convoluted sequence instead.
- default:
- if (ISA_HAS_LOAD_DELAY)
- return "%[lw\t%@,%%got(%0)(%+)%#\n\taddiu\t%@,%@,%%lo(%0)";
- return "%[lw\t%@,%%got(%0)(%+)\n\taddiu\t%@,%@,%%lo(%0)";
- }
+ In principle, we could check at the end of md_reorg whether a
+ given branch has access to $gp, store the result somehow, and
+ use it here. However, because we don't have a proper CFG or DFG
+ so late in compilation, any such code is likely to be fragile.
+ These long branches shouldn't occur very often anyway. */
+ if (TARGET_64BIT)
+ return ("%[daddiu\t$sp,$sp,-16\n"
+ "\tsd\t$31,0($sp)\n"
+ "\t%(%<bal\t1f\n"
+ "\tlui\t%@,%%hi(%0)%>%)\n"
+ "1:\tdaddu\t%@,%@,$31\n"
+ "\tld\t$31,0($sp)\n"
+ "\tdaddiu\t%@,%@,%%lo(%0)\n"
+ "\tdaddiu\t$sp,$sp,16");
+ else
+ return ("%[addiu\t$sp,$sp,-8\n"
+ "\tsw\t$31,0($sp)\n"
+ "\t%(%<bal\t1f\n"
+ "\tlui\t%@,%%hi(%0)%>%)\n"
+ "1:\taddu\t%@,%@,$31\n"
+ "\tlw\t$31,0($sp)\n"
+ "\taddiu\t%@,%@,%%lo(%0)\n"
+ "\taddiu\t$sp,$sp,16");
+ }
else
{
if (Pmode == DImode)
Index: gcc/config/mips/mips.md
===================================================================
--- gcc/config/mips/mips.md 2009-03-26 22:18:43.000000000 +0000
+++ gcc/config/mips/mips.md 2009-03-26 22:18:46.000000000 +0000
@@ -442,7 +442,13 @@ (define_attr "length" ""
(ne (symbol_ref "TARGET_MIPS16") (const_int 0)))
(const_int 8)
- ;; Direct branch instructions have a range of [-0x40000,0x3fffc].
+ ;; Direct branch instructions have a range of [-0x20000,0x1fffc],
+ ;; relative to the delay slot. In GCC, the reference PC for
+ ;; forward branches is the end of the delay slot while the
+ ;; reference PC for backward branches is the start of the branch
+ ;; itself. Thus the maximum forward extent is 0x1fff8 (131064)
+ ;; and the maximum backward extent is 0x1fffc (131068).
+ ;;
;; If a branch is outside this range, we have a choice of two
;; sequences. For PIC, an out-of-range branch like:
;;
@@ -458,8 +464,8 @@ (define_attr "length" ""
;; nop
;; 1:
;;
- ;; where the load address can be up to three instructions long
- ;; (lw, nop, addiu).
+ ;; where the load address can be up to eight(!) instructions long.
+ ;; See mips_output_label for the sequences used.
;;
;; The non-PIC case is similar except that we use a direct
;; jump instead of an la/jr pair. Since the target of this
@@ -479,7 +485,7 @@ (define_attr "length" ""
(le (minus (pc) (match_dup 1)) (const_int 131068)))
(const_int 4)
(ne (symbol_ref "flag_pic") (const_int 0))
- (const_int 24)
+ (const_int 44)
] (const_int 12))
;; "Ghost" instructions occupy no space.
@@ -5484,13 +5490,14 @@ (define_insn "jump"
(set (attr "length")
;; We can't use `j' when emitting PIC. Emit a branch if it's
;; in range, otherwise load the address of the branch target into
- ;; $at and then jump to it.
+ ;; $at and then jump to it. See mips_output_label for the load
+ ;; sequence used.
(if_then_else
(ior (eq (symbol_ref "flag_pic") (const_int 0))
(lt (abs (minus (match_dup 0)
(plus (pc) (const_int 4))))
(const_int 131072)))
- (const_int 4) (const_int 16)))])
+ (const_int 4) (const_int 36)))])
;; We need a different insn for the mips16, because a mips16 branch
;; does not have a delay slot.
Index: gcc/testsuite/gcc.target/mips/branch-2.c
===================================================================
--- /dev/null 2009-03-26 19:51:48.503884293 +0000
+++ gcc/testsuite/gcc.target/mips/branch-2.c 2009-03-26 22:21:05.000000000 +0000
@@ -0,0 +1,44 @@
+/* { dg-options "-mabicalls -mshared" } */
+/* { dg-final { scan-assembler-not "\tbal\t" } } */
+
+#define D0(X) X
+#define D1(X) X X
+#define D2(X) D1(D1(X))
+#define D3(X) D2(D1(X))
+#define D4(X) D2(D2(X))
+#define D5(X) D4(D1(X))
+#define D6(X) D4(D2(X))
+#define D7(X) D4(D2(D1(X)))
+#define D8(X) D4(D4(X))
+#define D9(X) D8(D1(X))
+#define D10(X) D8(D2(X))
+#define D11(X) D8(D2(D1(X)))
+#define D12(X) D8(D4(X))
+#define D13(X) D8(D4(D1(X)))
+
+void
+foo (volatile int *x)
+{
+ if (__builtin_expect (*x == 0, 1))
+ {
+ /* At the time of writing, each assignment takes 2 instructions:
+ a load from the stack and a store. This sequence therefore
+ occupies 0x1fff8 bytes, which is the largest permissible
+ range for natural branches. */
+ asm("nop");
+ asm("nop");
+ D1(*x = 0;);
+ D2(*x = 0;);
+ D3(*x = 0;);
+ D4(*x = 0;);
+ D5(*x = 0;);
+ D6(*x = 0;);
+ D7(*x = 0;);
+ D8(*x = 0;);
+ D9(*x = 0;);
+ D10(*x = 0;);
+ D11(*x = 0;);
+ D12(*x = 0;);
+ D13(*x = 0;);
+ }
+}
Index: gcc/testsuite/gcc.target/mips/branch-3.c
===================================================================
--- /dev/null 2009-03-26 19:51:48.503884293 +0000
+++ gcc/testsuite/gcc.target/mips/branch-3.c 2009-03-26 22:22:07.000000000 +0000
@@ -0,0 +1,45 @@
+/* { dg-options "-mabicalls -mshared" } */
+/* { dg-final { scan-assembler "\tbal\t" } } */
+
+#define D0(X) X
+#define D1(X) X X
+#define D2(X) D1(D1(X))
+#define D3(X) D2(D1(X))
+#define D4(X) D2(D2(X))
+#define D5(X) D4(D1(X))
+#define D6(X) D4(D2(X))
+#define D7(X) D4(D2(D1(X)))
+#define D8(X) D4(D4(X))
+#define D9(X) D8(D1(X))
+#define D10(X) D8(D2(X))
+#define D11(X) D8(D2(D1(X)))
+#define D12(X) D8(D4(X))
+#define D13(X) D8(D4(D1(X)))
+
+void
+foo (volatile int *x)
+{
+ if (__builtin_expect (*x == 0, 1))
+ {
+ /* At the time of writing, each assignment takes 2 instructions:
+ a load from the stack and a store. This sequence therefore
+ occupies 0x1fffc bytes, which is the smallest forbidden
+ range for natural branches. */
+ asm("nop");
+ asm("nop");
+ asm("nop");
+ D1(*x = 0;);
+ D2(*x = 0;);
+ D3(*x = 0;);
+ D4(*x = 0;);
+ D5(*x = 0;);
+ D6(*x = 0;);
+ D7(*x = 0;);
+ D8(*x = 0;);
+ D9(*x = 0;);
+ D10(*x = 0;);
+ D11(*x = 0;);
+ D12(*x = 0;);
+ D13(*x = 0;);
+ }
+}
Index: gcc/testsuite/gcc.target/mips/branch-4.c
===================================================================
--- /dev/null 2009-03-26 19:51:48.503884293 +0000
+++ gcc/testsuite/gcc.target/mips/branch-4.c 2009-03-26 22:21:56.000000000 +0000
@@ -0,0 +1,56 @@
+/* An executable version of branch-2.c. */
+/* { dg-do run } */
+
+#define D0(X) X
+#define D1(X) X X
+#define D2(X) D1(D1(X))
+#define D3(X) D2(D1(X))
+#define D4(X) D2(D2(X))
+#define D5(X) D4(D1(X))
+#define D6(X) D4(D2(X))
+#define D7(X) D4(D2(D1(X)))
+#define D8(X) D4(D4(X))
+#define D9(X) D8(D1(X))
+#define D10(X) D8(D2(X))
+#define D11(X) D8(D2(D1(X)))
+#define D12(X) D8(D4(X))
+#define D13(X) D8(D4(D1(X)))
+
+void __attribute__((noinline))
+foo (volatile int *x)
+{
+ if (__builtin_expect (*x == 0, 1))
+ {
+ /* At the time of writing, each assignment takes 2 instructions:
+ a load from the stack and a store. This sequence therefore
+ occupies 0x1fff8 bytes, which is the largest permissible
+ range for natural branches. */
+ asm("nop");
+ asm("nop");
+ D1(*x = 0;);
+ D2(*x = 0;);
+ D3(*x = 0;);
+ D4(*x = 0;);
+ D5(*x = 0;);
+ D6(*x = 0;);
+ D7(*x = 0;);
+ D8(*x = 0;);
+ D9(*x = 0;);
+ D10(*x = 0;);
+ D11(*x = 0;);
+ D12(*x = 0;);
+ D13(*x = 0;);
+ }
+}
+
+int
+main (void)
+{
+ int x = 0;
+ int y = 1;
+
+ foo (&x);
+ foo (&y);
+
+ return 0;
+}
Index: gcc/testsuite/gcc.target/mips/branch-5.c
===================================================================
--- /dev/null 2009-03-26 19:51:48.503884293 +0000
+++ gcc/testsuite/gcc.target/mips/branch-5.c 2009-03-26 22:22:40.000000000 +0000
@@ -0,0 +1,57 @@
+/* An executable version of branch-3.c. */
+/* { dg-do run } */
+
+#define D0(X) X
+#define D1(X) X X
+#define D2(X) D1(D1(X))
+#define D3(X) D2(D1(X))
+#define D4(X) D2(D2(X))
+#define D5(X) D4(D1(X))
+#define D6(X) D4(D2(X))
+#define D7(X) D4(D2(D1(X)))
+#define D8(X) D4(D4(X))
+#define D9(X) D8(D1(X))
+#define D10(X) D8(D2(X))
+#define D11(X) D8(D2(D1(X)))
+#define D12(X) D8(D4(X))
+#define D13(X) D8(D4(D1(X)))
+
+void
+foo (volatile int *x)
+{
+ if (__builtin_expect (*x == 0, 1))
+ {
+ /* At the time of writing, each assignment takes 2 instructions:
+ a load from the stack and a store. This sequence therefore
+ occupies 0x1fffc bytes, which is the smallest forbidden
+ range for natural branches. */
+ asm("nop");
+ asm("nop");
+ asm("nop");
+ D1(*x = 0;);
+ D2(*x = 0;);
+ D3(*x = 0;);
+ D4(*x = 0;);
+ D5(*x = 0;);
+ D6(*x = 0;);
+ D7(*x = 0;);
+ D8(*x = 0;);
+ D9(*x = 0;);
+ D10(*x = 0;);
+ D11(*x = 0;);
+ D12(*x = 0;);
+ D13(*x = 0;);
+ }
+}
+
+int
+main (void)
+{
+ int x = 0;
+ int y = 1;
+
+ foo (&x);
+ foo (&y);
+
+ return 0;
+}