This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [mips] fix $gp restore bug


Richard Sandiford <rdsandiford@googlemail.com> writes:
> Adam Nemet <anemet@caviumnetworks.com> writes:
>> Richard Sandiford writes:
>>> FWIW, the idea I'm toying with at the moment is:
>>> 
>>>   - Make every branch use pic_offset_table_rtx.
>>> 
>>>   - Try to detect cases where a function only needs a global pointer
>>>     because of branches.  In this case, emit easily-identifiable "ghost"
>>>     instructions (in the .md "type" attribute sense) to set, save and
>>>     restore the gp.
>>> 
>>>     These ghost instructions would replace the sequences we'd normally use.
>>>     They would have 0 length at this stage, and in theory shouldn't interfere
>>>     with things like scheduling.
>>> 
>>>   - At the end of md_reorg, run shorten_branches to see whether
>>>     all branches are in range.  This should be safe because we run
>>>     dbr_schedule within mips_reorg.  Nothing after mips_reorg should
>>>     change the insn stream besides shorten_branches itself.
>>> 
>>>   - If some branches are not in range, go through the insn stream and
>>>     replace all the ghost instructions with real implementations.
>>>     (This will of course invalidate the results of the shorten_branches
>>>     call, but the "real" shorten_branches pass will then calculate new
>>>     lengths.)
>>
>> This sounds like a good plan to me, FWIW.
>
> Thanks.  I tried it, and unfortunately it doesn't look like it's
> going to work.  Too much rtl infrastructure expects unconditional
> jumps to be simple sets.  I'm also not sure they really want a
> situation where inserting a jump can suddenly change the liveness
> of $gp.
>
> Oh well.  Back to the drawing board.

So, I can't really see a better way out of this than avoiding $gp,
as discussed upthread.  We just can't assume that the global pointer
is valid unless we explicitly list it as being (potentially) used.

The sequence I've used here is truly horrible though. ;(

Only lightly tested.  I don't think this patch is appropriate for 4.4,
although Nathan's patch probably is.

Richard


gcc/
	* config/mips/mips.c (mips_output_load_label): Avoid using $gp
	for TARGET_EXPLICIT_RELOCS.
	* config/mips/mips.md (length): Update the worst-case branch length
	accordingly.
	(jump): Likewise.

gcc/testsuite/
	* gcc.target/mips/branch-2.c: New test.
	* gcc.target/mips/branch-3.c: Likewise.
	* gcc.target/mips/branch-4.c: Likewise.
	* gcc.target/mips/branch-5.c: Likewise.

Index: gcc/config/mips/mips.c
===================================================================
--- gcc/config/mips/mips.c	2009-03-26 22:18:43.000000000 +0000
+++ gcc/config/mips/mips.c	2009-03-26 22:26:13.000000000 +0000
@@ -10157,19 +10157,34 @@ mips_adjust_insn_length (rtx insn, int l
 mips_output_load_label (void)
 {
   if (TARGET_EXPLICIT_RELOCS)
-    switch (mips_abi)
-      {
-      case ABI_N32:
-	return "%[lw\t%@,%%got_page(%0)(%+)\n\taddiu\t%@,%@,%%got_ofst(%0)";
-
-      case ABI_64:
-	return "%[ld\t%@,%%got_page(%0)(%+)\n\tdaddiu\t%@,%@,%%got_ofst(%0)";
+    {
+      /* We can't rely in general on the global pointer being valid
+	 at a given branch, so we use this convoluted sequence instead.
 
-      default:
-	if (ISA_HAS_LOAD_DELAY)
-	  return "%[lw\t%@,%%got(%0)(%+)%#\n\taddiu\t%@,%@,%%lo(%0)";
-	return "%[lw\t%@,%%got(%0)(%+)\n\taddiu\t%@,%@,%%lo(%0)";
-      }
+	 In principle, we could check at the end of md_reorg whether a
+	 given branch has access to $gp, store the result somehow, and
+	 use it here.  However, because we don't have a proper CFG or DFG
+	 so late in compilation, any such code is likely to be fragile.
+	 These long branches shouldn't occur very often anyway.  */
+      if (TARGET_64BIT)
+	return ("%[daddiu\t$sp,$sp,-16\n"
+		"\tsd\t$31,0($sp)\n"
+		"\t%(%<bal\t1f\n"
+		"\tlui\t%@,%%hi(%0)%>%)\n"
+		"1:\tdaddu\t%@,%@,$31\n"
+		"\tld\t$31,0($sp)\n"
+		"\tdaddiu\t%@,%@,%%lo(%0)\n"
+		"\tdaddiu\t$sp,$sp,16");
+      else
+	return ("%[addiu\t$sp,$sp,-8\n"
+		"\tsw\t$31,0($sp)\n"
+		"\t%(%<bal\t1f\n"
+		"\tlui\t%@,%%hi(%0)%>%)\n"
+		"1:\taddu\t%@,%@,$31\n"
+		"\tlw\t$31,0($sp)\n"
+		"\taddiu\t%@,%@,%%lo(%0)\n"
+		"\taddiu\t$sp,$sp,16");
+    }
   else
     {
       if (Pmode == DImode)
Index: gcc/config/mips/mips.md
===================================================================
--- gcc/config/mips/mips.md	2009-03-26 22:18:43.000000000 +0000
+++ gcc/config/mips/mips.md	2009-03-26 22:18:46.000000000 +0000
@@ -442,7 +442,13 @@ (define_attr "length" ""
 	       (ne (symbol_ref "TARGET_MIPS16") (const_int 0)))
 	  (const_int 8)
 
-	  ;; Direct branch instructions have a range of [-0x40000,0x3fffc].
+	  ;; Direct branch instructions have a range of [-0x20000,0x1fffc],
+	  ;; relative to the delay slot.  In GCC, the reference PC for
+	  ;; forward branches is the end of the delay slot while the
+	  ;; reference PC for backward branches is the start of the branch
+	  ;; itself.  Thus the maximum forward extent is 0x1fff8 (131064)
+	  ;; and the maximum backward extent is 0x1fffc (131068).
+	  ;;
 	  ;; If a branch is outside this range, we have a choice of two
 	  ;; sequences.  For PIC, an out-of-range branch like:
 	  ;;
@@ -458,8 +464,8 @@ (define_attr "length" ""
 	  ;;	nop
 	  ;; 1:
 	  ;;
-	  ;; where the load address can be up to three instructions long
-	  ;; (lw, nop, addiu).
+	  ;; where the load address can be up to eight(!) instructions long.
+	  ;; See mips_output_label for the sequences used.
 	  ;;
 	  ;; The non-PIC case is similar except that we use a direct
 	  ;; jump instead of an la/jr pair.  Since the target of this
@@ -479,7 +485,7 @@ (define_attr "length" ""
                       (le (minus (pc) (match_dup 1)) (const_int 131068)))
                   (const_int 4)
 		 (ne (symbol_ref "flag_pic") (const_int 0))
-		 (const_int 24)
+		 (const_int 44)
 		 ] (const_int 12))
 
 	  ;; "Ghost" instructions occupy no space.
@@ -5484,13 +5490,14 @@ (define_insn "jump"
    (set (attr "length")
 	;; We can't use `j' when emitting PIC.  Emit a branch if it's
 	;; in range, otherwise load the address of the branch target into
-	;; $at and then jump to it.
+	;; $at and then jump to it.  See mips_output_label for the load
+	;; sequence used.
 	(if_then_else
 	 (ior (eq (symbol_ref "flag_pic") (const_int 0))
 	      (lt (abs (minus (match_dup 0)
 			      (plus (pc) (const_int 4))))
 		  (const_int 131072)))
-	 (const_int 4) (const_int 16)))])
+	 (const_int 4) (const_int 36)))])
 
 ;; We need a different insn for the mips16, because a mips16 branch
 ;; does not have a delay slot.
Index: gcc/testsuite/gcc.target/mips/branch-2.c
===================================================================
--- /dev/null	2009-03-26 19:51:48.503884293 +0000
+++ gcc/testsuite/gcc.target/mips/branch-2.c	2009-03-26 22:21:05.000000000 +0000
@@ -0,0 +1,44 @@
+/* { dg-options "-mabicalls -mshared" } */
+/* { dg-final { scan-assembler-not "\tbal\t" } } */
+
+#define D0(X) X
+#define D1(X) X X
+#define D2(X) D1(D1(X))
+#define D3(X) D2(D1(X))
+#define D4(X) D2(D2(X))
+#define D5(X) D4(D1(X))
+#define D6(X) D4(D2(X))
+#define D7(X) D4(D2(D1(X)))
+#define D8(X) D4(D4(X))
+#define D9(X) D8(D1(X))
+#define D10(X) D8(D2(X))
+#define D11(X) D8(D2(D1(X)))
+#define D12(X) D8(D4(X))
+#define D13(X) D8(D4(D1(X)))
+
+void
+foo (volatile int *x)
+{
+  if (__builtin_expect (*x == 0, 1))
+    {
+      /* At the time of writing, each assignment takes 2 instructions:
+	 a load from the stack and a store.  This sequence therefore
+	 occupies 0x1fff8 bytes, which is the largest permissible
+	 range for natural branches.  */
+      asm("nop");
+      asm("nop");
+      D1(*x = 0;);
+      D2(*x = 0;);
+      D3(*x = 0;);
+      D4(*x = 0;);
+      D5(*x = 0;);
+      D6(*x = 0;);
+      D7(*x = 0;);
+      D8(*x = 0;);
+      D9(*x = 0;);
+      D10(*x = 0;);
+      D11(*x = 0;);
+      D12(*x = 0;);
+      D13(*x = 0;);
+    }
+}
Index: gcc/testsuite/gcc.target/mips/branch-3.c
===================================================================
--- /dev/null	2009-03-26 19:51:48.503884293 +0000
+++ gcc/testsuite/gcc.target/mips/branch-3.c	2009-03-26 22:22:07.000000000 +0000
@@ -0,0 +1,45 @@
+/* { dg-options "-mabicalls -mshared" } */
+/* { dg-final { scan-assembler "\tbal\t" } } */
+
+#define D0(X) X
+#define D1(X) X X
+#define D2(X) D1(D1(X))
+#define D3(X) D2(D1(X))
+#define D4(X) D2(D2(X))
+#define D5(X) D4(D1(X))
+#define D6(X) D4(D2(X))
+#define D7(X) D4(D2(D1(X)))
+#define D8(X) D4(D4(X))
+#define D9(X) D8(D1(X))
+#define D10(X) D8(D2(X))
+#define D11(X) D8(D2(D1(X)))
+#define D12(X) D8(D4(X))
+#define D13(X) D8(D4(D1(X)))
+
+void
+foo (volatile int *x)
+{
+  if (__builtin_expect (*x == 0, 1))
+    {
+      /* At the time of writing, each assignment takes 2 instructions:
+	 a load from the stack and a store.  This sequence therefore
+	 occupies 0x1fffc bytes, which is the smallest forbidden
+	 range for natural branches.  */
+      asm("nop");
+      asm("nop");
+      asm("nop");
+      D1(*x = 0;);
+      D2(*x = 0;);
+      D3(*x = 0;);
+      D4(*x = 0;);
+      D5(*x = 0;);
+      D6(*x = 0;);
+      D7(*x = 0;);
+      D8(*x = 0;);
+      D9(*x = 0;);
+      D10(*x = 0;);
+      D11(*x = 0;);
+      D12(*x = 0;);
+      D13(*x = 0;);
+    }
+}
Index: gcc/testsuite/gcc.target/mips/branch-4.c
===================================================================
--- /dev/null	2009-03-26 19:51:48.503884293 +0000
+++ gcc/testsuite/gcc.target/mips/branch-4.c	2009-03-26 22:21:56.000000000 +0000
@@ -0,0 +1,56 @@
+/* An executable version of branch-2.c.  */
+/* { dg-do run } */
+
+#define D0(X) X
+#define D1(X) X X
+#define D2(X) D1(D1(X))
+#define D3(X) D2(D1(X))
+#define D4(X) D2(D2(X))
+#define D5(X) D4(D1(X))
+#define D6(X) D4(D2(X))
+#define D7(X) D4(D2(D1(X)))
+#define D8(X) D4(D4(X))
+#define D9(X) D8(D1(X))
+#define D10(X) D8(D2(X))
+#define D11(X) D8(D2(D1(X)))
+#define D12(X) D8(D4(X))
+#define D13(X) D8(D4(D1(X)))
+
+void __attribute__((noinline))
+foo (volatile int *x)
+{
+  if (__builtin_expect (*x == 0, 1))
+    {
+      /* At the time of writing, each assignment takes 2 instructions:
+	 a load from the stack and a store.  This sequence therefore
+	 occupies 0x1fff8 bytes, which is the largest permissible
+	 range for natural branches.  */
+      asm("nop");
+      asm("nop");
+      D1(*x = 0;);
+      D2(*x = 0;);
+      D3(*x = 0;);
+      D4(*x = 0;);
+      D5(*x = 0;);
+      D6(*x = 0;);
+      D7(*x = 0;);
+      D8(*x = 0;);
+      D9(*x = 0;);
+      D10(*x = 0;);
+      D11(*x = 0;);
+      D12(*x = 0;);
+      D13(*x = 0;);
+    }
+}
+
+int
+main (void)
+{
+  int x = 0;
+  int y = 1;
+
+  foo (&x);
+  foo (&y);
+
+  return 0;
+}
Index: gcc/testsuite/gcc.target/mips/branch-5.c
===================================================================
--- /dev/null	2009-03-26 19:51:48.503884293 +0000
+++ gcc/testsuite/gcc.target/mips/branch-5.c	2009-03-26 22:22:40.000000000 +0000
@@ -0,0 +1,57 @@
+/* An executable version of branch-3.c.  */
+/* { dg-do run } */
+
+#define D0(X) X
+#define D1(X) X X
+#define D2(X) D1(D1(X))
+#define D3(X) D2(D1(X))
+#define D4(X) D2(D2(X))
+#define D5(X) D4(D1(X))
+#define D6(X) D4(D2(X))
+#define D7(X) D4(D2(D1(X)))
+#define D8(X) D4(D4(X))
+#define D9(X) D8(D1(X))
+#define D10(X) D8(D2(X))
+#define D11(X) D8(D2(D1(X)))
+#define D12(X) D8(D4(X))
+#define D13(X) D8(D4(D1(X)))
+
+void
+foo (volatile int *x)
+{
+  if (__builtin_expect (*x == 0, 1))
+    {
+      /* At the time of writing, each assignment takes 2 instructions:
+	 a load from the stack and a store.  This sequence therefore
+	 occupies 0x1fffc bytes, which is the smallest forbidden
+	 range for natural branches.  */
+      asm("nop");
+      asm("nop");
+      asm("nop");
+      D1(*x = 0;);
+      D2(*x = 0;);
+      D3(*x = 0;);
+      D4(*x = 0;);
+      D5(*x = 0;);
+      D6(*x = 0;);
+      D7(*x = 0;);
+      D8(*x = 0;);
+      D9(*x = 0;);
+      D10(*x = 0;);
+      D11(*x = 0;);
+      D12(*x = 0;);
+      D13(*x = 0;);
+    }
+}
+
+int
+main (void)
+{
+  int x = 0;
+  int y = 1;
+
+  foo (&x);
+  foo (&y);
+
+  return 0;
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]