This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [committed] PR 51931: force non-MIPS16ness for long-branch tests (NOW RFA: MIPS16 Long Branch Patch)


Sorry for the late reply.  Been thinking it over a bit more.

"Moore, Catherine" <Catherine_Moore@mentor.com> writes:
> Now that we are in the window for 4.8, I'd like to discuss the
> possibility of applying this patch.  Have you had a chance to think
> about it?

In the end, I think we should go for the fix originally suggested
in the PR: a sequence that saves a scratch register to $1 and restores
it in the delay slot.  I checked with the MIPS UK guys that there aren't
any known gotchas with doing that, which shows how long this has been
in the making.

The quality of the scratch sequence is clearly vastly inferior to your
version in the "common" case, but the point is that that case isn't
really all that common, and this way should at least be fully general.
The problem with inserting branch trampolines separately from laying out
the constant pool is that one could interfere with the other.  There are
also corner cases where we need more trampolines than we have room.

There's still the possiblity of using trampolines as an optimisation
in cases where we know its safe, but I'm not sure it's worth the effort.

Tested on mips64-linux-gnu, both with -mips16 and -mips16/-fpic.
I did a run with the patch as below, and a run with the extended
branches commented out (so that branches were either unextended
or long).  Applied.

Richard


gcc/
	PR target/51931
	* config/mips/mips-protos.h (mips_strip_unspec_address): Declare.
	* config/mips/mips.c (mips_strip_unspec_address): Make extern.
	(mips16_rewrite_pool_constant): Make a copy of the pool constant
	before adding to a PC-relative table.
	(mips16_lay_out_constants): Add a SPLIT_P parameter.
	(mips16_load_branch_target, mips16_split_long_branches): New functions.
	(mips_reorg): Update call to mips16_lay_out_constants.
	Call mips16_split_long_branches.
	* config/mips/predicates.md (pc_or_label_operand): Delete.
	* config/mips/mips.md (length): Add a calculation for MIPS16 branches.
	Move the extended_mips16 handling further down.
	(*branch_equality<mode>_mips16): Replace use pc_or_label_operand
	with explicit label_ref and pc.  Follow the usual operand numbering.
	(*branch_equality<mode>_mips16_inverted): New pattern.
	(*jump_mips16): Add length attribute.
	(indirect_jump_and_restore_<mode>): New pattern.
	(consttable_int): Call mips_strip_unspec_address on the operand.

gcc/testsuite/
	PR target/51931
	* gcc.c-torture/compile/20001226-1.c: Remove nomips16 attribute.
	* g++.dg/opt/longbranch1.C: Likewise.

Index: gcc/config/mips/mips-protos.h
===================================================================
--- gcc/config/mips/mips-protos.h	2012-08-02 21:19:07.323527035 +0100
+++ gcc/config/mips/mips-protos.h	2012-08-02 21:51:23.509522298 +0100
@@ -190,6 +190,7 @@ extern rtx mips_pic_base_register (rtx);
 extern rtx mips_got_load (rtx, rtx, enum mips_symbol_type);
 extern bool mips_split_symbol (rtx, rtx, enum machine_mode, rtx *);
 extern rtx mips_unspec_address (rtx, enum mips_symbol_type);
+extern rtx mips_strip_unspec_address (rtx);
 extern void mips_move_integer (rtx, rtx, unsigned HOST_WIDE_INT);
 extern bool mips_legitimize_move (enum machine_mode, rtx, rtx);
 
Index: gcc/config/mips/mips.c
===================================================================
--- gcc/config/mips/mips.c	2012-08-02 21:19:07.322527035 +0100
+++ gcc/config/mips/mips.c	2012-08-02 21:51:23.503522297 +0100
@@ -2563,7 +2563,7 @@ mips_unspec_address (rtx address, enum m
 /* If OP is an UNSPEC address, return the address to which it refers,
    otherwise return OP itself.  */
 
-static rtx
+rtx
 mips_strip_unspec_address (rtx op)
 {
   rtx base, offset;
@@ -14070,7 +14070,7 @@ mips16_rewrite_pool_constant (struct mip
   split_const (*x, &base, &offset);
   if (GET_CODE (base) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (base))
     {
-      label = mips16_add_constant (pool, get_pool_constant (base),
+      label = mips16_add_constant (pool, copy_rtx (get_pool_constant (base)),
 				   get_pool_mode (base));
       base = gen_rtx_LABEL_REF (Pmode, label);
       *x = mips_unspec_address_offset (base, offset, SYMBOL_PC_RELATIVE);
@@ -14126,10 +14126,11 @@ mips_cfg_in_reorg (void)
 	  || TARGET_RELAX_PIC_CALLS);
 }
 
-/* Build MIPS16 constant pools.  */
+/* Build MIPS16 constant pools.  Split the instructions if SPLIT_P,
+   otherwise assume that they are already split.  */
 
 static void
-mips16_lay_out_constants (void)
+mips16_lay_out_constants (bool split_p)
 {
   struct mips16_constant_pool pool;
   struct mips16_rewrite_pool_refs_info info;
@@ -14138,10 +14139,13 @@ mips16_lay_out_constants (void)
   if (!TARGET_MIPS16_PCREL_LOADS)
     return;
 
-  if (mips_cfg_in_reorg ())
-    split_all_insns ();
-  else
-    split_all_insns_noflow ();
+  if (split_p)
+    {
+      if (mips_cfg_in_reorg ())
+	split_all_insns ();
+      else
+	split_all_insns_noflow ();
+    }
   barrier = 0;
   memset (&pool, 0, sizeof (pool));
   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
@@ -15490,6 +15494,110 @@ mips_df_reorg (void)
   df_finish_pass (false);
 }
 
+/* Emit code to load LABEL_REF SRC into MIPS16 register DEST.  This is
+   called very late in mips_reorg, but the caller is required to run
+   mips16_lay_out_constants on the result.  */
+
+static void
+mips16_load_branch_target (rtx dest, rtx src)
+{
+  if (TARGET_ABICALLS && !TARGET_ABSOLUTE_ABICALLS)
+    {
+      rtx page, low;
+
+      if (mips_cfun_has_cprestore_slot_p ())
+	mips_emit_move (dest, mips_cprestore_slot (dest, true));
+      else
+	mips_emit_move (dest, pic_offset_table_rtx);
+      page = mips_unspec_address (src, SYMBOL_GOTOFF_PAGE);
+      low = mips_unspec_address (src, SYMBOL_GOT_PAGE_OFST);
+      emit_insn (gen_rtx_SET (VOIDmode, dest,
+			      PMODE_INSN (gen_unspec_got, (dest, page))));
+      emit_insn (gen_rtx_SET (VOIDmode, dest,
+			      gen_rtx_LO_SUM (Pmode, dest, low)));
+    }
+  else
+    {
+      src = mips_unspec_address (src, SYMBOL_ABSOLUTE);
+      mips_emit_move (dest, src);
+    }
+}
+
+/* If we're compiling a MIPS16 function, look for and split any long branches.
+   This must be called after all other instruction modifications in
+   mips_reorg.  */
+
+static void
+mips16_split_long_branches (void)
+{
+  bool something_changed;
+
+  if (!TARGET_MIPS16)
+    return;
+
+  /* Loop until the alignments for all targets are sufficient.  */
+  do
+    {
+      rtx insn;
+
+      shorten_branches (get_insns ());
+      something_changed = false;
+      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+	if (JUMP_P (insn)
+	    && USEFUL_INSN_P (insn)
+	    && get_attr_length (insn) > 8)
+	  {
+	    rtx old_label, new_label, temp, saved_temp;
+	    rtx target, jump, jump_sequence;
+
+	    start_sequence ();
+
+	    /* Free up a MIPS16 register by saving it in $1.  */
+	    saved_temp = gen_rtx_REG (Pmode, AT_REGNUM);
+	    temp = gen_rtx_REG (Pmode, GP_REG_FIRST + 2);
+	    emit_move_insn (saved_temp, temp);
+
+	    /* Load the branch target into TEMP.  */
+	    old_label = JUMP_LABEL (insn);
+	    target = gen_rtx_LABEL_REF (Pmode, old_label);
+	    mips16_load_branch_target (temp, target);
+
+	    /* Jump to the target and restore the register's
+	       original value.  */
+	    jump = emit_jump_insn (PMODE_INSN (gen_indirect_jump_and_restore,
+					       (temp, temp, saved_temp)));
+	    JUMP_LABEL (jump) = old_label;
+	    LABEL_NUSES (old_label)++;
+
+	    /* Rewrite any symbolic references that are supposed to use
+	       a PC-relative constant pool.  */
+	    mips16_lay_out_constants (false);
+
+	    if (simplejump_p (insn))
+	      /* We're going to replace INSN with a longer form.  */
+	      new_label = NULL_RTX;
+	    else
+	      {
+		/* Create a branch-around label for the original
+		   instruction.  */
+		new_label = gen_label_rtx ();
+		emit_label (new_label);
+	      }
+
+	    jump_sequence = get_insns ();
+	    end_sequence ();
+
+	    emit_insn_after (jump_sequence, insn);
+	    if (new_label)
+	      invert_jump (insn, new_label, false);
+	    else
+	      delete_insn (insn);
+	    something_changed = true;
+	  }
+    }
+  while (something_changed);
+}
+
 /* Implement TARGET_MACHINE_DEPENDENT_REORG.  */
 
 static void
@@ -15500,7 +15608,7 @@ mips_reorg (void)
      to date if the CFG is available.  */
   if (mips_cfg_in_reorg ())
     compute_bb_for_insn ();
-  mips16_lay_out_constants ();
+  mips16_lay_out_constants (true);
   if (mips_cfg_in_reorg ())
     {
       mips_df_reorg ();
@@ -15519,6 +15627,7 @@ mips_reorg (void)
     /* The expansion could invalidate some of the VR4130 alignment
        optimizations, but this should be an extremely rare case anyhow.  */
     mips_reorg_process_insns ();
+  mips16_split_long_branches ();
 }
 
 /* Implement TARGET_ASM_OUTPUT_MI_THUNK.  Generate rtl rather than asm text
@@ -15639,7 +15748,7 @@ mips_output_mi_thunk (FILE *file, tree t
   insn = get_insns ();
   insn_locators_alloc ();
   split_all_insns_noflow ();
-  mips16_lay_out_constants ();
+  mips16_lay_out_constants (true);
   shorten_branches (insn);
   final_start_function (insn, file, 1);
   final (insn, file, 1);
Index: gcc/config/mips/predicates.md
===================================================================
--- gcc/config/mips/predicates.md	2012-08-02 21:19:07.322527035 +0100
+++ gcc/config/mips/predicates.md	2012-08-02 21:51:23.510522298 +0100
@@ -139,9 +139,6 @@ (define_predicate "muldiv_target_operand
 		(match_operand 0 "hilo_operand")
 		(match_operand 0 "register_operand")))
 
-(define_special_predicate "pc_or_label_operand"
-  (match_code "pc,label_ref"))
-
 (define_predicate "const_call_insn_operand"
   (match_code "const,symbol_ref,label_ref")
 {
Index: gcc/config/mips/mips.md
===================================================================
--- gcc/config/mips/mips.md	2012-08-02 21:19:07.322527035 +0100
+++ gcc/config/mips/mips.md	2012-08-02 22:01:23.067520831 +0100
@@ -402,11 +402,7 @@ (define_attr "extended_mips16" "no,yes"
 
 ;; Length of instruction in bytes.
 (define_attr "length" ""
-   (cond [(and (eq_attr "extended_mips16" "yes")
-	       (match_test "TARGET_MIPS16"))
-	  (const_int 8)
-
-	  ;; Direct branch instructions have a range of [-0x20000,0x1fffc],
+   (cond [;; Direct branch instructions have a range of [-0x20000,0x1fffc],
 	  ;; relative to the address of the delay slot.  If a branch is
 	  ;; outside this range, we have a choice of two sequences.
 	  ;; For PIC, an out-of-range branch like:
@@ -431,14 +427,21 @@ (define_attr "length" ""
 	  ;; using la/jr in this case too, but we do not do so at
 	  ;; present.
 	  ;;
-	  ;; Note that this value does not account for the delay slot
+	  ;; The value we specify here does not account for the delay slot
 	  ;; instruction, whose length is added separately.  If the RTL
 	  ;; pattern has no explicit delay slot, mips_adjust_insn_length
-	  ;; will add the length of the implicit nop.  The values for
-	  ;; forward and backward branches will be different as well.
-	  (eq_attr "type" "branch")
+	  ;; will add the length of the implicit nop.  The range of
+	  ;; [-0x20000, 0x1fffc] from the address of the delay slot
+	  ;; therefore translates to a range of:
+	  ;;
+	  ;;    [-(0x20000 - sizeof (branch)), 0x1fffc - sizeof (slot)]
+	  ;; == [-0x1fffc, 0x1fff8]
+	  ;;
+	  ;; from the shorten_branches reference address.
+	  (and (eq_attr "type" "branch")
+	       (not (match_test "TARGET_MIPS16")))
 	  (cond [(and (le (minus (match_dup 0) (pc)) (const_int 131064))
-			  (le (minus (pc) (match_dup 0)) (const_int 131068)))
+		      (le (minus (pc) (match_dup 0)) (const_int 131068)))
 		   (const_int 4)
 
 		 ;; The non-PIC case: branch, first delay slot, and J.
@@ -453,6 +456,100 @@ (define_attr "length" ""
 		 ;; of an insn.
 		 (const_int MAX_PIC_BRANCH_LENGTH))
 
+	  ;; An unextended MIPS16 branch has a range of [-0x100, 0xfe]
+	  ;; from the address of the following instruction, which leads
+	  ;; to a range of:
+	  ;;
+	  ;;    [-(0x100 - sizeof (branch)), 0xfe]
+	  ;; == [-0xfe, 0xfe]
+	  ;;
+	  ;; from the shorten_branches reference address.  Extended branches
+	  ;; likewise have a range of [-0x10000, 0xfffe] from the address
+	  ;; of the following instruction, which leads to a range of:
+	  ;;
+	  ;;    [-(0x10000 - sizeof (branch)), 0xfffe]
+	  ;; == [-0xfffc, 0xfffe]
+	  ;;
+	  ;; from the reference address.
+	  ;;
+	  ;; When a branch is out of range, mips_reorg splits it into a form
+	  ;; that uses in-range branches.  There are four basic sequences:
+	  ;;
+	  ;; (1) Absolute addressing with a readable text segment
+	  ;;     (32-bit addresses):
+	  ;;
+	  ;;	 b... foo		2 bytes
+	  ;;	 move $1,$2		2 bytes
+	  ;;     lw $2,label		2 bytes
+	  ;;	 jr $2			2 bytes
+	  ;;	 move $2,$1		2 bytes
+	  ;;	 .align 2		0 or 2 bytes
+	  ;; label:
+	  ;;	 .word target		4 bytes
+	  ;; foo:
+	  ;;				(16 bytes in the worst case)
+	  ;;
+	  ;; (2) Absolute addressing with a readable text segment
+	  ;;     (64-bit addresses):
+	  ;;
+	  ;;	 b... foo		2 bytes
+	  ;;	 move $1,$2		2 bytes
+	  ;;     ld $2,label		2 bytes
+	  ;;	 jr $2			2 bytes
+	  ;;	 move $2,$1		2 bytes
+	  ;;	 .align 3		0 to 6 bytes
+	  ;; label:
+	  ;;	 .dword target		8 bytes
+	  ;; foo:
+	  ;;				(24 bytes in the worst case)
+	  ;;
+	  ;; (3) Absolute addressing without a readable text segment
+	  ;;     (which requires 32-bit addresses at present):
+	  ;;
+	  ;;	 b... foo		2 bytes
+	  ;;	 move $1,$2		2 bytes
+	  ;;     lui $2,%hi(target)	4 bytes
+	  ;;	 sll $2,8		2 bytes
+	  ;;	 sll $2,8		2 bytes
+	  ;;     addiu $2,%lo(target)	4 bytes
+	  ;;	 jr $2			2 bytes
+	  ;;	 move $2,$1		2 bytes
+	  ;; foo:
+	  ;;				(20 bytes)
+	  ;;
+	  ;; (4) PIC addressing (which requires 32-bit addresses at present):
+	  ;;
+	  ;;	 b... foo		2 bytes
+	  ;;	 move $1,$2		2 bytes
+	  ;;     lw $2,cprestore	0, 2 or 4 bytes
+	  ;;	 lw $2,%got(target)($2)	4 bytes
+	  ;;     addiu $2,%lo(target)	4 bytes
+	  ;;	 jr $2			2 bytes
+	  ;;	 move $2,$1		2 bytes
+	  ;; foo:
+	  ;;				(20 bytes in the worst case)
+	  ;;
+	  ;; Note that the conditions test adjusted lengths, whereas the
+	  ;; result is an unadjusted length, and is thus twice the true value.
+	  (and (eq_attr "type" "branch")
+	       (match_test "TARGET_MIPS16"))
+	  (cond [(and (le (minus (match_dup 0) (pc)) (const_int 254))
+		      (le (minus (pc) (match_dup 0)) (const_int 254)))
+		 (const_int 4)
+		 (and (le (minus (match_dup 0) (pc)) (const_int 65534))
+		      (le (minus (pc) (match_dup 0)) (const_int 65532)))
+		 (const_int 8)
+		 (and (match_test "TARGET_ABICALLS")
+		      (not (match_test "TARGET_ABSOLUTE_ABICALLS")))
+		 (const_int 40)
+		 (match_test "Pmode == SImode")
+		 (const_int 32)
+		 ] (const_int 48))
+
+	  (and (eq_attr "extended_mips16" "yes")
+	       (match_test "TARGET_MIPS16"))
+	  (const_int 8)
+
 	  ;; "Ghost" instructions occupy no space.
 	  (eq_attr "type" "ghost")
 	  (const_int 0)
@@ -5400,28 +5497,29 @@ (define_insn "*branch_equality<mode>_inv
 (define_insn "*branch_equality<mode>_mips16"
   [(set (pc)
 	(if_then_else
-	 (match_operator 0 "equality_operator"
-			 [(match_operand:GPR 1 "register_operand" "d,t")
+	 (match_operator 1 "equality_operator"
+			 [(match_operand:GPR 2 "register_operand" "d,t")
 			  (const_int 0)])
-	 (match_operand 2 "pc_or_label_operand" "")
-	 (match_operand 3 "pc_or_label_operand" "")))]
+	 (label_ref (match_operand 0 "" ""))
+	 (pc)))]
   "TARGET_MIPS16"
-{
-  if (operands[2] != pc_rtx)
-    {
-      if (which_alternative == 0)
-	return "b%C0z\t%1,%2";
-      else
-	return "bt%C0z\t%2";
-    }
-  else
-    {
-      if (which_alternative == 0)
-	return "b%N0z\t%1,%3";
-      else
-	return "bt%N0z\t%3";
-    }
-}
+  "@
+   b%C1z\t%2,%0
+   bt%C1z\t%0"
+  [(set_attr "type" "branch")])
+
+(define_insn "*branch_equality<mode>_mips16_inverted"
+  [(set (pc)
+	(if_then_else
+	 (match_operator 1 "equality_operator"
+			 [(match_operand:GPR 2 "register_operand" "d,t")
+			  (const_int 0)])
+	 (pc)
+	 (label_ref (match_operand 0 "" ""))))]
+  "TARGET_MIPS16"
+  "@
+   b%N1z\t%2,%0
+   bt%N1z\t%0"
   [(set_attr "type" "branch")])
 
 (define_expand "cbranch<mode>4"
@@ -5717,7 +5815,30 @@ (define_insn "*jump_mips16"
 	(label_ref (match_operand 0 "" "")))]
   "TARGET_MIPS16"
   "b\t%l0"
-  [(set_attr "type" "branch")])
+  [(set_attr "type" "branch")
+   (set (attr "length")
+	;; This calculation is like the normal branch one, but the
+	;; range of the unextended instruction is [-0x800, 0x7fe] rather
+	;; than [-0x100, 0xfe].  This translates to a range of:
+	;;
+	;;    [-(0x800 - sizeof (branch)), 0x7fe]
+	;; == [-0x7fe, 0x7fe]
+	;;
+	;; from the shorten_branches reference address.  Long-branch
+	;; sequences will replace this one, so the minimum length
+	;; is one instruction shorter than for conditional branches.
+	(cond [(and (le (minus (match_dup 0) (pc)) (const_int 2046))
+		    (le (minus (pc) (match_dup 0)) (const_int 2046)))
+	       (const_int 4)
+	       (and (le (minus (match_dup 0) (pc)) (const_int 65534))
+		    (le (minus (pc) (match_dup 0)) (const_int 65532)))
+	       (const_int 8)
+	       (and (match_test "TARGET_ABICALLS")
+		    (not (match_test "TARGET_ABSOLUTE_ABICALLS")))
+	       (const_int 36)
+	       (match_test "Pmode == SImode")
+	       (const_int 28)
+	       ] (const_int 44)))])
 
 (define_expand "indirect_jump"
   [(set (pc) (match_operand 0 "register_operand"))]
@@ -5735,6 +5856,18 @@ (define_insn "indirect_jump_<mode>"
   [(set_attr "type" "jump")
    (set_attr "mode" "none")])
 
+;; A combined jump-and-move instruction, used for MIPS16 long-branch
+;; sequences.  Having a dedicated pattern is more convenient than
+;; creating a SEQUENCE for this special case.
+(define_insn "indirect_jump_and_restore_<mode>"
+  [(set (pc) (match_operand:P 1 "register_operand" "d"))
+   (set (match_operand:P 0 "register_operand" "=d")
+   	(match_operand:P 2 "register_operand" "y"))]
+  ""
+  "%(%<jr\t%1\;move\t%0,%2%>%)"
+  [(set_attr "type" "multi")
+   (set_attr "extended_mips16" "yes")])
+
 (define_expand "tablejump"
   [(set (pc)
 	(match_operand 0 "register_operand"))
@@ -6549,7 +6682,8 @@ (define_insn "consttable_int"
 		    UNSPEC_CONSTTABLE_INT)]
   "TARGET_MIPS16"
 {
-  assemble_integer (operands[0], INTVAL (operands[1]),
+  assemble_integer (mips_strip_unspec_address (operands[0]),
+		    INTVAL (operands[1]),
 		    BITS_PER_UNIT * INTVAL (operands[1]), 1);
   return "";
 }
Index: gcc/testsuite/gcc.c-torture/compile/20001226-1.c
===================================================================
--- gcc/testsuite/gcc.c-torture/compile/20001226-1.c	2012-08-02 21:19:07.321527035 +0100
+++ gcc/testsuite/gcc.c-torture/compile/20001226-1.c	2012-08-02 21:51:23.511522298 +0100
@@ -21,10 +21,6 @@ #define C256(x,y) C64(x,y) C64(x,y+4) C6
 #define C1024(x,y) C256(x,y) C256(x+16,y) C256(x+32,y) C256(x+48,y)
 #define C4096(x,y) C1024(x,y) C1024(x,y+16) C1024(x,y+32) C1024(x,y+48)
 
-#ifdef __mips
-/* See PR 51931.  */
-__attribute__((nomips16))
-#endif
 unsigned foo(int x[64], int y[64])
 {
   C4096(x,y);
Index: gcc/testsuite/g++.dg/opt/longbranch1.C
===================================================================
--- gcc/testsuite/g++.dg/opt/longbranch1.C	2012-08-02 21:19:07.321527035 +0100
+++ gcc/testsuite/g++.dg/opt/longbranch1.C	2012-08-02 21:51:23.511522298 +0100
@@ -26,10 +26,6 @@ #define verymuchcode \
         muchcode; muchcode; muchcode; muchcode; muchcode; muchcode; \
         muchcode; muchcode; muchcode; muchcode; muchcode; muchcode
 
-#ifdef __mips
-/* See PR 51931.  */
-__attribute__((nomips16))
-#endif
 int
 main (int argc, char **argv)
 {


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]