[RFC patch] Use indirect jumps for nonlocal goto's

John David Anglin dave@hiauly1.hia.nrc.ca
Sat Jun 12 00:48:00 GMT 2004


> Ya see, it seems like this might be severely pessimizing
> other platforms that can do such direct jumps.

I didn't think anybody would notice.  I still think there are issues
with respect branch distances when branch labels outside the current
function.

This is what I committed.  Tested on hppa2.0w-hp-hpux11.11.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)

2004-06-11  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>

	* pa.c (legitimize_pic_address): Use UNSPEC_DLTIND14R to identify
	unspec used for loading address from DLT.
	* pa.md: Define constants for the uses of UNSPEC and UNSPEC_VOLATILE.
	Change all users of UNSPEC and UNSPEC_VOLATILE to use new constants.
	Don't use short code sequence when loading the address of a nonlocal
	label.
	(nonlocal_goto): New expander.
	(indirect_goto): New jump pattern for nonlocal gotos.
	(short_jump): Remove extra whitespace.
	(builtin_longjmp): Clobber memory and hard frame pointer.  Restore
	frame pointer via virtual_stack_vars_rtx when we have a nonlocal goto
	pattern.

Index: config/pa/pa.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/pa/pa.c,v
retrieving revision 1.249
diff -u -3 -p -r1.249 pa.c
--- config/pa/pa.c	31 May 2004 02:07:40 -0000	1.249
+++ config/pa/pa.c	11 Jun 2004 18:31:55 -0000
@@ -904,7 +904,7 @@ legitimize_pic_address (rtx orig, enum m
 		       gen_rtx_LO_SUM (Pmode, tmp_reg,
 				       gen_rtx_UNSPEC (Pmode,
 						       gen_rtvec (1, orig),
-						       0)));
+						       UNSPEC_DLTIND14R)));
 
       current_function_uses_pic_offset_table = 1;
       MEM_NOTRAP_P (pic_ref) = 1;
Index: config/pa/pa.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/pa/pa.md,v
retrieving revision 1.148
diff -u -3 -p -r1.148 pa.md
--- config/pa/pa.md	31 May 2004 16:10:54 -0000	1.148
+++ config/pa/pa.md	11 Jun 2004 18:31:57 -0000
@@ -26,6 +26,25 @@
 
 ;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.
 
+;; Uses of UNSPEC in this file:
+
+(define_constants
+  [(UNSPEC_CFFC		0)	; canonicalize_funcptr_for_compare
+   (UNSPEC_GOTO		1)	; indirect_goto
+   (UNSPEC_DLTIND14R	2)	; 
+  ])
+
+;; UNSPEC_VOLATILE:
+
+(define_constants
+  [(UNSPECV_BLOCKAGE	0)	; blockage
+   (UNSPECV_DCACHE	1)	; dcacheflush
+   (UNSPECV_ICACHE	2)	; icacheflush
+   (UNSPECV_OPC		3)	; outline_prologue_call
+   (UNSPECV_OEC		4)	; outline_epilogue_call
+   (UNSPECV_LONGJMP	5)	; builtin_longjmp
+  ])
+
 ;; Insn type.  Used to default other attribute values.
 
 ;; type "unary" insns have one input operand (1) and one output operand (0)
@@ -2632,6 +2651,7 @@
   /* If we're trying to load the address of a label that happens to be
      close, then we can use a shorter sequence.  */
   if (GET_CODE (operands[1]) == LABEL_REF
+      && !LABEL_REF_NONLOCAL_P (operands[1])
       && INSN_ADDRESSES_SET_P ()
       && abs (INSN_ADDRESSES (INSN_UID (XEXP (operands[1], 0)))
 	        - INSN_ADDRESSES (INSN_UID (insn))) < 8100)
@@ -2666,6 +2686,7 @@
   /* If we're trying to load the address of a label that happens to be
      close, then we can use a shorter sequence.  */
   if (GET_CODE (operands[1]) == LABEL_REF
+      && !LABEL_REF_NONLOCAL_P (operands[1])
       && INSN_ADDRESSES_SET_P ()
       && abs (INSN_ADDRESSES (INSN_UID (XEXP (operands[1], 0)))
 	        - INSN_ADDRESSES (INSN_UID (insn))) < 8100)
@@ -6787,7 +6808,7 @@
    (set_attr "length" "4")])
 
 (define_insn "blockage"
-  [(unspec_volatile [(const_int 2)] 0)]
+  [(unspec_volatile [(const_int 2)] UNSPECV_BLOCKAGE)]
   ""
   ""
   [(set_attr "length" "0")])
@@ -6828,6 +6849,62 @@
   [(set_attr "type" "branch")
    (set_attr "length" "4")])
 
+;;; An indirect jump can be optimized to a direct jump.  GAS for the
+;;; SOM target doesn't allow branching to a label inside a function.
+;;; We also don't correctly compute branch distances for labels
+;;; outside the current function.  Thus, we use an indirect jump can't
+;;; be optimized to a direct jump for all targets.  We assume that
+;;; the branch target is in the same space (i.e., nested function
+;;; jumping to a label in an outer function in the same translation
+;;; unit).
+(define_expand "nonlocal_goto"
+  [(use (match_operand 0 "general_operand" ""))
+   (use (match_operand 1 "general_operand" ""))
+   (use (match_operand 2 "general_operand" ""))
+   (use (match_operand 3 "general_operand" ""))]
+  ""
+{
+  rtx lab = operands[1];
+  rtx stack = operands[2];
+  rtx fp = operands[3];
+
+  lab = copy_to_reg (lab);
+
+  emit_insn (gen_rtx_CLOBBER (VOIDmode,
+			      gen_rtx_MEM (BLKmode,
+					   gen_rtx_SCRATCH (VOIDmode))));
+  emit_insn (gen_rtx_CLOBBER (VOIDmode,
+			      gen_rtx_MEM (BLKmode,
+					   hard_frame_pointer_rtx)));
+
+  /* Restore the frame pointer.  The virtual_stack_vars_rtx is saved
+     instead of the hard_frame_pointer_rtx in the save area.  As a
+     result, an extra instruction is needed to adjust for the offset
+     of the virtual stack variables and the frame pointer.  */
+  if (GET_CODE (fp) != REG)
+    fp = force_reg (Pmode, fp);
+  emit_move_insn (virtual_stack_vars_rtx, fp);
+
+  emit_stack_restore (SAVE_NONLOCAL, stack, NULL_RTX);
+
+  emit_insn (gen_rtx_USE (VOIDmode, hard_frame_pointer_rtx));
+  emit_insn (gen_rtx_USE (VOIDmode, stack_pointer_rtx));
+
+  /* Nonlocal goto jumps are only used between functions in the same
+     translation unit.  Thus, we can avoid the extra overhead of an
+     interspace jump.  */
+  emit_jump_insn (gen_indirect_goto (lab));
+  emit_barrier ();
+  DONE;
+})
+
+(define_insn "indirect_goto"
+  [(unspec [(match_operand 0 "register_operand" "=r")] UNSPEC_GOTO)]
+  "GET_MODE (operands[0]) == word_mode"
+  "bv%* %%r0(%0)"
+  [(set_attr "type" "branch")
+   (set_attr "length" "4")])
+
 ;;; This jump is used in branch tables where the insn length is fixed.
 ;;; The length of this insn is adjusted if the delay slot is not filled.
 (define_insn "short_jump"
@@ -8248,7 +8325,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
   [(set (pc) (match_operand 0 "pmode_register_operand" "a"))
   (clobber (reg:SI 2))]
   "!TARGET_64BIT"
-  "ldsid (%%sr0,%0),%%r2\; mtsp %%r2,%%sr0\; be%* 0(%%sr0,%0)"
+  "ldsid (%%sr0,%0),%%r2\;mtsp %%r2,%%sr0\;be%* 0(%%sr0,%0)"
    [(set_attr "type" "branch")
     (set_attr "length" "12")])
 
@@ -8261,7 +8338,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
     (set_attr "length" "4")])
 
 (define_expand "builtin_longjmp"
-  [(unspec_volatile [(match_operand 0 "register_operand" "r")] 3)]
+  [(unspec_volatile [(match_operand 0 "register_operand" "r")] UNSPECV_LONGJMP)]
   ""
   "
 {
@@ -8273,8 +8350,26 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 			   (POINTER_SIZE * 2) / BITS_PER_UNIT));
   rtx pv = gen_rtx_REG (Pmode, 1);
 
+  emit_insn (gen_rtx_CLOBBER (VOIDmode,
+			      gen_rtx_MEM (BLKmode,
+					   gen_rtx_SCRATCH (VOIDmode))));
+  emit_insn (gen_rtx_CLOBBER (VOIDmode,
+			      gen_rtx_MEM (BLKmode,
+					   hard_frame_pointer_rtx)));
+
+  /* Restore the frame pointer.  The virtual_stack_vars_rtx is saved
+     instead of the hard_frame_pointer_rtx in the save area.  We need
+     to adjust for the offset between these two values when we have
+     a nonlocal_goto pattern.  When we don't have a nonlocal_goto
+     pattern, the receiver performs the adjustment.  */
+#ifdef HAVE_nonlocal_goto
+  if (HAVE_nonlocal_goto)
+    emit_move_insn (virtual_stack_vars_rtx, force_reg (Pmode, fp));
+  else
+#endif
+    emit_move_insn (hard_frame_pointer_rtx, fp);
+
   /* This bit is the same as expand_builtin_longjmp.  */
-  emit_move_insn (hard_frame_pointer_rtx, fp);
   emit_stack_restore (SAVE_NONLOCAL, stack, NULL_RTX);
   emit_insn (gen_rtx_USE (VOIDmode, hard_frame_pointer_rtx));
   emit_insn (gen_rtx_USE (VOIDmode, stack_pointer_rtx));
@@ -8959,7 +9054,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 ;; Operands 3 and 4 (icacheflush) are clobbered scratch registers.
 (define_insn "dcacheflush"
   [(const_int 1)
-   (unspec_volatile [(mem:BLK (scratch))] 0)
+   (unspec_volatile [(mem:BLK (scratch))] UNSPECV_DCACHE)
    (use (match_operand 0 "pmode_register_operand" "r"))
    (use (match_operand 1 "pmode_register_operand" "r"))
    (use (match_operand 2 "pmode_register_operand" "r"))
@@ -8977,7 +9072,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 
 (define_insn "icacheflush"
   [(const_int 2)
-   (unspec_volatile [(mem:BLK (scratch))] 0)
+   (unspec_volatile [(mem:BLK (scratch))] UNSPECV_ICACHE)
    (use (match_operand 0 "pmode_register_operand" "r"))
    (use (match_operand 1 "pmode_register_operand" "r"))
    (use (match_operand 2 "pmode_register_operand" "r"))
@@ -8997,7 +9092,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 
 ;; An out-of-line prologue.
 (define_insn "outline_prologue_call"
-  [(unspec_volatile [(const_int 0)] 0)
+  [(unspec_volatile [(const_int 0)] UNSPECV_OPC)
    (clobber (reg:SI 31))
    (clobber (reg:SI 22))
    (clobber (reg:SI 21))
@@ -9047,7 +9142,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 
 ;; An out-of-line epilogue.
 (define_insn "outline_epilogue_call"
-  [(unspec_volatile [(const_int 1)] 0)
+  [(unspec_volatile [(const_int 1)] UNSPECV_OEC)
    (use (reg:SI 29))
    (use (reg:SI 28))
    (clobber (reg:SI 31))
@@ -9106,7 +9201,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 ;; reliably compared to another function pointer.  */
 (define_expand "canonicalize_funcptr_for_compare"
   [(set (reg:SI 26) (match_operand:SI 1 "register_operand" ""))
-   (parallel [(set (reg:SI 29) (unspec:SI [(reg:SI 26)] 0))
+   (parallel [(set (reg:SI 29) (unspec:SI [(reg:SI 26)] UNSPEC_CFFC))
 	      (clobber (match_dup 2))
 	      (clobber (reg:SI 26))
 	      (clobber (reg:SI 22))
@@ -9136,8 +9231,8 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
     }
 }")
 
-(define_insn ""
-  [(set (reg:SI 29) (unspec:SI [(reg:SI 26)] 0))
+(define_insn "*$$sh_func_adrs"
+  [(set (reg:SI 29) (unspec:SI [(reg:SI 26)] UNSPEC_CFFC))
    (clobber (match_operand:SI 0 "register_operand" "=a"))
    (clobber (reg:SI 26))
    (clobber (reg:SI 22))



More information about the Gcc-patches mailing list