This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Blackfin: try to schedule loads so as to avoid nops


The Blackfin has a slightly broken speculative-load hardware feature, which
requires the compiler to insert nops after a jump if a load follows it too
closely.  This patch tries to mitigate the problem by exposing it to the
final scheduling pass.  Special dummy stall insns are added whose only
purpose is to tell the scheduler for how many cycles it should avoid
scheduling a load, if possible.  This reduces the need to pad with nops
later on.


Bernd
-- 
This footer brought to you by insane German lawmakers.
Analog Devices GmbH      Wilhelm-Wagenfeld-Str. 6      80807 Muenchen
Sitz der Gesellschaft Muenchen, Registergericht Muenchen HRB 40368
Geschaeftsfuehrer Thomas Wessel, William A. Martin, Margaret Seif
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 151484)
+++ ChangeLog	(working copy)
@@ -1,3 +1,25 @@
+2009-09-07  Bernd Schmidt  <bernd.schmidt@analog.com>
+
+	* config/bfin/bfin.md (UNSPEC_VOLATILE_STALL): New constant.
+	(attr "addrtype"): New member "spreg".
+	Use it if mem_spfp_address_operand is true for the address.
+	(attr "type"): New entry "stall".
+	(cpu_unit "load"): New.
+	(insn_reservations "load32", "loadp", "loadi"): Add reservation of
+	"load".
+	(insn_reservation "loadsp"): New.
+	(insn_reservation "load_stall1"): New.
+	(insn_reservation "load_stall3"): New.
+	(stall): New insn.
+	* config/bfin/predicates.md (const1_operand, const3_operand): New.
+	(mem_p_address_operand): Exclude stack and frame pointer based
+	addresses.
+	(mem_spfp_address_operand): New; match them here.
+	* config/bfin/bfin.c (add_sched_insns_for_speculation): New function.
+	(bfin_reorg): Call it if scheduling insns.
+	(bfin_gen_bundles): Remove dummy insns created by
+	add_sched_insns_for_speculation.
+
 2009-09-07  Martin Jambor  <mjambor@suse.cz>
 
 	PR middle-end/41282
Index: config/bfin/predicates.md
===================================================================
--- config/bfin/predicates.md	(revision 151327)
+++ config/bfin/predicates.md	(working copy)
@@ -59,6 +59,14 @@ (define_predicate "const01_operand"
   (and (match_code "const_int")
        (match_test "op == const0_rtx || op == const1_rtx")))
 
+(define_predicate "const1_operand"
+  (and (match_code "const_int")
+       (match_test "op == const1_rtx")))
+
+(define_predicate "const3_operand"
+  (and (match_code "const_int")
+       (match_test "INTVAL (op) == 3")))
+
 (define_predicate "vec_shift_operand"
   (ior (and (match_code "const_int")
 	    (match_test "INTVAL (op) >= -16 && INTVAL (op) < 15"))
@@ -180,10 +188,14 @@ (define_predicate "bfin_bimode_compariso
 (define_predicate "bfin_direct_comparison_operator"
   (match_code "eq,lt,le,leu,ltu"))
 
-;; The following two are used to compute the addrtype attribute.  They return
+;; The following three are used to compute the addrtype attribute.  They return
 ;; true if passed a memory address usable for a 16-bit load or store using a
 ;; P or I register, respectively.  If neither matches, we know we have a
 ;; 32-bit instruction.
+;; We subdivide the P case into ordinary P registers and SP/FP.  Speculative
+;; loads through SP and FP are assumed to be harmless, so loads addressed via
+;; them are not held back by the scheduling workaround for the anomaly.
+
 (define_predicate "mem_p_address_operand"
   (match_code "mem")
 {
@@ -193,7 +205,19 @@ (define_predicate "mem_p_address_operand
   if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC)
     op = XEXP (op, 0);
   gcc_assert (REG_P (op));
-  return PREG_P (op);
+  return PREG_P (op) && op != stack_pointer_rtx && op != frame_pointer_rtx;
+})
+
+(define_predicate "mem_spfp_address_operand"
+  (match_code "mem")
+{
+  if (effective_address_32bit_p (op, mode))
+    return 0;
+  op = XEXP (op, 0);
+  if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC)
+    op = XEXP (op, 0);
+  gcc_assert (REG_P (op));
+  return op == stack_pointer_rtx || op == frame_pointer_rtx;
 })
 
 (define_predicate "mem_i_address_operand"
Index: config/bfin/bfin.c
===================================================================
--- config/bfin/bfin.c	(revision 151479)
+++ config/bfin/bfin.c	(working copy)
@@ -4784,15 +4784,27 @@ bfin_gen_bundles (void)
       for (insn = BB_HEAD (bb);; insn = next)
 	{
 	  int at_end;
+	  rtx delete_this = NULL_RTX;
+
 	  if (INSN_P (insn))
 	    {
-	      if (get_attr_type (insn) == TYPE_DSP32)
-		slot[0] = insn;
-	      else if (slot[1] == NULL_RTX)
-		slot[1] = insn;
+	      enum attr_type type = get_attr_type (insn);
+
+	      if (type == TYPE_STALL)
+		{
+		  gcc_assert (n_filled == 0);
+		  delete_this = insn;
+		}
 	      else
-		slot[2] = insn;
-	      n_filled++;
+		{
+		  if (type == TYPE_DSP32)
+		    slot[0] = insn;
+		  else if (slot[1] == NULL_RTX)
+		    slot[1] = insn;
+		  else
+		    slot[2] = insn;
+		  n_filled++;
+		}
 	    }
 
 	  next = NEXT_INSN (insn);
@@ -4807,7 +4819,7 @@ bfin_gen_bundles (void)
 
 	  /* BB_END can change due to emitting extra NOPs, so check here.  */
 	  at_end = insn == BB_END (bb);
-	  if (at_end || GET_MODE (next) == TImode)
+	  if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode))
 	    {
 	      if ((n_filled < 2
 		   || !gen_one_bundle (slot))
@@ -4826,6 +4838,8 @@ bfin_gen_bundles (void)
 	      n_filled = 0;
 	      slot[0] = slot[1] = slot[2] = NULL_RTX;
 	    }
+	  if (delete_this != NULL_RTX)
+	    delete_insn (delete_this);
 	  if (at_end)
 	    break;
 	}
@@ -5226,6 +5240,65 @@ workaround_speculation (void)
     }
 }
 
+/* Called just before the final scheduling pass.  If we need to insert NOPs
+   later on to work around speculative loads, insert special placeholder
+   insns that cause loads to be delayed for as many cycles as necessary
+   (and possible).  This reduces the number of NOPs we need to add.
+   The dummy insns we generate are later removed by bfin_gen_bundles.  */
+static void
+add_sched_insns_for_speculation (void)
+{
+  rtx insn;
+
+  if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
+      && ! ENABLE_WA_INDIRECT_CALLS)
+    return;
+
+  /* First pass: after each conditional branch that is not predicted taken,
+     insert a stall placeholder (operand 3) before the next real insn.  */
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      rtx pat;
+
+      if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
+	continue;
+
+      pat = PATTERN (insn);
+      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
+	  || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
+	  || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
+	continue;
+
+      if (JUMP_P (insn))
+	{
+	  if (any_condjump_p (insn)
+	      && !cbranch_predicted_taken_p (insn))
+	    {
+	      rtx n = next_real_insn (insn);
+	      emit_insn_before (gen_stall (GEN_INT (3)), n);
+	    }
+	}
+    }
+
+  /* Second pass: for predicted-taken conditional branches, put a stall (1)
+     before the first real insn at the target unless a stall is there.  */
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      if (JUMP_P (insn)
+	  && any_condjump_p (insn)
+	  && (cbranch_predicted_taken_p (insn)))
+	{
+	  rtx target = JUMP_LABEL (insn);
+	  rtx next = next_real_insn (target);
+
+	  if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE
+	      && get_attr_type (next) == TYPE_STALL)
+	    continue;
+	  emit_insn_before (gen_stall (GEN_INT (1)), next);	  
+	}
+    }
+}
+
 /* We use the machine specific reorg pass for emitting CSYNC instructions
    after conditional branches as needed.
 
@@ -5259,6 +5332,8 @@ bfin_reorg (void)
       split_all_insns ();
       splitting_for_sched = 0;
 
+      add_sched_insns_for_speculation ();
+
       timevar_push (TV_SCHED2);
       schedule_insns ();
       timevar_pop (TV_SCHED2);
Index: config/bfin/bfin.md
===================================================================
--- config/bfin/bfin.md	(revision 151381)
+++ config/bfin/bfin.md	(working copy)
@@ -146,7 +146,8 @@ (define_constants
    (UNSPEC_VOLATILE_SSYNC 2)
    (UNSPEC_VOLATILE_LOAD_FUNCDESC 3)
    (UNSPEC_VOLATILE_STORE_EH_HANDLER 4)
-   (UNSPEC_VOLATILE_DUMMY 5)])
+   (UNSPEC_VOLATILE_DUMMY 5)
+   (UNSPEC_VOLATILE_STALL 6)])
 
 (define_constants
   [(MACFLAG_NONE 0)
@@ -163,16 +164,20 @@ (define_constants
    (MACFLAG_IH 11)])
 
 (define_attr "type"
-  "move,movcc,mvi,mcld,mcst,dsp32,mult,alu0,shft,brcc,br,call,misc,sync,compare,dummy"
+  "move,movcc,mvi,mcld,mcst,dsp32,mult,alu0,shft,brcc,br,call,misc,sync,compare,dummy,stall"
   (const_string "misc"))
 
-(define_attr "addrtype" "32bit,preg,ireg"
+(define_attr "addrtype" "32bit,preg,spreg,ireg"
   (cond [(and (eq_attr "type" "mcld")
 	      (and (match_operand 0 "d_register_operand" "")
 		   (match_operand 1 "mem_p_address_operand" "")))
 	   (const_string "preg")
 	 (and (eq_attr "type" "mcld")
 	      (and (match_operand 0 "d_register_operand" "")
+		   (match_operand 1 "mem_spfp_address_operand" "")))
+	   (const_string "spreg")
+	 (and (eq_attr "type" "mcld")
+	      (and (match_operand 0 "d_register_operand" "")
 		   (match_operand 1 "mem_i_address_operand" "")))
 	   (const_string "ireg")
 	 (and (eq_attr "type" "mcst")
@@ -181,6 +186,10 @@ (define_attr "addrtype" "32bit,preg,ireg
 	   (const_string "preg")
 	 (and (eq_attr "type" "mcst")
 	      (and (match_operand 1 "d_register_operand" "")
+		   (match_operand 0 "mem_spfp_address_operand" "")))
+	   (const_string "spreg")
+	 (and (eq_attr "type" "mcst")
+	      (and (match_operand 1 "d_register_operand" "")
 		   (match_operand 0 "mem_i_address_operand" "")))
 	   (const_string "ireg")]
 	(const_string "32bit")))
@@ -199,6 +208,10 @@ (define_cpu_unit "slot2" "bfin")
 (define_cpu_unit "store" "bfin")
 (define_cpu_unit "pregs" "bfin")
 
+;; A dummy unit used to delay scheduling of loads after a conditional
+;; branch.
+(define_cpu_unit "load" "bfin")
+
 (define_reservation "core" "slot0+slot1+slot2")
 
 (define_insn_reservation "alu" 1
@@ -216,17 +229,22 @@ (define_insn_reservation "dsp32" 1
 (define_insn_reservation "load32" 1
   (and (not (eq_attr "seq_insns" "multi"))
        (and (eq_attr "type" "mcld") (eq_attr "addrtype" "32bit")))
-  "core")
+  "core+load")
 
 (define_insn_reservation "loadp" 1
   (and (not (eq_attr "seq_insns" "multi"))
        (and (eq_attr "type" "mcld") (eq_attr "addrtype" "preg")))
+  "(slot1|slot2)+pregs+load")
+
+(define_insn_reservation "loadsp" 1
+  (and (not (eq_attr "seq_insns" "multi"))
+       (and (eq_attr "type" "mcld") (eq_attr "addrtype" "spreg")))
   "(slot1|slot2)+pregs")
 
 (define_insn_reservation "loadi" 1
   (and (not (eq_attr "seq_insns" "multi"))
        (and (eq_attr "type" "mcld") (eq_attr "addrtype" "ireg")))
-  "(slot1|slot2)")
+  "(slot1|slot2)+load")
 
 (define_insn_reservation "store32" 1
   (and (not (eq_attr "seq_insns" "multi"))
@@ -235,7 +253,8 @@ (define_insn_reservation "store32" 1
 
 (define_insn_reservation "storep" 1
   (and (not (eq_attr "seq_insns" "multi"))
-       (and (eq_attr "type" "mcst") (eq_attr "addrtype" "preg")))
+       (and (eq_attr "type" "mcst")
+	    (ior (eq_attr "addrtype" "preg") (eq_attr "addrtype" "spreg"))))
   "(slot1|slot2)+pregs+store")
 
 (define_insn_reservation "storei" 1
@@ -247,6 +266,16 @@ (define_insn_reservation "multi" 2
   (eq_attr "seq_insns" "multi")
   "core")
 
+(define_insn_reservation "load_stall1" 1
+  (and (eq_attr "type" "stall")
+       (match_operand 0 "const1_operand" ""))
+  "core+load*2")
+
+(define_insn_reservation "load_stall3" 1
+  (and (eq_attr "type" "stall")
+       (match_operand 0 "const3_operand" ""))
+  "core+load*4")
+
 (absence_set "slot0" "slot1,slot2")
 (absence_set "slot1" "slot2")
 
@@ -2667,6 +2696,9 @@ (define_insn "return_internal"
   gcc_unreachable ();
 })
 
+;; When used at a location where CC contains 1, causes a speculative load
+;; that is later cancelled.  This is used for certain workarounds in
+;; interrupt handler prologues.
 (define_insn "dummy_load"
   [(unspec_volatile [(match_operand 0 "register_operand" "a")
 		     (match_operand 1 "register_operand" "C")]
@@ -2677,6 +2709,17 @@ (define_insn "dummy_load"
   (set_attr "length" "4")
   (set_attr "seq_insns" "multi")])
 
+;; A placeholder insn inserted before the final scheduling pass.  It is used
+;; to improve scheduling of loads when workarounds for speculative loads are
+;; needed, by not placing them in the first few cycles after a conditional
+;; branch.
+(define_insn "stall"
+  [(unspec_volatile [(match_operand 0 "const_int_operand" "P1P3")]
+		    UNSPEC_VOLATILE_STALL)]
+  ""
+  ""
+  [(set_attr "type" "stall")])
+
 (define_insn "csync"
   [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_CSYNC)]
   ""

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]