This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Blackfin: Improve hwloop initialization


The ideal way to initialize a hardware loop is with
  LSETUP (L1, L2) LCx = Py;

However, given the way we describe these loops before reorg, this requires that there's a free P register for the duration of the loop which can be used as a counter. In loops with high register pressure, the register allocator sometimes gives us a different register.

If we have an R register, the hardware loop code currently uses
  LCx = Ry;
  LSETUP (L1, L2) LCx;

In all other cases, we fail to generate a hardware loop, and generate a pretty inefficient sequence that looks like this (if M3 was the iteration register).

       P2 = M3;
       P2 += -1;
       M3 = P2;
       cc =P2==0;
       if !cc jump .L62 (bp);

This is stupid; we could just look for a free P register or (worst case) push and pop one on the stack during the LSETUP. To be able to do this, we also need to defer splitting the loop_end instructions until after the loop reorg code has done its thing.

So far so good, the remaining complication is anomaly 312. On some
chips we must avoid instructions of the form "LCx = something". So in those cases we also try to find a P reg to use as scratch.


Committed as 146971.


Bernd -- This footer brought to you by insane German lawmakers. Analog Devices GmbH Wilhelm-Wagenfeld-Str. 6 80807 Muenchen Sitz der Gesellschaft Muenchen, Registergericht Muenchen HRB 40368 Geschaeftsfuehrer Thomas Wessel, William A. Martin, Margaret Seif

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 146970)
+++ ChangeLog	(working copy)
@@ -1,3 +1,22 @@
+2009-04-29  Bernd Schmidt  <bernd.schmidt@analog.com>
+
+	* config/bfin/bfin.h (splitting_loops): Declare.
+	* config/bfin/bfin-protos.h (WA_05000257, WA_05000283, WA_05000315):
+	Reorder bit definitions to be ascending.
+	(WA_LOAD_LCREGS, ENABLE_WA_LOAD_LCREGS): New macros.
+	* config/bfin/bfin.c (splitting_loops): New variable.
+	(bfin_cpus): Add WA_LOAD_LCREGS as needed.
+	(struct loop_info): Remove members INIT and LOOP_INIT.
+	(bfin_optimize_loop): Don't set them.  Reorder the code that generates
+	the LSETUP sequence.  Allow LC to be loaded from any register, but also
+	add a case to push/pop a PREG scratch if ENABLE_WA_LOAD_LCREGS.
+	(bfin_reorg_loops): When done, split all BB_ENDs with splitting_loops
+	set to 1.
+	* config/bfin/bfin.md (loop_end splitter): Use splitting_loops instead
+	of reload_completed.
+	From Jie Zhang:
+	* config/bfin/bfin.md (movsi_insn): Refine constraints.
+
 2009-04-29  Rafael Avila de Espindola  <espindola@google.com>
 
 	* Makefile.in (PLUGIN_VERSION_H): New.
Index: config/bfin/bfin-protos.h
===================================================================
--- config/bfin/bfin-protos.h	(revision 146889)
+++ config/bfin/bfin-protos.h	(working copy)
@@ -80,18 +80,23 @@ extern unsigned int bfin_workarounds;
 #define ENABLE_WA_INDIRECT_CALLS \
   ((bfin_workarounds & WA_INDIRECT_CALLS) && !TARGET_ICPLB)
 
-#define WA_05000257 0x00000040
+#define WA_05000257 0x00000010
 #define ENABLE_WA_05000257 \
   (bfin_workarounds & WA_05000257)
 
-#define WA_05000283 0x00000010
+#define WA_05000283 0x00000020
 #define ENABLE_WA_05000283 \
   (bfin_workarounds & WA_05000283)
 
-#define WA_05000315 0x00000020
+#define WA_05000315 0x00000040
 #define ENABLE_WA_05000315 \
   (bfin_workarounds & WA_05000315)
 
+/* For the anomaly 05-00-0312 */
+#define WA_LOAD_LCREGS 0x00000080
+#define ENABLE_WA_LOAD_LCREGS \
+  (bfin_workarounds & WA_LOAD_LCREGS)
+
 #define Mmode enum machine_mode
 
 extern rtx function_arg (CUMULATIVE_ARGS *, Mmode, tree, int);
Index: config/bfin/bfin.c
===================================================================
--- config/bfin/bfin.c	(revision 146956)
+++ config/bfin/bfin.c	(working copy)
@@ -173,134 +173,135 @@ struct bfin_cpu bfin_cpus[] =
    WA_SPECULATIVE_LOADS | WA_RETS},
 
   {"bf531", BFIN_CPU_BF531, 0x0006,
-   WA_SPECULATIVE_LOADS},
+   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS},
   {"bf531", BFIN_CPU_BF531, 0x0005,
-   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf531", BFIN_CPU_BF531, 0x0004,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf531", BFIN_CPU_BF531, 0x0003,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
 
   {"bf532", BFIN_CPU_BF532, 0x0006,
-   WA_SPECULATIVE_LOADS},
+   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS},
   {"bf532", BFIN_CPU_BF532, 0x0005,
-   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf532", BFIN_CPU_BF532, 0x0004,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf532", BFIN_CPU_BF532, 0x0003,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
 
   {"bf533", BFIN_CPU_BF533, 0x0006,
-   WA_SPECULATIVE_LOADS},
+   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS},
   {"bf533", BFIN_CPU_BF533, 0x0005,
-   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf533", BFIN_CPU_BF533, 0x0004,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf533", BFIN_CPU_BF533, 0x0003,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
 
   {"bf534", BFIN_CPU_BF534, 0x0003,
-   WA_SPECULATIVE_LOADS | WA_RETS},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS},
   {"bf534", BFIN_CPU_BF534, 0x0002,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf534", BFIN_CPU_BF534, 0x0001,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
 
   {"bf536", BFIN_CPU_BF536, 0x0003,
-   WA_SPECULATIVE_LOADS | WA_RETS},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS},
   {"bf536", BFIN_CPU_BF536, 0x0002,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf536", BFIN_CPU_BF536, 0x0001,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
 
   {"bf537", BFIN_CPU_BF537, 0x0003,
-   WA_SPECULATIVE_LOADS | WA_RETS},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS},
   {"bf537", BFIN_CPU_BF537, 0x0002,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf537", BFIN_CPU_BF537, 0x0001,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
 
   {"bf538", BFIN_CPU_BF538, 0x0005,
-   WA_SPECULATIVE_LOADS},
+   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS},
   {"bf538", BFIN_CPU_BF538, 0x0004,
-   WA_SPECULATIVE_LOADS | WA_RETS},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS},
   {"bf538", BFIN_CPU_BF538, 0x0003,
    WA_SPECULATIVE_LOADS | WA_RETS
-   | WA_05000283 | WA_05000315},
+   | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf538", BFIN_CPU_BF538, 0x0002,
-   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000257 | WA_05000315},
+   WA_SPECULATIVE_LOADS | WA_RETS
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
 
   {"bf539", BFIN_CPU_BF539, 0x0005,
-   WA_SPECULATIVE_LOADS},
+   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS},
   {"bf539", BFIN_CPU_BF539, 0x0004,
-   WA_SPECULATIVE_LOADS | WA_RETS},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS},
   {"bf539", BFIN_CPU_BF539, 0x0003,
    WA_SPECULATIVE_LOADS | WA_RETS
-   | WA_05000283 | WA_05000315},
+   | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf539", BFIN_CPU_BF539, 0x0002,
    WA_SPECULATIVE_LOADS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
 
   {"bf542", BFIN_CPU_BF542, 0x0002,
    WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS},
   {"bf542", BFIN_CPU_BF542, 0x0001,
    WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
   {"bf542", BFIN_CPU_BF542, 0x0000,
-   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS},
 
   {"bf544", BFIN_CPU_BF544, 0x0002,
    WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS},
   {"bf544", BFIN_CPU_BF544, 0x0001,
    WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
   {"bf544", BFIN_CPU_BF544, 0x0000,
-   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS},
 
   {"bf547", BFIN_CPU_BF547, 0x0002,
    WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS},
   {"bf547", BFIN_CPU_BF547, 0x0001,
    WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
   {"bf547", BFIN_CPU_BF547, 0x0000,
-   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS},
 
   {"bf548", BFIN_CPU_BF548, 0x0002,
    WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS},
   {"bf548", BFIN_CPU_BF548, 0x0001,
    WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
   {"bf548", BFIN_CPU_BF548, 0x0000,
-   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS},
 
   {"bf549", BFIN_CPU_BF549, 0x0002,
    WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS},
   {"bf549", BFIN_CPU_BF549, 0x0001,
    WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
   {"bf549", BFIN_CPU_BF549, 0x0000,
-   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
+   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS},
 
   {"bf561", BFIN_CPU_BF561, 0x0005, WA_RETS
-   | WA_05000283 | WA_05000315},
+   | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf561", BFIN_CPU_BF561, 0x0003,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
   {"bf561", BFIN_CPU_BF561, 0x0002,
    WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
-   | WA_05000283 | WA_05000257 | WA_05000315},
+   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
 
   {NULL, 0, 0, 0}
 };
 
-int splitting_for_sched;
+int splitting_for_sched, splitting_loops;
 
 static void
 bfin_globalize_label (FILE *stream, const char *name)
@@ -3640,12 +3641,6 @@ struct GTY (()) loop_info
   /* The iteration register.  */
   rtx iter_reg;
 
-  /* The new initialization insn.  */
-  rtx init;
-
-  /* The new initialization instruction.  */
-  rtx loop_init;
-
   /* The new label placed at the beginning of the loop. */
   rtx start_label;
 
@@ -3786,10 +3781,10 @@ bfin_optimize_loop (loop_info loop)
 {
   basic_block bb;
   loop_info inner;
-  rtx insn, init_insn, last_insn;
+  rtx insn, last_insn;
   rtx loop_init, start_label, end_label;
   rtx reg_lc0, reg_lc1, reg_lt0, reg_lt1, reg_lb0, reg_lb1;
-  rtx iter_reg;
+  rtx iter_reg, scratchreg;
   rtx lc_reg, lt_reg, lb_reg;
   rtx seq, seq_end;
   int length;
@@ -3835,13 +3830,27 @@ bfin_optimize_loop (loop_info loop)
   /* Get the loop iteration register.  */
   iter_reg = loop->iter_reg;
 
-  if (!DPREG_P (iter_reg))
+  if (!REG_P (iter_reg))
     {
       if (dump_file)
-	fprintf (dump_file, ";; loop %d iteration count NOT in PREG or DREG\n",
+	fprintf (dump_file, ";; loop %d iteration count not in a register\n",
 		 loop->loop_no);
       goto bad_loop;
     }
+  scratchreg = NULL_RTX;
+  if (!PREG_P (iter_reg) && loop->incoming_src)
+    {
+      int i;
+      for (i = REG_P0; i <= REG_P5; i++)
+	if ((df_regs_ever_live_p (i)
+	     || (funkind (TREE_TYPE (current_function_decl)) == SUBROUTINE
+		 && call_used_regs[i]))
+	    && !REGNO_REG_SET_P (df_get_live_out (loop->incoming_src), i))
+	  {
+	    scratchreg = gen_rtx_REG (SImode, i);
+	    break;
+	  }
+    }
 
   if (loop->incoming_src)
     {
@@ -3860,7 +3869,7 @@ bfin_optimize_loop (loop_info loop)
 
       for (; insn && insn != loop->start_label; insn = NEXT_INSN (insn))
 	length += length_for_loop (insn);
-      
+
       if (!insn)
 	{
 	  if (dump_file)
@@ -3869,6 +3878,11 @@ bfin_optimize_loop (loop_info loop)
 	  goto bad_loop;
 	}
 
+      /* Account for the pop of a scratch register where necessary.  */
+      if (!PREG_P (iter_reg) && scratchreg == NULL_RTX
+	  && ENABLE_WA_LOAD_LCREGS)
+	length += 2;
+
       if (length > MAX_LSETUP_DISTANCE)
 	{
 	  if (dump_file)
@@ -4049,46 +4063,67 @@ bfin_optimize_loop (loop_info loop)
       loop->clobber_loop0 = 1;
     }
 
-  /* If iter_reg is a DREG, we need generate an instruction to load
-     the loop count into LC register. */
-  if (D_REGNO_P (REGNO (iter_reg)))
+  loop->end_label = end_label;
+
+  /* Create a sequence containing the loop setup.  */
+  start_sequence ();
+
+  /* LSETUP only accepts P registers.  If we have one, we can use it,
+     otherwise there are several ways of working around the problem.
+     If we're not affected by anomaly 312, we can load the LC register
+     from any iteration register, and use LSETUP without initialization.
+     If we've found a P scratch register that's not live here, we can
+     instead copy the iter_reg into that and use an initializing LSETUP.
+     If all else fails, push and pop P0 and use it as a scratch.  */
+  if (P_REGNO_P (REGNO (iter_reg)))
     {
-      init_insn = gen_movsi (lc_reg, iter_reg);
+      loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
+					    lb_reg, end_label,
+					    lc_reg, iter_reg);
+      seq_end = emit_insn (loop_init);
+    }
+  else if (!ENABLE_WA_LOAD_LCREGS && DPREG_P (iter_reg))
+    {
+      emit_insn (gen_movsi (lc_reg, iter_reg));
       loop_init = gen_lsetup_without_autoinit (lt_reg, start_label,
 					       lb_reg, end_label,
 					       lc_reg);
+      seq_end = emit_insn (loop_init);
     }
-  else if (P_REGNO_P (REGNO (iter_reg)))
+  else if (scratchreg != NULL_RTX)
     {
-      init_insn = NULL_RTX;
+      emit_insn (gen_movsi (scratchreg, iter_reg));
       loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
 					    lb_reg, end_label,
-					    lc_reg, iter_reg);
+					    lc_reg, scratchreg);
+      seq_end = emit_insn (loop_init);
     }
   else
-    gcc_unreachable ();
-
-  loop->init = init_insn;
-  loop->end_label = end_label;
-  loop->loop_init = loop_init;
+    {
+      rtx p0reg = gen_rtx_REG (SImode, REG_P0);
+      rtx push = gen_frame_mem (SImode,
+				gen_rtx_PRE_DEC (SImode, stack_pointer_rtx));
+      rtx pop = gen_frame_mem (SImode,
+			       gen_rtx_POST_INC (SImode, stack_pointer_rtx));
+      emit_insn (gen_movsi (push, p0reg));
+      emit_insn (gen_movsi (p0reg, iter_reg));
+      loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
+					    lb_reg, end_label,
+					    lc_reg, p0reg);
+      emit_insn (loop_init);
+      seq_end = emit_insn (gen_movsi (p0reg, pop));
+    }
 
   if (dump_file)
     {
       fprintf (dump_file, ";; replacing loop %d initializer with\n",
 	       loop->loop_no);
-      print_rtl_single (dump_file, loop->loop_init);
+      print_rtl_single (dump_file, loop_init);
       fprintf (dump_file, ";; replacing loop %d terminator with\n",
 	       loop->loop_no);
       print_rtl_single (dump_file, loop->loop_end);
     }
 
-  /* Create a sequence containing the loop setup.  */
-  start_sequence ();
-
-  if (loop->init != NULL_RTX)
-    emit_insn (loop->init);
-  seq_end = emit_insn (loop->loop_init);
-
   /* If the loop isn't entered at the top, also create a jump to the entry
      point.  */
   if (!loop->incoming_src && loop->head != loop->incoming_dest)
@@ -4106,7 +4141,7 @@ bfin_optimize_loop (loop_info loop)
 	  seq_end = emit_insn (copy_rtx (PATTERN (last_insn)));
 	}
       else
-	seq_end = emit_insn (gen_jump (label));
+	seq_end = emit_jump_insn (gen_jump (label));
     }
 
   seq = get_insns ();
@@ -4216,7 +4251,6 @@ bfin_discover_loop (loop_info loop, basi
   loop->outer = NULL;
   loop->loops = NULL;
   loop->incoming = VEC_alloc (edge, gc, 2);
-  loop->init = loop->loop_init = NULL_RTX;
   loop->start_label = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (tail_insn), 0, 0)), 1), 0);
   loop->end_label = NULL_RTX;
   loop->bad = 0;
@@ -4590,7 +4624,7 @@ bfin_reorg_loops (FILE *dump_file)
       fprintf (dump_file, ";; All loops found:\n\n");
       bfin_dump_loops (loops);
     }
-  
+
   /* Now apply the optimizations.  */
   for (loop = loops; loop; loop = loop->next)
     bfin_optimize_loop (loop);
@@ -4608,6 +4642,17 @@ bfin_reorg_loops (FILE *dump_file)
 
   FOR_EACH_BB (bb)
     bb->aux = NULL;
+
+  splitting_loops = 1;
+  FOR_EACH_BB (bb)
+    {
+      rtx insn = BB_END (bb);
+      if (!JUMP_P (insn))
+	continue;
+
+      try_split (PATTERN (insn), insn, 1);
+    }
+  splitting_loops = 0;
 }
 
 /* Possibly generate a SEQUENCE out of three insns found in SLOT.
Index: config/bfin/bfin.h
===================================================================
--- config/bfin/bfin.h	(revision 146889)
+++ config/bfin/bfin.h	(working copy)
@@ -1322,7 +1322,7 @@ extern struct rtx_def *bfin_cc_rtx, *bfi
 
 #define SIZE_ASM_OP     "\t.size\t"
 
-extern int splitting_for_sched;
+extern int splitting_for_sched, splitting_loops;
 
 #define PRINT_OPERAND_PUNCT_VALID_P(CHAR) ((CHAR) == '!')
 
Index: config/bfin/bfin.md
===================================================================
--- config/bfin/bfin.md	(revision 146929)
+++ config/bfin/bfin.md	(working copy)
@@ -532,20 +532,22 @@ (define_insn "*popsi_insn"
 ;; with a PLUS.  We generally require fewer secondary reloads this way.
 
 (define_insn "*movsi_insn"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=da,x*y,da,x,x,x,da,mr")
-	(match_operand:SI 1 "general_operand" "da,x*y,xKs7,xKsh,xKuh,ix,mr,da"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=da,x,da,y,da,x,x,x,da,mr")
+	(match_operand:SI 1 "general_operand" "da,x,y,da,xKs7,xKsh,xKuh,ix,mr,da"))]
   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG"
  "@
    %0 = %1;
    %0 = %1;
+   %0 = %1;
+   %0 = %1;
    %0 = %1 (X);
    %0 = %1 (X);
    %0 = %1 (Z);
    #
    %0 = %1%!
    %0 = %1%!"
-  [(set_attr "type" "move,move,mvi,mvi,mvi,*,mcld,mcst")
-   (set_attr "length" "2,2,2,4,4,*,*,*")])
+  [(set_attr "type" "move,move,move,move,mvi,mvi,mvi,*,mcld,mcst")
+   (set_attr "length" "2,2,2,2,2,4,4,*,*,*")])
 
 (define_insn "*movsi_insn32"
   [(set (match_operand:SI 0 "register_operand" "=d,d")
@@ -1908,7 +1910,7 @@ (define_split
 	      (const_int -1)))
    (unspec [(const_int 0)] UNSPEC_LSETUP_END)
    (clobber (match_scratch:SI 2 "=&r"))]
-  "reload_completed"
+  "splitting_loops"
   [(set (match_dup 2) (match_dup 0))
    (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))
    (set (match_dup 0) (match_dup 2))

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]