This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Add hardware loop support to bfin port


This patch adds hardware loop support to the Blackfin port of GCC. It
was originally based on Nathan Sidwell's work on the mt port, but the
changes had grown large by the time it was done. Bernd gave me numerous
reviews and comments while I was developing this patch on the toolchain
mailing list at blackfin.uclinux.org.

This patch has been tested on bfin-elf and bfin-uclinux.

Is it OK?

Jie
	* config/bfin/bfin-protos.h (bfin_hardware_loop): Declare.
	* config/bfin/bfin.c (basic-block.h): Include.
	(struct machine_function): New.
	(bfin_init_machine_status): New.
	(override_options): Initialize init_machine_status.
	(bfin_hardware_loop): New.
	(MAX_LOOP_DEPTH, MAX_LOOP_LENGTH): Define.
	(DEF_VEC_P (loop_info)): New.
	(DEF_VEC_ALLOC_P (loop_info,heap)): New.
	(struct loop_info): New.
	(loop_info): New typedef.
	(struct loop_work): New.
	(loop_work): New typedef.
	(DEF_VEC_O (loop_work)): New.
	(DEF_VEC_ALLOC_O (loop_work,heap)): New.
	(bfin_dump_loops): New.
	(bfin_bb_in_loop): New.
	(bfin_scan_loop): New.
	(bfin_optimize_loop): New.
	(bfin_reorg_loops): New.
	(bfin_reorg): Use bfin_reorg_loops.
	* config/bfin/bfin.h (FIRST_PSEUDO_REGISTER): Adjust for adding
	loop registers.
	(I_REGNO_P): Simplify.
	(DP_REGNO_P, DPREG_P): New macros.
	(REGISTER_NAMES, FIXED_REGISTERS, CALL_USED_REGISTERS,
	REG_ALLOC_ORDER): Add LT0, LT1, LC0, LC1, LB0, LB1.
	(enum reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS):
	Add LT_REGS, LC_REGS, LB_REGS.
	(REG_CLASS_FROM_LETTER): Add 't' for LT_REGS, 'k' for LC_REGS,
	'l' for LB_REGS.
	(REGNO_REG_CLASS): Deal with loop registers.
	* config/bfin/bfin.md: Add comment for 't', 'k', 'l' constraint
	letters.
	(REG_LT0, REG_LT1, REG_LC0, REG_LC1, REG_LB0, REG_LB1):
	New constants for loop registers.
	(UNSPEC_LSETUP_END): New.
	(seq_insns): New define_attr. Set it for appropriate insns.
	(movsi_insn): Add alternatives for move from/to
	loop count registers.
	(doloop_end): New define_expand.
	(loop_end): New define_insn.
	(define_split for bad doloop_end): New.
	(lsetup_with_autoinit): New define_insn.
	(lsetup_without_autoinit): New define_insn.
	(rep_movsi, rep_movhi): Clobber LT1, LC1, LB1.
	* config/bfin/predicates.md (lc_register_operand): New.
	(lt_register_operand): New.
	(lb_register_operand): New.
	(nondp_register_operand): New.
	(nondp_reg_or_memory_operand): New.
	* doc/md.texi: Document Blackfin new 't', 'k', 'l' constraint letters.


Index: config/bfin/predicates.md
===================================================================
--- config/bfin/predicates.md	(revision 114203)
+++ config/bfin/predicates.md	(working copy)
@@ -76,12 +76,44 @@
   return 1;
 })
 
+;; Return nonzero if OP is an LC register.
+(define_predicate "lc_register_operand"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == REG_LC0 || REGNO (op) == REG_LC1")))
+
+;; Return nonzero if OP is an LT register.
+(define_predicate "lt_register_operand"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == REG_LT0 || REGNO (op) == REG_LT1")))
+
+;; Return nonzero if OP is an LB register.
+(define_predicate "lb_register_operand"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == REG_LB0 || REGNO (op) == REG_LB1")))
+
 ;; Return nonzero if OP is a register or a 7 bit signed constant.
 (define_predicate "reg_or_7bit_operand"
   (ior (match_operand 0 "register_operand")
        (and (match_code "const_int")
 	    (match_test "CONST_7BIT_IMM_P (INTVAL (op))"))))
 
+;; Return nonzero if OP is a register other than DREG and PREG.
+(define_predicate "nondp_register_operand"
+  (match_operand 0 "register_operand")
+{
+  unsigned int regno;
+  if (GET_CODE (op) == SUBREG)
+    op = SUBREG_REG (op);
+
+  regno = REGNO (op);
+  return (regno >= FIRST_PSEUDO_REGISTER || !DP_REGNO_P (regno));
+})
+
+;; Return nonzero if OP is a register other than DREG and PREG, or MEM.
+(define_predicate "nondp_reg_or_memory_operand"
+  (ior (match_operand 0 "nondp_register_operand")
+       (match_operand 0 "memory_operand")))
+
 ;; Used for secondary reloads, this function returns 1 if OP is of the
 ;; form (plus (fp) (const_int)).
 (define_predicate "fp_plus_const_operand"
Index: config/bfin/bfin-protos.h
===================================================================
--- config/bfin/bfin-protos.h	(revision 114203)
+++ config/bfin/bfin-protos.h	(working copy)
@@ -83,6 +83,7 @@
 extern void output_pop_multiple (rtx, rtx *);
 extern int bfin_hard_regno_rename_ok (unsigned int, unsigned int);
 extern rtx bfin_return_addr_rtx (int);
+extern void bfin_hardware_loop (void);
 #undef  Mmode 
 
 #endif
Index: config/bfin/bfin.c
===================================================================
--- config/bfin/bfin.c	(revision 114203)
+++ config/bfin/bfin.c	(working copy)
@@ -51,7 +51,15 @@
 #include "bfin-protos.h"
 #include "tm-preds.h"
 #include "gt-bfin.h"
+#include "basic-block.h"
 
+/* A C structure for machine-specific, per-function data.
+   This is added to the cfun structure.  */
+struct machine_function GTY(())
+{
+  int has_hardware_loops;
+};
+
 /* Test and compare insns in bfin.md store the information needed to
    generate branch and scc insns here.  */
 rtx bfin_compare_op0, bfin_compare_op1;
@@ -1938,6 +1946,16 @@
     }
 }
 
+static struct machine_function *
+bfin_init_machine_status (void)
+{
+  struct machine_function *f;
+
+  f = ggc_alloc_cleared (sizeof (struct machine_function));
+
+  return f;
+}
+
 /* Implement the macro OVERRIDE_OPTIONS.  */
 
 void
@@ -1968,6 +1986,8 @@
     flag_pic = 0;
 
   flag_schedule_insns = 0;
+
+  init_machine_status = bfin_init_machine_status;
 }
 
 /* Return the destination address of BRANCH.
@@ -2685,7 +2705,772 @@
 
   return cost;
 }
+
 
+/* Increment the counter for the number of loop instructions in the
+   current function.  */
+
+void
+bfin_hardware_loop (void)
+{
+  cfun->machine->has_hardware_loops++;
+}
+
+/* Maximum loop nesting depth.  */
+#define MAX_LOOP_DEPTH 2
+
+/* Maximum size of a loop.  */
+#define MAX_LOOP_LENGTH 4096
+
+/* We need to keep a vector of loops.  */
+typedef struct loop_info *loop_info;
+DEF_VEC_P (loop_info);
+DEF_VEC_ALLOC_P (loop_info,heap);
+
+/* Information about a loop we have found (or are in the process of
+   finding).  */
+struct loop_info GTY (())
+{
+  /* loop number, for dumps */
+  int loop_no;
+
+  /* Predecessor block of the loop.   This is the one that falls into
+     the loop and contains the initialization instruction.  */
+  basic_block predecessor;
+
+  /* First block in the loop.  This is the one branched to by the loop_end
+     insn.  */
+  basic_block head;
+
+  /* Last block in the loop (the one with the loop_end insn).  */
+  basic_block tail;
+
+  /* The successor block of the loop.  This is the one the loop_end insn
+     falls into.  */
+  basic_block successor;
+
+  /* The last instruction in the tail.  */
+  rtx last_insn;
+
+  /* The loop_end insn.  */
+  rtx loop_end;
+
+  /* The iteration register.  */
+  rtx iter_reg;
+
+  /* The new insn that moves the iteration count into the LC register,
+  rtx init;
+
+  /* The new LSETUP insn that sets up the hardware loop.  */
+  rtx loop_init;
+
+  /* The new label placed at the beginning of the loop. */
+  rtx start_label;
+
+  /* The new label placed at the end of the loop. */
+  rtx end_label;
+
+  /* The length of the loop.  */
+  int length;
+
+  /* The nesting depth of the loop.  Set to -1 for a bad loop.  */
+  int depth;
+
+  /* True if we have visited this loop.  */
+  int visited;
+
+  /* True if this loop body clobbers any of LC0, LT0, or LB0.  */
+  int clobber_loop0;
+
+  /* True if this loop body clobbers any of LC1, LT1, or LB1.  */
+  int clobber_loop1;
+
+  /* Next loop in the graph. */
+  struct loop_info *next;
+
+  /* Immediate outer loop of this loop.  */
+  struct loop_info *outer;
+
+  /* Vector of blocks only within the loop, (excluding those within
+     inner loops).  */
+  VEC (basic_block,heap) *blocks;
+
+  /* Vector of inner loops within this loop  */
+  VEC (loop_info,heap) *loops;
+};
+
+/* Information used during loop detection.  */
+typedef struct loop_work GTY(())
+{
+  /* Basic block to be scanned.  */
+  basic_block block;
+
+  /* Loop it will be within.  */
+  loop_info loop;
+} loop_work;
+
+/* Work list.  */
+DEF_VEC_O (loop_work);
+DEF_VEC_ALLOC_O (loop_work,heap);
+
+static void
+bfin_dump_loops (loop_info loops)
+{
+  loop_info loop;
+
+  for (loop = loops; loop; loop = loop->next)
+    {
+      loop_info i;
+      basic_block b;
+      unsigned ix;
+
+      fprintf (dump_file, ";; loop %d: ", loop->loop_no);
+      fprintf (dump_file, "{head:%d, depth:%d}", loop->head->index, loop->depth);
+
+      fprintf (dump_file, " blocks: [ ");
+      for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, b); ix++)
+	fprintf (dump_file, "%d ", b->index);
+      fprintf (dump_file, "] ");
+
+      fprintf (dump_file, " inner loops: [ ");
+      for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, i); ix++)
+	fprintf (dump_file, "%d ", i->loop_no);
+      fprintf (dump_file, "]\n");
+    }
+  fprintf (dump_file, "\n");
+}
+
+/* Scan the blocks of LOOP (and its inferiors) looking for basic block
+   BB. Return true, if we find it.  */
+
+static bool
+bfin_bb_in_loop (loop_info loop, basic_block bb)
+{
+  unsigned ix;
+  loop_info inner;
+  basic_block b;
+
+  for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, b); ix++)
+    if (b == bb)
+      return true;
+
+  for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, inner); ix++)
+    if (bfin_bb_in_loop (inner, bb))
+      return true;
+
+  return false;
+}
+
+/* Scan the blocks of LOOP (and its inferiors) looking for uses of
+   REG.  Return true, if we find any.  Don't count the loop's loop_end
+   insn if it matches LOOP_END.  */
+
+static bool
+bfin_scan_loop (loop_info loop, rtx reg, rtx loop_end)
+{
+  unsigned ix;
+  loop_info inner;
+  basic_block bb;
+
+  for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++)
+    {
+      rtx insn;
+
+      for (insn = BB_HEAD (bb);
+	   insn != NEXT_INSN (BB_END (bb));
+	   insn = NEXT_INSN (insn))
+	{
+	  if (!INSN_P (insn))
+	    continue;
+	  if (insn == loop_end)
+	    continue;
+	  if (reg_mentioned_p (reg, PATTERN (insn)))
+	    return true;
+	}
+    }
+  for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, inner); ix++)
+    if (bfin_scan_loop (inner, reg, NULL_RTX))
+      return true;
+
+  return false;
+}
+
+/* Optimize LOOP.  */
+
+static void
+bfin_optimize_loop (loop_info loop)
+{
+  basic_block bb;
+  loop_info inner, outer;
+  rtx insn, init_insn, last_insn, nop_insn;
+  rtx loop_init, start_label, end_label;
+  rtx reg_lc0, reg_lc1, reg_lt0, reg_lt1, reg_lb0, reg_lb1;
+  rtx iter_reg;
+  rtx lc_reg, lt_reg, lb_reg;
+  rtx seq;
+  int length;
+  unsigned ix;
+  int inner_depth = 0;
+  int inner_num;
+  int bb_num;
+
+  if (loop->visited)
+    return;
+
+  loop->visited = 1;
+
+  for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, inner); ix++)
+    {
+      if (inner->loop_no == loop->loop_no)
+	loop->depth = -1;
+      else
+	bfin_optimize_loop (inner);
+
+      if (inner->depth < 0 || inner->depth > MAX_LOOP_DEPTH)
+	{
+	  inner->outer = NULL;
+	  VEC_ordered_remove (loop_info, loop->loops, ix);
+	}
+
+      if (inner_depth < inner->depth)
+	inner_depth = inner->depth;
+
+      loop->clobber_loop0 |= inner->clobber_loop0;
+      loop->clobber_loop1 |= inner->clobber_loop1;
+    }
+
+  if (loop->depth < 0)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  loop->depth = inner_depth + 1;
+  if (loop->depth > MAX_LOOP_DEPTH)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Make sure we only have one entry point.  */
+  if (EDGE_COUNT (loop->head->preds) == 2)
+    {
+      loop->predecessor = EDGE_PRED (loop->head, 0)->src;
+      if (loop->predecessor == loop->tail)
+	/* We wanted the other predecessor.  */
+	loop->predecessor = EDGE_PRED (loop->head, 1)->src;
+
+      /* We can only place a loop insn on a fall through edge of a
+	 single exit block.  */
+      if (EDGE_COUNT (loop->predecessor->succs) != 1
+	  || !(EDGE_SUCC (loop->predecessor, 0)->flags & EDGE_FALLTHRU)
+	  /* If loop->predecessor is in loop, loop->head is not really
+	     the head of the loop.  */
+	  || bfin_bb_in_loop (loop, loop->predecessor))
+	loop->predecessor = NULL;
+    }
+
+  if (loop->predecessor == NULL)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d has bad predecessor\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Get the loop iteration register.  */
+  iter_reg = loop->iter_reg;
+
+  if (!DPREG_P (iter_reg))
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d iteration count NOT in PREG or DREG\n",
+		 loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Check if start_label appears before loop_end and calculate the
+     offset between them.  We calculate the length of instructions
+     conservatively.  */
+  length = 0;
+  for (insn = loop->start_label;
+       insn && insn != loop->loop_end;
+       insn = NEXT_INSN (insn))
+    {
+      if (JUMP_P (insn) && any_condjump_p (insn) && !optimize_size)
+	{
+	  if (TARGET_CSYNC_ANOMALY)
+	    length += 8;
+	  else if (TARGET_SPECLD_ANOMALY)
+	    length += 6;
+	}
+      else if (LABEL_P (insn))
+	{
+	  if (TARGET_CSYNC_ANOMALY)
+	    length += 4;
+	}
+
+      if (INSN_P (insn))
+	length += get_attr_length (insn);
+    }
+
+  if (!insn)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
+		 loop->loop_no);
+      goto bad_loop;
+    }
+
+  loop->length = length;
+  if (loop->length > MAX_LOOP_LENGTH)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Scan all the blocks to make sure they don't use iter_reg.  */
+  if (bfin_scan_loop (loop, iter_reg, loop->loop_end))
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Scan all the insns to see if the loop body clobbers
+     any hardware loop registers.  */
+
+  reg_lc0 = gen_rtx_REG (SImode, REG_LC0);
+  reg_lc1 = gen_rtx_REG (SImode, REG_LC1);
+  reg_lt0 = gen_rtx_REG (SImode, REG_LT0);
+  reg_lt1 = gen_rtx_REG (SImode, REG_LT1);
+  reg_lb0 = gen_rtx_REG (SImode, REG_LB0);
+  reg_lb1 = gen_rtx_REG (SImode, REG_LB1);
+
+  for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++)
+    {
+      rtx insn;
+
+      for (insn = BB_HEAD (bb);
+	   insn != NEXT_INSN (BB_END (bb));
+	   insn = NEXT_INSN (insn))
+	{
+	  if (!INSN_P (insn))
+	    continue;
+
+	  if (reg_set_p (reg_lc0, insn)
+	      || reg_set_p (reg_lt0, insn)
+	      || reg_set_p (reg_lb0, insn))
+	    loop->clobber_loop0 = 1;
+	  
+	  if (reg_set_p (reg_lc1, insn)
+	      || reg_set_p (reg_lt1, insn)
+	      || reg_set_p (reg_lb1, insn))
+	    loop->clobber_loop1 |= 1;
+	}
+    }
+
+  if ((loop->clobber_loop0 && loop->clobber_loop1)
+      || (loop->depth == MAX_LOOP_DEPTH && loop->clobber_loop0))
+    {
+      loop->depth = MAX_LOOP_DEPTH + 1;
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d no loop reg available\n",
+		 loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* There should be an instruction before the loop_end instruction
+     in the same basic block. And the instruction must not be
+     - JUMP
+     - CONDITIONAL BRANCH
+     - CALL
+     - CSYNC
+     - SSYNC
+     - Returns (RTS, RTN, etc.)  */
+
+  bb = loop->tail;
+  last_insn = PREV_INSN (loop->loop_end);
+
+  while (1)
+    {
+      for (; last_insn != PREV_INSN (BB_HEAD (bb));
+	   last_insn = PREV_INSN (last_insn))
+	if (INSN_P (last_insn))
+	  break;
+
+      if (last_insn != PREV_INSN (BB_HEAD (bb)))
+	break;
+
+      if (single_pred_p (bb)
+	  && single_pred (bb) != ENTRY_BLOCK_PTR)
+	{
+	  bb = single_pred (bb);
+	  last_insn = BB_END (bb);
+	  continue;
+	}
+      else
+	{
+	  last_insn = NULL_RTX;
+	  break;
+	}
+    }
+
+  if (!last_insn)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d has no last instruction\n",
+		 loop->loop_no);
+      goto bad_loop;
+    }
+
+  if (JUMP_P (last_insn))
+    {
+      loop_info inner = bb->aux;
+      if (inner
+	  && inner->outer == loop
+	  && inner->loop_end == last_insn
+	  && inner->depth == 1)
+	/* This jump_insn is the exact loop_end of an inner loop
+	   and to be optimized away. So use the inner's last_insn.  */
+	last_insn = inner->last_insn;
+      else
+	{
+	  if (dump_file)
+	    fprintf (dump_file, ";; loop %d has bad last instruction\n",
+		     loop->loop_no);
+	  goto bad_loop;
+	}
+    }
+  else if (CALL_P (last_insn)
+	   || get_attr_type (last_insn) == TYPE_SYNC
+	   || recog_memoized (last_insn) == CODE_FOR_return_internal)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d has bad last instruction\n",
+		 loop->loop_no);
+      goto bad_loop;
+    }
+
+  if (GET_CODE (PATTERN (last_insn)) == ASM_INPUT
+      || asm_noperands (PATTERN (last_insn)) >= 0
+      || get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI)
+    {
+      nop_insn = emit_insn_after (gen_nop (), last_insn);
+      last_insn = nop_insn;
+    }
+
+  loop->last_insn = last_insn;
+
+  /* The loop is good for replacement.  */
+  start_label = loop->start_label;
+  end_label = gen_label_rtx ();
+  iter_reg = loop->iter_reg;
+
+  if (loop->depth == 1 && !loop->clobber_loop1)
+    {
+      lc_reg = reg_lc1;
+      lt_reg = reg_lt1;
+      lb_reg = reg_lb1;
+      loop->clobber_loop1 = 1;
+    }
+  else
+    {
+      lc_reg = reg_lc0;
+      lt_reg = reg_lt0;
+      lb_reg = reg_lb0;
+      loop->clobber_loop0 = 1;
+    }
+
+  /* If iter_reg is a DREG, we need to generate an instruction to load
+     the loop count into the LC register.  */
+  if (D_REGNO_P (REGNO (iter_reg)))
+    {
+      init_insn = gen_movsi (lc_reg, iter_reg);
+      loop_init = gen_lsetup_without_autoinit (lt_reg, start_label,
+					       lb_reg, end_label,
+					       lc_reg);
+    }
+  else if (P_REGNO_P (REGNO (iter_reg)))
+    {
+      init_insn = NULL_RTX;
+      loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
+					    lb_reg, end_label,
+					    lc_reg, iter_reg);
+    }
+  else
+    gcc_unreachable ();
+
+  loop->init = init_insn;
+  loop->end_label = end_label;
+  loop->loop_init = loop_init;
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; replacing loop %d initializer with\n",
+	       loop->loop_no);
+      print_rtl_single (dump_file, loop->loop_init);
+      fprintf (dump_file, ";; replacing loop %d terminator with\n",
+	       loop->loop_no);
+      print_rtl_single (dump_file, loop->loop_end);
+    }
+
+  start_sequence ();
+
+  if (loop->init != NULL_RTX)
+    emit_insn (loop->init);
+  emit_insn(loop->loop_init);
+  emit_label (loop->start_label);
+
+  seq = get_insns ();
+  end_sequence ();
+
+  emit_insn_after (seq, BB_END (loop->predecessor));
+  delete_insn (loop->loop_end);
+
+  /* Insert the loop end label before the last instruction of the loop.  */
+  emit_label_before (loop->end_label, loop->last_insn);
+
+  return;
+
+bad_loop:
+
+  if (dump_file)
+    fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
+
+  /* Mark this loop bad.  */
+  if (loop->depth <= MAX_LOOP_DEPTH)
+    loop->depth = -1;
+
+  outer = loop->outer;
+
+  /* Move all inner loops to loop's outer loop.  */
+  inner_num = VEC_length (loop_info, loop->loops);
+  if (inner_num)
+    {
+      loop_info l;
+
+      if (outer)
+	VEC_reserve (loop_info, heap, outer->loops, inner_num);
+
+      for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, l); ix++)
+	{
+	  l->outer = outer;
+	  if (outer)
+	    VEC_quick_push (loop_info, outer->loops, l);
+	}
+
+      VEC_free (loop_info, heap, loop->loops);
+    }
+
+  /* Move all blocks to loop's outer loop.  */
+  bb_num = VEC_length (basic_block, loop->blocks);
+  if (bb_num)
+    {
+      basic_block b;
+
+      if (outer)
+	VEC_reserve (basic_block, heap, outer->blocks, bb_num);
+
+      for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, b); ix++)
+	{
+	  b->aux = outer;
+	  if (outer)
+	    VEC_quick_push (basic_block, outer->blocks, b);
+	}
+
+      VEC_free (basic_block, heap, loop->blocks);
+    }
+
+  if (DPREG_P (loop->iter_reg))
+    {
+      /* If loop->iter_reg is a DREG or PREG, we can split it here
+	 without scratch register.  */
+      rtx insn;
+
+      emit_insn_before (gen_addsi3 (loop->iter_reg,
+				    loop->iter_reg,
+				    constm1_rtx),
+			loop->loop_end);
+
+      emit_insn_before (gen_cmpsi (loop->iter_reg, const0_rtx),
+			loop->loop_end);
+
+      insn = emit_jump_insn_before (gen_bne (loop->start_label),
+				    loop->loop_end);
+
+      JUMP_LABEL (insn) = loop->start_label;
+      LABEL_NUSES (loop->start_label)++;
+      delete_insn (loop->loop_end);
+    }
+}
+
+static void
+bfin_reorg_loops (FILE *dump_file)
+{
+  basic_block bb;
+  loop_info loops = NULL;
+  loop_info loop;
+  int nloops = 0;
+  unsigned dwork = 0;
+  VEC (loop_work,heap) *works = VEC_alloc (loop_work,heap,20);
+  loop_work *work;
+  edge e;
+  edge_iterator ei;
+
+  /* Find all the possible loop tails.  This means searching for every
+     loop_end instruction.  For each one found, create a loop_info
+     structure and add the head block to the work list. */
+  FOR_EACH_BB (bb)
+    {
+      rtx tail = BB_END (bb);
+
+      while (GET_CODE (tail) == NOTE)
+	tail = PREV_INSN (tail);
+
+      bb->aux = NULL;
+      if (recog_memoized (tail) == CODE_FOR_loop_end)
+	{
+	  /* A possible loop end */
+
+	  loop = XNEW (struct loop_info);
+	  loop->next = loops;
+	  loops = loop;
+	  loop->tail = bb;
+	  loop->head = BRANCH_EDGE (bb)->dest;
+	  loop->successor = FALLTHRU_EDGE (bb)->dest;
+	  loop->predecessor = NULL;
+	  loop->loop_end = tail;
+	  loop->last_insn = NULL_RTX;
+	  loop->iter_reg = SET_DEST (XVECEXP (PATTERN (tail), 0, 1));
+	  loop->depth = loop->length = 0;
+	  loop->visited = 0;
+	  loop->clobber_loop0 = loop->clobber_loop1 = 0;
+	  loop->blocks = VEC_alloc (basic_block, heap, 20);
+	  VEC_quick_push (basic_block, loop->blocks, bb);
+	  loop->outer = NULL;
+	  loop->loops = NULL;
+	  loop->loop_no = nloops++;
+
+	  loop->init = loop->loop_init = NULL_RTX;
+	  loop->start_label = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (tail), 0, 0)), 1), 0);
+	  loop->end_label = NULL_RTX;
+
+	  work = VEC_safe_push (loop_work, heap, works, NULL);
+	  work->block = loop->head;
+	  work->loop = loop;
+
+	  bb->aux = loop;
+
+	  if (dump_file)
+	    {
+	      fprintf (dump_file, ";; potential loop %d ending at\n",
+		       loop->loop_no);
+	      print_rtl_single (dump_file, tail);
+	    }
+	}
+    }
+
+  /*  Now find all the closed loops.
+      until work list empty,
+       if block's auxptr is set
+         if != loop slot
+           if block's loop's start != block
+	     mark loop as bad
+	   else
+             append block's loop's fallthrough block to worklist
+	     increment this loop's depth
+       else if block is exit block
+         mark loop as bad
+       else
+	  set auxptr
+	  for each target of block
+	    add to worklist */
+  while (VEC_iterate (loop_work, works, dwork++, work))
+    {
+      loop = work->loop;
+      bb = work->block;
+      if (bb == EXIT_BLOCK_PTR)
+	/* We've reached the exit block.  The loop must be bad. */
+	loop->depth = -1;
+      else if (!bb->aux)
+	{
+	  /* We've not seen this block before.  Add it to the loop's
+	     list and then add each successor to the work list.  */
+	  bb->aux = loop;
+	  VEC_safe_push (basic_block, heap, loop->blocks, bb);
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    {
+	      if (!VEC_space (loop_work, works, 1))
+		{
+		  if (dwork)
+		    {
+		      VEC_block_remove (loop_work, works, 0, dwork);
+		      dwork = 0;
+		    }
+		  else
+		    VEC_reserve (loop_work, heap, works, 1);
+		}
+	      work = VEC_quick_push (loop_work, works, NULL);
+	      work->block = EDGE_SUCC (bb, ei.index)->dest;
+	      work->loop = loop;
+	    }
+	}
+      else if (bb->aux != loop)
+	{
+	  /* We've seen this block in a different loop.  If it's not
+	     the other loop's head, then this loop must be bad.
+	     Otherwise, the other loop might be a nested loop, so
+	     continue from that loop's successor.  */
+	  loop_info other = bb->aux;
+
+	  if (other->head != bb)
+	    loop->depth = -1;
+	  else
+	    {
+	      other->outer = loop;
+	      VEC_safe_push (loop_info, heap, loop->loops, other);
+	      work = VEC_safe_push (loop_work, heap, works, NULL);
+	      work->loop = loop;
+	      work->block = other->successor;
+	    }
+	}
+    }
+  VEC_free (loop_work, heap, works);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; All loops found:\n\n");
+      bfin_dump_loops (loops);
+    }
+  
+  /* Now apply the optimizations.  */
+  for (loop = loops; loop; loop = loop->next)
+    bfin_optimize_loop (loop);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; After hardware loops optimization:\n\n");
+      bfin_dump_loops (loops);
+    }
+
+  /* Free up the loop structures */
+  while (loops)
+    {
+      loop = loops;
+      loops = loop->next;
+      VEC_free (loop_info, heap, loop->loops);
+      VEC_free (basic_block, heap, loop->blocks);
+      XDELETE (loop);
+    }
+
+  if (dump_file)
+    print_rtl (dump_file, get_insns ());
+}
+
+
 /* We use the machine specific reorg pass for emitting CSYNC instructions
    after conditional branches as needed.
 
@@ -2712,7 +3497,11 @@
   rtx insn, last_condjump = NULL_RTX;
   int cycles_since_jump = INT_MAX;
 
-  if (! TARGET_SPECLD_ANOMALY || ! TARGET_CSYNC_ANOMALY)
+  /* Doloop optimization */
+  if (cfun->machine->has_hardware_loops)
+    bfin_reorg_loops (dump_file);
+
+  if (! TARGET_SPECLD_ANOMALY && ! TARGET_CSYNC_ANOMALY)
     return;
 
   /* First pass: find predicted-false branches; if something after them
Index: config/bfin/bfin.h
===================================================================
--- config/bfin/bfin.h	(revision 114203)
+++ config/bfin/bfin.h	(working copy)
@@ -268,15 +268,17 @@
    5  return address registers RETS/I/X/N/E
    1  arithmetic status register (ASTAT).  */
 
-#define FIRST_PSEUDO_REGISTER 44
+#define FIRST_PSEUDO_REGISTER 50
 
+#define D_REGNO_P(X) ((X) <= REG_R7)
+#define P_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_P7)
+#define I_REGNO_P(X) ((X) >= REG_I0 && (X) <= REG_I3)
+#define DP_REGNO_P(X) (D_REGNO_P (X) || P_REGNO_P (X))
+#define ADDRESS_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_M3)
+#define DREG_P(X) (REG_P (X) && D_REGNO_P (REGNO (X)))
 #define PREG_P(X) (REG_P (X) && P_REGNO_P (REGNO (X)))
 #define IREG_P(X) (REG_P (X) && I_REGNO_P (REGNO (X)))
-#define ADDRESS_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_M3)
-#define D_REGNO_P(X) ((X) <= REG_R7)
-#define P_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_P7)
-#define I_REGNO_P(X) \
-  ((X) == REG_I0 || (X) == REG_I1 || (X) == REG_I2 || (X) == REG_I3)
+#define DPREG_P(X) (REG_P (X) && DP_REGNO_P (REGNO (X)))
 
 #define REGISTER_NAMES { \
   "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", \
@@ -286,7 +288,8 @@
   "A0", "A1", \
   "CC", \
   "RETS", "RETI", "RETX", "RETN", "RETE", "ASTAT", "SEQSTAT", "USP", \
-  "ARGP" \
+  "ARGP", \
+  "LT0", "LT1", "LC0", "LC1", "LB0", "LB1" \
 }
 
 #define SHORT_REGISTER_NAMES { \
@@ -316,8 +319,10 @@
 { 0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 1, 0,    \
 /*i0 i1 i2 i3 b0 b1 b2 b3   l0 l1 l2 l3 m0 m1 m2 m3 */ \
   0, 0, 0, 0, 0, 0, 0, 0,   1, 1, 1, 1, 0, 0, 0, 0,    \
-/*a0 a1 cc rets/i/x/n/e     astat seqstat usp argp */ \
-  0, 0, 0, 1, 1, 1, 1, 1,   1, 1, 1, 1	 \
+/*a0 a1 cc rets/i/x/n/e     astat seqstat usp argp lt0/1 lc0/1 */ \
+  0, 0, 0, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,    \
+/*lb0/1 */ \
+  1, 1  \
 }
 
 /* 1 for registers not available across function calls.
@@ -332,8 +337,10 @@
 { 1, 1, 1, 1, 0, 0, 0, 0,   1, 1, 1, 0, 0, 0, 1, 0, \
 /*i0 i1 i2 i3 b0 b1 b2 b3   l0 l1 l2 l3 m0 m1 m2 m3 */ \
   1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,   \
-/*a0 a1 cc rets/i/x/n/e     astat seqstat usp argp */ \
-  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1	 \
+/*a0 a1 cc rets/i/x/n/e     astat seqstat usp argp lt0/1 lc0/1 */ \
+  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, \
+/*lb0/1 */ \
+  1, 1  \
 }
 
 /* Order in which to allocate registers.  Each register must be
@@ -350,7 +357,8 @@
   REG_L0, REG_L1, REG_L2, REG_L3, REG_M0, REG_M1, REG_M2, REG_M3, \
   REG_RETS, REG_RETI, REG_RETX, REG_RETN, REG_RETE,		  \
   REG_ASTAT, REG_SEQSTAT, REG_USP, 				  \
-  REG_CC, REG_ARGP						  \
+  REG_CC, REG_ARGP,						  \
+  REG_LT0, REG_LT1, REG_LC0, REG_LC1, REG_LB0, REG_LB1		  \
 }
 
 /* Macro to conditionally modify fixed_regs/call_used_regs.  */
@@ -410,6 +418,9 @@
   IPREGS,
   DPREGS,
   MOST_REGS,
+  LT_REGS,
+  LC_REGS,
+  LB_REGS,
   PROLOGUE_REGS,
   NON_A_CC_REGS,
   ALL_REGS, LIM_REG_CLASSES
@@ -443,6 +454,9 @@
    "IPREGS",		\
    "DPREGS",		\
    "MOST_REGS",		\
+   "LT_REGS",		\
+   "LC_REGS",		\
+   "LB_REGS",		\
    "PROLOGUE_REGS",	\
    "NON_A_CC_REGS",	\
    "ALL_REGS" }
@@ -484,9 +498,12 @@
     { 0x000fff00,    0x800 },		/* IPREGS */	\
     { 0x0000ffff,    0x800 },		/* DPREGS */   \
     { 0xffffffff,    0x800 },		/* MOST_REGS */\
-    { 0x00000000,    0x7f8 },		/* PROLOGUE_REGS */\
-    { 0xffffffff,    0xff8 },		/* NON_A_CC_REGS */\
-    { 0xffffffff,    0xfff }}		/* ALL_REGS */
+    { 0x00000000,    0x3000 },		/* LT_REGS */\
+    { 0x00000000,    0xc000 },		/* LC_REGS */\
+    { 0x00000000,    0x30000 },		/* LB_REGS */\
+    { 0x00000000,    0x3f7f8 },		/* PROLOGUE_REGS */\
+    { 0xffffffff,    0x3fff8 },		/* NON_A_CC_REGS */\
+    { 0xffffffff,    0x3ffff }}		/* ALL_REGS */
 
 #define IREG_POSSIBLE_P(OUTER)				     \
   ((OUTER) == POST_INC || (OUTER) == PRE_INC		     \
@@ -535,6 +552,9 @@
    (LETTER) == 'f' ? MREGS : 		\
    (LETTER) == 'c' ? CIRCREGS :         \
    (LETTER) == 'C' ? CCREGS : 		\
+   (LETTER) == 't' ? LT_REGS : 		\
+   (LETTER) == 'k' ? LC_REGS : 		\
+   (LETTER) == 'l' ? LB_REGS : 		\
    (LETTER) == 'x' ? MOST_REGS :	\
    (LETTER) == 'y' ? PROLOGUE_REGS :	\
    (LETTER) == 'w' ? NON_A_CC_REGS :	\
@@ -554,6 +574,9 @@
  : (REGNO) >= REG_B0 && (REGNO) <= REG_B3 ? BREGS	\
  : (REGNO) >= REG_M0 && (REGNO) <= REG_M3 ? MREGS	\
  : (REGNO) == REG_A0 || (REGNO) == REG_A1 ? AREGS	\
+ : (REGNO) == REG_LT0 || (REGNO) == REG_LT1 ? LT_REGS	\
+ : (REGNO) == REG_LC0 || (REGNO) == REG_LC1 ? LC_REGS	\
+ : (REGNO) == REG_LB0 || (REGNO) == REG_LB1 ? LB_REGS	\
  : (REGNO) == REG_CC ? CCREGS				\
  : (REGNO) >= REG_RETS ? PROLOGUE_REGS			\
  : NO_REGS)
Index: config/bfin/bfin.md
===================================================================
--- config/bfin/bfin.md	(revision 114203)
+++ config/bfin/bfin.md	(working copy)
@@ -49,6 +49,9 @@
 ;     B
 ;     c (i0..i3,m0..m3) CIRCREGS
 ;     C (CC)            CCREGS
+;     t  (lt0,lt1)
+;     k  (lc0,lc1)
+;     l  (lb0,lb1)
 ;
 
 ;; Define constants for hard registers.
@@ -109,8 +112,15 @@
    (REG_SEQSTAT 41)
    (REG_USP 42)
 
-   (REG_ARGP 43)])
+   (REG_ARGP 43)
 
+   (REG_LT0 44)
+   (REG_LT1 45)
+   (REG_LC0 46)
+   (REG_LC1 47)
+   (REG_LB0 48)
+   (REG_LB1 49)])
+
 ;; Constants used in UNSPECs and UNSPEC_VOLATILEs.
 
 (define_constants
@@ -124,7 +134,8 @@
    (UNSPEC_MUL_WITH_FLAG 6)
    (UNSPEC_MAC_WITH_FLAG 7)
    (UNSPEC_MOVE_FDPIC 8)
-   (UNSPEC_FUNCDESC_GOT17M4 9)])
+   (UNSPEC_FUNCDESC_GOT17M4 9)
+   (UNSPEC_LSETUP_END 10)])
 
 (define_constants
   [(UNSPEC_VOLATILE_EH_RETURN 0)
@@ -243,6 +254,12 @@
 
 	(const_int 2)))
 
+
+;; Classify the insns into those that are one instruction and those that
+;; are more than one in sequence.
+(define_attr "seq_insns" "single,multi"
+  (const_string "single"))
+
 ;; Conditional moves
 
 (define_expand "movsicc"
@@ -268,7 +285,8 @@
     if cc %0 =%2; /* movsicc-1b */
     if !cc %0 =%1; if cc %0=%2; /* movsicc-1 */"
   [(set_attr "length" "2,2,4")
-   (set_attr "type" "move")])
+   (set_attr "type" "move")
+   (set_attr "seq_insns" "*,*,multi")])
 
 (define_insn "*movsicc_insn2"
   [(set (match_operand:SI 0 "register_operand" "=da,da,da")
@@ -283,7 +301,8 @@
    if cc %0 =%1; /* movsicc-2a */
    if cc %0 =%1; if !cc %0=%2; /* movsicc-1 */"
   [(set_attr "length" "2,2,4")
-   (set_attr "type" "move")])
+   (set_attr "type" "move")
+   (set_attr "seq_insns" "*,*,multi")])
 
 ;; Insns to load HIGH and LO_SUM
 
@@ -376,7 +395,8 @@
    %0 = CC;
    R0 = R0 | R0; CC = AC0;"
   [(set_attr "type" "move,mvi,mcld,mcst,compare,compare,alu0")
-   (set_attr "length" "2,2,*,*,2,2,4")])
+   (set_attr "length" "2,2,*,*,2,2,4")
+   (set_attr "seq_insns" "*,*,*,*,*,*,multi")])
 
 (define_insn "movpdi"
   [(set (match_operand:PDI 0 "nonimmediate_operand" "=e,<,e")
@@ -386,7 +406,8 @@
    %0 = %1;
    %0 = %x1; %0 = %w1;
    %w0 = %1; %x0 = %1;"
-  [(set_attr "type" "move,mcst,mcld")])
+  [(set_attr "type" "move,mcst,mcld")
+   (set_attr "seq_insns" "*,multi,multi")])
 
 (define_insn "load_accumulator"
   [(set (match_operand:PDI 0 "register_operand" "=e")
@@ -429,22 +450,24 @@
 ;; The first alternative is used to make reload choose a limited register
 ;; class when faced with a movsi_insn that had its input operand replaced
 ;; with a PLUS.  We generally require fewer secondary reloads this way.
+
 (define_insn "*movsi_insn"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=da,x*y,da,x,x,x,da,mr")
-        (match_operand:SI 1 "general_operand" "da,x*y,xKs7,xKsh,xKuh,ix,mr,da"))]
-
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=da,x*y,*k,da,da,x,x,x,da,mr")
+	(match_operand:SI 1 "general_operand" "da,x*y,da,*k,xKs7,xKsh,xKuh,ix,mr,da"))]
   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
-  "@
+ "@
    %0 = %1;
    %0 = %1;
+   %0 = %1;
+   %0 = %1;
    %0 = %1 (X);
    %0 = %1 (X);
    %0 = %1 (Z);
    #
    %0 = %1;
    %0 = %1;"
-  [(set_attr "type" "move,move,mvi,mvi,mvi,*,mcld,mcst")
-   (set_attr "length" "2,2,2,4,4,*,*,*")])
+  [(set_attr "type" "move,move,move,move,mvi,mvi,mvi,*,mcld,mcst")
+   (set_attr "length" "2,2,2,2,2,4,4,*,*,*")])
 
 (define_insn_and_split "*movv2hi_insn"
   [(set (match_operand:V2HI 0 "nonimmediate_operand" "=da,da,d,dm")
@@ -776,7 +799,8 @@
 			(match_operand:DI 2 "register_operand" "d")))]
   ""
   "%0 = %1 <op> %2;\\n\\t%H0 = %H1 <op> %H2;"
-  [(set_attr "length" "4")])
+  [(set_attr "length" "4")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "*<optab>di_zesidi_di"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -785,7 +809,8 @@
 			(match_operand:DI 1 "register_operand" "d")))]
   ""
   "%0 = %1 <op>  %2;\\n\\t%H0 = <high_result>;"
-  [(set_attr "length" "4")])
+  [(set_attr "length" "4")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "*<optab>di_sesdi_di"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -795,7 +820,8 @@
    (clobber (match_scratch:SI 3 "=&d"))]
   ""
   "%0 = %1 <op> %2;\\n\\t%3 = %2;\\n\\t%3 >>>= 31;\\n\\t%H0 = %H1 <op> %3;"
-  [(set_attr "length" "8")])
+  [(set_attr "length" "8")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "negdi2"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -804,14 +830,16 @@
    (clobber (reg:CC REG_CC))]
   ""
   "%2 = 0; %2 = %2 - %1; cc = ac0; cc = !cc; %2 = cc;\\n\\t%0 = -%1; %H0 = -%H1; %H0 = %H0 - %2;"
-  [(set_attr "length" "16")])
+  [(set_attr "length" "16")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "one_cmpldi2"
   [(set (match_operand:DI 0 "register_operand" "=d")
         (not:DI (match_operand:DI 1 "register_operand" "d")))]
   ""
   "%0 = ~%1;\\n\\t%H0 = ~%H1;"
-  [(set_attr "length" "4")])
+  [(set_attr "length" "4")
+   (set_attr "seq_insns" "multi")])
 
 ;; DImode zero and sign extend patterns
 
@@ -833,14 +861,16 @@
         (zero_extend:DI (match_operand:QI 1 "register_operand" "d")))]
   ""
   "%0 = %T1 (Z);\\n\\t%H0 = 0;"
-  [(set_attr "length" "4")])
+  [(set_attr "length" "4")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "zero_extendhidi2"
   [(set (match_operand:DI 0 "register_operand" "=d")
         (zero_extend:DI (match_operand:HI 1 "register_operand" "d")))]
   ""
   "%0 = %h1 (Z);\\n\\t%H0 = 0;"
-  [(set_attr "length" "4")])
+  [(set_attr "length" "4")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn_and_split "extendsidi2"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -896,7 +926,8 @@
    %0 += %2; cc = ac0; %3 = cc; %H0 = %H0 + %3;
    %0 = %0 + %2; cc = ac0; %3 = cc; %H0 = %H0 + %H2; %H0 = %H0 + %3;"
   [(set_attr "type" "alu0")
-   (set_attr "length" "10,8,10")])
+   (set_attr "length" "10,8,10")
+   (set_attr "seq_insns" "multi,multi,multi")])
 
 (define_insn "subdi3"
   [(set (match_operand:DI 0 "register_operand" "=&d")
@@ -905,7 +936,8 @@
    (clobber (reg:CC 34))]
   ""
   "%0 = %1-%2;\\n\\tcc = ac0;\\n\\t%H0 = %H1-%H2;\\n\\tif cc jump 1f;\\n\\t%H0 += -1;\\n\\t1:"
-  [(set_attr "length" "10")])
+  [(set_attr "length" "10")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "*subdi_di_zesidi"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -916,7 +948,8 @@
    (clobber (reg:CC 34))]
   ""
   "%0 = %1 - %2;\\n\\tcc = ac0;\\n\\tcc = ! cc;\\n\\t%3 = cc;\\n\\t%H0 = %H1 - %3;"
-  [(set_attr "length" "10")])
+  [(set_attr "length" "10")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "*subdi_zesidi_di"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -927,7 +960,8 @@
    (clobber (reg:CC 34))]
   ""
   "%0 = %2 - %1;\\n\\tcc = ac0;\\n\\tcc = ! cc;\\n\\t%3 = cc;\\n\\t%3 = -%3;\\n\\t%H0 = %3 - %H1"
-  [(set_attr "length" "12")])
+  [(set_attr "length" "12")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "*subdi_di_sesidi"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -938,7 +972,8 @@
    (clobber (reg:CC 34))]
   ""
   "%0 = %1 - %2;\\n\\tcc = ac0;\\n\\t%3 = %2;\\n\\t%3 >>>= 31;\\n\\t%H0 = %H1 - %3;\\n\\tif cc jump 1f;\\n\\t%H0 += -1;\\n\\t1:"
-  [(set_attr "length" "14")])
+  [(set_attr "length" "14")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "*subdi_sesidi_di"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -949,7 +984,8 @@
    (clobber (reg:CC 34))]
   ""
   "%0 = %2 - %1;\\n\\tcc = ac0;\\n\\t%3 = %2;\\n\\t%3 >>>= 31;\\n\\t%H0 = %3 - %H1;\\n\\tif cc jump 1f;\\n\\t%H0 += -1;\\n\\t1:"
-  [(set_attr "length" "14")])
+  [(set_attr "length" "14")
+   (set_attr "seq_insns" "multi")])
 
 ;; Combined shift/add instructions
 
@@ -1496,6 +1532,89 @@
   "jump (%0);"
   [(set_attr "type" "misc")])
 
+;;  Hardware loop
+
+; operand 0 is the loop count pseudo register
+; operand 1 is the number of loop iterations or 0 if it is unknown
+; operand 2 is the maximum number of loop iterations
+; operand 3 is the number of levels of enclosed loops
+; operand 4 is the label to jump to at the top of the loop
+(define_expand "doloop_end"
+  [(parallel [(set (pc) (if_then_else
+			  (ne (match_operand:SI 0 "" "")
+			      (const_int 1))
+			  (label_ref (match_operand 4 "" ""))
+			  (pc)))
+	      (set (match_dup 0)
+		   (plus:SI (match_dup 0)
+			    (const_int -1)))
+	      (unspec [(const_int 0)] UNSPEC_LSETUP_END)
+	      (clobber (match_scratch:SI 5 ""))])]
+  ""
+  {bfin_hardware_loop ();})
+
+(define_insn "loop_end"
+  [(set (pc)
+	(if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "+a*d,*b*h*f,m")
+			  (const_int 1))
+		      (label_ref (match_operand 1 "" ""))
+		      (pc)))
+   (set (match_dup 0)
+	(plus (match_dup 0)
+	      (const_int -1)))
+   (unspec [(const_int 0)] UNSPEC_LSETUP_END)
+   (clobber (match_scratch:SI 2 "=X,&r,&r"))]
+  ""
+  "@
+   /* loop end %0 %l1 */
+   #
+   #"
+  [(set_attr "length" "6,10,14")])
+
+(define_split
+  [(set (pc)
+	(if_then_else (ne (match_operand:SI 0 "nondp_reg_or_memory_operand" "")
+			  (const_int 1))
+		      (label_ref (match_operand 1 "" ""))
+		      (pc)))
+   (set (match_dup 0)
+	(plus (match_dup 0)
+	      (const_int -1)))
+   (unspec [(const_int 0)] UNSPEC_LSETUP_END)
+   (clobber (match_scratch:SI 2 "=&r"))]
+  "reload_completed"
+  [(set (match_dup 2) (match_dup 0))
+   (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))
+   (set (match_dup 0) (match_dup 2))
+   (set (reg:BI REG_CC) (eq:BI (match_dup 2) (const_int 0)))
+   (set (pc)
+	(if_then_else (eq (reg:BI REG_CC)
+			  (const_int 0))
+		      (label_ref (match_dup 1))
+		      (pc)))]
+  "")
+
+(define_insn "lsetup_with_autoinit"
+  [(set (match_operand:SI 0 "lt_register_operand" "=t")
+	(label_ref (match_operand 1 "" "")))
+   (set (match_operand:SI 2 "lb_register_operand" "=l")
+	(label_ref (match_operand 3 "" "")))
+   (set (match_operand:SI 4 "lc_register_operand" "=k")
+	(match_operand:SI 5 "register_operand" "a"))]
+  ""
+  "LSETUP (%1, %3) %4 = %5;"
+  [(set_attr "length" "4")])
+
+(define_insn "lsetup_without_autoinit"
+  [(set (match_operand:SI 0 "lt_register_operand" "=t")
+	(label_ref (match_operand 1 "" "")))
+   (set (match_operand:SI 2 "lb_register_operand" "=l")
+	(label_ref (match_operand 3 "" "")))
+   (use (match_operand:SI 4 "lc_register_operand" "k"))]
+  ""
+  "LSETUP (%1, %3) %4;"
+  [(set_attr "length" "4")])
+
 ;;  Call instructions..
 
 ;; The explicit MEM inside the UNSPEC prevents the compiler from moving
@@ -1760,11 +1879,15 @@
    (set (mem:BLK (match_dup 3))
 	(mem:BLK (match_dup 4)))
    (use (match_dup 2))
-   (clobber (match_scratch:HI 5 "=&d"))]
+   (clobber (match_scratch:HI 5 "=&d"))
+   (clobber (reg:SI REG_LT1))
+   (clobber (reg:SI REG_LC1))
+   (clobber (reg:SI REG_LB1))]
   ""
   "%5 = [%4++]; lsetup (1f, 1f) LC1 = %2; 1: MNOP || [%3++] = %5 || %5 = [%4++]; [%3++] = %5;"
   [(set_attr "type" "misc")
-   (set_attr "length" "16")])
+   (set_attr "length" "16")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "rep_movhi"
   [(set (match_operand:SI 0 "register_operand" "=&a")
@@ -1779,11 +1902,15 @@
    (set (mem:BLK (match_dup 3))
 	(mem:BLK (match_dup 4)))
    (use (match_dup 2))
-   (clobber (match_scratch:HI 5 "=&d"))]
+   (clobber (match_scratch:HI 5 "=&d"))
+   (clobber (reg:SI REG_LT1))
+   (clobber (reg:SI REG_LC1))
+   (clobber (reg:SI REG_LB1))]
   ""
   "%h5 = W[%4++]; lsetup (1f, 1f) LC1 = %2; 1: MNOP || W [%3++] = %5 || %h5 = W [%4++]; W [%3++] = %5;"
   [(set_attr "type" "misc")
-   (set_attr "length" "16")])
+   (set_attr "length" "16")
+   (set_attr "seq_insns" "multi")])
 
 (define_expand "movmemsi"
   [(match_operand:BLK 0 "general_operand" "")
@@ -2315,7 +2442,8 @@
   ""
   "if !cc jump 4 (bp); excpt 3;"
   [(set_attr "type" "misc")
-   (set_attr "length" "4")])
+   (set_attr "length" "4")
+   (set_attr "seq_insns" "multi")])
 
 ;;; Vector instructions
 







Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]