This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Add zero-overhead looping for xtensa backend


Hi Sterling,

  This patch implements zero-overhead looping for the xtensa backend using
the hw-doloop facility.
  If it is OK for trunk, please apply it for me. Thanks.


Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog    (revision 206431)
+++ gcc/ChangeLog    (working copy)
@@ -1,3 +1,18 @@
+2014-01-08  Felix Yang  <fei.yang0953@gmail.com>
+
+    * config/xtensa/xtensa.c (xtensa_reorg): New.
+    (xtensa_reorg_loops): New.
+    (xtensa_can_use_doloop_p): New.
+    (xtensa_invalid_within_doloop): New.
+    (hwloop_optimize): New.
+    (hwloop_fail): New.
+    (hwloop_pattern_reg): New.
+    (xtensa_emit_loop_end): Emit the zero-overhead loop end label.
+    (xtensa_doloop_hooks): Define.
+    * config/xtensa/xtensa.md (doloop_end): New.
+    (zero_cost_loop_start): Rewritten.
+    (zero_cost_loop_end): Rewritten.
+
 2014-01-08  Marek Polacek  <polacek@redhat.com>

     PR middle-end/59669
Index: gcc/config/xtensa/xtensa.md
===================================================================
--- gcc/config/xtensa/xtensa.md    (revision 206431)
+++ gcc/config/xtensa/xtensa.md    (working copy)
@@ -35,6 +35,8 @@
   (UNSPEC_TLS_CALL    9)
   (UNSPEC_TP        10)
   (UNSPEC_MEMW        11)
+  (UNSPEC_LSETUP_START  12)
+  (UNSPEC_LSETUP_END    13)

   (UNSPECV_SET_FP    1)
   (UNSPECV_ENTRY    2)
@@ -1289,6 +1291,8 @@
    (set_attr "length"    "3")])


+;; Hardware loop support.
+
 ;; Define the loop insns used by bct optimization to represent the
 ;; start and end of a zero-overhead loop (in loop.c).  This start
 ;; template generates the loop insn; the end template doesn't generate
@@ -1296,34 +1300,58 @@

 (define_insn "zero_cost_loop_start"
   [(set (pc)
-    (if_then_else (eq (match_operand:SI 0 "register_operand" "a")
-              (const_int 0))
-              (label_ref (match_operand 1 "" ""))
-              (pc)))
-   (set (reg:SI 19)
-    (plus:SI (match_dup 0) (const_int -1)))]
+        (if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0")
+                          (const_int 1))
+                      (label_ref (match_operand 1 "" ""))
+                      (pc)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=a")
+        (plus (match_dup 2)
+              (const_int -1)))
+   (unspec [(const_int 0)] UNSPEC_LSETUP_START)]
   ""
-  "loopnez\t%0, %l1"
+  "loop\t%0, %l1_LEND"
   [(set_attr "type"    "jump")
    (set_attr "mode"    "none")
    (set_attr "length"    "3")])

 (define_insn "zero_cost_loop_end"
   [(set (pc)
-    (if_then_else (ne (reg:SI 19) (const_int 0))
-              (label_ref (match_operand 0 "" ""))
-              (pc)))
-   (set (reg:SI 19)
-    (plus:SI (reg:SI 19) (const_int -1)))]
+        (if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0")
+                          (const_int 1))
+                      (label_ref (match_operand 1 "" ""))
+                      (pc)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=a")
+        (plus (match_dup 2)
+              (const_int -1)))
+   (unspec [(const_int 0)] UNSPEC_LSETUP_END)]
   ""
 {
-    xtensa_emit_loop_end (insn, operands);
-    return "";
+  xtensa_emit_loop_end (insn, operands);
+  return "";
 }
   [(set_attr "type"    "jump")
    (set_attr "mode"    "none")
    (set_attr "length"    "0")])

+;; Operand 0: loop count pseudo register.  Operand 1: label at the top of the loop.
+;; NOTE(review): the condition is empty; should it require the loop option?  Confirm.
+(define_expand "doloop_end"
+  [(parallel [(set (pc) (if_then_else
+                          (ne (match_operand:SI 0 "" "")
+                              (const_int 1))
+                          (label_ref (match_operand 1 "" ""))
+                          (pc)))
+              (set (match_dup 0)
+                   (plus:SI (match_dup 0)
+                            (const_int -1)))
+              (unspec [(const_int 0)] UNSPEC_LSETUP_END)])]
+  ""
+{
+  /* The loop optimizer doesn't check predicates; reject non-SI counters.  */
+  if (GET_MODE (operands[0]) != SImode)
+    FAIL;
+})
+

 ;; Setting a register from a comparison.

Index: gcc/config/xtensa/xtensa.c
===================================================================
--- gcc/config/xtensa/xtensa.c    (revision 206431)
+++ gcc/config/xtensa/xtensa.c    (working copy)
@@ -1,6 +1,7 @@
 /* Subroutines for insn-output.c for Tensilica's Xtensa architecture.
    Copyright (C) 2001-2014 Free Software Foundation, Inc.
    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+   Zero-overhead looping support by Felix Yang (felix.yang0953@gmail.com).

 This file is part of GCC.

@@ -61,8 +62,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple.h"
 #include "gimplify.h"
 #include "df.h"
+#include "hw-doloop.h"
+#include "dumpfile.h"

-
 /* Enumeration for all of the relational tests, so that we can build
    arrays indexed by the test type, and not worry about the order
    of EQ, NE, etc.  */
@@ -186,6 +188,10 @@ static reg_class_t xtensa_secondary_reload (bool,

 static bool constantpool_address_p (const_rtx addr);
 static bool xtensa_legitimate_constant_p (enum machine_mode, rtx);
+static void xtensa_reorg (void);
+static bool xtensa_can_use_doloop_p (double_int, double_int iterations_max,
+                                     unsigned int, bool);
+static const char *xtensa_invalid_within_doloop (const_rtx);

 static bool xtensa_member_type_forces_blk (const_tree,
                        enum machine_mode mode);
@@ -312,6 +318,15 @@ static const int reg_nonleaf_alloc_order[FIRST_PSE
 #undef TARGET_LEGITIMATE_CONSTANT_P
 #define TARGET_LEGITIMATE_CONSTANT_P xtensa_legitimate_constant_p

+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG xtensa_reorg
+
+#undef TARGET_CAN_USE_DOLOOP_P
+#define TARGET_CAN_USE_DOLOOP_P xtensa_can_use_doloop_p
+
+#undef TARGET_INVALID_WITHIN_DOLOOP
+#define TARGET_INVALID_WITHIN_DOLOOP xtensa_invalid_within_doloop
+
 struct gcc_target targetm = TARGET_INITIALIZER;


@@ -1676,7 +1691,7 @@ xtensa_emit_loop_end (rtx insn, rtx *operands)
         }
     }

-  output_asm_insn ("# loop end for %0", operands);
+  output_asm_insn ("%1_LEND:", operands);
 }


@@ -3709,4 +3724,224 @@ xtensa_legitimate_constant_p (enum machine_mode mo
   return !xtensa_tls_referenced_p (x);
 }

+/* Implement TARGET_CAN_USE_DOLOOP_P.  The two iteration counts are unused.  */
+
+static bool
+xtensa_can_use_doloop_p (double_int, double_int,
+                         unsigned int level, bool entered_at_top)
+{
+  /* Considering limitations in the hardware, only use doloop for innermost
+     loops which must be entered from the top.  */
+  if (level != 1 || !entered_at_top)
+    return false;
+
+  return true;
+}
+
+/* Return NULL if INSN is valid within a low-overhead loop.
+   Otherwise return a string saying why doloop cannot be applied.  */
+
+static const char *
+xtensa_invalid_within_doloop (const_rtx insn)
+{
+  if (CALL_P (insn))
+    return "Function call in the loop.";
+
+  return NULL;
+}
+
+/* Try to make LOOP a hardware loop; return true if loop start was emitted.  */
+
+static bool
+hwloop_optimize (hwloop_info loop)
+{
+  int i;
+  edge entry_edge;
+  basic_block entry_bb;
+  rtx insn, seq, iter_reg, entry_after;
+
+  if (loop->depth > 1)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d is not innermost\n", loop->loop_no);
+      return false;
+    }
+
+  if (!loop->incoming_dest)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d has more than one entry\n", loop->loop_no);
+      return false;
+    }
+
+  if (loop->incoming_dest != loop->head)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d is not entered from head\n", loop->loop_no);
+      return false;
+    }
+
+  if (loop->has_call || loop->has_asm)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d has invalid insn\n", loop->loop_no);
+      return false;
+    }
+
+  /* Reject the loop if its iteration register is used in or outside of it.  */
+  if (loop->iter_reg_used || loop->iter_reg_used_outside)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no);
+      return false;
+    }
+
+  /* Check if start_label appears before doloop_end.  */
+  insn = loop->start_label;
+  while (insn && insn != loop->loop_end)
+    insn = NEXT_INSN (insn);
+
+  if (!insn)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
+                 loop->loop_no);
+      return false;
+    }
+
+  /* Get the loop iteration register.  */
+  iter_reg = loop->iter_reg;
+
+  gcc_assert (REG_P (iter_reg));
+
+  entry_edge = NULL;
+  /* Pick the first incoming edge that falls through into the loop, if any.  */
+  FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
+    if (entry_edge->flags & EDGE_FALLTHRU)
+      break;
+
+  if (entry_edge == NULL)
+    return false;
+
+  /* Place the zero_cost_loop_start instruction before the loop.  */
+  entry_bb = entry_edge->src;
+
+  start_sequence ();
+
+  insn = emit_insn (gen_zero_cost_loop_start (loop->iter_reg,
+                                              loop->start_label,
+                                              loop->iter_reg));
+
+  seq = get_insns ();
+
+  if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1)
+    {
+      basic_block new_bb;
+      edge e;
+      edge_iterator ei;
+
+      emit_insn_before (seq, BB_HEAD (loop->head));
+      seq = emit_label_before (gen_label_rtx (), seq);
+
+      new_bb = create_basic_block (seq, insn, entry_bb);
+      FOR_EACH_EDGE (e, ei, loop->incoming)
+        {
+          if (!(e->flags & EDGE_FALLTHRU))
+            redirect_edge_and_branch_force (e, new_bb);
+          else
+            redirect_edge_succ (e, new_bb);
+        }
+      make_edge (new_bb, loop->head, 0);
+    }
+  else
+    {
+      entry_after = BB_END (entry_bb);
+      while (DEBUG_INSN_P (entry_after)
+             || (NOTE_P (entry_after)
+                 && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK))
+        entry_after = PREV_INSN (entry_after);
+      emit_insn_after (seq, entry_after);
+    }
+
+  end_sequence ();
+
+  return true;
+}
+
+/* A callback for the hw-doloop pass.  Called when a loop we have discovered
+   turns out not to be optimizable; we replace the loop_end insn with a
+   decrement of the iteration register and a conditional branch.  */
+
+static void
+hwloop_fail (hwloop_info loop)
+{
+  rtx test, insn = loop->loop_end;
+
+  emit_insn_before (gen_addsi3 (loop->iter_reg,
+                                loop->iter_reg,
+                                constm1_rtx),
+                    loop->loop_end);
+  /* The counter is already decremented, so branch back while it is nonzero.  */
+  test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
+  insn = emit_jump_insn_before (gen_cbranchsi4 (test,
+                                                loop->iter_reg, const0_rtx,
+                                                loop->start_label),
+                                loop->loop_end);
+
+  JUMP_LABEL (insn) = loop->start_label;
+  LABEL_NUSES (loop->start_label)++;
+  delete_insn (loop->loop_end);
+}
+
+/* A callback for the hw-doloop pass.  If INSN is a jump recognized as
+   zero_cost_loop_end, return the register set by the second element of its
+   pattern (the loop counter).  Otherwise, return NULL_RTX.  */
+
+static rtx
+hwloop_pattern_reg (rtx insn)
+{
+  rtx reg;
+
+  if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_zero_cost_loop_end)
+    return NULL_RTX;
+  /* Element 1 of the pattern is the set that decrements the loop counter.  */
+  reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1));
+  if (!REG_P (reg))
+    return NULL_RTX;
+  return reg;
+}
+
+/* Callbacks passed to reorg_loops by xtensa_reorg_loops.  */
+static struct hw_doloop_hooks xtensa_doloop_hooks =
+{
+  hwloop_pattern_reg,
+  hwloop_optimize,
+  hwloop_fail
+};
+
+/* Called from xtensa_reorg.  This pass looks for doloop_end insns and tries
+   to rewrite the RTL of these loops, using xtensa_doloop_hooks, so that
+   proper Xtensa hardware loops are generated.  */
+
+static void
+xtensa_reorg_loops (void)
+{
+  reorg_loops (true, &xtensa_doloop_hooks);
+}
+
+/* Implement TARGET_MACHINE_DEPENDENT_REORG.  */
+
+static void
+xtensa_reorg (void)
+{
+  /* block_for_insn is freed in toplev to keep compatibility with old
+     MDEP_REORGS that are not CFG based.  Recompute it now.  */
+  compute_bb_for_insn ();
+
+  df_analyze ();
+
+  /* Doloop optimization.  */
+  xtensa_reorg_loops ();
+}
+
 #include "gt-xtensa.h"

Cheers,
Felix

Attachment: xtensa-zcl.diff
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]