This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[SH] PR 51244 - Fix defects introduced in 4.8


Hello,

Some of the things I've done in 4.8 to improve SH T bit handling turned
out to produce wrong code.  The attached patch fixes that by introducing
an SH specific RTL pass.

Tested on rev 202876 with
make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

and no new failures.
Additional test cases will follow.
OK for trunk?

Cheers,
Oleg

gcc/ChangeLog:

	PR target/51244
	* config/sh/ifcvt_sh.cc: New SH specific RTL pass.
	* config.gcc (SH extra_objs): Add ifcvt_sh.o.
	* config/sh/t-sh (ifcvt_sh.o): New entry.
	* config/sh/sh.c (sh_fixed_condition_code_regs): New function 
	that implements the target hook TARGET_FIXED_CONDITION_CODE_REGS.
	(register_sh_passes): New function.  Register ifcvt_sh pass.
	(sh_option_override): Invoke it.
	(sh_canonicalize_comparison): Handle op0_preserve_value.
	* config/sh/sh.md (*cbranch_t): Do not try to optimize missed test and
	branch opportunities.  Canonicalize branch condition.
	(nott): Allow only if pseudos can be created for non-SH2A.
Index: gcc/config.gcc
===================================================================
--- gcc/config.gcc	(revision 202876)
+++ gcc/config.gcc	(working copy)
@@ -462,6 +462,7 @@
 	cpu_type=sh
 	need_64bit_hwint=yes
 	extra_options="${extra_options} fused-madd.opt"
+	extra_objs="${extra_objs} ifcvt_sh.o"
 	;;
 v850*-*-*)
 	cpu_type=v850
Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 202876)
+++ gcc/config/sh/sh.c	(working copy)
@@ -53,6 +53,9 @@
 #include "alloc-pool.h"
 #include "tm-constrs.h"
 #include "opts.h"
+#include "tree-pass.h"
+#include "pass_manager.h"
+#include "context.h"
 
 #include <sstream>
 #include <vector>
@@ -311,6 +314,7 @@
 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
 					enum machine_mode, bool);
+static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
 
 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
 
@@ -587,6 +591,9 @@
 #undef TARGET_CANONICALIZE_COMPARISON
 #define TARGET_CANONICALIZE_COMPARISON	sh_canonicalize_comparison
 
+#undef TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
+
 /* Machine-specific symbol_ref flags.  */
 #define SYMBOL_FLAG_FUNCVEC_FUNCTION	(SYMBOL_FLAG_MACH_DEP << 0)
 
@@ -710,6 +717,34 @@
 #undef err_ret
 }
 
+/* Register SH specific RTL passes.  */
+extern opt_pass* make_pass_ifcvt_sh (gcc::context* ctx, bool split_insns,
+				     const char* name);
+static void
+register_sh_passes (void)
+{
+  if (!TARGET_SH1)
+    return;
+
+/* Running the ifcvt_sh pass after ce1 generates better code when
+   comparisons are combined and reg-reg moves are introduced, because
+   reg-reg moves will be eliminated afterwards.  However, there are quite
+   some cases where combine will be unable to fold comparison related insns,
+   thus for now don't do it.
+  register_pass (make_pass_ifcvt_sh (g, false, "ifcvt1_sh"),
+		 PASS_POS_INSERT_AFTER, "ce1", 1);
+*/
+
+  /*  Run ifcvt_sh pass after combine but before register allocation.  */
+  register_pass (make_pass_ifcvt_sh (g, true, "ifcvt2_sh"),
+		 PASS_POS_INSERT_AFTER, "split1", 1);
+
+  /* Run ifcvt_sh pass after register allocation and basic block reordering
+     as this sometimes creates new opportunities.  */
+  register_pass (make_pass_ifcvt_sh (g, true, "ifcvt3_sh"),
+		 PASS_POS_INSERT_AFTER, "split4", 1);
+}
+
 /* Implement TARGET_OPTION_OVERRIDE macro.  Validate and override 
    various options, and do some machine dependent initialization.  */
 static void
@@ -1022,6 +1057,8 @@
      target CPU.  */
   selected_atomic_model_
     = parse_validate_atomic_model_option (sh_atomic_model_str);
+
+  register_sh_passes ();
 }
 
 /* Print the operand address in x to the stream.  */
@@ -1908,7 +1945,7 @@
 static void
 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
 			    enum machine_mode mode,
-			    bool op0_preserve_value ATTRIBUTE_UNUSED)
+			    bool op0_preserve_value)
 {
   /* When invoked from within the combine pass the mode is not specified,
      so try to get it from one of the operands.  */
@@ -1928,6 +1965,9 @@
   // Make sure that the constant operand is the second operand.
   if (CONST_INT_P (op0) && !CONST_INT_P (op1))
     {
+      if (op0_preserve_value)
+	return;
+
       std::swap (op0, op1);
       cmp = swap_condition (cmp);
     }
@@ -2016,6 +2056,14 @@
   *code = (int)tmp_code;
 }
 
+bool
+sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
+{
+  *p1 = T_REG;
+  *p2 = INVALID_REGNUM;
+  return true;
+}
+
 enum rtx_code
 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
 			  enum rtx_code comparison)
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 202876)
+++ gcc/config/sh/sh.md	(working copy)
@@ -8419,89 +8419,32 @@
   return output_branch (sh_eval_treg_value (operands[1]), insn, operands);
 }
   "&& 1"
-  [(set (pc) (if_then_else (eq (reg:SI T_REG) (match_dup 2))
-			   (label_ref (match_dup 0))
-			   (pc)))]
+  [(const_int 0)]
 {
-  /* Try to find missed test and branch combine opportunities which result
-     in redundant T bit tests before conditional branches.
-     This is done not only after combine (and before reload) but in every
-     split pass, because some opportunities are formed also after combine.
-     FIXME: Probably this would not be needed if CCmode was used
-     together with TARGET_FIXED_CONDITION_CODE_REGS.  */
+  /* Try to canonicalize the branch condition if it is not one of:
+	(ne (reg:SI T_REG) (const_int 0))
+	(eq (reg:SI T_REG) (const_int 0))
 
-  const int treg_value = sh_eval_treg_value (operands[1]);
-  operands[2] = NULL_RTX;
+     Instead of splitting out a new insn, we modify the current insn's
+     operands as needed.  This preserves things such as REG_DEAD notes.  */
 
-  /* Scan the insns backwards for an insn that sets the T bit by testing a
-     reg against zero like:
-	(set (reg T_REG) (eq (reg) (const_int 0)))  */
-  rtx testing_insn = NULL_RTX;
-  rtx tested_reg = NULL_RTX;
+  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+      && REG_P (XEXP (operands[1], 0)) && REGNO (XEXP (operands[1], 0)) == T_REG
+      && XEXP (operands[1], 1) == const0_rtx)
+    DONE;
 
-  set_of_reg s0 = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
-				      prev_nonnote_insn_bb);
-  if (s0.set_src != NULL_RTX
-      && GET_CODE (s0.set_src) == EQ
-      && REG_P (XEXP (s0.set_src, 0))
-      && satisfies_constraint_Z (XEXP (s0.set_src, 1)))
-    {
-      testing_insn = s0.insn;
-      tested_reg = XEXP (s0.set_src, 0);
-    }
-  else
-    FAIL;
+  int branch_cond = sh_eval_treg_value (operands[1]);
+  rtx new_cond_rtx = NULL_RTX;
 
-  /* Continue scanning the insns backwards and try to find the insn that
-     sets the tested reg which we found above.  If the reg is set by storing
-     the T bit or the negated T bit we can eliminate the test insn before
-     the branch.  Notice that the branch condition has to be inverted if the
-     test is eliminated.  */
+  if (branch_cond == 0)
+    new_cond_rtx = gen_rtx_EQ (VOIDmode, get_t_reg_rtx (), const0_rtx);
+  else if (branch_cond == 1)
+    new_cond_rtx = gen_rtx_NE (VOIDmode, get_t_reg_rtx (), const0_rtx);
 
-  /* If the T bit is used between the testing insn and the brach insn
-     leave it alone.  */
-  if (reg_used_between_p (get_t_reg_rtx (), testing_insn, curr_insn))
-    FAIL;
-
-  while (true)
-    {
-      /* It's not safe to go beyond the current basic block after reload.  */
-      set_of_reg s1 = sh_find_set_of_reg (tested_reg, s0.insn,
-					  reload_completed
-					  ? prev_nonnote_insn_bb
-					  : prev_nonnote_insn);
-      if (s1.set_src == NULL_RTX)
-	break;
-
-      if (t_reg_operand (s1.set_src, VOIDmode))
-	operands[2] = GEN_INT (treg_value ^ 1);
-      else if (negt_reg_operand (s1.set_src, VOIDmode))
-	operands[2] = GEN_INT (treg_value);
-      else if (REG_P (s1.set_src))
-	{
-	   /* If it's a reg-reg copy follow the copied reg.  This can
-	      happen e.g. when T bit store zero-extensions are
-	      eliminated.  */
-	  tested_reg = s1.set_src;
-	  s0.insn = s1.insn;
-	  continue;
-	}
-
-	/* It's only safe to remove the testing insn if the T bit is not
-	   modified between the testing insn and the insn that stores the
-	   T bit.  Notice that some T bit stores such as negc also modify
-	   the T bit.  */
-	if (modified_between_p (get_t_reg_rtx (), s1.insn, testing_insn)
-	    || modified_in_p (get_t_reg_rtx (), s1.insn))
-	  operands[2] = NULL_RTX;
-
-	break;
-    }
-
-  if (operands[2] == NULL_RTX)
-    FAIL;
-
-  set_insn_deleted (testing_insn);
+  if (new_cond_rtx != NULL_RTX)
+    validate_change (curr_insn, &XEXP (XEXP (PATTERN (curr_insn), 1), 0),
+		     new_cond_rtx, false);
+  DONE;
 }
   [(set_attr "type" "cbranch")])
 
@@ -11480,10 +11423,12 @@
 ;; multiple insns like:
 ;;	movt	Rn
 ;;	tst	Rn,Rn
+;; This requires an additional pseudo.  The SH specific ifcvt_sh pass will
+;; look for this insn.  Disallow using it if pseudos can't be created.
 (define_insn_and_split "nott"
   [(set (reg:SI T_REG)
-	(xor:SI (match_operand:SI 0 "t_reg_operand" "") (const_int 1)))]
-  "TARGET_SH1"
+	(xor:SI (match_operand:SI 0 "t_reg_operand") (const_int 1)))]
+  "TARGET_SH2A || (TARGET_SH1 && can_create_pseudo_p ())"
 {
   gcc_assert (TARGET_SH2A);
   return "nott";
Index: gcc/config/sh/t-sh
===================================================================
--- gcc/config/sh/t-sh	(revision 202876)
+++ gcc/config/sh/t-sh	(working copy)
@@ -21,6 +21,10 @@
 	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
 		$(srcdir)/config/sh/sh-c.c
 
+ifcvt_sh.o: $(srcdir)/config/sh/ifcvt_sh.cc \
+  $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h
+	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
 DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG))
 OTHER_ENDIAN = $(word 2,$(TM_ENDIAN_CONFIG))
 
Index: gcc/config/sh/ifcvt_sh.cc
===================================================================
--- gcc/config/sh/ifcvt_sh.cc	(revision 0)
+++ gcc/config/sh/ifcvt_sh.cc	(revision 0)
@@ -0,0 +1,1489 @@
+/* An SH specific if-conversion RTL pass that tries to combine comparisons
+   and redundant condition code register stores across multiple basic blocks.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "machmode.h"
+#include "basic-block.h"
+#include "df.h"
+#include "rtl.h"
+#include "insn-config.h"
+#include "insn-codes.h"
+#include "emit-rtl.h"
+#include "recog.h"
+#include "tree-pass.h"
+#include "target.h"
+#include "expr.h"
+
+#include <algorithm>
+#include <list>
+#include <vector>
+
+/*
+This pass tries to optimize for example this:
+	mov.l	@(4,r4),r1
+	tst	r1,r1
+	movt	r1
+	tst	r1,r1
+	bt/s	.L5
+
+into something simpler:
+	mov.l	@(4,r4),r1
+	tst	r1,r1
+	bf/s	.L5
+
+Such sequences can be identified by looking for conditional branches and
+checking whether the ccreg is set before the conditional branch
+by testing another register for != 0, which was set by a ccreg store.
+This can be optimized by eliminating the redundant comparison and
+inverting the branch condition.  There can be multiple comparisons in
+different basic blocks that all end up in the redundant test insn before the
+conditional branch.  Some example RTL ...
+
+Example 1)
+----------
+
+[bb 3]
+(set (reg:SI 147 t) (eq:SI (reg:SI 173) (const_int 0)))
+(set (reg:SI 167) (xor:SI (reg:SI 147 t) (const_int 1)))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 147 t) (eq:SI (reg:SI 177) (const_int 0)))
+(set (reg:SI 167) (reg:SI 147 t))
+-> bb 5
+
+[bb 5]
+(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0)))
+(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0))
+                        (label_ref:SI 50) (pc)))
+
+In [bb 4] elimination of the comparison would require inversion of the branch
+condition and compensation of other BBs.
+Instead an inverting reg-move can be used:
+
+[bb 3]
+(set (reg:SI 167) (reg:SI 173))
+-> bb 5
+
+[BB 4]
+(set (reg:SI 167) (not:SI (reg:SI 177)))
+-> bb 5
+
+[bb 5]
+(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0)))
+(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0))
+                        (label_ref:SI 50) (pc)))
+
+
+Example 2)
+----------
+
+[bb 3]
+(set (reg:SI 147 t) (gt:SI (reg:SI 173) (reg:SI 175)))
+(set (reg:SI 167) (reg:SI 147 t))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 147 t) (gt:SI (reg:SI 177) (reg:SI 179)))
+(set (reg:SI 167) (reg:SI 147 t))
+-> bb 5
+
+[bb 5]
+(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0)))
+(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0))
+                        (label_ref:SI 51) (pc)))
+
+The common comparison is factored out and the branch condition is inverted:
+
+[bb 3]
+(set (reg:SI 167) (reg:SI 173))
+(set (reg:SI 200) (reg:SI 175))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 167) (reg:SI 177))
+(set (reg:SI 200) (reg:SI 179))
+-> bb 5
+
+[bb 5]
+(set (reg:SI 147 t) (gt:SI (reg:SI 167) (reg:SI 200)))
+(set (pc) (if_then_else (eq (reg:SI 147 t) (const_int 0))
+                        (label_ref:SI 51) (pc)))
+
+
+Example 3)
+----------
+
+[bb 3]
+(set (reg:SI 147 t) (gt:SI (reg:SI 173) (reg:SI 175)))
+(set (reg:SI 167) (reg:SI 147 t))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 147 t) (ge:SI (reg:SI 179) (reg:SI 177)))
+(set (reg:SI 167) (reg:SI 147 t))
+-> bb 5
+
+[bb 5]
+(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0)))
+(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0))
+                        (label_ref:SI 51) (pc)))
+
+The T bit lifetime is extended and the branch condition is inverted:
+
+[bb 3]
+(set (reg:SI 147 t) (gt:SI (reg:SI 173) (reg:SI 175)))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 147 t) (ge:SI (reg:SI 179) (reg:SI 177)))
+-> bb 5
+
+[bb 5]
+(set (pc) (if_then_else (eq (reg:SI 147 t) (const_int 0))
+                        (label_ref:SI 51) (pc)))
+
+
+Example 4)
+----------
+
+[bb 3]
+(set (reg:SI 147 t) (eq:SI (reg:SI 173) (const_int 5)))
+(set (reg:SI 167) (reg:SI 147 t))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 147 t) (eq:SI (reg:SI 176) (const_int 5)))
+(set (reg:SI 167) (xor:SI (reg:SI 147 t) (const_int 1)))
+-> bb 5
+
+[bb 5]
+(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0)))
+(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0))
+                        (label_ref:SI 50) (pc)))
+
+In this case the comparisons are the same and could be combined, but the
+branch condition is different for [bb 3] and [bb 5].  Since the comparison
+is not a zero comparison, we can't negate one of the operands.  The best thing
+we can do here is to eliminate the comparison before the cbranch and invert
+the ccreg in one of the BBs.  On SH2A this will utilize the 'nott' instruction.
+
+[bb 3]
+(set (reg:SI 147 t) (eq:SI (reg:SI 173) (const_int 5)))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 147 t) (eq:SI (reg:SI 176) (const_int 5)))
+(set (reg:SI 147 t) (xor:SI (reg:SI 147 t) (const_int 1)))
+-> bb 5
+
+[bb 5]
+(set (pc) (if_then_else (eq (reg:SI 147 t) (const_int 0))  // inverted
+                        (label_ref:SI 50) (pc)))
+
+
+In order to handle cases such as above the RTL pass does the following:
+
+- Find the ccreg sets (comparisons) and ccreg stores
+  (inverting and non-inverting) in all related BBs.
+
+- If the comparison types in the BBs are all the same, try to combine the
+  comparisons in the BBs and replace the zero comparison before the cbranch
+  with the common comparison.
+
+    - If the cstores are the same, move the comparison before the cbranch
+      and replace the comparisons in the BBs with reg-reg copies to get the
+      operands in place (create new pseudo regs).
+
+    - If the cstores differ, try to apply the special case
+        (eq (reg) (const_int 0)) -> inverted = (not (reg)).
+      for the subordinate cstore types and eliminate the dominating ones.
+
+- If the comparison types in the BBs are not the same, or the first approach
+  doesn't work out for some reason, try to eliminate the comparison before the
+  cbranch by extending the lifetime of the ccreg by leaving the individual
+  comparisons but eliminating the cstores.
+  If the cstores are all the same this is straightforward.
+  If they're not, try to reverse the ccreg for the subordinate cstore type
+  and eliminate the dominating one.
+*/
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// Helper functions
+
+#define log_msg(...)\
+  do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); } while (0)
+
+#define log_insn(i)\
+  do { if (dump_file != NULL) print_rtl_single (dump_file, (const_rtx)i); } while (0)
+
+#define log_rtx(r)\
+  do { if (dump_file != NULL) print_rtl (dump_file, (const_rtx)r); } while (0)
+
+#define log_return(retval, ...)\
+  do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); \
+       return retval; } while (0)
+
+#define log_return_void(...)\
+  do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); \
+       return; } while (0)
+
+struct set_of_reg
+{
+  // The insn where the search stopped or NULL_RTX.
+  rtx insn;
+
+  // The set rtx of the specified reg if found, NULL_RTX otherwise.
+  // Notice that the set rtx can also be in a parallel.
+  const_rtx set_rtx;
+
+  // The set source operand rtx if found, NULL_RTX otherwise.
+  rtx
+  set_src (void) const
+  {
+    return set_rtx == NULL_RTX ? NULL_RTX : XEXP (set_rtx, 1);
+  }
+
+  // The set destination operand rtx if found, NULL_RTX otherwise.
+  rtx
+  set_dst (void) const
+  {
+    return set_rtx == NULL_RTX ? NULL_RTX : XEXP (set_rtx, 0);
+  }
+
+  bool
+  empty (void) const
+  {
+    return insn == NULL_RTX || set_rtx == NULL_RTX;
+  }
+};
+
+// Given a reg rtx and a start insn find the insn (in the same basic block)
+// that sets the reg.
+static set_of_reg
+find_set_of_reg_bb (rtx reg, rtx insn)
+{
+  set_of_reg result = { insn, NULL_RTX };
+
+  if (!REG_P (reg) || insn == NULL_RTX)
+    return result;
+
+  for (result.insn = insn; result.insn != NULL_RTX;
+       result.insn = prev_nonnote_insn_bb (result.insn))
+    {
+      if (BARRIER_P (result.insn))
+	return result;
+      if (!NONJUMP_INSN_P (result.insn))
+	continue;
+      if (reg_set_p (reg, result.insn))
+	{
+	  result.set_rtx = set_of (reg, result.insn);
+	  if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
+	    result.set_rtx = NULL_RTX;
+	  return result;
+	}
+    }
+
+  return result;
+}
+
+static bool
+reg_dead_after_insn (const_rtx reg, const_rtx insn)
+{
+  return find_regno_note (insn, REG_DEAD, REGNO (reg)) != NULL_RTX;
+}
+
+static bool
+reg_unused_after_insn (const_rtx reg, const_rtx insn)
+{
+  return find_regno_note (insn, REG_UNUSED, REGNO (reg)) != NULL_RTX;
+}
+
+// Check whether the two specified basic blocks are adjacent, i.e. there's no
+// other basic block in between them.
+static bool
+is_adjacent_bb (basic_block a, basic_block b)
+{
+  basic_block bb0[] = { a, b };
+  basic_block bb1[] = { b, a };
+
+  for (int i = 0; i < 2; ++i)
+    for (edge_iterator ei = ei_start (bb0[i]->succs);
+	 !ei_end_p (ei); ei_next (&ei))
+      if (ei_edge (ei)->dest == bb1[i])
+	return true;
+
+  return false;
+}
+
+// Internal function of trace_reg_uses.
+static void
+trace_reg_uses_1 (rtx reg, rtx start_insn, basic_block bb, int& count,
+		  std::vector<basic_block>& visited_bb, rtx abort_at_insn)
+{
+  if (bb == NULL)
+    return;
+
+  if (std::find (visited_bb.begin (), visited_bb.end (), bb)
+      != visited_bb.end ())
+    log_return_void ("[bb %d] already visited\n", bb->index);
+
+  visited_bb.push_back (bb);
+
+  if (BB_END (bb) == NULL_RTX)
+    log_return_void ("[bb %d] BB_END is null\n", bb->index);
+
+  if (start_insn == NULL_RTX)
+    log_return_void ("[bb %d] start_insn is null\n", bb->index);
+
+  rtx end_insn = NEXT_INSN (BB_END (bb));
+  if (end_insn == NULL_RTX)
+    log_return_void ("[bb %d] end_insn is null\n", bb->index);
+
+  for (rtx i = NEXT_INSN (start_insn); i != end_insn; i = NEXT_INSN (i))
+    {
+      if (INSN_P (i))
+	{
+	  if (NONDEBUG_INSN_P (i)
+	      && (reg_overlap_mentioned_p (reg, PATTERN (i))
+		  || (CALL_P (i) && find_reg_fusage (i, USE, reg))))
+	    {
+	      log_msg ("found use in [bb %d] at insn:\n", bb->index);
+	      log_insn (i);
+	      log_msg ("\n");
+	      count += 1;
+	    }
+
+	  // Stop following this BB if the reg is set or dies along the way.
+	  if (reg_set_p (reg, i) || reg_dead_after_insn (reg, i))
+	    return;
+	}
+
+      if (abort_at_insn != NULL_RTX && abort_at_insn == i)
+	return;
+    }
+
+  for (edge_iterator ei = ei_start (bb->succs); !ei_end_p (ei); ei_next (&ei))
+    {
+      basic_block succ_bb = ei_edge (ei)->dest;
+      trace_reg_uses_1 (reg, BB_HEAD (succ_bb), succ_bb, count, visited_bb,
+			abort_at_insn);
+    }
+}
+
+// Trace uses of the specified reg in all basic blocks that are reachable from
+// the specified insn.  If 'abort_at_insn' is not null, abort the trace at
+// that insn.  If the insn 'abort_at_insn' uses the specified reg, it is also
+// counted.
+static int
+trace_reg_uses (rtx reg, rtx start_insn, rtx abort_at_insn)
+{
+  log_msg ("\ntrace_reg_uses\nreg = ");
+  log_rtx (reg);
+  log_msg ("\nstart_insn = ");
+  log_insn (start_insn);
+
+  int count = 0;
+  std::vector<basic_block> visited_bb;
+  visited_bb.reserve (32);
+
+  trace_reg_uses_1 (reg, start_insn, BLOCK_FOR_INSN (start_insn),
+		    count, visited_bb, abort_at_insn);
+  return count;
+}
+
+// FIXME: Remove dependency on SH predicate function somehow.
+extern int t_reg_operand (rtx, machine_mode);
+extern int negt_reg_operand (rtx, machine_mode);
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// RTL pass class
+
+class ifcvt_sh : public rtl_opt_pass
+{
+public:
+  ifcvt_sh (gcc::context* ctx, bool split_insns, const char* name);
+  virtual ~ifcvt_sh (void);
+  virtual bool gate (void);
+  virtual unsigned int execute (void);
+
+private:
+  // Type of ccreg store that is supported.
+  enum cstore_type_t
+  {
+    cstore_normal = 0,
+    cstore_inverted = 1,
+    cstore_unknown = -1
+  };
+
+  // Type of branch condition that is supported.
+  enum branch_condition_type_t
+  {
+    branch_if_true = 1,
+    branch_if_false = 0,
+    unknown_branch_condition = -1
+  };
+
+  // For each basic block there can be a trace entry which consists of an
+  // insn that sets the ccreg (usually a comparison) and a ccreg store.
+  struct bb_entry
+  {
+    basic_block bb;
+    set_of_reg setcc;
+    set_of_reg cstore;
+    cstore_type_t cstore_type;
+    std::vector<set_of_reg> cstore_reg_reg_copies;
+
+    bb_entry (basic_block b)
+    : bb (b), setcc (), cstore (), cstore_type (cstore_unknown) { }
+
+    rtx comparison_rtx (void) const { return setcc.set_src (); }
+  };
+
+  // A ccreg trace for a conditional branch.
+  struct cbranch_trace
+  {
+    rtx cbranch_insn;
+    branch_condition_type_t cbranch_type;
+
+    // The comparison against zero right before the conditional branch.
+    set_of_reg setcc;
+
+    // All BBs that are related to the cbranch.  The last BB in the list is
+    // the BB of the cbranch itself and might be empty.
+    std::list<bb_entry> bb_entries;
+
+    cbranch_trace (rtx insn)
+    : cbranch_insn (insn),
+      cbranch_type (unknown_branch_condition),
+      setcc ()
+    {
+    }
+
+    basic_block bb (void) const { return BLOCK_FOR_INSN (cbranch_insn); }
+
+    rtx
+    branch_condition_rtx (void) const
+    {
+      rtx x = pc_set (cbranch_insn);
+      return x == NULL_RTX ? NULL_RTX : XEXP (XEXP (x, 1), 0);
+    }
+
+    bool
+    can_invert_condition (void) const
+    {
+      // The branch condition can be inverted safely only if the condition
+      // reg is dead after the cbranch.
+      return reg_dead_after_insn (XEXP (branch_condition_rtx (), 0),
+				  cbranch_insn);
+    }
+  };
+
+  static const pass_data default_pass_data;
+
+  // Tells whether modified or newly added insns are to be split at the end
+  // of the pass.
+  const bool split_insns_;
+
+  // Captured dump_file != NULL.
+  bool en_logging_;
+
+  // rtx of the ccreg that is obtained from the target.
+  rtx ccreg_;
+
+  // Newly added or modified insns.
+  std::vector<rtx> touched_insns_;
+
+  // Given an rtx determine whether it's a comparison with a constant zero.
+  static bool is_cmp_eq_zero (const_rtx i);
+
+  // Update the stored mode of the ccreg from the given branch condition rtx.
+  void update_ccreg_mode (const_rtx cond);
+
+  // Given an rtx, figure out the branch condition, assuming that it is
+  // in canonical form:
+  //   (ne (reg) (const_int 0))
+  //   (eq (reg) (const_int 0))
+  branch_condition_type_t branch_condition_type (const_rtx cond) const;
+
+  // Return true if the specified rtx is either a normal ccreg or
+  // a negated form of the ccreg.
+  bool is_normal_ccreg (const_rtx x) const;
+  bool is_inverted_ccreg (const_rtx x) const;
+
+  // Given a reg rtx and a start insn rtx, try to find the insn in the same
+  // basic block that sets the specified reg.
+  // Return how the search ended and the insn where it stopped or NULL_RTX.
+  enum record_return_t
+  {
+    set_found,
+    set_not_found,
+    other_set_found
+  };
+  record_return_t record_set_of_reg (rtx reg, rtx start_insn, bb_entry& e);
+
+  // Tells whether the cbranch insn of the specified bb_entry can be removed
+  // safely without triggering any side effects.
+  bool can_remove_cstore (const bb_entry& e,
+			  const cbranch_trace& trace) const;
+
+  // Tells whether the setcc insn of the specified bb_entry can be removed
+  // safely without triggering any side effects.
+  bool can_remove_comparison (const bb_entry& e,
+			      const cbranch_trace& trace) const;
+
+  // Tells whether the two specified comparison rtx can be combined into a
+  // single comparison.
+  bool can_combine_comparisons (const_rtx x, const_rtx y) const;
+
+  // Tells whether the ccreg usage can be extended from the bb_entry on until
+  // the final cbranch of the trace.
+  bool can_extend_ccreg_usage (const bb_entry& e,
+			       const cbranch_trace& trace) const;
+
+  // Create an insn rtx that is a negating reg move (not operation).
+  rtx make_not_reg_insn (rtx dst_reg, rtx src_reg) const;
+
+  // Create an insn rtx that inverts the ccreg.
+  rtx make_inv_ccreg_insn (void) const;
+
+  // Adds the specified insn to the set of modified or newly added insns that
+  // might need splitting at the end of the pass.
+  rtx touched_insn (rtx i);
+
+  // Try to invert the branch condition of the specified trace.
+  bool try_invert_branch_condition (cbranch_trace& trace);
+
+  // Try to optimize a cbranch trace by combining comparisons in BBs and
+  // eliminate the cstores.
+  bool try_combine_comparisons (cbranch_trace& trace,
+				int cstore_count, int inv_cstore_count,
+				cstore_type_t dominating_cstore);
+
+  // Try to optimize a cbranch trace by eliminating the cstores in BBs only.
+  bool try_eliminate_cstores (cbranch_trace& trace,
+			      int cstore_count, int inv_cstore_count,
+			      cstore_type_t dominating_cstore);
+
+  // Given a branch insn, try to optimize its branch condition.
+  // If any insns are modified or added they are added to 'touched_insns_'.
+  void try_optimize_cbranch (rtx i);
+};
+
+
+const pass_data ifcvt_sh::default_pass_data =
+{
+  RTL_PASS,		// type
+  "",			// name (overwritten by the constructor)
+  OPTGROUP_NONE,	// optinfo_flags
+  true,			// has_gate
+  true,			// has_execute
+  TV_OPTIMIZE,		// tv_id
+  0,			// properties_required
+  0,			// properties_provided
+  0,			// properties_destroyed
+  0,			// todo_flags_start
+  TODO_df_finish | TODO_df_verify	// todo_flags_finish
+  | TODO_verify_rtl_sharing
+};
+
+ifcvt_sh::ifcvt_sh (gcc::context* ctx, bool split_insns, const char* name)
+: rtl_opt_pass (default_pass_data, ctx),
+  split_insns_ (split_insns),
+  en_logging_ (false),
+  ccreg_ (NULL_RTX)
+{
+  // Overwrite default name in pass_data base class. 
+  this->name = name;
+}
+
+ifcvt_sh::~ifcvt_sh (void)
+{
+}
+
+void ifcvt_sh::update_ccreg_mode (const_rtx cond)
+{
+  if (REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) != REGNO (ccreg_))
+    return;
+
+  machine_mode m = GET_MODE (XEXP (cond, 0));
+  if (m == GET_MODE (ccreg_))
+    return;
+
+  PUT_MODE (ccreg_, m);
+  log_msg ("updated ccreg mode: ");
+  log_rtx (ccreg_);
+  log_msg ("\n");
+}
+
+bool
+ifcvt_sh::is_cmp_eq_zero (const_rtx i)
+{
+  return i != NULL_RTX && GET_CODE (i) == EQ
+	 && REG_P (XEXP (i, 0)) && XEXP (i, 1) == const0_rtx;
+}
+
+ifcvt_sh::branch_condition_type_t
+ifcvt_sh::branch_condition_type (const_rtx cond) const
+{
+  if (cond == NULL_RTX)
+    return unknown_branch_condition;
+
+  if (GET_CODE (cond) == NE
+      && REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) == REGNO (ccreg_)
+      && XEXP (cond, 1) == const0_rtx)
+    return branch_if_true;
+
+  else if (GET_CODE (cond) == EQ
+      && REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) == REGNO (ccreg_)
+      && XEXP (cond, 1) == const0_rtx)
+    return branch_if_false;
+
+  else
+    return unknown_branch_condition;
+}
+
+bool
+ifcvt_sh::is_normal_ccreg (const_rtx x) const
+{
+  return t_reg_operand (const_cast<rtx> (x), VOIDmode);
+}
+
+bool
+ifcvt_sh::is_inverted_ccreg (const_rtx x) const
+{
+  return negt_reg_operand (const_cast<rtx> (x), VOIDmode);
+}
+
+// Scan backwards inside a basic block, starting at START_INSN, for the insn
+// that sets REG and record the findings in NEW_ENTRY.
+// If the found set is a reg-reg copy the copied reg is traced further; each
+// such copy insn is pushed onto NEW_ENTRY.cstore_reg_reg_copies.
+// If the found set is a normal or inverted store of the ccreg (a cstore),
+// also try to find the insn that sets the ccreg (the setcc) in the same BB.
+// Returns set_found / set_not_found / other_set_found accordingly.
+ifcvt_sh::record_return_t
+ifcvt_sh::record_set_of_reg (rtx reg, rtx start_insn, bb_entry& new_entry)
+{
+  log_msg ("\n[bb %d]\n", new_entry.bb->index);
+
+  if (start_insn == NULL_RTX)
+    log_return (set_not_found, "set of reg not found.  empty BB?\n");
+
+  new_entry.cstore_type = cstore_unknown;
+
+  for (rtx i = start_insn; i != NULL_RTX; )
+    {
+      new_entry.cstore = find_set_of_reg_bb (reg, i);
+
+      if (new_entry.cstore.set_src () == NULL_RTX)
+	log_return (set_not_found, "set of reg not found (cstore)\n");
+
+      log_insn (new_entry.cstore.insn);
+      log_msg ("\n");
+
+      if (is_normal_ccreg (new_entry.cstore.set_src ()))
+	{
+	  log_msg ("normal condition store\n");
+	  new_entry.cstore_type = cstore_normal;
+	}
+      else if (is_inverted_ccreg (new_entry.cstore.set_src ()))
+	{
+	  log_msg ("inverted condition store\n");
+	  new_entry.cstore_type = cstore_inverted;
+	}
+      else if (REG_P (new_entry.cstore.set_src ()))
+	{
+	  // If it's a reg-reg copy follow the copied reg.
+	  new_entry.cstore_reg_reg_copies.push_back (new_entry.cstore);
+	  reg = new_entry.cstore.set_src ();
+	  i = new_entry.cstore.insn;
+
+	  log_msg ("reg-reg copy.  tracing ");
+	  log_rtx (reg);
+	  log_msg ("\n");
+	  continue;
+	}
+      else
+	log_return (other_set_found, "not a condition store\n");
+
+      gcc_assert (new_entry.cstore_type != cstore_unknown);
+
+      // Now see how the ccreg was set.
+      // For now it must be in the same BB.
+      log_msg ("tracing ccreg\n");
+      new_entry.setcc =
+	  find_set_of_reg_bb (ccreg_,
+			      prev_nonnote_insn_bb (new_entry.cstore.insn));
+
+      // If cstore was found but setcc was not found continue anyway, as
+      // for some of the optimization types the setcc is irrelevant.
+      if (new_entry.setcc.set_src () == NULL_RTX)
+	log_return (set_found, "set of ccreg not found\n");
+
+      else if (GET_CODE (new_entry.setcc.set_rtx) == SET)
+	{
+	  // Also allow insns that set the ccreg, but are not true comparison
+	  // insns, as long as they are sets and not e.g. clobbers.
+	  log_insn (new_entry.setcc.insn);
+	  log_msg ("\n");
+	  return set_found;
+	}
+      else
+	// If cstore was found but setcc was not found continue anyway, as
+	// for some of the optimization types the setcc is irrelevant.
+ 	log_return (set_found, "unknown set of ccreg\n");
+    }
+
+  log_return (set_not_found, "set of reg not found\n");
+}
+
+// Checks whether the cstore insn of bb_entry E can be removed safely,
+// i.e. without changing the ccreg value that TRACE's cbranch observes and
+// without leaving behind other uses of the cstore's (effective)
+// destination reg.  Returns true if removal is safe.
+bool
+ifcvt_sh::can_remove_cstore (const bb_entry& e,
+			     const cbranch_trace& trace) const
+{
+  if (volatile_insn_p (PATTERN (e.cstore.insn)))
+    {
+      log_msg ("can't remove insn\n");
+      log_insn (e.cstore.insn);
+      log_return (false, "\nbecause it's volatile\n");
+    }
+
+  // On SH there are parallel patterns which store the ccreg multiple times.
+  // In this case it's not safe.
+  rtx cstore_pat = PATTERN (e.cstore.insn);
+  if (GET_CODE (cstore_pat) == PARALLEL)
+    for (int i = 0; i < XVECLEN (cstore_pat, 0); ++i)
+      {
+	rtx x = XVECEXP (cstore_pat, 0, i);
+
+	// It's the cstore set that we're referring to, ignore that one.
+	if (x != e.cstore.set_rtx
+	    && GET_CODE (x) == SET && reg_referenced_p (ccreg_, x))
+	  {
+	    log_msg ("can't remove insn\n");
+	    log_insn (e.cstore.insn);
+	    log_return (false, "\nbecause it's a multiple ccreg store\n");
+	  }
+      }
+
+  // If the cstore sets the ccreg (e.g. negc) and the ccreg is used afterwards
+  // it's not safe.
+  if (modified_in_p (ccreg_, e.cstore.insn)
+      && !(reg_dead_after_insn (ccreg_, e.cstore.insn)
+	   || reg_unused_after_insn (ccreg_, e.cstore.insn)))
+    {
+      log_msg ("can't remove insn\n");
+      log_insn (e.cstore.insn);
+      log_return (false, "\nbecause it sets the ccreg\n");
+    }
+
+  // If the cstore destination reg is copied around check the reg-reg
+  // copies.  At every reg-reg copy the copied reg must be dead.
+  // Otherwise we assume that the result of the cstore is used in some
+  // other way.
+  for (std::vector<set_of_reg>::const_reverse_iterator i =
+	   e.cstore_reg_reg_copies.rbegin ();
+       i != e.cstore_reg_reg_copies.rend (); ++i)
+    {
+      if (!reg_dead_after_insn (i->set_src (), i->insn))
+	{
+	  log_msg ("can't remove insn\n");
+	  log_insn (i->insn);
+	  log_return (false, "\nbecause source of reg-reg copy doesn't die\n");
+	}
+    }
+
+  // Check that the destination reg of the cstore is not used otherwise.
+
+  // If the cstore destination reg is copied around check the effective
+  // destination reg of the cstore.  The reg-reg copies are recorded in
+  // reverse order, i.e. the most recent reg-reg copy in the insn list
+  // comes first.
+  rtx cstore_dst = e.cstore_reg_reg_copies.empty ()
+		   ? e.cstore.set_dst ()
+		   : e.cstore_reg_reg_copies.front ().set_dst ();
+
+  // The cstore_dst reg must die after the test before the cbranch.
+  if (!reg_dead_after_insn (cstore_dst, trace.setcc.insn))
+    {
+      log_msg ("can't remove insn\n");
+      log_insn (e.cstore.insn);
+      log_return (false, "\nbecause its effective target reg %d doesn't die "
+			 "after trace.setcc.insn\n", REGNO (cstore_dst));
+    }
+
+  // Even if the cstore_dst reg dies after the test before the cbranch we
+  // have to check whether it is used in other BBs if the cstore is not
+  // in the same BB where the cstore_dst dies.
+  if (e.bb == trace.bb ())
+    {
+      if (reg_used_between_p (cstore_dst, e.cstore.insn, trace.setcc.insn))
+	{
+	  log_msg ("can't remove insn\n");
+	  log_insn (e.cstore.insn);
+	  log_return (false, "\nbecause its effective target reg %d is used "
+			     "otherwise\n", REGNO (cstore_dst));
+	}
+    }
+  else
+    {
+      // Count the uses of cstore_dst reg in all BBs that can be reached from
+      // bb_entry's BB.  If we get more than '1' we assume that it's used
+      // somewhere else and is not safe to be removed.
+      int cstore_dst_use_count = trace_reg_uses (cstore_dst, e.cstore.insn,
+						 trace.setcc.insn);
+      if (cstore_dst_use_count > 1)
+	{
+	  log_msg ("can't remove insn\n");
+	  log_insn (e.cstore.insn);
+	  log_return (false, "\nbecause its effective target reg %d is used "
+			     "in %d other places\n", REGNO (cstore_dst),
+			     cstore_dst_use_count - 1);
+	}
+    }
+
+  return true;
+}
+
+// Checks whether the comparison insn (the setcc) of bb_entry E can be
+// removed safely, i.e. the ccreg value it produces is not needed elsewhere.
+// Returns true if removal is safe.
+bool
+ifcvt_sh::can_remove_comparison (const bb_entry& e,
+				 const cbranch_trace&/* trace*/) const
+{
+  // If the ccreg is used otherwise between the comparison and the cstore,
+  // it's not safe.
+  if (reg_used_between_p (ccreg_, e.setcc.insn, e.cstore.insn))
+    {
+      log_msg ("can't remove insn\n");
+      log_insn (e.setcc.insn);
+      log_return (false, "\nbecause the ccreg is used otherwise\n");
+    }
+
+  // The ccreg must not be live beyond the cstore, otherwise removing the
+  // comparison would change the ccreg value seen by later insns.
+  if (!reg_dead_after_insn (ccreg_, e.cstore.insn)
+      && !reg_unused_after_insn (ccreg_, e.cstore.insn))
+    {
+      log_msg ("can't remove insn\n");
+      log_insn (e.cstore.insn);
+      log_return (false, "\nbecause ccreg is not dead or unused afterwards\n");
+    }
+
+  // On SH there are also multiple set patterns that can be used for
+  // comparisons, such as "shll".  It's not safe to remove those.
+  if (multiple_sets (e.setcc.insn))
+    {
+      log_msg ("can't remove insn\n");
+      // Log the setcc insn here -- it is the multiple-set comparison insn
+      // that prevents the removal, not the cstore.
+      log_insn (e.setcc.insn);
+      log_return (false, "\nbecause it's a multiple set\n");
+    }
+
+  return true;
+}
+
+// Creates an insn sequence that computes DST_REG = ~SRC_REG and returns
+// its first insn.  The insns are generated inside a temporary sequence and
+// are not emitted into the current insn stream by this function.
+rtx
+ifcvt_sh::make_not_reg_insn (rtx dst_reg, rtx src_reg) const
+{
+  // This will go through expanders and may output multiple insns
+  // for multi-word regs.
+  start_sequence ();
+  expand_simple_unop (GET_MODE (dst_reg), NOT, src_reg, dst_reg, 0);
+  rtx i = get_insns ();
+  end_sequence ();
+  return i;
+}
+
+// Creates the insn 'ccreg = ccreg ^ 1' which inverts the ccreg value.
+// The insn is created inside a temporary sequence and is not emitted into
+// the current insn stream by this function.
+rtx
+ifcvt_sh::make_inv_ccreg_insn (void) const
+{
+  start_sequence ();
+  rtx i = emit_insn (gen_rtx_SET (VOIDmode, ccreg_,
+				  gen_rtx_fmt_ee (XOR, GET_MODE (ccreg_),
+						  ccreg_, const1_rtx)));
+  end_sequence ();
+  return i;
+}
+
+// Remembers insn I in touched_insns_ so that it can be re-split later in
+// 'execute' if needed, and returns I unchanged for convenient chaining.
+rtx
+ifcvt_sh::touched_insn (rtx i)
+{
+  touched_insns_.push_back (i);
+  return i;
+}
+
+// Returns true if the two comparison rtxes X and Y could be combined into
+// a single comparison: same comparison code, first operands are regs of
+// the same mode, and the second operands are either both regs of the same
+// mode or are rtx_equal_p.
+bool
+ifcvt_sh::can_combine_comparisons (const_rtx x, const_rtx y) const
+{
+  if (GET_CODE (x) != GET_CODE (y))
+    return false;
+
+  rtx x_op0 = XEXP (x, 0);
+  rtx x_op1 = XEXP (x, 1);
+
+  rtx y_op0 = XEXP (y, 0);
+  rtx y_op1 = XEXP (y, 1);
+
+  if (!REG_P (x_op0) || !REG_P (y_op0))
+    return false;
+
+  if (GET_MODE (x_op0) != GET_MODE (y_op0))
+    return false;
+
+  // rtx_equal_p also compares the reg numbers which we do not care about
+  // here, as long as both are regs and the modes are the same.
+  if (REG_P (x_op1))
+    return REG_P (y_op1) && GET_MODE (x_op1) == GET_MODE (y_op1);
+
+  return rtx_equal_p (x_op1, y_op1);
+}
+
+// Checks whether the lifetime of the ccreg can be extended from bb_entry
+// E's cstore up to the test insn before TRACE's cbranch, i.e. whether the
+// ccreg is not modified by any insn in between.  Returns true if it can.
+bool
+ifcvt_sh::can_extend_ccreg_usage (const bb_entry& e,
+				  const cbranch_trace& trace) const
+{
+  // Check if the ccreg is not modified by other insns in the BB path until
+  // the final cbranch of the trace.
+  // Start checking after the cstore that follows the setcc, assuming that
+  // the cstore will be removed.
+
+  // The assumption here is that the specified bb_entry's BB is a direct
+  // predecessor of the trace.cbranch_insn's BB.
+  if (e.bb != trace.bb () && !is_adjacent_bb (e.bb, trace.bb ()))
+    log_return (false,
+	"can't extend ccreg usage -- [bb %d] and [bb %d] are not adjacent\n",
+	e.bb->index, trace.bb ()->index);
+
+  if (e.cstore.empty ())
+    log_return (false, "can't extend ccreg usage -- no cstore\n");
+
+  // The entry's cstore is in the same BB as the final cbranch.
+  if (e.bb == trace.bb ())
+    {
+      if (reg_set_between_p (ccreg_, e.cstore.insn, trace.setcc.insn))
+	log_return (false,
+	    "can't extend ccreg usage -- it's modified between e.cstore.insn "
+	    "and trace.setcc.insn\n");
+      else
+	return true;
+    }
+
+  // The entry's cstore and the final cbranch are in different BBs.
+  if (reg_set_between_p (ccreg_, e.cstore.insn, NEXT_INSN (BB_END (e.bb))))
+    log_return (false,
+	"can't extend ccreg usage -- it's modified in [bb %d]\n", e.bb->index);
+
+  if (reg_set_between_p (ccreg_, PREV_INSN (BB_HEAD (trace.bb ())),
+			 trace.setcc.insn))
+    log_return (false,
+	"can't extend ccreg usage -- it's modified in [bb %d]\n",
+	trace.bb ()->index);
+
+  return true;
+}
+
+// Inverts the branch condition of the trace's cbranch insn in place and
+// verifies the change.  Returns true on success; on failure any pending
+// changes remain unconfirmed.
+bool
+ifcvt_sh::try_invert_branch_condition (cbranch_trace& trace)
+{
+  log_msg ("inverting branch condition\n");
+
+  if (!invert_jump_1 (trace.cbranch_insn, JUMP_LABEL (trace.cbranch_insn)))
+    log_return (false, "invert_jump_1 failed\n");
+
+  if (verify_changes (num_validated_changes ()))
+    confirm_change_group ();
+  else
+    // The log message must name the function that actually failed.
+    log_return (false, "verify_changes failed\n");
+
+  touched_insn (trace.cbranch_insn);
+  return true;
+}
+
+// Try to replace the individual setcc/cstore pairs of all BBs in the trace
+// with one common comparison insn placed before the cbranch's test insn.
+// The setcc/cstore pairs are then reduced to (possibly inverting) reg-reg
+// moves into the operands of the common comparison.  Returns true if the
+// transformation was carried out.
+bool
+ifcvt_sh::try_combine_comparisons (cbranch_trace& trace,
+				   int cstore_count, int inv_cstore_count,
+				   cstore_type_t dominating_cstore)
+{
+  log_msg ("\ntry_combine_comparisons\n");
+
+  // This function will always try to create new pseudos.
+  if (!can_create_pseudo_p ())
+    log_return (false, "can't create pseudos\n");
+
+  // Check that all ccset insns are comparisons and all comparison types in
+  // all BBs are the same and could be combined into one single comparison.
+  rtx comp = NULL_RTX;
+  rtx comp_insn = NULL_RTX;
+
+  for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+       i != trace.bb_entries.end (); ++i)
+    {
+      int i_empty_count = i->setcc.empty () + i->cstore.empty ();
+
+      // A completely empty entry is OK (could be the BB of the cbranch).
+      if (i_empty_count == 2)
+	continue;
+
+      // Otherwise we need both, the setcc and the cstore.
+      if (i_empty_count != 0)
+	log_return (false, "bb entry is not a setcc cstore pair\n");
+
+      rtx other_comp = i->comparison_rtx ();
+
+      if (!COMPARISON_P (other_comp))
+	{
+	  log_msg ("setcc is not a comparison:\n");
+	  log_rtx (other_comp);
+	  log_return (false, "\n");
+	}
+
+      if (comp_insn == NULL_RTX)
+	{
+	  comp = other_comp;
+	  comp_insn = i->setcc.insn;
+	}
+      else if (!can_combine_comparisons (comp, other_comp))
+	return false;
+
+      // The goal here is to eliminate all cstores and comparisons in the BBs.
+      // Thus check if every cstore can actually be removed safely.
+      if (!can_remove_cstore (*i, trace) || !can_remove_comparison (*i, trace))
+	return false;
+    }
+
+  // FIXME: The first operand of the comparison must be a simple reg.
+  // This effectively prohibits combining div0s comparisons such as
+  //    (lt:SI (xor:SI (reg:SI) (reg:SI)))
+  if (!REG_P (XEXP (comp, 0)))
+    {
+      log_msg ("comparison operand 0\n");
+      log_rtx (XEXP (comp, 0));
+      log_return (false, "\nis not a reg\n");
+    }
+
+  // New pseudos for the operands of the common comparison.  A constant
+  // second operand is referenced directly instead.
+  rtx comp_op0 = gen_reg_rtx (GET_MODE (XEXP (comp, 0)));
+  rtx comp_op1 = REG_P (XEXP (comp, 1))
+		 ? gen_reg_rtx (GET_MODE (XEXP (comp, 1)))
+		 : XEXP (comp, 1);
+
+  // If there are both, inverting and non-inverting cstores, they can only
+  // be eliminated if the comparison can be inverted.  We assume that the
+  // comparison insns that we find are already minimal and canonicalized.
+  // There is one special case though, where an integer comparison
+  //     (eq (reg) (const_int 0))
+  // can be inverted with a sequence
+  //     (eq (not (reg)) (const_int 0))
+  if (inv_cstore_count != 0 && cstore_count != 0)
+    {
+      if (make_not_reg_insn (comp_op0, comp_op0) == NULL_RTX)
+	log_return (false, "make_not_reg_insn failed.\n");
+
+      for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+	   i != trace.bb_entries.end (); ++i)
+	{
+	  if (i->setcc.empty () || i->cstore.empty ())
+	    continue;
+
+	  if (i->cstore_type != dominating_cstore
+	      && !is_cmp_eq_zero (i->comparison_rtx ()))
+	    {
+	      log_msg ("can't invert comparison in insn\n");
+	      log_insn (i->setcc.insn);
+	      log_return (false,
+		"\nbecause it's not a (eq (reg) (const_int 0))\n");
+	    }
+	}
+    }
+
+  if (dominating_cstore == cstore_normal
+      && !try_invert_branch_condition (trace))
+    return false;
+
+  // Replace the test insn before the cbranch with the common comparison.
+  // Instead of creating a new insn from scratch we copy the common comparison
+  // pattern.  This simplifies handling parallel comparison patterns, such as
+  // FP comparisons on SH, which have an extra use on FPSCR.
+  log_msg ("installing common comparison in [bb %d]\n", trace.bb ()->index);
+
+  rtx common_comp_pat = copy_rtx (PATTERN (comp_insn));
+  rtx common_comp = const_cast<rtx> (set_of (ccreg_, common_comp_pat));
+
+  gcc_assert (common_comp != NULL_RTX);
+
+  XEXP (XEXP (common_comp, 1), 0) = comp_op0;
+  XEXP (XEXP (common_comp, 1), 1) = comp_op1;
+
+  log_rtx (common_comp_pat);
+  log_msg ("\n");
+
+  touched_insn (emit_insn_after (common_comp_pat, trace.setcc.insn));
+  delete_insn (trace.setcc.insn);
+
+  // Replace comparison and cstore insns with reg-reg moves in all BBs.
+  for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+       i != trace.bb_entries.end (); ++i)
+    {
+      if (i->setcc.empty () || i->cstore.empty ())
+	continue;
+
+      if (i->cstore_type == dominating_cstore)
+	{
+	  log_msg ("replacing comparison and cstore with reg move "
+		   "in [bb %d]\n", i->bb->index);
+
+	  touched_insn (emit_insn_after (
+	      gen_move_insn (comp_op0, XEXP (i->comparison_rtx (), 0)),
+	      i->setcc.insn));
+
+	  // If the second operand is a reg, have to emit a move insn.
+	  // Otherwise assume it's a const_int and just reference it.
+	  if (REG_P (comp_op1))
+	    touched_insn (emit_insn_after (
+		gen_move_insn (comp_op1, XEXP (i->comparison_rtx (), 1)),
+		i->setcc.insn));
+	}
+      else
+	{
+	  log_msg ("replacing comparison and cstore with inverting reg move "
+		   "in [bb %d]\n", i->bb->index);
+
+	  rtx new_i = make_not_reg_insn (comp_op0,
+					 XEXP (i->comparison_rtx (), 0));
+	  touched_insn (emit_insn_after (new_i, i->setcc.insn));
+	}
+
+      delete_insn (i->cstore.insn);
+      delete_insn (i->setcc.insn);
+    }
+
+  return true;
+}
+
+// Try to eliminate the cstores of all BBs in the trace and extend the
+// lifetime of the ccreg up to the cbranch instead.  Cstores of the
+// non-dominating type are replaced with a ccreg inversion.  Returns true
+// if the transformation was carried out.
+bool
+ifcvt_sh::try_eliminate_cstores (cbranch_trace& trace,
+				 int cstore_count, int inv_cstore_count,
+				 cstore_type_t dominating_cstore)
+{
+  log_msg ("\ntry_eliminate_cstores\n");
+
+  for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+       i != trace.bb_entries.end (); ++i)
+    {
+      // A completely empty entry is OK (could be the BB of the cbranch).
+      if (i->setcc.empty () && i->cstore.empty ())
+	continue;
+
+      // We're going to eliminate cstores, but for that they have to be
+      // there.  We don't care about the setcc in this case.
+      if (i->cstore.empty ())
+	log_return (false, "bb entry cstore empty -- aborting\n");
+
+      // The goal here is to eliminate all cstores in the BBs and extend the
+      // ccreg usage.
+      if (!can_extend_ccreg_usage (*i, trace))
+	return false;
+
+      // If the cstore can't be removed we can keep it around as long as
+      // it doesn't modify the ccreg.
+      if (!can_remove_cstore (*i, trace)
+	  && modified_in_p (ccreg_, i->cstore.insn))
+	log_return (false, "cstore sets ccreg -- aborting\n");
+    }
+
+  // If there are both, inverting and non-inverting cstores, we'll have to
+  // invert the ccreg as a replacement for one of them.
+  if (cstore_count != 0 && inv_cstore_count != 0)
+    {
+      rtx i = make_inv_ccreg_insn ();
+      if (recog_memoized (i) < 0)
+	{
+	  log_msg ("failed to match ccreg inversion insn:\n");
+	  log_rtx (PATTERN (i));
+	  log_return (false, "\naborting\n");
+	}
+    }
+
+  if (dominating_cstore == cstore_normal
+      && !try_invert_branch_condition (trace))
+    return false;
+
+  // Eliminate cstores in all BBs.
+  for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+       i != trace.bb_entries.end (); ++i)
+    {
+      if (i->cstore.empty ())
+	continue;
+
+      if (i->cstore_type == dominating_cstore)
+	log_msg ("removing cstore in [bb %d]\n", i->bb->index);
+      else
+	{
+	  log_msg ("replacing cstore with ccreg inversion in [bb %d]\n",
+		   i->bb->index);
+
+	  touched_insn (
+	    emit_insn_after (make_inv_ccreg_insn (), i->cstore.insn));
+	}
+
+      if (can_remove_cstore (*i, trace))
+	delete_insn (i->cstore.insn);
+    }
+
+  log_msg ("removing test insn before cbranch\n");
+  delete_insn (trace.setcc.insn);
+  return true;
+}
+
+// Main per-branch entry point.  For the conditional branch INSN, trace how
+// the tested reg was formed in the current BB and its predecessor BBs,
+// determine the dominating cstore type, and then try the two
+// transformations try_combine_comparisons and try_eliminate_cstores.
+void
+ifcvt_sh::try_optimize_cbranch (rtx insn)
+{
+  cbranch_trace trace (insn);
+
+  log_msg ("\n\n--------------------------------------\n");
+  log_msg ("found cbranch insn in [bb %d]:\n", trace.bb ()->index);
+  log_insn (insn);
+
+  trace.cbranch_type = branch_condition_type (trace.branch_condition_rtx ());
+
+  if (trace.cbranch_type == branch_if_true)
+    log_msg ("condition: branch if true\n");
+  else if (trace.cbranch_type == branch_if_false)
+    log_msg ("condition: branch if false\n");
+  else
+    {
+      log_msg ("unknown branch condition\n");
+      log_rtx (trace.branch_condition_rtx ());
+      log_return_void ("\n");
+    }
+
+  update_ccreg_mode (trace.branch_condition_rtx ());
+
+  // Scan the insns backwards for an insn that sets the ccreg by testing a
+  // reg against zero like
+  //   (set (reg ccreg) (eq (reg) (const_int 0)))
+  // The testing insn could also be outside of the current basic block, but
+  // for now we limit the search to the current basic block.
+  trace.setcc = find_set_of_reg_bb (ccreg_, prev_nonnote_insn_bb (insn));
+
+  if (!is_cmp_eq_zero (trace.setcc.set_src ()))
+    log_return_void ("could not find set of ccreg in current BB\n");
+
+  rtx trace_reg = XEXP (trace.setcc.set_src (), 0);
+
+  log_msg ("set of ccreg:\n");
+  log_insn (trace.setcc.insn);
+
+  // See if we can remove the trace.setcc insn safely.
+  if (reg_used_between_p (ccreg_, trace.setcc.insn, trace.cbranch_insn))
+    log_return_void ("ccreg used between testing insn and branch insn\n");
+
+  if (volatile_insn_p (PATTERN (trace.setcc.insn)))
+    {
+      log_msg ("can't remove insn\n");
+      log_insn (trace.setcc.insn);
+      log_return_void ("\nbecause it's volatile\n");
+    }
+
+  // Now that we have an insn which tests some reg and sets the condition
+  // reg before the conditional branch, try to figure out how that tested
+  // reg was formed, i.e. find all the insns that set the tested reg in
+  // some way.
+  // The tested reg might be set in multiple basic blocks so we need to
+  // check all basic blocks which can reach this current basic block.
+  // If the set of reg is an inverting or non-inverting store of the condition
+  // register, check how the ccreg value was obtained.
+  log_msg ("\ntracing ");
+  log_rtx (trace_reg);
+  log_msg ("\n");
+
+
+  // First check the basic block where the conditional branch is in.
+  // If we find it here there's no point in checking other BBs.
+  trace.bb_entries.push_front (bb_entry (trace.bb ()));
+
+  record_return_t res =
+      record_set_of_reg (trace_reg, prev_nonnote_insn_bb (trace.setcc.insn),
+			 trace.bb_entries.front ());
+
+  if (res == other_set_found)
+    log_return_void ("other set found - aborting trace\n");
+  else if (res == set_not_found)
+    {
+      // It seems the initial search in the BB of the conditional branch
+      // didn't find anything.  Now look in all predecessor BBs.
+      for (edge_iterator ei = ei_start (trace.bb ()->preds);
+	   !ei_end_p (ei); ei_next (&ei))
+	{
+	  edge e = ei_edge (ei);
+	  trace.bb_entries.push_front (bb_entry (e->src));
+
+	  res = record_set_of_reg (trace_reg, BB_END (e->src),
+				   trace.bb_entries.front ());
+	  if (res != set_found)
+	    log_return_void ("set not found - aborting trace\n");
+	}
+    }
+
+  // Dump a summary of the collected trace when logging is enabled.
+  if (en_logging_)
+    {
+      log_msg ("\ncbranch trace summary:\n");
+      for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+	   i != trace.bb_entries.end (); ++i)
+	{
+	  log_msg ("\n[bb %d]\n", i->bb->index);
+	  if (!i->setcc.empty ())
+	    {
+	      log_rtx (i->setcc.set_rtx);
+	      log_msg ("\n");
+	    }
+	  if (!i->cstore.empty ())
+	    {
+	      log_rtx (i->cstore.set_rtx);
+	      log_msg ("\n");
+	    }
+
+	  for (std::vector<set_of_reg>::const_reverse_iterator j =
+		   i->cstore_reg_reg_copies.rbegin ();
+	       j != i->cstore_reg_reg_copies.rend (); ++j)
+	    {
+	      log_rtx (j->set_rtx);
+	      log_msg ("\n");
+	    }
+	}
+
+      log_rtx (trace.setcc.set_rtx);
+      log_msg ("\n");
+      log_rtx (PATTERN (trace.cbranch_insn));
+      log_msg ("\n");
+    }
+
+  // Check that we don't have any empty BBs.
+  // Only the BB with the cbranch may be empty.
+  for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+       i != trace.bb_entries.end (); ++i)
+    if (i->setcc.empty () && i->cstore.empty () && i->bb != trace.bb ())
+      log_return_void ("\n[bb %d] is empty - aborting.\n", i->bb->index);
+
+  // Determine the dominating cstore type
+  // FIXME: Try to take the probabilities of the BBs into account somehow.
+  int cstore_count = 0;
+  int inv_cstore_count = 0;
+
+  for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+       i != trace.bb_entries.end (); ++i)
+    {
+      if (i->cstore_type == cstore_normal)
+	cstore_count += 1;
+      else if (i->cstore_type == cstore_inverted)
+	inv_cstore_count += 1;
+    }
+
+  log_msg ("cstore count = %d  inverted cstore count = %d\n",
+	   cstore_count, inv_cstore_count);
+
+  // This puts a priority on inverting cstores.
+  cstore_type_t dominating_cstore = inv_cstore_count >= cstore_count
+				    ? cstore_inverted
+				    : cstore_normal;
+
+  if (dominating_cstore == cstore_inverted)
+      log_msg ("will try to eliminate inverted cstore\n");
+  else if (dominating_cstore == cstore_normal)
+    {
+      log_msg ("will try to eliminate normal cstore\n");
+      if (!trace.can_invert_condition ())
+	log_return_void ("branch condition can't be inverted - aborting\n");
+    }
+  else
+    gcc_unreachable ();
+
+  // Prefer combining the comparisons; fall back to eliminating cstores.
+  if (try_combine_comparisons (trace, cstore_count, inv_cstore_count,
+			       dominating_cstore))
+    return;
+
+  try_eliminate_cstores (trace, cstore_count, inv_cstore_count,
+			 dominating_cstore);
+}
+
+// Pass gate function.  Run this pass only when optimizing.
+bool
+ifcvt_sh::gate (void)
+{
+  return optimize > 0;
+}
+
+// Main entry point of the RTL pass.  Determines the target's fixed
+// condition code reg, then visits every BB that ends with a conditional
+// branch and tries to optimize it.  Finally re-splits any insns that were
+// changed or added, if the pass runs after insn splitting.
+unsigned int
+ifcvt_sh::execute (void)
+{
+  en_logging_ = dump_file != NULL;
+
+  unsigned int ccr0 = INVALID_REGNUM;
+  unsigned int ccr1 = INVALID_REGNUM;
+
+  if (targetm.fixed_condition_code_regs (&ccr0, &ccr1)
+      && ccr0 != INVALID_REGNUM)
+    {
+      // Initially create a reg rtx with VOIDmode.
+      // When the first conditional branch is discovered, the mode is changed
+      // to the mode that is actually used by the target.
+      ccreg_ = gen_rtx_REG (VOIDmode, ccr0);
+    }
+
+  if (ccreg_ == NULL_RTX)
+    log_return (0, "no ccreg.\n\n");
+
+  // The pass relies on cstores producing the value 1 for 'true'.
+  if (STORE_FLAG_VALUE != 1)
+    log_return (0, "unsupported STORE_FLAG_VALUE %d", STORE_FLAG_VALUE);
+
+  log_msg ("ccreg: ");
+  log_rtx (ccreg_);
+  log_msg ("  STORE_FLAG_VALUE = %d\n", STORE_FLAG_VALUE);
+
+  // Look for basic blocks that end with a conditional branch and try to
+  // optimize them.
+  basic_block bb;
+  FOR_EACH_BB (bb)
+    {
+      rtx i = BB_END (bb);
+      if (any_condjump_p (i) && onlyjump_p (i))
+	try_optimize_cbranch (i);
+    }
+
+  log_msg ("\n\n");
+
+  // If new insns are created and this pass is executed after all insns
+  // have been split already, we must split the insns we've changed or added
+  // ourselves here.
+  // FIXME: Multi-word operations (which emit multiple insns) are not handled
+  // properly here, since only one insn will end up in 'touched_insns_'.
+  // On SH this is not a problem though.
+  if (split_insns_)
+    for (std::vector<rtx>::const_iterator i = touched_insns_.begin ();
+	 i != touched_insns_.end (); ++i)
+      {
+	log_msg ("trying to split insn:\n");
+	log_insn (*i);
+	log_msg ("\n");
+	try_split (PATTERN (*i), *i, 0);
+      }
+
+  touched_insns_.clear ();
+  log_return (0, "\n\n");
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// Factory function for the pass.  CTX is the GCC context, SPLIT_INSNS
+// tells the pass whether it has to re-split the insns it touches, and NAME
+// is the pass name used for dump files.
+// This allows instantiating the pass somewhere else without having to pull
+// in a header file.
+opt_pass*
+make_pass_ifcvt_sh (gcc::context* ctx, bool split_insns, const char* name)
+{
+  return new ifcvt_sh (ctx, split_insns, name);
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]