[PATCH v3 3/3] PR80791 Consider doloop cmp use in ivopts

Kewen.Lin linkw@linux.ibm.com
Thu Jun 20 12:17:00 GMT 2019


Hi,

Sorry, the previous patch is incomplete.
New one attached.  Sorry for inconvenience.

on 2019/6/20 脧脗脦莽8:08, Kewen.Lin wrote:
> Hi Segher,
> 
>> On Wed, Jun 19, 2019 at 07:47:34PM +0800, Kewen.Lin wrote:
>>> +/* Return true if count register for branch is supported.  */
>>> +
>>> +static bool
>>> +rs6000_have_count_reg_decr_p ()
>>> +{
>>> +  return flag_branch_on_count_reg;
>>> +}
>>
>> rs6000 unconditionally supports these instructions, not just when that
>> flag is set.  If you need to look at the flag, the *caller* of this new
>> hook should, not every implementation of the hook.  So just "return true"
>> here?
> 
> Good point!  Updated it as hookpod.
> 
>>> +/* For doloop use, if the algothrim selects some candidate which invalid for
>>
>> "algorithm", "which is invalid".
> 
>>> +   some cost like zero rather than original inifite cost.  The point is to
>>
>> "infinite"
>>
> 
> Thanks for catching!  I should run spelling check next time.  :)
> 
> New version attached with comments addressed.
> 
> 
> Thanks,
> Kewen
> 
-------------- next part --------------
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6667cd0..e98aba9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1912,6 +1912,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
 #undef TARGET_PREDICT_DOLOOP_P
 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
 
+#undef TARGET_HAVE_COUNT_REG_DECR_P
+#define TARGET_HAVE_COUNT_REG_DECR_P true
+
 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c2aa4d0..5477294 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11618,6 +11618,14 @@ loops, and will help ivopts to make some decisions.
 The default version of this hook returns false.
 @end deftypefn
 
+@deftypevr {Target Hook} bool TARGET_HAVE_COUNT_REG_DECR_P
+Return true if the target supports hardware count register for decrement
+and branch.  This count register can't be used as general register since
+moving to/from a general register from/to it is very expensive.
+For the targets with this support, ivopts can take doloop use as zero cost.
+The default value is false.
+@end deftypevr
+
 @deftypefn {Target Hook} bool TARGET_CAN_USE_DOLOOP_P (const widest_int @var{&iterations}, const widest_int @var{&iterations_max}, unsigned int @var{loop_depth}, bool @var{entered_at_top})
 Return true if it is possible to use low-overhead loops (@code{doloop_end}
 and @code{doloop_begin}) for a particular loop.  @var{iterations} gives the
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index b4d57b8..5f43b27 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7946,6 +7946,8 @@ to by @var{ce_info}.
 
 @hook TARGET_PREDICT_DOLOOP_P
 
+@hook TARGET_HAVE_COUNT_REG_DECR_P
+
 @hook TARGET_CAN_USE_DOLOOP_P
 
 @hook TARGET_INVALID_WITHIN_DOLOOP
diff --git a/gcc/target.def b/gcc/target.def
index 71b6972..8a64e5b 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4246,6 +4246,16 @@ The default version of this hook returns false.",
  bool, (struct loop *loop),
  default_predict_doloop_p)
 
+DEFHOOKPOD
+(have_count_reg_decr_p,
+ "Return true if the target supports hardware count register for decrement\n\
+and branch.  This count register can't be used as general register since\n\
+moving to/from a general register from/to it is very expensive.\n\
+For the targets with this support, ivopts can take doloop use as zero cost.\n\
+The default value is false.",
+ bool, false)
+
+
 DEFHOOK
 (can_use_doloop_p,
  "Return true if it is possible to use low-overhead loops (@code{doloop_end}\n\
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
index 7d5859b..71d7f67 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
@@ -17,6 +17,7 @@ f1 (char *p, uintptr_t i, uintptr_t n)
   while (i < n);
 }
 
-/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" } } */
-/* { dg-final { scan-tree-dump-times "PHI <p_" 1 "ivopts"} } */
-/* { dg-final { scan-tree-dump-times "p_\[0-9\]* <" 1 "ivopts" } } */
+/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "PHI" 2 "ivopts" { target { powerpc*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "PHI <p_" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "p_\[0-9\]* <" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 530ea4a..742d3fa 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -399,6 +399,8 @@ struct iv_group
   struct cost_pair *cost_map;
   /* The selected candidate for the group.  */
   struct iv_cand *selected;
+  /* The bind candidate for this group, for doloop use group only.  */
+  struct iv_cand *bind_cand;
   /* Uses in the group.  */
   vec<struct iv_use *> vuses;
 };
@@ -612,6 +614,9 @@ struct ivopts_data
 
   /* Whether the loop body can only be exited via single exit.  */
   bool loop_single_exit_p;
+
+  /* Whether the loop has doloop comparison use.  */
+  bool doloop_use_p;
 };
 
 /* An assignment of iv candidates to uses.  */
@@ -1528,6 +1533,7 @@ record_group (struct ivopts_data *data, enum use_type type)
   group->type = type;
   group->related_cands = BITMAP_ALLOC (NULL);
   group->vuses.create (1);
+  group->bind_cand = NULL;
 
   data->vgroups.safe_push (group);
   return group;
@@ -3724,7 +3730,7 @@ prepare_decl_rtl (tree *expr_p, int *ws, void *data)
    Some RTL specific checks seems unable to be checked in gimple, if any new
    checks or easy checks _are_ missing here, please add them.  */
 
-static bool ATTRIBUTE_UNUSED
+static bool
 generic_predict_doloop_p (struct ivopts_data *data)
 {
   struct loop *loop = data->current_loop;
@@ -5291,6 +5297,21 @@ determine_group_iv_cost_cond (struct ivopts_data *data,
   return !cost.infinite_cost_p ();
 }
 
+/* Set no cost for pair between doloop iv use GROUP and iv cand CAND.  */
+
+static bool
+adjust_group_iv_cost_for_doloop (struct ivopts_data *data,
+				 struct iv_group *group, struct iv_cand *cand)
+{
+  struct cost_pair *cp = get_group_iv_cost (data, group, cand);
+  if (!cp)
+    set_group_iv_cost (data, group, cand, no_cost, NULL, NULL_TREE, ERROR_MARK,
+		       NULL);
+  else
+    cp->cost = no_cost;
+  return true;
+}
+
 /* Determines cost of computing uses in GROUP with CAND.  Returns false
    if USE cannot be represented with CAND.  */
 
@@ -5308,7 +5329,12 @@ determine_group_iv_cost (struct ivopts_data *data,
       return determine_group_iv_cost_address (data, group, cand);
 
     case USE_COMPARE:
-      return determine_group_iv_cost_cond (data, group, cand);
+      {
+	bool finite_cost_p = determine_group_iv_cost_cond (data, group, cand);
+	if (data->doloop_use_p && group->bind_cand)
+	  finite_cost_p = adjust_group_iv_cost_for_doloop (data, group, cand);
+	return finite_cost_p;
+      }
 
     default:
       gcc_unreachable ();
@@ -6723,6 +6749,29 @@ find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
   return set;
 }
 
+/* For doloop use, if the algorithm selects some candidate which is invalid for
+   later rewrite, fix it up with bind_cand.  */
+
+static void
+fixup_doloop_groups (struct ivopts_data *data, struct iv_ca *set)
+{
+  for (unsigned i = 0; i < data->vgroups.length (); i++)
+    {
+      struct iv_group *group = data->vgroups[i];
+      if (group->bind_cand)
+	{
+	  struct cost_pair *cp = iv_ca_cand_for_group (set, group);
+	  gcc_assert (cp);
+	  if (cp->cand != group->bind_cand && cp->value == NULL_TREE)
+	    {
+	      struct cost_pair *bind_cp
+		= get_group_iv_cost (data, group, group->bind_cand);
+	      iv_ca_set_cp (data, set, group, bind_cp);
+	    }
+	}
+    }
+}
+
 static struct iv_ca *
 find_optimal_iv_set (struct ivopts_data *data)
 {
@@ -6760,6 +6809,9 @@ find_optimal_iv_set (struct ivopts_data *data)
   else if (origset)
     iv_ca_free (&origset);
 
+  if (data->doloop_use_p)
+    fixup_doloop_groups (data, set);
+
   for (i = 0; i < data->vgroups.length (); i++)
     {
       struct iv_group *group = data->vgroups[i];
@@ -7568,6 +7620,72 @@ determine_scaling_factor (struct ivopts_data *data, basic_block *body)
     }
 }
 
+/* Find doloop comparison use and set its related bind_cand.  We adjust the
+   doloop use group cost against various IV cands, it's possible to assign
+   some cost like zero rather than original infinite cost.  The point is to
+   give more chances to consider other IV cands instead of BIV.  The cost
+   originally given on doloop use can affect optimal decision because it can
+   become dead and get eliminated but considered too much here.
+
+   So it's possible that doloop use is assigned one invalid IV cand to rewrite.
+   In this case, we need bind_cand to fix up.  Even if the bind_cand doesn't
+   exist in final iv_ca set, it won't affect optimal decision since it gets
+   eliminated along with doloop use.  */
+
+static bool
+find_doloop_use_and_its_bind (struct ivopts_data *data)
+{
+  struct loop *loop = data->current_loop;
+
+  for (unsigned i = 0; i < data->vgroups.length (); i++)
+    {
+      struct iv_group *group = data->vgroups[i];
+      if (group->type == USE_COMPARE)
+	{
+	  gcc_assert (group->vuses.length () == 1);
+	  struct iv_use *use = group->vuses[0];
+	  gimple *stmt = use->stmt;
+	  if (gimple_code (stmt) == GIMPLE_COND)
+	    {
+	      basic_block bb = gimple_bb (stmt);
+	      edge true_edge, false_edge;
+	      extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
+	      /* This comparison is used for loop latch.  Require latch is empty
+		 for now.  */
+	      if ((loop->latch == true_edge->dest
+		   || loop->latch == false_edge->dest)
+		  && empty_block_p (loop->latch))
+		{
+		  for (unsigned j = 0; j < data->vcands.length (); j++)
+		    {
+		      if (bitmap_bit_p (group->related_cands, j))
+			{
+			  struct iv_cand *cand = data->vcands[j];
+			  tree op = use->iv->ssa_name;
+			  if (op == cand->var_before || op == cand->var_after)
+			    {
+			      group->bind_cand = cand;
+			      if (dump_file && (dump_flags & TDF_DETAILS))
+				{
+				  fprintf (dump_file, "Doloop cmp iv use: ");
+				  print_gimple_stmt (dump_file, stmt,
+						     TDF_DETAILS);
+				  dump_cand (dump_file, cand);
+				}
+			      break;
+			    }
+			}
+		    }
+		  if (group->bind_cand)
+		    return true;
+		}
+	    }
+	}
+    }
+
+  return false;
+}
+
 /* Optimizes the LOOP.  Returns true if anything changed.  */
 
 static bool
@@ -7580,6 +7698,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
   basic_block *body;
 
   gcc_assert (!data->niters);
+  data->doloop_use_p = false;
   data->current_loop = loop;
   data->loop_loc = find_loop_location (loop).get_location_t ();
   data->speed = optimize_loop_for_speed_p (loop);
@@ -7625,6 +7744,19 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
   /* Finds candidates for the induction variables (item 2).  */
   find_iv_candidates (data);
 
+  if (flag_branch_on_count_reg && targetm.have_count_reg_decr_p
+      && generic_predict_doloop_p (data))
+    {
+      data->doloop_use_p = find_doloop_use_and_its_bind (data);
+      if (data->doloop_use_p && dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file,
+		   "Predict loop %d can perform doloop optimization later.\n",
+		   loop->num);
+	  flow_loop_dump (loop, dump_file, NULL, 1);
+	}
+    }
+
   /* Calculates the costs (item 3, part 1).  */
   determine_iv_costs (data);
   determine_group_iv_costs (data);


More information about the Gcc-patches mailing list