[PATCH v3 3/3] PR80791 Consider doloop cmp use in ivopts

Kewen.Lin linkw@linux.ibm.com
Thu Jun 20 12:08:00 GMT 2019


Hi Segher,

> On Wed, Jun 19, 2019 at 07:47:34PM +0800, Kewen.Lin wrote:
>> +/* Return true if count register for branch is supported.  */
>> +
>> +static bool
>> +rs6000_have_count_reg_decr_p ()
>> +{
>> +  return flag_branch_on_count_reg;
>> +}
> 
> rs6000 unconditionally supports these instructions, not just when that
> flag is set.  If you need to look at the flag, the *caller* of this new
> hook should, not every implementation of the hook.  So just "return true"
> here?

Good point!  Updated it as hookpod.

>> +/* For doloop use, if the algothrim selects some candidate which invalid for
> 
> "algorithm", "which is invalid".

>> +   some cost like zero rather than original inifite cost.  The point is to
> 
> "infinite"
> 

Thanks for catching!  I should run spelling check next time.  :)

New version attached with comments addressed.


Thanks,
Kewen
-------------- next part --------------
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 12f1dfd..e98aba9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1913,7 +1913,7 @@ static const struct attribute_spec rs6000_attribute_table[] =
 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
 
 #undef TARGET_HAVE_COUNT_REG_DECR_P
-#define TARGET_HAVE_COUNT_REG_DECR_P rs6000_have_count_reg_decr_p
+#define TARGET_HAVE_COUNT_REG_DECR_P true
 
 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
@@ -39440,14 +39440,6 @@ rs6000_predict_doloop_p (struct loop *loop)
   return true;
 }
 
-/* Return true if count register for branch is supported.  */
-
-static bool
-rs6000_have_count_reg_decr_p ()
-{
-  return flag_branch_on_count_reg;
-}
-
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-rs6000.h"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 46e488f..5477294 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11618,11 +11618,13 @@ loops, and will help ivopts to make some decisions.
 The default version of this hook returns false.
 @end deftypefn
 
-@deftypefn {Target Hook} bool TARGET_HAVE_COUNT_REG_DECR_P (void)
+@deftypevr {Target Hook} bool TARGET_HAVE_COUNT_REG_DECR_P
 Return true if the target supports hardware count register for decrement
-and branch.
-The default version of this hook returns false.
-@end deftypefn
+and branch.  This count register can't be used as general register since
+moving to/from a general register from/to it is very expensive.
+For the targets with this support, ivopts can take doloop use as zero cost.
+The default value is false.
+@end deftypevr
 
 @deftypefn {Target Hook} bool TARGET_CAN_USE_DOLOOP_P (const widest_int @var{&iterations}, const widest_int @var{&iterations_max}, unsigned int @var{loop_depth}, bool @var{entered_at_top})
 Return true if it is possible to use low-overhead loops (@code{doloop_end}
diff --git a/gcc/target.def b/gcc/target.def
index ec15a6d..8a64e5b 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4246,13 +4246,15 @@ The default version of this hook returns false.",
  bool, (struct loop *loop),
  default_predict_doloop_p)
 
-DEFHOOK
+DEFHOOKPOD
 (have_count_reg_decr_p,
  "Return true if the target supports hardware count register for decrement\n\
-and branch.\n\
-The default version of this hook returns false.",
- bool, (void),
- hook_bool_void_false)
+and branch.  This count register can't be used as general register since\n\
+moving to/from a general register from/to it is very expensive.\n\
+For the targets with this support, ivopts can take doloop use as zero cost.\n\
+The default value is false.",
+ bool, false)
+
 
 DEFHOOK
 (can_use_doloop_p,
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
index 7d5859b..71d7f67 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
@@ -17,6 +17,7 @@ f1 (char *p, uintptr_t i, uintptr_t n)
   while (i < n);
 }
 
-/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" } } */
-/* { dg-final { scan-tree-dump-times "PHI <p_" 1 "ivopts"} } */
-/* { dg-final { scan-tree-dump-times "p_\[0-9\]* <" 1 "ivopts" } } */
+/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "PHI" 2 "ivopts" { target { powerpc*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "PHI <p_" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "p_\[0-9\]* <" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index b1138ea..742d3fa 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -3730,7 +3730,7 @@ prepare_decl_rtl (tree *expr_p, int *ws, void *data)
    Some RTL specific checks seems unable to be checked in gimple, if any new
    checks or easy checks _are_ missing here, please add them.  */
 
-static bool ATTRIBUTE_UNUSED
+static bool
 generic_predict_doloop_p (struct ivopts_data *data)
 {
   struct loop *loop = data->current_loop;
@@ -6749,7 +6749,7 @@ find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
   return set;
 }
 
-/* For doloop use, if the algothrim selects some candidate which invalid for
+/* For doloop use, if the algorithm selects some candidate which is invalid for
    later rewrite, fix it up with bind_cand.  */
 
 static void
@@ -7622,7 +7622,7 @@ determine_scaling_factor (struct ivopts_data *data, basic_block *body)
 
 /* Find doloop comparison use and set its related bind_cand.  We adjust the
    doloop use group cost against various IV cands, it's possible to assign
-   some cost like zero rather than original inifite cost.  The point is to
+   some cost like zero rather than original infinite cost.  The point is to
    give more chances to consider other IV cands instead of BIV.  The cost
    originally given on doloop use can affect optimal decision because it can
    become dead and get eliminated but considered too much here.
@@ -7744,7 +7744,8 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
   /* Finds candidates for the induction variables (item 2).  */
   find_iv_candidates (data);
 
-  if (targetm.have_count_reg_decr_p() && generic_predict_doloop_p (data))
+  if (flag_branch_on_count_reg && targetm.have_count_reg_decr_p
+      && generic_predict_doloop_p (data))
     {
       data->doloop_use_p = find_doloop_use_and_its_bind (data);
       if (data->doloop_use_p && dump_file && (dump_flags & TDF_DETAILS))


More information about the Gcc-patches mailing list