[PATCH V2] Use preferred mode for doloop iv [PR61837].

guojiufu guojiufu@linux.ibm.com
Wed Jul 14 10:26:28 GMT 2021


On 2021-07-14 12:40, guojiufu via Gcc-patches wrote:
Updated the patch as below:
Thanks for comments.

gcc/ChangeLog:

2021-07-13  Jiufu Guo  <guojiufu@linux.ibm.com>

	PR target/61837
	* config/rs6000/rs6000.c (TARGET_PREFERRED_DOLOOP_MODE): New hook.
	(rs6000_preferred_doloop_mode): New hook.
	* doc/tm.texi: Regenerate.
	* doc/tm.texi.in: Add hook preferred_doloop_mode.
	* target.def (preferred_doloop_mode): New hook.
	* targhooks.c (default_preferred_doloop_mode): New hook.
	* targhooks.h (default_preferred_doloop_mode): New hook.
	* tree-ssa-loop-ivopts.c (compute_doloop_base_on_mode): New function.
	(add_iv_candidate_for_doloop): Call targetm.preferred_doloop_mode
	and compute_doloop_base_on_mode.

gcc/testsuite/ChangeLog:

2021-07-13  Jiufu Guo  <guojiufu@linux.ibm.com>

	PR target/61837
	* gcc.target/powerpc/pr61837.c: New test.
---
  gcc/config/rs6000/rs6000.c                 | 11 ++++
  gcc/doc/tm.texi                            | 10 ++++
  gcc/doc/tm.texi.in                         |  2 +
  gcc/target.def                             | 14 +++++
  gcc/targhooks.c                            |  8 +++
  gcc/targhooks.h                            |  1 +
  gcc/testsuite/gcc.target/powerpc/pr61837.c | 20 +++++++
  gcc/tree-ssa-loop-ivopts.c                 | 67 +++++++++++++++++++++-
  8 files changed, 131 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr61837.c

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 9a5db63d0ef..3bdf0cb97a3 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1700,6 +1700,9 @@ static const struct attribute_spec 
rs6000_attribute_table[] =
  #undef TARGET_DOLOOP_COST_FOR_ADDRESS
  #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

+#undef TARGET_PREFERRED_DOLOOP_MODE
+#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
+
  #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
  #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV 
rs6000_atomic_assign_expand_fenv

@@ -27867,6 +27870,14 @@ rs6000_predict_doloop_p (struct loop *loop)
    return true;
  }

+/* Implement TARGET_PREFERRED_DOLOOP_MODE. */
+
+static machine_mode
+rs6000_preferred_doloop_mode (machine_mode)
+{
+  return word_mode;
+}
+
  /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P.  */

  static bool
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 2a41ae5fba1..4fb516169dc 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11984,6 +11984,16 @@ By default, the RTL loop optimizer does not use 
a present doloop pattern for
  loops containing function calls or branch on table instructions.
  @end deftypefn

+@deftypefn {Target Hook} machine_mode TARGET_PREFERRED_DOLOOP_MODE 
(machine_mode @var{mode})
+This hook takes a @var{mode} which is the original mode of doloop IV.
+And if the target prefers other mode for doloop IV, this hook returns 
the
+preferred mode.
+For example, on 64bit target, DImode may be preferred than SImode.
+This hook could return the original mode itself if the target prefer to
+keep the original mode.
+The origianl mode and return mode should be MODE_INT.
+@end deftypefn
+
  @deftypefn {Target Hook} bool TARGET_LEGITIMATE_COMBINED_INSN (rtx_insn 
*@var{insn})
  Take an instruction in @var{insn} and return @code{false} if the 
instruction
  is not appropriate as a combination of two or more instructions.  The
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f881cdabe9e..38215149a92 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7917,6 +7917,8 @@ to by @var{ce_info}.

  @hook TARGET_INVALID_WITHIN_DOLOOP

+@hook TARGET_PREFERRED_DOLOOP_MODE
+
  @hook TARGET_LEGITIMATE_COMBINED_INSN

  @hook TARGET_CAN_FOLLOW_JUMP
diff --git a/gcc/target.def b/gcc/target.def
index c009671c583..1b6c9872807 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4454,6 +4454,20 @@ loops containing function calls or branch on 
table instructions.",
   const char *, (const rtx_insn *insn),
   default_invalid_within_doloop)

+/* Returns the machine mode which the target prefers for doloop IV.  */
+DEFHOOK
+(preferred_doloop_mode,
+ "This hook takes a @var{mode} which is the original mode of doloop 
IV.\n\
+And if the target prefers another mode for doloop IV, this hook returns 
the\n\
+preferred mode.\n\
+For example, on 64bit target, DImode may be preferred than SImode.\n\
+This hook could return the original mode itself if the target prefer 
to\n\
+keep the original mode.\n\
+The original mode and return mode should be MODE_INT.",
+ machine_mode,
+ (machine_mode mode),
+ default_preferred_doloop_mode)
+
  /* Returns true for a legitimate combined insn.  */
  DEFHOOK
  (legitimate_combined_insn,
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 44a1facedcf..eb5190910dc 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -660,6 +660,14 @@ default_predict_doloop_p (class loop *loop 
ATTRIBUTE_UNUSED)
    return false;
  }

+/* By default, just use the input MODE itself.  */
+
+machine_mode
+default_preferred_doloop_mode (machine_mode mode)
+{
+  return mode;
+}
+
  /* NULL if INSN insn is valid within a low-overhead loop, otherwise 
returns
     an error message.

diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index f70a307d26c..f92e102c450 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -88,6 +88,7 @@ extern bool default_fixed_point_supported_p (void);
  extern bool default_has_ifunc_p (void);

  extern bool default_predict_doloop_p (class loop *);
+extern machine_mode default_preferred_doloop_mode (machine_mode);
  extern const char * default_invalid_within_doloop (const rtx_insn *);

  extern tree default_builtin_vectorized_function (unsigned int, tree, 
tree);
diff --git a/gcc/testsuite/gcc.target/powerpc/pr61837.c 
b/gcc/testsuite/gcc.target/powerpc/pr61837.c
new file mode 100644
index 00000000000..87b69888c7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr61837.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_doloop -fno-unroll-loops" } */
+/* The inner loop would use the doloop IV in word_mode.  And then
+   there is no need to access it though zero_extend on shorter mode.  
*/
+void foo(int *p1, long *p2, int s)
+{
+  int n, v, i;
+
+  v = 0;
+  for (n = 0; n <= 100; n++) {
+     for (i = 0; i < s; i++)
+        if (p2[i] == n)
+           p1[i] = v;
+     v += 88;
+  }
+}
+
+/* { dg-final {scan-rtl-dump-not "zero_extend.*doloop" "loop2_doloop"} 
} */
+/* { dg-final {scan-rtl-dump-not "reg:SI.*doloop" "loop2_doloop" { 
target lp64 } } } */
+
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 12a8a49a307..33da3184d6f 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -5657,6 +5657,57 @@ relate_compare_use_with_all_cands (struct 
ivopts_data *data)
      }
  }

+/* If PREFERRED_MODE is suitable and profitable, use the
+   PREFERRED_MODE to compute doloop iv base from niter: base = niter + 
1.  */
+
+static tree
+compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
+			     const widest_int &iterations_max)
+{
+  tree ntype = TREE_TYPE (niter);
+  gcc_assert (TYPE_UNSIGNED (ntype));
+
+  tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
+  gcc_assert (pref_type && TREE_CODE (pref_type) == INTEGER_TYPE);
+
+  int prec = TYPE_PRECISION (ntype);
+  int pref_prec = TYPE_PRECISION (pref_type);
+
+  tree base;
+
+  /* Check if the PREFERRED_MODED is able to present niter.  */
+  if (pref_prec > prec
+      || wi::ltu_p (iterations_max,
+		    widest_int::from (wi::max_value (pref_prec, UNSIGNED),
+				      UNSIGNED)))
+    {
+      /* No wrap, it is safe to use preferred type after niter + 1.  */
+      if (wi::ltu_p (iterations_max,
+		     widest_int::from (wi::max_value (prec, UNSIGNED),
+				       UNSIGNED)))
+	{
+	  /* This could help to optimize "-1 +1" pair when niter looks
+	     like "n-1": n is in original mode.  "base = (n - 1) + 1"
+	     in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n.  */
+	  base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
+			      build_int_cst (ntype, 1));
+	  base = fold_convert (pref_type, base);
+	}
+
+      /* To avoid wrap, need fold niter to preferred type before plus 
1.  */
+      else
+	{
+	  niter = fold_convert (pref_type, niter);
+	  base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
+			      build_int_cst (pref_type, 1));
+	}
+    }
+  else
+    base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
+			build_int_cst (ntype, 1));
+  return base;
+}
+
  /* Add one doloop dedicated IV candidate:
       - Base is (may_be_zero ? 1 : (niter + 1)).
       - Step is -1.  */
@@ -5688,8 +5739,20 @@ add_iv_candidate_for_doloop (struct ivopts_data 
*data)
  	return;
      }

-  tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
-			   build_int_cst (ntype, 1));
+  machine_mode mode = TYPE_MODE (ntype);
+  machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
+
+  tree base;
+  if (mode != pref_mode)
+    {
+      base = compute_doloop_base_on_mode (pref_mode, niter, 
niter_desc->max);
+      ntype = TREE_TYPE (base);
+    }
+  else
+    base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
+			build_int_cst (ntype, 1));
+
+
    add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, 
NULL, true);
  }

-- 
2.17.1


> On 2021-07-14 04:50, Segher Boessenkool wrote:
>> Hi!
>> 
>> On Tue, Jul 13, 2021 at 08:50:46PM +0800, Jiufu Guo wrote:
>>> 	* doc/tm.texi: Regenerated.
>> 
>> Pet peeve: "Regenerate.", no "d".
> 
> Ok, yeap. While, 'Regenerate and Regenerated' were used by commits 
> somewhere :)
> 
>> 
>>> +DEFHOOK
>>> +(preferred_doloop_mode,
>>> + "This hook returns a more preferred mode or the @var{mode} 
>>> itself.",
>>> + machine_mode,
>>> + (machine_mode mode),
>>> + default_preferred_doloop_mode)
>> 
>> You need a bit more description here.  What does the value it returns
>> mean?  If you want to say "a more preferred mode or the mode itself",
>> you should explain what the difference means, too.
> 
> Ok, thanks.
> 
>> 
>> You also should say the hook does not need to test if things will fit,
>> since the generic code already does.
>> 
>> And say this should return a MODE_INT always -- you never test for 
>> that
>> as far as I can see, but you don't need to, as long as everyone does 
>> the
>> sane thing.  So just state every hok implementation should :-)
> 
> Yes, the preferred 'doloop iv mode' from targets should be a MODE_INT.
> I will add comments, and update the gcc_assert you mentioned below
> for this.
> 
> Thanks a lot for your comments and suggestions!
> 
> When writing words, I was always from adding/deleting and still hard to 
> get
> perfect ones -:(
> 
>> 
>>> +extern machine_mode
>>> +default_preferred_doloop_mode (machine_mode);
>> 
>> One line please (this is a declaration).
>> 
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-O2" } */
>>> +void foo(int *p1, long *p2, int s)
>>> +{
>>> +  int n, v, i;
>>> +
>>> +  v = 0;
>>> +  for (n = 0; n <= 100; n++) {
>>> +     for (i = 0; i < s; i++)
>>> +        if (p2[i] == n)
>>> +           p1[i] = v;
>>> +     v += 88;
>>> +  }
>>> +}
>>> +
>>> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
>> 
>> That is a pretty fragile thing to test for.  It also need a line or 
>> two
>> of comment in the test case what this does, what kind of thing it does
>> not want to see.
> 
> Thanks! I will update accordingly.  And I'm thinking to add tests to 
> check
> doloop.xx type: no zero_extend to access subreg. This is the intention
> of this patch.
> 
>> 
>>> +/* If PREFERRED_MODE is suitable and profitable, use the preferred
>>> +   PREFERRED_MODE to compute doloop iv base from niter: base = niter 
>>> + 1.  */
>>> +
>>> +static tree
>>> +compute_doloop_base_on_mode (machine_mode preferred_mode, tree 
>>> niter,
>>> +			     const widest_int &iterations_max)
>>> +{
>>> +  tree ntype = TREE_TYPE (niter);
>>> +  tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 
>>> 1);
>>> +
>>> +  gcc_assert (pref_type && TYPE_UNSIGNED (ntype));
>> 
>> Should that be pref_type instead of ntype?  If not, write it as two
>> separate asserts please.
> 
> Ok, will separate as two asserts.
> 
>> 
>>> +static machine_mode
>>> +rs6000_preferred_doloop_mode (machine_mode)
>>> +{
>>> +  return word_mode;
>>> +}
>> 
>> This is fine if the generic code does the right thing if it passes say
>> TImode here, and if it never will pass some other mode class mode.
> 
> The generic code checks if the returned mode can works on doloop iv 
> correctly,
> if the preferred mode is not suitable (e.g. preferred_doloop_mode 
> returns DI,
> but niter is a large value in TI), then doloop.xx IV will use the 
> original mode.
> 
> When a target really prefer TImode, and TImode can represent number of
> iterations,
> This would still work.  In current code, word_mode is SI/DI in most
> targets, like Pmode.
> On powerpc, they are DImode (for 64bit)/SImode(32bit)
> 
> Thanks again for your comments!
> 
> BR,
> Jiufu
> 
>> 
>> 
>> Segher


More information about the Gcc-patches mailing list