[PATCH 1/2] rs6000: tune cunroll for simple loops at O2
Jiufu Guo
guojiufu@linux.ibm.com
Wed May 20 03:55:00 GMT 2020
Hi,
In r10-4525 and r10-4161, the loop unroller was enabled for simple loops at -O2.
At the same time, GIMPLE cunroll was also enabled, but it is not restricted to
simple loops. This patch introduces a hook to check whether a loop is suitable
for complete unrolling. The hook can be used to check whether a loop is simple
enough to be unrolled completely, and so avoid negative effects such as
code-size growth or the unrolling of complex loops at -O2.
This patch can help with the code in PR95018, which contains a complex
loop, and it does not cause any obvious performance change on SPEC2017.
Bootstrap/regtest pass on powerpc64le. OK for trunk?
Jiufu
2020-05-20 Jiufu Guo <guojiufu@cn.ibm.com>
PR target/95018
* target.def (loop_allow_unroll_completely_peel): New hook.
* doc/tm.texi (TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL): New hook.
* doc/tm.texi.in (TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL): New hook.
* config/rs6000/rs6000.c (TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL): New hook.
(rs6000_loop_allow_unroll_completely_peel): New hook implementation.
* tree-ssa-loop-ivcanon.c (canonicalize_loop_induction_variables): Call
hook loop_allow_unroll_completely_peel.
---
gcc/config/rs6000/rs6000.c | 19 +++++++++++++++++++
gcc/doc/tm.texi | 9 +++++++++
gcc/doc/tm.texi.in | 2 ++
gcc/target.def | 12 ++++++++++++
gcc/tree-ssa-loop-ivcanon.c | 5 +++++
5 files changed, 47 insertions(+)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 8435bc15d72..a1a3f9cb583 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1433,6 +1433,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
+#undef TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL
+#define TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL rs6000_loop_allow_unroll_completely_peel
+
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
@@ -5139,6 +5142,22 @@ rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
return nunroll;
}
+/* Implement targetm.loop_allow_unroll_completely_peel. */
+
+static bool
+rs6000_loop_allow_unroll_completely_peel (HOST_WIDE_INT maxiter, tree niter,
+ struct loop *loop)
+{
+ if (unroll_only_small_loops && optimize == 2) /* Only filter at -O2. */
+ {
+ if (maxiter >= 4 /* MAXITER below 4 is always allowed. */
+ && !(TREE_CODE (niter) == INTEGER_CST && single_exit (loop)))
+ return false; /* NITER not constant, or LOOP lacks a single exit. */
+ }
+
+ return true; /* No objection to complete unrolling. */
+}
+
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
library with vectorized intrinsics. */
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 6e7d9dc54a9..f7419872c2f 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11893,6 +11893,15 @@ is required only when the target has special constraints like maximum
number of memory accesses.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL (HOST_WIDE_INT @var{maxiter}, tree @var{niter}, class loop *@var{loop})
+This target hook check if the loop @var{loop} is suitable to unroll
+completely on the target. The parameter @var{maxiter} is the possible max
+bound of iterations. The parameter @var{niter} is the number expression of
+iterations the loop is executed. The parameter @var{loop} is a pointer to
+the loop. This target hook is required only when the target has special
+constraints.
+@end deftypefn
+
@defmac POWI_MAX_MULTS
If defined, this macro is interpreted as a signed integer C expression
that specifies the maximum number of floating point multiplications
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 3be984bbd5c..4ae079a650c 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8010,6 +8010,8 @@ lists.
@hook TARGET_LOOP_UNROLL_ADJUST
+@hook TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL
+
@defmac POWI_MAX_MULTS
If defined, this macro is interpreted as a signed integer C expression
that specifies the maximum number of floating point multiplications
diff --git a/gcc/target.def b/gcc/target.def
index 07059a87caf..3842ba611a2 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2709,6 +2709,18 @@ number of memory accesses.",
unsigned, (unsigned nunroll, class loop *loop),
NULL)
+/* Return true if a loop is suitable for complete unrolling. */
+DEFHOOK
+(loop_allow_unroll_completely_peel,
+ "This target hook check if the loop @var{loop} is suitable to unroll\n\
+completely on the target. The parameter @var{maxiter} is the possible max\n\
+bound of iterations. The parameter @var{niter} is the number expression of\n\
+iterations the loop is executed. The parameter @var{loop} is a pointer to\n\
+the loop. This target hook is required only when the target has special\n\
+constraints.",
+ bool, (HOST_WIDE_INT maxiter, tree niter, class loop *loop),
+ NULL)
+
/* True if X is a legitimate MODE-mode immediate operand. */
DEFHOOK
(legitimate_constant_p,
diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
index 8ab6ab3330c..52d45a90a56 100644
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see
#include "system.h"
#include "coretypes.h"
#include "backend.h"
+#include "target.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
@@ -1231,6 +1232,10 @@ canonicalize_loop_induction_variables (class loop *loop,
/* Force re-computation of loop bounds so we can remove redundant exits. */
maxiter = max_loop_iterations_int (loop);
+ if (allow_peel && targetm.loop_allow_unroll_completely_peel
+ && !targetm.loop_allow_unroll_completely_peel (maxiter, niter, loop))
+ return false;
+
if (dump_file && (dump_flags & TDF_DETAILS)
&& TREE_CODE (niter) == INTEGER_CST)
{
--
2.17.1
More information about the Gcc-patches
mailing list