This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, GCC, AArch64] Fix PR88398 for AArch64
- From: Sudakshina Das <Sudi dot Das at arm dot com>
- To: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Cc: Kyrill Tkachov <kyrylo dot tkachov at foss dot arm dot com>, James Greenhalgh <James dot Greenhalgh at arm dot com>, Richard Earnshaw <Richard dot Earnshaw at arm dot com>, "bin dot cheng at linux dot alibaba dot com" <bin dot cheng at linux dot alibaba dot com>, "ook at ucw dot cz" <ook at ucw dot cz>
- Date: Thu, 14 Nov 2019 15:40:53 +0000
- Subject: [PATCH, GCC, AArch64] Fix PR88398 for AArch64
- Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=arm.com; dmarc=pass action=none header.from=arm.com; dkim=pass header.d=arm.com; arc=none
- Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=dG0Imx5njnYt7vivjI1WYmhsn8vMVohvYKdsEmics6w=; b=hO204kkmanjCWnHvQZIpa+1dLiLdMlwZPdirtgWqIwcVctA2riJJwAuljIr9oCIZzyj6/CP2fzglE2WLnWxn2EJdrCni/nVEjyNlv607Nr61RrBnGLSVNXjEf4s80W+tOBS9/eWapq+QPDO8JDFQxdIlKcVE+SzXx2sdq55uzm/bKE0tO7u8aiFmAcw5TDiZ2kB7dzvSwuYlDBkGBIjyNgbEZwlgt9i3sPC+/0QPwRitTa0IeD4tIlYT0fY3jP0Oyp5bZTBS/5lj9m8DsyRiDssZIm3sMfQBKmomp3IqyKS/7BhLUm4uXn0lrCmMwFFpqPnnrPgiHKn/o+wUI8j2ow==
- Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=GtilWiru/YRpo/zC1Ofqm1JmYgABTKziDELWYmVuM+t03c9YzK6rB4vdfy6nYvcm9lnTGgfR8sI5KiUqGc2CeCsJJzHihEtVhA7V5KohRC6BzRDZ6+9eNyO6v3s/m1uiRG76VVxFH08f7C5KZUEcfXGTV9rMMcuV6cgF9AFoCCn8kbSKNDvXSbJdiIyAyDhPS/8Q3SamH9hM4n4yAcD0so6P2nYLWt6wBy1sv9DTk8gXwRWOYOPB2EGxW9ha9HrUxi0bexIrCLS1fT6FsV3Ra6BNUcoSIXrVi1JAxPv90gp1hevemyHiiwmrSlMQq7P27Eacoi8xYsDIyWk2WV1UZg==
- Original-authentication-results: spf=none (sender IP is ) smtp.mailfrom=Sudi dot Das at arm dot com;
Hi
This patch is trying to fix PR88398 for AArch64. As discussed in the PR,
loop unrolling is probably what we can do here. As an easy fix, the
existing unroll_stupid is unrolling the given example better than the
unroll_runtime_iterations since the the loop contains a break inside it.
So all I have done here is:
1) Add a target hook so that this is AArch64 specific.
2) We are not unrolling the loops that decide_unroll_runtime_iterations
would reject.
3) Out of the ones that decide_unroll_runtime_iterations would accept,
check if the loop has more than 1 exit (this is done in the new target
hook) and if it does, try to unroll using unroll_stupid.
Regression tested on AArch64 and added the test from the PR. This gives
an overall code size reduction of 2.35% and performance gain of 0.498%
on Spec2017 Intrate.
Is this ok for trunk?
Thanks
Sudi
gcc/ChangeLog:
2019-xx-xx Sudakshina Das <sudi.das@arm.com>
PR88398
* cfgloop.h: Include target.h.
(lpt_dec): Move to...
* target.h (lpt_dec): ... Here.
* target.def: Define TARGET_LOOP_DECISION_ADJUST.
* loop-unroll.c (decide_unroll_runtime_iterations): Use new target hook.
(decide_unroll_stupid): Likewise.
* config/aarch64/aarch64.c (aarch64_loop_decision_adjust): New function.
(TARGET_LOOP_DECISION_ADJUST): Define for AArch64.
* doc/tm.texi: Regenerated.
* doc/tm.texi.in: Document TARGET_LOOP_DECISION_ADJUST.
gcc/testsuite/ChangeLog:
2019-xx-xx Sudakshina Das <sudi.das@arm.com>
PR88398
* gcc.target/aarch64/pr88398.c: New test.
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 0b0154ffd7bf031a005de993b101d9db6dd98c43..985c74e3b60728fc8c9d34b69634488cae3451cb 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -21,15 +21,7 @@ along with GCC; see the file COPYING3. If not see
#define GCC_CFGLOOP_H
#include "cfgloopmanip.h"
-
-/* Structure to hold decision about unrolling/peeling. */
-enum lpt_dec
-{
- LPT_NONE,
- LPT_UNROLL_CONSTANT,
- LPT_UNROLL_RUNTIME,
- LPT_UNROLL_STUPID
-};
+#include "target.h"
struct GTY (()) lpt_decision {
enum lpt_dec decision;
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 599d07a729e7438080f8b5240ee95037a49fb983..f31ac41d66257c01ead8d5f5b9b22379ecb5d276 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -21093,6 +21093,39 @@ aarch64_sched_can_speculate_insn (rtx_insn *insn)
}
}
+/* Implement TARGET_LOOP_DECISION_ADJUST. CONSIDER is the loop decision
+ currently being checked for loop LOOP. This returns a decision which could
+ either be LPT_UNROLL_STUPID or the current value in LOOP. */
+static enum lpt_dec
+aarch64_loop_decision_adjust (enum lpt_dec consider, class loop *loop)
+{
+ switch (consider)
+ {
+ case LPT_UNROLL_CONSTANT:
+ return loop->lpt_decision.decision;
+
+ case LPT_UNROLL_RUNTIME:
+ /* Fall through. */
+ case LPT_UNROLL_STUPID:
+ {
+ vec<edge> edges = get_loop_exit_edges (loop);
+ if (edges.length () > 1)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; Need change in loop decision\n");
+ consider = LPT_UNROLL_STUPID;
+ return consider;
+ }
+ return loop->lpt_decision.decision;
+ }
+
+ case LPT_NONE:
+ /* Fall through. */
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
static int
@@ -21839,6 +21872,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
+#undef TARGET_LOOP_DECISION_ADJUST
+#define TARGET_LOOP_DECISION_ADJUST aarch64_loop_decision_adjust
+
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index cd9aed9874f4e6b2b0e2f8956ed6155975e643a8..61bd00e84c8a2a8865e95ba579c3b94790ab1331 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11857,6 +11857,15 @@ is required only when the target has special constraints like maximum
number of memory accesses.
@end deftypefn
+@deftypefn {Target Hook} {enum lpt_dec} TARGET_LOOP_DECISION_ADJUST (enum lpt_dec @var{consider}, class loop *@var{loop})
+This target hook returns either a new value for the loop unrolling
+decision or the existing value in @var{loop}. The parameter @var{consider}
+is the loop decision currently being tested. The parameter @var{loop} is a
+pointer to the loop, which is going to be checked for unrolling. This target
+hook is required only when the target wants to override the unrolling
+decisions.
+@end deftypefn
+
@defmac POWI_MAX_MULTS
If defined, this macro is interpreted as a signed integer C expression
that specifies the maximum number of floating point multiplications
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 2739e9ceec5ad7253ff9135da8dbe3bf6010e8d7..7a7f917fb45a6cc22f373ff16f8b78aa3e35f210 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8026,6 +8026,8 @@ build_type_attribute_variant (@var{mdecl},
@hook TARGET_LOOP_UNROLL_ADJUST
+@hook TARGET_LOOP_DECISION_ADJUST
+
@defmac POWI_MAX_MULTS
If defined, this macro is interpreted as a signed integer C expression
that specifies the maximum number of floating point multiplications
diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c
index 63fccd23fae38f8918a7d94411aaa43c72830dd3..d8f09d445764f74b8142ff3996cd1db229464202 100644
--- a/gcc/loop-unroll.c
+++ b/gcc/loop-unroll.c
@@ -672,6 +672,7 @@ decide_unroll_runtime_iterations (class loop *loop, int flags)
unsigned nunroll, nunroll_by_av, i;
class niter_desc *desc;
widest_int iterations;
+ enum lpt_dec decision = LPT_UNROLL_RUNTIME;
/* If we were not asked to unroll this loop, just return back silently. */
if (!(flags & UAP_UNROLL) && !loop->unroll)
@@ -735,6 +736,16 @@ decide_unroll_runtime_iterations (class loop *loop, int flags)
return;
}
+ if (targetm.loop_decision_adjust)
+ decision = targetm.loop_decision_adjust (decision, loop);
+
+ if (decision != loop->lpt_decision.decision)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; Not unrolling loop, there maybe a better decision\n");
+ return;
+ }
+
/* Success; now force nunroll to be power of 2, as code-gen
requires it, we are unable to cope with overflows in
computation of number of iterations. */
@@ -1157,9 +1168,16 @@ decide_unroll_stupid (class loop *loop, int flags)
unsigned nunroll, nunroll_by_av, i;
class niter_desc *desc;
widest_int iterations;
+ enum lpt_dec decision = LPT_UNROLL_STUPID;
- /* If we were not asked to unroll this loop, just return back silently. */
- if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
+ if (targetm.loop_decision_adjust)
+ decision = targetm.loop_decision_adjust (decision, loop);
+
+ /* If we were not asked to unroll this loop and the target also does not
+ tell us otherwise, just return back silently. */
+ if (!(flags & UAP_UNROLL_ALL)
+ && !loop->unroll
+ && decision == loop->lpt_decision.decision)
return;
if (dump_enabled_p ())
@@ -1189,26 +1207,41 @@ decide_unroll_stupid (class loop *loop, int flags)
return;
}
- /* Check for simple loops. */
desc = get_simple_loop_desc (loop);
- /* Check simpleness. */
- if (desc->simple_p && !desc->assumptions)
+ /* Skip/add a few checks if the target changed the decision. */
+ if (decision == loop->lpt_decision.decision)
{
- if (dump_file)
- fprintf (dump_file, ";; Loop is simple\n");
- return;
- }
+ /* Check simpleness. */
+ if (desc->simple_p && !desc->assumptions)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; Loop is simple\n");
+ return;
+ }
- /* Do not unroll loops with branches inside -- it increases number
- of mispredicts.
- TODO: this heuristic needs tunning; call inside the loop body
- is also relatively good reason to not unroll. */
- if (num_loop_branches (loop) > 1)
+ /* Do not unroll loops with branches inside -- it increases number
+ of mispredicts.
+ TODO: this heuristic needs tunning; call inside the loop body
+ is also relatively good reason to not unroll. */
+ if (num_loop_branches (loop) > 1)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; Not unrolling, contains branches\n");
+ return;
+ }
+ }
+ else
{
- if (dump_file)
- fprintf (dump_file, ";; Not unrolling, contains branches\n");
- return;
+ /* For target effected loops, adding the simpleness check from
+ decide_unroll_runtime_iterations since we do not want to add more
+ unrolling than before. */
+ if (!desc->simple_p && desc->assumptions)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; Not unrolling, loop is not simple\n");
+ return;
+ }
}
/* Check whether the loop rolls. */
diff --git a/gcc/target.def b/gcc/target.def
index 8e83c2c7a7136511c07a5bc9e18876c91a38b955..d869495f9c8a9b9187a802298e28aeb334a94cfb 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2680,6 +2680,18 @@ number of memory accesses.",
unsigned, (unsigned nunroll, class loop *loop),
NULL)
+/* Return a new value for loop unroll decision. */
+DEFHOOK
+(loop_decision_adjust,
+ "This target hook returns either a new value for the loop unrolling\n\
+decision or the existing value in @var{loop}. The parameter @var{consider}\n\
+is the loop decision currently being tested. The parameter @var{loop} is a\n\
+pointer to the loop, which is going to be checked for unrolling. This target\n\
+hook is required only when the target wants to override the unrolling\n\
+decisions.",
+ enum lpt_dec, (enum lpt_dec consider, class loop *loop),
+ NULL)
+
/* True if X is a legitimate MODE-mode immediate operand. */
DEFHOOK
(legitimate_constant_p,
diff --git a/gcc/target.h b/gcc/target.h
index 843c3d7887f8b49edfe9e45abf9de760176896e4..f67084a449a145f3c8016df8b9230ae835eef363 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -140,6 +140,16 @@ struct ddg;
/* This is defined in cfgloop.h . */
class loop;
+/* Structure to hold decision about unrolling/peeling. This is used in
+ targetm.loop_unroll_adjust and in class loop. */
+enum lpt_dec
+{
+ LPT_NONE,
+ LPT_UNROLL_CONSTANT,
+ LPT_UNROLL_RUNTIME,
+ LPT_UNROLL_STUPID
+};
+
/* This is defined in ifcvt.h. */
struct noce_if_info;
diff --git a/gcc/testsuite/gcc.target/aarch64/pr88398.c b/gcc/testsuite/gcc.target/aarch64/pr88398.c
new file mode 100644
index 0000000000000000000000000000000000000000..ad1929a4e81df92972418ab5c7ed2a2e6db4f157
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr88398.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -funroll-loops -fdump-rtl-loop2_unroll-all" } */
+
+typedef unsigned char __uint8_t;
+typedef unsigned int __uint32_t;
+typedef unsigned long long __uint64_t;
+
+int
+foo (const __uint64_t len_limit, const __uint8_t *cur,
+ __uint32_t delta, int len) {
+
+ const __uint8_t *pb = cur - delta;
+
+ while (++len != len_limit) {
+ if (pb[len] != cur[len])
+ break;
+ }
+
+ return len;
+}
+
+/* { dg-final { scan-rtl-dump "considering unrolling loop stupidly" "loop2_unroll" } } */
+/* { dg-final { scan-rtl-dump "optimized: loop unrolled 7 times" "loop2_unroll" } } */