This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] enable loop fusion with ISL scheduler
- From: Bernhard Reutner-Fischer <rep dot dot dot nop at gmail dot com>
- To: Tobias Grosser <tobias at grosser dot es>
- Cc: Sebastian Pop <s dot pop at samsung dot com>, GCC Patches <gcc-patches at gcc dot gnu dot org>, Sebastian Pop <sebpop at gmail dot com>, aditya dot k7 at samsung dot com, Richard Biener <richard dot guenther at gmail dot com>
- Date: Fri, 17 Jul 2015 13:27:20 +0200
- Subject: Re: [PATCH] enable loop fusion with ISL scheduler
- Authentication-results: sourceware.org; auth=none
- References: <1437086111-6784-1-git-send-email-s dot pop at samsung dot com> <55A8855A dot 30303 at grosser dot es>
On 17 July 2015 at 06:32, Tobias Grosser <tobias@grosser.es> wrote:
> On 07/17/2015 12:35 AM, Sebastian Pop wrote:
>>
>> gcc/ChangeLog:
>>
>> 2015-07-16 Aditya Kumar <aditya.k7@samsung.com>
>> Sebastian Pop <s.pop@samsung.com>
>>
>> * common.opt (floop-fuse): New.
>> * doc/invoke.texi (floop-fuse): Documented.
>> * graphite-optimize-isl.c (optimize_isl): Use
>> ISL_SCHEDULE_FUSE_MAX when using flag_loop_fuse.
>> * graphite-poly.c (apply_poly_transforms): Call optimize_isl when
>> using flag_loop_fuse.
>> * graphite.c (gate_graphite_transforms): Enable graphite with
>> flag_loop_fuse.
>
>
> LGTM.
AFAICS this won't work with isl-0.15.0, where this spot changed. See
https://gcc.gnu.org/ml/gcc-patches/2015-07/msg01162.html and the
following messages.
Apart from that, it looks identical in spirit to what I came up with
when I started thinking about the fusion part of https://gcc.gnu.org/PR66741
;)
I haven't had time to pursue this yet due to real-life interference, though,
so if you beat me to it...
Thanks,
>
> Tobias
>
>> gcc/testsuite/ChangeLog:
>>
>> 2015-07-16 Aditya Kumar <aditya.k7@samsung.com>
>> Sebastian Pop <s.pop@samsung.com>
>>
>>
>> * gcc.dg/graphite/fuse-1.c: New test.
>> * gcc.dg/graphite/fuse-2.c: New test.
>> ---
>> gcc/common.opt | 4 ++++
>> gcc/doc/invoke.texi | 23 +++++++++++++++++++-
>> gcc/graphite-optimize-isl.c | 5 ++++-
>> gcc/graphite-poly.c | 2 +-
>> gcc/graphite.c | 3 ++-
>> gcc/testsuite/gcc.dg/graphite/fuse-1.c | 32 ++++++++++++++++++++++++++++
>> gcc/testsuite/gcc.dg/graphite/fuse-2.c | 38 ++++++++++++++++++++++++++++++++++
>> 7 files changed, 103 insertions(+), 4 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.dg/graphite/fuse-1.c
>> create mode 100644 gcc/testsuite/gcc.dg/graphite/fuse-2.c
>>
>> diff --git a/gcc/common.opt b/gcc/common.opt
>> index dd49ae3..200ecc1 100644
>> --- a/gcc/common.opt
>> +++ b/gcc/common.opt
>> @@ -1365,6 +1365,10 @@ floop-nest-optimize
>> Common Report Var(flag_loop_optimize_isl) Optimization
>> Enable the ISL based loop nest optimizer
>>
>> +floop-fuse
>> +Common Report Var(flag_loop_fuse) Optimization
>> +Enable loop fusion
>> +
>> fstrict-volatile-bitfields
>> Common Report Var(flag_strict_volatile_bitfields) Init(-1) Optimization
>> Force bitfield accesses to match their type width
>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> index b99ab1c..7cc8bb9 100644
>> --- a/gcc/doc/invoke.texi
>> +++ b/gcc/doc/invoke.texi
>> @@ -409,7 +409,7 @@ Objective-C and Objective-C++ Dialects}.
>> -fivopts -fkeep-inline-functions -fkeep-static-consts @gol
>> -flive-range-shrinkage @gol
>> -floop-block -floop-interchange -floop-strip-mine @gol
>> --floop-unroll-and-jam -floop-nest-optimize @gol
>> +-floop-unroll-and-jam -floop-nest-optimize -floop-fuse @gol
>> -floop-parallelize-all -flra-remat -flto -flto-compression-level @gol
>> -flto-partition=@var{alg} -flto-report -flto-report-wpa -fmerge-all-constants @gol
>> -fmerge-constants -fmodulo-sched -fmodulo-sched-allow-regmoves @gol
>> @@ -8796,6 +8796,27 @@ optimizer based on the Pluto optimization algorithms. It calculates a loop
>> structure optimized for data-locality and parallelism. This option
>> is experimental.
>>
>> +@item -floop-fuse
>> +@opindex floop-fuse
>> +Enable loop fusion. This option is experimental.
>> +
>> +For example, given two loops like:
>> +@smallexample
>> +DO I = 1, N
>> + A(I) = A(I) + B(I)
>> +ENDDO
>> +DO I = 1, N
>> + A(I) = A(I) + C(I)
>> +ENDDO
>> +@end smallexample
>> +@noindent
>> +loop fusion transforms the loops as if they were written:
>> +@smallexample
>> +DO I = 1, N
>> + A(I) = A(I) + B(I) + C(I)
>> +ENDDO
>> +@end smallexample
>> +
>> @item -floop-unroll-and-jam
>> @opindex floop-unroll-and-jam
>> Enable unroll and jam for the ISL based loop nest optimizer. The unroll
>> diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c
>> index 624cc87..c016461 100644
>> --- a/gcc/graphite-optimize-isl.c
>> +++ b/gcc/graphite-optimize-isl.c
>> @@ -599,7 +599,10 @@ optimize_isl (scop_p scop)
>>
>> isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
>> isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
>> - isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
>> + if (flag_loop_fuse)
>> + isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MAX);
>> + else
>> + isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
>> isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
>>
>> #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
>> diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c
>> index 4407dc5..4808fbe 100644
>> --- a/gcc/graphite-poly.c
>> +++ b/gcc/graphite-poly.c
>> @@ -272,7 +272,7 @@ apply_poly_transforms (scop_p scop)
>>
>> /* This pass needs to be run at the final stage, as it does not
>> update the lst. */
>> - if (flag_loop_optimize_isl || flag_loop_unroll_jam)
>> + if (flag_loop_optimize_isl || flag_loop_unroll_jam || flag_loop_fuse)
>> transform_done |= optimize_isl (scop);
>>
>> return transform_done;
>> diff --git a/gcc/graphite.c b/gcc/graphite.c
>> index ba8029a..51af1a2a 100644
>> --- a/gcc/graphite.c
>> +++ b/gcc/graphite.c
>> @@ -342,7 +342,8 @@ gate_graphite_transforms (void)
>> || flag_graphite_identity
>> || flag_loop_parallelize_all
>> || flag_loop_optimize_isl
>> - || flag_loop_unroll_jam)
>> + || flag_loop_unroll_jam
>> + || flag_loop_fuse)
>> flag_graphite = 1;
>>
>> return flag_graphite != 0;
>> diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-1.c b/gcc/testsuite/gcc.dg/graphite/fuse-1.c
>> new file mode 100644
>> index 0000000..f368f47
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/graphite/fuse-1.c
>> @@ -0,0 +1,32 @@
>> +/* Check that the two loops are fused and that we manage to fold the two xor
>> + operations. */
>> +/* { dg-options "-O2 -floop-fuse -fdump-tree-forwprop-all" } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to\[^\\n\]*\\^ 12" 1 "forwprop4" } } */
>> +/* { dg-do run } */
>> +
>> +#define MAX 100
>> +int A[MAX];
>> +
>> +extern void abort ();
>> +
>> +void fuse() {
>> +}
>> +
>> +int
>> +main (void)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < MAX; i++)
>> + A[i] = i;
>> + for(int i=0; i<MAX; i++)
>> + A[i] ^= 4;
>> + for(int i=0; i<MAX; i++)
>> + A[i] ^= 8;
>> +
>> + for (i = 0; i < MAX; i++)
>> + if (A[i] != (i ^ 12))
>> + abort ();
>> +
>> + return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-2.c b/gcc/testsuite/gcc.dg/graphite/fuse-2.c
>> new file mode 100644
>> index 0000000..e1a1cb3
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/graphite/fuse-2.c
>> @@ -0,0 +1,38 @@
>> +/* Check that the three loops are fused. */
>> +/* { dg-options "-O2 -floop-fuse" } */
>> +/* { dg-do run } */
>> +
>> +/* FIXME: Add a graphite dump mechanism to print the number of loops generated
>> + by ISL and pattern match it. */
>> +
>> +#define MAX 100
>> +int A[MAX], B[MAX], C[MAX];
>> +
>> +extern void abort ();
>> +
>> +void fuse() {
>> +}
>> +
>> +int
>> +main (void)
>> +{
>> + int i;
>> +
>> + /* The next three loops should be fused. */
>> + for (i = 0; i < MAX; i++)
>> + {
>> + A[i] = i;
>> + B[i] = i + 2;
>> + C[i] = i + 1;
>> + }
>> + for(int i=0; i<MAX; i++)
>> + A[i] += B[i];
>> + for(int i=0; i<MAX; i++)
>> + A[i] += C[i];
>> +
>> + for (i = 0; i < MAX; i++)
>> + if (A[i] != 3*i+3)
>> + abort ();
>> +
>> + return 0;
>> +}
>>
>