This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix PR58453
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Fri, 20 Sep 2013 12:40:25 +0200 (CEST)
- Subject: [PATCH] Fix PR58453
- Authentication-results: sourceware.org; auth=none
The following "fixes" the bogus loop distribution that breaks
434.zeusmp. It does so by applying the cost model for
-ftree-loop-distribute-patterns as well (with -O3
-ftree-loop-distribution the bug doesn't reproduce).
This only papers over the real issue, but it is also part of the real
fix I am developing (a missing feature I have put off for too long).
Since it unbreaks 434.zeusmp, I'll apply this beforehand.
The real fix, while complete, still needs some TLC and compile-time work.
Bootstrap and regtest are running on x86_64-unknown-linux-gnu; I'll
also double-check SPEC 2k6 before committing this.
Thanks,
Richard.
2013-09-20 Richard Biener <rguenther@suse.de>
PR tree-optimization/58453
* tree-loop-distribution.c (distribute_loop): Apply the cost
model for -ftree-loop-distribute-patterns, too.
* gcc.dg/tree-ssa/ldist-23.c: New testcase.
Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c (revision 202774)
--- gcc/tree-loop-distribution.c (working copy)
*************** distribute_loop (struct loop *loop, vec<
*** 1514,1531 ****
any_builtin |= partition_builtin_p (partition);
}
/* If we are only distributing patterns fuse all partitions that
! were not properly classified as builtins. Else fuse partitions
! with similar memory accesses. */
if (!flag_tree_loop_distribution)
{
partition_t into;
- /* If we did not detect any builtin simply bail out. */
- if (!any_builtin)
- {
- nbp = 0;
- goto ldist_done;
- }
/* Only fuse adjacent non-builtin partitions, see PR53616.
??? Use dependence information to improve partition ordering. */
i = 0;
--- 1514,1564 ----
any_builtin |= partition_builtin_p (partition);
}
+ /* If we did not detect any builtin but are not asked to apply
+ regular loop distribution simply bail out. */
+ if (!flag_tree_loop_distribution
+ && !any_builtin)
+ {
+ nbp = 0;
+ goto ldist_done;
+ }
+
+ /* Apply our simple cost model - fuse partitions with similar
+ memory accesses. */
+ partition_t into;
+ for (i = 0; partitions.iterate (i, &into); ++i)
+ {
+ if (partition_builtin_p (into))
+ continue;
+ for (int j = i + 1;
+ partitions.iterate (j, &partition); ++j)
+ {
+ if (!partition_builtin_p (partition)
+ && similar_memory_accesses (rdg, into, partition))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "fusing partitions\n");
+ dump_bitmap (dump_file, into->stmts);
+ dump_bitmap (dump_file, partition->stmts);
+ fprintf (dump_file, "because they have similar "
+ "memory accesses\n");
+ }
+ bitmap_ior_into (into->stmts, partition->stmts);
+ if (partition->kind == PKIND_REDUCTION)
+ into->kind = PKIND_REDUCTION;
+ partitions.ordered_remove (j);
+ partition_free (partition);
+ j--;
+ }
+ }
+ }
+
/* If we are only distributing patterns fuse all partitions that
! were not properly classified as builtins. */
if (!flag_tree_loop_distribution)
{
partition_t into;
/* Only fuse adjacent non-builtin partitions, see PR53616.
??? Use dependence information to improve partition ordering. */
i = 0;
*************** distribute_loop (struct loop *loop, vec<
*** 1549,1586 ****
}
while ((unsigned) i < partitions.length ());
}
- else
- {
- partition_t into;
- int j;
- for (i = 0; partitions.iterate (i, &into); ++i)
- {
- if (partition_builtin_p (into))
- continue;
- for (j = i + 1;
- partitions.iterate (j, &partition); ++j)
- {
- if (!partition_builtin_p (partition)
- && similar_memory_accesses (rdg, into, partition))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "fusing partitions\n");
- dump_bitmap (dump_file, into->stmts);
- dump_bitmap (dump_file, partition->stmts);
- fprintf (dump_file, "because they have similar "
- "memory accesses\n");
- }
- bitmap_ior_into (into->stmts, partition->stmts);
- if (partition->kind == PKIND_REDUCTION)
- into->kind = PKIND_REDUCTION;
- partitions.ordered_remove (j);
- partition_free (partition);
- j--;
- }
- }
- }
- }
/* Fuse all reduction partitions into the last. */
if (partitions.length () > 1)
--- 1582,1587 ----
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c (revision 0)
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-23.c (working copy)
***************
*** 0 ****
--- 1,34 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3 -fdump-tree-ldist-details" } */
+
+ extern void abort (void);
+
+ int a[128], b[128], c[128], d[128];
+
+ void __attribute__((noinline,noclone))
+ foo (void)
+ {
+ int i;
+ for (i = 0; i < 128; ++i)
+ {
+ a[i] = a[i] + 1;
+ b[i] = d[i];
+ c[i] = a[i] / d[i];
+ }
+ }
+ int main()
+ {
+ int i;
+ for (i = 0; i < 128; ++i)
+ a[i] = i;
+ for (i = 0; i < 128; ++i)
+ d[i] = 1;
+ foo ();
+ if (c[0] != 1)
+ abort ();
+ return 0;
+ }
+
+ /* { dg-final { scan-tree-dump "split to 2 loops" "ldist" } } */
+ /* { dg-final { scan-tree-dump "generated memcpy" "ldist" } } */
+ /* { dg-final { cleanup-tree-dump "ldist" } } */