This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Fix PR45199: do not aggregate memory accesses to the same array for -ftree-loop-distribute-patterns
- From: Sebastian Pop <sebpop at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, Richard Guenther <rguenther at suse dot de>
- Date: Tue, 30 Nov 2010 18:27:53 -0600
- Subject: Fix PR45199: do not aggregate memory accesses to the same array for -ftree-loop-distribute-patterns
Hi,
This patch disables for -ftree-loop-distribute-patterns a heuristic
that is used by the loop distribution pass to avoid the split of loops
with reads and writes to the same array, like this loop:
loop
A[2*i] = 0
A[2*i+1] = 1
end_loop
would be transformed into these two loops:
loop
A[2*i] = 0
end_loop
loop
A[2*i+1] = 1
end_loop
and that would hurt data locality. Now, because we don't want these
two loops to be distributed when -ftree-loop-distribute-patterns is
enabled, we have to select the seeds of the memset (0) partitions to
be only those statements "A[i] = 0" that also satisfy the test in
generate_memset_zero, that is dataref_stride_has_unit_type_stride.
Bootstrap and test in progress on amd64-linux. Ok for trunk when that
passes?
Thanks,
Sebastian
From dc42b8c6001cafaba2b38beb8760af609f68fde8 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Thu, 5 Aug 2010 15:36:33 -0500
Subject: [PATCH] Fix PR45199: do not aggregate memory accesses to the same array for -ftree-loop-distribute-patterns
2010-11-30 Sebastian Pop <sebastian.pop@amd.com>
PR tree-optimization/45199
* tree-data-ref.c (dataref_stride_has_unit_type_stride): New.
(stores_zero_from_loop): Call dataref_stride_has_unit_type_stride.
* tree-loop-distribution.c (build_rdg_partition_for_component): Do
not call rdg_flag_similar_memory_accesses when
flag_tree_loop_distribute_patterns is set.
* gcc.dg/tree-ssa/ldist-15.c: New.
* gcc.dg/tree-ssa/ldist-16.c: New.
* gfortran.dg/ldist-pr45199.f: New.
---
gcc/ChangeLog | 9 +++++++++
gcc/testsuite/ChangeLog | 7 +++++++
gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c | 23 +++++++++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c | 21 +++++++++++++++++++++
gcc/testsuite/gfortran.dg/ldist-pr45199.f | 27 +++++++++++++++++++++++++++
gcc/tree-data-ref.c | 27 ++++++++++++++++++++++++++-
gcc/tree-loop-distribution.c | 5 +++--
7 files changed, 116 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
create mode 100644 gcc/testsuite/gfortran.dg/ldist-pr45199.f
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5706ef8..b73aeae 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2010-11-30 Sebastian Pop <sebastian.pop@amd.com>
+
+ PR tree-optimization/45199
+ * tree-data-ref.c (dataref_stride_has_unit_type_stride): New.
+ (stores_zero_from_loop): Call dataref_stride_has_unit_type_stride.
+ * tree-loop-distribution.c (build_rdg_partition_for_component): Do
+ not call rdg_flag_similar_memory_accesses when
+ flag_tree_loop_distribute_patterns is set.
+
2010-11-29 H.J. Lu <hongjiu.lu@intel.com>
PR driver/46712
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5792e66..20aaf04 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2010-11-30 Sebastian Pop <sebastian.pop@amd.com>
+
+ PR tree-optimization/45199
+ * gcc.dg/tree-ssa/ldist-15.c: New.
+ * gcc.dg/tree-ssa/ldist-16.c: New.
+ * gfortran.dg/ldist-pr45199.f: New.
+
2010-11-29 Nicola Pero <nicola.pero@meta-innovation.com>
* objc.dg/duplicate-class-1.m: New.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c
new file mode 100644
index 0000000..7ce3b95
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ldist-details" } */
+
+int x[1000];
+
+void foo (int n)
+{
+ int i;
+
+ for (i = 0; i < n; ++i)
+ {
+ x[2*i] = 0;
+ x[2*i + 1] = 1;
+ }
+}
+
+/* We should not apply loop distribution as it is not beneficial from
+ a data locality point of view. Also it is not possible to generate
+ a memset (0) as the write has a stride of 2. */
+
+/* { dg-final { scan-tree-dump-not "distributed: split to" "ldist" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_memset" "ldist" } } */
+/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
new file mode 100644
index 0000000..61e8e56
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ldist-details" } */
+
+int x[1000];
+
+void foo (int n)
+{
+ int i;
+
+ for (i = 0; i < n; ++i)
+ {
+ x[i] = 0;
+ x[2*i + 1] = 1;
+ }
+}
+
+/* We should apply loop distribution and generate a memset (0). */
+
+/* { dg-final { scan-tree-dump "distributed: split to 2" "ldist" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_memset" 2 "ldist" } } */
+/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gfortran.dg/ldist-pr45199.f b/gcc/testsuite/gfortran.dg/ldist-pr45199.f
new file mode 100644
index 0000000..2fab43a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/ldist-pr45199.f
@@ -0,0 +1,27 @@
+! { dg-do compile }
+! { dg-options "-O3 -fdump-tree-ldist-details" }
+
+ parameter(numlev=3,numoblev=1000)
+ integer i_otyp(numoblev,numlev), i_styp(numoblev,numlev)
+ logical l_numob(numoblev,numlev)
+ do ixe=1,numoblev
+ do iye=1,numlev
+ i_otyp(ixe,iye)=0
+ i_styp(ixe,iye)=0
+ l_numob(ixe,iye)=.false.
+ enddo
+ enddo
+ do i=1,m
+ do j=1,n
+ if (l_numob(i,j)) then
+ write(20,'(7I4,F12.2,4F16.10)') i_otyp(i,j),i_styp(i,j)
+ endif
+ enddo
+ enddo
+ end
+
+! GCC should apply memset zero loop distribution and it should not ICE.
+
+! { dg-final { scan-tree-dump "distributed: split to 9 loops" "ldist" } }
+! { dg-final { scan-tree-dump-times "__builtin_memset" 18 "ldist" } }
+! { dg-final { cleanup-tree-dump "ldist" } }
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 3cee320..17f34de 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -4974,6 +4974,30 @@ stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
free (bbs);
}
+/* Returns true when STMT is an assignment that contains a data
+ reference on its LHS with a stride of the same size as its unit
+ type. This is the same test as in generate_memset_zero. */
+
+static bool
+dataref_stride_has_unit_type_stride (gimple stmt)
+{
+ struct data_reference *dr = XCNEW (struct data_reference);
+ tree op0 = gimple_assign_lhs (stmt);
+
+ DR_STMT (dr) = stmt;
+ DR_REF (dr) = op0;
+ if (!dr_analyze_innermost (dr))
+ return false;
+
+ return dr_analyze_innermost (dr) &&
+ (integer_zerop (size_binop (MINUS_EXPR,
+ fold_convert (sizetype, DR_STEP (dr)),
+ TYPE_SIZE_UNIT (TREE_TYPE (op0))))
+ || integer_zerop (size_binop (PLUS_EXPR,
+ TYPE_SIZE_UNIT (TREE_TYPE (op0)),
+ fold_convert (sizetype, DR_STEP (dr)))));
+}
+
/* Initialize STMTS with all the statements of LOOP that contain a
store to memory of the form "A[i] = 0". */
@@ -4994,7 +5018,8 @@ stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
&& is_gimple_assign (stmt)
&& gimple_assign_rhs_code (stmt) == INTEGER_CST
&& (op = gimple_assign_rhs1 (stmt))
- && (integer_zerop (op) || real_zerop (op)))
+ && (integer_zerop (op) || real_zerop (op))
+ && dataref_stride_has_unit_type_stride (stmt))
VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
free (bbs);
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 007c4f3..2c2af2c 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -781,8 +781,9 @@ build_rdg_partition_for_component (struct graph *rdg, rdgc c,
and determine those vertices that have some memory affinity with
the current nodes in the component: these are stores to the same
arrays, i.e. we're taking care of cache locality. */
- rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
- other_stores);
+ if (!flag_tree_loop_distribute_patterns)
+ rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
+ other_stores);
rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
--
1.7.1