This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH GCC][11/13]Annotate partition by its parallelism execution type
On Fri, Jun 16, 2017 at 11:10 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Mon, Jun 12, 2017 at 7:03 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
>> Hi,
>> This patch checks and records if partition can be executed in parallel by
>> looking if there exists data dependence cycles. The information is needed
>> for distribution because the idea is to distribute parallel type partitions
>> away from sequential ones. I believe current distribution doesn't work
>> very well because it does blind distribution/fusion.
>> Bootstrap and test on x86_64 and AArch64. Is it OK?
>
> + /* In case of no data dependence. */
> + if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
> + return false;
> + /* Or the data dependence can be resolved by compilation time alias
> + check. */
> + else if (!alias_sets_conflict_p (get_alias_set (DR_REF (dr1)),
> + get_alias_set (DR_REF (dr2))))
> + return false;
>
> dependence analysis should use TBAA already, in which cases do you need this?
> It seems to fall foul of the easy mistake of not honoring GCCs memory model
> as well ... see dr_may_alias_p.
I see. Patch updated with this branch removed.
>
> + /* Further check if any data dependence prevents us from executing the
> + partition parallelly. */
> + EXECUTE_IF_SET_IN_BITMAP (partition->reads, 0, i, bi)
> + {
> + dr1 = (*datarefs_vec)[i];
> + EXECUTE_IF_SET_IN_BITMAP (partition->writes, 0, j, bj)
> + {
>
> what about write-write dependences?
>
> + EXECUTE_IF_SET_IN_BITMAP (partition->reads, 0, i, bi)
> + {
> + dr1 = (*datarefs_vec)[i];
> + EXECUTE_IF_SET_IN_BITMAP (partition->writes, i + 1, j, bj)
> + {
> + dr2 = (*datarefs_vec)[j];
> + /* Partition can only be executed sequentially if there is any
> + data dependence cycle. */
>
> exact copy of the loop nest follows?! Maybe you meant to iterate
> over writes in the first loop.
Yes, this is a copy-paste typo. Patch is also simplified because
read/write are recorded together now. Is it OK?
Thanks,
bin
2017-06-07 Bin Cheng <bin.cheng@arm.com>
* tree-loop-distribution.c (enum partition_type): New.
(struct partition): New field type.
(partition_merge_into): Update partition type.
(data_dep_in_cycle_p): New function.
(build_rdg_partition_for_vertex): Compute partition type.
(rdg_build_partitions): Dump partition type.
From f2313383cb516badacbd99a5bc0a0e4fceef1bbf Mon Sep 17 00:00:00 2001
From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com>
Date: Fri, 9 Jun 2017 13:11:59 +0100
Subject: [PATCH 10/13] partition-type-20170607.txt
---
gcc/tree-loop-distribution.c | 91 +++++++++++++++++++++++++++++++++++++++++---
1 file changed, 85 insertions(+), 6 deletions(-)
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index a2e543e..d741e9e 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -526,11 +526,19 @@ build_rdg (struct loop *loop, control_dependences *cd)
}
-
+/* Kind of distributed loop. */
enum partition_kind {
PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};
+/* Type of distributed loop. */
+enum partition_type {
+ /* The distributed loop can be executed parallelly. */
+ PTYPE_PARALLEL = 0,
+ /* The distributed loop has to be executed sequentially. */
+ PTYPE_SEQUENTIAL
+};
+
/* Partition for loop distribution. */
struct partition
{
@@ -544,6 +552,7 @@ struct partition
number of loop (latch) iterations. */
bool plus_one;
enum partition_kind kind;
+ enum partition_type type;
/* data-references a kind != PKIND_NORMAL partition is about. */
data_reference_p main_dr;
data_reference_p secondary_dr;
@@ -619,6 +628,9 @@ static void
partition_merge_into (partition *dest, partition *partition, enum fuse_type ft)
{
dest->kind = PKIND_NORMAL;
+ if (dest->type == PTYPE_PARALLEL)
+ dest->type = partition->type;
+
bitmap_ior_into (dest->stmts, partition->stmts);
if (partition_reduction_p (partition))
dest->reduction_p = true;
@@ -1115,6 +1127,42 @@ get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b)
return *slot;
}
+/* In reduced dependence graph RDG for loop distribution, return true if
+ dependence between references DR1 and DR2 leads to a dependence cycle
+ and such dependence cycle can't be resolved by runtime alias check. */
+
+static bool
+data_dep_in_cycle_p (struct graph *rdg,
+ data_reference_p dr1, data_reference_p dr2)
+{
+ struct data_dependence_relation *ddr;
+
+ /* Re-shuffle data-refs to be in topological order. */
+ if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
+ > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
+ std::swap (dr1, dr2);
+
+ ddr = get_data_dependence (rdg, dr1, dr2);
+
+ /* In case of no data dependence. */
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
+ return false;
+ /* For unknown data dependence or known data dependence which can't be
+ expressed in classic distance vector, we check if it can be resolved
+ by runtime alias check. If yes, we still consider data dependence
+ as won't introduce data dependence cycle. */
+ else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
+ || DDR_NUM_DIST_VECTS (ddr) == 0)
+ return !runtime_alias_check_p (ddr, NULL, true);
+ else if (DDR_NUM_DIST_VECTS (ddr) > 1)
+ return true;
+ else if (DDR_REVERSED_P (ddr)
+ || lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
+ return false;
+
+ return true;
+}
+
/* Returns a partition with all the statements needed for computing
the vertex V of the RDG, also including the loop exit conditions. */
@@ -1125,7 +1173,8 @@ build_rdg_partition_for_vertex (struct graph *rdg, int v)
auto_vec<int, 3> nodes;
unsigned i, j;
int x;
- data_reference_p dr;
+ data_reference_p dr1, dr2;
+ bitmap_iterator bi, bj;
graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
@@ -1135,15 +1184,44 @@ build_rdg_partition_for_vertex (struct graph *rdg, int v)
bitmap_set_bit (partition->loops,
loop_containing_stmt (RDG_STMT (rdg, x))->num);
- for (j = 0; RDG_DATAREFS (rdg, x).iterate (j, &dr); ++j)
+ for (j = 0; RDG_DATAREFS (rdg, x).iterate (j, &dr1); ++j)
{
- unsigned idx = (unsigned) DR_INDEX (dr);
+ unsigned idx = (unsigned) DR_INDEX (dr1);
gcc_assert (idx < datarefs_vec.length ());
+ /* Partition can only be executed sequentially if there is any
+ unknown data reference. */
+ if (!DR_BASE_ADDRESS (dr1) || !DR_OFFSET (dr1)
+ || !DR_INIT (dr1) || !DR_STEP (dr1))
+ partition->type = PTYPE_SEQUENTIAL;
+
bitmap_set_bit (partition->datarefs, idx);
}
}
+ if (partition->type == PTYPE_SEQUENTIAL)
+ return partition;
+
+ /* Further check if any data dependence prevents us from executing the
+ partition parallelly. */
+ EXECUTE_IF_SET_IN_BITMAP (partition->datarefs, 0, i, bi)
+ {
+ dr1 = datarefs_vec[i];
+ EXECUTE_IF_SET_IN_BITMAP (partition->datarefs, i + 1, j, bj)
+ {
+ dr2 = datarefs_vec[j];
+ if (DR_IS_READ (dr1) && DR_IS_READ (dr2))
+ continue;
+
+ /* Partition can only be executed sequentially if there is any
+ data dependence cycle. */
+ if (data_dep_in_cycle_p (rdg, dr1, dr2))
+ {
+ partition->type = PTYPE_SEQUENTIAL;
+ return partition;
+ }
+ }
+ }
return partition;
}
@@ -1388,8 +1466,9 @@ rdg_build_partitions (struct graph *rdg,
if (dump_file && (dump_flags & TDF_DETAILS))
{
- fprintf (dump_file, "ldist useful partition:\n");
- dump_bitmap (dump_file, partition->stmts);
+ fprintf (dump_file, "ldist creates useful %s partition:\n",
+ partition->type == PTYPE_PARALLEL ? "parallel" : "sequent");
+ bitmap_print (dump_file, partition->stmts, " ", "\n");
}
partitions->safe_push (partition);
--
1.9.1