[graphite][PATCH] split autopar in a clearer way
Li Feng
nemokingdom@gmail.com
Fri May 1 09:05:00 GMT 2009
Hi,
This is the patch for autopar pass:
mainly seperate autopar's code generation part(construcion of
NITER
and REDUCTION_LIST) from loop_parallel_p, and bypass the
failed parallel checkings due to a not correctly updated
frequency and count in loop after Graphite.
Changelog:
Split autopar in a clearer way.
* tree-parloops.c (loop_parallel_p): Remove construction of
NITER and REDUCTION_LIST.
(try_get_loop_niter): New.
(try_create_reduction_list): New.
(parallelize_loops): Bypass the failed checkings in autopar
when can_be_parallel in loop structure is set to true.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 04fe6f4..267c958 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -343,31 +343,16 @@ gather_scalar_reductions (loop_p loop, htab_t
reduction_list)
}
}
-/* Returns true if the iterations of LOOP are independent on each other (that
- is, if we can execute them in parallel), and if LOOP satisfies other
- conditions that we need to be able to parallelize it. Description of number
- of iterations is stored to NITER. Reduction analysis is done, if
- reductions are found, they are inserted to the REDUCTION_LIST. */
+/* Try to initialize NITER for code generation part. */
static bool
-loop_parallel_p (struct loop *loop, htab_t reduction_list,
- struct tree_niter_desc *niter)
+try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
{
edge exit = single_dom_exit (loop);
- VEC (ddr_p, heap) * dependence_relations;
- VEC (data_reference_p, heap) *datarefs;
- lambda_trans_matrix trans;
- bool ret = false;
- gimple_stmt_iterator gsi;
- /* Only consider innermost loops with just one exit. The innermost-loop
- restriction is not necessary, but it makes things simpler. */
- if (loop->inner || !exit)
+ if (!exit)
return false;
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "\nConsidering loop %d\n", loop->num);
-
/* We need to know # of iterations, and there should be no uses of values
defined inside loop outside of it, unless the values are invariants of
the loop. */
@@ -378,6 +363,20 @@ loop_parallel_p (struct loop *loop, htab_t reduction_list,
return false;
}
+ return true;
+}
+
+/* Try to initialize REDUCTION_LIST for code generation part. */
+
+static bool
+try_create_reduction_list (loop_p loop, htab_t reduction_list)
+{
+ edge exit = single_dom_exit (loop);
+ gimple_stmt_iterator gsi;
+
+ if (!exit)
+ return false;
+
gather_scalar_reductions (loop, reduction_list);
for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -410,13 +409,13 @@ loop_parallel_p (struct loop *loop, htab_t reduction_list,
}
reduc_phi = NULL;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
- {
- if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
- {
- reduc_phi = USE_STMT (use_p);
- break;
- }
- }
+ {
+ if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
+ {
+ reduc_phi = USE_STMT (use_p);
+ break;
+ }
+ }
red = reduction_phi (reduction_list, reduc_phi);
if (red == NULL)
{
@@ -432,7 +431,6 @@ loop_parallel_p (struct loop *loop, htab_t reduction_list,
fprintf (dump_file, "reduction stmt is ");
print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
}
-
}
}
@@ -459,13 +457,29 @@ loop_parallel_p (struct loop *loop, htab_t reduction_list,
}
}
- /* We need to version the loop to verify assumptions in runtime. */
- if (!can_duplicate_loop_p (loop))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, " FAILED: cannot be duplicated\n");
- return false;
- }
+ return true;
+}
+
+/* Data dependency analysis. Returns true if the iterations of LOOP
+ are independent on each other (that is, if we can execute them
+ in parallel). */
+
+static bool
+loop_parallel_p (struct loop *loop)
+{
+ edge exit = single_dom_exit (loop);
+ VEC (ddr_p, heap) * dependence_relations;
+ VEC (data_reference_p, heap) *datarefs;
+ lambda_trans_matrix trans;
+ bool ret = false;
+
+ /* Only consider innermost loops with just one exit. The innermost-loop
+ restriction is not necessary, but it makes things simpler. */
+ if (loop->inner || !exit)
+ return false;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nConsidering loop %d\n", loop->num);
/* Check for problems with dependences. If the loop can be reversed,
the iterations are independent. */
@@ -1945,16 +1959,28 @@ parallelize_loops (void)
FOR_EACH_LOOP (li, loop, 0)
{
htab_empty (reduction_list);
- if (/* Do not bother with loops in cold areas. */
- optimize_loop_nest_for_size_p (loop)
- /* Or loops that roll too little. */
- || expected_loop_iterations (loop) <= n_threads
- /* And of course, the loop must be parallelizable. */
- || !can_duplicate_loop_p (loop)
+ if (/* And of course, the loop must be parallelizable. */
+ !can_duplicate_loop_p (loop)
|| loop_has_blocks_with_irreducible_flag (loop)
/* FIXME: the check for vector phi nodes could be removed. */
- || loop_has_vector_phi_nodes (loop)
- || !loop_parallel_p (loop, reduction_list, &niter_desc))
+ || loop_has_vector_phi_nodes (loop))
+ continue;
+
+ /* FIXME: Bypass this check as graphite doesn't update the
+ count and frequency correctly now. */
+ if (!loop->can_be_parallel
+ && (expected_loop_iterations (loop) <= n_threads
+ /* Do not bother with loops in cold areas. */
+ || optimize_loop_nest_for_size_p (loop)))
+ continue;
+
+ if (!try_get_loop_niter (loop, &niter_desc))
+ continue;
+
+ if (!try_create_reduction_list (loop, reduction_list))
+ continue;
+
+ if (!loop->can_be_parallel && !loop_parallel_p (loop))
continue;
changed = true;
Li
More information about the Gcc-patches
mailing list