This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][3/n] loop distribution TLC
- From: Richard Guenther <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 31 May 2012 16:33:56 +0200 (CEST)
- Subject: [PATCH][3/n] loop distribution TLC
This re-organizes builtin detection and generation properly into
an analysis and transform phase (noting that when we fail late
we will generate wrong code at the moment - eventually non-fatal,
but at least it will have duplicate work in the left-over loop).
With this in place adding more kinds of builtins to detect should
not be any rocket science anymore.
Bootstrap and regtest pending on x86_64-unknown-linux-gnu.
Richard.
2012-05-31 Richard Guenther <rguenther@suse.de>
* tree-loop-distribution.c (enum partition_kind): New enum.
(struct partition_s): Add kind and main_stmt members.
(partition_alloc): Initialize kind to PKIND_NORMAL.
(partition_builtin_p): New function.
(copy_loop_before): Remove failure path and assert instead.
(generate_loops_for_partition): Likewise.
(generate_memset_zero): Fold into ...
(generate_memset_builtin): ... this.
(classify_partition): New function with code from
can_generate_builtin and generate_builtin.
(generate_builtin): Remove.
(can_generate_builtin): Likewise.
(fuse_partitions_with_similar_memory_accesses): Call
partition_builtin_p instead of can_generate_builtin.
(rdg_build_partitions): Do not call
fuse_partitions_with_similar_memory_accesses here...
(ldist_gen): ... but here after classifying all partitions.
Remove failure path of generate_code_for_partition.
(generate_code_for_partition): Generate code according
to partition classification.
Index: trunk/gcc/tree-loop-distribution.c
===================================================================
*** trunk.orig/gcc/tree-loop-distribution.c 2012-05-31 15:58:12.000000000 +0200
--- trunk/gcc/tree-loop-distribution.c 2012-05-31 16:26:17.255459534 +0200
*************** along with GCC; see the file COPYING3.
*** 52,60 ****
--- 52,65 ----
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
+ enum partition_kind { PKIND_NORMAL, PKIND_MEMSET };
+
typedef struct partition_s
{
bitmap stmts;
+ enum partition_kind kind;
+ /* Main statement a kind != PKIND_NORMAL partition is about. */
+ gimple main_stmt;
} *partition_t;
DEF_VEC_P (partition_t);
*************** partition_alloc (bitmap stmts)
*** 67,72 ****
--- 72,78 ----
{
partition_t partition = XCNEW (struct partition_s);
partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
+ partition->kind = PKIND_NORMAL;
return partition;
}
*************** partition_free (partition_t partition)
*** 79,84 ****
--- 85,97 ----
free (partition);
}
+ /* Returns true if the partition can be generated as a builtin. */
+
+ static bool
+ partition_builtin_p (partition_t partition)
+ {
+ return partition->kind != PKIND_NORMAL;
+ }
/* If bit I is not set, it means that this node represents an
operation that has already been performed, and that should not be
*************** copy_loop_before (struct loop *loop)
*** 183,198 ****
struct loop *res;
edge preheader = loop_preheader_edge (loop);
- if (!single_exit (loop))
- return NULL;
-
initialize_original_copy_tables ();
res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
free_original_copy_tables ();
- if (!res)
- return NULL;
-
update_phis_for_loop_copy (loop, res);
rename_variables_in_loop (res);
--- 196,206 ----
struct loop *res;
edge preheader = loop_preheader_edge (loop);
initialize_original_copy_tables ();
res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
+ gcc_assert (res != NULL);
free_original_copy_tables ();
update_phis_for_loop_copy (loop, res);
rename_variables_in_loop (res);
*************** create_bb_after_loop (struct loop *loop)
*** 216,225 ****
copied when COPY_P is true. All the statements not flagged in the
PARTITION bitmap are removed from the loop or from its copy. The
statements are indexed in sequence inside a basic block, and the
! basic blocks of a loop are taken in dom order. Returns true when
! the code gen succeeded. */
! static bool
generate_loops_for_partition (struct loop *loop, partition_t partition,
bool copy_p)
{
--- 224,232 ----
copied when COPY_P is true. All the statements not flagged in the
PARTITION bitmap are removed from the loop or from its copy. The
statements are indexed in sequence inside a basic block, and the
! basic blocks of a loop are taken in dom order. */
! static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
bool copy_p)
{
*************** generate_loops_for_partition (struct loo
*** 230,242 ****
if (copy_p)
{
loop = copy_loop_before (loop);
create_preheader (loop, CP_SIMPLE_PREHEADERS);
create_bb_after_loop (loop);
}
- if (loop == NULL)
- return false;
-
/* Remove stmts not in the PARTITION bitmap. The order in which we
visit the phi nodes and the statements is exactly as in
stmts_from_loop. */
--- 237,247 ----
if (copy_p)
{
loop = copy_loop_before (loop);
+ gcc_assert (loop != NULL);
create_preheader (loop, CP_SIMPLE_PREHEADERS);
create_bb_after_loop (loop);
}
/* Remove stmts not in the PARTITION bitmap. The order in which we
visit the phi nodes and the statements is exactly as in
stmts_from_loop. */
*************** generate_loops_for_partition (struct loo
*** 293,299 ****
}
free (bbs);
- return true;
}
/* Build the size argument for a memset call. */
--- 298,303 ----
*************** build_size_arg_loc (location_t loc, tree
*** 313,331 ****
return x;
}
! /* Generate a call to memset. Return true when the operation succeeded. */
static void
! generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
! gimple_stmt_iterator bsi)
{
! tree addr_base, nb_bytes;
! bool res = false;
gimple_seq stmt_list = NULL, stmts;
- gimple fn_call;
- tree mem, fn;
struct data_reference *dr = XCNEW (struct data_reference);
! location_t loc = gimple_location (stmt);
DR_STMT (dr) = stmt;
DR_REF (dr) = op0;
--- 317,345 ----
return x;
}
! /* Generate a call to memset for PARTITION in LOOP. */
static void
! generate_memset_builtin (struct loop *loop, partition_t partition)
{
! gimple_stmt_iterator gsi;
! gimple stmt, fn_call;
! tree op0, nb_iter, mem, fn, addr_base, nb_bytes;
gimple_seq stmt_list = NULL, stmts;
struct data_reference *dr = XCNEW (struct data_reference);
! location_t loc;
! bool res;
!
! stmt = partition->main_stmt;
! loc = gimple_location (stmt);
! op0 = gimple_assign_lhs (stmt);
! if (gimple_bb (stmt) == loop->latch)
! nb_iter = number_of_latch_executions (loop);
! else
! nb_iter = number_of_exit_cond_executions (loop);
!
! /* The new statements will be placed before LOOP. */
! gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
DR_STMT (dr) = stmt;
DR_REF (dr) = op0;
*************** generate_memset_zero (gimple stmt, tree
*** 353,359 ****
fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
gimple_seq_add_stmt (&stmt_list, fn_call);
! gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "generated memset zero\n");
--- 367,373 ----
fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
gimple_seq_add_stmt (&stmt_list, fn_call);
! gsi_insert_seq_after (&gsi, stmt_list, GSI_CONTINUE_LINKING);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "generated memset zero\n");
*************** generate_memset_zero (gimple stmt, tree
*** 361,473 ****
free_data_ref (dr);
}
! /* Tries to generate a builtin function for the instructions of LOOP
! pointed to by the bits set in PARTITION. Returns true when the
! operation succeeded. */
! static bool
! generate_builtin (struct loop *loop, partition_t partition, bool copy_p)
{
! bool res = false;
! unsigned i, x = 0;
basic_block *bbs;
! gimple write = NULL;
! gimple_stmt_iterator bsi;
! tree nb_iter = number_of_exit_cond_executions (loop);
!
! if (!nb_iter || nb_iter == chrec_dont_know)
! return false;
bbs = get_loop_body_in_dom_order (loop);
! for (i = 0; i < loop->num_nodes; i++)
! {
! basic_block bb = bbs[i];
!
! for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
! x++;
!
! for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
! {
! gimple stmt = gsi_stmt (bsi);
!
! if (gimple_code (stmt) == GIMPLE_LABEL
! || is_gimple_debug (stmt))
! continue;
!
! if (!bitmap_bit_p (partition->stmts, x++))
! continue;
!
! /* If the stmt has uses outside of the loop fail.
! ??? If the stmt is generated in another partition that
! is not created as builtin we can ignore this. */
! if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
! {
! if (dump_file && (dump_flags & TDF_DETAILS))
! fprintf (dump_file, "not generating builtin, partition has "
! "scalar uses outside of the loop\n");
! goto end;
! }
!
! if (is_gimple_assign (stmt)
! && !is_gimple_reg (gimple_assign_lhs (stmt)))
! {
! /* Don't generate the builtins when there are more than
! one memory write. */
! if (write != NULL)
! goto end;
!
! write = stmt;
! if (bb == loop->latch)
! nb_iter = number_of_latch_executions (loop);
! }
! }
! }
!
! if (!stmt_with_adjacent_zero_store_dr_p (write))
! goto end;
!
! /* The new statements will be placed before LOOP. */
! bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
! generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
! res = true;
!
! /* If this is the last partition for which we generate code, we have
! to destroy the loop. */
! if (!copy_p)
! {
! unsigned nbbs = loop->num_nodes;
! edge exit = single_exit (loop);
! basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
! redirect_edge_pred (exit, src);
! exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
! exit->flags |= EDGE_FALLTHRU;
! cancel_loop_tree (loop);
! rescan_loop_exit (exit, false, true);
!
! for (i = 0; i < nbbs; i++)
! delete_basic_block (bbs[i]);
! set_immediate_dominator (CDI_DOMINATORS, dest,
! recompute_dominator (CDI_DOMINATORS, dest));
! }
!
! end:
free (bbs);
! return res;
}
! /* Generates code for PARTITION. For simple loops, this function can
! generate a built-in. */
! static bool
generate_code_for_partition (struct loop *loop, partition_t partition,
bool copy_p)
{
! if (generate_builtin (loop, partition, copy_p))
! return true;
! return generate_loops_for_partition (loop, partition, copy_p);
}
--- 375,430 ----
free_data_ref (dr);
}
! /* Remove and destroy the loop LOOP. */
! static void
! destroy_loop (struct loop *loop)
{
! unsigned nbbs = loop->num_nodes;
! edge exit = single_exit (loop);
! basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
basic_block *bbs;
! unsigned i;
bbs = get_loop_body_in_dom_order (loop);
! redirect_edge_pred (exit, src);
! exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
! exit->flags |= EDGE_FALLTHRU;
! cancel_loop_tree (loop);
! rescan_loop_exit (exit, false, true);
! for (i = 0; i < nbbs; i++)
! delete_basic_block (bbs[i]);
free (bbs);
!
! set_immediate_dominator (CDI_DOMINATORS, dest,
! recompute_dominator (CDI_DOMINATORS, dest));
}
! /* Generates code for PARTITION. */
! static void
generate_code_for_partition (struct loop *loop, partition_t partition,
bool copy_p)
{
! switch (partition->kind)
! {
! case PKIND_MEMSET:
! generate_memset_builtin (loop, partition);
! /* If this is the last partition for which we generate code, we have
! to destroy the loop. */
! if (!copy_p)
! destroy_loop (loop);
! break;
!
! case PKIND_NORMAL:
! generate_loops_for_partition (loop, partition, copy_p);
! break;
! default:
! gcc_unreachable ();
! }
}
*************** rdg_build_components (struct graph *rdg,
*** 828,859 ****
BITMAP_FREE (saved_components);
}
! /* Returns true when it is possible to generate a builtin pattern for
! the PARTITION of RDG. For the moment we detect only the memset
! zero pattern. */
! static bool
! can_generate_builtin (struct graph *rdg, partition_t partition)
{
- unsigned i;
bitmap_iterator bi;
! int nb_reads = 0;
! int nb_writes = 0;
! int stores_zero = 0;
EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
! if (RDG_MEM_READS_STMT (rdg, i))
! nb_reads++;
! else if (RDG_MEM_WRITE_STMT (rdg, i))
! {
! gimple stmt = RDG_STMT (rdg, i);
! nb_writes++;
! if (!gimple_has_volatile_ops (stmt)
! && stmt_with_adjacent_zero_store_dr_p (stmt))
! stores_zero++;
! }
! return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
--- 785,855 ----
BITMAP_FREE (saved_components);
}
! /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
! For the moment we detect only the memset zero pattern. */
! static void
! classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
bitmap_iterator bi;
! unsigned i;
! tree nb_iter;
!
! partition->kind = PKIND_NORMAL;
! partition->main_stmt = NULL;
!
! /* Perform general partition disqualification for builtins. */
! nb_iter = number_of_exit_cond_executions (loop);
! if (!nb_iter || nb_iter == chrec_dont_know)
! return;
EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
! {
! gimple stmt = RDG_STMT (rdg, i);
!
! if (gimple_has_volatile_ops (stmt))
! return;
! /* If the stmt has uses outside of the loop fail.
! ??? If the stmt is generated in another partition that
! is not created as builtin we can ignore this. */
! if (gimple_code (stmt) != GIMPLE_PHI
! && stmt_has_scalar_dependences_outside_loop (loop, stmt))
! {
! if (dump_file && (dump_flags & TDF_DETAILS))
! fprintf (dump_file, "not generating builtin, partition has "
! "scalar uses outside of the loop\n");
! return;
! }
! }
!
! /* Detect memset. */
! EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
! {
! gimple stmt = RDG_STMT (rdg, i);
!
! if (gimple_code (stmt) == GIMPLE_PHI)
! continue;
!
! /* Any scalar stmts are ok. */
! if (!gimple_vuse (stmt))
! continue;
!
! /* Exactly one store. */
! if (gimple_assign_single_p (stmt)
! && !is_gimple_reg (gimple_assign_lhs (stmt)))
! {
! if (partition->main_stmt != NULL)
! return;
! partition->main_stmt = stmt;
! }
! else
! return;
! }
!
! if (partition->main_stmt != NULL
! && stmt_with_adjacent_zero_store_dr_p (partition->main_stmt))
! partition->kind = PKIND_MEMSET;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
*************** fuse_partitions_with_similar_memory_acce
*** 891,900 ****
partition_t partition1, partition2;
FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1)
! if (!can_generate_builtin (rdg, partition1))
FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2)
if (p1 != p2
! && !can_generate_builtin (rdg, partition2)
&& similar_memory_accesses (rdg, partition1, partition2))
{
bitmap_ior_into (partition1->stmts, partition2->stmts);
--- 887,896 ----
partition_t partition1, partition2;
FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1)
! if (!partition_builtin_p (partition1))
FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2)
if (p1 != p2
! && !partition_builtin_p (partition2)
&& similar_memory_accesses (rdg, partition1, partition2))
{
bitmap_ior_into (partition1->stmts, partition2->stmts);
*************** rdg_build_partitions (struct graph *rdg,
*** 971,978 ****
VEC_safe_push (partition_t, heap, *partitions, partition);
else
partition_free (partition);
-
- fuse_partitions_with_similar_memory_accesses (rdg, partitions);
}
/* Dump to FILE the PARTITIONS. */
--- 967,972 ----
*************** ldist_gen (struct loop *loop, struct gra
*** 1101,1111 ****
processed);
BITMAP_FREE (processed);
nbp = VEC_length (partition_t, partitions);
if (nbp == 0
|| (nbp == 1
! && !can_generate_builtin (rdg,
! VEC_index (partition_t, partitions, 0)))
|| (nbp > 1
&& partition_contains_all_rw (rdg, partitions)))
goto ldist_done;
--- 1095,1109 ----
processed);
BITMAP_FREE (processed);
+ FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
+ classify_partition (loop, rdg, partition);
+
+ fuse_partitions_with_similar_memory_accesses (rdg, &partitions);
+
nbp = VEC_length (partition_t, partitions);
if (nbp == 0
|| (nbp == 1
! && !partition_builtin_p (VEC_index (partition_t, partitions, 0)))
|| (nbp > 1
&& partition_contains_all_rw (rdg, partitions)))
goto ldist_done;
*************** ldist_gen (struct loop *loop, struct gra
*** 1114,1121 ****
dump_rdg_partitions (dump_file, partitions);
FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
! if (!generate_code_for_partition (loop, partition, i < nbp - 1))
! goto ldist_done;
rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
mark_sym_for_renaming (gimple_vop (cfun));
--- 1112,1118 ----
dump_rdg_partitions (dump_file, partitions);
FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
! generate_code_for_partition (loop, partition, i < nbp - 1);
rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
mark_sym_for_renaming (gimple_vop (cfun));