* tree-flow.h (enum move_pos): Moved to tree-ssa-loop-im.c.
* tree-ssa-loop-im.c: Include diagnostic-core.h for warning_at().
(enum move_pos): Moved here.
(movement_possibility): Made static.  Reject memory operations in loops
with too many memory references to handle.
(determine_max_movement): Take loops_with_too_many_memrefs argument.
For statements referencing memory, find the outermost superloop that is
not in the loops_with_too_many_memrefs set.
(determine_invariantness_stmt): Take loops_with_too_many_memrefs via
dom_walk_data.global_data, and pass it along where necessary.  Hoist
"pos == MOVE_POSSIBLE" test.
(determine_invariantness): Take loops_with_too_many_memrefs argument.
(move_computations): Likewise, but unused for now.
(gather_mem_refs_stmt): Fail if there are too many memory references.
Use PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS as threshold.  Add disabled
optimization warning.
(gather_mem_refs_in_loops): Take loops_with_too_many_memrefs argument.
Propagate it from inner loops to outer loops.  Do not propagate recorded
memory references for loops on which memory optimizations are disabled.
(create_vop_ref_mapping): Take loops_with_too_many_memrefs argument.
Don't create a mapping on loops that are in this set.
(analyze_memory_references): Take loops_with_too_many_memrefs argument
and let subroutines fill it.
(store_motion): Take loops_with_too_many_memrefs argument.  Skip loops
that are in this set.
(tree_ssa_lim): Allocate, pass, and free loops_with_too_many_memrefs.

Index: tree-flow.h
===================================================================
--- tree-flow.h (revision 196576)
+++ tree-flow.h (working copy)
@@ -688,16 +688,6 @@
 extern void propagate_threaded_block_debug_into (basic_block, basic_block);
 
 /* In tree-ssa-loop-im.c  */
-/* The possibilities of statement movement.  */
-
-enum move_pos
-  {
-    MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
-    MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
-				   become executed -- memory accesses, ... */
-    MOVE_POSSIBLE		/* Unlimited movement.  */
-  };
-extern enum move_pos movement_possibility (gimple);
 char *get_lsm_tmp_name (tree, unsigned);
 
 /* In tree-flow-inline.h  */
Index: tree-ssa-loop-im.c
===================================================================
--- tree-ssa-loop-im.c (revision 196576)
+++ tree-ssa-loop-im.c (working copy)
@@ -20,6 +20,7 @@
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
+#include "diagnostic-core.h"
 #include "tm.h"
 #include "tree.h"
 #include "tm_p.h"
@@ -58,6 +59,16 @@
 	 something;
      }  */
 
+/* The possibilities of statement movement.  */
+
+enum move_pos
+{
+  MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
+  MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
+				   become executed -- memory accesses, ... */
+  MOVE_POSSIBLE			/* Unlimited movement.  */
+};
+
 /* A type for the list of statements that have to be moved in order to be able
    to hoist an invariant computation.  */
 
@@ -333,8 +344,9 @@
    because it may trap), return MOVE_PRESERVE_EXECUTION.  Otherwise return
    MOVE_IMPOSSIBLE.  */
 
-enum move_pos
-movement_possibility (gimple stmt)
+static enum move_pos
+movement_possibility (gimple stmt, struct loop *loop,
+		      bitmap loops_with_too_many_memrefs)
 {
   tree lhs;
   enum move_pos ret = MOVE_POSSIBLE;
@@ -347,6 +359,10 @@
       return MOVE_POSSIBLE;
     }
 
+  if (gimple_has_mem_ops (stmt)
+      && bitmap_bit_p (loops_with_too_many_memrefs, loop->num))
+    return MOVE_IMPOSSIBLE;
+
   if (gimple_code (stmt) == GIMPLE_PHI
       && gimple_phi_num_args (stmt) <= 2
       && !virtual_operand_p (gimple_phi_result (stmt))
@@ -741,14 +757,19 @@
    STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
    the outermost loop in that the value computed by STMT is invariant.
    If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
-   we preserve the fact whether STMT is executed.  It also fills other related
-   information to LIM_DATA (STMT).
+   we preserve the fact whether STMT is executed.  It also fills other
+   related information to LIM_DATA (STMT).
+
+   Loops with too many memory references act as barriers, i.e. statements
+   that reference memory cannot be moved into or through outer loops in the
+   LOOPS_WITH_TOO_MANY_MEMREFS set.
 
-   The function returns false if STMT cannot be hoisted outside of the loop it
-   is defined in, and true otherwise.  */
+   The function returns false if STMT cannot be hoisted outside of the loop
+   it is defined in, and true otherwise.  */
 
 static bool
-determine_max_movement (gimple stmt, bool must_preserve_exec)
+determine_max_movement (gimple stmt, bool must_preserve_exec,
+			bitmap loops_with_too_many_memrefs)
 {
   basic_block bb = gimple_bb (stmt);
   struct loop *loop = bb->loop_father;
@@ -757,10 +778,31 @@
   tree val;
   ssa_op_iter iter;
 
-  if (must_preserve_exec)
-    level = ALWAYS_EXECUTED_IN (bb);
+  /* If STMT does not reference memory, the maximum outer loop is
+     easily found.  Otherwise we need to walk down the loop tree.  */
+  if (bitmap_empty_p (loops_with_too_many_memrefs)
+      || ! gimple_has_mem_ops (stmt))
+    {
+      if (must_preserve_exec)
+	level = ALWAYS_EXECUTED_IN (bb);
+      else
+	level = superloop_at_depth (loop, 1);
+    }
   else
-    level = superloop_at_depth (loop, 1);
+    {
+      for (level = loop;
+	   loop_outer (level) != current_loops->tree_root;
+	   level = loop_outer (level))
+	{
+	  if ((must_preserve_exec && level == ALWAYS_EXECUTED_IN (bb))
+	      || bitmap_bit_p (loops_with_too_many_memrefs,
			       loop_outer (level)->num))
+	    break;
+	}
+      if (level == loop)
+	return false;
+    }
+
   lim_data->max_loop = level;
 
   if (gimple_code (stmt) == GIMPLE_PHI)
@@ -1040,7 +1082,7 @@
    Callback for walk_dominator_tree.  */
 
 static void
-determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
+determine_invariantness_stmt (struct dom_walk_data *dw_data,
			      basic_block bb)
 {
   enum move_pos pos;
@@ -1049,6 +1091,7 @@
   bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
   struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
   struct lim_aux_data *lim_data;
+  bitmap loops_with_too_many_memrefs = (bitmap) dw_data->global_data;
 
   if (!loop_outer (bb->loop_father))
     return;
@@ -1069,14 +1112,16 @@
     {
      stmt = gsi_stmt (bsi);
 
-      pos = movement_possibility (stmt);
+      pos = movement_possibility (stmt, bb->loop_father,
+				  loops_with_too_many_memrefs);
      if (pos == MOVE_IMPOSSIBLE)
	continue;
 
      lim_data = init_lim_data (stmt);
      lim_data->always_executed_in = outermost;
 
-      if (!determine_max_movement (stmt, false))
+      if (!determine_max_movement (stmt, false,
+				   loops_with_too_many_memrefs))
	{
	  lim_data->max_loop = NULL;
	  continue;
@@ -1098,7 +1143,8 @@
     {
      stmt = gsi_stmt (bsi);
 
-      pos = movement_possibility (stmt);
+      pos = movement_possibility (stmt, bb->loop_father,
+				  loops_with_too_many_memrefs);
      if (pos == MOVE_IMPOSSIBLE)
	{
	  if (nonpure_call_p (stmt))
@@ -1116,7 +1162,8 @@
	  continue;
	}
 
-      if (is_gimple_assign (stmt)
+      if (pos == MOVE_POSSIBLE
+	  && is_gimple_assign (stmt)
	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
	      == GIMPLE_BINARY_RHS))
	{
@@ -1127,8 +1174,7 @@
 
	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
	     to be hoisted out of loop, saving expensive divide.  */
-	  if (pos == MOVE_POSSIBLE
-	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
+	  if (gimple_assign_rhs_code (stmt) == RDIV_EXPR
	      && flag_unsafe_math_optimizations
	      && !flag_trapping_math
	      && ol1 != NULL
@@ -1138,8 +1184,7 @@
	  /* If the shift count is invariant, convert (A >> B) & 1 to
	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
	     saving an expensive shift.  */
-	  if (pos == MOVE_POSSIBLE
-	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
+	  if (gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
	      && integer_onep (op1)
	      && TREE_CODE (op0) == SSA_NAME
	      && has_single_use (op0))
@@ -1152,7 +1197,8 @@
      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
	continue;
 
-      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
+      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION,
+				   loops_with_too_many_memrefs))
	{
	  lim_data->max_loop = NULL;
	  continue;
@@ -1177,13 +1223,14 @@
    each statement.  */
 
 static void
-determine_invariantness (void)
+determine_invariantness (bitmap loops_with_too_many_memrefs)
 {
   struct dom_walk_data walk_data;
 
   memset (&walk_data, 0, sizeof (struct dom_walk_data));
   walk_data.dom_direction = CDI_DOMINATORS;
   walk_data.before_dom_children = determine_invariantness_stmt;
+  walk_data.global_data = loops_with_too_many_memrefs;
 
   init_walk_dominator_tree (&walk_data);
   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
@@ -1329,7 +1376,7 @@
    LIM_DATA structures associated with each statement.*/
 
 static unsigned int
-move_computations (void)
+move_computations (bitmap loops_with_too_many_memrefs ATTRIBUTE_UNUSED)
 {
   struct dom_walk_data walk_data;
   unsigned int todo = 0;
@@ -1524,8 +1571,7 @@
   mem_ref_locs_p accs;
   bitmap ril = memory_accesses.refs_in_loop[loop->num];
 
-  if (ref->accesses_in_loop.length ()
-      <= (unsigned) loop->num)
+  if (ref->accesses_in_loop.length () <= (unsigned) loop->num)
     ref->accesses_in_loop.safe_grow_cleared (loop->num + 1);
   accs = ref->accesses_in_loop[loop->num];
   if (!accs)
@@ -1556,9 +1602,10 @@
 /* Gathers memory references in statement STMT in LOOP, storing the
    information about them in the memory_accesses structure.  Marks
    the vops accessed through unrecognized statements there as
-   well.  */
+   well.  Return true if all is well, false if something happened
+   that is fatal to the rest of the LIM pass.  */
 
-static void
+static bool
 gather_mem_refs_stmt (struct loop *loop, gimple stmt)
 {
   tree *mem = NULL;
@@ -1569,12 +1616,24 @@
   unsigned id;
 
   if (!gimple_vuse (stmt))
-    return;
+    return true;
+
+  id = memory_accesses.refs_list.length ();
+  if (id > (unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "Too many memory references, giving up on loop...\n");
+      warning_at (gimple_location (stmt),
+		  OPT_Wdisabled_optimization,
+		  "-ftree-loop-im: number of memory references in loop "
+		  "exceeds the --param loop-max-datarefs-for-datadeps "
+		  "threshold");
+      return false;
+    }
 
   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
   if (!mem)
     {
-      id = memory_accesses.refs_list.length ();
      ref = mem_ref_alloc (error_mark_node, 0, id);
      memory_accesses.refs_list.safe_push (ref);
      if (dump_file && (dump_flags & TDF_DETAILS))
@@ -1585,7 +1644,7 @@
      if (gimple_vdef (stmt))
	mark_ref_stored (ref, loop);
      record_mem_ref_loc (ref, loop, stmt, mem);
-      return;
+      return true;
     }
 
   hash = iterative_hash_expr (*mem, 0);
@@ -1615,34 +1674,56 @@
    mark_ref_stored (ref, loop);
   record_mem_ref_loc (ref, loop, stmt, mem);
-  return;
+  return true;
 }
 
-/* Gathers memory references in loops.  */
+/* Gathers memory references in loops.
+   Record in LOOPS_WITH_TOO_MANY_MEMREFS loops that have so many memory
+   referencing statements that they are considered too large to handle.  */
 
 static void
-gather_mem_refs_in_loops (void)
+gather_mem_refs_in_loops (bitmap loops_with_too_many_memrefs)
 {
-  gimple_stmt_iterator bsi;
-  basic_block bb;
   struct loop *loop;
   loop_iterator li;
   bitmap lrefs, alrefs, alrefso;
+  basic_block bb;
+  bool all_ok = true;
 
   FOR_EACH_BB (bb)
     {
-      loop = bb->loop_father;
+      gimple_stmt_iterator bsi;
+      struct loop *loop = bb->loop_father;
      if (loop == current_loops->tree_root)
	continue;
 
-      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
-	gather_mem_refs_stmt (loop, gsi_stmt (bsi));
+      for (bsi = gsi_start_bb (bb);
+	   !gsi_end_p (bsi) && all_ok;
+	   gsi_next (&bsi))
+	all_ok = gather_mem_refs_stmt (loop, gsi_stmt (bsi));
+
+      if (! all_ok)
+	bitmap_set_bit (loops_with_too_many_memrefs, loop->num);
+    }
+
+  /* Propagate the information about loops with too many memory
+     references up the loop hierarchy.  */
+  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
+    {
+      struct loop *outer = loop_outer (loop);
+      if (outer == current_loops->tree_root
+	  || ! bitmap_bit_p (loops_with_too_many_memrefs, loop->num))
+	continue;
+      bitmap_set_bit (loops_with_too_many_memrefs, outer->num);
     }
 
   /* Propagate the information about accessed memory references up
      the loop hierarchy.  */
   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
     {
+      if (bitmap_bit_p (loops_with_too_many_memrefs, loop->num))
+	continue;
+
      lrefs = memory_accesses.refs_in_loop[loop->num];
      alrefs = memory_accesses.all_refs_in_loop[loop->num];
      bitmap_ior_into (alrefs, lrefs);
@@ -1685,21 +1766,22 @@
    references in this loop that touch the operand.  */
 
 static void
-create_vop_ref_mapping (void)
+create_vop_ref_mapping (bitmap loops_with_too_many_memrefs)
 {
   loop_iterator li;
   struct loop *loop;
 
   FOR_EACH_LOOP (li, loop, 0)
     {
-      create_vop_ref_mapping_loop (loop);
+      if (! bitmap_bit_p (loops_with_too_many_memrefs, loop->num))
+	create_vop_ref_mapping_loop (loop);
     }
 }
 
 /* Gathers information about memory accesses in the loops.  */
 
-static void
-analyze_memory_references (void)
+static void
+analyze_memory_references (bitmap loops_with_too_many_memrefs)
 {
   unsigned i;
   bitmap empty;
@@ -1721,9 +1803,8 @@
     }
 
   memory_accesses.ttae_cache = NULL;
-
-  gather_mem_refs_in_loops ();
-  create_vop_ref_mapping ();
+  gather_mem_refs_in_loops (loops_with_too_many_memrefs);
+  create_vop_ref_mapping (loops_with_too_many_memrefs);
 }
 
 /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
@@ -2465,13 +2546,14 @@
    loops.  */
 
 static void
-store_motion (void)
+store_motion (bitmap loops_with_too_many_memrefs)
 {
   struct loop *loop;
   bitmap sm_executed = BITMAP_ALLOC (NULL);
 
   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
-    store_motion_loop (loop, sm_executed);
+    if (! bitmap_bit_p (loops_with_too_many_memrefs, loop->num))
+      store_motion_loop (loop, sm_executed);
 
   BITMAP_FREE (sm_executed);
   gsi_commit_edge_inserts ();
@@ -2622,21 +2704,29 @@
 {
   unsigned int todo;
 
+  /* The set of loops to which we cannot apply our memory optimizations.
+     For now, we punt on loops with more memory references than we are
+     prepared to handle (e.g. to avoid quadratic time complexity issues
+     when checking for memory dependencies).  */
+  bitmap loops_with_too_many_memrefs = BITMAP_ALLOC (NULL);
+
   tree_ssa_lim_initialize ();
 
   /* Gathers information about memory accesses in the loops.  */
-  analyze_memory_references ();
+  analyze_memory_references (loops_with_too_many_memrefs);
 
   /* For each statement determine the outermost loop in that it is
     invariant and cost for computing the invariant.  */
-  determine_invariantness ();
+  determine_invariantness (loops_with_too_many_memrefs);
 
   /* Execute store motion.  Force the necessary invariants to be moved
    out of the loops as well.  */
-  store_motion ();
+  store_motion (loops_with_too_many_memrefs);
 
   /* Move the expressions that are expensive enough.  */
-  todo = move_computations ();
+  todo = move_computations (loops_with_too_many_memrefs);
+
+  BITMAP_FREE (loops_with_too_many_memrefs);
 
   tree_ssa_lim_finalize ();
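
As a purely illustrative sketch (not part of the patch): a hypothetical
testcase that should exercise the new bail-out path, assuming it is compiled
with an artificially low threshold, e.g.

  gcc -O2 -ftree-loop-im -Wdisabled-optimization \
      --param loop-max-datarefs-for-datadeps=2 -c lim-too-many-memrefs.c

The file and function names below are made up for illustration only.

/* lim-too-many-memrefs.c -- hypothetical testcase, not part of the patch.  */

#define N 128

int a[N], b[N], c[N], d[N];

void
many_memrefs (int k)
{
  int i;

  /* Several distinct memory references in one loop body; with a very low
     --param loop-max-datarefs-for-datadeps value, gather_mem_refs_stmt
     should give up on this loop and emit the new -Wdisabled-optimization
     warning instead of applying invariant motion or store motion.  */
  for (i = 0; i < N; i++)
    {
      a[i] = b[i] + k;
      c[i] = d[i] * 2;
      b[i] = a[i] - c[i];
    }
}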