* gimple.h (gimple_set_in_transaction): Remove. (gimple_in_transaction): Look in BB instead. (gimple_statement_base): Remove in_transaction field. * basic-block.h (enum bb_flags): Add BB_IN_TRANSACTION. * trans-mem.c (compute_transaction_bits): Place transaction bit information into basic blocks. * Makefile.in (tree-ssa-loop-im.o): Depend on TARGET_H. * doc/tm.texi: Regenerate. * tree-ssa-loop-im.c (execute_sm_if_changed): New. (execute_sm_if_changed_flag): New. (execute_sm_if_changed_flag_set): New. (execute_sm): Do not generate data races unless requested. * targhooks.c (default_can_compare_bitwise_p): New. * targhooks.h (default_can_compare_bitwise_p): Protoize. * target.def (can_compare_bitwise_p): New hook. * tree-ssa-loop-im.c (execute_sm): Guard store motion with a conditional. * opts.c (finish_options): Do not allow store or load data races with -fgnu-tm. Index: trans-mem.c =================================================================== --- trans-mem.c (revision 186108) +++ trans-mem.c (working copy) @@ -2449,13 +2449,15 @@ compute_transaction_bits (void) struct tm_region *region; VEC (basic_block, heap) *queue; unsigned int i; - gimple_stmt_iterator gsi; basic_block bb; /* ?? Perhaps we need to abstract gate_tm_init further, because we certainly don't need it to calculate CDI_DOMINATOR info. 
*/ gate_tm_init (); + FOR_EACH_BB (bb) + bb->flags &= ~BB_IN_TRANSACTION; + for (region = all_tm_regions; region; region = region->next) { queue = get_tm_region_blocks (region->entry_block, @@ -2464,11 +2466,7 @@ compute_transaction_bits (void) NULL, /*stop_at_irr_p=*/true); for (i = 0; VEC_iterate (basic_block, queue, i, bb); ++i) - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple stmt = gsi_stmt (gsi); - gimple_set_in_transaction (stmt, true); - } + bb->flags |= BB_IN_TRANSACTION; VEC_free (basic_block, heap, queue); } Index: basic-block.h =================================================================== --- basic-block.h (revision 186108) +++ basic-block.h (working copy) @@ -256,7 +256,12 @@ enum bb_flags df_set_bb_dirty, but not cleared by df_analyze, so it can be used to test whether a block has been modified prior to a df_analyze call. */ - BB_MODIFIED = 1 << 12 + BB_MODIFIED = 1 << 12, + + /* Set on blocks that are in a transaction. This is calculated on + demand, and is available after calling + compute_transaction_bits(). */ + BB_IN_TRANSACTION = 1 << 13 }; /* Dummy flag for convenience in the hot/cold partitioning code. */ Index: tree-ssa-loop-im.c =================================================================== --- tree-ssa-loop-im.c (revision 186108) +++ tree-ssa-loop-im.c (working copy) @@ -52,7 +52,7 @@ along with GCC; see the file COPYING3. } } - Where COND and INV are is invariants, but evaluating INV may trap or be + Where COND and INV are invariants, but evaluating INV may trap or be invalid from some other reason if !COND. This may be transformed to if (cond) @@ -175,6 +175,9 @@ static struct /* The set of memory references accessed in each loop. */ VEC (bitmap, heap) *refs_in_loop; + /* The set of memory references read in each loop. */ + VEC (bitmap, heap) *reads_in_loop; + /* The set of memory references accessed in each loop, including subloops. 
*/ VEC (bitmap, heap) *all_refs_in_loop; @@ -1555,6 +1558,13 @@ record_mem_ref_loc (mem_ref_p ref, struc VEC_safe_push (mem_ref_loc_p, heap, accs->locs, aref); bitmap_set_bit (ril, ref->id); + if (gimple_assign_single_p (stmt) + && gimple_assign_rhs1 (stmt) == ref->mem) + { + bitmap reads; + reads = VEC_index (bitmap, memory_accesses.reads_in_loop, loop->num); + bitmap_set_bit (reads, ref->id); + } } /* Marks reference REF as stored in LOOP. */ @@ -1726,6 +1736,8 @@ analyze_memory_references (void) memory_accesses.refs_list = NULL; memory_accesses.refs_in_loop = VEC_alloc (bitmap, heap, number_of_loops ()); + memory_accesses.reads_in_loop = VEC_alloc (bitmap, heap, + number_of_loops ()); memory_accesses.all_refs_in_loop = VEC_alloc (bitmap, heap, number_of_loops ()); memory_accesses.all_refs_stored_in_loop = VEC_alloc (bitmap, heap, @@ -1736,6 +1748,8 @@ analyze_memory_references (void) empty = BITMAP_ALLOC (NULL); VEC_quick_push (bitmap, memory_accesses.refs_in_loop, empty); empty = BITMAP_ALLOC (NULL); + VEC_quick_push (bitmap, memory_accesses.reads_in_loop, empty); + empty = BITMAP_ALLOC (NULL); VEC_quick_push (bitmap, memory_accesses.all_refs_in_loop, empty); empty = BITMAP_ALLOC (NULL); VEC_quick_push (bitmap, memory_accesses.all_refs_stored_in_loop, empty); @@ -1956,6 +1970,133 @@ get_lsm_tmp_name (tree ref, unsigned n) return lsm_tmp_name; } +/* Helper function for execute_sm. Emit code to store TMP_VAR into + MEM along edge EX. + + The store is only done if MEM has changed. We do this so no + changes to MEM occur on code paths that did not originally store + into it. + + The common case for execute_sm will transform: + + for (...) { + if (foo) + stuff; + else + MEM = TMP_VAR; + } + + into: + + lsm = MEM; + for (...) { + if (foo) + stuff; + else + lsm = TMP_VAR; + } + MEM = lsm; + + This function will generate: + + // Note: this load can be avoided unless there is a load already + // present in the loop. + lsm = MEM; + + flag = false; + ... + for (...) 
{ + if (foo) + stuff; + else { + lsm = TMP_VAR; + flag = true; + } + } + if (flag) <-- + MEM = lsm; <-- +*/ + +static void +execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag) +{ + basic_block new_bb, then_bb, old_dest = ex->dest; + bool loop_has_only_one_exit = single_pred_p (ex->dest); + edge then_old_edge; + gimple_stmt_iterator gsi; + gimple stmt; + + if (loop_has_only_one_exit) + ex = split_block_after_labels (old_dest); + + old_dest = ex->dest; + new_bb = split_edge (ex); + then_bb = create_empty_bb (new_bb); + if (current_loops && new_bb->loop_father) + add_bb_to_loop (then_bb, new_bb->loop_father); + + gsi = gsi_start_bb (new_bb); + stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node, + NULL_TREE, NULL_TREE); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + gsi = gsi_start_bb (then_bb); + /* Insert actual store. */ + stmt = gimple_build_assign (unshare_expr (mem), tmp_var); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + make_edge (new_bb, then_bb, EDGE_TRUE_VALUE); + make_edge (new_bb, old_dest, EDGE_FALSE_VALUE); + then_old_edge = make_edge (then_bb, old_dest, EDGE_FALLTHRU); + set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb); + + if (!loop_has_only_one_exit) + for (gsi = gsi_start_phis (old_dest); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple phi = gsi_stmt (gsi); + unsigned i; + + for (i = 0; i < gimple_phi_num_args (phi); i++) + if (gimple_phi_arg_edge (phi, i)->src == new_bb) + { + tree arg = gimple_phi_arg_def (phi, i); + add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION); + update_stmt (phi); + } + } + /* Remove the original fall through edge. This was the + single_succ_edge (new_bb). */ + EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU; +} + +/* Helper function for execute_sm. On every location where REF is + set, set an appropriate flag indicating the store. 
*/ + +static tree +execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref) +{ + unsigned i; + mem_ref_loc_p loc; + tree flag; + VEC (mem_ref_loc_p, heap) *locs = NULL; + char *str = get_lsm_tmp_name (ref->mem, ~0); + + lsm_tmp_name_add ("_flag"); + flag = make_rename_temp (boolean_type_node, str); + get_all_locs_in_loop (loop, ref, &locs); + FOR_EACH_VEC_ELT (mem_ref_loc_p, locs, i, loc) + { + gimple_stmt_iterator gsi; + gimple stmt; + + gsi = gsi_for_stmt (loc->stmt); + stmt = gimple_build_assign (flag, boolean_true_node); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + } + VEC_free (mem_ref_loc_p, heap, locs); + return flag; +} + /* Executes store motion of memory reference REF from LOOP. Exits from the LOOP are stored in EXITS. The initialization of the temporary variable is put to the preheader of the loop, and assignments @@ -1964,12 +2105,14 @@ get_lsm_tmp_name (tree ref, unsigned n) static void execute_sm (struct loop *loop, VEC (edge, heap) *exits, mem_ref_p ref) { - tree tmp_var; + tree tmp_var, store_flag; unsigned i; - gimple load, store; + gimple load; struct fmt_data fmt_data; - edge ex; + edge ex, latch_edge; struct lim_aux_data *lim_data; + bool multi_threaded_model_p = false; + bool ref_read_in_loop_p = false; if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -1985,23 +2128,55 @@ execute_sm (struct loop *loop, VEC (edge fmt_data.orig_loop = loop; for_each_index (&ref->mem, force_move_till, &fmt_data); + if ((flag_tm && loop_preheader_edge (loop)->src->flags & BB_IN_TRANSACTION) + || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES)) + multi_threaded_model_p = true; + + if (multi_threaded_model_p) + store_flag = execute_sm_if_changed_flag_set (loop, ref); + rewrite_mem_refs (loop, ref, tmp_var); - /* Emit the load & stores. 
*/ - load = gimple_build_assign (tmp_var, unshare_expr (ref->mem)); - lim_data = init_lim_data (load); - lim_data->max_loop = loop; - lim_data->tgt_loop = loop; - - /* Put this into the latch, so that we are sure it will be processed after - all dependencies. */ - gsi_insert_on_edge (loop_latch_edge (loop), load); - FOR_EACH_VEC_ELT (edge, exits, i, ex) - { - store = gimple_build_assign (unshare_expr (ref->mem), tmp_var); - gsi_insert_on_edge (ex, store); + /* Emit the load code into the latch, so that we are sure it will + be processed after all dependencies. */ + latch_edge = loop_latch_edge (loop); + if (multi_threaded_model_p) + { + bitmap reads = VEC_index (bitmap, memory_accesses.reads_in_loop, + loop->num); + ref_read_in_loop_p = bitmap_bit_p (reads, ref->id); + } + /* For the multi-threaded variant, we can avoid the load altogether, + since the store is predicated by a flag. However, do the load if it + was originally in the loop. */ + if (!multi_threaded_model_p || ref_read_in_loop_p) + { + load = gimple_build_assign (tmp_var, unshare_expr (ref->mem)); + lim_data = init_lim_data (load); + lim_data->max_loop = loop; + lim_data->tgt_loop = loop; + gsi_insert_on_edge (latch_edge, load); + } + if (multi_threaded_model_p) + { + load = gimple_build_assign (store_flag, boolean_false_node); + lim_data = init_lim_data (load); + lim_data->max_loop = loop; + lim_data->tgt_loop = loop; + gsi_insert_on_edge (latch_edge, load); } + + /* Sink the store to every exit from the loop. */ + FOR_EACH_VEC_ELT (edge, exits, i, ex) + if (!multi_threaded_model_p) + { + gimple store; + store = gimple_build_assign (unshare_expr (ref->mem), tmp_var); + gsi_insert_on_edge (ex, store); + } + else + execute_sm_if_changed (ex, ref->mem, tmp_var, store_flag); } /* Hoists memory references MEM_REFS out of LOOP. 
EXITS is the list of exit @@ -2433,6 +2608,10 @@ tree_ssa_lim_finalize (void) BITMAP_FREE (b); VEC_free (bitmap, heap, memory_accesses.refs_in_loop); + FOR_EACH_VEC_ELT (bitmap, memory_accesses.reads_in_loop, i, b) + BITMAP_FREE (b); + VEC_free (bitmap, heap, memory_accesses.reads_in_loop); + FOR_EACH_VEC_ELT (bitmap, memory_accesses.all_refs_in_loop, i, b) BITMAP_FREE (b); VEC_free (bitmap, heap, memory_accesses.all_refs_in_loop); Index: testsuite/gcc.dg/pr52558-1.c =================================================================== --- testsuite/gcc.dg/pr52558-1.c (revision 0) +++ testsuite/gcc.dg/pr52558-1.c (revision 0) @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "--param allow-store-data-races=0 -O2 -fdump-tree-lim1" } */ + +int count; + +struct obj { + int data; + struct obj *next; +} *q; + +void func() +{ + struct obj *p; + for (p = q; p; p = p->next) + if (p->data > 0) + count++; +} + +/* { dg-final { scan-tree-dump-times "MEM count_lsm.. count_lsm_flag" 1 "lim1" } } */ +/* { dg-final { cleanup-tree-dump "lim1" } } */ Index: testsuite/gcc.dg/pr52558-2.c =================================================================== --- testsuite/gcc.dg/pr52558-2.c (revision 0) +++ testsuite/gcc.dg/pr52558-2.c (revision 0) @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "--param allow-store-data-races=0 -O2 -fdump-tree-lim1" } */ + +int g_1 = 1; +int g_2 = 0; + +int func_1(void) +{ + int l; + for (l = 0; l < 1234; l++) + { + if (g_1) + return l; + else + g_2 = 0; + } + return 999; +} + +/* { dg-final { scan-tree-dump-times "MEM.*g_2_lsm_flag" 1 "lim1" } } */ +/* { dg-final { cleanup-tree-dump "lim1" } } */ Index: testsuite/gcc.dg/tm/reg-promotion.c =================================================================== --- testsuite/gcc.dg/tm/reg-promotion.c (revision 0) +++ testsuite/gcc.dg/tm/reg-promotion.c (revision 0) @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-fgnu-tm -O2 -fdump-tree-lim1" } */ + +int count; + +struct obj { + 
int data; + struct obj *next; +} *q; + +void func() +{ + struct obj *p; + __transaction_atomic { + for (p = q; p; p = p->next) + if (p->data > 0) + count++; + } +} + +/* { dg-final { scan-tree-dump-times "MEM count_lsm.. count_lsm_flag" 1 "lim1" } } */ +/* { dg-final { cleanup-tree-dump "lim1" } } */ Index: gimple.h =================================================================== --- gimple.h (revision 186108) +++ gimple.h (working copy) @@ -305,11 +305,6 @@ struct GTY(()) gimple_statement_base { /* Nonzero if this statement contains volatile operands. */ unsigned has_volatile_ops : 1; - /* Nonzero if this statement appears inside a transaction. This bit - is calculated on de-mand and has relevant information only after - it has been calculated with compute_transaction_bits. */ - unsigned in_transaction : 1; - /* The SUBCODE field can be used for tuple-specific flags for tuples that do not require subcodes. Note that SUBCODE should be at least as wide as tree codes, as several tuples store tree codes @@ -1591,15 +1586,7 @@ gimple_set_has_volatile_ops (gimple stmt static inline bool gimple_in_transaction (gimple stmt) { - return stmt->gsbase.in_transaction; -} - -/* Set the IN_TRANSACTION flag to TRANSACTIONP. */ - -static inline void -gimple_set_in_transaction (gimple stmt, bool transactionp) -{ - stmt->gsbase.in_transaction = (unsigned) transactionp; + return gimple_bb (stmt)->flags & BB_IN_TRANSACTION; } /* Return true if statement STMT may access memory. */