--- gcc/tree-pass.h (/gcc-local/trunk) (revision 30596) +++ gcc/tree-pass.h (/gcc-local/export-ddg) (revision 30596) @@ -228,6 +228,9 @@ struct dump_file_info /* Internally used for the first instance of a pass. */ #define TODO_mark_first_instance (1 << 18) +#define TODO_no_verify_trees (1 << 19) + + #define TODO_update_ssa_any \ (TODO_update_ssa \ | TODO_update_ssa_no_phi \ @@ -266,6 +269,9 @@ extern struct tree_opt_pass pass_if_conv extern struct tree_opt_pass pass_vectorize; extern struct tree_opt_pass pass_complete_unroll; extern struct tree_opt_pass pass_loop_prefetch; +extern struct tree_opt_pass pass_gather_ddg_info; +extern struct tree_opt_pass pass_disable_ddg_verification; +extern struct tree_opt_pass pass_free_ddg_info; extern struct tree_opt_pass pass_iv_optimize; extern struct tree_opt_pass pass_tree_loop_done; extern struct tree_opt_pass pass_ch; --- gcc/ddg.c (/gcc-local/trunk) (revision 30596) +++ gcc/ddg.c (/gcc-local/export-ddg) (revision 30596) @@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. #include "expr.h" #include "bitmap.h" #include "ddg.h" +#include "tree-data-ref-export.h" /* A flag indicating that a ddg edge belongs to an SCC or not. */ enum edge_flag {NOT_IN_SCC = 0, IN_SCC}; @@ -333,12 +334,49 @@ build_inter_loop_deps (ddg_ptr g) } } +static int +walk_mems_2 (rtx *x, rtx mem) +{ + if (MEM_P (*x)) + { + if (may_alias_mems_by_ddr (mem, *x, 2)) + /* Indicate that dependence was determined and stop traversal. */ + return 1; + return -1; + } + return 0; +} + +static int +walk_mems_1 (rtx *x, rtx *pat) +{ + if (MEM_P (*x)) + { + /* Visit all MEMs in *PAT and check indepedence. */ + if (for_each_rtx (pat, (rtx_function) walk_mems_2, *x)) + /* Indicate that dependence was determined and stop traversal. */ + return 1; + return -1; + } + return 0; +} + +static bool +independent_by_exported_info (rtx insn1, rtx insn2) +{ + /* For each pair of MEMs in INSN1 and INSN2 check their independence. */ + return ! for_each_rtx (&PATTERN (insn1), (rtx_function) walk_mems_1, + &PATTERN (insn2)); +} /* Given two nodes, analyze their RTL insns and add inter-loop mem deps to ddg G. */ static void add_inter_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to) { + if (independent_by_exported_info (from->insn, to->insn)) + /* Do not create edge if memory references are known to be independent. */ + return; if (mem_write_insn_p (from->insn)) { if (mem_read_insn_p (to->insn)) --- gcc/tree-chrec.c (/gcc-local/trunk) (revision 30596) +++ gcc/tree-chrec.c (/gcc-local/export-ddg) (revision 30596) @@ -943,7 +943,7 @@ tree_contains_chrecs (const_tree expr, i /* Recursive helper function. */ static bool -evolution_function_is_invariant_rec_p (tree chrec, int loopnum) +evolution_function_is_invariant_rec_p (const_tree chrec, int loopnum) { if (evolution_function_is_constant_p (chrec)) return true; @@ -986,7 +986,7 @@ evolution_function_is_invariant_rec_p (t /* Return true if CHREC is invariant in loop LOOPNUM, false otherwise. */ bool -evolution_function_is_invariant_p (tree chrec, int loopnum) +evolution_function_is_invariant_p (const_tree chrec, int loopnum) { return evolution_function_is_invariant_rec_p (chrec, loopnum); } --- gcc/tree-chrec.h (/gcc-local/trunk) (revision 30596) +++ gcc/tree-chrec.h (/gcc-local/export-ddg) (revision 30596) @@ -82,7 +82,7 @@ extern bool tree_contains_chrecs (const_ extern bool evolution_function_is_affine_multivariate_p (const_tree, int); extern bool evolution_function_is_univariate_p (const_tree); extern unsigned nb_vars_in_chrec (tree); -extern bool evolution_function_is_invariant_p (tree, int); +extern bool evolution_function_is_invariant_p (const_tree, int); /* Determines whether CHREC is equal to zero. */ --- gcc/testsuite/gcc.dg/tree-ssa/structopt-2.c (/gcc-local/trunk) (revision 30596) +++ gcc/testsuite/gcc.dg/tree-ssa/structopt-2.c (/gcc-local/export-ddg) (revision 30596) @@ -39,8 +39,8 @@ int main(void) foo (x); } -/* { dg-final { scan-tree-dump-times "a.e" 0 "optimized" } } */ -/* { dg-final { scan-tree-dump-times "a.f" 0 "optimized" } } */ -/* { dg-final { scan-tree-dump-times "a.g" 0 "optimized" } } */ -/* { dg-final { scan-tree-dump-times "b.e" 0 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "a\\.e" 0 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "a\\.f" 0 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "a\\.g" 0 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "b\\.e" 0 "optimized" } } */ /* { dg-final { cleanup-tree-dump "optimized" } } */ --- gcc/tree-ssa-loop-ivopts.c (/gcc-local/trunk) (revision 30596) +++ gcc/tree-ssa-loop-ivopts.c (/gcc-local/export-ddg) (revision 30596) @@ -91,6 +91,7 @@ along with GCC; see the file COPYING3. #include "langhooks.h" #include "tree-affine.h" #include "target.h" +#include "tree-data-ref-export.h" /* The infinite cost. */ #define INFTY 10000000 @@ -1244,7 +1245,7 @@ find_interesting_uses_cond (struct ivopt i.e. if all its operands are defined outside of the LOOP. */ bool -expr_invariant_in_loop_p (struct loop *loop, tree expr) +expr_invariant_in_loop_p (struct loop *loop, const_tree expr) { basic_block def_bb; unsigned i, len; @@ -5069,6 +5070,11 @@ copy_ref_info (tree new_ref, tree old_re { TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref); TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref)); + /* As MEM_ORIG_EXPR binds MEM to TMR_ORIGINAL of TARGET_MEM_REF it was + produced from, we need to change all occurences of OLD_REF to + TMR_ORIGINAL (NEW_REF) in exported data. */ + if (flag_export_ddg) + replace_var_in_datarefs (old_ref, TMR_ORIGINAL (new_ref)); } } --- gcc/modulo-sched.c (/gcc-local/trunk) (revision 30596) +++ gcc/modulo-sched.c (/gcc-local/export-ddg) (revision 30596) @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. #include "ddg.h" #include "timevar.h" #include "tree-pass.h" +#include "tree-data-ref-export.h" #ifdef INSN_SCHEDULING @@ -652,10 +653,19 @@ generate_prolog_epilog (partial_schedule /* Generate the prolog, inserting its insns on the loop-entry edge. */ start_sequence (); - if (count_reg) /* Generate a subtract instruction at the beginning of the prolog to adjust the loop count by STAGE_COUNT. */ - emit_insn (gen_sub2_insn (count_reg, GEN_INT (last_stage))); + if (count_reg) + { + rtx sub_reg = NULL_RTX; + + sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, + count_reg, GEN_INT (last_stage), + count_reg, 1, OPTAB_DIRECT); + gcc_assert (REG_P (sub_reg)); + if (REGNO (sub_reg) != REGNO (count_reg)) + emit_move_insn (count_reg, sub_reg); + } for (i = 0; i < last_stage; i++) duplicate_insns_of_cycles (ps, 0, i, 1); @@ -793,6 +803,10 @@ canon_loop (struct loop *loop) /* Used to calculate the upper bound of ii. */ #define MAXII_FACTOR 2 +/* Maximal ASAP saved for estimating maxII. */ +static int global_max_asap; + + /* Main entry point, perform SMS scheduling on the loops of the function that consist of single basic blocks. */ static void @@ -1019,9 +1033,10 @@ sms_schedule (void) node_order = XNEWVEC (int, g->num_nodes); mii = 1; /* Need to pass some estimate of mii. */ + global_max_asap = 0; rec_mii = sms_order_nodes (g, mii, node_order); mii = MAX (res_MII (g), rec_mii); - maxii = MAXII_FACTOR * mii; + maxii = MAX (global_max_asap, MAXII_FACTOR * mii); if (dump_file) fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n", @@ -1425,32 +1440,39 @@ sms_schedule_by_order (ddg_ptr g, int mi "Trying to schedule node %d in (%d .. %d) step %d\n", u, start, end, step); - /* use must_follow & must_precede bitmaps to determine order - of nodes within the cycle. */ - sbitmap_zero (must_precede); - sbitmap_zero (must_follow); - /* TODO: We can add an insn to the must_precede or must_follow - bitmaps only if it has tight dependence to U and they - both scheduled in the same row. The current check is less - conservative and content with the fact that both U and the - insn are scheduled in the same row. */ - for (e = u_node->in; e != 0; e = e->next_in) - if (TEST_BIT (sched_nodes, e->src->cuid) - && (SMODULO (SCHED_TIME (e->src), ii) == SMODULO (start, ii))) - SET_BIT (must_precede, e->src->cuid); - - for (e = u_node->out; e != 0; e = e->next_out) - if (TEST_BIT (sched_nodes, e->dest->cuid) - && (SMODULO (SCHED_TIME (e->dest), ii) == - SMODULO ((end - step), ii))) - SET_BIT (must_follow, e->dest->cuid); - success = 0; if ((step > 0 && start < end) || (step < 0 && start > end)) for (c = start; c != end; c += step) { ps_insn_ptr psi; + /* use must_follow & must_precede bitmaps to determine order + of nodes within the cycle. */ + sbitmap_zero (must_precede); + sbitmap_zero (must_follow); + /* TODO: We can add an insn to the must_precede or must_follow + bitmaps only if it has tight dependence to U and they + both scheduled in the same row. The current check is less + conservative and content with the fact that both U and the + insn are scheduled in the same row. */ + for (e = u_node->in; e != 0; e = e->next_in) + if (TEST_BIT (sched_nodes, e->src->cuid)) + { + if (e->type == ANTI_DEP) + SET_BIT (must_precede, e->src->cuid); + else if (e->type == TRUE_DEP) + SET_BIT (must_follow, e->src->cuid); + } + + for (e = u_node->out; e != 0; e = e->next_out) + if (TEST_BIT (sched_nodes, e->dest->cuid)) + { + if (e->type == ANTI_DEP) + SET_BIT (must_follow, e->dest->cuid); + else if (e->type == TRUE_DEP) + SET_BIT (must_precede, e->dest->cuid); + } + psi = ps_add_node_check_conflicts (ps, u_node, c, must_precede, must_follow); @@ -1681,6 +1703,7 @@ calculate_order_params (ddg_ptr g, int m } } + global_max_asap = max_asap; return node_order_params_arr; } @@ -2331,6 +2354,7 @@ rest_of_handle_sms (void) basic_block bb; /* Collect loop information to be used in SMS. */ + ddg_export_disambiguate_only_intra_loop_deps (true); cfg_layout_initialize (0); sms_schedule (); @@ -2343,6 +2367,7 @@ rest_of_handle_sms (void) bb->aux = bb->next_bb; free_dominance_info (CDI_DOMINATORS); cfg_layout_finalize (); + ddg_export_disambiguate_only_intra_loop_deps (false); #endif /* INSN_SCHEDULING */ return 0; } --- gcc/function.c (/gcc-local/trunk) (revision 30596) +++ gcc/function.c (/gcc-local/export-ddg) (revision 30596) @@ -2967,7 +2967,10 @@ assign_parms_unsplit_complex (struct ass /* Set MEM_EXPR to the original decl, i.e. to PARM, instead of the copy of decl, i.e. FNARGS. */ if (DECL_INCOMING_RTL (parm) && MEM_P (DECL_INCOMING_RTL (parm))) - set_mem_expr (DECL_INCOMING_RTL (parm), parm); + { + set_mem_expr (DECL_INCOMING_RTL (parm), parm); + set_mem_orig_expr (DECL_INCOMING_RTL (parm), parm); + } } fnargs = TREE_CHAIN (fnargs); @@ -5497,6 +5500,7 @@ rest_of_handle_thread_prologue_and_epilo scheduling to operate in the epilogue. */ thread_prologue_and_epilogue_insns (); + delete_unreachable_blocks (); return 0; } --- gcc/function.h (/gcc-local/trunk) (revision 30596) +++ gcc/function.h (/gcc-local/export-ddg) (revision 30596) @@ -296,6 +296,14 @@ struct function GTY(()) during the nested function. */ struct var_refs_queue *fixup_var_refs_queue; + /* Data references and data dependence relations exported from Tree-SSA + level for use on RTL level. */ + struct ddg_info_def * GTY((skip)) x_ddg_info; + + /* This serves as GC root for trees for which data references + were exported. */ + VEC(tree, gc) *ddg_info_root_for_trees; + /* Current nesting level for temporaries. */ int x_temp_slot_level; --- gcc/alias.c (/gcc-local/trunk) (revision 30596) +++ gcc/alias.c (/gcc-local/export-ddg) (revision 30596) @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. #include "tree-pass.h" #include "ipa-type-escape.h" #include "df.h" +#include "tree-data-ref-export.h" /* The aliasing API provided here solves related but different problems: @@ -2127,6 +2128,8 @@ true_dependence (const_rtx mem, enum mac || MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER) return 1; + may_alias_mems_by_ddr (x, mem, 0); + if (DIFFERENT_ALIAS_SETS_P (x, mem)) return 0; @@ -2161,6 +2164,9 @@ true_dependence (const_rtx mem, enum mac SIZE_FOR_MODE (x), x_addr, 0)) return 0; + if (! may_alias_mems_by_ddr (x, mem, 1)) + return 0; + if (aliases_everything_p (x)) return 1; @@ -2203,6 +2209,8 @@ canon_true_dependence (const_rtx mem, en || MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER) return 1; + may_alias_mems_by_ddr (x, mem, 0); + if (DIFFERENT_ALIAS_SETS_P (x, mem)) return 0; @@ -2225,6 +2233,9 @@ canon_true_dependence (const_rtx mem, en SIZE_FOR_MODE (x), x_addr, 0)) return 0; + if (! may_alias_mems_by_ddr (x, mem, 1)) + return 0; + if (aliases_everything_p (x)) return 1; @@ -2265,6 +2276,8 @@ write_dependence_p (const_rtx mem, const || MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER) return 1; + may_alias_mems_by_ddr (x, mem, 0); + if (DIFFERENT_ALIAS_SETS_P (x, mem)) return 0; @@ -2298,6 +2311,9 @@ write_dependence_p (const_rtx mem, const SIZE_FOR_MODE (x), x_addr, 0)) return 0; + if (! may_alias_mems_by_ddr (x, mem, 1)) + return 0; + fixed_scalar = fixed_scalar_and_varying_struct_p (mem, x, mem_addr, x_addr, rtx_addr_varies_p); --- gcc/tree-data-ref.c (/gcc-local/trunk) (revision 30596) +++ gcc/tree-data-ref.c (/gcc-local/export-ddg) (revision 30596) @@ -784,7 +784,7 @@ dr_address_invariant_p (struct data_refe /* Frees data reference DR. */ -static void +void free_data_ref (data_reference_p dr) { BITMAP_FREE (DR_VOPS (dr)); @@ -1355,10 +1355,10 @@ non_affine_dependence_relation (struct d variables, i.e., if the ZIV (Zero Index Variable) test is true. */ static inline bool -ziv_subscript_p (const_tree chrec_a, const_tree chrec_b) +ziv_subscript_p (const_tree chrec_a, const_tree chrec_b, int loopnum) { - return (evolution_function_is_constant_p (chrec_a) - && evolution_function_is_constant_p (chrec_b)); + return (evolution_function_is_invariant_p (chrec_a, loopnum) + && evolution_function_is_invariant_p (chrec_b, loopnum)); } /* Returns true iff CHREC_A and CHREC_B are dependent on an index @@ -1463,7 +1463,10 @@ analyze_ziv_subscript (tree chrec_a, if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "(analyze_ziv_subscript \n"); - + + if (operand_equal_p (chrec_a, chrec_b, 0)) + goto overlaps_each_iteration; + chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE); chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE); difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b); @@ -1475,6 +1478,7 @@ analyze_ziv_subscript (tree chrec_a, { /* The difference is equal to zero: the accessed index overlaps for each iteration in the loop. */ + overlaps_each_iteration: *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); *last_conflicts = chrec_dont_know; @@ -2588,13 +2592,22 @@ analyze_overlapping_iterations (tree chr /* If they are the same chrec, and are affine, they overlap on every iteration. */ - else if (eq_evolutions_p (chrec_a, chrec_b) - && evolution_function_is_affine_multivariate_p (chrec_a, lnn)) + else if (eq_evolutions_p (chrec_a, chrec_b)) { - dependence_stats.num_same_subscript_function++; - *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); - *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); - *last_conflicts = chrec_dont_know; + if (ziv_subscript_p (chrec_a, chrec_b, lnn)) + analyze_ziv_subscript (chrec_a, chrec_b, + overlap_iterations_a, overlap_iterations_b, + last_conflicts); + + else if (evolution_function_is_affine_multivariate_p (chrec_a, lnn)) + { + dependence_stats.num_same_subscript_function++; + *overlap_iterations_a = + conflict_fn (1, affine_fn_cst (integer_zero_node)); + *overlap_iterations_b = + conflict_fn (1, affine_fn_cst (integer_zero_node)); + *last_conflicts = chrec_dont_know; + } } /* If they aren't the same, and aren't affine, we can't do anything @@ -2609,7 +2622,7 @@ analyze_overlapping_iterations (tree chr *overlap_iterations_b = conflict_fn_not_known (); } - else if (ziv_subscript_p (chrec_a, chrec_b)) + else if (ziv_subscript_p (chrec_a, chrec_b, lnn)) analyze_ziv_subscript (chrec_a, chrec_b, overlap_iterations_a, overlap_iterations_b, last_conflicts); @@ -3404,12 +3417,14 @@ omega_extract_distance_vectors (omega_pb static bool omega_setup_subscript (tree access_fun_a, tree access_fun_b, struct data_dependence_relation *ddr, - omega_pb pb, bool *maybe_dependent) + omega_pb pb, bool *maybe_dependent, + struct loop *loop_nest) { int eq; tree fun_a = chrec_convert (integer_type_node, access_fun_a, NULL_TREE); tree fun_b = chrec_convert (integer_type_node, access_fun_b, NULL_TREE); tree difference = chrec_fold_minus (integer_type_node, fun_a, fun_b); + int lnn = loop_nest->num; /* When the fun_a - fun_b is not constant, the dependence is not captured by the classic distance vector representation. */ @@ -3417,7 +3432,7 @@ omega_setup_subscript (tree access_fun_a return false; /* ZIV test. */ - if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference)) + if (ziv_subscript_p (fun_a, fun_b, lnn) && !integer_zerop (difference)) { /* There is no dependence. */ *maybe_dependent = false; @@ -3455,7 +3470,8 @@ omega_setup_subscript (tree access_fun_a static bool init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb, struct data_dependence_relation *ddr, - omega_pb pb, bool *maybe_dependent) + omega_pb pb, bool *maybe_dependent, + struct loop *loop_nest) { unsigned i; int ineq; @@ -3466,7 +3482,7 @@ init_omega_for_ddr_1 (struct data_refere for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) { if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i), - ddr, pb, maybe_dependent)) + ddr, pb, maybe_dependent, loop_nest)) return false; else if (*maybe_dependent == false) { @@ -3606,7 +3622,7 @@ init_omega_for_ddr_1 (struct data_refere static bool init_omega_for_ddr (struct data_dependence_relation *ddr, - bool *maybe_dependent) + bool *maybe_dependent, struct loop *loop_nest) { omega_pb pb; bool res = false; @@ -3628,7 +3644,7 @@ init_omega_for_ddr (struct data_dependen /* Save the dependences carried by outer loops. */ pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr)); res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb, - maybe_dependent); + maybe_dependent, loop_nest); omega_free_problem (pb); return res; } @@ -3640,7 +3656,7 @@ init_omega_for_ddr (struct data_dependen number of variables for the constraint system is 2*NB_LOOPS. */ pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr)); res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb, - maybe_dependent); + maybe_dependent, loop_nest); omega_free_problem (pb); /* Stop computation if not decidable, or no dependence. */ @@ -3649,7 +3665,7 @@ init_omega_for_ddr (struct data_dependen pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr)); res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb, - maybe_dependent); + maybe_dependent, loop_nest); omega_free_problem (pb); return res; @@ -3808,7 +3824,7 @@ compute_affine_dependence (struct data_d DDR_DIR_VECTS (ddr) = NULL; /* Compute the same information using Omega. */ - if (!init_omega_for_ddr (ddr, &maybe_dependent)) + if (!init_omega_for_ddr (ddr, &maybe_dependent, loop_nest)) goto csys_dont_know; if (dump_file && (dump_flags & TDF_DETAILS)) --- gcc/tree-data-ref.h (/gcc-local/trunk) (revision 30596) +++ gcc/tree-data-ref.h (/gcc-local/export-ddg) (revision 30596) @@ -323,6 +323,7 @@ extern void dump_data_dependence_directi enum data_dependence_direction); extern void free_dependence_relation (struct data_dependence_relation *); extern void free_dependence_relations (VEC (ddr_p, heap) *); +extern void free_data_ref (struct data_reference *); extern void free_data_refs (VEC (data_reference_p, heap) *); struct data_reference *create_data_ref (struct loop *, tree, tree, bool); bool find_loop_nest (struct loop *, VEC (loop_p, heap) **); --- gcc/emit-rtl.c (/gcc-local/trunk) (revision 30596) +++ gcc/emit-rtl.c (/gcc-local/export-ddg) (revision 30596) @@ -181,8 +181,8 @@ static int const_double_htab_eq (const v static rtx lookup_const_double (rtx); static hashval_t mem_attrs_htab_hash (const void *); static int mem_attrs_htab_eq (const void *, const void *); -static mem_attrs *get_mem_attrs (alias_set_type, tree, rtx, rtx, unsigned int, - enum machine_mode); +static mem_attrs *get_mem_attrs (alias_set_type, tree, tree, rtx, rtx, + unsigned int, enum machine_mode); static hashval_t reg_attrs_htab_hash (const void *); static int reg_attrs_htab_eq (const void *, const void *); static reg_attrs *get_reg_attrs (tree, int); @@ -257,7 +257,8 @@ mem_attrs_htab_hash (const void *x) return (p->alias ^ (p->align * 1000) ^ ((p->offset ? INTVAL (p->offset) : 0) * 50000) ^ ((p->size ? INTVAL (p->size) : 0) * 2500000) - ^ (size_t) iterative_hash_expr (p->expr, 0)); + ^ (size_t) iterative_hash_expr (p->expr, + iterative_hash_expr (p->orig_expr, 0))); } /* Returns nonzero if the value represented by X (which is really a @@ -274,7 +275,12 @@ mem_attrs_htab_eq (const void *x, const && p->size == q->size && p->align == q->align && (p->expr == q->expr || (p->expr != NULL_TREE && q->expr != NULL_TREE - && operand_equal_p (p->expr, q->expr, 0)))); + && operand_equal_p (p->expr, q->expr, 0))) + /* We do not use operand_equal_p for ORIG_EXPRs because we need to + distinguish memory references at different points of the loop (which + would have different indices in SSA form, like a[i_1] and a[i_2], but + were later rewritten to same a[i]). */ + && (p->orig_expr == q->orig_expr)); } /* Allocate a new mem_attrs structure and insert it into the hash table if @@ -282,8 +288,8 @@ mem_attrs_htab_eq (const void *x, const MEM of mode MODE. */ static mem_attrs * -get_mem_attrs (alias_set_type alias, tree expr, rtx offset, rtx size, - unsigned int align, enum machine_mode mode) +get_mem_attrs (alias_set_type alias, tree expr, tree orig_expr, rtx offset, + rtx size, unsigned int align, enum machine_mode mode) { mem_attrs attrs; void **slot; @@ -291,7 +297,7 @@ get_mem_attrs (alias_set_type alias, tre /* If everything is the default, we can just return zero. This must match what the corresponding MEM_* macros return when the field is not present. */ - if (alias == 0 && expr == 0 && offset == 0 + if (alias == 0 && expr == 0 && orig_expr == 0 && offset == 0 && (size == 0 || (mode != BLKmode && GET_MODE_SIZE (mode) == INTVAL (size))) && (STRICT_ALIGNMENT && mode != BLKmode @@ -300,6 +306,7 @@ get_mem_attrs (alias_set_type alias, tre attrs.alias = alias; attrs.expr = expr; + attrs.orig_expr = orig_expr; attrs.offset = offset; attrs.size = size; attrs.align = align; @@ -1413,7 +1420,7 @@ component_ref_for_mem_expr (tree ref) || TREE_CODE (inner) == NON_LVALUE_EXPR || TREE_CODE (inner) == VIEW_CONVERT_EXPR || TREE_CODE (inner) == SAVE_EXPR) - inner = TREE_OPERAND (inner, 0); + inner = TREE_OPERAND (inner, 0); if (! DECL_P (inner)) inner = NULL_TREE; @@ -1471,6 +1478,7 @@ set_mem_attributes_minus_bitpos (rtx ref { alias_set_type alias = MEM_ALIAS_SET (ref); tree expr = MEM_EXPR (ref); + tree orig_expr = NULL; rtx offset = MEM_OFFSET (ref); rtx size = MEM_SIZE (ref); unsigned int align = MEM_ALIGN (ref); @@ -1535,6 +1543,8 @@ set_mem_attributes_minus_bitpos (rtx ref { tree base; + orig_expr = t; + if (TREE_THIS_VOLATILE (t)) MEM_VOLATILE_P (ref) = 1; @@ -1710,11 +1720,13 @@ set_mem_attributes_minus_bitpos (rtx ref we're overlapping. */ offset = NULL; expr = NULL; + orig_expr = NULL; } /* Now set the attributes we computed above. */ MEM_ATTRS (ref) - = get_mem_attrs (alias, expr, offset, size, align, GET_MODE (ref)); + = get_mem_attrs (alias, expr, orig_expr, offset, size, align, + GET_MODE (ref)); /* If this is already known to be a scalar or aggregate, we are done. */ if (MEM_IN_STRUCT_P (ref) || MEM_SCALAR_P (ref)) @@ -1740,7 +1752,7 @@ void set_mem_attrs_from_reg (rtx mem, rtx reg) { MEM_ATTRS (mem) - = get_mem_attrs (MEM_ALIAS_SET (mem), REG_EXPR (reg), + = get_mem_attrs (MEM_ALIAS_SET (mem), REG_EXPR (reg), NULL_TREE, GEN_INT (REG_OFFSET (reg)), MEM_SIZE (mem), MEM_ALIGN (mem), GET_MODE (mem)); } @@ -1755,9 +1767,9 @@ set_mem_alias_set (rtx mem, alias_set_ty gcc_assert (alias_sets_conflict_p (set, MEM_ALIAS_SET (mem))); #endif - MEM_ATTRS (mem) = get_mem_attrs (set, MEM_EXPR (mem), MEM_OFFSET (mem), - MEM_SIZE (mem), MEM_ALIGN (mem), - GET_MODE (mem)); + MEM_ATTRS (mem) = get_mem_attrs (set, MEM_EXPR (mem), MEM_ORIG_EXPR (mem), + MEM_OFFSET (mem), MEM_SIZE (mem), + MEM_ALIGN (mem), GET_MODE (mem)); } /* Set the alignment of MEM to ALIGN bits. */ @@ -1766,8 +1778,8 @@ void set_mem_align (rtx mem, unsigned int align) { MEM_ATTRS (mem) = get_mem_attrs (MEM_ALIAS_SET (mem), MEM_EXPR (mem), - MEM_OFFSET (mem), MEM_SIZE (mem), align, - GET_MODE (mem)); + MEM_ORIG_EXPR (mem), MEM_OFFSET (mem), + MEM_SIZE (mem), align, GET_MODE (mem)); } /* Set the expr for MEM to EXPR. */ @@ -1775,9 +1787,29 @@ set_mem_align (rtx mem, unsigned int ali void set_mem_expr (rtx mem, tree expr) { + tree orig_expr = MEM_ORIG_EXPR (mem); + + /* If MEM_EXPR changes, clear MEM_ORIG_EXPR. If we still can preserve it, + we insert set_mem_orig_expr call right after this function call. */ + if (!expr || !mem_expr_equal_p (MEM_EXPR (mem), expr)) + orig_expr = NULL_TREE; + MEM_ATTRS (mem) - = get_mem_attrs (MEM_ALIAS_SET (mem), expr, MEM_OFFSET (mem), - MEM_SIZE (mem), MEM_ALIGN (mem), GET_MODE (mem)); + = get_mem_attrs (MEM_ALIAS_SET (mem), expr, orig_expr, + MEM_OFFSET (mem), MEM_SIZE (mem), MEM_ALIGN (mem), + GET_MODE (mem)); +} + + +/* Set the original expr for MEM to ORIG_EXPR. */ + +void +set_mem_orig_expr (rtx mem, tree orig_expr) +{ + MEM_ATTRS (mem) + = get_mem_attrs (MEM_ALIAS_SET (mem), MEM_EXPR (mem), orig_expr, + MEM_OFFSET (mem), MEM_SIZE (mem), MEM_ALIGN (mem), + GET_MODE (mem)); } /* Set the offset of MEM to OFFSET. */ @@ -1785,9 +1817,16 @@ set_mem_expr (rtx mem, tree expr) void set_mem_offset (rtx mem, rtx offset) { + tree orig_expr = MEM_ORIG_EXPR (mem); + + /* If MEM_EXPR changes, clear MEM_ORIG_EXPR. If we still can preserve it, + we insert set_mem_orig_expr call right after this function call. */ + if (offset != MEM_OFFSET (mem)) + orig_expr = NULL_TREE; + MEM_ATTRS (mem) = get_mem_attrs (MEM_ALIAS_SET (mem), MEM_EXPR (mem), - offset, MEM_SIZE (mem), MEM_ALIGN (mem), - GET_MODE (mem)); + orig_expr, offset, MEM_SIZE (mem), + MEM_ALIGN (mem), GET_MODE (mem)); } /* Set the size of MEM to SIZE. */ @@ -1796,8 +1835,8 @@ void set_mem_size (rtx mem, rtx size) { MEM_ATTRS (mem) = get_mem_attrs (MEM_ALIAS_SET (mem), MEM_EXPR (mem), - MEM_OFFSET (mem), size, MEM_ALIGN (mem), - GET_MODE (mem)); + MEM_ORIG_EXPR (mem), MEM_OFFSET (mem), + size, MEM_ALIGN (mem), GET_MODE (mem)); } /* Return a memory reference like MEMREF, but with its mode changed to MODE @@ -1864,7 +1903,8 @@ change_address (rtx memref, enum machine } MEM_ATTRS (new) - = get_mem_attrs (MEM_ALIAS_SET (memref), 0, 0, size, align, mmode); + = get_mem_attrs (MEM_ALIAS_SET (memref), NULL_TREE, NULL_TREE, 0, + size, align, mmode); return new; } @@ -1931,7 +1971,8 @@ adjust_address_1 (rtx memref, enum machi size = plus_constant (MEM_SIZE (memref), -offset); MEM_ATTRS (new) = get_mem_attrs (MEM_ALIAS_SET (memref), MEM_EXPR (memref), - memoffset, size, memalign, GET_MODE (new)); + MEM_ORIG_EXPR (memref), memoffset, size, + memalign, GET_MODE (new)); /* At some point, we should validate that this offset is within the object, if all the appropriate values are known. */ @@ -1987,7 +2028,8 @@ offset_address (rtx memref, rtx offset, /* Update the alignment to reflect the offset. Reset the offset, which we don't know. */ MEM_ATTRS (new) - = get_mem_attrs (MEM_ALIAS_SET (memref), MEM_EXPR (memref), 0, 0, + = get_mem_attrs (MEM_ALIAS_SET (memref), MEM_EXPR (memref), + MEM_ORIG_EXPR (memref), 0, 0, MIN (MEM_ALIGN (memref), pow2 * BITS_PER_UNIT), GET_MODE (new)); return new; @@ -2027,6 +2069,7 @@ widen_memory_access (rtx memref, enum ma tree expr = MEM_EXPR (new); rtx memoffset = MEM_OFFSET (new); unsigned int size = GET_MODE_SIZE (mode); + tree orig_expr = MEM_ORIG_EXPR (new); /* If there are no changes, just return the original memory reference. */ if (new == memref) @@ -2092,8 +2135,11 @@ widen_memory_access (rtx memref, enum ma /* The widened memory may alias other stuff, so zap the alias set. */ /* ??? Maybe use get_alias_set on any remaining expression. */ - MEM_ATTRS (new) = get_mem_attrs (0, expr, memoffset, GEN_INT (size), - MEM_ALIGN (new), mode); + if (expr != MEM_EXPR (new)) + orig_expr = NULL_TREE; + + MEM_ATTRS (new) = get_mem_attrs (0, expr, orig_expr, memoffset, + GEN_INT (size), MEM_ALIGN (new), mode); return new; } --- gcc/tree-data-ref-export.c (/gcc-local/trunk) (revision 30596) +++ gcc/tree-data-ref-export.c (/gcc-local/export-ddg) (revision 30596) @@ -0,0 +1,711 @@ +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "ggc.h" +#include "tree.h" + +#include "diagnostic.h" +#include "tree-flow.h" +#include "tree-dump.h" +#include "timevar.h" +#include "cfgloop.h" +#include "tree-chrec.h" +#include "tree-data-ref.h" +#include "tree-scalar-evolution.h" +#include "tree-pass.h" +#include "langhooks.h" +#include "hashtab.h" + +#include "tree-data-ref-export.h" + +/* Holds exported data references and relations. */ +struct ddg_info_def +{ + htab_t tree_to_dataref; + + htab_t datarefs_pair_to_ddr; + + /* Used by the verifier. */ + VEC (data_reference_p, heap) *verifier_seen_datarefs; + + int ddrs_known, ddrs_no, ddrs_unknown, ddrs_not_found; + + /* Number of memory references without/with relevant exported info. */ + int refs_bad, refs_ok; + + /* Statistics on DDG info usage in RTL disambiguation. */ + int alias_fail_no_tree, alias_fail_no_drf, alias_fail_no_ddr, + alias_fail_useless_ddr, alias_fail_graceful, alias_success_useless, + alias_success_new, alias_success_no_dep, alias_success_nonzero_dist; + + /* Whether we should skip verification of exported data. Enabled as late as + possible in the RTL pipeline by a separate pass. */ + bool skip_verification; + + /* TRUE for passes that perform code motion across loop branches, like SMS. + For other passes we assume it is safe to disambiguate references that are + dependent and distance vectors are known and non-zero. */ + bool disambiguate_only_intra_loop_deps; +}; + +#define ddg_info (cfun->x_ddg_info) + +#define print(...) \ +do \ + if (dump_file) \ + fprintf (dump_file, __VA_ARGS__); \ +while (0) + +static void +print_generic_expr_1 (FILE *dump_file, const_tree t, int flags) +{ + if (dump_file) + { + print_generic_expr (dump_file, (tree) t, flags); + dump_addr (dump_file, " ", t); + } +} + +static void +print_inline_rtx_1 (FILE *dump_file, rtx x, int ind) +{ + if (dump_file) + { + print_inline_rtx (dump_file, x, ind); + dump_addr (dump_file, " ", x); + } +} + +typedef struct { + const_tree ref; + data_reference_p drf; +} tree_dataref; + +static hashval_t +htab_hash_tree (const tree_dataref *t) +{ + return htab_hash_pointer (t->ref); +} + +static int +htab_eq_tree (const tree_dataref *t1, const tree_dataref *t2) +{ + return t1->ref == t2->ref; +} + +static void +htab_del_tree_dataref (tree_dataref *t) +{ + if (t->drf) + free_data_ref (t->drf); +} + +typedef struct { + data_reference_p a; + data_reference_p b; + ddr_p ddr; +} datarefs_pair_ddr; + +static hashval_t +htab_hash_datarefs_pair (const datarefs_pair_ddr *dp) +{ + return iterative_hash (&dp->a, sizeof (data_reference_p), + htab_hash_pointer (dp->b)); +} + +static int +htab_eq_datarefs_pair (const datarefs_pair_ddr *dp1, + const datarefs_pair_ddr *dp2) +{ + return dp1->a == dp2->a && dp1->b == dp2->b; +} + +static void +htab_del_datarefs_pair (datarefs_pair_ddr *dp) +{ + free_dependence_relation (dp->ddr); +} + +/* For each loop in function, save datarefs and ddrs obtained via + compute_dependencies_for_loop into ddg_info. */ +static unsigned int +main_export_ddg (void) +{ + bool inside_tree_loop_opt_p = !!current_loops; + bool dom_info_was_avail_p = dom_info_available_p (CDI_DOMINATORS); + unsigned int n_loops; + struct loop *loop; + loop_iterator li; + + gcc_assert (!ddg_info); + ddg_info = XCNEW (struct ddg_info_def); + ddg_info->tree_to_dataref + = htab_create (1, (htab_hash) htab_hash_tree, (htab_eq) htab_eq_tree, + (htab_del) htab_del_tree_dataref); + ddg_info->datarefs_pair_to_ddr + = htab_create (1, (htab_hash) htab_hash_datarefs_pair, + (htab_eq) htab_eq_datarefs_pair, + (htab_del) htab_del_datarefs_pair); + + cfun->ddg_info_root_for_trees = NULL; + + if(!inside_tree_loop_opt_p) + loop_optimizer_init(AVOID_CFG_MODIFICATIONS); + + /* This can be more than actual number of loops, because number_of_loops () + includes deleted loops. */ + n_loops = number_of_loops () - 1; + + if (n_loops > 0) + { + int cur_loop = 0; + VEC (data_reference_p, heap) *datarefs = NULL; + VEC (ddr_p, heap) *ddrs = NULL; + + if (!inside_tree_loop_opt_p) + scev_initialize (); + + FOR_EACH_LOOP (li, loop, 0) + { + unsigned int i; + data_reference_p drf; + ddr_p ddr; + + compute_data_dependences_for_loop (loop, false, &datarefs, &ddrs); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + dump_data_references (dump_file, datarefs); + dump_ddrs (dump_file, ddrs); + } + + for (i = 0; VEC_iterate (data_reference_p, datarefs, i, drf); i++) + { + void **slot; + tree_dataref *td; + + /* We want to save only those data references that correspond to + iteration of innermost loop containing the reference. */ + if (!drf->ref || loop != loop_containing_stmt (drf->stmt)) + continue; + + td = XNEW (tree_dataref); + td->ref = drf->ref; + td->drf = drf; + slot = htab_find_slot (ddg_info->tree_to_dataref, td, INSERT); + + gcc_assert (!*slot); + + *slot = td; + VEC_safe_push (tree, gc, cfun->ddg_info_root_for_trees, + (tree) drf->ref); + } + + for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++) + { + void **slot; + datarefs_pair_ddr *dp; + + /* As above, we want to save only those DDRs that describe + relation of references for innermost loop containing them. */ + if (!(ddr->a && ddr->b) + || loop != loop_containing_stmt (ddr->a->stmt)) + { + continue; + } + + dp = XNEW (datarefs_pair_ddr); + + dp->a = ddr->a; + dp->b = ddr->b; + dp->ddr = ddr; + slot = htab_find_slot (ddg_info->datarefs_pair_to_ddr, dp, + INSERT); + + gcc_assert (!*slot); + *slot = dp; + } + + cur_loop++; + VEC_free (data_reference_p, heap, datarefs); + VEC_free (ddr_p, heap, ddrs); + } + + if (!inside_tree_loop_opt_p) + scev_finalize (); + } + + if (!inside_tree_loop_opt_p) + loop_optimizer_finalize (); + + if (!dom_info_was_avail_p) + free_dominance_info (CDI_DOMINATORS); + + return 0; +} + +static bool +gate_export_ddg(void) +{ + return flag_export_ddg != 0; +} + +struct tree_opt_pass pass_gather_ddg_info = +{ + "export-ddg", /* name */ + gate_export_ddg, /* gate */ + main_export_ddg, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func, /* todo_flags_finish */ + 0 /* letter */ +}; + +static unsigned int +free_ddg_info (void) +{ + if (!ddg_info) + return 0; + + /* TODO: DDR_LOOP_NESTs are not free'd. */ + htab_delete (ddg_info->datarefs_pair_to_ddr); + htab_delete (ddg_info->tree_to_dataref); + + free (ddg_info); + ddg_info = NULL; + VEC_free (tree, gc, cfun->ddg_info_root_for_trees); + return 0; +} + +struct tree_opt_pass pass_free_ddg_info = +{ + NULL, /* name */ + NULL, /* gate */ + free_ddg_info, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ + 0 /* letter */ +}; + +/* Replace all occurences of FROM tree to TO in ddg_info_root_for_trees. */ +static void +replace_tree_in_ggc_root (const_tree from, const_tree to) +{ + int i; + tree t; + bool found = false; + + for (i = 0; VEC_iterate (tree, cfun->ddg_info_root_for_trees, i, t); i++) + if (t == from) + { + found = true; + VEC_replace (tree, cfun->ddg_info_root_for_trees, i, (tree) to); + } + + gcc_assert (found); +} + +/* Replace FROM tree to TO in ref fields of saved datarefs. */ +void +replace_var_in_datarefs (const_tree from, const_tree to) +{ + void **slot; + tree_dataref *td = XNEW (tree_dataref); + td->ref = from; + + slot = htab_find_slot (ddg_info->tree_to_dataref, td, NO_INSERT); + + /* IVOPTS might want to change a memory reference for which no dataref was + produced. However, it would be nice to enable this assert and see in + what cases it happens. */ + /* gcc_assert (slot); */ + + if (!slot) + { + free (td); + return; + } + + td->ref = to; + td->drf = ((tree_dataref *) (*slot))->drf; + ((tree_dataref *) (*slot))->drf = NULL; + htab_clear_slot (ddg_info->tree_to_dataref, slot); + + slot = htab_find_slot (ddg_info->tree_to_dataref, td, INSERT); + gcc_assert (!*slot); + *slot = td; + + replace_tree_in_ggc_root (from, to); +} + +/* Search for the dataref for T and return it if found, otherwise return + NULL. */ + +static data_reference_p +find_dataref (const_tree t) +{ + tree_dataref td, *td_p; + td.ref = t; + td_p = htab_find (ddg_info->tree_to_dataref, &td); + return td_p ? td_p->drf : NULL; +} + +/* Search for data dependence relation for DR1 and DR2, return it if found; + otherwise return NULL. */ +static ddr_p +find_ddr (data_reference_p dr1, data_reference_p dr2) +{ + datarefs_pair_ddr dp, *dp_p; + dp.a = dr1; + dp.b = dr2; + dp_p = htab_find (ddg_info->datarefs_pair_to_ddr, &dp); + if (dp_p) + return dp_p->ddr; + dp.a = dr2; + dp.b = dr1; + dp_p = htab_find (ddg_info->datarefs_pair_to_ddr, &dp); + return dp_p ? dp_p->ddr : NULL; +} + +/* For each data reference seen in the loop currently being verified, try to + find data dependence relation for it and DATAREF, record statistics, add + DATAREF to array of references in current loop. */ +static void +verify_ddrs (data_reference_p dataref) +{ + unsigned int i; + data_reference_p dr; + ddr_p ddr; + + for (i = 0; + VEC_iterate (data_reference_p, ddg_info->verifier_seen_datarefs, i, dr); + i++) + if ((ddr = find_ddr (dataref, dr))) + { + if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) + ddg_info->ddrs_known++; + else if (DDR_ARE_DEPENDENT (ddr) == chrec_known) + ddg_info->ddrs_no++; + else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) + ddg_info->ddrs_unknown++; + } + else + ddg_info->ddrs_not_found++; + + VEC_safe_push (data_reference_p, heap, ddg_info->verifier_seen_datarefs, + dataref); +} + +/* Print trees for which we expect to find saved dataref. */ +static tree +verify_trees_gimple (tree *tp, int *walk_subtrees, + void *data ATTRIBUTE_UNUSED) +{ + data_reference_p dataref; + const_tree t = *tp; + + if (TREE_CODE (t) == TARGET_MEM_REF) + t = TREE_OPERAND (t, 5); + if (REFERENCE_CLASS_P (t)) + { + *walk_subtrees = 0; + if ((dataref = find_dataref (t))) + { + if (dump_flags & TDF_DETAILS) + { + print (" DATAREF: "); + print_generic_expr_1 (dump_file, t, TDF_VOPS|TDF_MEMSYMS); + print ("\n"); + } + ddg_info->refs_ok++; + verify_ddrs (dataref); + } + else + { + if (dump_flags & TDF_DETAILS) + { + print ("!DATAREF: "); + print_generic_expr_1 (dump_file, t, TDF_VOPS|TDF_MEMSYMS); + print ("\n"); + } + ddg_info->refs_bad++; + } + } + return NULL_TREE; +} + + +/* Print MEMs for which we expect to have MEM_ORIG_EXPR. */ +static int +verify_trees_rtl (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + tree t; + int dummy; + + if (x == NULL_RTX || GET_CODE (x) != MEM) + return 0; + + t = MEM_ORIG_EXPR (x); + + if (!t) + { + if (dump_flags & TDF_DETAILS) + { + print ("!MEM_ORIG_EXPR: "); + print_inline_rtx_1 (dump_file, x, 0); + print ("\n"); + } + ddg_info->refs_bad++; + } + else + { + if (dump_flags & TDF_DETAILS) + { + print (" MEM_ORIG_EXPR: "); + print_inline_rtx_1 (dump_file, x, 0); + print (" -> "); + print_generic_expr_1 (dump_file, t, TDF_VOPS|TDF_MEMSYMS); + print ("\n"); + } + verify_trees_gimple (&t, &dummy, NULL); + } + + return 0; +} + +/* Depending on current IR, either check that we have saved datarefs for all + memory references, or we have MEM_ORIG_EXPRs for MEMs. */ +void +verify_trees (void) +{ + bool inside_tree_loop_opt_p = !!current_loops; + bool dom_info_was_avail_p; + loop_iterator li; + struct loop *loop; + unsigned cur_loop = 0; + + if (!ddg_info/* || !dump_file*/) + return; + + if (dump_flags & TDF_STATS) + print ("DDG info usage in RTL aliasing: %d no tree, %d no drf, %d no ddr, " + "%d useless ddr, %d graceful fails, %d useless successes, " + "%d new successes, %d no dep, %d nonzero dist\n", + ddg_info->alias_fail_no_tree, ddg_info->alias_fail_no_drf, + ddg_info->alias_fail_no_ddr, ddg_info->alias_fail_useless_ddr, + ddg_info->alias_fail_graceful, ddg_info->alias_success_useless, + ddg_info->alias_success_new, ddg_info->alias_success_no_dep, + ddg_info->alias_success_nonzero_dist); + + ddg_info->alias_fail_no_tree = 0; + ddg_info->alias_fail_no_drf = 0; + ddg_info->alias_fail_no_ddr = 0; + ddg_info->alias_fail_useless_ddr = 0; + ddg_info->alias_fail_graceful = 0; + ddg_info->alias_success_useless = 0; + ddg_info->alias_success_new = 0; + ddg_info->alias_success_no_dep = 0; + ddg_info->alias_success_nonzero_dist = 0; + + if (ddg_info->skip_verification) + return; + + dom_info_was_avail_p = dom_info_available_p (CDI_DOMINATORS); + + if (!inside_tree_loop_opt_p) + loop_optimizer_init(AVOID_CFG_MODIFICATIONS); + + + FOR_EACH_LOOP (li, loop, 0) + { + unsigned int i; + basic_block *bbs = get_loop_body (loop); + + ddg_info->refs_bad = ddg_info->refs_ok = 0; + ddg_info->ddrs_known = ddg_info->ddrs_no = ddg_info->ddrs_unknown = 0; + ddg_info->ddrs_not_found = 0; + + for (i = 0; i < loop->num_nodes; i++) + if (current_ir_type () == IR_GIMPLE) + { + block_stmt_iterator bsi; + for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi)) + walk_tree_without_duplicates (bsi_stmt_ptr (bsi), + verify_trees_gimple, NULL); + } + else + { + rtx insn; + + FOR_BB_INSNS (bbs[i], insn) + if (INSN_P (insn) && !CALL_P (insn)) + for_each_rtx (&PATTERN (insn), verify_trees_rtl, NULL); + } + + VEC_free (data_reference_p, heap, ddg_info->verifier_seen_datarefs); + if (dump_flags & TDF_STATS) + print (" loop %d: %d refs, %d ok, %d bad; %d not found ddrs, " + "%d known deps, %d no deps, %d unknown deps\n", + cur_loop, ddg_info->refs_bad+ddg_info->refs_ok, + ddg_info->refs_ok, ddg_info->refs_bad, + ddg_info->ddrs_not_found, ddg_info->ddrs_known, + ddg_info->ddrs_no, ddg_info->ddrs_unknown); + cur_loop++; + } + + if (!inside_tree_loop_opt_p) + loop_optimizer_finalize (); + + if (!dom_info_was_avail_p) + free_dominance_info (CDI_DOMINATORS); +} + +static unsigned int +disable_ddg_verification (void) +{ + if (ddg_info) + ddg_info->skip_verification = true; + + return 0; +} + +struct tree_opt_pass pass_disable_ddg_verification = +{ + NULL, /* name */ + NULL, /* gate */ + disable_ddg_verification, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ + 0 /* letter */ +}; + +void +ddg_export_disambiguate_only_intra_loop_deps (bool b) +{ + if (ddg_info) + ddg_info->disambiguate_only_intra_loop_deps = b; +} + +/* Return TRUE if any of DIST_VECTS is non-zero. */ +static bool +nonzero_dist_vects (VEC (lambda_vector, heap) *dist_vects, int loops_count) +{ + lambda_vector dist_v; + int i, j; + + for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, dist_v); i++) + for (j = 0; j < loops_count; j++) + if (dist_v[j]) + return true; + + return false; +} + +/* Return TRUE if we cannot prove from exported DDG info that MEM1 and MEM2 + are independent memory references. CALL is used to differentiate callers: + CALL=0 for early calls from RTL alias analysis, CALL=1 for late calls from + RTL alias analysis, CALL=2 for calls from modulo-scheduling DDG + construction. */ +bool +may_alias_mems_by_ddr (const_rtx mem1, const_rtx mem2, int call) +{ + const_tree t1 = MEM_ORIG_EXPR (mem1), t2 = MEM_ORIG_EXPR (mem2); + data_reference_p drf1, drf2; + ddr_p ddr; + + if (!ddg_info) + return true; + + if (!t1 || !t2) + { + if (call == 1) + ddg_info->alias_fail_graceful++; + else + ddg_info->alias_fail_no_tree++; + return true; + } + + drf1 = find_dataref (t1); + drf2 = find_dataref (t2); + + if (!drf1 || !drf2) + { + if (call == 1) + ddg_info->alias_fail_graceful++; + else + ddg_info->alias_fail_no_drf++; + return true; + } + + ddr = find_ddr (drf1, drf2); + + if (!ddr) + { + if (call == 1) + ddg_info->alias_fail_graceful++; + else + ddg_info->alias_fail_no_ddr++; + return true; + } + + if (DDR_ARE_DEPENDENT (ddr) == chrec_known) + { + if (call != 1) + ddg_info->alias_success_no_dep++; + +account_new: + + if (call == 0) + ddg_info->alias_success_useless++; + else if (call == 1) + { + ddg_info->alias_success_useless--; + ddg_info->alias_success_new++; + } + else + { + gcc_assert (call == 2); + ddg_info->alias_success_new++; + } + return false; + } + + if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_NUM_DIST_VECTS (ddr) > 0 + && !ddg_info->disambiguate_only_intra_loop_deps + && nonzero_dist_vects (DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr))) + { + if (call != 1) + ddg_info->alias_success_nonzero_dist++; + + goto account_new; + } + + if (call == 1) + ddg_info->alias_fail_graceful++; + else + ddg_info->alias_fail_useless_ddr++; + return true; +} + --- gcc/cfgexpand.c (/gcc-local/trunk) (revision 30596) +++ gcc/cfgexpand.c (/gcc-local/export-ddg) (revision 30596) @@ -2003,7 +2004,7 @@ struct tree_opt_pass pass_expand = PROP_gimple_leh | PROP_cfg, /* properties_required */ PROP_rtl, /* properties_provided */ PROP_trees, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_dump_func, /* todo_flags_finish */ + TODO_no_verify_trees, /* todo_flags_start */ + TODO_dump_func|TODO_no_verify_trees, /* todo_flags_finish */ 'r' /* letter */ }; --- gcc/emit-rtl.h (/gcc-local/trunk) (revision 30596) +++ gcc/emit-rtl.h (/gcc-local/export-ddg) (revision 30596) @@ -29,6 +29,9 @@ extern void set_mem_align (rtx, unsigned /* Set the expr for MEM to EXPR. */ extern void set_mem_expr (rtx, tree); +/* Set the original expr for MEM to ORIG_EXPR. */ +extern void set_mem_orig_expr (rtx, tree); + /* Set the offset for MEM to OFFSET. */ extern void set_mem_offset (rtx, rtx); --- gcc/tree-data-ref-export.h (/gcc-local/trunk) (revision 30596) +++ gcc/tree-data-ref-export.h (/gcc-local/export-ddg) (revision 30596) @@ -0,0 +1,11 @@ +#ifndef TREE_DATA_REF_EXPORT_H +#define TREE_DATA_REF_EXPORT_H + +extern void verify_trees (void); +extern void replace_var_in_datarefs (const_tree, const_tree); + +extern void ddg_export_disambiguate_only_intra_loop_deps (bool); +extern bool may_alias_mems_by_ddr (const_rtx, const_rtx, int); + +#endif /* TREE_DATA_REF_EXPORT_H */ + --- gcc/common.opt (/gcc-local/trunk) (revision 30596) +++ gcc/common.opt (/gcc-local/export-ddg) (revision 30596) @@ -461,6 +461,10 @@ fexpensive-optimizations Common Report Var(flag_expensive_optimizations) Optimization Perform a number of minor, expensive optimizations +fexport-ddg +Common Report Var(flag_export_ddg) Init(1) +Gather and export data relation information + ffast-math Common --- gcc/rtl.h (/gcc-local/trunk) (revision 30596) +++ gcc/rtl.h (/gcc-local/export-ddg) (revision 30596) @@ -145,6 +145,7 @@ typedef struct mem_attrs GTY(()) rtx offset; /* Offset from start of DECL, as CONST_INT. */ rtx size; /* Size in bytes, as a CONST_INT. */ unsigned int align; /* Alignment of MEM in bits. */ + tree orig_expr; /* Explicit original tree expression. */ } mem_attrs; /* Structure used to describe the attributes of a REG in similar way as @@ -1187,6 +1188,11 @@ do { \ : (STRICT_ALIGNMENT && GET_MODE (RTX) != BLKmode \ ? GET_MODE_ALIGNMENT (GET_MODE (RTX)) : BITS_PER_UNIT)) +/* For a MEM rtx, the decl it is known to refer to, if it is known to + refer to part of a DECL. It may also be a COMPONENT_REF. */ +#define MEM_ORIG_EXPR(RTX) \ +(MEM_ATTRS (RTX) == 0 ? 0 : MEM_ATTRS (RTX)->orig_expr) + /* For a REG rtx, the decl it is known to refer to, if it is known to refer to part of a DECL. */ #define REG_EXPR(RTX) (REG_ATTRS (RTX) == 0 ? 0 : REG_ATTRS (RTX)->decl) --- gcc/tree-flow.h (/gcc-local/trunk) (revision 30596) +++ gcc/tree-flow.h (/gcc-local/export-ddg) (revision 30596) @@ -1113,7 +1113,7 @@ extern void linear_transform_loops (void extern void tree_check_data_deps (void); /* In tree-ssa-loop-ivopts.c */ -bool expr_invariant_in_loop_p (struct loop *, tree); +bool expr_invariant_in_loop_p (struct loop *, const_tree); bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode); unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode); --- gcc/Makefile.in (/gcc-local/trunk) (revision 30596) +++ gcc/Makefile.in (/gcc-local/export-ddg) (revision 30596) @@ -812,6 +812,7 @@ C_PRETTY_PRINT_H = c-pretty-print.h $(PR SCEV_H = tree-scalar-evolution.h $(GGC_H) tree-chrec.h $(PARAMS_H) LAMBDA_H = lambda.h $(TREE_H) vec.h $(GGC_H) TREE_DATA_REF_H = tree-data-ref.h $(LAMBDA_H) omega.h graphds.h +TREE_DATA_REF_EXPORT_H = tree-data-ref-export.h $(TREE_DATA_REF_H) VARRAY_H = varray.h $(MACHMODE_H) $(SYSTEM_H) coretypes.h $(TM_H) TREE_INLINE_H = tree-inline.h $(VARRAY_H) pointer-set.h REAL_H = real.h $(MACHMODE_H) @@ -1120,6 +1121,7 @@ OBJS-common = \ tree-chrec.o \ tree-complex.o \ tree-data-ref.o \ + tree-data-ref-export.o \ tree-dfa.o \ tree-dump.o \ tree-eh.o \ @@ -2222,6 +2224,10 @@ tree-data-ref.o: tree-data-ref.c $(CONFI $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \ $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ $(TREE_DATA_REF_H) $(SCEV_H) tree-pass.h tree-chrec.h langhooks.h +tree-data-ref-export.o: tree-data-ref-export.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \ + $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ + $(TREE_DATA_REF_EXPORT_H) $(SCEV_H) tree-pass.h tree-chrec.h langhooks.h tree-vect-analyze.o: tree-vect-analyze.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(BASIC_BLOCK_H) \ $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ --- gcc/passes.c (/gcc-local/trunk) (revision 30596) +++ gcc/passes.c (/gcc-local/export-ddg) (revision 30596) @@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. #include "tree-dump.h" #include "df.h" #include "predict.h" +#include "tree-data-ref-export.h" #if defined (DWARF2_UNWIND_INFO) || defined (DWARF2_DEBUGGING_INFO) #include "dwarf2out.h" @@ -642,6 +643,7 @@ init_optimization_passes (void) pass_may_alias. */ NEXT_PASS (pass_complete_unroll); NEXT_PASS (pass_loop_prefetch); + NEXT_PASS (pass_gather_ddg_info); NEXT_PASS (pass_iv_optimize); NEXT_PASS (pass_tree_loop_done); } @@ -774,9 +776,11 @@ init_optimization_passes (void) } NEXT_PASS (pass_compute_alignments); NEXT_PASS (pass_duplicate_computed_gotos); + NEXT_PASS (pass_disable_ddg_verification); NEXT_PASS (pass_variable_tracking); NEXT_PASS (pass_free_cfg); NEXT_PASS (pass_machine_reorg); + NEXT_PASS (pass_free_ddg_info); NEXT_PASS (pass_cleanup_barriers); NEXT_PASS (pass_delay_slots); NEXT_PASS (pass_df_finish); @@ -970,6 +974,8 @@ execute_function_todo (void *data) verify_stmts (); if (flags & TODO_verify_loops) verify_loop_closed_ssa (); + if (flag_export_ddg && !(flags & TODO_no_verify_trees)) + verify_trees (); #endif cfun->last_verified = flags & TODO_verify_all; --- gcc/config/ia64/itanium2.md (/gcc-local/trunk) (revision 30596) +++ gcc/config/ia64/itanium2.md (/gcc-local/export-ddg) (revision 30596) @@ -1072,7 +1072,7 @@ (define_bypass 3 "2_ialu" "2_mmalua,2_mmmul,2_mmshf") (define_bypass 3 "2_mmalua,2_mmmul,2_mmshf" "2_ialu,2_ilog,2_ishf,2_st,2_ld,2_ldc") (define_bypass 6 "2_tofr" "2_frfr,2_stf") -(define_bypass 7 "2_fmac" "2_frfr,2_stf") +;; (define_bypass 7 "2_fmac" "2_frfr,2_stf") ;; We don't use here fcmp because scall may be predicated. (define_bypass 0 "2_fcvtfx,2_fld,2_flda,2_fldc,2_fmac,2_fmisc,2_frar_i,2_frar_m,\