This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][6/n] tree LIM TLC
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 12 Mar 2013 16:25:12 +0100 (CET)
- Subject: [PATCH][6/n] tree LIM TLC
(Un-?)surprisingly the most effective compile-time reduction for
the testcase in PR39326 is to employ ao_ref caching for
alias oracle queries and caching of expanded affine-combinations
for affine disambiguations.
This is the change that first brings compile time down to a manageable
amount for me (which is why I'm sending it "late" in the series).
Bootstrap and regtest scheduled on x86_64-unknown-linux-gnu, queued
for 4.9.
Richard.
2013-03-12 Richard Biener <rguenther@suse.de>
PR tree-optimization/39326
* tree-ssa-loop-im.c (struct mem_ref): Replace mem member
with an ao_ref typed one. Add affine-combination cache members.
(MEM_ANALYZABLE): Adjust.
(memref_eq): Likewise.
(mem_ref_alloc): Likewise.
(gather_mem_refs_stmt): Likewise.
(execute_sm_if_changed_flag_set): Likewise.
(execute_sm): Likewise.
(ref_always_accessed_p): Likewise.
(refs_independent_p): Likewise.
(can_sm_ref_p): Likewise.
(mem_refs_may_alias_p): Use ao_ref members to query the oracle.
Cache expanded affine combinations.
Index: trunk/gcc/tree-ssa-loop-im.c
===================================================================
*** trunk.orig/gcc/tree-ssa-loop-im.c 2013-03-12 15:11:12.000000000 +0100
--- trunk/gcc/tree-ssa-loop-im.c 2013-03-12 16:20:49.115169595 +0100
*************** typedef struct mem_ref_locs
*** 117,126 ****
typedef struct mem_ref
{
- tree mem; /* The memory itself. */
unsigned id; /* ID assigned to the memory reference
(its index in memory_accesses.refs_list) */
hashval_t hash; /* Its hash value. */
bitmap stored; /* The set of loops in that this memory location
is stored to. */
vec<mem_ref_locs_p> accesses_in_loop;
--- 117,130 ----
typedef struct mem_ref
{
unsigned id; /* ID assigned to the memory reference
(its index in memory_accesses.refs_list) */
hashval_t hash; /* Its hash value. */
+
+ /* The memory access itself and associated caching of alias-oracle
+ query meta-data. */
+ ao_ref mem; /* The ao_ref of this memory access. */
+
bitmap stored; /* The set of loops in that this memory location
is stored to. */
vec<mem_ref_locs_p> accesses_in_loop;
*************** typedef struct mem_ref
*** 142,147 ****
--- 146,155 ----
bitmap indep_ref; /* The set of memory references on that
this reference is independent. */
bitmap dep_ref; /* The complement of INDEP_REF. */
+
+ /* The expanded affine combination of this memory access. */
+ aff_tree aff_off;
+ double_int aff_size;
} *mem_ref_p;
*************** static bool ref_indep_loop_p (struct loo
*** 186,192 ****
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
/* Whether the reference was analyzable. */
! #define MEM_ANALYZABLE(REF) ((REF)->mem != error_mark_node)
static struct lim_aux_data *
init_lim_data (gimple stmt)
--- 194,200 ----
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
/* Whether the reference was analyzable. */
! #define MEM_ANALYZABLE(REF) ((REF)->mem.ref != error_mark_node)
static struct lim_aux_data *
init_lim_data (gimple stmt)
*************** memref_eq (const void *obj1, const void
*** 1435,1441 ****
{
const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
! return operand_equal_p (mem1->mem, (const_tree) obj2, 0);
}
/* Releases list of memory reference locations ACCS. */
--- 1443,1449 ----
{
const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
! return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
}
/* Releases list of memory reference locations ACCS. */
*************** static mem_ref_p
*** 1477,1483 ****
mem_ref_alloc (tree mem, unsigned hash, unsigned id)
{
mem_ref_p ref = XNEW (struct mem_ref);
! ref->mem = mem;
ref->id = id;
ref->hash = hash;
ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
--- 1485,1491 ----
mem_ref_alloc (tree mem, unsigned hash, unsigned id)
{
mem_ref_p ref = XNEW (struct mem_ref);
! ao_ref_init (&ref->mem, mem);
ref->id = id;
ref->hash = hash;
ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
*************** mem_ref_alloc (tree mem, unsigned hash,
*** 1487,1492 ****
--- 1495,1502 ----
ref->dep_ref = BITMAP_ALLOC (&lim_bitmap_obstack);
ref->accesses_in_loop.create (0);
+ ref->aff_off.type = NULL_TREE;
+
return ref;
}
*************** gather_mem_refs_stmt (struct loop *loop,
*** 1586,1592 ****
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Memory reference %u: ", id);
! print_generic_expr (dump_file, ref->mem, TDF_SLIM);
fprintf (dump_file, "\n");
}
}
--- 1596,1602 ----
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Memory reference %u: ", id);
! print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
fprintf (dump_file, "\n");
}
}
*************** analyze_memory_references (struct loop *
*** 1638,1653 ****
tree_to_aff_combination_expand. */
static bool
! mem_refs_may_alias_p (tree mem1, tree mem2, struct pointer_map_t **ttae_cache)
{
/* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
object and their offset differ in such a way that the locations cannot
overlap, then they cannot alias. */
- double_int size1, size2;
- aff_tree off1, off2;
/* Perform basic offset and type-based disambiguation. */
! if (!refs_may_alias_p (mem1, mem2))
return false;
/* The expansion of addresses may be a bit expensive, thus we only do
--- 1648,1661 ----
tree_to_aff_combination_expand. */
static bool
! mem_refs_may_alias_p (mem_ref_p mem1, mem_ref_p mem2)
{
/* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
object and their offset differ in such a way that the locations cannot
overlap, then they cannot alias. */
/* Perform basic offset and type-based disambiguation. */
! if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
return false;
/* The expansion of addresses may be a bit expensive, thus we only do
*************** mem_refs_may_alias_p (tree mem1, tree me
*** 1655,1668 ****
if (optimize < 2)
return true;
! get_inner_reference_aff (mem1, &off1, &size1);
! get_inner_reference_aff (mem2, &off2, &size2);
! aff_combination_expand (&off1, ttae_cache);
! aff_combination_expand (&off2, ttae_cache);
! aff_combination_scale (&off1, double_int_minus_one);
! aff_combination_add (&off2, &off1);
! if (aff_comb_cannot_overlap_p (&off2, size1, size2))
return false;
return true;
--- 1663,1686 ----
if (optimize < 2)
return true;
! if (mem1->aff_off.type == NULL_TREE)
! {
! get_inner_reference_aff (mem1->mem.ref, &mem1->aff_off, &mem1->aff_size);
! aff_combination_expand (&mem1->aff_off, &memory_accesses.ttae_cache);
! gcc_assert (mem1->aff_off.type != NULL_TREE);
! }
! if (mem2->aff_off.type == NULL_TREE)
! {
! get_inner_reference_aff (mem2->mem.ref, &mem2->aff_off, &mem2->aff_size);
! aff_combination_expand (&mem2->aff_off, &memory_accesses.ttae_cache);
! gcc_assert (mem2->aff_off.type != NULL_TREE);
! }
!
! aff_tree tem = mem1->aff_off;
! aff_combination_scale (&tem, double_int_minus_one);
! aff_combination_add (&tem, &mem2->aff_off);
! if (aff_comb_cannot_overlap_p (&tem, mem1->aff_size, mem2->aff_size))
return false;
return true;
*************** execute_sm_if_changed_flag_set (struct l
*** 1987,1993 ****
mem_ref_loc_p loc;
tree flag;
vec<mem_ref_loc_p> locs = vNULL;
! char *str = get_lsm_tmp_name (ref->mem, ~0);
lsm_tmp_name_add ("_flag");
flag = create_tmp_reg (boolean_type_node, str);
--- 2005,2011 ----
mem_ref_loc_p loc;
tree flag;
vec<mem_ref_loc_p> locs = vNULL;
! char *str = get_lsm_tmp_name (ref->mem.ref, ~0);
lsm_tmp_name_add ("_flag");
flag = create_tmp_reg (boolean_type_node, str);
*************** execute_sm (struct loop *loop, vec<edge>
*** 2029,2044 ****
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Executing store motion of ");
! print_generic_expr (dump_file, ref->mem, 0);
fprintf (dump_file, " from loop %d\n", loop->num);
}
! tmp_var = create_tmp_reg (TREE_TYPE (ref->mem),
! get_lsm_tmp_name (ref->mem, ~0));
fmt_data.loop = loop;
fmt_data.orig_loop = loop;
! for_each_index (&ref->mem, force_move_till, &fmt_data);
if (block_in_transaction (loop_preheader_edge (loop)->src)
|| !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
--- 2047,2062 ----
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Executing store motion of ");
! print_generic_expr (dump_file, ref->mem.ref, 0);
fprintf (dump_file, " from loop %d\n", loop->num);
}
! tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
! get_lsm_tmp_name (ref->mem.ref, ~0));
fmt_data.loop = loop;
fmt_data.orig_loop = loop;
! for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
if (block_in_transaction (loop_preheader_edge (loop)->src)
|| !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
*************** execute_sm (struct loop *loop, vec<edge>
*** 2056,2062 ****
/* FIXME/TODO: For the multi-threaded variant, we could avoid this
load altogether, since the store is predicated by a flag. We
could, do the load only if it was originally in the loop. */
! load = gimple_build_assign (tmp_var, unshare_expr (ref->mem));
lim_data = init_lim_data (load);
lim_data->max_loop = loop;
lim_data->tgt_loop = loop;
--- 2074,2080 ----
/* FIXME/TODO: For the multi-threaded variant, we could avoid this
load altogether, since the store is predicated by a flag. We
could, do the load only if it was originally in the loop. */
! load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
lim_data = init_lim_data (load);
lim_data->max_loop = loop;
lim_data->tgt_loop = loop;
*************** execute_sm (struct loop *loop, vec<edge>
*** 2076,2086 ****
if (!multi_threaded_model_p)
{
gimple store;
! store = gimple_build_assign (unshare_expr (ref->mem), tmp_var);
gsi_insert_on_edge (ex, store);
}
else
! execute_sm_if_changed (ex, ref->mem, tmp_var, store_flag);
}
/* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
--- 2094,2104 ----
if (!multi_threaded_model_p)
{
gimple store;
! store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
gsi_insert_on_edge (ex, store);
}
else
! execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag);
}
/* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
*************** ref_always_accessed_p (struct loop *loop
*** 2114,2120 ****
struct loop *must_exec;
tree base;
! base = get_base_address (ref->mem);
if (INDIRECT_REF_P (base)
|| TREE_CODE (base) == MEM_REF)
base = TREE_OPERAND (base, 0);
--- 2132,2138 ----
struct loop *must_exec;
tree base;
! base = ao_ref_base (&ref->mem);
if (INDIRECT_REF_P (base)
|| TREE_CODE (base) == MEM_REF)
base = TREE_OPERAND (base, 0);
*************** refs_independent_p (mem_ref_p ref1, mem_
*** 2187,2194 ****
fprintf (dump_file, "Querying dependency of refs %u and %u: ",
ref1->id, ref2->id);
! if (mem_refs_may_alias_p (ref1->mem, ref2->mem,
! &memory_accesses.ttae_cache))
{
bitmap_set_bit (ref1->dep_ref, ref2->id);
if (dump_file && (dump_flags & TDF_DETAILS))
--- 2205,2211 ----
fprintf (dump_file, "Querying dependency of refs %u and %u: ",
ref1->id, ref2->id);
! if (mem_refs_may_alias_p (ref1, ref2))
{
bitmap_set_bit (ref1->dep_ref, ref2->id);
if (dump_file && (dump_flags & TDF_DETAILS))
*************** can_sm_ref_p (struct loop *loop, mem_ref
*** 2284,2304 ****
return false;
/* It should be movable. */
! if (!is_gimple_reg_type (TREE_TYPE (ref->mem))
! || TREE_THIS_VOLATILE (ref->mem)
! || !for_each_index (&ref->mem, may_move_till, loop))
return false;
/* If it can throw fail, we do not properly update EH info. */
! if (tree_could_throw_p (ref->mem))
return false;
/* If it can trap, it must be always executed in LOOP.
Readonly memory locations may trap when storing to them, but
tree_could_trap_p is a predicate for rvalues, so check that
explicitly. */
! base = get_base_address (ref->mem);
! if ((tree_could_trap_p (ref->mem)
|| (DECL_P (base) && TREE_READONLY (base)))
&& !ref_always_accessed_p (loop, ref, true))
return false;
--- 2301,2321 ----
return false;
/* It should be movable. */
! if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
! || TREE_THIS_VOLATILE (ref->mem.ref)
! || !for_each_index (&ref->mem.ref, may_move_till, loop))
return false;
/* If it can throw fail, we do not properly update EH info. */
! if (tree_could_throw_p (ref->mem.ref))
return false;
/* If it can trap, it must be always executed in LOOP.
Readonly memory locations may trap when storing to them, but
tree_could_trap_p is a predicate for rvalues, so check that
explicitly. */
! base = ao_ref_base (&ref->mem);
! if ((tree_could_trap_p (ref->mem.ref)
|| (DECL_P (base) && TREE_READONLY (base)))
&& !ref_always_accessed_p (loop, ref, true))
return false;