[PATCH] Find constant definition for by-ref argument using dominance information (PR ipa/90401)
Richard Biener
richard.guenther@gmail.com
Wed Jun 5 10:15:00 GMT 2019
On Wed, Jun 5, 2019 at 10:59 AM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> IPA-CP cannot identify a constant by-ref argument to a function if the
> definition of the argument is not in the same basic block as the callsite,
> because IPA-CP only does a local search in the callsite's basic block. So
> this patch implements an enhanced algorithm, which uses dominance
> information to guide traversal of the virtual SSA web, to find constant
> definitions in dominating basic blocks.
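>
> For example, a minimal sketch of the kind of case this is meant to catch
> (names are invented):
>
>     int callee (int *v);
>
>     int caller (int c)
>     {
>       int a = 1;              /* Constant store in the entry block ...   */
>       if (c)
>         return callee (&a);   /* ... dominates this call in another BB.  */
>       return 0;
>     }
>
> Previously the backward scan stopped at the top of the call's own basic
> block and missed such a store.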
>
> Feng
>
> ---
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 526ed45be89..cc076c337af 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,14 @@
> +2019-06-04 Feng Xue <fxue@os.amperecomputing.com>
> +
> + PR ipa/90401
> + * ipa-prop.c (add_to_agg_contents_list): New function.
> + (clobber_by_agg_contents_list_p): Likewise.
> + (extract_mem_content): Likewise.
> + (strictly_dominated_by_ssa_p): Likewise.
> + (get_place_in_agg_contents_list): Delete.
> + (determine_known_aggregate_parts): Renamed from
> + determine_locally_known_aggregate_parts.
> +
> 2019-06-04 Segher Boessenkool <segher@kernel.crashing.org>
>
> * config/rs6000/constraints.md (define_register_constraint "wp"):
> diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
> index d86c2f3db55..405cdf7730b 100644
> --- a/gcc/ipa-prop.c
> +++ b/gcc/ipa-prop.c
> @@ -1458,7 +1458,7 @@ get_ssa_def_if_simple_copy (tree rhs)
> return rhs;
> }
>
> -/* Simple linked list, describing known contents of an aggregate beforere
> +/* Simple linked list, describing known contents of an aggregate before
> call. */
>
> struct ipa_known_agg_contents_list
> @@ -1471,41 +1471,64 @@ struct ipa_known_agg_contents_list
> struct ipa_known_agg_contents_list *next;
> };
>
> -/* Find the proper place in linked list of ipa_known_agg_contents_list
> - structures where to put a new one with the given LHS_OFFSET and LHS_SIZE,
> - unless there is a partial overlap, in which case return NULL, or such
> - element is already there, in which case set *ALREADY_THERE to true. */
> +/* Add a known content item into a linked list of ipa_known_agg_contents_list,
> + in which all elements are sorted by offset in ascending order. When
> + ALLOW_DUP is false, insert the item only if there is no duplicate (with the
> + same offset and size) in the list. Return true if the item was added. */
>
> -static struct ipa_known_agg_contents_list **
> -get_place_in_agg_contents_list (struct ipa_known_agg_contents_list **list,
> - HOST_WIDE_INT lhs_offset,
> - HOST_WIDE_INT lhs_size,
> - bool *already_there)
> +static inline bool
> +add_to_agg_contents_list (struct ipa_known_agg_contents_list **plist,
> + struct ipa_known_agg_contents_list *item,
> + bool allow_dup = true)
> {
> - struct ipa_known_agg_contents_list **p = list;
> - while (*p && (*p)->offset < lhs_offset)
> + struct ipa_known_agg_contents_list *list = *plist;
> +
> + for (; list; list = list->next)
> {
> - if ((*p)->offset + (*p)->size > lhs_offset)
> - return NULL;
> - p = &(*p)->next;
> + if (list->offset > item->offset)
> + break;
> +
> + if (list->offset == item->offset && list->size == item->size
> + && !allow_dup)
> + return false;
> +
> + plist = &list->next;
> }
>
> - if (*p && (*p)->offset < lhs_offset + lhs_size)
> + item->next = list;
> + *plist = item;
> + return true;
> +}
> +
> +/* Check whether a given known content is clobbered by some element in a
> + linked list of ipa_known_agg_contents_list. As a special case, constant
> + items completely identical to the given one (same offset, size and value)
> + can be ignored, since a clobber with the same value changes nothing. */
> +
> +static inline bool
> +clobber_by_agg_contents_list_p (struct ipa_known_agg_contents_list *list,
> + struct ipa_known_agg_contents_list *item)
> +{
> + for (; list; list = list->next)
> {
> - if ((*p)->offset == lhs_offset && (*p)->size == lhs_size)
> - /* We already know this value is subsequently overwritten with
> - something else. */
> - *already_there = true;
> - else
> - /* Otherwise this is a partial overlap which we cannot
> - represent. */
> - return NULL;
> + if (list->offset > item->offset)
> + return list->offset < item->offset + item->size;
> +
> + /* For the constant item, we can skip comparison with identical items in
> + the list, because its content remains unchanged after clobbering. */
> + if (list->offset == item->offset && list->size == item->size
> + && list->constant && item->constant
> + && operand_equal_p (list->constant, item->constant, 0))
> + continue;
> +
> + if (list->offset + list->size > item->offset)
> + return true;
> }
> - return p;
> + return false;
> }
>
> /* Build aggregate jump function from LIST, assuming there are exactly
> - CONST_COUNT constant entries there and that th offset of the passed argument
> + CONST_COUNT constant entries there and that the offset of the passed argument
> is ARG_OFFSET and store it into JFUNC. */
>
> static void
> @@ -1528,6 +1551,66 @@ build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list,
> }
> }
>
> +/* If STMT is a memory store to the object whose address is BASE, extract
> + information (offset, size, and value) into CONTENT and return true;
> + otherwise conservatively assume the whole object is modified with
> + unknown content, and return false. CHECK_REF means that the access to the
> + object is expected to be in the form of a MEM_REF expression. */
> +
> +static bool
> +extract_mem_content (gimple *stmt, tree base, bool check_ref,
> + struct ipa_known_agg_contents_list *content)
> +{
> + HOST_WIDE_INT lhs_offset, lhs_size;
> + tree lhs, rhs, lhs_base;
> + bool reverse;
> +
> + if (!gimple_assign_single_p (stmt))
> + return false;
> +
> + lhs = gimple_assign_lhs (stmt);
> + rhs = gimple_assign_rhs1 (stmt);
> +
> + if (!is_gimple_reg_type (TREE_TYPE (rhs))
> + || TREE_CODE (lhs) == BIT_FIELD_REF
> + || contains_bitfld_component_ref_p (lhs))
> + return false;
> +
> + lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset,
> + &lhs_size, &reverse);
> + if (!lhs_base)
> + return false;
> +
> + if (check_ref)
> + {
> + if (TREE_CODE (lhs_base) != MEM_REF
> + || TREE_OPERAND (lhs_base, 0) != base
> + || !integer_zerop (TREE_OPERAND (lhs_base, 1)))
> + return false;
> + }
> + else if (lhs_base != base)
> + return false;
> +
> + rhs = get_ssa_def_if_simple_copy (rhs);
> +
> + content->size = lhs_size;
> + content->offset = lhs_offset;
> + content->constant = is_gimple_ip_invariant (rhs) ? rhs : NULL_TREE;
> + content->next = NULL;
> +
> + return true;
> +}
> +
> +/* Return true if the definition of SSA name SSA strictly dominates basic
> + block BB. */
> +
> +static inline bool
> +strictly_dominated_by_ssa_p (basic_block bb, tree ssa)
> +{
> + basic_block ssa_bb = gimple_bb (SSA_NAME_DEF_STMT (ssa));
> +
> + return bb != ssa_bb && dominated_by_p (CDI_DOMINATORS, bb, ssa_bb);
> +}
> +
> /* Traverse statements from CALL backwards, scanning whether an aggregate given
> in ARG is filled in with constant values. ARG can either be an aggregate
> expression or a pointer to an aggregate. ARG_TYPE is the type of the
> @@ -1535,19 +1618,22 @@ build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list,
> subsequently stored. */
>
> static void
> -determine_locally_known_aggregate_parts (gcall *call, tree arg,
> - tree arg_type,
> - struct ipa_jump_func *jfunc)
> +determine_known_aggregate_parts (gcall *call, tree arg,
> + tree arg_type,
> + struct ipa_jump_func *jfunc)
> {
> - struct ipa_known_agg_contents_list *list = NULL;
> + struct ipa_known_agg_contents_list *list = NULL, *all_list = NULL;
> + basic_block call_bb = gimple_bb (call);
> + hash_set <tree> visited;
> + auto_vec <tree> worklist;
> int item_count = 0, const_count = 0;
> + int ipa_max_agg_items = PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS);
> HOST_WIDE_INT arg_offset, arg_size;
> - gimple_stmt_iterator gsi;
> tree arg_base;
> bool check_ref, by_ref;
> ao_ref r;
>
> - if (PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) == 0)
> + if (ipa_max_agg_items == 0)
> return;
>
> /* The function operates in three stages. First, we prepare check_ref, r,
> @@ -1606,82 +1692,139 @@ determine_locally_known_aggregate_parts (gcall *call, tree arg,
> ao_ref_init (&r, arg);
> }
>
> - /* Second stage walks back the BB, looks at individual statements and as long
> - as it is confident of how the statements affect contents of the
> - aggregates, it builds a sorted linked list of ipa_agg_jf_list structures
> - describing it. */
> - gsi = gsi_for_stmt (call);
> - gsi_prev (&gsi);
> - for (; !gsi_end_p (gsi); gsi_prev (&gsi))
> - {
> - struct ipa_known_agg_contents_list *n, **p;
> - gimple *stmt = gsi_stmt (gsi);
> - HOST_WIDE_INT lhs_offset, lhs_size;
> - tree lhs, rhs, lhs_base;
> - bool reverse;
> -
> - if (!stmt_may_clobber_ref_p_1 (stmt, &r))
> - continue;
> - if (!gimple_assign_single_p (stmt))
> - break;
> -
> - lhs = gimple_assign_lhs (stmt);
> - rhs = gimple_assign_rhs1 (stmt);
> - if (!is_gimple_reg_type (TREE_TYPE (rhs))
> - || TREE_CODE (lhs) == BIT_FIELD_REF
> - || contains_bitfld_component_ref_p (lhs))
> - break;
> -
> - lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset,
> - &lhs_size, &reverse);
> - if (!lhs_base)
> - break;
> -
> - if (check_ref)
> - {
> - if (TREE_CODE (lhs_base) != MEM_REF
> - || TREE_OPERAND (lhs_base, 0) != arg_base
> - || !integer_zerop (TREE_OPERAND (lhs_base, 1)))
> - break;
> - }
> - else if (lhs_base != arg_base)
> - {
> - if (DECL_P (lhs_base))
> - continue;
> - else
> - break;
> - }
> -
> - bool already_there = false;
> - p = get_place_in_agg_contents_list (&list, lhs_offset, lhs_size,
> - &already_there);
> - if (!p)
> - break;
> - if (already_there)
> - continue;
> -
> - rhs = get_ssa_def_if_simple_copy (rhs);
> - n = XALLOCA (struct ipa_known_agg_contents_list);
> - n->size = lhs_size;
> - n->offset = lhs_offset;
> - if (is_gimple_ip_invariant (rhs))
> - {
> - n->constant = rhs;
> - const_count++;
> - }
> - else
> - n->constant = NULL_TREE;
> - n->next = *p;
> - *p = n;
> -
> - item_count++;
> - if (const_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)
> - || item_count == 2 * PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS))
> - break;
> - }
> + /* Second stage walks back from the call statement, looks at individual
> + virtual operands, and as long as it is confident of how the defining
> + statement of each virtual operand affects the contents of the aggregate,
> + builds a sorted linked list of ipa_agg_jf_list structures describing them.
> +
> + This stage is really a kind of global reachability analysis of in-memory
> + variable definitions. A complete resolution would require iterative
> + dataflow computation, which is somewhat complex and seems overkill for
> + this constant-collecting task.
> +
> + So here we use a simpler algorithm that we believe achieves nearly the
> + same result in most situations. We traverse the virtual SSA web backwards
> + starting from the call, stepping from one dominating virtual operand (one
> + whose definition dominates the call) to another. Before a dom virtual
> + operand is processed, we ensure that all non-dom virtual operands
> + backwards reachable from the previous dom virtual operand have been
> + processed. Only a dom virtual operand is regarded as a possible constant
> + value provider for the call argument; a non-dom virtual operand is only
> + checked for whether it kills definitions of dom virtual operands. */
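To make the dom vs. non-dom distinction concrete, here is a minimal C shape
(names invented) of what the walk sees:

  struct agg { int x, y; };
  int callee (struct agg *);

  int f (int c, int t)
  {
    struct agg a;
    a.x = 1;        /* Defines a dom virtual operand: candidate constant.  */
    if (c)
      a.y = t;      /* Non-dom: reaches the call only through a virtual
                       PHI, so it is only checked for clobbering a.x.  */
    return callee (&a);   /* The walk starts from this call's VUSE.  */
  }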
> +
> + for (tree dom_vuse = gimple_vuse (call); dom_vuse; )
> + {
> + gimple *stmt = SSA_NAME_DEF_STMT (dom_vuse);
> + bool exist = visited.add (dom_vuse);
> +
> + gcc_assert (!exist);
> +
> + if (gimple_code (stmt) != GIMPLE_PHI)
> + {
> + if (stmt_may_clobber_ref_p_1 (stmt, &r))
> + {
> + struct ipa_known_agg_contents_list *content =
> + XALLOCA (struct ipa_known_agg_contents_list);
> +
> + if (!extract_mem_content (stmt, arg_base, check_ref, content))
> + break;
> +
> + /* We now have a dom virtual operand; check whether its value can
> + actually reach the call, and put it into the content list only
> + if it is unique. */
> + if (!clobber_by_agg_contents_list_p (all_list, content)
> + && add_to_agg_contents_list (&list, content, false))
> + {
> + struct ipa_known_agg_contents_list *copy =
> + XALLOCA (struct ipa_known_agg_contents_list);
> +
> + *copy = *content;
> + content = copy;
> +
> + item_count += 1;
> + const_count += !!(content->constant);
> +
> + if (item_count == 2 * ipa_max_agg_items
> + || const_count == ipa_max_agg_items)
> + break;
> + }
> + add_to_agg_contents_list (&all_list, content);
> + }
> + dom_vuse = gimple_vuse (stmt);
> + continue;
> + }
> +
> + /* We reached a merge point in the virtual SSA web; process all
> + non-dom virtual operands before handling the next dom virtual operand. */
> + worklist.safe_push (dom_vuse);
> + dom_vuse = NULL_TREE;
> +
> + do
> + {
> + tree vuse = worklist.pop ();
> + gimple *stmt = SSA_NAME_DEF_STMT (vuse);
> + auto_vec<tree, 6> append;
> +
> + /* Non-dom virtual operand should not be the DEFAULT definition. */
> + gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (vuse));
> +
> + if (gimple_code (stmt) != GIMPLE_PHI)
> + {
> + if (stmt_may_clobber_ref_p_1 (stmt, &r))
> + {
> + struct ipa_known_agg_contents_list *content =
> + XALLOCA (struct ipa_known_agg_contents_list);
> +
> + if (!extract_mem_content (stmt, arg_base, check_ref, content))
> + goto finish;
> +
> + add_to_agg_contents_list (&all_list, content);
I don't think your PHI handling works correctly.  If you have
agg.part1 = 0;
if ()
agg.part2 = 1;
call (agg);
then you seem to end up in the code above for agg.part2, because you push
that onto the worklist below.
> + }
> + append.safe_push (gimple_vuse (stmt));
> + }
> + else
> + {
> + for (unsigned i = 0; i < gimple_phi_num_args (stmt); ++i)
> + {
> + tree phi_arg = gimple_phi_arg_def (stmt, i);
> +
> + if (SSA_NAME_IS_DEFAULT_DEF (phi_arg))
> + goto finish;
> +
> + append.safe_push (phi_arg);
Not sure why you first push to append and then move parts to the
worklist below - it seems to only complicate the code.

What you want to do is (probably) use get_continuation_for_phi,
which, for a virtual PHI node and a reference, computes a
virtual operand you can continue your processing with.  Thus something
like

  bitmap visited = NULL;
  dom_vuse = get_continuation_for_phi (stmt, &ref, counter /* limit walking */,
				       &visited, false, NULL, NULL);
  if (visited)
    BITMAP_FREE (visited);

and have ref be

  ao_ref ref;
  ao_ref_init (&ref, arg);

where you can then remove the worklist altogether.

You need to limit the work you do, otherwise this becomes quite
expensive - assign an overall limit on alias queries;
get_continuation_for_phi will decrement the counter for each query it
does and return NULL if the counter reaches zero.  Each
extract_mem_content call should be accounted for as well.
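Concretely, a rough and untested sketch of how the PHI case might then
look - the exact get_continuation_for_phi signature should be checked
against tree-ssa-alias.h, and the query budget below is a made-up value:

  ao_ref ref;
  ao_ref_init (&ref, arg);

  unsigned int counter = 64;	/* Overall alias-query budget (made up).  */
  bitmap visited = NULL;

  for (tree dom_vuse = gimple_vuse (call); dom_vuse; )
    {
      gimple *stmt = SSA_NAME_DEF_STMT (dom_vuse);

      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  /* Let the alias walker pick the virtual operand to continue
	     from; it decrements COUNTER per alias query and returns
	     NULL_TREE once the budget is exhausted.  */
	  dom_vuse = get_continuation_for_phi (stmt, &ref, counter,
					       &visited, false, NULL, NULL);
	  continue;
	}

      /* ... handle a non-PHI definition as in the patch ...  */
      dom_vuse = gimple_vuse (stmt);
    }

  if (visited)
    BITMAP_FREE (visited);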
Richard.
> + }
> + }
> +
> + for (unsigned i = 0; i < append.length (); ++i)
> + {
> + if (visited.add (vuse = append[i]))
> + continue;
> +
> + if (SSA_NAME_IS_DEFAULT_DEF (vuse)
> + || strictly_dominated_by_ssa_p (call_bb, vuse))
> + {
> + /* Found a new dom virtual operand, stop going further until
> + all pending non-dom virtual operands are processed. */
> + gcc_assert (!dom_vuse);
> + dom_vuse = vuse;
> + }
> + else
> + worklist.safe_push (vuse);
> + }
> + } while (!worklist.is_empty ());
> +
> + gcc_assert (dom_vuse);
> +
> + /* A dom virtual operand that has not been processed yet must be in
> + the non-visited state, which the assertion at the top of the loop
> + checks, so remove it from the visited set. */
> + }
> +
> +finish:
>
> /* Third stage just goes over the list and creates an appropriate vector of
> - ipa_agg_jf_item structures out of it, of sourse only if there are
> + ipa_agg_jf_item structures out of it, of course only if there are
> any known constants to begin with. */
>
> if (const_count)
> @@ -1691,6 +1834,7 @@ determine_locally_known_aggregate_parts (gcall *call, tree arg,
> }
> }
>
> +
> /* Return the Ith param type of callee associated with call graph
> edge E. */
>
> @@ -1797,7 +1941,7 @@ ipa_set_jfunc_vr (ipa_jump_func *jf, enum value_range_kind type,
> jf->m_vr = ipa_get_value_range (type, min, max);
> }
>
> -/* Assign to JF a pointer to a value_range just liek TMP but either fetch a
> +/* Assign to JF a pointer to a value_range just like TMP but either fetch a
> copy from ipa_vr_hash_table or allocate a new on in GC memory. */
>
> static void
> @@ -1978,7 +2122,7 @@ ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi,
> || !ipa_get_jf_ancestor_agg_preserved (jfunc))
> && (AGGREGATE_TYPE_P (TREE_TYPE (arg))
> || POINTER_TYPE_P (param_type)))
> - determine_locally_known_aggregate_parts (call, arg, param_type, jfunc);
> + determine_known_aggregate_parts (call, arg, param_type, jfunc);
> }
> if (!useful_context)
> vec_free (args->polymorphic_call_contexts);
> diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c
> new file mode 100644
> index 00000000000..131c4e265bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c
> @@ -0,0 +1,78 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-inline" } */
> +
> +int data1;
> +
> +int callee1(int *v)
> +{
> + if (*v < 2)
> + return 0;
> + else
> + {
> + int t = data1;
> +
> + data1 = *v;
> + *v = t;
> +
> + return 1;
> + }
> +}
> +
> +int __attribute((pure)) callee2(int *v)
> +{
> + if (*v < 2)
> + return 0;
> + else
> + {
> + data1 = v[0] + v[2];
> +
> + return 1;
> + }
> +}
> +
> +int caller1(int c, int *r)
> +{
> + int a = 1;
> +
> + if (c)
> + return callee1(&a);
> + else
> + {
> + *r = 2;
> + return callee1(r);
> + }
> +}
> +
> +int data2[200];
> +int data3;
> +
> +int __attribute((const)) gen_cond(int);
> +
> +int caller2(void)
> +{
> + int i, j;
> + int sum = 0;
> + int a[8];
> +
> + a[0] = 3;
> + for (i = 0; i < 100; i++)
> + {
> + if (gen_cond (i))
> + continue;
> +
> + a[2] = 4;
> + for (j = 0; j < 100; j++)
> + {
> + data2[i + j] = (i ^ j) + data3;
> +
> + sum += callee2(a);
> + }
> + }
> +
> + return sum;
> +}
> +
> +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 1" 1 "cp" } } */
> +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 2" 1 "cp" } } */
> +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 3" 1 "cp" } } */
> +/* { dg-final { scan-ipa-dump-times "offset: 64, cst: 4" 1 "cp" } } */
>