Patch ping (was Re: [WIP PATCH] Improve tail call analysis and inliner EH clobber through variable life analysis (PR tree-optimization/89060))
Jakub Jelinek
jakub@redhat.com
Fri May 3 07:36:00 GMT 2019
Hi!
I'd like to ping this patch for stage1.
Bootstrapped/regtested it again last night successfully.
On Fri, Feb 08, 2019 at 08:36:33AM +0100, Jakub Jelinek wrote:
> The following patch uses a simple data flow to find live (addressable)
> variables at certain spots (for tail call discovery at the point of the
> potential tail call, so that we don't reject tail calls because of variables
> that can be known out of scope already so that people don't have to find
> ugly workarounds if they really need tail calls, and for the recently
> added inline EH pad clobber addition so that we don't add too many
> variables). Bootstrapped/regtested on x86_64-linux and i686-linux.
>
> Haven't gathered statistics on how often does it trigger yet.
> Shall I use double queue (pending/working sbitmaps) to speed it up (guess I
> could gather statistics if that helps reasonably)? But if so, perhaps
> add_scope_conflicts should change too. I haven't shared code with
> what add_scope_conflicts does because it isn't really that big chunk of code
> and would need many modifications to make it generic enough to handle
> add_scope_conflicts needs, plus as I wanted to make it a helper for other
> optimizations, I didn't want to use bb->aux etc. For tail call, I see
> another option to compute it unconditionally once at the start of the pass
> for all may_be_aliased auto_var_in_fn_p vars and then just walk a single
> bb with each (potential) tail call, instead of computing it for every
> potential tail call again (on the other side with perhaps smaller set of
> variables). In that case e.g. vars == NULL argument could imply the
> VAR_P && may_be_aliased && auto_var_in_fn_p test instead of bitmap_bit_p
> test, we could drop the stop_after argument and instead export the _1
> function renamed to something to walk a single bb (or wrapper to it that
> would set up the structure) until stop_after.
>
> Thoughts on this?
>
> 2019-02-08 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/89060
> * tree-ssa-live.h (compute_live_vars, destroy_live_vars): Declare.
> * tree-ssa-live.c: Include gimple-walk.h and cfganal.h.
> (struct compute_live_vars_data): New type.
> (compute_live_vars_visit, compute_live_vars_1, compute_live_vars,
> destroy_live_vars): New functions.
> * tree-tailcall.c (find_tail_calls): Perform variable life analysis
> before punting.
> * tree-inline.h (struct copy_body_data): Add eh_landing_pad_dest
> member.
> * tree-inline.c (add_clobbers_to_eh_landing_pad): Remove BB argument.
> Perform variable life analysis to select variables that really need
> clobbers added.
> (copy_edges_for_bb): Don't call add_clobbers_to_eh_landing_pad here,
> instead set id->eh_landing_pad_dest and assert it is the same.
> (copy_cfg_body): Call it here if id->eh_landing_pad_dest is non-NULL.
>
> * gcc.dg/tree-ssa/pr89060.c: New test.
>
> --- gcc/tree-ssa-live.h.jj 2019-01-01 12:37:16.967978068 +0100
> +++ gcc/tree-ssa-live.h 2019-02-07 19:02:58.233530924 +0100
> @@ -265,6 +265,8 @@ extern tree_live_info_p calculate_live_r
> extern void debug (tree_live_info_d &ref);
> extern void debug (tree_live_info_d *ptr);
> extern void dump_live_info (FILE *, tree_live_info_p, int);
> +extern vec<bitmap_head> compute_live_vars (struct function *, bitmap, gimple *);
> +extern void destroy_live_vars (vec<bitmap_head> &);
>
>
> /* Return TRUE if P is marked as a global in LIVE. */
> --- gcc/tree-ssa-live.c.jj 2019-01-01 12:37:16.055993032 +0100
> +++ gcc/tree-ssa-live.c 2019-02-07 19:34:33.046401259 +0100
> @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.
> #include "stringpool.h"
> #include "attribs.h"
> #include "optinfo.h"
> +#include "gimple-walk.h"
> +#include "cfganal.h"
>
> static void verify_live_on_entry (tree_live_info_p);
>
> @@ -1194,8 +1196,142 @@ calculate_live_ranges (var_map map, bool
>
> return live;
> }
> +
> +/* Data structure for compute_live_vars* functions. */
>
> +struct compute_live_vars_data {
> + /* Vector of bitmaps for live vars at the end of basic blocks,
> + indexed by bb->index. ACTIVE[ENTRY_BLOCK] must be empty bitmap,
> + ACTIVE[EXIT_BLOCK] is used for STOP_AFTER. */
> + vec<bitmap_head> active;
> + /* Work bitmap of currently live variables. */
> + bitmap work;
> + /* Bitmap of interesting variables. Variables with uids not in this
> + bitmap are not tracked. */
> + bitmap vars;
> +};
>
> +/* Callback for walk_stmt_load_store_addr_ops. If OP is a VAR_DECL with
> + uid set in DATA->vars, enter its uid into bitmap DATA->work. */
> +
> +static bool
> +compute_live_vars_visit (gimple *, tree op, tree, void *pdata)
> +{
> + compute_live_vars_data *data = (compute_live_vars_data *) pdata;
> + op = get_base_address (op);
> + if (op && VAR_P (op) && bitmap_bit_p (data->vars, DECL_UID (op)))
> + bitmap_set_bit (data->work, DECL_UID (op));
> + return false;
> +}
> +
> +/* Helper routine for compute_live_vars, calculating the sets of live
> + variables at the end of BB, leaving the result in DATA->work.
> + If STOP_AFTER is non-NULL, stop processing after that stmt. */
> +
> +static void
> +compute_live_vars_1 (basic_block bb, compute_live_vars_data *data,
> + gimple *stop_after)
> +{
> + edge e;
> + edge_iterator ei;
> + gimple_stmt_iterator gsi;
> + walk_stmt_load_store_addr_fn visit = compute_live_vars_visit;
> +
> + bitmap_clear (data->work);
> + FOR_EACH_EDGE (e, ei, bb->preds)
> + bitmap_ior_into (data->work, &data->active[e->src->index]);
> +
> + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> + walk_stmt_load_store_addr_ops (gsi_stmt (gsi), data, NULL, NULL, visit);
> + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> + {
> + gimple *stmt = gsi_stmt (gsi);
> +
> + if (gimple_clobber_p (stmt))
> + {
> + tree lhs = gimple_assign_lhs (stmt);
> + if (VAR_P (lhs) && bitmap_bit_p (data->vars, DECL_UID (lhs)))
> + bitmap_clear_bit (data->work, DECL_UID (lhs));
> + }
> + else if (!is_gimple_debug (stmt))
> + walk_stmt_load_store_addr_ops (stmt, data, visit, visit, visit);
> + if (stmt == stop_after)
> + break;
> + }
> +}
> +
> +/* For function FN and bitmap of automatic variables VARS, compute which
> + of those variables are (might be) live at the end of each basic block.
> + If STOP_AFTER is non-NULL, compute additionally a bitmap of variables
> + live after the STOP_AFTER statement and store that into element 1
> + of the returned vector. */
> +
> +vec<bitmap_head>
> +compute_live_vars (struct function *fn, bitmap vars, gimple *stop_after)
> +{
> + vec<bitmap_head> active;
> +
> + /* We approximate the live range of a stack variable by taking the first
> + mention of its name as starting point(s), and by the end-of-scope
> + death clobber added by gimplify as ending point(s) of the range.
> + This overapproximates in the case we for instance moved an address-taken
> + operation upward, without also moving a dereference to it upwards.
> + But it's conservatively correct as a variable never can hold values
> + before its name is mentioned at least once.
> +
> + We then do a mostly classical bitmap liveness algorithm. */
> +
> + active.create (last_basic_block_for_fn (fn));
> + active.quick_grow (last_basic_block_for_fn (fn));
> + for (int i = 0; i < last_basic_block_for_fn (fn); i++)
> + bitmap_initialize (&active[i], &bitmap_default_obstack);
> +
> + bitmap work = BITMAP_ALLOC (NULL);
> +
> + int *rpo = XNEWVEC (int, last_basic_block_for_fn (fn));
> + int n_bbs = pre_and_rev_post_order_compute_fn (fn, NULL, rpo, false);
> +
> + bool changed = true;
> + compute_live_vars_data data = { active, work, vars };
> + while (changed)
> + {
> + int i;
> + changed = false;
> + for (i = 0; i < n_bbs; i++)
> + {
> + basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[i]);
> + compute_live_vars_1 (bb, &data, NULL);
> + if (bitmap_ior_into (&active[bb->index], work))
> + changed = true;
> + }
> + }
> +
> + free (rpo);
> +
> + if (stop_after)
> + {
> + basic_block bb = gimple_bb (stop_after);
> + compute_live_vars_1 (bb, &data, stop_after);
> + bitmap_copy (&active[EXIT_BLOCK], work);
> + }
> +
> + BITMAP_FREE (work);
> +
> + return active;
> +}
> +
> +/* Destroy what compute_live_vars has returned when it is no longer needed. */
> +
> +void
> +destroy_live_vars (vec<bitmap_head> &active)
> +{
> + unsigned len = active.length ();
> + for (unsigned i = 0; i < len; i++)
> + bitmap_clear (&active[i]);
> +
> + active.release ();
> +}
> +
> /* Output partition map MAP to file F. */
>
> void
> --- gcc/tree-tailcall.c.jj 2019-01-01 12:37:21.359906007 +0100
> +++ gcc/tree-tailcall.c 2019-02-07 19:20:15.496501224 +0100
> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.
> #include "cfgloop.h"
> #include "common/common-target.h"
> #include "ipa-utils.h"
> +#include "tree-ssa-live.h"
>
> /* The file implements the tail recursion elimination. It is also used to
> analyze the tail calls in general, passing the results to the rtl level
> @@ -515,6 +516,7 @@ find_tail_calls (basic_block bb, struct
> /* Make sure the tail invocation of this function does not indirectly
> refer to local variables. (Passing variables directly by value
> is OK.) */
> + bitmap local_decls = NULL;
> FOR_EACH_LOCAL_DECL (cfun, idx, var)
> {
> if (TREE_CODE (var) != PARM_DECL
> @@ -522,6 +524,28 @@ find_tail_calls (basic_block bb, struct
> && may_be_aliased (var)
> && (ref_maybe_used_by_stmt_p (call, var)
> || call_may_clobber_ref_p (call, var)))
> + {
> + if (!VAR_P (var))
> + {
> + if (local_decls)
> + BITMAP_FREE (local_decls);
> + return;
> + }
> + if (local_decls == NULL)
> + local_decls = BITMAP_ALLOC (NULL);
> + bitmap_set_bit (local_decls, DECL_UID (var));
> + }
> + }
> +
> + /* If any such variables are found, try harder using variable life
> + tracking to see if those variables aren't already out of scope. */
> + if (local_decls)
> + {
> + vec<bitmap_head> live = compute_live_vars (cfun, local_decls, call);
> + bool any_live_p = !bitmap_empty_p (&live[EXIT_BLOCK]);
> + destroy_live_vars (live);
> + BITMAP_FREE (local_decls);
> + if (any_live_p)
> return;
> }
>
> --- gcc/tree-inline.h.jj 2019-01-18 11:06:53.526717141 +0100
> +++ gcc/tree-inline.h 2019-02-07 19:46:45.090363784 +0100
> @@ -156,6 +156,10 @@ struct copy_body_data
> when inlining a call within an OpenMP SIMD-on-SIMT loop. */
> vec<tree> *dst_simt_vars;
>
> + /* Basic block to which clobbers for local variables from the inline
> + function that need to live in memory should be added. */
> + basic_block eh_landing_pad_dest;
> +
> /* If clobbers for local variables from the inline function
> that need to live in memory should be added to EH landing pads
> outside of the inlined function, this should be the number
> --- gcc/tree-inline.c.jj 2019-01-27 12:55:19.639502799 +0100
> +++ gcc/tree-inline.c 2019-02-07 20:54:11.237908304 +0100
> @@ -61,6 +61,7 @@ along with GCC; see the file COPYING3.
> #include "attribs.h"
> #include "sreal.h"
> #include "tree-cfgcleanup.h"
> +#include "tree-ssa-live.h"
>
> /* I'm not real happy about this, but we need to handle gimple and
> non-gimple trees. */
> @@ -2191,12 +2192,14 @@ update_ssa_across_abnormal_edges (basic_
> }
>
> /* Insert clobbers for automatic variables of inlined ID->src_fn
> - function at the start of basic block BB. */
> + function at the start of basic block ID->eh_landing_pad_dest. */
>
> static void
> -add_clobbers_to_eh_landing_pad (basic_block bb, copy_body_data *id)
> +add_clobbers_to_eh_landing_pad (copy_body_data *id)
> {
> tree var;
> + basic_block bb = id->eh_landing_pad_dest;
> + bitmap vars = NULL;
> unsigned int i;
> FOR_EACH_VEC_SAFE_ELT (id->src_cfun->local_decls, i, var)
> if (VAR_P (var)
> @@ -2218,12 +2221,44 @@ add_clobbers_to_eh_landing_pad (basic_bl
> && !is_gimple_reg (new_var)
> && auto_var_in_fn_p (new_var, id->dst_fn))
> {
> + if (vars == NULL)
> + vars = BITMAP_ALLOC (NULL);
> + bitmap_set_bit (vars, DECL_UID (var));
> + }
> + }
> + if (vars == NULL)
> + return;
> +
> + vec<bitmap_head> live = compute_live_vars (id->src_cfun, vars, NULL);
> + FOR_EACH_VEC_SAFE_ELT (id->src_cfun->local_decls, i, var)
> + if (VAR_P (var) && bitmap_bit_p (vars, DECL_UID (var)))
> + {
> + edge e;
> + edge_iterator ei;
> + bool needed = false;
> + FOR_EACH_EDGE (e, ei, bb->preds)
> + if ((e->flags & EDGE_EH) != 0
> + && e->src->index >= id->add_clobbers_to_eh_landing_pads)
> + {
> + basic_block src_bb = (basic_block) e->src->aux;
> +
> + if (bitmap_bit_p (&live[src_bb->index], DECL_UID (var)))
> + {
> + needed = true;
> + break;
> + }
> + }
> + if (needed)
> + {
> + tree new_var = *id->decl_map->get (var);
> gimple_stmt_iterator gsi = gsi_after_labels (bb);
> tree clobber = build_clobber (TREE_TYPE (new_var));
> gimple *clobber_stmt = gimple_build_assign (new_var, clobber);
> gsi_insert_before (&gsi, clobber_stmt, GSI_NEW_STMT);
> }
> }
> + destroy_live_vars (live);
> + BITMAP_FREE (vars);
> }
>
> /* Copy edges from BB into its copy constructed earlier, scale profile
> @@ -2358,8 +2393,10 @@ copy_edges_for_bb (basic_block bb, profi
> e->probability = profile_probability::never ();
> if (e->dest->index < id->add_clobbers_to_eh_landing_pads)
> {
> - add_clobbers_to_eh_landing_pad (e->dest, id);
> - id->add_clobbers_to_eh_landing_pads = 0;
> + if (id->eh_landing_pad_dest == NULL)
> + id->eh_landing_pad_dest = e->dest;
> + else
> + gcc_assert (id->eh_landing_pad_dest == e->dest);
> }
> }
> }
> @@ -2802,6 +2839,12 @@ copy_cfg_body (copy_body_data * id,
> need_debug_cleanup |= copy_edges_for_bb (bb, num, den, exit_block_map,
> abnormal_goto_dest, id);
>
> + if (id->eh_landing_pad_dest)
> + {
> + add_clobbers_to_eh_landing_pad (id);
> + id->eh_landing_pad_dest = NULL;
> + }
> +
> if (new_entry)
> {
> edge e = make_edge (entry_block_map, (basic_block)new_entry->aux,
> --- gcc/testsuite/gcc.dg/tree-ssa/pr89060.c.jj 2019-02-07 19:39:13.481790192 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr89060.c 2019-02-07 19:40:17.532736916 +0100
> @@ -0,0 +1,26 @@
> +/* PR tree-optimization/89060 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-tailc" } */
> +/* { dg-final { scan-tree-dump-not "baz \\\(1\\\); \\\[tail call\\\]" "tailc" } } */
> +/* { dg-final { scan-tree-dump "baz \\\(2\\\); \\\[tail call\\\]" "tailc" } } */
> +
> +void qux (char *, int n);
> +int baz (int);
> +
> +int
> +foo (int n)
> +{
> + char buf[64];
> + qux (buf, n);
> + return baz (1);
> +}
> +
> +int
> +bar (int n)
> +{
> + {
> + char buf[64];
> + qux (buf, n);
> + }
> + return baz (2);
> +}
Jakub
More information about the Gcc-patches
mailing list