This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [gomp4, trunk] Fix up simd loop handling (PR tree-optimization/58392)
- From: Richard Biener <rguenther at suse dot de>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: Richard Henderson <rth at redhat dot com>, gcc-patches at gcc dot gnu dot org
- Date: Fri, 13 Sep 2013 09:34:40 +0200 (CEST)
- Subject: Re: [gomp4, trunk] Fix up simd loop handling (PR tree-optimization/58392)
- Authentication-results: sourceware.org; auth=none
- References: <20130912143611 dot GW1817 at tucnak dot redhat dot com>
On Thu, 12 Sep 2013, Jakub Jelinek wrote:
> Hi!
>
> This patch fixes PR58392 and some related issues I've discovered.
> move_sese_region_to_fn wasn't remapping loop->simduid VAR_DECL,
> and wasn't setting dest_cfun->has_{simduid,force_vect}_loops
> when needed, the inliner wasn't copying/remapping simduid nor
> force_vect (just conservatively setting cfun->has_{simduid,force_vect}_loops
> if src_cfun had those set) and omp-low.c didn't handle outer var refs in
> simd loops nested in non-parallel/task contexts well.
> Tested on x86_64-linux. Ok for gomp-4_0-branch and trunk (just the part
> without the testcase for now, because Aldy's backport of the stuff this
> is fixing is already on the trunk)?
Ok.
Thanks,
Richard.
> 2013-09-12 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/58392
> * tree-cfg.c (move_sese_region_to_fn): Rename loop variable
> to avoid shadowing of outer loop variable. If
> saved_cfun->has_simduid_loops or saved_cfun->has_force_vect_loops,
> replace_by_duplicate_decl simduid of loops that have it set and
> set dest_cfun->has_simduid_loops and/or
> dest_cfun->has_force_vect_loops.
> * omp-low.c (build_outer_var_ref): Call maybe_lookup_decl_in_outer_ctx
> instead of maybe_lookup_decl.
> * tree-inline.c (copy_loops): Change blocks_to_copy argument to id.
> Use id->blocks_to_copy instead of blocks_to_copy. Adjust recursive
> call. Copy over force_vect and copy and remap simduid. Set
> cfun->has_simduid_loops and/or cfun->has_force_vect_loops.
> (copy_cfg_body): Remove blocks_to_copy argument. Use
> id->blocks_to_copy instead of blocks_to_copy. Adjust copy_loops
> caller. Don't set cfun->has_simduid_loops and/or
> cfun->has_force_vect_loops here.
> (copy_body): Remove blocks_to_copy argument. Adjust copy_cfg_body
> caller.
> (expand_call_inline, tree_function_versioning): Adjust copy_body
> callers.
>
> * testsuite/libgomp.c/pr58392.c: New test.
>
> --- gcc/tree-cfg.c.jj 2013-09-05 09:19:03.000000000 +0200
> +++ gcc/tree-cfg.c 2013-09-12 15:19:40.103267115 +0200
> @@ -6777,10 +6777,10 @@ move_sese_region_to_fn (struct function
> if (bb->loop_father->header == bb
> && loop_outer (bb->loop_father) == loop)
> {
> - struct loop *loop = bb->loop_father;
> + struct loop *this_loop = bb->loop_father;
> flow_loop_tree_node_remove (bb->loop_father);
> - flow_loop_tree_node_add (get_loop (dest_cfun, 0), loop);
> - fixup_loop_arrays_after_move (saved_cfun, cfun, loop);
> + flow_loop_tree_node_add (get_loop (dest_cfun, 0), this_loop);
> + fixup_loop_arrays_after_move (saved_cfun, cfun, this_loop);
> }
>
> /* Remove loop exits from the outlined region. */
> @@ -6835,6 +6835,23 @@ move_sese_region_to_fn (struct function
> outer; outer = loop_outer (outer))
> outer->num_nodes -= bbs.length ();
>
> + if (saved_cfun->has_simduid_loops || saved_cfun->has_force_vect_loops)
> + {
> + struct loop *aloop;
> + for (i = 0; vec_safe_iterate (loops->larray, i, &aloop); i++)
> + if (aloop != NULL)
> + {
> + if (aloop->simduid)
> + {
> + replace_by_duplicate_decl (&aloop->simduid, d.vars_map,
> + d.to_context);
> + dest_cfun->has_simduid_loops = true;
> + }
> + if (aloop->force_vect)
> + dest_cfun->has_force_vect_loops = true;
> + }
> + }
> +
> /* Rewire BLOCK_SUBBLOCKS of orig_block. */
> if (orig_block)
> {
> --- gcc/omp-low.c.jj 2013-09-11 18:19:39.000000000 +0200
> +++ gcc/omp-low.c 2013-09-12 13:55:34.081148580 +0200
> @@ -976,7 +976,7 @@ build_outer_var_ref (tree var, omp_conte
> if (ctx->outer && is_taskreg_ctx (ctx))
> x = lookup_decl (var, ctx->outer);
> else if (ctx->outer)
> - x = maybe_lookup_decl (var, ctx->outer);
> + x = maybe_lookup_decl_in_outer_ctx (var, ctx);
> if (x == NULL_TREE)
> x = var;
> }
> --- gcc/tree-inline.c.jj 2013-08-27 20:53:28.000000000 +0200
> +++ gcc/tree-inline.c 2013-09-12 16:05:58.348124256 +0200
> @@ -2254,14 +2254,14 @@ maybe_move_debug_stmts_to_successors (co
> as siblings of DEST_PARENT. */
>
> static void
> -copy_loops (bitmap blocks_to_copy,
> +copy_loops (copy_body_data *id,
> struct loop *dest_parent, struct loop *src_parent)
> {
> struct loop *src_loop = src_parent->inner;
> while (src_loop)
> {
> - if (!blocks_to_copy
> - || bitmap_bit_p (blocks_to_copy, src_loop->header->index))
> + if (!id->blocks_to_copy
> + || bitmap_bit_p (id->blocks_to_copy, src_loop->header->index))
> {
> struct loop *dest_loop = alloc_loop ();
>
> @@ -2285,8 +2285,19 @@ copy_loops (bitmap blocks_to_copy,
> place_new_loop (cfun, dest_loop);
> flow_loop_tree_node_add (dest_parent, dest_loop);
>
> + if (src_loop->simduid)
> + {
> + dest_loop->simduid = remap_decl (src_loop->simduid, id);
> + cfun->has_simduid_loops = true;
> + }
> + if (src_loop->force_vect)
> + {
> + dest_loop->force_vect = true;
> + cfun->has_force_vect_loops = true;
> + }
> +
> /* Recurse. */
> - copy_loops (blocks_to_copy, dest_loop, src_loop);
> + copy_loops (id, dest_loop, src_loop);
> }
> src_loop = src_loop->next;
> }
> @@ -2315,7 +2326,7 @@ redirect_all_calls (copy_body_data * id,
> static tree
> copy_cfg_body (copy_body_data * id, gcov_type count, int frequency_scale,
> basic_block entry_block_map, basic_block exit_block_map,
> - bitmap blocks_to_copy, basic_block new_entry)
> + basic_block new_entry)
> {
> tree callee_fndecl = id->src_fn;
> /* Original cfun for the callee, doesn't change. */
> @@ -2379,7 +2390,7 @@ copy_cfg_body (copy_body_data * id, gcov
>
> /* Use aux pointers to map the original blocks to copy. */
> FOR_EACH_BB_FN (bb, cfun_to_copy)
> - if (!blocks_to_copy || bitmap_bit_p (blocks_to_copy, bb->index))
> + if (!id->blocks_to_copy || bitmap_bit_p (id->blocks_to_copy, bb->index))
> {
> basic_block new_bb = copy_bb (id, bb, frequency_scale, count_scale);
> bb->aux = new_bb;
> @@ -2393,8 +2404,8 @@ copy_cfg_body (copy_body_data * id, gcov
> bool can_make_abormal_goto
> = id->gimple_call && stmt_can_make_abnormal_goto (id->gimple_call);
> FOR_ALL_BB_FN (bb, cfun_to_copy)
> - if (!blocks_to_copy
> - || (bb->index > 0 && bitmap_bit_p (blocks_to_copy, bb->index)))
> + if (!id->blocks_to_copy
> + || (bb->index > 0 && bitmap_bit_p (id->blocks_to_copy, bb->index)))
> need_debug_cleanup |= copy_edges_for_bb (bb, count_scale, exit_block_map,
> can_make_abormal_goto);
>
> @@ -2409,12 +2420,10 @@ copy_cfg_body (copy_body_data * id, gcov
> if (loops_for_fn (src_cfun) != NULL
> && current_loops != NULL)
> {
> - copy_loops (blocks_to_copy, entry_block_map->loop_father,
> + copy_loops (id, entry_block_map->loop_father,
> get_loop (src_cfun, 0));
> /* Defer to cfgcleanup to update loop-father fields of basic-blocks. */
> loops_state_set (LOOPS_NEED_FIXUP);
> - cfun->has_force_vect_loops |= src_cfun->has_force_vect_loops;
> - cfun->has_simduid_loops |= src_cfun->has_simduid_loops;
> }
>
> /* If the loop tree in the source function needed fixup, mark the
> @@ -2424,8 +2433,8 @@ copy_cfg_body (copy_body_data * id, gcov
>
> if (gimple_in_ssa_p (cfun))
> FOR_ALL_BB_FN (bb, cfun_to_copy)
> - if (!blocks_to_copy
> - || (bb->index > 0 && bitmap_bit_p (blocks_to_copy, bb->index)))
> + if (!id->blocks_to_copy
> + || (bb->index > 0 && bitmap_bit_p (id->blocks_to_copy, bb->index)))
> copy_phis_for_bb (bb, id);
>
> FOR_ALL_BB_FN (bb, cfun_to_copy)
> @@ -2597,7 +2606,7 @@ copy_tree_body (copy_body_data *id)
> static tree
> copy_body (copy_body_data *id, gcov_type count, int frequency_scale,
> basic_block entry_block_map, basic_block exit_block_map,
> - bitmap blocks_to_copy, basic_block new_entry)
> + basic_block new_entry)
> {
> tree fndecl = id->src_fn;
> tree body;
> @@ -2605,7 +2614,7 @@ copy_body (copy_body_data *id, gcov_type
> /* If this body has a CFG, walk CFG and copy. */
> gcc_assert (ENTRY_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (fndecl)));
> body = copy_cfg_body (id, count, frequency_scale, entry_block_map, exit_block_map,
> - blocks_to_copy, new_entry);
> + new_entry);
> copy_debug_stmts (id);
>
> return body;
> @@ -4209,7 +4218,7 @@ expand_call_inline (basic_block bb, gimp
> duplicate our body before altering anything. */
> copy_body (id, bb->count,
> GCOV_COMPUTE_SCALE (cg_edge->frequency, CGRAPH_FREQ_BASE),
> - bb, return_block, NULL, NULL);
> + bb, return_block, NULL);
>
> /* Reset the escaped solution. */
> if (cfun->gimple_df)
> @@ -5336,7 +5345,7 @@ tree_function_versioning (tree old_decl,
>
> /* Copy the Function's body. */
> copy_body (&id, old_entry_block->count, REG_BR_PROB_BASE,
> - ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, blocks_to_copy, new_entry);
> + ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, new_entry);
>
> /* Renumber the lexical scoping (non-code) blocks consecutively. */
> number_blocks (new_decl);
> --- libgomp/testsuite/libgomp.c/pr58392.c.jj 2013-09-12 16:09:10.493065328 +0200
> +++ libgomp/testsuite/libgomp.c/pr58392.c 2013-09-12 16:08:57.000000000 +0200
> @@ -0,0 +1,58 @@
> +/* PR tree-optimization/58392 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +/* { dg-additional-options "-msse2" { target sse2_runtime } } */
> +/* { dg-additional-options "-mavx" { target avx_runtime } } */
> +
> +extern void abort (void);
> +int d[32 * 32];
> +
> +__attribute__((noinline, noclone)) int
> +foo (int a, int b)
> +{
> + int j, c = 0;
> + #pragma omp parallel for reduction(+: c)
> + for (j = 0; j < a; j += 32)
> + {
> + int l;
> + #pragma omp simd reduction(+: c)
> + for (l = 0; l < b; ++l)
> + c += d[j + l];
> + }
> + return c;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +bar (int a)
> +{
> + int j, c = 0;
> + #pragma omp parallel for simd reduction(+: c)
> + for (j = 0; j < a; ++j)
> + c += d[j];
> + return c;
> +}
> +
> +__attribute__((noinline)) static int
> +baz (int a)
> +{
> + int j, c = 0;
> + #pragma omp simd reduction(+: c)
> + for (j = 0; j < a; ++j)
> + c += d[j];
> + return c;
> +}
> +
> +int
> +main ()
> +{
> + int i;
> + for (i = 0; i < 32 * 32; i++)
> + d[i] = (i & 31);
> + if (foo (32 * 32, 32) != (31 * 32 / 2) * 32)
> + abort ();
> + if (bar (32 * 32) != (31 * 32 / 2) * 32)
> + abort ();
> + if (baz (32 * 32) != (31 * 32 / 2) * 32)
> + abort ();
> + return 0;
> +}
>
> Jakub
>
>
--
Richard Biener <rguenther@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend