[PATCH] rtl-optimization/96812 - remap dependence info on RTL loop unrolling

Richard Biener rguenther@suse.de
Tue Sep 1 07:09:10 GMT 2020


On Thu, 27 Aug 2020, Richard Biener wrote:

> This carries over the PR87609 fix also to RTL loop unrolling.  The
> gcc.dg/torture/pr90328.c testcase otherwise is miscompiled with
> the tree-ssa-address.c hunk (or alternatively with -fno-ivopts
> on master).  I've tried to find the correct abstraction and
> adjusted two other duplicate_insn_chain users for which I do not
> have testcases.  There may be other insn-chain copying routines
> that could be affected but hopefully most appropriately go through
> CFG hooks.
> 
> Bootstrap / regtest running on x86_64-unknown-linux-gnu.
> 
> Any comments?

None, so I'll push this later today after re-boostrap/test.

Richard.

> Thanks,
> Richard.
> 
> 2020-08-27  Richard Biener  <rguenther@suse.de>
> 
> 	PR rtl-optimization/96812
> 	* tree-ssa-address.c (copy_ref_info): Also copy dependence info.
> 	* cfgrtl.h (duplicate_insn_chain): Adjust prototype.
> 	* cfgrtl.c (duplicate_insn_chain): Remap dependence info
> 	if requested.
> 	(cfg_layout_duplicate_bb): Make sure we remap dependence info.
> 	* modulo-sched.c (duplicate_insns_of_cycles): Remap dependence
> 	info.
> 	(generate_prolog_epilog): Adjust.
> 	* config/c6x/c6x.c (hwloop_optimize): Remap dependence info.
> ---
>  gcc/cfgrtl.c           | 60 ++++++++++++++++++++++++++++++++++++++----
>  gcc/cfgrtl.h           |  3 ++-
>  gcc/config/c6x/c6x.c   |  3 ++-
>  gcc/modulo-sched.c     | 10 ++++---
>  gcc/tree-ssa-address.c | 10 +++++++
>  5 files changed, 75 insertions(+), 11 deletions(-)
> 
> diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
> index 03fa688fed6..eb5ccd42ed7 100644
> --- a/gcc/cfgrtl.c
> +++ b/gcc/cfgrtl.c
> @@ -61,6 +61,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "cfgloop.h"
>  #include "tree-pass.h"
>  #include "print-rtl.h"
> +#include "rtl-iter.h"
> +#include "gimplify.h"
>  
>  /* Disable warnings about missing quoting in GCC diagnostics.  */
>  #if __GNUC__ >= 10
> @@ -4199,7 +4201,8 @@ cfg_layout_can_duplicate_bb_p (const_basic_block bb)
>  }
>  
>  rtx_insn *
> -duplicate_insn_chain (rtx_insn *from, rtx_insn *to)
> +duplicate_insn_chain (rtx_insn *from, rtx_insn *to,
> +		      class loop *loop, copy_bb_data *id)
>  {
>    rtx_insn *insn, *next, *copy;
>    rtx_note *last;
> @@ -4228,6 +4231,51 @@ duplicate_insn_chain (rtx_insn *from, rtx_insn *to)
>  	      && ANY_RETURN_P (JUMP_LABEL (insn)))
>  	    JUMP_LABEL (copy) = JUMP_LABEL (insn);
>            maybe_copy_prologue_epilogue_insn (insn, copy);
> +	  /* If requested remap dependence info of cliques brought in
> +	     via inlining.  */
> +	  if (id)
> +	    {
> +	      subrtx_iterator::array_type array;
> +	      FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> +		if (MEM_P (*iter) && MEM_EXPR (*iter))
> +		  {
> +		    tree op = MEM_EXPR (*iter);
> +		    if (TREE_CODE (op) == WITH_SIZE_EXPR)
> +		      op = TREE_OPERAND (op, 0);
> +		    while (handled_component_p (op))
> +		      op = TREE_OPERAND (op, 0);
> +		    if ((TREE_CODE (op) == MEM_REF
> +			 || TREE_CODE (op) == TARGET_MEM_REF)
> +			&& MR_DEPENDENCE_CLIQUE (op) > 1
> +			&& (!loop
> +			    || (MR_DEPENDENCE_CLIQUE (op)
> +				!= loop->owned_clique)))
> +		      {
> +			if (!id->dependence_map)
> +			  id->dependence_map = new hash_map<dependence_hash,
> +			      unsigned short>;
> +			bool existed;
> +			unsigned short &newc = id->dependence_map->get_or_insert
> +					 (MR_DEPENDENCE_CLIQUE (op), &existed);
> +			if (!existed)
> +			  {
> +			    gcc_assert
> +			      (MR_DEPENDENCE_CLIQUE (op) <= cfun->last_clique);
> +			    newc = ++cfun->last_clique;
> +			  }
> +			/* We cannot adjust MR_DEPENDENCE_CLIQUE in-place
> +			   since MEM_EXPR is shared so make a copy and
> +			   walk to the subtree again.  */
> +			tree new_expr = unshare_expr (MEM_EXPR (*iter));
> +			if (TREE_CODE (new_expr) == WITH_SIZE_EXPR)
> +			  new_expr = TREE_OPERAND (new_expr, 0);
> +			while (handled_component_p (new_expr))
> +			  new_expr = TREE_OPERAND (new_expr, 0);
> +			MR_DEPENDENCE_CLIQUE (new_expr) = newc;
> +			set_mem_expr (const_cast <rtx> (*iter), new_expr);
> +		      }
> +		  }
> +	    }
>  	  break;
>  
>  	case JUMP_TABLE_DATA:
> @@ -4292,12 +4340,14 @@ duplicate_insn_chain (rtx_insn *from, rtx_insn *to)
>  /* Create a duplicate of the basic block BB.  */
>  
>  static basic_block
> -cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
> +cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *id)
>  {
>    rtx_insn *insn;
>    basic_block new_bb;
>  
> -  insn = duplicate_insn_chain (BB_HEAD (bb), BB_END (bb));
> +  class loop *loop = (id && current_loops) ? bb->loop_father : NULL;
> +
> +  insn = duplicate_insn_chain (BB_HEAD (bb), BB_END (bb), loop, id);
>    new_bb = create_basic_block (insn,
>  			       insn ? get_last_insn () : NULL,
>  			       EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
> @@ -4308,7 +4358,7 @@ cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
>        insn = BB_HEADER (bb);
>        while (NEXT_INSN (insn))
>  	insn = NEXT_INSN (insn);
> -      insn = duplicate_insn_chain (BB_HEADER (bb), insn);
> +      insn = duplicate_insn_chain (BB_HEADER (bb), insn, loop, id);
>        if (insn)
>  	BB_HEADER (new_bb) = unlink_insn_chain (insn, get_last_insn ());
>      }
> @@ -4318,7 +4368,7 @@ cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
>        insn = BB_FOOTER (bb);
>        while (NEXT_INSN (insn))
>  	insn = NEXT_INSN (insn);
> -      insn = duplicate_insn_chain (BB_FOOTER (bb), insn);
> +      insn = duplicate_insn_chain (BB_FOOTER (bb), insn, loop, id);
>        if (insn)
>  	BB_FOOTER (new_bb) = unlink_insn_chain (insn, get_last_insn ());
>      }
> diff --git a/gcc/cfgrtl.h b/gcc/cfgrtl.h
> index ae03e82650d..ae62d6cf05c 100644
> --- a/gcc/cfgrtl.h
> +++ b/gcc/cfgrtl.h
> @@ -49,7 +49,8 @@ extern bool purge_all_dead_edges (void);
>  extern bool fixup_abnormal_edges (void);
>  extern rtx_insn *unlink_insn_chain (rtx_insn *, rtx_insn *);
>  extern void relink_block_chain (bool);
> -extern rtx_insn *duplicate_insn_chain (rtx_insn *, rtx_insn *);
> +extern rtx_insn *duplicate_insn_chain (rtx_insn *, rtx_insn *,
> +				       class loop *, class copy_bb_data *);
>  extern void cfg_layout_initialize (int);
>  extern void cfg_layout_finalize (void);
>  extern void break_superblocks (void);
> diff --git a/gcc/config/c6x/c6x.c b/gcc/config/c6x/c6x.c
> index 6bd793bdd21..9aa7ef0620c 100644
> --- a/gcc/config/c6x/c6x.c
> +++ b/gcc/config/c6x/c6x.c
> @@ -5600,7 +5600,8 @@ hwloop_optimize (hwloop_info loop)
>        int j;
>        rtx_insn *this_iter;
>  
> -      this_iter = duplicate_insn_chain (head_insn, tail_insn);
> +      copy_bb_data id;
> +      this_iter = duplicate_insn_chain (head_insn, tail_insn, NULL, &id);
>        j = 0;
>        while (this_iter)
>  	{
> diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
> index 86701e05550..6f699a874e3 100644
> --- a/gcc/modulo-sched.c
> +++ b/gcc/modulo-sched.c
> @@ -1085,10 +1085,11 @@ optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
>  
>  static void
>  duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
> -			   int to_stage, rtx count_reg)
> +			   int to_stage, rtx count_reg, class loop *loop)
>  {
>    int row;
>    ps_insn_ptr ps_ij;
> +  copy_bb_data id;
>  
>    for (row = 0; row < ps->ii; row++)
>      for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
> @@ -1113,7 +1114,8 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
>  	if (from_stage <= last_u && to_stage >= first_u)
>  	  {
>  	    if (u < ps->g->num_nodes)
> -	      duplicate_insn_chain (ps_first_note (ps, u), u_insn);
> +	      duplicate_insn_chain (ps_first_note (ps, u), u_insn,
> +				    loop, &id);
>  	    else
>  	      emit_insn (copy_rtx (PATTERN (u_insn)));
>  	  }
> @@ -1151,7 +1153,7 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
>      }
>  
>    for (i = 0; i < last_stage; i++)
> -    duplicate_insns_of_cycles (ps, 0, i, count_reg);
> +    duplicate_insns_of_cycles (ps, 0, i, count_reg, loop);
>  
>    /* Put the prolog on the entry edge.  */
>    e = loop_preheader_edge (loop);
> @@ -1165,7 +1167,7 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
>    start_sequence ();
>  
>    for (i = 0; i < last_stage; i++)
> -    duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
> +    duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg, loop);
>  
>    /* Put the epilogue on the exit edge.  */
>    gcc_assert (single_exit (loop));
> diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c
> index 93eae192212..baf495a00bc 100644
> --- a/gcc/tree-ssa-address.c
> +++ b/gcc/tree-ssa-address.c
> @@ -1044,6 +1044,16 @@ copy_ref_info (tree new_ref, tree old_ref)
>  	}
>      }
>  
> +  /* We can transfer dependence info.  */
> +  if (!MR_DEPENDENCE_CLIQUE (new_ref)
> +      && (TREE_CODE (base) == MEM_REF
> +	  || TREE_CODE (base) == TARGET_MEM_REF)
> +      && MR_DEPENDENCE_CLIQUE (base))
> +    {
> +      MR_DEPENDENCE_CLIQUE (new_ref) = MR_DEPENDENCE_CLIQUE (base);
> +      MR_DEPENDENCE_BASE (new_ref) = MR_DEPENDENCE_BASE (base);
> +    }
> +
>    /* And alignment info.  Note we cannot transfer misalignment info
>       since that sits on the SSA name but this is flow-sensitive info
>       which we cannot transfer in this generic routine.  */
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


More information about the Gcc-patches mailing list