[PATCH][RFC] Fix PR87609 - dependence info copying

Richard Biener rguenther@suse.de
Mon Oct 15 13:31:00 GMT 2018


During CFG transforms like loop unrolling or peeling (or jump threading
or loop heder copying performing essentially the latter) we need
to remap dependence cliques similar to how we need to do during inlining
to avoid false non-dependences across iterations.

We've talked about this a bit in the context of optimization passes
wanting to transfer the knowledge they impose on parts of the IL
via runtime checks to followup passes (in which case also hoisting
across such checks is problematic).

PR87609 now shows a case where this is important for code that
was brought in via inlining as well.

The patch does this by hooking into copy_bbs() as the primitive
(hopefully exclusively) used by such transforms.  I've not yet
touched the RTL parts which need to remap cliques as they appear
in MEM_EXPRs of MEM_ATTRs of MEMs copied.  As said above both
jump threading and unrolling are affected.

The patch as-is may end up pessimizing jump threaded code that is
_not_ performing peeling.  To avoid this the interface I chose
might not be optimal (there's no way to disable copy_bbs behavior
at the moment).

Similar the remapping is not strictly needed for loop versioning.

One option might be to pass down the copy_bb_info instance to
copy_bbs from the callers which should know whether it's necessary
to remap dependence info or not.

I've also added a checker that we do not end up with dependence
info on addresses (but at the moment I do remap that in the
patch anyhow).

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

Comments appreciated - Bin, you had sth similar IIRC?

Thanks,
Richard.

2018-10-15  Richard Biener  <rguenther@suse.de>

	* cfghooks.h (dependence_hash): New typedef.
	(struct copy_bb_data): New type.
	(cfg_hooks::duplicate_block): Adjust to take a copy_bb_data argument.
	(duplicate_block): Likewise.
	* cfghooks.c (duplicate_block): Pass down copy_bb_data.
	(copy_bbs): Create and pass down copy_bb_data.
	* cfgrtl.c (cfg_layout_duplicate_bb): Adjust.
	(rtl_duplicate_bb): Likewise.
	* tree-cfg.c (verify_address): Verify addresses do not have
	dependence info.
	(gimple_duplicate_bb): If the copy_bb_data arg is not NULL
	remap dependence info.

	* gcc.dg/torture/restrict-7.c: New testcase.

Index: gcc/cfghooks.h
===================================================================
--- gcc/cfghooks.h	(revision 265162)
+++ gcc/cfghooks.h	(working copy)
@@ -54,6 +54,19 @@ struct profile_record
   bool run;
 };
 
+typedef int_hash <unsigned short, 0> dependence_hash;
+
+/* Optional data for duplicate_block.   */
+
+struct copy_bb_data
+{
+  copy_bb_data() : dependence_map (NULL) {}
+  ~copy_bb_data () { delete dependence_map; }
+
+  /* A map from the copied BBs dependence info cliques to
+     equivalents in the BBs duplicated to.  */
+  hash_map<dependence_hash, unsigned short> *dependence_map;
+};
 
 struct cfg_hooks
 {
@@ -112,7 +125,7 @@ struct cfg_hooks
   bool (*can_duplicate_block_p) (const_basic_block a);
 
   /* Duplicate block A.  */
-  basic_block (*duplicate_block) (basic_block a);
+  basic_block (*duplicate_block) (basic_block a, copy_bb_data *);
 
   /* Higher level functions representable by primitive operations above if
      we didn't have some oddities in RTL and Tree representations.  */
@@ -227,7 +240,8 @@ extern void tidy_fallthru_edges (void);
 extern void predict_edge (edge e, enum br_predictor predictor, int probability);
 extern bool predicted_by_p (const_basic_block bb, enum br_predictor predictor);
 extern bool can_duplicate_block_p (const_basic_block);
-extern basic_block duplicate_block (basic_block, edge, basic_block);
+extern basic_block duplicate_block (basic_block, edge, basic_block,
+				    copy_bb_data * = NULL);
 extern bool block_ends_with_call_p (basic_block bb);
 extern bool empty_block_p (basic_block);
 extern basic_block split_block_before_cond_jump (basic_block);
Index: gcc/cfghooks.c
===================================================================
--- gcc/cfghooks.c	(revision 265162)
+++ gcc/cfghooks.c	(working copy)
@@ -1066,7 +1066,7 @@ can_duplicate_block_p (const_basic_block
    AFTER.  */
 
 basic_block
-duplicate_block (basic_block bb, edge e, basic_block after)
+duplicate_block (basic_block bb, edge e, basic_block after, copy_bb_data *id)
 {
   edge s, n;
   basic_block new_bb;
@@ -1082,7 +1082,7 @@ duplicate_block (basic_block bb, edge e,
 
   gcc_checking_assert (can_duplicate_block_p (bb));
 
-  new_bb = cfg_hooks->duplicate_block (bb);
+  new_bb = cfg_hooks->duplicate_block (bb, id);
   if (after)
     move_block_after (new_bb, after);
 
@@ -1337,6 +1337,7 @@ copy_bbs (basic_block *bbs, unsigned n,
   unsigned i, j;
   basic_block bb, new_bb, dom_bb;
   edge e;
+  copy_bb_data id;
 
   /* Mark the blocks to be copied.  This is used by edge creation hooks
      to decide whether to reallocate PHI nodes capacity to avoid reallocating
@@ -1349,7 +1350,7 @@ copy_bbs (basic_block *bbs, unsigned n,
     {
       /* Duplicate.  */
       bb = bbs[i];
-      new_bb = new_bbs[i] = duplicate_block (bb, NULL, after);
+      new_bb = new_bbs[i] = duplicate_block (bb, NULL, after, &id);
       after = new_bb;
       if (bb->loop_father)
 	{
Index: gcc/cfgrtl.c
===================================================================
--- gcc/cfgrtl.c	(revision 265162)
+++ gcc/cfgrtl.c	(working copy)
@@ -4234,7 +4234,7 @@ duplicate_insn_chain (rtx_insn *from, rt
 /* Create a duplicate of the basic block BB.  */
 
 static basic_block
-cfg_layout_duplicate_bb (basic_block bb)
+cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
 {
   rtx_insn *insn;
   basic_block new_bb;
@@ -5060,9 +5060,9 @@ rtl_can_remove_branch_p (const_edge e)
 }
 
 static basic_block
-rtl_duplicate_bb (basic_block bb)
+rtl_duplicate_bb (basic_block bb, copy_bb_data *id)
 {
-  bb = cfg_layout_duplicate_bb (bb);
+  bb = cfg_layout_duplicate_bb (bb, id);
   bb->aux = NULL;
   return bb;
 }
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c	(revision 265162)
+++ gcc/tree-cfg.c	(working copy)
@@ -3016,6 +3016,14 @@ verify_address (tree t, bool verify_addr
   while (handled_component_p (base))
     base = TREE_OPERAND (base, 0);
 
+  if ((TREE_CODE (base) == MEM_REF
+       || TREE_CODE (base) == TARGET_MEM_REF)
+      && MR_DEPENDENCE_CLIQUE (base) != 0)
+    {
+      error ("address with dependence info set");
+      return true;
+    }
+
   if (!(VAR_P (base)
 	|| TREE_CODE (base) == PARM_DECL
 	|| TREE_CODE (base) == RESULT_DECL))
@@ -6144,7 +6152,7 @@ gimple_can_duplicate_bb_p (const_basic_b
    preserve SSA form.  */
 
 static basic_block
-gimple_duplicate_bb (basic_block bb)
+gimple_duplicate_bb (basic_block bb, copy_bb_data *id)
 {
   basic_block new_bb;
   gimple_stmt_iterator gsi_tgt;
@@ -6208,6 +6216,33 @@ gimple_duplicate_bb (basic_block bb)
 	      && (!VAR_P (base) || !DECL_HAS_VALUE_EXPR_P (base)))
 	    DECL_NONSHAREABLE (base) = 1;
 	}
+ 
+      if (id)
+	for (unsigned i = 0; i < gimple_num_ops (copy); ++i)
+	  {
+	    tree op = gimple_op (copy, i);
+	    if (!op)
+	      continue;
+	    if (TREE_CODE (op) == ADDR_EXPR
+		|| TREE_CODE (op) == WITH_SIZE_EXPR)
+	      op = TREE_OPERAND (op, 0);
+	    while (handled_component_p (op))
+	      op = TREE_OPERAND (op, 0);
+	    if ((TREE_CODE (op) == MEM_REF
+		 || TREE_CODE (op) == TARGET_MEM_REF)
+		&& MR_DEPENDENCE_CLIQUE (op) != 0)
+	      {
+		if (!id->dependence_map)
+		  id->dependence_map = new hash_map<dependence_hash,
+						    unsigned short>;
+		bool existed;
+		unsigned short &newc = id->dependence_map->get_or_insert
+		    (MR_DEPENDENCE_CLIQUE (op), &existed);
+		if (!existed)
+		  newc = ++cfun->last_clique;
+		MR_DEPENDENCE_CLIQUE (op) = newc;
+	      }
+	  }
 
       /* Create new names for all the definitions created by COPY and
 	 add replacement mappings for each new name.  */
Index: gcc/testsuite/gcc.dg/torture/restrict-7.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/restrict-7.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/torture/restrict-7.c	(working copy)
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+
+extern void abort (void);
+
+static inline __attribute__((always_inline)) void
+copy(int *restrict a, int *restrict b)
+{
+  *b = *a;
+  *a = 7;
+}
+
+void __attribute__((noinline))
+floppy(int mat[static 2], unsigned idxs[static 3])
+{
+  for (int i = 0; i < 3; i++)
+    copy(&mat[i%2], &mat[idxs[i]]);
+}
+
+int main()
+{
+  int mat[2] = {10, 20};
+  unsigned idxs[3] = {1, 0, 1};
+  floppy(mat, idxs);
+  if (mat[0] != 7 || mat[1] != 10)
+    abort ();
+  return 0;
+}



More information about the Gcc-patches mailing list