This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Fix PR43984, PRE misses full redundancies


Hi,

[Sebastian, you're CCEd because of a graphite bug exposed with parts of 
this patch]

the problem is that PRE uses delayed edge inserts, but commits those 
inserts only after eliminate.  Hence redundancies between those won't be 
found.  The patch simply changes the order.

Now, there's a good question why to use delayed edge inserts at all.  PRE 
runs with pre-split critical edges, hence we should be able to use the 
_immediate variants.  But this was changed for PR17587 (before that it 
effectively did immediate inserts).  And later loop_optimizer_init calls 
were inserted which introduce fake edges that we want to remove before 
insertion.

For these two reasons I haven't considered doing immediate inserts as part 
of this patch.

To fix the testcase only the movement in execute_pre() is necessary.  
Doing just that will expose a bug in graphite in that it can't handle some 
invariant PHI nodes which aren't loop entry PHIs.  The uses of 
loop_entry_phi_arg in remove_invariant_phi (via reduction_phi_p) seem 
confused in that they don't expect PHIs inside loops that happen to be 
loop invariant.

In particular graphite/pr43097.f will ICE with just the change in 
execute_pre, because the situation is like so:

outer_loop:

  pretmp_130 = ...

  inner_loop:
    ...
    goto inner_loop;

<bb 11>:
  D.1594_171 = pretmp.21_130;

<bb 12>:
  # prephitmp.24_149 = PHI <pretmp.21_130(7), D.1594_171(11)>

Here SCEV can now determine that _149 is loop invariant (it can't before 
the PRE change due to some non-removed redundancies), but graphite is 
confused by the fact that both edges into the PHI are non-loop-entry 
edges.

Instead of letting graphite fail for the time being, I reworked PRE a bit 
to remove the superflous PHI node itself.  As it was an inserted one this 
required an easy way to remove that stmt from the inserted_exprs list, 
which is most easily done with a bitmap on the LHS SSA_NAME versions, 
instead of using a vector of gimple statements.  That way it's even more 
compact.

Anyway, the version with just the execute_pre change regstrapped on 
x86_64-linux all langs+Ada, with only the above graphite regression.  The 
current patch is still in regstrapping.  Okay for trunk if that passes?


Ciao,
Michael.
-- 
	PR tree-optimization/43984

	* tree-ssa-pre.c (inserted_phi_names): Remove.
	(inserted_exprs): Change to bitmap.
	(create_expression_by_pieces): Set bits, don't append to vector.
	(insert_into_preds_of_block): Don't handle inserted_phi_names.
	(eliminate): Don't look at inserted_phi_names, remove deleted
	PHIs from inserted_exprs.
	(remove_dead_inserted_code): Adjust to use bitmaps instead of
	vectors.
	(init_pre, fini_pre): Allocate and free bitmaps.
	(execute_pre): Insert insns on edges before elimination.

testsuite/
	* gfortran.dg/pr43984.f90: New test.

Index: tree-ssa-pre.c
===================================================================
--- tree-ssa-pre.c	(revision 159066)
+++ tree-ssa-pre.c	(working copy)
@@ -2608,8 +2608,7 @@ can_PRE_operation (tree op)
 /* Inserted expressions are placed onto this worklist, which is used
    for performing quick dead code elimination of insertions we made
    that didn't turn out to be necessary.   */
-static VEC(gimple,heap) *inserted_exprs;
-static bitmap inserted_phi_names;
+static bitmap inserted_exprs;
 
 /* Pool allocated fake store expressions are placed onto this
    worklist, which, after performing dead code elimination, is walked
@@ -3083,9 +3082,9 @@ create_expression_by_pieces (basic_block
 	  tree forcedname = gimple_get_lhs (stmt);
 	  pre_expr nameexpr;
 
-	  VEC_safe_push (gimple, heap, inserted_exprs, stmt);
 	  if (TREE_CODE (forcedname) == SSA_NAME)
 	    {
+	      bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (forcedname));
 	      VN_INFO_GET (forcedname)->valnum = forcedname;
 	      VN_INFO (forcedname)->value_id = get_next_value_id ();
 	      nameexpr = get_or_alloc_expr_for_name (forcedname);
@@ -3116,7 +3115,7 @@ create_expression_by_pieces (basic_block
   gimple_set_plf (newstmt, NECESSARY, false);
 
   gimple_seq_add_stmt (stmts, newstmt);
-  VEC_safe_push (gimple, heap, inserted_exprs, newstmt);
+  bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (name));
 
   /* All the symbols in NEWEXPR should be put into SSA form.  */
   mark_symbols_for_renaming (newstmt);
@@ -3299,7 +3298,10 @@ insert_into_preds_of_block (basic_block
 			  for (; !gsi_end_p (gsi); gsi_next (&gsi))
 			    {
 			      gimple stmt = gsi_stmt (gsi);
-			      VEC_safe_push (gimple, heap, inserted_exprs, stmt);
+			      tree lhs = gimple_get_lhs (stmt);
+			      if (TREE_CODE (lhs) == SSA_NAME)
+				bitmap_set_bit (inserted_exprs,
+						SSA_NAME_VERSION (lhs));
 			      gimple_set_plf (stmt, NECESSARY, false);
 			    }
 			  gsi_insert_seq_on_edge (pred, stmts);
@@ -3338,7 +3340,9 @@ insert_into_preds_of_block (basic_block
 		  for (; !gsi_end_p (gsi); gsi_next (&gsi))
 		    {
 		      gimple stmt = gsi_stmt (gsi);
-		      VEC_safe_push (gimple, heap, inserted_exprs, stmt);
+		      tree lhs = gimple_get_lhs (stmt);
+		      if (TREE_CODE (lhs) == SSA_NAME)
+			bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (lhs));
 		      gimple_set_plf (stmt, NECESSARY, false);
 		    }
 		  gsi_insert_seq_on_edge (pred, stmts);
@@ -3374,9 +3378,7 @@ insert_into_preds_of_block (basic_block
   gimple_set_plf (phi, NECESSARY, false);
   VN_INFO_GET (gimple_phi_result (phi))->valnum = gimple_phi_result (phi);
   VN_INFO (gimple_phi_result (phi))->value_id = val;
-  VEC_safe_push (gimple, heap, inserted_exprs, phi);
-  bitmap_set_bit (inserted_phi_names,
-		  SSA_NAME_VERSION (gimple_phi_result (phi)));
+  bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (gimple_phi_result (phi)));
   FOR_EACH_EDGE (pred, ei, block->preds)
     {
       pre_expr ae = avail[pred->src->index];
@@ -4309,8 +4311,7 @@ eliminate (void)
 	     replacing the PHI with a single copy if possible.
 	     Do not touch inserted, single-argument or virtual PHIs.  */
 	  if (gimple_phi_num_args (phi) == 1
-	      || !is_gimple_reg (res)
-	      || bitmap_bit_p (inserted_phi_names, SSA_NAME_VERSION (res)))
+	      || !is_gimple_reg (res))
 	    {
 	      gsi_next (&gsi);
 	      continue;
@@ -4345,6 +4346,7 @@ eliminate (void)
 	    }
 
 	  remove_phi_node (&gsi, false);
+	  bitmap_clear_bit (inserted_exprs, SSA_NAME_VERSION (res));
 
 	  if (!useless_type_conversion_p (TREE_TYPE (res), TREE_TYPE (sprime)))
 	    sprime = fold_convert (TREE_TYPE (res), sprime);
@@ -4434,19 +4436,23 @@ mark_operand_necessary (tree op)
 static void
 remove_dead_inserted_code (void)
 {
-  VEC(gimple,heap) *worklist = NULL;
-  int i;
+  bitmap worklist;
+  unsigned i;
+  bitmap_iterator bi;
   gimple t;
 
-  worklist = VEC_alloc (gimple, heap, VEC_length (gimple, inserted_exprs));
-  for (i = 0; VEC_iterate (gimple, inserted_exprs, i, t); i++)
+  worklist = BITMAP_ALLOC (NULL);
+  EXECUTE_IF_SET_IN_BITMAP (inserted_exprs, 0, i, bi)
     {
+      t = SSA_NAME_DEF_STMT (ssa_name (i));
       if (gimple_plf (t, NECESSARY))
-	VEC_quick_push (gimple, worklist, t);
+	bitmap_set_bit (worklist, i);
     }
-  while (VEC_length (gimple, worklist) > 0)
+  while (!bitmap_empty_p (worklist))
     {
-      t = VEC_pop (gimple, worklist);
+      i = bitmap_first_set_bit (worklist);
+      bitmap_clear_bit (worklist, i);
+      t = SSA_NAME_DEF_STMT (ssa_name (i));
 
       /* PHI nodes are somewhat special in that each PHI alternative has
 	 data and control dependencies.  All the statements feeding the
@@ -4455,7 +4461,6 @@ remove_dead_inserted_code (void)
 	{
 	  unsigned k;
 
-	  VEC_reserve (gimple, heap, worklist, gimple_phi_num_args (t));
 	  for (k = 0; k < gimple_phi_num_args (t); k++)
 	    {
 	      tree arg = PHI_ARG_DEF (t, k);
@@ -4463,7 +4468,7 @@ remove_dead_inserted_code (void)
 		{
 		  gimple n = mark_operand_necessary (arg);
 		  if (n)
-		    VEC_quick_push (gimple, worklist, n);
+		    bitmap_set_bit (worklist, SSA_NAME_VERSION (arg));
 		}
 	    }
 	}
@@ -4484,13 +4489,14 @@ remove_dead_inserted_code (void)
 	    {
 	      gimple n = mark_operand_necessary (use);
 	      if (n)
-		VEC_safe_push (gimple, heap, worklist, n);
+		bitmap_set_bit (worklist, SSA_NAME_VERSION (use));
 	    }
 	}
     }
 
-  for (i = 0; VEC_iterate (gimple, inserted_exprs, i, t); i++)
+  EXECUTE_IF_SET_IN_BITMAP (inserted_exprs, 0, i, bi)
     {
+      t = SSA_NAME_DEF_STMT (ssa_name (i));
       if (!gimple_plf (t, NECESSARY))
 	{
 	  gimple_stmt_iterator gsi;
@@ -4511,7 +4517,7 @@ remove_dead_inserted_code (void)
 	    }
 	}
     }
-  VEC_free (gimple, heap, worklist);
+  BITMAP_FREE (worklist);
 }
 
 /* Compute a reverse post-order in *POST_ORDER.  If INCLUDE_ENTRY_EXIT is
@@ -4604,7 +4610,7 @@ init_pre (bool do_fre)
 
   in_fre = do_fre;
 
-  inserted_exprs = NULL;
+  inserted_exprs = BITMAP_ALLOC (NULL);
   need_creation = NULL;
   pretemp = NULL_TREE;
   storetemp = NULL_TREE;
@@ -4624,7 +4630,6 @@ init_pre (bool do_fre)
   calculate_dominance_info (CDI_DOMINATORS);
 
   bitmap_obstack_initialize (&grand_bitmap_obstack);
-  inserted_phi_names = BITMAP_ALLOC (&grand_bitmap_obstack);
   phi_translate_table = htab_create (5110, expr_pred_trans_hash,
 				     expr_pred_trans_eq, free);
   expression_to_id = htab_create (num_ssa_names * 3,
@@ -4655,7 +4660,7 @@ fini_pre (bool do_fre)
 
   free (postorder);
   VEC_free (bitmap_set_t, heap, value_expressions);
-  VEC_free (gimple, heap, inserted_exprs);
+  BITMAP_FREE (inserted_exprs);
   VEC_free (gimple, heap, need_creation);
   bitmap_obstack_release (&grand_bitmap_obstack);
   free_alloc_pool (bitmap_set_pool);
@@ -4740,6 +4745,12 @@ execute_pre (bool do_fre)
       insert ();
     }
 
+  /* Make sure to remove fake edges before committing our inserts.
+     This makes sure we don't end up with extra critical edges that
+     we would need to split.  */
+  remove_fake_exit_edges ();
+  gsi_commit_edge_inserts ();
+
   /* Remove all the redundant expressions.  */
   todo |= eliminate ();
 
@@ -4749,12 +4760,6 @@ execute_pre (bool do_fre)
   statistics_counter_event (cfun, "Eliminated", pre_stats.eliminations);
   statistics_counter_event (cfun, "Constified", pre_stats.constified);
 
-  /* Make sure to remove fake edges before committing our inserts.
-     This makes sure we don't end up with extra critical edges that
-     we would need to split.  */
-  remove_fake_exit_edges ();
-  gsi_commit_edge_inserts ();
-
   clear_expression_ids ();
   free_scc_vn ();
   if (!do_fre)
Index: testsuite/gfortran.dg/pr43984.f90
===================================================================
--- testsuite/gfortran.dg/pr43984.f90	(revision 0)
+++ testsuite/gfortran.dg/pr43984.f90	(revision 0)
@@ -0,0 +1,56 @@
+! { dg-do compile }
+! { dg-options "-O2 -fno-tree-dominator-opts -fdump-tree-pre" }
+module test
+
+   type shell1quartet_type
+
+   integer(kind=kind(1)) :: ab_l_sum
+   integer(kind=kind(1)), dimension(:), pointer :: ab_form_3dints_x_indices => NULL()
+   integer(kind=kind(1)), dimension(:), pointer :: ab_form_3dints_yz_rms_indices => NULL()
+
+    end type
+
+contains
+subroutine make_esss(self,esss)
+  type(shell1quartet_type) :: self
+  intent(in) :: self
+  real(kind=kind(1.0d0)), dimension(:), intent(out) :: esss
+  real(kind=kind(1.0d0)), dimension(:), pointer :: Izz
+  real(kind=kind(1.0d0)), dimension(:,:), pointer :: Ix,Iy,Iz,Iyz
+  integer(kind=kind(1)), dimension(:), pointer  :: e_x,ii_ivec
+  integer(kind=kind(1)) :: dim, dim1, nroots, ii,z,y
+
+    dim = self%ab_l_sum+1
+    dim1 = self%ab_l_sum+2
+    nroots = (dim1) / 2
+    call create_(Ix,nroots,dim)
+    call create_(Iy,nroots,dim)
+    call create_(Iz,nroots,dim)
+    call create_(Iyz,nroots,dim*dim1/2)
+
+    e_x => self%ab_form_3dints_x_indices
+    ii_ivec => self%ab_form_3dints_yz_rms_indices
+
+    call foo(Ix)
+    call foo(Iy)
+    call foo(Iz)
+
+    esss = ZERO
+    ii = 0
+    do z=1,dim
+      Izz => Iz(:,z)
+      do y=1,dim1-z
+        ii = ii + 1
+        Iyz(:,ii) = Izz * Iy(:,y)
+      end do
+    end do
+    esss = esss + sum(Ix(:,e_x) * Iyz(:,ii_ivec),1)
+
+end subroutine
+
+end
+
+! There should be three loads from iyz.data, not four.
+
+! { dg-final { scan-tree-dump-times "= iyz.data" 3 "pre" } }
+! { dg-final { cleanup-tree-dump "pre" } }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]