This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] tree-ssa-dom.c: Fix tree-optimization/18694.


Hi,

Attached is a patch to fix PR 18694.

thread_across_edge threads an edge through a COND_EXPR or SWITCH_EXPR.
When threading edge E, it asks itself "What would the condition look
like if it appeared at the end of E->src?"

To answer this question correctly, we really have to be aware that PHI
nodes are evaluated and assigned "at the same time".  The problem is
that thread_across_edge doesn't follow this rule.  It evaluates PHI
nodes in the order they appear in the PHI chain.

Here is the problematic BB, which is E->dest, that is, the basic block
with the COND_EXPR that we are trying to thread through.

  TMT.0_24 = PHI <TMT.0_26(1), TMT.0_23(5)>;
  ivtmp14_6 = PHI <0(1), ivtmp14_14(5)>;
  d_5 = PHI <d23_9(1), d23_2(5)>;   <- d_5 is defined
  tmp_4 = PHI <t_8(1), d_5(5)>;     <- d_5 is used
  d23_3 = PHI <d23_9(1), d23_2(5)>;
<L23>:;
  if (tmp_4 < d_5) goto here; goto there;

Suppose we are threading edge <bb 5> to <L23>.  Among other things,
thread_across_edge makes two "copy-of" relations like so:

  record_const_or_copy (d_5, d23_2);
  record_const_or_copy (tmp_4, d_5);

Then we end up with transitive closures like so:

  SSA_NAME_VALUE (d_5)   == d23_2
  SSA_NAME_VALUE (tmp_4) == d23_2

So thread_across_edge "thinks" that tmp_4 < d_5 is equivalent to
d23_2 < d23_2, which is folded to false.  Doh!

The patch fixes the problem by simulating the simultaneous evaluation
of PHI nodes.  Specifically, we note all the "copy-of" relations from
PHI arguments to existing known values and then we associate PHI
results with the RHS of the "copy-of" relations we have noted.

The patch also adds a fast lane for the case where E->dest does not
dominate E->src, in which case we know PHI results do not appear in
PHI arguments on edge E.  This cuts 40% of the time spent in
record_temporary_equivalences_from_phi while compiling insn-attrtab.i.

I tested this patch with cc1-i files to see if I am limiting any jump
threading opportunities.  It turns out that stmt.c:expand_asm_operands
gets one fewer jump threading opportunity at DOM1.  Other than that,
we get exactly the same number of jump threading opportunities.  It's
very plausible that we have been silently miscompiling stmt.c.

Tested on i686-pc-linux-gnu.  OK to apply?

Kazu Hirata

2004-12-10  Kazu Hirata  <kazu@cs.umass.edu>

	PR tree-optimization/18694
	* tree-ssa-dom.c (record_temporary_equivalences_from_phis):
	New.
	(thread_across_edge): Call it.

2004-12-10  Kazu Hirata  <kazu@cs.umass.edu>

	PR tree-optimization/18694
	* gcc.c-torture/execute/pr18694.c: New.

Index: tree-ssa-dom.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa-dom.c,v
retrieving revision 2.73
diff -u -d -p -r2.73 tree-ssa-dom.c
--- tree-ssa-dom.c	3 Dec 2004 07:38:39 -0000	2.73
+++ tree-ssa-dom.c	9 Dec 2004 20:29:39 -0000
@@ -297,6 +297,7 @@ static void register_definitions_for_stm
 static edge single_incoming_edge_ignoring_loop_edges (basic_block);
 static void restore_nonzero_vars_to_original_value (void);
 static inline bool unsafe_associative_fp_binop (tree);
+static void record_const_or_copy_1 (tree, tree, tree);
 
 /* Local version of fold that doesn't introduce cruft.  */
 
@@ -534,6 +535,85 @@ struct tree_opt_pass pass_dominator = 
   0					/* letter */
 };
 
+/* Record temporary equivalences from PHI arguments on edge E.  */
+
+static void
+record_temporary_equivalences_from_phis (edge e)
+{
+  tree phi;
+  unsigned int num_phi;
+
+  /* If E->dest does not dominate E->src, then PHI results cannot
+     possibly appear in the RHS of PHI nodes.  If they did, then we
+     would have a case where a definition does not dominate its use.
+     Therefore, it's safe to evaluate PHI nodes in the order they
+     appear.  */
+  if (!dominated_by_p (CDI_DOMINATORS, e->src, e->dest))
+    {
+      for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
+	{
+	  tree src = PHI_ARG_DEF_FROM_EDGE (phi, e);
+	  tree dst = PHI_RESULT (phi);
+	  record_const_or_copy (dst, src);
+	  register_new_def (dst, &block_defs_stack);
+	}
+      return;
+    }
+
+  /* Create temporary equivalences for PHI results.  When we create
+     equivalences from PHI nodes, we have to take into account the
+     fact that the RHS of all PHI nodes are evaluated first and then
+     assigned to the LHS.  Therefore, we cannot create equivalences by
+     scanning one PHI node at a time.  Instead, we have to record
+     existing SSA_NAME_VALUEs for each of PHI results at E->dest and
+     then create equivalences at the same time.  */
+
+  /* Count the number of PHI nodes.  */
+  num_phi = 0;
+  for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
+    num_phi++;
+
+  if (num_phi)
+    {
+      tree *phi_safe_ssa_name_values = xmalloc (sizeof (tree) * num_phi * 2);
+      unsigned int i;
+
+      /* Note the existing SSA_NAME_VALUEs for each PHI argument on
+	 edge E before we make equivalences.  */
+      for (phi = phi_nodes (e->dest), i = 0;
+	   phi;
+	   phi = PHI_CHAIN (phi), i += 2)
+	{
+	  tree src = PHI_ARG_DEF (phi, e->dest_idx);
+	  tree dst = PHI_RESULT (phi);
+	  tree prev_src = SSA_NAME_VALUE (dst);
+
+	  if (TREE_CODE (src) == SSA_NAME)
+	    {
+	      tree tmp = SSA_NAME_VALUE (src);
+	      if (tmp)
+		src = tmp;
+	    }
+
+	  phi_safe_ssa_name_values[i + 0] = src;
+	  phi_safe_ssa_name_values[i + 1] = prev_src;
+	}
+
+      /* Create an equivalence for each PHI result.  */
+      for (phi = phi_nodes (e->dest), i = 0;
+	   phi;
+	   phi = PHI_CHAIN (phi), i += 2)
+	{
+	  tree src  = phi_safe_ssa_name_values[i + 0];
+	  tree prev = phi_safe_ssa_name_values[i + 1];
+	  tree dst = PHI_RESULT (phi);
+	  record_const_or_copy_1 (dst, src, prev);
+	  register_new_def (dst, &block_defs_stack);
+	}
+
+      free (phi_safe_ssa_name_values);
+    }
+}
 
 /* We are exiting BB, see if the target block begins with a conditional
    jump which has a known value when reached via BB.  */
@@ -543,16 +623,8 @@ thread_across_edge (struct dom_walk_data
 {
   block_stmt_iterator bsi;
   tree stmt = NULL;
-  tree phi;
 
-  /* Each PHI creates a temporary equivalence, record them.  */
-  for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
-    {
-      tree src = PHI_ARG_DEF_FROM_EDGE (phi, e);
-      tree dst = PHI_RESULT (phi);
-      record_const_or_copy (dst, src);
-      register_new_def (dst, &block_defs_stack);
-    }
+  record_temporary_equivalences_from_phis (e);
 
   for (bsi = bsi_start (e->dest); ! bsi_end_p (bsi); bsi_next (&bsi))
     {
--- /dev/null	2004-11-28 04:53:41.599129640 -0500
+++ pr18694.c	2004-12-09 00:50:31.000000000 -0500
@@ -0,0 +1,32 @@
+/* PR tree-optimization/18694
+
+   The dominator optimization didn't take the PHI evaluation order
+   into account when threading an edge.  */
+
+extern void abort (void) __attribute__((noreturn));
+extern void exit (int) __attribute__((noreturn));
+
+void __attribute__((noinline))
+foo (int i)
+{
+  int next_n = 1;
+  int j = 0;
+
+  for (; i != 0; i--)
+    {
+      int n;
+
+      for (n = next_n; j < n; j++)
+	next_n++;
+
+      if (j != n)
+	abort ();
+    }
+}
+
+int
+main (void)
+{
+  foo (2);
+  exit (0);
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]