This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[tree-ssa][PATCH]: Speed up SSAPRE some more


This speeds up SSAPRE so that it's faster than SSA-CCP on interpret.ii
again, at least on my machine.

The main time sinks i took care of were

1.  Brain dead recomputation of IDF+ when it wasn't actually necessary
(including changing it to use the cached
results to speed up computing the IDF+).

2. Traversing an array of EPHI arguments (interpret.ii has a high degree
of connectivity, so both PHI's and EPHI's have a ton of arguments), a
large number of times, just to find what position a given edge was in.
This was replaced with a hash table mapping (ephi, edge) ==> operand
index.

Bootstrapped and regtested on x86.

The split_critical_edges part isn't turned on right now due to bugs
elsewhere (i'll remove other code when i do), but it does fix a testcase
pointed out by Jeff a week or so ago, when combined with some of Andrew's
uncommitted fixes to edge insertion, so I didn't see the harm in leaving
it in case someone else needs it to get work done.

--Dan
2003-09-25  Daniel Berlin  <dberlin@dberlin.org>

	* tree-alias-pre.c (split_critical_edges): New function, temporarily
	disabled until some edge splitting/insertion problems are fixed.
	(opnum_of_ephi): Take an edge argument, constify. Use hash table lookup.
	Update all callers.
	(ephi_pindex_eq): New function.
	(ephi_pindex_hash): New function.
	(ephi_pindex_htab): New variable.
	(add_ephi_pred): Update hash table.
	(expr_phi_insertion): Don't free the bitmap returned by compute_idfs
	anymore.
	(idfs_cache): New variable.
	(compute_idfs): Rewrite to use cache as much as possible, and not
	recompute when we can avoid it.
Index: tree-ssa-pre.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-ssa-pre.c,v
retrieving revision 1.1.4.87
diff -u -3 -p -r1.1.4.87 tree-ssa-pre.c
--- tree-ssa-pre.c	24 Sep 2003 21:43:04 -0000	1.1.4.87
+++ tree-ssa-pre.c	26 Sep 2003 02:39:03 -0000
@@ -129,7 +129,7 @@ static inline tree maybe_find_rhs_use_fo
 static inline tree find_rhs_use_for_var (tree, tree);
 static tree create_ephi_node (basic_block, unsigned int);
 static inline int opnum_of_phi (tree, int);
-static inline int opnum_of_ephi (tree, int);
+static inline int opnum_of_ephi (const tree, const edge);
 static tree subst_phis (struct expr_info *, tree, basic_block, basic_block);
 static void generate_expr_as_of_bb (struct expr_info *, tree, int,
 				    basic_block);
@@ -250,6 +250,7 @@ static inline bool injured_real_occ_phi_
 static void compute_du_info (struct expr_info *);
 static void add_ephi_use (tree, tree, int);
 static void insert_one_operand (struct expr_info *, tree, int, tree, edge);
+static void split_critical_edges (void);

 /* Bitmap of E-PHI predecessor operands have already been created.
    We only create one phi-pred per block.  */
@@ -268,6 +269,9 @@ static int preorder_count;
 /* Whether we need to recompute dominators due to basic block changes.  */
 static bool redo_dominators = false;

+/* Iterated dominance frontiers cache.  */
+static bitmap *idfs_cache;
+
 /* Partial redundancies statistics. */
 static struct pre_stats_d
 {
@@ -307,14 +311,67 @@ struct expr_info
   tree temp;
 };

+/* Trying to lookup ephi pred operand indexes takes forever on graphs
+   that have high connectivity because it's an O(n) linked list
+   traversal.  Thus, we set up a hashtable that tells us the operand
+   index for a given edge.  */
+
+typedef struct ephi_pred_index_elt
+{
+  tree ephi;
+  edge edge;
+  int opnd;
+} ephi_pindex_t;
+
+/* Hash an (ephi, edge, opnd) tuple.  */
+
+static hashval_t
+ephi_pindex_hash (const void *p)
+{
+  const ephi_pindex_t *ep = (const ephi_pindex_t *)p;
+  return htab_hash_pointer (ep->ephi) + htab_hash_pointer (ep->edge);
+}
+
+/* Determine equality of an (ephi, edge, opnd) tuple.  */
+
+static int
+ephi_pindex_eq (const void *p1, const void *p2)
+{
+  const ephi_pindex_t *ep1 = (const ephi_pindex_t *)p1;
+  const ephi_pindex_t *ep2 = (const ephi_pindex_t *)p2;
+
+  return ep1->ephi == ep2->ephi && ep1->edge == ep2->edge;
+}
+
+/* The (ephi, edge) => opnd mapping hashtable.  */
+static htab_t ephi_pindex_htab;
+
 /* Add an ephi predecessor to a PHI.  */
+
 static int
 add_ephi_pred (tree phi, tree def, edge e)
 {
   int i = EPHI_NUM_ARGS (phi);
-
+  void **slot;
+  ephi_pindex_t ep, *epp;
+
   EPHI_ARG_PRED (phi, i) = def;
   EPHI_ARG_EDGE (phi, i) = e;
+
+  ep.ephi = phi;
+  ep.edge = e;
+  slot = htab_find_slot (ephi_pindex_htab, (void *)&ep, INSERT);
+  if (*slot == NULL)
+    {
+      epp = xmalloc (sizeof (*epp));
+      epp->ephi = phi;
+      epp->edge = e;
+      epp->opnd = i;
+      *slot = (void *)epp;
+    }
+  else
+    abort ();
+
   EPHI_NUM_ARGS (phi)++;
   return i;
 }
@@ -641,7 +698,7 @@ expr_phi_insertion (bitmap * dfs, struct
   bitmap_zero (dfphis);
   varphis = BITMAP_XMALLOC ();
   bitmap_zero (varphis);
-
+
   /*  Compute where we need to place EPHIS. There are two types of
       places we need EPHI's: Those places we would normally place a
       PHI for the occurrence (calculated by determining the IDF+ of
@@ -664,8 +721,7 @@ expr_phi_insertion (bitmap * dfs, struct
       occurp = occur ? occurp : kill ? killp : leftp;
       occur = occur ? occur : kill ? kill : left;
       temp = compute_idfs (dfs, occur);
-      bitmap_a_or_b (dfphis, dfphis, temp);
-      BITMAP_XFREE (temp);
+      bitmap_a_or_b (dfphis, dfphis, temp);
       if (kill != NULL)
 	continue;
       occur = TREE_OPERAND (occur, 1);
@@ -927,13 +983,16 @@ injured_real_occ_real_occ (struct expr_i

 /* Determine the operand number of predecessor block J in EPHI.  */
 static inline int
-opnum_of_ephi (tree ephi, int j)
+opnum_of_ephi (const tree ephi, const edge e)
 {
-  int i;
-  for (i = 0; i < EPHI_NUM_ARGS (ephi); i++)
-    if (EPHI_ARG_EDGE (ephi, i)->src->index == j)
-      return i;
-  abort ();
+  ephi_pindex_t ep, *epp;
+
+  ep.ephi = ephi;
+  ep.edge = e;
+  epp = htab_find (ephi_pindex_htab, &ep);
+  if (epp == NULL)
+    abort ();
+  return epp->opnd;
 }

 /* Determine the phi operand index for J, for PHI.  */
@@ -1273,7 +1332,8 @@ rename_1 (struct expr_info *ei)
 		  if (ephi_at_block (e->dest) != NULL_TREE)
 		    {
 		      tree ephi = ephi_at_block (e->dest);
-		      int opnum = opnum_of_ephi (ephi, pred_bb->index);
+		      int opnum = opnum_of_ephi (ephi, e);
+
 		      EPHI_ARG_DELAYED_RENAME (ephi, opnum) = true;
 		      EPHI_ARG_DEF (ephi, opnum) = tos;
 		    }
@@ -1852,7 +1912,7 @@ finalize_1 (struct expr_info *ei)
 		continue;
 	      if (ephi_will_be_avail (ephi))
 		{
-		  int opnd_indx = opnum_of_ephi (ephi, bb_for_stmt (x)->index);
+		  int opnd_indx = opnum_of_ephi (ephi, succ);
 #ifdef ENABLE_CHECKING
 		  if (EPHI_ARG_PRED (ephi, opnd_indx) != x)
 		    abort ();
@@ -2519,19 +2579,30 @@ code_motion (struct expr_info *ei)
     }
 }

+/* Compute the iterated dominance frontier of a statement.  */

-/* Compute the iterated dominance frontier of a statement. */
 static bitmap
 compute_idfs (bitmap * dfs, tree stmt)
 {
   fibheap_t worklist;
-  sbitmap inworklist = sbitmap_alloc (last_basic_block);
-  bitmap idf = BITMAP_XMALLOC ();
+  sbitmap inworklist, done;
+  bitmap idf;
   size_t i;
   basic_block block;
+
+  block = bb_for_stmt (stmt);
+  if (idfs_cache[block->index] != NULL)
+    return idfs_cache[block->index];
+
+  inworklist = sbitmap_alloc (last_basic_block);
+  done = sbitmap_alloc (last_basic_block);
   worklist = fibheap_new ();
   sbitmap_zero (inworklist);
+  sbitmap_zero (done);
+
+  idf = BITMAP_XMALLOC ();
   bitmap_zero (idf);
+
   block = bb_for_stmt (stmt);
   fibheap_insert (worklist, block->index, (void *)(size_t)block->index);
   SET_BIT (inworklist, block->index);
@@ -2539,18 +2610,36 @@ compute_idfs (bitmap * dfs, tree stmt)
   while (!fibheap_empty (worklist))
     {
       int a = (size_t) fibheap_extract_min (worklist);
-      bitmap_a_or_b (idf, idf, dfs[a]);
-      EXECUTE_IF_SET_IN_BITMAP (dfs[a], 0, i,
-      {
-	if (!TEST_BIT (inworklist, i))
-	  {
+      if (TEST_BIT (done, a))
+	continue;
+      SET_BIT (done, a);
+      if (idfs_cache[a])
+	{
+	  bitmap_a_or_b (idf, idf, idfs_cache[a]);
+	  EXECUTE_IF_SET_IN_BITMAP (idfs_cache[a], 0, i,
+          {
 	    SET_BIT (inworklist, i);
-	    fibheap_insert (worklist, i, (void *)i);
-	  }
-      });
+	    SET_BIT (done, i);
+	  });
+	}
+      else
+	{
+	  bitmap_a_or_b (idf, idf, dfs[a]);
+	  EXECUTE_IF_SET_IN_BITMAP (dfs[a], 0, i,
+          {
+	    if (!TEST_BIT (inworklist, i))
+	      {
+		SET_BIT (inworklist, i);
+		fibheap_insert (worklist, i, (void *)i);
+	      }
+	  });
+	}
+
     }
   fibheap_delete (worklist);
   sbitmap_free (inworklist);
+  sbitmap_free (done);
+  idfs_cache[block->index] = idf;
   return idf;

 }
@@ -2717,6 +2806,8 @@ pre_expression (struct expr_info *slot,
   ei->temp = create_tmp_var (TREE_TYPE (ei->expr), "pretmp");
   add_referenced_tmp_var (ei->temp);
   bitmap_clear (created_phi_preds);
+  ephi_pindex_htab = htab_create (500, ephi_pindex_hash, ephi_pindex_eq, free);
+
   if (!expr_phi_insertion ((bitmap *)data, ei))
     goto cleanup;
   rename_1 (ei);
@@ -2771,7 +2862,30 @@ pre_expression (struct expr_info *slot,
   return 0;
 }

+static void
+split_critical_edges (void)
+{
+#if 0
+  struct edge_list *el = create_edge_list ();
+  int i;
+  edge e;
+  for (i = 0; i < NUM_EDGES (el); i++)
+  {
+    e = INDEX_EDGE (el, i);
+    if (EDGE_CRITICAL_P (e) && !(e->flags & EDGE_ABNORMAL))
+      {
+	tree label = build1 (LABEL_EXPR, void_type_node, NULL_TREE);
+	LABEL_EXPR_LABEL (label) = build_decl (LABEL_DECL, NULL_TREE, NULL_TREE);
+	DECL_ARTIFICIAL (LABEL_EXPR_LABEL (label)) = 1;
+	DECL_CONTEXT (LABEL_EXPR_LABEL (label)) = current_function_decl;

+	bsi_insert_on_edge (e, label);
+      }
+  }
+  bsi_commit_edge_inserts (0, NULL);
+  free_edge_list (el);
+#endif
+}
 /* Main entry point to the SSA-PRE pass.

    PHASE indicates which dump file from the DUMP_FILES array to use when
@@ -2786,7 +2900,8 @@ tree_perform_ssapre (tree fndecl, enum t
   varray_type bexprs;
   size_t k;
   int i;
-
+
+  split_critical_edges ();
   timevar_push (TV_TREE_PRE);
   dump_file = dump_begin (phase, &dump_flags);
   VARRAY_GENERIC_PTR_INIT (bexprs, 1, "bexprs");
@@ -2797,6 +2912,9 @@ tree_perform_ssapre (tree fndecl, enum t
   /* DCE screws the dom_children up, without bothering to fix it. So fix it. */
   currbbs = n_basic_blocks;
   build_dominator_tree (pre_idom);
+
+  /* Initialize IDFS cache.  */
+  idfs_cache = xcalloc (currbbs, sizeof (bitmap));

   /* Compute dominance frontiers.  */
   pre_dfs = (bitmap *) xmalloc (sizeof (bitmap) * currbbs);
@@ -2805,7 +2923,6 @@ tree_perform_ssapre (tree fndecl, enum t
   compute_dominance_frontiers (pre_dfs, pre_idom);

   created_phi_preds = BITMAP_XMALLOC ();
-
   FOR_EACH_BB (block)
     for (j = bsi_start (block); !bsi_end_p (j); bsi_next (&j))
       {
@@ -2873,11 +2990,17 @@ tree_perform_ssapre (tree fndecl, enum t
 	process_left_occs_and_kills (bexprs, slot, bsi_stmt (j));
       }
   ggc_push_context ();
+
   for (k = 0; k < VARRAY_ACTIVE_SIZE (bexprs); k++)
     {
       pre_expression (VARRAY_GENERIC_PTR (bexprs, k), pre_dfs);
       free_expr_info (VARRAY_GENERIC_PTR (bexprs, k));
       clear_all_eref_arrays ();
+      if (ephi_pindex_htab)
+	{
+	  htab_delete (ephi_pindex_htab);
+	  ephi_pindex_htab = NULL;
+	}
       ggc_collect ();
       if (redo_dominators)
 	{
@@ -2887,7 +3010,9 @@ tree_perform_ssapre (tree fndecl, enum t
 	  for (i = 0; i < currbbs; i++)
 	    BITMAP_XFREE (pre_dfs[i]);
 	  free (pre_dfs);
-
+	  for (i = 0; i < currbbs; i++)
+	    if (idfs_cache[i] != NULL)
+	      BITMAP_XFREE (idfs_cache[i]);
 	  /* Recompute immediate dominators.  */
 	  pre_idom = calculate_dominance_info (CDI_DOMINATORS);
 	  build_dominator_tree (pre_idom);
@@ -2898,7 +3023,7 @@ tree_perform_ssapre (tree fndecl, enum t
 	  for (i = 0; i < currbbs; i++)
 	    pre_dfs[i] = BITMAP_XMALLOC ();
 	  compute_dominance_frontiers (pre_dfs, pre_idom);
-
+	  idfs_cache = xcalloc (currbbs, sizeof (bitmap));
 	}
     }
   ggc_pop_context ();
@@ -2921,7 +3046,10 @@ tree_perform_ssapre (tree fndecl, enum t
   free_dominance_info (pre_idom);
   for (i = 0; i < currbbs; i++)
     BITMAP_XFREE (pre_dfs[i]);
-  BITMAP_XFREE (created_phi_preds);
   free (pre_dfs);
+  BITMAP_XFREE (created_phi_preds);
+  for (i = 0; i < currbbs; i++)
+    if (idfs_cache[i] != NULL)
+      BITMAP_XFREE (idfs_cache[i]);
   timevar_pop (TV_TREE_PRE);
 }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]