[PATCH] Improve PTA for PR91257, new bitmap_ior_into_and_free

Richard Biener rguenther@suse.de
Tue Jul 30 12:16:00 GMT 2019


On Mon, 29 Jul 2019, Richard Biener wrote:

> 
> The following patch addresses appearant quadraticness in PTAs
> SCC merging algorithm which does (simplified)
> 
>  for (node N in SCC)
>    bitmap_ior_into (pred(leader), pred(N));
> 
> with the linked list implementation this leads to increasingly large
> amount of walking for pred(leader).
> 
> The patch changes this to
> 
>  while (split /= 2)
>    for (node N in lower-half-of-SCC)
>      bitmap_ior_into_and_free (pred (N), pred (N in upper half of SCC));
> 
> so in each outer iteration reducing the number of bitmaps by a factor
> of two and thus hopefully (and in practice for the testcase) reducing
> the intermediate bitmap sizes we work on.
> 
> This improves
> 
>  tree PTA                           :   8.38 ( 23%)   0.22 ( 33%)   8.62 ( 
> 24%)    7679 kB (  1%)
> 
> to
> 
>  tree PTA                           :   5.30 ( 16%)   0.22 ( 32%)   5.54 ( 
> 16%)   28081 kB (  4%)
> 
> for the reduced testcase.
> 
> I somehow didn't manage a 1:1 conversion so I need to re-check details
> here still I'd like to hear comments about the new bitmap API
> which is a source-destructive variant for bitmap_ior_into, instead of
> copying elements not in A moving them from B and in the end releasing
> the rest.  Note using this API with keeping the merging as-is doesn't
> help PTA time.

This is what I applied.

Bootstrapped / tested on x86_64-unknown-linux-gnu.

Richard.

2019-07-30  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/91257
	* bitmap.h (bitmap_ior_into_and_free): Declare.
	* bitmap.c (bitmap_list_unlink_element): Add defaulted param
	whether to add the unliked element to the freelist.
	(bitmap_list_insert_element_after): Add defaulted param for
	an already allocated element.
	(bitmap_ior_into_and_free): New function.
	* tree-ssa-structalias.c (condense_visit): Reduce the
	ponts-to and edge bitmaps of the SCC members in a
	logarithmic fashion rather than all to one.

Index: gcc/bitmap.c
===================================================================
--- gcc/bitmap.c	(revision 273795)
+++ gcc/bitmap.c	(working copy)
@@ -252,7 +252,8 @@ bitmap_list_link_element (bitmap head, b
    and return it to the freelist.  */
 
 static inline void
-bitmap_list_unlink_element (bitmap head, bitmap_element *element)
+bitmap_list_unlink_element (bitmap head, bitmap_element *element,
+			    bool to_freelist = true)
 {
   bitmap_element *next = element->next;
   bitmap_element *prev = element->prev;
@@ -279,18 +280,21 @@ bitmap_list_unlink_element (bitmap head,
 	head->indx = 0;
     }
 
-  bitmap_elem_to_freelist (head, element);
+  if (to_freelist)
+    bitmap_elem_to_freelist (head, element);
 }
 
-/* Insert a new uninitialized element into bitmap HEAD after element
-   ELT.  If ELT is NULL, insert the element at the start.  Return the
-   new element.  */
+/* Insert a new uninitialized element (or NODE if not NULL) into bitmap
+   HEAD after element ELT.  If ELT is NULL, insert the element at the start.
+   Return the new element.  */
 
 static bitmap_element *
 bitmap_list_insert_element_after (bitmap head,
-				  bitmap_element *elt, unsigned int indx)
+				  bitmap_element *elt, unsigned int indx,
+				  bitmap_element *node = NULL)
 {
-  bitmap_element *node = bitmap_element_allocate (head);
+  if (!node)
+    node = bitmap_element_allocate (head);
   node->indx = indx;
 
   gcc_checking_assert (!head->tree_form);
@@ -2009,6 +2013,56 @@ bitmap_ior_into (bitmap a, const_bitmap
   return changed;
 }
 
+/* A |= B.  Return true if A changes.  Free B (re-using its storage
+   for the result).  */
+
+bool
+bitmap_ior_into_and_free (bitmap a, bitmap *b_)
+{
+  bitmap b = *b_;
+  bitmap_element *a_elt = a->first;
+  bitmap_element *b_elt = b->first;
+  bitmap_element *a_prev = NULL;
+  bitmap_element **a_prev_pnext = &a->first;
+  bool changed = false;
+
+  gcc_checking_assert (!a->tree_form && !b->tree_form);
+  gcc_assert (a->obstack == b->obstack);
+  if (a == b)
+    return false;
+
+  while (b_elt)
+    {
+      /* If A lags behind B, just advance it.  */
+      if (!a_elt || a_elt->indx == b_elt->indx)
+	{
+	  changed = bitmap_elt_ior (a, a_elt, a_prev, a_elt, b_elt, changed);
+	  b_elt = b_elt->next;
+	}
+      else if (a_elt->indx > b_elt->indx)
+	{
+	  bitmap_element *b_elt_next = b_elt->next;
+	  bitmap_list_unlink_element (b, b_elt, false);
+	  bitmap_list_insert_element_after (a, a_prev, b_elt->indx, b_elt);
+	  b_elt = b_elt_next;
+	}
+
+      a_prev = *a_prev_pnext;
+      a_prev_pnext = &a_prev->next;
+      a_elt = *a_prev_pnext;
+    }
+
+  gcc_checking_assert (!a->current == !a->first);
+  if (a->current)
+    a->indx = a->current->indx;
+
+  if (b->obstack)
+    BITMAP_FREE (*b_);
+  else
+    bitmap_clear (b);
+  return changed;
+}
+
 /* DST = A ^ B  */
 
 void
Index: gcc/bitmap.h
===================================================================
--- gcc/bitmap.h	(revision 273795)
+++ gcc/bitmap.h	(working copy)
@@ -399,6 +399,7 @@ extern void bitmap_clear_range (bitmap,
 extern void bitmap_set_range (bitmap, unsigned int, unsigned int);
 extern bool bitmap_ior (bitmap, const_bitmap, const_bitmap);
 extern bool bitmap_ior_into (bitmap, const_bitmap);
+extern bool bitmap_ior_into_and_free (bitmap, bitmap *);
 extern void bitmap_xor (bitmap, const_bitmap, const_bitmap);
 extern void bitmap_xor_into (bitmap, const_bitmap);
 
Index: gcc/tree-ssa-structalias.c
===================================================================
--- gcc/tree-ssa-structalias.c	(revision 273795)
+++ gcc/tree-ssa-structalias.c	(working copy)
@@ -2069,36 +2069,73 @@ condense_visit (constraint_graph_t graph
   /* See if any components have been identified.  */
   if (si->dfs[n] == my_dfs)
     {
-      while (si->scc_stack.length () != 0
-	     && si->dfs[si->scc_stack.last ()] >= my_dfs)
+      if (si->scc_stack.length () != 0
+	  && si->dfs[si->scc_stack.last ()] >= my_dfs)
 	{
-	  unsigned int w = si->scc_stack.pop ();
-	  si->node_mapping[w] = n;
-
-	  if (!bitmap_bit_p (graph->direct_nodes, w))
-	    bitmap_clear_bit (graph->direct_nodes, n);
-
-	  /* Unify our nodes.  */
-	  if (graph->preds[w])
-	    {
-	      if (!graph->preds[n])
-		graph->preds[n] = BITMAP_ALLOC (&predbitmap_obstack);
-	      bitmap_ior_into (graph->preds[n], graph->preds[w]);
-	    }
-	  if (graph->implicit_preds[w])
+	  /* Find the first node of the SCC and do non-bitmap work.  */
+	  bool direct_p = true;
+	  unsigned first = si->scc_stack.length ();
+	  do
 	    {
-	      if (!graph->implicit_preds[n])
-		graph->implicit_preds[n] = BITMAP_ALLOC (&predbitmap_obstack);
-	      bitmap_ior_into (graph->implicit_preds[n],
-			       graph->implicit_preds[w]);
+	      --first;
+	      unsigned int w = si->scc_stack[first];
+	      si->node_mapping[w] = n;
+	      if (!bitmap_bit_p (graph->direct_nodes, w))
+		direct_p = false;
 	    }
-	  if (graph->points_to[w])
+	  while (first > 0
+		 && si->dfs[si->scc_stack[first - 1]] >= my_dfs);
+	  if (!direct_p)
+	    bitmap_clear_bit (graph->direct_nodes, n);
+
+	  /* Want to reduce to node n, push that first.  */
+	  si->scc_stack.reserve (1);
+	  si->scc_stack.quick_push (si->scc_stack[first]);
+	  si->scc_stack[first] = n;
+
+	  unsigned scc_size = si->scc_stack.length () - first;
+	  unsigned split = scc_size / 2;
+	  unsigned carry = scc_size - split * 2;
+	  while (split > 0)
 	    {
-	      if (!graph->points_to[n])
-		graph->points_to[n] = BITMAP_ALLOC (&predbitmap_obstack);
-	      bitmap_ior_into (graph->points_to[n],
-			       graph->points_to[w]);
+	      for (unsigned i = 0; i < split; ++i)
+		{
+		  unsigned a = si->scc_stack[first + i];
+		  unsigned b = si->scc_stack[first + split + carry + i];
+
+		  /* Unify our nodes.  */
+		  if (graph->preds[b])
+		    {
+		      if (!graph->preds[a])
+			std::swap (graph->preds[a], graph->preds[b]);
+		      else
+			bitmap_ior_into_and_free (graph->preds[a],
+						  &graph->preds[b]);
+		    }
+		  if (graph->implicit_preds[b])
+		    {
+		      if (!graph->implicit_preds[a])
+			std::swap (graph->implicit_preds[a],
+				   graph->implicit_preds[b]);
+		      else
+			bitmap_ior_into_and_free (graph->implicit_preds[a],
+						  &graph->implicit_preds[b]);
+		    }
+		  if (graph->points_to[b])
+		    {
+		      if (!graph->points_to[a])
+			std::swap (graph->points_to[a], graph->points_to[b]);
+		      else
+			bitmap_ior_into_and_free (graph->points_to[a],
+						  &graph->points_to[b]);
+		    }
+		}
+	      unsigned remain = split + carry;
+	      split = remain / 2;
+	      carry = remain - split * 2;
 	    }
+	  /* Actually pop the SCC.  */
+	  si->scc_stack.truncate (first);
 	}
       bitmap_set_bit (si->deleted, n);
     }



More information about the Gcc-patches mailing list