This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFA][PATCH][tree-optimization/64058] Add new coalescing tie breaker heuristic V2



This patch contains two parts. First is a bit of raw infrastructure in bitmap.c. That change factors out the code to count the bits set in a given BITMAP_WORD and uses that new function from bitmap_count_bits.

It also introduces bitmap_count_unique_bits which counts the unique bits set across two bitmaps.


The second patch introduces the new tiebreaker in the coalescer and utilizes the new bitmap_count_unique_bits in the process. The major change since V1 is lazily counting those bits only when the major sort keys are the same for two coalesce_pairs. And we include the data for the new heuristic in the debugging dumps.

As I feared building a good testcase for this is a major PITA. Testing for a specific set of coalescings is not easy at all and IMHO is going to be way too fragile.

I did verify by hand that the test in 64058 and its duplicate were fixed by various iterations of this patch using trunk compilers reported in those BZs. Spot checking other tests showed a small, but consistent improvement in the generated code (fewer copies).

Bootstrapped and regression tested on x86-64-linux-gnu. OK for the trunk now?

Jeff

commit 716d40d9b853795f007476936f2ac06851d201e0
Author: Jeff Law <law@tor.usersys.redhat.com>
Date:   Wed Mar 16 16:56:03 2016 -0400

    	PR tree-optimization/64058
    	* bitmap.c (bitmap_count_unique_bits): New function.
    	(bitmap_count_bits_in_word): New function, extracted from
    	bitmap_count_bits.
    	(bitmap_count_bits): Use bitmap_count_bits_in_word.
    	* bitmap.h (bitmap_count_unique_bits): Declare it.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6f52c2d..05389b6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,12 @@
 2016-03-18  Jeff Law  <law@redhat.com>
 
+	PR tree-optimization/64058
+	* bitmap.c (bitmap_count_unique_bits): New function.
+	(bitmap_count_bits_in_word): New function, extracted from
+	bitmap_count_bits.
+	(bitmap_count_bits): Use bitmap_count_bits_in_word.
+	* bitmap.h (bitmap_count_unique_bits): Declare it.
+
 	PR rtl-optimization/70263
 	* ira.c (memref_used_between_p): Assert we found END in the insn chain.
 	(update_equiv_regs): When trying to move a store to after the insn
diff --git a/gcc/bitmap.c b/gcc/bitmap.c
index ac20ae5..0c05512 100644
--- a/gcc/bitmap.c
+++ b/gcc/bitmap.c
@@ -662,6 +662,26 @@ bitmap_popcount (BITMAP_WORD a)
   return ret;
 }
 #endif
+
+/* Count and return the number of bits set in the bitmap word BITS.  */
+static unsigned long
+bitmap_count_bits_in_word (const BITMAP_WORD *bits)
+{
+  unsigned long count = 0;
+
+  for (unsigned ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
+    {
+#if GCC_VERSION >= 3400
+      /* Note that popcountl matches BITMAP_WORD in type, so the actual size
+	 of BITMAP_WORD is not material.  */
+      count += __builtin_popcountl (bits[ix]);
+#else
+      count += bitmap_popcount (bits[ix]);
+#endif
+    }
+  return count;
+}
+
 /* Count the number of bits set in the bitmap, and return it.  */
 
 unsigned long
@@ -669,19 +689,44 @@ bitmap_count_bits (const_bitmap a)
 {
   unsigned long count = 0;
   const bitmap_element *elt;
-  unsigned ix;
 
   for (elt = a->first; elt; elt = elt->next)
+    count += bitmap_count_bits_in_word (elt->bits);
+
+  return count;
+}
+
+/* Count the number of unique bits set in A and B and return it.  */
+
+unsigned long
+bitmap_count_unique_bits (const_bitmap a, const_bitmap b)
+{
+  unsigned long count = 0;
+  const bitmap_element *elt_a, *elt_b;
+
+  for (elt_a = a->first, elt_b = b->first; elt_a && elt_b; )
     {
-      for (ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
+      /* If we're at different indices, then count all the bits
+	 in the lower element.  If we're at the same index, then
+	 count the bits in the IOR of the two elements.  */
+      if (elt_a->indx < elt_b->indx)
 	{
-#if GCC_VERSION >= 3400
- 	  /* Note that popcountl matches BITMAP_WORD in type, so the actual size
-	 of BITMAP_WORD is not material.  */
-	  count += __builtin_popcountl (elt->bits[ix]);
-#else
-	  count += bitmap_popcount (elt->bits[ix]);
-#endif
+	  count += bitmap_count_bits_in_word (elt_a->bits);
+	  elt_a = elt_a->next;
+	}
+      else if (elt_b->indx < elt_a->indx)
+	{
+	  count += bitmap_count_bits_in_word (elt_b->bits);
+	  elt_b = elt_b->next;
+	}
+      else
+	{
+	  BITMAP_WORD bits[BITMAP_ELEMENT_WORDS];
+	  for (unsigned ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
+	    bits[ix] = elt_a->bits[ix] | elt_b->bits[ix];
+	  count += bitmap_count_bits_in_word (bits);
+	  elt_a = elt_a->next;
+	  elt_b = elt_b->next;
 	}
     }
   return count;
diff --git a/gcc/bitmap.h b/gcc/bitmap.h
index 805e37e..1115711 100644
--- a/gcc/bitmap.h
+++ b/gcc/bitmap.h
@@ -280,6 +280,9 @@ extern bool bitmap_single_bit_set_p (const_bitmap);
 /* Count the number of bits set in the bitmap.  */
 extern unsigned long bitmap_count_bits (const_bitmap);
 
+/* Count the number of unique bits set across the two bitmaps.  */
+extern unsigned long bitmap_count_unique_bits (const_bitmap, const_bitmap);
+
 /* Boolean operations on bitmaps.  The _into variants are two operand
    versions that modify the first source operand.  The other variants
    are three operand versions that to not destroy the source bitmaps.
commit e49388235bf159ab76b95f5af90aa25c5e5813cd
Author: Jeff Law <law@tor.usersys.redhat.com>
Date:   Wed Mar 16 17:03:29 2016 -0400

    And finally the new heuristic
    
    	PR tree-optimization/64058
    	* tree-ssa-coalesce.c (struct coalesce_pair): Add new field
    	CONFLICT_COUNT.
    	(struct ssa_conflicts): Move up earlier in the file.
    	(conflicts_, var_map_): New static variables.
    	(initialize_conflict_count): New function to initialize the
    	CONFLICT_COUNT field for each conflict pair.
    	(compare_pairs): Lazily initialize the conflict count and use it
    	as the first tie-breaker.
    	(sort_coalesce_list): Add new arguments conflicts, map.  Initialize
    	and wipe conflicts_ and map_ around the call to qsort.  Remove
    	special case for 2 coalesce pairs.
    	(find_coalesce_pair): Initialize CONFLICT_COUNT field.
    	(dump_coalesce_list): Print the CONFLICT_COUNT field too.
    	* bitmap.c (bitmap_count_unique_bits): New function.
    	(bitmap_count_bits_in_word): New function, extracted from
    	bitmap_count_bits.
    	(bitmap_count_bits): Use bitmap_count_bits_in_word.
    	* bitmap.h (bitmap_count_unique_bits): Declare it.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 05389b6..96f1f11 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,17 @@
 2016-03-18  Jeff Law  <law@redhat.com>
 
 	PR tree-optimization/64058
+	* tree-ssa-coalesce.c (struct coalesce_pair): Add new field
+	CONFLICT_COUNT.
+	(struct ssa_conflicts): Move up earlier in the file.
+	(conflicts_, var_map_): New static variables.
+	(initialize_conflict_count): New function to initialize the
+	CONFLICT_COUNT field for each conflict pair.
+	(compare_pairs): Lazily initialize the conflict count and use it
+	as the first tie-breaker.
+	(sort_coalesce_list): Add new arguments conflicts, map.  Initialize
+	and wipe conflicts_ and map_ around the call to qsort.  Remove
+	special case for 2 coalesce pairs.
 	* bitmap.c (bitmap_count_unique_bits): New function.
 	(bitmap_count_bits_in_word): New function, extracted from
 	bitmap_count_bits.
diff --git a/gcc/tree-ssa-coalesce.c b/gcc/tree-ssa-coalesce.c
index e49876e..7120d29 100644
--- a/gcc/tree-ssa-coalesce.c
+++ b/gcc/tree-ssa-coalesce.c
@@ -51,12 +51,40 @@ struct coalesce_pair
   int second_element;
   int cost;
 
+  /* A count of the number of unique partitions this pair would conflict
+     with if coalescing was successful.  This is the secondary sort key,
+     given two pairs with equal costs, we will prefer the pair with a smaller
+     conflict set.
+
+     This is lazily initialized when we discover two coalescing pairs have
+     the same primary cost.
+
+     Note this is not updated and propagated as pairs are coalesced.  */
+  int conflict_count;
+
   /* The order in which coalescing pairs are discovered is recorded in this
      field, which is used as the final tie breaker when sorting coalesce
      pairs.  */
   int index;
 };
 
+/* This represents a conflict graph.  Implemented as an array of bitmaps.
+   A full matrix is used for conflicts rather than just upper triangular form.
+   this make sit much simpler and faster to perform conflict merges.  */
+
+struct ssa_conflicts
+{
+  bitmap_obstack obstack;	/* A place to allocate our bitmaps.  */
+  vec<bitmap> conflicts;
+};
+
+/* The narrow API of the qsort comparison function doesn't allow easy
+   access to additional arguments.  So we have two globals (ick) to hold
+   the data we need.  They're initialized before the call to qsort and
+   wiped immediately after.  */
+ssa_conflicts *conflicts_;
+var_map map_;
+
 /* Coalesce pair hashtable helpers.  */
 
 struct coalesce_pair_hasher : nofree_ptr_hash <coalesce_pair>
@@ -303,6 +331,7 @@ find_coalesce_pair (coalesce_list *cl, int p1, int p2, bool create)
       pair->second_element = p.second_element;
       pair->cost = 0;
       pair->index = num_coalesce_pairs (cl);
+      pair->conflict_count = 0;
       *slot = pair;
     }
 
@@ -345,21 +374,66 @@ add_coalesce (coalesce_list *cl, int p1, int p2, int value)
     }
 }
 
+/* Compute and record how many unique conflicts would exist for the
+   representative partition for each coalesce pair in CL.
+
+   CONFLICTS is the conflict graph and MAP is the current partition view.  */
+
+static void
+initialize_conflict_count (coalesce_pair *p,
+			   ssa_conflicts *conflicts,
+			   var_map map)
+{
+  int p1 = var_to_partition (map, ssa_name (p->first_element));
+  int p2 = var_to_partition (map, ssa_name (p->second_element));
+
+  /* 4 cases.  If both P1 and P2 have conflicts, then build their
+     union and count the members.  Else handle the degenerate cases
+     in the obvious ways.  */
+  if (conflicts->conflicts[p1] && conflicts->conflicts[p2])
+    p->conflict_count = bitmap_count_unique_bits (conflicts->conflicts[p1],
+						  conflicts->conflicts[p2]);
+  else if (conflicts->conflicts[p1])
+    p->conflict_count = bitmap_count_bits (conflicts->conflicts[p1]);
+  else if (conflicts->conflicts[p2])
+    p->conflict_count = bitmap_count_bits (conflicts->conflicts[p2]);
+  else
+    p->conflict_count = 0;
+}
+
 
 /* Comparison function to allow qsort to sort P1 and P2 in Ascending order.  */
 
 static int
 compare_pairs (const void *p1, const void *p2)
 {
-  const coalesce_pair *const *const pp1 = (const coalesce_pair *const *) p1;
-  const coalesce_pair *const *const pp2 = (const coalesce_pair *const *) p2;
+  coalesce_pair *const *const pp1 = (coalesce_pair *const *) p1;
+  coalesce_pair *const *const pp2 = (coalesce_pair *const *) p2;
   int result;
 
   result = (* pp1)->cost - (* pp2)->cost;
-  /* And if everything else is equal, then sort based on which
-     coalesce pair was found first.  */
+  /* We use the size of the resulting conflict set as the secondary sort key.
+     Given two equal costing coalesce pairs, we want to prefer the pair that
+     has the smaller conflict set.  */
   if (result == 0)
-    result = (*pp2)->index - (*pp1)->index;
+    {
+      if (flag_expensive_optimizations)
+	{
+	  /* Lazily initialize the conflict counts as it's fairly expensive
+	     to compute.  */
+	  if ((*pp2)->conflict_count == 0)
+	    initialize_conflict_count (*pp2, conflicts_, map_);
+	  if ((*pp1)->conflict_count == 0)
+	    initialize_conflict_count (*pp1, conflicts_, map_);
+
+	  result = (*pp2)->conflict_count - (*pp1)->conflict_count;
+	}
+
+      /* And if everything else is equal, then sort based on which
+	 coalesce pair was found first.  */
+      if (result == 0)
+	result = (*pp2)->index - (*pp1)->index;
+    }
 
   return result;
 }
@@ -374,7 +448,7 @@ compare_pairs (const void *p1, const void *p2)
    in order from most important coalesce to least important.  */
 
 static void
-sort_coalesce_list (coalesce_list *cl)
+sort_coalesce_list (coalesce_list *cl, ssa_conflicts *conflicts, var_map map)
 {
   unsigned x, num;
   coalesce_pair *p;
@@ -400,19 +474,14 @@ sort_coalesce_list (coalesce_list *cl)
   if (num == 1)
     return;
 
-  /* If there are only 2, just pick swap them if the order isn't correct.  */
-  if (num == 2)
-    {
-      if (cl->sorted[0]->cost > cl->sorted[1]->cost)
-	std::swap (cl->sorted[0], cl->sorted[1]);
-      return;
-    }
-
-  /* Only call qsort if there are more than 2 items.
-     ??? Maybe std::sort will do better, provided that compare_pairs
-     can be inlined.  */
-  if (num > 2)
-      qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs);
+  /* We don't want to depend on qsort_r, so we have to stuff away
+     additional data into globals so it can be referenced in
+     compare_pairs.  */
+  conflicts_ = conflicts;
+  map_ = map;
+  qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs);
+  conflicts_ = NULL;
+  map_ = NULL;
 }
 
 
@@ -437,7 +506,7 @@ dump_coalesce_list (FILE *f, coalesce_list *cl)
 	  print_generic_expr (f, var1, TDF_SLIM);
 	  fprintf (f, " <-> ");
 	  print_generic_expr (f, var2, TDF_SLIM);
-	  fprintf (f, "  (%1d), ", node->cost);
+	  fprintf (f, "  (%1d, %1d), ", node->cost, node->conflict_count);
 	  fprintf (f, "\n");
 	}
     }
@@ -447,7 +516,7 @@ dump_coalesce_list (FILE *f, coalesce_list *cl)
       for (x = cl->num_sorted - 1 ; x >=0; x--)
         {
 	  node = cl->sorted[x];
-	  fprintf (f, "(%d) ", node->cost);
+	  fprintf (f, "(%d, %d) ", node->cost, node->conflict_count);
 	  var = ssa_name (node->first_element);
 	  print_generic_expr (f, var, TDF_SLIM);
 	  fprintf (f, " <-> ");
@@ -459,16 +528,6 @@ dump_coalesce_list (FILE *f, coalesce_list *cl)
 }
 
 
-/* This represents a conflict graph.  Implemented as an array of bitmaps.
-   A full matrix is used for conflicts rather than just upper triangular form.
-   this make sit much simpler and faster to perform conflict merges.  */
-
-struct ssa_conflicts
-{
-  bitmap_obstack obstack;	/* A place to allocate our bitmaps.  */
-  vec<bitmap> conflicts;
-};
-
 /* Return an empty new conflict graph for SIZE elements.  */
 
 static inline ssa_conflicts *
@@ -1800,7 +1859,7 @@ coalesce_ssa_name (void)
   if (dump_file && (dump_flags & TDF_DETAILS))
     ssa_conflicts_dump (dump_file, graph);
 
-  sort_coalesce_list (cl);
+  sort_coalesce_list (cl, graph, map);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]