This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RFB: patch to fix PR37534


Hi, Luis.

Could you test the following patch. It uses another spill heuristic. Usually spill priority is spill cost divided by # of left conflicts for the corresponding node in the graph. Some RA literature mentions division by #left conflicts in power 2. The patch uses a heuristic close to the second one. I don't know why but instead of 17% degradation it gives 10% improvement on facerec (for -O2 -mtune=power6).

Actually I tried many things to solve the problem:

o different coalescing algorithms (iterative and optimistic ones)

o usage of union of cover classes for pseudos. This is one drawback I see in IRA with comparison with the old RA. When pseudo is used only for transferring memory value (r<-m ... m<-r), it can be done through float or integer registers. When coloring is not possible for one class (e.g. integer registers) we could use another class (e.g. float registers).

o using coloring with different spill heuristics and choosing the best one (Bernstein's approach)

o different spill heuristics in reload.

But the best result is achieved by this patch which is ironically the simplest one.

In general the patch does not improve overall SPEC rates on other platforms. Because of NP-complete nature of RA, I don't believe that we can achieve better IRA behaviour on all tests but I think we should achieve not worse overall SPEC results.



Index: ira-color.c
===================================================================
--- ira-color.c	(revision 141500)
+++ ira-color.c	(working copy)
@@ -84,6 +84,78 @@ static VEC(ira_allocno_t,heap) *removed_
 
 
 
+/* Array whose elements contain accumulated live ranges length for the
+   corresponding allocno.  */
+static int *allocno_live_range_lengths;
+
+/* Return accumulated live ranges length of ALLOCNO.  */
+static int
+get_allocno_live_range_length (ira_allocno_t allocno)
+{
+  int allocno_lr = 0;
+  allocno_live_range_t r;
+  
+  for (r = ALLOCNO_LIVE_RANGES (allocno); r != NULL; r = r->next)
+    allocno_lr += r->finish - r->start + 1;
+  ira_assert (allocno_lr >= 0);
+  return allocno_lr;
+}
+
+/* Initiate array allocno_live_range_lengths.  */
+static void
+initiate_allocno_live_range_lengths (void)
+{
+  int i, length;
+  ira_allocno_t a, parent_a;
+  ira_loop_tree_node_t parent;
+
+  allocno_live_range_lengths
+    = (int *) ira_allocate (ira_allocnos_num * sizeof (int));
+  memset (allocno_live_range_lengths, 0, ira_allocnos_num * sizeof (int));
+  for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--)
+    for (a = ira_regno_allocno_map[i];
+	 a != NULL;
+	 a = ALLOCNO_NEXT_REGNO_ALLOCNO (a))
+      {
+	length = get_allocno_live_range_length (a);
+	allocno_live_range_lengths[ALLOCNO_NUM (a)] += length;
+	if ((parent_a = ALLOCNO_CAP (a)) != NULL
+	    || ((parent = ALLOCNO_LOOP_TREE_NODE (a)->parent) != NULL
+		&& (parent_a = parent->regno_allocno_map[i]) != NULL))
+	  allocno_live_range_lengths[ALLOCNO_NUM (parent_a)]
+	    += allocno_live_range_lengths[ALLOCNO_NUM (a)];
+      }
+}
+
+/* Free array allocno_live_range_lengths.  */
+static void
+finish_allocno_live_range_lengths (void)
+{
+  ira_free (allocno_live_range_lengths);
+}
+
+
+
+/* Cost multiplier to use to calculate allocno spill priority more
+   accurately.  */
+static int cost_multiplier;
+
+/* Return spill priority of allocno A.  */
+static int
+allocno_spill_priority (ira_allocno_t a)
+{
+  int cost;
+
+  cost = ALLOCNO_TEMP (a) * cost_multiplier;
+  return (cost
+	  / (ALLOCNO_LEFT_CONFLICTS_NUM (a)
+	     * ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a)
+	     * ira_reg_class_nregs[ALLOCNO_COVER_CLASS (a)][ALLOCNO_MODE (a)]
+	     + 1));
+}
+
+
+
 /* This page contains functions used to choose hard registers for
    allocnos.  */
 
@@ -374,6 +446,9 @@ print_coalesced_allocno (ira_allocno_t a
     }
 }
 
+/* The current cost of the allocation.  */
+static int curr_loop_coloring_cost;
+
 /* Choose a hard register for ALLOCNO (or for all coalesced allocnos
    represented by ALLOCNO).  If RETRY_P is TRUE, it means that the
    function called from function `ira_reassign_conflict_allocnos' and
@@ -532,10 +607,9 @@ assign_hard_reg (ira_allocno_t allocno, 
 	  cost += add_cost;
 	  full_cost += add_cost;
 	}
-      if (min_cost > cost)
-	min_cost = cost;
       if (min_full_cost > full_cost)
 	{
+	  min_cost = cost;
 	  min_full_cost = full_cost;
 	  best_hard_regno = hard_regno;
 	  ira_assert (hard_regno >= 0);
@@ -576,8 +650,13 @@ assign_hard_reg (ira_allocno_t allocno, 
 	}
       return false;
     }
-  if (best_hard_regno >= 0)
-    allocated_hardreg_p[best_hard_regno] = true;
+  if (best_hard_regno < 0)
+    curr_loop_coloring_cost += mem_cost;
+  else
+    {
+      allocated_hardreg_p[best_hard_regno] = true;
+      curr_loop_coloring_cost += min_cost;
+    }
   for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);;
        a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a))
     {
@@ -775,6 +854,26 @@ static splay_tree uncolorable_allocnos_s
    into account.  */
 #define USE_SPLAY_P(CLASS) (uncolorable_allocnos_num[CLASS] > 4000)
 
+/* Print the splay tree node NODE to stderr.  */
+static int
+print_splay_tree_node (splay_tree_node node, void *data ATTRIBUTE_UNUSED)
+{
+  ira_allocno_t a = (ira_allocno_t) node->key;
+
+  fprintf (stderr, "a%d(%d)\n", ALLOCNO_NUM (a), allocno_spill_priority (a));
+  return 0;
+}
+
+extern void debug_splay_tree (splay_tree tree);
+
+/* Print the splay tree to stderr.  */
+void
+debug_splay_tree (splay_tree tree)
+{
+  splay_tree_foreach (tree, print_splay_tree_node, NULL);
+  fprintf (stderr, "\n");
+}
+
 /* Put ALLOCNO onto the coloring stack without removing it from its
    bucket.  Pushing allocno to the coloring stack can result in moving
    conflicting allocnos from the uncolorable bucket to the colorable
@@ -995,18 +1094,15 @@ allocno_spill_priority_compare (splay_tr
   int pri1, pri2, diff;
   ira_allocno_t a1 = (ira_allocno_t) k1, a2 = (ira_allocno_t) k2;
   
-  pri1 = (ALLOCNO_TEMP (a1)
-	  / (ALLOCNO_LEFT_CONFLICTS_NUM (a1)
-	     * ira_reg_class_nregs[ALLOCNO_COVER_CLASS (a1)][ALLOCNO_MODE (a1)]
-	     + 1));
-  pri2 = (ALLOCNO_TEMP (a2)
-	  / (ALLOCNO_LEFT_CONFLICTS_NUM (a2)
-	     * ira_reg_class_nregs[ALLOCNO_COVER_CLASS (a2)][ALLOCNO_MODE (a2)]
-	     + 1));
+  pri1 = allocno_spill_priority (a1);
+  pri2 = allocno_spill_priority (a2);
   if ((diff = pri1 - pri2) != 0)
     return diff;
   if ((diff = ALLOCNO_TEMP (a1) - ALLOCNO_TEMP (a2)) != 0)
     return diff;
+  if ((diff = (allocno_live_range_lengths[ALLOCNO_NUM (a2)]
+	       - allocno_live_range_lengths[ALLOCNO_NUM (a1)])) != 0)
+    return diff;
   return ALLOCNO_NUM (a1) - ALLOCNO_NUM (a2);
 }
 
@@ -1049,10 +1145,12 @@ push_allocnos_to_stack (void)
 {
   ira_allocno_t allocno, a, i_allocno, *allocno_vec;
   enum reg_class cover_class, rclass;
-  int allocno_pri, i_allocno_pri, allocno_cost, i_allocno_cost;
+  int allocno_pri, i_allocno_pri;
+  int allocno_cost, i_allocno_cost, allocno_lr, i_allocno_lr;
   int i, j, num, cover_class_allocnos_num[N_REG_CLASSES];
   ira_allocno_t *cover_class_allocnos[N_REG_CLASSES];
-  int cost;
+  int cost, max_cost;
+  bool set_p;
 
   /* Initialize.  */
   VEC_truncate(ira_allocno_t, removed_splay_allocno_vec, 0);
@@ -1063,6 +1161,7 @@ push_allocnos_to_stack (void)
       cover_class_allocnos[cover_class] = NULL;
       uncolorable_allocnos_splay_tree[cover_class] = NULL;
     }
+  max_cost = 0;
   /* Calculate uncolorable allocno spill costs.  */
   for (allocno = uncolorable_allocno_bucket;
        allocno != NULL;
@@ -1081,7 +1180,12 @@ push_allocnos_to_stack (void)
 	/* ??? Remove cost of copies between the coalesced
 	   allocnos.  */
 	ALLOCNO_TEMP (allocno) = cost;
+	if (cost < 0)
+	  cost = -cost;
+	if (cost > max_cost)
+	  max_cost = cost;
       }
+  cost_multiplier = max_cost == 0 ? 1 : INT_MAX / max_cost;
   /* Define place where to put uncolorable allocnos of the same cover
      class.  */
   for (num = i = 0; i < ira_reg_class_cover_size; i++)
@@ -1156,7 +1260,7 @@ push_allocnos_to_stack (void)
 	  ira_assert (num > 0);
 	  allocno_vec = cover_class_allocnos[cover_class];
 	  allocno = NULL;
-	  allocno_pri = allocno_cost = 0;
+	  allocno_pri = allocno_cost = 0; allocno_lr = -1;
 	  /* Sort uncolorable allocno to find the one with the lowest
 	     spill cost.  */
 	  for (i = 0, j = num - 1; i <= j;)
@@ -1172,35 +1276,35 @@ push_allocnos_to_stack (void)
 	      if (ALLOCNO_IN_GRAPH_P (i_allocno))
 		{
 		  i++;
-		  if (ALLOCNO_TEMP (i_allocno) == INT_MAX)
+		  i_allocno_cost = ALLOCNO_TEMP (i_allocno);
+		  i_allocno_pri = allocno_spill_priority (i_allocno);
+		  i_allocno_lr = -1;
+		  if (allocno == NULL)
+		    set_p = true;
+		  else if (allocno_pri > i_allocno_pri)
+		    set_p = true;
+		  else if (allocno_pri != i_allocno_pri)
+		    set_p = false;
+		  else if (allocno_cost > i_allocno_cost)
+		    set_p = true;
+		  else if (allocno_cost != i_allocno_cost)
+		    set_p = false;
+		  else
 		    {
-		      ira_allocno_t a;
-		      int cost = 0;
-		      
-		      for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (i_allocno);;
-			   a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a))
-			{
-			  cost += calculate_allocno_spill_cost (i_allocno);
-			  if (a == i_allocno)
-			    break;
-			}
-		      /* ??? Remove cost of copies between the coalesced
-			 allocnos.  */
-		      ALLOCNO_TEMP (i_allocno) = cost;
+		      i_allocno_lr
+			= allocno_live_range_lengths[ALLOCNO_NUM (i_allocno)];
+		      if (allocno_lr < 0)
+			allocno_lr
+			  = allocno_live_range_lengths[ALLOCNO_NUM (allocno)];
+		      if (allocno_lr < i_allocno_lr)
+			set_p = true;
+		      else if (allocno_lr != i_allocno_lr)
+			set_p = false;
+		      else
+			set_p
+			  = ALLOCNO_NUM (allocno) > ALLOCNO_NUM (i_allocno);
 		    }
-		  i_allocno_cost = ALLOCNO_TEMP (i_allocno);
-		  i_allocno_pri
-		    = (i_allocno_cost
-		       / (ALLOCNO_LEFT_CONFLICTS_NUM (i_allocno)
-			  * ira_reg_class_nregs[ALLOCNO_COVER_CLASS
-						(i_allocno)]
-			  [ALLOCNO_MODE (i_allocno)] + 1));
-		  if (allocno == NULL || allocno_pri > i_allocno_pri
-		      || (allocno_pri == i_allocno_pri
-			  && (allocno_cost > i_allocno_cost
-			      || (allocno_cost == i_allocno_cost 
-				  && (ALLOCNO_NUM (allocno)
-				      > ALLOCNO_NUM (i_allocno))))))
+		  if (set_p)
 		    {
 		      allocno = i_allocno;
 		      allocno_cost = i_allocno_cost;
@@ -1260,16 +1364,23 @@ pop_allocnos_from_stack (void)
 	  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
 	    fprintf (ira_dump_file, "assign memory\n");
 	}
-      else if (assign_hard_reg (allocno, false))
-	{
-	  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
-	    fprintf (ira_dump_file, "assign reg %d\n",
-		     ALLOCNO_HARD_REGNO (allocno));
-	}
-      else if (ALLOCNO_ASSIGNED_P (allocno))
+      else
 	{
-	  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
-	    fprintf (ira_dump_file, "spill\n");
+	  int cost_before = curr_loop_coloring_cost;
+
+	  if (assign_hard_reg (allocno, false))
+	    {
+	      if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
+		fprintf (ira_dump_file, "assign reg %d (cost %d)\n",
+			 ALLOCNO_HARD_REGNO (allocno),
+			 curr_loop_coloring_cost - cost_before);
+	    }
+	  else if (ALLOCNO_ASSIGNED_P (allocno))
+	    {
+	      if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
+		fprintf (ira_dump_file, "spill (cost %d)\n",
+			 curr_loop_coloring_cost - cost_before);
+	    }
 	}
       ALLOCNO_IN_GRAPH_P (allocno) = true;
     }
@@ -1619,6 +1730,15 @@ color_allocnos (void)
   /* Put the allocnos into the corresponding buckets.  */
   colorable_allocno_bucket = NULL;
   uncolorable_allocno_bucket = NULL;
+  curr_loop_coloring_cost = 0;
+  EXECUTE_IF_SET_IN_BITMAP (coloring_allocno_bitmap, 0, i, bi)
+    {
+      a = ira_allocnos[i];
+      ALLOCNO_HARD_REGNO (a) = -1;
+      ALLOCNO_MAY_BE_SPILLED_P (a) = false;
+      ALLOCNO_ASSIGNED_P (a) = false;
+      ALLOCNO_SPLAY_REMOVED_P (a) = false;
+    }
   EXECUTE_IF_SET_IN_BITMAP (coloring_allocno_bitmap, 0, i, bi)
     {
       a = ira_allocnos[i];
@@ -1722,6 +1842,8 @@ color_pass (ira_loop_tree_node_t loop_tr
     }
   /* Color all mentioned allocnos including transparent ones.  */
   color_allocnos ();
+  if (internal_flag_ira_verbose > 1 && ira_dump_file != NULL)
+    fprintf (ira_dump_file, "  Cost %d\n", curr_loop_coloring_cost);
   /* Process caps.  They are processed just once.  */
   if (flag_ira_algorithm == IRA_ALGORITHM_MIXED
       || flag_ira_algorithm == IRA_ALGORITHM_REGIONAL)
@@ -1863,8 +1985,12 @@ do_coloring (void)
   if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
     fprintf (ira_dump_file, "\n**** Allocnos coloring:\n\n");
   
+  initiate_allocno_live_range_lengths ();
+
   ira_traverse_loop_tree (false, ira_loop_tree_root, color_pass, NULL);
 
+  finish_allocno_live_range_lengths ();
+  
   if (internal_flag_ira_verbose > 1 && ira_dump_file != NULL)
     ira_print_disposition (ira_dump_file);
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]