This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RFA: v2 patch to fix calculix degradation


I fixed the problem reported by H.J. in

http://gcc.gnu.org/ml/gcc-patches/2008-11/msg00942.html

The patch was successfully bootstrapped (with all enabled languages) and tested on x86, x86_64, ppc64, an itanium.

Ok to commit?

2008-11-19 Vladimir Makarov <vmakarov@redhat.com>

* doc/invoke.texi (ira-max-loops-num): Change semantics.

* ira-int.h (struct ira_loop_tree_node): New member to_remove_p.

   * ira-color.c (allocno_spill_priority): New function.
   (remove_allocno_from_bucket_and_push, push_allocno_to_spill):
   Print more info about the spilled allocno.
   (push_allocnos_to_stack): Use allocno_spill_priority.  Add more
   checks on bad spill.

   * ira-build.c (loop_node_to_be_removed_p): Remove.
   (loop_compare_func, mark_loops_for_removal): New functions.
   (remove_uneccesary_loop_nodes_from_loop_t): Use member
   to_remove_p.
   (remove_unnecessary_allocnos): Call mark_loops_for_removal.

* ira.c (ira): Don't change flag_ira_algorithm.

* params.def (ira-max-loops-num): Change the value.


Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 142018)
+++ doc/invoke.texi	(working copy)
@@ -7603,10 +7603,10 @@ be disabled.  The default maximum SCC si
 
 @item ira-max-loops-num
 IRA uses a regional register allocation by default.  If a function
-contains loops more than number given by the parameter, non-regional
-register allocator will be used even when option
-@option{-fira-algorithm} is given.  The default value of the parameter
-is 20.
+contains loops more than number given by the parameter, only at most
+given number of the most frequently executed loops will form regions
+for the regional register allocation.  The default value of the
+parameter is 100.
 
 @end table
 @end table
Index: ira-int.h
===================================================================
--- ira-int.h	(revision 142018)
+++ ira-int.h	(working copy)
@@ -98,6 +98,10 @@ struct ira_loop_tree_node
   /* All the following members are defined only for nodes representing
      loops.  */
 
+  /* True if the loop was marked for removal from the register
+     allocation.  */
+  bool to_remove_p;
+
   /* Allocnos in the loop corresponding to their regnos.  If it is
      NULL the loop does not form a separate register allocation region
      (e.g. because it has abnormal enter/exit edges and we can not put
Index: ira-color.c
===================================================================
--- ira-color.c	(revision 142018)
+++ ira-color.c	(working copy)
@@ -655,6 +655,17 @@ static ira_allocno_t uncolorable_allocno
    of given *cover* class in the uncolorable_bucket.  */
 static int uncolorable_allocnos_num[N_REG_CLASSES];
 
+/* Return the current spill priority of allocno A.  The less the
+   number, the more preferable the allocno for spilling.  */
+static int
+allocno_spill_priority (ira_allocno_t a)
+{
+  return (ALLOCNO_TEMP (a)
+	  / (ALLOCNO_LEFT_CONFLICTS_NUM (a)
+	     * ira_reg_class_nregs[ALLOCNO_COVER_CLASS (a)][ALLOCNO_MODE (a)]
+	     + 1));
+}
+
 /* Add ALLOCNO to bucket *BUCKET_PTR.  ALLOCNO should be not in a bucket
    before the call.  */
 static void
@@ -925,7 +936,12 @@ remove_allocno_from_bucket_and_push (ira
     {
       fprintf (ira_dump_file, "      Pushing");
       print_coalesced_allocno (allocno);
-      fprintf (ira_dump_file, "%s\n", colorable_p ? "" : "(potential spill)");
+      if (colorable_p)
+	fprintf (ira_dump_file, "\n");
+      else
+	fprintf (ira_dump_file, "(potential spill: %spri=%d, cost=%d)\n",
+		 ALLOCNO_BAD_SPILL_P (allocno) ? "bad spill, " : "",
+		 allocno_spill_priority (allocno), ALLOCNO_TEMP (allocno));
     }
   cover_class = ALLOCNO_COVER_CLASS (allocno);
   ira_assert ((colorable_p
@@ -959,7 +975,7 @@ push_allocno_to_spill (ira_allocno_t all
   delete_allocno_from_bucket (allocno, &uncolorable_allocno_bucket);
   ALLOCNO_MAY_BE_SPILLED_P (allocno) = true;
   if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
-    fprintf (ira_dump_file, "      Pushing p%d(%d) (potential spill)\n",
+    fprintf (ira_dump_file, "      Pushing p%d(%d) (spill for NO_REGS)\n",
 	     ALLOCNO_NUM (allocno), ALLOCNO_REGNO (allocno));
   push_allocno_to_stack (allocno);
 }
@@ -1224,21 +1240,18 @@ push_allocnos_to_stack (void)
 		  i++;
 		  ira_assert (ALLOCNO_TEMP (i_allocno) != INT_MAX);
 		  i_allocno_cost = ALLOCNO_TEMP (i_allocno);
-		  i_allocno_pri
-		    = (i_allocno_cost
-		       / (ALLOCNO_LEFT_CONFLICTS_NUM (i_allocno)
-			  * ira_reg_class_nregs[ALLOCNO_COVER_CLASS
-						(i_allocno)]
-			  [ALLOCNO_MODE (i_allocno)] + 1));
+		  i_allocno_pri = allocno_spill_priority (i_allocno);
 		  if (allocno == NULL
 		      || (! ALLOCNO_BAD_SPILL_P (i_allocno)
 			  && ALLOCNO_BAD_SPILL_P (allocno))
-		      || allocno_pri > i_allocno_pri
-		      || (allocno_pri == i_allocno_pri
-			  && (allocno_cost > i_allocno_cost
-			      || (allocno_cost == i_allocno_cost 
-				  && (ALLOCNO_NUM (allocno)
-				      > ALLOCNO_NUM (i_allocno))))))
+		      || (! (ALLOCNO_BAD_SPILL_P (i_allocno)
+			     && ! ALLOCNO_BAD_SPILL_P (allocno))
+			  && (allocno_pri > i_allocno_pri
+			      || (allocno_pri == i_allocno_pri
+				  && (allocno_cost > i_allocno_cost
+				      || (allocno_cost == i_allocno_cost 
+					  && (ALLOCNO_NUM (allocno)
+					      > ALLOCNO_NUM (i_allocno))))))))
 		    {
 		      allocno = i_allocno;
 		      allocno_cost = i_allocno_cost;
Index: ira-build.c
===================================================================
--- ira-build.c	(revision 142018)
+++ ira-build.c	(working copy)
@@ -1677,20 +1677,81 @@ low_pressure_loop_node_p (ira_loop_tree_
   return true;
 }
 
-/* Return TRUE if NODE represents a loop with should be removed from
-   regional allocation.  We remove a loop with low register pressure
-   inside another loop with register pressure.  In this case a
-   separate allocation of the loop hardly helps (for irregular
-   register file architecture it could help by choosing a better hard
-   register in the loop but we prefer faster allocation even in this
-   case).  */
-static bool
-loop_node_to_be_removed_p (ira_loop_tree_node_t node)
+/* Sort loops for marking them for removal.  We put already marked
+   loops first, then less frequent loops next, and then outer loops
+   next.  */
+static int
+loop_compare_func (const void *v1p, const void *v2p)
 {
-  return (node->parent != NULL && low_pressure_loop_node_p (node->parent)
-	  && low_pressure_loop_node_p (node));
+  int diff;
+  ira_loop_tree_node_t l1 = *(const ira_loop_tree_node_t *) v1p;
+  ira_loop_tree_node_t l2 = *(const ira_loop_tree_node_t *) v2p;
+
+  ira_assert (l1->parent != NULL && l2->parent != NULL);
+  if (l1->to_remove_p && ! l2->to_remove_p)
+    return -1;
+  if (! l1->to_remove_p && l2->to_remove_p)
+    return 1;
+  if ((diff = l1->loop->header->frequency - l2->loop->header->frequency) != 0)
+    return diff;
+  if ((diff = (int) loop_depth (l1->loop) - (int) loop_depth (l2->loop)) != 0)
+    return diff;
+  /* Make sorting stable.  */
+  return l1->loop->num - l2->loop->num;
 }
 
+
+/* Mark loops which should be removed from regional allocation.  We
+   remove a loop with low register pressure inside another loop with
+   register pressure.  In this case a separate allocation of the loop
+   hardly helps (for irregular register file architecture it could
+   help by choosing a better hard register in the loop but we prefer
+   faster allocation even in this case).  We also remove cheap loops
+   if there are more than IRA_MAX_LOOPS_NUM of them.  */
+static void
+mark_loops_for_removal (void)
+{
+  int i, n;
+  ira_loop_tree_node_t *sorted_loops;
+  loop_p loop;
+
+  sorted_loops
+    = (ira_loop_tree_node_t *) ira_allocate (sizeof (ira_loop_tree_node_t)
+					     * VEC_length (loop_p,
+							   ira_loops.larray));
+  for (n = i = 0; VEC_iterate (loop_p, ira_loops.larray, i, loop); i++)
+    if (ira_loop_nodes[i].regno_allocno_map != NULL)
+      {
+	if (ira_loop_nodes[i].parent == NULL)
+	  {
+	    /* Don't remove the root.  */
+	    ira_loop_nodes[i].to_remove_p = false;
+	    continue;
+	  }
+	sorted_loops[n++] = &ira_loop_nodes[i];
+	ira_loop_nodes[i].to_remove_p
+	  = (low_pressure_loop_node_p (ira_loop_nodes[i].parent)
+	     && low_pressure_loop_node_p (&ira_loop_nodes[i]));
+      }
+  qsort (sorted_loops, n, sizeof (ira_loop_tree_node_t), loop_compare_func);
+  for (i = 0; n - i + 1 > IRA_MAX_LOOPS_NUM; i++)
+    {
+      sorted_loops[i]->to_remove_p = true;
+      if (internal_flag_ira_verbose > 1 && ira_dump_file != NULL)
+	fprintf
+	  (ira_dump_file,
+	   "  Mark loop %d (header %d, freq %d, depth %d) for removal (%s)\n",
+	   sorted_loops[i]->loop->num, sorted_loops[i]->loop->header->index,
+	   sorted_loops[i]->loop->header->frequency,
+	   loop_depth (sorted_loops[i]->loop),
+	   low_pressure_loop_node_p (sorted_loops[i]->parent)
+	   && low_pressure_loop_node_p (sorted_loops[i])
+	   ? "low pressure" : "cheap loop");
+    }
+  ira_free (sorted_loops);
+}
+
+
 /* Definition of vector of loop tree nodes.  */
 DEF_VEC_P(ira_loop_tree_node_t);
 DEF_VEC_ALLOC_P(ira_loop_tree_node_t, heap);
@@ -1710,7 +1771,7 @@ remove_uneccesary_loop_nodes_from_loop_t
   bool remove_p;
   ira_loop_tree_node_t subnode;
 
-  remove_p = loop_node_to_be_removed_p (node);
+  remove_p = node->to_remove_p;
   if (! remove_p)
     VEC_safe_push (ira_loop_tree_node_t, heap, children_vec, node);
   start = VEC_length (ira_loop_tree_node_t, children_vec);
@@ -1759,13 +1820,13 @@ remove_unnecessary_allocnos (void)
       {
 	next_a = ALLOCNO_NEXT_REGNO_ALLOCNO (a);
 	a_node = ALLOCNO_LOOP_TREE_NODE (a);
-	if (! loop_node_to_be_removed_p (a_node))
+	if (! a_node->to_remove_p)
 	  prev_a = a;
 	else
 	  {
 	    for (parent = a_node->parent;
 		 (parent_a = parent->regno_allocno_map[regno]) == NULL
-		   && loop_node_to_be_removed_p (parent);
+		   && parent->to_remove_p;
 		 parent = parent->parent)
 	      ;
 	    if (parent_a == NULL)
@@ -1843,6 +1904,7 @@ remove_unnecessary_allocnos (void)
 static void
 remove_unnecessary_regions (void)
 {
+  mark_loops_for_removal ();
   children_vec
     = VEC_alloc (ira_loop_tree_node_t, heap,
 		 last_basic_block + VEC_length (loop_p, ira_loops.larray));
Index: ira.c
===================================================================
--- ira.c	(revision 142018)
+++ ira.c	(working copy)
@@ -1725,7 +1725,6 @@ ira (FILE *f)
   bool loops_p;
   int max_regno_before_ira, ira_max_point_before_emit;
   int rebuild_p;
-  int saved_flag_ira_algorithm;
   int saved_flag_ira_share_spill_slots;
   basic_block bb;
 
@@ -1801,9 +1800,6 @@ ira (FILE *f)
   ira_assert (current_loops == NULL);
   flow_loops_find (&ira_loops);
   current_loops = &ira_loops;
-  saved_flag_ira_algorithm = flag_ira_algorithm;
-  if (optimize && number_of_loops () > (unsigned) IRA_MAX_LOOPS_NUM)
-    flag_ira_algorithm = IRA_ALGORITHM_CB;
       
   if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
     fprintf (ira_dump_file, "Building IRA IR\n");
@@ -1935,8 +1931,6 @@ ira (FILE *f)
     bb->loop_father = NULL;
   current_loops = NULL;
 
-  flag_ira_algorithm = saved_flag_ira_algorithm;
-
   regstat_free_ri ();
   regstat_free_n_sets_and_refs ();
       
Index: params.def
===================================================================
--- params.def	(revision 142018)
+++ params.def	(working copy)
@@ -754,7 +754,7 @@ DEFPARAM (PARAM_DF_DOUBLE_QUEUE_THRESHOL
 DEFPARAM (PARAM_IRA_MAX_LOOPS_NUM,
 	  "ira-max-loops-num",
 	  "max loops number for regional RA",
-	  50, 0, 0)
+	  100, 0, 0)
 
 /* Switch initialization conversion will refuse to create arrays that are
    bigger than this parameter times the number of switch branches.  */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]