This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: gcc compile-time performance



The biggest offender on x86 for some of my profiles seems to be
for_each_rtx(), which actually stems a tiny bit from stupidity in
for_each_rtx (some recursion can be eliminated) and a lot of stupidity
in CSE.

CSE's approx_reg_cost() is the true problem...  That thing gets called
on every expression CSE inserts into it's tables, every address that
the cost is computed for, etc. (which is "a lot").  What's more it is
implemented stupidly (construct a reg set just to check for bits set,
ummm why not just do the counter bumping in the for_each_rtx helper
function, duh...)

Furthermore, when approx_reg_cost does get a REG, it should just
return -1 to for_each_rtx so it doesn't look into subexpressions
of the REG (translating into 2 or 1 useless recursive call to
for_each_rtx depending upon whether simple recursion has been
eliminated from for_each_rtx).

Finally, the whole hardreg counting thing is just to see if there
is ONE hard reg present in the expression.  This also only matters
on SMALL_REGISTER_CLASSES machines, so we can just return '1' from
approx_reg_cost_1() if we see a hard reg on a S_R_C target and check
that in the top-level return from for_each_rtx().

I've begun to hack up most of this...  patch below in case anyone
wants to play along at home.

One big disappointment is that, because approx_reg_cost runs on
just about any RTX, we can't use note_uses() just like gcse.c
does to find REGs.  note_uses only works on the toplevel pattern
of an INSN.

Even with my fixes below, for_each_rtx still is the third function
listed in the x86 profiles (right under memset() and cse_insn()).
I started to look into the memset() issues, but got distracted
when I noticed this approx_reg_cost() buisness...

Basically, I discovered that the compiler spends most of it's time
computing a heuristic... I think that speaks for itself :-)

--- cse.c.~1~	Mon Feb 18 18:53:23 2002
+++ cse.c	Sun May 19 05:34:34 2002
@@ -719,10 +719,38 @@ approx_reg_cost_1 (xp, data)
      void *data;
 {
   rtx x = *xp;
-  regset set = (regset) data;
+  int *cost_p = data;
+  enum rtx_code code;
+
+  if (!x)
+    return 0;
+
+  code = GET_CODE (x);
+  if (code == SUBREG)
+    {
+      x = SUBREG_REG (x);
+      code = GET_CODE (x);
+    }
+  if (GET_CODE (x) == REG)
+    {
+      int i = REGNO (x);
+
+      if (! CHEAP_REGNO (i))
+	{
+	  if (i < FIRST_PSEUDO_REGISTER)
+	    {
+	      if (SMALL_REGISTER_CLASSES)
+		return 1;
+	      *cost_p += 2;
+	    }
+	  else
+	    *cost_p += 1;
+	}
+
+       /* No need to look at subexpressions.  */
+       return -1;
+    }
 
-  if (x && GET_CODE (x) == REG)
-    SET_REGNO_REG_SET (set, REGNO (x));
   return 0;
 }
 
@@ -735,28 +763,14 @@ static int
 approx_reg_cost (x)
      rtx x;
 {
-  regset_head set;
-  int i;
-  int cost = 0;
-  int hardregs = 0;
-
-  INIT_REG_SET (&set);
-  for_each_rtx (&x, approx_reg_cost_1, (void *)&set);
-
-  EXECUTE_IF_SET_IN_REG_SET
-    (&set, 0, i,
-     {
-       if (! CHEAP_REGNO (i))
-	 {
-	   if (i < FIRST_PSEUDO_REGISTER)
-	     hardregs++;
-
-	   cost += i < FIRST_PSEUDO_REGISTER ? 2 : 1;
-	 }
-     });
+  int cost;
+
+  cost = 0;
+
+  if (for_each_rtx (&x, approx_reg_cost_1, (void *)&cost))
+    return MAX_COST;
 
-  CLEAR_REG_SET (&set);
-  return hardregs && SMALL_REGISTER_CLASSES ? MAX_COST : cost;
+  return cost;
 }
 
 /* Return a negative value if an rtx A, whose costs are given by COST_A
--- rtlanal.c.~1~	Mon Apr 15 16:11:27 2002
+++ rtlanal.c	Sun May 19 04:39:16 2002
@@ -2709,6 +2709,7 @@ for_each_rtx (x, f, data)
   int i;
 
   /* Call F on X.  */
+ repeat:
   result = (*f) (x, data);
   if (result == -1)
     /* Do not traverse sub-expressions.  */
@@ -2724,11 +2725,16 @@ for_each_rtx (x, f, data)
   length = GET_RTX_LENGTH (GET_CODE (*x));
   format = GET_RTX_FORMAT (GET_CODE (*x));
 
-  for (i = 0; i < length; ++i) 
+  for (i = length - 1; i >= 0; --i) 
     {
       switch (format[i]) 
 	{
 	case 'e':
+	  if (i == 0)
+	    {
+	      x = &XEXP (*x, 0);
+	      goto repeat;
+	    }
 	  result = for_each_rtx (&XEXP (*x, i), f, data);
 	  if (result != 0)
 	    return result;
@@ -2739,8 +2745,13 @@ for_each_rtx (x, f, data)
 	  if (XVEC (*x, i) != 0) 
 	    {
 	      int j;
-	      for (j = 0; j < XVECLEN (*x, i); ++j)
+	      for (j = XVECLEN (*x, i) - 1; j >= 0; --j)
 		{
+		  if (i == 0 && j == 0)
+		    {
+		      x = &XVECEXP (*x, i, j);
+		      goto repeat;
+		    }
 		  result = for_each_rtx (&XVECEXP (*x, i, j), f, data);
 		  if (result != 0)
 		    return result;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]