PATCH: More CSE memory reduction

Mark Mitchell mark@codesourcery.com
Fri Mar 31 08:31:00 GMT 2000


This patch removes the N^2 memory usage in CSE, but doesn't fix the
N^2 time complexity, which seems considerably harder.  However, this
patch does significantly reduce the constant, and I can see how more
could be done as well.

Again, I'm working in a tree where a bzero call gets expanded to
several thousand sets in a single basic block -- that shouldn't
happen, but it's a good approximation to cases that really do happen.
Machine-generated, or even human-generated code, can certainly wind up
with several thousand instructions in a basic block.  Before last
nights CONST_INT sharing patch, we used 249MB of GC-able memory by the
end of CSE, and took 80s to compile gcc/tree.c.  With that patch,
those numbers were reduced to 192MB and 74s.  This patch trims the
numbers to 5MB and 54s.  I'll take a 98% memory reduction and 32%
speedup any day.

The basic issue was that alias.c's canon_rtx generates new RTL in some
cases.  But, it's always the same RTL, so it makes sense to cache the
value.

This file still takes 60MB to compile (with the overzealous bzero).
The only reason for that, now, is that the same issue exists in flow:
there are O(N^2) calls to canon_rtx.  I think I can play some similar
games there as well.

--
Mark Mitchell                   mark@codesourcery.com
CodeSourcery, LLC               http://www.codesourcery.com

2000-03-31  Mark Mitchell  <mark@codesourcery.com>

	* alias.c (canon_rtx): Make it global.
	(rtx_equal_for_memref_p): CONST_INT equality is now pointer
	equality.
	* cse.c (struct table_elt): Add canon_exp.
	(insert): Clear it.
	(invalidate): Canonicalize expressions only once.
	* rtl.h (canon_rtx): Declare.

Index: alias.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/alias.c,v
retrieving revision 1.74
diff -c -p -r1.74 alias.c
*** alias.c	2000/03/31 08:57:53	1.74
--- alias.c	2000/03/31 10:21:34
*************** typedef struct alias_set_entry
*** 79,85 ****
    splay_tree children;
  } *alias_set_entry;
  
- static rtx canon_rtx			PARAMS ((rtx));
  static int rtx_equal_for_memref_p	PARAMS ((rtx, rtx));
  static rtx find_symbolic_term		PARAMS ((rtx));
  static rtx get_addr			PARAMS ((rtx));
--- 79,84 ----
*************** record_base_value (regno, val, invariant
*** 544,550 ****
    reg_base_value[regno] = find_base_value (val);
  }
  
! static rtx
  canon_rtx (x)
       rtx x;
  {
--- 543,554 ----
    reg_base_value[regno] = find_base_value (val);
  }
  
! /* Returns a canonical version of X, from the point of view alias
!    analysis.  (For example, if X is a MEM whose address is a register,
!    and the register has a known value (say a SYMBOL_REF), then a MEM
!    whose address is the SYMBOL_REF is returned.)  */
! 
! rtx
  canon_rtx (x)
       rtx x;
  {
*************** rtx_equal_for_memref_p (x, y)
*** 627,649 ****
    if (GET_MODE (x) != GET_MODE (y))
      return 0;
  
!   /* REG, LABEL_REF, and SYMBOL_REF can be compared nonrecursively.  */
  
!   if (code == REG)
!     return REGNO (x) == REGNO (y);
!   if (code == LABEL_REF)
!     return XEXP (x, 0) == XEXP (y, 0);
!   if (code == SYMBOL_REF)
!     return XSTR (x, 0) == XSTR (y, 0);
!   if (code == CONST_INT)
!     return INTVAL (x) == INTVAL (y);
!   /* There's no need to compare the contents of CONST_DOUBLEs because
!      they're unique. */
!   if (code == CONST_DOUBLE)
!     return 0;
!   if (code == ADDRESSOF)
!     return (REGNO (XEXP (x, 0)) == REGNO (XEXP (y, 0))
! 	    && XINT (x, 1) == XINT (y, 1));
  
    /* For commutative operations, the RTX match if the operand match in any
       order.  Also handle the simple binary and unary cases without a loop.  */
--- 631,662 ----
    if (GET_MODE (x) != GET_MODE (y))
      return 0;
  
!   /* Some RTL can be compared without a recursive examination.  */
!   switch (code)
!     {
!     case REG:
!       return REGNO (x) == REGNO (y);
  
!     case LABEL_REF:
!       return XEXP (x, 0) == XEXP (y, 0);
!       
!     case SYMBOL_REF:
!       return XSTR (x, 0) == XSTR (y, 0);
! 
!     case CONST_INT:
!     case CONST_DOUBLE:
!       /* There's no need to compare the contents of CONST_DOUBLEs or
! 	 CONST_INTs because pointer equality is a good enough
! 	 comparison for these nodes.  */
!       return 0;
! 
!     case ADDRESSOF:
!       return (REGNO (XEXP (x, 0)) == REGNO (XEXP (y, 0))
! 	      && XINT (x, 1) == XINT (y, 1));
! 
!     default:
!       break;
!     }
  
    /* For commutative operations, the RTX match if the operand match in any
       order.  Also handle the simple binary and unary cases without a loop.  */
Index: cse.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cse.c,v
retrieving revision 1.135
diff -c -p -r1.135 cse.c
*** cse.c	2000/03/31 08:57:53	1.135
--- cse.c	2000/03/31 10:21:45
*************** static int hash_arg_in_memory;
*** 408,413 ****
--- 408,416 ----
     each recording one expression's information.
     That expression is in the `exp' field.
  
+    The canon_exp field contains a canonical (from the point of view of
+    alias analysis) version of the `exp' field.
+ 
     Those elements with the same hash code are chained in both directions
     through the `next_same_hash' and `prev_same_hash' fields.
  
*************** static int hash_arg_in_memory;
*** 447,452 ****
--- 450,456 ----
  struct table_elt
  {
    rtx exp;
+   rtx canon_exp;
    struct table_elt *next_same_hash;
    struct table_elt *prev_same_hash;
    struct table_elt *next_same_value;
*************** insert (x, classp, hash, mode)
*** 1498,1503 ****
--- 1502,1508 ----
      }
  
    elt->exp = x;
+   elt->canon_exp = NULL_RTX;
    elt->cost = COST (x);
    elt->next_same_value = 0;
    elt->prev_same_value = 0;
*************** invalidate (x, full_mode)
*** 1823,1828 ****
--- 1828,1837 ----
        return;
  
      case MEM:
+       /* Calculate the canonical version of X here so that
+ 	 true_dependence doesn't generate new RTL for X on each call.  */
+       x = canon_rtx (x);
+ 
        /* Remove all hash table elements that refer to overlapping pieces of
  	 memory.  */
        if (full_mode == VOIDmode)
*************** invalidate (x, full_mode)
*** 1835,1845 ****
  	  for (p = table[i]; p; p = next)
  	    {
  	      next = p->next_same_hash;
! 	      if (p->in_memory
! 		  && (GET_CODE (p->exp) != MEM
! 		      || true_dependence (x, full_mode, p->exp,
! 					  cse_rtx_varies_p)))
! 		remove_from_table (p, i);
  	    }
  	}
        return;
--- 1844,1866 ----
  	  for (p = table[i]; p; p = next)
  	    {
  	      next = p->next_same_hash;
! 	      if (p->in_memory)
! 		{
! 		  if (GET_CODE (p->exp) != MEM)
! 		    remove_from_table (p, i);
! 		  else 
! 		    {
! 		      /* Just canonicalize the expression once;
! 			 otherwise each time we call invalidate
! 			 true_dependence will canonicalize the
! 			 expression again.  */
! 		      if (!p->canon_exp)
! 			p->canon_exp = canon_rtx (p->exp);
! 		      if (true_dependence (x, full_mode, p->canon_exp,
! 					   cse_rtx_varies_p))
! 			remove_from_table (p, i);
! 		    }
! 		}
  	    }
  	}
        return;
Index: rtl.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/rtl.h,v
retrieving revision 1.182
diff -c -p -r1.182 rtl.h
*** rtl.h	2000/03/29 01:56:04	1.182
--- rtl.h	2000/03/31 10:21:47
*************** extern void fancy_abort PARAMS ((const c
*** 1761,1766 ****
--- 1761,1767 ----
  #endif
  
  /* In alias.c */
+ extern rtx canon_rtx                    PARAMS ((rtx));
  extern int true_dependence		PARAMS ((rtx, enum machine_mode, rtx,
  						int (*)(rtx)));
  extern int read_dependence		PARAMS ((rtx, rtx));


More information about the Gcc-patches mailing list