This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Decrease compile time memory with heavy find_base_{value,term} on i?86/x86_64 (PR rtl-optimization/63191)


Hi!

On the testcase in the PR (too large for our testsuite) we waste several
gigabytes of memory from allocations in ix86_delegitimize_address called
from find_base_{value,term}.
E.g. for find_base_term
 (plus:SI (value:SI 1:1 @0x2c60f50/0x2c50f40)
     (const:SI (plus:SI (unspec:SI [
                     (symbol_ref:SI ("_ZL2Zs") [flags 0x2] <var_decl 0x7fffefc19900 Zs>)
                 ] UNSPEC_GOTOFF)
             (const_int 8 [0x8]))))
on which it returns
 (const:SI (plus:SI (symbol_ref:SI ("_ZL2Zs") [flags 0x2] <var_decl 0x7fffefc19900 Zs>)
         (const_int 8 [0x8])))
it needs to allocate the plus and const, but it is all wasted: the caller
just looks through a CONST, and for a PLUS whose other operand is a
CONST_INT it also just recurses on the first operand.
The following patch fixes that by handling delegitimization slightly
differently when called from ix86_find_base_term - we don't allocate memory
that is known not to be useful to the caller.
The template is effectively a forced inline, so that we don't slow down the
normal delegitimization (but if you prefer a normal static inline function
with a bool base_term_p argument, it is easy to change that).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-03-10  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/63191
	* config/i386/i386.c (ix86_delegitimize_address): Turn into small
	wrapper function, move the whole old content into ...
	(ix86_delegitimize_address_tmpl): ... this.  New template.
	(ix86_find_base_term): Use ix86_delegitimize_address_tmpl<true>
	instead of ix86_delegitimize_address.

--- gcc/config/i386/i386.c.jj	2017-03-07 20:04:52.000000000 +0100
+++ gcc/config/i386/i386.c	2017-03-10 14:46:24.351775710 +0100
@@ -17255,10 +17255,17 @@ ix86_delegitimize_tls_address (rtx orig_
    has a different PIC label for each routine but the DWARF debugging
    information is not associated with any particular routine, so it's
    necessary to remove references to the PIC label from RTL stored by
-   the DWARF output code.  */
+   the DWARF output code.
 
-static rtx
-ix86_delegitimize_address (rtx x)
+   This template is used in the normal ix86_delegitimize_address
+   entrypoint (e.g. used in the target delegitimization hook) and
+   in ix86_find_base_term.  As compile time memory optimization, we
+   avoid allocating rtxes that will not change anything on the outcome
+   of the callers (find_base_value and find_base_term).  */
+
+template <bool base_term_p>
+static inline rtx
+ix86_delegitimize_address_tmpl (rtx x)
 {
   rtx orig_x = delegitimize_mem_from_attrs (x);
   /* addend is NULL or some rtx if x is something+GOTOFF where
@@ -17285,6 +17292,10 @@ ix86_delegitimize_address (rtx x)
           && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
           && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
         {
+	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
+	     base.  A CONST can't be arg_pointer_rtx based.  */
+	  if (base_term_p && MEM_P (orig_x))
+	    return orig_x;
 	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
 	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
 	  if (MEM_P (orig_x))
@@ -17361,7 +17372,9 @@ ix86_delegitimize_address (rtx x)
   if (! result)
     return ix86_delegitimize_tls_address (orig_x);
 
-  if (const_addend)
+  /* For (PLUS something CONST_INT) both find_base_{value,term} just
+     recurse on the first operand.  */
+  if (const_addend && !base_term_p)
     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
   if (reg_addend)
     result = gen_rtx_PLUS (Pmode, reg_addend, result);
@@ -17399,6 +17412,14 @@ ix86_delegitimize_address (rtx x)
   return result;
 }
 
+/* The normal instantiation of the above template.  */
+
+static rtx
+ix86_delegitimize_address (rtx x)
+{
+  return ix86_delegitimize_address_tmpl<false> (x);
+}
+
 /* If X is a machine specific address (i.e. a symbol or label being
    referenced as a displacement from the GOT implemented using an
    UNSPEC), then return the base term.  Otherwise return X.  */
@@ -17424,7 +17445,7 @@ ix86_find_base_term (rtx x)
       return XVECEXP (term, 0, 0);
     }
 
-  return ix86_delegitimize_address (x);
+  return ix86_delegitimize_address_tmpl<true> (x);
 }
 
 static void

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]