This is the mail archive of the mailing list for the GCC project.

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] fix part of PR33928

The testcase exposes deficiencies in the addressing mode selection as
performed by fwprop.  This fixes the first of it.

In the testcase's hot inner loop we have a lot of array accesses of the
form fp[N] with N between -12 and 12, and as a result a lot of addresses
of the shape -N(%r12).

Doing the addressing mode selection in CSE caused loop-invariant.c to
skip these invariants; doing it in fwprop2 (i.e. after RTL LIM) instead
causes it to hoist things like these to the loop preheader.

        leaq    -8(%r12), %rsi
        leaq    8(%r12), %r10
        leaq    -16(%r12), %r9
        leaq    -24(%r12), %rbx
        leaq    -32(%r12), %rbp
        leaq    -40(%r12), %rdi
        leaq    -48(%r12), %r11
        leaq    40(%r12), %rdx

This however causes higher register pressure and less scheduling freedom
in the remainder of the RTL pipeline.  The attached patch distinguishes
invariant that are cheap addresses and are always used in MEMs, and
gives a very low per-iteration cost to them.

This change allows to lower register pressure in this testcase, so that
renaming registers and scheduling brings us back to pre-fwprop
performance.  The reason regrename helps is that, on targets without
pre-regalloc scheduling (such as x86), the register allocator will often
create very small live ranges which are not amenable for post-regalloc
scheduling.  I'll investigate whether this can be improved in some way.

I don't plan to have optimal performance at -O1.   Even after this
patch, you still need also "-fforward-propagate -fschedule-insns2
-frename-registers".  It would be possible to modify fwprop so that it
does not require UD chains, so as to enable it at -O1, but I think it's
plausible to have performance regressions between versions without the
scheduler (since you're basically playing roulette as to whether the
processor will like the code).

(Note: the addr_use_p field is unused, but so was the n_uses field
before this patch and I found a use for it; I can remove it in the final
patch of course).

Bootstrapped/regtested i686-pc-linux-gnu, ok for 4.5 and after a while
for 4.4?

2009-05-07  Paolo Bonzini  <>

	* loop-invariant.c (struct use): Add addr_use_p.
	(struct def): Add n_addr_uses.
	(struct invariant): Add cheap_address.
	(create_new_invariant): Set cheap_address.
	(record_use): Accept df_ref.  Set addr_use_p and update n_addr_uses.
	(record_uses): Pass df_ref to record_use.
	(get_inv_cost): Return zero comp_cost for cheap addresses used only
	as such.

Index: gcc/loop-invariant.c
--- gcc/loop-invariant.c	(revision 147239)
+++ gcc/loop-invariant.c	(working copy)
@@ -71,7 +71,7 @@ struct use
   rtx *pos;			/* Position of the use.  */
   rtx insn;			/* The insn in that the use occurs.  */
+  unsigned addr_use_p;		/* Whether the use occurs in an address.  */
   struct use *next;		/* Next use in the list.  */
@@ -82,6 +82,7 @@ struct def
   struct use *uses;		/* The list of uses that are uniquely reached
 				   by it.  */
   unsigned n_uses;		/* Number of such uses.  */
+  unsigned n_addr_uses;		/* Number of such uses.  */
   unsigned invno;		/* The corresponding invariant.  */
@@ -114,6 +115,9 @@ struct invariant
   /* Whether to move the invariant.  */
   bool move;
+  /* Whether the invariant is cheap when used as an address.  */
+  bool cheap_address;
   /* Cost of the invariant.  */
   unsigned cost;
@@ -679,9 +683,16 @@ create_new_invariant (struct def *def, r
   /* If the set is simple, usually by moving it we move the whole store out of
      the loop.  Otherwise we save only cost of the computation.  */
   if (def)
-    inv->cost = rtx_cost (set, SET, speed);
+    {
+      inv->cost = rtx_cost (set, SET, speed);
+      inv->cheap_address = address_cost (SET_SRC (set), word_mode,
+					 speed) < COSTS_N_INSNS (1);
+    }
-    inv->cost = rtx_cost (SET_SRC (set), SET, speed);
+    {
+      inv->cost = rtx_cost (SET_SRC (set), SET, speed);
+      inv->cheap_address = false;
+    }
   inv->move = false;
   inv->reg = NULL_RTX;
@@ -708,17 +719,18 @@ create_new_invariant (struct def *def, r
 /* Record USE at DEF.  */
 static void
-record_use (struct def *def, rtx *use, rtx insn)
+record_use (struct def *def, df_ref use)
   struct use *u = XNEW (struct use);
-  gcc_assert (REG_P (*use));
-  u->pos = use;
-  u->insn = insn;
+  u->pos = DF_REF_REAL_LOC (use);
+  u->insn = DF_REF_INSN (use);
+  u->addr_use_p = (DF_REF_TYPE (use) != DF_REF_REG_USE);
   u->next = def->uses;
   def->uses = u;
+  if (u->addr_use_p)
+    def->n_addr_uses++;
 /* Finds the invariants USE depends on and store them to the DEPENDS_ON
@@ -865,14 +877,14 @@ record_uses (rtx insn)
       df_ref use = *use_rec;
       inv = invariant_for_use (use);
       if (inv)
-	record_use (inv->def, DF_REF_REAL_LOC (use), DF_REF_INSN (use));
+	record_use (inv->def, use);
   for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
       df_ref use = *use_rec;
       inv = invariant_for_use (use);
       if (inv)
-	record_use (inv->def, DF_REF_REAL_LOC (use), DF_REF_INSN (use));
+	record_use (inv->def, use);
@@ -992,6 +1004,10 @@ get_inv_cost (struct invariant *inv, int
   inv->stamp = actual_stamp;
+  if (inv->cheap_address
+      && inv->def->n_addr_uses == inv->def->n_uses)
+    return;
   (*comp_cost) += inv->cost;
 #ifdef STACK_REGS

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]