This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Possible fix for 30% performance regression in PR 33928


In the commit for

http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=118475

the two routines canon_for_address and find_best_address were removed and not used.

Since this change didn't seem truly related to the other changes in cse.c, and the problem in PR33928 was related to memory addressing, I added these two routines back in and called find_best_address in fold_rtx before processing MEMs. This fixed the regression and brought the CPU time in the test program back to 108ms instead of 140ms.

I've listed the svn diff below for the changes to cse.c from revision 118475. cse.c hasn't changed since this revision, so this will apply cleanly.

Brad

pythagoras-90% svn diff
Index: gcc/cse.c
===================================================================
--- gcc/cse.c   (revision 118475)
+++ gcc/cse.c   (working copy)
@@ -600,6 +600,7 @@
 static unsigned hash_rtx_string (const char *);

static rtx canon_reg (rtx, rtx);
+static void find_best_addr (rtx, rtx *, enum machine_mode);
static enum rtx_code find_comparison_args (enum rtx_code, rtx *, rtx *,
enum machine_mode *,
enum machine_mode *);
@@ -730,6 +731,57 @@
return cost;
}


+/* Returns a canonical version of X for the address, from the point of view,
+ that all multiplications are represented as MULT instead of the multiply
+ by a power of 2 being represented as ASHIFT. */
+
+static rtx
+canon_for_address (rtx x)
+{
+ enum rtx_code code;
+ enum machine_mode mode;
+ rtx new = 0;
+ int i;
+ const char *fmt;
+
+ if (!x)
+ return x;
+
+ code = GET_CODE (x);
+ mode = GET_MODE (x);
+
+ switch (code)
+ {
+ case ASHIFT:
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (mode)
+ && INTVAL (XEXP (x, 1)) >= 0)
+ {
+ new = canon_for_address (XEXP (x, 0));
+ new = gen_rtx_MULT (mode, new,
+ gen_int_mode ((HOST_WIDE_INT) 1
+ << INTVAL (XEXP (x, 1)),
+ mode));
+ }
+ break;
+ default:
+ break;
+
+ }
+ if (new)
+ return new;
+
+ /* Now recursively process each operand of this operation. */
+ fmt = GET_RTX_FORMAT (code);
+ for (i = 0; i < GET_RTX_LENGTH (code); i++)
+ if (fmt[i] == 'e')
+ {
+ new = canon_for_address (XEXP (x, i));
+ XEXP (x, i) = new;
+ }
+ return x;
+}
+
/* Return a negative value if an rtx A, whose costs are given by COST_A
and REGCOST_A, is more desirable than an rtx B.
Return a positive value if A is less desirable, or 0 if the two are
@@ -2762,6 +2814,231 @@
return x;
}



+/* LOC is a location within INSN that is an operand address (the contents of
+ a MEM). Find the best equivalent address to use that is valid for this
+ insn.
+
+ On most CISC machines, complicated address modes are costly, and rtx_cost
+ is a good approximation for that cost. However, most RISC machines have
+ only a few (usually only one) memory reference formats. If an address is
+ valid at all, it is often just as cheap as any other address. Hence, for
+ RISC machines, we use `address_cost' to compare the costs of various
+ addresses. For two addresses of equal cost, choose the one with the
+ highest `rtx_cost' value as that has the potential of eliminating the
+ most insns. For equal costs, we choose the first in the equivalence
+ class. Note that we ignore the fact that pseudo registers are cheaper than
+ hard registers here because we would also prefer the pseudo registers. */
+
+static void
+find_best_addr (rtx insn, rtx *loc, enum machine_mode mode)
+{
+ struct table_elt *elt;
+ rtx addr = *loc;
+ struct table_elt *p;
+ int found_better = 1;
+ int save_do_not_record = do_not_record;
+ int save_hash_arg_in_memory = hash_arg_in_memory;
+ int addr_volatile;
+ int regno;
+ unsigned hash;
+
+ /* Do not try to replace constant addresses or addresses of local and
+ argument slots. These MEM expressions are made only once and inserted
+ in many instructions, as well as being used to control symbol table
+ output. It is not safe to clobber them.
+
+ There are some uncommon cases where the address is already in a register
+ for some reason, but we cannot take advantage of that because we have
+ no easy way to unshare the MEM. In addition, looking up all stack
+ addresses is costly. */
+ if ((GET_CODE (addr) == PLUS
+ && REG_P (XEXP (addr, 0))
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT
+ && (regno = REGNO (XEXP (addr, 0)),
+ regno == FRAME_POINTER_REGNUM || regno == HARD_FRAME_POINTER_REGNUM
+ || regno == ARG_POINTER_REGNUM))
+ || (REG_P (addr)
+ && (regno = REGNO (addr), regno == FRAME_POINTER_REGNUM
+ || regno == HARD_FRAME_POINTER_REGNUM
+ || regno == ARG_POINTER_REGNUM))
+ || CONSTANT_ADDRESS_P (addr))
+ return;
+
+ /* If this address is not simply a register, try to fold it. This will
+ sometimes simplify the expression. Many simplifications
+ will not be valid, but some, usually applying the associative rule, will
+ be valid and produce better code. */
+ if (!REG_P (addr))
+ {
+ rtx folded = canon_for_address (fold_rtx (addr, NULL_RTX));
+
+ if (folded != addr)
+ {
+ int addr_folded_cost = address_cost (folded, mode);
+ int addr_cost = address_cost (addr, mode);
+
+ if ((addr_folded_cost < addr_cost
+ || (addr_folded_cost == addr_cost
+ /* ??? The rtx_cost comparison is left over from an older
+ version of this code. It is probably no longer helpful.*/
+ && (rtx_cost (folded, MEM) > rtx_cost (addr, MEM)
+ || approx_reg_cost (folded) < approx_reg_cost (addr))))
+ && validate_change (insn, loc, folded, 0))
+ addr = folded;
+ }
+ }
+
+ /* If this address is not in the hash table, we can't look for equivalences
+ of the whole address. Also, ignore if volatile. */
+
+ do_not_record = 0;
+ hash = HASH (addr, Pmode);
+ addr_volatile = do_not_record;
+ do_not_record = save_do_not_record;
+ hash_arg_in_memory = save_hash_arg_in_memory;
+
+ if (addr_volatile)
+ return;
+
+ elt = lookup (addr, hash, Pmode);
+
+ if (elt)
+ {
+ /* We need to find the best (under the criteria documented above) entry
+ in the class that is valid. We use the `flag' field to indicate
+ choices that were invalid and iterate until we can't find a better
+ one that hasn't already been tried. */
+
+ for (p = elt->first_same_value; p; p = p->next_same_value)
+ p->flag = 0;
+
+ while (found_better)
+ {
+ int best_addr_cost = address_cost (*loc, mode);
+ int best_rtx_cost = (elt->cost + 1) >> 1;
+ int exp_cost;
+ struct table_elt *best_elt = elt;
+
+ found_better = 0;
+ for (p = elt->first_same_value; p; p = p->next_same_value)
+ if (! p->flag)
+ {
+ if ((REG_P (p->exp)
+ || exp_equiv_p (p->exp, p->exp, 1, false))
+ && ((exp_cost = address_cost (p->exp, mode)) < best_addr_cost
+ || (exp_cost == best_addr_cost
+ && ((p->cost + 1) >> 1) > best_rtx_cost)))
+ {
+ found_better = 1;
+ best_addr_cost = exp_cost;
+ best_rtx_cost = (p->cost + 1) >> 1;
+ best_elt = p;
+ }
+ }
+
+ if (found_better)
+ {
+ if (validate_change (insn, loc,
+ canon_reg (copy_rtx (best_elt->exp),
+ NULL_RTX), 0))
+ return;
+ else
+ best_elt->flag = 1;
+ }
+ }
+ }
+
+ /* If the address is a binary operation with the first operand a register
+ and the second a constant, do the same as above, but looking for
+ equivalences of the register. Then try to simplify before checking for
+ the best address to use. This catches a few cases: First is when we
+ have REG+const and the register is another REG+const. We can often merge
+ the constants and eliminate one insn and one register. It may also be
+ that a machine has a cheap REG+REG+const. Finally, this improves the
+ code on the Alpha for unaligned byte stores. */
+
+ if (flag_expensive_optimizations
+ && ARITHMETIC_P (*loc)
+ && REG_P (XEXP (*loc, 0)))
+ {
+ rtx op1 = XEXP (*loc, 1);
+
+ do_not_record = 0;
+ hash = HASH (XEXP (*loc, 0), Pmode);
+ do_not_record = save_do_not_record;
+ hash_arg_in_memory = save_hash_arg_in_memory;
+
+ elt = lookup (XEXP (*loc, 0), hash, Pmode);
+ if (elt == 0)
+ return;
+
+ /* We need to find the best (under the criteria documented above) entry
+ in the class that is valid. We use the `flag' field to indicate
+ choices that were invalid and iterate until we can't find a better
+ one that hasn't already been tried. */
+
+ for (p = elt->first_same_value; p; p = p->next_same_value)
+ p->flag = 0;
+
+ while (found_better)
+ {
+ int best_addr_cost = address_cost (*loc, mode);
+ int best_rtx_cost = (COST (*loc) + 1) >> 1;
+ struct table_elt *best_elt = elt;
+ rtx best_rtx = *loc;
+ int count;
+
+ /* This is at worst case an O(n^2) algorithm, so limit our search
+ to the first 32 elements on the list. This avoids trouble
+ compiling code with very long basic blocks that can easily
+ call simplify_gen_binary so many times that we run out of
+ memory. */
+
+ found_better = 0;
+ for (p = elt->first_same_value, count = 0;
+ p && count < 32;
+ p = p->next_same_value, count++)
+ if (! p->flag
+ && (REG_P (p->exp)
+ || (GET_CODE (p->exp) != EXPR_LIST
+ && exp_equiv_p (p->exp, p->exp, 1, false))))
+
+ {
+ rtx new = simplify_gen_binary (GET_CODE (*loc), Pmode,
+ p->exp, op1);
+ int new_cost;
+
+ /* Get the canonical version of the address so we can accept
+ more. */
+ new = canon_for_address (new);
+
+ new_cost = address_cost (new, mode);
+
+ if (new_cost < best_addr_cost
+ || (new_cost == best_addr_cost
+ && (COST (new) + 1) >> 1 > best_rtx_cost))
+ {
+ found_better = 1;
+ best_addr_cost = new_cost;
+ best_rtx_cost = (COST (new) + 1) >> 1;
+ best_elt = p;
+ best_rtx = new;
+ }
+ }
+
+ if (found_better)
+ {
+ if (validate_change (insn, loc,
+ canon_reg (copy_rtx (best_rtx),
+ NULL_RTX), 0))
+ return;
+ else
+ best_elt->flag = 1;
+ }
+ }
+ }
+}
+


/* Given an operation (CODE, *PARG1, *PARG2), where code is a comparison
operation (EQ, NE, GT, etc.), follow it back through the hash table and
what values are being compared.
@@ -3000,6 +3277,12 @@
switch (code)
{
case MEM:
+ if (insn != 0)
+ {
+ mode = GET_MODE (x);
+ find_best_addr (insn, &XEXP (x, 0), mode);
+ }
+ /* fall through */
case SUBREG:
if ((new = equiv_constant (x)) != NULL_RTX)
return new;



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]