This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PR30643] restore memory cse
On Mar 9, 2007, Alexandre Oliva <aoliva@redhat.com> wrote:
> Both were bootstrapped and tested on x86_64-linux-gnu. Ok to install?
> Is the 4.1 patch ok for the 4.2 branch if it passes bootstrap and
> tests as well?
Sorry, I'd failed to refresh the patch for the trunk.
Here is the version that I tested.
for gcc/ChangeLog
from Alexandre Oliva <aoliva@redhat.com>
PR rtl-optimization/30643
* cse.c (cse_insn): Recompute dest_hash after insert_regs for
dest_addr_elt.
Revert part of:
2006-11-03 Paolo Bonzini <bonzini@gnu.org>
Steven Bosscher <steven@gcc.gnu.org>
* cse.c (fold_rtx_mem, fold_rtx_mem_1, find_best_addr,
canon_for_address, table_size): Restore.
(new_basic_block, insert, remove_from_table): Restore references to
table_size.
(fold_rtx): Fold MEMs after processing them with
equiv_constant. Recurse.
for gcc/testsuite/ChangeLog
from Alexandre Oliva <aoliva@redhat.com>
PR rtl-optimization/30643
* gcc.dg/pr30643.c: New.
Index: gcc/cse.c
===================================================================
--- gcc/cse.c.orig 2007-03-08 08:46:19.000000000 -0300
+++ gcc/cse.c 2007-03-08 14:22:54.000000000 -0300
@@ -519,6 +519,10 @@ struct table_elt
static struct table_elt *table[HASH_SIZE];
+/* Number of elements in the hash table. */
+
+static unsigned int table_size;
+
/* Chain of `struct table_elt's made so far for this function
but currently removed from the table. */
@@ -590,6 +594,7 @@ static inline unsigned safe_hash (rtx, e
static unsigned hash_rtx_string (const char *);
static rtx canon_reg (rtx, rtx);
+static void find_best_addr (rtx, rtx *, enum machine_mode);
static enum rtx_code find_comparison_args (enum rtx_code, rtx *, rtx *,
enum machine_mode *,
enum machine_mode *);
@@ -716,6 +721,58 @@ approx_reg_cost (rtx x)
return cost;
}
+/* Returns a canonical version of X for the address, from the point of view,
+ that all multiplications are represented as MULT instead of the multiply
+ by a power of 2 being represented as ASHIFT. */
+
+static rtx
+canon_for_address (rtx x)
+{
+ enum rtx_code code;
+ enum machine_mode mode;
+ rtx new = 0;
+ int i;
+ const char *fmt;
+
+ if (!x)
+ return x;
+
+ code = GET_CODE (x);
+ mode = GET_MODE (x);
+
+ switch (code)
+ {
+ case ASHIFT:
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (mode)
+ && INTVAL (XEXP (x, 1)) >= 0)
+ {
+ new = canon_for_address (XEXP (x, 0));
+ new = gen_rtx_MULT (mode, new,
+ gen_int_mode ((HOST_WIDE_INT) 1
+ << INTVAL (XEXP (x, 1)),
+ mode));
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (new)
+ return new;
+
+ /* Now recursively process each operand of this operation. */
+ fmt = GET_RTX_FORMAT (code);
+ for (i = 0; i < GET_RTX_LENGTH (code); i++)
+ if (fmt[i] == 'e')
+ {
+ new = canon_for_address (XEXP (x, i));
+ XEXP (x, i) = new;
+ }
+
+ return x;
+}
+
/* Return a negative value if an rtx A, whose costs are given by COST_A
and REGCOST_A, is more desirable than an rtx B.
Return a positive value if A is less desirable, or 0 if the two are
@@ -895,6 +952,8 @@ new_basic_block (void)
}
}
+ table_size = 0;
+
#ifdef HAVE_cc0
prev_insn_cc0 = 0;
#endif
@@ -1304,6 +1363,8 @@ remove_from_table (struct table_elt *elt
/* Now add it to the free element chain. */
elt->next_same_hash = free_element_chain;
free_element_chain = elt;
+
+ table_size--;
}
/* Look up X in the hash table and return its table element,
@@ -1581,6 +1642,8 @@ insert (rtx x, struct table_elt *classp,
}
}
+ table_size++;
+
return elt;
}
@@ -2747,6 +2810,234 @@ canon_reg (rtx x, rtx insn)
return x;
}
+/* LOC is a location within INSN that is an operand address (the contents of
+ a MEM). Find the best equivalent address to use that is valid for this
+ insn.
+
+ On most CISC machines, complicated address modes are costly, and rtx_cost
+ is a good approximation for that cost. However, most RISC machines have
+ only a few (usually only one) memory reference formats. If an address is
+ valid at all, it is often just as cheap as any other address. Hence, for
+ RISC machines, we use `address_cost' to compare the costs of various
+ addresses. For two addresses of equal cost, choose the one with the
+ highest `rtx_cost' value as that has the potential of eliminating the
+ most insns. For equal costs, we choose the first in the equivalence
+ class. Note that we ignore the fact that pseudo registers are cheaper than
+ hard registers here because we would also prefer the pseudo registers. */
+
+static void
+find_best_addr (rtx insn, rtx *loc, enum machine_mode mode)
+{
+ struct table_elt *elt;
+ rtx addr = *loc;
+ struct table_elt *p;
+ int found_better = 1;
+ int save_do_not_record = do_not_record;
+ int save_hash_arg_in_memory = hash_arg_in_memory;
+ int addr_volatile;
+ int regno;
+ unsigned hash;
+
+ /* Do not try to replace constant addresses or addresses of local and
+ argument slots. These MEM expressions are made only once and inserted
+ in many instructions, as well as being used to control symbol table
+ output. It is not safe to clobber them.
+
+ There are some uncommon cases where the address is already in a register
+ for some reason, but we cannot take advantage of that because we have
+ no easy way to unshare the MEM. In addition, looking up all stack
+ addresses is costly. */
+ if ((GET_CODE (addr) == PLUS
+ && REG_P (XEXP (addr, 0))
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT
+ && (regno = REGNO (XEXP (addr, 0)),
+ regno == FRAME_POINTER_REGNUM || regno == HARD_FRAME_POINTER_REGNUM
+ || regno == ARG_POINTER_REGNUM))
+ || (REG_P (addr)
+ && (regno = REGNO (addr), regno == FRAME_POINTER_REGNUM
+ || regno == HARD_FRAME_POINTER_REGNUM
+ || regno == ARG_POINTER_REGNUM))
+ || CONSTANT_ADDRESS_P (addr))
+ return;
+
+ /* If this address is not simply a register, try to fold it. This will
+ sometimes simplify the expression. Many simplifications
+ will not be valid, but some, usually applying the associative rule, will
+ be valid and produce better code. */
+ if (!REG_P (addr))
+ {
+ rtx folded = canon_for_address (fold_rtx (addr, NULL_RTX));
+
+ if (folded != addr)
+ {
+ int addr_folded_cost = address_cost (folded, mode);
+ int addr_cost = address_cost (addr, mode);
+
+ if ((addr_folded_cost < addr_cost
+ || (addr_folded_cost == addr_cost
+ /* ??? The rtx_cost comparison is left over from an older
+ version of this code. It is probably no longer helpful.*/
+ && (rtx_cost (folded, MEM) > rtx_cost (addr, MEM)
+ || approx_reg_cost (folded) < approx_reg_cost (addr))))
+ && validate_change (insn, loc, folded,
+ num_changes_pending () != 0))
+ addr = folded;
+ }
+ }
+
+ /* If this address is not in the hash table, we can't look for equivalences
+ of the whole address. Also, ignore if volatile. */
+
+ do_not_record = 0;
+ hash = HASH (addr, Pmode);
+ addr_volatile = do_not_record;
+ do_not_record = save_do_not_record;
+ hash_arg_in_memory = save_hash_arg_in_memory;
+
+ if (addr_volatile)
+ return;
+
+ elt = lookup (addr, hash, Pmode);
+
+ if (elt)
+ {
+ /* We need to find the best (under the criteria documented above) entry
+ in the class that is valid. We use the `flag' field to indicate
+ choices that were invalid and iterate until we can't find a better
+ one that hasn't already been tried. */
+
+ for (p = elt->first_same_value; p; p = p->next_same_value)
+ p->flag = 0;
+
+ while (found_better)
+ {
+ int best_addr_cost = address_cost (*loc, mode);
+ int best_rtx_cost = (elt->cost + 1) >> 1;
+ int exp_cost;
+ struct table_elt *best_elt = elt;
+
+ found_better = 0;
+ for (p = elt->first_same_value; p; p = p->next_same_value)
+ if (! p->flag)
+ {
+ if ((REG_P (p->exp)
+ || exp_equiv_p (p->exp, p->exp, 1, false))
+ && ((exp_cost = address_cost (p->exp, mode)) < best_addr_cost
+ || (exp_cost == best_addr_cost
+ && ((p->cost + 1) >> 1) > best_rtx_cost)))
+ {
+ found_better = 1;
+ best_addr_cost = exp_cost;
+ best_rtx_cost = (p->cost + 1) >> 1;
+ best_elt = p;
+ }
+ }
+
+ if (found_better)
+ {
+ if (validate_change (insn, loc,
+ canon_reg (copy_rtx (best_elt->exp),
+ NULL_RTX),
+ num_changes_pending () != 0))
+ return;
+ else
+ best_elt->flag = 1;
+ }
+ }
+ }
+
+ /* If the address is a binary operation with the first operand a register
+ and the second a constant, do the same as above, but looking for
+ equivalences of the register. Then try to simplify before checking for
+ the best address to use. This catches a few cases: First is when we
+ have REG+const and the register is another REG+const. We can often merge
+ the constants and eliminate one insn and one register. It may also be
+ that a machine has a cheap REG+REG+const. Finally, this improves the
+ code on the Alpha for unaligned byte stores. */
+
+ if (flag_expensive_optimizations
+ && ARITHMETIC_P (*loc)
+ && REG_P (XEXP (*loc, 0)))
+ {
+ rtx op1 = XEXP (*loc, 1);
+
+ do_not_record = 0;
+ hash = HASH (XEXP (*loc, 0), Pmode);
+ do_not_record = save_do_not_record;
+ hash_arg_in_memory = save_hash_arg_in_memory;
+
+ elt = lookup (XEXP (*loc, 0), hash, Pmode);
+ if (elt == 0)
+ return;
+
+ /* We need to find the best (under the criteria documented above) entry
+ in the class that is valid. We use the `flag' field to indicate
+ choices that were invalid and iterate until we can't find a better
+ one that hasn't already been tried. */
+
+ for (p = elt->first_same_value; p; p = p->next_same_value)
+ p->flag = 0;
+
+ while (found_better)
+ {
+ int best_addr_cost = address_cost (*loc, mode);
+ int best_rtx_cost = (COST (*loc) + 1) >> 1;
+ struct table_elt *best_elt = elt;
+ rtx best_rtx = *loc;
+ int count;
+
+ /* This is at worst case an O(n^2) algorithm, so limit our search
+ to the first 32 elements on the list. This avoids trouble
+ compiling code with very long basic blocks that can easily
+ call simplify_gen_binary so many times that we run out of
+ memory. */
+
+ found_better = 0;
+ for (p = elt->first_same_value, count = 0;
+ p && count < 32;
+ p = p->next_same_value, count++)
+ if (! p->flag
+ && (REG_P (p->exp)
+ || (GET_CODE (p->exp) != EXPR_LIST
+ && exp_equiv_p (p->exp, p->exp, 1, false))))
+
+ {
+ rtx new = simplify_gen_binary (GET_CODE (*loc), Pmode,
+ p->exp, op1);
+ int new_cost;
+
+ /* Get the canonical version of the address so we can accept
+ more. */
+ new = canon_for_address (new);
+
+ new_cost = address_cost (new, mode);
+
+ if (new_cost < best_addr_cost
+ || (new_cost == best_addr_cost
+ && (COST (new) + 1) >> 1 > best_rtx_cost))
+ {
+ found_better = 1;
+ best_addr_cost = new_cost;
+ best_rtx_cost = (COST (new) + 1) >> 1;
+ best_elt = p;
+ best_rtx = new;
+ }
+ }
+
+ if (found_better)
+ {
+ if (validate_change (insn, loc,
+ canon_reg (copy_rtx (best_rtx),
+ NULL_RTX),
+ num_changes_pending () != 0))
+ return;
+ else
+ best_elt->flag = 1;
+ }
+ }
+ }
+}
+
/* Given an operation (CODE, *PARG1, *PARG2), where code is a comparison
operation (EQ, NE, GT, etc.), follow it back through the hash table and
what values are being compared.
@@ -2941,6 +3232,217 @@ find_comparison_args (enum rtx_code code
return code;
}
+/* Fold MEM. Not to be called directly, see fold_rtx_mem instead. */
+
+static rtx
+fold_rtx_mem_1 (rtx x, rtx insn)
+{
+ enum machine_mode mode = GET_MODE (x);
+ rtx new;
+
+ /* If we are not actually processing an insn, don't try to find the
+ best address. Not only don't we care, but we could modify the
+ MEM in an invalid way since we have no insn to validate
+ against. */
+ if (insn != 0)
+ find_best_addr (insn, &XEXP (x, 0), mode);
+
+ {
+ /* Even if we don't fold in the insn itself, we can safely do so
+ here, in hopes of getting a constant. */
+ rtx addr = fold_rtx (XEXP (x, 0), NULL_RTX);
+ rtx base = 0;
+ HOST_WIDE_INT offset = 0;
+
+ if (REG_P (addr)
+ && REGNO_QTY_VALID_P (REGNO (addr)))
+ {
+ int addr_q = REG_QTY (REGNO (addr));
+ struct qty_table_elem *addr_ent = &qty_table[addr_q];
+
+ if (GET_MODE (addr) == addr_ent->mode
+ && addr_ent->const_rtx != NULL_RTX)
+ addr = addr_ent->const_rtx;
+ }
+
+ /* Call target hook to avoid the effects of -fpic etc.... */
+ addr = targetm.delegitimize_address (addr);
+
+ /* If address is constant, split it into a base and integer
+ offset. */
+ if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
+ base = addr;
+ else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
+ {
+ base = XEXP (XEXP (addr, 0), 0);
+ offset = INTVAL (XEXP (XEXP (addr, 0), 1));
+ }
+ else if (GET_CODE (addr) == LO_SUM
+ && GET_CODE (XEXP (addr, 1)) == SYMBOL_REF)
+ base = XEXP (addr, 1);
+
+ /* If this is a constant pool reference, we can fold it into its
+ constant to allow better value tracking. */
+ if (base && GET_CODE (base) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (base))
+ {
+ rtx constant = get_pool_constant (base);
+ enum machine_mode const_mode = get_pool_mode (base);
+ rtx new;
+
+ if (CONSTANT_P (constant) && GET_CODE (constant) != CONST_INT)
+ {
+ constant_pool_entries_cost = COST (constant);
+ constant_pool_entries_regcost = approx_reg_cost (constant);
+ }
+
+ /* If we are loading the full constant, we have an
+ equivalence. */
+ if (offset == 0 && mode == const_mode)
+ return constant;
+
+ /* If this actually isn't a constant (weird!), we can't do
+ anything. Otherwise, handle the two most common cases:
+ extracting a word from a multi-word constant, and
+ extracting the low-order bits. Other cases don't seem
+ common enough to worry about. */
+ if (! CONSTANT_P (constant))
+ return x;
+
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) == UNITS_PER_WORD
+ && offset % UNITS_PER_WORD == 0
+ && (new = operand_subword (constant,
+ offset / UNITS_PER_WORD,
+ 0, const_mode)) != 0)
+ return new;
+
+ if (((BYTES_BIG_ENDIAN
+ && offset == GET_MODE_SIZE (GET_MODE (constant)) - 1)
+ || (! BYTES_BIG_ENDIAN && offset == 0))
+ && (new = gen_lowpart (mode, constant)) != 0)
+ return new;
+ }
+
+ /* If this is a reference to a label at a known position in a jump
+ table, we also know its value. */
+ if (base && GET_CODE (base) == LABEL_REF)
+ {
+ rtx label = XEXP (base, 0);
+ rtx table_insn = NEXT_INSN (label);
+
+ if (table_insn && JUMP_P (table_insn)
+ && GET_CODE (PATTERN (table_insn)) == ADDR_VEC)
+ {
+ rtx table = PATTERN (table_insn);
+
+ if (offset >= 0
+ && (offset / GET_MODE_SIZE (GET_MODE (table))
+ < XVECLEN (table, 0)))
+ {
+ rtx label = XVECEXP
+ (table, 0, offset / GET_MODE_SIZE (GET_MODE (table)));
+ rtx set;
+
+ /* If we have an insn that loads the label from the
+ jumptable into a reg, we don't want to set the reg
+ to the label, because this may cause a reference to
+ the label to remain after the label is removed in
+ some very obscure cases (PR middle-end/18628). */
+ if (!insn)
+ return label;
+
+ set = single_set (insn);
+
+ if (! set || SET_SRC (set) != x)
+ return x;
+
+ /* If it's a jump, it's safe to reference the label. */
+ if (SET_DEST (set) == pc_rtx)
+ return label;
+
+ return x;
+ }
+ }
+ if (table_insn && JUMP_P (table_insn)
+ && GET_CODE (PATTERN (table_insn)) == ADDR_DIFF_VEC)
+ {
+ rtx table = PATTERN (table_insn);
+
+ if (offset >= 0
+ && (offset / GET_MODE_SIZE (GET_MODE (table))
+ < XVECLEN (table, 1)))
+ {
+ offset /= GET_MODE_SIZE (GET_MODE (table));
+ new = gen_rtx_MINUS (Pmode, XVECEXP (table, 1, offset),
+ XEXP (table, 0));
+
+ if (GET_MODE (table) != Pmode)
+ new = gen_rtx_TRUNCATE (GET_MODE (table), new);
+
+ /* Indicate this is a constant. This isn't a valid
+ form of CONST, but it will only be used to fold the
+ next insns and then discarded, so it should be
+ safe.
+
+ Note this expression must be explicitly discarded,
+ by cse_insn, else it may end up in a REG_EQUAL note
+ and "escape" to cause problems elsewhere. */
+ return gen_rtx_CONST (GET_MODE (new), new);
+ }
+ }
+ }
+
+ return x;
+ }
+}
+
+/* Fold MEM. */
+
+static rtx
+fold_rtx_mem (rtx x, rtx insn)
+{
+ /* To avoid infinite oscillations between fold_rtx and fold_rtx_mem,
+ refuse to allow recursion of the latter past n levels. This can
+ happen because fold_rtx_mem will try to fold the address of the
+ memory reference it is passed, i.e. conceptually throwing away
+ the MEM and reinjecting the bare address into fold_rtx. As a
+ result, patterns like
+
+ set (reg1)
+ (plus (reg)
+ (mem (plus (reg2) (const_int))))
+
+ set (reg2)
+ (plus (reg)
+ (mem (plus (reg1) (const_int))))
+
+ will defeat any "first-order" short-circuit put in either
+ function to prevent these infinite oscillations.
+
+ The heuristics for determining n is as follows: since each time
+ it is invoked fold_rtx_mem throws away a MEM, and since MEMs
+ are generically not nested, we assume that each invocation of
+ fold_rtx_mem corresponds to a new "top-level" operand, i.e.
+ the source or the destination of a SET. So fold_rtx_mem is
+ bound to stop or cycle before n recursions, n being the number
+ of expressions recorded in the hash table. We also leave some
+ play to account for the initial steps. */
+
+ static unsigned int depth;
+ rtx ret;
+
+ if (depth > 3 + table_size)
+ return x;
+
+ depth++;
+ ret = fold_rtx_mem_1 (x, insn);
+ depth--;
+
+ return ret;
+}
+
/* If X is a nontrivial arithmetic operation on an argument for which
a constant value can be determined, return the result of operating
on that value, as a constant. Otherwise, return X, possibly with
@@ -2984,12 +3486,16 @@ fold_rtx (rtx x, rtx insn)
code = GET_CODE (x);
switch (code)
{
- case MEM:
case SUBREG:
if ((new = equiv_constant (x)) != NULL_RTX)
return new;
return x;
+ case MEM:
+ if ((new = equiv_constant (x)) != NULL_RTX)
+ return new;
+ return fold_rtx_mem (x, insn);
+
case CONST:
case CONST_INT:
case CONST_DOUBLE:
@@ -3014,7 +3520,7 @@ fold_rtx (rtx x, rtx insn)
{
for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
validate_change (insn, &ASM_OPERANDS_INPUT (x, i),
- fold_rtx (ASM_OPERANDS_INPUT (x, i), insn), 0);
+ fold_rtx (ASM_OPERANDS_INPUT (x, i), insn), 1);
}
return x;
@@ -3045,11 +3551,35 @@ fold_rtx (rtx x, rtx insn)
{
rtx folded_arg = XEXP (x, i), const_arg;
enum machine_mode mode_arg = GET_MODE (folded_arg);
+
+ switch (GET_CODE (folded_arg))
+ {
+ case REG:
+ const_arg = equiv_constant (folded_arg);
+ break;
+
+ case CONST:
+ case CONST_INT:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case CONST_DOUBLE:
+ case CONST_VECTOR:
+ const_arg = folded_arg;
+ break;
+
#ifdef HAVE_cc0
- if (CC0_P (folded_arg))
- folded_arg = prev_insn_cc0, mode_arg = prev_insn_cc0_mode;
+ case CC0:
+ folded_arg = prev_insn_cc0;
+ mode_arg = prev_insn_cc0_mode;
+ const_arg = equiv_constant (folded_arg);
+ break;
#endif
- const_arg = equiv_constant (folded_arg);
+
+ default:
+ folded_arg = fold_rtx (folded_arg, insn);
+ const_arg = equiv_constant (folded_arg);
+ break;
+ }
/* For the first three operands, see if the operand
is constant or equivalent to a constant. */
@@ -5254,8 +5784,11 @@ cse_insn (rtx insn, rtx libcall_insn)
{
if (insert_regs (x, NULL, 0))
{
+ rtx dest = SET_DEST (sets[i].rtl);
+
rehash_using_reg (x);
hash = HASH (x, mode);
+ sets[i].dest_hash = HASH (dest, GET_MODE (dest));
}
elt = insert (x, NULL, hash, mode);
}
Index: gcc/testsuite/gcc.dg/pr30643.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ gcc/testsuite/gcc.dg/pr30643.c 2007-03-08 08:49:05.000000000 -0300
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "undefined" } } */
+
+/* Make sure we optimize all calls away. */
+
+extern void undefined (void);
+struct s { int a, b; };
+void bar (struct s *ps, int *p, int *__restrict__ rp, int *__restrict__ rq)
+{
+ ps->a = 0;
+ ps->b = 1;
+ if (ps->a != 0)
+ undefined ();
+ p[0] = 0;
+ p[1] = 1;
+ if (p[0] != 0)
+ undefined ();
+ rp[0] = 0;
+ rq[0] = 1;
+ if (rp[0] != 0)
+ undefined ();
+}
+int main (void) {
+ return 0;
+}
--
Alexandre Oliva http://www.lsd.ic.unicamp.br/~oliva/
FSF Latin America Board Member http://www.fsfla.org/
Red Hat Compiler Engineer aoliva@{redhat.com, gcc.gnu.org}
Free Software Evangelist oliva@{lsd.ic.unicamp.br, gnu.org}