This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Fix PR46556 (poor address generation)


On 10/05/2011 10:16 PM, William J. Schmidt wrote:
OK, I see.  If there's a better place downstream to make a swizzle, I'm
certainly fine with that.

I disabled locally_poor_mem_replacement and added some dump information
in should_replace_address to show the costs for the replacement I'm
trying to avoid:

In should_replace_address:
   old_rtx = (reg/f:DI 125 [ D.2036 ])
   new_rtx = (plus:DI (reg/v/f:DI 126 [ p ])
         (reg:DI 128))
   address_cost (old_rtx) = 0
   address_cost (new_rtx) = 0
   set_src_cost (old_rtx) = 0
   set_src_cost (new_rtx) = 4

In insn 11, replacing
  (mem/s:SI (reg/f:DI 125 [ D.2036 ]) [2 p_1(D)->a S4 A32])
  with (mem/s:SI (plus:DI (reg/v/f:DI 126 [ p ])
             (reg:DI 128)) [2 p_1(D)->a S4 A32])
Changed insn 11
deferring rescan insn with uid = 11.
deferring rescan insn with uid = 11.

And IIUC the other address is based on pseudo 125 as well, but the combination is (plus (plus (reg 126) (reg 128)) (const_int X)) and cannot be represented on ppc. I think _this_ is the problem, so I'm afraid your patch could cause pessimizations on x86 for example. On x86, which has a cheap REG+REG+CONST addressing mode, it is much better to propagate pseudo 125 so that you can delete the set altogether.


However, indeed there is no downstream pass that undoes the transformation. Perhaps we can do it in CSE, since this _is_ CSE after all. :) The attached untested (uncompiled) patch is an attempt.

Paolo
Index: cse.c
===================================================================
--- cse.c	(revision 177688)
+++ cse.c	(working copy)
@@ -3136,6 +3136,75 @@ find_comparison_args (enum rtx_code code
   return code;
 }
 
+static rtx
+lookup_addr (rtx insn, rtx *loc, enum machine_mode mode)
+{
+  struct table_elt *elt, *p;
+  int regno;
+  int hash;
+  int base_cost;
+  rtx addr = *loc;
+  rtx exp;
+
+  /* Try to reuse existing registers for addresses, in hope of shortening
+     live ranges for the registers that compose the addresses.  This happens
+     when you have
+
+         (set (reg C) (plus (reg A) (reg B))
+         (set (reg D) (mem (reg C)))
+         (set (reg E) (mem (plus (reg C) (const_int X))))
+
+     In this case fwprop will try to propagate into the addresses, but
+     if propagation into reg E fails, the only result will have been to
+     uselessly lengthen the live range of A and B.  */
+
+  if (!REG_P (addr))
+    return;
+
+  regno = REGNO (addr);
+  if (regno == FRAME_POINTER_REGNUM
+      || regno == HARD_FRAME_POINTER_REGNUM
+      || regno == ARG_POINTER_REGNUM)
+    return;
+
+   /* If this address is not in the hash table, we can't look for equivalences
+      of the whole address.  Also, ignore if volatile.  */
+
+  {
+    int save_do_not_record = do_not_record;
+    int save_hash_arg_in_memory = hash_arg_in_memory;
+    int addr_volatile;
+
+    do_not_record = 0;
+    hash = HASH (addr, Pmode);
+    addr_volatile = do_not_record;
+    do_not_record = save_do_not_record;
+    hash_arg_in_memory = save_hash_arg_in_memory;
+
+    if (addr_volatile)
+      return;
+  }
+
+  /* Try to find a REG that holds the same address.  */
+
+  elt = lookup (addr, hash, Pmode);
+  if (!elt)
+    return;
+
+  base_cost = address_cost (*loc, mode);
+  for (p = elt->first_same_value; p; p = p->next_same_value)
+    {
+      exp = p->exp;
+      if (REG_P (exp)
+          && exp_equiv_p (exp, exp, 1, false)
+          && address_cost (exp, mode) > base_cost)
+        break;
+    }
+
+  if (p)
+    validate_change (insn, loc, canon_reg (copy_rtx (exp), NULL_RTX), 0));
+}
+
 /* If X is a nontrivial arithmetic operation on an argument for which
    a constant value can be determined, return the result of operating
    on that value, as a constant.  Otherwise, return X, possibly with
@@ -3180,6 +3249,12 @@ fold_rtx (rtx x, rtx insn)
   switch (code)
     {
     case MEM:
+      if ((new_rtx = equiv_constant (x)) != NULL_RTX)
+        return new_rtx;
+      if (insn)
+        lookup_addr (insn, &XEXP (x, 0), GET_MODE (x));
+      return x;
+
     case SUBREG:
       if ((new_rtx = equiv_constant (x)) != NULL_RTX)
         return new_rtx;
Index: passes.c
===================================================================
--- passes.c	(revision 177688)
+++ passes.c	(working copy)
@@ -1448,9 +1448,9 @@ init_optimization_passes (void)
 	}
       NEXT_PASS (pass_web);
       NEXT_PASS (pass_rtl_cprop);
+      NEXT_PASS (pass_rtl_fwprop_addr);
       NEXT_PASS (pass_cse2);
       NEXT_PASS (pass_rtl_dse1);
-      NEXT_PASS (pass_rtl_fwprop_addr);
       NEXT_PASS (pass_inc_dec);
       NEXT_PASS (pass_initialize_regs);
       NEXT_PASS (pass_ud_rtl_dce);

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]