[RFC] Fix PR rtl-optimization/27616


This is a regression present on mainline and the 4.1 branch at -O and above:
CSE enters an infinite loop on x86 for the following excerpt of RTL:

;; Start of basic block 3, registers live: (nil)
(code_label 12 11 13 3 3 "" [1 uses])

(note 13 12 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)

(insn 15 13 17 3 (parallel [
            (set (reg/v/f:SI 59 [ cur ])
                (plus:SI (reg/f:SI 58 [ pretmp.21 ])
                    (mem/s:SI (plus:SI (reg/v/f:SI 60 [ first ])
                            (const_int 4 [0x4])) [3 <variable>.offset_next+0 S4 A32])))
            (clobber (reg:CC 17 flags))
        ]) 148 {*addsi_1} (nil)
    (nil))

(insn 17 15 18 3 (parallel [
            (set (reg:SI 61)
                (plus:SI (reg/f:SI 58 [ pretmp.21 ])
                    (mem/s:SI (plus:SI (reg/v/f:SI 59 [ cur ])
                            (const_int 4 [0x4])) [3 <variable>.offset_next+0 S4 A32])))
            (clobber (reg:CC 17 flags))
        ]) 148 {*addsi_1} (nil)
    (nil))

(insn 18 17 19 3 (set (reg:CCZ 17 flags)
        (compare:CCZ (reg:SI 61)
            (reg/v/f:SI 60 [ first ]))) 2 {*cmpsi_1_insn} (nil)
    (nil))

(jump_insn 19 18 22 3 (set (pc)
        (if_then_else (ne (reg:CCZ 17 flags)
                (const_int 0 [0x0]))
            (label_ref:SI 35)
            (pc))) 350 {*jcc_1} (nil)
    (expr_list:REG_BR_PROB (const_int 500 [0x1f4])
        (nil)))
;; End of basic block 3, registers live: (nil)

;; Start of basic block 4, registers live: (nil)
(note 22 19 24 4 [bb 4] NOTE_INSN_BASIC_BLOCK)

(insn 24 22 26 4 (set (mem/s:SI (plus:SI (reg/v/f:SI 60 [ first ])
                (const_int 4 [0x4])) [3 <variable>.offset_next+0 S4 A32])
        (const_int 0 [0x0])) 34 {*movsi_1} (nil)
    (nil))

The loop starts in find_best_addr on insn 24, with the expression:

(plus:SI (reg/v/f:SI 60 [ first ])
    (const_int 4 [0x4]))

This is an oscillation with period 6 between fold_rtx and fold_rtx_mem:

#0  fold_rtx (x=0x55759dd4, insn=0x0) at /home/eric/svn/gcc/gcc/cse.c:3621
#1  0x081a640a in fold_rtx_mem (x=0x55759de0, insn=0x0)
    at /home/eric/svn/gcc/gcc/cse.c:3453
#2  0x081a8203 in fold_rtx (x=0x55759de0, insn=0x0)
    at /home/eric/svn/gcc/gcc/cse.c:3678
#3  0x081ad4d0 in fold_rtx (x=0x55759d8c, insn=0x0)
    at /home/eric/svn/gcc/gcc/cse.c:4283
#4  0x081a640a in fold_rtx_mem (x=0x55759d98, insn=0x0)
    at /home/eric/svn/gcc/gcc/cse.c:3453
#5  0x081a8203 in fold_rtx (x=0x55759d98, insn=0x0)
    at /home/eric/svn/gcc/gcc/cse.c:3678
#6  0x081ad4d0 in fold_rtx (x=0x55759dd4, insn=0x0)
    at /home/eric/svn/gcc/gcc/cse.c:4283

The fundamental problem is that fold_rtx operates recursively on its operands
after equivalencing them, unlike the tree folder.  More specifically:

  /* If we have (<op> <reg> <const_int>) for an associative OP and REG
     is known to be of similar form, we may be able to replace the
     operation with a combined operation.  This may eliminate the
     intermediate operation if every use is simplified in this way.
     Note that the similar optimization done by combine.c only works
     if the intermediate operation's result has only one reference.  */
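
In source terms, this is the kind of combination that collapses chained
constant offsets.  For instance (a hypothetical reduction; on 32-bit x86
where int is 4 bytes):

  int f (int *q)
  {
    int *p = q + 2;   /* p is recorded as (plus:SI q (const_int 8)) */
    return p[1];      /* (plus:SI p (const_int 4)) is then combined
                         into (plus:SI q (const_int 12)), making the
                         intermediate addition dead if this was its
                         only use */
  }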

This is a recipe for infinite loops, and the code already contains a couple
of kludges guarding against them:

              /* If we have compiled a statement like
                 "if (x == (x & mask1))", and now are looking at
                 "x & mask2", we will have a case where the first operand
                 of Y is the same as our first operand.  Unless we detect
                 this case, an infinite loop will result.  */
              if (XEXP (y, 0) == folded_arg0)
                break;
[...]
              /* If Y contains our first operand (the most common way this
                 can happen is if Y is a MEM), we would go into an infinite
                 loop if we tried to fold it.  So don't in that case.  */

              if (! reg_mentioned_p (folded_arg0, y))
                y = fold_rtx (y, insn);
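
For concreteness, the first comment alludes to source of roughly this
shape (a hypothetical reduction, with the masks made constant so that
the associative path is taken):

  int g (int x)
  {
    /* In the taken branch, CSE records (and:SI x 15) in the same
       equivalence class as x itself...  */
    if (x == (x & 0x0f))
      /* ...so when folding (and:SI x 240), lookup_as_function returns
         y = (and:SI x 15), whose first operand is folded_arg0; without
         the guard, folding y would lead straight back here.  */
      return x & 0xf0;
    return 0;
  }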

I have another such kludge for the -O1 case, but it falls apart at -O2
because PRE hoists an equivalencing insn out of the extended basic block
under consideration.

The problematic pattern is

  set (reg1)
      (plus (reg)
            (mem (plus (reg2) (const_int))))

  set (reg2)
      (plus (reg)
            (mem (plus (reg1) (const_int))))

so in particular it defeats all the first-order kludges like the two above.
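
A toy model of why any such first-order guard is insufficient (purely
illustrative, not GCC code): treat the equivalence table as a successor
function and note that a check for immediate self-reference can never
catch a cycle of period 2.

  #include <stdio.h>

  /* Toy equivalence table: expression i is recorded in terms of
     expression next[i].  next = {1, 0} models the reg1 <-> reg2
     pattern above, a cycle of period 2.  */
  static const int next[2] = { 1, 0 };

  int main (void)
  {
    int i = 0, steps = 0;

    /* First-order guard: stop only on an immediate self-reference,
       the analogue of the XEXP (y, 0) == folded_arg0 check above.
       It never fires on a period-2 cycle, so only the external cap
       of 10 steps terminates the chase -- which is in essence what
       the patch below does with the recursion depth.  */
    while (next[i] != i && steps < 10)
      {
        i = next[i];
        steps++;
      }

    printf ("stopped after %d steps, by the %s\n", steps,
            steps == 10 ? "cap" : "self-reference guard");
    return 0;
  }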


I think that the only robust solution is to cap the recursion depth, and
that fold_rtx_mem is the best place to do it, as explained in the patch.

Comments on the heuristic used for the cap are welcome.


2006-08-09  Eric Botcazou  <ebotcazou@libertysurf.fr>

	PR rtl-optimization/27616
	* cse.c (table_size): New static variable.
	(new_basic_block): Initialize it to 0.
	(remove_from_table): Decrement it.
	(insert): Increment it.
	(fold_rtx_mem_1): New function, renamed from fold_rtx_mem.
	(fold_rtx_mem): Enforce a cap on the recursion depth.  Call
	fold_rtx_mem_1 if under the cap.
	(fold_rtx) <RTX_COMM_ARITH>: In the associative case, delay the
	lookup of the equivalent expression and test for equality of the
	first operand of the equivalent expression before looking up an
	equivalent constant for the second operand.


2006-08-09  Eric Botcazou  <ebotcazou@libertysurf.fr>

	* gcc.c-torture/compile/20060807-1.c: New test.


-- 
Eric Botcazou
/* PR rtl-optimization/27616 */
/* Reported by Lee Ji Hwan <moonz@kaist.ac.kr> */
/* Testcase by Andrew Pinski <pinskia@gcc.gnu.org> */

struct chunk_s
{
  unsigned int size;
  int offset_next;
};

typedef struct chunk_s chunk_t;

void foo(chunk_t *first)
{
  chunk_t *cur;
  char *first0;

  do {
    first0 = (char *) first;
    /* Insn 15 above: cur = first0 + first->offset_next.  */
    cur = (chunk_t *) (first0 + first->offset_next);
    /* Insns 17-19 above: the address of this load is computed from
       cur, which was itself loaded through first; recording both
       equivalences is what sets up the oscillation in CSE.  */
    if ((chunk_t *) (first0 + cur->offset_next) != first)
      return;

    first->offset_next = 0;

  } while (cur->size != 0);
}
Index: cse.c
===================================================================
--- cse.c	(revision 115944)
+++ cse.c	(working copy)
@@ -528,6 +528,10 @@ struct table_elt
 
 static struct table_elt *table[HASH_SIZE];
 
+/* Number of elements in the hash table.  */
+
+static unsigned int table_size;
+
 /* Chain of `struct table_elt's made so far for this function
    but currently removed from the table.  */
 
@@ -961,6 +965,8 @@ new_basic_block (void)
 	}
     }
 
+  table_size = 0;
+
 #ifdef HAVE_cc0
   prev_insn = 0;
   prev_insn_cc0 = 0;
@@ -1371,6 +1377,8 @@ remove_from_table (struct table_elt *elt
   /* Now add it to the free element chain.  */
   elt->next_same_hash = free_element_chain;
   free_element_chain = elt;
+
+  table_size--;
 }
 
 /* Look up X in the hash table and return its table element,
@@ -1648,6 +1656,8 @@ insert (rtx x, struct table_elt *classp,
 	}
     }
 
+  table_size++;
+
   return elt;
 }
 
@@ -3432,10 +3442,10 @@ fold_rtx_subreg (rtx x, rtx insn)
   return x;
 }
 
-/* Fold MEM.  */
+/* Fold MEM.  Not to be called directly, see fold_rtx_mem instead.  */
 
 static rtx
-fold_rtx_mem (rtx x, rtx insn)
+fold_rtx_mem_1 (rtx x, rtx insn)
 {
   enum machine_mode mode = GET_MODE (x);
   rtx new;
@@ -3598,6 +3608,51 @@ fold_rtx_mem (rtx x, rtx insn)
   }
 }
 
+/* Fold MEM.  */
+
+static rtx
+fold_rtx_mem (rtx x, rtx insn)
+{
+  /* To avoid infinite oscillations between fold_rtx and fold_rtx_mem,
+     refuse to allow recursion of the latter past n levels.  This can
+     happen because fold_rtx_mem will try to fold the address of the
+     memory reference it is passed, i.e. conceptually throwing away
+     the MEM and reinjecting the bare address into fold_rtx.  As a
+     result, patterns like
+
+       set (reg1)
+	   (plus (reg)
+		 (mem (plus (reg2) (const_int))))
+
+       set (reg2)
+	   (plus (reg)
+		 (mem (plus (reg1) (const_int))))
+
+     will defeat any "first-order" short-circuit put in either
+     function to prevent these infinite oscillations.
+
+     The heuristic for determining n is as follows: since each time
+     it is invoked fold_rtx_mem throws away a MEM, and since MEMs
+     are generically not nested, we assume that each invocation of
+     fold_rtx_mem corresponds to a new "top-level" operand, i.e.
+     the source or the destination of a SET.  So fold_rtx_mem is
+     bound to cycle after n recursions, n being the number of
+     expressions recorded in the hash table.  We also leave some
+     play to account for the initial steps.  */
+
+  static unsigned int depth;
+  rtx ret;
+
+  if (depth > 3 + table_size)
+    return x;
+
+  depth++;
+  ret = fold_rtx_mem_1 (x, insn);
+  depth--;
+
+  return ret;
+}
+
 /* If X is a nontrivial arithmetic operation on an argument
    for which a constant value can be determined, return
    the result of operating on that value, as a constant.
@@ -4262,10 +4317,8 @@ fold_rtx (rtx x, rtx insn)
 	    {
 	      int is_shift
 		= (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
-	      rtx y = lookup_as_function (folded_arg0, code);
-	      rtx inner_const;
+	      rtx y, inner_const, new_const;
 	      enum rtx_code associate_code;
-	      rtx new_const;
 
 	      if (is_shift
 		  && (INTVAL (const_arg1) >= GET_MODE_BITSIZE (mode)
@@ -4278,11 +4331,9 @@ fold_rtx (rtx x, rtx insn)
 		    break;
 		}
 
+	      y = lookup_as_function (folded_arg0, code);
 	      if (y == 0)
 		break;
-	      inner_const = equiv_constant (fold_rtx (XEXP (y, 1), 0));
-	      if (!inner_const || GET_CODE (inner_const) != CONST_INT)
-		break;
 
 	      /* If we have compiled a statement like
 		 "if (x == (x & mask1))", and now are looking at
@@ -4292,6 +4343,10 @@ fold_rtx (rtx x, rtx insn)
 	      if (XEXP (y, 0) == folded_arg0)
 		break;
 
+	      inner_const = equiv_constant (fold_rtx (XEXP (y, 1), 0));
+	      if (!inner_const || GET_CODE (inner_const) != CONST_INT)
+		break;
+
 	      /* Don't associate these operations if they are a PLUS with the
 		 same constant and it is a power of two.  These might be doable
 		 with a pre- or post-increment.  Similarly for two subtracts of
