RFA: patch for PR optimization/11864

Joern Rennecke joern.rennecke@superh.com
Thu Jan 15 19:56:00 GMT 2004


As already outlined in bugzilla, the bug is in reload_cse_simplify_operands.
For those who like to read the rtl for themselves:
just after reload, we got:

(call_insn/u:HI 41 80 42 1 (nil) (parallel [
            (set (reg:SI 0 r0)
                (call (mem:SI (reg/f:SI 8 r8 [162]) [0 S4 A32])
                    (const_int 0 [0x0])))
            (use (reg:PSI 151 fpscr))
            (clobber (reg:SI 146 pr))
        ]) 169 {call_valuei} (nil)
    (expr_list:REG_EH_REGION (const_int -1 [0xffffffffffffffff])
        (nil))
    (expr_list (use (mem:BLK (scratch) [0 A8]))
        (nil)))

(insn:HI 42 41 48 1 (nil) (set (reg:SI 0 r0 [175])
        (reg:SI 0 r0)) 122 {movsi_i} (insn_list 41 (nil))
    (expr_list:REG_EQUAL (expr_list (use (mem:BLK (scratch) [0 A8]))
            (expr_list (reg/f:SI 8 r8 [162])
                (nil)))
        (nil)))

(insn:HI 48 42 50 1 (nil) (set (mem/s:QI (reg/f:SI 9 r9 [158]) [0 a+0 S1 A8])
        (reg:QI 0 r0 [175])) 131 {movqi_i} (insn_list 42 (nil))
    (nil))

(note:HI 50 48 52 1 ("pr11864-1.c") 31)

(insn:HI 52 50 53 1 (nil) (set (reg:QI 1 r1 [180])
        (mem/s:QI (reg/f:SI 9 r9 [158]) [0 a+0 S1 A8])) 131 {movqi_i} (nil)
    (expr_list:REG_EQUIV (mem/s:QI (reg/f:SI 9 r9 [158]) [0 a+0 S1 A8])
        (expr_list:REG_EQUAL (mem/s:QI (reg/f:SI 9 r9 [158]) [0 a+0 S1 A8])
            (nil))))

(note:HI 53 52 54 1 NOTE_INSN_DELETED)

(insn:HI 54 53 55 1 (nil) (set (reg:SI 147 t)
        (eq:SI (reg:SI 1 r1 [180])
            (const_int 0 [0x0]))) 1 {cmpeqsi_t} (insn_list 52 (nil))
    (nil))

And after reload_cse_simplify_operands had its go at instruction 52, it looks
like this:

(insn:HI 52 50 53 1 (nil) (set (reg:QI 1 r1 [180])
        (reg:QI 0 r0)) 131 {movqi_i} (nil)
    (expr_list:REG_EQUIV (mem/s:QI (reg/f:SI 9 r9 [158]) [0 a+0 S1 A8])
        (expr_list:REG_EQUAL (mem/s:QI (reg/f:SI 9 r9 [158]) [0 a+0 S1 A8])
            (nil))))

Thus, with 0xff00 in r0, we are comparing 0xff00 (instead of 0) against 0 in
instruction 54.

A well-tuned port which has LOAD_EXTEND_OP will also have a matching
zero / sign extension pattern that allows a memory operand.  So in the common
case that this is a simple load, I make the extension explicit, so
that reload_cse_simplify_operands can safely operate on it.  I also
recognize a few other simple cases, otherwise the optimization is
suppressed for memory operands with implicit zero / sign extensions.

The source filename here is for 3.3, but the patch applies cleanly to
postreload in 3.4.  I'm currently testing this for i686-pc-linux-gnu
native and cross to sh-elf.

2004-01-15  J"orn Rennecke <joern.rennecke@superh.com>

	* reload1.c (reload_cse_simplify_operands): Don't remove
	implicit extension from LOAD_EXTEND_OP.

Index: reload1.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/reload1.c,v
retrieving revision 1.366.2.6
diff -p -u -r1.366.2.6 reload1.c
--- reload1.c	7 Jun 2003 05:30:09 -0000	1.366.2.6
+++ reload1.c	15 Jan 2004 19:27:54 -0000
@@ -8355,6 +8355,8 @@ reload_cse_simplify_operands (insn, test
     {
       cselib_val *v;
       struct elt_loc_list *l;
+      rtx op;
+      enum machine_mode mode;
 
       CLEAR_HARD_REG_SET (equiv_regs[i]);
 
@@ -8366,7 +8368,52 @@ reload_cse_simplify_operands (insn, test
 	      && recog_data.operand_mode[i] == VOIDmode))
 	continue;
 
-      v = cselib_lookup (recog_data.operand[i], recog_data.operand_mode[i], 0);
+      op = recog_data.operand[i];
+      mode = GET_MODE (op);
+#ifdef LOAD_EXTEND_OP
+      if (GET_CODE (op) == MEM
+	  && GET_MODE_BITSIZE (mode) < BITS_PER_WORD
+	  && LOAD_EXTEND_OP (mode) != NIL)
+	{
+	  rtx set = single_set (insn);
+
+	  /* We might have multiple sets, some of which do implicit
+	     extension.  Punt on this for now.  */
+	  if (! set)
+	    continue;
+	  /* If the destination is also a MEM or a STRICT_LOW_PART, no
+	     extension applies.
+	     Also, if there is an explicit extension, we don't have to
+	     worry about an implicit one.  */
+	  else if (GET_CODE (SET_DEST (set)) == MEM
+		   || GET_CODE (SET_DEST (set)) == STRICT_LOW_PART
+		   || GET_CODE (SET_SRC (set)) == ZERO_EXTEND
+		   || GET_CODE (SET_SRC (set)) == SIGN_EXTEND)
+	    ; /* Continue ordinary processing.  */
+	  /* If this is a straight load, make the extension explicit.  */
+	  else if (GET_CODE (SET_DEST (set)) == REG
+		   && recog_data.n_operands == 2
+		   && SET_SRC (set) == op
+		   && SET_DEST (set) == recog_data.operand[1-i])
+	    {
+	      validate_change (insn, recog_data.operand_loc[i],
+			       gen_rtx_fmt_e (LOAD_EXTEND_OP (mode),
+					      word_mode, op),
+			       1);
+	      validate_change (insn, recog_data.operand_loc[1-i],
+			       gen_rtx_REG (word_mode, REGNO (SET_DEST (set))),
+			       1);
+	      if (! apply_change_group)
+		return 0;
+	      return reload_cse_simplify_operands (insn, testreg);
+	    }
+	  else
+	    /* ??? There might be arithmetic operations with memory that are
+	       safe to optimize, but is it worth the trouble?  */
+	    continue;
+	}
+#endif /* LOAD_EXTEND_OP */
+      v = cselib_lookup (op, recog_data.operand_mode[i], 0);
       if (! v)
 	continue;
 



More information about the Gcc-patches mailing list