This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Improve code generated by cmpstrsi patterns


This patch improves the code generated by one specific construction -
but a common one.  Whenever memcmp or strcmp is used in conditional
context, on a platform with a cmpstrsi pattern, we generate the
tristate value of the comparison and immediately convert it back to
flags.  We get RTL that looks something like this:

(set (reg:CC flags)
     (compare:SI (mem:BLK (reg:SI 46) 0)
		 (mem:BLK (reg:SI 47) 0)))

(set (reg:QI 49)
     (gtu:QI (reg:CC flags)
	     (const_int 0)))

(set (reg:QI 50)
     (ltu:QI (reg:CC flags)
	     (const_int 0)))

(set (reg:CC flags)
     (compare:QI (reg:QI 49)
		 (reg:QI 50)))

The latter three insns can be thrown away.  Implementing the
optimization requires changes in three separate places.  First, we
must fix a bug in use_crosses_set_p where it will count a deleted insn
as a set.  [A real RTL dump will have several insns between the set of
reg 50 and the comparison, all of which get zapped by combine.]
Second, simplify_binary_operation needs to be taught that

(compare (gtu (reg:CC flags) (const_int 0))
	 (ltu (reg:CC flags) (const_int 0)))

is equivalent to (reg:CC flags).  I've worked this out on paper, and
yes it is always true.

Third, the result of combination is:

(set (reg:CC flags) (reg:CC flags))

This is not a recognizable insn on i386.  combine is not prepared to
handle deleting all three insns under consideration, so instead I
added a dummy pattern which matches precisely that no-op move.  It
will be zapped later on - if all else fails, by the final jump pass.

It might be that a different pass is better suited to this
optimization.  If so, please enlighten me.

Bootstrapped i386-linux.  No C regressions.  I get loads of C++
regressions (see other message) but I believe they are unrelated.

Note that the only other MD file with a cmpstrsi pattern is pj.md.  I
am not competent to tell whether it could benefit from this
optimization.  pj.md is a cc0 target, even though (as far as I can
tell) it would be better modelled otherwise.

zw

	* combine.c (try_combine): Remove redundant conditional.
	(use_crosses_set_p): If reg_last_set[regno] is a deleted insn,
	it does not count.
	* simplify-rtx.c (simplify_binary_operation) [case COMPARE]:
	Convert (compare (gt (flags) 0) (lt (flags) 0)) to (flags).

	* i386.md: Add a dummy insn so that (set (flags) (flags)) will
	be recognizable.

===================================================================
Index: combine.c
--- combine.c	2000/06/13 21:47:38	1.135
+++ combine.c	2000/07/15 17:56:45
@@ -2514,22 +2514,19 @@ try_combine (i3, i2, i1, new_direct_jump
 
     if (i3_subst_into_i2 && GET_CODE (PATTERN (i2)) == PARALLEL)
       {
-	if (GET_CODE (PATTERN (i2)) == PARALLEL)
-	  {
-	    for (i = 0; i < XVECLEN (PATTERN (i2), 0); i++)
-	      if (GET_CODE (SET_DEST (XVECEXP (PATTERN (i2), 0, i))) == REG
-		  && SET_DEST (XVECEXP (PATTERN (i2), 0, i)) != i2dest
-		  && ! find_reg_note (i2, REG_UNUSED,
-				      SET_DEST (XVECEXP (PATTERN (i2), 0, i))))
-		for (temp = NEXT_INSN (i2);
-		     temp && (this_basic_block == n_basic_blocks - 1
-			      || BLOCK_HEAD (this_basic_block) != temp);
-		     temp = NEXT_INSN (temp))
-		  if (temp != i3 && GET_RTX_CLASS (GET_CODE (temp)) == 'i')
-		    for (link = LOG_LINKS (temp); link; link = XEXP (link, 1))
-		      if (XEXP (link, 0) == i2)
-			XEXP (link, 0) = i3;
-	  }
+	for (i = 0; i < XVECLEN (PATTERN (i2), 0); i++)
+	  if (GET_CODE (SET_DEST (XVECEXP (PATTERN (i2), 0, i))) == REG
+	      && SET_DEST (XVECEXP (PATTERN (i2), 0, i)) != i2dest
+	      && ! find_reg_note (i2, REG_UNUSED,
+				  SET_DEST (XVECEXP (PATTERN (i2), 0, i))))
+	    for (temp = NEXT_INSN (i2);
+		 temp && (this_basic_block == n_basic_blocks - 1
+			  || BLOCK_HEAD (this_basic_block) != temp);
+		 temp = NEXT_INSN (temp))
+	      if (temp != i3 && GET_RTX_CLASS (GET_CODE (temp)) == 'i')
+		for (link = LOG_LINKS (temp); link; link = XEXP (link, 1))
+		  if (XEXP (link, 0) == i2)
+		    XEXP (link, 0) = i3;
 
 	if (i3notes)
 	  {
@@ -11504,7 +11501,10 @@ use_crosses_set_p (x, from_cuid)
 #endif
       for (; regno < endreg; regno++)
 	if (reg_last_set[regno]
-	    && INSN_CUID (reg_last_set[regno]) > from_cuid)
+	    /* If the last set has been deleted, it doesn't count.  */
+	    && INSN_CUID (reg_last_set[regno]) > from_cuid
+	    && ! INSN_DELETED_P (reg_last_set [regno])
+	    && GET_CODE (reg_last_set [regno]) != NOTE)
 	  return 1;
       return 0;
     }
===================================================================
Index: simplify-rtx.c
--- simplify-rtx.c	2000/05/24 20:26:54	1.19
+++ simplify-rtx.c	2000/07/15 17:56:46
@@ -948,11 +948,29 @@ simplify_binary_operation (code, mode, o
 	       || ! FLOAT_MODE_P (mode) || flag_fast_math)
 	      && op1 == CONST0_RTX (mode))
 	    return op0;
+#endif
+
+	  /* Convert (compare (gt (flags) 0) (lt (flags) 0)) to (flags).  */
+	  if (((GET_CODE (op0) == GT && GET_CODE (op1) == LT)
+	       || (GET_CODE (op0) == GTU && GET_CODE (op1) == LTU))
+	      && XEXP (op0, 1) == const0_rtx && XEXP (op1, 1) == const0_rtx)
+	    {
+	      rtx xop00 = XEXP (op0, 0);
+	      rtx xop10 = XEXP (op1, 0);
+
+#ifdef HAVE_cc0
+	      if (GET_CODE (xop00) == CC0 && GET_CODE (xop10) == CC0)
 #else
-	  /* Do nothing here.  */
+	      if (GET_CODE (xop00) == REG && GET_CODE (xop10) == REG
+		  && GET_MODE (xop00) == GET_MODE (xop10)
+		  && REGNO (xop00) == REGNO (xop10)
+		  && GET_MODE_CLASS (GET_MODE (xop00)) == MODE_CC
+		  && GET_MODE_CLASS (GET_MODE (xop10)) == MODE_CC)
 #endif
-	  break;
-	      
+		return xop00;
+	    }
+
+	  break;	      
 	case MINUS:
 	  /* None of these optimizations can be done for IEEE
 	     floating point.  */
===================================================================
Index: config/i386/i386.md
--- config/i386/i386.md	2000/07/07 23:49:18	1.163
+++ config/i386/i386.md	2000/07/15 17:56:47
@@ -1191,6 +1191,13 @@
   [(set_attr "type" "icmp")
    (set_attr "mode" "QI")])
 
+;; Dummy insn to help combine.
+(define_insn "*cc_nop_move"
+  [(set (reg 17) (reg 17))]
+  ""
+  "* abort ();  /* cc_nop_move survived to final! */"
+  [(set_attr "type" "other")])
+
 ;; These implement float point compares.
 ;; %%% See if we can get away with VOIDmode operands on the actual insns,
 ;; which would allow mix and match FP modes on the compares.  Which is what

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]