This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Improve code generated by cmpstrsi patterns
- To: gcc-patches at gcc dot gnu dot org
- Subject: Improve code generated by cmpstrsi patterns
- From: Zack Weinberg <zack at wolery dot cumb dot org>
- Date: Sat, 15 Jul 2000 11:07:21 -0700
This patch improves the code generated by one specific construction -
but a common one. Whenever memcmp or strcmp is used in conditional
context, on a platform with a cmpstrsi pattern, we generate the
tristate value of the comparison and immediately convert it back to
flags. We get RTL that looks something like this:
(set (reg:CC flags)
(compare:SI (mem:BLK (reg:SI 46) 0)
(mem:BLK (reg:SI 47) 0)))
(set (reg:QI 49)
(gtu:QI (reg:CC flags)
(const_int 0)))
(set (reg:QI 50)
(ltu:QI (reg:CC flags)
(const_int 0)))
(set (reg:CC flags)
(compare:QI (reg:QI 49)
(reg:QI 50)))
The last three insns can be thrown away. To implement the
optimization requires changes in three separate places. First, we
must fix a bug in use_crosses_set_p where it will count a deleted insn
as a use. [A real RTL dump will have several insns between the set of
reg 50 and the comparison, all of which get zapped by combine.]
Second, simplify_binary_operation needs to be taught that
(compare (gtu (reg:CC flags) (const_int 0))
(ltu (reg:CC flags) (const_int 0)))
is equivalent to (reg:CC flags). I've worked this out on paper, and
yes, it is always true.
Third, the result of combination is
(set (reg:CC flags) (reg:CC flags))
This is not a recognizable insn on i386. combine is not prepared to
handle deleting all three insns under consideration, so instead I
added a dummy pattern which matches precisely that no-op move. It
will be zapped later on - if all else fails, by the final jump pass.
It might be that a different pass is better suited to this
optimization. If so, please enlighten me.
Bootstrapped i386-linux. No C regressions. I get loads of C++
regressions (see other message) but I believe they are unrelated.
Note that the only other MD file with a cmpstrsi pattern is pj.md. I
am not competent to tell whether it could benefit from this
optimization. pj.md is a cc0 target, even though (as far as I can
tell) it would be better modelled otherwise.
zw
* combine.c (try_combine): Remove redundant conditional.
(use_crosses_set_p): If reg_last_set[regno] is a deleted insn,
it does not count.
* simplify-rtx.c (simplify_binary_operation) [case COMPARE]:
Convert (compare (gt (flags) 0) (lt (flags) 0)) to (flags).
* i386.md: Add a dummy insn so that (set (flags) (flags)) will
be recognizable.
===================================================================
Index: combine.c
--- combine.c 2000/06/13 21:47:38 1.135
+++ combine.c 2000/07/15 17:56:45
@@ -2514,22 +2514,19 @@ try_combine (i3, i2, i1, new_direct_jump
if (i3_subst_into_i2 && GET_CODE (PATTERN (i2)) == PARALLEL)
{
- if (GET_CODE (PATTERN (i2)) == PARALLEL)
- {
- for (i = 0; i < XVECLEN (PATTERN (i2), 0); i++)
- if (GET_CODE (SET_DEST (XVECEXP (PATTERN (i2), 0, i))) == REG
- && SET_DEST (XVECEXP (PATTERN (i2), 0, i)) != i2dest
- && ! find_reg_note (i2, REG_UNUSED,
- SET_DEST (XVECEXP (PATTERN (i2), 0, i))))
- for (temp = NEXT_INSN (i2);
- temp && (this_basic_block == n_basic_blocks - 1
- || BLOCK_HEAD (this_basic_block) != temp);
- temp = NEXT_INSN (temp))
- if (temp != i3 && GET_RTX_CLASS (GET_CODE (temp)) == 'i')
- for (link = LOG_LINKS (temp); link; link = XEXP (link, 1))
- if (XEXP (link, 0) == i2)
- XEXP (link, 0) = i3;
- }
+ for (i = 0; i < XVECLEN (PATTERN (i2), 0); i++)
+ if (GET_CODE (SET_DEST (XVECEXP (PATTERN (i2), 0, i))) == REG
+ && SET_DEST (XVECEXP (PATTERN (i2), 0, i)) != i2dest
+ && ! find_reg_note (i2, REG_UNUSED,
+ SET_DEST (XVECEXP (PATTERN (i2), 0, i))))
+ for (temp = NEXT_INSN (i2);
+ temp && (this_basic_block == n_basic_blocks - 1
+ || BLOCK_HEAD (this_basic_block) != temp);
+ temp = NEXT_INSN (temp))
+ if (temp != i3 && GET_RTX_CLASS (GET_CODE (temp)) == 'i')
+ for (link = LOG_LINKS (temp); link; link = XEXP (link, 1))
+ if (XEXP (link, 0) == i2)
+ XEXP (link, 0) = i3;
if (i3notes)
{
@@ -11504,7 +11501,10 @@ use_crosses_set_p (x, from_cuid)
#endif
for (; regno < endreg; regno++)
if (reg_last_set[regno]
- && INSN_CUID (reg_last_set[regno]) > from_cuid)
+ /* If the last set has been deleted, it doesn't count. */
+ && INSN_CUID (reg_last_set[regno]) > from_cuid
+ && ! INSN_DELETED_P (reg_last_set [regno])
+ && GET_CODE (reg_last_set [regno]) != NOTE)
return 1;
return 0;
}
===================================================================
Index: simplify-rtx.c
--- simplify-rtx.c 2000/05/24 20:26:54 1.19
+++ simplify-rtx.c 2000/07/15 17:56:46
@@ -948,11 +948,29 @@ simplify_binary_operation (code, mode, o
|| ! FLOAT_MODE_P (mode) || flag_fast_math)
&& op1 == CONST0_RTX (mode))
return op0;
+#endif
+
+ /* Convert (compare (gt (flags) 0) (lt (flags) 0)) to (flags). */
+ if (((GET_CODE (op0) == GT && GET_CODE (op1) == LT)
+ || (GET_CODE (op0) == GTU && GET_CODE (op1) == LTU))
+ && XEXP (op0, 1) == const0_rtx && XEXP (op1, 1) == const0_rtx)
+ {
+ rtx xop00 = XEXP (op0, 0);
+ rtx xop10 = XEXP (op1, 0);
+
+#ifdef HAVE_cc0
+ if (GET_CODE (xop00) == CC0 && GET_CODE (xop10) == CC0)
#else
- /* Do nothing here. */
+ if (GET_CODE (xop00) == REG && GET_CODE (xop10) == REG
+ && GET_MODE (xop00) == GET_MODE (xop10)
+ && REGNO (xop00) == REGNO (xop10)
+ && GET_MODE_CLASS (GET_MODE (xop00)) == MODE_CC
+ && GET_MODE_CLASS (GET_MODE (xop10)) == MODE_CC)
#endif
- break;
-
+ return xop00;
+ }
+
+ break;
case MINUS:
/* None of these optimizations can be done for IEEE
floating point. */
===================================================================
Index: config/i386/i386.md
--- config/i386/i386.md 2000/07/07 23:49:18 1.163
+++ config/i386/i386.md 2000/07/15 17:56:47
@@ -1191,6 +1191,13 @@
[(set_attr "type" "icmp")
(set_attr "mode" "QI")])
+;; Dummy insn to help combine.
+(define_insn "*cc_nop_move"
+ [(set (reg 17) (reg 17))]
+ ""
+ "* abort (); /* cc_nop_move survived to final! */"
+ [(set_attr "type" "other")])
+
;; These implement float point compares.
;; %%% See if we can get away with VOIDmode operands on the actual insns,
;; which would allow mix and match FP modes on the compares. Which is what