This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
fix x86 setcc partial reg stall
- To: gcc-patches at gcc dot gnu dot org
- Subject: fix x86 setcc partial reg stall
- From: Richard Henderson <rth at redhat dot com>
- Date: Wed, 10 Oct 2001 19:20:09 -0700
- Cc: pcarlini at unitus dot it
> http://gcc.gnu.org/ml/gcc/2001-09/msg00217.html
The bit to fix the
seta %al
andl $1, %eax
partial register stall is the test splitter change to look for and
simplify subregs. But that still leaves us with
seta %al
testb $1, %al
je Lfoo
which is just plain stupid. Fixing that is a bit more involved.
The issue there is that the "seta" is actually
(ge:QI (reg:CCFPU 17) (const_int 0))
i.e. a floating point comparison, and that generically it is not
safe to assume too much about CCmode operations. So the code in
simplify_comparison gives up before figuring out that
(eq (ge:QI (reg:CCFPU 17) (const_int 0))
(const_int 0))
can be simplified to
(unlt (reg:CCFPU 17) (const_int 0))
For that, I created two new splitters that combine can use to
merge the three instructions. At the same time I had to correct
an oversight in combine that would prevent a split against a jump
insn from matching at all.
Which, for the test case at hand, leaves us with
fnstsw %ax
sahf
- seta %al
- andl $1, %eax
- je .L12
+ jbe .L12
which is most definitely better.
Bootstrap is still underway,
r~
* combine.c (try_combine): Handle a SEQUENCE of one insn.
* config/i386/i386.md (test splitter): Narrow tests vs paradoxical subregs.
(jcc splitters): Add two splitters to simplify compound compares
that simplify_comparison can't handle.
Index: combine.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/combine.c,v
retrieving revision 1.236
diff -c -p -d -r1.236 combine.c
*** combine.c 2001/10/10 11:33:05 1.236
--- combine.c 2001/10/11 02:03:12
*************** try_combine (i3, i2, i1, new_direct_jump
*** 2132,2137 ****
--- 2132,2143 ----
}
}
+ /* If we've split a jump pattern, we'll wind up with a sequence even
+ with one instruction. We can handle that below, so extract it. */
+ if (m_split && GET_CODE (m_split) == SEQUENCE
+ && XVECLEN (m_split, 0) == 1)
+ m_split = PATTERN (XVECEXP (m_split, 0, 0));
+
if (m_split && GET_CODE (m_split) != SEQUENCE)
{
insn_code_number = recog_for_combine (&m_split, i3, &new_i3_notes);
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.300
diff -c -p -d -r1.300 i386.md
*** i386.md 2001/10/08 22:38:45 1.300
--- i386.md 2001/10/11 02:03:13
***************
*** 8125,8131 ****
HOST_WIDE_INT len = INTVAL (operands[1]);
HOST_WIDE_INT pos = INTVAL (operands[2]);
HOST_WIDE_INT mask;
! enum machine_mode mode;
mode = GET_MODE (operands[0]);
if (GET_CODE (operands[0]) == MEM)
--- 8125,8131 ----
HOST_WIDE_INT len = INTVAL (operands[1]);
HOST_WIDE_INT pos = INTVAL (operands[2]);
HOST_WIDE_INT mask;
! enum machine_mode mode, submode;
mode = GET_MODE (operands[0]);
if (GET_CODE (operands[0]) == MEM)
***************
*** 8138,8143 ****
--- 8138,8152 ----
operands[0] = adjust_address (operands[0], mode, 0);
}
}
+ else if (GET_CODE (operands[0]) == SUBREG
+ && (submode = GET_MODE (SUBREG_REG (operands[0])),
+ GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode))
+ && pos + len <= GET_MODE_BITSIZE (submode))
+ {
+ /* Narrow a paradoxical subreg to prevent partial register stalls. */
+ mode = submode;
+ operands[0] = SUBREG_REG (operands[0]);
+ }
else if (mode == HImode && pos + len <= 8)
{
/* Small HImode tests can be converted to QImode. */
***************
*** 12744,12749 ****
--- 12753,12807 ----
(const_int 124)))
(const_int 0)
(const_int 1)))])
+
+ ;; In general it is not safe to assume too much about CCmode registers,
+ ;; so simplify-rtx stops when it sees a second one. Under certain
+ ;; conditions this is safe on x86, so help combine not create
+ ;;
+ ;; seta %al
+ ;; testb %al, %al
+ ;; je Lfoo
+
+ (define_split
+ [(set (pc)
+ (if_then_else (ne (match_operator 0 "ix86_comparison_operator"
+ [(reg 17) (const_int 0)])
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc)
+ (if_then_else (match_dup 0)
+ (label_ref (match_dup 1))
+ (pc)))]
+ {
+ PUT_MODE (operands[0], VOIDmode);
+ })
+
+ (define_split
+ [(set (pc)
+ (if_then_else (eq (match_operator 0 "ix86_comparison_operator"
+ [(reg 17) (const_int 0)])
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc)
+ (if_then_else (match_dup 0)
+ (label_ref (match_dup 1))
+ (pc)))]
+ {
+ rtx new_op0 = copy_rtx (operands[0]);
+ operands[0] = new_op0;
+ PUT_MODE (new_op0, VOIDmode);
+ PUT_CODE (new_op0, REVERSE_CONDITION (GET_CODE (new_op0),
+ GET_MODE (XEXP (new_op0, 0))));
+
+ /* Make sure that (a) the CCmode we have for the flags is strong
+ enough for the reversed compare or (b) we have a valid FP compare. */
+ if (! ix86_comparison_operator (new_op0, VOIDmode))
+ FAIL;
+ })
;; Define combination compare-and-branch fp compare instructions to use
;; during early optimization. Splitting the operation apart early makes