[PATCH v2] i386: Fix up *testqi_ext_3 define_insn_and_split [PR94567]

Jakub Jelinek jakub@redhat.com
Fri Apr 17 12:38:30 GMT 2020


On Fri, Apr 17, 2020 at 02:12:19PM +0200, Uros Bizjak wrote:
> Is it possible to perform widening only for !TARGET_PARTIAL_REG_STALL?
> I'm worried that highpart may not be cleared, so the effects of
> partial reg stall can be quite noticeable.

So, like this then?
All I've added was another condition, interdiff:
diff -u gcc/config/i386/i386.md gcc/config/i386/i386.md
--- gcc/config/i386/i386.md	2020-04-16 14:21:41.686972979 +0200
+++ gcc/config/i386/i386.md	2020-04-17 14:29:33.882690098 +0200
@@ -8750,6 +8750,18 @@
 			     == GET_MODE_PRECISION (GET_MODE (operands[2]))
 			     && !register_operand (operands[2],
 						   GET_MODE (operands[2])))
+			 /* And we shouldn't widen if
+			    TARGET_PARTIAL_REG_STALL.  */
+			 || (TARGET_PARTIAL_REG_STALL
+			     && (INTVAL (operands[3]) + INTVAL (operands[4])
+				 >= (paradoxical_subreg_p (operands[2])
+				     && (GET_MODE_CLASS
+					  (GET_MODE (SUBREG_REG (operands[2])))
+					 == MODE_INT)
+				     ? GET_MODE_PRECISION
+					 (GET_MODE (SUBREG_REG (operands[2])))
+				     : GET_MODE_PRECISION
+					 (GET_MODE (operands[2])))))
 			 ? CCZmode : CCNOmode)"
   "#"
   "&& 1"
and nothing in the splitter would need to change because the condition
would ensure that for TARGET_PARTIAL_REG_STALL we require in all problematic
cases CCZmode.  The only exception would be the pos + len == 8
optimization which we wouldn't perform (of course with CCZmode we'd still
do), but that isn't a partial reg stall thing, that is an optimization to
use shorter testb instead of testw.

2020-04-17  Jakub Jelinek  <jakub@redhat.com>
	    Jeff Law  <law@redhat.com>

	PR target/94567
	* config/i386/i386.md (*testqi_ext_3): Use CCZmode rather than
	CCNOmode in ix86_match_ccmode if len is equal to <MODE>mode precision,
	or pos + len >= 32, or pos + len is equal to operands[2] precision
	and operands[2] is not a register operand.  During splitting perform
	SImode AND if operands[0] doesn't have CCZmode and pos + len is
	equal to mode precision.

	* gcc.c-torture/execute/pr94567.c: New test.

--- gcc/config/i386/i386.md.jj	2020-04-17 08:49:33.236921107 +0200
+++ gcc/config/i386/i386.md	2020-04-17 14:29:33.882690098 +0200
@@ -8730,10 +8730,38 @@ (define_insn_and_split "*testqi_ext_3"
 	   && INTVAL (operands[3]) > 32
 	   && INTVAL (operands[3]) + INTVAL (operands[4]) == 64))
    && ix86_match_ccmode (insn,
-			 /* *testdi_1 requires CCZmode if the mask has bit
+			 /* If zero_extract mode precision is the same
+			    as len, the SF of the zero_extract
+			    comparison will be the most significant
+			    extracted bit, but this could be matched
+			    after splitting only for pos 0 len all bits
+			    trivial extractions.  Require CCZmode.  */
+			 (GET_MODE_PRECISION (<MODE>mode)
+			  == INTVAL (operands[3]))
+			 /* Otherwise, require CCZmode if we'd use a mask
+			    with the most significant bit set and can't
+			    widen it to wider mode.  *testdi_1 also
+			    requires CCZmode if the mask has bit
 			    31 set and all bits above it clear.  */
-			 GET_MODE (operands[2]) == DImode
-			 && INTVAL (operands[3]) + INTVAL (operands[4]) == 32
+			 || (INTVAL (operands[3]) + INTVAL (operands[4])
+			     >= 32)
+			 /* We can't widen also if val is not a REG.  */
+			 || (INTVAL (operands[3]) + INTVAL (operands[4])
+			     == GET_MODE_PRECISION (GET_MODE (operands[2]))
+			     && !register_operand (operands[2],
+						   GET_MODE (operands[2])))
+			 /* And we shouldn't widen if
+			    TARGET_PARTIAL_REG_STALL.  */
+			 || (TARGET_PARTIAL_REG_STALL
+			     && (INTVAL (operands[3]) + INTVAL (operands[4])
+				 >= (paradoxical_subreg_p (operands[2])
+				     && (GET_MODE_CLASS
+					  (GET_MODE (SUBREG_REG (operands[2])))
+					 == MODE_INT)
+				     ? GET_MODE_PRECISION
+					 (GET_MODE (SUBREG_REG (operands[2])))
+				     : GET_MODE_PRECISION
+					 (GET_MODE (operands[2])))))
 			 ? CCZmode : CCNOmode)"
   "#"
   "&& 1"
@@ -8750,7 +8778,10 @@ (define_insn_and_split "*testqi_ext_3"
 
       /* Narrow paradoxical subregs to prevent partial register stalls.  */
       if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)
-	  && GET_MODE_CLASS (submode) == MODE_INT)
+	  && GET_MODE_CLASS (submode) == MODE_INT
+	  && (GET_MODE (operands[0]) == CCZmode
+	      || pos + len < GET_MODE_PRECISION (submode)
+	      || REG_P (SUBREG_REG (val))))
 	{
 	  val = SUBREG_REG (val);
 	  mode = submode;
@@ -8758,14 +8789,32 @@ (define_insn_and_split "*testqi_ext_3"
     }
 
   /* Small HImode tests can be converted to QImode.  */
-  if (register_operand (val, HImode) && pos + len <= 8)
+  if (pos + len <= 8
+      && register_operand (val, HImode))
     {
-      val = gen_lowpart (QImode, val);
-      mode = QImode;
+      rtx nval = gen_lowpart (QImode, val);
+      if (!MEM_P (nval)
+	  || GET_MODE (operands[0]) == CCZmode
+	  || pos + len < 8)
+	{
+	  val = nval;
+	  mode = QImode;
+	}
     }
 
   gcc_assert (pos + len <= GET_MODE_PRECISION (mode));
 
+  /* If the mask is going to have the sign bit set in the mode
+     we want to do the comparison in and user isn't interested just
+     in the zero flag, then we must widen the target mode.  */
+  if (pos + len == GET_MODE_PRECISION (mode)
+      && GET_MODE (operands[0]) != CCZmode)
+    {
+      gcc_assert (pos + len < 32 && !MEM_P (val));
+      mode = SImode;
+      val = gen_lowpart (mode, val);
+    }
+
   wide_int mask
     = wi::shifted_mask (pos, len, false, GET_MODE_PRECISION (mode));
 
--- gcc/testsuite/gcc.c-torture/execute/pr94567.c.jj	2020-04-17 14:19:34.610683856 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr94567.c	2020-04-17 14:19:34.610683856 +0200
@@ -0,0 +1,26 @@
+/* PR target/94567 */
+
+volatile int a = 1, b;
+short c, d = 4, f = 2, g;
+unsigned short e = 53736;
+
+int
+foo (int i, int j)
+{
+  return i && j ? 0 : i + j;
+}
+
+int
+main ()
+{
+  for (; a; a = 0)
+    {
+      unsigned short k = e;
+      g = k >> 3;
+      if (foo (g < (f || c), b))
+	d = 0;
+    }
+  if (d != 4)
+    __builtin_abort ();
+  return 0;
+}


	Jakub



More information about the Gcc-patches mailing list