This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix CSE CLZ/CTZ handling (PR rtl-optimization/85376)


Hi!

The following testcase is miscompiled, because due to various disabled
optimization passes we end up with a dead bsf instruction (CTZ) of a
register known to be zero.
fold_rtx uses simplify_unary_operation, which has in this case:
        case CTZ:
          if (wi::ne_p (op0, 0))
            int_value = wi::ctz (op0);
          else if (! CTZ_DEFINED_VALUE_AT_ZERO (imode, int_value))
            int_value = GET_MODE_PRECISION (imode);
          result = wi::shwi (int_value, result_mode);
          break;
x86_64 is a target where CTZ_DEFINED_VALUE_AT_ZERO is false, the instruction
keeps the previous value of the destination register, so something pretty
random.  As it is undefined, simplifying it to something random is fine,
except when used the way CSE uses it, by remembering that the value
(const_int 32) is stored in the destination register and optimizing later
code that has (set some_reg (const_int 32)) to that destination register.
Because that destination register contains an indeterminate value, we can't
expect it will be exactly 32.

The following patch lets us punt in these cases.  Bootstrapped/regtested on
x86_64-linux and i686-linux, ok for trunk?

Another option would be to tweak simplify-rtx.c and instead of doing
          else if (! CTZ_DEFINED_VALUE_AT_ZERO (imode, int_value))
            int_value = GET_MODE_PRECISION (imode);
do
          else if (! CTZ_DEFINED_VALUE_AT_ZERO (imode, int_value))
	    return NULL_RTX;
and similarly for CLZ; I haven't tested what, if anything, would break.
We've been doing something like that since r62453, when the
C?Z_DEFINED_VALUE_AT_ZERO macros were introduced, and before that the code
did actually the same, just unconditionally assuming the value is undefined at 0.

2018-04-12  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/85376
	* cse.c (fold_rtx): For CLZ and CTZ don't try to simplify if
	the source is known to be zero and CLZ/CTZ is not defined at zero
	for the target.

	* gcc.dg/pr85376.c: New test.

--- gcc/cse.c.jj	2018-02-12 23:24:47.350482694 +0100
+++ gcc/cse.c	2018-04-12 17:49:32.157664289 +0200
@@ -3322,6 +3322,19 @@ fold_rtx (rtx x, rtx_insn *insn)
 	    && mode_arg0 == VOIDmode)
 	  break;
 
+	/* Avoid recording a constant value for CLZ or CTZ if the argument is
+	   known to be zero when the operation is undefined for zero on the
+	   target.  See PR85376.  */
+	if ((code == CLZ || code == CTZ)
+	    && ((const_arg0 ? const_arg0 : folded_arg0) == CONST0_RTX (mode)))
+	  {
+	    int dummy;
+	    scalar_mode imode = GET_MODE_INNER (mode);
+	    if ((code == CLZ && !CLZ_DEFINED_VALUE_AT_ZERO (imode, dummy))
+		|| (code == CTZ && !CTZ_DEFINED_VALUE_AT_ZERO (imode, dummy)))
+	      break;
+	  }
+
 	new_rtx = simplify_unary_operation (code, mode,
 					    const_arg0 ? const_arg0 : folded_arg0,
 					    mode_arg0);
--- gcc/testsuite/gcc.dg/pr85376.c.jj	2018-04-12 17:44:41.506370642 +0200
+++ gcc/testsuite/gcc.dg/pr85376.c	2018-04-12 17:45:11.669401115 +0200
@@ -0,0 +1,32 @@
+/* PR rtl-optimization/85376 */
+/* { dg-do run { target int128 } } */
+/* { dg-options "-Og -fno-dce -fgcse -fno-tree-ccp -fno-tree-copy-prop -Wno-psabi" } */
+
+typedef unsigned int U __attribute__ ((vector_size (64)));
+typedef unsigned __int128 V __attribute__ ((vector_size (64)));
+unsigned int e, i, l;
+unsigned char f;
+U g, h, k, j;
+
+static inline V
+foo (unsigned char n, unsigned short o, unsigned int p, U q, U r, U s)
+{
+  unsigned int t;
+  o <<= 5;
+  q[7] >>= __builtin_add_overflow (0xfffffff0, __builtin_ffs (n), &s[5]);
+  t = __builtin_ffs (g[7]);
+  e *= __builtin_sub_overflow (o, t, &f);
+  return f + (V) g + (V) h + (V) q + i + (V) j + (V) s + (V) k + l;
+}
+
+int
+main ()
+{
+  if (__SIZEOF_INT128__ != 16 || __SIZEOF_INT__ != 4 || __CHAR_BIT__ != 8)
+    return 0;
+  V x = foo (0, 1, 5, (U) { }, (U) { }, (U) { });
+  for (unsigned i = 0; i < 4; i++)
+    if ((unsigned int) x[i] != 0x20)
+      __builtin_abort ();
+  return 0;
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]