[PATCH] Fix ctz issues (PR93231)

Wilco Dijkstra Wilco.Dijkstra@arm.com
Mon Jan 13 17:53:00 GMT 2020


Further improve the ctz recognition: Avoid ICEing on negative shift
counts or multiply constants.  Check the type is 8 bits for the string
constant case to avoid accidentally matching a wide STRING_CST.
Add a tree_expr_nonzero_p check to allow the optimization even if
CTZ_DEFINED_VALUE_AT_ZERO returns 0 or 1.  Add extra test cases.

(note the diff uses the old tree and includes Jakub's bootstrap fixes)

Bootstrap OK on AArch64 and x64.

ChangeLog:
2020-01-13  Wilco Dijkstra  <wdijkstr@arm.com>

	PR tree-optimization/93231
	* tree-ssa-forwprop.c
	(optimize_count_trailing_zeroes): Use tree_to_shwi for shift
	and TREE_INT_CST_LOW for multiply constants.  Check CST_STRING
	element size is 8 bits.
	(simplify_count_trailing_zeroes): Add test to handle known non-zero
	inputs more efficiently.

    testsuite/
	* gcc.dg/pr90838.c: New test.
	* gcc.dg/pr93231.c: New test.
---

diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
new file mode 100644
index 0000000000000000000000000000000000000000..8070d439f6404dc6884a11e58f1db41c435e61fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr90838.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop2-details" } */
+
+int ctz1 (unsigned x)
+{
+  static const char table[32] = "\x00\x01\x1c\x02\x1d\x0e\x18\x03\x1e\x16\x14"
+    "\x0f\x19\x11\x04\b\x1f\x1b\r\x17\x15\x13\x10\x07\x1a\f\x12\x06\v\x05\n\t";
+
+  return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
+}
+
+int ctz2 (unsigned x)
+{
+  const int u = 0;
+  static short table[64] =
+    {
+      32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
+      10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
+      31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
+      30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
+    };
+
+  x = (x & -x) * 0x0450FBAF;
+  return table[x >> 26];
+}
+
+int ctz3 (unsigned x)
+{
+  static int table[32] =
+    {
+      0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
+      31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
+    };
+
+  if (x == 0) return 32;
+  x = (x & -x) * 0x04D7651F;
+  return table[x >> 27];
+}
+
+static const unsigned long long magic = 0x03f08c5392f756cdULL;
+
+static const char table[64] = {
+     0,  1, 12,  2, 13, 22, 17,  3,
+    14, 33, 23, 36, 18, 58, 28,  4,
+    62, 15, 34, 26, 24, 48, 50, 37,
+    19, 55, 59, 52, 29, 44, 39,  5,
+    63, 11, 21, 16, 32, 35, 57, 27,
+    61, 25, 47, 49, 54, 51, 43, 38,
+    10, 20, 31, 56, 60, 46, 53, 42,
+     9, 30, 45, 41,  8, 40,  7,  6,
+};
+
+int ctz4 (unsigned long x)
+{
+  unsigned long lsb = x & -x;
+  return table[(lsb * magic) >> 58];
+}
+
+/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/pr93231.c b/gcc/testsuite/gcc.dg/pr93231.c
new file mode 100644
index 0000000000000000000000000000000000000000..80853bad23b28abbe51bb6e2b9f8beeb06618e2f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr93231.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop2-details -Wno-shift-count-negative" } */
+
+int ctz_ice1 (int x)
+{
+  static const char table[32] =
+    {
+      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+
+  return table[((int)((x & -x) * -0x077CB531)) >> 27];
+}
+
+int ctz_ice2 (unsigned x)
+{
+  static const char table[32] =
+    {
+      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+
+  return table[((unsigned)((x & -x) * 0x077CB531U)) >> -27];
+}
+
+// This should never match
+int ctz_fail (int x)
+{
+  static const unsigned short int table[32] =
+    u"\x0100\x021c\x0e1d\x0318\x161e\x0f14\x1119\x0804\x1b1f\x170d\x1315\x0710\x0c1a\x0612\x050b\x090a";
+
+  return table[((int)((x & -x) * 0x077CB531)) >> 27];
+}
+
+/* { dg-final { scan-tree-dump-not {= \.CTZ} "forwprop2" } } */
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index aced6eb2d9139cae277593022415bc5efdf45175..afc52e5dfbaea29cb07707fee6cbbd3e0c969eda 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -1864,8 +1864,8 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
   tree input_type = TREE_TYPE (x);
   unsigned input_bits = tree_to_shwi (TYPE_SIZE (input_type));
 
-  /* Check the array is not wider than integer type and the input is a 32-bit
-     or 64-bit type.  */
+  /* Check the array element type is not wider than 32 bits and the input is
+     a 32-bit or 64-bit type.  */
   if (TYPE_PRECISION (type) > 32)
     return false;
   if (input_bits != 32 && input_bits != 64)
@@ -1879,7 +1879,7 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
   if (!low || !integer_zerop (low))
     return false;
 
-  unsigned shiftval = tree_to_uhwi (tshift);
+  unsigned shiftval = tree_to_shwi (tshift);
 
   /* Check the shift extracts the top 5..7 bits.  */
   if (shiftval < input_bits - 7 || shiftval > input_bits - 5)
@@ -1889,12 +1889,13 @@ optimize_count_trailing_zeroes (tree array_ref, tree x, tree mulc,
   if (!ctor)
     return false;
 
-  unsigned HOST_WIDE_INT val = tree_to_uhwi (mulc);
+  /* Extract the binary representation of the multiply constant.  */
+  unsigned HOST_WIDE_INT val = TREE_INT_CST_LOW (mulc);
 
   if (TREE_CODE (ctor) == CONSTRUCTOR)
     return check_ctz_array (ctor, val, zero_val, shiftval, input_bits);
 
-  if (TREE_CODE (ctor) == STRING_CST)
+  if (TREE_CODE (ctor) == STRING_CST && TYPE_PRECISION (type) == 8)
     return check_ctz_string (ctor, val, zero_val, shiftval, input_bits);
 
   return false;
@@ -1920,15 +1921,24 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
 				      res_ops[1], res_ops[2], zero_val))
     {
       tree type = TREE_TYPE (res_ops[0]);
-      HOST_WIDE_INT ctzval;
+      HOST_WIDE_INT ctz_val = 0;
       HOST_WIDE_INT type_size = tree_to_shwi (TYPE_SIZE (type));
-      bool zero_ok = CTZ_DEFINED_VALUE_AT_ZERO (TYPE_MODE (type), ctzval) == 2;
+      bool zero_ok =
+	CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ctz_val) == 2;
+
+      /* If the input value can't be zero, don't special case ctz (0).  */
+      if (tree_expr_nonzero_p (res_ops[0]))
+	{
+	  zero_ok = true;
+	  zero_val = 0;
+	  ctz_val = 0;
+	}
 
       /* Skip if there is no value defined at zero, or if we can't easily
 	 return the correct value for zero.  */
       if (!zero_ok)
 	return false;
-      if (zero_val != ctzval && !(zero_val == 0 && ctzval == type_size))
+      if (zero_val != ctz_val && !(zero_val == 0 && ctz_val == type_size))
 	return false;
 
       gimple_seq seq = NULL;
@@ -1941,7 +1951,7 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
       tree prev_lhs = gimple_call_lhs (call);
 
       /* Emit ctz (x) & 31 if ctz (0) is 32 but we need to return 0.  */
-      if (zero_val == 0 && ctzval == type_size)
+      if (zero_val == 0 && ctz_val == type_size)
 	{
 	  g = gimple_build_assign (make_ssa_name (integer_type_node),
 				   BIT_AND_EXPR, prev_lhs,



More information about the Gcc-patches mailing list