[PATCH] Fold __builtin_ia32_[tl]zcnt_u{16,32,64} (PR target/78057)

Jakub Jelinek jakub@redhat.com
Fri Oct 21 15:23:00 GMT 2016


Hi!

This patch adds folding for the new ia32 md builtins.
If they can be folded into a constant, it is done in ix86_fold_builtin;
if they can be folded to the corresponding generic __builtin_c[lt]z* (which
have e.g. the advantage that VRP knows what values they can have etc.),
it is done in the gimple_fold_builtin target hook.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-10-21  Jakub Jelinek  <jakub@redhat.com>

	PR target/78057
	* config/i386/i386.c: Include fold-const-call.h, tree-vrp.h
	and tree-ssanames.h.
	(ix86_fold_builtin): Fold IX86_BUILTIN_[LT]ZCNT{16,32,64}
	with INTEGER_CST argument.
	(ix86_gimple_fold_builtin): New function.
	(TARGET_GIMPLE_FOLD_BUILTIN): Define.

	* gcc.target/i386/pr78057.c: New test.

--- gcc/config/i386/i386.c.jj	2016-10-21 11:36:33.135677698 +0200
+++ gcc/config/i386/i386.c	2016-10-21 11:57:58.248530521 +0200
@@ -77,6 +77,9 @@ along with GCC; see the file COPYING3.
 #include "case-cfn-macros.h"
 #include "regrename.h"
 #include "dojump.h"
+#include "fold-const-call.h"
+#include "tree-vrp.h"
+#include "tree-ssanames.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -33332,6 +33335,40 @@ ix86_fold_builtin (tree fndecl, int n_ar
 	    return build_real (type, inf);
 	  }
 
+	case IX86_BUILTIN_TZCNT16:
+	case IX86_BUILTIN_TZCNT32:
+	case IX86_BUILTIN_TZCNT64:
+	  gcc_assert (n_args == 1);
+	  if (TREE_CODE (args[0]) == INTEGER_CST)
+	    {
+	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
+	      tree arg = args[0];
+	      if (fn_code == IX86_BUILTIN_TZCNT16)
+		arg = fold_convert (short_unsigned_type_node, arg);
+	      if (integer_zerop (arg))
+		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
+	      else
+		return fold_const_call (CFN_CTZ, type, arg);
+	    }
+	  break;
+
+	case IX86_BUILTIN_LZCNT16:
+	case IX86_BUILTIN_LZCNT32:
+	case IX86_BUILTIN_LZCNT64:
+	  gcc_assert (n_args == 1);
+	  if (TREE_CODE (args[0]) == INTEGER_CST)
+	    {
+	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
+	      tree arg = args[0];
+	      if (fn_code == IX86_BUILTIN_LZCNT16)
+		arg = fold_convert (short_unsigned_type_node, arg);
+	      if (integer_zerop (arg))
+		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
+	      else
+		return fold_const_call (CFN_CLZ, type, arg);
+	    }
+	  break;
+
 	default:
 	  break;
 	}
@@ -33344,6 +33381,67 @@ ix86_fold_builtin (tree fndecl, int n_ar
   return NULL_TREE;
 }
 
+/* Fold an MD builtin call at *GSI in GIMPLE (constant folding is done by
+   ix86_fold_builtin).  Return true iff the statement was replaced.  */
+
+bool
+ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
+{
+  gimple *stmt = gsi_stmt (*gsi);
+  tree fndecl = gimple_call_fndecl (stmt);
+  gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
+  int n_args = gimple_call_num_args (stmt);
+  enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
+  tree decl = NULL_TREE;
+  tree arg0;
+
+  switch (fn_code)
+    {
+    case IX86_BUILTIN_TZCNT32:
+      decl = builtin_decl_implicit (BUILT_IN_CTZ);
+      goto fold_tzcnt_lzcnt;
+
+    case IX86_BUILTIN_TZCNT64:
+      decl = builtin_decl_implicit (BUILT_IN_CTZLL);
+      goto fold_tzcnt_lzcnt;
+
+    case IX86_BUILTIN_LZCNT32:
+      decl = builtin_decl_implicit (BUILT_IN_CLZ);
+      goto fold_tzcnt_lzcnt;
+
+    case IX86_BUILTIN_LZCNT64:
+      decl = builtin_decl_implicit (BUILT_IN_CLZLL);
+      goto fold_tzcnt_lzcnt;
+
+    fold_tzcnt_lzcnt:  /* Shared tail; DECL is the generic c[lt]z builtin.  */
+      gcc_assert (n_args == 1);
+      arg0 = gimple_call_arg (stmt, 0);
+      if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
+	{
+	  int prec = TYPE_PRECISION (TREE_TYPE (arg0));
+	  if (!expr_not_equal_to (arg0, wi::zero (prec)))
+	    return false;  /* ARG0 not known to differ from zero.  */
+
+	  location_t loc = gimple_location (stmt);
+	  gimple *g = gimple_build_call (decl, 1, arg0);
+	  gimple_set_location (g, loc);
+	  tree lhs = make_ssa_name (integer_type_node);
+	  gimple_call_set_lhs (g, lhs);
+	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+	  g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
+	  gimple_set_location (g, loc);
+	  gsi_replace (gsi, g, true);
+	  return true;
+	}
+      break;  /* Not folded; return false below.  */
+
+    default:
+      break;
+    }
+
+  return false;
+}
+
 /* Make builtins to detect cpu type and features supported.  NAME is
    the builtin name, CODE is the builtin code, and FTYPE is the function
    type of the builtin.  */
@@ -50531,6 +50629,9 @@ ix86_addr_space_zero_address_valid (addr
 #undef TARGET_FOLD_BUILTIN
 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
 
+#undef TARGET_GIMPLE_FOLD_BUILTIN
+#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
+
 #undef TARGET_COMPARE_VERSION_PRIORITY
 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
 
--- gcc/testsuite/gcc.target/i386/pr78057.c.jj	2016-10-21 11:57:58.249530508 +0200
+++ gcc/testsuite/gcc.target/i386/pr78057.c	2016-10-21 11:57:58.249530508 +0200
@@ -0,0 +1,42 @@
+/* PR target/78057 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi -mlzcnt -fdump-tree-optimized" } */
+
+extern void link_error (void);
+
+int
+foo (int x)
+{
+  if (__builtin_ia32_tzcnt_u16 (16) != 4
+      || __builtin_ia32_tzcnt_u16 (0) != 16
+      || __builtin_ia32_lzcnt_u16 (0x1ff) != 7
+      || __builtin_ia32_lzcnt_u16 (0) != 16
+      || __builtin_ia32_tzcnt_u32 (8) != 3
+      || __builtin_ia32_tzcnt_u32 (0) != 32
+      || __builtin_ia32_lzcnt_u32 (0x3fffffff) != 2
+      || __builtin_ia32_lzcnt_u32 (0) != 32
+#ifdef __x86_64__
+      || __builtin_ia32_tzcnt_u64 (4) != 2
+      || __builtin_ia32_tzcnt_u64 (0) != 64
+      || __builtin_ia32_lzcnt_u64 (0x1fffffff) != 35
+      || __builtin_ia32_lzcnt_u64 (0) != 64
+#endif
+     )
+    link_error ();  /* Must fold away; the dump is scanned for this call.  */
+  x += 2;
+  if (x == 0)  /* Past this check x is nonzero for the calls below.  */
+    return 5;
+  return __builtin_ia32_tzcnt_u32 (x)
+         + __builtin_ia32_lzcnt_u32 (x)
+#ifdef __x86_64__
+	 + __builtin_ia32_tzcnt_u64 (x)
+	 + __builtin_ia32_lzcnt_u64 (x)
+#endif
+	 ;
+}
+
+/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_ia32_\[lt]zcnt" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_ctz " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_clz " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_ctzll " 1 "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump-times "__builtin_clzll " 1 "optimized" { target lp64 } } } */

	Jakub



More information about the Gcc-patches mailing list