[PATCH] phiopt: Optimize x ? __builtin_clz (x) : 32 in GIMPLE [PR97503]
Jakub Jelinek
jakub@redhat.com
Wed Oct 21 08:07:05 GMT 2020
Hi!
While we have at the RTL level noce_try_ifelse_collapse combined with
simplify_cond_clz_ctz, that optimization doesn't always trigger because
e.g. on powerpc there is an define_insn to compare a reg against zero and
copy that register to another one and so we end up with a different pseudo
in the simplify_cond_clz_ctz test and punt.
For targets that define C?Z_DEFINED_VALUE_AT_ZERO to 2 for certain modes,
we can optimize it already in phiopt though, just need to ensure that
we transform the __builtin_c?z* calls into .C?Z ifns because my recent
VRP changes codified that the builtin calls are always undefined at zero,
while ifns honor C?Z_DEFINED_VALUE_AT_ZERO equal to 2.
And, in phiopt we already have popcount handling that does pretty much the
same thing, except for always using a zero value rather than the one set
by C?Z_DEFINED_VALUE_AT_ZERO.
So, this patch extends that function to handle not just popcount, but also
clz and ctz.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2020-10-20 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/97503
* tree-ssa-phiopt.c (cond_removal_in_popcount_pattern): Rename to ...
(cond_removal_in_popcount_clz_ctz_pattern): ... this. Handle not just
popcount, but also clz and ctz if it has C?Z_DEFINED_VALUE_AT_ZERO 2.
* gcc.dg/tree-ssa/pr97503.c: New test.
--- gcc/tree-ssa-phiopt.c.jj 2020-07-28 15:39:10.075755306 +0200
+++ gcc/tree-ssa-phiopt.c 2020-10-20 17:46:16.971329154 +0200
@@ -61,8 +61,9 @@ static bool minmax_replacement (basic_bl
edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
-static bool cond_removal_in_popcount_pattern (basic_block, basic_block,
- edge, edge, gimple *, tree, tree);
+static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
+ edge, edge, gimple *,
+ tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
@@ -344,8 +345,9 @@ tree_ssa_phiopt_worker (bool do_store_el
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
- && cond_removal_in_popcount_pattern (bb, bb1, e1, e2,
- phi, arg0, arg1))
+ && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
+ e2, phi, arg0,
+ arg1))
cfgchanged = true;
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
@@ -1777,16 +1779,20 @@ minmax_replacement (basic_block cond_bb,
<bb 4>
c_12 = PHI <_9(2)>
-*/
+
+ Similarly for __builtin_clz or __builtin_ctz if
+ C?Z_DEFINED_VALUE_AT_ZERO is 2, optab is present and
+ instead of 0 above it uses the value from that macro. */
static bool
-cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
- edge e1, edge e2,
- gimple *phi, tree arg0, tree arg1)
+cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
+ basic_block middle_bb,
+ edge e1, edge e2, gimple *phi,
+ tree arg0, tree arg1)
{
gimple *cond;
gimple_stmt_iterator gsi, gsi_from;
- gimple *popcount;
+ gimple *call;
gimple *cast = NULL;
tree lhs, arg;
@@ -1804,35 +1810,65 @@ cond_removal_in_popcount_pattern (basic_
gsi_next_nondebug (&gsi);
if (!gsi_end_p (gsi))
{
- popcount = gsi_stmt (gsi);
+ call = gsi_stmt (gsi);
gsi_next_nondebug (&gsi);
if (!gsi_end_p (gsi))
return false;
}
else
{
- popcount = cast;
+ call = cast;
cast = NULL;
}
- /* Check that we have a popcount builtin. */
- if (!is_gimple_call (popcount))
+ /* Check that we have a popcount/clz/ctz builtin. */
+ if (!is_gimple_call (call) || gimple_call_num_args (call) != 1)
return false;
- combined_fn cfn = gimple_call_combined_fn (popcount);
+
+ arg = gimple_call_arg (call, 0);
+ lhs = gimple_get_lhs (call);
+
+ if (lhs == NULL_TREE)
+ return false;
+
+ combined_fn cfn = gimple_call_combined_fn (call);
+ internal_fn ifn = IFN_LAST;
+ int val = 0;
switch (cfn)
{
CASE_CFN_POPCOUNT:
break;
+ CASE_CFN_CLZ:
+ if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
+ {
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
+ if (optab_handler (clz_optab, mode) != CODE_FOR_nothing
+ && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+ {
+ ifn = IFN_CLZ;
+ break;
+ }
+ }
+ return false;
+ CASE_CFN_CTZ:
+ if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
+ {
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
+ if (optab_handler (ctz_optab, mode) != CODE_FOR_nothing
+ && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+ {
+ ifn = IFN_CTZ;
+ break;
+ }
+ }
+ return false;
default:
return false;
}
- arg = gimple_call_arg (popcount, 0);
- lhs = gimple_get_lhs (popcount);
-
if (cast)
{
- /* We have a cast stmt feeding popcount builtin. */
+ /* We have a cast stmt feeding popcount/clz/ctz builtin. */
/* Check that we have a cast prior to that. */
if (gimple_code (cast) != GIMPLE_ASSIGN
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
@@ -1845,7 +1881,7 @@ cond_removal_in_popcount_pattern (basic_
cond = last_stmt (cond_bb);
- /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
+ /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount/clz/ctz
builtin. */
if (gimple_code (cond) != GIMPLE_COND
|| (gimple_cond_code (cond) != NE_EXPR
@@ -1865,10 +1901,13 @@ cond_removal_in_popcount_pattern (basic_
}
/* Check PHI arguments. */
- if (lhs != arg0 || !integer_zerop (arg1))
+ if (lhs != arg0
+ || TREE_CODE (arg1) != INTEGER_CST
+ || wi::to_wide (arg1) != val)
return false;
- /* And insert the popcount builtin and cast stmt before the cond_bb. */
+ /* And insert the popcount/clz/ctz builtin and cast stmt before the
+ cond_bb. */
gsi = gsi_last_bb (cond_bb);
if (cast)
{
@@ -1876,9 +1915,19 @@ cond_removal_in_popcount_pattern (basic_
gsi_move_before (&gsi_from, &gsi);
reset_flow_sensitive_info (gimple_get_lhs (cast));
}
- gsi_from = gsi_for_stmt (popcount);
- gsi_move_before (&gsi_from, &gsi);
- reset_flow_sensitive_info (gimple_get_lhs (popcount));
+ gsi_from = gsi_for_stmt (call);
+ if (ifn == IFN_LAST || gimple_call_internal_p (call))
+ gsi_move_before (&gsi_from, &gsi);
+ else
+ {
+ /* For __builtin_c[lt]z* force .C[LT]Z ifn, because only
+ the latter is well defined at zero. */
+ call = gimple_build_call_internal (ifn, 1, gimple_call_arg (call, 0));
+ gimple_call_set_lhs (call, lhs);
+ gsi_insert_before (&gsi, call, GSI_SAME_STMT);
+ gsi_remove (&gsi_from, true);
+ }
+ reset_flow_sensitive_info (lhs);
/* Now update the PHI and remove unneeded bbs. */
replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
--- gcc/testsuite/gcc.dg/tree-ssa/pr97503.c.jj 2020-10-20 18:14:46.862749687 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr97503.c 2020-10-20 18:17:13.618640295 +0200
@@ -0,0 +1,19 @@
+/* PR tree-optimization/97503 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-additional-options "-mbmi -mlzcnt" { target i?86-*-* x86_64-*-* } } */
+/* { dg-final { scan-tree-dump-times "\.CLZ" 2 "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-* } && lp64 } } } } */
+/* { dg-final { scan-tree-dump-not "__builtin_clz" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
+
+int
+foo (int x)
+{
+ return x ? __builtin_clz (x) : 32;
+}
+
+int
+bar (unsigned long long x)
+{
+ return x ? __builtin_clzll (x) : 64;
+}
Jakub
More information about the Gcc-patches
mailing list