This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[RFC][PR82479] missing popcount builtin detection
- From: Kugan Vivekanandarajah <kugan dot vivekanandarajah at linaro dot org>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 25 Jan 2018 08:56:37 +1100
- Subject: [RFC][PR82479] missing popcount builtin detection
- Authentication-results: sourceware.org; auth=none
Hi All,
Here is a patch for popcount builtin detection similar to LLVM. I
would like to queue this for review for next stage 1.
1. This is done part of loop-distribution and effective for -O3 and above.
2. This does not distribute loop to detect popcount (like
memcpy/memmove). I dont think that happens in practice. Please correct
me if I am wrong.
Bootstrapped and regression tested on aarch64-linux-gnu with no new regressions.
Thanks,
Kugan
gcc/ChangeLog:
2018-01-25 Kugan Vivekanandarajah <kuganv@linaro.org>
PR middle-end/82479
* tree-loop-distribution.c (handle_popcount): New.
(pass_loop_distribution::execute): Use handle_popcount.
gcc/testsuite/ChangeLog:
2018-01-25 Kugan Vivekanandarajah <kuganv@linaro.org>
PR middle-end/82479
* gcc.dg/tree-ssa/popcount.c: New test.
From 9fa09af4b7013c6207e59a4920c82f089bfe45c2 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
Date: Wed, 24 Jan 2018 08:50:08 +1100
Subject: [PATCH] pocount builtin detection
Change-Id: Ic6e175f9cc9a69bd417936a4845c2c046fd446b4
Change-Id: I680eb107445660c60a5d38f5d7300ab1a3243bf5
Change-Id: Ia9f0df89e05520091dc7797195098118768c7ac2
---
gcc/testsuite/gcc.dg/tree-ssa/popcount.c | 41 +++++++++
gcc/tree-loop-distribution.c | 145 +++++++++++++++++++++++++++++++
2 files changed, 186 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/popcount.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount.c
new file mode 100644
index 0000000..86a66cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+extern int foo (int);
+
+int PopCount (long b) {
+ int c = 0;
+ b++;
+
+ while (b) {
+ b &= b - 1;
+ c++;
+ }
+ return c;
+}
+int PopCount2 (long b) {
+ int c = 0;
+
+ while (b) {
+ b &= b - 1;
+ c++;
+ }
+ foo (c);
+ return foo (c);
+}
+
+void PopCount3 (long b1) {
+
+ for (long i = 0; i < b1; ++i)
+ {
+ long b = i;
+ int c = 0;
+ while (b) {
+ b &= b - 1;
+ c++;
+ }
+ foo (c);
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_popcount" 3 "optimized" } } */
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index a3d76e4..1060700 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -1585,6 +1585,148 @@ classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
return;
}
+/* See if loop is a popcout implementation of the form
+
+ int c = 0;
+ while (b) {
+ b = b & (b - 1);
+ c++;
+ }
+
+ If so, convert this into c = __builtin_popcount (b)
+ return true if we did, false otherwise. */
+
+
+static bool
+handle_popcount (loop_p loop)
+{
+ tree lhs, rhs;
+ tree dest, src;
+ gimple *and_minus_one;
+ int count = 0;
+ gphi *count_phi;
+ gimple *fn_call;
+ gimple *use_stmt;
+ use_operand_p use_p;
+ imm_use_iterator iter;
+ gimple_stmt_iterator gsi;
+
+ /* Check loop terminating branch is like
+ if (b != 0). */
+ gimple *stmt = last_stmt (loop->header);
+ if (!stmt
+ || gimple_code (stmt) != GIMPLE_COND
+ || !zerop (gimple_cond_rhs (stmt)))
+ return false;
+
+ /* Cheeck "b = b & (b - 1)" is calculated. */
+ lhs = gimple_cond_lhs (stmt);
+ gimple *and_stmt = SSA_NAME_DEF_STMT (lhs);
+ if (gimple_assign_rhs_code (and_stmt) != BIT_AND_EXPR)
+ return false;
+ lhs = gimple_assign_rhs1 (and_stmt);
+ rhs = gimple_assign_rhs2 (and_stmt);
+ if (TREE_CODE (lhs) == SSA_NAME
+ && (and_minus_one = SSA_NAME_DEF_STMT (lhs))
+ && is_gimple_assign (and_minus_one)
+ && (gimple_assign_rhs_code (and_minus_one) == PLUS_EXPR)
+ && integer_minus_onep (gimple_assign_rhs2 (and_minus_one)))
+ lhs = rhs;
+ else if (TREE_CODE (rhs) == SSA_NAME
+ && (and_minus_one = SSA_NAME_DEF_STMT (rhs))
+ && is_gimple_assign (and_minus_one)
+ && (gimple_assign_rhs_code (and_minus_one) == PLUS_EXPR)
+ && integer_minus_onep (gimple_assign_rhs2 (and_minus_one)))
+ ;
+ else
+ return false;
+ if ((gimple_assign_rhs1 (and_stmt) != gimple_assign_rhs1 (and_minus_one))
+ && (gimple_assign_rhs2 (and_stmt) != gimple_assign_rhs1 (and_minus_one)))
+ return false;
+
+ /* Check the recurrence. */
+ gimple *phi = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (and_minus_one));
+ gimple *src_phi = SSA_NAME_DEF_STMT (lhs);
+ if (gimple_code (phi) != GIMPLE_PHI
+ || gimple_code (src_phi) != GIMPLE_PHI)
+ return false;
+
+ /* Check the loop closed SSA definition for just the variable c defined in
+ loop. */
+ src = gimple_phi_arg_def (src_phi, loop_preheader_edge (loop)->dest_idx);
+ basic_block bb = single_exit (loop)->dest;
+ for (gphi_iterator gpi = gsi_start_phis (bb);
+ !gsi_end_p (gpi); gsi_next (&gpi))
+ {
+ count_phi = gpi.phi ();
+ count++;
+ }
+ if (count != 1)
+ return false;
+
+ /* Make sure we have a count by one and it starts from zero. */
+ rhs = gimple_phi_arg_def (count_phi, 0);
+ stmt = SSA_NAME_DEF_STMT (rhs);
+ if (!is_gimple_assign (stmt)
+ || (gimple_assign_rhs_code (stmt) != PLUS_EXPR)
+ || !integer_onep (gimple_assign_rhs2 (stmt)))
+ return false;
+ rhs = gimple_assign_rhs1 (stmt);
+ stmt = SSA_NAME_DEF_STMT (rhs);
+ if (gimple_code (stmt) != GIMPLE_PHI
+ || !integer_zerop (gimple_phi_arg_def (stmt, loop_preheader_edge (loop)->dest_idx)))
+ return false;
+
+ /* Create a var for builtin_popcount result and update the uses. */
+ dest = gimple_phi_result (count_phi);
+ tree var = make_ssa_name (TREE_TYPE (dest), NULL);
+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, dest)
+ {
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+ SET_USE (use_p, var);
+ }
+ dest = var;
+
+ /* Remove the loop closed PHI stmt. */
+ gsi = gsi_after_labels (gimple_bb (count_phi));
+ gphi_iterator gsi_phi = gsi_for_phi (count_phi);
+ remove_phi_node (&gsi_phi, true);
+
+ /* Insert the builtin. */
+ gsi = gsi_after_labels (loop_preheader_edge (loop)->src);
+ src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ tree fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_POPCOUNT));
+ fn_call = gimple_build_call (fn, 1, src);
+ gimple_call_set_lhs (fn_call, dest);
+ gsi_insert_before (&gsi, fn_call, GSI_CONTINUE_LINKING);
+
+ /* In most cases, we will have if (b != 0) preceeding the loop in
+ the form
+ if (b != 0)
+ {
+ loop;
+ }
+
+ Since __builtin_popcount (0) is defined, we can remove this condition
+ by making it always true. */
+
+ stmt = last_stmt (EDGE_PRED (loop_preheader_edge (loop)->src, 0)->src);
+ if (stmt
+ && gimple_code (stmt) == GIMPLE_COND
+ && (gimple_cond_lhs (stmt) == src)
+ && zerop (gimple_cond_rhs (stmt)))
+ {
+ gimple_cond_set_lhs (as_a <gcond *> (stmt),
+ build_int_cst (TREE_TYPE (src), 1));
+ update_stmt (stmt);
+ }
+
+ /* Finaly remove the loop. */
+ destroy_loop (loop);
+ return true;
+}
+
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
For the moment we detect memset, memcpy and memmove patterns. Bitmap
STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */
@@ -3032,6 +3174,9 @@ pass_loop_distribution::execute (function *fun)
|| !optimize_loop_for_speed_p (loop))
continue;
+ if (handle_popcount (loop))
+ continue;
+
/* Don't distribute loop if niters is unknown. */
tree niters = number_of_latch_executions (loop);
if (niters == NULL_TREE || niters == chrec_dont_know)
--
2.7.4