This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] PR tree-optimization/90836 Missing popcount pattern matching
- From: Dmitrij Pochepko <dmitrij dot pochepko at bell-sw dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 5 Sep 2019 18:34:50 +0300
- Subject: [PATCH] PR tree-optimization/90836 Missing popcount pattern matching
This patch adds matching for Hamming weight (popcount) implementation. The following sources:
int
foo64 (unsigned long long a)
{
unsigned long long b = a;
b -= ((b>>1) & 0x5555555555555555ULL);
b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL);
b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL;
b *= 0x0101010101010101ULL;
return (int)(b >> 56);
}
and
int
foo32 (unsigned int a)
{
unsigned long b = a;
b -= ((b>>1) & 0x55555555UL);
b = ((b>>2) & 0x33333333UL) + (b & 0x33333333UL);
b = ((b>>4) + b) & 0x0F0F0F0FUL;
b *= 0x01010101UL;
return (int)(b >> 24);
}
and equivalents are now recognized as popcount for platforms with hw popcount support. Bootstrapped and tested on x86_64-pc-linux-gnu and aarch64-linux-gnu systems with no regressions.
(I have no write access to repo)
Thanks,
Dmitrij
gcc/ChangeLog:
PR tree-optimization/90836
* gcc/match.pd (popcount): New pattern.
gcc/testsuite/ChangeLog:
PR tree-optimization/90836
* lib/target-supports.exp (check_effective_target_popcount)
(check_effective_target_popcountll): New effective targets.
* gcc.dg/tree-ssa/popcount4.c: New test.
* gcc.dg/tree-ssa/popcount4l.c: New test.
* gcc.dg/tree-ssa/popcount4ll.c: New test.
diff --git a/gcc/match.pd b/gcc/match.pd
index 0317bc7..b1867bf 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5358,6 +5358,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(cmp (popcount @0) integer_zerop)
(rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
+/* 64- and 32-bits branchless implementations of popcount are detected:
+
+ int popcount64c (uint64_t x)
+ {
+ x -= (x >> 1) & 0x5555555555555555ULL;
+ x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
+ x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
+ return (x * 0x0101010101010101ULL) >> 56;
+ }
+
+ int popcount32c (uint32_t x)
+ {
+ x -= (x >> 1) & 0x55555555;
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ x = (x + (x >> 4)) & 0x0f0f0f0f;
+ return (x * 0x01010101) >> 24;
+ } */
+(simplify
+ (convert
+ (rshift
+ (mult
+ (bit_and:c
+ (plus:c
+ (rshift @8 INTEGER_CST@5)
+ (plus:c@8
+ (bit_and @6 INTEGER_CST@7)
+ (bit_and
+ (rshift
+ (minus@6
+ @0
+ (bit_and
+ (rshift @0 INTEGER_CST@4)
+ INTEGER_CST@11))
+ INTEGER_CST@10)
+ INTEGER_CST@9)))
+ INTEGER_CST@3)
+ INTEGER_CST@2)
+ INTEGER_CST@1))
+ /* Check constants and optab. */
+ (with
+ {
+ tree argtype = TREE_TYPE (@0);
+ unsigned prec = TYPE_PRECISION (argtype);
+ int shift = TYPE_PRECISION (long_long_unsigned_type_node) - prec;
+ const unsigned long long c1 = 0x0101010101010101ULL >> shift,
+ c2 = 0x0F0F0F0F0F0F0F0FULL >> shift,
+ c3 = 0x3333333333333333ULL >> shift,
+ c4 = 0x5555555555555555ULL >> shift;
+ }
+ (if (types_match (type, integer_type_node) && tree_to_uhwi (@4) == 1
+ && tree_to_uhwi (@10) == 2 && tree_to_uhwi (@5) == 4
+ && tree_to_uhwi (@1) == prec - 8 && tree_to_uhwi (@2) == c1
+ && tree_to_uhwi (@3) == c2 && tree_to_uhwi (@9) == c3
+ && tree_to_uhwi (@7) == c3 && tree_to_uhwi (@11) == c4
+ && optab_handler (popcount_optab, TYPE_MODE (argtype))
+ != CODE_FOR_nothing)
+ (switch
+ (if (types_match (argtype, long_long_unsigned_type_node))
+ (BUILT_IN_POPCOUNTLL @0))
+ (if (types_match (argtype, long_unsigned_type_node))
+ (BUILT_IN_POPCOUNTL @0))
+ (if (types_match (argtype, unsigned_type_node))
+ (BUILT_IN_POPCOUNT @0))))))
+
/* Simplify:
a = a1 op a2
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c
new file mode 100644
index 0000000..9f759f8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcount } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned m1 = 0x55555555UL;
+const unsigned m2 = 0x33333333UL;
+const unsigned m4 = 0x0F0F0F0FUL;
+const unsigned h01 = 0x01010101UL;
+const int shift = 24;
+
+int popcount64c(unsigned x)
+{
+ x -= (x >> 1) & m1;
+ x = (x & m2) + ((x >> 2) & m2);
+ x = (x + (x >> 4)) & m4;
+ return (x * h01) >> shift;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_popcount" 1 "optimized" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
new file mode 100644
index 0000000..ab33f79
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcountl } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#if __SIZEOF_LONG__ == 4
+const unsigned long m1 = 0x55555555UL;
+const unsigned long m2 = 0x33333333UL;
+const unsigned long m4 = 0x0F0F0F0FUL;
+const unsigned long h01 = 0x01010101UL;
+const int shift = 24;
+#else
+const unsigned long m1 = 0x5555555555555555UL;
+const unsigned long m2 = 0x3333333333333333UL;
+const unsigned long m4 = 0x0f0f0f0f0f0f0f0fUL;
+const unsigned long h01 = 0x0101010101010101UL;
+const int shift = 56;
+#endif
+
+
+int popcount64c(unsigned long x)
+{
+ x -= (x >> 1) & m1;
+ x = (x & m2) + ((x >> 2) & m2);
+ x = (x + (x >> 4)) & m4;
+ return (x * h01) >> shift;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_popcountl" 1 "optimized" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
new file mode 100644
index 0000000..f3755f1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcountll } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned long long m1 = 0x5555555555555555ULL;
+const unsigned long long m2 = 0x3333333333333333ULL;
+const unsigned long long m4 = 0x0F0F0F0F0F0F0F0FULL;
+const unsigned long long h01 = 0x0101010101010101ULL;
+const int shift = 56;
+
+int popcount64c(unsigned long long x)
+{
+ x -= (x >> 1) & m1;
+ x = (x & m2) + ((x >> 2) & m2);
+ x = (x + (x >> 4)) & m4;
+ return (x * h01) >> shift;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_popcountll" 1 "optimized" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 3c50b89..32bbad7 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6855,6 +6855,29 @@ proc check_effective_target_popcountl { } {
} "" ]
}
+# Return 1 if the target supports popcount on long long.
+
+proc check_effective_target_popcountll { } {
+ return [check_no_messages_and_pattern popcountll "!\\(call" rtl-expand {
+ int foo (long long b)
+ {
+ return __builtin_popcountll (b);
+ }
+ } "" ]
+}
+
+
+# Return 1 if the target supports popcount on int.
+
+proc check_effective_target_popcount { } {
+ return [check_no_messages_and_pattern popcount "!\\(call" rtl-expand {
+ int foo (int b)
+ {
+ return __builtin_popcount (b);
+ }
+ } "" ]
+}
+
# Return 1 if the target supports atomic operations on "long long"
# and can execute them.
#