This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] PR tree-optimization/90836 Missing popcount pattern matching


Hi,

can anybody take a look at v2?

Thanks,
Dmitrij

On Mon, Sep 09, 2019 at 10:03:40PM +0300, Dmitrij Pochepko wrote:
> Hi all.
> 
> Please take a look at v2 (attached).
> I changed patch according to review comments. The same testing was performed again.
> 
> Thanks,
> Dmitrij
> 
> On Thu, Sep 05, 2019 at 06:34:49PM +0300, Dmitrij Pochepko wrote:
> > This patch adds matching for Hamming weight (popcount) implementation. The following sources:
> > 
> > int
> > foo64 (unsigned long long a)
> > {
> >     unsigned long long b = a;
> >     b -= ((b>>1) & 0x5555555555555555ULL);
> >     b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL);
> >     b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL;
> >     b *= 0x0101010101010101ULL;
> >     return (int)(b >> 56);
> > }
> > 
> > and
> > 
> > int
> > foo32 (unsigned int a)
> > {
> >     unsigned long b = a;
> >     b -= ((b>>1) & 0x55555555UL);
> >     b = ((b>>2) & 0x33333333UL) + (b & 0x33333333UL);
> >     b = ((b>>4) + b) & 0x0F0F0F0FUL;
> >     b *= 0x01010101UL;
> >     return (int)(b >> 24);
> > }
> > 
> > and equivalents are now recognized as popcount for platforms with hw popcount support. Bootstrapped and tested on x86_64-pc-linux-gnu and aarch64-linux-gnu systems with no regressions. 
> > 
> > (I have no write access to repo)
> > 
> > Thanks,
> > Dmitrij
> > 
> > 
> > gcc/ChangeLog:
> > 
> > 	PR tree-optimization/90836
> > 
> > 	* gcc/match.pd (popcount): New pattern.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > 	PR tree-optimization/90836
> > 
> > 	* lib/target-supports.exp (check_effective_target_popcount)
> > 	(check_effective_target_popcountll): New effective targets.
> > 	* gcc.dg/tree-ssa/popcount4.c: New test.
> > 	* gcc.dg/tree-ssa/popcount4l.c: New test.
> > 	* gcc.dg/tree-ssa/popcount4ll.c: New test.
> 
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 0317bc7..b1867bf 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -5358,6 +5358,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >        (cmp (popcount @0) integer_zerop)
> >        (rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
> >  
> > +/* 64- and 32-bits branchless implementations of popcount are detected:
> > +
> > +   int popcount64c (uint64_t x)
> > +   {
> > +     x -= (x >> 1) & 0x5555555555555555ULL;
> > +     x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
> > +     x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
> > +     return (x * 0x0101010101010101ULL) >> 56;
> > +   }
> > +
> > +   int popcount32c (uint32_t x)
> > +   {
> > +     x -= (x >> 1) & 0x55555555;
> > +     x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
> > +     x = (x + (x >> 4)) & 0x0f0f0f0f;
> > +     return (x * 0x01010101) >> 24;
> > +   }  */
> > +(simplify
> > +  (convert
> > +    (rshift
> > +      (mult
> > +	(bit_and:c
> > +	  (plus:c
> > +	    (rshift @8 INTEGER_CST@5)
> > +	    (plus:c@8
> > +	      (bit_and @6 INTEGER_CST@7)
> > +	      (bit_and
> > +		(rshift
> > +		  (minus@6
> > +		    @0
> > +		    (bit_and
> > +		      (rshift @0 INTEGER_CST@4)
> > +		      INTEGER_CST@11))
> > +		  INTEGER_CST@10)
> > +		INTEGER_CST@9)))
> > +	  INTEGER_CST@3)
> > +	INTEGER_CST@2)
> > +      INTEGER_CST@1))
> > +  /* Check constants and optab.  */
> > +  (with
> > +     {
> > +       tree argtype = TREE_TYPE (@0);
> > +       unsigned prec = TYPE_PRECISION (argtype);
> > +       int shift = TYPE_PRECISION (long_long_unsigned_type_node) - prec;
> > +       const unsigned long long c1 = 0x0101010101010101ULL >> shift,
> > +				c2 = 0x0F0F0F0F0F0F0F0FULL >> shift,
> > +				c3 = 0x3333333333333333ULL >> shift,
> > +				c4 = 0x5555555555555555ULL >> shift;
> > +     }
> > +    (if (types_match (type, integer_type_node) && tree_to_uhwi (@4) == 1
> > +	  && tree_to_uhwi (@10) == 2 && tree_to_uhwi (@5) == 4
> > +	  && tree_to_uhwi (@1) == prec - 8 && tree_to_uhwi (@2) == c1
> > +	  && tree_to_uhwi (@3) == c2 && tree_to_uhwi (@9) == c3
> > +	  && tree_to_uhwi (@7) == c3 && tree_to_uhwi (@11) == c4
> > +	  && optab_handler (popcount_optab, TYPE_MODE (argtype))
> > +	    != CODE_FOR_nothing)
> > +	(switch
> > +	    (if (types_match (argtype, long_long_unsigned_type_node))
> > +	      (BUILT_IN_POPCOUNTLL @0))
> > +	    (if (types_match (argtype, long_unsigned_type_node))
> > +	      (BUILT_IN_POPCOUNTL @0))
> > +	    (if (types_match (argtype, unsigned_type_node))
> > +	      (BUILT_IN_POPCOUNT @0))))))
> > +
> >  /* Simplify:
> >  
> >       a = a1 op a2
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c
> > new file mode 100644
> > index 0000000..9f759f8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c
> > @@ -0,0 +1,22 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target popcount } */
> > +/* { dg-require-effective-target int32plus } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +const unsigned m1  = 0x55555555UL;
> > +const unsigned m2  = 0x33333333UL;
> > +const unsigned m4  = 0x0F0F0F0FUL;
> > +const unsigned h01 = 0x01010101UL;
> > +const int shift = 24;
> > +
> > +int popcount64c(unsigned x)
> > +{
> > +    x -= (x >> 1) & m1;
> > +    x = (x & m2) + ((x >> 2) & m2);
> > +    x = (x + (x >> 4)) & m4;
> > +    return (x * h01) >> shift;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "__builtin_popcount" 1 "optimized" } } */
> > +
> > +
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
> > new file mode 100644
> > index 0000000..ab33f79
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
> > @@ -0,0 +1,30 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target popcountl } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +#if __SIZEOF_LONG__ == 4
> > +const unsigned long m1  = 0x55555555UL;
> > +const unsigned long m2  = 0x33333333UL;
> > +const unsigned long m4  = 0x0F0F0F0FUL;
> > +const unsigned long h01 = 0x01010101UL;
> > +const int shift = 24;
> > +#else
> > +const unsigned long m1  = 0x5555555555555555UL;
> > +const unsigned long m2  = 0x3333333333333333UL;
> > +const unsigned long m4  = 0x0f0f0f0f0f0f0f0fUL;
> > +const unsigned long h01 = 0x0101010101010101UL;
> > +const int shift = 56;
> > +#endif
> > +
> > +
> > +int popcount64c(unsigned long x)
> > +{
> > +    x -= (x >> 1) & m1;
> > +    x = (x & m2) + ((x >> 2) & m2);
> > +    x = (x + (x >> 4)) & m4;
> > +    return (x * h01) >> shift;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "__builtin_popcountl" 1 "optimized" } } */
> > +
> > +
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
> > new file mode 100644
> > index 0000000..f3755f1
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
> > @@ -0,0 +1,19 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target popcountll } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +const unsigned long long m1  = 0x5555555555555555ULL;
> > +const unsigned long long m2  = 0x3333333333333333ULL;
> > +const unsigned long long m4  = 0x0F0F0F0F0F0F0F0FULL;
> > +const unsigned long long h01 = 0x0101010101010101ULL;
> > +const int shift = 56;
> > +
> > +int popcount64c(unsigned long long x)
> > +{
> > +    x -= (x >> 1) & m1;
> > +    x = (x & m2) + ((x >> 2) & m2);
> > +    x = (x + (x >> 4)) & m4;
> > +    return (x * h01) >> shift;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "__builtin_popcountll" 1 "optimized" } } */
> > diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> > index 3c50b89..32bbad7 100644
> > --- a/gcc/testsuite/lib/target-supports.exp
> > +++ b/gcc/testsuite/lib/target-supports.exp
> > @@ -6855,6 +6855,29 @@ proc check_effective_target_popcountl { } {
> >      } "" ]
> >  }
> >  
> > +# Return 1 if the target supports popcount on long long.
> > +
> > +proc check_effective_target_popcountll { } {
> > +    return [check_no_messages_and_pattern popcountll "!\\(call" rtl-expand {
> > +        int foo (long long b)
> > +          {
> > +            return __builtin_popcountll (b);
> > +          }
> > +    } "" ]
> > +}
> > +
> > +
> > +# Return 1 if the target supports popcount on int.
> > +
> > +proc check_effective_target_popcount { } {
> > +    return [check_no_messages_and_pattern popcount "!\\(call" rtl-expand {
> > +        int foo (int b)
> > +          {
> > +            return __builtin_popcount (b);
> > +          }
> > +    } "" ]
> > +}
> > +
> >  # Return 1 if the target supports atomic operations on "long long"
> >  # and can execute them.
> >  #
> 

> diff --git a/gcc/match.pd b/gcc/match.pd
> index 0317bc7..896d5b6 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5358,6 +5358,69 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>        (cmp (popcount @0) integer_zerop)
>        (rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
>  
> +/* 64- and 32-bits branchless implementations of popcount are detected:
> +
> +   int popcount64c (uint64_t x)
> +   {
> +     x -= (x >> 1) & 0x5555555555555555ULL;
> +     x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
> +     x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
> +     return (x * 0x0101010101010101ULL) >> 56;
> +   }
> +
> +   int popcount32c (uint32_t x)
> +   {
> +     x -= (x >> 1) & 0x55555555;
> +     x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
> +     x = (x + (x >> 4)) & 0x0f0f0f0f;
> +     return (x * 0x01010101) >> 24;
> +   }  */
> +(simplify
> +  (rshift
> +    (mult
> +      (bit_and
> +	(plus:c
> +	  (rshift @8 INTEGER_CST@5)
> +	  (plus:c@8
> +	    (bit_and @6 INTEGER_CST@7)
> +	    (bit_and
> +	      (rshift
> +		(minus@6
> +		  @0
> +		  (bit_and
> +		    (rshift @0 INTEGER_CST@4)
> +		    INTEGER_CST@11))
> +	      INTEGER_CST@10)
> +	    INTEGER_CST@9)))
> +	INTEGER_CST@3)
> +      INTEGER_CST@2)
> +    INTEGER_CST@1)
> +  /* Check constants and optab.  */
> +  (with
> +     {
> +       tree argtype = TREE_TYPE (@0);
> +       unsigned prec = TYPE_PRECISION (argtype);
> +       int shift = 64 - prec;
> +       const unsigned HOST_WIDE_INT c1 = 0x0101010101010101ULL >> shift,
> +				    c2 = 0x0F0F0F0F0F0F0F0FULL >> shift,
> +				    c3 = 0x3333333333333333ULL >> shift,
> +				    c4 = 0x5555555555555555ULL >> shift;
> +     }
> +    (if (tree_to_uhwi (@4) == 1
> +	  && tree_to_uhwi (@10) == 2 && tree_to_uhwi (@5) == 4
> +	  && tree_to_uhwi (@1) == prec - 8 && tree_to_uhwi (@2) == c1
> +	  && tree_to_uhwi (@3) == c2 && tree_to_uhwi (@9) == c3
> +	  && tree_to_uhwi (@7) == c3 && tree_to_uhwi (@11) == c4
> +	  && optab_handler (popcount_optab, TYPE_MODE (argtype))
> +	    != CODE_FOR_nothing)
> +	(switch
> +	    (if (types_match (argtype, long_long_unsigned_type_node))
> +	      (convert (BUILT_IN_POPCOUNTLL:integer_type_node @0)))
> +	    (if (types_match (argtype, long_unsigned_type_node))
> +	      (convert (BUILT_IN_POPCOUNTL:integer_type_node @0)))
> +	    (if (types_match (argtype, unsigned_type_node))
> +	      (convert (BUILT_IN_POPCOUNT:integer_type_node @0)))))))
> +
>  /* Simplify:
>  
>       a = a1 op a2
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c
> new file mode 100644
> index 0000000..9f759f8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target popcount } */
> +/* { dg-require-effective-target int32plus } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +const unsigned m1  = 0x55555555UL;
> +const unsigned m2  = 0x33333333UL;
> +const unsigned m4  = 0x0F0F0F0FUL;
> +const unsigned h01 = 0x01010101UL;
> +const int shift = 24;
> +
> +int popcount64c(unsigned x)
> +{
> +    x -= (x >> 1) & m1;
> +    x = (x & m2) + ((x >> 2) & m2);
> +    x = (x + (x >> 4)) & m4;
> +    return (x * h01) >> shift;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "__builtin_popcount" 1 "optimized" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
> new file mode 100644
> index 0000000..ab33f79
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target popcountl } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +#if __SIZEOF_LONG__ == 4
> +const unsigned long m1  = 0x55555555UL;
> +const unsigned long m2  = 0x33333333UL;
> +const unsigned long m4  = 0x0F0F0F0FUL;
> +const unsigned long h01 = 0x01010101UL;
> +const int shift = 24;
> +#else
> +const unsigned long m1  = 0x5555555555555555UL;
> +const unsigned long m2  = 0x3333333333333333UL;
> +const unsigned long m4  = 0x0f0f0f0f0f0f0f0fUL;
> +const unsigned long h01 = 0x0101010101010101UL;
> +const int shift = 56;
> +#endif
> +
> +
> +int popcount64c(unsigned long x)
> +{
> +    x -= (x >> 1) & m1;
> +    x = (x & m2) + ((x >> 2) & m2);
> +    x = (x + (x >> 4)) & m4;
> +    return (x * h01) >> shift;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "__builtin_popcountl" 1 "optimized" } } */
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
> new file mode 100644
> index 0000000..f3755f1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target popcountll } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +const unsigned long long m1  = 0x5555555555555555ULL;
> +const unsigned long long m2  = 0x3333333333333333ULL;
> +const unsigned long long m4  = 0x0F0F0F0F0F0F0F0FULL;
> +const unsigned long long h01 = 0x0101010101010101ULL;
> +const int shift = 56;
> +
> +int popcount64c(unsigned long long x)
> +{
> +    x -= (x >> 1) & m1;
> +    x = (x & m2) + ((x >> 2) & m2);
> +    x = (x + (x >> 4)) & m4;
> +    return (x * h01) >> shift;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "__builtin_popcountll" 1 "optimized" } } */
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 3c50b89..32bbad7 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -6855,6 +6855,29 @@ proc check_effective_target_popcountl { } {
>      } "" ]
>  }
>  
> +# Return 1 if the target supports popcount on long long.
> +
> +proc check_effective_target_popcountll { } {
> +    return [check_no_messages_and_pattern popcountll "!\\(call" rtl-expand {
> +        int foo (long long b)
> +          {
> +            return __builtin_popcountll (b);
> +          }
> +    } "" ]
> +}
> +
> +
> +# Return 1 if the target supports popcount on int.
> +
> +proc check_effective_target_popcount { } {
> +    return [check_no_messages_and_pattern popcount "!\\(call" rtl-expand {
> +        int foo (int b)
> +          {
> +            return __builtin_popcount (b);
> +          }
> +    } "" ]
> +}
> +
>  # Return 1 if the target supports atomic operations on "long long"
>  # and can execute them.
>  #


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]