[PATCH] i386: Optimize {,v}{,p}movmsk{b,ps,pd} followed by sign extension [PR91824]

Uros Bizjak <ubizjak@gmail.com>
Thu Jan 30 07:43:00 GMT 2020


On Thu, Jan 30, 2020 at 1:23 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> Some time ago, patterns were added to optimize a move mask followed by zero
> extension from 32 bits to 64 bits.  As the testcase shows, the intrinsics
> actually return int, not unsigned int, so it happens quite often that
> one actually needs sign extension instead of zero extension.  Except for
> vpmovmskb with a 256-bit operand, sign vs. zero extension makes no
> difference, as we know bit 31 will not be set (the source will have 2 or
> 4 doubles, 4 or 8 floats, or 16 or 32 chars).
> So, for the floating point patterns, this patch just uses a code iterator
> so that both zero extension and sign extension are handled, and for the
> byte variant it adds a separate pattern for the 128-bit operand.
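>
> For example (a minimal sketch mirroring f1 in the new test; the function
> name is illustrative, compiled with -O2 -msse2):
>
>   #include <x86intrin.h>
>
>   unsigned long long
>   mask_bytes (__m128i x)
>   {
>     /* _mm_movemask_epi8 returns int, so the conversion to the 64-bit
>        return type is a sign extension.  Only bits 0-15 of the mask can
>        ever be set, so the extension is redundant; previously it still
>        survived as a separate cltq after the pmovmskb, while the new
>        *sse2_pmovmskb_ext pattern lets it be folded away.  */
>     return _mm_movemask_epi8 (x);
>   }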
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2020-01-30  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/91824
>         * config/i386/sse.md
>         (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext): Renamed to ...
>         (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext): ... this.  Use
>         any_extend code iterator instead of always zero_extend.
>         (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt): Renamed to ...
>         (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt): ... this.
>         Use any_extend code iterator instead of always zero_extend.
>         (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift): Renamed to ...
>         (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift): ... this.
>         Use any_extend code iterator instead of always zero_extend.
>         (*sse2_pmovmskb_ext): New define_insn.
>         (*sse2_pmovmskb_ext_lt): New define_insn_and_split.
>
>         * gcc.target/i386/pr91824-2.c: New test.

OK.

Thanks,
Uros.

> --- gcc/config/i386/sse.md.jj   2020-01-29 09:35:05.791247952 +0100
> +++ gcc/config/i386/sse.md      2020-01-29 16:56:00.354739600 +0100
> @@ -15815,9 +15815,9 @@ (define_insn "<sse>_movmsk<ssemodesuffix
>     (set_attr "prefix" "maybe_vex")
>     (set_attr "mode" "<MODE>")])
>
> -(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
> +(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
>    [(set (match_operand:DI 0 "register_operand" "=r")
> -       (zero_extend:DI
> +       (any_extend:DI
>           (unspec:SI
>             [(match_operand:VF_128_256 1 "register_operand" "x")]
>             UNSPEC_MOVMSK)))]
> @@ -15844,9 +15844,9 @@ (define_insn_and_split "*<sse>_movmsk<ss
>     (set_attr "prefix" "maybe_vex")
>     (set_attr "mode" "<MODE>")])
>
> -(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
> +(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
>    [(set (match_operand:DI 0 "register_operand" "=r")
> -       (zero_extend:DI
> +       (any_extend:DI
>           (unspec:SI
>             [(lt:VF_128_256
>                (match_operand:<sseintvecmode> 1 "register_operand" "x")
> @@ -15856,7 +15856,7 @@ (define_insn_and_split "*<sse>_movmsk<ss
>    "#"
>    "&& reload_completed"
>    [(set (match_dup 0)
> -       (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
> +       (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
>    "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "maybe_vex")
> @@ -15880,9 +15880,9 @@ (define_insn_and_split "*<sse>_movmsk<ss
>     (set_attr "prefix" "maybe_vex")
>     (set_attr "mode" "<MODE>")])
>
> -(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
> +(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
>    [(set (match_operand:DI 0 "register_operand" "=r")
> -       (zero_extend:DI
> +       (any_extend:DI
>           (unspec:SI
>             [(subreg:VF_128_256
>                (ashiftrt:<sseintvecmode>
> @@ -15893,7 +15893,7 @@ (define_insn_and_split "*<sse>_movmsk<ss
>    "#"
>    "&& reload_completed"
>    [(set (match_dup 0)
> -       (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
> +       (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
>    "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "maybe_vex")
> @@ -15932,6 +15932,23 @@ (define_insn "*<sse2_avx2>_pmovmskb_zext
>     (set_attr "prefix" "maybe_vex")
>     (set_attr "mode" "SI")])
>
> +(define_insn "*sse2_pmovmskb_ext"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> +       (sign_extend:DI
> +         (unspec:SI
> +           [(match_operand:V16QI 1 "register_operand" "x")]
> +           UNSPEC_MOVMSK)))]
> +  "TARGET_64BIT && TARGET_SSE2"
> +  "%vpmovmskb\t{%1, %k0|%k0, %1}"
> +  [(set_attr "type" "ssemov")
> +   (set (attr "prefix_data16")
> +     (if_then_else
> +       (match_test "TARGET_AVX")
> +     (const_string "*")
> +     (const_string "1")))
> +   (set_attr "prefix" "maybe_vex")
> +   (set_attr "mode" "SI")])
> +
>  (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
>    [(set (match_operand:SI 0 "register_operand" "=r")
>         (unspec:SI
> @@ -15968,6 +15985,28 @@ (define_insn_and_split "*<sse2_avx2>_pmo
>    ""
>    [(set_attr "type" "ssemov")
>     (set (attr "prefix_data16")
> +     (if_then_else
> +       (match_test "TARGET_AVX")
> +     (const_string "*")
> +     (const_string "1")))
> +   (set_attr "prefix" "maybe_vex")
> +   (set_attr "mode" "SI")])
> +
> +(define_insn_and_split "*sse2_pmovmskb_ext_lt"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> +       (sign_extend:DI
> +         (unspec:SI
> +           [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
> +                      (match_operand:V16QI 2 "const0_operand" "C"))]
> +           UNSPEC_MOVMSK)))]
> +  "TARGET_64BIT && TARGET_SSE2"
> +  "#"
> +  ""
> +  [(set (match_dup 0)
> +       (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
> +  ""
> +  [(set_attr "type" "ssemov")
> +   (set (attr "prefix_data16")
>       (if_then_else
>         (match_test "TARGET_AVX")
>       (const_string "*")
> --- gcc/testsuite/gcc.target/i386/pr91824-2.c.jj        2020-01-29 17:06:18.838474437 +0100
> +++ gcc/testsuite/gcc.target/i386/pr91824-2.c   2020-01-29 17:06:01.070740609 +0100
> @@ -0,0 +1,73 @@
> +/* PR target/91824 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx2" } */
> +/* { dg-final { scan-assembler-not "cltq" } } */
> +/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
> +
> +#include <x86intrin.h>
> +
> +unsigned long long
> +f1 (__m128i x)
> +{
> +  return _mm_movemask_epi8 (x);
> +}
> +
> +unsigned long long
> +f2 (__m128i x)
> +{
> +  return (unsigned) _mm_movemask_epi8 (x);
> +}
> +
> +unsigned long long
> +f3 (__m128 x)
> +{
> +  return _mm_movemask_ps (x);
> +}
> +
> +unsigned long long
> +f4 (__m128 x)
> +{
> +  return (unsigned) _mm_movemask_ps (x);
> +}
> +
> +unsigned long long
> +f5 (__m128d x)
> +{
> +  return _mm_movemask_pd (x);
> +}
> +
> +unsigned long long
> +f6 (__m128d x)
> +{
> +  return (unsigned) _mm_movemask_pd (x);
> +}
> +
> +unsigned long long
> +f7 (__m256 x)
> +{
> +  return _mm256_movemask_ps (x);
> +}
> +
> +unsigned long long
> +f8 (__m256 x)
> +{
> +  return (unsigned) _mm256_movemask_ps (x);
> +}
> +
> +unsigned long long
> +f9 (__m256d x)
> +{
> +  return _mm256_movemask_pd (x);
> +}
> +
> +unsigned long long
> +f10 (__m256d x)
> +{
> +  return (unsigned) _mm256_movemask_pd (x);
> +}
> +
> +unsigned long long
> +f11 (__m256i x)
> +{
> +  return (unsigned) _mm256_movemask_epi8 (x);
> +}
>
>         Jakub
>


