[PATCH] i386: Optimize {,v}{,p}movmsk{b,ps,pd} followed by sign extension [PR91824]
Uros Bizjak
ubizjak@gmail.com
Thu Jan 30 07:43:00 GMT 2020
On Thu, Jan 30, 2020 at 1:23 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> Some time ago, patterns were added to optimize a move mask followed by
> zero extension from 32 bits to 64 bits. As the testcase shows, the
> intrinsics actually return int, not unsigned int, so it happens quite
> often that one actually needs sign extension instead of zero extension.
> Except for vpmovmskb with a 256-bit operand, sign vs. zero extension
> makes no difference, because bit 31 of the result cannot be set: the
> source has 2 or 4 doubles, 4 or 8 floats, or 16 chars, so at most 16
> mask bits are produced.
> So, for the floating-point patterns, this patch uses a code iterator
> to handle both zero extension and sign extension, and for the byte
> variant it adds a separate pattern for the 128-bit operand.
>
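> As a minimal illustration (mine, not part of the patch):
>
>   #include <x86intrin.h>
>
>   unsigned long long
>   movemask_u64 (__m128i x)
>   {
>     /* _mm_movemask_epi8 returns int, so the implicit conversion to
>        unsigned long long sign extends.  For a 128-bit operand only
>        bits 0-15 of the mask can be set, so the sign extension is
>        equivalent to the zero extension that the 32-bit register
>        write of pmovmskb already provides, and no separate extension
>        instruction is needed.  */
>     return _mm_movemask_epi8 (x);
>   }
>
> For _mm256_movemask_epi8 the mask has 32 bits, so bit 31 can be set
> and sign extension there remains a real operation; that case keeps
> only the existing zero-extension pattern.
>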
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2020-01-30 Jakub Jelinek <jakub@redhat.com>
>
> PR target/91824
> * config/i386/sse.md
> (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext): Renamed to ...
> (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext): ... this. Use
> any_extend code iterator instead of always zero_extend.
> (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt): Renamed to ...
> (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt): ... this.
> Use any_extend code iterator instead of always zero_extend.
> (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift): Renamed to ...
> (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift): ... this.
> Use any_extend code iterator instead of always zero_extend.
> (*sse2_pmovmskb_ext): New define_insn.
> (*sse2_pmovmskb_ext_lt): New define_insn_and_split.
>
> * gcc.target/i386/pr91824-2.c: New test.
OK.
Thanks,
Uros.
> --- gcc/config/i386/sse.md.jj 2020-01-29 09:35:05.791247952 +0100
> +++ gcc/config/i386/sse.md 2020-01-29 16:56:00.354739600 +0100
> @@ -15815,9 +15815,9 @@ (define_insn "<sse>_movmsk<ssemodesuffix
> (set_attr "prefix" "maybe_vex")
> (set_attr "mode" "<MODE>")])
>
> -(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
> +(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
> [(set (match_operand:DI 0 "register_operand" "=r")
> - (zero_extend:DI
> + (any_extend:DI
> (unspec:SI
> [(match_operand:VF_128_256 1 "register_operand" "x")]
> UNSPEC_MOVMSK)))]
> @@ -15844,9 +15844,9 @@ (define_insn_and_split "*<sse>_movmsk<ss
> (set_attr "prefix" "maybe_vex")
> (set_attr "mode" "<MODE>")])
>
> -(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
> +(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
> [(set (match_operand:DI 0 "register_operand" "=r")
> - (zero_extend:DI
> + (any_extend:DI
> (unspec:SI
> [(lt:VF_128_256
> (match_operand:<sseintvecmode> 1 "register_operand" "x")
> @@ -15856,7 +15856,7 @@ (define_insn_and_split "*<sse>_movmsk<ss
> "#"
> "&& reload_completed"
> [(set (match_dup 0)
> - (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
> + (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
> "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
> [(set_attr "type" "ssemov")
> (set_attr "prefix" "maybe_vex")
> @@ -15880,9 +15880,9 @@ (define_insn_and_split "*<sse>_movmsk<ss
> (set_attr "prefix" "maybe_vex")
> (set_attr "mode" "<MODE>")])
>
> -(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
> +(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
> [(set (match_operand:DI 0 "register_operand" "=r")
> - (zero_extend:DI
> + (any_extend:DI
> (unspec:SI
> [(subreg:VF_128_256
> (ashiftrt:<sseintvecmode>
> @@ -15893,7 +15893,7 @@ (define_insn_and_split "*<sse>_movmsk<ss
> "#"
> "&& reload_completed"
> [(set (match_dup 0)
> - (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
> + (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
> "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
> [(set_attr "type" "ssemov")
> (set_attr "prefix" "maybe_vex")
> @@ -15932,6 +15932,23 @@ (define_insn "*<sse2_avx2>_pmovmskb_zext
> (set_attr "prefix" "maybe_vex")
> (set_attr "mode" "SI")])
>
> +(define_insn "*sse2_pmovmskb_ext"
> + [(set (match_operand:DI 0 "register_operand" "=r")
> + (sign_extend:DI
> + (unspec:SI
> + [(match_operand:V16QI 1 "register_operand" "x")]
> + UNSPEC_MOVMSK)))]
> + "TARGET_64BIT && TARGET_SSE2"
> + "%vpmovmskb\t{%1, %k0|%k0, %1}"
> + [(set_attr "type" "ssemov")
> + (set (attr "prefix_data16")
> + (if_then_else
> + (match_test "TARGET_AVX")
> + (const_string "*")
> + (const_string "1")))
> + (set_attr "prefix" "maybe_vex")
> + (set_attr "mode" "SI")])
> +
> (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
> [(set (match_operand:SI 0 "register_operand" "=r")
> (unspec:SI
> @@ -15968,6 +15985,28 @@ (define_insn_and_split "*<sse2_avx2>_pmo
> ""
> [(set_attr "type" "ssemov")
> (set (attr "prefix_data16")
> + (if_then_else
> + (match_test "TARGET_AVX")
> + (const_string "*")
> + (const_string "1")))
> + (set_attr "prefix" "maybe_vex")
> + (set_attr "mode" "SI")])
> +
> +(define_insn_and_split "*sse2_pmovmskb_ext_lt"
> + [(set (match_operand:DI 0 "register_operand" "=r")
> + (sign_extend:DI
> + (unspec:SI
> + [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
> + (match_operand:V16QI 2 "const0_operand" "C"))]
> + UNSPEC_MOVMSK)))]
> + "TARGET_64BIT && TARGET_SSE2"
> + "#"
> + ""
> + [(set (match_dup 0)
> + (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
> + ""
> + [(set_attr "type" "ssemov")
> + (set (attr "prefix_data16")
> (if_then_else
> (match_test "TARGET_AVX")
> (const_string "*")
> --- gcc/testsuite/gcc.target/i386/pr91824-2.c.jj 2020-01-29 17:06:18.838474437 +0100
> +++ gcc/testsuite/gcc.target/i386/pr91824-2.c 2020-01-29 17:06:01.070740609 +0100
> @@ -0,0 +1,73 @@
> +/* PR target/91824 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx2" } */
> +/* { dg-final { scan-assembler-not "cltq" } } */
> +/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
> +
> +#include <x86intrin.h>
> +
> +unsigned long long
> +f1 (__m128i x)
> +{
> + return _mm_movemask_epi8 (x);
> +}
> +
> +unsigned long long
> +f2 (__m128i x)
> +{
> + return (unsigned) _mm_movemask_epi8 (x);
> +}
> +
> +unsigned long long
> +f3 (__m128 x)
> +{
> + return _mm_movemask_ps (x);
> +}
> +
> +unsigned long long
> +f4 (__m128 x)
> +{
> + return (unsigned) _mm_movemask_ps (x);
> +}
> +
> +unsigned long long
> +f5 (__m128d x)
> +{
> + return _mm_movemask_pd (x);
> +}
> +
> +unsigned long long
> +f6 (__m128d x)
> +{
> + return (unsigned) _mm_movemask_pd (x);
> +}
> +
> +unsigned long long
> +f7 (__m256 x)
> +{
> + return _mm256_movemask_ps (x);
> +}
> +
> +unsigned long long
> +f8 (__m256 x)
> +{
> + return (unsigned) _mm256_movemask_ps (x);
> +}
> +
> +unsigned long long
> +f9 (__m256d x)
> +{
> + return _mm256_movemask_pd (x);
> +}
> +
> +unsigned long long
> +f10 (__m256d x)
> +{
> + return (unsigned) _mm256_movemask_pd (x);
> +}
> +
> +unsigned long long
> +f11 (__m256i x)
> +{
> + return (unsigned) _mm256_movemask_epi8 (x);
> +}
>
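> With the patch, none of these functions should need a separate
> extension instruction after the move mask; e.g. for f1 the expected
> codegen is (my sketch):
>
>   vpmovmskb %xmm0, %eax
>   ret
>
> rather than a cltq (or a movl %eax, %eax) following the vpmovmskb,
> which is what the two scan-assembler-not directives verify.
>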
> Jakub
>