This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Improve V?TImode shifts (PR target/82370)


On 10/4/17, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> The following patch tweaks the TImode vector shifts similarly
> to the earlier vector shift patch, so that for shifts by immediate
> we can accept a memory input.  Additionally, it removes the vec_shl_*

I prefer 2 patches, a separate patch to drop vec_shl_* first. which can
go in now.

Thanks.


> expander, because the middle-end has dropped that a few years ago,
> and merges the left and right shift patterns using code iterators.
> Appart from the code/names that can be handled by mode attributes,
> the only difference was that one of the insns had
> (set_attr "atom_unit" "sishuf")
> and the other didn't.  I hope that is just an error, I'd really expect
> both vpslldq and vpsrldq to use the same atom unit, isn't that the case?
> CCing H.J. for that.  If it is intentional difference, I can of course
> adjust the patch and undo the merging of the 4 define_insns into 2.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2017-10-04  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR target/82370
> 	* config/i386/sse.md (VIMAX_AVX2): Remove V4TImode.
> 	(VIMAX_AVX2_AVX512BW, VIMAX_AVX512VL): New mode iterators.
> 	(vec_shl_<mode>): Remove unused expander.
> 	(avx512bw_<shift_insn><mode>3): New define_insn.
> 	(<sse2_avx2>_ashl<mode>3, <sse2_avx2>_lshr<mode>3): Replaced by ...
> 	(<sse2_avx2>_<shift_insn><mode>3): ... this.  New define_insn.
>
> 	* gcc.target/i386/pr82370.c: New test.
>
> --- gcc/config/i386/sse.md.jj	2017-10-04 12:18:19.000000000 +0200
> +++ gcc/config/i386/sse.md	2017-10-04 15:34:00.541860351 +0200
> @@ -371,10 +371,17 @@ (define_mode_iterator V16FI
>    [V16SF V16SI])
>
>  ;; ??? We should probably use TImode instead.
> -(define_mode_iterator VIMAX_AVX2
> +(define_mode_iterator VIMAX_AVX2_AVX512BW
>    [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
>
> -;; ??? This should probably be dropped in favor of VIMAX_AVX2.
> +;; Suppose TARGET_AVX512BW as baseline
> +(define_mode_iterator VIMAX_AVX512VL
> +  [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
> +
> +(define_mode_iterator VIMAX_AVX2
> +  [(V2TI "TARGET_AVX2") V1TI])
> +
> +;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
>  (define_mode_iterator SSESCALARMODE
>    [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
>
> @@ -10792,9 +10799,9 @@ (define_insn "<shift_insn><mode>3<mask_n
>     (set_attr "mode" "<sseinsnmode>")])
>
>
> -(define_expand "vec_shl_<mode>"
> +(define_expand "vec_shr_<mode>"
>    [(set (match_dup 3)
> -	(ashift:V1TI
> +	(lshiftrt:V1TI
>  	 (match_operand:VI_128 1 "register_operand")
>  	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
>     (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
> @@ -10805,48 +10812,24 @@ (define_expand "vec_shl_<mode>"
>    operands[4] = gen_lowpart (<MODE>mode, operands[3]);
>  })
>
> -(define_insn "<sse2_avx2>_ashl<mode>3"
> -  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
> -	(ashift:VIMAX_AVX2
> -	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
> -	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
> -  "TARGET_SSE2"
> +(define_insn "avx512bw_<shift_insn><mode>3"
> +  [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
> +	(any_lshift:VIMAX_AVX512VL
> +	 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
> +	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
> +  "TARGET_AVX512BW"
>  {
>    operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
> -
> -  switch (which_alternative)
> -    {
> -    case 0:
> -      return "pslldq\t{%2, %0|%0, %2}";
> -    case 1:
> -      return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
> -    default:
> -      gcc_unreachable ();
> -    }
> +  return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
>  }
> -  [(set_attr "isa" "noavx,avx")
> -   (set_attr "type" "sseishft")
> +  [(set_attr "type" "sseishft")
>     (set_attr "length_immediate" "1")
> -   (set_attr "prefix_data16" "1,*")
> -   (set_attr "prefix" "orig,vex")
> +   (set_attr "prefix" "maybe_evex")
>     (set_attr "mode" "<sseinsnmode>")])
>
> -(define_expand "vec_shr_<mode>"
> -  [(set (match_dup 3)
> -	(lshiftrt:V1TI
> -	 (match_operand:VI_128 1 "register_operand")
> -	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
> -   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
> -  "TARGET_SSE2"
> -{
> -  operands[1] = gen_lowpart (V1TImode, operands[1]);
> -  operands[3] = gen_reg_rtx (V1TImode);
> -  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
> -})
> -
> -(define_insn "<sse2_avx2>_lshr<mode>3"
> +(define_insn "<sse2_avx2>_<shift_insn><mode>3"
>    [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
> -	(lshiftrt:VIMAX_AVX2
> +	(any_lshift:VIMAX_AVX2
>  	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
>  	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
>    "TARGET_SSE2"
> @@ -10856,9 +10839,9 @@ (define_insn "<sse2_avx2>_lshr<mode>3"
>    switch (which_alternative)
>      {
>      case 0:
> -      return "psrldq\t{%2, %0|%0, %2}";
> +      return "p<vshift>dq\t{%2, %0|%0, %2}";
>      case 1:
> -      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
> +      return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
>      default:
>        gcc_unreachable ();
>      }
> --- gcc/testsuite/gcc.target/i386/pr82370.c.jj	2017-10-04 16:01:16.350247297
> +0200
> +++ gcc/testsuite/gcc.target/i386/pr82370.c	2017-10-04 16:03:06.704922288
> +0200
> @@ -0,0 +1,18 @@
> +/* PR target/82370 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */
> +/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5,
> \\(%\[a-z0-9,]*\\), %xmm\[0-9]\+" 1 } } */
> +/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5,
> \\(%\[a-z0-9,]*\\), %xmm\[0-9]\+" 1 } } */
> +/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5,
> \\(%\[a-z0-9,]*\\), %ymm\[0-9]\+" 1 } } */
> +/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5,
> \\(%\[a-z0-9,]*\\), %ymm\[0-9]\+" 1 } } */
> +/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5,
> \\(%\[a-z0-9,]*\\), %zmm\[0-9]\+" 1 } } */
> +/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5,
> \\(%\[a-z0-9,]*\\), %zmm\[0-9]\+" 1 } } */
> +
> +#include <x86intrin.h>
> +
> +__m512i f1 (__m512i *x) { return _mm512_bslli_epi128 (*x, 5); }
> +__m512i f2 (__m512i *x) { return _mm512_bsrli_epi128 (*x, 5); }
> +__m256i f3 (__m256i *x) { return _mm256_bslli_epi128 (*x, 5); }
> +__m256i f4 (__m256i *x) { return _mm256_bsrli_epi128 (*x, 5); }
> +__m128i f5 (__m128i *x) { return _mm_bslli_si128 (*x, 5); }
> +__m128i f6 (__m128i *x) { return _mm_bsrli_si128 (*x, 5); }
>
> 	Jakub
>


-- 
H.J.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]