This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Re: [PATCH] Improve vec extraction
- From: Kirill Yukhin <kirill dot yukhin at gmail dot com>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: Uros Bizjak <ubizjak at gmail dot com>, gcc-patches at gcc dot gnu dot org
- Date: Fri, 6 May 2016 14:47:58 +0300
- Subject: Re: [PATCH] Improve vec extraction
- Authentication-results: sourceware.org; auth=none
- References: <20160504194700 dot GQ26501 at tucnak dot zalov dot cz>
On 04 May 21:47, Jakub Jelinek wrote:
> Hi!
>
> While EVEX doesn't have vextracti128, we can use vextracti32x4;
> unfortunately without avx512dq we need to use full zmm input operand,
> but that shouldn't be a big deal when we hardcode 1 as immediate.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK for trunk
--
Thanks, K
>
> 2016-05-04 Jakub Jelinek <jakub@redhat.com>
>
> * config/i386/sse.md (*vec_extractv4sf_0, *sse4_1_extractps,
> *vec_extractv4sf_mem, vec_extract_lo_v16hi, vec_extract_hi_v16hi,
> vec_extract_lo_v32qi, vec_extract_hi_v32qi): Use v instead of x
> in vex or maybe_vex alternatives, use maybe_evex instead of vex
> in prefix.
>
> --- gcc/config/i386/sse.md.jj 2016-05-04 14:36:08.000000000 +0200
> +++ gcc/config/i386/sse.md 2016-05-04 15:16:44.180894303 +0200
> @@ -6613,9 +6613,9 @@ (define_expand "vec_set<mode>"
> })
>
> (define_insn_and_split "*vec_extractv4sf_0"
> - [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
> + [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
> (vec_select:SF
> - (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
> + (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
> (parallel [(const_int 0)])))]
> "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> "#"
> @@ -6624,9 +6624,9 @@ (define_insn_and_split "*vec_extractv4sf
> "operands[1] = gen_lowpart (SFmode, operands[1]);")
>
> (define_insn_and_split "*sse4_1_extractps"
> - [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
> + [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,v,v")
> (vec_select:SF
> - (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
> + (match_operand:V4SF 1 "register_operand" "Yr,*v,0,v")
> (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
> "TARGET_SSE4_1"
> "@
> @@ -6665,7 +6665,7 @@ (define_insn_and_split "*sse4_1_extractp
> (set_attr "mode" "V4SF,V4SF,*,*")])
>
> (define_insn_and_split "*vec_extractv4sf_mem"
> - [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
> + [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
> (vec_select:SF
> (match_operand:V4SF 1 "memory_operand" "o,o,o")
> (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
> @@ -7239,9 +7239,9 @@ (define_insn "vec_extract_hi_v32hi"
> (set_attr "mode" "XI")])
>
> (define_insn_and_split "vec_extract_lo_v16hi"
> - [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
> + [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
> (vec_select:V8HI
> - (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
> + (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
> (parallel [(const_int 0) (const_int 1)
> (const_int 2) (const_int 3)
> (const_int 4) (const_int 5)
> @@ -7253,20 +7253,27 @@ (define_insn_and_split "vec_extract_lo_v
> "operands[1] = gen_lowpart (V8HImode, operands[1]);")
>
> (define_insn "vec_extract_hi_v16hi"
> - [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
> + [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
> (vec_select:V8HI
> - (match_operand:V16HI 1 "register_operand" "x,x")
> + (match_operand:V16HI 1 "register_operand" "x,x,v,v,v,v")
> (parallel [(const_int 8) (const_int 9)
> (const_int 10) (const_int 11)
> (const_int 12) (const_int 13)
> (const_int 14) (const_int 15)])))]
> "TARGET_AVX"
> - "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
> + "@
> + vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
> + vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
> + vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
> + vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
> + vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
> + vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
> [(set_attr "type" "sselog")
> (set_attr "prefix_extra" "1")
> (set_attr "length_immediate" "1")
> - (set_attr "memory" "none,store")
> - (set_attr "prefix" "vex")
> + (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
> + (set_attr "memory" "none,store,none,store,none,store")
> + (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
> (set_attr "mode" "OI")])
>
> (define_insn_and_split "vec_extract_lo_v64qi"
> @@ -7325,9 +7332,9 @@ (define_insn "vec_extract_hi_v64qi"
> (set_attr "mode" "XI")])
>
> (define_insn_and_split "vec_extract_lo_v32qi"
> - [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
> + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
> (vec_select:V16QI
> - (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
> + (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
> (parallel [(const_int 0) (const_int 1)
> (const_int 2) (const_int 3)
> (const_int 4) (const_int 5)
> @@ -7343,9 +7350,9 @@ (define_insn_and_split "vec_extract_lo_v
> "operands[1] = gen_lowpart (V16QImode, operands[1]);")
>
> (define_insn "vec_extract_hi_v32qi"
> - [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
> + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
> (vec_select:V16QI
> - (match_operand:V32QI 1 "register_operand" "x,x")
> + (match_operand:V32QI 1 "register_operand" "x,x,v,v,v,v")
> (parallel [(const_int 16) (const_int 17)
> (const_int 18) (const_int 19)
> (const_int 20) (const_int 21)
> @@ -7355,12 +7362,19 @@ (define_insn "vec_extract_hi_v32qi"
> (const_int 28) (const_int 29)
> (const_int 30) (const_int 31)])))]
> "TARGET_AVX"
> - "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
> + "@
> + vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
> + vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
> + vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
> + vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
> + vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
> + vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
> [(set_attr "type" "sselog")
> (set_attr "prefix_extra" "1")
> (set_attr "length_immediate" "1")
> - (set_attr "memory" "none,store")
> - (set_attr "prefix" "vex")
> + (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
> + (set_attr "memory" "none,store,none,store,none,store")
> + (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
> (set_attr "mode" "OI")])
>
> ;; Modes handled by vec_extract patterns.
>
> Jakub