This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Improve vec extraction


Hi!

While EVEX doesn't have vextracti128, we can use vextracti32x4;
unfortunately without avx512dq we need to use full zmm input operand,
but that shouldn't be a big deal when we hardcode 1 as immediate.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-05-04  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (*vec_extractv4sf_0, *sse4_1_extractps,
	*vec_extractv4sf_mem, vec_extract_lo_v16hi, vec_extract_hi_v16hi,
	vec_extract_lo_v32qi, vec_extract_hi_v32qi): Use v instead of x
	in vex or maybe_vex alternatives, use maybe_evex instead of vex
	in prefix.

--- gcc/config/i386/sse.md.jj	2016-05-04 14:36:08.000000000 +0200
+++ gcc/config/i386/sse.md	2016-05-04 15:16:44.180894303 +0200
@@ -6613,9 +6613,9 @@ (define_expand "vec_set<mode>"
 })
 
 (define_insn_and_split "*vec_extractv4sf_0"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
 	(vec_select:SF
-	  (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
+	  (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
 	  (parallel [(const_int 0)])))]
   "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "#"
@@ -6624,9 +6624,9 @@ (define_insn_and_split "*vec_extractv4sf
   "operands[1] = gen_lowpart (SFmode, operands[1]);")
 
 (define_insn_and_split "*sse4_1_extractps"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,v,v")
 	(vec_select:SF
-	  (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
+	  (match_operand:V4SF 1 "register_operand" "Yr,*v,0,v")
 	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
   "TARGET_SSE4_1"
   "@
@@ -6665,7 +6665,7 @@ (define_insn_and_split "*sse4_1_extractp
    (set_attr "mode" "V4SF,V4SF,*,*")])
 
 (define_insn_and_split "*vec_extractv4sf_mem"
-  [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
+  [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
 	(vec_select:SF
 	  (match_operand:V4SF 1 "memory_operand" "o,o,o")
 	  (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
@@ -7239,9 +7239,9 @@ (define_insn "vec_extract_hi_v32hi"
    (set_attr "mode" "XI")])
 
 (define_insn_and_split "vec_extract_lo_v16hi"
-  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
 	(vec_select:V8HI
-	  (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
+	  (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
 	  (parallel [(const_int 0) (const_int 1)
 		     (const_int 2) (const_int 3)
 		     (const_int 4) (const_int 5)
@@ -7253,20 +7253,27 @@ (define_insn_and_split "vec_extract_lo_v
   "operands[1] = gen_lowpart (V8HImode, operands[1]);")
 
 (define_insn "vec_extract_hi_v16hi"
-  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
 	(vec_select:V8HI
-	  (match_operand:V16HI 1 "register_operand" "x,x")
+	  (match_operand:V16HI 1 "register_operand" "x,x,v,v,v,v")
 	  (parallel [(const_int 8) (const_int 9)
 		     (const_int 10) (const_int 11)
 		     (const_int 12) (const_int 13)
 		     (const_int 14) (const_int 15)])))]
   "TARGET_AVX"
-  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
+  "@
+   vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
+   vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
+   vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
+   vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
+   vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
+   vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
-   (set_attr "memory" "none,store")
-   (set_attr "prefix" "vex")
+   (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
+   (set_attr "memory" "none,store,none,store,none,store")
+   (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
    (set_attr "mode" "OI")])
 
 (define_insn_and_split "vec_extract_lo_v64qi"
@@ -7325,9 +7332,9 @@ (define_insn "vec_extract_hi_v64qi"
    (set_attr "mode" "XI")])
 
 (define_insn_and_split "vec_extract_lo_v32qi"
-  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
 	(vec_select:V16QI
-	  (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
+	  (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
 	  (parallel [(const_int 0) (const_int 1)
 		     (const_int 2) (const_int 3)
 		     (const_int 4) (const_int 5)
@@ -7343,9 +7350,9 @@ (define_insn_and_split "vec_extract_lo_v
   "operands[1] = gen_lowpart (V16QImode, operands[1]);")
 
 (define_insn "vec_extract_hi_v32qi"
-  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
 	(vec_select:V16QI
-	  (match_operand:V32QI 1 "register_operand" "x,x")
+	  (match_operand:V32QI 1 "register_operand" "x,x,v,v,v,v")
 	  (parallel [(const_int 16) (const_int 17)
 		     (const_int 18) (const_int 19)
 		     (const_int 20) (const_int 21)
@@ -7355,12 +7362,19 @@ (define_insn "vec_extract_hi_v32qi"
 		     (const_int 28) (const_int 29)
 		     (const_int 30) (const_int 31)])))]
   "TARGET_AVX"
-  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
+  "@
+   vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
+   vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
+   vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
+   vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
+   vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
+   vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
-   (set_attr "memory" "none,store")
-   (set_attr "prefix" "vex")
+   (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
+   (set_attr "memory" "none,store,none,store,none,store")
+   (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
    (set_attr "mode" "OI")])
 
 ;; Modes handled by vec_extract patterns.

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]