[PATCH][i386] Add some obvious missing vectorizer patterns for AVX

Richard Guenther rguenther@suse.de
Mon May 10 13:02:00 GMT 2010


This adds patterns that do not require much thought.  I duplicated
the existing (but, to me, odd and superfluous) vec_concats, for example
in vec_unpacks_hi_v8sf (AVX has vextract for a highpart vec_select,
but there must be a reason why the SSE patterns do it the odd way).

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Ok for trunk?

Thanks,
Richard.

2010-05-10  Richard Guenther  <rguenther@suse.de>

	* config/i386/sse.md (reduc_splus_v8sf): Add.
	(reduc_splus_v4df): Likewise.
	(vec_unpacks_hi_v8sf): Likewise.
	(vec_unpacks_lo_v8sf): Likewise.
	(*avx_cvtps2pd256_2): Likewise.
	(vec_unpacks_float_hi_v8si): Likewise.
	(vec_unpacks_float_lo_v8si): Likewise.
	(vec_interleave_highv4df): Likewise.
	(vec_interleave_lowv4df): Likewise.

Index: trunk/gcc/config/i386/sse.md
===================================================================
*** trunk.orig/gcc/config/i386/sse.md	2010-05-10 13:27:11.000000000 +0200
--- trunk/gcc/config/i386/sse.md	2010-05-10 14:40:05.000000000 +0200
***************
*** 1377,1382 ****
--- 1377,1395 ----
    [(set_attr "type" "sseadd")
     (set_attr "mode" "V2DF")])
  
+ (define_expand "reduc_splus_v8sf"	; Reduce V8SF operand 1 into operand 0 with a chain of three horizontal adds.
+   [(match_operand:V8SF 0 "register_operand" "")
+    (match_operand:V8SF 1 "register_operand" "")]
+   "TARGET_AVX"
+ {  /* Each horizontal add consumes the result of the previous one.  */
+   rtx tmp = gen_reg_rtx (V8SFmode);
+   rtx tmp2 = gen_reg_rtx (V8SFmode);
+   emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
+   emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));  /* Must read tmp, not operands[1], or the first step is dead.  */
+   emit_insn (gen_avx_haddv8sf3 (operands[0], tmp2, tmp2));
+   DONE;  /* NOTE(review): 256-bit vhaddps adds within each 128-bit lane; confirm no cross-lane step is needed.  */
+ })
+ 
  (define_expand "reduc_splus_v4sf"
    [(match_operand:V4SF 0 "register_operand" "")
     (match_operand:V4SF 1 "register_operand" "")]
***************
*** 1393,1398 ****
--- 1406,1422 ----
    DONE;
  })
  
+ (define_expand "reduc_splus_v4df"	; Reduce V4DF operand 1 into operand 0 with two chained horizontal adds.
+   [(match_operand:V4DF 0 "register_operand" "")
+    (match_operand:V4DF 1 "register_operand" "")]
+   "TARGET_AVX"
+ {
+   rtx tmp = gen_reg_rtx (V4DFmode);  /* Holds the result of the first horizontal add.  */
+   emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
+   emit_insn (gen_avx_haddv4df3 (operands[0], tmp, tmp));
+   DONE;  /* NOTE(review): if avx_haddv4df3 adds within 128-bit lanes (as vhaddpd on ymm does), the second step doubles each lane's sum instead of combining lanes -- confirm.  */
+ })
+ 
  (define_expand "reduc_splus_v2df"
    [(match_operand:V2DF 0 "register_operand" "")
     (match_operand:V2DF 1 "register_operand" "")]
***************
*** 3035,3040 ****
--- 3070,3087 ----
     (set_attr "prefix" "vex")
     (set_attr "mode" "V4DF")])
  
+ (define_insn "*avx_cvtps2pd256_2"	; Extend elements 0..3 of V8SF operand 1 to V4DF operand 0.
+   [(set (match_operand:V4DF 0 "register_operand" "=x")
+ 	(float_extend:V4DF
+ 	  (vec_select:V4SF
+ 	    (match_operand:V8SF 1 "nonimmediate_operand" "xm")	; Register or memory, matching what the predicate admits.
+ 	    (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+   "TARGET_AVX"
+   "vcvtps2pd\t{%x1, %0|%0, %x1}"
+   [(set_attr "type" "ssecvt")
+    (set_attr "prefix" "vex")
+    (set_attr "mode" "V4DF")])
+ 
  (define_insn "sse2_cvtps2pd"
    [(set (match_operand:V2DF 0 "register_operand" "=x")
  	(float_extend:V2DF
***************
*** 3069,3074 ****
--- 3116,3145 ----
   operands[2] = gen_reg_rtx (V4SFmode);
  })
  
+ (define_expand "vec_unpacks_hi_v8sf"	; Widen elements 4..7 of V8SF operand 1 to V4DF operand 0.
+   [(set (match_dup 2)	; Step 1: shuffle into the scratch register operands[2].
+    (vec_select:V8SF
+      (vec_concat:V16SF
+        (match_dup 2)
+        (match_operand:V8SF 1 "nonimmediate_operand" ""))
+      (parallel [(const_int 12)	; Indices 12..15 are elements 4..7 of operand 1 ...
+ 		(const_int 13)
+ 		(const_int 14)
+ 		(const_int 15)
+ 		(const_int 4)	; ... and indices 4..7 are the scratch's own elements 4..7.
+ 		(const_int 5)
+ 		(const_int 6)
+ 		(const_int 7)])))
+   (set (match_operand:V4DF 0 "register_operand" "")	; Step 2: extend the low four floats of the scratch.
+    (float_extend:V4DF
+      (vec_select:V4SF
+        (match_dup 2)
+        (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+  "TARGET_AVX"
+ {
+  operands[2] = gen_reg_rtx (V8SFmode);  /* Fresh V8SF pseudo used as the scratch.  */
+ })
+ 
  (define_expand "vec_unpacks_lo_v4sf"
    [(set (match_operand:V2DF 0 "register_operand" "")
  	(float_extend:V2DF
***************
*** 3077,3082 ****
--- 3148,3161 ----
  	    (parallel [(const_int 0) (const_int 1)]))))]
    "TARGET_SSE2")
  
+ (define_expand "vec_unpacks_lo_v8sf"	; Widen elements 0..3 of V8SF operand 1 to V4DF operand 0.
+   [(set (match_operand:V4DF 0 "register_operand" "")
+ 	(float_extend:V4DF
+ 	  (vec_select:V4SF
+ 	    (match_operand:V8SF 1 "nonimmediate_operand" "")
+ 	    (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]	; Low half only, so no scratch shuffle is needed.
+   "TARGET_AVX")
+ 
  (define_expand "vec_unpacks_float_hi_v8hi"
    [(match_operand:V4SF 0 "register_operand" "")
     (match_operand:V8HI 1 "register_operand" "")]
***************
*** 3149,3154 ****
--- 3228,3261 ----
  	    (parallel [(const_int 0) (const_int 1)]))))]
    "TARGET_SSE2")
  
+ (define_expand "vec_unpacks_float_hi_v8si"	; Convert elements 4..7 of V8SI operand 1 to V4DF operand 0.
+   [(set (match_dup 2)	; Step 1: copy the high half of operand 1 into both halves of the scratch.
+ 	(vec_select:V8SI
+ 	  (match_operand:V8SI 1 "nonimmediate_operand" "")	; Must be V8SI: the selector below reads elements 4..7.
+ 	  (parallel [(const_int 4)
+ 		     (const_int 5)
+ 		     (const_int 6)
+ 		     (const_int 7)
+ 		     (const_int 4)
+ 		     (const_int 5)
+ 		     (const_int 6)
+ 		     (const_int 7)])))
+    (set (match_operand:V4DF 0 "register_operand" "")	; Step 2: convert the low four ints of the scratch.
+         (float:V4DF
+ 	  (vec_select:V4SI
+ 	    (match_dup 2)
+ 	    (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+  "TARGET_AVX"
+  "operands[2] = gen_reg_rtx (V8SImode);")	; Fresh V8SI pseudo used as the scratch.
+ 
+ (define_expand "vec_unpacks_float_lo_v8si"	; Convert elements 0..3 of V8SI operand 1 to V4DF operand 0.
+   [(set (match_operand:V4DF 0 "register_operand" "")
+ 	(float:V4DF
+ 	  (vec_select:V4SI
+ 	    (match_operand:V8SI 1 "nonimmediate_operand" "")
+ 	    (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]	; Low half only, so no scratch shuffle is needed.
+   "TARGET_AVX")
+ 
  (define_expand "vec_unpacku_float_hi_v4si"
    [(set (match_dup 5)
  	(vec_select:V4SI
***************
*** 4328,4333 ****
--- 4482,4501 ----
     (set_attr "prefix" "vex")
     (set_attr "mode" "V4DF")])
  
+ (define_insn "vec_interleave_highv4df"	; Emits vunpckhpd for the selection {2, 6, 3, 7} of operands 1 and 2 concatenated.
+   [(set (match_operand:V4DF 0 "register_operand" "=x")
+ 	(vec_select:V4DF
+ 	  (vec_concat:V8DF
+ 	    (match_operand:V4DF 1 "register_operand" "x")
+ 	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ 	  (parallel [(const_int 2) (const_int 6)	; NOTE(review): 256-bit vunpckhpd works per 128-bit lane, i.e. {1, 5, 3, 7};
+ 		     (const_int 3) (const_int 7)])))]	; confirm this selector really matches the emitted instruction.
+   "TARGET_AVX"
+   "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
+   [(set_attr "type" "sselog")
+    (set_attr "prefix" "vex")
+    (set_attr "mode" "V4DF")])
+ 
  (define_expand "vec_interleave_highv2df"
    [(set (match_operand:V2DF 0 "register_operand" "")
  	(vec_select:V2DF
***************
*** 4434,4439 ****
--- 4602,4621 ----
    [(set_attr "type" "sselog")
     (set_attr "prefix" "vex")
     (set_attr "mode" "V4DF")])
+ 
+ (define_insn "vec_interleave_lowv4df"	; Emits vunpcklpd for the selection {0, 4, 1, 5} of operands 1 and 2 concatenated.
+   [(set (match_operand:V4DF 0 "register_operand" "=x")
+ 	(vec_select:V4DF
+ 	  (vec_concat:V8DF
+ 	    (match_operand:V4DF 1 "register_operand" "x")
+ 	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ 	  (parallel [(const_int 0) (const_int 4)	; NOTE(review): 256-bit vunpcklpd works per 128-bit lane, i.e. {0, 4, 2, 6};
+ 		     (const_int 1) (const_int 5)])))]	; confirm this selector really matches the emitted instruction.
+   "TARGET_AVX"
+   "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
+   [(set_attr "type" "sselog")
+    (set_attr "prefix" "vex")
+    (set_attr "mode" "V4DF")])
  
  (define_expand "vec_interleave_lowv2df"
    [(set (match_operand:V2DF 0 "register_operand" "")



More information about the Gcc-patches mailing list