[PATCH i386 AVX512] [68/n] Add vpmullw, vpacksdw, pmaddwd insn patterns.

Kirill Yukhin kirill.yukhin@gmail.com
Thu Oct 9 11:09:00 GMT 2014


Hello,
This patch extends vpmullw, vpacksdw and pmaddwd
insn patterns.

Bootstrapped.
AVX-512* tests on top of patch-set all pass
under simulator.

Is it ok for trunk?

gcc/
	* config/i386/sse.md
	(define_c_enum "unspec"): Add UNSPEC_PMADDWD512.
	(define_mode_iterator VI2_AVX2): Add V32HI mode.
	(define_expand "mul<mode>3<mask_name>"): Add masking.
	(define_insn "*mul<mode>3<mask_name>"): Ditto.
	(define_expand "<s>mul<mode>3_highpart<mask_name>"): Ditto.
	(define_insn "*<s>mul<mode>3_highpart<mask_name>"): Ditto.
	(define_insn "avx512bw_pmaddwd512<mode><mask_name>"): New.
	(define_mode_attr SDOT_PMADD_SUF): Ditto.
	(define_expand "sdot_prod<mode>"): Add <SDOT_PMADD_SUF>.
	(define_insn "<sse2_avx2>_packssdw<mask_name>"): Add masking.
	(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"): Ditto.
	(define_insn "avx2_packusdw"): Delete.
	(define_insn "sse4_1_packusdw"): Ditto.
	(define_insn "<sse4_1_avx2>_packusdw<mask_name>"): New.

--
Thanks, K

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f88d3d0..90414c7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -132,6 +132,7 @@
   ;; For AVX512BW support
   UNSPEC_DBPSADBW
   UNSPEC_PMADDUBSW512
+  UNSPEC_PMADDWD512
   UNSPEC_PSHUFHW
   UNSPEC_PSHUFLW
   UNSPEC_CVTINT2MASK
@@ -301,7 +302,7 @@
   [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
 
 (define_mode_iterator VI2_AVX2
-  [(V16HI "TARGET_AVX2") V8HI])
+  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
 
 (define_mode_iterator VI2_AVX512F
   [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
@@ -9248,28 +9249,30 @@
   DONE;
 })
 
-(define_expand "mul<mode>3"
+(define_expand "mul<mode>3<mask_name>"
   [(set (match_operand:VI2_AVX2 0 "register_operand")
 	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
 		       (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
-  "TARGET_SSE2"
+  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
 
-(define_insn "*mul<mode>3"
-  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
-        (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
-                       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+(define_insn "*mul<mode>3<mask_name>"
+  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
+		       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
+  "TARGET_SSE2
+   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
+   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "@
    pmullw\t{%2, %0|%0, %2}
-   vpmullw\t{%2, %1, %0|%0, %1, %2}"
+   vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseimul")
    (set_attr "prefix_data16" "1,*")
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_expand "<s>mul<mode>3_highpart"
+(define_expand "<s>mul<mode>3_highpart<mask_name>"
   [(set (match_operand:VI2_AVX2 0 "register_operand")
 	(truncate:VI2_AVX2
 	  (lshiftrt:<ssedoublemode>
@@ -9279,23 +9282,26 @@
 	      (any_extend:<ssedoublemode>
 		(match_operand:VI2_AVX2 2 "nonimmediate_operand")))
 	    (const_int 16))))]
-  "TARGET_SSE2"
+  "TARGET_SSE2
+   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
 
-(define_insn "*<s>mul<mode>3_highpart"
-  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+(define_insn "*<s>mul<mode>3_highpart<mask_name>"
+  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
 	(truncate:VI2_AVX2
 	  (lshiftrt:<ssedoublemode>
 	    (mult:<ssedoublemode>
 	      (any_extend:<ssedoublemode>
-		(match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
+		(match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
 	      (any_extend:<ssedoublemode>
-		(match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
+		(match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
 	    (const_int 16))))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+  "TARGET_SSE2
+   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
+   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "@
    pmulh<u>w\t{%2, %0|%0, %2}
-   vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
+   vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseimul")
    (set_attr "prefix_data16" "1,*")
@@ -9538,6 +9544,18 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
+(define_insn "avx512bw_pmaddwd512<mode><mask_name>"
+  [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
+          (unspec:<sseunpackmode>
+            [(match_operand:VI2_AVX2 1 "register_operand" "v")
+             (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
+             UNSPEC_PMADDWD512))]
+   "TARGET_AVX512BW && <mask_mode512bit_condition>"
+   "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
+  [(set_attr "type" "sseiadd")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_expand "avx2_pmaddwd"
   [(set (match_operand:V8SI 0 "register_operand")
 	(plus:V8SI
@@ -9778,6 +9796,9 @@
   DONE;
 })
 
+(define_mode_attr SDOT_PMADD_SUF
+  [(V32HI "512v32hi") (V16HI "") (V8HI "")])
+
 (define_expand "sdot_prod<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand")
    (match_operand:VI2_AVX2 1 "register_operand")
@@ -9786,7 +9807,7 @@
   "TARGET_SSE2"
 {
   rtx t = gen_reg_rtx (<sseunpackmode>mode);
-  emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
+  emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
 			  gen_rtx_PLUS (<sseunpackmode>mode,
 					operands[3], t)));
@@ -11024,17 +11045,17 @@
    (set_attr "prefix" "orig,maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<sse2_avx2>_packssdw"
-  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+(define_insn "<sse2_avx2>_packssdw<mask_name>"
+  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
 	(vec_concat:VI2_AVX2
 	  (ss_truncate:<ssehalfvecmode>
-	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
+	    (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
 	  (ss_truncate:<ssehalfvecmode>
-	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
-  "TARGET_SSE2"
+	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
+  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "@
    packssdw\t{%2, %0|%0, %2}
-   vpackssdw\t{%2, %1, %0|%0, %1, %2}"
+   vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sselog")
    (set_attr "prefix_data16" "1,*")
@@ -13515,29 +13536,30 @@
   ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
 })
 
-(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
-  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
+  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
 	(truncate:VI2_AVX2
 	  (lshiftrt:<ssedoublemode>
 	    (plus:<ssedoublemode>
 	      (lshiftrt:<ssedoublemode>
 		(mult:<ssedoublemode>
 		  (sign_extend:<ssedoublemode>
-		    (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
+		    (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
 		  (sign_extend:<ssedoublemode>
-		    (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
+		    (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
 		(const_int 14))
 	      (match_operand:VI2_AVX2 3 "const1_operand"))
 	    (const_int 1))))]
-  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
+   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
   "@
    pmulhrsw\t{%2, %0|%0, %2}
-   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseimul")
    (set_attr "prefix_data16" "1,*")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "orig,maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "*ssse3_pmulhrswv4hi3"
@@ -13935,36 +13957,22 @@
    (set_attr "btver2_decode" "vector,vector")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "avx2_packusdw"
-  [(set (match_operand:V16HI 0 "register_operand" "=x")
-	(vec_concat:V16HI
-	  (us_truncate:V8HI
-	    (match_operand:V8SI 1 "register_operand" "x"))
-	  (us_truncate:V8HI
-	    (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
-  "TARGET_AVX2"
-  "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "OI")])
-
-(define_insn "sse4_1_packusdw"
-  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
-	(vec_concat:V8HI
-	  (us_truncate:V4HI
-	    (match_operand:V4SI 1 "register_operand" "0,x"))
-	  (us_truncate:V4HI
-	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
-  "TARGET_SSE4_1"
+(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
+  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+	(vec_concat:VI2_AVX2
+	  (us_truncate:<ssehalfvecmode>
+	    (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
+	  (us_truncate:<ssehalfvecmode>
+	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
+  "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "@
    packusdw\t{%2, %0|%0, %2}
-   vpackusdw\t{%2, %1, %0|%0, %1, %2}"
+   vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,vex")
-   (set_attr "mode" "TI")])
+   (set_attr "prefix" "orig,maybe_evex")
+   (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "<sse4_1_avx2>_pblendvb"
   [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")



More information about the Gcc-patches mailing list