This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[AVX]: Add some 256bit AVX patterns
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 9 Apr 2008 14:52:03 -0700
- Subject: [AVX]: Add some 256bit AVX patterns
This patch adds some 256bit AVX patterns.
H.J.
----
2008-04-09 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.md (UNSPEC_PCMP): New.
(*avx_setcc<mode>): Likewise.
(prefix_vex): Likewise.
(*fop_df_comm_avx): Likewise.
(mode): Add OI, V8SF and V4DF.
* config/i386/predicates.md (const_4_to_5_operand): New.
(const_6_to_7_operand): Likewise.
(const_8_to_11_operand): Likewise.
(const_12_to_15_operand): Likewise.
(avx_comparison_float_operator): Likewise.
* config/i386/sse.md (AVX256MODEI): New.
(AVXMODEI): Likewise.
(AVX256MODE): Likewise.
(AVXMODE): Likewise.
(AVX256MODEF2P): Likewise.
(AVXMODEF2P): Likewise.
(avxvecmode): New.
(avxmodesuffixf2c): Likewise.
(blendbits): Support V8SF and V4DF.
(mov<mode>): New 256bit AVX integer vector pattern.
(*mov<mode>_internal): Likewise.
(mov<mode>): New 256bit AVX floating point vector pattern.
(*mov<mode>_internal): Likewise.
(<addsub><mode>3): Likewise.
(*<addsub><mode>3): Likewise.
(mul<mode>3): Likewise.
(*mul<mode>3): Likewise.
(<code><mode>3): Likewise.
(*<code><mode>3): Likewise.
(*ieee_smin<mode>3): Likewise.
(*ieee_smax<mode>3): Likewise.
(<code><mode>3): Likewise.
(*<code><mode>3): Likewise.
(push<mode>1): New 256bit AVX vector pattern.
(movmisalign<mode>): Likewise.
(avx_movup<avxmodesuffixf2c>256): New.
(avx_movdqu): Likewise.
(divv8sf3): Likewise.
(divv4df3): Likewise.
(avx_div<mode>3): Likewise.
(avx_addsubv8sf3): Likewise.
(avx_addsubv4df3): Likewise.
(avx_h<addsub>v4df3): Likewise.
(avx_h<addsub>v8sf3): Likewise.
(avx_cmpp<avxmodesuffixf2c><mode>3): Likewise.
(avx_cmps<ssemodesuffixf2c><mode>3): Likewise.
(*avx_maskcmp<mode>3): Likewise.
(avx_nand<mode>3): Likewise.
(*avx_nand<mode>3): Likewise.
(avx_cvtdq2ps256): Likewise.
(avx_cvtps2dq256): Likewise.
(avx_cvttps2dq256): Likewise.
(avx_cvtdq2pd256): Likewise.
(avx_cvtpd2dq256): Likewise.
(avx_cvttpd2dq256): Likewise.
(avx_cvtpd2ps256): Likewise.
(avx_cvtps2pd256): Likewise.
(avx_shufps256): Likewise.
(avx_shufps256_1): Likewise.
(avx_shufpd256): Likewise.
(avx_shufpd256_1): Likewise.
(*avx_nand<mode>3): Likewise.
(*avx_<code><mode>3): Likewise.
(avx_blendp<avxmodesuffixf2c>256): Likewise.
(avx_blendvp<avxmodesuffixf2c>256): Likewise.
(avx_dpps256): Likewise.
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md (.../fsf/trunk) (revision 2097)
+++ gcc/config/i386/i386.md (.../branches/avx) (revision 2097)
@@ -197,6 +197,9 @@
; For PCLMUL support
(UNSPEC_PCLMUL 165)
+
+ ; For AVX support
+ (UNSPEC_PCMP 166)
])
(define_constants
@@ -276,7 +279,7 @@
;; Main data type used by the insn
(define_attr "mode"
- "unknown,none,QI,HI,SI,DI,SF,DF,XF,TI,V4SF,V2DF,V2SF,V1DF"
+ "unknown,none,QI,HI,SI,DI,SF,DF,XF,TI,V4SF,V2DF,V2SF,V1DF,OI,V8SF,V4DF"
(const_string "unknown"))
;; The CPU unit operations uses.
@@ -373,6 +376,12 @@
;; There are also additional prefixes in SSSE3.
(define_attr "prefix_extra" "" (const_int 0))
+;; Set when VEX prefix is used.
+(define_attr "prefix_vex" ""
+ (if_then_else (eq_attr "mode" "OI,V8SF,V4DF")
+ (const_int 1)
+ (const_int 0)))
+
;; Set when modrm byte is used.
(define_attr "modrm" ""
(cond [(eq_attr "type" "str,leave")
@@ -14014,6 +14023,16 @@
;; 0xffffffff is NaN, but not in normalized form, so we can't represent
;; it directly.
+(define_insn "*avx_setcc<mode>"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 1 "avx_comparison_float_operator"
+ [(match_operand:MODEF 2 "register_operand" "x")
+ (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_AVX"
+ "vcmp%D1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*sse_setcc<mode>"
[(set (match_operand:MODEF 0 "register_operand" "=x")
(match_operator:MODEF 1 "sse_comparison_operator"
@@ -16025,6 +16044,22 @@
(const_string "fop"))))
(set_attr "mode" "DF")])
+(define_insn "*fop_df_comm_avx"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "nonimmediate_operand" "%x")
+ (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
+ "TARGET_AVX
+ && TARGET_SSE_MATH
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:DF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd")))
+ (set_attr "mode" "DF")])
+
(define_insn "*fop_df_comm_sse"
[(set (match_operand:DF 0 "register_operand" "=x")
(match_operator:DF 3 "binary_fp_operator"
Index: gcc/config/i386/predicates.md
===================================================================
--- gcc/config/i386/predicates.md (.../fsf/trunk) (revision 2097)
+++ gcc/config/i386/predicates.md (.../branches/avx) (revision 2097)
@@ -634,11 +634,31 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 2, 3)")))
+;; Match 4 to 5.
+(define_predicate "const_4_to_5_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 4, 5)")))
+
;; Match 4 to 7.
(define_predicate "const_4_to_7_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 4, 7)")))
+;; Match 6 to 7.
+(define_predicate "const_6_to_7_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 6, 7)")))
+
+;; Match 8 to 11.
+(define_predicate "const_8_to_11_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 8, 11)")))
+
+;; Match 12 to 15.
+(define_predicate "const_12_to_15_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 12, 15)")))
+
;; Match exactly one bit in 2-bit mask.
(define_predicate "const_pow2_1_to_2_operand"
(and (match_code "const_int")
@@ -908,6 +928,11 @@
(define_special_predicate "sse_comparison_operator"
(match_code "eq,lt,le,unordered,ne,unge,ungt,ordered"))
+;; Return 1 if OP is a comparison operator that can be issued by
+;; avx predicate generation instructions
+(define_predicate "avx_comparison_float_operator"
+ (match_code "ne,eq,ge,gt,le,lt,unordered,ordered,uneq,unge,ungt,unle,unlt,ltgt"))
+
;; Return 1 if OP is a comparison operator that can be issued by sse predicate
;; generation instructions
(define_predicate "sse5_comparison_float_operator"
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md (.../fsf/trunk) (revision 2097)
+++ gcc/config/i386/sse.md (.../branches/avx) (revision 2097)
@@ -26,6 +26,18 @@
;; All 16-byte vector modes handled by SSE
(define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
+;; 32 byte integral vector modes handled by AVX
+(define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
+
+;; All integral vector modes handled by AVX
+(define_mode_iterator AVXMODEI [V16QI V8HI V4SI V2DI V32QI V16HI V8SI V4DI])
+
+;; All 32-byte vector modes handled by AVX
+(define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
+
+;; All vector modes handled by AVX
+(define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
+
;; Mix-n-match
(define_mode_iterator SSEMODE12 [V16QI V8HI])
(define_mode_iterator SSEMODE24 [V8HI V4SI])
@@ -36,6 +48,9 @@
(define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
(define_mode_iterator SSEMODEF2P [V4SF V2DF])
+(define_mode_iterator AVX256MODEF2P [V8SF V4DF])
+(define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
+
;; Mapping from float mode to required SSE level
(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
@@ -53,8 +68,13 @@
;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
+;; Mapping for AVX
+(define_mode_attr avxvecmode
+ [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF") (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V8SF "V8SF") (V4DF "V4DF")])
+(define_mode_attr avxmodesuffixf2c [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
+
;; Mapping of immediate bits for blend instructions
-(define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
+(define_mode_attr blendbits [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
@@ -64,6 +84,41 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_expand "mov<mode>"
+ [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
+ (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
+ "TARGET_AVX"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
+
+;; Internal move for the AVX256MODE vector modes.  The three alternatives
+;; are: 0 = load a constant accepted by the "C" constraint, 1 = load from a
+;; register or memory into a register, 2 = store a register to memory.  The
+;; insn condition requires at least one of the two operands to be a register.
+(define_insn "*mov<mode>_internal"
+ [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "=x,x ,m")
+ (match_operand:AVX256MODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
+ "TARGET_AVX
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ /* Constant source: the mnemonic comes from the shared helper.  */
+ return standard_sse_constant_opcode (insn, operands[1]);
+ case 1:
+ case 2:
+ /* Choose the mnemonic from the insn's "mode" attribute (set from
+ <avxvecmode> below): V8SF -> vmovaps, V4DF -> vmovapd, and anything
+ else (the integer vector modes, which map to OI) -> vmovdqa.  */
+ if (get_attr_mode (insn) == MODE_V8SF)
+ return "vmovaps\t{%1, %0|%0, %1}";
+ else if (get_attr_mode (insn) == MODE_V4DF)
+ return "vmovapd\t{%1, %0|%0, %1}";
+ else
+ return "vmovdqa\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "mode" "<avxvecmode>")])
+
;; All of these patterns are enabled for SSE1 as well as SSE2.
;; This is essential for maintaining stable calling conventions.
@@ -230,6 +285,14 @@
})
(define_expand "push<mode>1"
+ [(match_operand:AVX256MODE 0 "register_operand" "")]
+ "TARGET_AVX"
+{
+ ix86_expand_push (<MODE>mode, operands[0]);
+ DONE;
+})
+
+(define_expand "push<mode>1"
[(match_operand:SSEMODE 0 "register_operand" "")]
"TARGET_SSE"
{
@@ -238,6 +301,15 @@
})
(define_expand "movmisalign<mode>"
+ [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
+ (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
+ "TARGET_AVX"
+{
+ ix86_expand_vector_move_misalign (<MODE>mode, operands);
+ DONE;
+})
+
+(define_expand "movmisalign<mode>"
[(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
(match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
@@ -246,6 +318,17 @@
DONE;
})
+(define_insn "avx_movup<avxmodesuffixf2c>256"
+ [(set (match_operand:AVX256MODEF2P 0 "nonimmediate_operand" "=x,m")
+ (unspec:AVX256MODEF2P
+ [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<sse>_movup<ssemodesuffixf2c>"
[(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
(unspec:SSEMODEF2P
@@ -257,6 +340,15 @@
[(set_attr "type" "ssemov")
(set_attr "mode" "<MODE>")])
+(define_insn "avx_movdqu"
+ [(set (match_operand:V32QI 0 "nonimmediate_operand" "=x,m")
+ (unspec:V32QI [(match_operand:V32QI 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "vmovdqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "OI")])
+
(define_insn "sse2_movdqu"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
(unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
@@ -359,6 +451,25 @@
"ix86_expand_fp_absneg_operator (ABS, <MODE>mode, operands); DONE;")
(define_expand "<addsub><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (plusminus:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*<addsub><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (plusminus:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "<comm>x")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<addsub>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "<addsub><mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(plusminus:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
@@ -392,6 +503,25 @@
(set_attr "mode" "<ssescalarmode>")])
(define_expand "mul<mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (mult:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
+
+(define_insn "*mul<mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (mult:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "mul<mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(mult:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
@@ -424,6 +554,41 @@
[(set_attr "type" "ssemul")
(set_attr "mode" "<ssescalarmode>")])
+;; Expand a V8SF division.  The operands are first fixed up for a binary
+;; operation.  If every condition tested below holds, the division is emitted
+;; through ix86_emit_swdivsf and expansion is finished (DONE); otherwise the
+;; div RTL template of this expander is emitted as written.
+(define_expand "divv8sf3"
+ [(set (match_operand:V8SF 0 "register_operand" "")
+ (div:V8SF (match_operand:V8SF 1 "register_operand" "")
+ (match_operand:V8SF 2 "nonimmediate_operand" "")))]
+ "TARGET_AVX"
+{
+ ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
+
+ /* NOTE(review): presumably a reciprocal-based software division, given
+ the TARGET_RECIP gate -- confirm against ix86_emit_swdivsf.  It is only
+ used when not optimizing for size and when finite-math-only,
+ no-trapping-math and unsafe-math-optimizations are all in effect.  */
+ if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swdivsf (operands[0], operands[1],
+ operands[2], V8SFmode);
+ DONE;
+ }
+})
+
+(define_expand "divv4df3"
+ [(set (match_operand:V4DF 0 "register_operand" "")
+ (div:V4DF (match_operand:V4DF 1 "register_operand" "")
+ (match_operand:V4DF 2 "nonimmediate_operand" "")))]
+ "TARGET_AVX"
+ "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
+
+(define_insn "avx_div<mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (div:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "register_operand" "x")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "<MODE>")])
+
(define_expand "divv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "")
(div:V4SF (match_operand:V4SF 1 "register_operand" "")
@@ -571,6 +736,24 @@
;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
(define_expand "<code><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (smaxmin:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*<code><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (smaxmin:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "<code><mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(smaxmin:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
@@ -623,6 +806,28 @@
;; presence of -0.0 and NaN.
(define_insn "*ieee_smin<mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (unspec:AVX256MODEF2P
+ [(match_operand:AVX256MODEF2P 1 "register_operand" "x")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ieee_smax<mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (unspec:AVX256MODEF2P
+ [(match_operand:AVX256MODEF2P 1 "register_operand" "x")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ieee_smin<mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(unspec:SSEMODEF2P
[(match_operand:SSEMODEF2P 1 "register_operand" "0")
@@ -644,6 +849,32 @@
[(set_attr "type" "sseadd")
(set_attr "mode" "<MODE>")])
+(define_insn "avx_addsubv8sf3"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_merge:V8SF
+ (plus:V8SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (minus:V8SF (match_dup 1) (match_dup 2))
+ (const_int 85)))]
+ "TARGET_AVX"
+ "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "avx_addsubv4df3"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_merge:V4DF
+ (plus:V4DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (minus:V4DF (match_dup 1) (match_dup 2))
+ (const_int 5)))]
+ "TARGET_AVX"
+ "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4DF")])
+
(define_insn "sse3_addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
@@ -671,6 +902,74 @@
[(set_attr "type" "sseadd")
(set_attr "mode" "V2DF")])
+;; 256-bit horizontal add/subtract of doubles.  vhaddpd/vhsubpd on ymm
+;; registers operate within each 128-bit half, so the result is
+;;   { op1[0] OP op1[1], op2[0] OP op2[1],
+;;     op1[2] OP op1[3], op2[2] OP op2[3] }
+;; i.e. results from operand 1 and operand 2 alternate.  This is the same
+;; per-half layout that avx_h<addsub>v8sf3 describes for V8SF.
+(define_insn "avx_h<addsub>v4df3"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_concat:V4DF
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
+ (plusminus:DF
+ (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vh<addsub>pd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "avx_h<addsub>v8sf3"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_concat:V8SF
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vh<addsub>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V8SF")])
+
(define_insn "sse3_h<addsub>v4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_concat:V4SF
@@ -765,6 +1064,45 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_31_operand" "n")]
+ UNSPEC_PCMP))]
+ "TARGET_AVX"
+ "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "<MODE>")])
+
+;; Scalar compare with an immediate predicate.  Element 0 of the result is
+;; the UNSPEC_PCMP of operands 1 and 2 under the predicate in operand 3; the
+;; remaining elements are taken from operand 1 (vec_merge with mask 1).
+;; The output operand needs an explicit "=x" constraint like every other
+;; insn pattern here; an empty constraint string gives the register
+;; allocator no register class for the destination.
+(define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_31_operand" "n")]
+ UNSPEC_PCMP)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcmps<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;; We don't promote 128bit vector compare intrinsics. But vectorizer
+;; may generate 256bit vector compare instructions.
+(define_insn "*avx_maskcmp<mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (match_operator:AVX256MODEF2P 3 "avx_comparison_float_operator"
+ [(match_operand:AVX256MODEF2P 1 "register_operand" "x")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "xm")]))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<sse>_maskcmp<mode>3"
[(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
(match_operator:SSEMODEF4 3 "sse_comparison_operator"
@@ -839,6 +1177,17 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "avx_nand<mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (and:AVX256MODEF2P
+ (not:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "register_operand" "x"))
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<sse>_nand<mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(and:SSEMODEF2P
@@ -851,6 +1200,25 @@
(set_attr "mode" "<MODE>")])
(define_expand "<code><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (plogic:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*<code><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (plogic:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "<code><mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(plogic:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
@@ -1601,6 +1969,14 @@
(set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
+(define_insn "avx_cvtdq2ps256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtdq2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V8SF")])
+
(define_insn "sse2_cvtdq2ps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
@@ -1609,6 +1985,15 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
+(define_insn "avx_cvtps2dq256"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX"
+ "vcvtps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "OI")])
+
(define_insn "sse2_cvtps2dq"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
@@ -1619,6 +2004,14 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "avx_cvttps2dq256"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvttps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "OI")])
+
(define_insn "sse2_cvttps2dq"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
@@ -1772,6 +2165,14 @@
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")])
+(define_insn "avx_cvtdq2pd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtdq2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4DF")])
+
(define_insn "sse2_cvtdq2pd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(float:V2DF
@@ -1783,6 +2184,15 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
+(define_insn "avx_cvtpd2dq256"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX"
+ "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
(define_expand "sse2_cvtpd2dq"
[(set (match_operand:V4SI 0 "register_operand" "")
(vec_concat:V4SI
@@ -1805,6 +2215,14 @@
(set_attr "mode" "TI")
(set_attr "amdfam10_decode" "double")])
+(define_insn "avx_cvttpd2dq256"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
(define_expand "sse2_cvttpd2dq"
[(set (match_operand:V4SI 0 "register_operand" "")
(vec_concat:V4SI
@@ -1855,6 +2273,15 @@
(set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "DF")])
+(define_insn "avx_cvtpd2ps256"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (float_truncate:V4SF
+ (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
(define_expand "sse2_cvtpd2ps"
[(set (match_operand:V4SF 0 "register_operand" "")
(vec_concat:V4SF
@@ -1877,6 +2304,15 @@
(set_attr "mode" "V4SF")
(set_attr "amdfam10_decode" "double")])
+(define_insn "avx_cvtps2pd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float_extend:V4DF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtps2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4DF")])
+
(define_insn "sse2_cvtps2pd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(float_extend:V2DF
@@ -2148,6 +2584,59 @@
(set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
+;; Expand the intrinsic form of the 256-bit shufps: operand 3 is an integer
+;; mask made of four 2-bit fields.  It is unpacked into the eight explicit
+;; element selectors expected by avx_shufps256_1, which index into the
+;; 16-element concatenation of operand 1 (elements 0-7) and operand 2
+;; (elements 8-15).
+(define_expand "avx_shufps256"
+ [(match_operand:V8SF 0 "register_operand" "")
+ (match_operand:V8SF 1 "register_operand" "")
+ (match_operand:V8SF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_AVX"
+{
+ int mask = INTVAL (operands[3]);
+ /* Fields 0 and 1 pick from operand 1 (offset 0), fields 2 and 3 pick
+ from operand 2 (offset 8).  The same four fields are then reused with
+ an extra offset of 4 for result elements 4-7.  */
+ emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 8),
+ GEN_INT (((mask >> 6) & 3) + 8),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 12),
+ GEN_INT (((mask >> 6) & 3) + 12)));
+ DONE;
+})
+
+;; Each 2-bit field of the immediate drives two result elements: selector
+;; operands 3-6 describe result elements 0-3 and selector operands 7-10
+;; describe result elements 4-7.  The insn condition only accepts the
+;; pattern when each selector in the second group is exactly 4 larger than
+;; its counterpart in the first group, so that one 8-bit immediate can
+;; encode all eight selectors.
+(define_insn "avx_shufps256_1"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_8_to_11_operand" "")
+ (match_operand 6 "const_8_to_11_operand" "")
+ (match_operand 7 "const_4_to_7_operand" "")
+ (match_operand 8 "const_4_to_7_operand" "")
+ (match_operand 9 "const_12_to_15_operand" "")
+ (match_operand 10 "const_12_to_15_operand" "")])))]
+ "TARGET_AVX
+ && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
+ && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
+ && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
+ && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
+{
+ /* Rebuild the 8-bit immediate from selectors 3-6: two bits each, with
+ the offset of 8 removed from the two selectors that index operand 2.  */
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 8) << 4;
+ mask |= (INTVAL (operands[6]) - 8) << 6;
+ /* operands[3] is overwritten so that %3 in the template prints the
+ immediate.  */
+ operands[3] = GEN_INT (mask);
+
+ return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V8SF")])
+
(define_expand "sse_shufps"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")
@@ -2499,6 +2988,46 @@
[(set_attr "type" "sselog,ssemov,ssemov")
(set_attr "mode" "V2DF,V1DF,V1DF")])
+;; Expand the intrinsic form of the 256-bit shufpd: operand 3 is an integer
+;; mask of which the low four bits are used, one per result element.  Each
+;; bit is turned into an explicit element selector for avx_shufpd256_1,
+;; indexing the 8-element concatenation of operand 1 (elements 0-3) and
+;; operand 2 (elements 4-7).
+(define_expand "avx_shufpd256"
+ [(match_operand:V4DF 0 "register_operand" "")
+ (match_operand:V4DF 1 "register_operand" "")
+ (match_operand:V4DF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_AVX"
+{
+ int mask = INTVAL (operands[3]);
+ /* Bit 0 -> element 0 or 1, bit 1 -> element 4 or 5,
+ bit 2 -> element 2 or 3, bit 3 -> element 6 or 7.  */
+ emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
+ GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 5 : 4),
+ GEN_INT (mask & 4 ? 3 : 2),
+ GEN_INT (mask & 8 ? 7 : 6)));
+ DONE;
+})
+
+;; 256-bit shufpd with explicit element selectors.  Each of the four
+;; selectors is restricted by its predicate to a pair of adjacent elements
+;; of the concatenation of operands 1 and 2 (0-1, 4-5, 2-3, 6-7), so every
+;; selector contributes exactly one bit to the instruction's immediate.
+(define_insn "avx_shufpd256_1"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand" "")
+ (match_operand 4 "const_4_to_5_operand" "")
+ (match_operand 5 "const_2_to_3_operand" "")
+ (match_operand 6 "const_6_to_7_operand" "")])))]
+ "TARGET_AVX"
+{
+ /* Rebuild the 4-bit immediate: subtract each selector's base element
+ (0, 4, 2, 6) to get a single bit and place it at bit 0, 1, 2, 3.  */
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 4) << 1;
+ mask |= (INTVAL (operands[5]) - 2) << 2;
+ mask |= (INTVAL (operands[6]) - 6) << 3;
+ /* operands[3] is overwritten so that %3 in the template prints the
+ immediate.  */
+ operands[3] = GEN_INT (mask);
+
+ return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4DF")])
+
(define_expand "sse2_shufpd"
[(match_operand:V2DF 0 "register_operand" "")
(match_operand:V2DF 1 "register_operand" "")
@@ -3762,6 +4291,16 @@
operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
})
+;; 256-bit integer and-not: operand 0 = ~operand 1 & operand 2.  All three
+;; operands must use the same AVX256MODEI iterator; mixing in a second
+;; iterator would expand the pattern over the cross product of both mode
+;; lists and produce mode-inconsistent RTL.  The insn is emitted as vandnps
+;; and tagged with mode V8SF.
+(define_insn "*avx_nand<mode>3"
+ [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
+ (and:AVX256MODEI
+ (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
+ (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vandnps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V8SF")])
+
(define_insn "*sse_nand<mode>3"
[(set (match_operand:SSEMODEI 0 "register_operand" "=x")
(and:SSEMODEI
@@ -3802,6 +4341,17 @@
"TARGET_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
+ (plogic:AVX256MODEI
+ (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
+ (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V8SF")])
+
(define_insn "*sse_<code><mode>3"
[(set (match_operand:SSEMODEI 0 "register_operand" "=x")
(plogic:SSEMODEI
@@ -5957,6 +6507,29 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "avx_blendp<avxmodesuffixf2c>256"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (vec_merge:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:AVX256MODEF2P 1 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
+ "TARGET_AVX"
+ "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_blendvp<avxmodesuffixf2c>256"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (unspec:AVX256MODEF2P
+ [(match_operand:AVX256MODEF2P 1 "register_operand" "x")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:AVX256MODEF2P 3 "register_operand" "x")]
+ UNSPEC_BLENDV))]
+ "TARGET_AVX"
+ "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "sse4_1_blendp<ssemodesuffixf2c>"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
@@ -5982,6 +6555,18 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "<MODE>")])
+(define_insn "avx_dpps256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "%x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_DP))]
+ "TARGET_AVX"
+ "vdpps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "V8SF")])
+
(define_insn "sse4_1_dpp<ssemodesuffixf2c>"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(unspec:SSEMODEF2P