This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[AVX]: Finish gmmintrin.h
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sat, 10 May 2008 08:07:26 -0700
- Subject: [AVX]: Finish gmmintrin.h
I am checking in this patch to finish gmmintrin.h. All new AVX
intrinsics have been implemented.
H.J.
----
Index: testsuite/gcc.target/i386/avx-1.c
===================================================================
--- testsuite/gcc.target/i386/avx-1.c (revision 135144)
+++ testsuite/gcc.target/i386/avx-1.c (working copy)
@@ -47,6 +47,8 @@
#define __builtin_ia32_vinsertf128_pd256(X, Y, C) __builtin_ia32_vinsertf128_pd256(X, Y, 1)
#define __builtin_ia32_vinsertf128_ps256(X, Y, C) __builtin_ia32_vinsertf128_ps256(X, Y, 1)
#define __builtin_ia32_vinsertf128_si256(X, Y, C) __builtin_ia32_vinsertf128_si256(X, Y, 1)
+#define __builtin_ia32_roundpd256(V, M) __builtin_ia32_roundpd256(V, 1)
+#define __builtin_ia32_roundps256(V, M) __builtin_ia32_roundps256(V, 1)
/* wmmintrin.h */
#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
Index: testsuite/gcc.target/i386/avx-2.c
===================================================================
--- testsuite/gcc.target/i386/avx-2.c (revision 135144)
+++ testsuite/gcc.target/i386/avx-2.c (working copy)
@@ -82,6 +82,8 @@ test_2 (_mm256_permute2f128_si256, __m25
test_2 (_mm256_insertf128_pd, __m256d, __m256d, __m128d, 1)
test_2 (_mm256_insertf128_ps, __m256, __m256, __m128, 1)
test_2 (_mm256_insertf128_si256, __m256i, __m256i, __m128i, 1)
+test_1 (_mm256_round_pd, __m256d, __m256d, 1)
+test_1 (_mm256_round_ps, __m256, __m256, 1)
/* wmmintrin.h */
test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1)
Index: testsuite/ChangeLog.avx
===================================================================
--- testsuite/ChangeLog.avx (revision 135144)
+++ testsuite/ChangeLog.avx (working copy)
@@ -1,3 +1,8 @@
+2008-05-09 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-1.c: Add more tests for gmmintrin.h.
+ * gcc.target/i386/avx-2.c: Likewise.
+
2008-04-26 H.J. Lu <hongjiu.lu@intel.com>
PR testsuite/36053
Index: ChangeLog.avx
===================================================================
--- ChangeLog.avx (revision 135144)
+++ ChangeLog.avx (working copy)
@@ -1,3 +1,134 @@
+2008-05-10 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/gmmintrin.h (_mm256_round_pd): Fix a typo.
+ (_mm256_round_ps): Likewise.
+ (_mm_testz_pd): New.
+ (_mm_testc_pd): Likewise.
+ (_mm_testnzc_pd): Likewise.
+ (_mm_testz_ps): Likewise.
+ (_mm_testc_ps): Likewise.
+ (_mm_testnzc_ps): Likewise.
+ (_mm256_testz_pd): Updated.
+ (_mm256_testc_pd): Likewise.
+ (_mm256_testnzc_pd): Likewise.
+ (_mm256_testz_ps): Likewise.
+ (_mm256_testc_ps): Likewise.
+ (_mm256_testnzc_ps): Likewise.
+ (_mm256_setzero_ps): Likewise.
+ (_mm256_setzero_si256): Likewise.
+ (_mm256_set_ps): Likewise.
+ (_mm256_set_epi32): Likewise.
+ (_mm256_set1_ps): Likewise.
+ (_mm256_set1_epi32): Likewise.
+
+ * config/i386/i386.c (ix86_builtins): Add
+ IX86_BUILTIN_SQRTPD256, IX86_BUILTIN_SQRTPS256,
+ IX86_BUILTIN_SQRTPS_NR256, IX86_BUILTIN_RSQRTPS256,
+ IX86_BUILTIN_RSQRTPS_NR256, IX86_BUILTIN_RCPPS256,
+ IX86_BUILTIN_ROUNDPD256, IX86_BUILTIN_ROUNDPS256,
+ IX86_BUILTIN_VTESTZPD, IX86_BUILTIN_VTESTCPD,
+ IX86_BUILTIN_VTESTNZCPD, IX86_BUILTIN_VTESTZPS,
+ IX86_BUILTIN_VTESTCPS, IX86_BUILTIN_VTESTNZCPS,
+ IX86_BUILTIN_VTESTZPD256, IX86_BUILTIN_VTESTCPD256,
+ IX86_BUILTIN_VTESTNZCPD256, IX86_BUILTIN_VTESTZPS256,
+ IX86_BUILTIN_VTESTCPS256, IX86_BUILTIN_VTESTNZCPS256,
+ IX86_BUILTIN_PTESTZ256, IX86_BUILTIN_PTESTC256,
+ IX86_BUILTIN_PTESTNZC256, IX86_BUILTIN_MOVMSKPD256 and
+ IX86_BUILTIN_MOVMSKPS256.
+ (ix86_builtin_type): Add INT_FTYPE_V8SF_V8SF_PTEST,
+ INT_FTYPE_V4DI_V4DI_PTEST, INT_FTYPE_V4SF_V4SF_PTEST,
+ INT_FTYPE_V2DF_V2DF_PTEST, INT_FTYPE_V8SF and
+ INT_FTYPE_V4DF.
+ (bdesc_args): Add __builtin_ia32_sqrtpd256,
+ __builtin_ia32_sqrtps256, __builtin_ia32_sqrtps_nr256,
+ __builtin_ia32_rsqrtps256, __builtin_ia32_rsqrtps_nr256,
+ __builtin_ia32_rcpps256, __builtin_ia32_roundpd256,
+ __builtin_ia32_roundps256, AVX vtest builtins,
+ __builtin_ia32_movmskpd256 and __builtin_ia32_movmskps256.
+ (ix86_init_mmx_sse_builtins): Updated.
+ (ix86_expand_args_builtin): Updated. Handle
+ CODE_FOR_avx_roundpd256 and CODE_FOR_avx_roundps256.
+ (ix86_hard_regno_mode_ok): OImode is OK only when AVX is
+ enabled.
+
+ * config/i386/i386.md (UNSPEC_VTESTP): New.
+ (movoi): Likewise.
+ (*movoi_internal): Likewise.
+ (*cmpfp_i_sse): Reformat.
+ (*cmpfp_iu_sse): Likewise.
+ (*rsqrtsf2_sse): Likewise.
+ (*sqrt<mode>2_sse): Likewise.
+
+ * config/i386/sse.md (avx_rcpv8sf2): New.
+ (*avx_vmrcpv4sf2): Likewise.
+ (sqrtv8sf2): Likewise.
+ (avx_sqrtv8sf2): Likewise.
+ (sqrtv4df2): Likewise.
+ (*avx_vmsqrt<mode>2): Likewise.
+ (rsqrtv8sf2): Likewise.
+ (avx_rsqrtv8sf2): Likewise.
+ (*avx_vmrsqrtv4sf2): Likewise.
+ (avx_movmskp<avxmodesuffixf2c>256): Likewise.
+ (avx_vtestp<avxmodesuffixf2c><avxmodesuffix>): Likewise.
+ (avx_ptest256): Likewise.
+ (avx_roundp<avxmodesuffixf2c>256): Likewise.
+ (*avx_rounds<ssemodesuffixf2c>): Likewise.
+ (sse_rcpv4sf2): Support AVX.
+ (sqrtv4sf2): Likewise.
+ (sqrtv2df2): Likewise.
+ (rsqrtv4sf2): Likewise.
+ (<sse>_movmskp<ssemodesuffixf2c>): Likewise.
+ (sse2_pmovmskb): Likewise.
+ (sse4_1_ptest): Likewise.
+ (sse4_1_roundp<ssemodesuffixf2c>): Likewise.
+ (avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>): Set
+ type attribute to ssemov.
+ (avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>): Likewise.
+
+2008-05-10 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/sse.md (<sse>_comi): Reformat.
+ (<sse>_ucomi): Likewise.
+ (sse_cvtss2si): Likewise.
+ (sse_cvtss2si_2): Likewise.
+ (sse_cvtss2siq): Likewise.
+ (sse_cvtss2siq_2): Likewise.
+ (sse_cvttss2si): Likewise.
+ (sse_cvttss2siq): Likewise.
+ (sse2_cvtsd2si): Likewise.
+ (sse2_cvtsd2si_2): Likewise.
+ (sse2_cvtsd2siq): Likewise.
+ (sse2_cvtsd2siq_2): Likewise.
+ (sse2_cvttsd2si): Likewise.
+ (sse2_cvttsd2siq): Likewise.
+ (sse2_cvtdq2pd): Likewise.
+ (*sse2_cvtpd2dq): Likewise.
+ (*sse2_cvttpd2dq): Likewise.
+ (*sse2_cvtpd2ps): Likewise.
+ (sse2_cvtps2pd): Likewise.
+ (sse3_movshdup): Likewise.
+ (sse3_movsldup): Likewise.
+ (*vec_dupv2df_sse3): Likewise.
+ (sse_ldmxcsr): Likewise.
+
+2008-05-10 Joey Ye <joey.ye@intel.com>
+ H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/i386.md (*rcpsf2_sse): Support AVX.
+
+2008-05-10 Xuepeng Guo <xuepeng.guo@intel.com>
+
+ * config/i386/sse.md (*avx_pinsrb): New.
+ (*avx_pinsrw): Likewise.
+ (*avx_pinsrd): Likewise.
+ (*avx_pinsrq): Likewise.
+ (*sse4_1_pextrd): Support AVX.
+ (*sse4_1_pextrq): Likewise.
+ (*sse4_1_pextrb): Likewise.
+ (*sse4_1_pextrb_memory): Likewise.
+ (*sse2_pextrw): Likewise.
+ (*sse4_1_pextrw_memory): Likewise.
+
2008-05-08 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/gmmintrin.h (_mm_maskload_pd): Fix a typo.
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 135144)
+++ config/i386/i386.md (working copy)
@@ -206,6 +206,7 @@
(UNSPEC_MASKLOAD 170)
(UNSPEC_MASKSTORE 171)
(UNSPEC_CAST 172)
+ (UNSPEC_VTESTP 173)
])
(define_constants
@@ -1315,8 +1316,7 @@
(const_string "SF")
(const_string "DF")))
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX")
- (const_int 0))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
(const_string "vex")
(const_string "orig")))
(set_attr "athlon_decode" "vector")
@@ -1378,8 +1378,7 @@
(const_string "SF")
(const_string "DF")))
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX")
- (const_int 0))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
(const_string "vex")
(const_string "orig")))
(set_attr "athlon_decode" "vector")
@@ -2480,6 +2479,37 @@
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "double")])
+(define_expand "movoi"
+ [(set (match_operand:OI 0 "nonimmediate_operand" "")
+ (match_operand:OI 1 "general_operand" ""))]
+ "TARGET_AVX"
+ "ix86_expand_move (OImode, operands); DONE;")
+
+(define_insn "*movoi_internal"
+ [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:OI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "vxorps\t%0, %0, %0";
+ case 1:
+ case 2:
+ if (misaligned_operand (operands[0], OImode)
+ || misaligned_operand (operands[1], OImode))
+ return "vmovdqu\t{%1, %0|%0, %1}";
+ else
+ return "vmovdqa\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_expand "movti"
[(set (match_operand:TI 0 "nonimmediate_operand" "")
(match_operand:TI 1 "nonimmediate_operand" ""))]
@@ -16102,9 +16132,15 @@
(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
UNSPEC_RCP))]
"TARGET_SSE_MATH"
- "rcpss\t{%1, %0|%0, %1}"
+ "* return TARGET_AVX ? \"vrcpss\t{%1, %0, %0|%0, %0, %1}\"
+ : \"rcpss\t{%1, %0|%0, %1}\";"
[(set_attr "type" "sse")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "SF")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
+ ])
(define_insn "*fop_sf_1_avx"
[(set (match_operand:SF 0 "register_operand" "=x")
@@ -16694,9 +16730,9 @@
(set_attr "mode" "SF")
(set_attr "prefix_0f" "1")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))])
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_expand "rsqrtsf2"
[(set (match_operand:SF 0 "register_operand" "")
@@ -16721,9 +16757,9 @@
(set_attr "amdfam10_decode" "*")
(set_attr "prefix_0f" "1")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))])
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_expand "sqrt<mode>2"
[(set (match_operand:MODEF 0 "register_operand" "")
Index: config/i386/gmmintrin.h
===================================================================
--- config/i386/gmmintrin.h (revision 135144)
+++ config/i386/gmmintrin.h (working copy)
@@ -907,7 +907,6 @@ _mm256_lddqu_si256 (__m256i const *__P)
return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
}
-#if 0
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_ps (__m256 __A)
{
@@ -936,13 +935,13 @@ _mm256_sqrt_ps (__m256 __A)
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_round_pd (__m256d __V, const int __M)
{
- return (__m128d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
+ return (__m256d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_round_ps (__m256 __V, const int __M)
{
- return (__m128) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
+ return (__m256) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
}
#else
#define _mm256_round_pd(V, M) \
@@ -956,7 +955,6 @@ _mm256_round_ps (__m256 __V, const int _
#define _mm256_floor_pd(V) _mm256_round_pd ((V), _MM_FROUND_FLOOR)
#define _mm256_ceil_ps(V) _mm256_round_ps ((V), _MM_FROUND_CEIL)
#define _mm256_floor_ps(V) _mm256_round_ps ((V), _MM_FROUND_FLOOR)
-#endif
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_pd (__m256d __A, __m256d __B)
@@ -982,41 +980,76 @@ _mm256_unpacklo_ps (__m256 __A, __m256 _
return (__m256) __builtin_ia32_unpcklps256 ((__v8sf)__A, (__v8sf)__B);
}
-#if 0
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestzpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestcpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestnzcpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestzps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestcps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestnzcps ((__v4sf)__M, (__v4sf)__V);
+}
+
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_pd (__m256d __M, __m256d __V)
{
- return __builtin_ia32_ptestzpd256 ((__v4df)__M, (__v4df)__V);
+ return __builtin_ia32_vtestzpd256 ((__v4df)__M, (__v4df)__V);
}
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_pd (__m256d __M, __m256d __V)
{
- return __builtin_ia32_ptestcpd256 ((__v4df)__M, (__v4df)__V);
+ return __builtin_ia32_vtestcpd256 ((__v4df)__M, (__v4df)__V);
}
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_pd (__m256d __M, __m256d __V)
{
- return __builtin_ia32_ptestnzcpd256 ((__v4df)__M, (__v4df)__V);
+ return __builtin_ia32_vtestnzcpd256 ((__v4df)__M, (__v4df)__V);
}
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_ps (__m256 __M, __m256 __V)
{
- return __builtin_ia32_ptestzps256 ((__v8sf)__M, (__v8sf)__V);
+ return __builtin_ia32_vtestzps256 ((__v8sf)__M, (__v8sf)__V);
}
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_ps (__m256 __M, __m256 __V)
{
- return __builtin_ia32_ptestcps256 ((__v8sf)__M, (__v8sf)__V);
+ return __builtin_ia32_vtestcps256 ((__v8sf)__M, (__v8sf)__V);
}
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_ps (__m256 __M, __m256 __V)
{
- return __builtin_ia32_ptestnzcps256 ((__v8sf)__M, (__v8sf)__V);
+ return __builtin_ia32_vtestnzcps256 ((__v8sf)__M, (__v8sf)__V);
}
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1058,13 +1091,14 @@ _mm256_setzero_pd (void)
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_ps (void)
{
- return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+ return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0 };
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_si256 (void)
{
- return __extension__ (__m256i){ 0, 0, 0, 0 };
+ return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
}
/* Create the vector [A B C D]. */
@@ -1079,7 +1113,8 @@ extern __inline __m256 __attribute__((__
_mm256_set_ps (float __A, float __B, float __C, float __D,
float __E, float __F, float __G, float __H)
{
- return __extension__ (__m256){ __H, __G, __F, __E, __D, __C, __B, __A };
+ return __extension__ (__m256){ __H, __G, __F, __E,
+ __D, __C, __B, __A };
}
/* Create the vector [A B C D E F G H]. */
@@ -1087,7 +1122,8 @@ extern __inline __m256i __attribute__((_
_mm256_set_epi32 (int __A, int __B, int __C, int __D,
int __E, int __F, int __G, int __H)
{
- return __extension__ (__m256i){ __H, __G, __F, __E, __D, __C, __B, __A };
+ return __extension__ (__m256i)(__v8si){ __H, __G, __F, __E,
+ __D, __C, __B, __A };
}
/* Create a vector with all elements equal to A. */
@@ -1101,16 +1137,17 @@ _mm256_set1_pd (double __A)
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_ps (float __A)
{
- return __extension__ (__m256){ __A, __A, __A, __A, __A, __A, __A, __A };
+ return __extension__ (__m256){ __A, __A, __A, __A,
+ __A, __A, __A, __A };
}
/* Create a vector with all elements equal to A. */
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi32 (int __A)
{
- return __extension__ (__m256i){ __A, __A, __A, __A, __A, __A, __A, __A };
+ return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A,
+ __A, __A, __A, __A };
}
-#endif
/* Casts between various SP, DP, INT vector types. Note that these do no
conversion of values, they just change the type. */
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 135144)
+++ config/i386/sse.md (working copy)
@@ -644,15 +644,43 @@
[(set_attr "type" "ssediv")
(set_attr "mode" "<ssescalarmode>")])
+(define_insn "avx_rcpv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+ "TARGET_AVX"
+ "vrcpps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_insn "sse_rcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
"TARGET_SSE"
- "rcpps\t{%1, %0|%0, %1}"
+ "* return TARGET_AVX ? \"vrcpps\t{%1, %0|%0, %1}\"
+ : \"rcpps\t{%1, %0|%0, %1}\";"
[(set_attr "type" "sse")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "mode" "V4SF")])
+(define_insn "*avx_vmrcpv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RCP)
+ (match_operand:V4SF 2 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vrcpss\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
(define_insn "sse_vmrcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
@@ -665,6 +693,29 @@
[(set_attr "type" "sse")
(set_attr "mode" "SF")])
+(define_expand "sqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "")
+ (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
+ "TARGET_AVX"
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
+ DONE;
+ }
+})
+
+(define_insn "avx_sqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_expand "sqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "")
(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
@@ -683,18 +734,50 @@
[(set (match_operand:V4SF 0 "register_operand" "=x")
(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
- "sqrtps\t{%1, %0|%0, %1}"
+ "* return TARGET_AVX ? \"vsqrtps\t{%1, %0|%0, %1}\"
+ : \"sqrtps\t{%1, %0|%0, %1}\";"
[(set_attr "type" "sse")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "mode" "V4SF")])
+(define_insn "sqrtv4df2"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vsqrtpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
(define_insn "sqrtv2df2"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
- "sqrtpd\t{%1, %0|%0, %1}"
+ "* return TARGET_AVX ? \"vsqrtpd\t{%1, %0|%0, %1}\"
+ : \"sqrtpd\t{%1, %0|%0, %1}\";"
[(set_attr "type" "sse")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "mode" "V2DF")])
+(define_insn "*avx_vmsqrt<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (sqrt:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
+ (match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (const_int 1)))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
(define_insn "<sse>_vmsqrt<mode>2"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
@@ -707,6 +790,26 @@
[(set_attr "type" "sse")
(set_attr "mode" "<ssescalarmode>")])
+(define_expand "rsqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
+ "TARGET_AVX && TARGET_SSE_MATH"
+{
+ ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
+ DONE;
+})
+
+(define_insn "avx_rsqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
+ "TARGET_AVX"
+ "vrsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_expand "rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "")
(unspec:V4SF
@@ -722,10 +825,28 @@
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
"TARGET_SSE"
- "rsqrtps\t{%1, %0|%0, %1}"
+ "* return TARGET_AVX ? \"vrsqrtps\t{%1, %0|%0, %1}\"
+ : \"rsqrtps\t{%1, %0|%0, %1}\";"
[(set_attr "type" "sse")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "mode" "V4SF")])
+(define_insn "*avx_vmrsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RSQRT)
+ (match_operand:V4SF 2 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
(define_insn "sse_vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
@@ -1159,9 +1280,9 @@
[(set_attr "type" "ssecomi")
(set_attr "mode" "<MODE>")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))])
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_insn "<sse>_ucomi"
[(set (reg:CCFPU FLAGS_REG)
@@ -1178,9 +1299,9 @@
[(set_attr "type" "ssecomi")
(set_attr "mode" "<MODE>")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))])
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_expand "vcond<mode>"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
@@ -1957,9 +2078,9 @@
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "SI")])
@@ -1974,9 +2095,9 @@
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "SI")])
@@ -1993,9 +2114,9 @@
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
@@ -2010,9 +2131,9 @@
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
@@ -2029,9 +2150,9 @@
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "SI")])
@@ -2048,9 +2169,9 @@
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
@@ -2215,9 +2336,9 @@
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "SI")])
@@ -2232,9 +2353,9 @@
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "SI")])
@@ -2251,9 +2372,9 @@
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
@@ -2268,9 +2389,9 @@
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
@@ -2285,9 +2406,9 @@
: \"cvttsd2si\t{%1, %0|%0, %1}\";"
[(set_attr "type" "sseicvt")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "SI")
(set_attr "athlon_decode" "double,vector")
@@ -2304,9 +2425,9 @@
: \"cvttsd2siq\t{%1, %0|%0, %1}\";"
[(set_attr "type" "sseicvt")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "DI")
(set_attr "athlon_decode" "double,vector")
@@ -2332,9 +2453,9 @@
: \"cvtdq2pd\t{%1, %0|%0, %1}\";"
[(set_attr "type" "ssecvt")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "mode" "V2DF")])
(define_insn "avx_cvtpd2dq256"
@@ -2367,9 +2488,9 @@
: \"cvtpd2dq\t{%1, %0|%0, %1}\";"
[(set_attr "type" "ssecvt")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "TI")
(set_attr "amdfam10_decode" "double")])
@@ -2401,9 +2522,9 @@
: \"cvttpd2dq\t{%1, %0|%0, %1}\";"
[(set_attr "type" "ssecvt")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_rep" "1")
(set_attr "mode" "TI")
(set_attr "amdfam10_decode" "double")])
@@ -2497,9 +2618,9 @@
: \"cvtpd2ps\t{%1, %0|%0, %1}\";"
[(set_attr "type" "ssecvt")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "prefix_data16" "1")
(set_attr "mode" "V4SF")
(set_attr "amdfam10_decode" "double")])
@@ -2526,9 +2647,9 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "amdfam10_decode" "direct")])
(define_expand "vec_unpacks_hi_v4sf"
@@ -2915,9 +3036,9 @@
(set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))])
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_insn "avx_movsldup256"
[(set (match_operand:V8SF 0 "register_operand" "=x")
@@ -2952,9 +3073,9 @@
(set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))])
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_expand "avx_shufps256"
[(match_operand:V8SF 0 "register_operand" "")
@@ -4023,9 +4144,9 @@
[(set_attr "type" "sselog1")
(set_attr "mode" "DF")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))])
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_insn "vec_dupv2df"
[(set (match_operand:V2DF 0 "register_operand" "=x")
@@ -5651,6 +5772,22 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_pinsrb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_merge:V16QI
+ (vec_duplicate:V16QI
+ (match_operand:QI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse4_1_pinsrb"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_merge:V16QI
@@ -5667,6 +5804,22 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_pinsrw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (vec_duplicate:V8HI
+ (match_operand:HI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse2_pinsrw"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_merge:V8HI
@@ -5684,6 +5837,22 @@
(set_attr "mode" "TI")])
;; It must come before sse2_loadld since it is preferred.
+(define_insn "*avx_pinsrd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "vpinsrd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse4_1_pinsrd"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_merge:V4SI
@@ -5700,6 +5869,22 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_pinsrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_merge:V2DI
+ (vec_duplicate:V2DI
+ (match_operand:DI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse4_1_pinsrq"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_merge:V2DI
@@ -5723,10 +5908,15 @@
(match_operand:V16QI 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
"TARGET_SSE4_1"
- "pextrb\t{%2, %1, %0|%0, %1, %2}"
+ "* return TARGET_AVX ? \"vpextrb\t{%2, %1, %0|%0, %1, %2}\"
+ : \"pextrb\t{%2, %1, %0|%0, %1, %2}\";"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "TI")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_insn "*sse4_1_pextrb_memory"
[(set (match_operand:QI 0 "memory_operand" "=m")
@@ -5734,10 +5924,15 @@
(match_operand:V16QI 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
"TARGET_SSE4_1"
- "pextrb\t{%2, %1, %0|%0, %1, %2}"
+ "* return TARGET_AVX ? \"vpextrb\t{%2, %1, %0|%0, %1, %2}\"
+ : \"pextrb\t{%2, %1, %0|%0, %1, %2}\";"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "TI")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_insn "*sse2_pextrw"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -5746,10 +5941,15 @@
(match_operand:V8HI 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
"TARGET_SSE2"
- "pextrw\t{%2, %1, %0|%0, %1, %2}"
+ "* return TARGET_AVX ? \"vpextrw\t{%2, %1, %0|%0, %1, %2}\"
+ : \"pextrw\t{%2, %1, %0|%0, %1, %2}\";"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "TI")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_insn "*sse4_1_pextrw_memory"
[(set (match_operand:HI 0 "memory_operand" "=m")
@@ -5757,10 +5957,15 @@
(match_operand:V8HI 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
"TARGET_SSE4_1"
- "pextrw\t{%2, %1, %0|%0, %1, %2}"
+ "* return TARGET_AVX ? \"vpextrw\t{%2, %1, %0|%0, %1, %2}\"
+ : \"pextrw\t{%2, %1, %0|%0, %1, %2}\";"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "TI")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_insn "*sse4_1_pextrd"
[(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
@@ -5768,10 +5973,15 @@
(match_operand:V4SI 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
"TARGET_SSE4_1"
- "pextrd\t{%2, %1, %0|%0, %1, %2}"
+ "* return TARGET_AVX ? \"vpextrd\t{%2, %1, %0|%0, %1, %2}\"
+ : \"pextrd\t{%2, %1, %0|%0, %1, %2}\";"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "TI")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
;; It must come before *vec_extractv2di_1_sse since it is preferred.
(define_insn "*sse4_1_pextrq"
@@ -5780,10 +5990,15 @@
(match_operand:V2DI 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
"TARGET_SSE4_1 && TARGET_64BIT"
- "pextrq\t{%2, %1, %0|%0, %1, %2}"
+ "* return TARGET_AVX ? \"vpextrq\t{%2, %1, %0|%0, %1, %2}\"
+ : \"pextrq\t{%2, %1, %0|%0, %1, %2}\";"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "TI")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_expand "sse2_pshufd"
[(match_operand:V4SI 0 "register_operand" "")
@@ -6550,14 +6765,30 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "avx_movmskp<avxmodesuffixf2c>256"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<sse>_movmskp<ssemodesuffixf2c>"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
[(match_operand:SSEMODEF2P 1 "register_operand" "x")]
UNSPEC_MOVMSK))]
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
- "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+ "* return TARGET_AVX ? \"vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}\"
+ : \"movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}\";"
[(set_attr "type" "ssecvt")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "mode" "<MODE>")])
(define_insn "sse2_pmovmskb"
@@ -6565,9 +6796,14 @@
(unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
UNSPEC_MOVMSK))]
"TARGET_SSE2"
- "pmovmskb\t{%1, %0|%0, %1}"
+ "* return TARGET_AVX ? \"vpmovmskb\t{%1, %0|%0, %1}\"
+ : \"pmovmskb\t{%1, %0|%0, %1}\";"
[(set_attr "type" "ssecvt")
(set_attr "prefix_data16" "1")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "mode" "SI")])
(define_expand "sse2_maskmovdqu"
@@ -6614,9 +6850,9 @@
[(set_attr "type" "sse")
(set_attr "memory" "load")
(set (attr "prefix")
- (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "vex")
- (const_string "orig")))])
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))])
(define_insn "sse_stmxcsr"
[(set (match_operand:SI 0 "memory_operand" "=m")
@@ -7963,19 +8199,60 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
+;; vtestps/vtestpd are very similar to comiss and ucomiss when
+;; setting FLAGS_REG.  But they are not really compare instructions.
+(define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
+ UNSPEC_VTESTP))]
+ "TARGET_AVX"
+ "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
;; But it is not a really compare instruction.
+(define_insn "avx_ptest256"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
+ (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_PTEST))]
+ "TARGET_AVX"
+ "vptest\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "sse4_1_ptest"
[(set (reg:CC FLAGS_REG)
(unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
UNSPEC_PTEST))]
"TARGET_SSE4_1"
- "ptest\t{%1, %0|%0, %1}"
+ "* return TARGET_AVX ? \"vptest\t{%1, %0|%0, %1}\"
+ : \"ptest\t{%1, %0|%0, %1}\";"
[(set_attr "type" "ssecomi")
(set_attr "prefix_extra" "1")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "mode" "TI")])
+(define_insn "avx_roundp<avxmodesuffixf2c>256"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (unspec:AVX256MODEF2P
+ [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND))]
+ "TARGET_AVX"
+ "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "sse4_1_roundp<ssemodesuffixf2c>"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(unspec:SSEMODEF2P
@@ -7983,9 +8260,29 @@
(match_operand:SI 2 "const_0_to_15_operand" "n")]
UNSPEC_ROUND))]
"TARGET_ROUND"
- "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ "* return TARGET_AVX ? \"vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"
+ : \"roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";"
[(set_attr "type" "ssecvt")
(set_attr "prefix_extra" "1")
+ (set (attr "prefix")
+ (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "vex")
+ (const_string "orig")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_rounds<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND)
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
(define_insn "sse4_1_rounds<ssemodesuffixf2c>"
@@ -9814,7 +10111,8 @@
}
gcc_unreachable ();
}
- [(set_attr "prefix" "vex")
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
(set_attr "mode" "<avxvecmode>")
(set (attr "length")
(if_then_else (eq_attr "alternative" "0")
@@ -9849,7 +10147,8 @@
}
gcc_unreachable ();
}
- [(set_attr "prefix" "vex")
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
(set_attr "mode" "<avxvecmode>")
(set (attr "length")
(if_then_else (eq_attr "alternative" "0")
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 135145)
+++ config/i386/i386.c (working copy)
@@ -18091,6 +18091,17 @@ enum ix86_builtins
IX86_BUILTIN_MOVSLDUP256,
IX86_BUILTIN_MOVDDUP256,
+ IX86_BUILTIN_SQRTPD256,
+ IX86_BUILTIN_SQRTPS256,
+ IX86_BUILTIN_SQRTPS_NR256,
+ IX86_BUILTIN_RSQRTPS256,
+ IX86_BUILTIN_RSQRTPS_NR256,
+
+ IX86_BUILTIN_RCPPS256,
+
+ IX86_BUILTIN_ROUNDPD256,
+ IX86_BUILTIN_ROUNDPS256,
+
IX86_BUILTIN_UNPCKHPD256,
IX86_BUILTIN_UNPCKLPD256,
IX86_BUILTIN_UNPCKHPS256,
@@ -18103,6 +18114,25 @@ enum ix86_builtins
IX86_BUILTIN_PS_PS256,
IX86_BUILTIN_PD_PD256,
+ IX86_BUILTIN_VTESTZPD,
+ IX86_BUILTIN_VTESTCPD,
+ IX86_BUILTIN_VTESTNZCPD,
+ IX86_BUILTIN_VTESTZPS,
+ IX86_BUILTIN_VTESTCPS,
+ IX86_BUILTIN_VTESTNZCPS,
+ IX86_BUILTIN_VTESTZPD256,
+ IX86_BUILTIN_VTESTCPD256,
+ IX86_BUILTIN_VTESTNZCPD256,
+ IX86_BUILTIN_VTESTZPS256,
+ IX86_BUILTIN_VTESTCPS256,
+ IX86_BUILTIN_VTESTNZCPS256,
+ IX86_BUILTIN_PTESTZ256,
+ IX86_BUILTIN_PTESTC256,
+ IX86_BUILTIN_PTESTNZC256,
+
+ IX86_BUILTIN_MOVMSKPD256,
+ IX86_BUILTIN_MOVMSKPS256,
+
/* TFmode support builtins. */
IX86_BUILTIN_INFQ,
IX86_BUILTIN_FABSQ,
@@ -18468,11 +18498,18 @@ enum ix86_builtin_type
FLOAT128_FTYPE_FLOAT128,
FLOAT_FTYPE_FLOAT,
FLOAT128_FTYPE_FLOAT128_FLOAT128,
+ INT_FTYPE_V8SF_V8SF_PTEST,
+ INT_FTYPE_V4DI_V4DI_PTEST,
+ INT_FTYPE_V4DF_V4DF_PTEST,
+ INT_FTYPE_V4SF_V4SF_PTEST,
INT_FTYPE_V2DI_V2DI_PTEST,
+ INT_FTYPE_V2DF_V2DF_PTEST,
INT64_FTYPE_V4SF,
INT64_FTYPE_V2DF,
INT_FTYPE_V16QI,
INT_FTYPE_V8QI,
+ INT_FTYPE_V8SF,
+ INT_FTYPE_V4DF,
INT_FTYPE_V4SF,
INT_FTYPE_V2DF,
V16QI_FTYPE_V16QI,
@@ -19246,6 +19283,17 @@ static const struct builtin_description
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
+
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
@@ -19257,6 +19305,25 @@ static const struct builtin_description
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
};
/* SSE5 */
@@ -20059,6 +20126,8 @@ ix86_init_mmx_sse_builtins (void)
V8SImode);
tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
V8SFmode);
+ tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
+ V4DImode);
tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
V4DFmode);
tree v8sf_ftype_v8sf
@@ -20285,6 +20354,26 @@ ix86_init_mmx_sse_builtins (void)
= build_function_type_list (V4SI_type_node,
V8SI_type_node,
NULL_TREE);
+ tree int_ftype_v4df
+ = build_function_type_list (integer_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree int_ftype_v8sf
+ = build_function_type_list (integer_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree int_ftype_v8sf_v8sf
+ = build_function_type_list (integer_type_node,
+ V8SF_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree int_ftype_v4di_v4di
+ = build_function_type_list (integer_type_node,
+ V4DI_type_node, V4DI_type_node,
+ NULL_TREE);
+ tree int_ftype_v4df_v4df
+ = build_function_type_list (integer_type_node,
+ V4DF_type_node, V4DF_type_node,
+ NULL_TREE);
tree ftype;
@@ -20453,9 +20542,24 @@ ix86_init_mmx_sse_builtins (void)
case FLOAT_FTYPE_FLOAT:
type = float_ftype_float;
break;
+ case INT_FTYPE_V8SF_V8SF_PTEST:
+ type = int_ftype_v8sf_v8sf;
+ break;
+ case INT_FTYPE_V4DI_V4DI_PTEST:
+ type = int_ftype_v4di_v4di;
+ break;
+ case INT_FTYPE_V4DF_V4DF_PTEST:
+ type = int_ftype_v4df_v4df;
+ break;
+ case INT_FTYPE_V4SF_V4SF_PTEST:
+ type = int_ftype_v4sf_v4sf;
+ break;
case INT_FTYPE_V2DI_V2DI_PTEST:
type = int_ftype_v2di_v2di;
break;
+ case INT_FTYPE_V2DF_V2DF_PTEST:
+ type = int_ftype_v2df_v2df;
+ break;
case INT64_FTYPE_V4SF:
type = int64_ftype_v4sf;
break;
@@ -20468,6 +20572,12 @@ ix86_init_mmx_sse_builtins (void)
case INT_FTYPE_V8QI:
type = int_ftype_v8qi;
break;
+ case INT_FTYPE_V8SF:
+ type = int_ftype_v8sf;
+ break;
+ case INT_FTYPE_V4DF:
+ type = int_ftype_v4df;
+ break;
case INT_FTYPE_V4SF:
type = int_ftype_v4sf;
break;
@@ -21731,7 +21841,12 @@ ix86_expand_args_builtin (const struct b
switch ((enum ix86_builtin_type) d->flag)
{
+ case INT_FTYPE_V8SF_V8SF_PTEST:
+ case INT_FTYPE_V4DI_V4DI_PTEST:
+ case INT_FTYPE_V4DF_V4DF_PTEST:
+ case INT_FTYPE_V4SF_V4SF_PTEST:
case INT_FTYPE_V2DI_V2DI_PTEST:
+ case INT_FTYPE_V2DF_V2DF_PTEST:
return ix86_expand_sse_ptest (d, exp, target);
case FLOAT128_FTYPE_FLOAT128:
case FLOAT_FTYPE_FLOAT:
@@ -21739,6 +21854,8 @@ ix86_expand_args_builtin (const struct b
case INT64_FTYPE_V2DF:
case INT_FTYPE_V16QI:
case INT_FTYPE_V8QI:
+ case INT_FTYPE_V8SF:
+ case INT_FTYPE_V4DF:
case INT_FTYPE_V4SF:
case INT_FTYPE_V2DF:
case V16QI_FTYPE_V16QI:
@@ -21980,6 +22097,8 @@ ix86_expand_args_builtin (const struct b
case CODE_FOR_sse4_1_blendps:
case CODE_FOR_avx_blendpd256:
case CODE_FOR_avx_vpermilv4df:
+ case CODE_FOR_avx_roundpd256:
+ case CODE_FOR_avx_roundps256:
error ("the last argument must be a 4-bit immediate");
return const0_rtx;
@@ -23384,8 +23503,10 @@ ix86_hard_regno_mode_ok (int regno, enum
{
/* We implement the move patterns for all vector modes into and
out of SSE registers, even when no operation instructions
- are available. */
- return (VALID_AVX256_REG_MODE (mode)
+ are available. OImode move is available only when AVX is
+ enabled. */
+ return ((TARGET_AVX && mode == OImode)
+ || VALID_AVX256_REG_MODE (mode)
|| VALID_SSE_REG_MODE (mode)
|| VALID_SSE2_REG_MODE (mode)
|| VALID_MMX_REG_MODE (mode)