This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[AVX]: Finish gmmintrin.h


I am checking in this patch to finish gmmintrin.h. All new AVX
intrinsics have been implemented.


H.J.
----
Index: testsuite/gcc.target/i386/avx-1.c
===================================================================
--- testsuite/gcc.target/i386/avx-1.c	(revision 135144)
+++ testsuite/gcc.target/i386/avx-1.c	(working copy)
@@ -47,6 +47,8 @@
 #define __builtin_ia32_vinsertf128_pd256(X, Y, C) __builtin_ia32_vinsertf128_pd256(X, Y, 1)
 #define __builtin_ia32_vinsertf128_ps256(X, Y, C) __builtin_ia32_vinsertf128_ps256(X, Y, 1)
 #define __builtin_ia32_vinsertf128_si256(X, Y, C) __builtin_ia32_vinsertf128_si256(X, Y, 1)
+#define __builtin_ia32_roundpd256(V, M) __builtin_ia32_roundpd256(V, 1)
+#define __builtin_ia32_roundps256(V, M) __builtin_ia32_roundps256(V, 1)
 
 /* wmmintrin.h */
 #define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
Index: testsuite/gcc.target/i386/avx-2.c
===================================================================
--- testsuite/gcc.target/i386/avx-2.c	(revision 135144)
+++ testsuite/gcc.target/i386/avx-2.c	(working copy)
@@ -82,6 +82,8 @@ test_2 (_mm256_permute2f128_si256, __m25
 test_2 (_mm256_insertf128_pd, __m256d, __m256d, __m128d, 1)
 test_2 (_mm256_insertf128_ps, __m256, __m256, __m128, 1)
 test_2 (_mm256_insertf128_si256, __m256i, __m256i, __m128i, 1)
+test_1 (_mm256_round_pd, __m256d, __m256d, 1)
+test_1 (_mm256_round_ps, __m256, __m256, 1)
 
 /* wmmintrin.h */
 test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1)
Index: testsuite/ChangeLog.avx
===================================================================
--- testsuite/ChangeLog.avx	(revision 135144)
+++ testsuite/ChangeLog.avx	(working copy)
@@ -1,3 +1,8 @@
+2008-05-09  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* gcc.target/i386/avx-1.c: Add more tests for gmmintrin.h.
+	* gcc.target/i386/avx-2.c: Likewise.
+
 2008-04-26  H.J. Lu  <hongjiu.lu@intel.com>
 
 	PR testsuite/36053
Index: ChangeLog.avx
===================================================================
--- ChangeLog.avx	(revision 135144)
+++ ChangeLog.avx	(working copy)
@@ -1,3 +1,134 @@
+2008-05-10  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* config/i386/gmmintrin.h (_mm256_round_pd): Fix a typo.
+	(_mm256_round_ps): Likewise.
+	(_mm_testz_pd): New.
+	(_mm_testc_pd): Likewise.
+	(_mm_testnzc_pd): Likewise.
+	(_mm_testz_ps): Likewise.
+	(_mm_testc_ps): Likewise.
+	(_mm_testnzc_ps): Likewise.
+	(_mm256_testz_pd): Updated.
+	(_mm256_testc_pd): Likewise.
+	(_mm256_testnzc_pd): Likewise.
+	(_mm256_testz_ps): Likewise.
+	(_mm256_testc_ps): Likewise.
+	(_mm256_testnzc_ps): Likewise.
+	(_mm256_setzero_ps): Likewise.
+	(_mm256_setzero_si256): Likewise.
+	(_mm256_set_ps): Likewise.
+	(_mm256_set_epi32): Likewise.
+	(_mm256_set1_ps): Likewise.
+	(_mm256_set1_epi32): Likewise.
+
+	* config/i386/i386.c (ix86_builtins): Add
+	IX86_BUILTIN_SQRTPD256, IX86_BUILTIN_SQRTPS256,
+	IX86_BUILTIN_SQRTPS_NR256, IX86_BUILTIN_RSQRTPS256,
+	IX86_BUILTIN_RSQRTPS_NR256, IX86_BUILTIN_RCPPS256,
+	IX86_BUILTIN_ROUNDPD256, IX86_BUILTIN_ROUNDPS256,
+	IX86_BUILTIN_VTESTZPD, IX86_BUILTIN_VTESTCPD,
+	IX86_BUILTIN_VTESTNZCPD, IX86_BUILTIN_VTESTZPS,
+	IX86_BUILTIN_VTESTCPS, IX86_BUILTIN_VTESTNZCPS,
+	IX86_BUILTIN_VTESTZPD256, IX86_BUILTIN_VTESTCPD256,
+	IX86_BUILTIN_VTESTNZCPD256, IX86_BUILTIN_VTESTZPS256,
+	IX86_BUILTIN_VTESTCPS256, IX86_BUILTIN_VTESTNZCPS256,
+	IX86_BUILTIN_PTESTZ256, IX86_BUILTIN_PTESTC256,
+	IX86_BUILTIN_PTESTNZC256, IX86_BUILTIN_MOVMSKPD256 and
+	IX86_BUILTIN_MOVMSKPS256.
+	(ix86_builtin_type): Add INT_FTYPE_V8SF_V8SF_PTEST,
+	INT_FTYPE_V4DI_V4DI_PTEST, INT_FTYPE_V4SF_V4SF_PTEST,
+	INT_FTYPE_V2DF_V2DF_PTEST, INT_FTYPE_V8SF and
+	INT_FTYPE_V4DF.
+	(bdesc_args): Add __builtin_ia32_sqrtpd256,
+	__builtin_ia32_sqrtps256, __builtin_ia32_sqrtps_nr256,
+	__builtin_ia32_rsqrtps256, __builtin_ia32_rsqrtps_nr256,
+	__builtin_ia32_rcpps256, __builtin_ia32_roundpd256,
+	__builtin_ia32_roundps256, AVX vtest builtins,
+	__builtin_ia32_movmskpd256 and __builtin_ia32_movmskps256.
+	(ix86_init_mmx_sse_builtins): Updated.
+	(ix86_expand_args_builtin): Updated.  Handle
+	CODE_FOR_avx_roundpd256 and CODE_FOR_avx_roundps256.
+	(ix86_hard_regno_mode_ok): OImode is OK only when AVX is
+	enabled.
+
+	* config/i386/i386.md (UNSPEC_VTESTP): New.
+	(movoi): Likewise.
+	(*movoi_internal): Likewise.
+	(*cmpfp_i_sse): Reformat.
+	(*cmpfp_iu_sse): Likewise.
+	(*rsqrtsf2_sse): Likewise.
+	(*sqrt<mode>2_sse): Likewise.
+
+	* config/i386/sse.md (avx_rcpv8sf2): New.
+	(*avx_vmrcpv4sf2): Likewise.
+	(sqrtv8sf2): Likewise.
+	(avx_sqrtv8sf2): Likewise.
+	(sqrtv4df2): Likewise.
+	(*avx_vmsqrt<mode>2): Likewise.
+	(rsqrtv8sf2): Likewise.
+	(avx_rsqrtv8sf2): Likewise.
+	(*avx_vmrsqrtv4sf2): Likewise.
+	(avx_movmskp<avxmodesuffixf2c>256): Likewise.
+	(avx_vtestp<avxmodesuffixf2c><avxmodesuffix>): Likewise.
+	(avx_ptest256): Likewise.
+	(avx_roundp<avxmodesuffixf2c>256): Likewise.
+	(*avx_rounds<ssemodesuffixf2c>): Likewise.
+	(sse_rcpv4sf2): Support AVX.
+	(sqrtv4sf2): Likewise.
+	(sqrtv2df2): Likewise.
+	(rsqrtv4sf2): Likewise.
+	(<sse>_movmskp<ssemodesuffixf2c>): Likewise.
+	(sse2_pmovmskb): Likewise.
+	(sse4_1_ptest): Likewise.
+	(sse4_1_roundp<ssemodesuffixf2c>): Likewise.
+	(avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>): Set
+	type attribute to ssemov.
+	(avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>): Likewise.
+
+2008-05-10  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* config/i386/sse.md (<sse>_comi): Reformat.
+	(<sse>_ucomi): Likewise.
+	(sse_cvtss2si): Likewise.
+	(sse_cvtss2si_2): Likewise.
+	(sse_cvtss2siq): Likewise.
+	(sse_cvtss2siq_2): Likewise.
+	(sse_cvttss2si): Likewise.
+	(sse_cvttss2siq): Likewise.
+	(sse2_cvtsd2si): Likewise.
+	(sse2_cvtsd2si_2): Likewise.
+	(sse2_cvtsd2siq): Likewise.
+	(sse2_cvtsd2siq_2): Likewise.
+	(sse2_cvttsd2si): Likewise.
+	(sse2_cvttsd2siq): Likewise.
+	(sse2_cvtdq2pd): Likewise.
+	(*sse2_cvtpd2dq): Likewise.
+	(*sse2_cvttpd2dq): Likewise.
+	(*sse2_cvtpd2ps): Likewise.
+	(sse2_cvtps2pd): Likewise.
+	(sse3_movshdup): Likewise.
+	(sse3_movsldup): Likewise.
+	(*vec_dupv2df_sse3): Likewise.
+	(sse_ldmxcsr): Likewise.
+
+2008-05-10  Joey Ye  <joey.ye@intel.com>
+	    H.J. Lu  <hongjiu.lu@intel.com>
+
+	* config/i386/i386.md (*rcpsf2_sse): Support AVX.
+
+2008-05-10  Xuepeng Guo  <xuepeng.guo@intel.com>
+
+	* config/i386/sse.md (*avx_pinsrb): New.
+	(*avx_pinsrw): Likewise.
+	(*avx_pinsrd): Likewise.
+	(*avx_pinsrq): Likewise.
+	(*sse4_1_pextrd): Support AVX.
+	(*sse4_1_pextrq): Likewise.
+	(*sse4_1_pextrb): Likewise.
+	(*sse4_1_pextrb_memory): Likewise.
+	(*sse2_pextrw): Likewise.
+	(*sse4_1_pextrw_memory): Likewise.
+
 2008-05-08  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* config/i386/gmmintrin.h (_mm_maskload_pd): Fix a typo.
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 135144)
+++ config/i386/i386.md	(working copy)
@@ -206,6 +206,7 @@
    (UNSPEC_MASKLOAD		170)
    (UNSPEC_MASKSTORE		171)
    (UNSPEC_CAST			172)
+   (UNSPEC_VTESTP		173)
   ])
 
 (define_constants
@@ -1315,8 +1316,7 @@
         (const_string "SF")
         (const_string "DF")))
    (set (attr "prefix")
-     (if_then_else (ne (symbol_ref "TARGET_AVX")
-		       (const_int 0))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
        (const_string "vex")
        (const_string "orig")))
    (set_attr "athlon_decode" "vector")
@@ -1378,8 +1378,7 @@
         (const_string "SF")
         (const_string "DF")))
    (set (attr "prefix")
-     (if_then_else (ne (symbol_ref "TARGET_AVX")
-		       (const_int 0))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
        (const_string "vex")
        (const_string "orig")))
    (set_attr "athlon_decode" "vector")
@@ -2480,6 +2479,37 @@
    (set_attr "athlon_decode" "vector")
    (set_attr "amdfam10_decode" "double")])
 
+(define_expand "movoi"
+  [(set (match_operand:OI 0 "nonimmediate_operand" "")
+	(match_operand:OI 1 "general_operand" ""))]
+  "TARGET_AVX"
+  "ix86_expand_move (OImode, operands); DONE;")
+
+(define_insn "*movoi_internal"
+  [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m")
+	(match_operand:OI 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_AVX
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "vxorps\t%0, %0, %0";
+    case 1:
+    case 2:
+      if (misaligned_operand (operands[0], OImode)
+	  || misaligned_operand (operands[1], OImode))
+	return "vmovdqu\t{%1, %0|%0, %1}";
+      else
+	return "vmovdqa\t{%1, %0|%0, %1}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sselog1,ssemov,ssemov")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
 (define_expand "movti"
   [(set (match_operand:TI 0 "nonimmediate_operand" "")
 	(match_operand:TI 1 "nonimmediate_operand" ""))]
@@ -16102,9 +16132,15 @@
 	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
 		   UNSPEC_RCP))]
   "TARGET_SSE_MATH"
-  "rcpss\t{%1, %0|%0, %1}"
+  "* return TARGET_AVX ? \"vrcpss\t{%1, %0, %0|%0, %0, %1}\"
+                       : \"rcpss\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "SF")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
+   ])
 
 (define_insn "*fop_sf_1_avx"
   [(set (match_operand:SF 0 "register_operand" "=x")
@@ -16694,9 +16730,9 @@
    (set_attr "mode" "SF")
    (set_attr "prefix_0f" "1")
    (set (attr "prefix")
-        (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-          (const_string "vex")
-          (const_string "orig")))])
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_expand "rsqrtsf2"
   [(set (match_operand:SF 0 "register_operand" "")
@@ -16721,9 +16757,9 @@
    (set_attr "amdfam10_decode" "*")
    (set_attr "prefix_0f" "1")
    (set (attr "prefix") 
-        (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-          (const_string "vex")
-          (const_string "orig")))])
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_expand "sqrt<mode>2"
   [(set (match_operand:MODEF 0 "register_operand" "")
Index: config/i386/gmmintrin.h
===================================================================
--- config/i386/gmmintrin.h	(revision 135144)
+++ config/i386/gmmintrin.h	(working copy)
@@ -907,7 +907,6 @@ _mm256_lddqu_si256 (__m256i const *__P)
   return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
 }
 
-#if 0
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_rcp_ps (__m256 __A)
 {
@@ -936,13 +935,13 @@ _mm256_sqrt_ps (__m256 __A)
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_round_pd (__m256d __V, const int __M)
 {
-  return (__m128d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
+  return (__m256d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
 }
 
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_round_ps (__m256 __V, const int __M)
 {
-  return (__m128) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
+  return (__m256) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
 }
 #else
 #define _mm256_round_pd(V, M) \
@@ -956,7 +955,6 @@ _mm256_round_ps (__m256 __V, const int _
 #define _mm256_floor_pd(V)	_mm256_round_pd ((V), _MM_FROUND_FLOOR)
 #define _mm256_ceil_ps(V)	_mm256_round_ps ((V), _MM_FROUND_CEIL)
 #define _mm256_floor_ps(V)	_mm256_round_ps ((V), _MM_FROUND_FLOOR)
-#endif
 
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_unpackhi_pd (__m256d __A, __m256d __B)
@@ -982,41 +980,76 @@ _mm256_unpacklo_ps (__m256 __A, __m256 _
   return (__m256) __builtin_ia32_unpcklps256 ((__v8sf)__A, (__v8sf)__B);
 }
 
-#if 0
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_pd (__m128d __M, __m128d __V)
+{
+  return __builtin_ia32_vtestzpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_pd (__m128d __M, __m128d __V)
+{
+  return __builtin_ia32_vtestcpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_pd (__m128d __M, __m128d __V)
+{
+  return __builtin_ia32_vtestnzcpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_ps (__m128 __M, __m128 __V)
+{
+  return __builtin_ia32_vtestzps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_ps (__m128 __M, __m128 __V)
+{
+  return __builtin_ia32_vtestcps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_ps (__m128 __M, __m128 __V)
+{
+  return __builtin_ia32_vtestnzcps ((__v4sf)__M, (__v4sf)__V);
+}
+
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_testz_pd (__m256d __M, __m256d __V)
 {
-  return __builtin_ia32_ptestzpd256 ((__v4df)__M, (__v4df)__V);
+  return __builtin_ia32_vtestzpd256 ((__v4df)__M, (__v4df)__V);
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_testc_pd (__m256d __M, __m256d __V)
 {
-  return __builtin_ia32_ptestcpd256 ((__v4df)__M, (__v4df)__V);
+  return __builtin_ia32_vtestcpd256 ((__v4df)__M, (__v4df)__V);
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_testnzc_pd (__m256d __M, __m256d __V)
 {
-  return __builtin_ia32_ptestnzcpd256 ((__v4df)__M, (__v4df)__V);
+  return __builtin_ia32_vtestnzcpd256 ((__v4df)__M, (__v4df)__V);
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_testz_ps (__m256 __M, __m256 __V)
 {
-  return __builtin_ia32_ptestzps256 ((__v8sf)__M, (__v8sf)__V);
+  return __builtin_ia32_vtestzps256 ((__v8sf)__M, (__v8sf)__V);
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_testc_ps (__m256 __M, __m256 __V)
 {
-  return __builtin_ia32_ptestcps256 ((__v8sf)__M, (__v8sf)__V);
+  return __builtin_ia32_vtestcps256 ((__v8sf)__M, (__v8sf)__V);
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_testnzc_ps (__m256 __M, __m256 __V)
 {
-  return __builtin_ia32_ptestnzcps256 ((__v8sf)__M, (__v8sf)__V);
+  return __builtin_ia32_vtestnzcps256 ((__v8sf)__M, (__v8sf)__V);
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1058,13 +1091,14 @@ _mm256_setzero_pd (void)
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_setzero_ps (void)
 {
-  return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+  return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0,
+				 0.0, 0.0, 0.0, 0.0 };
 }
 
 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_setzero_si256 (void)
 {
-  return __extension__ (__m256i){ 0, 0, 0, 0 };
+  return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
 }
 
 /* Create the vector [A B C D].  */
@@ -1079,7 +1113,8 @@ extern __inline __m256 __attribute__((__
 _mm256_set_ps (float __A, float __B, float __C, float __D,
 	       float __E, float __F, float __G, float __H)
 {
-  return __extension__ (__m256){ __H, __G, __F, __E, __D, __C, __B, __A };
+  return __extension__ (__m256){ __H, __G, __F, __E,
+				 __D, __C, __B, __A };
 }
 
 /* Create the vector [A B C D E F G H].  */
@@ -1087,7 +1122,8 @@ extern __inline __m256i __attribute__((_
 _mm256_set_epi32 (int __A, int __B, int __C, int __D,
 		  int __E, int __F, int __G, int __H)
 {
-  return __extension__ (__m256i){ __H, __G, __F, __E, __D, __C, __B, __A };
+  return __extension__ (__m256i)(__v8si){ __H, __G, __F, __E,
+					  __D, __C, __B, __A };
 }
 
 /* Create a vector with all elements equal to A.  */
@@ -1101,16 +1137,17 @@ _mm256_set1_pd (double __A)
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_set1_ps (float __A)
 {
-  return __extension__ (__m256){ __A, __A, __A, __A, __A, __A, __A, __A };
+  return __extension__ (__m256){ __A, __A, __A, __A,
+				 __A, __A, __A, __A };
 }
 
 /* Create a vector with all elements equal to A.  */
 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_set1_epi32 (int __A)
 {
-  return __extension__ (__m256i){ __A, __A, __A, __A, __A, __A, __A, __A };
+  return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A,
+					  __A, __A, __A, __A };
 }
-#endif
 
 /* Casts between various SP, DP, INT vector types.  Note that these do no
    conversion of values, they just change the type.  */
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 135144)
+++ config/i386/sse.md	(working copy)
@@ -644,15 +644,43 @@
   [(set_attr "type" "ssediv")
    (set_attr "mode" "<ssescalarmode>")])
 
+(define_insn "avx_rcpv8sf2"
+  [(set (match_operand:V8SF 0 "register_operand" "=x")
+	(unspec:V8SF
+	  [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+  "TARGET_AVX"
+  "vrcpps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
 (define_insn "sse_rcpv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
 	(unspec:V4SF
 	  [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
   "TARGET_SSE"
-  "rcpps\t{%1, %0|%0, %1}"
+  "* return TARGET_AVX ? \"vrcpps\t{%1, %0|%0, %1}\"
+                       : \"rcpps\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "sse")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "mode" "V4SF")])
 
+(define_insn "*avx_vmrcpv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+		       UNSPEC_RCP)
+	  (match_operand:V4SF 2 "register_operand" "x")
+	  (const_int 1)))]
+  "TARGET_AVX"
+  "vrcpss\t{%1, %2, %0|%0, %2, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "SF")])
+
 (define_insn "sse_vmrcpv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
 	(vec_merge:V4SF
@@ -665,6 +693,29 @@
   [(set_attr "type" "sse")
    (set_attr "mode" "SF")])
 
+(define_expand "sqrtv8sf2"
+  [(set (match_operand:V8SF 0 "register_operand" "")
+	(sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
+  "TARGET_AVX"
+{
+  if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+      && flag_finite_math_only && !flag_trapping_math
+      && flag_unsafe_math_optimizations)
+    {
+      ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
+      DONE;
+    }
+})
+
+(define_insn "avx_sqrtv8sf2"
+  [(set (match_operand:V8SF 0 "register_operand" "=x")
+	(sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_AVX"
+  "vsqrtps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
 (define_expand "sqrtv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "")
 	(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
@@ -683,18 +734,50 @@
   [(set (match_operand:V4SF 0 "register_operand" "=x")
 	(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
   "TARGET_SSE"
-  "sqrtps\t{%1, %0|%0, %1}"
+  "* return TARGET_AVX ? \"vsqrtps\t{%1, %0|%0, %1}\"
+                       : \"sqrtps\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "sse")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "mode" "V4SF")])
 
+(define_insn "sqrtv4df2"
+  [(set (match_operand:V4DF 0 "register_operand" "=x")
+	(sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_AVX"
+  "vsqrtpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V4DF")])
+
 (define_insn "sqrtv2df2"
   [(set (match_operand:V2DF 0 "register_operand" "=x")
 	(sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2"
-  "sqrtpd\t{%1, %0|%0, %1}"
+  "* return TARGET_AVX ? \"vsqrtpd\t{%1, %0|%0, %1}\"
+                       : \"sqrtpd\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "sse")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "mode" "V2DF")])
 
+(define_insn "*avx_vmsqrt<mode>2"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (sqrt:SSEMODEF2P
+	    (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
+	  (match_operand:SSEMODEF2P 2 "register_operand" "x")
+	  (const_int 1)))]
+  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<ssescalarmode>")])
+
 (define_insn "<sse>_vmsqrt<mode>2"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
 	(vec_merge:SSEMODEF2P
@@ -707,6 +790,26 @@
   [(set_attr "type" "sse")
    (set_attr "mode" "<ssescalarmode>")])
 
+(define_expand "rsqrtv8sf2"
+  [(set (match_operand:V8SF 0 "register_operand" "")
+	(unspec:V8SF
+	  [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
+  "TARGET_AVX && TARGET_SSE_MATH"
+{
+  ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
+  DONE;
+})
+
+(define_insn "avx_rsqrtv8sf2"
+  [(set (match_operand:V8SF 0 "register_operand" "=x")
+	(unspec:V8SF
+	  [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
+  "TARGET_AVX"
+  "vrsqrtps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
 (define_expand "rsqrtv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "")
 	(unspec:V4SF
@@ -722,10 +825,28 @@
 	(unspec:V4SF
 	  [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
   "TARGET_SSE"
-  "rsqrtps\t{%1, %0|%0, %1}"
+  "* return TARGET_AVX ? \"vrsqrtps\t{%1, %0|%0, %1}\"
+                       : \"rsqrtps\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "sse")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "mode" "V4SF")])
 
+(define_insn "*avx_vmrsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+		       UNSPEC_RSQRT)
+	  (match_operand:V4SF 2 "register_operand" "x")
+	  (const_int 1)))]
+  "TARGET_AVX"
+  "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "SF")])
+
 (define_insn "sse_vmrsqrtv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
 	(vec_merge:V4SF
@@ -1159,9 +1280,9 @@
   [(set_attr "type" "ssecomi")
    (set_attr "mode" "<MODE>")
    (set (attr "prefix")
-        (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-          (const_string "vex")
-          (const_string "orig")))])
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_insn "<sse>_ucomi"
   [(set (reg:CCFPU FLAGS_REG)
@@ -1178,9 +1299,9 @@
   [(set_attr "type" "ssecomi")
    (set_attr "mode" "<MODE>")
    (set (attr "prefix")
-        (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-          (const_string "vex")
-          (const_string "orig")))])
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_expand "vcond<mode>"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
@@ -1957,9 +2078,9 @@
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "SI")])
 
@@ -1974,9 +2095,9 @@
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "SI")])
 
@@ -1993,9 +2114,9 @@
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "DI")])
 
@@ -2010,9 +2131,9 @@
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "DI")])
 
@@ -2029,9 +2150,9 @@
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "SI")])
 
@@ -2048,9 +2169,9 @@
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "DI")])
 
@@ -2215,9 +2336,9 @@
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "SI")])
 
@@ -2232,9 +2353,9 @@
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "SI")])
 
@@ -2251,9 +2372,9 @@
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "DI")])
 
@@ -2268,9 +2389,9 @@
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "DI")])
 
@@ -2285,9 +2406,9 @@
 		       : \"cvttsd2si\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "sseicvt")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "SI")
    (set_attr "athlon_decode" "double,vector")
@@ -2304,9 +2425,9 @@
 		       : \"cvttsd2siq\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "sseicvt")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "DI")
    (set_attr "athlon_decode" "double,vector")
@@ -2332,9 +2453,9 @@
 		       : \"cvtdq2pd\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "ssecvt")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "mode" "V2DF")])
 
 (define_insn "avx_cvtpd2dq256"
@@ -2367,9 +2488,9 @@
 		       : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "ssecvt")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "TI")
    (set_attr "amdfam10_decode" "double")])
@@ -2401,9 +2522,9 @@
 		       : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "ssecvt")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "TI")
    (set_attr "amdfam10_decode" "double")])
@@ -2497,9 +2618,9 @@
 		       : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "ssecvt")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "V4SF")
    (set_attr "amdfam10_decode" "double")])
@@ -2526,9 +2647,9 @@
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "V2DF")
    (set (attr "prefix")
-	(if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-	  (const_string "vex")
-	  (const_string "orig")))
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "amdfam10_decode" "direct")])
 
 (define_expand "vec_unpacks_hi_v4sf"
@@ -2915,9 +3036,9 @@
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "V4SF")
    (set (attr "prefix")
-        (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-          (const_string "vex")
-          (const_string "orig")))])
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_insn "avx_movsldup256"
   [(set (match_operand:V8SF 0 "register_operand" "=x")
@@ -2952,9 +3073,9 @@
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "V4SF")
    (set (attr "prefix")
-        (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-          (const_string "vex")
-          (const_string "orig")))])
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_expand "avx_shufps256"
   [(match_operand:V8SF 0 "register_operand" "")
@@ -4023,9 +4144,9 @@
   [(set_attr "type" "sselog1")
    (set_attr "mode" "DF")
    (set (attr "prefix")
-      (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-        (const_string "vex")
-        (const_string "orig")))])
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_insn "vec_dupv2df"
   [(set (match_operand:V2DF 0 "register_operand" "=x")
@@ -5651,6 +5772,22 @@
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
+(define_insn "*avx_pinsrb"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+	(vec_merge:V16QI
+	  (vec_duplicate:V16QI
+	    (match_operand:QI 2 "nonimmediate_operand" "rm"))
+	  (match_operand:V16QI 1 "register_operand" "x")
+	  (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
+  "TARGET_AVX"
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+  return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "TI")])
+
 (define_insn "*sse4_1_pinsrb"
   [(set (match_operand:V16QI 0 "register_operand" "=x")
 	(vec_merge:V16QI
@@ -5667,6 +5804,22 @@
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
 
+(define_insn "*avx_pinsrw"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+	(vec_merge:V8HI
+	  (vec_duplicate:V8HI
+	    (match_operand:HI 2 "nonimmediate_operand" "rm"))
+	  (match_operand:V8HI 1 "register_operand" "x")
+	  (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
+  "TARGET_AVX"
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+  return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "TI")])
+
 (define_insn "*sse2_pinsrw"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
 	(vec_merge:V8HI
@@ -5684,6 +5837,22 @@
    (set_attr "mode" "TI")])
 
 ;; It must come before sse2_loadld since it is preferred.
+(define_insn "*avx_pinsrd"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(vec_merge:V4SI
+	  (vec_duplicate:V4SI
+	    (match_operand:SI 2 "nonimmediate_operand" "rm"))
+	  (match_operand:V4SI 1 "register_operand" "x")
+	  (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+  "TARGET_AVX"
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+  return "vpinsrd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "TI")])
+
 (define_insn "*sse4_1_pinsrd"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(vec_merge:V4SI
@@ -5700,6 +5869,22 @@
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
 
+(define_insn "*avx_pinsrq"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+	(vec_merge:V2DI
+	  (vec_duplicate:V2DI
+	    (match_operand:DI 2 "nonimmediate_operand" "rm"))
+	  (match_operand:V2DI 1 "register_operand" "x")
+	  (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
+  "TARGET_AVX && TARGET_64BIT"	; DImode r/m source: 64-bit only, as pextrq.
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); /* mask->index */
+  return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "TI")])
+
 (define_insn "*sse4_1_pinsrq"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
 	(vec_merge:V2DI
@@ -5723,10 +5908,15 @@
 	    (match_operand:V16QI 1 "register_operand" "x")
 	    (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
   "TARGET_SSE4_1"
-  "pextrb\t{%2, %1, %0|%0, %1, %2}"
+  "* return TARGET_AVX ? \"vpextrb\t{%2, %1, %0|%0, %1, %2}\"
+                       : \"pextrb\t{%2, %1, %0|%0, %1, %2}\";"
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
-   (set_attr "mode" "TI")])
+   (set_attr "mode" "TI")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_insn "*sse4_1_pextrb_memory"
   [(set (match_operand:QI 0 "memory_operand" "=m")
@@ -5734,10 +5924,15 @@
 	  (match_operand:V16QI 1 "register_operand" "x")
 	  (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
   "TARGET_SSE4_1"
-  "pextrb\t{%2, %1, %0|%0, %1, %2}"
+  "* return TARGET_AVX ? \"vpextrb\t{%2, %1, %0|%0, %1, %2}\"
+                       : \"pextrb\t{%2, %1, %0|%0, %1, %2}\";"
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
-   (set_attr "mode" "TI")])
+   (set_attr "mode" "TI")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_insn "*sse2_pextrw"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -5746,10 +5941,15 @@
 	    (match_operand:V8HI 1 "register_operand" "x")
 	    (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
   "TARGET_SSE2"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
+  "* return TARGET_AVX ? \"vpextrw\t{%2, %1, %0|%0, %1, %2}\"
+                       : \"pextrw\t{%2, %1, %0|%0, %1, %2}\";"
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
-   (set_attr "mode" "TI")])
+   (set_attr "mode" "TI")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_insn "*sse4_1_pextrw_memory"
   [(set (match_operand:HI 0 "memory_operand" "=m")
@@ -5757,10 +5957,15 @@
 	  (match_operand:V8HI 1 "register_operand" "x")
 	  (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
   "TARGET_SSE4_1"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
+  "* return TARGET_AVX ? \"vpextrw\t{%2, %1, %0|%0, %1, %2}\"
+                       : \"pextrw\t{%2, %1, %0|%0, %1, %2}\";"
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
-   (set_attr "mode" "TI")])
+   (set_attr "mode" "TI")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_insn "*sse4_1_pextrd"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
@@ -5768,10 +5973,15 @@
 	  (match_operand:V4SI 1 "register_operand" "x")
 	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
   "TARGET_SSE4_1"
-  "pextrd\t{%2, %1, %0|%0, %1, %2}"
+  "* return TARGET_AVX ? \"vpextrd\t{%2, %1, %0|%0, %1, %2}\"
+                       : \"pextrd\t{%2, %1, %0|%0, %1, %2}\";"
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
-   (set_attr "mode" "TI")])
+   (set_attr "mode" "TI")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
 (define_insn "*sse4_1_pextrq"
@@ -5780,10 +5990,15 @@
 	  (match_operand:V2DI 1 "register_operand" "x")
 	  (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
   "TARGET_SSE4_1 && TARGET_64BIT"
-  "pextrq\t{%2, %1, %0|%0, %1, %2}"
+  "* return TARGET_AVX ? \"vpextrq\t{%2, %1, %0|%0, %1, %2}\"
+                       : \"pextrq\t{%2, %1, %0|%0, %1, %2}\";"
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
-   (set_attr "mode" "TI")])
+   (set_attr "mode" "TI")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_expand "sse2_pshufd"
   [(match_operand:V4SI 0 "register_operand" "")
@@ -6550,14 +6765,30 @@
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
+(define_insn "avx_movmskp<avxmodesuffixf2c>256"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI
+	  [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
+	  UNSPEC_MOVMSK))]
+  "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
 	  [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
 	  UNSPEC_MOVMSK))]
   "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
-  "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+  "* return TARGET_AVX ? \"vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}\"
+                       : \"movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "ssecvt")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "mode" "<MODE>")])
 
 (define_insn "sse2_pmovmskb"
@@ -6565,9 +6796,14 @@
 	(unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
 		   UNSPEC_MOVMSK))]
   "TARGET_SSE2"
-  "pmovmskb\t{%1, %0|%0, %1}"
+  "* return TARGET_AVX ? \"vpmovmskb\t{%1, %0|%0, %1}\"
+                       : \"pmovmskb\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix_data16" "1")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "mode" "SI")])
 
 (define_expand "sse2_maskmovdqu"
@@ -6614,9 +6850,9 @@
   [(set_attr "type" "sse")
    (set_attr "memory" "load")
    (set (attr "prefix")
-        (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
-          (const_string "vex")
-          (const_string "orig")))])
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))])
 
 (define_insn "sse_stmxcsr"
   [(set (match_operand:SI 0 "memory_operand" "=m")
@@ -7963,19 +8199,60 @@
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
 
+;; vtestps/vtestpd are very similar to comiss and ucomiss when
+;; setting FLAGS_REG.  But they are not really compare instructions.
+(define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
+  [(set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
+		    (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
+		   UNSPEC_VTESTP))]
+  "TARGET_AVX"
+  "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecomi")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
 ;; But it is not a really compare instruction.
+(define_insn "avx_ptest256"
+  [(set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
+		    (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
+		   UNSPEC_PTEST))]
+  "TARGET_AVX"
+  "vptest\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecomi")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
 (define_insn "sse4_1_ptest"
   [(set (reg:CC FLAGS_REG)
 	(unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
 		    (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
 		   UNSPEC_PTEST))]
   "TARGET_SSE4_1"
-  "ptest\t{%1, %0|%0, %1}"
+  "* return TARGET_AVX ? \"vptest\t{%1, %0|%0, %1}\"
+                       : \"ptest\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "ssecomi")
    (set_attr "prefix_extra" "1")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "mode" "TI")])
 
+(define_insn "avx_roundp<avxmodesuffixf2c>256"
+  [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+	(unspec:AVX256MODEF2P
+	  [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
+	   (match_operand:SI 2 "const_0_to_15_operand" "n")]
+	  UNSPEC_ROUND))]
+  "TARGET_AVX"
+  "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
 	(unspec:SSEMODEF2P
@@ -7983,9 +8260,29 @@
 	   (match_operand:SI 2 "const_0_to_15_operand" "n")]
 	  UNSPEC_ROUND))]
   "TARGET_ROUND"
-  "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+  "* return TARGET_AVX ? \"vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"
+                       : \"roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix_extra" "1")
+   (set (attr "prefix")
+     (if_then_else (ne (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "vex")
+       (const_string "orig")))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_rounds<ssemodesuffixf2c>"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (unspec:SSEMODEF2P
+	    [(match_operand:SSEMODEF2P 2 "register_operand" "x")
+	     (match_operand:SI 3 "const_0_to_15_operand" "n")]
+	    UNSPEC_ROUND)
+	  (match_operand:SSEMODEF2P 1 "register_operand" "x")
+	  (const_int 1)))]
+  "TARGET_AVX"
+  "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
@@ -9814,7 +10111,8 @@
     }
   gcc_unreachable ();
 }
-  [(set_attr "prefix" "vex")
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "vex")
    (set_attr "mode" "<avxvecmode>")
    (set (attr "length")
     (if_then_else (eq_attr "alternative" "0")
@@ -9849,7 +10147,8 @@
     }
   gcc_unreachable ();
 }
-  [(set_attr "prefix" "vex")
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "vex")
    (set_attr "mode" "<avxvecmode>")
    (set (attr "length")
     (if_then_else (eq_attr "alternative" "0")
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 135145)
+++ config/i386/i386.c	(working copy)
@@ -18091,6 +18091,17 @@ enum ix86_builtins
   IX86_BUILTIN_MOVSLDUP256,
   IX86_BUILTIN_MOVDDUP256,
 
+  IX86_BUILTIN_SQRTPD256,
+  IX86_BUILTIN_SQRTPS256,
+  IX86_BUILTIN_SQRTPS_NR256,
+  IX86_BUILTIN_RSQRTPS256,
+  IX86_BUILTIN_RSQRTPS_NR256,
+
+  IX86_BUILTIN_RCPPS256,
+
+  IX86_BUILTIN_ROUNDPD256,
+  IX86_BUILTIN_ROUNDPS256,
+
   IX86_BUILTIN_UNPCKHPD256,
   IX86_BUILTIN_UNPCKLPD256,
   IX86_BUILTIN_UNPCKHPS256,
@@ -18103,6 +18114,25 @@ enum ix86_builtins
   IX86_BUILTIN_PS_PS256,
   IX86_BUILTIN_PD_PD256,
 
+  IX86_BUILTIN_VTESTZPD,
+  IX86_BUILTIN_VTESTCPD,
+  IX86_BUILTIN_VTESTNZCPD,
+  IX86_BUILTIN_VTESTZPS,
+  IX86_BUILTIN_VTESTCPS,
+  IX86_BUILTIN_VTESTNZCPS,
+  IX86_BUILTIN_VTESTZPD256,
+  IX86_BUILTIN_VTESTCPD256,
+  IX86_BUILTIN_VTESTNZCPD256,
+  IX86_BUILTIN_VTESTZPS256,
+  IX86_BUILTIN_VTESTCPS256,
+  IX86_BUILTIN_VTESTNZCPS256,
+  IX86_BUILTIN_PTESTZ256,
+  IX86_BUILTIN_PTESTC256,
+  IX86_BUILTIN_PTESTNZC256,
+
+  IX86_BUILTIN_MOVMSKPD256,
+  IX86_BUILTIN_MOVMSKPS256,
+
   /* TFmode support builtins.  */
   IX86_BUILTIN_INFQ,
   IX86_BUILTIN_FABSQ,
@@ -18468,11 +18498,18 @@ enum ix86_builtin_type
   FLOAT128_FTYPE_FLOAT128,
   FLOAT_FTYPE_FLOAT,
   FLOAT128_FTYPE_FLOAT128_FLOAT128,
+  INT_FTYPE_V8SF_V8SF_PTEST,
+  INT_FTYPE_V4DI_V4DI_PTEST,
+  INT_FTYPE_V4DF_V4DF_PTEST,
+  INT_FTYPE_V4SF_V4SF_PTEST,
   INT_FTYPE_V2DI_V2DI_PTEST,
+  INT_FTYPE_V2DF_V2DF_PTEST,
   INT64_FTYPE_V4SF,
   INT64_FTYPE_V2DF,
   INT_FTYPE_V16QI,
   INT_FTYPE_V8QI,
+  INT_FTYPE_V8SF,
+  INT_FTYPE_V4DF,
   INT_FTYPE_V4SF,
   INT_FTYPE_V2DF,
   V16QI_FTYPE_V16QI,
@@ -19246,6 +19283,17 @@ static const struct builtin_description 
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
 
+  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
+
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256,  "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256,  "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256,  "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
@@ -19257,6 +19305,25 @@ static const struct builtin_description 
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
+
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF  },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
 };
 
 /* SSE5 */
@@ -20059,6 +20126,8 @@ ix86_init_mmx_sse_builtins (void)
 						    V8SImode);
   tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
 						    V8SFmode);
+  tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
+						    V4DImode);
   tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
 						    V4DFmode);
   tree v8sf_ftype_v8sf
@@ -20285,6 +20354,26 @@ ix86_init_mmx_sse_builtins (void)
     = build_function_type_list (V4SI_type_node,
 				V8SI_type_node,
 				NULL_TREE);
+  tree int_ftype_v4df
+    = build_function_type_list (integer_type_node,
+				V4DF_type_node,
+				NULL_TREE);
+  tree int_ftype_v8sf
+    = build_function_type_list (integer_type_node,
+				V8SF_type_node,
+				NULL_TREE);
+  tree int_ftype_v8sf_v8sf
+    = build_function_type_list (integer_type_node,
+				V8SF_type_node, V8SF_type_node,
+				NULL_TREE);
+  tree int_ftype_v4di_v4di
+    = build_function_type_list (integer_type_node,
+				V4DI_type_node, V4DI_type_node,
+				NULL_TREE);
+  tree int_ftype_v4df_v4df
+    = build_function_type_list (integer_type_node,
+				V4DF_type_node, V4DF_type_node,
+				NULL_TREE);
 
   tree ftype;
 
@@ -20453,9 +20542,24 @@ ix86_init_mmx_sse_builtins (void)
 	case FLOAT_FTYPE_FLOAT:
 	  type = float_ftype_float;
 	  break;
+	case INT_FTYPE_V8SF_V8SF_PTEST:
+	  type = int_ftype_v8sf_v8sf;
+	  break;
+	case INT_FTYPE_V4DI_V4DI_PTEST:
+	  type = int_ftype_v4di_v4di;
+	  break;
+	case INT_FTYPE_V4DF_V4DF_PTEST:
+	  type = int_ftype_v4df_v4df;
+	  break;
+	case INT_FTYPE_V4SF_V4SF_PTEST:
+	  type = int_ftype_v4sf_v4sf;
+	  break;
 	case INT_FTYPE_V2DI_V2DI_PTEST:
 	  type = int_ftype_v2di_v2di;
 	  break;
+	case INT_FTYPE_V2DF_V2DF_PTEST:
+	  type = int_ftype_v2df_v2df;
+	  break;
 	case INT64_FTYPE_V4SF:
 	  type = int64_ftype_v4sf;
 	  break;
@@ -20468,6 +20572,12 @@ ix86_init_mmx_sse_builtins (void)
 	case INT_FTYPE_V8QI:
 	  type = int_ftype_v8qi;
 	  break;
+	case INT_FTYPE_V8SF:
+	  type = int_ftype_v8sf;
+	  break;
+	case INT_FTYPE_V4DF:
+	  type = int_ftype_v4df;
+	  break;
 	case INT_FTYPE_V4SF:
 	  type = int_ftype_v4sf;
 	  break;
@@ -21731,7 +21841,12 @@ ix86_expand_args_builtin (const struct b
 
   switch ((enum ix86_builtin_type) d->flag)
     {
+    case INT_FTYPE_V8SF_V8SF_PTEST:
+    case INT_FTYPE_V4DI_V4DI_PTEST:
+    case INT_FTYPE_V4DF_V4DF_PTEST:
+    case INT_FTYPE_V4SF_V4SF_PTEST:
     case INT_FTYPE_V2DI_V2DI_PTEST:
+    case INT_FTYPE_V2DF_V2DF_PTEST:
       return ix86_expand_sse_ptest (d, exp, target);
     case FLOAT128_FTYPE_FLOAT128:
     case FLOAT_FTYPE_FLOAT:
@@ -21739,6 +21854,8 @@ ix86_expand_args_builtin (const struct b
     case INT64_FTYPE_V2DF:
     case INT_FTYPE_V16QI:
     case INT_FTYPE_V8QI:
+    case INT_FTYPE_V8SF:
+    case INT_FTYPE_V4DF:
     case INT_FTYPE_V4SF:
     case INT_FTYPE_V2DF:
     case V16QI_FTYPE_V16QI:
@@ -21980,6 +22097,8 @@ ix86_expand_args_builtin (const struct b
 	      case CODE_FOR_sse4_1_blendps:
 	      case CODE_FOR_avx_blendpd256:
 	      case CODE_FOR_avx_vpermilv4df:
+	      case CODE_FOR_avx_roundpd256:
+	      case CODE_FOR_avx_roundps256:
 		error ("the last argument must be a 4-bit immediate");
 		return const0_rtx;
 
@@ -23384,8 +23503,10 @@ ix86_hard_regno_mode_ok (int regno, enum
     {
       /* We implement the move patterns for all vector modes into and
 	 out of SSE registers, even when no operation instructions
-	 are available.  */
-      return (VALID_AVX256_REG_MODE (mode)
+	 are available.  OImode move is available only when AVX is
+	 enabled.  */
+      return ((TARGET_AVX && mode == OImode)
+	      || VALID_AVX256_REG_MODE (mode)
 	      || VALID_SSE_REG_MODE (mode)
 	      || VALID_SSE2_REG_MODE (mode)
 	      || VALID_MMX_REG_MODE (mode)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]