This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[AVX]: Add vinsertf128 support
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sat, 26 Apr 2008 07:15:29 -0700
- Subject: [AVX]: Add vinsertf128 support
Hi,
I am checking in this patch to add vinsertf128 support.
H.J.
----
gcc/
2008-04-26 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/gmmintrin.h (_mm256_insertf128_pd): Fix typos.
(_mm256_insertf128_ps): Likewise.
(_mm256_insertf128_si256): Likewise.
(_mm256_load_si256): Likewise.
* config/i386/i386.c (ix86_builtins): Add
IX86_BUILTIN_VINSERTF128PD256, IX86_BUILTIN_VINSERTF128PS256,
IX86_BUILTIN_VINSERTF128SI256, IX86_BUILTIN_LOADUPD256,
IX86_BUILTIN_LOADUPS256, IX86_BUILTIN_STOREUPD256
and IX86_BUILTIN_STOREUPS256.
(sse_builtin_type): Add V8SI_FTYPE_V8SI_V4SI_INT,
V8SF_FTYPE_V8SF_V4SF_INT and V4DF_FTYPE_V4DF_V2DF_INT.
(bdesc_sse_args): Add __builtin_ia32_vinsertf128_pd256,
__builtin_ia32_vinsertf128_ps256 and
__builtin_ia32_vinsertf128_si256.
(ix86_init_mmx_sse_builtins): Updated. Define
__builtin_ia32_loadupd256, __builtin_ia32_loadups256,
__builtin_ia32_storeupd256 and __builtin_ia32_storeups256.
(ix86_expand_sse_operands_builtin): Handle
CODE_FOR_avx_vinsertf128_pd256, CODE_FOR_avx_vinsertf128_ps256
and CODE_FOR_avx_vinsertf128_si256.
(ix86_expand_builtin): Handle IX86_BUILTIN_LOADUPS256,
IX86_BUILTIN_STOREUPS256, IX86_BUILTIN_LOADUPD256
and IX86_BUILTIN_STOREUPD256.
* config/i386/sse.md (AVX256MODEF128): New.
(AVX256MODEF128S): Likewise.
(avxmodesuffixf128): Likewise.
(avx_vinsertf128_<avxmodesuffixf128>256): Likewise.
(avx_vinsertf128_pd256_0): Likewise.
(avx_vinsertf128_pd256_1): Likewise.
(avx_vinsertf128_<avxmodesuffixf128>256_0): Likewise.
(avx_vinsertf128_<avxmodesuffixf128>256_1): Likewise.
(avxhalfvecmode): Add V8SI.
(avx_vperm2f128<mode>3): Replace AVX256MODE with AVX256MODEF128.
gcc/testsuite/
2008-04-26 H.J. Lu <hongjiu.lu@intel.com>
* gcc.target/i386/avx-1.c: Add more tests for gmmintrin.h.
* gcc.target/i386/avx-2.c: Likewise.
* gcc.target/i386/i386.exp (check_effective_target_avx): New.
Index: gcc/testsuite/gcc.target/i386/i386.exp
===================================================================
--- gcc/testsuite/gcc.target/i386/i386.exp (.../fsf/branches/avx) (revision 2325)
+++ gcc/testsuite/gcc.target/i386/i386.exp (.../branches/avx-internal) (revision 2325)
@@ -79,6 +79,16 @@ proc check_effective_target_pclmul { } {
} "-O2 -mpclmul" ]
}
+# Return 1 if avx instructions can be compiled.
+proc check_effective_target_avx { } {
+ return [check_no_compiler_messages avx object {
+ void _mm256_zeroall (void)
+ {
+ __builtin_ia32_vzeroall ();
+ }
+ } "-O2 -mavx" ]
+}
+
# Return 1 if sse4a instructions can be compiled.
proc check_effective_target_sse4a { } {
return [check_no_compiler_messages sse4a object {
Index: gcc/testsuite/gcc.target/i386/avx-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx-1.c (.../fsf/branches/avx) (revision 2325)
+++ gcc/testsuite/gcc.target/i386/avx-1.c (.../branches/avx-internal) (revision 2325)
@@ -44,6 +44,9 @@
#define __builtin_ia32_vperm2f128_pd256(X, Y, C) __builtin_ia32_vperm2f128_pd256(X, Y, 1)
#define __builtin_ia32_vperm2f128_ps256(X, Y, C) __builtin_ia32_vperm2f128_ps256(X, Y, 1)
#define __builtin_ia32_vperm2f128_si256(X, Y, C) __builtin_ia32_vperm2f128_si256(X, Y, 1)
+#define __builtin_ia32_vinsertf128_pd256(X, Y, C) __builtin_ia32_vinsertf128_pd256(X, Y, 1)
+#define __builtin_ia32_vinsertf128_ps256(X, Y, C) __builtin_ia32_vinsertf128_ps256(X, Y, 1)
+#define __builtin_ia32_vinsertf128_si256(X, Y, C) __builtin_ia32_vinsertf128_si256(X, Y, 1)
/* wmmintrin.h */
#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
Index: gcc/testsuite/gcc.target/i386/avx-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx-2.c (.../fsf/branches/avx) (revision 2325)
+++ gcc/testsuite/gcc.target/i386/avx-2.c (.../branches/avx-internal) (revision 2325)
@@ -79,6 +79,9 @@ test_3 (_mm256_permute2_ps, __m256, __m2
test_2 (_mm256_permute2f128_pd, __m256d, __m256d, __m256d, 1)
test_2 (_mm256_permute2f128_ps, __m256, __m256, __m256, 1)
test_2 (_mm256_permute2f128_si256, __m256i, __m256i, __m256i, 1)
+test_2 (_mm256_insertf128_pd, __m256d, __m256d, __m128d, 1)
+test_2 (_mm256_insertf128_ps, __m256, __m256, __m128, 1)
+test_2 (_mm256_insertf128_si256, __m256i, __m256i, __m128i, 1)
/* wmmintrin.h */
test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1)
Index: gcc/testsuite/ChangeLog.internal
===================================================================
Index: gcc/ChangeLog.internal
===================================================================
Index: gcc/config/i386/gmmintrin.h
===================================================================
--- gcc/config/i386/gmmintrin.h (.../fsf/branches/avx) (revision 2325)
+++ gcc/config/i386/gmmintrin.h (.../branches/avx-internal) (revision 2325)
@@ -718,45 +718,44 @@ _mm256_broadcast_ps (__m128 const *__X)
return (__m256) __builtin_ia32_vbroadcastf128_ps256 (__X);
}
-#if 0
#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_insertf128_pd (__m256d __X, __m256d __Y, const int __O)
+_mm256_insertf128_pd (__m256d __X, __m128d __Y, const int __O)
{
- return (__m256d) __builtin_ia32_vinsertf128_pd ((__v4df)__X,
- (__v4df)__y,
- __O);
+ return (__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)__X,
+ (__v2df)__Y,
+ __O);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_insertf128_ps (__m256 __X, __m256 __Y, const int __O)
+_mm256_insertf128_ps (__m256 __X, __m128 __Y, const int __O)
{
- return (__m256) __builtin_ia32_vinsertf128_ps ((__v8sf)__X,
- (__v8sf)__y,
- __O);
+ return (__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)__X,
+ (__v4sf)__Y,
+ __O);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_insertf128_si256 (__m256i __X, __m256i __Y, const int __O)
+_mm256_insertf128_si256 (__m256i __X, __m128i __Y, const int __O)
{
return (__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)__X,
- (__v8si)__y,
+ (__v4si)__Y,
__O);
}
#else
-#define _mm256_insertf128_pd (X, Y, O) \
- ((__m256d) __builtin_ia32_vinsertf128_pd ((__v4df)(__m256d)(X),\
- (__v4df)(__m256d)(Y),\
- (int)(O)))
-
-#define _mm256_insertf128_ps(X, Y, O) \
- ((__m256) __builtin_ia32_vinsertf128_ps ((__v8sf)(__m256)(X), \
- (__v8sf)(__m256)(Y), \
- (int)(O)))
-
-#define _mm256_insertf128_si256(X, Y, O) \
- ((__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)(__m256i)(X),\
- (__v8si)(__m256i)(Y),\
+#define _mm256_insertf128_pd(X, Y, O) \
+ ((__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)(__m256d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(O)))
+
+#define _mm256_insertf128_ps(X, Y, O) \
+ ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)(__m256)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(O)))
+
+#define _mm256_insertf128_si256(X, Y, O) \
+ ((__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)(__m256i)(X), \
+ (__v4si)(__m128i)(Y), \
(int)(O)))
#endif
@@ -809,7 +808,7 @@ _mm256_storeu_ps (float *__P, __m256 __A
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_load_si256 (__m128i const *__P)
+_mm256_load_si256 (__m256i const *__P)
{
return *__P;
}
@@ -819,7 +818,6 @@ _mm256_store_si256 (__m256i *__P, __m256
{
*__P = __A;
}
-#endif
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_si256 (__m256i const *__P)
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md (.../fsf/branches/avx) (revision 2325)
+++ gcc/config/i386/sse.md (.../branches/avx-internal) (revision 2325)
@@ -57,6 +57,8 @@
(define_mode_iterator AVXMODEF4P [V4SF V4DF])
(define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
(define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
+(define_mode_iterator AVX256MODEF128 [V8SI V8SF V4DF])
+(define_mode_iterator AVX256MODEF128S [V8SI V8SF])
;; Mapping from float mode to required SSE level
(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
@@ -85,12 +87,13 @@
(define_mode_attr avxextractmode
[(V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI") (V4SF "V8SF") (V2DF "V4DF")])
(define_mode_attr avxhalfvecmode
- [(V4SF "V2SF") (V8SF "V4SF") (V4DF "V2DF")])
+ [(V4SF "V2SF") (V8SI "V4SI") (V8SF "V4SF") (V4DF "V2DF")])
(define_mode_attr avxscalarmode
[(V4SF "SF") (V2DF "DF") (V8SF "SF") (V4DF "DF")])
(define_mode_attr avxcvtvecmode
[(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
(define_mode_attr avxmodesuffixf2c [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
+(define_mode_attr avxmodesuffixf128 [(V8SF "ps") (V8SI "si") (V4DF "pd")])
(define_mode_attr avxmodesuffix
[(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "") (V8SI "256") (V8SF "256") (V4DF "256")])
@@ -8854,10 +8857,10 @@
(set_attr "mode" "<MODE>")])
(define_insn "avx_vperm2f128<mode>3"
- [(set (match_operand:AVX256MODE 0 "register_operand" "=x")
- (unspec:AVX256MODE
- [(match_operand:AVX256MODE 1 "register_operand" "x")
- (match_operand:AVX256MODE 2 "nonimmediate_operand" "xm")
+ [(set (match_operand:AVX256MODEF128 0 "register_operand" "=x")
+ (unspec:AVX256MODEF128
+ [(match_operand:AVX256MODEF128 1 "register_operand" "x")
+ (match_operand:AVX256MODEF128 2 "nonimmediate_operand" "xm")
(match_operand:SI 3 "const_0_to_255_operand" "n")]
UNSPEC_VPERMIL2F128))]
"TARGET_AVX"
@@ -8914,3 +8917,80 @@
[(set_attr "type" "ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "V4SF")])
+
+(define_expand "avx_vinsertf128_<avxmodesuffixf128>256"
+ [(match_operand:AVX256MODEF128 0 "register_operand" "")
+ (match_operand:AVX256MODEF128 1 "register_operand" "")
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_0_to_1_operand" "")]
+ "TARGET_AVX"
+{
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ emit_insn (gen_avx_vinsertf128_<avxmodesuffixf128>256_0
+ (operands[0], operands[1], operands[2]));
+ break;
+ case 1:
+ emit_insn (gen_avx_vinsertf128_<avxmodesuffixf128>256_1
+ (operands[0], operands[1], operands[2]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
+
+(define_insn "avx_vinsertf128_pd256_0"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_concat:V4DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (vec_select:V2DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "avx_vinsertf128_pd256_1"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_concat:V4DF
+ (vec_select:V2DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "avx_vinsertf128_<avxmodesuffixf128>256_0"
+ [(set (match_operand:AVX256MODEF128S 0 "register_operand" "=x")
+ (vec_concat:AVX256MODEF128S
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODEF128S 1 "register_operand" "x")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "avx_vinsertf128_<avxmodesuffixf128>256_1"
+ [(set (match_operand:AVX256MODEF128S 0 "register_operand" "=x")
+ (vec_concat:AVX256MODEF128S
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODEF128S 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c (.../fsf/branches/avx) (revision 2325)
+++ gcc/config/i386/i386.c (.../branches/avx-internal) (revision 2325)
@@ -18022,6 +18022,13 @@ enum ix86_builtins
IX86_BUILTIN_VBROADCASTSS256,
IX86_BUILTIN_VBROADCASTPD256,
IX86_BUILTIN_VBROADCASTPS256,
+ IX86_BUILTIN_VINSERTF128PD256,
+ IX86_BUILTIN_VINSERTF128PS256,
+ IX86_BUILTIN_VINSERTF128SI256,
+ IX86_BUILTIN_LOADUPD256,
+ IX86_BUILTIN_LOADUPS256,
+ IX86_BUILTIN_STOREUPD256,
+ IX86_BUILTIN_STOREUPS256,
IX86_BUILTIN_LDDQU256,
IX86_BUILTIN_LOADDQU256,
IX86_BUILTIN_STOREDQU256,
@@ -18390,10 +18397,13 @@ enum sse_builtin_type
V2DF_FTYPE_V2DF_V2DF_V2DF,
V16QI_FTYPE_V16QI_V16QI_INT,
V8SI_FTYPE_V8SI_V8SI_INT,
+ V8SI_FTYPE_V8SI_V4SI_INT,
V8HI_FTYPE_V8HI_V8HI_INT,
V8SF_FTYPE_V8SF_V8SF_INT,
+ V8SF_FTYPE_V8SF_V4SF_INT,
V4SI_FTYPE_V4SI_V4SI_INT,
V4DF_FTYPE_V4DF_V4DF_INT,
+ V4DF_FTYPE_V4DF_V2DF_INT,
V4SF_FTYPE_V4SF_V4SF_INT,
V2DI_FTYPE_V2DI_V2DI_INT,
V2DF_FTYPE_V2DF_V2DF_INT,
@@ -18472,6 +18482,9 @@ static const struct builtin_description
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128_pd256, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128_ps256, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128_si256, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
};
static const struct builtin_description bdesc_2arg[] =
@@ -19756,7 +19769,29 @@ ix86_init_mmx_sse_builtins (void)
= build_function_type_list (void_type_node,
pchar_type_node, V32QI_type_node,
NULL_TREE);
-
+ tree v8si_ftype_v8si_v4si_int
+ = build_function_type_list (V8SI_type_node,
+ V8SI_type_node, V4SI_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v4sf_int
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V4SF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v2df_int
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V2DF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree void_ftype_pfloat_v8sf
+ = build_function_type_list (void_type_node,
+ pfloat_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree void_ftype_pdouble_v4df
+ = build_function_type_list (void_type_node,
+ pdouble_type_node, V4DF_type_node,
+ NULL_TREE);
tree ftype;
@@ -19876,18 +19911,27 @@ ix86_init_mmx_sse_builtins (void)
case V8SI_FTYPE_V8SI_V8SI_INT:
type = v8si_ftype_v8si_v8si_int;
break;
+ case V8SI_FTYPE_V8SI_V4SI_INT:
+ type = v8si_ftype_v8si_v4si_int;
+ break;
case V8HI_FTYPE_V8HI_V8HI_INT:
type = v8hi_ftype_v8hi_v8hi_int;
break;
case V8SF_FTYPE_V8SF_V8SF_INT:
type = v8sf_ftype_v8sf_v8sf_int;
break;
+ case V8SF_FTYPE_V8SF_V4SF_INT:
+ type = v8sf_ftype_v8sf_v4sf_int;
+ break;
case V4SI_FTYPE_V4SI_V4SI_INT:
type = v4si_ftype_v4si_v4si_int;
break;
case V4DF_FTYPE_V4DF_V4DF_INT:
type = v4df_ftype_v4df_v4df_int;
break;
+ case V4DF_FTYPE_V4DF_V2DF_INT:
+ type = v4df_ftype_v4df_v2df_int;
+ break;
case V4SF_FTYPE_V4SF_V4SF_INT:
type = v4sf_ftype_v4sf_v4sf_int;
break;
@@ -20326,6 +20370,10 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vbroadcastss256", v8sf_ftype_pcfloat, IX86_BUILTIN_VBROADCASTSS256);
def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vbroadcastf128_pd256", v4df_ftype_pcv2df, IX86_BUILTIN_VBROADCASTPD256);
def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vbroadcastf128_ps256", v8sf_ftype_pcv4sf, IX86_BUILTIN_VBROADCASTPS256);
+ def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_loadupd256", v4df_ftype_pcdouble, IX86_BUILTIN_LOADUPD256);
+ def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_loadups256", v8sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS256);
+ def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_storeupd256", void_ftype_pdouble_v4df, IX86_BUILTIN_STOREUPD256);
+ def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_storeups256", void_ftype_pfloat_v8sf, IX86_BUILTIN_STOREUPS256);
def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_lddqu256", v32qi_ftype_pcchar, IX86_BUILTIN_LDDQU256);
def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_loaddqu256", v32qi_ftype_pcchar, IX86_BUILTIN_LOADDQU256);
def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_storedqu256", void_ftype_pchar_v32qi, IX86_BUILTIN_STOREDQU256);
@@ -20556,9 +20604,12 @@ ix86_expand_sse_operands_builtin (enum i
case V16QI_FTYPE_V16QI_V16QI_INT:
case V8HI_FTYPE_V8HI_V8HI_INT:
case V8SI_FTYPE_V8SI_V8SI_INT:
+ case V8SI_FTYPE_V8SI_V4SI_INT:
case V8SF_FTYPE_V8SF_V8SF_INT:
+ case V8SF_FTYPE_V8SF_V4SF_INT:
case V4SI_FTYPE_V4SI_V4SI_INT:
case V4DF_FTYPE_V4DF_V4DF_INT:
+ case V4DF_FTYPE_V4DF_V2DF_INT:
case V4SF_FTYPE_V4SF_V4SF_INT:
case V2DI_FTYPE_V2DI_V2DI_INT:
case V2DF_FTYPE_V2DF_V2DF_INT:
@@ -20618,6 +20669,9 @@ ix86_expand_sse_operands_builtin (enum i
case CODE_FOR_avx_vextractf128v2df:
case CODE_FOR_avx_vextractf128v4sf:
case CODE_FOR_avx_vextractf128v4si:
+ case CODE_FOR_avx_vinsertf128_pd256:
+ case CODE_FOR_avx_vinsertf128_ps256:
+ case CODE_FOR_avx_vinsertf128_si256:
error ("the last argument must be a 1-bit immediate");
return const0_rtx;
@@ -21652,6 +21706,12 @@ ix86_expand_builtin (tree exp, rtx targe
case IX86_BUILTIN_RCPSS:
return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
+ case IX86_BUILTIN_LOADUPS256:
+ return ix86_expand_unop_builtin (CODE_FOR_avx_movups256, exp, target, 1);
+
+ case IX86_BUILTIN_STOREUPS256:
+ return ix86_expand_store_builtin (CODE_FOR_avx_movups256, exp);
+
case IX86_BUILTIN_LOADUPS:
return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
@@ -21938,6 +21998,12 @@ ix86_expand_builtin (tree exp, rtx targe
case IX86_BUILTIN_SQRTSD:
return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
+
+ case IX86_BUILTIN_LOADUPD256:
+ return ix86_expand_unop_builtin (CODE_FOR_avx_movupd256, exp, target, 1);
+ case IX86_BUILTIN_STOREUPD256:
+ return ix86_expand_store_builtin (CODE_FOR_avx_movupd256, exp);
+
case IX86_BUILTIN_LOADUPD:
return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
case IX86_BUILTIN_STOREUPD: