This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[AVX]: Add vec_extract_XX_<mode>/vec_set_XX_<mode> to AVX
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 21 May 2008 06:45:26 -0700
- Subject: [AVX]: Add vec_extract_XX_<mode>/vec_set_XX_<mode> to AVX
I am checking this patch into the AVX branch to add vec_extract_XX_<mode>
and vec_set_XX_<mode> for AVX.
H.J.
---
2008-05-21 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (bdesc_args): Updated.
(ix86_expand_args_builtin): Likewise.
(ix86_expand_vector_set): Don't cast to V4DFmode for 256bit AVX
modes.
* config/i386/sse.md (SSEMODE4F): Removed.
(SSEMODEIF4): Likewise.
(avxextractmode): Likewise.
(avx_vextractf128_<avxmodesuffixp>256): Likewise.
(avx_vextractf128_pd256_0): Likewise.
(avx_vextractf128_pd256_1): Likewise.
(avx_vextractf128_<avxmodesuffixp>256_0): Likewise.
(avx_vextractf128_<avxmodesuffixp>256_1): Likewise.
(avx_vinsertf128_<avxmodesuffixp>256): Likewise.
(avx_vinsertf128_pd256_0): Likewise.
(avx_vinsertf128_pd256_1): Likewise.
(avx_vinsertf128_<avxmodesuffixp>256_0): Likewise.
(avx_vinsertf128_<avxmodesuffixp>256_1): Likewise.
(AVX256MODE4P): New.
(avx_vextractf128<mode>): Likewise.
(vec_extract_lo_<mode>): Likewise.
(vec_extract_hi_<mode>): Likewise.
(vec_extract_lo_<mode>): Likewise.
(vec_extract_hi_<mode>): Likewise.
(vec_extract_lo_v16hi): Likewise.
(vec_extract_hi_v16hi): Likewise.
(vec_extract_lo_v32qi): Likewise.
(vec_extract_hi_v32qi): Likewise.
(avx_vinsertf128<mode>): Likewise.
(vec_set_lo_<mode>): Likewise.
(vec_set_hi_<mode>): Likewise.
(vec_set_lo_<mode>): Likewise.
(vec_set_hi_<mode>): Likewise.
(vec_set_lo_v16hi): Likewise.
(vec_set_hi_v16hi): Likewise.
(vec_set_lo_v32qi): Likewise.
(vec_set_hi_v32qi): Likewise.
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 135704)
+++ config/i386/sse.md (working copy)
@@ -48,13 +48,12 @@
(define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
(define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
(define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
-(define_mode_iterator SSEMODE4F [V4SI V4SF V2DF])
-(define_mode_iterator SSEMODEIF4 [V4SI V4SF])
(define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
(define_mode_iterator SSEMODEF2P [V4SF V2DF])
(define_mode_iterator AVX256MODEF2P [V8SF V4DF])
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
+(define_mode_iterator AVX256MODE4P [V4DI V4DF])
(define_mode_iterator AVX256MODE8P [V8SI V8SF])
(define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
(define_mode_iterator AVXMODEF4P [V4SF V4DF])
@@ -94,9 +93,7 @@
(V8SF "V8SF") (V4DF "V4DF")])
(define_mode_attr avxvecpsmode
[(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
- (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
-(define_mode_attr avxextractmode
- [(V4SI "V8SI") (V4SF "V8SF") (V2DF "V4DF")])
+ (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
(define_mode_attr avxhalfvecmode
[(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
(V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
@@ -3640,21 +3637,19 @@
DONE;
})
-(define_expand "avx_vextractf128_<avxmodesuffixp>256"
- [(match_operand:SSEMODE4F 0 "nonimmediate_operand" "")
- (match_operand:<avxextractmode> 1 "register_operand" "")
+(define_expand "avx_vextractf128<mode>"
+ [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
+ (match_operand:AVX256MODE 1 "register_operand" "")
(match_operand:SI 2 "const_0_to_1_operand" "")]
"TARGET_AVX"
{
switch (INTVAL (operands[2]))
{
case 0:
- emit_insn (gen_avx_vextractf128_<avxmodesuffixp>256_0
- (operands[0], operands[1]));
+ emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
break;
case 1:
- emit_insn (gen_avx_vextractf128_<avxmodesuffixp>256_1
- (operands[0], operands[1]));
+ emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
break;
default:
gcc_unreachable ();
@@ -3662,10 +3657,10 @@
DONE;
})
-(define_insn "avx_vextractf128_pd256_0"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
- (vec_select:V2DF
- (match_operand:V4DF 1 "register_operand" "x,x")
+(define_insn "vec_extract_lo_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
(parallel [(const_int 0) (const_int 1)])))]
"TARGET_AVX"
"vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
@@ -3674,10 +3669,10 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "avx_vextractf128_pd256_1"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
- (vec_select:V2DF
- (match_operand:V4DF 1 "register_operand" "x,x")
+(define_insn "vec_extract_hi_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
(parallel [(const_int 2) (const_int 3)])))]
"TARGET_AVX"
"vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
@@ -3686,10 +3681,10 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "avx_vextractf128_<avxmodesuffixp>256_0"
- [(set (match_operand:SSEMODEIF4 0 "nonimmediate_operand" "=x,m")
- (vec_select:SSEMODEIF4
- (match_operand:<avxextractmode> 1 "register_operand" "x,x")
+(define_insn "vec_extract_lo_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
"TARGET_AVX"
@@ -3699,10 +3694,10 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "avx_vextractf128_<avxmodesuffixp>256_1"
- [(set (match_operand:SSEMODEIF4 0 "nonimmediate_operand" "=x,m")
- (vec_select:SSEMODEIF4
- (match_operand:<avxextractmode> 1 "register_operand" "x,x")
+(define_insn "vec_extract_hi_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
(parallel [(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
"TARGET_AVX"
@@ -3712,6 +3707,74 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
+(define_insn "vec_extract_lo_v16hi" ;; V16HI -> V8HI: copy the low half (elements 0..7).
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") ;; destination: register or memory
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x,x") ;; source: 256-bit register
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}" ;; immediate 0 = low half, as in the AVX256MODE4P lo pattern; 0x1 would emit the hi extraction
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store") ;; one entry per alternative: register, memory destination
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_hi_v16hi" ;; V16HI -> V8HI: copy the high half (elements 8..15).
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") ;; destination: register or memory
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x,x") ;; source: 256-bit register
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" ;; immediate 1 is paired with the upper-half selection above
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store") ;; one entry per alternative: register, memory destination
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_lo_v32qi" ;; V32QI -> V16QI: copy the low half (elements 0..15).
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") ;; destination: register or memory
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x,x") ;; source: 256-bit register
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}" ;; immediate 0 = low half, as in the AVX256MODE4P lo pattern; 0x1 would emit the hi extraction
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store") ;; one entry per alternative: register, memory destination
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_hi_v32qi" ;; V32QI -> V16QI: copy the high half (elements 16..31).
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") ;; destination: register or memory
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x,x") ;; source: 256-bit register
+ (parallel [(const_int 16) (const_int 17)
+ (const_int 18) (const_int 19)
+ (const_int 20) (const_int 21)
+ (const_int 22) (const_int 23)
+ (const_int 24) (const_int 25)
+ (const_int 26) (const_int 27)
+ (const_int 28) (const_int 29)
+ (const_int 30) (const_int 31)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" ;; immediate 1 is paired with the upper-half selection above
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store") ;; one entry per alternative: register, memory destination
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_insn "*sse4_1_extractps"
[(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
(vec_select:SF
@@ -11585,9 +11648,9 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4SF")])
-(define_expand "avx_vinsertf128_<avxmodesuffixp>256"
- [(match_operand:AVX256MODE2P 0 "register_operand" "")
- (match_operand:AVX256MODE2P 1 "register_operand" "")
+(define_expand "avx_vinsertf128<mode>"
+ [(match_operand:AVX256MODE 0 "register_operand" "")
+ (match_operand:AVX256MODE 1 "register_operand" "")
(match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
(match_operand:SI 3 "const_0_to_1_operand" "")]
"TARGET_AVX"
@@ -11595,12 +11658,12 @@
switch (INTVAL (operands[3]))
{
case 0:
- emit_insn (gen_avx_vinsertf128_<avxmodesuffixp>256_0
- (operands[0], operands[1], operands[2]));
+ emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
+ operands[2]));
break;
case 1:
- emit_insn (gen_avx_vinsertf128_<avxmodesuffixp>256_1
- (operands[0], operands[1], operands[2]));
+ emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
+ operands[2]));
break;
default:
gcc_unreachable ();
@@ -11608,12 +11671,12 @@
DONE;
})
-(define_insn "avx_vinsertf128_pd256_0"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
- (vec_concat:V4DF
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")
- (vec_select:V2DF
- (match_operand:V4DF 1 "register_operand" "x")
+(define_insn "vec_set_lo_<mode>"
+ [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE4P
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x")
(parallel [(const_int 2) (const_int 3)]))))]
"TARGET_AVX"
"vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
@@ -11621,20 +11684,20 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "avx_vinsertf128_pd256_1"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
- (vec_concat:V4DF
- (vec_select:V2DF
- (match_operand:V4DF 1 "register_operand" "x")
+(define_insn "vec_set_hi_<mode>"
+ [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE4P
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x")
(parallel [(const_int 0) (const_int 1)]))
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
"TARGET_AVX"
"vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "avx_vinsertf128_<avxmodesuffixp>256_0"
+(define_insn "vec_set_lo_<mode>"
[(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
(vec_concat:AVX256MODE8P
(match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
@@ -11648,7 +11711,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "avx_vinsertf128_<avxmodesuffixp>256_1"
+(define_insn "vec_set_hi_<mode>"
[(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
(vec_concat:AVX256MODE8P
(vec_select:<avxhalfvecmode>
@@ -11662,6 +11725,78 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
+(define_insn "vec_set_lo_v16hi" ;; Build a V16HI from operand 2 plus the kept elements 8..15 of operand 1.
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm") ;; new half: register or memory
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x") ;; vector whose other half is preserved
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" ;; immediate 0 is paired with keeping elements 8..15
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_v16hi" ;; Build a V16HI from the kept elements 0..7 of operand 1 plus operand 2.
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x") ;; vector whose other half is preserved
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] ;; new half: register or memory
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" ;; immediate 1 is paired with keeping elements 0..7
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_lo_v32qi" ;; Build a V32QI from operand 2 plus the kept elements 16..31 of operand 1.
+ [(set (match_operand:V32QI 0 "register_operand" "=x")
+ (vec_concat:V32QI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm") ;; new half: register or memory
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x") ;; vector whose other half is preserved
+ (parallel [(const_int 16) (const_int 17)
+ (const_int 18) (const_int 19)
+ (const_int 20) (const_int 21)
+ (const_int 22) (const_int 23)
+ (const_int 24) (const_int 25)
+ (const_int 26) (const_int 27)
+ (const_int 28) (const_int 29)
+ (const_int 30) (const_int 31)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" ;; immediate 0 is paired with keeping elements 16..31
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_v32qi" ;; Build a V32QI from the kept elements 0..15 of operand 1 plus operand 2.
+ [(set (match_operand:V32QI 0 "register_operand" "=x")
+ (vec_concat:V32QI
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x") ;; vector whose other half is preserved
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] ;; new half: register or memory
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" ;; immediate 1 is paired with keeping elements 0..15
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(unspec:AVXMODEF2P
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 135704)
+++ config/i386/i386.c (working copy)
@@ -19250,9 +19250,9 @@ static const struct builtin_description
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128_pd256, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128_ps256, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128_si256, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
@@ -19272,9 +19272,9 @@ static const struct builtin_description
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128_pd256, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128_ps256, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128_si256, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
@@ -22108,12 +22108,12 @@ ix86_expand_args_builtin (const struct b
error ("the last argument must be a 2-bit immediate");
return const0_rtx;
- case CODE_FOR_avx_vextractf128_pd256:
- case CODE_FOR_avx_vextractf128_ps256:
- case CODE_FOR_avx_vextractf128_si256:
- case CODE_FOR_avx_vinsertf128_pd256:
- case CODE_FOR_avx_vinsertf128_ps256:
- case CODE_FOR_avx_vinsertf128_si256:
+ case CODE_FOR_avx_vextractf128v4df:
+ case CODE_FOR_avx_vextractf128v8sf:
+ case CODE_FOR_avx_vextractf128v8si:
+ case CODE_FOR_avx_vinsertf128v4df:
+ case CODE_FOR_avx_vinsertf128v8sf:
+ case CODE_FOR_avx_vinsertf128v8si:
error ("the last argument must be a 1-bit immediate");
return const0_rtx;
@@ -25539,10 +25539,26 @@ ix86_expand_vector_set (bool mmx_ok, rtx
enum machine_mode inner_mode = GET_MODE_INNER (mode);
enum machine_mode half_mode;
bool use_vec_merge = false;
- rtx tmp, op0, op1;
- rtx (*gen_extract) (rtx, rtx);
- rtx (*gen_insert) (rtx, rtx, rtx);
- int i, n;
+ rtx tmp;
+ static rtx (*gen_extract[6][2]) (rtx, rtx)
+ = {
+ { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
+ { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
+ { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
+ { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
+ { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
+ { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
+ };
+ static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
+ = {
+ { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
+ { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
+ { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
+ { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
+ { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
+ { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
+ };
+ int i, j, n;
switch (mode)
{
@@ -25694,31 +25710,37 @@ ix86_expand_vector_set (bool mmx_ok, rtx
case V32QImode:
half_mode = V16QImode;
+ j = 0;
n = 16;
goto half;
case V16HImode:
half_mode = V8HImode;
+ j = 1;
n = 8;
goto half;
case V8SImode:
half_mode = V4SImode;
- n = 4;
- goto half;
-
- case V8SFmode:
- half_mode = V4SFmode;
+ j = 2;
n = 4;
goto half;
case V4DImode:
half_mode = V2DImode;
+ j = 3;
n = 2;
goto half;
+ case V8SFmode:
+ half_mode = V4SFmode;
+ j = 4;
+ n = 4;
+ goto half;
+
case V4DFmode:
half_mode = V2DFmode;
+ j = 5;
n = 2;
goto half;
@@ -25727,44 +25749,17 @@ half:
i = elt / n;
elt %= n;
- switch (i)
- {
- case 0:
- gen_extract = gen_avx_vextractf128_pd256_0;
- gen_insert = gen_avx_vinsertf128_pd256_0;
- break;
- case 1:
- gen_extract = gen_avx_vextractf128_pd256_1;
- gen_insert = gen_avx_vinsertf128_pd256_1;
- break;
- default:
- gcc_unreachable ();
- }
-
- /* Cast to V4DFmode. */
- tmp = gen_reg_rtx (V4DFmode);
- emit_move_insn (tmp, gen_lowpart (V4DFmode, target));
+ gcc_assert (i <= 1);
/* Extract the half. */
- op0 = gen_reg_rtx (V2DFmode);
- emit_insn ((*gen_extract) (op0, tmp));
-
- /* Cast to half mode. */
- op1 = gen_reg_rtx (half_mode);
- emit_move_insn (op1, gen_lowpart (half_mode, op0));
-
- /* Put val in op1 at elt. */
- ix86_expand_vector_set (false, op1, val, elt);
+ tmp = gen_reg_rtx (half_mode);
+ emit_insn ((*gen_extract[j][i]) (tmp, target));
- /* Cast to V2DFmode. */
- op0 = gen_reg_rtx (V2DFmode);
- emit_move_insn (op0, gen_lowpart (V2DFmode, op1));
+ /* Put val in tmp at elt. */
+ ix86_expand_vector_set (false, tmp, val, elt);
/* Put it back. */
- emit_insn ((*gen_insert) (tmp, tmp, op0));
-
- /* Cast to original mode and store in target. */
- emit_move_insn (target, gen_lowpart (mode, tmp));
+ emit_insn ((*gen_insert[j][i]) (target, target, tmp));
return;
default: