This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[AVX]: Add vec_extract_XX_<mode>/vec_set_XX_<mode> to AVX


I am checking this patch into the AVX branch to add the vec_extract_XX_<mode>
and vec_set_XX_<mode> patterns for AVX.


H.J.
---
2008-05-21  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.c (bdesc_args): Updated.
	(ix86_expand_args_builtin): Likewise.
	(ix86_expand_vector_set): Don't cast to V4DFmode for 256bit AVX
	modes.

	* config/i386/sse.md (SSEMODE4F): Removed.
	(SSEMODEIF4): Likewise.
	(avxextractmode): Likewise.
	(avx_vextractf128_<avxmodesuffixp>256): Likewise.
	(avx_vextractf128_pd256_0): Likewise.
	(avx_vextractf128_pd256_1): Likewise.
	(avx_vextractf128_<avxmodesuffixp>256_0): Likewise.
	(avx_vextractf128_<avxmodesuffixp>256_1): Likewise.
	(avx_vinsertf128_<avxmodesuffixp>256): Likewise.
	(avx_vinsertf128_pd256_0): Likewise.
	(avx_vinsertf128_pd256_1): Likewise.
	(avx_vinsertf128_<avxmodesuffixp>256_0): Likewise.
	(avx_vinsertf128_<avxmodesuffixp>256_1): Likewise.
	(AVX256MODE4P): New.
	(avx_vextractf128<mode>): Likewise.
	(vec_extract_lo_<mode>): Likewise.
	(vec_extract_hi_<mode>): Likewise.
	(vec_extract_lo_<mode>): Likewise.
	(vec_extract_hi_<mode>): Likewise.
	(vec_extract_lo_v16hi): Likewise.
	(vec_extract_hi_v16hi): Likewise.
	(vec_extract_lo_v32qi): Likewise.
	(vec_extract_hi_v32qi): Likewise.
	(avx_vinsertf128<mode>): Likewise.
	(vec_set_lo_<mode>): Likewise.
	(vec_set_hi_<mode>): Likewise.
	(vec_set_lo_<mode>): Likewise.
	(vec_set_hi_<mode>): Likewise.
	(vec_set_lo_v16hi): Likewise.
	(vec_set_hi_v16hi): Likewise.
	(vec_set_lo_v32qi): Likewise.
	(vec_set_hi_v32qi): Likewise.

Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 135704)
+++ config/i386/sse.md	(working copy)
@@ -48,13 +48,12 @@
 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
-(define_mode_iterator SSEMODE4F [V4SI V4SF V2DF])
-(define_mode_iterator SSEMODEIF4 [V4SI V4SF])
 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
 
 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
+(define_mode_iterator AVX256MODE4P [V4DI V4DF])
 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
@@ -94,9 +93,7 @@
    (V8SF "V8SF") (V4DF "V4DF")])
 (define_mode_attr avxvecpsmode
   [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
-  (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
-(define_mode_attr avxextractmode 
-  [(V4SI "V8SI") (V4SF "V8SF") (V2DF "V4DF")])
+   (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
 (define_mode_attr avxhalfvecmode
   [(V4SF "V2SF") (V32QI "V16QI")  (V16HI "V8HI") (V8SI "V4SI")
    (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
@@ -3640,21 +3637,19 @@
   DONE;
 })
 
-(define_expand "avx_vextractf128_<avxmodesuffixp>256"
-  [(match_operand:SSEMODE4F 0 "nonimmediate_operand" "")
-   (match_operand:<avxextractmode> 1 "register_operand" "")
+(define_expand "avx_vextractf128<mode>"
+  [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
+   (match_operand:AVX256MODE 1 "register_operand" "")
    (match_operand:SI 2 "const_0_to_1_operand" "")]
   "TARGET_AVX"
 {
   switch (INTVAL (operands[2]))
     {
     case 0:
-      emit_insn (gen_avx_vextractf128_<avxmodesuffixp>256_0
-		   (operands[0], operands[1]));
+      emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
       break;
     case 1:
-      emit_insn (gen_avx_vextractf128_<avxmodesuffixp>256_1
-		   (operands[0], operands[1]));
+      emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
       break;
     default:
       gcc_unreachable ();
@@ -3662,10 +3657,10 @@
   DONE;
 })
 
-(define_insn "avx_vextractf128_pd256_0"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
-	(vec_select:V2DF
-	  (match_operand:V4DF 1 "register_operand" "x,x")
+(define_insn "vec_extract_lo_<mode>"
+  [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+	(vec_select:<avxhalfvecmode>
+	  (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
 	  (parallel [(const_int 0) (const_int 1)])))]
   "TARGET_AVX"
   "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
@@ -3674,10 +3669,10 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
-(define_insn "avx_vextractf128_pd256_1"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
-	(vec_select:V2DF
-	  (match_operand:V4DF 1 "register_operand" "x,x")
+(define_insn "vec_extract_hi_<mode>"
+  [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+	(vec_select:<avxhalfvecmode>
+	  (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
 	  (parallel [(const_int 2) (const_int 3)])))]
   "TARGET_AVX"
   "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
@@ -3686,10 +3681,10 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
-(define_insn "avx_vextractf128_<avxmodesuffixp>256_0"
-  [(set (match_operand:SSEMODEIF4 0 "nonimmediate_operand" "=x,m")
-	(vec_select:SSEMODEIF4
-	  (match_operand:<avxextractmode> 1 "register_operand" "x,x")
+(define_insn "vec_extract_lo_<mode>"
+  [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+	(vec_select:<avxhalfvecmode>
+	  (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
 	  (parallel [(const_int 0) (const_int 1)
 		     (const_int 2) (const_int 3)])))]
   "TARGET_AVX"
@@ -3699,10 +3694,10 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
-(define_insn "avx_vextractf128_<avxmodesuffixp>256_1"
-  [(set (match_operand:SSEMODEIF4 0 "nonimmediate_operand" "=x,m")
-	(vec_select:SSEMODEIF4
-	  (match_operand:<avxextractmode> 1 "register_operand" "x,x")
+(define_insn "vec_extract_hi_<mode>"
+  [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+	(vec_select:<avxhalfvecmode>
+	  (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
 	  (parallel [(const_int 4) (const_int 5)
 		     (const_int 6) (const_int 7)])))]
   "TARGET_AVX"
@@ -3712,6 +3707,74 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
+(define_insn "vec_extract_lo_v16hi"	; V8HI <- elements 0-7 (low half) of a V16HI
+  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+	(vec_select:V8HI
+	  (match_operand:V16HI 1 "register_operand" "x,x")
+	  (parallel [(const_int 0) (const_int 1)
+		     (const_int 2) (const_int 3)
+		     (const_int 4) (const_int 5)
+		     (const_int 6) (const_int 7)])))]
+  "TARGET_AVX"	; immediate 0 = low half, matching vec_extract_lo_<mode>
+  "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
+  [(set_attr "type" "sselog")
+   (set_attr "memory" "none,store")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_hi_v16hi"
+  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+	(vec_select:V8HI
+	  (match_operand:V16HI 1 "register_operand" "x,x")
+	  (parallel [(const_int 8) (const_int 9)
+		     (const_int 10) (const_int 11)
+		     (const_int 12) (const_int 13)
+		     (const_int 14) (const_int 15)])))]
+  "TARGET_AVX"
+  "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+  [(set_attr "type" "sselog")
+   (set_attr "memory" "none,store")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_lo_v32qi"	; V16QI <- elements 0-15 (low half) of a V32QI
+  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+	(vec_select:V16QI
+	  (match_operand:V32QI 1 "register_operand" "x,x")
+	  (parallel [(const_int 0) (const_int 1)
+		     (const_int 2) (const_int 3)
+		     (const_int 4) (const_int 5)
+		     (const_int 6) (const_int 7)
+		     (const_int 8) (const_int 9)
+		     (const_int 10) (const_int 11)
+		     (const_int 12) (const_int 13)
+		     (const_int 14) (const_int 15)])))]
+  "TARGET_AVX"	; immediate 0 = low half, matching vec_extract_lo_<mode>
+  "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
+  [(set_attr "type" "sselog")
+   (set_attr "memory" "none,store")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_hi_v32qi"
+  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+	(vec_select:V16QI
+	  (match_operand:V32QI 1 "register_operand" "x,x")
+	  (parallel [(const_int 16) (const_int 17)
+		     (const_int 18) (const_int 19)
+		     (const_int 20) (const_int 21)
+		     (const_int 22) (const_int 23)
+		     (const_int 24) (const_int 25)
+		     (const_int 26) (const_int 27)
+		     (const_int 28) (const_int 29)
+		     (const_int 30) (const_int 31)])))]
+  "TARGET_AVX"
+  "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+  [(set_attr "type" "sselog")
+   (set_attr "memory" "none,store")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
 (define_insn "*sse4_1_extractps"
   [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
 	(vec_select:SF
@@ -11585,9 +11648,9 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V4SF")])
 
-(define_expand "avx_vinsertf128_<avxmodesuffixp>256"
-  [(match_operand:AVX256MODE2P 0 "register_operand" "")
-   (match_operand:AVX256MODE2P 1 "register_operand" "")
+(define_expand "avx_vinsertf128<mode>"
+  [(match_operand:AVX256MODE 0 "register_operand" "")
+   (match_operand:AVX256MODE 1 "register_operand" "")
    (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
    (match_operand:SI 3 "const_0_to_1_operand" "")]
   "TARGET_AVX"
@@ -11595,12 +11658,12 @@
   switch (INTVAL (operands[3]))
     {
     case 0:
-      emit_insn (gen_avx_vinsertf128_<avxmodesuffixp>256_0
-		   (operands[0], operands[1], operands[2]));
+      emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
+					operands[2]));
       break;
     case 1:
-      emit_insn (gen_avx_vinsertf128_<avxmodesuffixp>256_1
-		   (operands[0], operands[1], operands[2]));
+      emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
+					operands[2]));
       break;
     default:
       gcc_unreachable ();
@@ -11608,12 +11671,12 @@
   DONE;
 })
 
-(define_insn "avx_vinsertf128_pd256_0"
-  [(set (match_operand:V4DF 0 "register_operand" "=x")
-	(vec_concat:V4DF
-	  (match_operand:V2DF 2 "nonimmediate_operand" "xm")
-	  (vec_select:V2DF
-	    (match_operand:V4DF 1 "register_operand" "x")
+(define_insn "vec_set_lo_<mode>"
+  [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
+	(vec_concat:AVX256MODE4P
+	  (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
+	  (vec_select:<avxhalfvecmode>
+	    (match_operand:AVX256MODE4P 1 "register_operand" "x")
 	    (parallel [(const_int 2) (const_int 3)]))))]
   "TARGET_AVX"
   "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
@@ -11621,20 +11684,20 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
-(define_insn "avx_vinsertf128_pd256_1"
-  [(set (match_operand:V4DF 0 "register_operand" "=x")
-	(vec_concat:V4DF
-	  (vec_select:V2DF
-	    (match_operand:V4DF 1 "register_operand" "x")
+(define_insn "vec_set_hi_<mode>"
+  [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
+	(vec_concat:AVX256MODE4P
+	  (vec_select:<avxhalfvecmode>
+	    (match_operand:AVX256MODE4P 1 "register_operand" "x")
 	    (parallel [(const_int 0) (const_int 1)]))
-	  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+	  (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
   "TARGET_AVX"
   "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
   [(set_attr "type" "sselog")
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
-(define_insn "avx_vinsertf128_<avxmodesuffixp>256_0"
+(define_insn "vec_set_lo_<mode>"
   [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
 	(vec_concat:AVX256MODE8P
 	  (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
@@ -11648,7 +11711,7 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
-(define_insn "avx_vinsertf128_<avxmodesuffixp>256_1"
+(define_insn "vec_set_hi_<mode>"
   [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
 	(vec_concat:AVX256MODE8P
 	  (vec_select:<avxhalfvecmode>
@@ -11662,6 +11725,78 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
+(define_insn "vec_set_lo_v16hi"
+  [(set (match_operand:V16HI 0 "register_operand" "=x")
+	(vec_concat:V16HI
+	  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+	  (vec_select:V8HI
+	    (match_operand:V16HI 1 "register_operand" "x")
+	    (parallel [(const_int 8) (const_int 9)
+		       (const_int 10) (const_int 11)
+		       (const_int 12) (const_int 13)
+		       (const_int 14) (const_int 15)]))))]
+  "TARGET_AVX"
+  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_v16hi"
+  [(set (match_operand:V16HI 0 "register_operand" "=x")
+	(vec_concat:V16HI
+	  (vec_select:V8HI
+	    (match_operand:V16HI 1 "register_operand" "x")
+	    (parallel [(const_int 0) (const_int 1)
+		       (const_int 2) (const_int 3)
+		       (const_int 4) (const_int 5)
+		       (const_int 6) (const_int 7)]))
+	  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_AVX"
+  "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_lo_v32qi"
+  [(set (match_operand:V32QI 0 "register_operand" "=x")
+	(vec_concat:V32QI
+	  (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+	  (vec_select:V16QI
+	    (match_operand:V32QI 1 "register_operand" "x")
+	    (parallel [(const_int 16) (const_int 17)
+		       (const_int 18) (const_int 19)
+		       (const_int 20) (const_int 21)
+		       (const_int 22) (const_int 23)
+		       (const_int 24) (const_int 25)
+		       (const_int 26) (const_int 27)
+		       (const_int 28) (const_int 29)
+		       (const_int 30) (const_int 31)]))))]
+  "TARGET_AVX"
+  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_v32qi"
+  [(set (match_operand:V32QI 0 "register_operand" "=x")
+	(vec_concat:V32QI
+	  (vec_select:V16QI
+	    (match_operand:V32QI 1 "register_operand" "x")
+	    (parallel [(const_int 0) (const_int 1)
+		       (const_int 2) (const_int 3)
+		       (const_int 4) (const_int 5)
+		       (const_int 6) (const_int 7)
+		       (const_int 8) (const_int 9)
+		       (const_int 10) (const_int 11)
+		       (const_int 12) (const_int 13)
+		       (const_int 14) (const_int 15)]))
+	  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_AVX"
+  "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
   [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
 	(unspec:AVXMODEF2P
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 135704)
+++ config/i386/i386.c	(working copy)
@@ -19250,9 +19250,9 @@ static const struct builtin_description 
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128_pd256, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128_ps256, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128_si256, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
@@ -19272,9 +19272,9 @@ static const struct builtin_description 
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3,  "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3,  "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_INT },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3,  "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_INT },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128_pd256, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128_ps256, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128_si256, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
 
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
@@ -22108,12 +22108,12 @@ ix86_expand_args_builtin (const struct b
 		error ("the last argument must be a 2-bit immediate");
 		return const0_rtx;
 
-	      case CODE_FOR_avx_vextractf128_pd256:
-	      case CODE_FOR_avx_vextractf128_ps256:
-	      case CODE_FOR_avx_vextractf128_si256:
-	      case CODE_FOR_avx_vinsertf128_pd256:
-	      case CODE_FOR_avx_vinsertf128_ps256:
-	      case CODE_FOR_avx_vinsertf128_si256:
+	      case CODE_FOR_avx_vextractf128v4df:
+	      case CODE_FOR_avx_vextractf128v8sf:
+	      case CODE_FOR_avx_vextractf128v8si:
+	      case CODE_FOR_avx_vinsertf128v4df:
+	      case CODE_FOR_avx_vinsertf128v8sf:
+	      case CODE_FOR_avx_vinsertf128v8si:
 		error ("the last argument must be a 1-bit immediate");
 		return const0_rtx;
 
@@ -25539,10 +25539,26 @@ ix86_expand_vector_set (bool mmx_ok, rtx
   enum machine_mode inner_mode = GET_MODE_INNER (mode);
   enum machine_mode half_mode;
   bool use_vec_merge = false;
-  rtx tmp, op0, op1;
-  rtx (*gen_extract) (rtx, rtx); 
-  rtx (*gen_insert) (rtx, rtx, rtx); 
-  int i, n;
+  rtx tmp;
+  static rtx (*gen_extract[6][2]) (rtx, rtx)
+    = {
+	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
+	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
+	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
+	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
+	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
+	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
+      };
+  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
+    = {
+	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
+	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
+	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
+	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
+	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
+	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
+      };
+  int i, j, n;
 
   switch (mode)
     {
@@ -25694,31 +25710,37 @@ ix86_expand_vector_set (bool mmx_ok, rtx
 
     case V32QImode:
       half_mode = V16QImode;
+      j = 0;
       n = 16;
       goto half;
 
     case V16HImode:
       half_mode = V8HImode;
+      j = 1;
       n = 8;
       goto half;
 
     case V8SImode:
       half_mode = V4SImode;
-      n = 4;
-      goto half;
-
-    case V8SFmode:
-      half_mode = V4SFmode;
+      j = 2;
       n = 4;
       goto half;
 
     case V4DImode:
       half_mode = V2DImode;
+      j = 3;
       n = 2;
       goto half;
 
+    case V8SFmode:
+      half_mode = V4SFmode;
+      j = 4;
+      n = 4;
+      goto half;
+
     case V4DFmode:
       half_mode = V2DFmode;
+      j = 5;
       n = 2;
       goto half;
 
@@ -25727,44 +25749,17 @@ half:
       i = elt / n;
       elt %= n;
 
-      switch (i)
-	{
-	case 0:
-	  gen_extract = gen_avx_vextractf128_pd256_0;
-	  gen_insert = gen_avx_vinsertf128_pd256_0;
-	  break;
-	case 1:
-	  gen_extract = gen_avx_vextractf128_pd256_1;
-	  gen_insert = gen_avx_vinsertf128_pd256_1;
-	  break;
-	default:
-	  gcc_unreachable ();
-	}
-
-      /* Cast to V4DFmode. */
-      tmp = gen_reg_rtx (V4DFmode);
-      emit_move_insn (tmp, gen_lowpart (V4DFmode, target));
+      gcc_assert (i <= 1);
 
       /* Extract the half.  */
-      op0 = gen_reg_rtx (V2DFmode);
-      emit_insn ((*gen_extract) (op0, tmp));
-
-      /* Cast to half mode. */
-      op1 = gen_reg_rtx (half_mode);
-      emit_move_insn (op1, gen_lowpart (half_mode, op0));
-
-      /* Put val in op1 at elt.  */
-      ix86_expand_vector_set (false, op1, val, elt);
+      tmp = gen_reg_rtx (half_mode);
+      emit_insn ((*gen_extract[j][i]) (tmp, target));
 
-      /* Cast to V2DFmode. */
-      op0 = gen_reg_rtx (V2DFmode);
-      emit_move_insn (op0, gen_lowpart (V2DFmode, op1));
+      /* Put val in tmp at elt.  */
+      ix86_expand_vector_set (false, tmp, val, elt);
 
       /* Put it back.  */
-      emit_insn ((*gen_insert) (tmp, tmp, op0));
-
-      /* Cast to original mode and store in target. */
-      emit_move_insn (target, gen_lowpart (mode, tmp));
+      emit_insn ((*gen_insert[j][i]) (target, target, tmp));
       return;
 
     default:


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]