SSE fix 19 - move patterns

Jan Hubicka jh@suse.cz
Sun Oct 20 06:40:00 GMT 2002


Hi,
someone has apparently forgotten about the integer move instructions.  This lets
me test the integer SSE support, which unfortunately looks quite broken.
I will fix it next.

Honza

Sun Oct 20 15:16:23 CEST 2002  Jan Hubicka  <jh@suse.cz>
	* i386.c (builtin_description): Add punpcklqdq and movdq2q.
	(ix86_init_mmx_sse_builtins): Add v2di_ftype_void, di_ftype_v2di,
	v16qi_ftype_pchar, void_ftype_pchar_v16qi, v4si_ftype_pchar,
	void_ftype_pchar_v4si; Initialize __builtin_ia32_movdq2q,
	__builtin_ia32_loaddqa, __builtin_ia32_loaddqu, __builtin_ia32_loadd,
	__builtin_ia32_storedqa, __builtin_ia32_storedqu, __builtin_ia32_stored,
	__builtin_ia32_setzero128.
	(ix86_expand_builtin): Handle IX86_BUILTIN_CLRTI, IX86_BUILTIN_LOADDQA,
	IX86_BUILTIN_LOADDQU, IX86_BUILTIN_LOADD, IX86_BUILTIN_STOREDQA,
	IX86_BUILTIN_STOREDQU, IX86_BUILTIN_STORED.
	* i386.h (ix86_builtins): Add IX86_BUILTIN_LOADDQA, IX86_BUILTIN_LOADDQU,
	IX86_BUILTIN_STOREDQA, IX86_BUILTIN_STOREDQU, IX86_BUILTIN_LOADD,
	IX86_BUILTIN_STORED, IX86_BUILTIN_CLRTI, IX86_BUILTIN_MOVDQ2Q,
	IX86_BUILTIN_PUNPCKLQDQ128.
	* i386.md (sse2_punpcklqdq, sse2_movq, sse2_loadd, sse2_stored):
	New patterns.
	(sse2_movdqa, sse2_movdqu, sse2_movdq2q, sse2_movq2dq): Fix.
	* xmmintrin.h (_mm_load_si128, _mm_loadu_si128, _mm_loadl_epi64,
	_mm_store_si128, _mm_storeu_si128, _mm_storel_epi64,
	_mm_setzero_si128, _mm_set_epi64, _mm_set_epi32, _mm_set_epi16,
	_mm_set_epi8, _mm_set1_epi64, _mm_set1_epi32, _mm_set1_epi16,
	_mm_set1_epi8, _mm_setr_epi64, _mm_setr_epi32, _mm_setr_epi16,
	_mm_setr_epi8, _mm_unpacklo_epi64): New functions.
Index: i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.475
diff -c -3 -p -r1.475 i386.c
*** i386.c	19 Oct 2002 22:34:16 -0000	1.475
--- i386.c	20 Oct 2002 13:23:59 -0000
*************** static const struct builtin_description 
*** 12002,12007 ****
--- 12002,12008 ----
    { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
  
    { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
*************** static const struct builtin_description 
*** 12053,12058 ****
--- 12054,12060 ----
    { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
  
    { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
  
*************** ix86_init_mmx_sse_builtins ()
*** 12228,12233 ****
--- 12230,12237 ----
  
    tree ti_ftype_void
      = build_function_type (intTI_type_node, void_list_node);
+   tree v2di_ftype_void
+     = build_function_type (V2DI_type_node, void_list_node);
    tree ti_ftype_ti_ti
      = build_function_type_list (intTI_type_node,
  				intTI_type_node, intTI_type_node, NULL_TREE);
*************** ix86_init_mmx_sse_builtins ()
*** 12236,12241 ****
--- 12240,12248 ----
    tree v2di_ftype_di
      = build_function_type_list (V2DI_type_node,
  				long_long_unsigned_type_node, NULL_TREE);
+   tree di_ftype_v2di
+     = build_function_type_list (long_long_unsigned_type_node,
+ 				V2DI_type_node, NULL_TREE);
    tree v4sf_ftype_v4si
      = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
    tree v4si_ftype_v4sf
*************** ix86_init_mmx_sse_builtins ()
*** 12344,12349 ****
--- 12351,12366 ----
  				V16QI_type_node, V16QI_type_node, NULL_TREE);
    tree int_ftype_v16qi
      = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
+   tree v16qi_ftype_pchar
+     = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
+   tree void_ftype_pchar_v16qi
+     = build_function_type_list (void_type_node,
+ 			        pchar_type_node, V16QI_type_node, NULL_TREE);
+   tree v4si_ftype_pchar
+     = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
+   tree void_ftype_pchar_v4si
+     = build_function_type_list (void_type_node,
+ 			        pchar_type_node, V4SI_type_node, NULL_TREE);
  
    /* Add all builtins that are more or less simple operations on two
       operands.  */
*************** ix86_init_mmx_sse_builtins ()
*** 12524,12529 ****
--- 12541,12547 ----
  
    def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
    def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
+   def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
    def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
*************** ix86_init_mmx_sse_builtins ()
*** 12587,12592 ****
--- 12605,12619 ----
    def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
    def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
  
+   def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
+   def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
+   def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
+   def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
+   def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
+   def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
+ 
+   def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
+ 
    def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
    def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
    def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
*************** ix86_expand_builtin (exp, target, subtar
*** 13036,13041 ****
--- 13063,13069 ----
  
      case IX86_BUILTIN_STOREAPS:
        return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
+ 
      case IX86_BUILTIN_STOREUPS:
        return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
  
*************** ix86_expand_builtin (exp, target, subtar
*** 13275,13280 ****
--- 13303,13314 ----
        emit_insn (gen_mmx_clrdi (target));
        return target;
  
+     case IX86_BUILTIN_CLRTI:
+       target = gen_reg_rtx (V2DImode);
+       emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
+       return target;
+ 
+ 
      case IX86_BUILTIN_SQRTSD:
        return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
      case IX86_BUILTIN_LOADAPD:
*************** ix86_expand_builtin (exp, target, subtar
*** 13360,13365 ****
--- 13394,13413 ----
        return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
      case IX86_BUILTIN_MOVNTI:
        return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
+ 
+     case IX86_BUILTIN_LOADDQA:
+       return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
+     case IX86_BUILTIN_LOADDQU:
+       return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
+     case IX86_BUILTIN_LOADD:
+       return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
+ 
+     case IX86_BUILTIN_STOREDQA:
+       return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
+     case IX86_BUILTIN_STOREDQU:
+       return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
+     case IX86_BUILTIN_STORED:
+       return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
  
      default:
        break;
Index: i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.297
diff -c -3 -p -r1.297 i386.h
*** i386.h	19 Oct 2002 08:48:37 -0000	1.297
--- i386.h	20 Oct 2002 13:24:00 -0000
*************** enum ix86_builtins
*** 2090,2095 ****
--- 2090,2104 ----
    IX86_BUILTIN_MOVNTPS,
    IX86_BUILTIN_MOVNTQ,
  
+   IX86_BUILTIN_LOADDQA,
+   IX86_BUILTIN_LOADDQU,
+   IX86_BUILTIN_STOREDQA,
+   IX86_BUILTIN_STOREDQU,
+   IX86_BUILTIN_LOADD,
+   IX86_BUILTIN_STORED,
+ 
+   IX86_BUILTIN_CLRTI,
+ 
    IX86_BUILTIN_PACKSSWB,
    IX86_BUILTIN_PACKSSDW,
    IX86_BUILTIN_PACKUSWB,
*************** enum ix86_builtins
*** 2336,2341 ****
--- 2345,2351 ----
    IX86_BUILTIN_MOVMSKPD,
    IX86_BUILTIN_PMOVMSKB128,
    IX86_BUILTIN_MOVQ2DQ,
+   IX86_BUILTIN_MOVDQ2Q,
  
    IX86_BUILTIN_PACKSSWB128,
    IX86_BUILTIN_PACKSSDW128,
*************** enum ix86_builtins
*** 2417,2422 ****
--- 2427,2433 ----
    IX86_BUILTIN_PUNPCKLBW128,
    IX86_BUILTIN_PUNPCKLWD128,
    IX86_BUILTIN_PUNPCKLDQ128,
+   IX86_BUILTIN_PUNPCKLQDQ128,
  
    IX86_BUILTIN_CLFLUSH,
    IX86_BUILTIN_MFENCE,
Index: i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.392
diff -c -3 -p -r1.392 i386.md
*** i386.md	19 Oct 2002 22:34:16 -0000	1.392
--- i386.md	20 Oct 2002 13:24:06 -0000
***************
*** 21505,21510 ****
--- 21505,21523 ----
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "TI")])
  
+ (define_insn "sse2_punpcklqdq"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+ 	(vec_merge:V2DI
+ 	 (match_operand:V2DI 1 "register_operand" "0")
+ 	 (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x")
+ 			  (parallel [(const_int 1)
+ 				     (const_int 0)]))
+ 	 (const_int 1)))]
+   "TARGET_SSE2"
+   "punpcklqdq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "ssecvt")
+    (set_attr "mode" "TI")])
+ 
  ;; SSE2 moves
  
  (define_insn "sse2_movapd"
***************
*** 21530,21538 ****
     (set_attr "mode" "V2DF")])
  
  (define_insn "sse2_movdqa"
!   [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
! 	(unspec:TI [(match_operand:TI 1 "general_operand" "xm,x")]
! 		   UNSPEC_MOVA))]
    "TARGET_SSE2"
    "@
     movdqa\t{%1, %0|%0, %1}
--- 21543,21551 ----
     (set_attr "mode" "V2DF")])
  
  (define_insn "sse2_movdqa"
!   [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
! 	(unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")]
! 		       UNSPEC_MOVA))]
    "TARGET_SSE2"
    "@
     movdqa\t{%1, %0|%0, %1}
***************
*** 21541,21549 ****
     (set_attr "mode" "TI")])
  
  (define_insn "sse2_movdqu"
!   [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
! 	(unspec:TI [(match_operand:TI 1 "general_operand" "xm,x")]
! 		   UNSPEC_MOVU))]
    "TARGET_SSE2"
    "@
     movdqu\t{%1, %0|%0, %1}
--- 21554,21562 ----
     (set_attr "mode" "TI")])
  
  (define_insn "sse2_movdqu"
!   [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
! 	(unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")]
! 		       UNSPEC_MOVU))]
    "TARGET_SSE2"
    "@
     movdqu\t{%1, %0|%0, %1}
***************
*** 21552,21572 ****
     (set_attr "mode" "TI")])
  
  (define_insn "sse2_movdq2q"
!   [(set (match_operand:DI 0 "nonimmediate_operand" "=y")
! 	(vec_select:DI (match_operand:V2DI 1 "general_operand" "x")
  		       (parallel [(const_int 0)])))]
    "TARGET_SSE2"
!   "movdq2q\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "TI")])
  
  (define_insn "sse2_movq2dq"
!   [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x")
! 	(vec_concat:V2DI (match_operand:DI 1 "general_operand" "y")
! 			 (const_vector:DI [(const_int 0)])))]
    "TARGET_SSE2"
!   "movq2dq\t{%1, %0|%0, %1}"
!   [(set_attr "type" "ssecvt")
     (set_attr "mode" "TI")])
  
  (define_insn "sse2_movhpd"
--- 21565,21624 ----
     (set_attr "mode" "TI")])
  
  (define_insn "sse2_movdq2q"
!   [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y")
! 	(vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x")
  		       (parallel [(const_int 0)])))]
    "TARGET_SSE2"
!   "@
!    movq\t{%1, %0|%0, %1}
!    movdq2q\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "TI")])
  
  (define_insn "sse2_movq2dq"
!   [(set (match_operand:V2DI 0 "register_operand" "=x,?x")
! 	(vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y")
! 			 (const_int 0)))]
    "TARGET_SSE2"
!   "@
!    movq\t{%1, %0|%0, %1}
!    movq2dq\t{%1, %0|%0, %1}"
!   [(set_attr "type" "ssecvt,ssemov")
!    (set_attr "mode" "TI")])
! 
! (define_insn "sse2_movq"
!   [(set (match_operand:V2DI 0 "nonimmediate_operand" "=xm")
! 	(vec_merge:V2DI
! 	 (match_operand:V2DI 1 "nonimmediate_operand" "0")
! 	 (match_operand:V2DI 2 "register_operand" "x")
! 	 (const_int 1)))]
!   "TARGET_SSE2"
!   "movq\t{%2, %0|%0, %2}"
!   [(set_attr "type" "ssemov")
!    (set_attr "mode" "TI")])
! 
! (define_insn "sse2_loadd"  ; movd load: element 0 from memory, other elements zero
!   [(set (match_operand:V4SI 0 "register_operand" "=x")
! 	(vec_merge:V4SI
! 	 (vec_duplicate:V4SI (match_operand:SI 1 "memory_operand" "m"))  ; mode must match the vec_merge
! 	 (const_vector:V4SI [(const_int 0)
! 			     (const_int 0)
! 			     (const_int 0)
! 			     (const_int 0)])
! 	 (const_int 1)))]
!   "TARGET_SSE2"
!   "movd\t{%1, %0|%0, %1}"
!   [(set_attr "type" "ssemov")
!    (set_attr "mode" "TI")])
! 
! (define_insn "sse2_stored"
!   [(set (match_operand:SI 0 "memory_operand" "=m")
! 	(vec_select:SI
! 	 (match_operand:V4SI 1 "register_operand" "x")
! 	 (parallel [(const_int 0)])))]
!   "TARGET_SSE2"
!   "movd\t{%1, %0|%0, %1}"
!   [(set_attr "type" "ssemov")
     (set_attr "mode" "TI")])
  
  (define_insn "sse2_movhpd"
Index: xmmintrin.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/xmmintrin.h,v
retrieving revision 1.12
diff -c -3 -p -r1.12 xmmintrin.h
*** xmmintrin.h	19 Oct 2002 22:34:17 -0000	1.12
--- xmmintrin.h	20 Oct 2002 13:24:06 -0000
*************** _mm_ucomineq_sd (__m128d __A, __m128d __
*** 1581,1586 ****
--- 1581,1820 ----
    return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
  }
  
+ /* Load the full 128-bit integer vector at *P (expands to the movdqa pattern).  */
+ 
+ static __inline __m128i
+ _mm_load_si128 (__m128i const *__P)
+ {
+   return (__m128i) __builtin_ia32_loaddqa (__P);
+ }
+ 
+ static __inline __m128i
+ _mm_loadu_si128 (__m128i const *__P)
+ {
+   return (__m128i) __builtin_ia32_loaddqu (__P);
+ }
+ 
+ static __inline __m128i
+ _mm_loadl_epi64 (__m128i const *__P)
+ {
+   return (__m128i) __builtin_ia32_movq2dq (*(unsigned long long *)__P);
+ }
+ 
+ static __inline void
+ _mm_store_si128 (__m128i *__P, __m128i __B)
+ {
+   __builtin_ia32_storedqa (__P, (__v16qi)__B);
+ }
+ 
+ static __inline void
+ _mm_storeu_si128 (__m128i *__P, __m128i __B)
+ {
+   __builtin_ia32_storedqu (__P, (__v16qi)__B);
+ }
+ 
+ static __inline void
+ _mm_storel_epi64 (__m128i *__P, __m128i __B)
+ {
+   *(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B);
+ }
+ 
+ /* Create a vector of zeros.  */
+ static __inline __m128i
+ _mm_setzero_si128 (void)
+ {
+   return (__m128i) __builtin_ia32_setzero128 ();
+ }
+ 
+ static __inline __m128i
+ _mm_set_epi64 (__m64 __A,  __m64 __B)
+ {
+   __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
+   __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B);
+   return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp2, __tmp);
+ }
+ 
+ /* Create the vector [Z Y X W].  */
+ static __inline __m128i
+ _mm_set_epi32 (int __Z, int __Y, int __X, int __W)
+ {
+   union {
+     int __a[4];
+     __m128i __v;
+   } __u;
+ 
+   __u.__a[0] = __W;
+   __u.__a[1] = __X;
+   __u.__a[2] = __Y;
+   __u.__a[3] = __Z;
+ 
+   return __u.__v;
+ }
+ /* Create the vector [Z Y X W V U T S].  */
+ static __inline __m128i
+ _mm_set_epi16 (short __Z, short __Y, short __X, short __W,
+ 	       short __V, short __U, short __T, short __S)
+ {
+   union {
+     short __a[8];
+     __m128i __v;
+   } __u;
+ 
+   __u.__a[0] = __S;
+   __u.__a[1] = __T;
+   __u.__a[2] = __U;
+   __u.__a[3] = __V;
+   __u.__a[4] = __W;
+   __u.__a[5] = __X;
+   __u.__a[6] = __Y;
+   __u.__a[7] = __Z;
+ 
+   return __u.__v;
+ }
+ 
+ /* Create the vector [Z Y X W V U T S Z1 Y1 X1 W1 V1 U1 T1 S1].  */
+ static __inline __m128i
+ _mm_set_epi8 (char __Z, char __Y, char __X, char __W,
+ 	      char __V, char __U, char __T, char __S,
+ 	      char __Z1, char __Y1, char __X1, char __W1,
+ 	      char __V1, char __U1, char __T1, char __S1)
+ {
+   union {
+     char __a[16];
+     __m128i __v;
+   } __u;
+ 
+   __u.__a[0] = __S1;
+   __u.__a[1] = __T1;
+   __u.__a[2] = __U1;
+   __u.__a[3] = __V1;
+   __u.__a[4] = __W1;
+   __u.__a[5] = __X1;
+   __u.__a[6] = __Y1;
+   __u.__a[7] = __Z1;
+   __u.__a[8] = __S;
+   __u.__a[9] = __T;
+   __u.__a[10] = __U;
+   __u.__a[11] = __V;
+   __u.__a[12] = __W;
+   __u.__a[13] = __X;
+   __u.__a[14] = __Y;
+   __u.__a[15] = __Z;
+ 
+   return __u.__v;
+ }
+ 
+ static __inline __m128i
+ _mm_set1_epi64 (__m64 __A)
+ {
+   __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
+   return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp);
+ }
+ 
+ static __inline __m128i
+ _mm_set1_epi32 (int __A)
+ {
+   __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__A);
+   return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
+ }
+ 
+ static __inline __m128i
+ _mm_set1_epi16 (short __A)
+ {
+   int __Acopy = (unsigned short)__A;
+   __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy);
+   __tmp = (__v4si)__builtin_ia32_punpcklwd128 ((__v8hi)__tmp, (__v8hi)__tmp);
+   return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
+ }
+ 
+ static __inline __m128i
+ _mm_set1_epi8 (char __A)
+ {
+   int __Acopy = (unsigned char)__A;
+   __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy);
+   __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp);
+   __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp);
+   return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
+ }
+ 
+ static __inline __m128i
+ _mm_setr_epi64 (__m64 __A,  __m64 __B)
+ {
+   __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
+   __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B);
+   return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp2);
+ }
+ 
+ /* Create the vector [Z Y X W].  */
+ static __inline __m128i
+ _mm_setr_epi32 (int __W, int __X, int __Y, int __Z)
+ {
+   union {
+     int __a[4];
+     __m128i __v;
+   } __u;
+ 
+   __u.__a[0] = __W;
+   __u.__a[1] = __X;
+   __u.__a[2] = __Y;
+   __u.__a[3] = __Z;
+ 
+   return __u.__v;
+ }
+ /* Create the vector [Z Y X W V U T S].  */
+ static __inline __m128i
+ _mm_setr_epi16 (short __S, short __T, short __U, short __V,
+ 	        short __W, short __X, short __Y, short __Z)
+ {
+   union {
+     short __a[8];
+     __m128i __v;
+   } __u;
+ 
+   __u.__a[0] = __S;
+   __u.__a[1] = __T;
+   __u.__a[2] = __U;
+   __u.__a[3] = __V;
+   __u.__a[4] = __W;
+   __u.__a[5] = __X;
+   __u.__a[6] = __Y;
+   __u.__a[7] = __Z;
+ 
+   return __u.__v;
+ }
+ 
+ /* Create the vector [Z Y X W V U T S Z1 Y1 X1 W1 V1 U1 T1 S1].  */
+ static __inline __m128i
+ _mm_setr_epi8 (char __S1, char __T1, char __U1, char __V1,
+ 	       char __W1, char __X1, char __Y1, char __Z1,
+ 	       char __S, char __T, char __U, char __V,
+ 	       char __W, char __X, char __Y, char __Z)
+ {
+   union {
+     char __a[16];
+     __m128i __v;
+   } __u;
+ 
+   __u.__a[0] = __S1;
+   __u.__a[1] = __T1;
+   __u.__a[2] = __U1;
+   __u.__a[3] = __V1;
+   __u.__a[4] = __W1;
+   __u.__a[5] = __X1;
+   __u.__a[6] = __Y1;
+   __u.__a[7] = __Z1;
+   __u.__a[8] = __S;
+   __u.__a[9] = __T;
+   __u.__a[10] = __U;
+   __u.__a[11] = __V;
+   __u.__a[12] = __W;
+   __u.__a[13] = __X;
+   __u.__a[14] = __Y;
+   __u.__a[15] = __Z;
+ 
+   return __u.__v;
+ }
+ 
  static __inline __m128d
  _mm_cvtepi32_pd (__m128i __A)
  {
*************** static __inline __m128i
*** 1773,1778 ****
--- 2007,2018 ----
  _mm_unpacklo_epi32 (__m128i __A, __m128i __B)
  {
    return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_unpacklo_epi64 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
  }
  
  static __inline __m128i



More information about the Gcc-patches mailing list