This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

SSE2 builtins support part 2


This adds the necessary support in xmmintrin.h.  While doing it, I noticed
a number of bugs and oddities which are also fixed by this patch.

Bootstrapped on i686-linux.


Bernd

	* config/i386/i386.c (bdesc_2arg): Add a couple of missing SSE2
	builtins.  Use V2DI patterns instead of TI for logical operations.
	(ix86_init_mmx_sse_builtins): Add a couple of missing SSE2 builtins.
	Correct definitions of psadbw, pmovmskb128, movntdq, cvtdq2ps.
	(ix86_expand_builtin): Change the pattern used for movntdq.
	* config/i386/i386.md (sse2_andv2di3, sse2_iorv2di3, sse2_xorv2di3,
	sse2_nandv2di3): New patterns.
	(sse2_anddf3, sse2_nanddf3, sse2_iordf3, sse2_xordf3): Correct modes
	on operands.
	(sse2_movntv2di): Renamed from sse2_movntti and modes adjusted.
	(cvtdq2pd): Correct mode on operand 1.
	(sse2_umulsidi3): Describe without unspec.
	(sse2_psadbw, mmx_psadbw): Describe with unspec; use more appropriate
	machine modes.
	(lshrv2di3): Renamed from sse2_lshrv2di3 and removed unspec.
	(ashlv2di3): Likewise, from sse2_ashlv2di3.
	(ashrv8hi3, ashrv4si3, lshrv8hi3, lshrv4si3, lshrv2di3, ashlv8hi3,
	ashlv4si3, ashlv2di3): Use SImode for shift count.
	(ashrv8hi3_ti, ashrv4si3_ti, lshrv8hi3_ti, lshrv4si3_ti, lshrv2di3_ti,
	ashlv8hi3_ti, ashlv4si3_ti, ashlv2di3_ti):
	New patterns.
	* config/i386/xmmintrin.h (__v2df, __v2di, __v4si, __v8hi, __v16qi):
	New typedefs.
	(__m128i, __m128d): New macros.
	(_mm_add_pd, _mm_add_sd, _mm_sub_pd, _mm_sub_sd, _mm_mul_pd,
	_mm_mul_sd, _mm_div_pd, _mm_div_sd, _mm_sqrt_pd, _mm_sqrt_sd,
	_mm_min_pd, _mm_min_sd, _mm_max_sd, _mm_max_pd, _mm_and_pd,
	_mm_andnot_pd, _mm_xor_pd, _mm_or_pd, _mm_cmpeq_pd, _mm_cmplt_pd,
	_mm_cmple_pd, _mm_cmpgt_pd, _mm_cmpge_pd, _mm_cmpneq_pd,
	_mm_cmpnlt_pd, _mm_cmpnle_pd, _mm_cmpngt_pd, _mm_cmpnge_pd, 
	_mm_cmpord_pd, _mm_cmpunord_pd, _mm_cmpeq_sd, _mm_cmplt_sd,
	_mm_cmple_sd, _mm_cmpgt_sd, _mm_cmpge_sd, _mm_cmpneq_sd,
	_mm_cmpnlt_sd, _mm_cmpnle_sd, _mm_cmpngt_sd, _mm_cmpnge_sd, 
	_mm_cmpord_sd, _mm_cmpunord_sd, _mm_comieq_sd, _mm_comilt_sd,
	_mm_comile_sd, _mm_comigt_sd, _mm_comige_sd, _mm_comineq_sd,
	_mm_ucomieq_sd, _mm_ucomilt_sd, _mm_ucomile_sd,
	_mm_ucomigt_sd, _mm_ucomige_sd, _mm_ucomineq_sd, _mm_cvtepi32_pd,
	_mm_cvtepi32_ps, _mm_cvtpd_epi32, _mm_cvtpd_pi32, _mm_cvtpd_ps,
	_mm_cvttpd_epi32, _mm_cvttpd_pi32, _mm_cvtpi32_pd, _mm_cvtps_epi32,
	_mm_cvttps_epi32, _mm_cvtps_pd, _mm_cvtsd_si32, _mm_cvttsd_si32,
	_mm_cvtsd_ss, _mm_cvtsi32_sd, _mm_cvtss_sd, _mm_unpackhi_pd,
	_mm_unpacklo_pd, _mm_loadh_pd, _mm_storeh_pd, _mm_storel_pd,
	_mm_movemask_pd, _mm_packs_epi16, _mm_packs_epi32, _mm_packus_epi16,
	_mm_unpackhi_epi8, _mm_unpackhi_epi16, _mm_unpackhi_epi32,
	_mm_unpacklo_epi8, _mm_unpacklo_epi16, _mm_unpacklo_epi32,
	_mm_add_epi8, _mm_add_epi16, _mm_add_epi32, _mm_add_epi64,
	_mm_adds_epi8, _mm_adds_epi16, _mm_adds_epu8, _mm_adds_epu16,
	_mm_sub_epi8, _mm_sub_epi16, _mm_sub_epi32, _mm_sub_epi64,
	_mm_subs_epi8, _mm_subs_epi16, _mm_subs_epu8, _mm_subs_epu16,
	_mm_madd_epi16, _mm_mulhi_epi16, _mm_mullo_epi16, _mm_mul_pu16,
	_mm_mul_epu16, _mm_sll_epi16, _mm_sll_epi32, _mm_sll_epi64,
	_mm_sra_epi16, _mm_sra_epi32, _mm_srl_epi16, _mm_srl_epi32,
	_mm_srl_epi64, _mm_slli_epi16, _mm_slli_epi32, _mm_slli_epi64,
	_mm_srai_epi16, _mm_srai_epi32, _mm_srli_epi16, _mm_srli_epi32,
	_mm_srli_epi64, _mm_and_si128, _mm_andnot_si128, _mm_or_si128,
	_mm_xor_si128, _mm_cmpeq_epi8, _mm_cmpeq_epi16, _mm_cmpeq_epi32,
	_mm_cmpgt_epi8, _mm_cmpgt_epi16, _mm_cmpgt_epi32, _mm_max_epi16,
	_mm_max_epu8, _mm_min_epi16, _mm_min_epu8, _mm_movemask_epi8,
	_mm_mulhi_epu16, _mm_maskmoveu_si128, _mm_avg_epu8, _mm_avg_epu16,
	_mm_sad_epu8, _mm_stream_si32, _mm_stream_si128, _mm_stream_pd,
	_mm_movpi64_epi64, _mm_clflush, _mm_lfence, _mm_mfence): New
	functions.
	(_mm_shufflehi_epi16, _mm_shufflelo_epi16, _mm_shuffle_epi32, 
	_mm_extract_epi16, _mm_insert_epi16, _mm_shuffle_pd): New macros.

Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.389
diff -c -p -r1.389 i386.c
*** config/i386/i386.c	29 Apr 2002 18:40:46 -0000	1.389
--- config/i386/i386.c	2 May 2002 00:00:19 -0000
*************** static const struct builtin_description 
*** 11167,11176 ****
    { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
  
!   { MASK_SSE2, CODE_FOR_sse2_andti3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_nandti3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_iorti3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_xorti3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
  
    { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
--- 11167,11176 ----
    { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
  
!   { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
  
    { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
*************** static const struct builtin_description 
*** 11194,11199 ****
--- 11194,11227 ----
    { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  
+   { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
+ 
    { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
    { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
    { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
*************** ix86_init_mmx_sse_builtins ()
*** 11258,11263 ****
--- 11286,11292 ----
    tree pchar_type_node = build_pointer_type (char_type_node);
    tree pfloat_type_node = build_pointer_type (float_type_node);
    tree pv2si_type_node = build_pointer_type (V2SI_type_node);
+   tree pv2di_type_node = build_pointer_type (V2DI_type_node);
    tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
  
    /* Comparisons.  */
*************** ix86_init_mmx_sse_builtins ()
*** 11322,11332 ****
  						 tree_cons (NULL_TREE,
  							    integer_type_node,
  							    endlink))));
-   tree v4hi_ftype_v8qi_v8qi
-     = build_function_type (V4HI_type_node,
- 			   tree_cons (NULL_TREE, V8QI_type_node,
- 				      tree_cons (NULL_TREE, V8QI_type_node,
- 						 endlink)));
    tree v2si_ftype_v4hi_v4hi
      = build_function_type (V2SI_type_node,
  			   tree_cons (NULL_TREE, V4HI_type_node,
--- 11351,11356 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11399,11404 ****
--- 11423,11434 ----
  				      tree_cons (NULL_TREE,
  						 long_long_unsigned_type_node,
  						 endlink)));
+   tree void_ftype_pv2di_v2di
+     = build_function_type (void_type_node,
+ 			   tree_cons (NULL_TREE, pv2di_type_node,
+ 				      tree_cons (NULL_TREE,
+ 						 V2DI_type_node,
+ 						 endlink)));
    /* Normal vector unops.  */
    tree v4sf_ftype_v4sf
      = build_function_type (V4SF_type_node,
*************** ix86_init_mmx_sse_builtins ()
*** 11617,11622 ****
--- 11647,11657 ----
  						 tree_cons (NULL_TREE,
  							    integer_type_node,
  							    endlink))));
+   tree v2di_ftype_v2di_int
+     = build_function_type (V2DI_type_node,
+ 			   tree_cons (NULL_TREE, V2DI_type_node,
+ 				      tree_cons (NULL_TREE, integer_type_node,
+ 						 endlink)));
    tree v4si_ftype_v4si_int
      = build_function_type (V4SI_type_node,
  			   tree_cons (NULL_TREE, V4SI_type_node,
*************** ix86_init_mmx_sse_builtins ()
*** 11627,11632 ****
--- 11662,11695 ----
  			   tree_cons (NULL_TREE, V8HI_type_node,
  				      tree_cons (NULL_TREE, integer_type_node,
  						 endlink)));
+   tree v8hi_ftype_v8hi_v2di
+     = build_function_type (V8HI_type_node,
+ 			   tree_cons (NULL_TREE, V8HI_type_node,
+ 				      tree_cons (NULL_TREE, V2DI_type_node,
+ 						 endlink)));
+   tree v4si_ftype_v4si_v2di
+     = build_function_type (V4SI_type_node,
+ 			   tree_cons (NULL_TREE, V4SI_type_node,
+ 				      tree_cons (NULL_TREE, V2DI_type_node,
+ 						 endlink)));
+   tree v4si_ftype_v8hi_v8hi
+     = build_function_type (V4SI_type_node,
+ 			   tree_cons (NULL_TREE, V8HI_type_node,
+ 				      tree_cons (NULL_TREE, V8HI_type_node,
+ 						 endlink)));
+   tree di_ftype_v8qi_v8qi
+     = build_function_type (long_long_unsigned_type_node,
+ 			   tree_cons (NULL_TREE, V8QI_type_node,
+ 				      tree_cons (NULL_TREE, V8QI_type_node,
+ 						 endlink)));
+   tree v2di_ftype_v16qi_v16qi
+     = build_function_type (V2DI_type_node,
+ 			   tree_cons (NULL_TREE, V16QI_type_node,
+ 				      tree_cons (NULL_TREE, V16QI_type_node,
+ 						 endlink)));
+   tree int_ftype_v16qi
+     = build_function_type (integer_type_node,
+ 			   tree_cons (NULL_TREE, V16QI_type_node, endlink));
  
    /* Add all builtins that are more or less simple operations on two
       operands.  */
*************** ix86_init_mmx_sse_builtins ()
*** 11763,11769 ****
  
    def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
  
!   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
  
    def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
    def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
--- 11826,11832 ----
  
    def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
  
!   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
  
    def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
    def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
*************** ix86_init_mmx_sse_builtins ()
*** 11826,11840 ****
    def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB128);
    def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
    def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTDQ);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
    def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
    def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
!   def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW128);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
    def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
--- 11889,11903 ----
    def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
    def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
    def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
    def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
    def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
!   def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
    def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
*************** ix86_init_mmx_sse_builtins ()
*** 11842,11848 ****
    def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
--- 11905,11911 ----
    def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
*************** ix86_init_mmx_sse_builtins ()
*** 11874,11879 ****
--- 11937,11966 ----
    def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
    def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
    def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
+ 
+   def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
+   def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
+   def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
+ 
+   def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
+   def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
+   def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
+ 
+   def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
+   def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
+ 
+   def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
+   def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
+   def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
+ 
+   def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
+   def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
+   def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
+ 
+   def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
+   def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
+ 
+   def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
  }
  
  /* Errors in the source file can cause expand_expr to return const0_rtx
*************** ix86_expand_builtin (exp, target, subtar
*** 12669,12675 ****
      case IX86_BUILTIN_MOVNTPD:
        return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
      case IX86_BUILTIN_MOVNTDQ:
!       return ix86_expand_store_builtin (CODE_FOR_sse2_movntti, arglist);
      case IX86_BUILTIN_MOVNTI:
        return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
  
--- 12759,12765 ----
      case IX86_BUILTIN_MOVNTPD:
        return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
      case IX86_BUILTIN_MOVNTDQ:
!       return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
      case IX86_BUILTIN_MOVNTI:
        return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
  
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.349
diff -c -p -r1.349 i386.md
*** config/i386/i386.md	29 Apr 2002 18:40:47 -0000	1.349
--- config/i386/i386.md	2 May 2002 00:00:43 -0000
***************
*** 104,109 ****
--- 104,110 ----
  ;; 58 This is a `sfence' operation.
  ;; 59 This is a `mfence' operation.
  ;; 60 This is a `lfence' operation.
+ ;; 61 This is a `psadbw' operation.
  
  ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
  ;; from i386.c.
***************
*** 18501,18506 ****
--- 18502,18516 ----
    "pand\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
+ (define_insn "sse2_andv2di3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+         (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
+ 		  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2
+    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+   "pand\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
  (define_insn "*sse_nandti3_df"
    [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
          (and:TI (not:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0))
***************
*** 18533,18538 ****
--- 18543,18557 ----
    "pandn\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
+ (define_insn "sse2_nandv2di3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+         (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0"))
+ 		  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2
+    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+   "pandn\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
  (define_insn "*sse_iorti3_df_1"
    [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
          (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
***************
*** 18583,18588 ****
--- 18602,18616 ----
    "por\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
+ (define_insn "sse2_iorv2di3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+         (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
+ 		  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2
+    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+   "por\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
  (define_insn "*sse_xorti3_df_1"
    [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
          (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
***************
*** 18633,18638 ****
--- 18661,18675 ----
    "pxor\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
+ (define_insn "sse2_xorv2di3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+         (xor:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
+ 		  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2
+    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+   "pxor\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
  ;; Use xor, but don't show input operands so they aren't live before
  ;; this insn.
  (define_insn "sse_clrv4sf"
***************
*** 19128,19136 ****
    [(set_attr "type" "sse")])
  
  (define_insn "mmx_psadbw"
!   [(set (match_operand:V8QI 0 "register_operand" "=y")
!         (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
! 			      (match_operand:V8QI 2 "nonimmediate_operand" "ym"))))]
    "TARGET_SSE || TARGET_3DNOW_A"
    "psadbw\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 19165,19173 ----
    [(set_attr "type" "sse")])
  
  (define_insn "mmx_psadbw"
!   [(set (match_operand:DI 0 "register_operand" "=y")
!         (unspec:DI [(match_operand:V8QI 1 "register_operand" "0")
! 		    (match_operand:V8QI 2 "nonimmediate_operand" "ym")] 61))]
    "TARGET_SSE || TARGET_3DNOW_A"
    "psadbw\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 20029,20060 ****
  
  (define_insn "sse2_anddf3"
    [(set (match_operand:V2DF 0 "register_operand" "=x")
!         (subreg:V2DF (and:TI (subreg:TI (match_operand:TI 1 "register_operand" "%0") 0)
! 			     (subreg:TI (match_operand:TI 2 "nonimmediate_operand" "xm") 0)) 0))]
    "TARGET_SSE2"
    "andpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "sse2_nanddf3"
    [(set (match_operand:V2DF 0 "register_operand" "=x")
!         (subreg:V2DF (and:TI (not:TI (subreg:TI (match_operand:TI 1 "register_operand" "0") 0))
! 			     (subreg:TI (match_operand:TI 2 "nonimmediate_operand" "xm") 0)) 0))]
    "TARGET_SSE2"
    "andnpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "sse2_iordf3"
    [(set (match_operand:V2DF 0 "register_operand" "=x")
!         (subreg:V2DF (ior:TI (subreg:TI (match_operand:TI 1 "register_operand" "%0") 0)
! 			     (subreg:TI (match_operand:TI 2 "nonimmediate_operand" "xm") 0)) 0))]
    "TARGET_SSE2"
    "orpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "sse2_xordf3"
    [(set (match_operand:V2DF 0 "register_operand" "=x")
!         (subreg:V2DF (xor:TI (subreg:TI (match_operand:TI 1 "register_operand" "%0") 0)
! 			     (subreg:TI (match_operand:TI 2 "nonimmediate_operand" "xm") 0)) 0))]
    "TARGET_SSE2"
    "xorpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 20066,20097 ----
  
  (define_insn "sse2_anddf3"
    [(set (match_operand:V2DF 0 "register_operand" "=x")
!         (subreg:V2DF (and:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0)
! 			     (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))]
    "TARGET_SSE2"
    "andpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "sse2_nanddf3"
    [(set (match_operand:V2DF 0 "register_operand" "=x")
!         (subreg:V2DF (and:TI (not:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "0") 0))
! 			     (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))]
    "TARGET_SSE2"
    "andnpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "sse2_iordf3"
    [(set (match_operand:V2DF 0 "register_operand" "=x")
!         (subreg:V2DF (ior:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0)
! 			     (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))]
    "TARGET_SSE2"
    "orpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "sse2_xordf3"
    [(set (match_operand:V2DF 0 "register_operand" "=x")
!         (subreg:V2DF (xor:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0)
! 			     (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))]
    "TARGET_SSE2"
    "xorpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 20181,20189 ****
    "movntpd\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
! (define_insn "sse2_movntti"
!   [(set (match_operand:TI 0 "memory_operand" "=m")
! 	(unspec:TI [(match_operand:TI 1 "register_operand" "x")] 34))]
    "TARGET_SSE2"
    "movntdq\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
--- 20218,20226 ----
    "movntpd\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
! (define_insn "sse2_movntv2di"
!   [(set (match_operand:V2DI 0 "memory_operand" "=m")
! 	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] 34))]
    "TARGET_SSE2"
    "movntdq\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
***************
*** 20225,20231 ****
  (define_insn "cvtdq2pd"
    [(set (match_operand:V2DF 0 "register_operand" "=x")
  	(float:V2DF (vec_select:V2SI
! 		     (match_operand:V2SI 1 "nonimmediate_operand" "xm")
  		     (parallel
  		      [(const_int 0)
  		       (const_int 1)]))))]
--- 20262,20268 ----
  (define_insn "cvtdq2pd"
    [(set (match_operand:V2DF 0 "register_operand" "=x")
  	(float:V2DF (vec_select:V2SI
! 		     (match_operand:V4SI 1 "nonimmediate_operand" "xm")
  		     (parallel
  		      [(const_int 0)
  		       (const_int 1)]))))]
***************
*** 20510,20520 ****
    "pmulhuw\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
- ;; See the MMX logical operations for the reason for the unspec
  (define_insn "sse2_umulsidi3"
    [(set (match_operand:DI 0 "register_operand" "=y")
!         (unspec:DI [(mult:DI (zero_extend:DI (match_operand:DI 1 "register_operand" "0"))
! 			     (zero_extend:DI (match_operand:DI 2 "nonimmediate_operand" "ym")))] 45))]
    "TARGET_SSE2"
    "pmuludq\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 20547,20560 ----
    "pmulhuw\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "sse2_umulsidi3"
    [(set (match_operand:DI 0 "register_operand" "=y")
!         (mult:DI (zero_extend:DI (vec_select:SI
! 				  (match_operand:V2SI 1 "register_operand" "0")
! 				  (parallel [(const_int 0)])))
! 		 (zero_extend:DI (vec_select:SI
! 				  (match_operand:V2SI 2 "nonimmediate_operand" "ym")
! 				  (parallel [(const_int 0)])))))]
    "TARGET_SSE2"
    "pmuludq\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 20608,20616 ****
  
  ;; @@@ this isn't the right representation.
  (define_insn "sse2_psadbw"
!   [(set (match_operand:V16QI 0 "register_operand" "=x")
!         (abs:V16QI (minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
! 				(match_operand:V16QI 2 "nonimmediate_operand" "ym"))))]
    "TARGET_SSE2"
    "psadbw\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 20648,20656 ----
  
  ;; @@@ this isn't the right representation.
  (define_insn "sse2_psadbw"
!   [(set (match_operand:V2DI 0 "register_operand" "=x")
!         (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
! 		      (match_operand:V16QI 2 "nonimmediate_operand" "ym")] 61))]
    "TARGET_SSE2"
    "psadbw\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 20753,20759 ****
  (define_insn "ashrv8hi3"
    [(set (match_operand:V8HI 0 "register_operand" "=x")
          (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
! 		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
    "TARGET_SSE2"
    "psraw\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 20793,20799 ----
  (define_insn "ashrv8hi3"
    [(set (match_operand:V8HI 0 "register_operand" "=x")
          (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
! 		       (match_operand:SI 2 "nonmemory_operand" "ri")))]
    "TARGET_SSE2"
    "psraw\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 20761,20767 ****
  (define_insn "ashrv4si3"
    [(set (match_operand:V4SI 0 "register_operand" "=x")
          (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
! 		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
    "TARGET_SSE2"
    "psrad\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 20801,20807 ----
  (define_insn "ashrv4si3"
    [(set (match_operand:V4SI 0 "register_operand" "=x")
          (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
! 		       (match_operand:SI 2 "nonmemory_operand" "ri")))]
    "TARGET_SSE2"
    "psrad\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 20769,20775 ****
  (define_insn "lshrv8hi3"
    [(set (match_operand:V8HI 0 "register_operand" "=x")
          (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
! 		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
    "TARGET_SSE2"
    "psrlw\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 20809,20815 ----
  (define_insn "lshrv8hi3"
    [(set (match_operand:V8HI 0 "register_operand" "=x")
          (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
! 		       (match_operand:SI 2 "nonmemory_operand" "ri")))]
    "TARGET_SSE2"
    "psrlw\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 20777,20791 ****
  (define_insn "lshrv4si3"
    [(set (match_operand:V4SI 0 "register_operand" "=x")
          (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
! 		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
    "TARGET_SSE2"
    "psrld\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
! (define_insn "sse2_lshrv2di3"
    [(set (match_operand:V2DI 0 "register_operand" "=x")
          (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
! 		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
    "TARGET_SSE2"
    "psrlq\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 20817,20831 ----
  (define_insn "lshrv4si3"
    [(set (match_operand:V4SI 0 "register_operand" "=x")
          (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
! 		       (match_operand:SI 2 "nonmemory_operand" "ri")))]
    "TARGET_SSE2"
    "psrld\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
! (define_insn "lshrv2di3"
    [(set (match_operand:V2DI 0 "register_operand" "=x")
          (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
! 		       (match_operand:SI 2 "nonmemory_operand" "ri")))]
    "TARGET_SSE2"
    "psrlq\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 20793,20799 ****
  (define_insn "ashlv8hi3"
    [(set (match_operand:V8HI 0 "register_operand" "=x")
          (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
! 		     (match_operand:TI 2 "nonmemory_operand" "xi")))]
    "TARGET_SSE2"
    "psllw\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 20833,20839 ----
  (define_insn "ashlv8hi3"
    [(set (match_operand:V8HI 0 "register_operand" "=x")
          (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
! 		     (match_operand:SI 2 "nonmemory_operand" "ri")))]
    "TARGET_SSE2"
    "psllw\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 20801,20815 ****
  (define_insn "ashlv4si3"
    [(set (match_operand:V4SI 0 "register_operand" "=x")
          (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
! 		     (match_operand:TI 2 "nonmemory_operand" "xi")))]
    "TARGET_SSE2"
    "pslld\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
! (define_insn "sse2_ashlv2di3"
    [(set (match_operand:V2DI 0 "register_operand" "=x")
          (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
! 		     (match_operand:TI 2 "nonmemory_operand" "xi")))]
    "TARGET_SSE2"
    "psllq\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 20841,20919 ----
  (define_insn "ashlv4si3"
    [(set (match_operand:V4SI 0 "register_operand" "=x")
          (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
! 		     (match_operand:SI 2 "nonmemory_operand" "ri")))]
!   "TARGET_SSE2"
!   "pslld\t{%2, %0|%0, %2}"
!   [(set_attr "type" "sse")])
! 
! (define_insn "ashlv2di3"
!   [(set (match_operand:V2DI 0 "register_operand" "=x")
!         (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
! 		     (match_operand:SI 2 "nonmemory_operand" "ri")))]
!   "TARGET_SSE2"
!   "psllq\t{%2, %0|%0, %2}"
!   [(set_attr "type" "sse")])
! 
! (define_insn "ashrv8hi3_ti"
!   [(set (match_operand:V8HI 0 "register_operand" "=x")
!         (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
! 		       (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
!   "TARGET_SSE2"
!   "psraw\t{%2, %0|%0, %2}"
!   [(set_attr "type" "sse")])
! 
! (define_insn "ashrv4si3_ti"
!   [(set (match_operand:V4SI 0 "register_operand" "=x")
!         (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
! 		       (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
!   "TARGET_SSE2"
!   "psrad\t{%2, %0|%0, %2}"
!   [(set_attr "type" "sse")])
! 
! (define_insn "lshrv8hi3_ti"
!   [(set (match_operand:V8HI 0 "register_operand" "=x")
!         (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
! 		       (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
!   "TARGET_SSE2"
!   "psrlw\t{%2, %0|%0, %2}"
!   [(set_attr "type" "sse")])
! 
! (define_insn "lshrv4si3_ti"
!   [(set (match_operand:V4SI 0 "register_operand" "=x")
!         (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
! 		       (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
!   "TARGET_SSE2"
!   "psrld\t{%2, %0|%0, %2}"
!   [(set_attr "type" "sse")])
! 
! (define_insn "lshrv2di3_ti"
!   [(set (match_operand:V2DI 0 "register_operand" "=x")
!         (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
! 		       (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
!   "TARGET_SSE2"
!   "psrlq\t{%2, %0|%0, %2}"
!   [(set_attr "type" "sse")])
! 
! (define_insn "ashlv8hi3_ti"
!   [(set (match_operand:V8HI 0 "register_operand" "=x")
!         (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
! 		     (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
!   "TARGET_SSE2"
!   "psllw\t{%2, %0|%0, %2}"
!   [(set_attr "type" "sse")])
! 
! (define_insn "ashlv4si3_ti"
!   [(set (match_operand:V4SI 0 "register_operand" "=x")
!         (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
! 		     (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
    "TARGET_SSE2"
    "pslld\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
! (define_insn "ashlv2di3_ti"
    [(set (match_operand:V2DI 0 "register_operand" "=x")
          (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
! 		     (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
    "TARGET_SSE2"
    "psllq\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
Index: config/i386/xmmintrin.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/xmmintrin.h,v
retrieving revision 1.2
diff -c -p -r1.2 xmmintrin.h
*** config/i386/xmmintrin.h	12 Jan 2002 10:05:28 -0000	1.2
--- config/i386/xmmintrin.h	2 May 2002 00:00:46 -0000
*************** do {									\
*** 1058,1061 ****
--- 1058,1984 ----
    (row3) = __builtin_ia32_shufps (__t2, __t3, 0xDD);			\
  } while (0)
  
+ /* SSE2 */
+ typedef int __v2df __attribute__ ((mode (V2DF)));
+ typedef int __v2di __attribute__ ((mode (V2DI)));
+ typedef int __v4si __attribute__ ((mode (V4SI)));
+ typedef int __v8hi __attribute__ ((mode (V8HI)));
+ typedef int __v16qi __attribute__ ((mode (V16QI)));
+ 
+ #define __m128i __m128
+ #define __m128d __v2df
+ 
+ static __inline __m128d
+ _mm_add_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_add_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_sub_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_sub_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_mul_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_mul_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_div_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_div_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_sqrt_pd (__m128d __A)
+ {
+   return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
+ }
+ 
+ static __inline __m128d
+ _mm_sqrt_sd (__m128d __A)
+ {
+   return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__A);
+ }
+ 
+ static __inline __m128d
+ _mm_min_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_min_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_max_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_max_sd (__m128d __A, __m128d __B)  /* Wraps the maxsd builtin.  */
+ {
+   return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_and_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_andnot_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_or_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_xor_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpeq_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmplt_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmple_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpgt_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpge_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpneq_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpnlt_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpnle_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpngt_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpnge_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpord_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpunord_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpeq_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmplt_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmple_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpgt_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpgtsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpge_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpgesd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpneq_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpnlt_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpnle_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpngt_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpngtsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpnge_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpngesd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpord_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cmpunord_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_comieq_sd (__m128d __A, __m128d __B)
+ {
+   return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_comilt_sd (__m128d __A, __m128d __B)
+ {
+   return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_comile_sd (__m128d __A, __m128d __B)
+ {
+   return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_comigt_sd (__m128d __A, __m128d __B)
+ {
+   return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_comige_sd (__m128d __A, __m128d __B)
+ {
+   return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_comineq_sd (__m128d __A, __m128d __B)
+ {
+   return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_ucomieq_sd (__m128d __A, __m128d __B)
+ {
+   return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_ucomilt_sd (__m128d __A, __m128d __B)
+ {
+   return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_ucomile_sd (__m128d __A, __m128d __B)
+ {
+   return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_ucomigt_sd (__m128d __A, __m128d __B)
+ {
+   return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_ucomige_sd (__m128d __A, __m128d __B)
+ {
+   return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_ucomineq_sd (__m128d __A, __m128d __B)
+ {
+   return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_cvtepi32_pd (__m128i __A)
+ {
+   return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
+ }
+ 
+ static __inline __m128d
+ _mm_cvtepi32_ps (__m128i __A)
+ {
+   return (__m128d)__builtin_ia32_cvtdq2ps ((__v4si) __A);
+ }
+ 
+ static __inline __m128d
+ _mm_cvtpd_epi32 (__m128d __A)
+ {
+   return (__m128d)__builtin_ia32_cvtpd2dq ((__v2df) __A);
+ }
+ 
+ static __inline __m64
+ _mm_cvtpd_pi32 (__m128d __A)
+ {
+   return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
+ }
+ 
+ static __inline __m128d
+ _mm_cvtpd_ps (__m128d __A)
+ {
+   return (__m128d)__builtin_ia32_cvtpd2ps ((__v2df) __A);
+ }
+ 
+ static __inline __m128d
+ _mm_cvttpd_epi32 (__m128d __A)
+ {
+   return (__m128d)__builtin_ia32_cvttpd2dq ((__v2df) __A);
+ }
+ 
+ static __inline __m64
+ _mm_cvttpd_pi32 (__m128d __A)
+ {
+   return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
+ }
+ 
+ static __inline __m128d
+ _mm_cvtpi32_pd (__m64 __A)
+ {
+   return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
+ }
+ 
+ static __inline __m128d
+ _mm_cvtps_epi32 (__m128d __A)
+ {
+   return (__m128d)__builtin_ia32_cvtps2dq ((__v4sf) __A);
+ }
+ 
+ static __inline __m128d
+ _mm_cvttps_epi32 (__m128d __A)
+ {
+   return (__m128d)__builtin_ia32_cvttps2dq ((__v4sf) __A);
+ }
+ 
+ static __inline __m128d
+ _mm_cvtps_pd (__m128d __A)
+ {
+   return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
+ }
+ 
+ static __inline int
+ _mm_cvtsd_si32 (__m128d __A)
+ {
+   return __builtin_ia32_cvtsd2si ((__v2df) __A);
+ }
+ 
+ static __inline int
+ _mm_cvttsd_si32 (__m128d __A)
+ {
+   return __builtin_ia32_cvttsd2si ((__v2df) __A);
+ }
+ 
+ static __inline __m128d
+ _mm_cvtsd_ss (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
+ }
+ 
+ static __inline __m128d
+ _mm_cvtsi32_sd (__m128d __A, int __B)
+ {
+   return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
+ }
+ 
+ static __inline __m128d
+ _mm_cvtss_sd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
+ }
+ 
+ #define _mm_shuffle_pd(__A, __B, __C) ((__m128d)__builtin_ia32_shufpd ((__v2df)(__A), (__v2df)(__B), (__C)))
+ 
+ static __inline __m128d
+ _mm_unpackhi_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_unpacklo_pd (__m128d __A, __m128d __B)
+ {
+   return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_loadh_pd (__m128d __A, __m128d *__B)
+ {
+   return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, (__v2si *)__B);
+ }
+ 
+ static __inline void
+ _mm_storeh_pd (__m128d *__A, __m128d __B)
+ {
+   __builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128d
+ _mm_loadl_pd (__m128d __A, __m128d *__B)
+ {
+   return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, (__v2si *)__B);
+ }
+ 
+ static __inline void
+ _mm_storel_pd (__m128d *__A, __m128d __B)
+ {
+   __builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B);
+ }
+ 
+ static __inline int
+ _mm_movemask_pd (__m128d __A)
+ {
+   return __builtin_ia32_movmskpd ((__v2df)__A);
+ }
+ 
+ static __inline __m128i
+ _mm_packs_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_packs_epi32 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_packus_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_unpackhi_epi8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_unpackhi_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_unpackhi_epi32 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_unpacklo_epi8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_unpacklo_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_unpacklo_epi32 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_add_epi8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_add_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_add_epi32 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_add_epi64 (__m128i __A, __m128i __B)  /* Operands viewed as V2DI.  */
+ {
+   return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_adds_epi8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_adds_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_adds_epu8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_adds_epu16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_sub_epi8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_sub_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_sub_epi32 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_sub_epi64 (__m128i __A, __m128i __B)  /* Operands viewed as V2DI.  */
+ {
+   return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_subs_epi8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_subs_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_subs_epu8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_subs_epu16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_madd_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_mulhi_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_mullo_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m64
+ _mm_mul_pu16 (__m64 __A, __m64 __B)
+ {
+   return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_mul_epu16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_sll_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psllw128 ((__v8hi)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_sll_epi32 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pslld128 ((__v4si)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_sll_epi64 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psllq128 ((__v2di)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_sra_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_sra_epi32 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_srl_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_srl_epi32 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_srl_epi64 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_slli_epi16 (__m128i __A, int __B)
+ {
+   return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
+ }
+ 
+ static __inline __m128i
+ _mm_slli_epi32 (__m128i __A, int __B)
+ {
+   return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
+ }
+ 
+ static __inline __m128i
+ _mm_slli_epi64 (__m128i __A, int __B)
+ {
+   return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
+ }
+ 
+ static __inline __m128i
+ _mm_srai_epi16 (__m128i __A, int __B)
+ {
+   return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
+ }
+ 
+ static __inline __m128i
+ _mm_srai_epi32 (__m128i __A, int __B)
+ {
+   return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
+ }
+ 
+ static __inline __m128i
+ _mm_srli_epi16 (__m128i __A, int __B)
+ {
+   return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
+ }
+ 
+ static __inline __m128i
+ _mm_srli_epi32 (__m128i __A, int __B)
+ {
+   return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
+ }
+ 
+ static __inline __m128i
+ _mm_srli_epi64 (__m128i __A, int __B)
+ {
+   return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
+ }
+ 
+ static __inline __m128i
+ _mm_and_si128 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_andnot_si128 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_or_si128 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_xor_si128 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_cmpeq_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_cmpgt_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
+ }
+ 
+ #define _mm_extract_epi16(__A, __B) __builtin_ia32_pextrw128 ((__v8hi)__A, __B)
+ 
+ #define _mm_insert_epi16(__A, __B, __C) ((__m128i)__builtin_ia32_pinsrw128 ((__v8hi)(__A), (__B), (__C)))
+ 
+ static __inline __m128i
+ _mm_max_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_max_epu8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_min_epi16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_min_epu8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline int
+ _mm_movemask_epi8 (__m128i __A)
+ {
+   return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
+ }
+ 
+ static __inline __m128i
+ _mm_mulhi_epu16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ #define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw128 ((__v8hi)__A, __B))
+ #define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw128 ((__v8hi)__A, __B))
+ #define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B))
+ 
+ static __inline void
+ _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
+ {
+   __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
+ }
+ 
+ static __inline __m128i
+ _mm_avg_epu8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_avg_epu16 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_sad_epu8 (__m128i __A, __m128i __B)
+ {
+   return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
+ }
+ 
+ static __inline void
+ _mm_stream_si32 (int *__A, int __B)
+ {
+   __builtin_ia32_movnti (__A, __B);
+ }
+ 
+ static __inline void
+ _mm_stream_si128 (__m128i *__A, __m128i __B)
+ {
+   __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
+ }
+ 
+ static __inline void
+ _mm_stream_pd (__m128d *__A, __m128d __B)
+ {
+   __builtin_ia32_movntpd (__A, (__v2df)__B);
+ }
+ 
+ static __inline __m128i
+ _mm_movpi64_epi64 (__m64 __A)
+ {
+   return (__m128i)__builtin_ia32_movq2dq ((unsigned long long)__A);
+ }
+ 
+ static __inline void
+ _mm_clflush (void *__A)  /* Forwards __A to the clflush builtin.  */
+ {
+   __builtin_ia32_clflush (__A);
+ }
+ 
+ static __inline void
+ _mm_lfence (void)
+ {
+   __builtin_ia32_lfence ();
+ }
+ 
+ static __inline void
+ _mm_mfence (void)
+ {
+   __builtin_ia32_mfence ();
+ }
+ 
+ /* End of SSE2.  */
+ 
+ 
  #endif /* _XMMINTRIN_H_INCLUDED */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]