This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

P4 SSE2 builtins


I'll be checking in this patch shortly.  It adds the first part of SSE2 builtin
support.  A second part will add the necessary support to xmmintrin.h, and
I'll have to update the docs as well.

This has been bootstrapped on i686-linux.  The builtins themselves have only
been rather lightly tested so far.


Bernd

Index: ChangeLog
===================================================================
RCS file: /cvs/gcc/egcs/gcc/ChangeLog,v
retrieving revision 1.13880
diff -c -p -r1.13880 ChangeLog
*** ChangeLog	29 Apr 2002 15:09:50 -0000	1.13880
--- ChangeLog	29 Apr 2002 18:12:41 -0000
***************
*** 1,3 ****
--- 1,51 ----
+ 2002-04-29  Bernd Schmidt  <bernds@redhat.com>
+ 
+ 	* c-common.c (c_common_type_for_mode): Add support for V2DFmode,
+ 	V2DImode, UV2DImode.
+ 	* tree.c (build_common_tree_nodes_2): Likewise.
+ 	* tree.h (enum tree_index): Likewise.
+ 	(V2DF_type_node, V2DI_type_node, unsigned_V2DI_type_node): Define.
+ 
+ 	* config/i386/i386.c (bdesc_comi, bdesc_2arg, bdesc_1arg): Add SSE2
+ 	entries.
+ 	(init_mmx_sse_builtins): Initialize SSE2 builtins.
+ 	(ix86_expand_builtin): Add support for SSE2 builtins.
+ 	* config/i386/i386.h (VALID_SSE2_REG_MODE): New macro.
+ 	(VALID_SSE_REG_MODE): Use it.
+ 	(VECTOR_MODE_SUPPORTED_P): Allow SSE2 modes here as well.
+ 	(enum ix86_builtins): Add SSE2 builtins.
+ 	* config/i386/i386.md (movv2df_internal, movv2df, movv8hi_internal,
+ 	movv8hi, movv16qi_internal, movv16qi, pushv2df, pushv8hi, pushv16qi,
+ 	addv2df3, vmaddv2df3, subv2df3, vmsubv2df3, mulv2df3, vmmulv2df3,
+ 	divv2df3, vmdivv2df3, smaxv2df3, vmsmaxv2df3, sminv2df3, vmsminv2df3,
+ 	sse2_anddf3, sse2_nanddf3, sse2_iordf3, sse2_xordf3, sqrtv2df2,
+ 	vmsqrtv2df2, maskcmpv2df3, maskncmpv2df3, vmmaskcmpv2df3,
+ 	vmmaskncmpv2df3, sse2_comi, sse2_ucomi, sse2_movmskpd, sse2_pmovmskb,
+ 	sse2_maskmovdqu, sse2_movntv2df, sse2_movntti, sse2_movntsi, cvtdq2ps,
+ 	cvtps2dq, cvttps2dq, cvtdq2pd, cvtpd2dq, cvttpd2dq, cvtpd2pi,
+ 	cvttpd2pi, cvtpi2pd, cvtsd2si, cvttsd2si, cvtsi2sd, cvtsd2ss,
+ 	cvtss2sd, cvtpd2ps, cvtps2pd, addv16qi3, addv8hi3, addv4si3, addv2di3,
+ 	ssaddv16qi3, ssaddv8hi3, usaddv16qi3, usaddv8hi3, subv16qi3, subv8hi3,
+ 	subv4si3, subv2di3, sssubv16qi3, sssubv8hi3, ussubv16qi3, ussubv8hi3,
+ 	mulv8hi3, smulv8hi3_highpart, umulv8hi3_highpart, sse2_umulsidi3,
+ 	sse2_umulv2siv2di3, sse2_pmaddwd, sse2_clrti, sse2_uavgv16qi3,
+ 	sse2_uavgv8hi3, sse2_psadbw, sse2_pinsrw, sse2_pextrw, sse2_pshufd,
+ 	sse2_pshuflw, sse2_pshufhw, eqv16qi3, eqv8hi3, eqv4si3, gtv16qi3,
+ 	gtv8hi3, gtv4si3, umaxv16qi3, smaxv8hi3, uminv16qi3, sminv8hi3,
+ 	ashrv8hi3, ashrv4si3, lshrv8hi3, lshrv4si3, sse2_lshrv2di3,
+ 	ashlv8hi3, ashlv4si3, sse2_ashlv2di3, sse2_ashlti3, sse2_lshrti3,
+ 	sse2_unpckhpd, sse2_unpcklpd, sse2_packsswb, sse2_packssdw,
+ 	sse2_packuswb, sse2_punpckhbw, sse2_punpckhwd, sse2_punpckhdq,
+ 	sse2_punpcklbw, sse2_punpcklwd, sse2_punpckldq, sse2_movapd,
+ 	sse2_movupd, sse2_movdqa, sse2_movdqu, sse2_movdq2q, sse2_movq2dq,
+ 	sse2_movhpd, sse2_movlpd, sse2_loadsd, sse2_movsd, sse2_storesd,
+ 	sse2_shufpd, sse2_clflush, sse2_mfence, mfence_insn, sse2_lfence,
+ 	lfence_insn): New patterns.
+ 	(sse2_andti3, sse2_nandti3, sse2_iorti3, sse2_xorti3): Renamed from
+ 	sse_andti3_sse2, sse_nandti3_sse2, sse_iorti3_sse2, sse_xorti3_sse2.
+ 
  Mon Apr 29 17:03:24 CEST 2002  Jan Hubicka  <jh@suse.cz>
  
  	* i386.md (sse_mov?fcc*): Revert patch of Mar 14th.
Index: c-common.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/c-common.c,v
retrieving revision 1.315
diff -c -p -r1.315 c-common.c
*** c-common.c	26 Apr 2002 07:39:54 -0000	1.315
--- c-common.c	29 Apr 2002 18:12:54 -0000
*************** c_common_type_for_mode (mode, unsignedp)
*** 1559,1564 ****
--- 1559,1566 ----
  	  return unsignedp ? unsigned_V8HI_type_node : V8HI_type_node;
  	case V4SImode:
  	  return unsignedp ? unsigned_V4SI_type_node : V4SI_type_node;
+ 	case V2DImode:
+ 	  return unsignedp ? unsigned_V2DI_type_node : V2DI_type_node;
  	case V2SImode:
  	  return unsignedp ? unsigned_V2SI_type_node : V2SI_type_node;
  	case V4HImode:
*************** c_common_type_for_mode (mode, unsignedp)
*** 1571,1576 ****
--- 1573,1580 ----
  	  return V4SF_type_node;
  	case V2SFmode:
  	  return V2SF_type_node;
+ 	case V2DFmode:
+ 	  return V2DF_type_node;
  	default:
  	  break;
  	}
Index: tree.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/tree.c,v
retrieving revision 1.253
diff -c -p -r1.253 tree.c
*** tree.c	26 Apr 2002 19:35:33 -0000	1.253
--- tree.c	29 Apr 2002 18:13:09 -0000
*************** build_common_tree_nodes_2 (short_double)
*** 4764,4769 ****
--- 4764,4771 ----
      = make_vector (V4SImode, unsigned_intSI_type_node, 1);
    unsigned_V2SI_type_node
      = make_vector (V2SImode, unsigned_intSI_type_node, 1);
+   unsigned_V2DI_type_node
+     = make_vector (V2DImode, unsigned_intDI_type_node, 1);
    unsigned_V4HI_type_node
      = make_vector (V4HImode, unsigned_intHI_type_node, 1);
    unsigned_V8QI_type_node
*************** build_common_tree_nodes_2 (short_double)
*** 4777,4786 ****
--- 4779,4790 ----
    V4SF_type_node = make_vector (V4SFmode, float_type_node, 0);
    V4SI_type_node = make_vector (V4SImode, intSI_type_node, 0);
    V2SI_type_node = make_vector (V2SImode, intSI_type_node, 0);
+   V2DI_type_node = make_vector (V2DImode, intDI_type_node, 0);
    V4HI_type_node = make_vector (V4HImode, intHI_type_node, 0);
    V8QI_type_node = make_vector (V8QImode, intQI_type_node, 0);
    V8HI_type_node = make_vector (V8HImode, intHI_type_node, 0);
    V2SF_type_node = make_vector (V2SFmode, float_type_node, 0);
+   V2DF_type_node = make_vector (V2DFmode, double_type_node, 0);
    V16QI_type_node = make_vector (V16QImode, intQI_type_node, 0);
  }
  
Index: tree.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/tree.h,v
retrieving revision 1.331
diff -c -p -r1.331 tree.h
*** tree.h	24 Apr 2002 20:40:45 -0000	1.331
--- tree.h	29 Apr 2002 18:13:18 -0000
*************** enum tree_index
*** 1928,1933 ****
--- 1928,1934 ----
    TI_UV4HI_TYPE,
    TI_UV2SI_TYPE,
    TI_UV2SF_TYPE,
+   TI_UV2DI_TYPE,
    TI_UV16QI_TYPE,
  
    TI_V4SF_TYPE,
*************** enum tree_index
*** 1938,1943 ****
--- 1939,1946 ----
    TI_V4HI_TYPE,
    TI_V2SI_TYPE,
    TI_V2SF_TYPE,
+   TI_V2DF_TYPE,
+   TI_V2DI_TYPE,
    TI_V16QI_TYPE,
  
    TI_MAIN_IDENTIFIER,
*************** extern tree global_trees[TI_MAX];
*** 2005,2010 ****
--- 2008,2014 ----
  #define unsigned_V8HI_type_node		global_trees[TI_UV8HI_TYPE]
  #define unsigned_V4HI_type_node		global_trees[TI_UV4HI_TYPE]
  #define unsigned_V2SI_type_node		global_trees[TI_UV2SI_TYPE]
+ #define unsigned_V2DI_type_node		global_trees[TI_UV2DI_TYPE]
  
  #define V16QI_type_node			global_trees[TI_V16QI_TYPE]
  #define V4SF_type_node			global_trees[TI_V4SF_TYPE]
*************** extern tree global_trees[TI_MAX];
*** 2014,2019 ****
--- 2018,2025 ----
  #define V4HI_type_node			global_trees[TI_V4HI_TYPE]
  #define V2SI_type_node			global_trees[TI_V2SI_TYPE]
  #define V2SF_type_node			global_trees[TI_V2SF_TYPE]
+ #define V2DI_type_node			global_trees[TI_V2DI_TYPE]
+ #define V2DF_type_node			global_trees[TI_V2DF_TYPE]
  #define V16SF_type_node			global_trees[TI_V16SF_TYPE]
  
  /* An enumeration of the standard C integer types.  These must be
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.388
diff -c -p -r1.388 i386.c
*** config/i386/i386.c	29 Apr 2002 11:04:12 -0000	1.388
--- config/i386/i386.c	29 Apr 2002 18:13:48 -0000
*************** struct builtin_description
*** 10937,11005 ****
    const unsigned int flag;
  };
  
  static const struct builtin_description bdesc_comi[] =
  {
!   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
!   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
!   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
!   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
!   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
!   { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
!   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
!   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
!   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
!   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
!   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
!   { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
  };
  
  static const struct builtin_description bdesc_2arg[] =
  {
    /* SSE */
!   { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
!   { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
!   { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
!   { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
!   { MASK_SSE, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
!   { MASK_SSE, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
!   { MASK_SSE, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
!   { MASK_SSE, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
! 
!   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
!   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
!   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
!   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
!   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
!   { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
!   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
!   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
!   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
!   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
!   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
!   { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
!   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
!   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
!   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
!   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
!   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
!   { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
!   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
!   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
!   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
!   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
!   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
!   { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
! 
!   { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
!   { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
!   { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
!   { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
! 
!   { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
!   { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
!   { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
!   { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
!   { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
  
    /* MMX */
    { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
--- 10937,11020 ----
    const unsigned int flag;
  };
  
+ /* Used for builtins that are enabled both by -msse and -msse2.  */
+ #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
+ 
  static const struct builtin_description bdesc_comi[] =
  {
!   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
!   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
!   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
!   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
!   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
!   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
!   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
!   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
!   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
!   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
!   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
!   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
!   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
!   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
!   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
!   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
  };
  
  static const struct builtin_description bdesc_2arg[] =
  {
    /* SSE */
!   { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
! 
!   { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
!   { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
!   { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
!   { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
!   { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
!   { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
!   { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
!   { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
!   { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
!   { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
!   { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
!   { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
!   { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
!   { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
!   { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
!   { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
!   { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
!   { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
!   { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
!   { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
!   { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
!   { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
!   { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
!   { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
! 
!   { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
! 
!   { MASK_SSE1, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
  
    /* MMX */
    { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
*************** static const struct builtin_description 
*** 11020,11034 ****
  
    { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
    { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
!   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
  
    { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
    { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
    { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
    { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
  
!   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
!   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
  
    { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
    { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
--- 11035,11049 ----
  
    { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
    { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
!   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
  
    { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
    { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
    { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
    { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
  
!   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
!   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
  
    { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
    { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
*************** static const struct builtin_description 
*** 11037,11046 ****
    { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
    { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
  
!   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
!   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
!   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
!   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
  
    { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
    { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
--- 11052,11061 ----
    { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
    { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
  
!   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
!   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
!   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
!   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
  
    { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
    { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
*************** static const struct builtin_description 
*** 11054,11061 ****
    { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
    { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
  
!   { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
!   { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  
    { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
    { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
--- 11069,11076 ----
    { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
    { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
  
!   { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  
    { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
    { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
*************** static const struct builtin_description 
*** 11076,11100 ****
    { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
    { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
  
!   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
!   { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
  
  };
  
  static const struct builtin_description bdesc_1arg[] =
  {
!   { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
!   { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
  
!   { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
!   { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
!   { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
! 
!   { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
!   { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
!   { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
!   { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
  
  };
  
  void
--- 11091,11241 ----
    { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
    { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
  
!   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
!   { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
  
+   /* SSE2 */
+   { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
+   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
+   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
+   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
+   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
+   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
+   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
+   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
+   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
+   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
+   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
+   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
+   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
+   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
+   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
+   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
+   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
+   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
+   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
+   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
+   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
+   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
+   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
+   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_nanddf3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_xordf3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
+ 
+   /* SSE2 MMX */
+   { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_sse2_andti3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_nandti3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_iorti3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_xorti3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
+   { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
+   { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
  };
  
  static const struct builtin_description bdesc_1arg[] =
  {
!   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
!   { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
!   /* Names are 0: every builtin here gets an explicit def_builtin call.  */
!   { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
!   { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
!   /* Conversions from V4SF to V2SI or int.  */
!   { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
!   { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
!   { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
!   { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
!   /* Everything from here on is gated on MASK_SSE2.  */
!   { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
! 
!   { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
!   /* SSE2 conversions; the prototypes are set where each is declared.  */
!   { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
!   { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
  
!   { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
!   { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
!   { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
!   { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
!   { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
  
+   { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
+   { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
+ 
+   { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
+   { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
+   { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
  };
  
  void
*************** ix86_init_mmx_sse_builtins ()
*** 11320,11325 ****
--- 11461,11632 ----
                                        tree_cons (NULL_TREE,
                                                   V2SF_type_node,
                                                   endlink)));
+   tree pint_type_node    = build_pointer_type (integer_type_node);
+   tree pdouble_type_node = build_pointer_type (double_type_node);
+   tree int_ftype_v2df_v2df
+     = build_function_type (integer_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node, endlink)));
+   /* TImode prototypes; ti_ftype_ti_ti serves the two-operand table.  */
+   tree ti_ftype_void
+     = build_function_type (intTI_type_node, endlink);
+   tree ti_ftype_ti_ti
+     = build_function_type (intTI_type_node,
+ 			   tree_cons (NULL_TREE, intTI_type_node,
+ 				      tree_cons (NULL_TREE, intTI_type_node,
+ 						 endlink)));
+   tree void_ftype_pvoid
+     = build_function_type (void_type_node,
+ 			   tree_cons (NULL_TREE, ptr_type_node, endlink));
+   tree v2di_ftype_di
+     = build_function_type (V2DI_type_node,
+ 			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
+ 				      endlink));
+   tree v4sf_ftype_v4si
+     = build_function_type (V4SF_type_node,
+ 			   tree_cons (NULL_TREE, V4SI_type_node, endlink));
+   tree v4si_ftype_v4sf
+     = build_function_type (V4SI_type_node,
+ 			   tree_cons (NULL_TREE, V4SF_type_node, endlink));
+   tree v2df_ftype_v4si
+     = build_function_type (V2DF_type_node,
+ 			   tree_cons (NULL_TREE, V4SI_type_node, endlink));
+   tree v4si_ftype_v2df
+     = build_function_type (V4SI_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node, endlink));
+   tree v2si_ftype_v2df
+     = build_function_type (V2SI_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node, endlink));
+   tree v4sf_ftype_v2df
+     = build_function_type (V4SF_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node, endlink));
+   tree v2df_ftype_v2si
+     = build_function_type (V2DF_type_node,
+ 			   tree_cons (NULL_TREE, V2SI_type_node, endlink));
+   tree v2df_ftype_v4sf
+     = build_function_type (V2DF_type_node,
+ 			   tree_cons (NULL_TREE, V4SF_type_node, endlink));
+   tree int_ftype_v2df
+     = build_function_type (integer_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node, endlink));
+   tree v2df_ftype_v2df_int
+     = build_function_type (V2DF_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node,
+ 				      tree_cons (NULL_TREE, integer_type_node,
+ 						 endlink)));
+   tree v4sf_ftype_v4sf_v2df
+     = build_function_type (V4SF_type_node,
+ 			   tree_cons (NULL_TREE, V4SF_type_node,
+ 				      tree_cons (NULL_TREE, V2DF_type_node,
+ 						 endlink)));
+   tree v2df_ftype_v2df_v4sf
+     = build_function_type (V2DF_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node,
+ 				      tree_cons (NULL_TREE, V4SF_type_node,
+ 						 endlink)));
+   tree v2df_ftype_v2df_v2df_int
+     = build_function_type (V2DF_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node,
+ 				      tree_cons (NULL_TREE, V2DF_type_node,
+ 						 tree_cons (NULL_TREE,
+ 							    integer_type_node,
+ 							    endlink))));
+   tree v2df_ftype_v2df_pv2si
+     = build_function_type (V2DF_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node,
+ 				      tree_cons (NULL_TREE, pv2si_type_node,
+ 						 endlink)));
+   tree void_ftype_pv2si_v2df
+     = build_function_type (void_type_node,
+ 			   tree_cons (NULL_TREE, pv2si_type_node,
+ 				      tree_cons (NULL_TREE, V2DF_type_node,
+ 						 endlink)));
+   tree void_ftype_pdouble_v2df
+     = build_function_type (void_type_node,
+ 			   tree_cons (NULL_TREE, pdouble_type_node,
+ 				      tree_cons (NULL_TREE, V2DF_type_node,
+ 						 endlink)));
+   tree void_ftype_pint_int
+     = build_function_type (void_type_node,
+ 			   tree_cons (NULL_TREE, pint_type_node,
+ 				      tree_cons (NULL_TREE, integer_type_node,
+ 						 endlink)));
+   tree void_ftype_v16qi_v16qi_pchar
+     = build_function_type (void_type_node,
+ 			   tree_cons (NULL_TREE, V16QI_type_node,
+ 				      tree_cons (NULL_TREE, V16QI_type_node,
+ 						 tree_cons (NULL_TREE,
+ 							    pchar_type_node,
+ 							    endlink))));
+   tree v2df_ftype_pdouble
+     = build_function_type (V2DF_type_node,
+ 			   tree_cons (NULL_TREE, pdouble_type_node,
+ 				      endlink));
+   tree v2df_ftype_v2df_v2df
+     = build_function_type (V2DF_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node,
+ 				      tree_cons (NULL_TREE, V2DF_type_node,
+ 						 endlink)));
+   tree v16qi_ftype_v16qi_v16qi
+     = build_function_type (V16QI_type_node,
+ 			   tree_cons (NULL_TREE, V16QI_type_node,
+ 				      tree_cons (NULL_TREE, V16QI_type_node,
+ 						 endlink)));
+   tree v8hi_ftype_v8hi_v8hi
+     = build_function_type (V8HI_type_node,
+ 			   tree_cons (NULL_TREE, V8HI_type_node,
+ 				      tree_cons (NULL_TREE, V8HI_type_node,
+ 						 endlink)));
+   tree v4si_ftype_v4si_v4si
+     = build_function_type (V4SI_type_node,
+ 			   tree_cons (NULL_TREE, V4SI_type_node,
+ 				      tree_cons (NULL_TREE, V4SI_type_node,
+ 						 endlink)));
+   tree v2di_ftype_v2di_v2di
+     = build_function_type (V2DI_type_node,
+ 			   tree_cons (NULL_TREE, V2DI_type_node,
+ 				      tree_cons (NULL_TREE, V2DI_type_node,
+ 						 endlink)));
+   tree v2di_ftype_v2df_v2df
+     = build_function_type (V2DI_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node,
+ 				      tree_cons (NULL_TREE, V2DF_type_node,
+ 						 endlink)));
+   tree v2df_ftype_v2df
+     = build_function_type (V2DF_type_node,
+ 			   tree_cons (NULL_TREE, V2DF_type_node,
+ 				      endlink));
+   tree v2df_ftype_double
+     = build_function_type (V2DF_type_node,
+ 			   tree_cons (NULL_TREE, double_type_node,
+ 				      endlink));
+   tree v2df_ftype_double_double
+     = build_function_type (V2DF_type_node,
+ 			   tree_cons (NULL_TREE, double_type_node,
+ 				      tree_cons (NULL_TREE, double_type_node,
+ 						 endlink)));
+   tree int_ftype_v8hi_int
+     = build_function_type (integer_type_node,
+ 			   tree_cons (NULL_TREE, V8HI_type_node,
+ 				      tree_cons (NULL_TREE, integer_type_node,
+ 						 endlink)));
+   tree v8hi_ftype_v8hi_int_int
+     = build_function_type (V8HI_type_node,
+ 			   tree_cons (NULL_TREE, V8HI_type_node,
+ 				      tree_cons (NULL_TREE, integer_type_node,
+ 						 tree_cons (NULL_TREE,
+ 							    integer_type_node,
+ 							    endlink))));
+   tree v4si_ftype_v4si_int
+     = build_function_type (V4SI_type_node,
+ 			   tree_cons (NULL_TREE, V4SI_type_node,
+ 				      tree_cons (NULL_TREE, integer_type_node,
+ 						 endlink)));
+   tree v8hi_ftype_v8hi_int
+     = build_function_type (V8HI_type_node,
+ 			   tree_cons (NULL_TREE, V8HI_type_node,
+ 				      tree_cons (NULL_TREE, integer_type_node,
+ 						 endlink)));
  
    /* Add all builtins that are more or less simple operations on two
       operands.  */
*************** ix86_init_mmx_sse_builtins ()
*** 11336,11341 ****
--- 11643,11666 ----
  
        switch (mode)
  	{
+ 	case V16QImode:
+ 	  type = v16qi_ftype_v16qi_v16qi;
+ 	  break;
+ 	case V8HImode:
+ 	  type = v8hi_ftype_v8hi_v8hi;
+ 	  break;
+ 	case V4SImode:
+ 	  type = v4si_ftype_v4si_v4si;
+ 	  break;
+ 	case V2DImode:
+ 	  type = v2di_ftype_v2di_v2di;
+ 	  break;
+ 	case V2DFmode:
+ 	  type = v2df_ftype_v2df_v2df;
+ 	  break;
+ 	case TImode:  /* No vector type here: built on intTI_type_node.  */
+ 	  type = ti_ftype_ti_ti;
+ 	  break;
  	case V4SFmode:
  	  type = v4sf_ftype_v4sf_v4sf;
  	  break;
*************** ix86_init_mmx_sse_builtins ()
*** 11363,11368 ****
--- 11688,11699 ----
  	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
  	type = v4si_ftype_v4sf_v4sf;
  
+       if (d->icode == CODE_FOR_maskcmpv2df3
+ 	  || d->icode == CODE_FOR_maskncmpv2df3
+ 	  || d->icode == CODE_FOR_vmmaskcmpv2df3
+ 	  || d->icode == CODE_FOR_vmmaskncmpv2df3)
+ 	type = v2di_ftype_v2df_v2df;  /* V2DF compares return V2DI.  */
+ 
        def_builtin (d->mask, d->name, type, d->code);
      }
  
*************** ix86_init_mmx_sse_builtins ()
*** 11387,11444 ****
  
    /* comi/ucomi insns.  */
    for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
!     def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
  
    def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
    def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
    def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
  
!   def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
!   def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
!   def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
!   def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
!   def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
!   def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
! 
!   def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
! 
!   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
!   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
! 
!   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
! 
!   def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
!   def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
! 
!   def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
! 
!   def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
!   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
!   def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
!   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
! 
!   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
! 
!   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
! 
!   def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
!   def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
!   def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
  
!   def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
  
    /* Original 3DNow!  */
    def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
--- 11718,11778 ----
  
    /* comi/ucomi insns.  */
    for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
!     def_builtin (d->mask, d->name,
! 		 (d->mask != MASK_SSE2
! 		  ? int_ftype_v4sf_v4sf : int_ftype_v2df_v2df),
! 		 d->code);  /* V2DF prototype for MASK_SSE2 entries only.  */
  
    def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
    def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
    def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
  
!   def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
!   def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
!   def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
!   def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
!   /* Bitwise operations, prototyped on V4SF.  */
!   def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
!   /* The mask for these also includes MASK_3DNOW_A.  */
!   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
!   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
! 
!   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
!   /* Loads and stores through a float pointer.  */
!   def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
!   /* Loads and stores through a V2SI pointer.  */
!   def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
! 
!   def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
!   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
!   def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
!   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
! 
!   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
! 
!   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
!   /* One-operand V4SF arithmetic.  */
!   def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
!   def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
  
!   def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
  
    /* Original 3DNow!  */
    def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
*************** ix86_init_mmx_sse_builtins ()
*** 11470,11476 ****
    def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
    def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
  
!   def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
  }
  
  /* Errors in the source file can cause expand_expr to return const0_rtx
--- 11804,11879 ----
    def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
    def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
  
!   def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
! 
!   /* SSE2 builtins that are declared with an explicit prototype.  */
!   def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
!   def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
! 
!   def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
!   def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
!   /* Loads and stores through a double pointer.  */
!   def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
! 
!   def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
!   /* FIXME: pmovmskb128/movntdq reuse the MMX (V8QI/DI) prototypes.  */
!   def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB128);
!   def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
!   def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTDQ);
!   /* FIXME: psadbw128 reuses the MMX V8QI prototype of psadbw.  */
!   def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
!   def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
!   def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW128);
! 
!   def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
! 
!   def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
!   /* Conversions; each name must carry its own IX86_BUILTIN_CVT* code.  */
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
! 
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
! 
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
! 
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
! 
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
! 
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
!   def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
! 
!   def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
!   def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
!   def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
!   def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
!   def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
! 
!   def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
!   def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
!   def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
  }
  
  /* Errors in the source file can cause expand_expr to return const0_rtx
*************** ix86_expand_builtin (exp, target, subtar
*** 11828,11834 ****
        return 0;
  
      case IX86_BUILTIN_PEXTRW:
!       icode = CODE_FOR_mmx_pextrw;
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
        op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
--- 12231,12240 ----
        return 0;
  
      case IX86_BUILTIN_PEXTRW:
!     case IX86_BUILTIN_PEXTRW128:
!       /* Both widths share the expansion below; only the pattern differs.  */
!       icode = (fcode == IX86_BUILTIN_PEXTRW128
! 	       ? CODE_FOR_sse2_pextrw : CODE_FOR_mmx_pextrw);
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
        op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
*************** ix86_expand_builtin (exp, target, subtar
*** 11856,11862 ****
        return target;
  
      case IX86_BUILTIN_PINSRW:
!       icode = CODE_FOR_mmx_pinsrw;
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
        arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
--- 12262,12271 ----
        return target;
  
      case IX86_BUILTIN_PINSRW:
!     case IX86_BUILTIN_PINSRW128:
!       /* Both widths share the expansion below; only the pattern differs.  */
!       icode = (fcode == IX86_BUILTIN_PINSRW128
! 	       ? CODE_FOR_sse2_pinsrw : CODE_FOR_mmx_pinsrw);
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
        arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
*************** ix86_expand_builtin (exp, target, subtar
*** 11889,11895 ****
        return target;
  
      case IX86_BUILTIN_MASKMOVQ:
!       icode = TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq;
        /* Note the arg order is different from the operand order.  */
        arg1 = TREE_VALUE (arglist);
        arg2 = TREE_VALUE (TREE_CHAIN (arglist));
--- 12298,12306 ----
        return target;
  
      case IX86_BUILTIN_MASKMOVQ:
!     case IX86_BUILTIN_MASKMOVDQU:  /* Reaches the sse2_maskmovdqu arm.  */
!       icode = (fcode != IX86_BUILTIN_MASKMOVQ ? CODE_FOR_sse2_maskmovdqu
! 	       : TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq);
        /* Note the arg order is different from the operand order.  */
        arg1 = TREE_VALUE (arglist);
        arg2 = TREE_VALUE (TREE_CHAIN (arglist));
*************** ix86_expand_builtin (exp, target, subtar
*** 11952,11959 ****
  
      case IX86_BUILTIN_LOADHPS:
      case IX86_BUILTIN_LOADLPS:
!       icode = (fcode == IX86_BUILTIN_LOADHPS
! 	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
        op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
--- 12363,12374 ----
  
      case IX86_BUILTIN_LOADHPS:
      case IX86_BUILTIN_LOADLPS:
!     case IX86_BUILTIN_LOADHPD:
!     case IX86_BUILTIN_LOADLPD:
!       icode = (fcode == IX86_BUILTIN_LOADLPD ? CODE_FOR_sse2_movlpd
! 	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
! 	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
! 	       : CODE_FOR_sse_movhps);  /* Remaining case: LOADHPS.  */
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
        op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
*************** ix86_expand_builtin (exp, target, subtar
*** 11977,11984 ****
  
      case IX86_BUILTIN_STOREHPS:
      case IX86_BUILTIN_STORELPS:
!       icode = (fcode == IX86_BUILTIN_STOREHPS
! 	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
        op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
--- 12392,12403 ----
  
      case IX86_BUILTIN_STOREHPS:
      case IX86_BUILTIN_STORELPS:
!     case IX86_BUILTIN_STOREHPD:
!     case IX86_BUILTIN_STORELPD:
!       icode = (fcode == IX86_BUILTIN_STORELPD ? CODE_FOR_sse2_movlpd
! 	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
! 	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
! 	       : CODE_FOR_sse_movhps);  /* Remaining case: STOREHPS.  */
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
        op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
*************** ix86_expand_builtin (exp, target, subtar
*** 12014,12020 ****
        return copy_to_mode_reg (SImode, target);
  
      case IX86_BUILTIN_SHUFPS:
!       icode = CODE_FOR_sse_shufps;
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
        arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
--- 12433,12442 ----
        return copy_to_mode_reg (SImode, target);
  
      case IX86_BUILTIN_SHUFPS:
!     case IX86_BUILTIN_SHUFPD:
!       icode = (fcode == IX86_BUILTIN_SHUFPS
! 	       ? CODE_FOR_sse_shufps
! 	       : CODE_FOR_sse2_shufpd);
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
        arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
*************** ix86_expand_builtin (exp, target, subtar
*** 12047,12053 ****
        return target;
  
      case IX86_BUILTIN_PSHUFW:
!       icode = CODE_FOR_mmx_pshufw;
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
        op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
--- 12469,12481 ----
        return target;
  
      case IX86_BUILTIN_PSHUFW:
!     case IX86_BUILTIN_PSHUFD:
!     case IX86_BUILTIN_PSHUFHW:
!     case IX86_BUILTIN_PSHUFLW:
!       icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
! 	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
! 	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
! 	       : CODE_FOR_mmx_pshufw);
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
        op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
*************** ix86_expand_builtin (exp, target, subtar
*** 12163,12168 ****
--- 12591,12678 ----
        emit_insn (gen_mmx_clrdi (target));
        return target;
  
+     case IX86_BUILTIN_SQRTSD:
+       return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
+     case IX86_BUILTIN_LOADAPD:
+       return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
+     case IX86_BUILTIN_LOADUPD:
+       return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
+ 
+     case IX86_BUILTIN_STOREAPD:
+       return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
+     case IX86_BUILTIN_STOREUPD:
+       return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
+ 
+     case IX86_BUILTIN_LOADSD:
+       return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
+ 
+     case IX86_BUILTIN_STORESD:
+       return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
+ 
+     case IX86_BUILTIN_SETPD1:
+       target = assign_386_stack_local (DFmode, 0);
+       arg0 = TREE_VALUE (arglist);
+       emit_move_insn (adjust_address (target, DFmode, 0),
+ 		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
+       op0 = gen_reg_rtx (V2DFmode);
+       emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
+       emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
+       return op0;
+ 
+     case IX86_BUILTIN_SETPD:
+       target = assign_386_stack_local (V2DFmode, 0);
+       arg0 = TREE_VALUE (arglist);
+       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+       emit_move_insn (adjust_address (target, DFmode, 0),
+ 		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
+       emit_move_insn (adjust_address (target, DFmode, 8),
+ 		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
+       op0 = gen_reg_rtx (V2DFmode);
+       emit_insn (gen_sse2_movapd (op0, target));
+       return op0;
+ 
+     case IX86_BUILTIN_LOADRPD:
+       target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
+ 					 gen_reg_rtx (V2DFmode), 1);
+       emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
+       return target;
+ 
+     case IX86_BUILTIN_LOADPD1:
+       target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
+ 					 gen_reg_rtx (V2DFmode), 1);
+       emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
+       return target;
+ 
+     case IX86_BUILTIN_STOREPD1:
+       return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
+     case IX86_BUILTIN_STORERPD:
+       return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
+ 
+     case IX86_BUILTIN_MFENCE:
+ 	emit_insn (gen_sse2_mfence ());
+ 	return 0;
+     case IX86_BUILTIN_LFENCE:
+ 	emit_insn (gen_sse2_lfence ());
+ 	return 0;
+ 
+     case IX86_BUILTIN_CLFLUSH:
+ 	arg0 = TREE_VALUE (arglist);
+ 	op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ 	icode = CODE_FOR_sse2_clflush;
+ 	mode0 = insn_data[icode].operand[0].mode;
+ 	if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ 	    op0 = copy_to_mode_reg (mode0, op0);
+ 
+ 	emit_insn (gen_sse2_clflush (op0));
+ 	return 0;
+ 
+     case IX86_BUILTIN_MOVNTPD:
+       return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
+     case IX86_BUILTIN_MOVNTDQ:
+       return ix86_expand_store_builtin (CODE_FOR_sse2_movntti, arglist);
+     case IX86_BUILTIN_MOVNTI:
+       return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
+ 
      default:
        break;
      }
*************** ix86_expand_builtin (exp, target, subtar
*** 12174,12180 ****
  	if (d->icode == CODE_FOR_maskcmpv4sf3
  	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
  	    || d->icode == CODE_FOR_maskncmpv4sf3
! 	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
  	  return ix86_expand_sse_compare (d, arglist, target);
  
  	return ix86_expand_binop_builtin (d->icode, arglist, target);
--- 12684,12694 ----
  	if (d->icode == CODE_FOR_maskcmpv4sf3
  	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
  	    || d->icode == CODE_FOR_maskncmpv4sf3
! 	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
! 	    || d->icode == CODE_FOR_maskcmpv2df3
! 	    || d->icode == CODE_FOR_vmmaskcmpv2df3
! 	    || d->icode == CODE_FOR_maskncmpv2df3
! 	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
  	  return ix86_expand_sse_compare (d, arglist, target);
  
  	return ix86_expand_binop_builtin (d->icode, arglist, target);
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.255
diff -c -p -r1.255 i386.h
*** config/i386/i386.h	18 Apr 2002 15:25:54 -0000	1.255
--- config/i386/i386.h	29 Apr 2002 18:14:00 -0000
*************** do {									\
*** 987,995 ****
--- 987,1001 ----
        ? (TARGET_64BIT ? 4 : 6)						\
        : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)))
  
+ #define VALID_SSE2_REG_MODE(MODE) \
+     ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode    \
+      || (MODE) == V2DImode)
+ 
  #define VALID_SSE_REG_MODE(MODE)					\
      ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode	\
       || (MODE) == SFmode						\
+      /* Always accept SSE2 modes so that xmmintrin.h compiles.  */	\
+      || VALID_SSE2_REG_MODE (MODE)					\
       || (TARGET_SSE2 && ((MODE) == DFmode || VALID_MMX_REG_MODE (MODE))))
  
  #define VALID_MMX_REG_MODE_3DNOW(MODE) \
*************** enum ix86_builtins
*** 2217,2222 ****
--- 2223,2434 ----
  
    IX86_BUILTIN_SSE_ZERO,
    IX86_BUILTIN_MMX_ZERO,
+ 
+   /* SSE2 */
+   IX86_BUILTIN_ADDPD,
+   IX86_BUILTIN_ADDSD,
+   IX86_BUILTIN_DIVPD,
+   IX86_BUILTIN_DIVSD,
+   IX86_BUILTIN_MULPD,
+   IX86_BUILTIN_MULSD,
+   IX86_BUILTIN_SUBPD,
+   IX86_BUILTIN_SUBSD,
+ 
+   IX86_BUILTIN_CMPEQPD,
+   IX86_BUILTIN_CMPLTPD,
+   IX86_BUILTIN_CMPLEPD,
+   IX86_BUILTIN_CMPGTPD,
+   IX86_BUILTIN_CMPGEPD,
+   IX86_BUILTIN_CMPNEQPD,
+   IX86_BUILTIN_CMPNLTPD,
+   IX86_BUILTIN_CMPNLEPD,
+   IX86_BUILTIN_CMPNGTPD,
+   IX86_BUILTIN_CMPNGEPD,
+   IX86_BUILTIN_CMPORDPD,
+   IX86_BUILTIN_CMPUNORDPD,
+   IX86_BUILTIN_CMPNEPD,
+   IX86_BUILTIN_CMPEQSD,
+   IX86_BUILTIN_CMPLTSD,
+   IX86_BUILTIN_CMPLESD,
+   IX86_BUILTIN_CMPGTSD,
+   IX86_BUILTIN_CMPGESD,
+   IX86_BUILTIN_CMPNEQSD,
+   IX86_BUILTIN_CMPNLTSD,
+   IX86_BUILTIN_CMPNLESD,
+   IX86_BUILTIN_CMPNGTSD,
+   IX86_BUILTIN_CMPNGESD,
+   IX86_BUILTIN_CMPORDSD,
+   IX86_BUILTIN_CMPUNORDSD,
+   IX86_BUILTIN_CMPNESD,
+ 
+   IX86_BUILTIN_COMIEQSD,
+   IX86_BUILTIN_COMILTSD,
+   IX86_BUILTIN_COMILESD,
+   IX86_BUILTIN_COMIGTSD,
+   IX86_BUILTIN_COMIGESD,
+   IX86_BUILTIN_COMINEQSD,
+   IX86_BUILTIN_UCOMIEQSD,
+   IX86_BUILTIN_UCOMILTSD,
+   IX86_BUILTIN_UCOMILESD,
+   IX86_BUILTIN_UCOMIGTSD,
+   IX86_BUILTIN_UCOMIGESD,
+   IX86_BUILTIN_UCOMINEQSD,
+ 
+   IX86_BUILTIN_MAXPD,
+   IX86_BUILTIN_MAXSD,
+   IX86_BUILTIN_MINPD,
+   IX86_BUILTIN_MINSD,
+ 
+   IX86_BUILTIN_ANDPD,
+   IX86_BUILTIN_ANDNPD,
+   IX86_BUILTIN_ORPD,
+   IX86_BUILTIN_XORPD,
+ 
+   IX86_BUILTIN_SQRTPD,
+   IX86_BUILTIN_SQRTSD,
+ 
+   IX86_BUILTIN_UNPCKHPD,
+   IX86_BUILTIN_UNPCKLPD,
+ 
+   IX86_BUILTIN_SHUFPD,
+ 
+   IX86_BUILTIN_LOADAPD,
+   IX86_BUILTIN_LOADUPD,
+   IX86_BUILTIN_STOREAPD,
+   IX86_BUILTIN_STOREUPD,
+   IX86_BUILTIN_LOADSD,
+   IX86_BUILTIN_STORESD,
+   IX86_BUILTIN_MOVSD,
+ 
+   IX86_BUILTIN_LOADHPD,
+   IX86_BUILTIN_LOADLPD,
+   IX86_BUILTIN_STOREHPD,
+   IX86_BUILTIN_STORELPD,
+ 
+   IX86_BUILTIN_CVTDQ2PD,
+   IX86_BUILTIN_CVTDQ2PS,
+ 
+   IX86_BUILTIN_CVTPD2DQ,
+   IX86_BUILTIN_CVTPD2PI,
+   IX86_BUILTIN_CVTPD2PS,
+   IX86_BUILTIN_CVTTPD2DQ,
+   IX86_BUILTIN_CVTTPD2PI,
+ 
+   IX86_BUILTIN_CVTPI2PD,
+   IX86_BUILTIN_CVTSI2SD,
+ 
+   IX86_BUILTIN_CVTSD2SI,
+   IX86_BUILTIN_CVTSD2SS,
+   IX86_BUILTIN_CVTSS2SD,
+   IX86_BUILTIN_CVTTSD2SI,
+ 
+   IX86_BUILTIN_CVTPS2DQ,
+   IX86_BUILTIN_CVTPS2PD,
+   IX86_BUILTIN_CVTTPS2DQ,
+ 
+   IX86_BUILTIN_MOVNTI,
+   IX86_BUILTIN_MOVNTPD,
+   IX86_BUILTIN_MOVNTDQ,
+ 
+   IX86_BUILTIN_SETPD1,
+   IX86_BUILTIN_SETPD,
+   IX86_BUILTIN_CLRPD,
+   IX86_BUILTIN_SETRPD,
+   IX86_BUILTIN_LOADPD1,
+   IX86_BUILTIN_LOADRPD,
+   IX86_BUILTIN_STOREPD1,
+   IX86_BUILTIN_STORERPD,
+ 
+   /* SSE2 MMX */
+   IX86_BUILTIN_MASKMOVDQU,
+   IX86_BUILTIN_MOVMSKPD,
+   IX86_BUILTIN_PMOVMSKB128,
+   IX86_BUILTIN_MOVQ2DQ,
+ 
+   IX86_BUILTIN_PACKSSWB128,
+   IX86_BUILTIN_PACKSSDW128,
+   IX86_BUILTIN_PACKUSWB128,
+ 
+   IX86_BUILTIN_PADDB128,
+   IX86_BUILTIN_PADDW128,
+   IX86_BUILTIN_PADDD128,
+   IX86_BUILTIN_PADDQ128,
+   IX86_BUILTIN_PADDSB128,
+   IX86_BUILTIN_PADDSW128,
+   IX86_BUILTIN_PADDUSB128,
+   IX86_BUILTIN_PADDUSW128,
+   IX86_BUILTIN_PSUBB128,
+   IX86_BUILTIN_PSUBW128,
+   IX86_BUILTIN_PSUBD128,
+   IX86_BUILTIN_PSUBQ128,
+   IX86_BUILTIN_PSUBSB128,
+   IX86_BUILTIN_PSUBSW128,
+   IX86_BUILTIN_PSUBUSB128,
+   IX86_BUILTIN_PSUBUSW128,
+ 
+   IX86_BUILTIN_PAND128,
+   IX86_BUILTIN_PANDN128,
+   IX86_BUILTIN_POR128,
+   IX86_BUILTIN_PXOR128,
+ 
+   IX86_BUILTIN_PAVGB128,
+   IX86_BUILTIN_PAVGW128,
+ 
+   IX86_BUILTIN_PCMPEQB128,
+   IX86_BUILTIN_PCMPEQW128,
+   IX86_BUILTIN_PCMPEQD128,
+   IX86_BUILTIN_PCMPGTB128,
+   IX86_BUILTIN_PCMPGTW128,
+   IX86_BUILTIN_PCMPGTD128,
+ 
+   IX86_BUILTIN_PEXTRW128,
+   IX86_BUILTIN_PINSRW128,
+ 
+   IX86_BUILTIN_PMADDWD128,
+ 
+   IX86_BUILTIN_PMAXSW128,
+   IX86_BUILTIN_PMAXUB128,
+   IX86_BUILTIN_PMINSW128,
+   IX86_BUILTIN_PMINUB128,
+ 
+   IX86_BUILTIN_PMULUDQ,
+   IX86_BUILTIN_PMULUDQ128,
+   IX86_BUILTIN_PMULHUW128,
+   IX86_BUILTIN_PMULHW128,
+   IX86_BUILTIN_PMULLW128,
+ 
+   IX86_BUILTIN_PSADBW128,
+   IX86_BUILTIN_PSHUFHW,
+   IX86_BUILTIN_PSHUFLW,
+   IX86_BUILTIN_PSHUFD,
+ 
+   IX86_BUILTIN_PSLLW128,
+   IX86_BUILTIN_PSLLD128,
+   IX86_BUILTIN_PSLLQ128,
+   IX86_BUILTIN_PSRAW128,
+   IX86_BUILTIN_PSRAD128,
+   IX86_BUILTIN_PSRLW128,
+   IX86_BUILTIN_PSRLD128,
+   IX86_BUILTIN_PSRLQ128,
+   IX86_BUILTIN_PSLLWI128,
+   IX86_BUILTIN_PSLLDI128,
+   IX86_BUILTIN_PSLLQI128,
+   IX86_BUILTIN_PSRAWI128,
+   IX86_BUILTIN_PSRADI128,
+   IX86_BUILTIN_PSRLWI128,
+   IX86_BUILTIN_PSRLDI128,
+   IX86_BUILTIN_PSRLQI128,
+ 
+   IX86_BUILTIN_PUNPCKHBW128,
+   IX86_BUILTIN_PUNPCKHWD128,
+   IX86_BUILTIN_PUNPCKHDQ128,
+   IX86_BUILTIN_PUNPCKLBW128,
+   IX86_BUILTIN_PUNPCKLWD128,
+   IX86_BUILTIN_PUNPCKLDQ128,
+ 
+   IX86_BUILTIN_CLFLUSH,
+   IX86_BUILTIN_MFENCE,
+   IX86_BUILTIN_LFENCE,
  
    IX86_BUILTIN_MAX
  };
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.348
diff -c -p -r1.348 i386.md
*** config/i386/i386.md	29 Apr 2002 15:09:58 -0000	1.348
--- config/i386/i386.md	29 Apr 2002 18:14:25 -0000
***************
*** 98,103 ****
--- 98,109 ----
  ;; 52 This is a `pfrcpit2' operation.
  ;; 53 This is a `pfrsqrt' operation.
  ;; 54 This is a `pfrsqrit1' operation.
+ ;; 55 This is a `pshuflw' operation.
+ ;; 56 This is a `pshufhw' operation.
+ ;; 57 This is a `clflush' operation.
+ ;; 58 This is a `sfence' operation.
+ ;; 59 This is a `mfence' operation.
+ ;; 60 This is a `lfence' operation.
  
  ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
  ;; from i386.c.
***************
*** 17896,17901 ****
--- 17902,17958 ----
    DONE;
  })
  
+ (define_insn "movv2df_internal"
+   [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+ 	(match_operand:V2DF 1 "general_operand" "xm,x"))]
+   "TARGET_SSE2"
+   ;; @@@ let's try to use movaps here.
+   "movapd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "movv8hi_internal"
+   [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+ 	(match_operand:V8HI 1 "general_operand" "xm,x"))]
+   "TARGET_SSE2"
+   ;; @@@ let's try to use movaps here.
+   "movaps\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "movv16qi_internal"
+   [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ 	(match_operand:V16QI 1 "general_operand" "xm,x"))]
+   "TARGET_SSE2"
+   ;; @@@ let's try to use movaps here.
+   "movaps\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_expand "movv2df"
+   [(set (match_operand:V2DF 0 "general_operand" "")
+ 	(match_operand:V2DF 1 "general_operand" ""))]
+   "TARGET_SSE2"
+ {
+   ix86_expand_vector_move (V2DFmode, operands);
+   DONE;
+ })
+ 
+ (define_expand "movv8hi"
+   [(set (match_operand:V8HI 0 "general_operand" "")
+ 	(match_operand:V8HI 1 "general_operand" ""))]
+   "TARGET_SSE2"
+ {
+   ix86_expand_vector_move (V8HImode, operands);
+   DONE;
+ })
+ 
+ (define_expand "movv16qi"
+   [(set (match_operand:V16QI 0 "general_operand" "")
+ 	(match_operand:V16QI 1 "general_operand" ""))]
+   "TARGET_SSE2"
+ {
+   ix86_expand_vector_move (V16QImode, operands);
+   DONE;
+ })
+ 
  (define_expand "movv4sf"
    [(set (match_operand:V4SF 0 "general_operand" "")
  	(match_operand:V4SF 1 "general_operand" ""))]
***************
*** 17961,17966 ****
--- 18018,18056 ----
    ""
    [(set_attr "type" "sse")])
  
+ (define_insn_and_split "*pushv2df"
+   [(set (match_operand:V2DF 0 "push_operand" "=<")
+ 	(match_operand:V2DF 1 "nonmemory_operand" "x"))]
+   "TARGET_SSE2"
+   "#"
+   ""
+   [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+    (set (mem:V2DF (reg:SI 7)) (match_dup 1))]
+   ""
+   [(set_attr "type" "sse")])
+ 
+ (define_insn_and_split "*pushv8hi"
+   [(set (match_operand:V8HI 0 "push_operand" "=<")
+ 	(match_operand:V8HI 1 "nonmemory_operand" "x"))]
+   "TARGET_SSE2"
+   "#"
+   ""
+   [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+    (set (mem:V8HI (reg:SI 7)) (match_dup 1))]
+   ""
+   [(set_attr "type" "sse")])
+ 
+ (define_insn_and_split "*pushv16qi"
+   [(set (match_operand:V16QI 0 "push_operand" "=<")
+ 	(match_operand:V16QI 1 "nonmemory_operand" "x"))]
+   "TARGET_SSE2"
+   "#"
+   ""
+   [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+    (set (mem:V16QI (reg:SI 7)) (match_dup 1))]
+   ""
+   [(set_attr "type" "sse")])
+ 
  (define_insn_and_split "*pushv4sf"
    [(set (match_operand:V4SF 0 "push_operand" "=<")
  	(match_operand:V4SF 1 "nonmemory_operand" "x"))]
***************
*** 18402,18408 ****
    "andps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
! (define_insn "*sse_andti3_sse2"
    [(set (match_operand:TI 0 "register_operand" "=x")
          (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
--- 18492,18498 ----
    "andps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
! (define_insn "sse2_andti3"
    [(set (match_operand:TI 0 "register_operand" "=x")
          (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
***************
*** 18435,18446 ****
    "andnps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
! (define_insn "*sse_nandti3_sse2"
    [(set (match_operand:TI 0 "register_operand" "=x")
          (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE2"
!   "pnand\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "*sse_iorti3_df_1"
--- 18525,18536 ----
    "andnps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
! (define_insn "sse2_nandti3"
    [(set (match_operand:TI 0 "register_operand" "=x")
          (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE2"
!   "pandn\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "*sse_iorti3_df_1"
***************
*** 18484,18490 ****
    "orps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
! (define_insn "*sse_iorti3_sse2"
    [(set (match_operand:TI 0 "register_operand" "=x")
          (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
--- 18574,18580 ----
    "orps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
! (define_insn "sse2_iorti3"
    [(set (match_operand:TI 0 "register_operand" "=x")
          (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
***************
*** 18534,18540 ****
    "xorps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
! (define_insn "*sse_xorti3_sse2"
    [(set (match_operand:TI 0 "register_operand" "=x")
          (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
--- 18624,18630 ----
    "xorps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
! (define_insn "sse2_xorti3"
    [(set (match_operand:TI 0 "register_operand" "=x")
          (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
***************
*** 19824,19826 ****
--- 19914,21161 ----
      return "prefetchw\t%a0";
  }
    [(set_attr "type" "mmx")])
+ 
+ ;; SSE2 support
+ 
+ (define_insn "addv2df3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+         (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 	           (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "addpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "vmaddv2df3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_merge:V2DF (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 	                           (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+                         (match_dup 1)
+ 			(const_int 1)))]
+   "TARGET_SSE2"
+   "addsd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "subv2df3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+         (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 	           (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "subpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "vmsubv2df3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_merge:V2DF (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 	                           (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+                         (match_dup 1)
+ 			(const_int 1)))]
+   "TARGET_SSE2"
+   "subsd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "mulv2df3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+         (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 	           (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "mulpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "vmmulv2df3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_merge:V2DF (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 	                           (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+                         (match_dup 1)
+ 			(const_int 1)))]
+   "TARGET_SSE2"
+   "mulsd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "divv2df3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+         (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 	          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "divpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "vmdivv2df3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_merge:V2DF (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 				  (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+                         (match_dup 1)
+ 			(const_int 1)))]
+   "TARGET_SSE2"
+   "divsd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; SSE min/max
+ 
+ (define_insn "smaxv2df3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+         (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "maxpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "vmsmaxv2df3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_merge:V2DF (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 	                           (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+                         (match_dup 1)
+ 			(const_int 1)))]
+   "TARGET_SSE2"
+   "maxsd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "sminv2df3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+         (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "minpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "vmsminv2df3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_merge:V2DF (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 	                           (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+                         (match_dup 1)
+ 			(const_int 1)))]
+   "TARGET_SSE2"
+   "minsd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "sse2_anddf3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+         (subreg:V2DF (and:TI (subreg:TI (match_operand:TI 1 "register_operand" "%0") 0)
+ 			     (subreg:TI (match_operand:TI 2 "nonimmediate_operand" "xm") 0)) 0))]
+   "TARGET_SSE2"
+   "andpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "sse2_nanddf3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+         (subreg:V2DF (and:TI (not:TI (subreg:TI (match_operand:TI 1 "register_operand" "0") 0))
+ 			     (subreg:TI (match_operand:TI 2 "nonimmediate_operand" "xm") 0)) 0))]
+   "TARGET_SSE2"
+   "andnpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "sse2_iordf3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+         (subreg:V2DF (ior:TI (subreg:TI (match_operand:TI 1 "register_operand" "%0") 0)
+ 			     (subreg:TI (match_operand:TI 2 "nonimmediate_operand" "xm") 0)) 0))]
+   "TARGET_SSE2"
+   "orpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "sse2_xordf3"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+         (subreg:V2DF (xor:TI (subreg:TI (match_operand:TI 1 "register_operand" "%0") 0)
+ 			     (subreg:TI (match_operand:TI 2 "nonimmediate_operand" "xm") 0)) 0))]
+   "TARGET_SSE2"
+   "xorpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ ;; SSE2 square root.  There doesn't appear to be an extension for the
+ ;; reciprocal/rsqrt instructions if the Intel manual is to be believed.
+ 
+ (define_insn "sqrtv2df2"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+         (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")))]
+   "TARGET_SSE2"
+   "sqrtpd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "vmsqrtv2df2"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_merge:V2DF (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm"))
+                         (match_operand:V2DF 2 "register_operand" "0")
+ 			(const_int 1)))]
+   "TARGET_SSE2"
+   "sqrtsd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; SSE mask-generating compares
+ 
+ (define_insn "maskcmpv2df3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+         (match_operator:V2DI 3 "sse_comparison_operator"
+ 			     [(match_operand:V2DF 1 "register_operand" "0")
+ 			      (match_operand:V2DF 2 "nonimmediate_operand" "x")]))]
+   "TARGET_SSE2"
+   "cmp%D3pd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "maskncmpv2df3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+         (not:V2DI
+ 	 (match_operator:V2DI 3 "sse_comparison_operator"
+ 			      [(match_operand:V2DF 1 "register_operand" "0")
+ 			       (match_operand:V2DF 2 "nonimmediate_operand" "x")])))]
+   "TARGET_SSE2"
+   "cmpn%D3pd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "vmmaskcmpv2df3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+ 	(vec_merge:V2DI
+ 	 (match_operator:V2DI 3 "sse_comparison_operator"
+ 			      [(match_operand:V2DF 1 "register_operand" "0")
+ 			       (match_operand:V2DF 2 "nonimmediate_operand" "x")])
+ 	 (subreg:V2DI (match_dup 1) 0)
+ 	 (const_int 1)))]
+   "TARGET_SSE2"
+   "cmp%D3sd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "vmmaskncmpv2df3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+ 	(vec_merge:V2DI
+ 	 (not:V2DI
+ 	  (match_operator:V2DI 3 "sse_comparison_operator"
+ 			       [(match_operand:V2DF 1 "register_operand" "0")
+ 				(match_operand:V2DF 2 "nonimmediate_operand" "x")]))
+ 	 (subreg:V2DI (match_dup 1) 0)
+ 	 (const_int 1)))]
+   "TARGET_SSE2"
+   "cmpn%D3sd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "sse2_comi"
+   [(set (reg:CCFP 17)
+         (match_operator:CCFP 2 "sse_comparison_operator"
+ 			[(vec_select:DF
+ 			  (match_operand:V2DF 0 "register_operand" "x")
+ 			  (parallel [(const_int 0)]))
+ 			 (vec_select:DF
+ 			  (match_operand:V2DF 1 "register_operand" "x")
+ 			  (parallel [(const_int 0)]))]))]
+   "TARGET_SSE2"
+   "comisd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "sse2_ucomi"
+   [(set (reg:CCFPU 17)
+ 	(match_operator:CCFPU 2 "sse_comparison_operator"
+ 			[(vec_select:DF
+ 			  (match_operand:V2DF 0 "register_operand" "x")
+ 			  (parallel [(const_int 0)]))
+ 			 (vec_select:DF
+ 			  (match_operand:V2DF 1 "register_operand" "x")
+ 			  (parallel [(const_int 0)]))]))]
+   "TARGET_SSE2"
+   "ucomisd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; SSE Strange Moves.
+ 
+ (define_insn "sse2_movmskpd"
+   [(set (match_operand:SI 0 "register_operand" "=r")
+ 	(unspec:SI [(match_operand:V2DF 1 "register_operand" "x")] 33))]
+   "TARGET_SSE2"
+   "movmskpd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "sse2_pmovmskb"
+   [(set (match_operand:SI 0 "register_operand" "=r")
+ 	(unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] 33))]
+   "TARGET_SSE2"
+   "pmovmskb\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "sse2_maskmovdqu"
+   [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
+ 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ 		       (match_operand:V16QI 2 "register_operand" "x")] 32))]
+   "TARGET_SSE2"
+   ;; @@@ check ordering of operands in intel/nonintel syntax
+   "maskmovdqu\t{%2, %1|%1, %2}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "sse2_movntv2df"
+   [(set (match_operand:V2DF 0 "memory_operand" "=m")
+ 	(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")] 34))]
+   "TARGET_SSE2"
+   "movntpd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "sse2_movntti"
+   [(set (match_operand:TI 0 "memory_operand" "=m")
+ 	(unspec:TI [(match_operand:TI 1 "register_operand" "x")] 34))]
+   "TARGET_SSE2"
+   "movntdq\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "sse2_movntsi"
+   [(set (match_operand:SI 0 "memory_operand" "=m")
+ 	(unspec:SI [(match_operand:SI 1 "register_operand" "r")] 34))]
+   "TARGET_SSE2"
+   "movnti\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; SSE <-> integer/MMX conversions
+ 
+ ;; Conversions between SI and SF
+ 
+ (define_insn "cvtdq2ps"
+   [(set (match_operand:V4SF 0 "register_operand" "=x")
+ 	(float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "cvtdq2ps\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "cvtps2dq"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+ 	(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "cvtps2dq\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "cvttps2dq"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+ 	(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30))]
+   "TARGET_SSE2"
+   "cvttps2dq\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; Conversions between SI and DF
+ 
+ (define_insn "cvtdq2pd"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(float:V2DF (vec_select:V2SI
+ 		     (match_operand:V2SI 1 "nonimmediate_operand" "xm")
+ 		     (parallel
+ 		      [(const_int 0)
+ 		       (const_int 1)]))))]
+   "TARGET_SSE2"
+   "cvtdq2pd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "cvtpd2dq"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+ 	(vec_concat:V4SI
+ 	 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ 	 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+   "TARGET_SSE2"
+   "cvtpd2dq\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "cvttpd2dq"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+ 	(vec_concat:V4SI
+ 	 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] 30)
+ 	 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+   "TARGET_SSE2"
+   "cvttpd2dq\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "cvtpd2pi"
+   [(set (match_operand:V2SI 0 "register_operand" "=y")
+ 	(fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "cvtpd2pi\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "cvttpd2pi"
+   [(set (match_operand:V2SI 0 "register_operand" "=y")
+ 	(unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] 30))]
+   "TARGET_SSE2"
+   "cvttpd2pi\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "cvtpi2pd"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
+   "TARGET_SSE2"
+   "cvtpi2pd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; Scalar conversions between SI and DF
+ 
+ (define_insn "cvtsd2si"
+   [(set (match_operand:SI 0 "register_operand" "=r")
+ 	(fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
+ 			       (parallel [(const_int 0)]))))]
+   "TARGET_SSE2"
+   "cvtsd2si\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "cvttsd2si"
+   [(set (match_operand:SI 0 "register_operand" "=r")
+ 	(unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
+ 				   (parallel [(const_int 0)]))] 30))]
+   "TARGET_SSE2"
+   "cvttsd2si\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "cvtsi2sd"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 	 		(vec_duplicate:V2DF
+ 			  (float:DF
+ 			    (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ 			(const_int 2)))]
+   "TARGET_SSE2"
+   "cvtsi2sd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; Conversions between SF and DF
+ 
+ ;; Narrow the V2DF operand 2 with float_truncate and merge the result into
+ ;; the V4SF value in operand 1 (tied to the output) under mask 14; emitted
+ ;; as cvtsd2ss.
+ (define_insn "cvtsd2ss"
+   [(set (match_operand:V4SF 0 "register_operand" "=x")
+ 	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ 	 		(vec_duplicate:V4SF
+ 			  (float_truncate:V2SF
+ 			    (match_operand:V2DF 2 "register_operand" "xm")))
+ 			(const_int 14)))]
+   "TARGET_SSE2"
+   "cvtsd2ss\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; Widen elements 0 and 1 of the V4SF operand 2 with float_extend and merge
+ ;; the result into the V2DF value in operand 1 (tied to the output) under
+ ;; mask 2; emitted as cvtss2sd.
+ (define_insn "cvtss2sd"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ 	 		(float_extend:V2DF
+ 			  (vec_select:V2SF
+ 			    (match_operand:V4SF 2 "register_operand" "xm")
+ 			    (parallel [(const_int 0)
+ 				       (const_int 1)])))
+ 			(const_int 2)))]
+   "TARGET_SSE2"
+   "cvtss2sd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; Narrow a V2DF operand to V2SF and concatenate it with a zero V2SI
+ ;; vector; the V4SF result is expressed through subregs of the integer
+ ;; vector modes.  Emitted as cvtpd2ps.
+ (define_insn "cvtpd2ps"
+   [(set (match_operand:V4SF 0 "register_operand" "=x")
+ 	(subreg:V4SF
+ 	  (vec_concat:V4SI
+ 	    (subreg:V2SI (float_truncate:V2SF
+ 			   (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 0)
+ 	    (const_vector:V2SI [(const_int 0) (const_int 0)])) 0))]
+   "TARGET_SSE2"
+   "cvtpd2ps\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; Widen elements 0 and 1 of a V4SF operand (SSE register or memory) to
+ ;; V2DF; emitted as cvtps2pd.
+ (define_insn "cvtps2pd"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(float_extend:V2DF
+ 	  (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ 			   (parallel [(const_int 0)
+ 				      (const_int 1)]))))]
+   "TARGET_SSE2"
+   "cvtps2pd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; SSE2 variants of MMX insns
+ 
+ ;; MMX arithmetic
+ 
+ ;; V16QI addition (paddb).  Operand 1 is tied to the output; operand 2 may
+ ;; be an SSE register or memory.
+ (define_insn "addv16qi3"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+         (plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 		    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "paddb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI addition (paddw).  Operand 1 is tied to the output; operand 2 may
+ ;; be an SSE register or memory.
+ (define_insn "addv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 	           (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "paddw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V4SI addition (paddd).  Operand 1 is tied to the output; operand 2 may
+ ;; be an SSE register or memory.
+ (define_insn "addv4si3"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+         (plus:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ 	           (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "paddd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V2DI addition (paddq).  Operand 1 is tied to the output; operand 2 may
+ ;; be an SSE register or memory.
+ (define_insn "addv2di3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+         (plus:V2DI (match_operand:V2DI 1 "register_operand" "0")
+ 	           (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "paddq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V16QI signed saturating addition (ss_plus), emitted as paddsb.
+ (define_insn "ssaddv16qi3"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+         (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 		       (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "paddsb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI signed saturating addition (ss_plus), emitted as paddsw.
+ (define_insn "ssaddv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		      (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "paddsw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V16QI unsigned saturating addition (us_plus), emitted as paddusb.
+ (define_insn "usaddv16qi3"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+         (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 		       (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "paddusb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI unsigned saturating addition (us_plus), emitted as paddusw.
+ (define_insn "usaddv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		      (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "paddusw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V16QI subtraction, operand 1 minus operand 2 (psubb).  Operand 1 is
+ ;; tied to the output.
+ (define_insn "subv16qi3"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+         (minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 		     (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "psubb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI subtraction, operand 1 minus operand 2 (psubw).  Operand 1 is
+ ;; tied to the output.
+ (define_insn "subv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		    (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "psubw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V4SI subtraction, operand 1 minus operand 2 (psubd).  Operand 1 is
+ ;; tied to the output.
+ (define_insn "subv4si3"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+         (minus:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ 		    (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "psubd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V2DI subtraction, operand 1 minus operand 2 (psubq).  Operand 1 is
+ ;; tied to the output.
+ (define_insn "subv2di3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+         (minus:V2DI (match_operand:V2DI 1 "register_operand" "0")
+ 		    (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "psubq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V16QI signed saturating subtraction (ss_minus), emitted as psubsb.
+ (define_insn "sssubv16qi3"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+         (ss_minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 			(match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "psubsb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI signed saturating subtraction (ss_minus), emitted as psubsw.
+ (define_insn "sssubv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (ss_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		       (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "psubsw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V16QI unsigned saturating subtraction (us_minus), emitted as psubusb.
+ (define_insn "ussubv16qi3"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+         (us_minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 			(match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "psubusb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI unsigned saturating subtraction (us_minus), emitted as psubusw.
+ (define_insn "ussubv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		       (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "psubusw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI multiplication with a V8HI result, emitted as pmullw.
+ (define_insn "mulv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (mult:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "pmullw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; Signed high-part multiply: sign-extend both V8HI operands to V8SI,
+ ;; multiply, shift right by 16 and truncate back to V8HI.  Emitted as pmulhw.
+ (define_insn "smulv8hi3_highpart"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+ 	(truncate:V8HI
+ 	 (lshiftrt:V8SI
+ 	  (mult:V8SI (sign_extend:V8SI (match_operand:V8HI 1 "register_operand" "0"))
+ 		     (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ 	  (const_int 16))))]
+   "TARGET_SSE2"
+   "pmulhw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; Unsigned high-part multiply: zero-extend both V8HI operands to V8SI,
+ ;; multiply, shift right by 16 and truncate back to V8HI.  Emitted as pmulhuw.
+ (define_insn "umulv8hi3_highpart"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+ 	(truncate:V8HI
+ 	 (lshiftrt:V8SI
+ 	  (mult:V8SI (zero_extend:V8SI (match_operand:V8HI 1 "register_operand" "0"))
+ 		     (zero_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ 	  (const_int 16))))]
+   "TARGET_SSE2"
+   "pmulhuw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; pmuludq on MMX registers ("y" constraints); the DImode product is
+ ;; wrapped in unspec 45.
+ ;; See the MMX logical operations for the reason for the unspec
+ ;; NOTE(review): zero_extend:DI is applied to operands that are already
+ ;; DImode -- confirm that this is the intended representation.
+ (define_insn "sse2_umulsidi3"
+   [(set (match_operand:DI 0 "register_operand" "=y")
+         (unspec:DI [(mult:DI (zero_extend:DI (match_operand:DI 1 "register_operand" "0"))
+ 			     (zero_extend:DI (match_operand:DI 2 "nonimmediate_operand" "ym")))] 45))]
+   "TARGET_SSE2"
+   "pmuludq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; 128-bit pmuludq: zero-extend elements 0 and 2 of each V4SI operand to
+ ;; V2DI and multiply.  The operands are 128-bit vector modes, so they use
+ ;; the SSE register class "x", not the 64-bit MMX class "y".
+ (define_insn "sse2_umulv2siv2di3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+         (mult:V2DI (zero_extend:V2DI
+ 		     (vec_select:V2SI
+ 		       (match_operand:V4SI 1 "register_operand" "0")
+ 		       (parallel [(const_int 0) (const_int 2)])))
+ 		   (zero_extend:V2DI
+ 		     (vec_select:V2SI
+ 		       (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ 		       (parallel [(const_int 0) (const_int 2)])))))]
+   "TARGET_SSE2"
+   "pmuludq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; pmaddwd: sign-extend the even-indexed (0,2,4,6) and odd-indexed
+ ;; (1,3,5,7) V8HI elements of both operands to V4SI, multiply the
+ ;; corresponding selections, and add the two V4SI products.
+ (define_insn "sse2_pmaddwd"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+         (plus:V4SI
+ 	 (mult:V4SI
+ 	  (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "0")
+ 					     (parallel [(const_int 0)
+ 							(const_int 2)
+ 							(const_int 4)
+ 							(const_int 6)])))
+ 	  (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ 					     (parallel [(const_int 0)
+ 							(const_int 2)
+ 							(const_int 4)
+ 							(const_int 6)]))))
+ 	 (mult:V4SI
+ 	  (sign_extend:V4SI (vec_select:V4HI (match_dup 1)
+ 					     (parallel [(const_int 1)
+ 							(const_int 3)
+ 							(const_int 5)
+ 							(const_int 7)])))
+ 	  (sign_extend:V4SI (vec_select:V4HI (match_dup 2)
+ 					     (parallel [(const_int 1)
+ 							(const_int 3)
+ 							(const_int 5)
+ 							(const_int 7)]))))))]
+   "TARGET_SSE2"
+   "pmaddwd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; Set a TImode SSE register to zero.  The output is the same as pxor of
+ ;; the register with itself, but the pattern does not show input operands
+ ;; so that we don't think they are live.
+ (define_insn "sse2_clrti"
+   [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
+   "TARGET_SSE2"
+   "pxor\t{%0, %0|%0, %0}"
+   [(set_attr "type" "sse")])
+ 
+ ;; MMX unsigned averages/sum of absolute differences
+ 
+ ;; pavgb on SSE registers: (op1 + op2 + 1) >> 1 on each V16QI element.
+ ;; Operand 2 is a 128-bit vector, so its register alternative is the SSE
+ ;; class "x", not the 64-bit MMX class "y".
+ (define_insn "sse2_uavgv16qi3"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+         (ashiftrt:V16QI
+ 	 (plus:V16QI (plus:V16QI
+ 		     (match_operand:V16QI 1 "register_operand" "0")
+ 		     (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ 		     (const_vector:V16QI [(const_int 1) (const_int 1)
+ 					  (const_int 1) (const_int 1)
+ 					  (const_int 1) (const_int 1)
+ 					  (const_int 1) (const_int 1)
+ 					  (const_int 1) (const_int 1)
+ 					  (const_int 1) (const_int 1)
+ 					  (const_int 1) (const_int 1)
+ 					  (const_int 1) (const_int 1)]))
+ 	 (const_int 1)))]
+   "TARGET_SSE2"
+   "pavgb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; pavgw on SSE registers: (op1 + op2 + 1) >> 1 on each V8HI element.
+ ;; Operand 2 is a 128-bit vector, so its register alternative is the SSE
+ ;; class "x", not the 64-bit MMX class "y".
+ (define_insn "sse2_uavgv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (ashiftrt:V8HI
+ 	 (plus:V8HI (plus:V8HI
+ 		     (match_operand:V8HI 1 "register_operand" "0")
+ 		     (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ 		    (const_vector:V8HI [(const_int 1) (const_int 1)
+ 				        (const_int 1) (const_int 1)
+ 				        (const_int 1) (const_int 1)
+ 				        (const_int 1) (const_int 1)]))
+ 	 (const_int 1)))]
+   "TARGET_SSE2"
+   "pavgw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; psadbw on SSE registers, modelled as abs (op1 - op2) on V16QI.
+ ;; @@@ this isn't the right representation.
+ ;; Operand 2 is a 128-bit vector, so its register alternative is the SSE
+ ;; class "x", not the 64-bit MMX class "y".
+ (define_insn "sse2_psadbw"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+         (abs:V16QI (minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 				(match_operand:V16QI 2 "nonimmediate_operand" "xm"))))]
+   "TARGET_SSE2"
+   "psadbw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ 
+ ;; MMX insert/extract/shuffle
+ 
+ ;; pinsrw: merge a 16-bit value taken from the SImode operand 2 (general
+ ;; register or memory) into the V8HI value in operand 1 (tied to the
+ ;; output), using immediate operand 3 as the merge mask.
+ ;; The SImode source is truncated to HImode before vec_duplicate so that
+ ;; the element mode matches V8HI.
+ (define_insn "sse2_pinsrw"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (vec_merge:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 			(vec_duplicate:V8HI
+ 			 (truncate:HI
+ 			   (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ 			(match_operand:SI 3 "immediate_operand" "i")))]
+   "TARGET_SSE2"
+   "pinsrw\t{%3, %2, %0|%0, %2, %3}"
+   [(set_attr "type" "sse")])
+ 
+ ;; pextrw: select the HImode element of V8HI operand 1 indexed by
+ ;; immediate operand 2 and zero-extend it into an SImode general register.
+ (define_insn "sse2_pextrw"
+   [(set (match_operand:SI 0 "register_operand" "=r")
+         (zero_extend:SI
+ 	  (vec_select:HI (match_operand:V8HI 1 "register_operand" "x")
+ 			 (parallel
+ 			  [(match_operand:SI 2 "immediate_operand" "i")]))))]
+   "TARGET_SSE2"
+   "pextrw\t{%2, %1, %0|%0, %1, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; pshufd: shuffle of V4SI operand 1 controlled by immediate operand 2,
+ ;; represented as unspec 41.
+ ;; NOTE(review): operand 1 is tied to the output ("0") although the
+ ;; template prints %1 separately -- confirm whether the tie is required.
+ (define_insn "sse2_pshufd"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+ 		      (match_operand:SI 2 "immediate_operand" "i")] 41))]
+   "TARGET_SSE2"
+   "pshufd\t{%2, %1, %0|%0, %1, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; pshuflw: shuffle of V8HI operand 1 controlled by immediate operand 2,
+ ;; represented as unspec 55.
+ (define_insn "sse2_pshuflw"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "0")
+ 		      (match_operand:SI 2 "immediate_operand" "i")] 55))]
+   "TARGET_SSE2"
+   "pshuflw\t{%2, %1, %0|%0, %1, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; pshufhw: shuffle of V8HI operand 1 controlled by immediate operand 2,
+ ;; represented as unspec 56.
+ (define_insn "sse2_pshufhw"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "0")
+ 		      (match_operand:SI 2 "immediate_operand" "i")] 56))]
+   "TARGET_SSE2"
+   "pshufhw\t{%2, %1, %0|%0, %1, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; MMX mask-generating comparisons
+ 
+ ;; V16QI equality comparison producing a V16QI result; emitted as pcmpeqb.
+ (define_insn "eqv16qi3"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+         (eq:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 		 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "pcmpeqb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI equality comparison producing a V8HI result; emitted as pcmpeqw.
+ (define_insn "eqv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (eq:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "pcmpeqw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V4SI equality comparison producing a V4SI result; emitted as pcmpeqd.
+ (define_insn "eqv4si3"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+         (eq:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ 		 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "pcmpeqd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V16QI greater-than comparison (operand 1 > operand 2) producing a
+ ;; V16QI result; emitted as pcmpgtb.
+ (define_insn "gtv16qi3"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+         (gt:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 		 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "pcmpgtb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI greater-than comparison (operand 1 > operand 2) producing a
+ ;; V8HI result; emitted as pcmpgtw.
+ (define_insn "gtv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (gt:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "pcmpgtw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V4SI greater-than comparison (operand 1 > operand 2) producing a
+ ;; V4SI result; emitted as pcmpgtd.
+ (define_insn "gtv4si3"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+         (gt:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ 		 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "pcmpgtd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ 
+ ;; MMX max/min insns
+ 
+ ;; V16QI unsigned maximum (umax), emitted as pmaxub.
+ (define_insn "umaxv16qi3"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+         (umax:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 		   (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "pmaxub\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI signed maximum (smax), emitted as pmaxsw.
+ (define_insn "smaxv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (smax:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "pmaxsw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V16QI unsigned minimum (umin), emitted as pminub.
+ (define_insn "uminv16qi3"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+         (umin:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 		   (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "pminub\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI signed minimum (smin), emitted as pminsw.
+ (define_insn "sminv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (smin:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+   "TARGET_SSE2"
+   "pminsw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ 
+ ;; MMX shifts
+ 
+ ;; V8HI arithmetic right shift (psraw).  The count is a TImode operand
+ ;; that may be an SSE register or an immediate.
+ (define_insn "ashrv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
+   "TARGET_SSE2"
+   "psraw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V4SI arithmetic right shift (psrad).  The count is a TImode operand
+ ;; that may be an SSE register or an immediate.
+ (define_insn "ashrv4si3"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+         (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ 		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
+   "TARGET_SSE2"
+   "psrad\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI logical right shift (psrlw).  The count is a TImode operand
+ ;; that may be an SSE register or an immediate.
+ (define_insn "lshrv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
+   "TARGET_SSE2"
+   "psrlw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V4SI logical right shift (psrld).  The count is a TImode operand
+ ;; that may be an SSE register or an immediate.
+ (define_insn "lshrv4si3"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+         (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ 		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
+   "TARGET_SSE2"
+   "psrld\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V2DI logical right shift (psrlq).  The count is a TImode operand
+ ;; that may be an SSE register or an immediate.
+ (define_insn "sse2_lshrv2di3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+         (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
+ 		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
+   "TARGET_SSE2"
+   "psrlq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V8HI left shift (psllw).  The count is a TImode operand that may be
+ ;; an SSE register or an immediate.
+ (define_insn "ashlv8hi3"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+         (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 		     (match_operand:TI 2 "nonmemory_operand" "xi")))]
+   "TARGET_SSE2"
+   "psllw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V4SI left shift (pslld).  The count is a TImode operand that may be
+ ;; an SSE register or an immediate.
+ (define_insn "ashlv4si3"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+         (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ 		     (match_operand:TI 2 "nonmemory_operand" "xi")))]
+   "TARGET_SSE2"
+   "pslld\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; V2DI left shift (psllq).  The count is a TImode operand that may be
+ ;; an SSE register or an immediate.
+ (define_insn "sse2_ashlv2di3"
+   [(set (match_operand:V2DI 0 "register_operand" "=x")
+         (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
+ 		     (match_operand:TI 2 "nonmemory_operand" "xi")))]
+   "TARGET_SSE2"
+   "psllq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; See logical MMX insns for the reason for the unspec.  Strictly speaking
+ ;; we wouldn't need it here since we never generate TImode arithmetic.
+ 
+ ;; There has to be some kind of prize for the weirdest new instruction...
+ ;; pslldq: left shift of a whole TImode register.  The shift amount is
+ ;; modelled as immediate operand 2 multiplied by 8, and the shift is
+ ;; wrapped in unspec 30.
+ (define_insn "sse2_ashlti3"
+   [(set (match_operand:TI 0 "register_operand" "=x")
+         (unspec:TI
+ 	 [(ashift:TI (match_operand:TI 1 "register_operand" "0")
+ 		     (mult:SI (match_operand:SI 2 "immediate_operand" "i")
+ 			       (const_int 8)))] 30))]
+   "TARGET_SSE2"
+   "pslldq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; psrldq: logical right shift of a whole TImode register.  The shift
+ ;; amount is modelled as immediate operand 2 multiplied by 8, and the
+ ;; shift is wrapped in unspec 30.  The mnemonic is psrldq ("pslrdq" is
+ ;; not an instruction and would be rejected by the assembler).
+ (define_insn "sse2_lshrti3"
+   [(set (match_operand:TI 0 "register_operand" "=x")
+         (unspec:TI
+ 	 [(lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+ 		       (mult:SI (match_operand:SI 2 "immediate_operand" "i")
+ 				(const_int 8)))] 30))]
+   "TARGET_SSE2"
+   "psrldq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; SSE unpack
+ 
+ ;; unpckhpd: the result is the concatenation of element 1 of operand 1
+ ;; (tied to the output) and element 1 of operand 2.  Each vec_select
+ ;; extracts a single element, so its mode is DF rather than V2DF.
+ (define_insn "sse2_unpckhpd"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_concat:V2DF
+ 	 (vec_select:DF (match_operand:V2DF 1 "register_operand" "0")
+ 			(parallel [(const_int 1)]))
+ 	 (vec_select:DF (match_operand:V2DF 2 "register_operand" "x")
+ 			(parallel [(const_int 1)]))))]
+   "TARGET_SSE2"
+   "unpckhpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; unpcklpd: the result is the concatenation of element 0 of operand 1
+ ;; (tied to the output) and element 0 of operand 2.  Each vec_select
+ ;; extracts a single element, so its mode is DF rather than V2DF.
+ (define_insn "sse2_unpcklpd"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_concat:V2DF
+ 	 (vec_select:DF (match_operand:V2DF 1 "register_operand" "0")
+ 			(parallel [(const_int 0)]))
+ 	 (vec_select:DF (match_operand:V2DF 2 "register_operand" "x")
+ 			(parallel [(const_int 0)]))))]
+   "TARGET_SSE2"
+   "unpcklpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; MMX pack/unpack insns.
+ 
+ ;; packsswb: signed-saturating truncation of two V8HI operands to V8QI,
+ ;; concatenated into a V16QI result.
+ (define_insn "sse2_packsswb"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+ 	(vec_concat:V16QI
+ 	 (ss_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0"))
+ 	 (ss_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))]
+   "TARGET_SSE2"
+   "packsswb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; packssdw: signed-saturating truncation of two V4SI operands to V4HI,
+ ;; concatenated into a V8HI result.
+ (define_insn "sse2_packssdw"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+ 	(vec_concat:V8HI
+ 	 (ss_truncate:V4HI (match_operand:V4SI 1 "register_operand" "0"))
+ 	 (ss_truncate:V4HI (match_operand:V4SI 2 "register_operand" "x"))))]
+   "TARGET_SSE2"
+   "packssdw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; packuswb: unsigned-saturating truncation of two V8HI operands to V8QI,
+ ;; concatenated into a V16QI result.
+ (define_insn "sse2_packuswb"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+ 	(vec_concat:V16QI
+ 	 (us_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0"))
+ 	 (us_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))]
+   "TARGET_SSE2"
+   "packuswb\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; punpckhbw: vec_merge of two V16QI permutations, one of operand 1 (tied
+ ;; to the output) and one of operand 2, under mask 21845 (0x5555).
+ (define_insn "sse2_punpckhbw"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+ 	(vec_merge:V16QI
+ 	 (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 			   (parallel [(const_int 8) (const_int 0)
+ 				      (const_int 9) (const_int 1)
+ 				      (const_int 10) (const_int 2)
+ 				      (const_int 11) (const_int 3)
+ 				      (const_int 12) (const_int 4)
+ 				      (const_int 13) (const_int 5)
+ 				      (const_int 14) (const_int 6)
+ 				      (const_int 15) (const_int 7)]))
+ 	 (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x")
+ 			   (parallel [(const_int 0) (const_int 8)
+ 				      (const_int 1) (const_int 9)
+ 				      (const_int 2) (const_int 10)
+ 				      (const_int 3) (const_int 11)
+ 				      (const_int 4) (const_int 12)
+ 				      (const_int 5) (const_int 13)
+ 				      (const_int 6) (const_int 14)
+ 				      (const_int 7) (const_int 15)]))
+ 	 (const_int 21845)))]
+   "TARGET_SSE2"
+   "punpckhbw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; punpckhwd: vec_merge of two V8HI permutations, one of operand 1 (tied
+ ;; to the output) and one of operand 2, under mask 85 (0x55).
+ (define_insn "sse2_punpckhwd"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+ 	(vec_merge:V8HI
+ 	 (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 			  (parallel [(const_int 4) (const_int 0)
+ 				     (const_int 5) (const_int 1)
+ 				     (const_int 6) (const_int 2)
+ 				     (const_int 7) (const_int 3)]))
+ 	 (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x")
+ 			  (parallel [(const_int 0) (const_int 4)
+ 				     (const_int 1) (const_int 5)
+ 				     (const_int 2) (const_int 6)
+ 				     (const_int 3) (const_int 7)]))
+ 	 (const_int 85)))]
+   "TARGET_SSE2"
+   "punpckhwd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; punpckhdq: vec_merge of two V4SI permutations, one of operand 1 (tied
+ ;; to the output) and one of operand 2, under mask 5.
+ (define_insn "sse2_punpckhdq"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+ 	(vec_merge:V4SI
+ 	 (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ 			  (parallel [(const_int 2) (const_int 0)
+ 				     (const_int 3) (const_int 1)]))
+ 	 (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x")
+ 			  (parallel [(const_int 0) (const_int 2)
+ 				     (const_int 1) (const_int 3)]))
+ 	 (const_int 5)))]
+   "TARGET_SSE2"
+   "punpckhdq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; punpcklbw: vec_merge of two V16QI permutations, one of operand 1 (tied
+ ;; to the output) and one of operand 2, under mask 21845 (0x5555).
+ (define_insn "sse2_punpcklbw"
+   [(set (match_operand:V16QI 0 "register_operand" "=x")
+ 	(vec_merge:V16QI
+ 	 (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ 			   (parallel [(const_int 0) (const_int 8)
+ 				      (const_int 1) (const_int 9)
+ 				      (const_int 2) (const_int 10)
+ 				      (const_int 3) (const_int 11)
+ 				      (const_int 4) (const_int 12)
+ 				      (const_int 5) (const_int 13)
+ 				      (const_int 6) (const_int 14)
+ 				      (const_int 7) (const_int 15)]))
+ 	 (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x")
+ 			   (parallel [(const_int 8) (const_int 0)
+ 				      (const_int 9) (const_int 1)
+ 				      (const_int 10) (const_int 2)
+ 				      (const_int 11) (const_int 3)
+ 				      (const_int 12) (const_int 4)
+ 				      (const_int 13) (const_int 5)
+ 				      (const_int 14) (const_int 6)
+ 				      (const_int 15) (const_int 7)]))
+ 	 (const_int 21845)))]
+   "TARGET_SSE2"
+   "punpcklbw\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; punpcklwd: vec_merge of two V8HI permutations, one of operand 1 (tied
+ ;; to the output) and one of operand 2, under mask 85 (0x55).
+ (define_insn "sse2_punpcklwd"
+   [(set (match_operand:V8HI 0 "register_operand" "=x")
+ 	(vec_merge:V8HI
+ 	 (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ 			  (parallel [(const_int 0) (const_int 4)
+ 				     (const_int 1) (const_int 5)
+ 				     (const_int 2) (const_int 6)
+ 				     (const_int 3) (const_int 7)]))
+ 	 (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x")
+ 			  (parallel [(const_int 4) (const_int 0)
+ 				     (const_int 5) (const_int 1)
+ 				     (const_int 6) (const_int 2)
+ 				     (const_int 7) (const_int 3)]))
+ 	 (const_int 85)))]
+   "TARGET_SSE2"
+   "punpcklwd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; punpckldq: vec_merge of two V4SI permutations, one of operand 1 (tied
+ ;; to the output) and one of operand 2, under mask 5.
+ (define_insn "sse2_punpckldq"
+   [(set (match_operand:V4SI 0 "register_operand" "=x")
+ 	(vec_merge:V4SI
+ 	 (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ 			  (parallel [(const_int 0) (const_int 2)
+ 				     (const_int 1) (const_int 3)]))
+ 	 (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x")
+ 			  (parallel [(const_int 2) (const_int 0)
+ 				     (const_int 3) (const_int 1)]))
+ 	 (const_int 5)))]
+   "TARGET_SSE2"
+   "punpckldq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; SSE2 moves
+ 
+ ;; movapd: V2DF move represented as unspec 38.  Two alternatives: load
+ ;; into an SSE register from register/memory, or store a register to memory.
+ (define_insn "sse2_movapd"
+   [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+ 	(unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")] 38))]
+   "TARGET_SSE2"
+   "@
+    movapd\t{%1, %0|%0, %1}
+    movapd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; movupd: V2DF move represented as unspec 39.  Two alternatives: load
+ ;; into an SSE register from register/memory, or store a register to memory.
+ (define_insn "sse2_movupd"
+   [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+ 	(unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")] 39))]
+   "TARGET_SSE2"
+   "@
+    movupd\t{%1, %0|%0, %1}
+    movupd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; movdqa: TImode move represented as unspec 38.  Two alternatives: load
+ ;; into an SSE register from register/memory, or store a register to memory.
+ (define_insn "sse2_movdqa"
+   [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
+ 	(unspec:TI [(match_operand:TI 1 "general_operand" "xm,x")] 38))]
+   "TARGET_SSE2"
+   "@
+    movdqa\t{%1, %0|%0, %1}
+    movdqa\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; movdqu: TImode move represented as unspec 39.  Two alternatives: load
+ ;; into an SSE register from register/memory, or store a register to memory.
+ (define_insn "sse2_movdqu"
+   [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
+ 	(unspec:TI [(match_operand:TI 1 "general_operand" "xm,x")] 39))]
+   "TARGET_SSE2"
+   "@
+    movdqu\t{%1, %0|%0, %1}
+    movdqu\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; movdq2q: copy element 0 of a V2DI value in an SSE register into a
+ ;; DImode MMX register.
+ (define_insn "sse2_movdq2q"
+   [(set (match_operand:DI 0 "nonimmediate_operand" "=y")
+ 	(vec_select:DI (match_operand:V2DI 1 "general_operand" "x")
+ 		       (parallel [(const_int 0)])))]
+   "TARGET_SSE2"
+   "movdq2q\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; movq2dq: concatenate a DImode MMX register with a zero constant to
+ ;; form a V2DI value in an SSE register.
+ ;; NOTE(review): the zero is written as const_vector:DI, i.e. a vector
+ ;; constant with a scalar mode -- confirm the intended representation.
+ (define_insn "sse2_movq2dq"
+   [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x")
+ 	(vec_concat:V2DI (match_operand:DI 1 "general_operand" "y")
+ 			 (const_vector:DI [(const_int 0)])))]
+   "TARGET_SSE2"
+   "movq2dq\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; movhpd: vec_merge of operand 1 (tied to the output) and operand 2
+ ;; under mask 2.  Two alternatives (register destination with memory
+ ;; source, memory destination with register source); the insn condition
+ ;; requires that operand 1 or operand 2 is a MEM.
+ (define_insn "sse2_movhpd"
+   [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+ 	(vec_merge:V2DF
+ 	 (match_operand:V2DF 1 "nonimmediate_operand" "0,0")
+ 	 (match_operand:V2DF 2 "nonimmediate_operand" "m,x")
+ 	 (const_int 2)))]
+   "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+   "movhpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; movlpd: vec_merge of operand 1 (tied to the output) and operand 2
+ ;; under mask 1.  Two alternatives (register destination with memory
+ ;; source, memory destination with register source); the insn condition
+ ;; requires that operand 1 or operand 2 is a MEM.
+ (define_insn "sse2_movlpd"
+   [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+ 	(vec_merge:V2DF
+ 	 (match_operand:V2DF 1 "nonimmediate_operand" "0,0")
+ 	 (match_operand:V2DF 2 "nonimmediate_operand" "m,x")
+ 	 (const_int 1)))]
+   "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+   "movlpd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; movsd load: merge a DFmode memory operand with a zero value under
+ ;; mask 1 to form a V2DF register.
+ ;; NOTE(review): the first vec_merge arm is a DFmode scalar and the
+ ;; vec_duplicate carries mode DF, while the vec_merge itself is V2DF --
+ ;; confirm the intended modes.
+ (define_insn "sse2_loadsd"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_merge:V2DF
+ 	 (match_operand:DF 1 "memory_operand" "m")
+ 	 (vec_duplicate:DF (float:DF (const_int 0)))
+ 	 (const_int 1)))]
+   "TARGET_SSE2"
+   "movsd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; Register-to-register movsd: vec_merge of operand 1 (tied to the
+ ;; output) and the SSE register operand 2 under mask 1.
+ (define_insn "sse2_movsd"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+ 	(vec_merge:V2DF
+ 	 (match_operand:V2DF 1 "register_operand" "0")
+ 	 (match_operand:V2DF 2 "register_operand" "x")
+ 	 (const_int 1)))]
+   "TARGET_SSE2"
+   "movsd\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sse")])
+ 
+ ;; movsd store: write element 0 of a V2DF SSE register to a DFmode
+ ;; memory location.
+ (define_insn "sse2_storesd"
+   [(set (match_operand:DF 0 "memory_operand" "=m")
+ 	(vec_select:DF
+ 	 (match_operand:V2DF 1 "register_operand" "x")
+ 	 (parallel [(const_int 0)])))]
+   "TARGET_SSE2"
+   "movsd\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sse")])
+ 
+ ;; shufpd: shuffle of V2DF operands 1 (tied to the output) and 2
+ ;; (register or memory) controlled by immediate operand 3; represented
+ ;; as unspec 41.
+ (define_insn "sse2_shufpd"
+   [(set (match_operand:V2DF 0 "register_operand" "=x")
+         (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
+ 		      (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ 		      (match_operand:SI 3 "immediate_operand" "i")] 41))]
+   "TARGET_SSE2"
+   ;; @@@ check operand order for intel/nonintel syntax
+   "shufpd\t{%3, %2, %0|%0, %2, %3}"
+   [(set_attr "type" "sse")])
+ 
+ ;; clflush on the address given by operand 0; represented as
+ ;; unspec_volatile 57.
+ (define_insn "sse2_clflush"
+   [(unspec_volatile [(match_operand:SI 0 "address_operand" "p")] 57)]
+   "TARGET_SSE2"
+   "clflush %0"
+   [(set_attr "type" "sse")])
+ 
+ ;; Expander for mfence.  Operand 0 is not supplied by the caller: the
+ ;; preparation code builds a volatile BLKmode MEM with a scratch address
+ ;; and the pattern sets it to unspec 59 of itself.
+ (define_expand "sse2_mfence"
+   [(set (match_dup 0)
+ 	(unspec:BLK [(match_dup 0)] 59))]
+   "TARGET_SSE2"
+ {
+   operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+   MEM_VOLATILE_P (operands[0]) = 1;
+ })
+ 
+ ;; Matches a BLKmode operand set to unspec 59 of itself and emits mfence.
+ ;; The "memory" attribute is "unknown".
+ (define_insn "*mfence_insn"
+   [(set (match_operand:BLK 0 "" "")
+ 	(unspec:BLK [(match_dup 0)] 59))]
+   "TARGET_SSE2"
+   "mfence"
+   [(set_attr "type" "sse")
+    (set_attr "memory" "unknown")])
+ 
+ ;; Expander for lfence.  Operand 0 is not supplied by the caller: the
+ ;; preparation code builds a volatile BLKmode MEM with a scratch address
+ ;; and the pattern sets it to unspec 60 of itself.
+ (define_expand "sse2_lfence"
+   [(set (match_dup 0)
+ 	(unspec:BLK [(match_dup 0)] 60))]
+   "TARGET_SSE2"
+ {
+   operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+   MEM_VOLATILE_P (operands[0]) = 1;
+ })
+ 
+ ;; Matches a BLKmode operand set to unspec 60 of itself and emits lfence.
+ ;; The "memory" attribute is "unknown".
+ (define_insn "*lfence_insn"
+   [(set (match_operand:BLK 0 "" "")
+ 	(unspec:BLK [(match_dup 0)] 60))]
+   "TARGET_SSE2"
+   "lfence"
+   [(set_attr "type" "sse")
+    (set_attr "memory" "unknown")])


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]