P3 SSE/MMX support: adding the patterns

Bernd Schmidt bernds@redhat.co.uk
Tue Sep 5 08:00:00 GMT 2000


The next part of the patches that add SSE and MMX support to gcc.  This
shouldn't really affect the rest of the compiler at the moment, it just
adds a bunch of patterns.  There are some modifications in a few other
patterns; those shouldn't trigger either.

Tested to some extent on i586-linux.

Bernd

	* i386-protos.h (sse_comparison_operator): Declare new function.
	* i386.c (sse_comparison_operator): New function.
	* i386.md (attr "type"): Add sse and mmx types.
	(attr "memory"): Handle them without a crash.
	(movsi_1, movdi_2): Allow MMX regs.
	(movdi splits): Don't split moves involving MMX regs.
	(setcc_4): Remove '*' from pattern name so we get a gen_setcc_4.
	(movv4sf_internal, movv4si_internal, movv8qi_internal,
	movv4hi_internal, movv2si_internal, movv8qi, movv4hi, movv2si,
	movv4sf, movv4si, pushv4sf, pushv4si, pushv8qi, pushv4hi, pushv2si,
	sse_movaps, sse_movups, sse_movmskps, mmx_pmovmskb, mmx_maskmovq,
	sse_movntv4sf, sse_movntdi, sse_movhlps, sse_movlhps, sse_movhps,
	sse_movlps, sse_loadss, sse_movss, sse_storess, sse_shufps,
	addv4sf3, vmaddv4sf3, subv4sf3, vmsubv4sf3, mulv4sf3, vmmulv4sf3,
	divv4sf3, vmdivv4sf3, rcpv4sf2, vmrcpv4sf2, rsqrtv4sf2, vmrsqrtv4sf2,
	sqrtv4sf2, vmsqrtv4sf2, sse_andti3, sse_nandti3, sse_iorti3,
	sse_xorti3, maskcmpv4sf3, maskncmpv4sf3, vmmaskcmpv4sf3,
	vmmaskncmpv4sf3, sse_comi, sse_ucomi, sse_unpckhps, sse_unpcklps,
	smaxv4sf3, vmsmaxv4sf3, sminv4sf3, vmsminv4sf3, cvtpi2ps, cvtps2pi,
	cvttps2pi, cvtsi2ss, cvtss2si, cvttss2si, addv8qi3, addv4hi3,
	addv2si3, ssaddv8qi3, ssaddv4hi3, usaddv8qi3, usaddv4hi3, subv8qi3,
	subv4hi3, subv2si3, sssubv8qi3, sssubv4hi3, ussubv8qi3, ussubv4hi3,
	mulv4hi3, smulv4hi3_highpart, umulv4hi3_highpart, mmx_pmaddwd,
	mmx_iordi3, mmx_xordi3, mmx_anddi3, mmx_nanddi3, mmx_uavgv8qi3,
	mmx_uavgv4hi3, mmx_psadbw, mmx_pinsrw, mmx_pextrw, mmx_pshufw,
	eqv8qi3, eqv4hi3, eqv2si3, gtv8qi3, gtv4hi3, gtv2si3, umaxv8qi3,
	smaxv4hi3, uminv8qi3, sminv4hi3, ashrv4hi3, ashrv2si3, lshrv4hi3,
	lshrv2si3, mmx_lshrdi3, ashlv4hi3, ashlv2si3, mmx_ashldi3,
	mmx_packsswb, mmx_packssdw, mmx_packuswb, mmx_punpckhbw,
	mmx_punpckhwd, mmx_punpckhdq, mmx_punpcklbw, mmx_punpcklwd,
	mmx_punpckldq, emms, sfence, ldmxcsr, prefetch, stmxcsr, sse_clrti,
	mmx_clrdi): New patterns.

Index: config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386-protos.h,v
retrieving revision 1.25
diff -u -p -r1.25 i386-protos.h
--- config/i386/i386-protos.h	2000/08/28 21:48:05	1.25
+++ config/i386/i386-protos.h	2000/09/05 14:09:46
@@ -56,6 +56,7 @@ extern int nonmemory_no_elim_operand PAR
 extern int q_regs_operand PARAMS ((rtx, enum machine_mode));
 extern int non_q_regs_operand PARAMS ((rtx, enum machine_mode));
 extern int no_comparison_operator PARAMS ((rtx, enum machine_mode));
+extern int sse_comparison_operator PARAMS ((rtx, enum machine_mode));
 extern int fcmov_comparison_operator PARAMS ((rtx, enum machine_mode));
 extern int uno_comparison_operator PARAMS ((rtx, enum machine_mode));
 extern int cmp_fp_expander_operand PARAMS ((rtx, enum machine_mode));
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.175
diff -u -p -r1.175 i386.c
--- config/i386/i386.c	2000/08/29 00:51:21	1.175
+++ config/i386/i386.c	2000/09/05 14:09:48
@@ -1264,6 +1264,17 @@ no_comparison_operator (op, mode)
     }
 }
 
+/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
+   insns.  */
+int
+sse_comparison_operator (op, mode)
+     rtx op;
+     enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+  enum rtx_code code = GET_CODE (op);
+  return code == EQ || code == LT || code == LE || code == UNORDERED;
+}
+
 /* Return 1 if OP is a comparison operator that can be issued by fcmov.  */
 
 int
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.175
diff -u -p -r1.175 i386.md
--- config/i386/i386.md	2000/09/04 10:13:02	1.175
+++ config/i386/i386.md	2000/09/05 14:09:50
@@ -71,7 +71,25 @@
 ;; 9  This is an `fnstsw' operation.
 ;; 10 This is a `sahf' operation.
 ;; 11 This is a `fstcw' operation
-;;
+
+;; For SSE/MMX support:
+;; 30 This is a `fix' operation, guaranteed to be truncating.
+;; 31 This is an `emms' operation.
+;; 32 This is a `maskmov' operation.
+;; 33 This is a `movmsk' operation.
+;; 34 This is a `non-temporal' move.
+;; 35 This is a `prefetch' operation.
+;; 36 This is used to distinguish COMISS from UCOMISS.
+;; 37 This is a `ldmxcsr' operation.
+;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
+;; 39 This is a forced `movups' instruction (rather than whatever movti does)
+;; 40 This is a `stmxcsr' operation.
+;; 41 This is a `shuffle' operation.
+;; 42 This is a `rcp' operation.
+;; 43 This is a `rsqrt' operation.
+;; 44 This is a `sfence' operation.
+;; 45 This is a noop to prevent excessive combiner cleverness.
+
 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
 ;; from i386.c.
 
@@ -84,7 +102,7 @@
 ;; A basic instruction type.  Refinements due to arguments to be
 ;; provided in other attributes.
 (define_attr "type"
-  "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld"
+  "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld,sse,mmx"
   (const_string "other"))
 
 ;; Main data type used by the insn
@@ -234,7 +252,7 @@
 	   (const_string "store")
 	 (match_operand 1 "memory_operand" "")
 	   (const_string "load")
-	 (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp")
+	 (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp,sse,mmx")
 	      (match_operand 2 "memory_operand" ""))
 	   (const_string "load")
 	 (and (eq_attr "type" "icmov")
@@ -1535,6 +1553,9 @@
   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
   "*
 {
+  if (MMX_REG_P (operands[0]) || MMX_REG_P (operands[1]))
+    return \"movd\\t{%1, %0|%0, %1}\";
+
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
@@ -1983,15 +2004,19 @@
   "#")
 
 (define_insn "*movdi_2"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
-	(match_operand:DI 1 "general_operand" "riFo,riF"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!m*y,!*y")
+	(match_operand:DI 1 "general_operand" "riFo,riF,*y,m"))]
   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
-  "#")
+  "@
+   #
+   #
+   movq\\t{%1, %0|%0, %1}
+   movq\\t{%1, %0|%0, %1}")
 
 (define_split
   [(set (match_operand:DI 0 "push_operand" "")
         (match_operand:DI 1 "general_operand" ""))]
-  "reload_completed"
+  "reload_completed && ! MMX_REG_P (operands[1])"
   [(const_int 0)]
   "if (!ix86_split_long_move (operands)) abort (); DONE;")
 
@@ -1999,7 +2024,7 @@
 (define_split
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
         (match_operand:DI 1 "general_operand" ""))]
-  "reload_completed"
+  "reload_completed && ! MMX_REG_P (operands[0]) && ! MMX_REG_P (operands[1])"
   [(set (match_dup 2) (match_dup 5))
    (set (match_dup 3) (match_dup 6))]
   "if (ix86_split_long_move (operands)) DONE;")
@@ -7864,7 +7889,7 @@
   [(set_attr "type" "setcc")
    (set_attr "mode" "QI")])
 
-(define_insn "*setcc_4"
+(define_insn "setcc_4"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
 	(match_operator:QI 1 "uno_comparison_operator"
 	  [(reg:CC 17) (const_int 0)]))]
@@ -11174,3 +11199,1556 @@
 			     CODE_LABEL_NUMBER (operands[2]));
   RET;
 }")
+
+	;; Pentium III SIMD instructions.
+
+;; Moves for SSE/MMX regs.
+
+(define_insn "movv4sf_internal"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+	(match_operand:V4SF 1 "general_operand" "xm,x"))]
+  "TARGET_SSE"
+  ;; @@@ let's try to use movaps here.
+  "movaps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "movv4si_internal"
+  [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
+	(match_operand:V4SI 1 "general_operand" "xm,x"))]
+  "TARGET_SSE"
+  ;; @@@ let's try to use movaps here.
+  "movaps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "movv8qi_internal"
+  [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
+	(match_operand:V8QI 1 "general_operand" "ym,y"))]
+  "TARGET_MMX"
+  "movq\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "movv4hi_internal"
+  [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
+	(match_operand:V4HI 1 "general_operand" "ym,y"))]
+  "TARGET_MMX"
+  "movq\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "movv2si_internal"
+  [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
+	(match_operand:V2SI 1 "general_operand" "ym,y"))]
+  "TARGET_MMX"
+  "movq\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+(define_expand "movti"
+  [(set (match_operand:TI 0 "general_operand" "")
+	(match_operand:TI 1 "general_operand" ""))]
+  "TARGET_SSE"
+  "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], TImode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (TImode, addr);
+    }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], TImode)
+      && !register_operand (operands[1], TImode)
+      && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
+      && operands[1] != CONST0_RTX (TImode))
+    {
+      rtx temp = force_reg (TImode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
+(define_expand "movv4sf"
+  [(set (match_operand:V4SF 0 "general_operand" "")
+	(match_operand:V4SF 1 "general_operand" ""))]
+  "TARGET_SSE"
+  "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], V4SFmode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (V4SFmode, addr);
+    }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], V4SFmode)
+      && !register_operand (operands[1], V4SFmode)
+      && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
+      && operands[1] != CONST0_RTX (V4SFmode))
+    {
+      rtx temp = force_reg (V4SFmode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
+(define_expand "movv4si"
+  [(set (match_operand:V4SI 0 "general_operand" "")
+	(match_operand:V4SI 1 "general_operand" ""))]
+  "TARGET_MMX"
+  "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], V4SImode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (V4SImode, addr);
+    }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], V4SImode)
+      && !register_operand (operands[1], V4SImode)
+      && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
+      && operands[1] != CONST0_RTX (V4SImode))
+    {
+      rtx temp = force_reg (V4SImode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
+(define_expand "movv2si"
+  [(set (match_operand:V2SI 0 "general_operand" "")
+	(match_operand:V2SI 1 "general_operand" ""))]
+  "TARGET_MMX"
+  "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], V2SImode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (V2SImode, addr);
+    }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], V2SImode)
+      && !register_operand (operands[1], V2SImode)
+      && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
+      && operands[1] != CONST0_RTX (V2SImode))
+    {
+      rtx temp = force_reg (V2SImode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
+(define_expand "movv4hi"
+  [(set (match_operand:V4HI 0 "general_operand" "")
+	(match_operand:V4HI 1 "general_operand" ""))]
+  "TARGET_MMX"
+  "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], V4HImode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (V4HImode, addr);
+    }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], V4HImode)
+      && !register_operand (operands[1], V4HImode)
+      && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
+      && operands[1] != CONST0_RTX (V4HImode))
+    {
+      rtx temp = force_reg (V4HImode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
+(define_expand "movv8qi"
+  [(set (match_operand:V8QI 0 "general_operand" "")
+	(match_operand:V8QI 1 "general_operand" ""))]
+  "TARGET_MMX"
+  "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], V8QImode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (V8QImode, addr);
+    }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], V8QImode)
+      && !register_operand (operands[1], V8QImode)
+      && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
+      && operands[1] != CONST0_RTX (V8QImode))
+    {
+      rtx temp = force_reg (V8QImode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
+(define_insn_and_split "*pushti"
+  [(set (match_operand:TI 0 "push_operand" "=<")
+	(match_operand:TI 1 "nonmemory_operand" "x"))]
+  "TARGET_SSE"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+   (set (mem:TI (reg:SI 7)) (match_dup 1))]
+  "")
+
+(define_insn_and_split "*pushv4sf"
+  [(set (match_operand:V4SF 0 "push_operand" "=<")
+	(match_operand:V4SF 1 "nonmemory_operand" "x"))]
+  "TARGET_SSE"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+   (set (mem:V4SF (reg:SI 7)) (match_dup 1))]
+  "")
+
+(define_insn_and_split "*pushv4si"
+  [(set (match_operand:V4SI 0 "push_operand" "=<")
+	(match_operand:V4SI 1 "nonmemory_operand" "x"))]
+  "TARGET_SSE"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+   (set (mem:V4SI (reg:SI 7)) (match_dup 1))]
+  "")
+
+(define_insn_and_split "*pushv2si"
+  [(set (match_operand:V2SI 0 "push_operand" "=<")
+	(match_operand:V2SI 1 "nonmemory_operand" "y"))]
+  "TARGET_MMX"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+   (set (mem:V2SI (reg:SI 7)) (match_dup 1))]
+  "")
+
+(define_insn_and_split "*pushv4hi"
+  [(set (match_operand:V4HI 0 "push_operand" "=<")
+	(match_operand:V4HI 1 "nonmemory_operand" "y"))]
+  "TARGET_MMX"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+   (set (mem:V4HI (reg:SI 7)) (match_dup 1))]
+  "")
+
+(define_insn_and_split "*pushv8qi"
+  [(set (match_operand:V8QI 0 "push_operand" "=<")
+	(match_operand:V8QI 1 "nonmemory_operand" "y"))]
+  "TARGET_MMX"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+   (set (mem:V8QI (reg:SI 7)) (match_dup 1))]
+  "")
+
+(define_insn "movti_internal"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
+	(match_operand:TI 1 "general_operand" "xm,x"))]
+  "TARGET_SSE"
+  "@
+   movaps\\t{%1, %0|%0, %1}
+   movaps\\t{%1, %0|%0, %1}")
+
+;; These two patterns are useful for specifying exactly whether to use
+;; movaps or movups
+(define_insn "sse_movaps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+	(unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))]
+  "TARGET_SSE"
+  "@
+   movaps\\t{%1, %0|%0, %1}
+   movaps\\t{%1, %0|%0, %1}")
+
+(define_insn "sse_movups"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+	(unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))]
+  "TARGET_SSE"
+  "@
+   movups\\t{%1, %0|%0, %1}
+   movups\\t{%1, %0|%0, %1}")
+
+
+;; SSE Strange Moves.
+
+(define_insn "sse_movmskps"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] 33))]
+  "TARGET_SSE"
+  "movmskps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mmx_pmovmskb"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))]
+  "TARGET_SSE"
+  "pmovmskb\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mmx_maskmovq"
+  [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
+	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+		      (match_operand:V8QI 2 "register_operand" "y")] 32))]
+  "TARGET_SSE"
+  ;; @@@ check ordering of operands in intel/nonintel syntax
+  "maskmovq\\t{%2, %1|%1, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_movntv4sf"
+  [(set (match_operand:V4SF 0 "memory_operand" "=m")
+	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] 34))]
+  "TARGET_SSE"
+  "movntps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_movntdi"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(unspec:DI [(match_operand:DI 1 "register_operand" "x")] 34))]
+  "TARGET_SSE"
+  "movntq\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_movhlps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	 (match_operand:V4SF 1 "register_operand" "0")
+	 (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
+			  (parallel [(const_int 2)
+				     (const_int 3)
+				     (const_int 0)
+				     (const_int 1)]))
+	 (const_int 3)))]
+  "TARGET_SSE"
+  "movhlps\\t{%2, %0|%0, %2}")
+
+(define_insn "sse_movlhps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	 (match_operand:V4SF 1 "register_operand" "0")
+	 (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
+			  (parallel [(const_int 2)
+				     (const_int 3)
+				     (const_int 0)
+				     (const_int 1)]))
+	 (const_int 12)))]
+  "TARGET_SSE"
+  "movlhps\\t{%2, %0|%0, %2}")
+
+(define_insn "sse_movhps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+	(vec_merge:V4SF
+	 (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
+	 (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
+	 (const_int 12)))]
+  "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+  "movhps\\t{%2, %0|%0, %2}")
+
+(define_insn "sse_movlps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+	(vec_merge:V4SF
+	 (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
+	 (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
+	 (const_int 3)))]
+  "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+  "movlps\\t{%2, %0|%0, %2}")
+
+(define_insn "sse_loadss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	 (match_operand:V4SF 1 "memory_operand" "m")
+	 (vec_duplicate:V4SF (float:SF (const_int 0)))
+	 (const_int 1)))]
+  "TARGET_SSE"
+  "movss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_movss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	 (match_operand:V4SF 1 "register_operand" "0")
+	 (match_operand:V4SF 2 "register_operand" "x")
+	 (const_int 1)))]
+  "TARGET_SSE"
+  "movss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_storess"
+  [(set (match_operand:SF 0 "memory_operand" "=m")
+	(vec_select:SF
+	 (match_operand:V4SF 1 "register_operand" "x")
+	 (parallel [(const_int 0)])))]
+  "TARGET_SSE"
+  "movss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_shufps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
+		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+		      (match_operand:SI 3 "immediate_operand" "i")] 41))]
+  "TARGET_SSE"
+  ;; @@@ check operand order for intel/nonintel syntax
+  "shufps\\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE arithmetic
+
+(define_insn "addv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+	           (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "addps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmaddv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+	                           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+			(const_int 1)))]
+  "TARGET_SSE"
+  "addss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "subv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+	           (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "subps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmsubv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+	                           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+			(const_int 1)))]
+  "TARGET_SSE"
+  "subss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mulv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
+	           (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "mulps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmmulv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
+	                           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+			(const_int 1)))]
+  "TARGET_SSE"
+  "mulss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "divv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+	          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "divps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmdivv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+				  (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+			(const_int 1)))]
+  "TARGET_SSE"
+  "divss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE square root/reciprocal
+
+(define_insn "rcpv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42))]
+  "TARGET_SSE"
+  "rcpps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmrcpv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42)
+                        (match_operand:V4SF 2 "register_operand" "0")
+			(const_int 1)))]
+  "TARGET_SSE"
+  "rcpss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "rsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43))]
+  "TARGET_SSE"
+  "rsqrtps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmrsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43)
+                        (match_operand:V4SF 2 "register_operand" "0")
+			(const_int 1)))]
+  "TARGET_SSE"
+  "rsqrtss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")))]
+  "TARGET_SSE"
+  "sqrtps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm"))
+                        (match_operand:V4SF 2 "register_operand" "0")
+			(const_int 1)))]
+  "TARGET_SSE"
+  "sqrtss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE logical operations.
+
+;; These are not called andti3 etc. because we really really don't want
+;; the compiler to widen DImode ands to TImode ands and then try to move
+;; into DImode subregs of SSE registers, and them together, and move out
+;; of DImode subregs again!
+
+(define_insn "sse_andti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (and:TI (match_operand:TI 1 "register_operand" "0")
+		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "andps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_nandti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "andnps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_iorti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (ior:TI (match_operand:TI 1 "register_operand" "0")
+		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "iorps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_xorti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (xor:TI (match_operand:TI 1 "register_operand" "0")
+		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "xorps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Use xor, but don't show input operands so they aren't live before
+;; this insn.
+(define_insn "sse_clrti"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (unspec:TI [(const_int 0)] 45))]
+  "TARGET_SSE"
+  "xorps\\t{%0, %0|%0, %0}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE mask-generating compares
+
+(define_insn "maskcmpv4sf3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (match_operator:V4SI 3 "sse_comparison_operator"
+			     [(match_operand:V4SF 1 "register_operand" "0")
+			      (match_operand:V4SF 2 "nonimmediate_operand" "x")]))]
+  "TARGET_SSE"
+  "*
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case EQ:
+      return \"cmpeqps\\t{%2, %0|%0, %2}\";
+    case LT:
+      return \"cmpltps\\t{%2, %0|%0, %2}\";
+    case LE:
+      return \"cmpleps\\t{%2, %0|%0, %2}\";
+    case UNORDERED:
+      return \"cmpunordps\\t{%2, %0|%0, %2}\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+
+(define_insn "maskncmpv4sf3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (not:V4SI
+	 (match_operator:V4SI 3 "sse_comparison_operator"
+			      [(match_operand:V4SF 1 "register_operand" "0")
+			       (match_operand:V4SF 2 "nonimmediate_operand" "x")])))]
+  "TARGET_SSE"
+  "*
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case EQ:
+      return \"cmpneqps\\t{%2, %0|%0, %2}\";
+    case LT:
+      return \"cmpnltps\\t{%2, %0|%0, %2}\";
+    case LE:
+      return \"cmpnleps\\t{%2, %0|%0, %2}\";
+    case UNORDERED:
+      return \"cmpordps\\t{%2, %0|%0, %2}\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmmaskcmpv4sf3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(vec_merge:V4SI
+	 (match_operator:V4SI 3 "sse_comparison_operator"
+			      [(match_operand:V4SF 1 "register_operand" "0")
+			       (match_operand:V4SF 2 "nonimmediate_operand" "x")])
+	 (match_dup 1)
+	 (const_int 1)))]
+  "TARGET_SSE"
+  "*
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case EQ:
+      return \"cmpeqss\\t{%2, %0|%0, %2}\";
+    case LT:
+      return \"cmpltss\\t{%2, %0|%0, %2}\";
+    case LE:
+      return \"cmpless\\t{%2, %0|%0, %2}\";
+    case UNORDERED:
+      return \"cmpunordss\\t{%2, %0|%0, %2}\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmmaskncmpv4sf3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(vec_merge:V4SI
+	 (not:V4SI
+	  (match_operator:V4SI 3 "sse_comparison_operator"
+			       [(match_operand:V4SF 1 "register_operand" "0")
+				(match_operand:V4SF 2 "nonimmediate_operand" "x")]))
+	 (subreg:V4SI (match_dup 1) 0)
+	 (const_int 1)))]
+  "TARGET_SSE"
+  "*
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case EQ:
+      return \"cmpneqss\\t{%2, %0|%0, %2}\";
+    case LT:
+      return \"cmpnltss\\t{%2, %0|%0, %2}\";
+    case LE:
+      return \"cmpnless\\t{%2, %0|%0, %2}\";
+    case UNORDERED:
+      return \"cmpordss\\t{%2, %0|%0, %2}\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+
+;; Ordered scalar compare setting EFLAGS (reg 17).  The two sources are
+;; operands 0 and 1; operand 2 is the match_operator itself and must not
+;; appear in the output template (printing %2 would hand the comparison
+;; RTX to the operand printer).
+(define_insn "sse_comi"
+  [(set (reg:CC 17)
+        (match_operator:CC 2 "sse_comparison_operator"
+			[(vec_select:SF
+			  (match_operand:V4SF 0 "register_operand" "x")
+			  (parallel [(const_int 0)]))
+			 (vec_select:SF
+			  (match_operand:V4SF 1 "register_operand" "x")
+			  (parallel [(const_int 0)]))]))]
+  "TARGET_SSE"
+  "comiss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; Unordered (non-signalling) scalar compare setting EFLAGS.  As with
+;; sse_comi, the sources are operands 0 and 1; operand 2 is the operator,
+;; so the template must print %1, not %2.
+(define_insn "sse_ucomi"
+  [(set (reg:CC 17)
+        (unspec:CC
+	 [(match_operator 2 "sse_comparison_operator"
+			  [(vec_select:SF
+			    (match_operand:V4SF 0 "register_operand" "x")
+			    (parallel [(const_int 0)]))
+			   (vec_select:SF
+			    (match_operand:V4SF 1 "register_operand" "x")
+			    (parallel [(const_int 0)]))])] 36))]
+  "TARGET_SSE"
+  "ucomiss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE unpack
+
+;; unpckhps: interleave the high halves of the two sources.  Both arms of
+;; the vec_merge:V4SF must be V4SF; operand 2 was wrongly declared V8QI
+;; (an MMX vector mode), which can never match an SSE float source.
+(define_insn "sse_unpckhps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	 (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0")
+			  (parallel [(const_int 2)
+				     (const_int 0)
+				     (const_int 3)
+				     (const_int 1)]))
+	 (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
+			  (parallel [(const_int 0)
+				     (const_int 2)
+				     (const_int 1)
+				     (const_int 3)]))
+	 (const_int 5)))]
+  "TARGET_SSE"
+  "unpckhps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; unpcklps: interleave the low halves of the two sources.  Operand 2 must
+;; be V4SF to match the vec_merge:V4SF mode (was mistakenly V8QI).
+(define_insn "sse_unpcklps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	 (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0")
+			  (parallel [(const_int 0)
+				     (const_int 2)
+				     (const_int 1)
+				     (const_int 3)]))
+	 (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
+			  (parallel [(const_int 2)
+				     (const_int 0)
+				     (const_int 3)
+				     (const_int 1)]))
+	 (const_int 5)))]
+  "TARGET_SSE"
+  "unpcklps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE min/max
+
+;; Packed single-precision maximum: maxps.
+(define_insn "smaxv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "maxps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Scalar maximum (maxss): only element 0 is computed, elements 1-3 pass
+;; through from operand 1 via the vec_merge.
+(define_insn "vmsmaxv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
+	                           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+			(const_int 1)))]
+  "TARGET_SSE"
+  "maxss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Packed single-precision minimum: minps.
+(define_insn "sminv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "minps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Scalar minimum (minss), element 0 only.
+(define_insn "vmsminv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
+	                           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+			(const_int 1)))]
+  "TARGET_SSE"
+  "minss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE <-> integer/MMX conversions
+
+;; cvtpi2ps: convert two packed signed ints (MMX reg or memory) to floats
+;; in the low half of the SSE destination; merge mask 12 keeps elements 2
+;; and 3 from operand 1.  The predicate must be nonimmediate_operand: the
+;; constraint "ym" allows memory, which register_operand would reject.
+(define_insn "cvtpi2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
+			(vec_duplicate:V4SF
+			 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
+			(const_int 12)))]
+  "TARGET_SSE"
+  "cvtpi2ps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; cvtps2pi/cvttps2pi: convert the low two floats to packed ints in an MMX
+;; register.  Predicates fixed to nonimmediate_operand to agree with the
+;; "xm" constraints (register_operand would reject the memory alternative).
+;; NOTE(review): RTL `fix' means truncation toward zero, which is the
+;; semantics of cvtTps2pi, while cvtps2pi rounds per MXCSR -- the fix/unspec
+;; assignment between these two patterns looks swapped; confirm against the
+;; builtin expanders before changing, since unspec 30 is shared with the
+;; scalar patterns below.
+(define_insn "cvtps2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(vec_select:V2SI (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+			 (parallel
+			  [(const_int 0)
+			   (const_int 1)])))]
+  "TARGET_SSE"
+  "cvtps2pi\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvttps2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(vec_select:V2SI (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
+			 (parallel
+			  [(const_int 0)
+			   (const_int 1)])))]
+  "TARGET_SSE"
+  "cvttps2pi\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; cvtsi2ss: convert a 32-bit integer to a float in element 0, preserving
+;; elements 1-3.  Merge mask must be 14 (keep elements 1-3 from operand 1,
+;; take element 0 from the vec_duplicate); the previous mask of 15 took
+;; all four elements from operand 1 and discarded the converted value.
+;; Predicate fixed to nonimmediate_operand to match the "rm" constraint.
+(define_insn "cvtsi2ss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
+	 		(vec_duplicate:V4SF
+			 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
+			(const_int 14)))]
+  "TARGET_SSE"
+  "cvtsi2ss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; cvtss2si/cvttss2si: convert the low float to a 32-bit integer.  The
+;; result of CVTSS2SI is a general register (cvtss2si r32, xmm/m32), so
+;; the destination constraint must be "=r", not "=y" (an MMX register).
+;; Predicates fixed to nonimmediate_operand to match the "xm" constraints.
+;; NOTE(review): as with cvtps2pi/cvttps2pi, the fix/unspec split looks
+;; swapped relative to the truncating/non-truncating forms -- verify.
+(define_insn "cvtss2si"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(vec_select:SI (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+		       (parallel [(const_int 0)])))]
+  "TARGET_SSE"
+  "cvtss2si\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvttss2si"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(vec_select:SI (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
+		       (parallel [(const_int 0)])))]
+  "TARGET_SSE"
+  "cvttss2si\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX insns
+
+;; MMX arithmetic
+
+;; Wrapping vector addition: paddb/paddw/paddd.
+(define_insn "addv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+	           (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "addv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+	           (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "addv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (plus:V2SI (match_operand:V2SI 1 "register_operand" "0")
+	           (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Signed saturating addition: paddsb/paddsw.
+(define_insn "ssaddv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddsb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ssaddv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddsw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Unsigned saturating addition: paddusb/paddusw.
+(define_insn "usaddv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddusb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "usaddv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddusw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Wrapping vector subtraction: psubb/psubw/psubd.
+(define_insn "subv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		    (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "subv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "subv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (minus:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		    (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Signed saturating subtraction: psubsb/psubsw.
+(define_insn "sssubv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		       (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubsb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "sssubv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		       (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubsw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Unsigned saturating subtraction: psubusb/psubusw.
+(define_insn "ussubv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		       (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubusb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ussubv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		       (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubusw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Word multiply, low 16 bits of each product: pmullw.
+(define_insn "mulv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (mult:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pmullw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Word multiply, signed high 16 bits of each product: pmulhw.
+(define_insn "smulv4hi3_highpart"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(truncate:V4HI
+	 (lshiftrt:V4SI
+	  (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
+		     (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+	  (const_int 16))))]
+  "TARGET_MMX"
+  "pmulhw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Word multiply, unsigned high 16 bits of each product: pmulhuw (SSE-era
+;; MMX extension, but gated on TARGET_MMX here).
+(define_insn "umulv4hi3_highpart"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(truncate:V4HI
+	 (lshiftrt:V4SI
+	  (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
+		     (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+	  (const_int 16))))]
+  "TARGET_MMX"
+  "pmulhuw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pmaddwd: multiply the four signed word pairs and add adjacent 32-bit
+;; products; expressed as two V2SI multiplies over the even and odd
+;; elements, then summed.
+(define_insn "mmx_pmaddwd"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (plus:V2SI
+	 (mult:V2SI
+	  (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
+					     (parallel [(const_int 0)
+							(const_int 2)])))
+	  (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+					     (parallel [(const_int 0)
+							(const_int 2)]))))
+	 (mult:V2SI
+	  (sign_extend:V2SI (vec_select:V2HI (match_dup 1)
+					     (parallel [(const_int 1)
+							(const_int 3)])))
+	  (sign_extend:V2SI (vec_select:V2HI (match_dup 2)
+					     (parallel [(const_int 1)
+							(const_int 3)]))))))]
+  "TARGET_MMX"
+  "pmaddwd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; MMX logical operations
+;; Note we don't want to declare these as regular iordi3 insns to prevent
+;; normal code that also wants to use the FPU from getting broken.
+;; The UNSPECs are there to prevent the combiner from getting overly clever.
+;; por.
+(define_insn "mmx_iordi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+	 [(ior:DI (match_operand:DI 1 "register_operand" "0")
+		  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+  "TARGET_MMX"
+  "por\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pxor.
+(define_insn "mmx_xordi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+	 [(xor:DI (match_operand:DI 1 "register_operand" "0")
+		  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+  "TARGET_MMX"
+  "pxor\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Same as pxor, but don't show input operands so that we don't think
+;; they are live.
+(define_insn "mmx_clrdi"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI [(const_int 0)] 45))]
+  "TARGET_MMX"
+  "pxor\\t{%0, %0|%0, %0}"
+  [(set_attr "type" "mmx")])
+
+;; pand.
+(define_insn "mmx_anddi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+	 [(and:DI (match_operand:DI 1 "register_operand" "0")
+		  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+  "TARGET_MMX"
+  "pand\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pandn: (~op1) & op2.  Note the destructive operand is the one that is
+;; complemented.
+(define_insn "mmx_nanddi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+	 [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0"))
+			  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+  "TARGET_MMX"
+  "pandn\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; MMX unsigned averages/sum of absolute differences
+
+;; pavgb: unsigned byte average with rounding, (a + b + 1) >> 1.
+;; The mnemonic was misspelled "pavgbn".
+;; NOTE(review): ashiftrt on an unsigned average looks suspect -- a
+;; logical shift (lshiftrt) in a widened mode would model the carry out of
+;; the top bit; confirm before relying on the RTL for simplification.
+(define_insn "mmx_uavgv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (ashiftrt:V8QI
+	 (plus:V8QI (plus:V8QI
+		     (match_operand:V8QI 1 "register_operand" "0")
+		     (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+		    (vec_const:V8QI (parallel [(const_int 1)
+					       (const_int 1)
+					       (const_int 1)
+					       (const_int 1)
+					       (const_int 1)
+					       (const_int 1)
+					       (const_int 1)
+					       (const_int 1)])))
+	 (const_int 1)))]
+  "TARGET_SSE"
+  "pavgb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; pavgw: unsigned word average with rounding, (a + b + 1) >> 1.
+;; The mnemonic was misspelled "pavgwn".
+(define_insn "mmx_uavgv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ashiftrt:V4HI
+	 (plus:V4HI (plus:V4HI
+		     (match_operand:V4HI 1 "register_operand" "0")
+		     (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+		    (vec_const:V4HI (parallel [(const_int 1)
+					       (const_int 1)
+					       (const_int 1)
+					       (const_int 1)])))
+	 (const_int 1)))]
+  "TARGET_SSE"
+  "pavgw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; psadbw: sum of absolute byte differences.  The mnemonic was misspelled
+;; "padbw", and the source operands were declared :DI inside a minus:V8QI,
+;; which can never match; both must be V8QI.
+;; NOTE(review): the hardware accumulates the eight |a-b| bytes into one
+;; 16-bit sum; modelling the result as a plain V8QI abs-difference is an
+;; approximation -- confirm how the builtin expander uses this pattern.
+(define_insn "mmx_psadbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+			      (match_operand:V8QI 2 "nonimmediate_operand" "ym"))))]
+  "TARGET_SSE"
+  "psadbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX insert/extract/shuffle
+
+;; pinsrw: insert a 16-bit value at the element selected by the immediate.
+;; The immediate must live inside the {att|intel} dialect braces: AT&T
+;; order is "pinsrw $imm, src, dst", Intel order is "pinsrw dst, src, imm".
+;; The old template printed %3 outside the braces, giving Intel syntax the
+;; operands in the wrong order.
+(define_insn "mmx_pinsrw"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (vec_merge:V4HI (match_operand:V4HI 1 "register_operand" "0")
+			(vec_duplicate:V4HI
+			 (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm")))
+			(match_operand:SI 3 "immediate_operand" "i")))]
+  "TARGET_SSE"
+  "pinsrw\\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sse")])
+
+;; pextrw: extract the 16-bit element selected by the immediate,
+;; zero-extended into a general register.  The immediate is placed inside
+;; the dialect braces so both AT&T and Intel orderings are correct.
+(define_insn "mmx_pextrw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
+				       (parallel
+					[(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_SSE"
+  "pextrw\\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sse")])
+
+;; pshufw: shuffle words according to the immediate control byte.  The
+;; immediate is placed inside the dialect braces (AT&T: imm first; Intel:
+;; imm last); the old template printed it outside, with a stray space,
+;; breaking the Intel operand order.
+(define_insn "mmx_pshufw"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0")
+		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+		      (match_operand:SI 3 "immediate_operand" "i")] 41))]
+  "TARGET_SSE"
+  "pshufw\\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX mask-generating comparisons
+
+;; Element-wise equality: pcmpeqb/pcmpeqw/pcmpeqd produce all-ones per
+;; equal element, all-zeros otherwise.
+(define_insn "eqv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (eq:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		 (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpeqb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "eqv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (eq:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpeqw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "eqv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (eq:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		 (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpeqd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Element-wise signed greater-than: pcmpgtb/pcmpgtw/pcmpgtd.
+(define_insn "gtv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (gt:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		 (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpgtb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "gtv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (gt:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpgtw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "gtv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (gt:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		 (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpgtd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; MMX max/min insns
+
+;; pmaxub: unsigned byte maximum (SSE-era MMX extension, hence TARGET_SSE).
+(define_insn "umaxv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (umax:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE"
+  "pmaxub\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; pmaxsw: signed word maximum.
+(define_insn "smaxv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (smax:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE"
+  "pmaxsw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; pminub: unsigned byte minimum.
+(define_insn "uminv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (umin:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE"
+  "pminub\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; pminsw: signed word minimum.
+(define_insn "sminv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (smin:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE"
+  "pminsw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX shifts
+
+;; Arithmetic right shifts: psraw/psrad.  The count may be an MMX register
+;; or an immediate ("yi").
+(define_insn "ashrv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psraw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ashrv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psrad\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Logical right shifts: psrlw/psrld.
+(define_insn "lshrv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psrlw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "lshrv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psrld\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; See logical MMX insns.
+(define_insn "mmx_lshrdi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+		     (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psrlq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Left shifts: psllw/pslld.
+(define_insn "ashlv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ashift:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psllw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ashlv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (ashift:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "pslld\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; See logical MMX insns.
+(define_insn "mmx_ashldi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (ashift:DI (match_operand:DI 1 "register_operand" "0")
+		   (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psllq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; MMX pack/unpack insns.
+
+;; packsswb: narrow eight signed words (from two sources) to eight bytes
+;; with signed saturation.
+(define_insn "mmx_packsswb"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(vec_concat:V8QI
+	 (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
+	 (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
+  "TARGET_MMX"
+  "packsswb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; packssdw: narrow four signed dwords to four words with signed saturation.
+(define_insn "mmx_packssdw"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(vec_concat:V4HI
+	 (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0"))
+	 (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
+  "TARGET_MMX"
+  "packssdw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; packuswb: narrow eight signed words to eight bytes with unsigned
+;; saturation.
+(define_insn "mmx_packuswb"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(vec_concat:V8QI
+	 (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
+	 (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
+  "TARGET_MMX"
+  "packuswb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; punpckhbw: interleave the high four bytes of the two sources.
+;; NOTE(review): verify the vec_select permutations and merge mask against
+;; the punpcklbw pattern below -- the element orderings are hard to audit
+;; by eye and should be cross-checked with the hardware definition.
+(define_insn "mmx_punpckhbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(vec_merge:V8QI
+	 (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
+			  (parallel [(const_int 4)
+				     (const_int 0)
+				     (const_int 5)
+				     (const_int 1)
+				     (const_int 6)
+				     (const_int 2)
+				     (const_int 7)
+				     (const_int 3)]))
+	 (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
+			  (parallel [(const_int 0)
+				     (const_int 4)
+				     (const_int 1)
+				     (const_int 5)
+				     (const_int 2)
+				     (const_int 6)
+				     (const_int 3)
+				     (const_int 7)]))
+	 (const_int 85)))]
+  "TARGET_MMX"
+  "punpckhbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; punpckhwd: interleave the high two words of the two sources.  The
+;; output template wrongly emitted the byte variant "punpckhbw"; it must
+;; emit "punpckhwd" to match this pattern's V4HI semantics.
+(define_insn "mmx_punpckhwd"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(vec_merge:V4HI
+	 (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
+			  (parallel [(const_int 0)
+				     (const_int 2)
+				     (const_int 1)
+				     (const_int 3)]))
+	 (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
+			  (parallel [(const_int 2)
+				     (const_int 0)
+				     (const_int 3)
+				     (const_int 1)]))
+	 (const_int 5)))]
+  "TARGET_MMX"
+  "punpckhwd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; punpckhdq: interleave the high dwords of the two sources.  The output
+;; template wrongly emitted "punpckhbw"; it must emit "punpckhdq".
+(define_insn "mmx_punpckhdq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(vec_merge:V2SI
+	 (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
+			  (parallel [(const_int 0)
+				     (const_int 1)]))
+	 (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
+			  (parallel [(const_int 1)
+				     (const_int 0)]))
+	 (const_int 1)))]
+  "TARGET_MMX"
+  "punpckhdq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; punpcklbw: interleave the low four bytes of the two sources.
+;; NOTE(review): as with punpckhbw above, the permutation vectors and the
+;; merge mask (85 = 0b01010101) warrant cross-checking against hardware.
+(define_insn "mmx_punpcklbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(vec_merge:V8QI
+	 (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
+			  (parallel [(const_int 0)
+				     (const_int 4)
+				     (const_int 1)
+				     (const_int 5)
+				     (const_int 2)
+				     (const_int 6)
+				     (const_int 3)
+				     (const_int 7)]))
+	 (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
+			  (parallel [(const_int 4)
+				     (const_int 0)
+				     (const_int 5)
+				     (const_int 1)
+				     (const_int 6)
+				     (const_int 2)
+				     (const_int 7)
+				     (const_int 3)]))
+	 (const_int 85)))]
+  "TARGET_MMX"
+  "punpcklbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; punpcklwd: interleave the low two words of the two sources.  The output
+;; template wrongly emitted "punpcklbw"; it must emit "punpcklwd".
+(define_insn "mmx_punpcklwd"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(vec_merge:V4HI
+	 (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
+			  (parallel [(const_int 2)
+				     (const_int 0)
+				     (const_int 3)
+				     (const_int 1)]))
+	 (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
+			  (parallel [(const_int 0)
+				     (const_int 2)
+				     (const_int 1)
+				     (const_int 3)]))
+	 (const_int 5)))]
+  "TARGET_MMX"
+  "punpcklwd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; punpckldq: interleave the low dwords of the two sources.  The output
+;; template wrongly emitted "punpcklbw"; it must emit "punpckldq".
+(define_insn "mmx_punpckldq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(vec_merge:V2SI
+	 (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
+			   (parallel [(const_int 1)
+				      (const_int 0)]))
+	 (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
+			  (parallel [(const_int 0)
+				     (const_int 1)]))
+	 (const_int 1)))]
+  "TARGET_MMX"
+  "punpckldq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; Miscellaneous stuff
+
+;; emms: leave MMX state.  Clobbers the x87 stack registers (which share
+;; physical state with the MMX registers) and the MMX hard registers, so
+;; no FP or MMX value survives across it.
+;; NOTE(review): the hard register numbers (XF 8-15, DI 27-34) must match
+;; this port's register layout -- confirm against i386.h.
+(define_insn "emms"
+  [(unspec_volatile [(const_int 0)] 31)
+   (clobber (reg:XF 8))
+   (clobber (reg:XF 9))
+   (clobber (reg:XF 10))
+   (clobber (reg:XF 11))
+   (clobber (reg:XF 12))
+   (clobber (reg:XF 13))
+   (clobber (reg:XF 14))
+   (clobber (reg:XF 15))
+   (clobber (reg:DI 27))
+   (clobber (reg:DI 28))
+   (clobber (reg:DI 29))
+   (clobber (reg:DI 30))
+   (clobber (reg:DI 31))
+   (clobber (reg:DI 32))
+   (clobber (reg:DI 33))
+   (clobber (reg:DI 34))]
+  "TARGET_MMX"
+  "emms")
+
+;; sfence: store fence; volatile unspec keeps it from being moved or
+;; deleted.
+(define_insn "sfence"
+  [(unspec_volatile [(const_int 0)] 44)]
+  "TARGET_SSE"
+  "sfence")
+
+;; ldmxcsr: load the SSE control/status register from memory.  This is an
+;; SSE instruction, so it must be gated on TARGET_SSE, not TARGET_MMX.
+(define_insn "ldmxcsr"
+  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 37)]
+  "TARGET_SSE"
+  "ldmxcsr\\t%0")
+
+;; prefetcht0/t1/t2/nta.  Operand 0 is the address to prefetch; operand 1
+;; selects the locality hint (0-3) and is read with INTVAL at output time,
+;; so it must be constrained to a literal constant -- the previous
+;; address_operand "p" predicate admitted arbitrary address RTL, which
+;; would have crashed in INTVAL.
+(define_insn "prefetch"
+  [(unspec_volatile [(match_operand:SI 0 "address_operand" "p")
+		     (match_operand:SI 1 "const_int_operand" "n")] 35)]
+  "TARGET_SSE"
+  "*
+{
+  switch (INTVAL (operands[1]))
+    {
+    case 0:
+      return \"prefetcht0\\t%0\";
+    case 1:
+      return \"prefetcht1\\t%0\";
+    case 2:
+      return \"prefetcht2\\t%0\";
+    case 3:
+      return \"prefetchnta\\t%0\";
+    default:
+      abort ();
+    }
+}")
+
+;; stmxcsr: store the SSE control/status register to memory.  Like
+;; ldmxcsr, this is an SSE instruction and must be gated on TARGET_SSE,
+;; not TARGET_MMX.
+(define_insn "stmxcsr"
+  [(set (match_operand:SI 0 "memory_operand" "=m")
+	(unspec_volatile:SI [(const_int 0)] 40))]
+  "TARGET_SSE"
+  "stmxcsr\\t%0")



More information about the Gcc-patches mailing list