mmx/sse support headers

Richard Henderson rth@redhat.com
Sat Jan 12 02:13:00 GMT 2002


On Fri, Jan 11, 2002 at 11:49:31PM -0800, Richard Henderson wrote:
> Expect the SSE tests to fail at the moment.  I tested this with type_for_mode
> hacked to ignore the HOST_BITS_PER_WIDE_INT so that we can create TImode
> registers.  We need to find a proper solution here that does not involve
> forcing the use of long long throughout the compiler.

Of course, the easiest answer is to not use TImode for __m128.
Using V4SFmode is somewhat less than clean when it comes time
to add the SSE2 V2DFmode stuff, but what the hey, it works.

Also:

Removed a bunch of builtins that we don't use.  All the composite
stuff is done just as well in xmmintrin.h in C instead of inside
the compiler.

Removed the x86 private prefetch builtins.  The generic prefetch
builtin is a superset, and I suspect that the number of folks that
are currently using __builtin_ia32_prefetch approximates zero.

Fixed a bunch of predicates that didn't match their constraints.

Fixed two typos in xmmintrin.h.


r~


        * config/i386/i386.c (override_options): If SSE, enable sse prefetch.
        (ix86_expand_vector_move): New.
        (bdesc_2arg): Remove andps, andnps, orps, xorps.
        (ix86_init_mmx_sse_builtins): Make static.  Remove composite builtins.
        Remove old prefetch builtins.  Special case the logicals removed above.
        (ix86_expand_builtin): Likewise.
        (safe_vector_operand): Use V4SFmode, not TImode.
        (ix86_expand_store_builtin): Remove shuffle arg.  Update callers.
        (ix86_expand_timode_binop_builtin): New.
        * config/i386/i386-protos.h: Update.
        * config/i386/i386.h (enum ix86_builtins): Update.
        * config/i386/i386.md: Correct predicates on MMX/SSE patterns.
        Use ix86_expand_vector_move in vector move expanders.
        (movti_internal, movti_rex64): Add xorps alternative.
        (sse_clrv4sf): Rename and adjust from sse_clrti.
        (prefetch): Don't work so hard.
        (prefetch_sse, prefetch_3dnow): Use PREFETCH rtx, not UNSPEC.
        * config/i386/xmmintrin.h (__m128): Use V4SFmode.
        (_mm_getcsr, _mm_setcsr): Fix typo in builtin name.

Index: config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.66
diff -c -p -d -r1.66 i386-protos.h
*** i386-protos.h	2001/10/31 14:32:27	1.66
--- i386-protos.h	2002/01/12 09:50:07
*************** extern rtx i386_simplify_dwarf_addr PARA
*** 108,113 ****
--- 108,114 ----
  
  extern void ix86_expand_clear PARAMS ((rtx));
  extern void ix86_expand_move PARAMS ((enum machine_mode, rtx[]));
+ extern void ix86_expand_vector_move PARAMS ((enum machine_mode, rtx[]));
  extern void ix86_expand_binary_operator PARAMS ((enum rtx_code,
  					       enum machine_mode, rtx[]));
  extern int ix86_binary_operator_ok PARAMS ((enum rtx_code, enum machine_mode,
*************** extern void function_arg_advance PARAMS 
*** 177,183 ****
  					tree, int));
  extern rtx ix86_function_value PARAMS ((tree));
  extern void ix86_init_builtins PARAMS ((void));
- extern void ix86_init_mmx_sse_builtins PARAMS ((void));
  extern rtx ix86_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
  #endif
  
--- 178,183 ----
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.355
diff -c -p -d -r1.355 i386.c
*** i386.c	2002/01/12 07:38:48	1.355
--- i386.c	2002/01/12 09:50:07
*************** static int ix86_adjust_cost PARAMS ((rtx
*** 684,689 ****
--- 684,690 ----
  static void ix86_sched_init PARAMS ((FILE *, int, int));
  static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
  static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
+ static void ix86_init_mmx_sse_builtins PARAMS ((void));
  
  struct ix86_address
  {
*************** static rtx ix86_expand_sse_compare PARAM
*** 701,707 ****
  static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
  static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
  static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
! static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
  static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
  static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
  static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
--- 702,710 ----
  static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
  static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
  static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
! static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
! 						     tree, rtx));
! static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
  static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
  static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
  static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
*************** override_options ()
*** 1164,1170 ****
    /* It makes no sense to ask for just SSE builtins, so MMX is also turned
       on by -msse.  */
    if (TARGET_SSE)
!     target_flags |= MASK_MMX;
  
    /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
    if (TARGET_3DNOW)
--- 1167,1176 ----
    /* It makes no sense to ask for just SSE builtins, so MMX is also turned
       on by -msse.  */
    if (TARGET_SSE)
!     {
!       target_flags |= MASK_MMX;
!       x86_prefetch_sse = true;
!     }
  
    /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
    if (TARGET_3DNOW)
*************** ix86_expand_move (mode, operands)
*** 6661,6666 ****
--- 6667,6704 ----
    emit_insn (insn);
  }
  
+ void
+ ix86_expand_vector_move (mode, operands)
+      enum machine_mode mode;
+      rtx operands[];
+ {
+   /* Force constants other than zero into memory.  We do not know how
+      the instructions used to build constants modify the upper 64 bits
+      of the register, once we have that information we may be able
+      to handle some of them more efficiently.  */
+   if ((reload_in_progress | reload_completed) == 0
+       && register_operand (operands[0], mode)
+       && CONSTANT_P (operands[1]))
+     {
+       rtx addr = gen_reg_rtx (Pmode);
+       emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
+       operands[1] = gen_rtx_MEM (mode, addr);
+     }
+ 
+   /* Make operand1 a register if it isn't already.  */
+   if ((reload_in_progress | reload_completed) == 0
+       && !register_operand (operands[0], mode)
+       && !register_operand (operands[1], mode)
+       && operands[1] != CONST0_RTX (mode))
+     {
+       rtx temp = force_reg (TImode, operands[1]);
+       emit_move_insn (operands[0], temp);
+       return;
+     }
+ 
+   emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ }  
+ 
  /* Attempt to expand a binary operator.  Make the expansion closer to the
     actual machine, then just general_operand, which will allow 3 separate
     memory references (one output, two input) in a single insn.  */
*************** static const struct builtin_description 
*** 10748,10758 ****
    { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
    { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
  
-   { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
-   { MASK_SSE, CODE_FOR_sse_nandti3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
-   { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
-   { MASK_SSE, CODE_FOR_sse_xorti3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
- 
    { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
    { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
    { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
--- 10786,10791 ----
*************** ix86_init_builtins ()
*** 10865,10871 ****
  /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
     is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
     builtins.  */
! void
  ix86_init_mmx_sse_builtins ()
  {
    const struct builtin_description * d;
--- 10898,10904 ----
  /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
     is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
     builtins.  */
! static void
  ix86_init_mmx_sse_builtins ()
  {
    const struct builtin_description * d;
*************** ix86_init_mmx_sse_builtins ()
*** 10899,10912 ****
      = build_function_type (integer_type_node,
  			   tree_cons (NULL_TREE, V8QI_type_node,
  				      endlink));
-   tree int_ftype_v2si
-     = build_function_type (integer_type_node,
- 			   tree_cons (NULL_TREE, V2SI_type_node,
- 				      endlink));
-   tree v2si_ftype_int
-     = build_function_type (V2SI_type_node,
- 			   tree_cons (NULL_TREE, integer_type_node,
- 				      endlink));
    tree v4sf_ftype_v4sf_int
      = build_function_type (V4SF_type_node,
  			   tree_cons (NULL_TREE, V4SF_type_node,
--- 10932,10937 ----
*************** ix86_init_mmx_sse_builtins ()
*** 10976,10986 ****
  						 endlink)));
    tree void_ftype_void
      = build_function_type (void_type_node, endlink);
-   tree void_ftype_pchar_int
-     = build_function_type (void_type_node,
- 			   tree_cons (NULL_TREE, pchar_type_node,
- 				      tree_cons (NULL_TREE, integer_type_node,
- 						 endlink)));
    tree void_ftype_unsigned
      = build_function_type (void_type_node,
  			   tree_cons (NULL_TREE, unsigned_type_node,
--- 11001,11006 ----
*************** ix86_init_mmx_sse_builtins ()
*** 10989,10996 ****
      = build_function_type (unsigned_type_node, endlink);
    tree di_ftype_void
      = build_function_type (long_long_unsigned_type_node, endlink);
!   tree ti_ftype_void
!     = build_function_type (intTI_type_node, endlink);
    tree v2si_ftype_v4sf
      = build_function_type (V2SI_type_node,
  			   tree_cons (NULL_TREE, V4SF_type_node,
--- 11009,11016 ----
      = build_function_type (unsigned_type_node, endlink);
    tree di_ftype_void
      = build_function_type (long_long_unsigned_type_node, endlink);
!   tree v4sf_ftype_void
!     = build_function_type (V4SF_type_node, endlink);
    tree v2si_ftype_v4sf
      = build_function_type (V2SI_type_node,
  			   tree_cons (NULL_TREE, V4SF_type_node,
*************** ix86_init_mmx_sse_builtins ()
*** 11007,11025 ****
      = build_function_type (V4SF_type_node,
  			   tree_cons (NULL_TREE, pfloat_type_node,
  				      endlink));
-   tree v4sf_ftype_float
-     = build_function_type (V4SF_type_node,
- 			   tree_cons (NULL_TREE, float_type_node,
- 				      endlink));
-   tree v4sf_ftype_float_float_float_float
-     = build_function_type (V4SF_type_node,
- 			   tree_cons (NULL_TREE, float_type_node,
- 				      tree_cons (NULL_TREE, float_type_node,
- 						 tree_cons (NULL_TREE,
- 							    float_type_node,
- 							    tree_cons (NULL_TREE,
- 								       float_type_node,
- 								       endlink)))));
    /* @@@ the type is bogus */
    tree v4sf_ftype_v4sf_pv2si
      = build_function_type (V4SF_type_node,
--- 11027,11032 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11069,11079 ****
  			   tree_cons (NULL_TREE, V2SI_type_node,
  				      tree_cons (NULL_TREE, V2SI_type_node,
  						 endlink)));
-   tree ti_ftype_ti_ti
-     = build_function_type (intTI_type_node,
- 			   tree_cons (NULL_TREE, intTI_type_node,
- 				      tree_cons (NULL_TREE, intTI_type_node,
- 						 endlink)));
    tree di_ftype_di_di
      = build_function_type (long_long_unsigned_type_node,
  			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
--- 11076,11081 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11110,11120 ****
                                                   V2SF_type_node,
                                                   endlink)));
  
-   tree void_ftype_pchar
-     = build_function_type (void_type_node,
-                            tree_cons (NULL_TREE, pchar_type_node,
-                                       endlink));
- 
    /* Add all builtins that are more or less simple operations on two
       operands.  */
    for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
--- 11112,11117 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11142,11150 ****
  	case V2SImode:
  	  type = v2si_ftype_v2si_v2si;
  	  break;
- 	case TImode:
- 	  type = ti_ftype_ti_ti;
- 	  break;
  	case DImode:
  	  type = di_ftype_di_di;
  	  break;
--- 11139,11144 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11164,11171 ****
      }
  
    /* Add the remaining MMX insns with somewhat more complicated types.  */
-   def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
-   def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
    def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
    def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
    def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
--- 11158,11163 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11199,11204 ****
--- 11191,11201 ----
    def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
    def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  
+   def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
+   def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
+   def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
+   def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
+ 
    def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
    def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
  
*************** ix86_init_mmx_sse_builtins ()
*** 11222,11228 ****
    def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
  
    def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
-   def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
  
    def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
  
--- 11219,11224 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11256,11263 ****
    def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
    def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
    def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
-   def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
-   def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
  
    /* 3DNow! extension as used in the Athlon CPU.  */
    def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
--- 11252,11257 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11267,11280 ****
    def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
    def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
  
!   /* Composite intrinsics.  */
!   def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
!   def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
!   def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
!   def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
!   def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
  }
  
  /* Errors in the source file can cause expand_expr to return const0_rtx
--- 11261,11267 ----
    def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
    def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
  
!   def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
  }
  
  /* Errors in the source file can cause expand_expr to return const0_rtx
*************** safe_vector_operand (x, mode)
*** 11293,11300 ****
      emit_insn (gen_mmx_clrdi (mode == DImode ? x
  			      : gen_rtx_SUBREG (DImode, x, 0)));
    else
!     emit_insn (gen_sse_clrti (mode == TImode ? x
! 			      : gen_rtx_SUBREG (TImode, x, 0)));
    return x;
  }
  
--- 11280,11287 ----
      emit_insn (gen_mmx_clrdi (mode == DImode ? x
  			      : gen_rtx_SUBREG (DImode, x, 0)));
    else
!     emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
! 				: gen_rtx_SUBREG (V4SFmode, x, 0)));
    return x;
  }
  
*************** ix86_expand_binop_builtin (icode, arglis
*** 11342,11354 ****
    return target;
  }
  
  /* Subroutine of ix86_expand_builtin to take care of stores.  */
  
  static rtx
! ix86_expand_store_builtin (icode, arglist, shuffle)
       enum insn_code icode;
       tree arglist;
-      int shuffle;
  {
    rtx pat;
    tree arg0 = TREE_VALUE (arglist);
--- 11329,11373 ----
    return target;
  }
  
+ /* In type_for_mode we restrict the ability to create TImode types 
+    to hosts with 64-bit H_W_I.  So we've defined the SSE logicals
+    to have a V4SFmode signature.  Convert them in-place to TImode.  */
+ 
+ static rtx
+ ix86_expand_timode_binop_builtin (icode, arglist, target)
+      enum insn_code icode;
+      tree arglist;
+      rtx target;
+ {
+   rtx pat;
+   tree arg0 = TREE_VALUE (arglist);
+   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+ 
+   op0 = gen_lowpart (TImode, op0);
+   op1 = gen_lowpart (TImode, op1);
+   target = gen_reg_rtx (TImode);
+ 
+   if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
+     op0 = copy_to_mode_reg (TImode, op0);
+   if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
+     op1 = copy_to_mode_reg (TImode, op1);
+ 
+   pat = GEN_FCN (icode) (target, op0, op1);
+   if (! pat)
+     return 0;
+   emit_insn (pat);
+ 
+   return gen_lowpart (V4SFmode, target);
+ }
+ 
  /* Subroutine of ix86_expand_builtin to take care of stores.  */
  
  static rtx
! ix86_expand_store_builtin (icode, arglist)
       enum insn_code icode;
       tree arglist;
  {
    rtx pat;
    tree arg0 = TREE_VALUE (arglist);
*************** ix86_expand_store_builtin (icode, arglis
*** 11362,11371 ****
      op1 = safe_vector_operand (op1, mode1);
  
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
-   if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
-     op1 = copy_to_mode_reg (mode1, op1);
-   if (shuffle >= 0)
-     emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
    pat = GEN_FCN (icode) (op0, op1);
    if (pat)
      emit_insn (pat);
--- 11381,11386 ----
*************** ix86_expand_builtin (exp, target, subtar
*** 11568,11574 ****
    enum insn_code icode;
    tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
    tree arglist = TREE_OPERAND (exp, 1);
!   tree arg0, arg1, arg2, arg3;
    rtx op0, op1, op2, pat;
    enum machine_mode tmode, mode0, mode1, mode2;
    unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
--- 11583,11589 ----
    enum insn_code icode;
    tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
    tree arglist = TREE_OPERAND (exp, 1);
!   tree arg0, arg1, arg2;
    rtx op0, op1, op2, pat;
    enum machine_mode tmode, mode0, mode1, mode2;
    unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
*************** ix86_expand_builtin (exp, target, subtar
*** 11583,11601 ****
        emit_insn (gen_sfence ());
        return 0;
  
-     case IX86_BUILTIN_M_FROM_INT:
-       target = gen_reg_rtx (DImode);
-       op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
-       emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
-       return target;
- 
-     case IX86_BUILTIN_M_TO_INT:
-       op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
-       op0 = copy_to_mode_reg (DImode, op0);
-       target = gen_reg_rtx (SImode);
-       emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
-       return target;
- 
      case IX86_BUILTIN_PEXTRW:
        icode = CODE_FOR_mmx_pextrw;
        arg0 = TREE_VALUE (arglist);
--- 11598,11603 ----
*************** ix86_expand_builtin (exp, target, subtar
*** 11689,11694 ****
--- 11691,11709 ----
      case IX86_BUILTIN_RCPSS:
        return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
  
+     case IX86_BUILTIN_ANDPS:
+       return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
+ 					       arglist, target);
+     case IX86_BUILTIN_ANDNPS:
+       return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
+ 					       arglist, target);
+     case IX86_BUILTIN_ORPS:
+       return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
+ 					       arglist, target);
+     case IX86_BUILTIN_XORPS:
+       return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
+ 					       arglist, target);
+ 
      case IX86_BUILTIN_LOADAPS:
        return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
  
*************** ix86_expand_builtin (exp, target, subtar
*** 11696,11710 ****
        return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
  
      case IX86_BUILTIN_STOREAPS:
!       return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
      case IX86_BUILTIN_STOREUPS:
!       return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
  
      case IX86_BUILTIN_LOADSS:
        return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
  
      case IX86_BUILTIN_STORESS:
!       return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
  
      case IX86_BUILTIN_LOADHPS:
      case IX86_BUILTIN_LOADLPS:
--- 11711,11725 ----
        return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
  
      case IX86_BUILTIN_STOREAPS:
!       return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
      case IX86_BUILTIN_STOREUPS:
!       return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
  
      case IX86_BUILTIN_LOADSS:
        return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
  
      case IX86_BUILTIN_STORESS:
!       return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
  
      case IX86_BUILTIN_LOADHPS:
      case IX86_BUILTIN_LOADLPS:
*************** ix86_expand_builtin (exp, target, subtar
*** 11753,11761 ****
        return 0;
  
      case IX86_BUILTIN_MOVNTPS:
!       return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
      case IX86_BUILTIN_MOVNTQ:
!       return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
  
      case IX86_BUILTIN_LDMXCSR:
        op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
--- 11768,11776 ----
        return 0;
  
      case IX86_BUILTIN_MOVNTPS:
!       return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
      case IX86_BUILTIN_MOVNTQ:
!       return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
  
      case IX86_BUILTIN_LDMXCSR:
        op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
*************** ix86_expand_builtin (exp, target, subtar
*** 11769,11797 ****
        emit_insn (gen_stmxcsr (target));
        return copy_to_mode_reg (SImode, target);
  
-     case IX86_BUILTIN_PREFETCH:
-       icode = CODE_FOR_prefetch_sse;
-       arg0 = TREE_VALUE (arglist);
-       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
-       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
-       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
-       mode0 = insn_data[icode].operand[0].mode;
-       mode1 = insn_data[icode].operand[1].mode;
- 
-       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
- 	{
- 	  /* @@@ better error message */
- 	  error ("selector must be an immediate");
- 	  return const0_rtx;
- 	}
- 
-       op0 = copy_to_mode_reg (Pmode, op0);
-       pat = GEN_FCN (icode) (op0, op1);
-       if (! pat)
- 	return 0;
-       emit_insn (pat);
-       return target;
- 
      case IX86_BUILTIN_SHUFPS:
        icode = CODE_FOR_sse_shufps;
        arg0 = TREE_VALUE (arglist);
--- 11784,11789 ----
*************** ix86_expand_builtin (exp, target, subtar
*** 11914,11932 ****
      case IX86_BUILTIN_PMULHRW:
        return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
  
-     case IX86_BUILTIN_PREFETCH_3DNOW:
-     case IX86_BUILTIN_PREFETCHW:
-       icode = CODE_FOR_prefetch_3dnow;
-       arg0 = TREE_VALUE (arglist);
-       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
-       op1 = (fcode == IX86_BUILTIN_PREFETCH_3DNOW ? const0_rtx : const1_rtx);
-       mode0 = insn_data[icode].operand[0].mode;
-       pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0), op1);
-       if (! pat)
-         return NULL_RTX;
-       emit_insn (pat);
-       return NULL_RTX;
- 
      case IX86_BUILTIN_PF2IW:
        return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
  
--- 11906,11911 ----
*************** ix86_expand_builtin (exp, target, subtar
*** 11944,12000 ****
  
      case IX86_BUILTIN_PSWAPDSF:
        return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
- 
-       /* Composite intrinsics.  */
-     case IX86_BUILTIN_SETPS1:
-       target = assign_386_stack_local (SFmode, 0);
-       arg0 = TREE_VALUE (arglist);
-       emit_move_insn (adjust_address (target, SFmode, 0),
- 		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
-       op0 = gen_reg_rtx (V4SFmode);
-       emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
-       emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
-       return op0;
- 
-     case IX86_BUILTIN_SETPS:
-       target = assign_386_stack_local (V4SFmode, 0);
-       arg0 = TREE_VALUE (arglist);
-       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
-       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
-       arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
-       emit_move_insn (adjust_address (target, SFmode, 0),
- 		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
-       emit_move_insn (adjust_address (target, SFmode, 4),
- 		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
-       emit_move_insn (adjust_address (target, SFmode, 8),
- 		      expand_expr (arg2, NULL_RTX, VOIDmode, 0));
-       emit_move_insn (adjust_address (target, SFmode, 12),
- 		      expand_expr (arg3, NULL_RTX, VOIDmode, 0));
-       op0 = gen_reg_rtx (V4SFmode);
-       emit_insn (gen_sse_movaps (op0, target));
-       return op0;
  
!     case IX86_BUILTIN_CLRPS:
!       target = gen_reg_rtx (TImode);
!       emit_insn (gen_sse_clrti (target));
!       return target;
! 
!     case IX86_BUILTIN_LOADRPS:
!       target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
! 					 gen_reg_rtx (V4SFmode), 1);
!       emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
!       return target;
! 
!     case IX86_BUILTIN_LOADPS1:
!       target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
! 					 gen_reg_rtx (V4SFmode), 1);
!       emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
        return target;
- 
-     case IX86_BUILTIN_STOREPS1:
-       return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
-     case IX86_BUILTIN_STORERPS:
-       return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
  
      case IX86_BUILTIN_MMX_ZERO:
        target = gen_reg_rtx (DImode);
--- 11923,11933 ----
  
      case IX86_BUILTIN_PSWAPDSF:
        return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
  
!     case IX86_BUILTIN_SSE_ZERO:
!       target = gen_reg_rtx (V4SFmode);
!       emit_insn (gen_sse_clrv4sf (target));
        return target;
  
      case IX86_BUILTIN_MMX_ZERO:
        target = gen_reg_rtx (DImode);
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.236
diff -c -p -d -r1.236 i386.h
*** i386.h	2002/01/11 13:01:21	1.236
--- i386.h	2002/01/12 09:50:08
*************** enum ix86_builtins
*** 2089,2096 ****
    IX86_BUILTIN_CVTSS2SI,
    IX86_BUILTIN_CVTTPS2PI,
    IX86_BUILTIN_CVTTSS2SI,
-   IX86_BUILTIN_M_FROM_INT,
-   IX86_BUILTIN_M_TO_INT,
  
    IX86_BUILTIN_MAXPS,
    IX86_BUILTIN_MAXSS,
--- 2089,2094 ----
*************** enum ix86_builtins
*** 2215,2221 ****
    IX86_BUILTIN_LDMXCSR,
    IX86_BUILTIN_STMXCSR,
    IX86_BUILTIN_SFENCE,
-   IX86_BUILTIN_PREFETCH,
  
    /* 3DNow! Original */
    IX86_BUILTIN_FEMMS,
--- 2213,2218 ----
*************** enum ix86_builtins
*** 2238,2245 ****
    IX86_BUILTIN_PFSUBR,
    IX86_BUILTIN_PI2FD,
    IX86_BUILTIN_PMULHRW,
-   IX86_BUILTIN_PREFETCH_3DNOW, /* PREFETCH already used */
-   IX86_BUILTIN_PREFETCHW,
  
    /* 3DNow! Athlon Extensions */
    IX86_BUILTIN_PF2IW,
--- 2235,2240 ----
*************** enum ix86_builtins
*** 2248,2264 ****
    IX86_BUILTIN_PI2FW,
    IX86_BUILTIN_PSWAPDSI,
    IX86_BUILTIN_PSWAPDSF,
- 
-   /* Composite builtins, expand to more than one insn.  */
-   IX86_BUILTIN_SETPS1,
-   IX86_BUILTIN_SETPS,
-   IX86_BUILTIN_CLRPS,
-   IX86_BUILTIN_SETRPS,
-   IX86_BUILTIN_LOADPS1,
-   IX86_BUILTIN_LOADRPS,
-   IX86_BUILTIN_STOREPS1,
-   IX86_BUILTIN_STORERPS,
  
    IX86_BUILTIN_MMX_ZERO,
  
    IX86_BUILTIN_MAX
--- 2243,2250 ----
    IX86_BUILTIN_PI2FW,
    IX86_BUILTIN_PSWAPDSI,
    IX86_BUILTIN_PSWAPDSF,
  
+   IX86_BUILTIN_SSE_ZERO,
    IX86_BUILTIN_MMX_ZERO,
  
    IX86_BUILTIN_MAX
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.328
diff -c -p -d -r1.328 i386.md
*** i386.md	2002/01/12 07:38:48	1.328
--- i386.md	2002/01/12 09:50:08
***************
*** 81,87 ****
  ;; 32 This is a `maskmov' operation.
  ;; 33 This is a `movmsk' operation.
  ;; 34 This is a `non-temporal' move.
- ;; 35 This is a `prefetch' (SSE) operation.
  ;; 36 This is used to distinguish COMISS from UCOMISS.
  ;; 37 This is a `ldmxcsr' operation.
  ;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
--- 81,86 ----
***************
*** 17686,17692 ****
  
  (define_insn "movv4sf_internal"
    [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! 	(match_operand:V4SF 1 "general_operand" "xm,x"))]
    "TARGET_SSE"
    ;; @@@ let's try to use movaps here.
    "movaps\t{%1, %0|%0, %1}"
--- 17685,17691 ----
  
  (define_insn "movv4sf_internal"
    [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! 	(match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
    "TARGET_SSE"
    ;; @@@ let's try to use movaps here.
    "movaps\t{%1, %0|%0, %1}"
***************
*** 17694,17700 ****
  
  (define_insn "movv4si_internal"
    [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
! 	(match_operand:V4SI 1 "general_operand" "xm,x"))]
    "TARGET_SSE"
    ;; @@@ let's try to use movaps here.
    "movaps\t{%1, %0|%0, %1}"
--- 17693,17699 ----
  
  (define_insn "movv4si_internal"
    [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
! 	(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
    "TARGET_SSE"
    ;; @@@ let's try to use movaps here.
    "movaps\t{%1, %0|%0, %1}"
***************
*** 17702,17729 ****
  
  (define_insn "movv8qi_internal"
    [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
! 	(match_operand:V8QI 1 "general_operand" "ym,y"))]
    "TARGET_MMX"
    "movq\t{%1, %0|%0, %1}"
    [(set_attr "type" "mmx")])
  
  (define_insn "movv4hi_internal"
    [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
! 	(match_operand:V4HI 1 "general_operand" "ym,y"))]
    "TARGET_MMX"
    "movq\t{%1, %0|%0, %1}"
    [(set_attr "type" "mmx")])
  
  (define_insn "movv2si_internal"
    [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
! 	(match_operand:V2SI 1 "general_operand" "ym,y"))]
    "TARGET_MMX"
    "movq\t{%1, %0|%0, %1}"
    [(set_attr "type" "mmx")])
  
  (define_insn "movv2sf_internal"
    [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
!         (match_operand:V2SF 1 "general_operand" "ym,y"))]
    "TARGET_3DNOW"
    "movq\\t{%1, %0|%0, %1}"
    [(set_attr "type" "mmx")])
--- 17701,17728 ----
  
  (define_insn "movv8qi_internal"
    [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
! 	(match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
    "TARGET_MMX"
    "movq\t{%1, %0|%0, %1}"
    [(set_attr "type" "mmx")])
  
  (define_insn "movv4hi_internal"
    [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
! 	(match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))]
    "TARGET_MMX"
    "movq\t{%1, %0|%0, %1}"
    [(set_attr "type" "mmx")])
  
  (define_insn "movv2si_internal"
    [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
! 	(match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))]
    "TARGET_MMX"
    "movq\t{%1, %0|%0, %1}"
    [(set_attr "type" "mmx")])
  
  (define_insn "movv2sf_internal"
    [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
!         (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))]
    "TARGET_3DNOW"
    "movq\\t{%1, %0|%0, %1}"
    [(set_attr "type" "mmx")])
***************
*** 17734,17767 ****
    "TARGET_SSE || TARGET_64BIT"
  {
    if (TARGET_64BIT)
!     {
!       ix86_expand_move (TImode, operands);
!       DONE;
!     }
!   /* For constants other than zero into memory.  We do not know how the
!      instructions used to build constants modify the upper 64 bits
!      of the register, once we have that information we may be able
!      to handle some of them more efficiently.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && register_operand (operands[0], TImode)
!       && CONSTANT_P (operands[1]))
!     {
!       rtx addr = gen_reg_rtx (Pmode);
! 
!       emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0));
!       operands[1] = gen_rtx_MEM (TImode, addr);
!     }
! 
!   /* Make operand1 a register if it isn't already.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && !register_operand (operands[0], TImode)
!       && !register_operand (operands[1], TImode)
!       && operands[1] != CONST0_RTX (TImode))
!     {
!       rtx temp = force_reg (TImode, operands[1]);
!       emit_move_insn (operands[0], temp);
!       DONE;
!     }
  })
  
  (define_expand "movv4sf"
--- 17733,17742 ----
    "TARGET_SSE || TARGET_64BIT"
  {
    if (TARGET_64BIT)
!     ix86_expand_move (TImode, operands);
!   else
!     ix86_expand_vector_move (TImode, operands);
!   DONE;
  })
  
  (define_expand "movv4sf"
***************
*** 17769,17798 ****
  	(match_operand:V4SF 1 "general_operand" ""))]
    "TARGET_SSE"
  {
!   /* For constants other than zero into memory.  We do not know how the
!      instructions used to build constants modify the upper 64 bits
!      of the register, once we have that information we may be able
!      to handle some of them more efficiently.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && register_operand (operands[0], V4SFmode)
!       && CONSTANT_P (operands[1]))
!     {
!       rtx addr = gen_reg_rtx (Pmode);
! 
!       emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0));
!       operands[1] = gen_rtx_MEM (V4SFmode, addr);
!     }
! 
!   /* Make operand1 a register if it isn't already.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && !register_operand (operands[0], V4SFmode)
!       && !register_operand (operands[1], V4SFmode)
!       && operands[1] != CONST0_RTX (V4SFmode))
!     {
!       rtx temp = force_reg (V4SFmode, operands[1]);
!       emit_move_insn (operands[0], temp);
!       DONE;
!     }
  })
  
  (define_expand "movv4si"
--- 17744,17751 ----
  	(match_operand:V4SF 1 "general_operand" ""))]
    "TARGET_SSE"
  {
!   ix86_expand_vector_move (V4SFmode, operands);
!   DONE;
  })
  
  (define_expand "movv4si"
***************
*** 17800,17829 ****
  	(match_operand:V4SI 1 "general_operand" ""))]
    "TARGET_MMX"
  {
!   /* For constants other than zero into memory.  We do not know how the
!      instructions used to build constants modify the upper 64 bits
!      of the register, once we have that information we may be able
!      to handle some of them more efficiently.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && register_operand (operands[0], V4SImode)
!       && CONSTANT_P (operands[1]))
!     {
!       rtx addr = gen_reg_rtx (Pmode);
! 
!       emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0));
!       operands[1] = gen_rtx_MEM (V4SImode, addr);
!     }
! 
!   /* Make operand1 a register if it isn't already.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && !register_operand (operands[0], V4SImode)
!       && !register_operand (operands[1], V4SImode)
!       && operands[1] != CONST0_RTX (V4SImode))
!     {
!       rtx temp = force_reg (V4SImode, operands[1]);
!       emit_move_insn (operands[0], temp);
!       DONE;
!     }
  })
  
  (define_expand "movv2si"
--- 17753,17760 ----
  	(match_operand:V4SI 1 "general_operand" ""))]
    "TARGET_MMX"
  {
!   ix86_expand_vector_move (V4SImode, operands);
!   DONE;
  })
  
  (define_expand "movv2si"
***************
*** 17831,17860 ****
  	(match_operand:V2SI 1 "general_operand" ""))]
    "TARGET_MMX"
  {
!   /* For constants other than zero into memory.  We do not know how the
!      instructions used to build constants modify the upper 64 bits
!      of the register, once we have that information we may be able
!      to handle some of them more efficiently.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && register_operand (operands[0], V2SImode)
!       && CONSTANT_P (operands[1]))
!     {
!       rtx addr = gen_reg_rtx (Pmode);
! 
!       emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0));
!       operands[1] = gen_rtx_MEM (V2SImode, addr);
!     }
! 
!   /* Make operand1 a register if it isn't already.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && !register_operand (operands[0], V2SImode)
!       && !register_operand (operands[1], V2SImode)
!       && operands[1] != CONST0_RTX (V2SImode))
!     {
!       rtx temp = force_reg (V2SImode, operands[1]);
!       emit_move_insn (operands[0], temp);
!       DONE;
!     }
  })
  
  (define_expand "movv4hi"
--- 17762,17769 ----
  	(match_operand:V2SI 1 "general_operand" ""))]
    "TARGET_MMX"
  {
!   ix86_expand_vector_move (V2SImode, operands);
!   DONE;
  })
  
  (define_expand "movv4hi"
***************
*** 17862,17891 ****
  	(match_operand:V4HI 1 "general_operand" ""))]
    "TARGET_MMX"
  {
!   /* For constants other than zero into memory.  We do not know how the
!      instructions used to build constants modify the upper 64 bits
!      of the register, once we have that information we may be able
!      to handle some of them more efficiently.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && register_operand (operands[0], V4HImode)
!       && CONSTANT_P (operands[1]))
!     {
!       rtx addr = gen_reg_rtx (Pmode);
! 
!       emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0));
!       operands[1] = gen_rtx_MEM (V4HImode, addr);
!     }
! 
!   /* Make operand1 a register if it isn't already.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && !register_operand (operands[0], V4HImode)
!       && !register_operand (operands[1], V4HImode)
!       && operands[1] != CONST0_RTX (V4HImode))
!     {
!       rtx temp = force_reg (V4HImode, operands[1]);
!       emit_move_insn (operands[0], temp);
!       DONE;
!     }
  })
  
  (define_expand "movv8qi"
--- 17771,17778 ----
  	(match_operand:V4HI 1 "general_operand" ""))]
    "TARGET_MMX"
  {
!   ix86_expand_vector_move (V4HImode, operands);
!   DONE;
  })
  
  (define_expand "movv8qi"
***************
*** 17893,17957 ****
  	(match_operand:V8QI 1 "general_operand" ""))]
    "TARGET_MMX"
  {
!   /* For constants other than zero into memory.  We do not know how the
!      instructions used to build constants modify the upper 64 bits
!      of the register, once we have that information we may be able
!      to handle some of them more efficiently.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && register_operand (operands[0], V8QImode)
!       && CONSTANT_P (operands[1]))
!     {
!       rtx addr = gen_reg_rtx (Pmode);
! 
!       emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0));
!       operands[1] = gen_rtx_MEM (V8QImode, addr);
!     }
! 
!   /* Make operand1 a register if it isn't already.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && !register_operand (operands[0], V8QImode)
!       && !register_operand (operands[1], V8QImode)
!       && operands[1] != CONST0_RTX (V8QImode))
!     {
!       rtx temp = force_reg (V8QImode, operands[1]);
!       emit_move_insn (operands[0], temp);
!       DONE;
!     }
  })
  
  (define_expand "movv2sf"
    [(set (match_operand:V2SF 0 "general_operand" "")
  	(match_operand:V2SF 1 "general_operand" ""))]
     "TARGET_3DNOW"
-    "
  {
!   /* For constants other than zero into memory.  We do not know how the
!      instructions used to build constants modify the upper 64 bits
!      of the register, once we have that information we may be able
!      to handle some of them more efficiently.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && register_operand (operands[0], V2SFmode)
!       && CONSTANT_P (operands[1]))
!     {
!       rtx addr = gen_reg_rtx (Pmode);
! 
!       emit_move_insn (addr,
! 		      XEXP (force_const_mem (V2SFmode, operands[1]), 0));
!       operands[1] = gen_rtx_MEM (V2SFmode, addr);
!    }
! 
!   /* Make operand1 a register is it isn't already.  */
!   if ((reload_in_progress | reload_completed) == 0
!       && !register_operand (operands[0], V2SFmode)
!       && !register_operand (operands[1], V2SFmode)
!       && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
!       && operands[1] != CONST0_RTX (V2SFmode))
!    {
!       rtx temp = force_reg (V2SFmode, operands[1]);
!       emit_move_insn (operands[0], temp);
!       DONE;
!     }
! }")
  
  (define_insn_and_split "*pushti"
    [(set (match_operand:TI 0 "push_operand" "=<")
--- 17780,17797 ----
  	(match_operand:V8QI 1 "general_operand" ""))]
    "TARGET_MMX"
  {
!   ix86_expand_vector_move (V8QImode, operands);
!   DONE;
  })
  
  (define_expand "movv2sf"
    [(set (match_operand:V2SF 0 "general_operand" "")
  	(match_operand:V2SF 1 "general_operand" ""))]
     "TARGET_3DNOW"
  {
!   ix86_expand_vector_move (V2SFmode, operands);
!   DONE;
! })
  
  (define_insn_and_split "*pushti"
    [(set (match_operand:TI 0 "push_operand" "=<")
***************
*** 18031,18055 ****
    [(set_attr "type" "mmx")])
  
  (define_insn "movti_internal"
!   [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
! 	(match_operand:TI 1 "general_operand" "xm,x"))]
    "TARGET_SSE && !TARGET_64BIT"
    "@
     movaps\t{%1, %0|%0, %1}
     movaps\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "*movti_rex64"
!   [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,mx,x")
! 	(match_operand:TI 1 "general_operand" "riFo,riF,x,m"))]
    "TARGET_64BIT
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
    "@
     #
     #
     movaps\\t{%1, %0|%0, %1}
     movaps\\t{%1, %0|%0, %1}"
!   [(set_attr "type" "*,*,sse,sse")
     (set_attr "mode" "TI")])
  
  (define_split
--- 17871,17897 ----
    [(set_attr "type" "mmx")])
  
  (define_insn "movti_internal"
!   [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
! 	(match_operand:TI 1 "general_operand" "O,xm,x"))]
    "TARGET_SSE && !TARGET_64BIT"
    "@
+    xorps\t%0, %0
     movaps\t{%1, %0|%0, %1}
     movaps\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "*movti_rex64"
!   [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
! 	(match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
    "TARGET_64BIT
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
    "@
     #
     #
+    xorps\t%0, %0
     movaps\\t{%1, %0|%0, %1}
     movaps\\t{%1, %0|%0, %1}"
!   [(set_attr "type" "*,*,sse,sse,sse")
     (set_attr "mode" "TI")])
  
  (define_split
***************
*** 18064,18070 ****
  ;; movaps or movups
  (define_insn "sse_movaps"
    [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! 	(unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))]
    "TARGET_SSE"
    "@
     movaps\t{%1, %0|%0, %1}
--- 17906,17913 ----
  ;; movaps or movups
  (define_insn "sse_movaps"
    [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! 	(unspec:V4SF
! 	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))]
    "TARGET_SSE"
    "@
     movaps\t{%1, %0|%0, %1}
***************
*** 18073,18079 ****
  
  (define_insn "sse_movups"
    [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! 	(unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))]
    "TARGET_SSE"
    "@
     movups\t{%1, %0|%0, %1}
--- 17916,17923 ----
  
  (define_insn "sse_movups"
    [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! 	(unspec:V4SF
! 	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))]
    "TARGET_SSE"
    "@
     movups\t{%1, %0|%0, %1}
***************
*** 18154,18160 ****
  	 (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
  	 (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
  	 (const_int 12)))]
!   "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
    "movhps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
--- 17998,18005 ----
  	 (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
  	 (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
  	 (const_int 12)))]
!   "TARGET_SSE
!    && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
    "movhps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
***************
*** 18164,18170 ****
  	 (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
  	 (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
  	 (const_int 3)))]
!   "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
    "movlps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
--- 18009,18016 ----
  	 (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
  	 (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
  	 (const_int 3)))]
!   "TARGET_SSE
!    && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
    "movlps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
***************
*** 18220,18229 ****
  
  (define_insn "vmaddv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 	                           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
!                         (match_dup 1)
! 			(const_int 1)))]
    "TARGET_SSE"
    "addss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 18066,18076 ----
  
  (define_insn "vmaddv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF
! 	 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! 	 (match_dup 1)
! 	 (const_int 1)))]
    "TARGET_SSE"
    "addss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 18231,18247 ****
  (define_insn "subv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
          (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 	           (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE"
    "subps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "vmsubv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 	                           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
!                         (match_dup 1)
! 			(const_int 1)))]
    "TARGET_SSE"
    "subss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 18078,18095 ----
  (define_insn "subv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
          (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 		    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE"
    "subps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "vmsubv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF
! 	 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 		     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! 	 (match_dup 1)
! 	 (const_int 1)))]
    "TARGET_SSE"
    "subss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 18256,18265 ****
  
  (define_insn "vmmulv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 	                           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
!                         (match_dup 1)
! 			(const_int 1)))]
    "TARGET_SSE"
    "mulss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 18104,18114 ----
  
  (define_insn "vmmulv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF
! 	 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! 	 (match_dup 1)
! 	 (const_int 1)))]
    "TARGET_SSE"
    "mulss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 18274,18283 ****
  
  (define_insn "vmdivv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 				  (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
!                         (match_dup 1)
! 			(const_int 1)))]
    "TARGET_SSE"
    "divss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 18123,18133 ----
  
  (define_insn "vmdivv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF
! 	 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 		   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! 	 (match_dup 1)
! 	 (const_int 1)))]
    "TARGET_SSE"
    "divss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 18287,18339 ****
  
  (define_insn "rcpv4sf2"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
!         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42))]
    "TARGET_SSE"
    "rcpps\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "vmrcpv4sf2"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42)
!                         (match_operand:V4SF 2 "register_operand" "0")
! 			(const_int 1)))]
    "TARGET_SSE"
    "rcpss\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "rsqrtv4sf2"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
!         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43))]
    "TARGET_SSE"
    "rsqrtps\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "vmrsqrtv4sf2"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43)
!                         (match_operand:V4SF 2 "register_operand" "0")
! 			(const_int 1)))]
    "TARGET_SSE"
    "rsqrtss\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "sqrtv4sf2"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
!         (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")))]
    "TARGET_SSE"
    "sqrtps\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "vmsqrtv4sf2"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm"))
!                         (match_operand:V4SF 2 "register_operand" "0")
! 			(const_int 1)))]
    "TARGET_SSE"
    "sqrtss\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
- 
  ;; SSE logical operations.
  
  ;; These are not called andti3 etc. because we really really don't want
--- 18137,18193 ----
  
  (define_insn "rcpv4sf2"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
!         (unspec:V4SF
! 	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))]
    "TARGET_SSE"
    "rcpps\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "vmrcpv4sf2"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF
! 	 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42)
! 	 (match_operand:V4SF 2 "register_operand" "0")
! 	 (const_int 1)))]
    "TARGET_SSE"
    "rcpss\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "rsqrtv4sf2"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
!         (unspec:V4SF
! 	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))]
    "TARGET_SSE"
    "rsqrtps\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "vmrsqrtv4sf2"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF
! 	 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43)
! 	 (match_operand:V4SF 2 "register_operand" "0")
! 	 (const_int 1)))]
    "TARGET_SSE"
    "rsqrtss\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "sqrtv4sf2"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
!         (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
    "TARGET_SSE"
    "sqrtps\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "vmsqrtv4sf2"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF
! 	 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
! 	 (match_operand:V4SF 2 "register_operand" "0")
! 	 (const_int 1)))]
    "TARGET_SSE"
    "sqrtss\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  ;; SSE logical operations.
  
  ;; These are not called andti3 etc. because we really really don't want
***************
*** 18519,18527 ****
  
  ;; Use xor, but don't show input operands so they aren't live before
  ;; this insn.
! (define_insn "sse_clrti"
!   [(set (match_operand:TI 0 "register_operand" "=x")
!         (unspec:TI [(const_int 0)] 45))]
    "TARGET_SSE"
    "xorps\t{%0, %0|%0, %0}"
    [(set_attr "type" "sse")
--- 18373,18381 ----
  
  ;; Use xor, but don't show input operands so they aren't live before
  ;; this insn.
! (define_insn "sse_clrv4sf"
!   [(set (match_operand:V4SF 0 "register_operand" "=x")
!         (unspec:V4SF [(const_int 0)] 45))]
    "TARGET_SSE"
    "xorps\t{%0, %0|%0, %0}"
    [(set_attr "type" "sse")
***************
*** 18532,18539 ****
  (define_insn "maskcmpv4sf3"
    [(set (match_operand:V4SI 0 "register_operand" "=x")
          (match_operator:V4SI 3 "sse_comparison_operator"
! 			     [(match_operand:V4SF 1 "register_operand" "0")
! 			      (match_operand:V4SF 2 "nonimmediate_operand" "x")]))]
    "TARGET_SSE"
    "cmp%D3ps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 18386,18393 ----
  (define_insn "maskcmpv4sf3"
    [(set (match_operand:V4SI 0 "register_operand" "=x")
          (match_operator:V4SI 3 "sse_comparison_operator"
! 		[(match_operand:V4SF 1 "register_operand" "0")
! 		 (match_operand:V4SF 2 "register_operand" "x")]))]
    "TARGET_SSE"
    "cmp%D3ps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 18542,18565 ****
    [(set (match_operand:V4SI 0 "register_operand" "=x")
          (not:V4SI
  	 (match_operator:V4SI 3 "sse_comparison_operator"
! 			      [(match_operand:V4SF 1 "register_operand" "0")
! 			       (match_operand:V4SF 2 "nonimmediate_operand" "x")])))]
    "TARGET_SSE"
-   "*
  {
    if (GET_CODE (operands[3]) == UNORDERED)
!     return \"cmpordps\t{%2, %0|%0, %2}\";
! 
!   return \"cmpn%D3ps\t{%2, %0|%0, %2}\";
! }"
    [(set_attr "type" "sse")])
  
  (define_insn "vmmaskcmpv4sf3"
    [(set (match_operand:V4SI 0 "register_operand" "=x")
  	(vec_merge:V4SI
  	 (match_operator:V4SI 3 "sse_comparison_operator"
! 			      [(match_operand:V4SF 1 "register_operand" "0")
! 			       (match_operand:V4SF 2 "nonimmediate_operand" "x")])
  	 (match_dup 1)
  	 (const_int 1)))]
    "TARGET_SSE"
--- 18396,18418 ----
    [(set (match_operand:V4SI 0 "register_operand" "=x")
          (not:V4SI
  	 (match_operator:V4SI 3 "sse_comparison_operator"
! 		[(match_operand:V4SF 1 "register_operand" "0")
! 		 (match_operand:V4SF 2 "register_operand" "x")])))]
    "TARGET_SSE"
  {
    if (GET_CODE (operands[3]) == UNORDERED)
!     return "cmpordps\t{%2, %0|%0, %2}";
!   else
!     return "cmpn%D3ps\t{%2, %0|%0, %2}";
! }
    [(set_attr "type" "sse")])
  
  (define_insn "vmmaskcmpv4sf3"
    [(set (match_operand:V4SI 0 "register_operand" "=x")
  	(vec_merge:V4SI
  	 (match_operator:V4SI 3 "sse_comparison_operator"
! 		[(match_operand:V4SF 1 "register_operand" "0")
! 		 (match_operand:V4SF 2 "register_operand" "x")])
  	 (match_dup 1)
  	 (const_int 1)))]
    "TARGET_SSE"
***************
*** 18571,18588 ****
  	(vec_merge:V4SI
  	 (not:V4SI
  	  (match_operator:V4SI 3 "sse_comparison_operator"
! 			       [(match_operand:V4SF 1 "register_operand" "0")
! 				(match_operand:V4SF 2 "nonimmediate_operand" "x")]))
  	 (subreg:V4SI (match_dup 1) 0)
  	 (const_int 1)))]
    "TARGET_SSE"
-   "*
  {
    if (GET_CODE (operands[3]) == UNORDERED)
!     return \"cmpordss\t{%2, %0|%0, %2}\";
! 
!   return \"cmpn%D3ss\t{%2, %0|%0, %2}\";
! }"
    [(set_attr "type" "sse")])
  
  (define_insn "sse_comi"
--- 18424,18440 ----
  	(vec_merge:V4SI
  	 (not:V4SI
  	  (match_operator:V4SI 3 "sse_comparison_operator"
! 		[(match_operand:V4SF 1 "register_operand" "0")
! 		 (match_operand:V4SF 2 "register_operand" "x")]))
  	 (subreg:V4SI (match_dup 1) 0)
  	 (const_int 1)))]
    "TARGET_SSE"
  {
    if (GET_CODE (operands[3]) == UNORDERED)
!     return "cmpordss\t{%2, %0|%0, %2}";
!   else
!     return "cmpn%D3ss\t{%2, %0|%0, %2}";
! }
    [(set_attr "type" "sse")])
  
  (define_insn "sse_comi"
***************
*** 18663,18672 ****
  
  (define_insn "vmsmaxv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 	                           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
!                         (match_dup 1)
! 			(const_int 1)))]
    "TARGET_SSE"
    "maxss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 18515,18525 ----
  
  (define_insn "vmsmaxv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF
! 	 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! 	 (match_dup 1)
! 	 (const_int 1)))]
    "TARGET_SSE"
    "maxss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 18681,18690 ****
  
  (define_insn "vmsminv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 	                           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
!                         (match_dup 1)
! 			(const_int 1)))]
    "TARGET_SSE"
    "minss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
--- 18534,18544 ----
  
  (define_insn "vmsminv4sf3"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF
! 	 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! 	 (match_dup 1)
! 	 (const_int 1)))]
    "TARGET_SSE"
    "minss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
***************
*** 18694,18749 ****
  
  (define_insn "cvtpi2ps"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 			(vec_duplicate:V4SF
! 			 (float:V2SF (match_operand:V2SI 2 "register_operand" "ym")))
! 			(const_int 12)))]
    "TARGET_SSE"
    "cvtpi2ps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "cvtps2pi"
    [(set (match_operand:V2SI 0 "register_operand" "=y")
! 	(vec_select:V2SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
! 			 (parallel
! 			  [(const_int 0)
! 			   (const_int 1)])))]
    "TARGET_SSE"
    "cvtps2pi\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "cvttps2pi"
    [(set (match_operand:V2SI 0 "register_operand" "=y")
! 	(vec_select:V2SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
! 			 (parallel
! 			  [(const_int 0)
! 			   (const_int 1)])))]
    "TARGET_SSE"
    "cvttps2pi\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "cvtsi2ss"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
! 	 		(vec_duplicate:V4SF
! 			 (float:SF (match_operand:SI 2 "register_operand" "rm")))
! 			(const_int 14)))]
    "TARGET_SSE"
    "cvtsi2ss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "cvtss2si"
    [(set (match_operand:SI 0 "register_operand" "=r")
! 	(vec_select:SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
! 		       (parallel [(const_int 0)])))]
    "TARGET_SSE"
    "cvtss2si\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "cvttss2si"
    [(set (match_operand:SI 0 "register_operand" "=r")
! 	(vec_select:SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
! 		       (parallel [(const_int 0)])))]
    "TARGET_SSE"
    "cvttss2si\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
--- 18548,18605 ----
  
  (define_insn "cvtpi2ps"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF
! 	 (match_operand:V4SF 1 "register_operand" "0")
! 	 (vec_duplicate:V4SF
! 	  (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
! 	 (const_int 12)))]
    "TARGET_SSE"
    "cvtpi2ps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "cvtps2pi"
    [(set (match_operand:V2SI 0 "register_operand" "=y")
! 	(vec_select:V2SI
! 	 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
! 	 (parallel [(const_int 0) (const_int 1)])))]
    "TARGET_SSE"
    "cvtps2pi\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "cvttps2pi"
    [(set (match_operand:V2SI 0 "register_operand" "=y")
! 	(vec_select:V2SI
! 	 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
! 	 (parallel [(const_int 0) (const_int 1)])))]
    "TARGET_SSE"
    "cvttps2pi\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "cvtsi2ss"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(vec_merge:V4SF
! 	 (match_operand:V4SF 1 "register_operand" "0")
! 	 (vec_duplicate:V4SF
! 	  (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
! 	 (const_int 14)))]
    "TARGET_SSE"
    "cvtsi2ss\t{%2, %0|%0, %2}"
    [(set_attr "type" "sse")])
  
  (define_insn "cvtss2si"
    [(set (match_operand:SI 0 "register_operand" "=r")
! 	(vec_select:SI
! 	 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
! 	 (parallel [(const_int 0)])))]
    "TARGET_SSE"
    "cvtss2si\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
  
  (define_insn "cvttss2si"
    [(set (match_operand:SI 0 "register_operand" "=r")
! 	(vec_select:SI
! 	 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
! 	 (parallel [(const_int 0)])))]
    "TARGET_SSE"
    "cvttss2si\t{%1, %0|%0, %1}"
    [(set_attr "type" "sse")])
***************
*** 18877,18884 ****
    [(set (match_operand:V4HI 0 "register_operand" "=y")
  	(truncate:V4HI
  	 (lshiftrt:V4SI
! 	  (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
! 		     (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
  	  (const_int 16))))]
    "TARGET_MMX"
    "pmulhw\t{%2, %0|%0, %2}"
--- 18733,18742 ----
    [(set (match_operand:V4HI 0 "register_operand" "=y")
  	(truncate:V4HI
  	 (lshiftrt:V4SI
! 	  (mult:V4SI (sign_extend:V4SI
! 		      (match_operand:V4HI 1 "register_operand" "0"))
! 		     (sign_extend:V4SI
! 		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
  	  (const_int 16))))]
    "TARGET_MMX"
    "pmulhw\t{%2, %0|%0, %2}"
***************
*** 18888,18895 ****
    [(set (match_operand:V4HI 0 "register_operand" "=y")
  	(truncate:V4HI
  	 (lshiftrt:V4SI
! 	  (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
! 		     (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
  	  (const_int 16))))]
    "TARGET_SSE || TARGET_3DNOW_A"
    "pmulhuw\t{%2, %0|%0, %2}"
--- 18746,18755 ----
    [(set (match_operand:V4HI 0 "register_operand" "=y")
  	(truncate:V4HI
  	 (lshiftrt:V4SI
! 	  (mult:V4SI (zero_extend:V4SI
! 		      (match_operand:V4HI 1 "register_operand" "0"))
! 		     (zero_extend:V4SI
! 		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
  	  (const_int 16))))]
    "TARGET_SSE || TARGET_3DNOW_A"
    "pmulhuw\t{%2, %0|%0, %2}"
***************
*** 18899,18910 ****
    [(set (match_operand:V2SI 0 "register_operand" "=y")
          (plus:V2SI
  	 (mult:V2SI
! 	  (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
! 					     (parallel [(const_int 0)
! 							(const_int 2)])))
! 	  (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
! 					     (parallel [(const_int 0)
! 							(const_int 2)]))))
  	 (mult:V2SI
  	  (sign_extend:V2SI (vec_select:V2HI (match_dup 1)
  					     (parallel [(const_int 1)
--- 18759,18770 ----
    [(set (match_operand:V2SI 0 "register_operand" "=y")
          (plus:V2SI
  	 (mult:V2SI
! 	  (sign_extend:V2SI
! 	   (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
! 			    (parallel [(const_int 0) (const_int 2)])))
! 	  (sign_extend:V2SI
! 	   (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
! 			    (parallel [(const_int 0) (const_int 2)]))))
  	 (mult:V2SI
  	  (sign_extend:V2SI (vec_select:V2HI (match_dup 1)
  					     (parallel [(const_int 1)
***************
*** 19404,19478 ****
    [(set_attr "type" "sse")
     (set_attr "memory" "unknown")])
  
- (define_expand "prefetch"
-   [(prefetch (match_operand:SI 0 "address_operand" "p")
- 	     (match_operand:SI 1 "const_int_operand" "n")
- 	     (match_operand:SI 2 "const_int_operand" "n"))]
-   "TARGET_PREFETCH_SSE || TARGET_3DNOW"
-   "
- {
-   int rw = INTVAL (operands[1]);
-   int locality = INTVAL (operands[2]);
-   if (rw != 0 && rw != 1)
-     abort ();
-   if (locality < 0 || locality > 3)
-     abort ();
-   /* Use 3dNOW prefetch in case we are asking for write prefetch not
-      suported by SSE counterpart or the SSE prefetch is not available
-      (K6 machines).  Otherwise use SSE prefetch as it allows specifying
-      of locality.  */
-   if (TARGET_3DNOW
-        && (!TARGET_PREFETCH_SSE || rw))
-     {
-       emit_insn (gen_prefetch_3dnow (operands[0], operands[1]));
-     }
-   else
-     {
-       int i;
-       switch (locality)
- 	{
- 	  case 0:	/* No temporal locality.  */
- 	    i = 0;
- 	    break;
- 	  case 1:	/* Lowest level of temporal locality.  */
- 	    i = 3;
- 	    break;
- 	  case 2:	/* Moderate level of temporal locality.  */
- 	    i = 2;
- 	    break;
- 	  case 3:	/* Highest level of temporal locality.  */
- 	    i = 1;
- 	    break;
- 	  default:
- 	    abort ();	/* We already checked for valid values above.  */
- 	    break;
- 	}
-       emit_insn (gen_prefetch_sse (operands[0], GEN_INT (i)));
-     }
-   DONE;
- }")
- 
- (define_insn "prefetch_sse"
-   [(unspec [(match_operand:SI 0 "address_operand" "p")
- 	    (match_operand:SI 1 "immediate_operand" "n")] 35)]
-   "TARGET_PREFETCH_SSE"
- {
-   switch (INTVAL (operands[1]))
-     {
-     case 0:
-       return "prefetchnta\t%a0";
-     case 1:
-       return "prefetcht0\t%a0";
-     case 2:
-       return "prefetcht1\t%a0";
-     case 3:
-       return "prefetcht2\t%a0";
-     default:
-       abort ();
-     }
- }
-   [(set_attr "type" "sse")])
- 
  (define_expand "sse_prologue_save"
    [(parallel [(set (match_operand:BLK 0 "" "")
  		   (unspec:BLK [(reg:DI 21)
--- 19264,19269 ----
***************
*** 19630,19648 ****
    "femms"
    [(set_attr "type" "mmx")])
  
- (define_insn "prefetch_3dnow"
-   [(prefetch (match_operand:SI 0 "address_operand" "p")
- 	     (match_operand:SI 1 "const_int_operand" "n")
- 	     (const_int 0))]
-   "TARGET_3DNOW"
- {
-   if (INTVAL (operands[1]) == 0)
-     return "prefetch\t%a0";
-   else
-     return "prefetchw\t%a0";
- }
-   [(set_attr "type" "mmx")])
- 
  (define_insn "pf2id"
    [(set (match_operand:V2SI 0 "register_operand" "=y")
  	(fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
--- 19421,19426 ----
***************
*** 19819,19822 ****
--- 19597,19658 ----
  			 (parallel [(const_int 1) (const_int 0)])))]
    "TARGET_3DNOW_A"
    "pswapd\\t{%1, %0|%0, %1}"
+   [(set_attr "type" "mmx")])
+ 
+ (define_expand "prefetch"
+   [(prefetch (match_operand:SI 0 "address_operand" "")
+ 	     (match_operand:SI 1 "const_int_operand" "")
+ 	     (match_operand:SI 2 "const_int_operand" ""))]
+   "TARGET_PREFETCH_SSE || TARGET_3DNOW"
+ {
+   int rw = INTVAL (operands[1]);
+   int locality = INTVAL (operands[2]);
+   if (rw != 0 && rw != 1)
+     abort ();
+   if (locality < 0 || locality > 3)
+     abort ();
+ 
+   /* Use the 3DNow! prefetch when a write prefetch is requested (the SSE
+      counterpart does not support it) or when the SSE prefetch is not
+      available (K6 machines).  Otherwise use the SSE prefetch, as it
+      allows the locality to be specified.  */
+   if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+     {
+       operands[2] = GEN_INT (3);	/* 3DNow! path: locality forced to 3.  */
+     }
+   else
+     {
+       operands[1] = const0_rtx;	/* SSE path: rw forced to 0.  */
+     }
+ })
+ 
+ (define_insn "*prefetch_sse"	; SSE prefetch; the rw operand is fixed at 0
+   [(prefetch (match_operand:SI 0 "address_operand" "p")
+ 	     (const_int 0)
+ 	     (match_operand:SI 1 "const_int_operand" ""))]
+   "TARGET_PREFETCH_SSE"
+ {
+   static const char * const patterns[4] = {	/* Indexed by locality 0..3.  */
+    "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+   };
+ 
+   int locality = INTVAL (operands[1]);
+   if (locality < 0 || locality > 3)
+     abort ();
+ 
+   return patterns[locality];
+ }
+   [(set_attr "type" "sse")])
+ 
+ (define_insn "*prefetch_3dnow"	; 3DNow! prefetch; operand 1 is the rw flag
+   [(prefetch (match_operand:SI 0 "address_operand" "p")
+ 	     (match_operand:SI 1 "const_int_operand" "n")
+ 	     (const_int 3))]	; the prefetch expander forces locality to 3 here
+   "TARGET_3DNOW"
+ {
+   if (INTVAL (operands[1]) == 0)	/* rw == 0: prefetch for read.  */
+     return "prefetch\t%a0";
+   else				/* Otherwise: prefetch for write.  */
+     return "prefetchw\t%a0";
+ }
    [(set_attr "type" "mmx")])
Index: config/i386/xmmintrin.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/xmmintrin.h,v
retrieving revision 1.1
diff -c -p -d -r1.1 xmmintrin.h
*** xmmintrin.h	2002/01/12 07:38:49	1.1
--- xmmintrin.h	2002/01/12 09:50:08
***************
*** 34,44 ****
  #include <mmintrin.h>
  
  /* The data type indended for user use.  */
! typedef int __m128 __attribute__ ((mode (TI)));
  
  /* Internal data types for implementing the instrinsics.  */
! typedef int __v4sf __attribute__ ((mode (V4SF)));
! typedef int __v4si __attribute__ ((mode (V4SI)));
  
  /* Create a selector for use with the SHUFPS instruction.  */
  #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
--- 34,44 ----
  #include <mmintrin.h>
  
  /* The data type indended for user use.  */
! typedef int __m128 __attribute__ ((__mode__(__V4SF__)));
  
  /* Internal data types for implementing the instrinsics.  */
! typedef int __v4sf __attribute__ ((__mode__(__V4SF__)));
! typedef int __v4si __attribute__ ((__mode__(__V4SI__)));
  
  /* Create a selector for use with the SHUFPS instruction.  */
  #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
*************** _mm_movemask_ps (__m128 __A)
*** 680,686 ****
  static __inline unsigned int
  _mm_getcsr (void)
  {
!   return __builtin_ia32_getmxcsr ();
  }
  
  /* Read exception bits from the control register.  */
--- 680,686 ----
  static __inline unsigned int
  _mm_getcsr (void)
  {
!   return __builtin_ia32_stmxcsr ();
  }
  
  /* Read exception bits from the control register.  */
*************** _MM_GET_FLUSH_ZERO_MODE (void)
*** 712,718 ****
  static __inline void
  _mm_setcsr (unsigned int __I)
  {
!   __builtin_ia32_setmxcsr (__I);
  }
  
  /* Set exception bits in the control register.  */
--- 712,718 ----
  static __inline void
  _mm_setcsr (unsigned int __I)
  {
!   __builtin_ia32_ldmxcsr (__I);
  }
  
  /* Set exception bits in the control register.  */



More information about the Gcc-patches mailing list