This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

vector move operations, revisited


So, the test case that I added for PR 19010 was failing on x86_64.
And in looking at it, I decided that my previous approach was wrong.

In the solution to that pr, I had set HARD_REGNO_MODE_OK to reflect
what *operational* modes are available for each register for each
different ISA.

However, based on re-reading the x86_64 calling conventions, which
does things like move traditionally MMX values through SSE registers,
it became clear this was incorrect.

The original bug was that HARD_REGNO_MODE_OK and the move patterns
had mismatched.  But the proper solution is to relax both to the
most accommodating setting -- any register that is wide enough for
the mode should be able to hold the mode, and the move instruction
should allow it.

This most relaxed view isn't *too* far off of where we were before
my 19010 patch started tightening things up.  There are more 
substantive changes to the scalar and vector df patterns, but most
of the changes are fairly minor.

I think I'll mention the type_natural_mode change in another message
sure to draw more attention...

Anyway, tested on i686 and x86_64 linux.



r~



        * config/i386/i386.c (x86_64_reg_class_name): Re-indent.
        (classify_argument, examine_argument, construct_container,
        merge_classes): Remove prototypes.
        (type_natural_mode): Split out from ...
        (function_arg): ... here.
        (gen_reg_or_parallel): Remove alt_mode argument.  Update callers.
        Use orig_mode unless it's BLKmode.
        (construct_container): Add orig_mode argument.  Update callers.
        Use gen_reg_or_parallel for SSE registers.
        (ix86_function_value): Use type_natural_mode.
        (ix86_gimplify_va_arg): Likewise.
        (ix86_hard_regno_mode_ok): Always accept all SSE, MMX, 3DNOW modes in
        SSE registers; always accept all MMX, 3DNOW modes in MMX registers.
        * config/i386/i386.h (VALID_SSE2_REG_MODE): Don't include
        VALID_MMX_REG_MODE.
        * config/i386/i386.md (attribute mode): Add V1DF.
        (movsi_1): Use 'x' instead of 'Y' constraints.
        (movsi_1_nointernunit, movdi_2, movdi_1_rex64): Likewise.
        (movdi_1_rex64_nointerunit): Likewise.
        (movdf_nointeger, movdf_integer): Likewise.  Handle SSE1.
        (movsf_1, movsf_1_nointerunit): Line up constraint alternatives.
        (swapsf): Use fp_register_operand, don't disable for TARGET_SSE.
        (swapdf): Likewise.
        (swapxf): Enable only for TARGET_80387.
        (movv2sf, movv2sf_internal, pushv2sf): Enable for MMX.
        (movtf): Remove double-check for TARGET_64BIT.
        (movv2df_internal): Enable for SSE1.
        (movv8hi_internal, movv16qi_internal): Likewise.
        (movv2df, movv8hi, movv16qi): Likewise.
        (pushv2di, pushv8hi, pushv16qi, pushv4si): Likewise.
        (pushdi2_rex64, movv4sf_internal, movv4si_internal, movv2di_internal,
        movv8qi_internal, movv4hi_internal, movv2sf_internal,
        movv2df_internal, movv8hi_internal, movv16qi_internal,
        movti_internal): Add leading '*' to name.

Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.755
diff -c -p -d -r1.755 i386.c
*** config/i386/i386.c	17 Dec 2004 08:53:58 -0000	1.755
--- config/i386/i386.c	17 Dec 2004 21:12:07 -0000
*************** enum x86_64_reg_class
*** 951,967 ****
      X86_64_COMPLEX_X87_CLASS,
      X86_64_MEMORY_CLASS
    };
! static const char * const x86_64_reg_class_name[] =
!    {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "cplx87", "no"};
  
  #define MAX_CLASSES 4
- static int classify_argument (enum machine_mode, tree,
- 			      enum x86_64_reg_class [MAX_CLASSES], int);
- static int examine_argument (enum machine_mode, tree, int, int *, int *);
- static rtx construct_container (enum machine_mode, tree, int, int, int,
- 				const int *, int);
- static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
- 					    enum x86_64_reg_class);
  
  /* Table of constants used by fldpi, fldln2, etc....  */
  static REAL_VALUE_TYPE ext_80387_constants_table [5];
--- 951,962 ----
      X86_64_COMPLEX_X87_CLASS,
      X86_64_MEMORY_CLASS
    };
! static const char * const x86_64_reg_class_name[] = {
!   "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
!   "sseup", "x87", "x87up", "cplx87", "no"
! };
  
  #define MAX_CLASSES 4
  
  /* Table of constants used by fldpi, fldln2, etc....  */
  static REAL_VALUE_TYPE ext_80387_constants_table [5];
*************** init_cumulative_args (CUMULATIVE_ARGS *c
*** 2040,2045 ****
--- 2035,2105 ----
    return;
  }
  
+ /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
+    But in the case of vector types, it is some vector mode.
+ 
+    When we have only some of our vector isa extensions enabled, then there
+    are some modes for which vector_mode_supported_p is false.  For these
+    modes, the generic vector support in gcc will choose some non-vector mode
+    in order to implement the type.  By computing the natural mode, we'll 
+    select the proper ABI location for the operand and not depend on whatever
+    the middle-end decides to do with these vector types.  */
+ 
+ static enum machine_mode
+ type_natural_mode (tree type)
+ {
+   enum machine_mode mode = TYPE_MODE (type);
+ 
+   if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
+     {
+       HOST_WIDE_INT size = int_size_in_bytes (type);
+       if ((size == 8 || size == 16)
+ 	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
+ 	  && TYPE_VECTOR_SUBPARTS (type) > 1)
+ 	{
+ 	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
+ 
+ 	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
+ 	    mode = MIN_MODE_VECTOR_FLOAT;
+ 	  else
+ 	    mode = MIN_MODE_VECTOR_INT;
+ 
+ 	  /* Get the mode which has this inner mode and number of units.  */
+ 	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
+ 	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
+ 		&& GET_MODE_INNER (mode) == innermode)
+ 	      return mode;
+ 
+ 	  abort ();
+ 	}
+     }
+ 
+   return mode;
+ }
+ 
+ /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
+    this may not agree with the mode that the type system has chosen for the
+    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
+    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
+ 
+ static rtx
+ gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
+ 		     unsigned int regno)
+ {
+   rtx tmp;
+ 
+   if (orig_mode != BLKmode)
+     tmp = gen_rtx_REG (orig_mode, regno);
+   else
+     {
+       tmp = gen_rtx_REG (mode, regno);
+       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
+       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
+     }
+ 
+   return tmp;
+ }
+ 
  /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
     of this code is to classify each 8bytes of incoming argument by the register
     class and assign registers accordingly.  */
*************** examine_argument (enum machine_mode mode
*** 2442,2453 ****
        }
    return 1;
  }
  /* Construct container for the argument used by GCC interface.  See
     FUNCTION_ARG for the detailed description.  */
  static rtx
! construct_container (enum machine_mode mode, tree type, int in_return,
! 		     int nintregs, int nsseregs, const int * intreg,
! 		     int sse_regno)
  {
    enum machine_mode tmpmode;
    int bytes =
--- 2502,2515 ----
        }
    return 1;
  }
+ 
  /* Construct container for the argument used by GCC interface.  See
     FUNCTION_ARG for the detailed description.  */
+ 
  static rtx
! construct_container (enum machine_mode mode, enum machine_mode orig_mode,
! 		     tree type, int in_return, int nintregs, int nsseregs,
! 		     const int *intreg, int sse_regno)
  {
    enum machine_mode tmpmode;
    int bytes =
*************** construct_container (enum machine_mode m
*** 2477,2483 ****
      }
    if (!n)
      return NULL;
!   if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
      return NULL;
    if (needed_intregs > nintregs || needed_sseregs > nsseregs)
      return NULL;
--- 2539,2546 ----
      }
    if (!n)
      return NULL;
!   if (!examine_argument (mode, type, in_return, &needed_intregs,
! 			 &needed_sseregs))
      return NULL;
    if (needed_intregs > nintregs || needed_sseregs > nsseregs)
      return NULL;
*************** construct_container (enum machine_mode m
*** 2493,2499 ****
        case X86_64_SSE_CLASS:
        case X86_64_SSESF_CLASS:
        case X86_64_SSEDF_CLASS:
! 	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
        case X86_64_X87_CLASS:
        case X86_64_COMPLEX_X87_CLASS:
  	return gen_rtx_REG (mode, FIRST_STACK_REG);
--- 2556,2562 ----
        case X86_64_SSE_CLASS:
        case X86_64_SSESF_CLASS:
        case X86_64_SSEDF_CLASS:
! 	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
        case X86_64_X87_CLASS:
        case X86_64_COMPLEX_X87_CLASS:
  	return gen_rtx_REG (mode, FIRST_STACK_REG);
*************** construct_container (enum machine_mode m
*** 2581,2599 ****
     (TYPE is null for libcalls where that information may not be available.)  */
  
  void
! function_arg_advance (CUMULATIVE_ARGS *cum,	/* current arg information */
! 		      enum machine_mode mode,	/* current arg mode */
! 		      tree type,	/* type of the argument or 0 if lib support */
! 		      int named)	/* whether or not the argument was named */
  {
    int bytes =
      (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
    int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  
    if (TARGET_DEBUG_ARG)
!     fprintf (stderr,
! 	     "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
! 	     words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
    if (TARGET_64BIT)
      {
        int int_nregs, sse_nregs;
--- 2644,2661 ----
     (TYPE is null for libcalls where that information may not be available.)  */
  
  void
! function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
! 		      tree type, int named)
  {
    int bytes =
      (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
    int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  
    if (TARGET_DEBUG_ARG)
!     fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
! 	     "mode=%s, named=%d)\n\n",
! 	     words, cum->words, cum->nregs, cum->sse_nregs,
! 	     GET_MODE_NAME (mode), named);
    if (TARGET_64BIT)
      {
        int int_nregs, sse_nregs;
*************** function_arg_advance (CUMULATIVE_ARGS *c
*** 2651,2684 ****
    return;
  }
  
- /* A subroutine of function_arg.  We want to pass a parameter whose nominal
-    type is MODE in REGNO.  We try to minimize ABI variation, so MODE may not
-    actually be valid for REGNO with the current ISA.  In this case, ALT_MODE
-    is used instead.  It must be the same size as MODE, and must be known to
-    be valid for REGNO.  Finally, ORIG_MODE is the original mode of the 
-    parameter, as seen by the type system.  This may be different from MODE
-    when we're mucking with things minimizing ABI variations.
- 
-    Returns a REG or a PARALLEL as appropriate.  */
- 
- static rtx
- gen_reg_or_parallel (enum machine_mode mode, enum machine_mode alt_mode,
- 		     enum machine_mode orig_mode, unsigned int regno)
- {
-   rtx tmp;
- 
-   if (HARD_REGNO_MODE_OK (regno, mode))
-     tmp = gen_rtx_REG (mode, regno);
-   else
-     {
-       tmp = gen_rtx_REG (alt_mode, regno);
-       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
-       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
-     }
- 
-   return tmp;
- }
- 
  /* Define where to put the arguments to a function.
     Value is zero to push the argument on the stack,
     or a hard register in which to store the argument.
--- 2713,2718 ----
*************** function_arg (CUMULATIVE_ARGS *cum, enum
*** 2705,2730 ****
  
    /* To simplify the code below, represent vector types with a vector mode
       even if MMX/SSE are not active.  */
!   if (type
!       && TREE_CODE (type) == VECTOR_TYPE
!       && (bytes == 8 || bytes == 16)
!       && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_INT
!       && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_FLOAT)
!     {
!       enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
!       enum machine_mode newmode
! 	= TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
! 	  ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT;
! 
!       /* Get the mode which has this inner mode and number of units.  */
!       for (; newmode != VOIDmode; newmode = GET_MODE_WIDER_MODE (newmode))
! 	if (GET_MODE_NUNITS (newmode) == TYPE_VECTOR_SUBPARTS (type)
! 	    && GET_MODE_INNER (newmode) == innermode)
! 	  {
! 	    mode = newmode;
! 	    break;
! 	  }
!     }
  
    /* Handle a hidden AL argument containing number of registers for varargs
       x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
--- 2739,2746 ----
  
    /* To simplify the code below, represent vector types with a vector mode
       even if MMX/SSE are not active.  */
!   if (type && TREE_CODE (type) == VECTOR_TYPE)
!     mode = type_natural_mode (type);
  
    /* Handle a hidden AL argument containing number of registers for varargs
       x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
*************** function_arg (CUMULATIVE_ARGS *cum, enum
*** 2741,2747 ****
  	return constm1_rtx;
      }
    if (TARGET_64BIT)
!     ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
  			       &x86_64_int_parameter_registers [cum->regno],
  			       cum->sse_regno);
    else
--- 2757,2764 ----
  	return constm1_rtx;
      }
    if (TARGET_64BIT)
!     ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
! 			       cum->sse_nregs,
  			       &x86_64_int_parameter_registers [cum->regno],
  			       cum->sse_regno);
    else
*************** function_arg (CUMULATIVE_ARGS *cum, enum
*** 2793,2799 ****
  			 "changes the ABI");
  	      }
  	    if (cum->sse_nregs)
! 	      ret = gen_reg_or_parallel (mode, TImode, orig_mode,
  					 cum->sse_regno + FIRST_SSE_REG);
  	  }
  	break;
--- 2810,2816 ----
  			 "changes the ABI");
  	      }
  	    if (cum->sse_nregs)
! 	      ret = gen_reg_or_parallel (mode, orig_mode,
  					 cum->sse_regno + FIRST_SSE_REG);
  	  }
  	break;
*************** function_arg (CUMULATIVE_ARGS *cum, enum
*** 2810,2816 ****
  			 "changes the ABI");
  	      }
  	    if (cum->mmx_nregs)
! 	      ret = gen_reg_or_parallel (mode, DImode, orig_mode,
  					 cum->mmx_regno + FIRST_MMX_REG);
  	  }
  	break;
--- 2827,2833 ----
  			 "changes the ABI");
  	      }
  	    if (cum->mmx_nregs)
! 	      ret = gen_reg_or_parallel (mode, orig_mode,
  					 cum->mmx_regno + FIRST_MMX_REG);
  	  }
  	break;
*************** ix86_function_value (tree valtype)
*** 2972,2982 ****
  {
    if (TARGET_64BIT)
      {
!       rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
! 				     REGPARM_MAX, SSE_REGPARM_MAX,
  				     x86_64_int_return_registers, 0);
!       /* For zero sized structures, construct_container return NULL, but we need
!          to keep rest of compiler happy by returning meaningful value.  */
        if (!ret)
  	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
        return ret;
--- 2989,3000 ----
  {
    if (TARGET_64BIT)
      {
!       rtx ret = construct_container (type_natural_mode (valtype),
! 				     TYPE_MODE (valtype), valtype,
! 				     1, REGPARM_MAX, SSE_REGPARM_MAX,
  				     x86_64_int_return_registers, 0);
!       /* For zero sized structures, construct_container return NULL, but we
! 	 need to keep rest of compiler happy by returning meaningful value.  */
        if (!ret)
  	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
        return ret;
*************** ix86_gimplify_va_arg (tree valist, tree 
*** 3342,3352 ****
    size = int_size_in_bytes (type);
    rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  
!   container = construct_container (TYPE_MODE (type), type, 0,
! 				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
!   /*
!    * Pull the value out of the saved registers ...
!    */
  
    addr = create_tmp_var (ptr_type_node, "addr");
    DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
--- 3360,3370 ----
    size = int_size_in_bytes (type);
    rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  
!   container = construct_container (type_natural_mode (type), TYPE_MODE (type),
! 				   type, 0, REGPARM_MAX, SSE_REGPARM_MAX,
! 				   intreg, 0);
! 
!   /* Pull the value out of the saved registers.  */
  
    addr = create_tmp_var (ptr_type_node, "addr");
    DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
*************** ix86_hard_regno_mode_ok (int regno, enum
*** 14032,14049 ****
      return VALID_FP_MODE_P (mode);
    if (SSE_REGNO_P (regno))
      {
!       if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
! 	return 1;
!       if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
! 	return 1;
!       return 0;
      }
    if (MMX_REGNO_P (regno))
      {
!       if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
! 	return 1;
!       if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
! 	return 1;
      }
    /* We handle both integer and floats in the general purpose registers.
       In future we should be able to handle vector modes as well.  */
--- 14050,14070 ----
      return VALID_FP_MODE_P (mode);
    if (SSE_REGNO_P (regno))
      {
!       /* We implement the move patterns for all vector modes into and
! 	 out of SSE registers, even when no operation instructions
! 	 are available.  */
!       return (VALID_SSE_REG_MODE (mode)
! 	      || VALID_SSE2_REG_MODE (mode)
! 	      || VALID_MMX_REG_MODE (mode)
! 	      || VALID_MMX_REG_MODE_3DNOW (mode));
      }
    if (MMX_REGNO_P (regno))
      {
!       /* We implement the move patterns for 3DNOW modes even in MMX mode,
! 	 so if the register is available at all, then we can move data of
! 	 the given mode into or out of it.  */
!       return (VALID_MMX_REG_MODE (mode)
! 	      || VALID_MMX_REG_MODE_3DNOW (mode));
      }
    /* We handle both integer and floats in the general purpose registers.
       In future we should be able to handle vector modes as well.  */
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.408
diff -c -p -d -r1.408 i386.h
*** config/i386/i386.h	15 Dec 2004 17:41:48 -0000	1.408
--- config/i386/i386.h	17 Dec 2004 21:12:08 -0000
*************** do {									\
*** 1075,1082 ****
  
  #define VALID_SSE2_REG_MODE(MODE) \
      ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode    \
!      || (MODE) == V2DImode || (MODE) == DFmode				\
!      || VALID_MMX_REG_MODE (MODE))
  
  #define VALID_SSE_REG_MODE(MODE)					\
      ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode	\
--- 1075,1081 ----
  
  #define VALID_SSE2_REG_MODE(MODE) \
      ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode    \
!      || (MODE) == V2DImode || (MODE) == DFmode)
  
  #define VALID_SSE_REG_MODE(MODE)					\
      ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode	\
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.583
diff -c -p -d -r1.583 i386.md
*** config/i386/i386.md	17 Dec 2004 06:28:59 -0000	1.583
--- config/i386/i386.md	17 Dec 2004 21:12:12 -0000
***************
*** 199,205 ****
  
  ;; Main data type used by the insn
  (define_attr "mode"
!   "unknown,none,QI,HI,SI,DI,SF,DF,XF,TI,V4SF,V2DF,V2SF"
    (const_string "unknown"))
  
  ;; The CPU unit operations uses.
--- 199,205 ----
  
  ;; Main data type used by the insn
  (define_attr "mode"
!   "unknown,none,QI,HI,SI,DI,SF,DF,XF,TI,V4SF,V2DF,V2SF,V1DF"
    (const_string "unknown"))
  
  ;; The CPU unit operations uses.
***************
*** 1122,1129 ****
     (set_attr "length_immediate" "1")])
  
  (define_insn "*movsi_1"
!   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!rm,!*Y")
! 	(match_operand:SI 1 "general_operand" "rinm,rin,*y,*y,rm,*Y,*Y,rm"))]
    "(TARGET_INTER_UNIT_MOVES || optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
--- 1122,1131 ----
     (set_attr "length_immediate" "1")])
  
  (define_insn "*movsi_1"
!   [(set (match_operand:SI 0 "nonimmediate_operand"
! 			"=r  ,m  ,!*y,!rm,!*y,!*x,!rm,!*x")
! 	(match_operand:SI 1 "general_operand"
! 			"rinm,rin,*y ,*y ,rm ,*x ,*x ,rm"))]
    "(TARGET_INTER_UNIT_MOVES || optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
***************
*** 1161,1168 ****
     (set_attr "mode" "SI,SI,DI,SI,SI,TI,SI,SI")])
  
  (define_insn "*movsi_1_nointernunit"
!   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!m,!*y,!*Y,!m,!*Y")
! 	(match_operand:SI 1 "general_operand" "rinm,rin,*y,*y,m,*Y,*Y,m"))]
    "(!TARGET_INTER_UNIT_MOVES && !optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
--- 1163,1172 ----
     (set_attr "mode" "SI,SI,DI,SI,SI,TI,SI,SI")])
  
  (define_insn "*movsi_1_nointernunit"
!   [(set (match_operand:SI 0 "nonimmediate_operand"
! 			"=r  ,m  ,!*y,!m,!*y,!*x,!m,!*x")
! 	(match_operand:SI 1 "general_operand"
! 			"rinm,rin,*y ,*y,m  ,*x ,*x,m"))]
    "(!TARGET_INTER_UNIT_MOVES && !optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
***************
*** 1784,1790 ****
    "!TARGET_64BIT"
    "#")
  
! (define_insn "pushdi2_rex64"
    [(set (match_operand:DI 0 "push_operand" "=<,!<")
  	(match_operand:DI 1 "general_no_elim_operand" "re*m,n"))]
    "TARGET_64BIT"
--- 1788,1794 ----
    "!TARGET_64BIT"
    "#")
  
! (define_insn "*pushdi2_rex64"
    [(set (match_operand:DI 0 "push_operand" "=<,!<")
  	(match_operand:DI 1 "general_no_elim_operand" "re*m,n"))]
    "TARGET_64BIT"
***************
*** 1895,1902 ****
     (set_attr "length_immediate" "1")])
  
  (define_insn "*movdi_2"
!   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!m*y,!*y,!m,!*Y,!*Y")
! 	(match_operand:DI 1 "general_operand" "riFo,riF,*y,m,*Y,*Y,m"))]
    "!TARGET_64BIT
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
    "@
--- 1899,1906 ----
     (set_attr "length_immediate" "1")])
  
  (define_insn "*movdi_2"
!   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!m*y,!*y,!m,!*x,!*x")
! 	(match_operand:DI 1 "general_operand" "riFo,riF,*y,m,*x,*x,m"))]
    "!TARGET_64BIT
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
    "@
***************
*** 1929,1936 ****
    "ix86_split_long_move (operands); DONE;")
  
  (define_insn "*movdi_1_rex64"
!   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!*y,!rm,!*y,!*Y,!rm,!*Y,!*Y,!*y")
! 	(match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,*y,rm,*Y,*Y,rm,*y,*Y"))]
    "TARGET_64BIT
     && (TARGET_INTER_UNIT_MOVES || optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
--- 1933,1942 ----
    "ix86_split_long_move (operands); DONE;")
  
  (define_insn "*movdi_1_rex64"
!   [(set (match_operand:DI 0 "nonimmediate_operand"
! 		"=r,r  ,r,mr,!mr,!*y,!rm,!*y,!*x,!rm,!*x,!*x,!*y")
! 	(match_operand:DI 1 "general_operand"
! 		"Z ,rem,i,re,n  ,*y ,*y ,rm ,*x ,*x ,rm ,*y ,*x"))]
    "TARGET_64BIT
     && (TARGET_INTER_UNIT_MOVES || optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
***************
*** 1986,1993 ****
     (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI,DI,DI")])
  
  (define_insn "*movdi_1_rex64_nointerunit"
!   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!*y,!m,!*y,!*Y,!m,!*Y")
! 	(match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,*y,m,*Y,*Y,m"))]
    "TARGET_64BIT
     && (!TARGET_INTER_UNIT_MOVES && !optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
--- 1992,2001 ----
     (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI,DI,DI")])
  
  (define_insn "*movdi_1_rex64_nointerunit"
!   [(set (match_operand:DI 0 "nonimmediate_operand"
! 		"=r,r ,r,mr,!mr,!*y,!m,!*y,!*Y,!m,!*Y")
! 	(match_operand:DI 1 "general_operand"
! 		"Z,rem,i,re,n  ,*y ,*y,m  ,*Y ,*Y,m"))]
    "TARGET_64BIT
     && (!TARGET_INTER_UNIT_MOVES && !optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
***************
*** 2179,2186 ****
     (set (mem:SF (reg:DI SP_REG)) (match_dup 1))])
  
  (define_insn "*movsf_1"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!rm,!*y")
! 	(match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,rm,*y,*y"))]
    "(TARGET_INTER_UNIT_MOVES || optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
     && (reload_in_progress || reload_completed
--- 2187,2196 ----
     (set (mem:SF (reg:DI SP_REG)) (match_dup 1))])
  
  (define_insn "*movsf_1"
!   [(set (match_operand:SF 0 "nonimmediate_operand"
! 	  "=f#xr,m   ,f#xr,r#xf  ,m    ,x#rf,x#rf,x#rf ,m   ,!*y,!rm,!*y")
! 	(match_operand:SF 1 "general_operand"
! 	  "fm#rx,f#rx,G   ,rmF#fx,Fr#fx,C   ,x   ,xm#rf,x#rf,rm ,*y ,*y"))]
    "(TARGET_INTER_UNIT_MOVES || optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
     && (reload_in_progress || reload_completed
***************
*** 2267,2274 ****
  	       (const_string "SF")))])
  
  (define_insn "*movsf_1_nointerunit"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!m,!*y")
! 	(match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,m,*y,*y"))]
    "(!TARGET_INTER_UNIT_MOVES && !optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
     && (reload_in_progress || reload_completed
--- 2277,2286 ----
  	       (const_string "SF")))])
  
  (define_insn "*movsf_1_nointerunit"
!   [(set (match_operand:SF 0 "nonimmediate_operand"
! 	  "=f#xr,m   ,f#xr,r#xf  ,m    ,x#rf,x#rf,x#rf ,m   ,!*y,!m,!*y")
! 	(match_operand:SF 1 "general_operand"
! 	  "fm#rx,f#rx,G   ,rmF#fx,Fr#fx,C   ,x   ,xm#rf,x#rf,m  ,*y,*y"))]
    "(!TARGET_INTER_UNIT_MOVES && !optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
     && (reload_in_progress || reload_completed
***************
*** 2355,2365 ****
  	       (const_string "SF")))])
  
  (define_insn "*swapsf"
!   [(set (match_operand:SF 0 "register_operand" "+f")
! 	(match_operand:SF 1 "register_operand" "+f"))
     (set (match_dup 1)
  	(match_dup 0))]
!   "reload_completed || !TARGET_SSE"
  {
    if (STACK_TOP_P (operands[0]))
      return "fxch\t%1";
--- 2367,2377 ----
  	       (const_string "SF")))])
  
  (define_insn "*swapsf"
!   [(set (match_operand:SF 0 "fp_register_operand" "+f")
! 	(match_operand:SF 1 "fp_register_operand" "+f"))
     (set (match_dup 1)
  	(match_dup 0))]
!   "reload_completed || TARGET_80387"
  {
    if (STACK_TOP_P (operands[0]))
      return "fxch\t%1";
***************
*** 2431,2438 ****
  ;; when optimizing for size.
  
  (define_insn "*movdf_nointeger"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m,f#Y,*r,o,Y#f,Y#f,Y#f,m")
! 	(match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,C,Y#f,YHm#f,Y#f"))]
    "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
     && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT)
     && (reload_in_progress || reload_completed
--- 2443,2452 ----
  ;; when optimizing for size.
  
  (define_insn "*movdf_nointeger"
!   [(set (match_operand:DF 0 "nonimmediate_operand"
! 				"=f#x,m  ,f#x,*r  ,o  ,x#f,x#f,x#f  ,m")
! 	(match_operand:DF 1 "general_operand"
! 				"fm#x,f#x,G  ,*roF,F*r,C  ,x#f,xHm#f,x#f"))]
    "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
     && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT)
     && (reload_in_progress || reload_completed
***************
*** 2470,2493 ****
  	  abort ();
  	}
      case 6:
        switch (get_attr_mode (insn))
  	{
  	case MODE_V4SF:
  	  return "movaps\t{%1, %0|%0, %1}";
  	case MODE_V2DF:
  	  return "movapd\t{%1, %0|%0, %1}";
  	case MODE_DF:
  	  return "movsd\t{%1, %0|%0, %1}";
  	default:
  	  abort ();
  	}
-     case 7:
-       if (get_attr_mode (insn) == MODE_V2DF)
- 	return "movlpd\t{%1, %0|%0, %1}";
-       else
- 	return "movsd\t{%1, %0|%0, %1}";
-     case 8:
-       return "movsd\t{%1, %0|%0, %1}";
  
      default:
        abort();
--- 2484,2508 ----
  	  abort ();
  	}
      case 6:
+     case 7:
+     case 8:
        switch (get_attr_mode (insn))
  	{
  	case MODE_V4SF:
  	  return "movaps\t{%1, %0|%0, %1}";
  	case MODE_V2DF:
  	  return "movapd\t{%1, %0|%0, %1}";
+ 	case MODE_TI:
+ 	  return "movdqa\t{%1, %0|%0, %1}";
+ 	case MODE_DI:
+ 	  return "movq\t{%1, %0|%0, %1}";
  	case MODE_DF:
  	  return "movsd\t{%1, %0|%0, %1}";
+ 	case MODE_V1DF:
+ 	  return "movlpd\t{%1, %0|%0, %1}";
  	default:
  	  abort ();
  	}
  
      default:
        abort();
***************
*** 2497,2502 ****
--- 2512,2528 ----
     (set (attr "mode")
          (cond [(eq_attr "alternative" "3,4")
  		 (const_string "SI")
+ 
+ 	       /* For SSE1, we have many fewer alternatives.  */
+ 	       (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ 		 (cond [(eq_attr "alternative" "5,6")
+ 			  (if_then_else
+ 			    (ne (symbol_ref "optimize_size") (const_int 0))
+ 			    (const_string "V4SF")
+ 			    (const_string "TI"))
+ 		       ]
+ 		   (const_string "DI"))
+ 
  	       /* xorps is one byte shorter.  */
  	       (eq_attr "alternative" "5")
  		 (cond [(ne (symbol_ref "optimize_size")
***************
*** 2504,2511 ****
  			  (const_string "V4SF")
  			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
  			    (const_int 0))
! 			  (const_string "TI")]
  		       (const_string "V2DF"))
  	       /* For architectures resolving dependencies on
  		  whole SSE registers use APD move to break dependency
  		  chains, otherwise use short move to avoid extra work.
--- 2530,2539 ----
  			  (const_string "V4SF")
  			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
  			    (const_int 0))
! 			  (const_string "TI")
! 		       ]
  		       (const_string "V2DF"))
+ 
  	       /* For architectures resolving dependencies on
  		  whole SSE registers use APD move to break dependency
  		  chains, otherwise use short move to avoid extra work.
***************
*** 2513,2524 ****
  		  movaps encodes one byte shorter.  */
  	       (eq_attr "alternative" "6")
  		 (cond
! 		  [(ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		     (const_string "V4SF")
! 		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! 		       (const_int 0))
! 		     (const_string "V2DF")]
  		   (const_string "DF"))
  	       /* For architectures resolving dependencies on register
  		  parts we may avoid extra work to zero out upper part
--- 2541,2553 ----
  		  movaps encodes one byte shorter.  */
  	       (eq_attr "alternative" "6")
  		 (cond
! 		   [(ne (symbol_ref "optimize_size")
! 		        (const_int 0))
! 		      (const_string "V4SF")
! 		    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! 		        (const_int 0))
! 		      (const_string "V2DF")
! 		   ]
  		   (const_string "DF"))
  	       /* For architectures resolving dependencies on register
  		  parts we may avoid extra work to zero out upper part
***************
*** 2527,2539 ****
  		 (if_then_else
  		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
  		       (const_int 0))
! 		   (const_string "V2DF")
! 		   (const_string "DF"))]
! 	       (const_string "DF")))])
  
  (define_insn "*movdf_integer"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m")
! 	(match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,C,Y#rf,Ym#rf,Y#rf"))]
    "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
     && ((!optimize_size && TARGET_INTEGER_DFMODE_MOVES) || TARGET_64BIT)
     && (reload_in_progress || reload_completed
--- 2556,2571 ----
  		 (if_then_else
  		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
  		       (const_int 0))
! 		   (const_string "V1DF")
! 		   (const_string "DF"))
! 	      ]
! 	      (const_string "DF")))])
  
  (define_insn "*movdf_integer"
!   [(set (match_operand:DF 0 "nonimmediate_operand"
! 			"=f#Yr,m   ,f#Yr,r#Yf  ,o    ,Y#rf,Y#rf,Y#rf ,m")
! 	(match_operand:DF 1 "general_operand"
! 			"fm#Yr,f#Yr,G   ,roF#Yf,Fr#Yf,C   ,Y#rf,Ym#rf,Y#rf"))]
    "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
     && ((!optimize_size && TARGET_INTEGER_DFMODE_MOVES) || TARGET_64BIT)
     && (reload_in_progress || reload_completed
***************
*** 2572,2595 ****
  	  abort ();
  	}
      case 6:
        switch (get_attr_mode (insn))
  	{
  	case MODE_V4SF:
  	  return "movaps\t{%1, %0|%0, %1}";
  	case MODE_V2DF:
  	  return "movapd\t{%1, %0|%0, %1}";
  	case MODE_DF:
  	  return "movsd\t{%1, %0|%0, %1}";
  	default:
  	  abort ();
  	}
-     case 7:
-       if (get_attr_mode (insn) == MODE_V2DF)
- 	return "movlpd\t{%1, %0|%0, %1}";
-       else
- 	return "movsd\t{%1, %0|%0, %1}";
-     case 8:
-       return "movsd\t{%1, %0|%0, %1}";
  
      default:
        abort();
--- 2604,2628 ----
  	  abort ();
  	}
      case 6:
+     case 7:
+     case 8:
        switch (get_attr_mode (insn))
  	{
  	case MODE_V4SF:
  	  return "movaps\t{%1, %0|%0, %1}";
  	case MODE_V2DF:
  	  return "movapd\t{%1, %0|%0, %1}";
+ 	case MODE_TI:
+ 	  return "movdqa\t{%1, %0|%0, %1}";
+ 	case MODE_DI:
+ 	  return "movq\t{%1, %0|%0, %1}";
  	case MODE_DF:
  	  return "movsd\t{%1, %0|%0, %1}";
+ 	case MODE_V1DF:
+ 	  return "movlpd\t{%1, %0|%0, %1}";
  	default:
  	  abort ();
  	}
  
      default:
        abort();
***************
*** 2599,2604 ****
--- 2632,2648 ----
     (set (attr "mode")
          (cond [(eq_attr "alternative" "3,4")
  		 (const_string "SI")
+ 
+ 	       /* For SSE1, we have many fewer alternatives.  */
+ 	       (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ 		 (cond [(eq_attr "alternative" "5,6")
+ 			  (if_then_else
+ 			    (ne (symbol_ref "optimize_size") (const_int 0))
+ 			    (const_string "V4SF")
+ 			    (const_string "TI"))
+ 		       ]
+ 		   (const_string "DI"))
+ 
  	       /* xorps is one byte shorter.  */
  	       (eq_attr "alternative" "5")
  		 (cond [(ne (symbol_ref "optimize_size")
***************
*** 2606,2626 ****
  			  (const_string "V4SF")
  			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
  			    (const_int 0))
! 			  (const_string "TI")]
  		       (const_string "V2DF"))
  	       /* For architectures resolving dependencies on
  		  whole SSE registers use APD move to break dependency
! 		  chains, otherwise use short move to avoid extra work.  
  
  		  movaps encodes one byte shorter.  */
  	       (eq_attr "alternative" "6")
  		 (cond
! 		  [(ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		     (const_string "V4SF")
! 		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! 		       (const_int 0))
! 		     (const_string "V2DF")]
  		   (const_string "DF"))
  	       /* For architectures resolving dependencies on register
  		  parts we may avoid extra work to zero out upper part
--- 2650,2673 ----
  			  (const_string "V4SF")
  			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
  			    (const_int 0))
! 			  (const_string "TI")
! 		       ]
  		       (const_string "V2DF"))
+ 
  	       /* For architectures resolving dependencies on
  		  whole SSE registers use APD move to break dependency
! 		  chains, otherwise use short move to avoid extra work.
  
  		  movaps encodes one byte shorter.  */
  	       (eq_attr "alternative" "6")
  		 (cond
! 		   [(ne (symbol_ref "optimize_size")
! 		        (const_int 0))
! 		      (const_string "V4SF")
! 		    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! 		        (const_int 0))
! 		      (const_string "V2DF")
! 		   ]
  		   (const_string "DF"))
  	       /* For architectures resolving dependencies on register
  		  parts we may avoid extra work to zero out upper part
***************
*** 2629,2637 ****
  		 (if_then_else
  		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
  		       (const_int 0))
! 		   (const_string "V2DF")
! 		   (const_string "DF"))]
! 	       (const_string "DF")))])
  
  (define_split
    [(set (match_operand:DF 0 "nonimmediate_operand" "")
--- 2676,2685 ----
  		 (if_then_else
  		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
  		       (const_int 0))
! 		   (const_string "V1DF")
! 		   (const_string "DF"))
! 	      ]
! 	      (const_string "DF")))])
  
  (define_split
    [(set (match_operand:DF 0 "nonimmediate_operand" "")
***************
*** 2648,2658 ****
    "ix86_split_long_move (operands); DONE;")
  
  (define_insn "*swapdf"
!   [(set (match_operand:DF 0 "register_operand" "+f")
! 	(match_operand:DF 1 "register_operand" "+f"))
     (set (match_dup 1)
  	(match_dup 0))]
!   "reload_completed || !TARGET_SSE2"
  {
    if (STACK_TOP_P (operands[0]))
      return "fxch\t%1";
--- 2696,2706 ----
    "ix86_split_long_move (operands); DONE;")
  
  (define_insn "*swapdf"
!   [(set (match_operand:DF 0 "fp_register_operand" "+f")
! 	(match_operand:DF 1 "fp_register_operand" "+f"))
     (set (match_dup 1)
  	(match_dup 0))]
!   "reload_completed || TARGET_80387"
  {
    if (STACK_TOP_P (operands[0]))
      return "fxch\t%1";
***************
*** 2843,2849 ****
  	(match_operand:XF 1 "register_operand" "+f"))
     (set (match_dup 1)
  	(match_dup 0))]
!   ""
  {
    if (STACK_TOP_P (operands[0]))
      return "fxch\t%1";
--- 2891,2897 ----
  	(match_operand:XF 1 "register_operand" "+f"))
     (set (match_dup 1)
  	(match_dup 0))]
!   "TARGET_80387"
  {
    if (STACK_TOP_P (operands[0]))
      return "fxch\t%1";
***************
*** 19759,19765 ****
  
  ;; Moves for SSE/MMX regs.
  
! (define_insn "movv4sf_internal"
    [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
    "TARGET_SSE"
--- 19807,19813 ----
  
  ;; Moves for SSE/MMX regs.
  
! (define_insn "*movv4sf_internal"
    [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
    "TARGET_SSE"
***************
*** 19784,19790 ****
    operands[2] = CONST0_RTX (V4SFmode);
  })
  
! (define_insn "movv4si_internal"
    [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:V4SI 1 "vector_move_operand" "C,xm,x"))]
    "TARGET_SSE"
--- 19832,19838 ----
    operands[2] = CONST0_RTX (V4SFmode);
  })
  
! (define_insn "*movv4si_internal"
    [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:V4SI 1 "vector_move_operand" "C,xm,x"))]
    "TARGET_SSE"
***************
*** 19824,19830 ****
  		   (const_string "TI"))]
  	       (const_string "TI")))])
  
! (define_insn "movv2di_internal"
    [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:V2DI 1 "vector_move_operand" "C,xm,x"))]
    "TARGET_SSE"
--- 19872,19878 ----
  		   (const_string "TI"))]
  	       (const_string "TI")))])
  
! (define_insn "*movv2di_internal"
    [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:V2DI 1 "vector_move_operand" "C,xm,x"))]
    "TARGET_SSE"
***************
*** 19878,19884 ****
    operands[2] = CONST0_RTX (V2DFmode);
  })
  
! (define_insn "movv8qi_internal"
    [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m,!y,!*Y,?*Y,?m")
  	(match_operand:V8QI 1 "vector_move_operand" "C,ym,y,*Y,y,*Ym,*Y"))]
    "TARGET_MMX
--- 19926,19932 ----
    operands[2] = CONST0_RTX (V2DFmode);
  })
  
! (define_insn "*movv8qi_internal"
    [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m,!y,!*Y,?*Y,?m")
  	(match_operand:V8QI 1 "vector_move_operand" "C,ym,y,*Y,y,*Ym,*Y"))]
    "TARGET_MMX
***************
*** 19894,19900 ****
    [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov")
     (set_attr "mode" "DI")])
  
! (define_insn "movv4hi_internal"
    [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m,!y,!*Y,?*Y,?m")
  	(match_operand:V4HI 1 "vector_move_operand" "C,ym,y,*Y,y,*Ym,*Y"))]
    "TARGET_MMX
--- 19942,19948 ----
    [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov")
     (set_attr "mode" "DI")])
  
! (define_insn "*movv4hi_internal"
    [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m,!y,!*Y,?*Y,?m")
  	(match_operand:V4HI 1 "vector_move_operand" "C,ym,y,*Y,y,*Ym,*Y"))]
    "TARGET_MMX
***************
*** 19926,19935 ****
    [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov")
     (set_attr "mode" "DI")])
  
! (define_insn "movv2sf_internal"
    [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m,!y,!*Y,?*x,?m")
          (match_operand:V2SF 1 "vector_move_operand" "C,ym,y,*Y,y,*xm,*x"))]
!   "TARGET_3DNOW
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
    "@
      pxor\t%0, %0
--- 19974,19983 ----
    [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov")
     (set_attr "mode" "DI")])
  
! (define_insn "*movv2sf_internal"
    [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m,!y,!*Y,?*x,?m")
          (match_operand:V2SF 1 "vector_move_operand" "C,ym,y,*Y,y,*xm,*x"))]
!   "TARGET_MMX
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
    "@
      pxor\t%0, %0
***************
*** 19959,19975 ****
  	(match_operand:TF 1 "nonimmediate_operand" ""))]
    "TARGET_64BIT"
  {
!   if (TARGET_64BIT)
!     ix86_expand_move (TFmode, operands);
!   else
!     ix86_expand_vector_move (TFmode, operands);
    DONE;
  })
  
! (define_insn "movv2df_internal"
    [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
!   "TARGET_SSE2
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (which_alternative)
--- 20007,20020 ----
  	(match_operand:TF 1 "nonimmediate_operand" ""))]
    "TARGET_64BIT"
  {
!   ix86_expand_move (TFmode, operands);
    DONE;
  })
  
! (define_insn "*movv2df_internal"
    [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
!   "TARGET_SSE
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (which_alternative)
***************
*** 19991,19997 ****
  }
    [(set_attr "type" "ssemov")
     (set (attr "mode")
!         (cond [(eq_attr "alternative" "0,1")
  		 (if_then_else
  		   (ne (symbol_ref "optimize_size")
  		       (const_int 0))
--- 20036,20044 ----
  }
    [(set_attr "type" "ssemov")
     (set (attr "mode")
!         (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
! 		 (const_string "V4SF")
! 	       (eq_attr "alternative" "0,1")
  		 (if_then_else
  		   (ne (symbol_ref "optimize_size")
  		       (const_int 0))
***************
*** 20007,20016 ****
  		   (const_string "V2DF"))]
  	       (const_string "V2DF")))])
  
! (define_insn "movv8hi_internal"
    [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:V8HI 1 "vector_move_operand" "C,xm,x"))]
!   "TARGET_SSE2
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (which_alternative)
--- 20054,20063 ----
  		   (const_string "V2DF"))]
  	       (const_string "V2DF")))])
  
! (define_insn "*movv8hi_internal"
    [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:V8HI 1 "vector_move_operand" "C,xm,x"))]
!   "TARGET_SSE
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (which_alternative)
***************
*** 20048,20057 ****
  		   (const_string "TI"))]
  	       (const_string "TI")))])
  
! (define_insn "movv16qi_internal"
    [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:V16QI 1 "vector_move_operand" "C,xm,x"))]
!   "TARGET_SSE2
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (which_alternative)
--- 20095,20104 ----
  		   (const_string "TI"))]
  	       (const_string "TI")))])
  
! (define_insn "*movv16qi_internal"
    [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:V16QI 1 "vector_move_operand" "C,xm,x"))]
!   "TARGET_SSE
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (which_alternative)
***************
*** 20092,20098 ****
  (define_expand "movv2df"
    [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
  	(match_operand:V2DF 1 "nonimmediate_operand" ""))]
!   "TARGET_SSE2"
  {
    ix86_expand_vector_move (V2DFmode, operands);
    DONE;
--- 20139,20145 ----
  (define_expand "movv2df"
    [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
  	(match_operand:V2DF 1 "nonimmediate_operand" ""))]
!   "TARGET_SSE"
  {
    ix86_expand_vector_move (V2DFmode, operands);
    DONE;
***************
*** 20101,20107 ****
  (define_expand "movv8hi"
    [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
  	(match_operand:V8HI 1 "nonimmediate_operand" ""))]
!   "TARGET_SSE2"
  {
    ix86_expand_vector_move (V8HImode, operands);
    DONE;
--- 20148,20154 ----
  (define_expand "movv8hi"
    [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
  	(match_operand:V8HI 1 "nonimmediate_operand" ""))]
!   "TARGET_SSE"
  {
    ix86_expand_vector_move (V8HImode, operands);
    DONE;
***************
*** 20110,20116 ****
  (define_expand "movv16qi"
    [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
  	(match_operand:V16QI 1 "nonimmediate_operand" ""))]
!   "TARGET_SSE2"
  {
    ix86_expand_vector_move (V16QImode, operands);
    DONE;
--- 20157,20163 ----
  (define_expand "movv16qi"
    [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
  	(match_operand:V16QI 1 "nonimmediate_operand" ""))]
!   "TARGET_SSE"
  {
    ix86_expand_vector_move (V16QImode, operands);
    DONE;
***************
*** 20173,20179 ****
  (define_expand "movv2sf"
    [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
  	(match_operand:V2SF 1 "nonimmediate_operand" ""))]
!    "TARGET_3DNOW"
  {
    ix86_expand_vector_move (V2SFmode, operands);
    DONE;
--- 20220,20226 ----
  (define_expand "movv2sf"
    [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
  	(match_operand:V2SF 1 "nonimmediate_operand" ""))]
!   "TARGET_MMX"
  {
    ix86_expand_vector_move (V2SFmode, operands);
    DONE;
***************
*** 20194,20212 ****
  (define_insn "*pushv2di"
    [(set (match_operand:V2DI 0 "push_operand" "=<")
  	(match_operand:V2DI 1 "register_operand" "x"))]
!   "TARGET_SSE2"
    "#")
  
  (define_insn "*pushv8hi"
    [(set (match_operand:V8HI 0 "push_operand" "=<")
  	(match_operand:V8HI 1 "register_operand" "x"))]
!   "TARGET_SSE2"
    "#")
  
  (define_insn "*pushv16qi"
    [(set (match_operand:V16QI 0 "push_operand" "=<")
  	(match_operand:V16QI 1 "register_operand" "x"))]
!   "TARGET_SSE2"
    "#")
  
  (define_insn "*pushv4sf"
--- 20241,20259 ----
  (define_insn "*pushv2di"
    [(set (match_operand:V2DI 0 "push_operand" "=<")
  	(match_operand:V2DI 1 "register_operand" "x"))]
!   "TARGET_SSE"
    "#")
  
  (define_insn "*pushv8hi"
    [(set (match_operand:V8HI 0 "push_operand" "=<")
  	(match_operand:V8HI 1 "register_operand" "x"))]
!   "TARGET_SSE"
    "#")
  
  (define_insn "*pushv16qi"
    [(set (match_operand:V16QI 0 "push_operand" "=<")
  	(match_operand:V16QI 1 "register_operand" "x"))]
!   "TARGET_SSE"
    "#")
  
  (define_insn "*pushv4sf"
***************
*** 20218,20224 ****
  (define_insn "*pushv4si"
    [(set (match_operand:V4SI 0 "push_operand" "=<")
  	(match_operand:V4SI 1 "register_operand" "x"))]
!   "TARGET_SSE2"
    "#")
  
  (define_insn "*pushv2si"
--- 20265,20271 ----
  (define_insn "*pushv4si"
    [(set (match_operand:V4SI 0 "push_operand" "=<")
  	(match_operand:V4SI 1 "register_operand" "x"))]
!   "TARGET_SSE"
    "#")
  
  (define_insn "*pushv2si"
***************
*** 20242,20248 ****
  (define_insn "*pushv2sf"
    [(set (match_operand:V2SF 0 "push_operand" "=<")
  	(match_operand:V2SF 1 "register_operand" "y"))]
!   "TARGET_3DNOW"
    "#")
  
  (define_split
--- 20289,20295 ----
  (define_insn "*pushv2sf"
    [(set (match_operand:V2SF 0 "push_operand" "=<")
  	(match_operand:V2SF 1 "register_operand" "y"))]
!   "TARGET_MMX"
    "#")
  
  (define_split
***************
*** 20268,20274 ****
     operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
  
  
! (define_insn "movti_internal"
    [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
    "TARGET_SSE && !TARGET_64BIT
--- 20315,20321 ----
     operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
  
  
! (define_insn "*movti_internal"
    [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
    "TARGET_SSE && !TARGET_64BIT
***************
*** 22196,22202 ****
    [(set (match_operand:V2SI 0 "register_operand" "=y")
  	(gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
  		 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
!  "TARGET_3DNOW"
    "pfcmpgt\\t{%2, %0|%0, %2}"
    [(set_attr "type" "mmxcmp")
     (set_attr "mode" "V2SF")])
--- 22243,22249 ----
    [(set (match_operand:V2SI 0 "register_operand" "=y")
  	(gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
  		 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
!   "TARGET_3DNOW"
    "pfcmpgt\\t{%2, %0|%0, %2}"
    [(set_attr "type" "mmxcmp")
     (set_attr "mode" "V2SF")])


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]