This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix PR optimization/13424


Hi!

This patch attempts to maintain proper MEM attributes in IA-32/AMD64 string
operations (movstr*, clrstr*, strlen* and cmpstr*).  movstr* already did
some MEM attribute handling, but as it copied over the same attributes to
all MEMs in the sequence, it got MEM_OFFSET/MEM_ALIAS etc. wrong.

I had to partially undo Jan's change_address recent changes, so that
change_address sets NULL MEM_OFFSET etc. attributes if they were previously
different.  If some code doesn't need this behaviour, then it IMHO should
be changed to use other function.
E.g. cse.c does:
        else if (GET_CODE (addr) == ADDRESSOF)
          return change_address (x, VOIDmode, addr);
but at least as I understand it addr should contain the same address as
XEXP (x, 0), so I think MEM attributes don't need to be thrown out.
Not sure what the replacement for it would be, perhaps
replace_equiv_address (x, addr) or
adjust_automodify_address (x, VOIDmode, addr, 0).

Bootstrapped/regtested on IA-32, ok for mainline/3.4 (with 3.4 I'd wait a
few days).

2004-01-28  Jakub Jelinek  <jakub@redhat.com>

	PR optimization/13424
	* expr.c (store_constructor): Revert 2003-12-03 change.

	* emit-rtl.c (change_address): Check also if MEM_ATTRS is set as
	expected before returning early.  Avoid sharing RTL if they
	need to be changed.

	* config/i386/i386.c (ix86_expand_movstr): Rework rep_mov and strmov
	handling so that memory attributes are preserved.  Don't call
	ix86_set_move_mem_attrs.
	(ix86_set_move_mem_attrs_1, ix86_set_move_mem_attrs): Removed.
	(ix86_expand_clrstr): Rename src argument to
	dst.  Rework rep_stos and strset handling so that memory attributes
	are preserved.
	(ix86_expand_strlen): Pass src argument to
	ix86_expand_strlensi_unroll_1.  Rework strlenqi_1 handling so that
	memory attributes are preserved.
	(ix86_expand_strlensi_unroll_1): Add src argument.  Use
	change_address instead of gen_rtx_MEM.
	* config/i386/i386.md (strmov, strmov_singleop, rep_mov): New
	expanders.
	(strmovdi_rex64, strmovsi, strmovsi_rex64, strmovhi, strmovhi_rex64,
	strmovqi, strmovqi_rex64): Remove.
	(rep_mov*, strmov*): Prefix insn names with *.
	(strset, strset_singleop, rep_stos): New expanders.
	(strsetdi_rex64, strsetsi, strsetsi_rex64, strsethi, strsethi_rex64,
	strsetqi, strsetqi_rex64): Remove.
	(rep_stos*, strset*): Prefix insn names with *.
	(rep_stosqi_rex64): Likewise.  Fix mode of dirflag reg from DImode
	to SImode.
	(cmpstrsi): Rework cmpstrqi_1 handling so that memory attributes
	are preserved.
	(cmpstrqi_nz_1, cmpstrqi_nz_rex_1, cmpstrqi_1, cmpstrqi_rex_1):
	Prefix insn names with *.
	(cmpstrqi_nz_1, cmpstrqi_1): New expanders.
	(strlenqi_1, strlenqi_rex_1): Prefix insn names with *.
	(strlenqi_1): New expander.
	* config/i386/i386.h (ix86_set_move_mem_attrs): Remove prototype.

--- gcc/expr.c.jj	2004-01-26 11:34:31.000000000 +0100
+++ gcc/expr.c	2004-01-26 11:37:41.000000000 +0100
@@ -4616,10 +4616,7 @@ store_constructor (tree exp, rtx target,
 				       highest_pow2_factor (offset));
 	    }
 
-	  /* If the constructor has been cleared, setting RTX_UNCHANGING_P
-	     on the MEM might lead to scheduling the clearing after the
-	     store.  */
-	  if (TREE_READONLY (field) && !cleared)
+	  if (TREE_READONLY (field))
 	    {
 	      if (GET_CODE (to_rtx) == MEM)
 		to_rtx = copy_rtx (to_rtx);
--- gcc/emit-rtl.c.jj	2004-01-26 11:34:30.000000000 +0100
+++ gcc/emit-rtl.c	2004-01-28 19:17:21.427978060 +0100
@@ -1869,19 +1869,29 @@ change_address_1 (rtx memref, enum machi
 rtx
 change_address (rtx memref, enum machine_mode mode, rtx addr)
 {
-  rtx new = change_address_1 (memref, mode, addr, 1);
+  rtx new = change_address_1 (memref, mode, addr, 1), size;
   enum machine_mode mmode = GET_MODE (new);
+  unsigned int align;
+
+  size = mmode == BLKmode ? 0 : GEN_INT (GET_MODE_SIZE (mmode));
+  align = mmode == BLKmode ? BITS_PER_UNIT : GET_MODE_ALIGNMENT (mmode);
 
   /* If there are no changes, just return the original memory reference.  */
   if (new == memref)
-    return new;
+    {
+      if (MEM_ATTRS (memref) == 0
+	  || (MEM_EXPR (memref) == NULL
+	      && MEM_OFFSET (memref) == NULL
+	      && MEM_SIZE (memref) == size
+	      && MEM_ALIGN (memref) == align))
+	return new;
+
+      new = gen_rtx_MEM (mmode, addr);
+      MEM_COPY_ATTRIBUTES (new, memref);
+    }
 
   MEM_ATTRS (new)
-    = get_mem_attrs (MEM_ALIAS_SET (memref), 0, 0,
-		     mmode == BLKmode ? 0 : GEN_INT (GET_MODE_SIZE (mmode)),
-		     (mmode == BLKmode ? BITS_PER_UNIT
-		      : GET_MODE_ALIGNMENT (mmode)),
-		     mmode);
+    = get_mem_attrs (MEM_ALIAS_SET (memref), 0, 0, size, align, mmode);
 
   return new;
 }
--- gcc/config/i386/i386.c.jj	2004-01-26 11:34:38.000000000 +0100
+++ gcc/config/i386/i386.c	2004-01-28 17:52:04.478461044 +0100
@@ -813,12 +813,11 @@ static void ix86_emit_save_regs (void);
 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
-static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
 static void ix86_sched_reorder_ppro (rtx *, rtx *);
 static HOST_WIDE_INT ix86_GOT_alias_set (void);
 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
 static rtx ix86_expand_aligntest (rtx, int);
-static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
+static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
 static int ix86_issue_rate (void);
 static int ix86_adjust_cost (rtx, rtx, rtx, int);
 static void ix86_sched_init (FILE *, int, int);
@@ -11014,11 +11013,10 @@ ix86_zero_extend_to_Pmode (rtx exp)
 int
 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
 {
-  rtx srcreg, destreg, countreg;
+  rtx srcreg, destreg, countreg, srcexp, destexp;
   enum machine_mode counter_mode;
   HOST_WIDE_INT align = 0;
   unsigned HOST_WIDE_INT count = 0;
-  rtx insns;
 
   if (GET_CODE (align_exp) == CONST_INT)
     align = INTVAL (align_exp);
@@ -11047,28 +11045,27 @@ ix86_expand_movstr (rtx dst, rtx src, rt
   else
     counter_mode = DImode;
 
-  start_sequence ();
-
   if (counter_mode != SImode && counter_mode != DImode)
     abort ();
 
   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+  if (destreg != XEXP (dst, 0))
+    dst = replace_equiv_address_nv (dst, destreg);
   srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
-
-  emit_insn (gen_cld ());
+  if (srcreg != XEXP (src, 0))
+    src = replace_equiv_address_nv (src, srcreg);
 
   /* When optimizing for size emit simple rep ; movsb instruction for
      counts not divisible by 4.  */
 
   if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
     {
+      emit_insn (gen_cld ());
       countreg = ix86_zero_extend_to_Pmode (count_exp);
-      if (TARGET_64BIT)
-	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
-				        destreg, srcreg, countreg));
-      else
-	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
-				  destreg, srcreg, countreg));
+      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
+      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
+      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
+			      destexp, srcexp));
     }
 
   /* For constant aligned (or small unaligned) copies use rep movsl
@@ -11080,32 +11077,53 @@ ix86_expand_movstr (rtx dst, rtx src, rt
 	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
 	       || optimize_size || count < (unsigned int) 64))
     {
+      unsigned HOST_WIDE_INT offset = 0;
       int size = TARGET_64BIT && !optimize_size ? 8 : 4;
+      rtx srcmem, dstmem;
+
+      emit_insn (gen_cld ());
       if (count & ~(size - 1))
 	{
 	  countreg = copy_to_mode_reg (counter_mode,
 				       GEN_INT ((count >> (size == 4 ? 2 : 3))
 						& (TARGET_64BIT ? -1 : 0x3fffffff)));
 	  countreg = ix86_zero_extend_to_Pmode (countreg);
-	  if (size == 4)
-	    {
-	      if (TARGET_64BIT)
-		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
-					        destreg, srcreg, countreg));
-	      else
-		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
-					  destreg, srcreg, countreg));
-	    }
-	  else
-	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
-					    destreg, srcreg, countreg));
+	  
+	  destexp = gen_rtx_ASHIFT (Pmode, countreg,
+				    GEN_INT (size == 4 ? 2 : 3));
+	  srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
+	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+
+	  emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
+				  countreg, destexp, srcexp));
+	  offset = count & ~(size - 1);
 	}
       if (size == 8 && (count & 0x04))
-	emit_insn (gen_strmovsi (destreg, srcreg));
+	{
+	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
+						 offset);
+	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
+						 offset);
+	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+	  offset += 4;
+	}
       if (count & 0x02)
-	emit_insn (gen_strmovhi (destreg, srcreg));
+	{
+	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
+						 offset);
+	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
+						 offset);
+	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+	  offset += 2;
+	}
       if (count & 0x01)
-	emit_insn (gen_strmovqi (destreg, srcreg));
+	{
+	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
+						 offset);
+	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
+						 offset);
+	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+	}
     }
   /* The generic code based on the glibc implementation:
      - align destination to 4 bytes (8 byte alignment is used for PentiumPro
@@ -11116,9 +11134,13 @@ ix86_expand_movstr (rtx dst, rtx src, rt
     {
       rtx countreg2;
       rtx label = NULL;
+      rtx srcmem, dstmem;
       int desired_alignment = (TARGET_PENTIUMPRO
 			       && (count == 0 || count >= (unsigned int) 260)
 			       ? 8 : UNITS_PER_WORD);
+      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
+      dst = change_address (dst, BLKmode, destreg);
+      src = change_address (src, BLKmode, srcreg);
 
       /* In case we don't know anything about the alignment, default to
          library version, since it is usually equally fast and result in
@@ -11128,10 +11150,7 @@ ix86_expand_movstr (rtx dst, rtx src, rt
 	 will not be important.  */
       if (!TARGET_INLINE_ALL_STRINGOPS
 	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
-	{
-	  end_sequence ();
-	  return 0;
-	}
+	return 0;
 
       if (TARGET_SINGLE_STRINGOP)
 	emit_insn (gen_cld ());
@@ -11161,7 +11180,9 @@ ix86_expand_movstr (rtx dst, rtx src, rt
       if (align <= 1)
 	{
 	  rtx label = ix86_expand_aligntest (destreg, 1);
-	  emit_insn (gen_strmovqi (destreg, srcreg));
+	  srcmem = change_address (src, QImode, srcreg);
+	  dstmem = change_address (dst, QImode, destreg);
+	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
 	  ix86_adjust_counter (countreg, 1);
 	  emit_label (label);
 	  LABEL_NUSES (label) = 1;
@@ -11169,7 +11190,9 @@ ix86_expand_movstr (rtx dst, rtx src, rt
       if (align <= 2)
 	{
 	  rtx label = ix86_expand_aligntest (destreg, 2);
-	  emit_insn (gen_strmovhi (destreg, srcreg));
+	  srcmem = change_address (src, HImode, srcreg);
+	  dstmem = change_address (dst, HImode, destreg);
+	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
 	  ix86_adjust_counter (countreg, 2);
 	  emit_label (label);
 	  LABEL_NUSES (label) = 1;
@@ -11177,7 +11200,9 @@ ix86_expand_movstr (rtx dst, rtx src, rt
       if (align <= 4 && desired_alignment > 4)
 	{
 	  rtx label = ix86_expand_aligntest (destreg, 4);
-	  emit_insn (gen_strmovsi (destreg, srcreg));
+	  srcmem = change_address (src, SImode, srcreg);
+	  dstmem = change_address (dst, SImode, destreg);
+	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
 	  ix86_adjust_counter (countreg, 4);
 	  emit_label (label);
 	  LABEL_NUSES (label) = 1;
@@ -11195,15 +11220,17 @@ ix86_expand_movstr (rtx dst, rtx src, rt
 	{
 	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
 				  GEN_INT (3)));
-	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
-					  destreg, srcreg, countreg2));
+	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
 	}
       else
 	{
-	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
-	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
-				    destreg, srcreg, countreg2));
+	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
+	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
 	}
+      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
+      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
+			      countreg2, destexp, srcexp));
 
       if (label)
 	{
@@ -11211,48 +11238,61 @@ ix86_expand_movstr (rtx dst, rtx src, rt
 	  LABEL_NUSES (label) = 1;
 	}
       if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
-	emit_insn (gen_strmovsi (destreg, srcreg));
+	{
+	  srcmem = change_address (src, SImode, srcreg);
+	  dstmem = change_address (dst, SImode, destreg);
+	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+	}
       if ((align <= 4 || count == 0) && TARGET_64BIT)
 	{
 	  rtx label = ix86_expand_aligntest (countreg, 4);
-	  emit_insn (gen_strmovsi (destreg, srcreg));
+	  srcmem = change_address (src, SImode, srcreg);
+	  dstmem = change_address (dst, SImode, destreg);
+	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
 	  emit_label (label);
 	  LABEL_NUSES (label) = 1;
 	}
       if (align > 2 && count != 0 && (count & 2))
-	emit_insn (gen_strmovhi (destreg, srcreg));
+	{
+	  srcmem = change_address (src, HImode, srcreg);
+	  dstmem = change_address (dst, HImode, destreg);
+	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+	}
       if (align <= 2 || count == 0)
 	{
 	  rtx label = ix86_expand_aligntest (countreg, 2);
-	  emit_insn (gen_strmovhi (destreg, srcreg));
+	  srcmem = change_address (src, HImode, srcreg);
+	  dstmem = change_address (dst, HImode, destreg);
+	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
 	  emit_label (label);
 	  LABEL_NUSES (label) = 1;
 	}
       if (align > 1 && count != 0 && (count & 1))
-	emit_insn (gen_strmovqi (destreg, srcreg));
+	{
+	  srcmem = change_address (src, QImode, srcreg);
+	  dstmem = change_address (dst, QImode, destreg);
+	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+	}
       if (align <= 1 || count == 0)
 	{
 	  rtx label = ix86_expand_aligntest (countreg, 1);
-	  emit_insn (gen_strmovqi (destreg, srcreg));
+	  srcmem = change_address (src, QImode, srcreg);
+	  dstmem = change_address (dst, QImode, destreg);
+	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
 	  emit_label (label);
 	  LABEL_NUSES (label) = 1;
 	}
     }
 
-  insns = get_insns ();
-  end_sequence ();
-
-  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
-  emit_insn (insns);
   return 1;
 }
 
 /* Expand string clear operation (bzero).  Use i386 string operations when
    profitable.  expand_movstr contains similar code.  */
 int
-ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
+ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
 {
-  rtx destreg, zeroreg, countreg;
+  rtx destreg, zeroreg, countreg, destexp;
   enum machine_mode counter_mode;
   HOST_WIDE_INT align = 0;
   unsigned HOST_WIDE_INT count = 0;
@@ -11283,7 +11323,9 @@ ix86_expand_clrstr (rtx src, rtx count_e
   else
     counter_mode = DImode;
 
-  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
+  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+  if (destreg != XEXP (dst, 0))
+    dst = replace_equiv_address_nv (dst, destreg);
 
   emit_insn (gen_cld ());
 
@@ -11294,12 +11336,8 @@ ix86_expand_clrstr (rtx src, rtx count_e
     {
       countreg = ix86_zero_extend_to_Pmode (count_exp);
       zeroreg = copy_to_mode_reg (QImode, const0_rtx);
-      if (TARGET_64BIT)
-	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
-				         destreg, countreg));
-      else
-	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
-				   destreg, countreg));
+      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
+      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
     }
   else if (count != 0
 	   && (align >= 8
@@ -11307,6 +11345,8 @@ ix86_expand_clrstr (rtx src, rtx count_e
 	       || optimize_size || count < (unsigned int) 64))
     {
       int size = TARGET_64BIT && !optimize_size ? 8 : 4;
+      unsigned HOST_WIDE_INT offset = 0;
+
       zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
       if (count & ~(size - 1))
 	{
@@ -11314,28 +11354,34 @@ ix86_expand_clrstr (rtx src, rtx count_e
 				       GEN_INT ((count >> (size == 4 ? 2 : 3))
 						& (TARGET_64BIT ? -1 : 0x3fffffff)));
 	  countreg = ix86_zero_extend_to_Pmode (countreg);
-	  if (size == 4)
-	    {
-	      if (TARGET_64BIT)
-		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
-					         destreg, countreg));
-	      else
-		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
-					   destreg, countreg));
-	    }
-	  else
-	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
-					     destreg, countreg));
+	  destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
+	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+	  emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
+	  offset = count & ~(size - 1);
 	}
       if (size == 8 && (count & 0x04))
-	emit_insn (gen_strsetsi (destreg,
+	{
+	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
+						  offset);
+	  emit_insn (gen_strset (destreg, mem,
 				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
+	  offset += 4;
+	}
       if (count & 0x02)
-	emit_insn (gen_strsethi (destreg,
+	{
+	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
+						  offset);
+	  emit_insn (gen_strset (destreg, mem,
 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
+	  offset += 2;
+	}
       if (count & 0x01)
-	emit_insn (gen_strsetqi (destreg,
+	{
+	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
+						  offset);
+	  emit_insn (gen_strset (destreg, mem,
 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
+	}
     }
   else
     {
@@ -11362,6 +11408,8 @@ ix86_expand_clrstr (rtx src, rtx count_e
       countreg2 = gen_reg_rtx (Pmode);
       countreg = copy_to_mode_reg (counter_mode, count_exp);
       zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
+      /* Get rid of MEM_OFFSET, it won't be accurate.  */
+      dst = change_address (dst, BLKmode, destreg);
 
       if (count == 0 && align < desired_alignment)
 	{
@@ -11372,8 +11420,8 @@ ix86_expand_clrstr (rtx src, rtx count_e
       if (align <= 1)
 	{
 	  rtx label = ix86_expand_aligntest (destreg, 1);
-	  emit_insn (gen_strsetqi (destreg,
-				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
+	  emit_insn (gen_strset (destreg, dst,
+				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
 	  ix86_adjust_counter (countreg, 1);
 	  emit_label (label);
 	  LABEL_NUSES (label) = 1;
@@ -11381,8 +11429,8 @@ ix86_expand_clrstr (rtx src, rtx count_e
       if (align <= 2)
 	{
 	  rtx label = ix86_expand_aligntest (destreg, 2);
-	  emit_insn (gen_strsethi (destreg,
-				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
+	  emit_insn (gen_strset (destreg, dst,
+				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
 	  ix86_adjust_counter (countreg, 2);
 	  emit_label (label);
 	  LABEL_NUSES (label) = 1;
@@ -11390,9 +11438,10 @@ ix86_expand_clrstr (rtx src, rtx count_e
       if (align <= 4 && desired_alignment > 4)
 	{
 	  rtx label = ix86_expand_aligntest (destreg, 4);
-	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
-					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
-					     : zeroreg)));
+	  emit_insn (gen_strset (destreg, dst,
+				 (TARGET_64BIT
+				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
+				  : zeroreg)));
 	  ix86_adjust_counter (countreg, 4);
 	  emit_label (label);
 	  LABEL_NUSES (label) = 1;
@@ -11411,15 +11460,16 @@ ix86_expand_clrstr (rtx src, rtx count_e
 	{
 	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
 				  GEN_INT (3)));
-	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
-					   destreg, countreg2));
+	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
 	}
       else
 	{
-	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
-	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
-				     destreg, countreg2));
+	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
+	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
 	}
+      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
+
       if (label)
 	{
 	  emit_label (label);
@@ -11427,41 +11477,42 @@ ix86_expand_clrstr (rtx src, rtx count_e
 	}
 
       if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
-	emit_insn (gen_strsetsi (destreg,
-				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
+	emit_insn (gen_strset (destreg, dst,
+			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
       if (TARGET_64BIT && (align <= 4 || count == 0))
 	{
 	  rtx label = ix86_expand_aligntest (countreg, 4);
-	  emit_insn (gen_strsetsi (destreg,
-				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
+	  emit_insn (gen_strset (destreg, dst,
+				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
 	  emit_label (label);
 	  LABEL_NUSES (label) = 1;
 	}
       if (align > 2 && count != 0 && (count & 2))
-	emit_insn (gen_strsethi (destreg,
-				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
+	emit_insn (gen_strset (destreg, dst,
+			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
       if (align <= 2 || count == 0)
 	{
 	  rtx label = ix86_expand_aligntest (countreg, 2);
-	  emit_insn (gen_strsethi (destreg,
-				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
+	  emit_insn (gen_strset (destreg, dst,
+				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
 	  emit_label (label);
 	  LABEL_NUSES (label) = 1;
 	}
       if (align > 1 && count != 0 && (count & 1))
-	emit_insn (gen_strsetqi (destreg,
-				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
+	emit_insn (gen_strset (destreg, dst,
+			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
       if (align <= 1 || count == 0)
 	{
 	  rtx label = ix86_expand_aligntest (countreg, 1);
-	  emit_insn (gen_strsetqi (destreg,
-				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
+	  emit_insn (gen_strset (destreg, dst,
+				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
 	  emit_label (label);
 	  LABEL_NUSES (label) = 1;
 	}
     }
   return 1;
 }
+
 /* Expand strlen.  */
 int
 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
@@ -11493,7 +11544,7 @@ ix86_expand_strlen (rtx out, rtx src, rt
 
       emit_move_insn (out, addr);
 
-      ix86_expand_strlensi_unroll_1 (out, align);
+      ix86_expand_strlensi_unroll_1 (out, src, align);
 
       /* strlensi_unroll_1 returns the address of the zero at the end of
          the string, like memchr(), so compute the length by subtracting
@@ -11505,6 +11556,7 @@ ix86_expand_strlen (rtx out, rtx src, rt
     }
   else
     {
+      rtx unspec;
       scratch2 = gen_reg_rtx (Pmode);
       scratch3 = gen_reg_rtx (Pmode);
       scratch4 = force_reg (Pmode, constm1_rtx);
@@ -11513,17 +11565,19 @@ ix86_expand_strlen (rtx out, rtx src, rt
       eoschar = force_reg (QImode, eoschar);
 
       emit_insn (gen_cld ());
+      src = replace_equiv_address_nv (src, scratch3);
+
+      /* If .md starts supporting :P, this can be done in .md.  */
+      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
+						 scratch4), UNSPEC_SCAS);
+      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
       if (TARGET_64BIT)
 	{
-	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
-					 align, scratch4, scratch3));
 	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
 	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
 	}
       else
 	{
-	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
-				     align, scratch4, scratch3));
 	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
 	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
 	}
@@ -11543,7 +11597,7 @@ ix86_expand_strlen (rtx out, rtx src, rt
    some address computing at the end.  These things are done in i386.md.  */
 
 static void
-ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
+ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
 {
   int align;
   rtx tmp;
@@ -11596,7 +11650,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, 
 				   Pmode, 1, align_4_label);
         }
 
-      mem = gen_rtx_MEM (QImode, out);
+      mem = change_address (src, QImode, out);
 
       /* Now compare the bytes.  */
 
@@ -11640,7 +11694,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, 
      speed up.  */
   emit_label (align_4_label);
 
-  mem = gen_rtx_MEM (SImode, out);
+  mem = change_address (src, SImode, out);
   emit_move_insn (scratch, mem);
   if (TARGET_64BIT)
     emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
@@ -12505,50 +12559,6 @@ ia32_multipass_dfa_lookahead (void)
 }
 
 
-/* Walk through INSNS and look for MEM references whose address is DSTREG or
-   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
-   appropriate.  */
-
-void
-ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
-			 rtx srcreg)
-{
-  rtx insn;
-
-  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
-    if (INSN_P (insn))
-      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
-				 dstreg, srcreg);
-}
-
-/* Subroutine of above to actually do the updating by recursively walking
-   the rtx.  */
-
-static void
-ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
-			   rtx srcreg)
-{
-  enum rtx_code code = GET_CODE (x);
-  const char *format_ptr = GET_RTX_FORMAT (code);
-  int i, j;
-
-  if (code == MEM && XEXP (x, 0) == dstreg)
-    MEM_COPY_ATTRIBUTES (x, dstref);
-  else if (code == MEM && XEXP (x, 0) == srcreg)
-    MEM_COPY_ATTRIBUTES (x, srcref);
-
-  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
-    {
-      if (*format_ptr == 'e')
-	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
-				   dstreg, srcreg);
-      else if (*format_ptr == 'E')
-	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
-	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
-				     dstreg, srcreg);
-    }
-}
-
 /* Compute the alignment given to a constant that is being placed in memory.
    EXP is the constant and ALIGN is the alignment that the object would
    ordinarily have.
--- gcc/config/i386/i386.md.jj	2004-01-23 14:23:35.000000000 +0100
+++ gcc/config/i386/i386.md	2004-01-28 19:58:07.698884522 +0100
@@ -15357,170 +15357,45 @@
 ;; Most CPUs don't like single string operations
 ;; Handle this case here to simplify previous expander.
 
-(define_expand "strmovdi_rex64"
-  [(set (match_dup 2)
-  	(mem:DI (match_operand:DI 1 "register_operand" "")))
-   (set (mem:DI (match_operand:DI 0 "register_operand" ""))
-        (match_dup 2))
-   (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 8)))
-	      (clobber (reg:CC 17))])
-   (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 8)))
-	      (clobber (reg:CC 17))])]
-  "TARGET_64BIT"
-{
-  if (TARGET_SINGLE_STRINGOP || optimize_size)
-    {
-      emit_insn (gen_strmovdi_rex_1 (operands[0], operands[1], operands[0],
-				     operands[1]));
-      DONE;
-    }
-  else 
-    operands[2] = gen_reg_rtx (DImode);
-})
-
-
-(define_expand "strmovsi"
-  [(set (match_dup 2)
-  	(mem:SI (match_operand:SI 1 "register_operand" "")))
-   (set (mem:SI (match_operand:SI 0 "register_operand" ""))
-        (match_dup 2))
-   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 4)))
+(define_expand "strmov"
+  [(set (match_dup 4) (match_operand 3 "memory_operand" ""))
+   (set (match_operand 1 "memory_operand" "") (match_dup 4))
+   (parallel [(set (match_operand 0 "register_operand" "") (match_dup 5))
 	      (clobber (reg:CC 17))])
-   (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))
+   (parallel [(set (match_operand 2 "register_operand" "") (match_dup 6))
 	      (clobber (reg:CC 17))])]
   ""
 {
-  if (TARGET_64BIT)
-    {
-      emit_insn (gen_strmovsi_rex64 (operands[0], operands[1]));
-      DONE;
-    }
-  if (TARGET_SINGLE_STRINGOP || optimize_size)
-    {
-      emit_insn (gen_strmovsi_1 (operands[0], operands[1], operands[0],
-				operands[1]));
-      DONE;
-    }
-  else 
-    operands[2] = gen_reg_rtx (SImode);
-})
+  rtx adjust = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[1])));
 
-(define_expand "strmovsi_rex64"
-  [(set (match_dup 2)
-  	(mem:SI (match_operand:DI 1 "register_operand" "")))
-   (set (mem:SI (match_operand:DI 0 "register_operand" ""))
-        (match_dup 2))
-   (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 4)))
-	      (clobber (reg:CC 17))])
-   (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 4)))
-	      (clobber (reg:CC 17))])]
-  "TARGET_64BIT"
-{
-  if (TARGET_SINGLE_STRINGOP || optimize_size)
-    {
-      emit_insn (gen_strmovsi_rex_1 (operands[0], operands[1], operands[0],
-				     operands[1]));
-      DONE;
-    }
-  else 
-    operands[2] = gen_reg_rtx (SImode);
-})
+  /* If .md ever supports :P for Pmode, these can be directly
+     in the pattern above.  */
+  operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust);
+  operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust);
 
-(define_expand "strmovhi"
-  [(set (match_dup 2)
-  	(mem:HI (match_operand:SI 1 "register_operand" "")))
-   (set (mem:HI (match_operand:SI 0 "register_operand" ""))
-        (match_dup 2))
-   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2)))
-	      (clobber (reg:CC 17))])
-   (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 2)))
-	      (clobber (reg:CC 17))])]
-  ""
-{
-  if (TARGET_64BIT)
-    {
-      emit_insn (gen_strmovhi_rex64 (operands[0], operands[1]));
-      DONE;
-    }
   if (TARGET_SINGLE_STRINGOP || optimize_size)
     {
-      emit_insn (gen_strmovhi_1 (operands[0], operands[1], operands[0],
-				operands[1]));
+      emit_insn (gen_strmov_singleop (operands[0], operands[1],
+				      operands[2], operands[3],
+				      operands[5], operands[6]));
       DONE;
     }
-  else 
-    operands[2] = gen_reg_rtx (HImode);
-})
 
-(define_expand "strmovhi_rex64"
-  [(set (match_dup 2)
-  	(mem:HI (match_operand:DI 1 "register_operand" "")))
-   (set (mem:HI (match_operand:DI 0 "register_operand" ""))
-        (match_dup 2))
-   (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 2)))
-	      (clobber (reg:CC 17))])
-   (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 2)))
-	      (clobber (reg:CC 17))])]
-  "TARGET_64BIT"
-{
-  if (TARGET_SINGLE_STRINGOP || optimize_size)
-    {
-      emit_insn (gen_strmovhi_rex_1 (operands[0], operands[1], operands[0],
-				     operands[1]));
-      DONE;
-    }
-  else 
-    operands[2] = gen_reg_rtx (HImode);
-})
-
-(define_expand "strmovqi"
-  [(set (match_dup 2)
-  	(mem:QI (match_operand:SI 1 "register_operand" "")))
-   (set (mem:QI (match_operand:SI 0 "register_operand" ""))
-        (match_dup 2))
-   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
-	      (clobber (reg:CC 17))])
-   (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 1)))
-	      (clobber (reg:CC 17))])]
-  ""
-{
-  if (TARGET_64BIT)
-    {
-      emit_insn (gen_strmovqi_rex64 (operands[0], operands[1]));
-      DONE;
-    }
-  if (TARGET_SINGLE_STRINGOP || optimize_size)
-    {
-      emit_insn (gen_strmovqi_1 (operands[0], operands[1], operands[0],
-				operands[1]));
-      DONE;
-    }
-  else 
-    operands[2] = gen_reg_rtx (QImode);
+  operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
 })
 
-(define_expand "strmovqi_rex64"
-  [(set (match_dup 2)
-  	(mem:QI (match_operand:DI 1 "register_operand" "")))
-   (set (mem:QI (match_operand:DI 0 "register_operand" ""))
-        (match_dup 2))
-   (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1)))
-	      (clobber (reg:CC 17))])
-   (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 1)))
-	      (clobber (reg:CC 17))])]
-  "TARGET_64BIT"
-{
-  if (TARGET_SINGLE_STRINGOP || optimize_size)
-    {
-      emit_insn (gen_strmovqi_rex_1 (operands[0], operands[1], operands[0],
-				     operands[1]));
-      DONE;
-    }
-  else 
-    operands[2] = gen_reg_rtx (QImode);
-})
+(define_expand "strmov_singleop"
+  [(parallel [(set (match_operand 1 "memory_operand" "")
+		   (match_operand 3 "memory_operand" ""))
+	      (set (match_operand 0 "register_operand" "")
+		   (match_operand 4 "" ""))
+	      (set (match_operand 2 "register_operand" "")
+		   (match_operand 5 "" ""))
+	      (use (reg:SI 19))])]
+  "TARGET_SINGLE_STRINGOP || optimize_size"
+  "")
 
-(define_insn "strmovdi_rex_1"
+(define_insn "*strmovdi_rex_1"
   [(set (mem:DI (match_operand:DI 2 "register_operand" "0"))
 	(mem:DI (match_operand:DI 3 "register_operand" "1")))
    (set (match_operand:DI 0 "register_operand" "=D")
@@ -15536,7 +15411,7 @@
    (set_attr "mode" "DI")
    (set_attr "memory" "both")])
 
-(define_insn "strmovsi_1"
+(define_insn "*strmovsi_1"
   [(set (mem:SI (match_operand:SI 2 "register_operand" "0"))
 	(mem:SI (match_operand:SI 3 "register_operand" "1")))
    (set (match_operand:SI 0 "register_operand" "=D")
@@ -15552,7 +15427,7 @@
    (set_attr "mode" "SI")
    (set_attr "memory" "both")])
 
-(define_insn "strmovsi_rex_1"
+(define_insn "*strmovsi_rex_1"
   [(set (mem:SI (match_operand:DI 2 "register_operand" "0"))
 	(mem:SI (match_operand:DI 3 "register_operand" "1")))
    (set (match_operand:DI 0 "register_operand" "=D")
@@ -15568,7 +15443,7 @@
    (set_attr "mode" "SI")
    (set_attr "memory" "both")])
 
-(define_insn "strmovhi_1"
+(define_insn "*strmovhi_1"
   [(set (mem:HI (match_operand:SI 2 "register_operand" "0"))
 	(mem:HI (match_operand:SI 3 "register_operand" "1")))
    (set (match_operand:SI 0 "register_operand" "=D")
@@ -15584,7 +15459,7 @@
    (set_attr "memory" "both")
    (set_attr "mode" "HI")])
 
-(define_insn "strmovhi_rex_1"
+(define_insn "*strmovhi_rex_1"
   [(set (mem:HI (match_operand:DI 2 "register_operand" "0"))
 	(mem:HI (match_operand:DI 3 "register_operand" "1")))
    (set (match_operand:DI 0 "register_operand" "=D")
@@ -15600,7 +15475,7 @@
    (set_attr "memory" "both")
    (set_attr "mode" "HI")])
 
-(define_insn "strmovqi_1"
+(define_insn "*strmovqi_1"
   [(set (mem:QI (match_operand:SI 2 "register_operand" "0"))
 	(mem:QI (match_operand:SI 3 "register_operand" "1")))
    (set (match_operand:SI 0 "register_operand" "=D")
@@ -15616,7 +15491,7 @@
    (set_attr "memory" "both")
    (set_attr "mode" "QI")])
 
-(define_insn "strmovqi_rex_1"
+(define_insn "*strmovqi_rex_1"
   [(set (mem:QI (match_operand:DI 2 "register_operand" "0"))
 	(mem:QI (match_operand:DI 3 "register_operand" "1")))
    (set (match_operand:DI 0 "register_operand" "=D")
@@ -15632,7 +15507,20 @@
    (set_attr "memory" "both")
    (set_attr "mode" "QI")])
 
-(define_insn "rep_movdi_rex64"
+(define_expand "rep_mov"
+  [(parallel [(set (match_operand 4 "register_operand" "") (const_int 0))
+	      (set (match_operand 0 "register_operand" "")
+		   (match_operand 5 "" ""))
+	      (set (match_operand 2 "register_operand" "")
+		   (match_operand 6 "" ""))
+	      (set (match_operand 1 "memory_operand" "")
+		   (match_operand 3 "memory_operand" ""))
+	      (use (match_dup 4))
+	      (use (reg:SI 19))])]
+  ""
+  "")
+
+(define_insn "*rep_movdi_rex64"
   [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
    (set (match_operand:DI 0 "register_operand" "=D") 
         (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
@@ -15652,7 +15540,7 @@
    (set_attr "memory" "both")
    (set_attr "mode" "DI")])
 
-(define_insn "rep_movsi"
+(define_insn "*rep_movsi"
   [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
    (set (match_operand:SI 0 "register_operand" "=D") 
         (plus:SI (ashift:SI (match_operand:SI 5 "register_operand" "2")
@@ -15672,7 +15560,7 @@
    (set_attr "memory" "both")
    (set_attr "mode" "SI")])
 
-(define_insn "rep_movsi_rex64"
+(define_insn "*rep_movsi_rex64"
   [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
    (set (match_operand:DI 0 "register_operand" "=D") 
         (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
@@ -15692,7 +15580,7 @@
    (set_attr "memory" "both")
    (set_attr "mode" "SI")])
 
-(define_insn "rep_movqi"
+(define_insn "*rep_movqi"
   [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
    (set (match_operand:SI 0 "register_operand" "=D") 
         (plus:SI (match_operand:SI 3 "register_operand" "0")
@@ -15710,7 +15598,7 @@
    (set_attr "memory" "both")
    (set_attr "mode" "SI")])
 
-(define_insn "rep_movqi_rex64"
+(define_insn "*rep_movqi_rex64"
   [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
    (set (match_operand:DI 0 "register_operand" "=D") 
         (plus:DI (match_operand:DI 3 "register_operand" "0")
@@ -15755,120 +15643,40 @@
 ;; Most CPUs don't like single string operations
 ;; Handle this case here to simplify previous expander.
 
-(define_expand "strsetdi_rex64"
-  [(set (mem:DI (match_operand:DI 0 "register_operand" ""))
-	(match_operand:DI 1 "register_operand" ""))
-   (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 8)))
-	      (clobber (reg:CC 17))])]
-  "TARGET_64BIT"
-{
-  if (TARGET_SINGLE_STRINGOP || optimize_size)
-    {
-      emit_insn (gen_strsetdi_rex_1 (operands[0], operands[0], operands[1]));
-      DONE;
-    }
-})
-
-(define_expand "strsetsi"
-  [(set (mem:SI (match_operand:SI 0 "register_operand" ""))
-	(match_operand:SI 1 "register_operand" ""))
-   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 4)))
+(define_expand "strset"
+  [(set (match_operand 1 "memory_operand" "")
+	(match_operand 2 "register_operand" ""))
+   (parallel [(set (match_operand 0 "register_operand" "")
+		   (match_dup 3))
 	      (clobber (reg:CC 17))])]
   ""
 {
-  if (TARGET_64BIT)
-    {
-      emit_insn (gen_strsetsi_rex64 (operands[0], operands[1]));
-      DONE;
-    }
-  else if (TARGET_SINGLE_STRINGOP || optimize_size)
-    {
-      emit_insn (gen_strsetsi_1 (operands[0], operands[0], operands[1]));
-      DONE;
-    }
-})
+  if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
+    operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);
 
-(define_expand "strsetsi_rex64"
-  [(set (mem:SI (match_operand:DI 0 "register_operand" ""))
-	(match_operand:SI 1 "register_operand" ""))
-   (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 4)))
-	      (clobber (reg:CC 17))])]
-  "TARGET_64BIT"
-{
-  if (TARGET_SINGLE_STRINGOP || optimize_size)
-    {
-      emit_insn (gen_strsetsi_rex_1 (operands[0], operands[0], operands[1]));
-      DONE;
-    }
-})
-
-(define_expand "strsethi"
-  [(set (mem:HI (match_operand:SI 0 "register_operand" ""))
-	(match_operand:HI 1 "register_operand" ""))
-   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2)))
-	      (clobber (reg:CC 17))])]
-  ""
-{
-  if (TARGET_64BIT)
-    {
-      emit_insn (gen_strsethi_rex64 (operands[0], operands[1]));
-      DONE;
-    }
-  else if (TARGET_SINGLE_STRINGOP || optimize_size)
-    {
-      emit_insn (gen_strsethi_1 (operands[0], operands[0], operands[1]));
-      DONE;
-    }
-})
-
-(define_expand "strsethi_rex64"
-  [(set (mem:HI (match_operand:DI 0 "register_operand" ""))
-	(match_operand:HI 1 "register_operand" ""))
-   (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 2)))
-	      (clobber (reg:CC 17))])]
-  "TARGET_64BIT"
-{
+  /* If .md ever supports :P for Pmode, this can be directly
+     in the pattern above.  */
+  operands[3] = gen_rtx_PLUS (Pmode, operands[0],
+			      GEN_INT (GET_MODE_SIZE (GET_MODE
+						      (operands[2]))));
   if (TARGET_SINGLE_STRINGOP || optimize_size)
     {
-      emit_insn (gen_strsethi_rex_1 (operands[0], operands[0], operands[1]));
+      emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
+				      operands[3]));
       DONE;
     }
 })
 
-(define_expand "strsetqi"
-  [(set (mem:QI (match_operand:SI 0 "register_operand" ""))
-	(match_operand:QI 1 "register_operand" ""))
-   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
-	      (clobber (reg:CC 17))])]
-  ""
-{
-  if (TARGET_64BIT)
-    {
-      emit_insn (gen_strsetqi_rex64 (operands[0], operands[1]));
-      DONE;
-    }
-  else if (TARGET_SINGLE_STRINGOP || optimize_size)
-    {
-      emit_insn (gen_strsetqi_1 (operands[0], operands[0], operands[1]));
-      DONE;
-    }
-})
-
-(define_expand "strsetqi_rex64"
-  [(set (mem:QI (match_operand:DI 0 "register_operand" ""))
-	(match_operand:QI 1 "register_operand" ""))
-   (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1)))
-	      (clobber (reg:CC 17))])]
-  "TARGET_64BIT"
-{
-  if (TARGET_SINGLE_STRINGOP || optimize_size)
-    {
-      emit_insn (gen_strsetqi_rex_1 (operands[0], operands[0], operands[1]));
-      DONE;
-    }
-})
+(define_expand "strset_singleop"
+  [(parallel [(set (match_operand 1 "memory_operand" "")
+		   (match_operand 2 "register_operand" ""))
+	      (set (match_operand 0 "register_operand" "")
+		   (match_operand 3 "" ""))
+	      (use (reg:SI 19))])]
+  "TARGET_SINGLE_STRINGOP || optimize_size"
+  "")
 
-(define_insn "strsetdi_rex_1"
+(define_insn "*strsetdi_rex_1"
   [(set (mem:SI (match_operand:DI 1 "register_operand" "0"))
 	(match_operand:SI 2 "register_operand" "a"))
    (set (match_operand:DI 0 "register_operand" "=D")
@@ -15881,7 +15689,7 @@
    (set_attr "memory" "store")
    (set_attr "mode" "DI")])
 
-(define_insn "strsetsi_1"
+(define_insn "*strsetsi_1"
   [(set (mem:SI (match_operand:SI 1 "register_operand" "0"))
 	(match_operand:SI 2 "register_operand" "a"))
    (set (match_operand:SI 0 "register_operand" "=D")
@@ -15894,7 +15702,7 @@
    (set_attr "memory" "store")
    (set_attr "mode" "SI")])
 
-(define_insn "strsetsi_rex_1"
+(define_insn "*strsetsi_rex_1"
   [(set (mem:SI (match_operand:DI 1 "register_operand" "0"))
 	(match_operand:SI 2 "register_operand" "a"))
    (set (match_operand:DI 0 "register_operand" "=D")
@@ -15907,7 +15715,7 @@
    (set_attr "memory" "store")
    (set_attr "mode" "SI")])
 
-(define_insn "strsethi_1"
+(define_insn "*strsethi_1"
   [(set (mem:HI (match_operand:SI 1 "register_operand" "0"))
 	(match_operand:HI 2 "register_operand" "a"))
    (set (match_operand:SI 0 "register_operand" "=D")
@@ -15920,7 +15728,7 @@
    (set_attr "memory" "store")
    (set_attr "mode" "HI")])
 
-(define_insn "strsethi_rex_1"
+(define_insn "*strsethi_rex_1"
   [(set (mem:HI (match_operand:DI 1 "register_operand" "0"))
 	(match_operand:HI 2 "register_operand" "a"))
    (set (match_operand:DI 0 "register_operand" "=D")
@@ -15933,7 +15741,7 @@
    (set_attr "memory" "store")
    (set_attr "mode" "HI")])
 
-(define_insn "strsetqi_1"
+(define_insn "*strsetqi_1"
   [(set (mem:QI (match_operand:SI 1 "register_operand" "0"))
 	(match_operand:QI 2 "register_operand" "a"))
    (set (match_operand:SI 0 "register_operand" "=D")
@@ -15946,7 +15754,7 @@
    (set_attr "memory" "store")
    (set_attr "mode" "QI")])
 
-(define_insn "strsetqi_rex_1"
+(define_insn "*strsetqi_rex_1"
   [(set (mem:QI (match_operand:DI 1 "register_operand" "0"))
 	(match_operand:QI 2 "register_operand" "a"))
    (set (match_operand:DI 0 "register_operand" "=D")
@@ -15959,7 +15767,18 @@
    (set_attr "memory" "store")
    (set_attr "mode" "QI")])
 
-(define_insn "rep_stosdi_rex64"
+(define_expand "rep_stos"
+  [(parallel [(set (match_operand 1 "register_operand" "") (const_int 0))
+	      (set (match_operand 0 "register_operand" "")
+		   (match_operand 4 "" ""))
+	      (set (match_operand 2 "memory_operand" "") (const_int 0))
+	      (use (match_operand 3 "register_operand" ""))
+	      (use (match_dup 1))
+	      (use (reg:SI 19))])]
+  ""
+  "")
+
+(define_insn "*rep_stosdi_rex64"
   [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
    (set (match_operand:DI 0 "register_operand" "=D") 
         (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
@@ -15977,7 +15796,7 @@
    (set_attr "memory" "store")
    (set_attr "mode" "DI")])
 
-(define_insn "rep_stossi"
+(define_insn "*rep_stossi"
   [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
    (set (match_operand:SI 0 "register_operand" "=D") 
         (plus:SI (ashift:SI (match_operand:SI 4 "register_operand" "1")
@@ -15995,7 +15814,7 @@
    (set_attr "memory" "store")
    (set_attr "mode" "SI")])
 
-(define_insn "rep_stossi_rex64"
+(define_insn "*rep_stossi_rex64"
   [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
    (set (match_operand:DI 0 "register_operand" "=D") 
         (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
@@ -16013,7 +15832,7 @@
    (set_attr "memory" "store")
    (set_attr "mode" "SI")])
 
-(define_insn "rep_stosqi"
+(define_insn "*rep_stosqi"
   [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
    (set (match_operand:SI 0 "register_operand" "=D") 
         (plus:SI (match_operand:SI 3 "register_operand" "0")
@@ -16030,7 +15849,7 @@
    (set_attr "memory" "store")
    (set_attr "mode" "QI")])
 
-(define_insn "rep_stosqi_rex64"
+(define_insn "*rep_stosqi_rex64"
   [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
    (set (match_operand:DI 0 "register_operand" "=D") 
         (plus:DI (match_operand:DI 3 "register_operand" "0")
@@ -16039,7 +15858,7 @@
 	(const_int 0))
    (use (match_operand:QI 2 "register_operand" "a"))
    (use (match_dup 4))
-   (use (reg:DI 19))]
+   (use (reg:SI 19))]
   "TARGET_64BIT"
   "{rep\;stosb|rep stosb}"
   [(set_attr "type" "str")
@@ -16067,7 +15886,11 @@
 
   addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
   addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0));
-  
+  if (addr1 != XEXP (operands[1], 0))
+    operands[1] = replace_equiv_address_nv (operands[1], addr1);
+  if (addr2 != XEXP (operands[2], 0))
+    operands[2] = replace_equiv_address_nv (operands[2], addr2);
+
   count = operands[3];
   countreg = ix86_zero_extend_to_Pmode (count);
 
@@ -16084,27 +15907,17 @@
 	  emit_move_insn (operands[0], const0_rtx);
 	  DONE;
 	}
-      if (TARGET_64BIT)
-	emit_insn (gen_cmpstrqi_nz_rex_1 (addr1, addr2, countreg, align,
-					  addr1, addr2, countreg));
-      else
-	emit_insn (gen_cmpstrqi_nz_1 (addr1, addr2, countreg, align,
-				      addr1, addr2, countreg));
+      emit_insn (gen_cmpstrqi_nz_1 (addr1, addr2, countreg, align,
+				    operands[1], operands[2]));
     }
   else
     {
       if (TARGET_64BIT)
-	{
-	  emit_insn (gen_cmpdi_1_rex64 (countreg, countreg));
-	  emit_insn (gen_cmpstrqi_rex_1 (addr1, addr2, countreg, align,
-					 addr1, addr2, countreg));
-	}
+	emit_insn (gen_cmpdi_1_rex64 (countreg, countreg));
       else
-	{
-	  emit_insn (gen_cmpsi_1 (countreg, countreg));
-	  emit_insn (gen_cmpstrqi_1 (addr1, addr2, countreg, align,
-				     addr1, addr2, countreg));
-	}
+	emit_insn (gen_cmpsi_1 (countreg, countreg));
+      emit_insn (gen_cmpstrqi_1 (addr1, addr2, countreg, align,
+				 operands[1], operands[2]));
     }
 
   outlow = gen_lowpart (QImode, out);
@@ -16135,7 +15948,20 @@
 ;; memcmp recognizers.  The `cmpsb' opcode does nothing if the count is
 ;; zero.  Emit extra code to make sure that a zero-length compare is EQ.
 
-(define_insn "cmpstrqi_nz_1"
+(define_expand "cmpstrqi_nz_1"
+  [(parallel [(set (reg:CC 17)
+		   (compare:CC (match_operand 4 "memory_operand" "")
+			       (match_operand 5 "memory_operand" "")))
+	      (use (match_operand 2 "register_operand" ""))
+	      (use (match_operand:SI 3 "immediate_operand" ""))
+	      (use (reg:SI 19))
+	      (clobber (match_operand 0 "register_operand" ""))
+	      (clobber (match_operand 1 "register_operand" ""))
+	      (clobber (match_dup 2))])]
+  ""
+  "")
+
+(define_insn "*cmpstrqi_nz_1"
   [(set (reg:CC 17)
 	(compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
 		    (mem:BLK (match_operand:SI 5 "register_operand" "1"))))
@@ -16151,7 +15977,7 @@
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
 
-(define_insn "cmpstrqi_nz_rex_1"
+(define_insn "*cmpstrqi_nz_rex_1"
   [(set (reg:CC 17)
 	(compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
 		    (mem:BLK (match_operand:DI 5 "register_operand" "1"))))
@@ -16169,7 +15995,23 @@
 
 ;; The same, but the count is not known to not be zero.
 
-(define_insn "cmpstrqi_1"
+(define_expand "cmpstrqi_1"
+  [(parallel [(set (reg:CC 17)
+		(if_then_else:CC (ne (match_operand 2 "register_operand" "")
+				     (const_int 0))
+		  (compare:CC (match_operand 4 "memory_operand" "")
+			      (match_operand 5 "memory_operand" ""))
+		  (const_int 0)))
+	      (use (match_operand:SI 3 "immediate_operand" ""))
+	      (use (reg:CC 17))
+	      (use (reg:SI 19))
+	      (clobber (match_operand 0 "register_operand" ""))
+	      (clobber (match_operand 1 "register_operand" ""))
+	      (clobber (match_dup 2))])]
+  ""
+  "")
+
+(define_insn "*cmpstrqi_1"
   [(set (reg:CC 17)
 	(if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2")
 			     (const_int 0))
@@ -16188,7 +16030,7 @@
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
 
-(define_insn "cmpstrqi_rex_1"
+(define_insn "*cmpstrqi_rex_1"
   [(set (reg:CC 17)
 	(if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2")
 			     (const_int 0))
@@ -16233,7 +16075,15 @@
    FAIL;
 })
 
-(define_insn "strlenqi_1"
+(define_expand "strlenqi_1"
+  [(parallel [(set (match_operand 0 "register_operand" "") (match_operand 2 "" ""))
+	      (use (reg:SI 19))
+	      (clobber (match_operand 1 "register_operand" ""))
+	      (clobber (reg:CC 17))])]
+  ""
+  "")
+
+(define_insn "*strlenqi_1"
   [(set (match_operand:SI 0 "register_operand" "=&c")
 	(unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1"))
 		    (match_operand:QI 2 "register_operand" "a")
@@ -16248,7 +16098,7 @@
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
 
-(define_insn "strlenqi_rex_1"
+(define_insn "*strlenqi_rex_1"
   [(set (match_operand:DI 0 "register_operand" "=&c")
 	(unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1"))
 		    (match_operand:QI 2 "register_operand" "a")
--- gcc/config/i386/i386-protos.h.jj	2004-01-19 16:58:01.000000000 +0100
+++ gcc/config/i386/i386-protos.h	2004-01-28 14:27:11.053105012 +0100
@@ -178,7 +178,6 @@ extern int ix86_secondary_memory_needed 
 					 enum machine_mode, int);
 extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class);
 extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
-extern void ix86_set_move_mem_attrs (rtx, rtx, rtx, rtx, rtx);
 extern void emit_i387_cw_initialization (rtx, rtx);
 extern bool ix86_fp_jump_nontrivial_p (enum rtx_code);
 extern void x86_order_regs_for_local_alloc (void);

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]