This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, i386]: Do not emit "cld" instructions


Jan Hubicka wrote:

I would be in favour of simply dropping the code - if it ever becomes
important we can always resurrect your mode switching pass.  I would not
expect inlining memmove via std to become important, just because the
hardware probably won't be very well optimized for this and there are
better ways to inline it.

Or alternatively just disable the code emitting the CLD.  I think it is
better than outputting empty ASM and confusing the scheduler.



Attached patch thus removes cld instruction and associated DIRFLAG_REG.

2006-12-05 Uros Bizjak <ubizjak@gmail.com>

       * config/i386/i386.md (DIRFLAG_REG): Remove constant.
       (type): Remove cld.
       (length_immediate): Do not depend on cld type attribute.
       (length_address): Ditto.
       (modrm): Ditto.
       (memory): Ditto.
       (cld): Remove insn pattern.
       (strmov_singleop): Do not use DIRFLAG_REG.
       (*strmovdi_rex_1): Ditto.
       (*strmovsi_1): Ditto.
       (*strmovsi_rex_1): Ditto.
       (*strmovhi_1): Ditto.
       (*strmovhi_rex_1): Ditto.
       (*strmovqi_1): Ditto.
       (*strmovqi_rex_1): Ditto.
       (rep_mov): Ditto.
       (*rep_movdi_rex64): Ditto.
       (*rep_movsi): Ditto.
       (*rep_movsi_rex64): Ditto.
       (*rep_movqi): Ditto.
       (*rep_movqi_rex64): Ditto.
       (strset_singleop): Ditto.
       (*strsetdi_rex_1): Ditto.
       (*strsetsi_1): Ditto.
       (*strsetsi_rex_1): Ditto.
       (*strsethi_1): Ditto.
       (*strsethi_rex_1): Ditto.
       (*strsetqi_1): Ditto.
       (*strsetqi_rex_1): Ditto.
       (rep_stos): Ditto.
       (*rep_stosdi_rex64): Ditto.
       (*rep_stossi): Ditto.
       (*rep_stossi_rex64): Ditto.
       (*rep_stosqi): Ditto.
       (*rep_stosqi_rex64): Ditto.
       (cmpstrnsi): Do not generate cld insn.
       (cmpstrnqi_nz_1): Do not use DIRFLAG_REG.
       (*cmpstrnqi_nz_1): Ditto.
       (*cmpstrnqi_nz_rex_1): Ditto.
       (cmpstrnqi_1): Ditto.
       (*cmpstrnqi_1): Ditto.
       (*cmpstrnqi_rex_1): Ditto.
       (strlenqi_1): Ditto.
       (*strlenqi_1): Ditto.
       (*strlenqi_rex_1): Ditto.

       * config/i386/geode.md (shift): Do not depend on cld type attribute.
       * config/i386/pentium.md (pent_cld): Remove insn reservation.
       * config/i386/athlon.md (athlon_decode): Do not depend on cld
       type attribute.
       * config/i386/ppro.md (ppro_cld): Remove insn reservation.
       * config/i386/k6.md (k6_alux_only): Do not depend on cld type
       attribute.
       (k6_alux_only_load): Ditto.
       (k6_alux_only_store): Ditto.

       * config/i386/i386.c (ix86_expand_movmem): Remove cld_done boolean.
       Do not emit cld instruction.
       (ix86_expand_setmem): Ditto.
       (ix86_expand_strlen): Do not emit cld instruction.

Patch was bootstrapped on x86_64-pc-linux-gnu and regression tested for C and C++.
OK for mainline?


According to the guide, it applies to pentium4.



This is pretty high. Would it be possible for you to rerun the test_stringops script on a P4 machine after removing the CLD? If it really is 48 cycles, it should show a difference in the preferred memcpy codegen.



Sure! But I think that this is an error in the optimization guide.

Uros.

Index: i386.md
===================================================================
--- i386.md	(revision 119553)
+++ i386.md	(working copy)
@@ -180,7 +180,6 @@
    (FLAGS_REG			17)
    (FPSR_REG			18)
    (FPCR_REG			19)
-   (DIRFLAG_REG			20)
    (R10_REG			40)
    (R11_REG			41)
   ])
@@ -207,7 +206,7 @@
    incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
    icmp,test,ibr,setcc,icmov,
    push,pop,call,callv,leave,
-   str,cld,
+   str,
    fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
    sselog,sselog1,sseiadd,sseishft,sseimul,
    sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,
@@ -234,7 +233,7 @@
 
 ;; The (bounding maximum) length of an instruction immediate.
 (define_attr "length_immediate" ""
-  (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave")
+  (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave")
 	   (const_int 0)
 	 (eq_attr "unit" "i387,sse,mmx")
 	   (const_int 0)
@@ -261,7 +260,7 @@
 
 ;; The (bounding maximum) length of an instruction address.
 (define_attr "length_address" ""
-  (cond [(eq_attr "type" "str,cld,other,multi,fxch")
+  (cond [(eq_attr "type" "str,other,multi,fxch")
 	   (const_int 0)
 	 (and (eq_attr "type" "call")
 	      (match_operand 0 "constant_call_address_operand" ""))
@@ -310,7 +309,7 @@
 
 ;; Set when modrm byte is used.
 (define_attr "modrm" ""
-  (cond [(eq_attr "type" "str,cld,leave")
+  (cond [(eq_attr "type" "str,leave")
 	   (const_int 0)
 	 (eq_attr "unit" "i387")
 	   (const_int 0)
@@ -370,7 +369,7 @@
 (define_attr "memory" "none,load,store,both,unknown"
   (cond [(eq_attr "type" "other,multi,str")
 	   (const_string "unknown")
-	 (eq_attr "type" "lea,fcmov,fpspc,cld")
+	 (eq_attr "type" "lea,fcmov,fpspc")
 	   (const_string "none")
 	 (eq_attr "type" "fistp,leave")
 	   (const_string "both")
@@ -18141,12 +18140,6 @@
 
 ;; Block operation instructions
 
-(define_insn "cld"
- [(set (reg:SI DIRFLAG_REG) (const_int 0))]
- ""
- "cld"
-  [(set_attr "type" "cld")])
-
 (define_expand "movmemsi"
   [(use (match_operand:BLK 0 "memory_operand" ""))
    (use (match_operand:BLK 1 "memory_operand" ""))
@@ -18211,8 +18204,7 @@
 	      (set (match_operand 0 "register_operand" "")
 		   (match_operand 4 "" ""))
 	      (set (match_operand 2 "register_operand" "")
-		   (match_operand 5 "" ""))
-	      (use (reg:SI DIRFLAG_REG))])]
+		   (match_operand 5 "" ""))])]
   "TARGET_SINGLE_STRINGOP || optimize_size"
   "")
 
@@ -18224,8 +18216,7 @@
 		 (const_int 8)))
    (set (match_operand:DI 1 "register_operand" "=S")
 	(plus:DI (match_dup 3)
-		 (const_int 8)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 8)))]
   "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "movsq"
   [(set_attr "type" "str")
@@ -18240,8 +18231,7 @@
 		 (const_int 4)))
    (set (match_operand:SI 1 "register_operand" "=S")
 	(plus:SI (match_dup 3)
-		 (const_int 4)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 4)))]
   "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "{movsl|movsd}"
   [(set_attr "type" "str")
@@ -18256,8 +18246,7 @@
 		 (const_int 4)))
    (set (match_operand:DI 1 "register_operand" "=S")
 	(plus:DI (match_dup 3)
-		 (const_int 4)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 4)))]
   "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "{movsl|movsd}"
   [(set_attr "type" "str")
@@ -18272,8 +18261,7 @@
 		 (const_int 2)))
    (set (match_operand:SI 1 "register_operand" "=S")
 	(plus:SI (match_dup 3)
-		 (const_int 2)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 2)))]
   "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "movsw"
   [(set_attr "type" "str")
@@ -18288,8 +18276,7 @@
 		 (const_int 2)))
    (set (match_operand:DI 1 "register_operand" "=S")
 	(plus:DI (match_dup 3)
-		 (const_int 2)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 2)))]
   "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "movsw"
   [(set_attr "type" "str")
@@ -18304,8 +18291,7 @@
 		 (const_int 1)))
    (set (match_operand:SI 1 "register_operand" "=S")
 	(plus:SI (match_dup 3)
-		 (const_int 1)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 1)))]
   "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "movsb"
   [(set_attr "type" "str")
@@ -18320,8 +18306,7 @@
 		 (const_int 1)))
    (set (match_operand:DI 1 "register_operand" "=S")
 	(plus:DI (match_dup 3)
-		 (const_int 1)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 1)))]
   "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "movsb"
   [(set_attr "type" "str")
@@ -18336,8 +18321,7 @@
 		   (match_operand 6 "" ""))
 	      (set (match_operand 1 "memory_operand" "")
 		   (match_operand 3 "memory_operand" ""))
-	      (use (match_dup 4))
-	      (use (reg:SI DIRFLAG_REG))])]
+	      (use (match_dup 4))])]
   ""
   "")
 
@@ -18352,8 +18336,7 @@
 		 (match_operand:DI 4 "register_operand" "1")))
    (set (mem:BLK (match_dup 3))
 	(mem:BLK (match_dup 4)))
-   (use (match_dup 5))
-   (use (reg:SI DIRFLAG_REG))]
+   (use (match_dup 5))]
   "TARGET_64BIT"
   "{rep\;movsq|rep movsq}"
   [(set_attr "type" "str")
@@ -18372,8 +18355,7 @@
 		 (match_operand:SI 4 "register_operand" "1")))
    (set (mem:BLK (match_dup 3))
 	(mem:BLK (match_dup 4)))
-   (use (match_dup 5))
-   (use (reg:SI DIRFLAG_REG))]
+   (use (match_dup 5))]
   "!TARGET_64BIT"
   "{rep\;movsl|rep movsd}"
   [(set_attr "type" "str")
@@ -18392,8 +18374,7 @@
 		 (match_operand:DI 4 "register_operand" "1")))
    (set (mem:BLK (match_dup 3))
 	(mem:BLK (match_dup 4)))
-   (use (match_dup 5))
-   (use (reg:SI DIRFLAG_REG))]
+   (use (match_dup 5))]
   "TARGET_64BIT"
   "{rep\;movsl|rep movsd}"
   [(set_attr "type" "str")
@@ -18410,8 +18391,7 @@
         (plus:SI (match_operand:SI 4 "register_operand" "1") (match_dup 5)))
    (set (mem:BLK (match_dup 3))
 	(mem:BLK (match_dup 4)))
-   (use (match_dup 5))
-   (use (reg:SI DIRFLAG_REG))]
+   (use (match_dup 5))]
   "!TARGET_64BIT"
   "{rep\;movsb|rep movsb}"
   [(set_attr "type" "str")
@@ -18428,8 +18408,7 @@
         (plus:DI (match_operand:DI 4 "register_operand" "1") (match_dup 5)))
    (set (mem:BLK (match_dup 3))
 	(mem:BLK (match_dup 4)))
-   (use (match_dup 5))
-   (use (reg:SI DIRFLAG_REG))]
+   (use (match_dup 5))]
   "TARGET_64BIT"
   "{rep\;movsb|rep movsb}"
   [(set_attr "type" "str")
@@ -18500,8 +18479,7 @@
   [(parallel [(set (match_operand 1 "memory_operand" "")
 		   (match_operand 2 "register_operand" ""))
 	      (set (match_operand 0 "register_operand" "")
-		   (match_operand 3 "" ""))
-	      (use (reg:SI DIRFLAG_REG))])]
+		   (match_operand 3 "" ""))])]
   "TARGET_SINGLE_STRINGOP || optimize_size"
   "")
 
@@ -18510,8 +18488,7 @@
 	(match_operand:DI 2 "register_operand" "a"))
    (set (match_operand:DI 0 "register_operand" "=D")
 	(plus:DI (match_dup 1)
-		 (const_int 8)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 8)))]
   "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "stosq"
   [(set_attr "type" "str")
@@ -18523,8 +18500,7 @@
 	(match_operand:SI 2 "register_operand" "a"))
    (set (match_operand:SI 0 "register_operand" "=D")
 	(plus:SI (match_dup 1)
-		 (const_int 4)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 4)))]
   "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "{stosl|stosd}"
   [(set_attr "type" "str")
@@ -18536,8 +18512,7 @@
 	(match_operand:SI 2 "register_operand" "a"))
    (set (match_operand:DI 0 "register_operand" "=D")
 	(plus:DI (match_dup 1)
-		 (const_int 4)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 4)))]
   "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "{stosl|stosd}"
   [(set_attr "type" "str")
@@ -18549,8 +18524,7 @@
 	(match_operand:HI 2 "register_operand" "a"))
    (set (match_operand:SI 0 "register_operand" "=D")
 	(plus:SI (match_dup 1)
-		 (const_int 2)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 2)))]
   "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "stosw"
   [(set_attr "type" "str")
@@ -18562,8 +18536,7 @@
 	(match_operand:HI 2 "register_operand" "a"))
    (set (match_operand:DI 0 "register_operand" "=D")
 	(plus:DI (match_dup 1)
-		 (const_int 2)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 2)))]
   "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "stosw"
   [(set_attr "type" "str")
@@ -18575,8 +18548,7 @@
 	(match_operand:QI 2 "register_operand" "a"))
    (set (match_operand:SI 0 "register_operand" "=D")
 	(plus:SI (match_dup 1)
-		 (const_int 1)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 1)))]
   "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "stosb"
   [(set_attr "type" "str")
@@ -18588,8 +18560,7 @@
 	(match_operand:QI 2 "register_operand" "a"))
    (set (match_operand:DI 0 "register_operand" "=D")
 	(plus:DI (match_dup 1)
-		 (const_int 1)))
-   (use (reg:SI DIRFLAG_REG))]
+		 (const_int 1)))]
   "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
   "stosb"
   [(set_attr "type" "str")
@@ -18602,8 +18573,7 @@
 		   (match_operand 4 "" ""))
 	      (set (match_operand 2 "memory_operand" "") (const_int 0))
 	      (use (match_operand 3 "register_operand" ""))
-	      (use (match_dup 1))
-	      (use (reg:SI DIRFLAG_REG))])]
+	      (use (match_dup 1))])]
   ""
   "")
 
@@ -18616,8 +18586,7 @@
    (set (mem:BLK (match_dup 3))
 	(const_int 0))
    (use (match_operand:DI 2 "register_operand" "a"))
-   (use (match_dup 4))
-   (use (reg:SI DIRFLAG_REG))]
+   (use (match_dup 4))]
   "TARGET_64BIT"
   "{rep\;stosq|rep stosq}"
   [(set_attr "type" "str")
@@ -18634,8 +18603,7 @@
    (set (mem:BLK (match_dup 3))
 	(const_int 0))
    (use (match_operand:SI 2 "register_operand" "a"))
-   (use (match_dup 4))
-   (use (reg:SI DIRFLAG_REG))]
+   (use (match_dup 4))]
   "!TARGET_64BIT"
   "{rep\;stosl|rep stosd}"
   [(set_attr "type" "str")
@@ -18652,8 +18620,7 @@
    (set (mem:BLK (match_dup 3))
 	(const_int 0))
    (use (match_operand:SI 2 "register_operand" "a"))
-   (use (match_dup 4))
-   (use (reg:SI DIRFLAG_REG))]
+   (use (match_dup 4))]
   "TARGET_64BIT"
   "{rep\;stosl|rep stosd}"
   [(set_attr "type" "str")
@@ -18669,8 +18636,7 @@
    (set (mem:BLK (match_dup 3))
 	(const_int 0))
    (use (match_operand:QI 2 "register_operand" "a"))
-   (use (match_dup 4))
-   (use (reg:SI DIRFLAG_REG))]
+   (use (match_dup 4))]
   "!TARGET_64BIT"
   "{rep\;stosb|rep stosb}"
   [(set_attr "type" "str")
@@ -18686,8 +18652,7 @@
    (set (mem:BLK (match_dup 3))
 	(const_int 0))
    (use (match_operand:QI 2 "register_operand" "a"))
-   (use (match_dup 4))
-   (use (reg:SI DIRFLAG_REG))]
+   (use (match_dup 4))]
   "TARGET_64BIT"
   "{rep\;stosb|rep stosb}"
   [(set_attr "type" "str")
@@ -18728,7 +18693,6 @@
      once cc0 is dead.  */
   align = operands[4];
 
-  emit_insn (gen_cld ());
   if (GET_CODE (count) == CONST_INT)
     {
       if (INTVAL (count) == 0)
@@ -18783,7 +18747,6 @@
 			       (match_operand 5 "memory_operand" "")))
 	      (use (match_operand 2 "register_operand" ""))
 	      (use (match_operand:SI 3 "immediate_operand" ""))
-	      (use (reg:SI DIRFLAG_REG))
 	      (clobber (match_operand 0 "register_operand" ""))
 	      (clobber (match_operand 1 "register_operand" ""))
 	      (clobber (match_dup 2))])]
@@ -18796,7 +18759,6 @@
 		    (mem:BLK (match_operand:SI 5 "register_operand" "1"))))
    (use (match_operand:SI 6 "register_operand" "2"))
    (use (match_operand:SI 3 "immediate_operand" "i"))
-   (use (reg:SI DIRFLAG_REG))
    (clobber (match_operand:SI 0 "register_operand" "=S"))
    (clobber (match_operand:SI 1 "register_operand" "=D"))
    (clobber (match_operand:SI 2 "register_operand" "=c"))]
@@ -18812,7 +18774,6 @@
 		    (mem:BLK (match_operand:DI 5 "register_operand" "1"))))
    (use (match_operand:DI 6 "register_operand" "2"))
    (use (match_operand:SI 3 "immediate_operand" "i"))
-   (use (reg:SI DIRFLAG_REG))
    (clobber (match_operand:DI 0 "register_operand" "=S"))
    (clobber (match_operand:DI 1 "register_operand" "=D"))
    (clobber (match_operand:DI 2 "register_operand" "=c"))]
@@ -18833,7 +18794,6 @@
 		  (const_int 0)))
 	      (use (match_operand:SI 3 "immediate_operand" ""))
 	      (use (reg:CC FLAGS_REG))
-	      (use (reg:SI DIRFLAG_REG))
 	      (clobber (match_operand 0 "register_operand" ""))
 	      (clobber (match_operand 1 "register_operand" ""))
 	      (clobber (match_dup 2))])]
@@ -18849,7 +18809,6 @@
 	  (const_int 0)))
    (use (match_operand:SI 3 "immediate_operand" "i"))
    (use (reg:CC FLAGS_REG))
-   (use (reg:SI DIRFLAG_REG))
    (clobber (match_operand:SI 0 "register_operand" "=S"))
    (clobber (match_operand:SI 1 "register_operand" "=D"))
    (clobber (match_operand:SI 2 "register_operand" "=c"))]
@@ -18868,7 +18827,6 @@
 	  (const_int 0)))
    (use (match_operand:SI 3 "immediate_operand" "i"))
    (use (reg:CC FLAGS_REG))
-   (use (reg:SI DIRFLAG_REG))
    (clobber (match_operand:DI 0 "register_operand" "=S"))
    (clobber (match_operand:DI 1 "register_operand" "=D"))
    (clobber (match_operand:DI 2 "register_operand" "=c"))]
@@ -18906,7 +18864,6 @@
 
 (define_expand "strlenqi_1"
   [(parallel [(set (match_operand 0 "register_operand" "") (match_operand 2 "" ""))
-	      (use (reg:SI DIRFLAG_REG))
 	      (clobber (match_operand 1 "register_operand" ""))
 	      (clobber (reg:CC FLAGS_REG))])]
   ""
@@ -18918,7 +18875,6 @@
 		    (match_operand:QI 2 "register_operand" "a")
 		    (match_operand:SI 3 "immediate_operand" "i")
 		    (match_operand:SI 4 "register_operand" "0")] UNSPEC_SCAS))
-   (use (reg:SI DIRFLAG_REG))
    (clobber (match_operand:SI 1 "register_operand" "=D"))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT"
@@ -18933,7 +18889,6 @@
 		    (match_operand:QI 2 "register_operand" "a")
 		    (match_operand:DI 3 "immediate_operand" "i")
 		    (match_operand:DI 4 "register_operand" "0")] UNSPEC_SCAS))
-   (use (reg:SI DIRFLAG_REG))
    (clobber (match_operand:DI 1 "register_operand" "=D"))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
@@ -18963,7 +18918,6 @@
 		      (mem:BLK (match_operand 5 "register_operand" ""))))
      (use (match_operand 6 "register_operand" ""))
      (use (match_operand:SI 3 "immediate_operand" ""))
-     (use (reg:SI DIRFLAG_REG))
      (clobber (match_operand 0 "register_operand" ""))
      (clobber (match_operand 1 "register_operand" ""))
      (clobber (match_operand 2 "register_operand" ""))])
@@ -18981,7 +18935,6 @@
 		      (mem:BLK (match_dup 5))))
      (use (match_dup 6))
      (use (match_dup 3))
-     (use (reg:SI DIRFLAG_REG))
      (clobber (match_dup 0))
      (clobber (match_dup 1))
      (clobber (match_dup 2))])]
@@ -18998,7 +18951,6 @@
 	    (const_int 0)))
      (use (match_operand:SI 3 "immediate_operand" ""))
      (use (reg:CC FLAGS_REG))
-     (use (reg:SI DIRFLAG_REG))
      (clobber (match_operand 0 "register_operand" ""))
      (clobber (match_operand 1 "register_operand" ""))
      (clobber (match_operand 2 "register_operand" ""))])
@@ -19019,7 +18971,6 @@
 	    (const_int 0)))
      (use (match_dup 3))
      (use (reg:CC FLAGS_REG))
-     (use (reg:SI DIRFLAG_REG))
      (clobber (match_dup 0))
      (clobber (match_dup 1))
      (clobber (match_dup 2))])]
Index: geode.md
===================================================================
--- geode.md	(revision 119553)
+++ geode.md	(working copy)
@@ -52,7 +52,7 @@
 
 (define_insn_reservation "shift" 2
 			 (and (eq_attr "cpu" "geode")
-			      (eq_attr "type" "ishift,ishift1,rotate,rotate1,cld"))
+			      (eq_attr "type" "ishift,ishift1,rotate,rotate1"))
 			 "geode_issue,geode_alu*2")
 
 (define_insn_reservation "imul" 7
Index: pentium.md
===================================================================
--- pentium.md	(revision 119553)
+++ pentium.md	(working copy)
@@ -151,11 +151,6 @@
        (eq_attr "type" "idiv"))
   "pentium-np+pentium-fp")
 
-(define_insn_reservation "pent_cld" 2
-  (and (eq_attr "cpu" "pentium")
-       (eq_attr "type" "cld"))
-  "pentium-np*2")
-
 ;;  Moves usually have one cycle penalty, but there are exceptions.
 (define_insn_reservation "pent_fmov" 1
   (and (eq_attr "cpu" "pentium")
Index: athlon.md
===================================================================
--- athlon.md	(revision 119553)
+++ athlon.md	(working copy)
@@ -18,7 +18,7 @@
 ;; communicates with all the execution units separately instead.
 
 (define_attr "athlon_decode" "direct,vector,double"
-  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,leave")
+  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,leave")
 	   (const_string "vector")
          (and (eq_attr "type" "push")
               (match_operand 1 "memory_operand" ""))
Index: ppro.md
===================================================================
--- ppro.md	(revision 119553)
+++ ppro.md	(working copy)
@@ -60,8 +60,7 @@
 ;;   This isn't necessary right now because we assume for every
 ;;   instruction that it never blocks a decoder.
 ;; - Figure out where the p0 and p1 reservations come from.  These
-;;   appear not to be in the manual (e.g. why is cld "(p0+p1)*2"
-;;   better than "(p0|p1)*4" ???)
+;;   appear not to be in the manual
 ;; - Lots more because I'm sure this is still far from optimal :-)
 
 ;; The ppro_idiv and ppro_fdiv automata are used to model issue
@@ -196,10 +195,6 @@
 				   (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
 			 "decoder0,p2+p0,p4+p3")
 
-(define_insn_reservation "ppro_cld" 2
-			 (and (eq_attr "cpu" "pentiumpro,generic32")
-			      (eq_attr "type" "cld"))
-			 "decoder0,(p0+p1)*2")
 
 ;; The P6 has a sophisticated branch prediction mechanism to minimize
 ;; latencies due to branching.  In particular, it has a fast way to
Index: k6.md
===================================================================
--- k6.md	(revision 119553)
+++ k6.md	(working copy)
@@ -80,19 +80,19 @@
 ;; Shift instructions and certain arithmetic are issued only on Integer X.
 (define_insn_reservation "k6_alux_only" 1
 			 (and (eq_attr "cpu" "k6")
-			      (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
+			      (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot")
 				   (eq_attr "memory" "none")))
 			 "k6_decode_short,k6_alux")
 
 (define_insn_reservation "k6_alux_only_load" 3
 			 (and (eq_attr "cpu" "k6")
-			       (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
+			       (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot")
 				    (eq_attr "memory" "load")))
 			 "k6_decode_short,k6_load,k6_alux")
 
 (define_insn_reservation "k6_alux_only_store" 3
 			 (and (eq_attr "cpu" "k6")
-			       (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
+			       (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot")
 				    (eq_attr "memory" "store,both,unknown")))
 			 "k6_decode_long,k6_load,k6_alux,k6_store")
 
Index: i386.c
===================================================================
--- i386.c	(revision 119553)
+++ i386.c	(working copy)
@@ -13623,10 +13623,6 @@
   int desired_align = 0;
   enum stringop_alg alg;
   int dynamic_check;
-  /* Precise placement on cld depends whether stringops will be emit in
-     prologue, main copying body or epilogue.  This variable keeps track
-     if cld was already needed.  */
-  bool cld_done = false;
 
   if (GET_CODE (align_exp) == CONST_INT)
     align = INTVAL (align_exp);
@@ -13691,8 +13687,7 @@
       && !count)
     {
       int size = MAX (size_needed - 1, desired_align - align);
-      if (TARGET_SINGLE_STRINGOP)
-	emit_insn (gen_cld ()), cld_done = true;
+
       label = gen_label_rtx ();
       emit_cmp_and_jump_insns (count_exp,
 			       GEN_INT (size),
@@ -13726,8 +13721,6 @@
 	 the info early.  */
       src = change_address (src, BLKmode, srcreg);
       dst = change_address (dst, BLKmode, destreg);
-      if (TARGET_SINGLE_STRINGOP && !cld_done)
-	emit_insn (gen_cld ()), cld_done = true;
       expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
 			      desired_align);
     }
@@ -13760,20 +13753,14 @@
 				     expected_size);
       break;
     case rep_prefix_8_byte:
-      if (!cld_done)
-	emit_insn (gen_cld ()), cld_done = true;
       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
 				 DImode);
       break;
     case rep_prefix_4_byte:
-      if (!cld_done)
-	emit_insn (gen_cld ()), cld_done = true;
       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
 				 SImode);
       break;
     case rep_prefix_1_byte:
-      if (!cld_done)
-	emit_insn (gen_cld ()), cld_done = true;
       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
 				 QImode);
       break;
@@ -13809,12 +13796,8 @@
       LABEL_NUSES (label) = 1;
     }
   if (count_exp != const0_rtx && size_needed > 1)
-    {
-      if (TARGET_SINGLE_STRINGOP && !cld_done)
-	emit_insn (gen_cld ()), cld_done = true;
-      expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
-			      size_needed);
-    }
+    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
+			    size_needed);
   if (jump_around_label)
     emit_label (jump_around_label);
   return 1;
@@ -13908,10 +13891,6 @@
   int size_needed = 0;
   int desired_align = 0;
   enum stringop_alg alg;
-  /* Precise placement on cld depends whether stringops will be emit in
-     prologue, main copying body or epilogue.  This variable keeps track
-     if cld was already needed.  */
-  bool cld_done = false;
   rtx promoted_val = val_exp;
   bool force_loopy_epilogue = false;
   int dynamic_check;
@@ -13978,8 +13957,6 @@
          code, so we need to use QImode accesses in epilogue.  */
       if (GET_CODE (val_exp) != CONST_INT && size_needed > 1)
 	force_loopy_epilogue = true;
-      else if (TARGET_SINGLE_STRINGOP)
-	emit_insn (gen_cld ()), cld_done = true;
       label = gen_label_rtx ();
       emit_cmp_and_jump_insns (count_exp,
 			       GEN_INT (size),
@@ -14014,8 +13991,7 @@
       && !count && !label)
     {
       int size = MAX (size_needed - 1, desired_align - align);
-      if (TARGET_SINGLE_STRINGOP)
-	emit_insn (gen_cld ()), cld_done = true;
+
       label = gen_label_rtx ();
       emit_cmp_and_jump_insns (count_exp,
 			       GEN_INT (size),
@@ -14032,8 +14008,6 @@
 	 the pain to maintain it for the first move, so throw away
 	 the info early.  */
       dst = change_address (dst, BLKmode, destreg);
-      if (TARGET_SINGLE_STRINGOP && !cld_done)
-	emit_insn (gen_cld ()), cld_done = true;
       expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
 			      desired_align);
     }
@@ -14061,20 +14035,14 @@
 				     count_exp, Pmode, 4, expected_size);
       break;
     case rep_prefix_8_byte:
-      if (!cld_done)
-	emit_insn (gen_cld ()), cld_done = true;
       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
 				  DImode);
       break;
     case rep_prefix_4_byte:
-      if (!cld_done)
-	emit_insn (gen_cld ()), cld_done = true;
       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
 				  SImode);
       break;
     case rep_prefix_1_byte:
-      if (!cld_done)
-	emit_insn (gen_cld ()), cld_done = true;
       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
 				  QImode);
       break;
@@ -14107,12 +14075,8 @@
 	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
 					 size_needed);
       else
-	{
-	  if (TARGET_SINGLE_STRINGOP && !cld_done)
-	    emit_insn (gen_cld ()), cld_done = true;
-	  expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
-				  size_needed);
-	}
+	expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
+				size_needed);
     }
   if (jump_around_label)
     emit_label (jump_around_label);
@@ -14170,7 +14134,6 @@
       emit_move_insn (scratch3, addr);
       eoschar = force_reg (QImode, eoschar);
 
-      emit_insn (gen_cld ());
       src = replace_equiv_address_nv (src, scratch3);
 
       /* If .md starts supporting :P, this can be done in .md.  */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]