This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 2/2] Remove x86 cmpstrnsi


From: Andi Kleen <ak@linux.intel.com>

In my tests, the optimized out-of-line glibc strcmp is always faster than
using inline rep ; cmpsb, even for small strings. The Intel optimization manual
also recommends against using it. So remove the cmpstrnsi instruction.

Tested on Intel Sandy Bridge and Westmere CPUs.

gcc/:

2014-07-02  Andi Kleen	<ak@linux.intel.com>

	* config/i386/i386.md (cmpstrnsi, cmpintqi): Remove expanders.
---
 gcc/config/i386/i386.md | 85 -------------------------------------------------
 1 file changed, 85 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 5f32a24..67f1343 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15878,91 +15878,6 @@
 	  (const_string "*")))
    (set_attr "mode" "QI")])
 
-(define_expand "cmpstrnsi"
-  [(set (match_operand:SI 0 "register_operand")
-	(compare:SI (match_operand:BLK 1 "general_operand")
-		    (match_operand:BLK 2 "general_operand")))
-   (use (match_operand 3 "general_operand"))
-   (use (match_operand 4 "immediate_operand"))]
-  ""
-{
-  rtx addr1, addr2, out, outlow, count, countreg, align;
-
-  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
-    FAIL;
-
-  /* Can't use this if the user has appropriated ecx, esi or edi.  */
-  if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
-    FAIL;
-
-  out = operands[0];
-  if (!REG_P (out))
-    out = gen_reg_rtx (SImode);
-
-  addr1 = copy_addr_to_reg (XEXP (operands[1], 0));
-  addr2 = copy_addr_to_reg (XEXP (operands[2], 0));
-  if (addr1 != XEXP (operands[1], 0))
-    operands[1] = replace_equiv_address_nv (operands[1], addr1);
-  if (addr2 != XEXP (operands[2], 0))
-    operands[2] = replace_equiv_address_nv (operands[2], addr2);
-
-  count = operands[3];
-  countreg = ix86_zero_extend_to_Pmode (count);
-
-  /* %%% Iff we are testing strict equality, we can use known alignment
-     to good advantage.  This may be possible with combine, particularly
-     once cc0 is dead.  */
-  align = operands[4];
-
-  if (CONST_INT_P (count))
-    {
-      if (INTVAL (count) == 0)
-	{
-	  emit_move_insn (operands[0], const0_rtx);
-	  DONE;
-	}
-      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
-				     operands[1], operands[2]));
-    }
-  else
-    {
-      rtx (*gen_cmp) (rtx, rtx);
-
-      gen_cmp = (TARGET_64BIT
-		 ? gen_cmpdi_1 : gen_cmpsi_1);
-
-      emit_insn (gen_cmp (countreg, countreg));
-      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
-				  operands[1], operands[2]));
-    }
-
-  outlow = gen_lowpart (QImode, out);
-  emit_insn (gen_cmpintqi (outlow));
-  emit_move_insn (out, gen_rtx_SIGN_EXTEND (SImode, outlow));
-
-  if (operands[0] != out)
-    emit_move_insn (operands[0], out);
-
-  DONE;
-})
-
-;; Produce a tri-state integer (-1, 0, 1) from condition codes.
-
-(define_expand "cmpintqi"
-  [(set (match_dup 1)
-	(gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
-   (set (match_dup 2)
-	(ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
-   (parallel [(set (match_operand:QI 0 "register_operand")
-		   (minus:QI (match_dup 1)
-			     (match_dup 2)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  ""
-{
-  operands[1] = gen_reg_rtx (QImode);
-  operands[2] = gen_reg_rtx (QImode);
-})
-
 ;; memcmp recognizers.  The `cmpsb' opcode does nothing if the count is
 ;; zero.  Emit extra code to make sure that a zero-length compare is EQ.
 
-- 
2.0.0


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]