This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch, i386] Avoid LCP stalls (issue5975045)


This patch addresses instructions that incur expensive length-changing prefix (LCP) stalls
on some x86-64 implementations, notably Core2 and Corei7. Specifically, a move of
a 16-bit constant into memory requires a length-changing prefix and can incur significant
penalties. The attached patch avoids this by forcing such instructions to be split into
two: a move of the corresponding 32-bit constant into a register, and a move of the
register's lower 16 bits into memory.

Bootstrapped and tested on x86_64-unknown-linux-gnu. Is this ok for trunk?

Thanks,
Teresa

2012-03-29   Teresa Johnson  <tejohnson@google.com>

	* config/i386/i386.h (ix86_tune_indices): Add
	X86_TUNE_LCP_STALL.
	* config/i386/i386.md (movhi_internal): Split to
	movhi_internal and movhi_imm_internal.
	* config/i386/i386.c (initial_ix86_tune_features): Initialize
	X86_TUNE_LCP_STALL entry.

Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 185920)
+++ config/i386/i386.h	(working copy)
@@ -262,6 +262,7 @@ enum ix86_tune_indices {
   X86_TUNE_MOVX,
   X86_TUNE_PARTIAL_REG_STALL,
   X86_TUNE_PARTIAL_FLAG_REG_STALL,
+  X86_TUNE_LCP_STALL,
   X86_TUNE_USE_HIMODE_FIOP,
   X86_TUNE_USE_SIMODE_FIOP,
   X86_TUNE_USE_MOV0,
@@ -340,6 +341,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_L
 #define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
 #define TARGET_PARTIAL_FLAG_REG_STALL \
 	ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
+#define TARGET_LCP_STALL \
+	ix86_tune_features[X86_TUNE_LCP_STALL]
 #define TARGET_USE_HIMODE_FIOP	ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP]
 #define TARGET_USE_SIMODE_FIOP	ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP]
 #define TARGET_USE_MOV0		ix86_tune_features[X86_TUNE_USE_MOV0]
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 185920)
+++ config/i386/i386.md	(working copy)
@@ -2262,9 +2262,19 @@
 	   ]
 	   (const_string "SI")))])
 
+(define_insn "*movhi_imm_internal"
+  [(set (match_operand:HI 0 "memory_operand" "=m")
+        (match_operand:HI 1 "immediate_operand" "n"))]
+  "!TARGET_LCP_STALL && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  return "mov{w}\t{%1, %0|%0, %1}";
+}
+  [(set (attr "type") (const_string "imov"))
+   (set (attr "mode") (const_string "HI"))])
+
 (define_insn "*movhi_internal"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
-	(match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
+	(match_operand:HI 1 "general_operand" "r,rn,rm,r"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 185920)
+++ config/i386/i386.c	(working copy)
@@ -1964,6 +1964,11 @@ static unsigned int initial_ix86_tune_features[X86
   /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
   m_CORE2I7 | m_GENERIC,
 
+  /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
+   * on 16-bit immediate moves into memory on Core2 and Corei7,
+   * which may also affect AMD implementations.  */
+  m_CORE2I7 | m_GENERIC | m_AMD_MULTIPLE,
+
   /* X86_TUNE_USE_HIMODE_FIOP */
   m_386 | m_486 | m_K6_GEODE,
 

--
This patch is available for review at http://codereview.appspot.com/5975045


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]