gcc.gnu.org Git - gcc.git/commitdiff
Avoid instructions that incur expensive length-changing prefix (LCP) stalls on some...
author Teresa Johnson <tejohnson@google.com>
Fri, 6 Apr 2012 05:03:49 +0000 (05:03 +0000)
committer Teresa Johnson <tejohnson@gcc.gnu.org>
Fri, 6 Apr 2012 05:03:49 +0000 (05:03 +0000)
Avoid instructions that incur expensive length-changing prefix (LCP) stalls
on some x86-64 implementations, notably Core2 and Corei7. Specifically, a move of
a 16-bit constant into memory requires a length-changing prefix and can incur significant
penalties. Modified an old patch written by H.J. Lu to split such instructions
during peephole2.

2012-04-05  Teresa Johnson  <tejohnson@google.com>
    H.J. Lu  <hongjiu.lu@intel.com>

* config/i386/i386.h (ix86_tune_indices): Add
X86_TUNE_LCP_STALL.
* config/i386/i386.md (move immediate to memory peephole2):
Add cases for HImode move when LCP stall avoidance is needed.
* config/i386/i386.c (initial_ix86_tune_features): Initialize
X86_TUNE_LCP_STALL entry.

Co-Authored-By: H.J. Lu <hongjiu.lu@intel.com>
From-SVN: r186176

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/i386.md

index 0df25cf1fdc41df716f4cd391af0bb2126d44e02..8118ed59ad7b039a55163966694be261cb804ca1 100644 (file)
@@ -1,3 +1,13 @@
+2012-04-05  Teresa Johnson  <tejohnson@google.com>
+           H.J. Lu  <hongjiu.lu@intel.com>
+
+       * config/i386/i386.h (ix86_tune_indices): Add
+       X86_TUNE_LCP_STALL.
+       * config/i386/i386.md (move immediate to memory peephole2):
+       Add cases for HImode move when LCP stall avoidance is needed.
+       * config/i386/i386.c (initial_ix86_tune_features): Initialize
+       X86_TUNE_LCP_STALL entry.
+
 2012-04-05  Uros Bizjak  <ubizjak@gmail.com>
 
        PR target/52882
index c95911321d3888903a7f1d7a2a0ed1c43dd60a4b..8974ddc9a020bc38835634de3fe1f3ec2cdda1e4 100644 (file)
@@ -1964,6 +1964,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
   m_CORE2I7 | m_GENERIC,
 
+  /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
+   * on 16-bit immediate moves into memory on Core2 and Corei7.  */
+  m_CORE2I7 | m_GENERIC,
+
   /* X86_TUNE_USE_HIMODE_FIOP */
   m_386 | m_486 | m_K6_GEODE,
 
index 7ba90c764f938d1737aa1202171f19dc82dfb1e6..8942ea86edf638f11cc4a901385e751eea85fddf 100644 (file)
@@ -262,6 +262,7 @@ enum ix86_tune_indices {
   X86_TUNE_MOVX,
   X86_TUNE_PARTIAL_REG_STALL,
   X86_TUNE_PARTIAL_FLAG_REG_STALL,
+  X86_TUNE_LCP_STALL,
   X86_TUNE_USE_HIMODE_FIOP,
   X86_TUNE_USE_SIMODE_FIOP,
   X86_TUNE_USE_MOV0,
@@ -340,6 +341,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
 #define TARGET_PARTIAL_FLAG_REG_STALL \
        ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
+#define TARGET_LCP_STALL \
+       ix86_tune_features[X86_TUNE_LCP_STALL]
 #define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP]
 #define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP]
 #define TARGET_USE_MOV0                ix86_tune_features[X86_TUNE_USE_MOV0]
index 2d20a52bc062eec3449fe6102a97139a91bc2969..dd1f9be16c289bf8cc85021582b514d3657ae406 100644 (file)
    (set (match_dup 0) (match_dup 2))])
 
 ;; Don't move an immediate directly to memory when the instruction
-;; gets too big.
+;; gets too big, or if LCP stalls are a problem for 16-bit moves.
 (define_peephole2
   [(match_scratch:SWI124 1 "<r>")
    (set (match_operand:SWI124 0 "memory_operand")
         (const_int 0))]
   "optimize_insn_for_speed_p ()
-   && !TARGET_USE_MOV0
-   && TARGET_SPLIT_LONG_MOVES
-   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
+   && ((<MODE>mode == HImode
+       && TARGET_LCP_STALL)
+       || (!TARGET_USE_MOV0
+          && TARGET_SPLIT_LONG_MOVES
+          && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 2) (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
    (set (match_operand:SWI124 0 "memory_operand")
         (match_operand:SWI124 1 "immediate_operand"))]
   "optimize_insn_for_speed_p ()
-   && TARGET_SPLIT_LONG_MOVES
-   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
+   && ((<MODE>mode == HImode
+       && TARGET_LCP_STALL)
+       || (TARGET_SPLIT_LONG_MOVES
+          && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))])
 
This page took 0.15619 seconds and 5 git commands to generate.