This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, i386]: Insert "cld" via optimize mode switching


Hello!

The attached patch optimizes insertion of the "cld" instruction via the optimize mode switching pass.

The "original" insn pattern, as emitted by the expanders, is split just before the mode switching pass into a "fake" cld insn pattern "(use (reg:SI DIRFLAG_REG))", whose sole purpose is to expose its "cld" type attribute during the optimize_mode_switching (o_m_s) pass. The "real" instruction is generated as the "cld_1" insn pattern during the optimize_mode_switching pass at its optimal position. Although the fake cld instructions remain present in the insn flow, they don't emit anything into *.s files. The mode is reset to UNINITIALIZED after every call and asm insn pattern.

This optimization improves (among others) the following testcase:

--cut here--
char a[2048];
char b[2048];
t()
{
 int i;

 for (i = 0; i < 10; i++)
   __builtin_memcpy (a,b,2048);
}
--cut here--

into:

t:
.LFB2:
       movl    $a, %r8d
       movl    $b, %edx
       movl    $256, %ecx
       cld                                 << only one cld instruction
       movq    %r8, %rdi
       movq    %rdx, %rsi
       movl    $1, %eax
       rep
       movsq
       .p2align 4,,7
.L2:
       addl    $1, %eax
       movl    $256, %ecx
       movq    %r8, %rdi
       cmpl    $10, %eax
       movq    %rdx, %rsi
       rep
       movsq
       jne     .L2
       rep ; ret

The patch was bootstrapped on i686-pc-linux-gnu and x86_64-pc-linux-gnu, and regression tested on i686-pc-linux-gnu for c, c++ and fortran.

OK for mainline?

2006-12-01 Uros Bizjak <ubizjak@gmail.com>

       * config/i386/i386-protos.h (ix86_emit_mode_set): New function.
       (emit_i387_cw_initialization): Remove.
       * config/i386/i386.h (enum ix86_fpu_mode): New enum.
       (enum ix86_entity): Add CLD_FLAG.
       (NUM_MODES_FOR_MODE_SWITCHING): Add CLD_FLAG_ANY.
       (EMIT_MODE_SET): Implement using ix86_emit_mode_set.
       * config/i386/i386.c (ix86_mode_needed): Handle CLD_FLAG entity.
       (ix86_emit_mode_set): Rename from emit_i387_cw_initialization.
       Handle CLD_FLAG entity.
       * config/i386/i386.md ("cld_1"): Rename from "cld" insn pattern.
       ("*cld_dummy"): New insn pattern.
       ("cld"): New insn and splitter pattern.

Uros.
Index: i386/i386.h
===================================================================
--- i386/i386.h	(revision 119399)
+++ i386/i386.h	(working copy)
@@ -2184,6 +2184,10 @@
 extern rtx ix86_compare_op1;	/* operand 1 for comparisons */
 extern rtx ix86_compare_emitted;
 
+
+/* Mode states for optimized CLD switching.  */
+enum ix86_fpu_mode { CLD_FLAG_SET, CLD_FLAG_UNINITIALIZED, CLD_FLAG_ANY };
+
 /* To properly truncate FP values into integers, we need to set i387 control
    word.  We can't emit proper mode switching code before reload, as spills
    generated by reload may truncate values incorrectly, but we still can avoid
@@ -2201,7 +2205,8 @@
 
 enum ix86_entity
 {
-  I387_TRUNC = 0,
+  CLD_FLAG = 0,
+  I387_TRUNC,
   I387_FLOOR,
   I387_CEIL,
   I387_MASK_PM,
@@ -2234,7 +2239,7 @@
    refer to the mode-switched entity in question.  */
 
 #define NUM_MODES_FOR_MODE_SWITCHING \
-   { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
+  { CLD_FLAG_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
 
 /* ENTITY is an integer specifying a mode-switched entity.  If
    `OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to
@@ -2254,9 +2259,7 @@
    are to be inserted.  */
 
 #define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) 			\
-  ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED		\
-   ? emit_i387_cw_initialization (MODE), 0				\
-   : 0)
+  ix86_emit_mode_set ((ENTITY), (MODE))
 
 
 /* Avoid renaming of stack registers, as doing so in combination with
Index: i386/i386.md
===================================================================
--- i386/i386.md	(revision 119399)
+++ i386/i386.md	(working copy)
@@ -18141,12 +18141,36 @@
 
 ;; Block operation instructions
 
-(define_insn "cld"
- [(set (reg:SI DIRFLAG_REG) (const_int 0))]
- ""
- "cld"
+(define_insn "cld_1"
+  [(set (reg:SI DIRFLAG_REG) (const_int 0))]
+  ""
+  "cld"
   [(set_attr "type" "cld")])
 
+;; Following instruction should never be emitted.  Its only purpose
+;; is to show "cld" type attribute during optimize mode switching pass.
+
+(define_insn "*cld_dummy"
+  [(use (reg:SI DIRFLAG_REG))]
+  ""
+  "* gcc_unreachable();"
+  [(set_attr "type" "cld")])
+
+;; This insn is split into *cld_dummy just before optimize mode
+;; switching pass.  Based on *cld_dummy position, o_m_s pass will
+;; insert cld_1 in some appropriate place.
+
+(define_insn_and_split "cld"
+  [(set (reg:SI DIRFLAG_REG) (const_int 1))]
+  "!(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(use (reg:SI DIRFLAG_REG))]
+{
+  ix86_optimize_mode_switching[CLD_FLAG] = 1;
+}
+  [(set_attr "type" "cld")])
+
 (define_expand "movmemsi"
   [(use (match_operand:BLK 0 "memory_operand" ""))
    (use (match_operand:BLK 1 "memory_operand" ""))
Index: i386/i386-protos.h
===================================================================
--- i386/i386-protos.h	(revision 119399)
+++ i386/i386-protos.h	(working copy)
@@ -147,7 +147,7 @@
 extern enum reg_class ix86_preferred_output_reload_class (rtx, enum reg_class);
 extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
 extern int ix86_mode_needed (int, rtx);
-extern void emit_i387_cw_initialization (int);
+extern void ix86_emit_mode_set (int, int);
 extern bool ix86_fp_jump_nontrivial_p (enum rtx_code);
 extern void x86_order_regs_for_local_alloc (void);
 extern void x86_function_profiler (FILE *, int);
Index: i386/i386.c
===================================================================
--- i386/i386.c	(revision 119399)
+++ i386/i386.c	(working copy)
@@ -8800,27 +8802,37 @@
   return buf;
 }
 
-/* Return needed mode for entity in optimize_mode_switching pass.  */
+/* Return needed mode for entity in optimize_mode_switching pass.
+   Returned mode should match ix86_mode_entry () for function calls.  */
 
 int
 ix86_mode_needed (int entity, rtx insn)
 {
   enum attr_i387_cw mode;
+  bool uninit;
 
-  /* The mode UNINITIALIZED is used to store control word after a
-     function call or ASM pattern.  The mode ANY specify that function
-     has no requirements on the control word and make no changes in the
-     bits we are interested in.  */
+  if (recog_memoized (insn) < 0)
+    return (entity == CLD_FLAG) ? CLD_FLAG_ANY : I387_CW_ANY;
 
-  if (CALL_P (insn)
-      || (NONJUMP_INSN_P (insn)
-	  && (asm_noperands (PATTERN (insn)) >= 0
-	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
-    return I387_CW_UNINITIALIZED;
+  /* We don't know mode state after a function call or ASM pattern.  */
+  uninit =  (CALL_P (insn)
+	     || (NONJUMP_INSN_P (insn)
+		 && (asm_noperands (PATTERN (insn)) >= 0
+		     || GET_CODE (PATTERN (insn)) == ASM_INPUT)));
 
-  if (recog_memoized (insn) < 0)
-    return I387_CW_ANY;
+  /* Return CLD mode depending on insn type.  */
+  if (entity == CLD_FLAG)
+    {
+      if (uninit)
+	return CLD_FLAG_UNINITIALIZED;
 
+      if (get_attr_type (insn) == TYPE_CLD)
+	return CLD_FLAG_SET;
+
+      return CLD_FLAG_ANY;
+    }
+
+  /* Return i387 control word depending on insn i387_cw attribute.  */
   mode = get_attr_i387_cw (insn);
 
   switch (entity)
@@ -8852,21 +8864,38 @@
   return I387_CW_ANY;
 }
 
-/* Output code to initialize control word copies used by trunc?f?i and
-   rounding patterns.  CURRENT_MODE is set to current control word,
-   while NEW_MODE is set to new control word.  */
+/* Emit mode switching instructions in optimize_mode_switching pass.  */
 
 void
-emit_i387_cw_initialization (int mode)
+ix86_emit_mode_set (int entity, int mode)
 {
-  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
-  rtx new_mode;
+  rtx stored_mode, new_mode;
+  rtx reg;
 
   int slot;
 
-  rtx reg = gen_reg_rtx (HImode);
+  /* Handle CLD flag mode switching.  */
+  if (entity == CLD_FLAG)
+    {
+      if (mode == CLD_FLAG_SET)
+	emit_insn (gen_cld_1());
 
+      return;
+    }
+
+  /* Output code to initialize control word copies used by trunc?f?i and
+     rounding patterns.  CURRENT_MODE is set to current control word,
+     while NEW_MODE is set to new control word.  */
+
+  if ((mode == I387_CW_UNINITIALIZED) || (mode == I387_CW_ANY))
+    return;
+  
+  stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
   emit_insn (gen_x86_fnstcw_1 (stored_mode));
+  
+  reg = gen_reg_rtx (HImode);
+
+  emit_insn (gen_x86_fnstcw_1 (stored_mode));
   emit_move_insn (reg, copy_rtx (stored_mode));
 
   if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]