This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, i386]: Insert "cld" via optimize mode switching
- From: Uros Bizjak <ubizjak at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Fri, 01 Dec 2006 21:27:17 +0100
- Subject: [PATCH, i386]: Insert "cld" via optimize mode switching
Hello!
The attached patch optimizes insertion of the "cld" instruction via the
optimize mode switching pass.
The "original" insn pattern, as emitted by the expanders, is split just
before the mode switching pass into a "fake" cld insn pattern "(use (reg:SI
DIRFLAG_REG))", which has the sole purpose of showing its "cld" type
attribute during the o_m_s pass. The "real" instruction is generated as the
"cld_1" insn pattern during the optimize_mode_switching pass at its optimal
position. Although the fake cld instructions remain present in the insn flow,
they don't emit anything into *.s files. The mode is reset to UNINITIALIZED
after every call and asm insn pattern.
This optimization improves (among others) the following testcase:
--cut here--
/* Destination (a) and source (b) buffers for the block copy below.  */
char a[2048];
char b[2048];
/* Testcase: copies the 2048-byte array b into a ten times in a loop
   using __builtin_memcpy.  */
t()
{
int i;
for (i = 0; i < 10; i++)
__builtin_memcpy (a,b,2048);
}
--cut here--
into:
t:
.LFB2:
movl $a, %r8d
movl $b, %edx
movl $256, %ecx
cld << only one cld instruction
movq %r8, %rdi
movq %rdx, %rsi
movl $1, %eax
rep
movsq
.p2align 4,,7
.L2:
addl $1, %eax
movl $256, %ecx
movq %r8, %rdi
cmpl $10, %eax
movq %rdx, %rsi
rep
movsq
jne .L2
rep ; ret
Patch was bootstrapped on i686-pc-linux-gnu and x86_64-pc-linux-gnu,
regression tested on i686-pc-linux-gnu for c, c++ and fortran.
OK for mainline?
2006-12-01 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386-protos.h (ix86_emit_mode_set): New function.
(emit_i387_cw_initialization): Remove.
* config/i386/i386.h (enum ix86_fpu_mode): New enum.
(enum ix86_entity): Add CLD_FLAG.
(NUM_MODES_FOR_MODE_SWITCHING): Add CLD_FLAG_ANY.
(EMIT_MODE_SET): Implement using ix86_emit_mode_set.
* config/i386/i386.c (ix86_mode_needed): Handle CLD_FLAG entity.
(ix86_emit_mode_set): Rename from emit_i387_cw_initialization.
Handle CLD_FLAG entity.
* config/i386/i386.md ("cld_1"): Rename from "cld" insn pattern.
("*cld_dummy"): New insn pattern.
("cld"): New insn and splitter pattern.
Uros.
Index: i386/i386.h
===================================================================
--- i386/i386.h (revision 119399)
+++ i386/i386.h (working copy)
@@ -2184,6 +2184,10 @@
extern rtx ix86_compare_op1; /* operand 1 for comparisons */
extern rtx ix86_compare_emitted;
+
+/* Mode states for optimized CLD switching. */
+enum ix86_fpu_mode { CLD_FLAG_SET, CLD_FLAG_UNINITIALIZED, CLD_FLAG_ANY };
+
/* To properly truncate FP values into integers, we need to set i387 control
word. We can't emit proper mode switching code before reload, as spills
generated by reload may truncate values incorrectly, but we still can avoid
@@ -2201,7 +2205,8 @@
enum ix86_entity
{
- I387_TRUNC = 0,
+ CLD_FLAG = 0,
+ I387_TRUNC,
I387_FLOOR,
I387_CEIL,
I387_MASK_PM,
@@ -2234,7 +2239,7 @@
refer to the mode-switched entity in question. */
#define NUM_MODES_FOR_MODE_SWITCHING \
- { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
+ { CLD_FLAG_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
/* ENTITY is an integer specifying a mode-switched entity. If
`OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to
@@ -2254,9 +2259,7 @@
are to be inserted. */
#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
- ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED \
- ? emit_i387_cw_initialization (MODE), 0 \
- : 0)
+ ix86_emit_mode_set ((ENTITY), (MODE))
/* Avoid renaming of stack registers, as doing so in combination with
Index: i386/i386.md
===================================================================
--- i386/i386.md (revision 119399)
+++ i386/i386.md (working copy)
@@ -18141,12 +18141,36 @@
;; Block operation instructions
-(define_insn "cld"
- [(set (reg:SI DIRFLAG_REG) (const_int 0))]
- ""
- "cld"
+(define_insn "cld_1"
+ [(set (reg:SI DIRFLAG_REG) (const_int 0))]
+ ""
+ "cld"
[(set_attr "type" "cld")])
+;; The following instruction should never be emitted. Its only purpose
+;; is to show "cld" type attribute during optimize mode switching pass.
+
+(define_insn "*cld_dummy"
+ [(use (reg:SI DIRFLAG_REG))]
+ ""
+ "* gcc_unreachable();"
+ [(set_attr "type" "cld")])
+
+;; This insn is split into *cld_dummy just before optimize mode
+;; switching pass. Based on *cld_dummy position, o_m_s pass will
+;; insert cld_1 in some appropriate place.
+
+(define_insn_and_split "cld"
+ [(set (reg:SI DIRFLAG_REG) (const_int 1))]
+ "!(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(use (reg:SI DIRFLAG_REG))]
+{
+ ix86_optimize_mode_switching[CLD_FLAG] = 1;
+}
+ [(set_attr "type" "cld")])
+
(define_expand "movmemsi"
[(use (match_operand:BLK 0 "memory_operand" ""))
(use (match_operand:BLK 1 "memory_operand" ""))
Index: i386/i386-protos.h
===================================================================
--- i386/i386-protos.h (revision 119399)
+++ i386/i386-protos.h (working copy)
@@ -147,7 +147,7 @@
extern enum reg_class ix86_preferred_output_reload_class (rtx, enum reg_class);
extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
extern int ix86_mode_needed (int, rtx);
-extern void emit_i387_cw_initialization (int);
+extern void ix86_emit_mode_set (int, int);
extern bool ix86_fp_jump_nontrivial_p (enum rtx_code);
extern void x86_order_regs_for_local_alloc (void);
extern void x86_function_profiler (FILE *, int);
Index: i386/i386.c
===================================================================
--- i386/i386.c (revision 119399)
+++ i386/i386.c (working copy)
@@ -8800,27 +8802,37 @@
return buf;
}
-/* Return needed mode for entity in optimize_mode_switching pass. */
+/* Return needed mode for entity in optimize_mode_switching pass.
+ Returned mode should match ix86_mode_entry () for function calls. */
int
ix86_mode_needed (int entity, rtx insn)
{
enum attr_i387_cw mode;
+ bool uninit;
- /* The mode UNINITIALIZED is used to store control word after a
- function call or ASM pattern. The mode ANY specify that function
- has no requirements on the control word and make no changes in the
- bits we are interested in. */
+ if (recog_memoized (insn) < 0)
+ return (entity == CLD_FLAG) ? CLD_FLAG_ANY : I387_CW_ANY;
- if (CALL_P (insn)
- || (NONJUMP_INSN_P (insn)
- && (asm_noperands (PATTERN (insn)) >= 0
- || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
- return I387_CW_UNINITIALIZED;
+ /* We don't know mode state after a function call or ASM pattern. */
+ uninit = (CALL_P (insn)
+ || (NONJUMP_INSN_P (insn)
+ && (asm_noperands (PATTERN (insn)) >= 0
+ || GET_CODE (PATTERN (insn)) == ASM_INPUT)));
- if (recog_memoized (insn) < 0)
- return I387_CW_ANY;
+ /* Return CLD mode depending on insn type. */
+ if (entity == CLD_FLAG)
+ {
+ if (uninit)
+ return CLD_FLAG_UNINITIALIZED;
+ if (get_attr_type (insn) == TYPE_CLD)
+ return CLD_FLAG_SET;
+
+ return CLD_FLAG_ANY;
+ }
+
+ /* Return i387 control word depending on insn i387_cw attribute. */
mode = get_attr_i387_cw (insn);
switch (entity)
@@ -8852,21 +8864,38 @@
return I387_CW_ANY;
}
-/* Output code to initialize control word copies used by trunc?f?i and
- rounding patterns. CURRENT_MODE is set to current control word,
- while NEW_MODE is set to new control word. */
+/* Emit mode switching instructions in optimize_mode_switching pass. */
void
-emit_i387_cw_initialization (int mode)
+ix86_emit_mode_set (int entity, int mode)
{
- rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
- rtx new_mode;
+ rtx stored_mode, new_mode;
+ rtx reg;
int slot;
- rtx reg = gen_reg_rtx (HImode);
+ /* Handle CLD flag mode switching. */
+ if (entity == CLD_FLAG)
+ {
+ if (mode == CLD_FLAG_SET)
+ emit_insn (gen_cld_1());
+ return;
+ }
+
+ /* Output code to initialize control word copies used by trunc?f?i and
+ rounding patterns. CURRENT_MODE is set to current control word,
+ while NEW_MODE is set to new control word. */
+
+ if ((mode == I387_CW_UNINITIALIZED) || (mode == I387_CW_ANY))
+ return;
+
+ stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
emit_insn (gen_x86_fnstcw_1 (stored_mode));
+
+ reg = gen_reg_rtx (HImode);
+
+ emit_insn (gen_x86_fnstcw_1 (stored_mode));
emit_move_insn (reg, copy_rtx (stored_mode));
if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)