This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Update MOVE_RATIO for profile
- From: Jan Hubicka <jh at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sun, 31 Aug 2008 21:48:59 +0200
- Subject: Update MOVE_RATIO for profile
Hi,
this patch adds a speed argument to the MOVE_RATIO/CLEAR_RATIO/SET_RATIO macros.
This mostly completes the transition of costs to profile, with the exception of the
MOVE_COST macros. I have a patch for those too, but they have little effect,
and thus I would like to defer it to the next stage1.
Bootstrapped/retested on x86_64-linux; additionally, I've test-built
all modified targets except for alpha. In a separate patch I will send fixes
for problems I introduced in the previous RTX_COST update and for some dataflow updating I noticed
during the process.
I will commit it tomorrow if there are no complaints.
* expr.c (MOVE_BY_PIECES_P, CLEAR_BY_PIECES_P, SET_BY_PIECES_P, STORE_BY_PIECES_P): Pass speed operand.
* expr.h (MOVE_RATIO, CLEAR_RATIO, SET_RATIO): Update.
* gimplify.c (gimplify_init_constructor): Add speed operand.
* tree-sra.c (decide_block_copy): Likewise.
* tree-inline.c (estimate_move_cost): Likewise.
* config/alpha/alpha.h (MOVE_RATIO): Update.
* config/frv/frv.c (MOVE_RATIO): Update.
* config/s390/s390.h (MOVE_RATIO): Update.
* config/spu/spu.h (MOVE_RATIO): Update.
* config/sparc/sparc.h (MOVE_RATIO): Update.
* config/i386/i386.h (MOVE_RATIO, CLEAR_RATIO): Update.
* config/m68hc11/m68hc11.h (MOVE_RATIO): Update.
* config/cris/cris.h (MOVE_RATIO): Update.
* config/mn10300/mn10300.h (MOVE_RATIO): Update.
* config/arm/arm.h (MOVE_RATIO): Update.
* config/pa/pa.md: Update uses of MOVE_RATIO
* config/pa/pa.h (MOVE_RATIO): Update.
* config/mips/mips.h (MOVE_RATIO, MOVE_BY_PIECES_P, CLEAR_RATIO, SET_RATIO): Update.
* config/h8300/h8300.h (MOVE_RATIO): Update.
* config/v850/v850.h (MOVE_RATIO): Update.
* config/bfin/bfin.h (MOVE_RATIO): Update.
Index: expr.c
===================================================================
--- expr.c (revision 139836)
+++ expr.c (working copy)
@@ -175,7 +175,7 @@ static bool float_extend_from_mem[NUM_MA
#ifndef MOVE_BY_PIECES_P
#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
(move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
- < (unsigned int) MOVE_RATIO)
+ < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()))
#endif
/* This macro is used to determine whether clear_by_pieces should be
@@ -183,7 +183,7 @@ static bool float_extend_from_mem[NUM_MA
#ifndef CLEAR_BY_PIECES_P
#define CLEAR_BY_PIECES_P(SIZE, ALIGN) \
(move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
- < (unsigned int) CLEAR_RATIO)
+ < (unsigned int) CLEAR_RATIO (optimize_insn_for_speed_p ()))
#endif
/* This macro is used to determine whether store_by_pieces should be
@@ -191,7 +191,7 @@ static bool float_extend_from_mem[NUM_MA
#ifndef SET_BY_PIECES_P
#define SET_BY_PIECES_P(SIZE, ALIGN) \
(move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
- < (unsigned int) SET_RATIO)
+ < (unsigned int) SET_RATIO (optimize_insn_for_speed_p ()))
#endif
/* This macro is used to determine whether store_by_pieces should be
@@ -199,7 +199,7 @@ static bool float_extend_from_mem[NUM_MA
#ifndef STORE_BY_PIECES_P
#define STORE_BY_PIECES_P(SIZE, ALIGN) \
(move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
- < (unsigned int) MOVE_RATIO)
+ < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()))
#endif
/* This array records the insn_code of insns to perform block moves. */
Index: expr.h
===================================================================
--- expr.h (revision 139836)
+++ expr.h (working copy)
@@ -66,10 +66,10 @@ enum expand_modifier {EXPAND_NORMAL = 0,
#ifndef MOVE_RATIO
#if defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti)
-#define MOVE_RATIO 2
+#define MOVE_RATIO(speed) 2
#else
/* If we are optimizing for space (-Os), cut down the default move ratio. */
-#define MOVE_RATIO (optimize_size ? 3 : 15)
+#define MOVE_RATIO(speed) ((speed) ? 15 : 3)
#endif
#endif
@@ -78,10 +78,10 @@ enum expand_modifier {EXPAND_NORMAL = 0,
#ifndef CLEAR_RATIO
#if defined (HAVE_setmemqi) || defined (HAVE_setmemhi) || defined (HAVE_setmemsi) || defined (HAVE_setmemdi) || defined (HAVE_setmemti)
-#define CLEAR_RATIO 2
+#define CLEAR_RATIO(speed) 2
#else
/* If we are optimizing for space, cut down the default clear ratio. */
-#define CLEAR_RATIO (optimize_size ? 3 : 15)
+#define CLEAR_RATIO(speed) ((speed) ? 15 :3)
#endif
#endif
@@ -89,7 +89,7 @@ enum expand_modifier {EXPAND_NORMAL = 0,
SET_RATIO or more simple move-instruction sequences, we will do a movmem
or libcall instead. */
#ifndef SET_RATIO
-#define SET_RATIO MOVE_RATIO
+#define SET_RATIO(speed) MOVE_RATIO(speed)
#endif
enum direction {none, upward, downward};
Index: gimplify.c
===================================================================
--- gimplify.c (revision 139836)
+++ gimplify.c (working copy)
@@ -3595,7 +3595,8 @@ gimplify_init_constructor (tree *expr_p,
if (num_type_elements < 0 && int_size_in_bytes (type) >= 0)
cleared = true;
/* If there are "lots" of zeros, then block clear the object first. */
- else if (num_type_elements - num_nonzero_elements > CLEAR_RATIO
+ else if (num_type_elements - num_nonzero_elements
+ > CLEAR_RATIO (optimize_function_for_speed_p (cfun))
&& num_nonzero_elements < num_type_elements/4)
cleared = true;
/* ??? This bit ought not be needed. For any element not present
Index: tree-sra.c
===================================================================
--- tree-sra.c (revision 139836)
+++ tree-sra.c (working copy)
@@ -1902,10 +1902,10 @@ decide_block_copy (struct sra_elt *elt)
sensible default. */
max_size = SRA_MAX_STRUCTURE_SIZE
? SRA_MAX_STRUCTURE_SIZE
- : MOVE_RATIO * UNITS_PER_WORD;
+ : MOVE_RATIO (optimize_function_for_speed_p (cfun)) * UNITS_PER_WORD;
max_count = SRA_MAX_STRUCTURE_COUNT
? SRA_MAX_STRUCTURE_COUNT
- : MOVE_RATIO;
+ : MOVE_RATIO (optimize_function_for_speed_p (cfun));
full_size = tree_low_cst (size_tree, 1);
full_count = count_type_elements (elt->type, false);
Index: tree-inline.c
===================================================================
--- tree-inline.c (revision 139836)
+++ tree-inline.c (working copy)
@@ -2614,7 +2614,7 @@ estimate_move_cost (tree type)
size = int_size_in_bytes (type);
- if (size < 0 || size > MOVE_MAX_PIECES * MOVE_RATIO)
+ if (size < 0 || size > MOVE_MAX_PIECES * MOVE_RATIO (!optimize_size))
/* Cost of a memcpy call, 3 arguments and the call. */
return 4;
else
Index: config/alpha/alpha.h
===================================================================
--- config/alpha/alpha.h (revision 139836)
+++ config/alpha/alpha.h (working copy)
@@ -1079,7 +1079,7 @@ do { \
Without byte/word accesses, we want no more than four instructions;
with, several single byte accesses are better. */
-#define MOVE_RATIO (TARGET_BWX ? 7 : 2)
+#define MOVE_RATIO(speed) (TARGET_BWX ? 7 : 2)
/* Largest number of bytes of an object that can be placed in a register.
On the Alpha we have plenty of registers, so use TImode. */
Index: config/s390/s390.h
===================================================================
--- config/s390/s390.h (revision 139836)
+++ config/s390/s390.h (working copy)
@@ -872,7 +872,7 @@ extern struct rtx_def *s390_compare_op0,
in tree-sra with UNITS_PER_WORD to make a decision so we adjust it
here to compensate for that factor since mvc costs exactly the same
on 31 and 64 bit. */
-#define MOVE_RATIO (TARGET_64BIT? 2 : 4)
+#define MOVE_RATIO(speed) (TARGET_64BIT? 2 : 4)
/* Sections. */
Index: config/spu/spu.h
===================================================================
--- config/spu/spu.h (revision 139836)
+++ config/spu/spu.h (working copy)
@@ -438,7 +438,7 @@ targetm.resolve_overloaded_builtin = spu
#define SLOW_BYTE_ACCESS 0
-#define MOVE_RATIO 32
+#define MOVE_RATIO(speed) 32
#define NO_FUNCTION_CSE
Index: config/sparc/sparc.h
===================================================================
--- config/sparc/sparc.h (revision 139836)
+++ config/sparc/sparc.h (working copy)
@@ -2099,7 +2099,7 @@ do {
/* If a memory-to-memory move would take MOVE_RATIO or more simple
move-instruction pairs, we will do a movmem or libcall instead. */
-#define MOVE_RATIO (optimize_size ? 3 : 8)
+#define MOVE_RATIO(speed) ((speed) ? 8 : 3)
/* Define if operations between registers always perform the operation
on the full register even if a narrower mode is specified. */
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h (revision 139836)
+++ config/i386/i386.h (working copy)
@@ -1906,12 +1906,12 @@ do { \
If you don't define this, a reasonable default is used. */
-#define MOVE_RATIO (optimize_size ? 3 : ix86_cost->move_ratio)
+#define MOVE_RATIO(speed) ((speed) ? ix86_cost->move_ratio : 3)
/* If a clear memory operation would take CLEAR_RATIO or more simple
move-instruction sequences, we will do a clrmem or libcall instead. */
-#define CLEAR_RATIO (optimize_size ? 2 : MIN (6, ix86_cost->move_ratio))
+#define CLEAR_RATIO(speed) ((speed) ? MIN (6, ix86_cost->move_ratio) : 2)
/* Define if shifts truncate the shift count
which implies one can omit a sign-extension or zero-extension
Index: config/m68hc11/m68hc11.h
===================================================================
--- config/m68hc11/m68hc11.h (revision 139836)
+++ config/m68hc11/m68hc11.h (working copy)
@@ -1508,7 +1505,7 @@ do {
/* MOVE_RATIO is the number of move instructions that is better than a
block move. Make this small on 6811, since the code size grows very
large with each move. */
-#define MOVE_RATIO 3
+#define MOVE_RATIO(speed) 3
/* Define if shifts truncate the shift count which implies one can omit
a sign-extension or zero-extension of a shift count. */
Index: config/cris/cris.h
===================================================================
--- config/cris/cris.h (revision 139836)
+++ config/cris/cris.h (working copy)
@@ -1242,7 +1242,7 @@ struct cum_args {int regs;};
word-length sizes will be emitted. The "9" will translate to
(9 - 1) * 4 = 32 bytes maximum moved, but using 16 instructions
(8 instruction sequences) or less. */
-#define MOVE_RATIO 9
+#define MOVE_RATIO(speed) 9
/* Node: Sections */
Index: config/mn10300/mn10300.h
===================================================================
--- config/mn10300/mn10300.h (revision 139836)
+++ config/mn10300/mn10300.h (working copy)
@@ -814,7 +814,7 @@ while (0)
/* According expr.c, a value of around 6 should minimize code size, and
for the MN10300 series, that's our primary concern. */
-#define MOVE_RATIO 6
+#define MOVE_RATIO(speed) 6
#define TEXT_SECTION_ASM_OP "\t.section .text"
#define DATA_SECTION_ASM_OP "\t.section .data"
Index: config/arm/arm.h
===================================================================
--- config/arm/arm.h (revision 139836)
+++ config/arm/arm.h (working copy)
@@ -2244,7 +2244,7 @@ do { \
#define MOVE_MAX 4
#undef MOVE_RATIO
-#define MOVE_RATIO (arm_tune_xscale ? 4 : 2)
+#define MOVE_RATIO(speed) (arm_tune_xscale ? 4 : 2)
/* Define if operations between registers always perform the operation
on the full register even if a narrower mode is specified. */
Index: config/pa/pa.md
===================================================================
--- config/pa/pa.md (revision 139836)
+++ config/pa/pa.md (working copy)
@@ -3487,7 +3487,7 @@
FAIL;
/* This does happen, but not often enough to worry much about. */
- if (size / align < MOVE_RATIO)
+ if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
FAIL;
/* Fall through means we're going to use our block move pattern. */
@@ -3675,7 +3675,7 @@
FAIL;
/* This does happen, but not often enough to worry much about. */
- if (size / align < MOVE_RATIO)
+ if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
FAIL;
/* Fall through means we're going to use our block move pattern. */
@@ -3842,7 +3842,7 @@
FAIL;
/* This does happen, but not often enough to worry much about. */
- if (size / align < MOVE_RATIO)
+ if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
FAIL;
/* Fall through means we're going to use our block clear pattern. */
@@ -3956,7 +3956,7 @@
FAIL;
/* This does happen, but not often enough to worry much about. */
- if (size / align < MOVE_RATIO)
+ if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
FAIL;
/* Fall through means we're going to use our block clear pattern. */
Index: config/pa/pa.h
===================================================================
--- config/pa/pa.h (revision 139836)
+++ config/pa/pa.h (working copy)
@@ -1506,7 +1506,7 @@ do { \
arguments passed in registers to avoid infinite recursion during argument
setup for a function call. Why? Consider how we copy the stack slots
reserved for parameters when they may be trashed by a call. */
-#define MOVE_RATIO (TARGET_64BIT ? 8 : 4)
+#define MOVE_RATIO(speed) (TARGET_64BIT ? 8 : 4)
/* Define if operations between registers always perform the operation
on the full register even if a narrower mode is specified. */
Index: config/mips/mips.h
===================================================================
--- config/mips/mips.h (revision 139836)
+++ config/mips/mips.h (working copy)
@@ -2940,7 +2940,7 @@ while (0)
we'll have to generate a load/store pair for each, halve the
value of MIPS_CALL_RATIO to take that into account. */
-#define MOVE_RATIO \
+#define MOVE_RATIO(speed) \
(HAVE_movmemsi \
? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \
: MIPS_CALL_RATIO / 2)
@@ -2961,20 +2961,20 @@ while (0)
? (SIZE) < UNITS_PER_WORD \
: (SIZE) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)) \
: (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
- < (unsigned int) MOVE_RATIO))
+ < (unsigned int) MOVE_RATIO (false)))
/* For CLEAR_RATIO, when optimizing for size, give a better estimate
of the length of a memset call, but use the default otherwise. */
-#define CLEAR_RATIO \
- (optimize_size ? MIPS_CALL_RATIO : 15)
+#define CLEAR_RATIO(speed)\
+ ((speed) ? 15 : MIPS_CALL_RATIO)
/* This is similar to CLEAR_RATIO, but for a non-zero constant, so when
optimizing for size adjust the ratio to account for the overhead of
loading the constant and replicating it across the word. */
-#define SET_RATIO \
- (optimize_size ? MIPS_CALL_RATIO - 2 : 15)
+#define SET_RATIO(speed) \
+ ((speed) ? 15 : MIPS_CALL_RATIO - 2)
/* STORE_BY_PIECES_P can be used when copying a constant string, but
in that case each word takes 3 insns (lui, ori, sw), or more in
Index: config/h8300/h8300.h
===================================================================
--- config/h8300/h8300.h (revision 139836)
+++ config/h8300/h8300.h (working copy)
@@ -1189,10 +1189,8 @@ struct cum_arg
#define FINAL_PRESCAN_INSN(insn, operand, nop) \
final_prescan_insn (insn, operand, nop)
-#define MOVE_RATIO 3
extern int h8300_move_ratio;
-#undef MOVE_RATIO
-#define MOVE_RATIO h8300_move_ratio
+#define MOVE_RATIO(speed) h8300_move_ratio
/* Machine-specific symbol_ref flags. */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
Index: config/v850/v850.h
===================================================================
--- config/v850/v850.h (revision 139836)
+++ config/v850/v850.h (working copy)
@@ -865,7 +865,7 @@ do { \
/* According expr.c, a value of around 6 should minimize code size, and
for the V850 series, that's our primary concern. */
-#define MOVE_RATIO 6
+#define MOVE_RATIO(speed) 6
/* Indirect calls are expensive, never turn a direct call
into an indirect call. */
Index: config/bfin/bfin.h
===================================================================
--- config/bfin/bfin.h (revision 139836)
+++ config/bfin/bfin.h (working copy)
@@ -998,7 +998,7 @@ do { \
/* If a memory-to-memory move would take MOVE_RATIO or more simple
move-instruction pairs, we will do a movmem or libcall instead. */
-#define MOVE_RATIO 5
+#define MOVE_RATIO(speed) 5
/* STORAGE LAYOUT: target machine storage layout
Define this macro as a C expression which is nonzero if accessing