This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Update MOVE_RATIO for profile


Hi,
this patch adds a speed argument to the MOVE_RATIO/CLEAR_RATIO/SET_RATIO macros.
This mostly completes the transition of costs to the profile, with the exception
of the MOVE_COST macros.  I have a patch for those too, but they have little
effect and thus I would like to defer it until the next stage1.

Bootstrapped/retested on x86_64-linux; additionally, I've tested that all
modified targets except for alpha build.  In a separate patch I will send fixes
for problems I introduced in the previous RTX_COST update and for some dataflow
updating I noticed during the process.

I will commit it tomorrow if there are no complaints.

	* expr.c (MOVE_BY_PIECES_P, CLEAR_BY_PIECES_P, SET_BY_PIECES_P,
	STORE_BY_PIECES_P): Pass speed operand.
	* expr.h (MOVE_RATIO, CLEAR_RATIO, SET_RATIO): Update.
	* gimplify.c (gimplify_init_constructor): Add speed operand.
	* tree-sra.c (decide_block_copy): Likewise.
	* tree-inline.c (estimate_move_cost): Likewise.
	* config/alpha/alpha.h (MOVE_RATIO): Update.
	* config/s390/s390.h (MOVE_RATIO): Update.
	* config/frv/frv.c (MOVE_RATIO): Update.
	* config/spu/spu.h (MOVE_RATIO): Update.
	* config/sparc/sparc.h (MOVE_RATIO): Update.
	* config/i386/i386.h (MOVE_RATIO, CLEAR_RATIO): Update.
	* config/m68hc11/m68hc11.h (MOVE_RATIO): Update.
	* config/cris/cris.h (MOVE_RATIO): Update.
	* config/mn10300/mn10300.h (MOVE_RATIO): Update.
	* config/arm/arm.h (MOVE_RATIO): Update.
	* config/pa/pa.md: Update uses of MOVE_RATIO.
	* config/pa/pa.h (MOVE_RATIO): Update.
	* config/mips/mips.h (MOVE_RATIO, MOVE_BY_PIECES_P, CLEAR_RATIO,
	SET_RATIO): Update.
	* config/h8300/h8300.h (MOVE_RATIO): Update.
	* config/v850/v850.h (MOVE_RATIO): Update.
	* config/bfin/bfin.h (MOVE_RATIO): Update.
Index: expr.c
===================================================================
--- expr.c	(revision 139836)
+++ expr.c	(working copy)
@@ -175,7 +175,7 @@ static bool float_extend_from_mem[NUM_MA
 #ifndef MOVE_BY_PIECES_P
 #define MOVE_BY_PIECES_P(SIZE, ALIGN) \
   (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
-   < (unsigned int) MOVE_RATIO)
+   < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()))
 #endif
 
 /* This macro is used to determine whether clear_by_pieces should be
@@ -183,7 +183,7 @@ static bool float_extend_from_mem[NUM_MA
 #ifndef CLEAR_BY_PIECES_P
 #define CLEAR_BY_PIECES_P(SIZE, ALIGN) \
   (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
-   < (unsigned int) CLEAR_RATIO)
+   < (unsigned int) CLEAR_RATIO (optimize_insn_for_speed_p ()))
 #endif
 
 /* This macro is used to determine whether store_by_pieces should be
@@ -191,7 +191,7 @@ static bool float_extend_from_mem[NUM_MA
 #ifndef SET_BY_PIECES_P
 #define SET_BY_PIECES_P(SIZE, ALIGN) \
   (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
-   < (unsigned int) SET_RATIO)
+   < (unsigned int) SET_RATIO (optimize_insn_for_speed_p ()))
 #endif
 
 /* This macro is used to determine whether store_by_pieces should be
@@ -199,7 +199,7 @@ static bool float_extend_from_mem[NUM_MA
 #ifndef STORE_BY_PIECES_P
 #define STORE_BY_PIECES_P(SIZE, ALIGN) \
   (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
-   < (unsigned int) MOVE_RATIO)
+   < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()))
 #endif
 
 /* This array records the insn_code of insns to perform block moves.  */
Index: expr.h
===================================================================
--- expr.h	(revision 139836)
+++ expr.h	(working copy)
@@ -66,10 +66,10 @@ enum expand_modifier {EXPAND_NORMAL = 0,
 
 #ifndef MOVE_RATIO
 #if defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti)
-#define MOVE_RATIO 2
+#define MOVE_RATIO(speed) 2
 #else
 /* If we are optimizing for space (-Os), cut down the default move ratio.  */
-#define MOVE_RATIO (optimize_size ? 3 : 15)
+#define MOVE_RATIO(speed) ((speed) ? 15 : 3)
 #endif
 #endif
 
@@ -78,10 +78,10 @@ enum expand_modifier {EXPAND_NORMAL = 0,
 
 #ifndef CLEAR_RATIO
 #if defined (HAVE_setmemqi) || defined (HAVE_setmemhi) || defined (HAVE_setmemsi) || defined (HAVE_setmemdi) || defined (HAVE_setmemti)
-#define CLEAR_RATIO 2
+#define CLEAR_RATIO(speed) 2
 #else
 /* If we are optimizing for space, cut down the default clear ratio.  */
-#define CLEAR_RATIO (optimize_size ? 3 : 15)
+#define CLEAR_RATIO(speed) ((speed) ? 15 :3)
 #endif
 #endif
 
@@ -89,7 +89,7 @@ enum expand_modifier {EXPAND_NORMAL = 0,
    SET_RATIO or more simple move-instruction sequences, we will do a movmem
    or libcall instead.  */
 #ifndef SET_RATIO
-#define SET_RATIO MOVE_RATIO
+#define SET_RATIO(speed) MOVE_RATIO(speed)
 #endif
 
 enum direction {none, upward, downward};
Index: gimplify.c
===================================================================
--- gimplify.c	(revision 139836)
+++ gimplify.c	(working copy)
@@ -3595,7 +3595,8 @@ gimplify_init_constructor (tree *expr_p,
 	if (num_type_elements < 0 && int_size_in_bytes (type) >= 0)
 	  cleared = true;
 	/* If there are "lots" of zeros, then block clear the object first.  */
-	else if (num_type_elements - num_nonzero_elements > CLEAR_RATIO
+	else if (num_type_elements - num_nonzero_elements
+		 > CLEAR_RATIO (optimize_function_for_speed_p (cfun))
 		 && num_nonzero_elements < num_type_elements/4)
 	  cleared = true;
 	/* ??? This bit ought not be needed.  For any element not present
Index: tree-sra.c
===================================================================
--- tree-sra.c	(revision 139836)
+++ tree-sra.c	(working copy)
@@ -1902,10 +1902,10 @@ decide_block_copy (struct sra_elt *elt)
 	     sensible default.  */
 	  max_size = SRA_MAX_STRUCTURE_SIZE
 	    ? SRA_MAX_STRUCTURE_SIZE
-	    : MOVE_RATIO * UNITS_PER_WORD;
+	    : MOVE_RATIO (optimize_function_for_speed_p (cfun)) * UNITS_PER_WORD;
 	  max_count = SRA_MAX_STRUCTURE_COUNT
 	    ? SRA_MAX_STRUCTURE_COUNT
-	    : MOVE_RATIO;
+	    : MOVE_RATIO (optimize_function_for_speed_p (cfun));
 
 	  full_size = tree_low_cst (size_tree, 1);
 	  full_count = count_type_elements (elt->type, false);
Index: tree-inline.c
===================================================================
--- tree-inline.c	(revision 139836)
+++ tree-inline.c	(working copy)
@@ -2614,7 +2614,7 @@ estimate_move_cost (tree type)
 
   size = int_size_in_bytes (type);
 
-  if (size < 0 || size > MOVE_MAX_PIECES * MOVE_RATIO)
+  if (size < 0 || size > MOVE_MAX_PIECES * MOVE_RATIO (!optimize_size))
     /* Cost of a memcpy call, 3 arguments and the call.  */
     return 4;
   else
Index: config/alpha/alpha.h
===================================================================
--- config/alpha/alpha.h	(revision 139836)
+++ config/alpha/alpha.h	(working copy)
@@ -1079,7 +1079,7 @@ do {									     \
    Without byte/word accesses, we want no more than four instructions;
    with, several single byte accesses are better.  */
 
-#define MOVE_RATIO  (TARGET_BWX ? 7 : 2)
+#define MOVE_RATIO(speed)  (TARGET_BWX ? 7 : 2)
 
 /* Largest number of bytes of an object that can be placed in a register.
    On the Alpha we have plenty of registers, so use TImode.  */
Index: config/s390/s390.h
===================================================================
--- config/s390/s390.h	(revision 139836)
+++ config/s390/s390.h	(working copy)
@@ -872,7 +872,7 @@ extern struct rtx_def *s390_compare_op0,
    in tree-sra with UNITS_PER_WORD to make a decision so we adjust it
    here to compensate for that factor since mvc costs exactly the same
    on 31 and 64 bit.  */
-#define MOVE_RATIO (TARGET_64BIT? 2 : 4)
+#define MOVE_RATIO(speed) (TARGET_64BIT? 2 : 4)
 
 
 /* Sections.  */
Index: config/spu/spu.h
===================================================================
--- config/spu/spu.h	(revision 139836)
+++ config/spu/spu.h	(working copy)
@@ -438,7 +438,7 @@ targetm.resolve_overloaded_builtin = spu
 
 #define SLOW_BYTE_ACCESS 0
 
-#define MOVE_RATIO 32
+#define MOVE_RATIO(speed) 32
 
 #define NO_FUNCTION_CSE
 
Index: config/sparc/sparc.h
===================================================================
--- config/sparc/sparc.h	(revision 139836)
+++ config/sparc/sparc.h	(working copy)
@@ -2099,7 +2099,7 @@ do {                                    
 /* If a memory-to-memory move would take MOVE_RATIO or more simple
    move-instruction pairs, we will do a movmem or libcall instead.  */
 
-#define MOVE_RATIO (optimize_size ? 3 : 8)
+#define MOVE_RATIO(speed) ((speed) ? 8 : 3)
 
 /* Define if operations between registers always perform the operation
    on the full register even if a narrower mode is specified.  */
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 139836)
+++ config/i386/i386.h	(working copy)
@@ -1906,12 +1906,12 @@ do {									\
 
    If you don't define this, a reasonable default is used.  */
 
-#define MOVE_RATIO (optimize_size ? 3 : ix86_cost->move_ratio)
+#define MOVE_RATIO(speed) ((speed) ? ix86_cost->move_ratio : 3)
 
 /* If a clear memory operation would take CLEAR_RATIO or more simple
    move-instruction sequences, we will do a clrmem or libcall instead.  */
 
-#define CLEAR_RATIO (optimize_size ? 2 : MIN (6, ix86_cost->move_ratio))
+#define CLEAR_RATIO(speed) ((speed) ? MIN (6, ix86_cost->move_ratio) : 2)
 
 /* Define if shifts truncate the shift count
    which implies one can omit a sign-extension or zero-extension
Index: config/m68hc11/m68hc11.h
===================================================================
--- config/m68hc11/m68hc11.h	(revision 139836)
+++ config/m68hc11/m68hc11.h	(working copy)
@@ -1508,7 +1505,7 @@ do {                                    
 /* MOVE_RATIO is the number of move instructions that is better than a
    block move.  Make this small on 6811, since the code size grows very
    large with each move.  */
-#define MOVE_RATIO		3
+#define MOVE_RATIO(speed)	3
 
 /* Define if shifts truncate the shift count which implies one can omit
    a sign-extension or zero-extension of a shift count.  */
Index: config/cris/cris.h
===================================================================
--- config/cris/cris.h	(revision 139836)
+++ config/cris/cris.h	(working copy)
@@ -1242,7 +1242,7 @@ struct cum_args {int regs;};
    word-length sizes will be emitted.  The "9" will translate to
    (9 - 1) * 4 = 32 bytes maximum moved, but using 16 instructions
    (8 instruction sequences) or less.  */
-#define MOVE_RATIO 9
+#define MOVE_RATIO(speed) 9
 
 
 /* Node: Sections */
Index: config/mn10300/mn10300.h
===================================================================
--- config/mn10300/mn10300.h	(revision 139836)
+++ config/mn10300/mn10300.h	(working copy)
@@ -814,7 +814,7 @@ while (0)
 
 /* According expr.c, a value of around 6 should minimize code size, and
    for the MN10300 series, that's our primary concern.  */
-#define MOVE_RATIO 6
+#define MOVE_RATIO(speed) 6
 
 #define TEXT_SECTION_ASM_OP "\t.section .text"
 #define DATA_SECTION_ASM_OP "\t.section .data"
Index: config/arm/arm.h
===================================================================
--- config/arm/arm.h	(revision 139836)
+++ config/arm/arm.h	(working copy)
@@ -2244,7 +2244,7 @@ do {							\
 #define MOVE_MAX 4
 
 #undef  MOVE_RATIO
-#define MOVE_RATIO (arm_tune_xscale ? 4 : 2)
+#define MOVE_RATIO(speed) (arm_tune_xscale ? 4 : 2)
 
 /* Define if operations between registers always perform the operation
    on the full register even if a narrower mode is specified.  */
Index: config/pa/pa.md
===================================================================
--- config/pa/pa.md	(revision 139836)
+++ config/pa/pa.md	(working copy)
@@ -3487,7 +3487,7 @@
     FAIL;
 
   /* This does happen, but not often enough to worry much about.  */
-  if (size / align < MOVE_RATIO)
+  if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
     FAIL;
   
   /* Fall through means we're going to use our block move pattern.  */
@@ -3675,7 +3675,7 @@
     FAIL;
 
   /* This does happen, but not often enough to worry much about.  */
-  if (size / align < MOVE_RATIO)
+  if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
     FAIL;
   
   /* Fall through means we're going to use our block move pattern.  */
@@ -3842,7 +3842,7 @@
     FAIL;
 
   /* This does happen, but not often enough to worry much about.  */
-  if (size / align < MOVE_RATIO)
+  if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
     FAIL;
   
   /* Fall through means we're going to use our block clear pattern.  */
@@ -3956,7 +3956,7 @@
     FAIL;
 
   /* This does happen, but not often enough to worry much about.  */
-  if (size / align < MOVE_RATIO)
+  if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
     FAIL;
   
   /* Fall through means we're going to use our block clear pattern.  */
Index: config/pa/pa.h
===================================================================
--- config/pa/pa.h	(revision 139836)
+++ config/pa/pa.h	(working copy)
@@ -1506,7 +1506,7 @@ do { 									\
    arguments passed in registers to avoid infinite recursion during argument
    setup for a function call.  Why?  Consider how we copy the stack slots
    reserved for parameters when they may be trashed by a call.  */
-#define MOVE_RATIO (TARGET_64BIT ? 8 : 4)
+#define MOVE_RATIO(speed) (TARGET_64BIT ? 8 : 4)
 
 /* Define if operations between registers always perform the operation
    on the full register even if a narrower mode is specified.  */
Index: config/mips/mips.h
===================================================================
--- config/mips/mips.h	(revision 139836)
+++ config/mips/mips.h	(working copy)
@@ -2940,7 +2940,7 @@ while (0)
    we'll have to generate a load/store pair for each, halve the
    value of MIPS_CALL_RATIO to take that into account.  */
 
-#define MOVE_RATIO					\
+#define MOVE_RATIO(speed)				\
   (HAVE_movmemsi					\
    ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX		\
    : MIPS_CALL_RATIO / 2)
@@ -2961,20 +2961,20 @@ while (0)
 	  ? (SIZE) < UNITS_PER_WORD				\
 	  : (SIZE) <= MIPS_MAX_MOVE_BYTES_STRAIGHT))		\
    : (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1)	\
-      < (unsigned int) MOVE_RATIO))
+      < (unsigned int) MOVE_RATIO (false)))
 
 /* For CLEAR_RATIO, when optimizing for size, give a better estimate
    of the length of a memset call, but use the default otherwise.  */
 
-#define CLEAR_RATIO \
-  (optimize_size ? MIPS_CALL_RATIO : 15)
+#define CLEAR_RATIO(speed)\
+  ((speed) ? 15 : MIPS_CALL_RATIO)
 
 /* This is similar to CLEAR_RATIO, but for a non-zero constant, so when
    optimizing for size adjust the ratio to account for the overhead of
    loading the constant and replicating it across the word.  */
 
-#define SET_RATIO \
-  (optimize_size ? MIPS_CALL_RATIO - 2 : 15)
+#define SET_RATIO(speed) \
+  ((speed) ? 15 : MIPS_CALL_RATIO - 2)
 
 /* STORE_BY_PIECES_P can be used when copying a constant string, but
    in that case each word takes 3 insns (lui, ori, sw), or more in
Index: config/h8300/h8300.h
===================================================================
--- config/h8300/h8300.h	(revision 139836)
+++ config/h8300/h8300.h	(working copy)
@@ -1189,10 +1189,8 @@ struct cum_arg
 #define FINAL_PRESCAN_INSN(insn, operand, nop)	\
   final_prescan_insn (insn, operand, nop)
 
-#define MOVE_RATIO 3
 extern int h8300_move_ratio;
-#undef  MOVE_RATIO
-#define MOVE_RATIO h8300_move_ratio
+#define MOVE_RATIO(speed) h8300_move_ratio
 
 /* Machine-specific symbol_ref flags.  */
 #define SYMBOL_FLAG_FUNCVEC_FUNCTION	(SYMBOL_FLAG_MACH_DEP << 0)
Index: config/v850/v850.h
===================================================================
--- config/v850/v850.h	(revision 139836)
+++ config/v850/v850.h	(working copy)
@@ -865,7 +865,7 @@ do {									\
 
 /* According expr.c, a value of around 6 should minimize code size, and
    for the V850 series, that's our primary concern.  */
-#define MOVE_RATIO 6
+#define MOVE_RATIO(speed) 6
 
 /* Indirect calls are expensive, never turn a direct call
    into an indirect call.  */
Index: config/bfin/bfin.h
===================================================================
--- config/bfin/bfin.h	(revision 139836)
+++ config/bfin/bfin.h	(working copy)
@@ -998,7 +998,7 @@ do {					       \
 /* If a memory-to-memory move would take MOVE_RATIO or more simple
    move-instruction pairs, we will do a movmem or libcall instead.  */
 
-#define MOVE_RATIO 5
+#define MOVE_RATIO(speed) 5
 
 /* STORAGE LAYOUT: target machine storage layout
    Define this macro as a C expression which is nonzero if accessing


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]