This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] Make prefetching parameters controllable with --param


Hello,

This is the first of a series of patches that merge some of the bits
of the "Prefetching Improvements" project.  While I am going to submit
the main part of the changes later, there are some changes not
strictly related to the core of the project that I would like to
get merged before that.

This patch makes it possible to change the parameters for prefetching
(size of cache line, etc.) using --param, thus making it easier to
experiment with different values.

To make this possible, the params mechanism is modified so that one can
check whether the value of a parameter was specified by the user (so that
the default values set by the md do not override user-specified ones).

There are two more changes:
-- a parameter giving the size of the cache was added.  It is unused at
   the moment, but the "Prefetching Improvements" project uses it.
-- the default value for SIMULTANEOUS_PREFETCHES on k8 is changed,
   to reflect the behavior of AMD processors.

Bootstrapped & regtested (with -march=athlon -fprefetch-loop-arrays) on
i686.  I have also built a crosscompiler on ia64-linux, sh-linux and
sparc-linux, to check syntactical correctness of the changes to the
backends.

Zdenek

	* params.c (set_param_value): Initialize the "set" field.
	* params.h (struct param_info): Add "set" field.
	(PARAM_SET_P): New macro.
	(PREFETCH_LATENCY, SIMULTANEOUS_PREFETCHES, L1_CACHE_SIZE,
	L1_CACHE_LINE_SIZE): New macros.
	* toplev.c (DEFPARAM): Initialize the "set" field.
	* tree-ssa-loop-prefetch.c (PREFETCH_LATENCY,
	SIMULTANEOUS_PREFETCHES): Removed.
	(PREFETCH_BLOCK): Use L1_CACHE_LINE_SIZE.
	(tree_ssa_prefetch_arrays): Dump the values of the parameters.
	* config/sparc/sparc.c: Include params.h.
	(sparc_override_options): Set SIMULTANEOUS_PREFETCHES and
	L1_CACHE_LINE_SIZE parameters.
	* config/sparc/sparc.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
	Removed.
	* config/i386/i386.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
	Removed.
	* config/i386/i386.c: Include params.h.
	(k8_cost): Change default value for SIMULTANEOUS_PREFETCHES.
	(override_options): Set SIMULTANEOUS_PREFETCHES and
	L1_CACHE_LINE_SIZE parameters.
	* config/sh/sh.h (SIMULTANEOUS_PREFETCHES): Removed.
	(OPTIMIZATION_OPTIONS): Set SIMULTANEOUS_PREFETCHES parameter.
	* config/ia64/ia64.c (ia64_optimization_options): Set
	SIMULTANEOUS_PREFETCHES and L1_CACHE_LINE_SIZE parameters.
	* config/ia64/ia64.h (SIMULTANEOUS_PREFETCHES, PREFETCH_BLOCK):
	Removed.
	* params.def (PARAM_PREFETCH_LATENCY, PARAM_SIMULTANEOUS_PREFETCHES,
	PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE): New params.

Index: params.c
===================================================================
*** params.c	(revision 118601)
--- params.c	(working copy)
*************** set_param_value (const char *name, int v
*** 77,83 ****
  		 compiler_params[i].option,
  		 compiler_params[i].max_value);
  	else
! 	  compiler_params[i].value = value;
  	return;
        }
  
--- 77,86 ----
  		 compiler_params[i].option,
  		 compiler_params[i].max_value);
  	else
! 	  {
! 	    compiler_params[i].value = value;
! 	    compiler_params[i].set = true;
! 	  }
  	return;
        }
  
Index: params.h
===================================================================
*** params.h	(revision 118601)
--- params.h	(working copy)
*************** typedef struct param_info
*** 49,54 ****
--- 49,57 ----
    /* The associated value.  */
    int value;
  
+   /* True if the parameter was explicitly set.  */
+   bool set;
+ 
    /* Minimum acceptable value.  */
    int min_value;
    
*************** typedef enum compiler_param
*** 88,93 ****
--- 91,100 ----
  #define PARAM_VALUE(ENUM) \
    (compiler_params[(int) ENUM].value)
  
+ /* True if the value of the parameter was explicitly changed.  */
+ #define PARAM_SET_P(ENUM) \
+   (compiler_params[(int) ENUM].set)
+ 
  /* Macros for the various parameters.  */
  #define SALIAS_MAX_IMPLICIT_FIELDS \
    PARAM_VALUE (PARAM_SALIAS_MAX_IMPLICIT_FIELDS)
*************** typedef enum compiler_param
*** 151,154 ****
--- 158,169 ----
    ((size_t) PARAM_VALUE (PARAM_MAX_FIELDS_FOR_FIELD_SENSITIVE))
  #define MAX_SCHED_READY_INSNS \
    PARAM_VALUE (PARAM_MAX_SCHED_READY_INSNS)
+ #define PREFETCH_LATENCY \
+   PARAM_VALUE (PARAM_PREFETCH_LATENCY)
+ #define SIMULTANEOUS_PREFETCHES \
+   PARAM_VALUE (PARAM_SIMULTANEOUS_PREFETCHES)
+ #define L1_CACHE_SIZE \
+   PARAM_VALUE (PARAM_L1_CACHE_SIZE)
+ #define L1_CACHE_LINE_SIZE \
+   PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
  #endif /* ! GCC_PARAMS_H */
Index: toplev.c
===================================================================
*** toplev.c	(revision 118601)
--- toplev.c	(working copy)
*************** const char *user_label_prefix;
*** 387,396 ****
  
  static const param_info lang_independent_params[] = {
  #define DEFPARAM(ENUM, OPTION, HELP, DEFAULT, MIN, MAX) \
!   { OPTION, DEFAULT, MIN, MAX, HELP },
  #include "params.def"
  #undef DEFPARAM
!   { NULL, 0, 0, 0, NULL }
  };
  
  /* Output files for assembler code (real compiler output)
--- 387,396 ----
  
  static const param_info lang_independent_params[] = {
  #define DEFPARAM(ENUM, OPTION, HELP, DEFAULT, MIN, MAX) \
!   { OPTION, DEFAULT, false, MIN, MAX, HELP },
  #include "params.def"
  #undef DEFPARAM
!   { NULL, 0, false, 0, 0, NULL }
  };
  
  /* Output files for assembler code (real compiler output)
Index: tree-ssa-loop-prefetch.c
===================================================================
*** tree-ssa-loop-prefetch.c	(revision 118601)
--- tree-ssa-loop-prefetch.c	(working copy)
*************** Software Foundation, 59 Temple Place - S
*** 115,133 ****
  /* Magic constants follow.  These should be replaced by machine specific
     numbers.  */
  
- /* A number that should roughly correspond to the number of instructions
-    executed before the prefetch is completed.  */
- 
- #ifndef PREFETCH_LATENCY
- #define PREFETCH_LATENCY 200
- #endif
- 
- /* Number of prefetches that can run at the same time.  */
- 
- #ifndef SIMULTANEOUS_PREFETCHES
- #define SIMULTANEOUS_PREFETCHES 3
- #endif
- 
  /* True if write can be prefetched by a read prefetch.  */
  
  #ifndef WRITE_CAN_USE_READ_PREFETCH
--- 115,120 ----
*************** Software Foundation, 59 Temple Place - S
*** 140,149 ****
  #define READ_CAN_USE_WRITE_PREFETCH 0
  #endif
  
! /* Cache line size.  Assumed to be a power of two.  */
  
  #ifndef PREFETCH_BLOCK
! #define PREFETCH_BLOCK 32
  #endif
  
  /* Do we have a forward hardware sequential prefetching?  */
--- 127,138 ----
  #define READ_CAN_USE_WRITE_PREFETCH 0
  #endif
  
! /* The size of the block loaded by a single prefetch.  Usually, this is
!    the same as cache line size (at the moment, we only consider one level
!    of cache hierarchy).  */
  
  #ifndef PREFETCH_BLOCK
! #define PREFETCH_BLOCK L1_CACHE_LINE_SIZE
  #endif
  
  /* Do we have a forward hardware sequential prefetching?  */
*************** tree_ssa_prefetch_arrays (struct loops *
*** 1026,1031 ****
--- 1015,1033 ----
        || PREFETCH_BLOCK == 0)
      return 0;
  
+   if (dump_file && (dump_flags & TDF_DETAILS))
+     {
+       fprintf (dump_file, "Prefetching parameters:\n");
+       fprintf (dump_file, "    simultaneous prefetches: %d\n",
+ 	       SIMULTANEOUS_PREFETCHES);
+       fprintf (dump_file, "    prefetch latency: %d\n", PREFETCH_LATENCY);
+       fprintf (dump_file, "    L1 cache size: %d (%d bytes)\n",
+ 	       L1_CACHE_SIZE, L1_CACHE_SIZE * L1_CACHE_LINE_SIZE);
+       fprintf (dump_file, "    L1 cache line size: %d\n", L1_CACHE_LINE_SIZE);
+       fprintf (dump_file, "    prefetch block size: %d\n", PREFETCH_BLOCK);
+       fprintf (dump_file, "\n");
+     }
+ 
    initialize_original_copy_tables ();
  
    if (!built_in_decls[BUILT_IN_PREFETCH])
Index: config/sparc/sparc.c
===================================================================
*** config/sparc/sparc.c	(revision 118601)
--- config/sparc/sparc.c	(working copy)
*************** Boston, MA 02110-1301, USA.  */
*** 51,56 ****
--- 51,57 ----
  #include "cfglayout.h"
  #include "tree-gimple.h"
  #include "langhooks.h"
+ #include "params.h"
  
  /* Processor costs */
  static const
*************** sparc_override_options (void)
*** 827,832 ****
--- 828,847 ----
    if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
      target_flags |= MASK_LONG_DOUBLE_128;
  #endif
+ 
+   if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
+     set_param_value ("simultaneous-prefetches",
+ 		     ((sparc_cpu == PROCESSOR_ULTRASPARC
+ 		       || sparc_cpu == PROCESSOR_NIAGARA)
+ 		      ? 2
+ 		      : (sparc_cpu == PROCESSOR_ULTRASPARC3
+ 			 ? 8 : 3)));
+   if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
+     set_param_value ("l1-cache-line-size", 
+ 		     ((sparc_cpu == PROCESSOR_ULTRASPARC
+ 		       || sparc_cpu == PROCESSOR_ULTRASPARC3
+ 		       || sparc_cpu == PROCESSOR_NIAGARA)
+ 		      ? 64 : 32));
  }
  
  #ifdef SUBTARGET_ATTRIBUTE_TABLE
Index: config/sparc/sparc.h
===================================================================
*** config/sparc/sparc.h	(revision 118601)
--- config/sparc/sparc.h	(working copy)
*************** do {                                    
*** 2175,2193 ****
  	 : (sparc_cpu == PROCESSOR_NIAGARA \
  	    ? 4 \
  	 : 3)))
- 
- #define PREFETCH_BLOCK \
- 	((sparc_cpu == PROCESSOR_ULTRASPARC \
-           || sparc_cpu == PROCESSOR_ULTRASPARC3 \
- 	  || sparc_cpu == PROCESSOR_NIAGARA) \
-          ? 64 : 32)
- 
- #define SIMULTANEOUS_PREFETCHES \
- 	((sparc_cpu == PROCESSOR_ULTRASPARC \
- 	  || sparc_cpu == PROCESSOR_NIAGARA) \
-          ? 2 \
-          : (sparc_cpu == PROCESSOR_ULTRASPARC3 \
-             ? 8 : 3))
  
  /* Control the assembler format that we output.  */
  
--- 2175,2180 ----
Index: config/i386/i386.h
===================================================================
*** config/i386/i386.h	(revision 118601)
--- config/i386/i386.h	(working copy)
*************** do {							\
*** 1739,1750 ****
  /* Define this as 1 if `char' should by default be signed; else as 0.  */
  #define DEFAULT_SIGNED_CHAR 1
  
- /* Number of bytes moved into a data cache for a single prefetch operation.  */
- #define PREFETCH_BLOCK ix86_cost->prefetch_block
- 
- /* Number of prefetch operations that can be done in parallel.  */
- #define SIMULTANEOUS_PREFETCHES ix86_cost->simultaneous_prefetches
- 
  /* Max number of bytes we can move from memory to memory
     in one reasonably fast instruction.  */
  #define MOVE_MAX 16
--- 1739,1744 ----
Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c	(revision 118601)
--- config/i386/i386.c	(working copy)
*************** Boston, MA 02110-1301, USA.  */
*** 50,55 ****
--- 50,56 ----
  #include "tree-gimple.h"
  #include "dwarf2.h"
  #include "tm-constrs.h"
+ #include "params.h"
  
  #ifndef CHECK_STACK_LIMIT
  #define CHECK_STACK_LIMIT (-1)
*************** struct processor_costs k8_cost = {
*** 536,542 ****
  					   in SImode, DImode and TImode */
    5,					/* MMX or SSE register to integer */
    64,					/* size of prefetch block */
!   6,					/* number of parallel prefetches */
    5,					/* Branch cost */
    COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
    COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
--- 537,548 ----
  					   in SImode, DImode and TImode */
    5,					/* MMX or SSE register to integer */
    64,					/* size of prefetch block */
!   /* New AMD processors never drop prefetches; if they cannot be performed
!      immediately, they are queued.  We set the number of simultaneous
!      prefetches to a large constant to reflect this (it is probably not a
!      good idea to leave the number of prefetches unlimited, as their
!      execution also takes some time).  */
!   100,					/* number of parallel prefetches */
    5,					/* Branch cost */
    COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
    COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
*************** override_options (void)
*** 2063,2068 ****
--- 2069,2080 ----
       so it won't slow down the compilation and make x87 code slower.  */
    if (!TARGET_SCHEDULE)
      flag_schedule_insns_after_reload = flag_schedule_insns = 0;
+ 
+   if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
+     set_param_value ("simultaneous-prefetches",
+ 		     ix86_cost->simultaneous_prefetches);
+   if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
+     set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
  }
  
  /* switch to the appropriate section for output of DECL.
Index: config/sh/sh.h
===================================================================
*** config/sh/sh.h	(revision 118601)
--- config/sh/sh.h	(working copy)
*************** do {									\
*** 495,500 ****
--- 495,502 ----
       the user explicitly requested this to be on or off.  */		\
    if (flag_schedule_insns > 0)						\
      flag_schedule_insns = 2;						\
+ 									\
+   set_param_value ("simultaneous-prefetches", 2);			\
  } while (0)
  
  #define ASSEMBLER_DIALECT assembler_dialect
*************** extern int current_function_interrupt;
*** 3467,3474 ****
  2:\n" TEXT_SECTION_ASM_OP);
  #endif /* (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__ */
  
- #define SIMULTANEOUS_PREFETCHES 2
- 
  /* FIXME: middle-end support for highpart optimizations is missing.  */
  #define high_life_started reload_in_progress
  
--- 3469,3474 ----
Index: config/ia64/ia64.c
===================================================================
*** config/ia64/ia64.c	(revision 118601)
--- config/ia64/ia64.c	(working copy)
*************** ia64_optimization_options (int level ATT
*** 9782,9787 ****
--- 9782,9792 ----
  {
    /* Let the scheduler form additional regions.  */
    set_param_value ("max-sched-extend-regions-iters", 2);
+ 
+   /* Set the default values for cache-related parameters.  */
+   set_param_value ("simultaneous-prefetches", 6);
+   set_param_value ("l1-cache-line-size", 32);
+ 
  }
  
  #include "gt-ia64.h"
Index: config/ia64/ia64.h
===================================================================
*** config/ia64/ia64.h	(revision 118601)
--- config/ia64/ia64.h	(working copy)
*************** do {									\
*** 1979,1997 ****
     #pragma weak.  Note, #pragma weak will only be supported if SUPPORT_WEAK is
     defined.  */
  
- /* If this architecture supports prefetch, define this to be the number of
-    prefetch commands that can be executed in parallel.
- 
-    ??? This number is bogus and needs to be replaced before the value is
-    actually used in optimizations.  */
- 
- #define SIMULTANEOUS_PREFETCHES 6
- 
- /* If this architecture supports prefetch, define this to be the size of
-    the cache line that is prefetched.  */
- 
- #define PREFETCH_BLOCK 32
- 
  #define HANDLE_SYSV_PRAGMA 1
  
  /* A C expression for the maximum number of instructions to execute via
--- 1979,1984 ----
Index: params.def
===================================================================
*** params.def	(revision 118601)
--- params.def	(working copy)
*************** DEFPARAM(PARAM_MAX_SCHED_READY_INSNS,
*** 594,599 ****
--- 594,630 ----
  	 "The maximum number of instructions ready to be issued to be considered by the scheduler during the first scheduling pass",
  	 100, 0, 0)
  
+ /* Prefetching and cache-optimizations related parameters.  Default values are
+    usually set by machine description.  */
+ 
+ /* The number of insns executed before prefetch is completed.  */
+ 
+ DEFPARAM (PARAM_PREFETCH_LATENCY,
+ 	 "prefetch-latency",
+ 	 "The number of insns executed before prefetch is completed",
+ 	 200, 0, 0)
+ 
+ /* The number of prefetches that can run at the same time.  */
+ 
+ DEFPARAM (PARAM_SIMULTANEOUS_PREFETCHES,
+ 	  "simultaneous-prefetches",
+ 	  "The number of prefetches that can run at the same time",
+ 	  3, 0, 0)
+ 
+ /* The size of L1 cache in number of cache lines.  */
+ 
+ DEFPARAM (PARAM_L1_CACHE_SIZE,
+ 	  "l1-cache-size",
+ 	  "The size of L1 cache",
+ 	  1024, 0, 0)
+ 
+ /* The size of L1 cache line in bytes.  */
+ 
+ DEFPARAM (PARAM_L1_CACHE_LINE_SIZE,
+ 	  "l1-cache-line-size",
+ 	  "The size of L1 cache line",
+ 	  32, 0, 0)
+ 
  /*
  Local variables:
  mode:c


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]