This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[google]Implement an optimization based on reuse distance profiling (issue4517049)


This patch by Silvius Rus replaces calls to  certain functions with a specialized version that uses non-temporal stores based on memory reuse distance profiling. Bootstraps, no test regressions and the profiling works for a small test case. Ok for google/main.?

-Easwaran

2011-05-09  Silvius Rus <silvius.rus@gmail.com>

	* value-prof.c (interesting_stringop_to_profile_p): Disbale
	stringop profiling if reuse distance profiling is turned on.
	* value-prof.h (gimple_gen_reusedist, optimize_reusedist): Declare.
	* gcov-io.h: (GCOV_COUNTER_REUSE_DIST): New counter.
	(GCOV_COUNTERS): Update.
	(GCOV_COUNTER_NAMES): Add reuse_distance.
	(GCOV_MERGE_FUNCTIONS): Add __gcov_merge_reusedist.
	(__gcov_merge_reusedist): New declaration.
	* profile.c (branch_prob): Enable reuse distance profiling and
	optimization.
	* common.opt (fprofile-reusedist, foptimize-locality): New options.
	* tree-profile.c: Include params.h.
	(stringop_subst, reusedist_t): New structures.
	(stringop_subst_t, reusedist_t): New typedefs.
	(stringop_decl): New global array.
	(RD_NUM_COUNTERS): New constant.
	(get_stringop_subst, reusedist_is_interesting_call)
	(reusedist_instr_func_name, reusedist_get_instr_decl)
	(reusedist_make_instr_call, reusedist_from_counters)
	(gimple_gen_reusedist, nt_op_name, reusedist_get_size_threshold)
	(reusedist_get_distance_large_threshold)
	(reusedist_get_distance_small_threshold)
	(reusedist_get_count_threshold, reusedist_nt_is_worth_it)
	(reusedist_get_nt_decl, maybe_issue_profile_use_note)
	(reusedist_maybe_replace_with_nt_version, optimize_reusedist): New
	functions.
	(gate_tree_profile_ipa): Return true if reuse distance instrumentation
	or use is turned on.
	* Makefile.in (LIBGCOV): Add _gcov_merge_reusedist.
	* libgcov.c (__gcov_weighted_mean2, __gcov_merge_reusedist): New
	functions.
	* params.def (PARAM_REUSEDIST_MEAN_DIST_LARGE_THRESH)
	(PARAM_REUSEDIST_MEAN_DIST_SMALL_THRESH)
	(PARAM_REUSEDIST_CALL_COUNT_THRESH, PARAM_REUSEDIST_MEMCPY_SIZE_THRESH)
	(PARAM_REUSEDIST_MEMSET_SIZE_THRESH): New params.



Index: gcc/value-prof.c
===================================================================
--- gcc/value-prof.c	(revision 173496)
+++ gcc/value-prof.c	(working copy)
@@ -1708,6 +1708,14 @@ interesting_stringop_to_profile_p (tree fndecl, gi
 {
   enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
 
+  /* Disable stringop collection with reuse distance instrumentation
+     or optimization.  Otherwise we end up with hard to correct profile
+     mismatches for functions where reuse distance-based transformation are
+     made.  We see a number of "memcpy" at instrumentation time and a different
+     number of "memcpy" at profile use time.  */
+  if (flag_profile_reusedist || flag_optimize_locality)
+    return false;
+
   if (fcode != BUILT_IN_MEMCPY && fcode != BUILT_IN_MEMPCPY
       && fcode != BUILT_IN_MEMSET && fcode != BUILT_IN_BZERO)
     return false;
Index: gcc/value-prof.h
===================================================================
--- gcc/value-prof.h	(revision 173496)
+++ gcc/value-prof.h	(working copy)
@@ -103,6 +103,8 @@ extern void gimple_gen_const_delta_profiler (histo
 					     unsigned, unsigned);
 extern void gimple_gen_average_profiler (histogram_value, unsigned, unsigned);
 extern void gimple_gen_ior_profiler (histogram_value, unsigned, unsigned);
+extern void gimple_gen_reusedist (void);
+extern void optimize_reusedist (void);
 
 /* In profile.c.  */
 extern void init_branch_prob (void);
Index: gcc/gcov-io.h
===================================================================
--- gcc/gcov-io.h	(revision 173496)
+++ gcc/gcov-io.h	(working copy)
@@ -374,7 +374,8 @@ typedef HOST_WIDEST_INT gcov_type;
 #define GCOV_LAST_VALUE_COUNTER 8  /* The last of counters used for value
 				      profiling.  */
 #define GCOV_COUNTER_DIRECT_CALL 9 /* Direct call counts.  */
-#define GCOV_COUNTERS		10
+#define GCOV_COUNTER_REUSE_DIST 10 /* Reuse distance measure.  */
+#define GCOV_COUNTERS		11
 
 /* Number of counters used for value profiling.  */
 #define GCOV_N_VALUE_COUNTERS \
@@ -383,7 +384,8 @@ typedef HOST_WIDEST_INT gcov_type;
   /* A list of human readable names of the counters */
 #define GCOV_COUNTER_NAMES	{"arcs", "interval", "pow2", "single", \
 				 "delta","indirect_call", "average", "ior", \
-				 "indirect_call_topn", "direct_call"}
+				 "indirect_call_topn", "direct_call", \
+                                 "reuse_distance"}
 
 #define GCOV_ICALL_TOPN_VAL  2   /* Track two hottest callees */
 #define GCOV_ICALL_TOPN_NCOUNTS  9 /* The number of counter entries per icall callsite */
@@ -397,7 +399,8 @@ typedef HOST_WIDEST_INT gcov_type;
 				 "__gcov_merge_add",	\
 				 "__gcov_merge_ior",	\
 				 "__gcov_merge_icall_topn",\
-                                 "__gcov_merge_dc" }
+                                 "__gcov_merge_dc",\
+                                 "__gcov_merge_reusedist" }
 
 /* Convert a counter index to a tag.  */
 #define GCOV_TAG_FOR_COUNTER(COUNT)				\
@@ -570,6 +573,9 @@ extern void __gcov_merge_ior (gcov_type *, unsigne
 /* The merge function used for direct call counters.  */
 extern void __gcov_merge_dc (gcov_type *, unsigned) ATTRIBUTE_HIDDEN;
 
+/* The merge function used for reuse distance counters.  */
+extern void __gcov_merge_reusedist (gcov_type *, unsigned) ATTRIBUTE_HIDDEN;
+
 /* The merge function used for indirect call counters.  */
 extern void __gcov_merge_icall_topn (gcov_type *, unsigned) ATTRIBUTE_HIDDEN;
 
Index: gcc/profile.c
===================================================================
--- gcc/profile.c	(revision 173496)
+++ gcc/profile.c	(working copy)
@@ -1192,6 +1192,12 @@ branch_prob (void)
   EXIT_BLOCK_PTR->index = EXIT_BLOCK;
 #undef BB_TO_GCOV_INDEX
 
+  if (flag_profile_reusedist)
+    gimple_gen_reusedist ();
+
+  if (flag_optimize_locality)
+    optimize_reusedist ();
+
   if (flag_profile_values)
     gimple_find_values_to_profile (&values);
 
Index: gcc/common.opt
===================================================================
--- gcc/common.opt	(revision 173496)
+++ gcc/common.opt	(working copy)
@@ -1049,6 +1049,14 @@ fdwarf2-cfi-asm
 Common Report Var(flag_dwarf2_cfi_asm) Init(HAVE_GAS_CFI_DIRECTIVE)
 Enable CFI tables via GAS assembler directives.
 
+fprofile-reusedist
+Common Report Var(flag_profile_reusedist)
+Profile generation for memory reuse distance.
+
+foptimize-locality
+Common Report Var(flag_optimize_locality)
+Optimization based on improving memory reference locality.
+
 fripa
 Common Report Var(flag_dyn_ipa)
 Perform Dynamic Inter-Procedural Analysis.
Index: gcc/tree-profile.c
===================================================================
--- gcc/tree-profile.c	(revision 173496)
+++ gcc/tree-profile.c	(working copy)
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "profile.h"
 #include "l-ipo.h"
+#include "params.h"
 #include "profile.h"
 #include "target.h"
 #include "output.h"
@@ -821,6 +822,509 @@ gimple_gen_ior_profiler (histogram_value value, un
   gsi_insert_before (&gsi, call, GSI_NEW_STMT);
 }
 
+/* String operation substitution record.  For each operation, e.g., memcpy,
+   we keep up to four declarations, e.g., libopt__memcpy__{0,1,2,3}.
+   They correspond to memcpy versions in which memory access is nontemporal
+   in neither, first, second or both arguments (dst, src) respectively.  */
+
+struct stringop_subst
+{
+  const char* original_name;  /* E.g., "memcpy".  */
+  int num_args;               /* Number of args, 3 for memcpy.  */
+  int num_ptr_args;           /* Number of pointer args, 2 for memcpy.  */
+  tree instr_fun;             /* E.g., declaration of instrument_memcpy.  */
+  tree nt_ops[4];             /* E.g., libopt__memcpy__{0,1,2,3}.  */
+};
+typedef struct stringop_subst* stringop_subst_t;
+
+/* Substitution database.  TODO: switch to hash table.  */
+
+static struct stringop_subst stringop_decl[] =
+{
+  {"memcpy",      3, 2, NULL, {NULL, NULL, NULL, NULL}},
+  {"memset",      3, 1, NULL, {NULL, NULL, NULL, NULL}},
+  {"memmove",     3, 2, NULL, {NULL, NULL, NULL, NULL}},
+  {"memcmp",      3, 2, NULL, {NULL, NULL, NULL, NULL}},
+  {"bcmp",        3, 2, NULL, {NULL, NULL, NULL, NULL}},
+  {"strlen",      1, 1, NULL, {NULL, NULL, NULL, NULL}},
+  {"strcpy",      2, 2, NULL, {NULL, NULL, NULL, NULL}},
+  {"strncpy",     3, 2, NULL, {NULL, NULL, NULL, NULL}},
+  {"strcat",      2, 2, NULL, {NULL, NULL, NULL, NULL}},
+  {"strncat",     3, 2, NULL, {NULL, NULL, NULL, NULL}},
+  {"strdup",      1, 1, NULL, {NULL, NULL, NULL, NULL}},
+  {"strndup",     2, 1, NULL, {NULL, NULL, NULL, NULL}},
+  {"strcmp",      2, 2, NULL, {NULL, NULL, NULL, NULL}},
+  {"strncmp",     3, 2, NULL, {NULL, NULL, NULL, NULL}},
+  {"strcasecmp",  2, 2, NULL, {NULL, NULL, NULL, NULL}},
+  {"strncasecmp", 3, 2, NULL, {NULL, NULL, NULL, NULL}},
+  {NULL,          0, 0, NULL, {NULL, NULL, NULL, NULL}}
+};
+
+/* Get the corresponding element in STRINGOP_DECL for NAME.  */
+
+static stringop_subst_t
+get_stringop_subst (const char* name)
+{
+  stringop_subst_t it;
+  for (it = stringop_decl; it->original_name; it++)
+    if (strcmp (name, it->original_name) == 0)
+      return it;
+  return 0;
+}
+
+/* Return the matching substitution if call site STMT is worth replacing.  */
+
+static stringop_subst_t
+reusedist_is_interesting_call (gimple stmt)
+{
+  tree fndecl, name;
+
+  if (gimple_code (stmt) != GIMPLE_CALL)
+    return 0;
+
+  fndecl = gimple_call_fndecl (stmt);
+
+  if (fndecl == NULL_TREE)
+    return 0;
+
+  name = DECL_NAME (fndecl);
+
+  if (name == NULL_TREE)
+    return 0;
+
+  return get_stringop_subst (IDENTIFIER_POINTER (name));
+}
+
+/* Make up an instrumentation function name for string operation OP.  */
+
+static void
+reusedist_instr_func_name (const char* op, char result[], int size)
+{
+  int written;
+
+  written = snprintf (result, size, "reusedist_instr_%s", op);
+
+  gcc_assert (written < size);
+}
+
+/* Create a declaration for an instr. function if not already done.
+   Use TEMPLATE_STMT to figure out argument types.  */
+
+static tree
+reusedist_get_instr_decl (gimple template_stmt, stringop_subst_t subst)
+{
+  if (!subst->instr_fun)
+    {
+      tree args;
+      char name[64];
+
+      if (!ptr_void)
+        ptr_void = build_pointer_type (void_type_node);
+
+      reusedist_instr_func_name (subst->original_name, name, 64);
+
+      switch (subst->num_args)
+        {
+          case 1:
+            args = build_function_type_list (
+                void_type_node, ptr_void,
+                TREE_TYPE (gimple_call_arg (template_stmt, 0)),
+                NULL_TREE);
+            break;
+          case 2:
+            args = build_function_type_list (
+                void_type_node, ptr_void,
+                TREE_TYPE (gimple_call_arg (template_stmt, 0)),
+                TREE_TYPE (gimple_call_arg (template_stmt, 1)),
+                NULL_TREE);
+            break;
+          case 3:
+            args = build_function_type_list (
+                void_type_node, ptr_void,
+                TREE_TYPE (gimple_call_arg (template_stmt, 0)),
+                TREE_TYPE (gimple_call_arg (template_stmt, 1)),
+                TREE_TYPE (gimple_call_arg (template_stmt, 2)),
+                NULL_TREE);
+            break;
+          default:
+            gcc_assert (false);
+        }
+      subst->instr_fun = build_fn_decl (name, args);
+    }
+
+  return subst->instr_fun;
+}
+
+/* Return call to instrumentation function for string op call site STMT.
+   Given a call to memcpy (dst, src, len), it will return a call to
+   reusedist_instrument_memcpy (counters, dst, src, len).  */
+
+static gimple
+reusedist_make_instr_call (gimple stmt, stringop_subst_t subst, tree counters)
+{
+  tree profiler_fn;
+
+  if (!subst)
+    return 0;
+
+  profiler_fn = reusedist_get_instr_decl (stmt, subst);
+
+ switch (subst->num_args)
+   {
+     case 1:
+       return gimple_build_call (profiler_fn, 1 + subst->num_args, counters,
+                                 gimple_call_arg (stmt, 0));
+     case 2:
+       return gimple_build_call (profiler_fn, 1 + subst->num_args, counters,
+                                 gimple_call_arg (stmt, 0),
+                                 gimple_call_arg (stmt, 1));
+     case 3:
+       return gimple_build_call (profiler_fn, 1 + subst->num_args, counters,
+                                 gimple_call_arg (stmt, 0),
+                                 gimple_call_arg (stmt, 1),
+                                 gimple_call_arg (stmt, 2));
+     default:
+       gcc_assert (false);
+   }
+}
+
+/* Reuse distance information for a single memory block at a single site.
+   For some operations, such as memcpy, there will be two such descriptors,
+   one of the source and one for the destination.
+   We're keeping the average reuse distance
+   (e.g., distance from a MEMCPY call until the memory written is first used).
+   We're also keeping the average operation size (e.g., memcpy size).
+   These averages are measured over all dynamic invocations of the same
+   static site.  We're also storing the dynamic operation count.
+
+   We're also keeping a measure named dist_x_size, which is the sum of
+   products (distance * size) across all dynamic instances.  This is meant
+   to account for some information loss through aggregation.  For instance,
+   consider two scenarios.
+   A: 50% of operations have large reuse distance but are very short.
+      50% of operations have short reuse distance but are very long.
+   B: 50% of operations have large reuse distance and are large.
+      50% of operations have short reuse distance and are short.
+   Without the dist_x_size measure, these scenarios can't be told apart
+   from the other three measures.  With the dist_x_size measure, scenario B
+   will look like a better candidate.  */
+
+struct reusedist_t {
+  gcov_type mean_dist;    /* Average reuse distance.  */
+  gcov_type mean_size;    /* Average size of memory referenced.  */
+  gcov_type count;        /* Operation count.  */
+  gcov_type dist_x_size;  /* Sum of (distance * size >> 12) across all ops.  */
+};
+
+typedef struct reusedist_t reusedist_t;
+
+/* Number of gcov counters for one reuse distance measurement.  */
+
+const int RD_NUM_COUNTERS = sizeof(reusedist_t) / sizeof(gcov_type);
+
+/* Initialize RD from gcov COUNTERS.  */
+
+static void
+reusedist_from_counters (const gcov_type* counters,
+                         reusedist_t* rd)
+{
+  memcpy (rd, counters, RD_NUM_COUNTERS * sizeof (gcov_type));
+}
+
+/* Instrument current function to collect reuse distance for string ops.
+   The heavy lifting is done by an external library.  The interface
+   to this library is functions like this:
+
+   void reusedist_instr_memcpy(gcov_type *counters,
+                               void *dst, void *src, size_t len);
+
+   This function will measure the reuse distance for the given operations
+   DST with offset LEN, and store values in COUNTERS for one or two pointer
+   arguments.  E.g., for memcpy 2 * RD_NUM_COUNTERS counters will be set,
+   first RD_NUM_COUNTERS for DST and last RD_NUM_COUNTERS for SRC.
+   For strlen, only RD_NUM_COUNTERS counters will be allocated thus the
+   runtime is expected to set only RD_NUM_COUNTERS counters.
+   The counters will record:
+   - mean reuse distance
+   - mean operation size
+   - call count
+   - sum(reuse distance * operation size) across all calls
+     To avoid overflow, each product is first scaled down by a factor of 2^12.
+
+   All reuse distance measurements for dynamic executions of the same static
+   string operation will be aggregated into a single set of counters.
+   The reuse distance library uses the passed COUNTERS pointer as index
+   in its internal tables.  */
+
+void
+gimple_gen_reusedist (void)
+{
+  basic_block bb;
+  gimple_stmt_iterator gsi;
+
+  if (DECL_STATIC_CONSTRUCTOR (current_function_decl))
+    return;
+
+  gimple_init_edge_profiler ();
+
+  FOR_EACH_BB (bb)
+    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+        gimple stmt = gsi_stmt (gsi);
+        stringop_subst_t subst = reusedist_is_interesting_call (stmt);
+
+        if (subst
+            && coverage_counter_alloc (
+                GCOV_COUNTER_REUSE_DIST,
+                subst->num_ptr_args * RD_NUM_COUNTERS))
+          {
+            location_t locus;
+            tree counters = tree_coverage_counter_addr (
+                GCOV_COUNTER_REUSE_DIST, 0);
+
+            counters = force_gimple_operand_gsi (
+                &gsi, counters, true, NULL_TREE, true, GSI_SAME_STMT);
+
+            gsi_insert_after (
+                &gsi,
+                reusedist_make_instr_call (stmt, subst, counters),
+                GSI_NEW_STMT);
+
+            locus = (stmt != NULL)
+                ? gimple_location (stmt)
+                : DECL_SOURCE_LOCATION (current_function_decl);
+            inform (locus,
+                    "inserted reuse distance instrumentation for %qs, using "
+                    "%d gcov counters", subst->original_name,
+                    subst->num_ptr_args * RD_NUM_COUNTERS);
+          }
+      }
+}
+
+/* Make up a nontemporal substitution name, e.g., "libopt__memcpy__3".  */
+
+static void
+nt_op_name (const char* name, int suffix, char result[], int size)
+{
+  int written;
+
+  written = snprintf (result, size, "libopt__%s__%d", name, suffix);
+
+  gcc_assert (written < size);
+}
+
+/* Get size threshold for reusedist substitution decisions.  */
+
+static gcov_type
+reusedist_get_size_threshold (const char* name)
+{
+  if (!strcmp (name, "memcpy"))
+    return (gcov_type)PARAM_VALUE (PARAM_REUSEDIST_MEMCPY_SIZE_THRESH);
+
+  if (!strcmp (name, "memset"))
+    return (gcov_type)PARAM_VALUE (PARAM_REUSEDIST_MEMSET_SIZE_THRESH);
+
+  /* Use memcpy threshold as default for unspecified operations.  */
+  return (gcov_type)PARAM_VALUE (PARAM_REUSEDIST_MEMCPY_SIZE_THRESH);
+}
+
+/* Get distance threshold for reusedist substitution decisions.  */
+
+static gcov_type
+reusedist_get_distance_large_threshold (void)
+{
+  return (gcov_type)PARAM_VALUE (PARAM_REUSEDIST_MEAN_DIST_LARGE_THRESH);
+}
+
+/* Get distance threshold for reusedist substitution decisions.  */
+
+static gcov_type
+reusedist_get_distance_small_threshold (void)
+{
+  return (gcov_type)PARAM_VALUE (PARAM_REUSEDIST_MEAN_DIST_SMALL_THRESH);
+}
+
+/* Get call count threshold for reusedist substitution decisions.  */
+
+static gcov_type
+reusedist_get_count_threshold (void)
+{
+  return (gcov_type)PARAM_VALUE (PARAM_REUSEDIST_CALL_COUNT_THRESH);
+}
+
+/* Return whether switching to nontemporal string operation is worth it.
+   NAME is the function name, such as "memcpy".
+   COUNTERS is a pointer to gcov counters for this operation site.
+   Return 1 if worth it, -1 if not worth it and 0 if not sure.  */
+
+static int
+reusedist_nt_is_worth_it (const char* name, const gcov_type* counters)
+{
+  reusedist_t rd;
+
+  reusedist_from_counters (counters, &rd);
+
+  /* TODO: Need to add check for dist_x_size.  */
+
+  if (rd.mean_size < reusedist_get_size_threshold (name)
+      || rd.count < reusedist_get_count_threshold ())
+    /* If the size of the operation is small, don't substitute.  */
+    return 0;
+
+  if (rd.mean_dist >= reusedist_get_distance_large_threshold ())
+    /* Enforce non-temporal.  */
+    return 1;
+  else if (rd.mean_dist <= reusedist_get_distance_small_threshold ())
+    /* Enforce temporal.  */
+    return -1;
+  else
+    /* Not conclusive.  */
+    return 0;
+}
+
+/* Create a declaration for a nontemporal version if not already done.
+   INDEX is the index of the version in list [first, second, both].  */
+
+static tree
+reusedist_get_nt_decl (tree template_decl, stringop_subst_t subst, int index)
+{
+  if (!subst->nt_ops[index])
+    {
+      char nt_name[256];
+      nt_op_name (subst->original_name, index, nt_name, 256);
+      subst->nt_ops[index] = build_fn_decl (nt_name,
+                                            TREE_TYPE (template_decl));
+    }
+
+  return subst->nt_ops[index];
+}
+
+/* Issue notes with reuse distance values in COUNTERS for given ARG.  */
+
+static void
+maybe_issue_profile_use_note (location_t locus, gcov_type* counters, int arg)
+{
+  reusedist_t rd;
+
+  reusedist_from_counters (counters, &rd);
+
+  if (rd.count)
+    inform (locus, "reuse distance counters for arg %d: %lld %lld %lld %lld",
+            arg, (long long int)rd.mean_dist, (long long int)rd.mean_size,
+            (long long int)rd.count, (long long int)rd.dist_x_size);
+}
+
+/* Substitute with nontemporal version when profitable.  */
+
+static void
+reusedist_maybe_replace_with_nt_version (gimple stmt,
+                                         gcov_type* counters,
+                                         stringop_subst_t subst)
+{
+  int first, second, suffix;
+  tree subst_decl;
+  const char* name = subst->original_name;
+  location_t locus;
+
+  locus = (stmt != NULL)
+      ? gimple_location (stmt)
+      : DECL_SOURCE_LOCATION (current_function_decl);
+
+  gcc_assert (1 == subst->num_ptr_args || 2 == subst->num_ptr_args);
+
+  maybe_issue_profile_use_note (locus, counters, 1);
+  first = reusedist_nt_is_worth_it (name, counters);
+
+  if (2 == subst->num_ptr_args)
+    {
+      maybe_issue_profile_use_note (locus, counters + RD_NUM_COUNTERS, 2);
+      second = reusedist_nt_is_worth_it (name, counters + RD_NUM_COUNTERS);
+    }
+  else
+      second = 0;
+
+  if (first > 0)
+    /* Nontemporal in first arg.  */
+    {
+      /* The operation on the first arg should be nontemporal.  */
+      if (second > 0)
+        suffix = 3;
+      else
+        suffix = 1;
+    }
+  else if (first < 0)
+    /* Temporal in first arg.  */
+    {
+      if (second > 0)
+        suffix = 2;
+      else if (second < 0)
+        suffix = 0;
+      else
+        suffix = -1;
+    }
+  else
+    /* Don't know about the first arg.  */
+    {
+      if (second > 0)
+        suffix = 2;
+      else
+        suffix = -1;
+    }
+
+  if (suffix == -1)
+    return;
+
+  subst_decl = reusedist_get_nt_decl (gimple_call_fndecl (stmt), subst,
+                                      suffix);
+  gimple_call_set_fndecl (stmt, subst_decl);
+  inform (locus, "replaced %qs with non-temporal %qs",
+          subst->original_name,
+          IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (subst_decl)));
+}
+
+/* Replace string operations with equivalent nontemporal, when profitable.  */
+
+void
+optimize_reusedist (void)
+{
+  basic_block bb;
+  gimple_stmt_iterator gsi;
+  unsigned n_counters;
+  unsigned counter_index = 0;
+  gcov_type *counters = get_coverage_counts_no_warn (
+      DECL_STRUCT_FUNCTION (current_function_decl),
+      GCOV_COUNTER_REUSE_DIST, &n_counters);
+
+  if (!n_counters || DECL_STATIC_CONSTRUCTOR (current_function_decl))
+    return;
+
+  gcc_assert (!(n_counters % RD_NUM_COUNTERS));
+
+  FOR_EACH_BB (bb)
+    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+        gimple stmt = gsi_stmt (gsi);
+        stringop_subst_t subst = reusedist_is_interesting_call (stmt);
+
+        if (subst)
+          {
+            if (counter_index < n_counters)
+              reusedist_maybe_replace_with_nt_version (
+                  stmt, &counters[counter_index], subst);
+            counter_index += subst->num_ptr_args * RD_NUM_COUNTERS;
+          }
+      }
+
+  if (counter_index != n_counters)
+    {
+      warning (0, "coverage mismatch for reuse distance counters "
+               "in function %qs", IDENTIFIER_POINTER
+               (DECL_ASSEMBLER_NAME (current_function_decl)));
+      inform (input_location, "number of counters is %u instead of %u",
+              n_counters, counter_index);
+    }
+}
+
 /* Profile all functions in the callgraph.  */
 
 static unsigned int
@@ -1030,7 +1534,8 @@ gate_tree_profile_ipa (void)
 {
   return (!in_lto_p
 	  && (flag_branch_probabilities || flag_test_coverage
-	      || profile_arc_flag));
+	      || profile_arc_flag || flag_profile_reusedist
+              || flag_optimize_locality));
 }
 
 struct simple_ipa_opt_pass pass_ipa_tree_profile =
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in	(revision 173496)
+++ gcc/Makefile.in	(working copy)
@@ -1523,7 +1523,8 @@ LIBGCOV = _gcov _gcov_merge_add _gcov_merge_single
     _gcov_interval_profiler _gcov_pow2_profiler _gcov_one_value_profiler \
     _gcov_indirect_call_profiler _gcov_direct_call_profiler \
     _gcov_average_profiler _gcov_ior_profiler _gcov_merge_ior _gcov_merge_dc \
-    _gcov_merge_icall_topn _gcov_indirect_call_topn_profiler
+    _gcov_merge_icall_topn _gcov_indirect_call_topn_profiler \
+    _gcov_merge_reusedist
 
 FPBIT_FUNCS = _pack_sf _unpack_sf _addsub_sf _mul_sf _div_sf \
     _fpcmp_parts_sf _compare_sf _eq_sf _ne_sf _gt_sf _ge_sf \
Index: gcc/libgcov.c
===================================================================
--- gcc/libgcov.c	(revision 173496)
+++ gcc/libgcov.c	(working copy)
@@ -879,6 +879,59 @@ __gcov_merge_ior (gcov_type *counters, unsigned n_
 }
 #endif
 
+#ifdef L_gcov_merge_reusedist
+
+/* Return the weighted arithmetic mean of two values.  */
+
+static gcov_type
+__gcov_weighted_mean2 (gcov_type value1, gcov_type count1,
+                       gcov_type value2, gcov_type count2)
+{
+  if (count1 + count2 == 0)
+    return 0;
+  else
+    return (value1 * count1 + value2 * count2) / (count1 + count2);
+}
+
+void
+__gcov_merge_reusedist (gcov_type *counters, unsigned n_counters)
+{
+  unsigned i;
+
+  gcc_assert(!(n_counters % 4));
+
+  for (i = 0; i < n_counters; i += 4)
+    {
+      /* Decode current values.  */
+      gcov_type c_mean_dist = counters[i];
+      gcov_type c_mean_size = counters[i+1];
+      gcov_type c_count = counters[i+2];
+      gcov_type c_dist_x_size = counters[i+3];
+
+      /* Read and decode values in file.  */
+      gcov_type f_mean_dist = __gcov_read_counter ();
+      gcov_type f_mean_size = __gcov_read_counter ();
+      gcov_type f_count = __gcov_read_counter ();
+      gcov_type f_dist_x_size = __gcov_read_counter ();
+
+      /* Compute aggregates.  */
+      gcov_type a_mean_dist = __gcov_weighted_mean2 (
+          f_mean_dist, f_count, c_mean_dist, c_count);
+      gcov_type a_mean_size = __gcov_weighted_mean2 (
+          f_mean_size, f_count, c_mean_size, c_count);
+      gcov_type a_count = f_count + c_count;
+      gcov_type a_dist_x_size = f_dist_x_size + c_dist_x_size;
+
+      /* Encode back into counters.  */
+      counters[i] = a_mean_dist;
+      counters[i+1] = a_mean_size;
+      counters[i+2] = a_count;
+      counters[i+3] = a_dist_x_size;
+    }
+}
+
+#endif
+
 #ifdef L_gcov_merge_dc
 
 /* Returns 1 if the function global id GID is not valid.  */
Index: gcc/params.def
===================================================================
--- gcc/params.def	(revision 173496)
+++ gcc/params.def	(working copy)
@@ -892,6 +892,31 @@ DEFPARAM (PARAM_GCOV_DEBUG,
 	  "Looking for gcda file in current dir.",
 	  1, 0, 1)
 
+DEFPARAM (PARAM_REUSEDIST_MEAN_DIST_LARGE_THRESH,
+          "reusedist-mean-dist-large-thresh",
+          "Generate NTA stringops only if reusedist at least this size",
+          10000000, 0, 0)
+
+DEFPARAM (PARAM_REUSEDIST_MEAN_DIST_SMALL_THRESH,
+          "reusedist-mean-dist-small-thresh",
+          "Force temporal stringops if reusedist at most this size",
+          100000, 0, 0)
+
+DEFPARAM (PARAM_REUSEDIST_CALL_COUNT_THRESH,
+          "reusedist-call-count-thresh",
+          "Generate NTA stringops only if call count at least this large",
+          0, 0, 0)
+
+DEFPARAM (PARAM_REUSEDIST_MEMCPY_SIZE_THRESH,
+          "reusedist-memcpy-size-thresh",
+          "Generate memcpy-nta only if size at least this large",
+          4096, 0, 0)
+
+DEFPARAM (PARAM_REUSEDIST_MEMSET_SIZE_THRESH,
+          "reusedist-memset-size-thresh",
+          "Generate NTA memset only if size at least this large",
+          122880, 0, 0)
+
 /* Avoid SLP vectorization of large basic blocks.  */
 DEFPARAM (PARAM_SLP_MAX_INSNS_IN_BB,
           "slp-max-insns-in-bb",

--
This patch is available for review at http://codereview.appspot.com/4517049


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]