This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp3] Remove GOMP_BLOCKTIME env var, add GOMP_SPINCOUNT and handle OMP_WAIT_POLICY


Hi!

I've been playing with runtime detection of the number of
do_wait spins per millisecond (see the attached proglet).  Unfortunately,
on some architectures (e.g. ppc G5) it is completely unreliable, and on others,
including x86_64, it is from time to time off by a factor of 10.  But estimating
the spin count so that it is at most 10 times off, at least on most CPUs,
doesn't need any runtime detection, especially given that CPU frequencies
have hit the ceiling.
So, instead of letting users specify GOMP_BLOCKTIME as the time in milliseconds
to spin, this patch lets users specify GOMP_SPINCOUNT as the number of spins.
If this isn't specified, the default is 30g for OMP_WAIT_POLICY=active
(30g is roughly 5 minutes, could be 2 or 15 minutes depending on hw),
20m for no OMP_WAIT_POLICY (roughly 0.2 seconds) and 0 for
OMP_WAIT_POLICY=passive.  Additionally, the patch tracks the number of threads
currently managed by libgomp, and if there are more libgomp-managed threads
than available CPUs, the spin counts decrease radically (1k for
OMP_WAIT_POLICY=active, 100 for no OMP_WAIT_POLICY).
The busy waiting duration is a hint anyway, so not being very precise is
IMHO not a big deal, but e.g. calling clock_gettime every few iterations would
increase latency a lot.

2008-03-19  Jakub Jelinek  <jakub@redhat.com>

	* libgomp.h (gomp_active_wait_policy): Remove decl.
	(gomp_throttled_spin_count_var, gomp_available_cpus,
	gomp_managed_threads): New extern decls.
	* team.c (gomp_team_start, gomp_team_end): If number of threads
	changed, adjust atomically gomp_managed_threads.
	* env.c (gomp_active_wait_policy, gomp_block_time_var): Remove.
	(gomp_throttled_spin_count_var, gomp_available_cpus,
	gomp_managed_threads): New variables.
	(parse_millis): Removed.
	(parse_spincount): New function.
	(parse_wait_policy): Return -1/0/1 instead of setting
	gomp_active_wait_policy.
	(initialize_env): Call gomp_init_num_threads unconditionally.
	Initialize gomp_available_cpus.  Call parse_spincount instead
	of parse_millis, initialize gomp_{,throttled_}spin_count_var
	depending on presence and value of OMP_WAIT_POLICY and
	GOMP_SPINCOUNT env vars.
	* config/linux/wait.h (do_wait): Use gomp_throttled_spin_count_var
	instead of gomp_spin_count_var if gomp_managed_threads >
	gomp_available_cpus.

--- libgomp/team.c	(revision 133292)
+++ libgomp/team.c	(working copy)
@@ -287,8 +287,24 @@ gomp_team_start (void (*fn) (void *), vo
 	}
     }
 
+  if (__builtin_expect (nthreads > old_threads_used, 0))
+    {
+      long diff = (long) nthreads - (long) old_threads_used;
+
+      if (old_threads_used == 0)
+	--diff;
+
+#ifdef HAVE_SYNC_BUILTINS
+      __sync_fetch_and_add (&gomp_managed_threads, diff);
+#else
+      gomp_mutex_lock (&gomp_remaining_threads_lock);
+      gomp_managed_threads += diff;
+      gomp_mutex_unlock (&gomp_remaining_threads_lock);
+#endif
+    }
+
   attr = &gomp_thread_attr;
-  if (gomp_cpu_affinity != NULL)
+  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
     {
       size_t stacksize;
       pthread_attr_init (&thread_attr);
@@ -328,7 +344,7 @@ gomp_team_start (void (*fn) (void *), vo
 	gomp_fatal ("Thread creation failed: %s", strerror (err));
     }
 
-  if (gomp_cpu_affinity != NULL)
+  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
     pthread_attr_destroy (&thread_attr);
 
  do_release:
@@ -338,8 +354,20 @@ gomp_team_start (void (*fn) (void *), vo
      that should arrive back at the end of this team.  The extra
      threads should be exiting.  Note that we arrange for this test
      to never be true for nested teams.  */
-  if (nthreads < old_threads_used)
-    gomp_barrier_reinit (&gomp_threads_dock, nthreads);
+  if (__builtin_expect (nthreads < old_threads_used, 0))
+    {
+      long diff = (long) nthreads - (long) old_threads_used;
+
+      gomp_barrier_reinit (&gomp_threads_dock, nthreads);
+
+#ifdef HAVE_SYNC_BUILTINS
+      __sync_fetch_and_add (&gomp_managed_threads, diff);
+#else
+      gomp_mutex_lock (&gomp_remaining_threads_lock);
+      gomp_managed_threads += diff;
+      gomp_mutex_unlock (&gomp_remaining_threads_lock);
+#endif
+    }
 }
 
 
@@ -357,6 +385,17 @@ gomp_team_end (void)
   gomp_end_task ();
   thr->ts = team->prev_ts;
 
+  if (__builtin_expect (thr->ts.team != NULL, 0))
+    {
+#ifdef HAVE_SYNC_BUILTINS
+      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
+#else
+      gomp_mutex_lock (&gomp_remaining_threads_lock);
+      gomp_managed_threads -= team->nthreads - 1L;
+      gomp_mutex_unlock (&gomp_remaining_threads_lock);
+#endif
+    }
+
   free_team (team);
 }
 
--- libgomp/env.c	(revision 133291)
+++ libgomp/env.c	(working copy)
@@ -57,7 +57,6 @@ struct gomp_task_icv gomp_global_icv = {
 };
 
 unsigned short *gomp_cpu_affinity;
-bool gomp_active_wait_policy = false;
 size_t gomp_cpu_affinity_len;
 unsigned long gomp_max_active_levels_var = INT_MAX;
 unsigned long gomp_thread_limit_var = ULONG_MAX;
@@ -65,8 +64,8 @@ unsigned long gomp_remaining_threads_cou
 #ifndef HAVE_SYNC_BUILTINS
 gomp_mutex_t gomp_remaining_threads_lock;
 #endif
-static unsigned long gomp_block_time_var;
-unsigned long long gomp_spin_count_var;
+unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1;
+unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
 
 /* Parse the OMP_SCHEDULE environment variable.  */
 
@@ -239,14 +238,14 @@ parse_stacksize (const char *name, unsig
   return false;
 }
 
-/* Parse the GOMP_BLOCKTIME environment varible.  Return true if one was
+/* Parse the GOMP_SPINCOUNT environment variable.  Return true if one was
    present and it was successfully parsed.  */
 
 static bool
-parse_millis (const char *name, unsigned long *pvalue)
+parse_spincount (const char *name, unsigned long long *pvalue)
 {
   char *env, *end;
-  unsigned long value, mult = 1;
+  unsigned long long value, mult = 1;
 
   env = getenv (name);
   if (env == NULL)
@@ -257,17 +256,16 @@ parse_millis (const char *name, unsigned
   if (*env == '\0')
     goto invalid;
 
-  if (strncasecmp (env, "infinite", 8) != 0
-      || strncasecmp (env, "infinity", 8) != 0
-      || strncasecmp (env, "unexpire", 8) != 0)
+  if (strncasecmp (env, "infinite", 8) == 0
+      || strncasecmp (env, "infinity", 8) == 0)
     {
-      value = ULONG_MAX;
+      value = ~0ULL;
       end = env + 8;
       goto check_tail;
     }
 
   errno = 0;
-  value = strtoul (env, &end, 10);
+  value = strtoull (env, &end, 10);
   if (errno)
     goto invalid;
 
@@ -277,17 +275,17 @@ parse_millis (const char *name, unsigned
     {
       switch (tolower (*end))
 	{
-	case 's':
-	  mult = 1000;
+	case 'k':
+	  mult = 1000LL;
 	  break;
 	case 'm':
-	  mult = 60 * 1000;
+	  mult = 1000LL * 1000LL;
 	  break;
-	case 'h':
-	  mult = 60 * 60 * 1000;
+	case 'g':
+	  mult = 1000LL * 1000LL * 1000LL;
 	  break;
-	case 'd':
-	  mult = 24 * 60 * 60 * 1000;
+	case 't':
+	  mult = 1000LL * 1000LL * 1000LL * 1000LL;
 	  break;
 	default:
 	  goto invalid;
@@ -300,8 +298,8 @@ parse_millis (const char *name, unsigned
 	goto invalid;
     }
 
-  if (value > ULONG_MAX / mult)
-    value = ULONG_MAX;
+  if (value > ~0ULL / mult)
+    value = ~0ULL;
   else
     value *= mult;
 
@@ -348,33 +346,36 @@ parse_boolean (const char *name, bool *v
 /* Parse the OMP_WAIT_POLICY environment variable and store the
    result in gomp_active_wait_policy.  */
 
-static void
+static int
 parse_wait_policy (void)
 {
   const char *env;
+  int ret = -1;
 
   env = getenv ("OMP_WAIT_POLICY");
   if (env == NULL)
-    return;
+    return -1;
 
   while (isspace ((unsigned char) *env))
     ++env;
   if (strncasecmp (env, "active", 6) == 0)
     {
-      gomp_active_wait_policy = true;
+      ret = 1;
       env += 6;
     }
   else if (strncasecmp (env, "passive", 7) == 0)
     {
-      gomp_active_wait_policy = false;
+      ret = 0;
       env += 7;
     }
   else
     env = "X";
   while (isspace ((unsigned char) *env))
     ++env;
-  if (*env != '\0')
-    gomp_error ("Invalid value for environment variable OMP_WAIT_POLICY");
+  if (*env == '\0')
+    return ret;
+  gomp_error ("Invalid value for environment variable OMP_WAIT_POLICY");
+  return -1;
 }
 
 /* Parse the GOMP_CPU_AFFINITY environment varible.  Return true if one was
@@ -472,6 +473,7 @@ static void __attribute__((constructor))
 initialize_env (void)
 {
   unsigned long stacksize;
+  int wait_policy;
 
   /* Do a compile time check that mkomp_h.pl did good job.  */
   omp_check_defines ();
@@ -479,7 +481,6 @@ initialize_env (void)
   parse_schedule ();
   parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var);
   parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
-  parse_wait_policy ();
   parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var);
   parse_unsigned_long ("OMP_THREAD_LIMIT", &gomp_thread_limit_var);
   if (gomp_thread_limit_var != ULONG_MAX)
@@ -489,23 +490,34 @@ initialize_env (void)
       gomp_mutex_init (&gomp_remaining_threads_lock);
 #endif
     }
+  gomp_init_num_threads ();
+  gomp_available_cpus = gomp_global_icv.nthreads_var;
   if (!parse_unsigned_long ("OMP_NUM_THREADS", &gomp_global_icv.nthreads_var))
-    gomp_init_num_threads ();
+    gomp_global_icv.nthreads_var = gomp_available_cpus;
   if (parse_affinity ())
     gomp_init_affinity ();
-  if (!parse_millis ("GOMP_BLOCKTIME", &gomp_block_time_var))
-    {
-      if (gomp_active_wait_policy)
-	gomp_block_time_var = 200; /* 200ms */
-    }
-  if (gomp_block_time_var > 0)
+  wait_policy = parse_wait_policy ();
+  if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var))
     {
-      if (gomp_block_time_var == ULONG_MAX)
-	gomp_spin_count_var = ~0ULL;
-      else
-	/* Estimate translation of gomp_block_time_var in milliseconds to
-	   spin count.  */;
-    }
+      /* Using a rough estimation of 100000 spins per msec,
+	 use 5 min blocking for OMP_WAIT_POLICY=active,
+	 200 msec blocking when OMP_WAIT_POLICY is not specified
+	 and 0 when OMP_WAIT_POLICY=passive.
+	 Depending on the CPU speed, this can be e.g. 5 times longer
+	 or 5 times shorter.  */
+      if (wait_policy > 0)
+	gomp_spin_count_var = 30000000000LL;
+      else if (wait_policy < 0)
+	gomp_spin_count_var = 20000000LL;
+    }
+  /* gomp_throttled_spin_count_var is used when there are more libgomp
+     managed threads than available CPUs.  Use very short spinning.  */
+  if (wait_policy > 0)
+    gomp_throttled_spin_count_var = 1000LL;
+  else if (wait_policy < 0)
+    gomp_throttled_spin_count_var = 100LL;
+  if (gomp_throttled_spin_count_var > gomp_spin_count_var)
+    gomp_throttled_spin_count_var = gomp_spin_count_var;
 
   /* Not strictly environment related, but ordering constructors is tricky.  */
   pthread_attr_init (&gomp_thread_attr);
--- libgomp/libgomp.h	(revision 133305)
+++ libgomp/libgomp.h	(working copy)
@@ -190,8 +190,8 @@ extern unsigned long gomp_remaining_thre
 extern gomp_mutex_t gomp_remaining_threads_lock;
 #endif
 extern unsigned long gomp_max_active_levels_var;
-extern bool gomp_active_wait_policy;
-extern unsigned long long gomp_spin_count_var;
+extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
+extern unsigned long gomp_available_cpus, gomp_managed_threads;
 
 /* This structure describes a "task" to be run by a thread.  At present
    we implement only synchronous tasks, i.e. no tasks are deferred or
--- libgomp/config/linux/wait.h	(revision 133339)
+++ libgomp/config/linux/wait.h	(working copy)
@@ -51,6 +51,8 @@ static inline void do_wait (int *addr, i
 {
   unsigned long long i, count = gomp_spin_count_var;
 
+  if (__builtin_expect (gomp_managed_threads > gomp_available_cpus, 0))
+    count = gomp_throttled_spin_count_var;
   for (i = 0; i < count; i++)
     if (__builtin_expect (*addr != val, 0))
       return;

	Jakub

Attachment: spins_per_msec.c
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]