This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Hi! I've been playing with runtime detection of the number of do_wait spins per millisecond (see attached proglet). Unfortunately, on some architectures (e.g. ppc G5) it is completely unreliable, and on others, including x86_64, it is from time to time 10 times off. But estimating the spin count so that it is at most 10 times off at least on most CPUs doesn't need any runtime detection, especially given that CPU frequencies hit the ceiling. So, instead of letting users specify GOMP_BLOCKTIME as time in milliseconds to spin, this patch lets users specify GOMP_SPINCOUNT as a number of spins. If this isn't specified, the default is 30g for OMP_WAIT_POLICY=active (30g is roughly 5 minutes, could be 2 or 15 minutes depending on hw), 20m for no OMP_WAIT_POLICY (roughly 0.2 seconds), and 0 for OMP_WAIT_POLICY=passive. Additionally, the patch tracks the number of threads currently managed by libgomp, and if there are more libgomp managed threads than available CPUs, the spin counts decrease radically (1k for OMP_WAIT_POLICY=active, 100 for no OMP_WAIT_POLICY). The busy waiting duration is a hint anyway, so being not very precise is IMHO not a big deal, but e.g. doing clock_gettime every few iterations would increase latency a lot. 2008-03-19 Jakub Jelinek <jakub@redhat.com> * libgomp.h (gomp_active_wait_policy): Remove decl. (gomp_throttled_spin_count_var, gomp_available_cpus, gomp_managed_threads): New extern decls. * team.c (gomp_team_start, gomp_team_end): If number of threads changed, adjust atomically gomp_managed_threads. * env.c (gomp_active_wait_policy, gomp_block_time_var): Remove. (gomp_throttled_spin_count_var, gomp_available_cpus, gomp_managed_threads): New variables. (parse_millis): Removed. (parse_spincount): New function. (parse_wait_policy): Return -1/0/1 instead of setting gomp_active_wait_policy. (initialize_env): Call gomp_init_num_threads unconditionally. Initialize gomp_available_cpus. 
Call parse_spincount instead of parse_millis, initialize gomp_{,throttled_}spin_count_var depending on presence and value of OMP_WAIT_POLICY and GOMP_SPINCOUNT env vars. * config/linux/wait.h (do_wait): Use gomp_throttled_spin_count_var instead of gomp_spin_count_var if gomp_managed_threads > gomp_available_cpus. --- libgomp/team.c (revision 133292) +++ libgomp/team.c (working copy) @@ -287,8 +287,24 @@ gomp_team_start (void (*fn) (void *), vo } } + if (__builtin_expect (nthreads > old_threads_used, 0)) + { + long diff = (long) nthreads - (long) old_threads_used; + + if (old_threads_used == 0) + --diff; + +#ifdef HAVE_SYNC_BUILTINS + __sync_fetch_and_add (&gomp_managed_threads, diff); +#else + gomp_mutex_lock (&gomp_remaining_threads_lock); + gomp_managed_threads += diff; + gomp_mutex_unlock (&gomp_remaining_threads_lock); +#endif + } + attr = &gomp_thread_attr; - if (gomp_cpu_affinity != NULL) + if (__builtin_expect (gomp_cpu_affinity != NULL, 0)) { size_t stacksize; pthread_attr_init (&thread_attr); @@ -328,7 +344,7 @@ gomp_team_start (void (*fn) (void *), vo gomp_fatal ("Thread creation failed: %s", strerror (err)); } - if (gomp_cpu_affinity != NULL) + if (__builtin_expect (gomp_cpu_affinity != NULL, 0)) pthread_attr_destroy (&thread_attr); do_release: @@ -338,8 +354,20 @@ gomp_team_start (void (*fn) (void *), vo that should arrive back at the end of this team. The extra threads should be exiting. Note that we arrange for this test to never be true for nested teams. 
*/ - if (nthreads < old_threads_used) - gomp_barrier_reinit (&gomp_threads_dock, nthreads); + if (__builtin_expect (nthreads < old_threads_used, 0)) + { + long diff = (long) nthreads - (long) old_threads_used; + + gomp_barrier_reinit (&gomp_threads_dock, nthreads); + +#ifdef HAVE_SYNC_BUILTINS + __sync_fetch_and_add (&gomp_managed_threads, diff); +#else + gomp_mutex_lock (&gomp_remaining_threads_lock); + gomp_managed_threads += diff; + gomp_mutex_unlock (&gomp_remaining_threads_lock); +#endif + } } @@ -357,6 +385,17 @@ gomp_team_end (void) gomp_end_task (); thr->ts = team->prev_ts; + if (__builtin_expect (thr->ts.team != NULL, 0)) + { +#ifdef HAVE_SYNC_BUILTINS + __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads); +#else + gomp_mutex_lock (&gomp_remaining_threads_lock); + gomp_managed_threads -= team->nthreads - 1L; + gomp_mutex_unlock (&gomp_remaining_threads_lock); +#endif + } + free_team (team); } --- libgomp/env.c (revision 133291) +++ libgomp/env.c (working copy) @@ -57,7 +57,6 @@ struct gomp_task_icv gomp_global_icv = { }; unsigned short *gomp_cpu_affinity; -bool gomp_active_wait_policy = false; size_t gomp_cpu_affinity_len; unsigned long gomp_max_active_levels_var = INT_MAX; unsigned long gomp_thread_limit_var = ULONG_MAX; @@ -65,8 +64,8 @@ unsigned long gomp_remaining_threads_cou #ifndef HAVE_SYNC_BUILTINS gomp_mutex_t gomp_remaining_threads_lock; #endif -static unsigned long gomp_block_time_var; -unsigned long long gomp_spin_count_var; +unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1; +unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; /* Parse the OMP_SCHEDULE environment variable. */ @@ -239,14 +238,14 @@ parse_stacksize (const char *name, unsig return false; } -/* Parse the GOMP_BLOCKTIME environment varible. Return true if one was +/* Parse the GOMP_SPINCOUNT environment varible. Return true if one was present and it was successfully parsed. 
*/ static bool -parse_millis (const char *name, unsigned long *pvalue) +parse_spincount (const char *name, unsigned long long *pvalue) { char *env, *end; - unsigned long value, mult = 1; + unsigned long long value, mult = 1; env = getenv (name); if (env == NULL) @@ -257,17 +256,16 @@ parse_millis (const char *name, unsigned if (*env == '\0') goto invalid; - if (strncasecmp (env, "infinite", 8) != 0 - || strncasecmp (env, "infinity", 8) != 0 - || strncasecmp (env, "unexpire", 8) != 0) + if (strncasecmp (env, "infinite", 8) == 0 + || strncasecmp (env, "infinity", 8) == 0) { - value = ULONG_MAX; + value = ~0ULL; end = env + 8; goto check_tail; } errno = 0; - value = strtoul (env, &end, 10); + value = strtoull (env, &end, 10); if (errno) goto invalid; @@ -277,17 +275,17 @@ parse_millis (const char *name, unsigned { switch (tolower (*end)) { - case 's': - mult = 1000; + case 'k': + mult = 1000LL; break; case 'm': - mult = 60 * 1000; + mult = 1000LL * 1000LL; break; - case 'h': - mult = 60 * 60 * 1000; + case 'g': + mult = 1000LL * 1000LL * 1000LL; break; - case 'd': - mult = 24 * 60 * 60 * 1000; + case 't': + mult = 1000LL * 1000LL * 1000LL * 1000LL; break; default: goto invalid; @@ -300,8 +298,8 @@ parse_millis (const char *name, unsigned goto invalid; } - if (value > ULONG_MAX / mult) - value = ULONG_MAX; + if (value > ~0ULL / mult) + value = ~0ULL; else value *= mult; @@ -348,33 +346,36 @@ parse_boolean (const char *name, bool *v /* Parse the OMP_WAIT_POLICY environment variable and store the result in gomp_active_wait_policy. 
*/ -static void +static int parse_wait_policy (void) { const char *env; + int ret = -1; env = getenv ("OMP_WAIT_POLICY"); if (env == NULL) - return; + return -1; while (isspace ((unsigned char) *env)) ++env; if (strncasecmp (env, "active", 6) == 0) { - gomp_active_wait_policy = true; + ret = 1; env += 6; } else if (strncasecmp (env, "passive", 7) == 0) { - gomp_active_wait_policy = false; + ret = 0; env += 7; } else env = "X"; while (isspace ((unsigned char) *env)) ++env; - if (*env != '\0') - gomp_error ("Invalid value for environment variable OMP_WAIT_POLICY"); + if (*env == '\0') + return ret; + gomp_error ("Invalid value for environment variable OMP_WAIT_POLICY"); + return -1; } /* Parse the GOMP_CPU_AFFINITY environment varible. Return true if one was @@ -472,6 +473,7 @@ static void __attribute__((constructor)) initialize_env (void) { unsigned long stacksize; + int wait_policy; /* Do a compile time check that mkomp_h.pl did good job. */ omp_check_defines (); @@ -479,7 +481,6 @@ initialize_env (void) parse_schedule (); parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var); parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); - parse_wait_policy (); parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var); parse_unsigned_long ("OMP_THREAD_LIMIT", &gomp_thread_limit_var); if (gomp_thread_limit_var != ULONG_MAX) @@ -489,23 +490,34 @@ initialize_env (void) gomp_mutex_init (&gomp_remaining_threads_lock); #endif } + gomp_init_num_threads (); + gomp_available_cpus = gomp_global_icv.nthreads_var; if (!parse_unsigned_long ("OMP_NUM_THREADS", &gomp_global_icv.nthreads_var)) - gomp_init_num_threads (); + gomp_global_icv.nthreads_var = gomp_available_cpus; if (parse_affinity ()) gomp_init_affinity (); - if (!parse_millis ("GOMP_BLOCKTIME", &gomp_block_time_var)) - { - if (gomp_active_wait_policy) - gomp_block_time_var = 200; /* 200ms */ - } - if (gomp_block_time_var > 0) + wait_policy = parse_wait_policy (); + if (!parse_spincount ("GOMP_SPINCOUNT", 
&gomp_spin_count_var)) { - if (gomp_block_time_var == ULONG_MAX) - gomp_spin_count_var = ~0ULL; - else - /* Estimate translation of gomp_block_time_var in milliseconds to - spin count. */; - } + /* Using a rough estimation of 100000 spins per msec, + use 5 min blocking for OMP_WAIT_POLICY=active, + 200 msec blocking when OMP_WAIT_POLICY is not specificed + and 0 when OMP_WAIT_POLICY=passive. + Depending on the CPU speed, this can be e.g. 5 times longer + or 5 times shorter. */ + if (wait_policy > 0) + gomp_spin_count_var = 30000000000LL; + else if (wait_policy < 0) + gomp_spin_count_var = 20000000LL; + } + /* gomp_throttled_spin_count_var is used when there are more libgomp + managed threads than available CPUs. Use very short spinning. */ + if (wait_policy > 0) + gomp_throttled_spin_count_var = 1000LL; + else if (wait_policy < 0) + gomp_throttled_spin_count_var = 100LL; + if (gomp_throttled_spin_count_var > gomp_spin_count_var) + gomp_throttled_spin_count_var = gomp_spin_count_var; /* Not strictly environment related, but ordering constructors is tricky. */ pthread_attr_init (&gomp_thread_attr); --- libgomp/libgomp.h (revision 133305) +++ libgomp/libgomp.h (working copy) @@ -190,8 +190,8 @@ extern unsigned long gomp_remaining_thre extern gomp_mutex_t gomp_remaining_threads_lock; #endif extern unsigned long gomp_max_active_levels_var; -extern bool gomp_active_wait_policy; -extern unsigned long long gomp_spin_count_var; +extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; +extern unsigned long gomp_available_cpus, gomp_managed_threads; /* This structure describes a "task" to be run by a thread. At present we implement only synchronous tasks, i.e. 
no tasks are deferred or --- libgomp/config/linux/wait.h (revision 133339) +++ libgomp/config/linux/wait.h (working copy) @@ -51,6 +51,8 @@ static inline void do_wait (int *addr, i { unsigned long long i, count = gomp_spin_count_var; + if (__builtin_expect (gomp_managed_threads > gomp_available_cpus, 0)) + count = gomp_throttled_spin_count_var; for (i = 0; i < count; i++) if (__builtin_expect (*addr != val, 0)) return; Jakub
Attachment:
spins_per_msec.c
Description: Text document
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |