This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Come up with -flto=auto option.
- From: Martin Liška <mliska at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Jan Hubicka <hubicka at ucw dot cz>, Michael Matz <matz at suse dot de>, Richard Biener <richard dot guenther at gmail dot com>
- Date: Tue, 23 Jul 2019 10:30:07 +0200
- Subject: [PATCH] Come up with -flto=auto option.
Hi.
Since we at openSUSE started using -flto, I think it would be very handy
to have an option value that automatically detects the number of cores
that can be used for the parallel LTRANS phase.
Thoughts?
gcc/ChangeLog:
2019-07-23 Martin Liska <mliska@suse.cz>
* doc/invoke.texi: Document the new option value.
* lto-wrapper.c (cpuset_popcount): New function,
copied from libgomp/config/linux/proc.c.
(init_num_threads): Likewise.
(run_gcc): Support -flto=auto.
---
gcc/doc/invoke.texi | 3 ++
gcc/lto-wrapper.c | 124 +++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 126 insertions(+), 1 deletion(-)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 77a2d561e38..58656fbe1e1 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10398,6 +10398,9 @@ parallel jobs by utilizing an installed @command{make} program. The
environment variable @env{MAKE} may be used to override the program
used. The default value for @var{n} is 1.
+You can specify @option{-flto=auto} to automatically detect the number of
+available cores and use that as the number of parallel jobs.
+
You can also specify @option{-flto=jobserver} to use GNU make's
job server mode to determine the number of parallel jobs. This
is useful when the Makefile calling GCC is already executing in parallel.
diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c
index 946897726d0..5451285f896 100644
--- a/gcc/lto-wrapper.c
+++ b/gcc/lto-wrapper.c
@@ -1110,6 +1110,110 @@ cmp_priority (const void *a, const void *b)
return *((const int *)b)-*((const int *)a);
}
+/* Number of CPUs that can be used for parallel LTRANS phase.  */
+
+static unsigned long nthreads_var = 0;
+
+#ifdef HAVE_PTHREAD_AFFINITY_NP
+/* Size in bytes of the buffer CPUSETP points to.  */
+static unsigned long cpuset_size;
+
+/* Buffer size for which pthread_getaffinity_np last succeeded; recorded
+   by init_num_threads.  */
+static unsigned long get_cpuset_size;
+
+/* Affinity mask buffer; allocated and filled in by init_num_threads.  */
+static cpu_set_t *cpusetp;
+
+/* Return the number of CPUs set in the affinity mask CPUSETP, which is
+   CPUSETSIZE bytes long.  */
+
+static unsigned long
+cpuset_popcount (unsigned long cpusetsize, cpu_set_t *cpusetp)
+{
+#ifdef CPU_COUNT_S
+  /* glibc 2.7 and above provide a macro for this.  */
+  return CPU_COUNT_S (cpusetsize, cpusetp);
+#else
+#ifdef CPU_COUNT
+  if (cpusetsize == sizeof (cpu_set_t))
+    /* glibc 2.6 and above provide a macro for this.  */
+    return CPU_COUNT (cpusetp);
+#endif
+  /* No usable macro: count the set bits one mask word at a time.  */
+  size_t i;
+  unsigned long ret = 0;
+  STATIC_ASSERT (sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int));
+  for (i = 0; i < cpusetsize / sizeof (cpusetp->__bits[0]); i++)
+    ret += __builtin_popcountl (cpusetp->__bits[i]);
+  return ret;
+#endif
+}
+#endif
+
+/* Determine the number of CPUs usable for the parallel LTRANS phase and
+   store it in nthreads_var.  When pthread affinity is available, the CPUs
+   in the affinity mask of the current thread are counted.  If that is
+   unavailable or fails, the number of online processors reported by
+   sysconf is used instead.  On return nthreads_var is at least 1.  */
+
+static void
+init_num_threads (void)
+{
+#ifdef HAVE_PTHREAD_AFFINITY_NP
+#if defined (_SC_NPROCESSORS_CONF) && defined (CPU_ALLOC_SIZE)
+  cpuset_size = sysconf (_SC_NPROCESSORS_CONF);
+  cpuset_size = CPU_ALLOC_SIZE (cpuset_size);
+#else
+  cpuset_size = sizeof (cpu_set_t);
+#endif
+
+  cpusetp = (cpu_set_t *) xmalloc (cpuset_size);
+  do
+    {
+      int ret = pthread_getaffinity_np (pthread_self (), cpuset_size,
+					cpusetp);
+      if (ret == 0)
+	{
+	  /* Count only the CPUs this process can use.  */
+	  nthreads_var = cpuset_popcount (cpuset_size, cpusetp);
+	  if (nthreads_var == 0)
+	    break;
+	  get_cpuset_size = cpuset_size;
+#ifdef CPU_ALLOC_SIZE
+	  /* Shrink cpuset_size to what is needed for the highest CPU
+	     present in the mask.  */
+	  unsigned long i;
+	  for (i = cpuset_size * 8; i; i--)
+	    if (CPU_ISSET_S (i - 1, cpuset_size, cpusetp))
+	      break;
+	  cpuset_size = CPU_ALLOC_SIZE (i);
+#endif
+	  return;
+	}
+      /* Only EINVAL is retried, with a larger buffer.  */
+      if (ret != EINVAL)
+	break;
+#ifdef CPU_ALLOC_SIZE
+      if (cpuset_size < sizeof (cpu_set_t))
+	cpuset_size = sizeof (cpu_set_t);
+      else
+	cpuset_size = cpuset_size * 2;
+      if (cpuset_size < 8 * sizeof (cpu_set_t))
+	/* Small request: use the aborting allocator, as for the initial
+	   xmalloc, so CPUSETP is never left NULL inside the loop.  */
+	cpusetp = (cpu_set_t *) xrealloc (cpusetp, cpuset_size);
+      else
+	{
+	  /* Avoid fatal if too large memory allocation would be
+	     requested, e.g. kernel returning EINVAL all the time.  */
+	  void *p = realloc (cpusetp, cpuset_size);
+	  if (p == NULL)
+	    break;
+	  cpusetp = (cpu_set_t *) p;
+	}
+#else
+      break;
+#endif
+    }
+  while (1);
+  /* The affinity query failed; release the mask and fall back below.  */
+  cpuset_size = 0;
+  free (cpusetp);
+  cpusetp = NULL;
+#endif
+  /* Fallback: default to a single job and use the online processor count
+     only when sysconf reports a positive value (it returns -1 on error).  */
+  nthreads_var = 1;
+#ifdef _SC_NPROCESSORS_ONLN
+  long online = sysconf (_SC_NPROCESSORS_ONLN);
+  if (online > 0)
+    nthreads_var = online;
+#endif
+}
/* Execute gcc. ARGC is the number of arguments. ARGV contains the arguments. */
@@ -1124,6 +1228,7 @@ run_gcc (unsigned argc, char *argv[])
const char *collect_gcc, *collect_gcc_options;
int parallel = 0;
int jobserver = 0;
+ int auto_parallel = 0;
bool no_partition = false;
struct cl_decoded_option *fdecoded_options = NULL;
struct cl_decoded_option *offload_fdecoded_options = NULL;
@@ -1251,6 +1356,11 @@ run_gcc (unsigned argc, char *argv[])
jobserver = 1;
parallel = 1;
}
+ else if (strcmp (option->arg, "auto") == 0)
+ {
+ auto_parallel = 1;
+ parallel = 1;
+ }
else
{
parallel = atoi (option->arg);
@@ -1291,6 +1401,7 @@ run_gcc (unsigned argc, char *argv[])
{
lto_mode = LTO_MODE_LTO;
jobserver = 0;
+ auto_parallel = 0;
parallel = 0;
}
@@ -1485,6 +1596,16 @@ cont1:
if (jobserver)
obstack_ptr_grow (&argv_obstack, xstrdup ("-fwpa=jobserver"));
+ else if (auto_parallel)
+ {
+ char buf[256];
+ init_num_threads ();
+ if (verbose)
+ fprintf (stderr, "LTO parallelism level set to %ld\n",
+ nthreads_var);
+ sprintf (buf, "-fwpa=%ld", nthreads_var);
+ obstack_ptr_grow (&argv_obstack, xstrdup (buf));
+ }
else if (parallel > 1)
{
char buf[256];
@@ -1692,7 +1813,8 @@ cont:
i = 3;
if (!jobserver)
{
- snprintf (jobs, 31, "-j%d", parallel);
+ snprintf (jobs, 31, "-j%ld",
+ auto_parallel ? nthreads_var : parallel);
new_argv[i++] = jobs;
}
new_argv[i++] = "all";