[PATCH] Come up with -flto=auto option.

Jan Hubicka hubicka@ucw.cz
Tue Jul 23 09:29:00 GMT 2019


> Hi.
> 
> As we as openSUSE started using -flto, I see it very handy to have
> an option value that will automatically detect number of cores
> that can be used for parallel LTRANS phase.
> 
> Thoughts?
Hi,
it is great that you found time to do this. It should become the default
for -flto IMO.

I think we should also auto-detect the case where a jobserver is available
and in that case let make connect to the outer jobserver.  (We should
also really convince the make developers to invent a way to connect to it
w/o the extra + rule.)

Honza
> 
> gcc/ChangeLog:
> 
> 2019-07-23  Martin Liska  <mliska@suse.cz>
> 
> 	* doc/invoke.texi: Document the new option value.
> 	* lto-wrapper.c (cpuset_popcount): New function
> 	is a copy of libgomp/config/linux/proc.c.
> 	(init_num_threads): Likewise.
> 	(run_gcc): Support -flto=auto.
> ---
>  gcc/doc/invoke.texi |   3 ++
>  gcc/lto-wrapper.c   | 124 +++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 126 insertions(+), 1 deletion(-)
> 
> 

> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 77a2d561e38..58656fbe1e1 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -10398,6 +10398,9 @@ parallel jobs by utilizing an installed @command{make} program.  The
>  environment variable @env{MAKE} may be used to override the program
>  used.  The default value for @var{n} is 1.
>  
> +You can specify @var{auto} to automatically detect number of
> +cores that will determine the number of parallel jobs.
> +
>  You can also specify @option{-flto=jobserver} to use GNU make's
>  job server mode to determine the number of parallel jobs. This
>  is useful when the Makefile calling GCC is already executing in parallel.
> diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c
> index 946897726d0..5451285f896 100644
> --- a/gcc/lto-wrapper.c
> +++ b/gcc/lto-wrapper.c
> @@ -1110,6 +1110,110 @@ cmp_priority (const void *a, const void *b)
>    return *((const int *)b)-*((const int *)a);
>  }
>  
> +/* Number of CPUs that can be used for parallel LTRANS phase.  */
> +
> +static unsigned long nthreads_var = 0;
> +
> +#ifdef HAVE_PTHREAD_AFFINITY_NP
> +unsigned long cpuset_size;
> +static unsigned long get_cpuset_size;
> +cpu_set_t *cpusetp;
> +
> +unsigned long
> +static cpuset_popcount (unsigned long cpusetsize, cpu_set_t *cpusetp)
> +{
> +#ifdef CPU_COUNT_S
> +  /* glibc 2.7 and above provide a macro for this.  */
> +  return CPU_COUNT_S (cpusetsize, cpusetp);
> +#else
> +#ifdef CPU_COUNT
> +  if (cpusetsize == sizeof (cpu_set_t))
> +    /* glibc 2.6 and above provide a macro for this.  */
> +    return CPU_COUNT (cpusetp);
> +#endif
> +  size_t i;
> +  unsigned long ret = 0;
> +  STATIC_ASSERT (sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int));
> +  for (i = 0; i < cpusetsize / sizeof (cpusetp->__bits[0]); i++)
> +    {
> +      unsigned long int mask = cpusetp->__bits[i];
> +      if (mask == 0)
> +	continue;
> +      ret += __builtin_popcountl (mask);
> +    }
> +  return ret;
> +#endif
> +}
> +#endif
> +
> +/* At startup, determine the default number of threads.  It would seem
> +   this should be related to the number of cpus online.  */
> +
> +static void
> +init_num_threads (void)
> +{
> +#ifdef HAVE_PTHREAD_AFFINITY_NP
> +#if defined (_SC_NPROCESSORS_CONF) && defined (CPU_ALLOC_SIZE)
> +  cpuset_size = sysconf (_SC_NPROCESSORS_CONF);
> +  cpuset_size = CPU_ALLOC_SIZE (cpuset_size);
> +#else
> +  cpuset_size = sizeof (cpu_set_t);
> +#endif
> +
> +  cpusetp = (cpu_set_t *) xmalloc (gomp_cpuset_size);
> +  do
> +    {
> +      int ret = pthread_getaffinity_np (pthread_self (), gomp_cpuset_size,
> +					cpusetp);
> +      if (ret == 0)
> +	{
> +	  /* Count only the CPUs this process can use.  */
> +	  nthreads_var = cpuset_popcount (cpuset_size, cpusetp);
> +	  if (nthreads_var == 0)
> +	    break;
> +	  get_cpuset_size = cpuset_size;
> +#ifdef CPU_ALLOC_SIZE
> +	  unsigned long i;
> +	  for (i = cpuset_size * 8; i; i--)
> +	    if (CPU_ISSET_S (i - 1, cpuset_size, cpusetp))
> +	      break;
> +	  cpuset_size = CPU_ALLOC_SIZE (i);
> +#endif
> +	  return;
> +	}
> +      if (ret != EINVAL)
> +	break;
> +#ifdef CPU_ALLOC_SIZE
> +      if (cpuset_size < sizeof (cpu_set_t))
> +	cpuset_size = sizeof (cpu_set_t);
> +      else
> +	cpuset_size = cpuset_size * 2;
> +      if (cpuset_size < 8 * sizeof (cpu_set_t))
> +	cpusetp
> +	  = (cpu_set_t *) realloc (cpusetp, cpuset_size);
> +      else
> +	{
> +	  /* Avoid fatal if too large memory allocation would be
> +	     requested, e.g. kernel returning EINVAL all the time.  */
> +	  void *p = realloc (cpusetp, cpuset_size);
> +	  if (p == NULL)
> +	    break;
> +	  cpusetp = (cpu_set_t *) p;
> +	}
> +#else
> +      break;
> +#endif
> +    }
> +  while (1);
> +  cpuset_size = 0;
> +  nthreads_var = 1;
> +  free (cpusetp);
> +  cpusetp = NULL;
> +#endif
> +#ifdef _SC_NPROCESSORS_ONLN
> +  nthreads_var = sysconf (_SC_NPROCESSORS_ONLN);
> +#endif
> +}
>  
>  /* Execute gcc. ARGC is the number of arguments. ARGV contains the arguments. */
>  
> @@ -1124,6 +1228,7 @@ run_gcc (unsigned argc, char *argv[])
>    const char *collect_gcc, *collect_gcc_options;
>    int parallel = 0;
>    int jobserver = 0;
> +  int auto_parallel = 0;
>    bool no_partition = false;
>    struct cl_decoded_option *fdecoded_options = NULL;
>    struct cl_decoded_option *offload_fdecoded_options = NULL;
> @@ -1251,6 +1356,11 @@ run_gcc (unsigned argc, char *argv[])
>  	      jobserver = 1;
>  	      parallel = 1;
>  	    }
> +	  else if (strcmp (option->arg, "auto") == 0)
> +	    {
> +	      auto_parallel = 1;
> +	      parallel = 1;
> +	    }
>  	  else
>  	    {
>  	      parallel = atoi (option->arg);
> @@ -1291,6 +1401,7 @@ run_gcc (unsigned argc, char *argv[])
>      {
>        lto_mode = LTO_MODE_LTO;
>        jobserver = 0;
> +      auto_parallel = 0;
>        parallel = 0;
>      }
>  
> @@ -1485,6 +1596,16 @@ cont1:
>  
>        if (jobserver)
>  	obstack_ptr_grow (&argv_obstack, xstrdup ("-fwpa=jobserver"));
> +      else if (auto_parallel)
> +	{
> +	  char buf[256];
> +	  init_num_threads ();
> +	  if (verbose)
> +	    fprintf (stderr, "LTO parallelism level set to %ld\n",
> +		     nthreads_var);
> +	  sprintf (buf, "-fwpa=%ld", nthreads_var);
> +	  obstack_ptr_grow (&argv_obstack, xstrdup (buf));
> +	}
>        else if (parallel > 1)
>  	{
>  	  char buf[256];
> @@ -1692,7 +1813,8 @@ cont:
>  	  i = 3;
>  	  if (!jobserver)
>  	    {
> -	      snprintf (jobs, 31, "-j%d", parallel);
> +	      snprintf (jobs, 31, "-j%ld",
> +			auto_parallel ? nthreads_var : parallel);
>  	      new_argv[i++] = jobs;
>  	    }
>  	  new_argv[i++] = "all";
> 



More information about the Gcc-patches mailing list