This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC] Old school parallelization of WPA streaming


> On Thu, 21 Nov 2013, Jan Hubicka wrote:
> 
> > > 
> > > Why do you need an additional -fparallelism?  Wouldn't
> > > -fwpa=... be a better match, matching -flto=...?  As we already
> > > pass down a -fwpa option to WPA this would make things easier, no?
> > 
> > My plan was to possibly use the same option later for parallelizing more parts of
> > the compiler, not only WPA streaming. Streaming in may have some chance if we get
> > into thread safety of GGC or move a sufficient amount of stuff out of GGC.  Also
> > we can parallelize the inliner heuristic or IPA-PTA if it will ever work. So it
> > would make sense with -flto-partition=none and perhaps with local optimization,
> > too.
> 
> I'd like to drop -flto-partition=none eventually.  It's just one more
> path through the compiler to support ...
> 
> > But I can definitely update the patch to use -fwpa=N and we can deal with this
> > once this becomes real. (i.e. I have no clue how to parallelize the inliner without
> > making its decisions dependent on the parallelism and degrading as parallelism
> > increases, nor do I have real plans for the stream-in procedure)
> 
> Please.
> 

Hi,
here is the updated patch. Sorry for taking so long; I should have more time for hacking again
now...

Honza

	* lto-cgraph.c (asm_nodes_output): Make global.
	* lto-wrapper.c (run_gcc): Pass down parallelism to WPA.
	* lto.c (lto_parallelism): New static var.
	(do_stream_out, wait_for_child, stream_out): New static functions.
	(lto_wpa_write_files): Add support for parallel streaming.
	(do_whole_program_analysis): Set parallelism.
	* lang.opt (fwpa): Add parameter.
	* lto-lang.c (lto_handle_option): Handle flag_wpa.
	(lto_init): Update use of flag_wpa.
	* lto-streamer.h (asm_nodes_output): Declare.
Index: lto-cgraph.c
===================================================================
*** lto-cgraph.c	(revision 205646)
--- lto-cgraph.c	(working copy)
*************** along with GCC; see the file COPYING3.
*** 53,58 ****
--- 53,61 ----
  #include "pass_manager.h"
  #include "ipa-utils.h"
  
+ /* True when asm nodes have been output.  */
+ bool asm_nodes_output = false;
+ 
  static void output_cgraph_opt_summary (void);
  static void input_cgraph_opt_summary (vec<symtab_node *>  nodes);
  
*************** output_symtab (void)
*** 889,895 ****
    lto_symtab_encoder_iterator lsei;
    int i, n_nodes;
    lto_symtab_encoder_t encoder;
-   static bool asm_nodes_output = false;
  
    if (flag_wpa)
      output_cgraph_opt_summary ();
--- 892,897 ----
Index: lto-wrapper.c
===================================================================
*** lto-wrapper.c	(revision 205646)
--- lto-wrapper.c	(working copy)
*************** run_gcc (unsigned argc, char *argv[])
*** 745,751 ****
        tmp += list_option_len;
        strcpy (tmp, ltrans_output_file);
  
!       obstack_ptr_grow (&argv_obstack, "-fwpa");
      }
  
    /* Append the input objects and possible preceding arguments.  */
--- 746,761 ----
        tmp += list_option_len;
        strcpy (tmp, ltrans_output_file);
  
!       if (jobserver)
! 	obstack_ptr_grow (&argv_obstack, xstrdup ("-fwpa=jobserver"));
!       else if (parallel > 1)
! 	{
! 	  char buf[256];
! 	  sprintf (buf, "-fwpa=%i", parallel);
! 	  obstack_ptr_grow (&argv_obstack, xstrdup (buf));
! 	}
!       else
!         obstack_ptr_grow (&argv_obstack, "-fwpa");
      }
  
    /* Append the input objects and possible preceding arguments.  */
Index: lto/lto.c
===================================================================
*** lto/lto.c	(revision 205646)
--- lto/lto.c	(working copy)
*************** along with GCC; see the file COPYING3.
*** 53,58 ****
--- 53,61 ----
  /* Vector to keep track of external variables we've seen so far.  */
  vec<tree, va_gc> *lto_global_var_decls;
  
+ /* Number of parallel tasks to run, -1 if we want to use GNU Make jobserver.  */
+ static int lto_parallelism;
+ 
  static GTY(()) tree first_personality_decl;
  
  /* Returns a hash code for P.  */
*************** cmp_partitions_order (const void *a, con
*** 2454,2459 ****
--- 2457,2554 ----
    return orderb - ordera;
  }
  
+ /* Stream ENCODER out into the LTO object file TEMP_FILENAME; failing to open it is a fatal error.  */
+ 
+ static void
+ do_stream_out (char *temp_filename, lto_symtab_encoder_t encoder)
+ {
+   lto_file *file = lto_obj_file_open (temp_filename, true);
+   if (!file)
+     fatal_error ("lto_obj_file_open() failed");
+   lto_set_current_out_file (file);  /* Make FILE the current output file for the write below.  */
+ 
+   ipa_write_optimization_summaries (encoder);
+ 
+   lto_set_current_out_file (NULL);  /* Unset the current output file again before closing FILE.  */
+   lto_obj_file_close (file);
+   free (file);
+ }
+ 
+ /* Block until one forked child terminates; a waitpid failure, a nonzero exit status or death by signal is a fatal error.  */
+ #ifdef HAVE_WORKING_FORK
+ static void
+ wait_for_child ()
+ {
+   int status;
+   do
+     {
+       int w = waitpid(0, &status, WUNTRACED | WCONTINUED);  /* PID 0: any child in our process group; stop/continue events are reported too.  */
+       if (w == -1)
+ 	fatal_error ("waitpid failed");
+ 
+       if (WIFEXITED (status) && WEXITSTATUS (status))
+ 	fatal_error ("streaming subprocess failed");
+       else if (WIFSIGNALED (status))
+ 	fatal_error ("streaming subprocess was killed by signal");
+     }
+   while (!WIFEXITED(status) && !WIFSIGNALED(status));  /* Keep waiting if the child was only stopped or continued.  */
+ }
+ #endif
+ 
+ /* Stream out ENCODER into TEMP_FILENAME, forking a child process to do the
+    work when parallel streaming is enabled.  LAST marks the final partition.  */
+ 
+ static void
+ stream_out (char *temp_filename, lto_symtab_encoder_t encoder, bool last)
+ {
+ #ifdef HAVE_WORKING_FORK
+   static int nruns;  /* Number of children forked and not yet waited for.  */
+ 
+   if (!lto_parallelism || lto_parallelism == 1)  /* 0 or 1: no parallelism, stream in this process.  */
+     {
+       do_stream_out (temp_filename, encoder);
+       return;
+     }
+ 
+   /* Do not run more than LTO_PARALLELISM streaming processes at once.
+      FIXME: we ignore limits on jobserver.  */
+   if (lto_parallelism > 0 && nruns >= lto_parallelism)
+     {
+       wait_for_child ();
+       nruns --;
+     }
+   /* If this is not the last partition, fork a new
+      streaming process.  */
+   if (!last)
+     {
+       pid_t cpid = fork ();
+ 
+       if (!cpid)  /* Child: stream this one partition, then exit.  */
+ 	{
+ 	  setproctitle ("lto1-wpa-streaming");
+ 	  do_stream_out (temp_filename, encoder);
+ 	  exit (0);
+ 	}
+       /* Fork failed; let's do the job ourselves.  */
+       else if (cpid == -1)
+         do_stream_out (temp_filename, encoder);
+       else
+ 	nruns++;
+     }
+   /* Last partition; stream it in this process and wait for all children to exit.  */
+   else
+     {
+       int i;
+       do_stream_out (temp_filename, encoder);
+       for (i = 0; i < nruns; i++)
+ 	wait_for_child ();
+     }
+   asm_nodes_output = true;  /* NOTE(review): presumably stops later partitions from emitting asm nodes again (flag is read in lto-cgraph.c) -- confirm.  */
+ #else
+   do_stream_out (temp_filename, encoder);
+ #endif
+ }
+ 
  /* Write all output files in WPA mode and the file with the list of
     LTRANS units.  */
  
*************** static void
*** 2461,2478 ****
  lto_wpa_write_files (void)
  {
    unsigned i, n_sets;
-   lto_file *file;
    ltrans_partition part;
    FILE *ltrans_output_list_stream;
    char *temp_filename;
    size_t blen;
  
    /* Open the LTRANS output list.  */
    if (!ltrans_output_list)
      fatal_error ("no LTRANS output list filename provided");
-   ltrans_output_list_stream = fopen (ltrans_output_list, "w");
-   if (ltrans_output_list_stream == NULL)
-     fatal_error ("opening LTRANS output list %s: %m", ltrans_output_list);
  
    timevar_push (TV_WHOPR_WPA);
  
--- 2556,2570 ----
  lto_wpa_write_files (void)
  {
    unsigned i, n_sets;
    ltrans_partition part;
    FILE *ltrans_output_list_stream;
    char *temp_filename;
+   vec <char *>temp_filenames = vNULL;
    size_t blen;
  
    /* Open the LTRANS output list.  */
    if (!ltrans_output_list)
      fatal_error ("no LTRANS output list filename provided");
  
    timevar_push (TV_WHOPR_WPA);
  
*************** lto_wpa_write_files (void)
*** 2508,2521 ****
  			   : cmp_partitions_order);
    for (i = 0; i < n_sets; i++)
      {
-       size_t len;
        ltrans_partition part = ltrans_partitions[i];
  
        /* Write all the nodes in SET.  */
        sprintf (temp_filename + blen, "%u.o", i);
-       file = lto_obj_file_open (temp_filename, true);
-       if (!file)
- 	fatal_error ("lto_obj_file_open() failed");
  
        if (!quiet_flag)
  	fprintf (stderr, " %s (%s %i insns)", temp_filename, part->name, part->insns);
--- 2600,2609 ----
*************** lto_wpa_write_files (void)
*** 2557,2577 ****
  	}
        gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i);
  
!       lto_set_current_out_file (file);
! 
!       ipa_write_optimization_summaries (part->encoder);
  
-       lto_set_current_out_file (NULL);
-       lto_obj_file_close (file);
-       free (file);
        part->encoder = NULL;
  
!       len = strlen (temp_filename);
!       if (fwrite (temp_filename, 1, len, ltrans_output_list_stream) < len
  	  || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
  	fatal_error ("writing to LTRANS output list %s: %m",
  		     ltrans_output_list);
      }
  
    lto_stats.num_output_files += n_sets;
  
--- 2645,2669 ----
  	}
        gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i);
  
!       stream_out (temp_filename, part->encoder, i == n_sets - 1);
  
        part->encoder = NULL;
  
!       temp_filenames.safe_push (xstrdup (temp_filename));
!     }
!   ltrans_output_list_stream = fopen (ltrans_output_list, "w");
!   if (ltrans_output_list_stream == NULL)
!     fatal_error ("opening LTRANS output list %s: %m", ltrans_output_list);
!   for (i = 0; i < n_sets; i++)
!     {
!       unsigned int len = strlen (temp_filenames[i]);
!       if (fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) < len
  	  || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
  	fatal_error ("writing to LTRANS output list %s: %m",
  		     ltrans_output_list);
+      free (temp_filenames[i]);
      }
+   temp_filenames.release();
  
    lto_stats.num_output_files += n_sets;
  
*************** do_whole_program_analysis (void)
*** 3126,3131 ****
--- 3218,3235 ----
  {
    symtab_node *node;
  
+   lto_parallelism = 1;
+ 
+   /* TODO: jobserver communication is not supported, yet.  */
+   if (!strcmp (flag_wpa, "jobserver"))
+     lto_parallelism = -1;
+   else
+     {
+       lto_parallelism = atoi (flag_wpa);
+       if (lto_parallelism <= 0)
+ 	lto_parallelism = 0;
+     }
+ 
    timevar_start (TV_PHASE_OPT_GEN);
  
    /* Note that since we are in WPA mode, materialize_cgraph will not
Index: lto/lang.opt
===================================================================
*** lto/lang.opt	(revision 205646)
--- lto/lang.opt	(working copy)
*************** LTO Joined Var(ltrans_output_list)
*** 33,41 ****
  Specify a file to which a list of files output by LTRANS is written.
  
  fwpa
! LTO Driver Report Var(flag_wpa)
  Run the link-time optimizer in whole program analysis (WPA) mode.
  
  fresolution=
  LTO Joined
  The resolution file
--- 33,45 ----
  Specify a file to which a list of files output by LTRANS is written.
  
  fwpa
! LTO Driver Report
  Run the link-time optimizer in whole program analysis (WPA) mode.
  
+ fwpa=
+ LTO Driver RejectNegative Joined Var(flag_wpa)
+ Whole program analysis (WPA) mode with number of parallel jobs specified.
+ 
  fresolution=
  LTO Joined
  The resolution file
Index: lto/lto-lang.c
===================================================================
*** lto/lto-lang.c	(revision 205646)
--- lto/lto-lang.c	(working copy)
*************** lto_handle_option (size_t scode, const c
*** 749,754 ****
--- 749,758 ----
        warn_psabi = value;
        break;
  
+     case OPT_fwpa:
+       flag_wpa = value ? "" : NULL;
+       break;
+ 
      default:
        break;
      }
*************** static bool
*** 1148,1154 ****
  lto_init (void)
  {
    /* We need to generate LTO if running in WPA mode.  */
!   flag_generate_lto = flag_wpa;
  
    /* Create the basic integer types.  */
    build_common_tree_nodes (flag_signed_char, /*short_double=*/false);
--- 1152,1158 ----
  lto_init (void)
  {
    /* We need to generate LTO if running in WPA mode.  */
!   flag_generate_lto = (flag_wpa != NULL);
  
    /* Create the basic integer types.  */
    build_common_tree_nodes (flag_signed_char, /*short_double=*/false);
Index: lto-streamer.h
===================================================================
*** lto-streamer.h	(revision 205646)
--- lto-streamer.h	(working copy)
*************** void lto_output_location (struct output_
*** 873,878 ****
--- 873,879 ----
  
  
  /* In lto-cgraph.c  */
+ extern bool asm_nodes_output;
  lto_symtab_encoder_t lto_symtab_encoder_new (bool);
  int lto_symtab_encoder_encode (lto_symtab_encoder_t, symtab_node *);
  void lto_symtab_encoder_delete (lto_symtab_encoder_t);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]