cfg merge part 25 - web pass

Jan Hubicka jh@suse.cz
Wed Jun 19 11:10:00 GMT 2002


Hi,
on the i386 the effect of webizer is about 1% of performance at about 1%
of compilation time cost (spent in dataflow mostly).  The patch is not
fully redundant with new register allocator, as the benefits come from
better CSE done and with SSA, as SSA can't be done late after
tracer/loop unroller that are most common sources of splittable pseudos.

Honza

Wed Jun 19 19:48:52 CEST 2002  Jan Hubicka  <jh@suse.cz>
	* Makefile.in (web.o): New.
	* web.c: New file.
	* rtl.h (web_main): Declare.
	* timervar.def (TV_WEB): New.
	* toplev.c (dump_file_index, dump_file_info): Add DFI_web.
	(flag_web): New static variable.
	(lang_independent_ptions): Add "web".
	(rest_of_compilation): Call web_main.
	(parse_options_and_default_flags): Add flag_web.
	* invoke.texi (-fweb): Document.

Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/egcs/gcc/Makefile.in,v
retrieving revision 1.901
diff -c -3 -p -r1.901 Makefile.in
*** Makefile.in	15 Jun 2002 17:31:25 -0000	1.901
--- Makefile.in	19 Jun 2002 17:43:44 -0000
*************** OBJS = alias.o bb-reorder.o bitmap.o bui
*** 736,742 ****
   sibcall.o simplify-rtx.o ssa.o ssa-ccp.o ssa-dce.o stmt.o		   \
   stor-layout.o stringpool.o timevar.o toplev.o tracer.o tree.o tree-dump.o \
   tree-inline.o unroll.o varasm.o varray.o version.o vmsdbgout.o xcoffout.o \
!  $(GGC) $(out_object_file) $(EXTRA_OBJS)
  
  BACKEND = main.o libbackend.a
  
--- 736,742 ----
   sibcall.o simplify-rtx.o ssa.o ssa-ccp.o ssa-dce.o stmt.o		   \
   stor-layout.o stringpool.o timevar.o toplev.o tracer.o tree.o tree-dump.o \
   tree-inline.o unroll.o varasm.o varray.o version.o vmsdbgout.o xcoffout.o \
!  web.o $(GGC) $(out_object_file) $(EXTRA_OBJS)
  
  BACKEND = main.o libbackend.a
  
*************** cse.o : cse.c $(CONFIG_H) $(SYSTEM_H) $(
*** 1489,1494 ****
--- 1489,1497 ----
  gcse.o : gcse.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(REGS_H) hard-reg-set.h \
     flags.h real.h insn-config.h ggc.h $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) \
     function.h output.h toplev.h $(TM_P_H) $(PARAMS_H) except.h gt-gcse.h
+ web.o : web.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(REGS_H) hard-reg-set.h \
+    flags.h real.h insn-config.h ggc.h $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) \
+    function.h output.h toplev.h $(TM_P_H) $(PARAMS_H)
  sibcall.o : sibcall.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(REGS_H) function.h \
     hard-reg-set.h flags.h insn-config.h $(RECOG_H) $(BASIC_BLOCK_H)
  resource.o : resource.c $(CONFIG_H) $(RTL_H) hard-reg-set.h $(SYSTEM_H) \
Index: rtl.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/rtl.h,v
retrieving revision 1.362
diff -c -3 -p -r1.362 rtl.h
*** rtl.h	14 Jun 2002 18:57:36 -0000	1.362
--- rtl.h	19 Jun 2002 17:43:51 -0000
*************** extern rtx expand_mult_highpart		PARAMS 
*** 2008,2013 ****
--- 2008,2015 ----
  #ifdef BUFSIZ
  extern int gcse_main			PARAMS ((rtx, FILE *));
  #endif
+ /* In web.c */
+ extern void web_main			PARAMS ((void));
  
  /* In global.c */
  extern void mark_elimination		PARAMS ((int, int));
Index: timevar.def
===================================================================
RCS file: /cvs/gcc/egcs/gcc/timevar.def,v
retrieving revision 1.14
diff -c -3 -p -r1.14 timevar.def
*** timevar.def	1 Jun 2002 21:31:35 -0000	1.14
--- timevar.def	19 Jun 2002 17:43:51 -0000
*************** DEFTIMEVAR (TV_CSE                   , "
*** 59,64 ****
--- 59,65 ----
  DEFTIMEVAR (TV_GCSE                  , "global CSE")
  DEFTIMEVAR (TV_LOOP                  , "loop analysis")
  DEFTIMEVAR (TV_TRACER                , "tracer")
+ DEFTIMEVAR (TV_WEB                   , "web")
  DEFTIMEVAR (TV_CSE2                  , "CSE 2")
  DEFTIMEVAR (TV_BRANCH_PROB           , "branch prediction")
  DEFTIMEVAR (TV_FLOW                  , "flow analysis")
Index: toplev.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/toplev.c,v
retrieving revision 1.654
diff -c -3 -p -r1.654 toplev.c
*** toplev.c	16 Jun 2002 19:31:01 -0000	1.654
--- toplev.c	19 Jun 2002 17:43:55 -0000
*************** enum dump_file_index
*** 222,227 ****
--- 222,228 ----
    DFI_cfg,
    DFI_bp,
    DFI_tracer,
+   DFI_web,
    DFI_cse2,
    DFI_life,
    DFI_combine,
*************** static struct dump_file_info dump_file[D
*** 270,275 ****
--- 271,277 ----
    { "cfg",	'f', 1, 0, 0 },
    { "bp",	'b', 1, 0, 0 },
    { "tracer",	'T', 1, 0, 0 },
+   { "web",      'Z', 0, 0, 0 },
    { "cse2",	't', 1, 0, 0 },
    { "life",	'f', 1, 0, 0 },	/* Yes, duplicate enable switch.  */
    { "combine",	'c', 1, 0, 0 },
*************** int flag_syntax_only = 0;
*** 601,606 ****
--- 603,612 ----
  
  static int flag_gcse;
  
+ /* Nonzero means performs web construction pass.  */
+ 
+ static int flag_web;
+ 
  /* Nonzero means perform loop optimizer.  */
  
  static int flag_loop_optimize;
*************** static const lang_independent_options f_
*** 1032,1037 ****
--- 1038,1045 ----
     N_("Return 'short' aggregates in registers") },
    {"delayed-branch", &flag_delayed_branch, 1,
     N_("Attempt to fill delay slots of branch instructions") },
+   {"web", &flag_web, 1,
+    N_("Construct webs and split unrelated uses of single variable") },
    {"gcse", &flag_gcse, 1,
     N_("Perform the global common subexpression elimination") },
    {"gcse-lm", &flag_gcse_lm, 1,
*************** rest_of_compilation (decl)
*** 2968,2973 ****
--- 2976,2993 ----
        timevar_pop (TV_TRACER);
        reg_scan (get_insns (), max_reg_num (), 0);
      }
+   if (flag_web)
+     {
+       open_dump_file (DFI_web, decl);
+       timevar_push (TV_WEB);
+       web_main ();
+       delete_trivially_dead_insns (get_insns (), max_reg_num ());
+       cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_PRE_LOOP);
+ 
+       timevar_pop (TV_WEB);
+       close_dump_file (DFI_web, print_rtl_with_bb, get_insns ());
+       reg_scan (get_insns (), max_reg_num (), 0);
+     }
  
    if (optimize > 0)
      {
*************** parse_options_and_default_flags (argc, a
*** 4718,4723 ****
--- 4737,4743 ----
      {
        flag_inline_functions = 1;
        flag_rename_registers = 1;
+       flag_web = 1;
      }
  
    if (optimize < 2 || optimize_size)
Index: web.c
===================================================================
RCS file: web.c
diff -N web.c
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- web.c	19 Jun 2002 17:43:55 -0000
***************
*** 0 ****
--- 1,320 ----
+ /* Web construction code for GNU compiler.
+    Contributed by Jan Hubicka
+    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+ 
+ This file is part of GCC.
+ 
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2, or (at your option) any later
+ version.
+ 
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING.  If not, write to the Free
+ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA.  */
+ 
+ /* Simple optimization pass that splits indepdendent uses of each pseudo
+    increasing effectivity of other optimizations.  The optimization can
+    serve as an example of the use of dataflow module.
+ 
+    We don't split registers with REG_USERVAR set unless -fmessy-debugging is
+    used, because debug information about such split variables is almost
+    useless.  
+ 
+    TODO
+     - Add code to keep debugging up-to-date after splitting of user variable
+       pseudos.  This can be done by remembering all the pseudos used for the
+       variable and use life analysis information before reload to determing
+       wich one of the possible choices is alive and in case more are live,
+       choose one with latest definition.
+ 
+       Some other optimization passes will benefit from the infrastructure
+       too.
+ 
+     - We may use profile information and ignore infrequent use for purposes
+       of web unifying inserting the compensation code later to implement full
+       induction variable expansion for loops (currently we expand only if
+       induction is dead afterwards, that is often the case anyway).  */
+ 
+ #include "config.h"
+ #include "system.h"
+ #include "toplev.h"
+ 
+ #include "rtl.h"
+ #include "hard-reg-set.h"
+ #include "flags.h"
+ #include "basic-block.h"
+ #include "output.h"
+ #include "df.h"
+ #include "function.h"
+ 
+ 
+ /* This entry is allocated for each reference in the insn stream.  */
+ struct web_entry
+ {
+     /* pointer to the parent in the union/find tree.  */
+   struct web_entry *pred;
+     /* Newly assigned register to the entry.  Set only for roots.  */
+   rtx reg;
+ };
+ 
+ static struct web_entry *unionfind_root PARAMS ((struct web_entry *));
+ static void unionfind_union		PARAMS ((struct web_entry *,
+ 						 struct web_entry *));
+ static void union_defs			PARAMS ((struct df *, struct ref *,
+ 						 struct web_entry *,
+ 						 struct web_entry *));
+ static rtx entry_register		PARAMS ((struct web_entry *,
+ 						 struct ref *, char *, char *));
+ static void replace_ref			PARAMS ((struct ref *, rtx));
+ static int mark_addressof		PARAMS ((rtx *, void *));
+ 
+ /* Find the root of unionfind tree (the representatnt of set).  */
+ 
+ static struct web_entry *
+ unionfind_root (element)
+      struct web_entry *element;
+ {
+   struct web_entry *element1 = element, *element2;
+ 
+   while (element->pred)
+     element = element->pred;
+   while (element1->pred)
+     {
+       element2 = element1->pred;
+       element1->pred = element;
+       element1 = element2;
+     }
+   return element;
+ }
+ 
+ /* Union sets.  */
+ 
+ static void
+ unionfind_union (first, second)
+      struct web_entry *first, *second;
+ {
+   first = unionfind_root (first);
+   second = unionfind_root (second);
+   if (first == second)
+     return;
+   second->pred = first;
+ }
+ 
+ /* For each use, all possible defs reaching it must come in same register,
+    union them.  */
+ 
+ static void
+ union_defs (df, use, def_entry, use_entry)
+      struct df *df;
+      struct ref *use;
+      struct web_entry *def_entry;
+      struct web_entry *use_entry;
+ {
+   rtx insn = DF_REF_INSN (use);
+   struct df_link *link = DF_REF_CHAIN (use);
+   struct df_link *use_link = DF_INSN_USES (df, insn);
+   struct df_link *def_link = DF_INSN_DEFS (df, insn);
+   rtx set = single_set (insn);
+ 
+   /* Some instructions may use match_dup for it's operands.  In case the
+      operands are dead, we will assign them different pseudos creating
+      invalid instruction, so union all uses of the same operands for each
+      insn.  */
+ 
+   while (use_link)
+     {
+       if (use != use_link->ref
+ 	  && DF_REF_REAL_REG (use) == DF_REF_REAL_REG (use_link->ref))
+ 	unionfind_union (use_entry + DF_REF_ID (use),
+ 		         use_entry + DF_REF_ID (use_link->ref));
+       use_link = use_link->next;
+     }
+ 
+   /* Recognize trivial noop moves and attempt to keep them noop.
+      While most of noop moves should be removed we still keep some at
+      libcall boundaries and such.  */
+ 
+   if (set
+       && SET_SRC (set) == DF_REF_REG (use)
+       && SET_SRC (set) == SET_DEST (set))
+     {
+       while (def_link)
+ 	{
+ 	  if (DF_REF_REAL_REG (use) == DF_REF_REAL_REG (def_link->ref))
+ 	    unionfind_union (use_entry + DF_REF_ID (use),
+ 			     def_entry + DF_REF_ID (def_link->ref));
+ 	  def_link = def_link->next;
+ 	}
+     }
+   while (link)
+     {
+       unionfind_union (use_entry + DF_REF_ID (use),
+ 		       def_entry + DF_REF_ID (link->ref));
+       link = link->next;
+     }
+ 
+   /* An READ_WRITE use require the corresponding def to be in the same
+      register.  Find it and union.  */
+   if (use->flags & DF_REF_READ_WRITE)
+     {
+       struct df_link *link = DF_INSN_DEFS (df, DF_REF_INSN (use));
+ 
+       while (DF_REF_REAL_REG (link->ref) != DF_REF_REAL_REG (use))
+ 	link = link->next;
+ 
+       unionfind_union (use_entry + DF_REF_ID (use),
+ 		       def_entry + DF_REF_ID (link->ref));
+     }
+ }
+ 
+ /* Find corresponding register for given entry.  */
+ 
+ static rtx
+ entry_register (entry, ref, used, use_addressof)
+      struct web_entry *entry;
+      struct ref *ref;
+      char *used;
+      char *use_addressof;
+ {
+   struct web_entry *root;
+   rtx reg, newreg;
+ 
+   /* Find corresponding web and see if it has been visited.  */
+ 
+   root = unionfind_root (entry);
+   if (root->reg)
+     return root->reg;
+ 
+   /* We are seeing this web first time, do the assignment.  */
+ 
+   reg = DF_REF_REAL_REG (ref);
+ 
+   /* In case the original register is already assigned, generate new one.  */
+   if (!used[REGNO (reg)])
+     newreg = reg, used[REGNO (reg)] = 1;
+   else if (REG_USERVAR_P (reg) && 0/*&& !flag_messy_debugging*/)
+     {
+       newreg = reg;
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file,
+ 		 "New web forced to keep reg=%i (user variable)\n",
+ 		 REGNO (reg));
+     }
+   else if (use_addressof [REGNO (reg)])
+     {
+       newreg = reg;
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file,
+ 		 "New web forced to keep reg=%i (address taken)\n",
+ 		 REGNO (reg));
+     }
+   else
+     {
+       newreg = gen_reg_rtx (GET_MODE (reg));
+       REG_USERVAR_P (newreg) = REG_USERVAR_P (reg);
+       REG_POINTER (newreg) = REG_POINTER (reg);
+       REG_LOOP_TEST_P (newreg) = REG_LOOP_TEST_P (reg);
+       RTX_UNCHANGING_P (newreg) = RTX_UNCHANGING_P (reg);
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, "Web oldreg=%i newreg=%i\n", REGNO (reg),
+ 		 REGNO (newreg));
+     }
+ 
+   root->reg = newreg;
+   return newreg;
+ }
+ 
+ /* Replace the reference by REG.  */
+ 
+ static void
+ replace_ref (ref, reg)
+    struct ref *ref;
+    rtx reg;
+ {
+   rtx oldreg = DF_REF_REAL_REG (ref);
+   rtx *loc = DF_REF_REAL_LOC (ref);
+ 
+   if (oldreg == reg)
+     return;
+   if (rtl_dump_file)
+     fprintf (rtl_dump_file, "Updating insn %i (%i->%i)\n",
+ 	     INSN_UID (DF_REF_INSN (ref)), REGNO (oldreg), REGNO (reg)); 
+   *loc = reg;
+ }
+ 
+ /* Mark each pseudo, whose address is taken.  */
+ 
+ static int
+ mark_addressof (rtl, data)
+      rtx *rtl;
+      void *data;
+ {
+   if (!*rtl)
+     return 0;
+   if (GET_CODE (*rtl) == ADDRESSOF
+       && REG_P (XEXP (*rtl, 0)))
+     ((char *)data)[REGNO (XEXP (*rtl, 0))] = 1;
+   return 0;
+ }
+ 
+ /* Main entry point.  */
+ 
+ void
+ web_main ()
+ {
+   struct df *df;
+   struct web_entry *def_entry;
+   struct web_entry *use_entry;
+   unsigned int i;
+   int max = max_reg_num ();
+   char *used;
+   char *use_addressof;
+   rtx insn;
+ 
+   df = df_init ();
+   df_analyse (df, 0, DF_UD_CHAIN | DF_EQUIV_NOTES);
+ 
+   def_entry =
+     (struct web_entry *) xcalloc (df->n_defs, sizeof (struct web_entry));
+   use_entry =
+     (struct web_entry *) xcalloc (df->n_uses, sizeof (struct web_entry));
+   used = (char *) xcalloc (max, sizeof (char));
+   use_addressof = (char *) xcalloc (max, sizeof (char));
+ 
+   if (rtl_dump_file)
+     df_dump (df, DF_UD_CHAIN | DF_DU_CHAIN, rtl_dump_file);
+ 
+   /* Produce the web.  */
+   for (i = 0; i < df->n_uses; i++)
+     union_defs (df, df->uses[i], def_entry, use_entry);
+ 
+   /* We can not safely rename registers whose address is taken.  */
+   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+     if (INSN_P (insn))
+       for_each_rtx (&PATTERN (insn), mark_addressof, use_addressof);
+ 
+   /* Update the instruction stream, allocating new registers for split pseudos
+      in progress.  */
+   for (i = 0; i < df->n_uses; i++)
+     replace_ref (df->uses[i], entry_register (use_entry + i, df->uses[i],
+ 					      used, use_addressof));
+   for (i = 0; i < df->n_defs; i++)
+     replace_ref (df->defs[i], entry_register (def_entry + i, df->defs[i],
+ 					      used, use_addressof));
+ 
+   /* Dataflow information is corrupt here, but it can be easy to update it
+      by creating new entries for new registers and update or calilng
+      df_insns_modify.  */
+   free (def_entry);
+   free (use_entry);
+   free (used);
+   free (use_addressof);
+   df_finish (df);
+ }
Index: doc/invoke.texi
===================================================================
RCS file: /cvs/gcc/egcs/gcc/doc/invoke.texi,v
retrieving revision 1.152
diff -c -3 -p -r1.152 invoke.texi
*** doc/invoke.texi	16 Jun 2002 19:09:27 -0000	1.152
--- doc/invoke.texi	19 Jun 2002 17:44:05 -0000
*************** in the following sections.
*** 283,289 ****
  -fschedule-insns  -fschedule-insns2 @gol
  -fsingle-precision-constant  -fssa -fssa-ccp -fssa-dce @gol
  -fstrength-reduce  -fstrict-aliasing  -ftracer -fthread-jumps  -ftrapv @gol
! -funroll-all-loops  -funroll-loops  @gol
  --param @var{name}=@var{value}
  -O  -O0  -O1  -O2  -O3  -Os}
  
--- 283,289 ----
  -fschedule-insns  -fschedule-insns2 @gol
  -fsingle-precision-constant  -fssa -fssa-ccp -fssa-dce @gol
  -fstrength-reduce  -fstrict-aliasing  -ftracer -fthread-jumps  -ftrapv @gol
! -funroll-all-loops  -funroll-loops  -fweb @gol
  --param @var{name}=@var{value}
  -O  -O0  -O1  -O2  -O3  -Os}
  
*************** with @samp{r}.
*** 3024,3029 ****
--- 3024,3032 ----
  @item y
  @opindex dy
  Dump debugging information during parsing, to standard error.
+ @item Z
+ @opindex dZ
+ Dump after web construction pass, to @file{@var{file}.10.web}.
  @end table
  
  @item -fdump-unnumbered
*************** of registers left over after register al
*** 3885,3890 ****
--- 3888,3902 ----
  will most benefit processors with lots of registers.  It can, however,
  make debugging impossible, since variables will no longer stay in
  a ``home register''.
+ 
+ @item -fweb
+ @opindex fweb
+ Constructs webs as commonly used for register allocation purposes and assign
+ each web individual pseudo register.  This allows our register allocation pass
+ to operate on pseudos directly, but also strengthens several other optimization
+ passes, such as CSE, loop optimizer and trivial dead code remover.  It can,
+ however, make debugging impossible, since variables will no longer stay in a
+ ``home register''.
  
  @item -fno-cprop-registers
  @opindex fno-cprop-registers



More information about the Gcc-patches mailing list