This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[tree-ssa] Reducing PHI insertions


I've started looking at the insane costs of PHI node insertion for
20001226-1.c.  There's a couple issues that need to be addressed:

  1. We're not using the pruned or semi-pruned SSA PHI node insertion
     techniques.  This can cause us to insert far more PHI nodes than
     are actually needed.  To put it in perspective, 20001226-1.c should
     PHI nodes for 3-4 objects.  We actually generate PHI nodes for more
     than 40000 objects.

  2. Our CFG has far more blocks than are necessary.  For 20001226-1.c
     the tree SSA code generates roughly 24000 basic blocks.  33% of
     those blocks are totally unnecessary.


Both must be addressed for PHI node insertion to become a non-issue for
20001226-1.  Once both are fixed PHI node insertion for 20001226-1.c will
complete in less than a second  -- which is infinitely better than the
current situation where PHI insertion runs my machine out of memory, it
starts swapping insanely and eventually locks up :(

This patch addresses #1 by converting our PHI node insertion to use
semi-pruned rather than "minimal" PHI node insertion algorithms.

Note that this patch by itself will not help 20001226-1.c in any way
shape or form.  This is because of the issue in #2 -- if the CFG is
not improved, then every object in 20001226-1.c is effectively live across
a basic block boundary and thus the semi-pruned form is equivalent to the
"minimal" form which we currently implement.

While it doesn't help 20001226-1.c at the moment it does clearly help other
code.   Let's take combine.i as an example.   PHI node insertion time is cut
by 50%, rewriting is cut by 30% and CCP is cut by 22%.  Other future SSA
optimizers will also benefit as they'll have far fewer PHI nodes to consider.

Bootstrapped and regression tested whee.



        * tree-dfa.c (add_referenced_var): Annotate each item in the
        REFERENCED_VARS varray with a unique id.
        * tree-flow.h (struct var_ann_d): Add new uid field.
        * tree-ssa.c (mark_def_sites): Compute the set of variables
        live across basic blocks and return them in an sbitmap.
        (insert_phi_nodes): Use the set of nonlocal variables computed
        by mark_def_sites to reduce the number of PHI nodes inserted.
        (rewrite_into_ssa): Updated to deal with changes in
        insert_phi_nodes and mark_def_sites.  Free the sbitmap returned
        by mark_def_sites.


Index: tree-dfa.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-dfa.c,v
retrieving revision 1.1.4.69
diff -c -3 -p -r1.1.4.69 tree-dfa.c
*** tree-dfa.c	3 Feb 2003 01:26:53 -0000	1.1.4.69
--- tree-dfa.c	3 Feb 2003 23:52:16 -0000
*************** add_referenced_var (var, data)
*** 2095,2105 ****
    slot = htab_find_slot (vars_found, (void *) var, INSERT);
    if (*slot == NULL)
      {
        /* This is the first time we find this variable, add it to the
!          REFERENCED_VARS array.  */
        *slot = (void *) var;
        VARRAY_PUSH_TREE (referenced_vars, var);
!       num_referenced_vars++;
      }
  }
  
--- 2095,2110 ----
    slot = htab_find_slot (vars_found, (void *) var, INSERT);
    if (*slot == NULL)
      {
+       var_ann_t ann;
+ 
        /* This is the first time we find this variable, add it to the
!          REFERENCED_VARS array, also annotate it with its unique id.  */
        *slot = (void *) var;
        VARRAY_PUSH_TREE (referenced_vars, var);
!       ann = var_ann (var);
!       if (! ann)
! 	ann = create_var_ann (var);
!       ann->uid = num_referenced_vars++;
      }
  }
  
Index: tree-flow.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-flow.h,v
retrieving revision 1.1.4.48
diff -c -3 -p -r1.1.4.48 tree-flow.h
*** tree-flow.h	3 Feb 2003 01:26:53 -0000	1.1.4.48
--- tree-flow.h	3 Feb 2003 23:52:22 -0000
*************** struct var_ann_d GTY(())
*** 62,67 ****
--- 62,70 ----
  
    /* Variables that may alias this variable.  */
    varray_type may_aliases;
+   
+   /* Unique ID of this variable.  */
+   int uid;
  };
  
  
Index: tree-ssa.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-ssa.c,v
retrieving revision 1.1.4.45
diff -c -3 -p -r1.1.4.45 tree-ssa.c
*** tree-ssa.c	3 Feb 2003 01:26:53 -0000	1.1.4.45
--- tree-ssa.c	3 Feb 2003 23:52:23 -0000
*************** struct currdef_d
*** 99,107 ****
  /* Local functions.  */
  static void init_tree_ssa		PARAMS ((void));
  static void delete_tree_ssa		PARAMS ((tree));
! static void mark_def_sites		PARAMS ((dominance_info));
  static void set_def_block		PARAMS ((tree, basic_block));
! static void insert_phi_nodes		PARAMS ((sbitmap *));
  static void rewrite_block		PARAMS ((basic_block));
  static void rewrite_stmts		PARAMS ((basic_block, varray_type *));
  static void rewrite_stmt		PARAMS ((tree, varray_type *));
--- 99,107 ----
  /* Local functions.  */
  static void init_tree_ssa		PARAMS ((void));
  static void delete_tree_ssa		PARAMS ((tree));
! static sbitmap mark_def_sites		PARAMS ((dominance_info));
  static void set_def_block		PARAMS ((tree, basic_block));
! static void insert_phi_nodes		PARAMS ((sbitmap *, sbitmap));
  static void rewrite_block		PARAMS ((basic_block));
  static void rewrite_stmts		PARAMS ((basic_block, varray_type *));
  static void rewrite_stmt		PARAMS ((tree, varray_type *));
*************** rewrite_into_ssa (fndecl)
*** 209,214 ****
--- 209,215 ----
       tree fndecl;
  {
    sbitmap *dfs;
+   sbitmap nonlocal_vars;
    dominance_info idom;
    
    timevar_push (TV_TREE_SSA_OTHER);
*************** rewrite_into_ssa (fndecl)
*** 228,237 ****
    compute_may_aliases ();
  
    /* Find variable references and mark definition sites.  */
!   mark_def_sites (idom);
  
    /* Insert PHI nodes at dominance frontiers of definition blocks.  */
!   insert_phi_nodes (dfs);
  
    /* Rewrite all the basic blocks in the program.  */
    timevar_push (TV_TREE_SSA_REWRITE_BLOCKS);
--- 229,238 ----
    compute_may_aliases ();
  
    /* Find variable references and mark definition sites.  */
!   nonlocal_vars = mark_def_sites (idom);
  
    /* Insert PHI nodes at dominance frontiers of definition blocks.  */
!   insert_phi_nodes (dfs, nonlocal_vars);
  
    /* Rewrite all the basic blocks in the program.  */
    timevar_push (TV_TREE_SSA_REWRITE_BLOCKS);
*************** rewrite_into_ssa (fndecl)
*** 239,244 ****
--- 240,246 ----
    timevar_pop (TV_TREE_SSA_REWRITE_BLOCKS);
  
    /* Free allocated memory.  */
+   free (nonlocal_vars);
    sbitmap_vector_free (dfs);
    free_dominance_info (idom);
    htab_delete (def_blocks);
*************** rewrite_into_ssa (fndecl)
*** 267,280 ****
  
     Also, compute the set of dominator children for each block in the
     flowgraph.  This will be used by rewrite_block when traversing the
!    flowgraph.  */
  
! static void
  mark_def_sites (idom)
       dominance_info idom;
  {
    basic_block bb;
    gimple_stmt_iterator si;
  
    /* Mark all the blocks that have definitions for each variable referenced
       in the function.  */
--- 269,293 ----
  
     Also, compute the set of dominator children for each block in the
     flowgraph.  This will be used by rewrite_block when traversing the
!    flowgraph.
  
!    Return a bitmap for the set of referenced variables which are
!    "nonlocal", ie those which are live across block boundaries.
!    This information is used to reduce the number of PHI nodes
!    we create.  */
! 
! static sbitmap
  mark_def_sites (idom)
       dominance_info idom;
  {
    basic_block bb;
    gimple_stmt_iterator si;
+   sbitmap nonlocal_vars;
+   sbitmap killed_vars;
+ 
+   nonlocal_vars = sbitmap_alloc (num_referenced_vars);
+   sbitmap_zero (nonlocal_vars);
+   killed_vars = sbitmap_alloc (num_referenced_vars);
  
    /* Mark all the blocks that have definitions for each variable referenced
       in the function.  */
*************** mark_def_sites (idom)
*** 286,310 ****
        if (idom_bb)
  	add_dom_child (idom_bb, bb);
  
        for (si = gsi_start_bb (bb); !gsi_end_bb_p (si); gsi_step_bb (&si))
  	{
  	  varray_type ops;
  	  size_t i;
  	  tree stmt;
! 	  
  	  stmt = gsi_stmt (si);
  	  STRIP_NOPS (stmt);
  
  	  get_stmt_operands (stmt);
  
! 	  if (def_op (stmt))
! 	    set_def_block (*(def_op (stmt)), bb);
  
  	  ops = vdef_ops (stmt);
  	  for (i = 0; ops && i < VARRAY_ACTIVE_SIZE (ops); i++)
  	    set_def_block (VDEF_RESULT (VARRAY_TREE (ops, i)), bb);
  	}
      }
  }
  
  
--- 299,363 ----
        if (idom_bb)
  	add_dom_child (idom_bb, bb);
  
+       /* We're interested in finding the variables which are used before
+          they are defined in this block.  So at the start of each block
+ 	 zero out KILLED_VARS.  */
+       sbitmap_zero (killed_vars);
+ 
        for (si = gsi_start_bb (bb); !gsi_end_bb_p (si); gsi_step_bb (&si))
  	{
  	  varray_type ops;
  	  size_t i;
  	  tree stmt;
! 	  tree *dest;
! 
  	  stmt = gsi_stmt (si);
  	  STRIP_NOPS (stmt);
  
  	  get_stmt_operands (stmt);
  
! 	  dest = def_op (stmt);
! 	  if (dest)
! 	    set_def_block (*dest, bb);
! 
! 	  /* If a variable is used before being set, then the variable
! 	     is live across a block boundary, so add it to NONLOCAL_VARS.  */
! 	  ops = use_ops (stmt);
! 	  for (i = 0; ops && i < VARRAY_ACTIVE_SIZE (ops); i++)
! 	    {
! 	      tree *use = VARRAY_GENERIC_PTR (ops, i);
! 	      int uid = var_ann (*use)->uid;
! 
! 	      if (! TEST_BIT (killed_vars, uid))
! 	        SET_BIT (nonlocal_vars, uid);
! 	    }
! 	  
! 	  /* Similarly for virtual uses.  */
! 	  ops = vuse_ops (stmt);
! 	  for (i = 0; ops && i < VARRAY_ACTIVE_SIZE (ops); i++)
! 	    {
! 	      tree use = VARRAY_GENERIC_PTR (ops, i);
! 	      int uid = var_ann (use)->uid;
! 
! 	      if (! TEST_BIT (killed_vars, uid))
! 	        SET_BIT (nonlocal_vars, uid);
! 	    }
! 
! 	  /* Now process the single destination of this statement. 
! 
! 	     Note that virtual definitions are irrelavent for
! 	     computing NONLOCAL_VARs and KILLED_VARS, so they are
! 	     ignored here.  */
! 	  if (dest)
! 	    SET_BIT (killed_vars, var_ann (*dest)->uid);
  
  	  ops = vdef_ops (stmt);
  	  for (i = 0; ops && i < VARRAY_ACTIVE_SIZE (ops); i++)
  	    set_def_block (VDEF_RESULT (VARRAY_TREE (ops, i)), bb);
  	}
      }
+   free (killed_vars);
+   return nonlocal_vars;
  }
  
  
*************** set_def_block (var, bb)
*** 338,348 ****
  
  /* Insert PHI nodes at the dominance frontier of blocks with variable
     definitions.  DFS contains the dominance frontier information for the
!    flowgraph.  */
  
  static void
! insert_phi_nodes (dfs)
       sbitmap *dfs;
  {
    size_t i;
  
--- 391,406 ----
  
  /* Insert PHI nodes at the dominance frontier of blocks with variable
     definitions.  DFS contains the dominance frontier information for the
!    flowgraph.
!    
!    NONLOCAL_VARs is a bitmap representing the set of variables which are
!    live across basic block boundaries.  Only variables in NONLOCAL_VARs
!    need PHI nodes.  */
  
  static void
! insert_phi_nodes (dfs, nonlocal_vars)
       sbitmap *dfs;
+      sbitmap nonlocal_vars;
  {
    size_t i;
  
*************** insert_phi_nodes (dfs)
*** 367,373 ****
       for the variable.  PHI nodes will be added to the dominance frontier
       blocks of each definition block.  */
    for (i = 0; i < num_referenced_vars; i++)
!     insert_phi_nodes_for (referenced_var (i), dfs);
  
    added = NULL;
    in_work = NULL;
--- 425,432 ----
       for the variable.  PHI nodes will be added to the dominance frontier
       blocks of each definition block.  */
    for (i = 0; i < num_referenced_vars; i++)
!     if (TEST_BIT (nonlocal_vars, i))
!       insert_phi_nodes_for (referenced_var (i), dfs);
  
    added = NULL;
    in_work = NULL;



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]