This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

TREE_ADDRESSABLE and trivial dead stores before inlining


Hi,
one of major limitations of pre-inline optimization is the inability to turn
variable into SSA even when all ADDR_EXPR operatoins on it was elliminated by
CSE and forwprop and the fact that we left in a code a lot of store only
objects (ie statements like var = {}; in C++ testcase).

I did some expreiments with enabling alias analysis before SSA but it seems
more like nice project for next release cycle than something we should do at
the end of stage1/early stage2 since there are number of problems with updating
SSA form after IPA passes and memory consumption needed for virtual operands.

This patch implements two very simple passes - one to recompute address taken and
other to remove stores into write only variables.

The passes are quite effective for common C++ code - for tramp3d 1937 variables
are brought to SSA in first addressables pass, 1128 in the second.
1125 stores are elliminated in the first pass, 684 in the second.

The simple DSE pass is also capable of eliminating LHS of struct=call ()
expressions we don't optimize in the full DSE. 540 such calls are eliminated in
first pass, 551 in second.

All these numbers propagate to much higer numbers after inlining as the dead
stuff used to be copied many times so the compilation is faster too and what
most importantly the code size estimates are better and thus inlining can be
tuned better and tramp3d-like testcases are no longer requiring so large unit
size growth (that is currently 60%, with couple of other minor tweeks, I can
get to 20% with no regressions on tramp3d and +- same results on DLV, I am
testing SPEC with IMA now too as well).

Similar scores reproduce on DLV and other C++ testcases I have, for GCC modules
the effect is minimal, but by reducing the inline unit growth I hope the IMA
GCC binary/SPEC binaries to get significantly smaller/compile times better so
those would benefit too.

The patch uncover latent SRA bug in compile/vector-2.c testcase, I am
sending separate patch for (second SRA gets null pointer by trying to
scalarize a vector that was already brought out of the aggregate in the
first SRA pass, I will send the simple fix once testing converge).  It
was bootstrapped/regtested i686-linux (modulo the vector-2.c testcase),
OK?

Honza

:ADDPATCH tree-optimization:
	* passes.c (init_optimization_passes): Add simple dce and addressable
	passes.
	* tree-ssa.c (execute_update_addresses_taken): New function.
	(pass_update_address_taken): New.
	* tree-ssa-dse.c (execute_simple_dse): New function.
	(pass_simple_dse): New.
	* tree-pass.h (pass_simple_dse, pass_update_address_taken): Declare.
Index: passes.c
===================================================================
*** passes.c	(revision 121280)
--- passes.c	(working copy)
*************** init_optimization_passes (void)
*** 489,506 ****
    NEXT_PASS (pass_reset_cc_flags);
    NEXT_PASS (pass_build_ssa);
    NEXT_PASS (pass_early_warn_uninitialized);
    NEXT_PASS (pass_rebuild_cgraph_edges);
    NEXT_PASS (pass_early_inline);
    NEXT_PASS (pass_cleanup_cfg);
    NEXT_PASS (pass_rename_ssa_copies);
    NEXT_PASS (pass_ccp);
    
    NEXT_PASS (pass_forwprop);
    NEXT_PASS (pass_sra);
    NEXT_PASS (pass_copy_prop);
    NEXT_PASS (pass_merge_phi);
    NEXT_PASS (pass_dce);
    NEXT_PASS (pass_tail_recursion);
    NEXT_PASS (pass_release_ssa_names);
  
    *p = NULL;
--- 490,511 ----
    NEXT_PASS (pass_reset_cc_flags);
    NEXT_PASS (pass_build_ssa);
    NEXT_PASS (pass_early_warn_uninitialized);
    NEXT_PASS (pass_rebuild_cgraph_edges);
    NEXT_PASS (pass_early_inline);
    NEXT_PASS (pass_ccp);
    NEXT_PASS (pass_cleanup_cfg);
    NEXT_PASS (pass_rename_ssa_copies);
    
    NEXT_PASS (pass_forwprop);
+   NEXT_PASS (pass_update_address_taken);
+   NEXT_PASS (pass_simple_dse);
    NEXT_PASS (pass_sra);
    NEXT_PASS (pass_copy_prop);
    NEXT_PASS (pass_merge_phi);
    NEXT_PASS (pass_dce);
+   NEXT_PASS (pass_update_address_taken);
+   NEXT_PASS (pass_simple_dse);
    NEXT_PASS (pass_tail_recursion);
    NEXT_PASS (pass_release_ssa_names);
  
    *p = NULL;
Index: tree-ssa.c
===================================================================
*** tree-ssa.c	(revision 121280)
--- tree-ssa.c	(working copy)
*************** struct tree_opt_pass pass_late_warn_unin
*** 1256,1258 ****
--- 1256,1354 ----
    0,                                    /* todo_flags_finish */
    0				        /* letter */
  };
+ 
+ /* Compute TREE_ADDRESSABLE for local variables.  */
+ 
+ static unsigned int
+ execute_update_addresses_taken (void)
+ {
+   tree var;
+   referenced_var_iterator rvi;
+   block_stmt_iterator bsi;
+   basic_block bb;
+   bitmap addresses_taken = BITMAP_ALLOC (NULL);
+   bitmap vars_updated = BITMAP_ALLOC (NULL);
+   bool update_vops;
+   tree phi;
+ 
+   /* Collect into ADDRESSES_TAKEN all variables whose address is taken within
+      the function body.  */
+   FOR_EACH_BB (bb)
+     {
+       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ 	{
+ 	  stmt_ann_t s_ann = stmt_ann (bsi_stmt (bsi));
+ 
+ 	  if (s_ann->addresses_taken)
+ 	    bitmap_ior_into (addresses_taken, s_ann->addresses_taken);
+ 	}
+       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
+ 	{
+ 	  unsigned i, phi_num_args = PHI_NUM_ARGS (phi);
+ 	  for (i = 0; i < phi_num_args; i++)
+ 	    {
+ 	      tree op = PHI_ARG_DEF (phi, i), var;
+ 	      if (TREE_CODE (op) == ADDR_EXPR
+ 		  && (var = get_base_address (TREE_OPERAND (op, 0))) != NULL_TREE
+ 		  && DECL_P (var))
+ 		bitmap_set_bit (addresses_taken, DECL_UID (var));
+ 	    }
+ 	}
+     }
+ 
+   /* When possible, clear ADDRESSABLE bit and mark variable for conversion into
+      SSA.  */
+   FOR_EACH_REFERENCED_VAR (var, rvi)
+     if (!is_global_var (var)
+ 	&& TREE_CODE (var) != RESULT_DECL
+ 	&& TREE_ADDRESSABLE (var)
+ 	&& !bitmap_bit_p (addresses_taken, DECL_UID (var)))
+       {
+         TREE_ADDRESSABLE (var) = 0;
+ 	if (is_gimple_reg (var))
+ 	  mark_sym_for_renaming (var);
+ 	update_vops = true;
+ 	bitmap_set_bit (vars_updated, DECL_UID (var));
+ 	if (dump_file)
+ 	  {
+ 	    fprintf (dump_file, "No longer having address taken ");
+ 	    print_generic_expr (dump_file, var, 0);
+ 	    fprintf (dump_file, "\n");
+ 	  }
+       }
+ 
+   /* Operand caches needs to be recomputed for operands referencing the updated
+      variables.  */
+   if (update_vops)
+     FOR_EACH_BB (bb)
+       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ 	{
+ 	  tree stmt = bsi_stmt (bsi);
+ 
+ 	  if ((LOADED_SYMS (stmt)
+ 	       && bitmap_intersect_p (LOADED_SYMS (stmt), vars_updated))
+ 	      || (STORED_SYMS (stmt)
+ 		  && bitmap_intersect_p (STORED_SYMS (stmt), vars_updated)))
+ 	    update_stmt (stmt);
+ 	}
+   BITMAP_FREE (addresses_taken);
+   BITMAP_FREE (vars_updated);
+   return 0;
+ }
+ 
+ struct tree_opt_pass pass_update_address_taken =
+ {
+   "addressables",			/* name */
+   NULL,					/* gate */
+   execute_update_addresses_taken,	/* execute */
+   NULL,					/* sub */
+   NULL,					/* next */
+   0,					/* static_pass_number */
+   0,					/* tv_id */
+   PROP_ssa,				/* properties_required */
+   0,					/* properties_provided */
+   0,					/* properties_destroyed */
+   0,					/* todo_flags_start */
+   TODO_update_ssa,                      /* todo_flags_finish */
+   0				        /* letter */
+ };
Index: tree-ssa-dse.c
===================================================================
*** tree-ssa-dse.c	(revision 121280)
--- tree-ssa-dse.c	(working copy)
*************** struct tree_opt_pass pass_dse = {
*** 814,816 ****
--- 814,950 ----
      | TODO_verify_ssa,		/* todo_flags_finish */
    0				/* letter */
  };
+ 
+ /* A very simple dead store pass elliminating write only local variables.
+    The pass does not require alias information and thus can be run before
+    inlining to quickly eliminate artefacts of some common C++ constructs.  */
+ 
+ static unsigned int
+ execute_simple_dse (void)
+ {
+   block_stmt_iterator bsi;
+   basic_block bb;
+   bitmap variables_loaded = BITMAP_ALLOC (NULL);
+   unsigned int todo = 0;
+ 
+   /* Collect into VARIABLES LOADED all variables that are read in function
+      body.  */
+   FOR_EACH_BB (bb)
+     for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+       if (LOADED_SYMS (bsi_stmt (bsi)))
+ 	bitmap_ior_into (variables_loaded,
+ 			 LOADED_SYMS (bsi_stmt (bsi)));
+ 
+   /* Look for statements writting into the write only variables.
+      And try to remove them.  */
+ 
+   FOR_EACH_BB (bb)
+     for (bsi = bsi_start (bb); !bsi_end_p (bsi);)
+       {
+ 	tree stmt = bsi_stmt (bsi), op;
+ 	bool removed = false;
+         ssa_op_iter iter;
+ 
+ 	if (STORED_SYMS (stmt) && TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
+ 	    && TREE_CODE (stmt) != RETURN_EXPR
+ 	    && !bitmap_intersect_p (STORED_SYMS (stmt), variables_loaded))
+ 	  {
+ 	    unsigned int i;
+ 	    bitmap_iterator bi;
+ 	    bool dead = true;
+ 
+ 
+ 
+ 	    /* See if statements writes only the write only variables and
+ 	       verify that there are no volatile operands.  tree-ssa-operands
+ 	       sets has_volatile_ops flag for all statements involving
+ 	       reads and writes when aliases are not built to prevent passes
+ 	       from removing them as dead.  The flag thus has no use for us
+ 	       and we need to look into all operands.  */
+ 	      
+ 	    EXECUTE_IF_SET_IN_BITMAP (STORED_SYMS (stmt), 0, i, bi)
+ 	      {
+ 		tree var = referenced_var_lookup (i);
+ 		if (TREE_ADDRESSABLE (var)
+ 		    || is_global_var (var)
+ 		    || TREE_THIS_VOLATILE (var))
+ 		  dead = false;
+ 	      }
+ 
+ 	    if (dead && LOADED_SYMS (stmt))
+ 	      EXECUTE_IF_SET_IN_BITMAP (LOADED_SYMS (stmt), 0, i, bi)
+ 		if (TREE_THIS_VOLATILE (referenced_var_lookup (i)))
+ 		  dead = false;
+ 
+ 	    if (dead)
+ 	      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_ALL_OPERANDS)
+ 		if (TREE_THIS_VOLATILE (op))
+ 		  dead = false;
+ 
+ 	    /* Look for possible occurence var = indirect_ref (...) where
+ 	       indirect_ref itself is volatile.  */
+ 
+ 	    if (dead && TREE_THIS_VOLATILE (GIMPLE_STMT_OPERAND (stmt, 1)))
+ 	      dead = false;
+ 
+ 	    if (dead)
+ 	      {
+ 		tree call = get_call_expr_in (stmt);
+ 
+ 		/* When LHS of var = call (); is dead, simplify it into
+ 		   call (); saving one operand.  */
+ 		if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
+ 		    && call
+ 		    && TREE_SIDE_EFFECTS (call))
+ 		  {
+ 		    if (dump_file && (dump_flags & TDF_DETAILS))
+ 		      {
+ 			fprintf (dump_file, "Deleted LHS of call: ");
+ 			print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ 			fprintf (dump_file, "\n");
+ 		      }
+ 		    push_stmt_changes (bsi_stmt_ptr (bsi));
+ 		    TREE_BLOCK (call) = TREE_BLOCK (stmt);
+ 		    bsi_replace (&bsi, call, false);
+ 		    maybe_clean_or_replace_eh_stmt (stmt, call);
+ 		    mark_symbols_for_renaming (call);
+ 		    pop_stmt_changes (bsi_stmt_ptr (bsi));
+ 		  }
+ 		else
+ 		  {
+ 		    if (dump_file && (dump_flags & TDF_DETAILS))
+ 		      {
+ 			fprintf (dump_file, "  Deleted dead store '");
+ 			print_generic_expr (dump_file, stmt, dump_flags);
+ 			fprintf (dump_file, "'\n");
+ 		      }
+ 		    removed = true;
+ 		    bsi_remove (&bsi, true);
+ 		    todo |= TODO_cleanup_cfg;
+ 		  }
+ 		todo |= TODO_remove_unused_locals | TODO_ggc_collect;
+ 	      }
+ 	  }
+ 	if (!removed)
+ 	  bsi_next (&bsi);
+       }
+   BITMAP_FREE (variables_loaded);
+   return todo;
+ }
+ 
+ struct tree_opt_pass pass_simple_dse =
+ {
+   "sdse",				/* name */
+   NULL,					/* gate */
+   execute_simple_dse,			/* execute */
+   NULL,					/* sub */
+   NULL,					/* next */
+   0,					/* static_pass_number */
+   0,					/* tv_id */
+   PROP_ssa,				/* properties_required */
+   0,					/* properties_provided */
+   0,					/* properties_destroyed */
+   0,					/* todo_flags_start */
+   TODO_dump_func,          	        /* todo_flags_finish */
+   0				        /* letter */
+ };
Index: tree-pass.h
===================================================================
*** tree-pass.h	(revision 121280)
--- tree-pass.h	(working copy)
*************** extern struct tree_opt_pass pass_warn_fu
*** 288,293 ****
--- 289,295 ----
  extern struct tree_opt_pass pass_phiopt;
  extern struct tree_opt_pass pass_forwprop;
  extern struct tree_opt_pass pass_dse;
+ extern struct tree_opt_pass pass_simple_dse;
  extern struct tree_opt_pass pass_nrv;
  extern struct tree_opt_pass pass_mark_used_blocks;
  extern struct tree_opt_pass pass_rename_ssa_copies;
*************** extern struct tree_opt_pass pass_early_i
*** 403,408 ****
--- 405,411 ----
  extern struct tree_opt_pass pass_inline_parameters;
  extern struct tree_opt_pass pass_apply_inline;
  extern struct tree_opt_pass pass_all_early_optimizations;
+ extern struct tree_opt_pass pass_update_address_taken;
  
  /* The root of the compilation pass tree, once constructed.  */
  extern struct tree_opt_pass *all_passes, *all_ipa_passes, *all_lowering_passes;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]