Enable SSA at -O0

Jan Hubicka <jh@suse.cz>
Thu Jul 17 15:39:00 GMT 2008


Hi,
this patch enables SSA at -O0.  There are no regressions with the last
round of fixes I sent, and I also ran the gdb testsuite to verify that we
don't degrade debug info.

Concerning memory usage, SSA naturally comes at some cost.  For Gerald's
testcase I get 266673Kb instead of 252286Kb.  We do, however, produce
smaller assembly:
-rw-r--r-- 1 jh jh  683217 Jul 17 13:11 combine2.s
-rw-r--r-- 1 jh jh 4960222 Jul 17 16:39 generate-3.4.s
to:
-rw-r--r-- 1 jh jh  671035 Jul 17 13:32 combine2.s
-rw-r--r-- 1 jh jh 4859672 Jul 17 16:40 generate-3.4.s

For compilation of GCC modules I get the following timings:

real    2m54.967s
user    2m52.875s
sys     0m2.044s

real    2m54.997s
user    2m52.871s
sys     0m2.124s

to:

real    2m55.269s
user    2m53.195s
sys     0m2.084s

real    2m55.175s
user    2m53.039s
sys     0m2.140s

For Gerald's testcase compile time goes from 4.51s to 4.50s.

With tuples, we might do slightly better here, since SSA passes get
noticeably cheaper relative to RTL passes and the patch saves a little
work at the RTL level.  We could also gain some improvement from limited
DCE.  Initial tests were in favour of SSA, but I had to disable more of
TER.  In particular it would be nice to allow TERing loads into
expressions when there are no stores between the statements, which is a
very common case; see the example below.
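
As a purely illustrative sketch (not from the patch) of the load case
that is currently rejected at -O0, consider:

  int
  foo (int *p, int q)
  {
    int t = *p;    /* Single-use load ...  */
    return t + q;  /* ... with no store between the load and its use,
                      so folding *p into the addition would be safe
                      even without alias information.  */
  }

If TER were allowed here, out-of-SSA could substitute the load into its
use instead of keeping the temporary.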

The main motivation for the patch is, however, better maintainability of
the compiler (i.e. Richard's variable-length arrays won't need non-SSA
lowering passes) and getting rid of the -O0 code quality regressions we
have relative to pre-tree-SSA compilers.

Bootstrapped/regtested i686-linux.  I am still testing x86_64-linux and
PPC-linux, but both passed with an earlier version of the patch.

Honza

	* cgraph.c (cgraph_add_new_function): Do early local passes.
	* tree-nrv.c (gate_pass_return_slot): New gate.
	(pass_nrv): Add the gate.
	* tree-ssa-coalesce.c (hash_ssa_name_by_var, eq_ssa_name_by_var): New
	functions.
	(coalesce_ssa_name): When not optimizing, coalesce all SSA names
	sharing the same base variable (see the sketch below).
	* tree-ssa-live.c (remove_unused_locals): Be more conservative when
	not optimizing so unused user vars remain visible.
	* common.opt (flag_tree_ter): Always enable by default.
	* tree-ssa-ter.c: Include flags.h.
	(is_replaceable_p): Check that locations match; when alias info is
	missing, be conservative about loads.
	* tree-optimize.c (gate_init_datastructures): Remove.
	(pass_init_datastructures): New.
	* passes.c: Reorder passes so we always go into SSA.
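
To make the coalesce_ssa_name change concrete, here is a purely
illustrative function (not part of the patch); the SSA version numbers
in the comments are only for illustration:

  int
  count (int n)
  {
    int i = 0;      /* i_1 */
    if (n > 0)
      i = i + n;    /* i_2 */
    return i;       /* i_3 = PHI <i_1, i_2>; at -O0 all of i_1, i_2 and
                       i_3 must end up in one partition mapped back to
                       the user variable i, so that "print i" keeps
                       working in the debugger after out-of-SSA.  */
  }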

Index: cgraph.c
===================================================================
--- cgraph.c	(revision 137903)
+++ cgraph.c	(working copy)
@@ -1366,7 +1366,7 @@
 	if (!lowered)
           tree_lowering_passes (fndecl);
 	bitmap_obstack_initialize (NULL);
-	if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (fndecl)) && optimize)
+	if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (fndecl)))
 	  execute_pass_list (pass_early_local_passes.pass.sub);
 	bitmap_obstack_release (NULL);
 	tree_rest_of_compilation (fndecl);
Index: tree-nrv.c
===================================================================
--- tree-nrv.c	(revision 137903)
+++ tree-nrv.c	(working copy)
@@ -226,12 +226,18 @@
   return 0;
 }
 
+static bool
+gate_pass_return_slot (void)
+{
+  return optimize > 0;
+}
+
 struct gimple_opt_pass pass_nrv = 
 {
  {
   GIMPLE_PASS,
   "nrv",				/* name */
-  NULL,					/* gate */
+  gate_pass_return_slot,		/* gate */
   tree_nrv,				/* execute */
   NULL,					/* sub */
   NULL,					/* next */
Index: tree-ssa-coalesce.c
===================================================================
--- tree-ssa-coalesce.c	(revision 137903)
+++ tree-ssa-coalesce.c	(working copy)
@@ -1297,7 +1297,25 @@
     }
 }
 
+/* Returns a hash code for P.  */
 
+static hashval_t
+hash_ssa_name_by_var (const void *p)
+{
+  const_tree n = (const_tree) p;
+  return (hashval_t) htab_hash_pointer (SSA_NAME_VAR (n));
+}
+
+/* Returns nonzero if P1 and P2 are equal.  */
+
+static int
+eq_ssa_name_by_var (const void *p1, const void *p2)
+{
+  const_tree n1 = (const_tree) p1;
+  const_tree n2 = (const_tree) p2;
+  return SSA_NAME_VAR (n1) == SSA_NAME_VAR (n2);
+}
+
 /* Reduce the number of copies by coalescing variables in the function.  Return
    a partition map with the resulting coalesces.  */
 
@@ -1310,13 +1328,45 @@
   coalesce_list_p cl;
   bitmap used_in_copies = BITMAP_ALLOC (NULL);
   var_map map;
+  unsigned int i;
+  static htab_t ssa_name_hash;
 
   cl = create_coalesce_list ();
   map = create_outofssa_var_map (cl, used_in_copies);
 
+  /* We need to coalesce all names originating from the same SSA_NAME_VAR
+     so debug info remains undisturbed.  */
+  if (!optimize)
+    {
+      ssa_name_hash = htab_create (10, hash_ssa_name_by_var,
+      				   eq_ssa_name_by_var, NULL);
+      for (i = 1; i < num_ssa_names; i++)
+	{
+	  tree a = ssa_name (i);
+
+	  if (a && SSA_NAME_VAR (a) && !DECL_ARTIFICIAL (SSA_NAME_VAR (a)))
+	    {
+	      tree *slot = (tree *) htab_find_slot (ssa_name_hash, a, INSERT);
+
+	      if (!*slot)
+		*slot = a;
+	      else
+		{
+		  add_coalesce (cl, SSA_NAME_VERSION (a), SSA_NAME_VERSION (*slot),
+				MUST_COALESCE_COST - 1);
+		  bitmap_set_bit (used_in_copies, SSA_NAME_VERSION (a));
+		  bitmap_set_bit (used_in_copies, SSA_NAME_VERSION (*slot));
+		}
+	    }
+	}
+      htab_delete (ssa_name_hash);
+    }
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    dump_var_map (dump_file, map);
+
   /* Don't calculate live ranges for variables not in the coalesce list.  */
   partition_view_bitmap (map, used_in_copies, true);
   BITMAP_FREE (used_in_copies);
 
   if (num_var_partitions (map) < 1)
     {
Index: tree-ssa-live.c
===================================================================
--- tree-ssa-live.c	(revision 137903)
+++ tree-ssa-live.c	(working copy)
@@ -582,7 +582,8 @@
   var_ann_t ann;
   bitmap global_unused_vars = NULL;
 
-  mark_scope_block_unused (DECL_INITIAL (current_function_decl));
+  if (optimize)
+    mark_scope_block_unused (DECL_INITIAL (current_function_decl));
   /* Assume all locals are unused.  */
   FOR_EACH_REFERENCED_VAR (t, rvi)
     var_ann (t)->used = false;
@@ -661,7 +662,8 @@
 
 	  if (TREE_CODE (var) == VAR_DECL
 	      && is_global_var (var)
-	      && bitmap_bit_p (global_unused_vars, DECL_UID (var)))
+	      && bitmap_bit_p (global_unused_vars, DECL_UID (var))
+	      && (optimize || DECL_ARTIFICIAL (var)))
 	    *cell = TREE_CHAIN (*cell);
 	  else
 	    cell = &TREE_CHAIN (*cell);
@@ -681,9 +683,11 @@
 	&& TREE_CODE (t) != RESULT_DECL
 	&& !(ann = var_ann (t))->used
 	&& !ann->symbol_mem_tag
-	&& !TREE_ADDRESSABLE (t))
+	&& !TREE_ADDRESSABLE (t)
+	&& (optimize || DECL_ARTIFICIAL (t)))
       remove_referenced_var (t);
-  remove_unused_scope_block_p (DECL_INITIAL (current_function_decl));
+  if (optimize)
+    remove_unused_scope_block_p (DECL_INITIAL (current_function_decl));
 }
 
 
Index: common.opt
===================================================================
--- common.opt	(revision 137903)
+++ common.opt	(working copy)
@@ -1157,7 +1157,7 @@
 Perform scalar replacement of aggregates
 
 ftree-ter
-Common Report Var(flag_tree_ter) Optimization
+Common Report Var(flag_tree_ter) Init(1) Optimization
 Replace temporary expressions in the SSA->normal pass
 
 ftree-lrs
Index: tree-ssa-ter.c
===================================================================
--- tree-ssa-ter.c	(revision 137903)
+++ tree-ssa-ter.c	(working copy)
@@ -30,6 +30,7 @@
 #include "tree-flow.h"
 #include "tree-dump.h"
 #include "tree-ssa-live.h"
+#include "flags.h"
 
 
 /* Temporary Expression Replacement (TER)
@@ -364,6 +365,8 @@
   tree call_expr;
   use_operand_p use_p;
   tree def, use_stmt;
+  location_t locus1, locus2;
+  tree block1, block2;
 
   /* Only consider modify stmts.  */
   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
@@ -386,6 +389,36 @@
   if (bb_for_stmt (use_stmt) != bb_for_stmt (stmt))
     return false;
 
+  if (GIMPLE_STMT_P (stmt))
+    {
+      locus1 = GIMPLE_STMT_LOCUS (stmt);
+      block1 = GIMPLE_STMT_BLOCK (stmt);
+    }
+  else
+    {
+      locus1 = *EXPR_LOCUS (stmt);
+      block1 = TREE_BLOCK (stmt);
+    }
+  if (GIMPLE_STMT_P (use_stmt))
+    {
+      locus2 = GIMPLE_STMT_LOCUS (use_stmt);
+      block2 = GIMPLE_STMT_BLOCK (use_stmt);
+    }
+  else if (TREE_CODE (use_stmt) == PHI_NODE)
+    {
+      locus2 = 0;
+      block2 = NULL_TREE;
+    }
+  else
+    {
+      locus2 = *EXPR_LOCUS (use_stmt);
+      block2 = TREE_BLOCK (use_stmt);
+    }
+
+  if (!optimize
+      && ((locus1 && locus1 != locus2) || (block1 && block1 != block2)))
+    return false;
+
   /* Used in this block, but at the TOP of the block, not the end.  */
   if (TREE_CODE (use_stmt) == PHI_NODE)
     return false;
@@ -394,6 +427,10 @@
   if (!(ZERO_SSA_OPERANDS (stmt, SSA_OP_VDEF)))
     return false;
 
+  /* Without alias info we can't move around loads.  */
+  if (stmt_ann (stmt)->references_memory && !optimize)
+    return false;
+
   /* Float expressions must go through memory if float-store is on.  */
   if (flag_float_store 
       && FLOAT_TYPE_P (TREE_TYPE (GENERIC_TREE_OPERAND (stmt, 1))))
@@ -412,7 +449,6 @@
   /* Leave any stmt with volatile operands alone as well.  */
   if (stmt_ann (stmt)->has_volatile_ops)
     return false;
-  
 
   return true;
 }
Index: tree-optimize.c
===================================================================
--- tree-optimize.c	(revision 137903)
+++ tree-optimize.c	(working copy)
@@ -341,20 +341,12 @@
   return 0;
 }
 
-/* Gate: initialize or not the SSA datastructures.  */
-
-static bool
-gate_init_datastructures (void)
-{
-  return (optimize >= 1);
-}
-
 struct gimple_opt_pass pass_init_datastructures =
 {
  {
   GIMPLE_PASS,
   NULL,					/* name */
-  gate_init_datastructures,		/* gate */
+  NULL,					/* gate */
   execute_init_datastructures,		/* execute */
   NULL,					/* sub */
   NULL,					/* next */
Index: passes.c
===================================================================
--- passes.c	(revision 137903)
+++ passes.c	(working copy)
@@ -542,12 +542,13 @@
       NEXT_PASS (pass_cleanup_cfg);
       NEXT_PASS (pass_init_datastructures);
       NEXT_PASS (pass_expand_omp);
+
+      NEXT_PASS (pass_referenced_vars);
+      NEXT_PASS (pass_reset_cc_flags);
+      NEXT_PASS (pass_build_ssa);
       NEXT_PASS (pass_all_early_optimizations);
 	{
 	  struct opt_pass **p = &pass_all_early_optimizations.pass.sub;
-	  NEXT_PASS (pass_referenced_vars);
-	  NEXT_PASS (pass_reset_cc_flags);
-	  NEXT_PASS (pass_build_ssa);
 	  NEXT_PASS (pass_early_warn_uninitialized);
 	  NEXT_PASS (pass_rebuild_cgraph_edges);
 	  NEXT_PASS (pass_early_inline);
@@ -574,8 +575,8 @@
 	  NEXT_PASS (pass_tail_recursion);
 	  NEXT_PASS (pass_convert_switch);
           NEXT_PASS (pass_profile);
-	  NEXT_PASS (pass_release_ssa_names);
 	}
+      NEXT_PASS (pass_release_ssa_names);
       NEXT_PASS (pass_rebuild_cgraph_edges);
     }
   NEXT_PASS (pass_ipa_increase_alignment);
@@ -712,11 +713,12 @@
       NEXT_PASS (pass_tail_calls);
       NEXT_PASS (pass_rename_ssa_copies);
       NEXT_PASS (pass_uncprop);
-      NEXT_PASS (pass_del_ssa);
-      NEXT_PASS (pass_nrv);
-      NEXT_PASS (pass_mark_used_blocks);
-      NEXT_PASS (pass_cleanup_cfg_post_optimizing);
     }
+  NEXT_PASS (pass_del_ssa);
+  NEXT_PASS (pass_nrv);
+  NEXT_PASS (pass_mark_used_blocks);
+  NEXT_PASS (pass_cleanup_cfg_post_optimizing);
+
   NEXT_PASS (pass_warn_function_noreturn);
   NEXT_PASS (pass_free_datastructures);
   NEXT_PASS (pass_mudflap_2);


