This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[mem-ssa] Merge 2/4: New DSE for aggregates



DSE improvements from Aldy.



2006-12-06 Aldy Hernandez <aldyh@redhat.com>


	* tree-ssa-dse.c (aggregate_vardecl_d): New.
	(dse_global_data): Add aggregate_vardecl field.
	(dse_possible_dead_store_p): New.
	Add prev_defvar variable.
	Allow immediate uses and previous immediate uses to differ
	if they are setting different parts of the whole.
	(get_aggregate_vardecl): New.
	(dse_record_partial_aggregate_store): New.
	(dse_whole_aggregate_clobbered_p): New.
	(dse_partial_kill_p): New.
	(dse_optimize_stmt): Abstract code checking a possible dead store
	into new function dse_possible_dead_store_p().
	Call dse_maybe_record_aggregate_store().
	When checking whether a STMT and its USE_STMT refer to the
	same memory address, check also for partial kills that clobber
	the whole.
	Move some variable definitions to the block where they are used.
	(aggregate_vardecl_hash): New.
	(aggregate_vardecl_eq): New.
	(aggregate_vardecl_free): New.
	(aggregate_whole_store_p): New.
	(tree_ssa_dse): Initialize and free aggregate_vardecl.
	Mark which aggregate stores we care about.

Index: tree-ssa-dse.c
===================================================================
--- tree-ssa-dse.c	(revision 119149)
+++ tree-ssa-dse.c	(working copy)
@@ -1,5 +1,5 @@
/* Dead store elimination
-   Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc.

This file is part of GCC.

@@ -34,6 +34,8 @@ Boston, MA 02110-1301, USA.  */
#include "tree-dump.h"
#include "domwalk.h"
#include "flags.h"
+#include "hashtab.h"
+#include "sbitmap.h"

/* This file implements dead store elimination.

@@ -65,6 +67,26 @@ Boston, MA 02110-1301, USA. */
the CFG. */


+/* Given an aggregate, this records the parts of it which have been
+ stored into. */
+struct aggregate_vardecl_d
+{
+ /* The aggregate. */
+ tree decl;
+
+ /* Some aggregates are too big for us to handle or never get stored
+ to as a whole. If this field is TRUE, we don't care about this
+ aggregate. */
+ bool ignore;
+
+ /* Number of parts in the whole. */
+ unsigned nparts;
+ + /* A bitmap of parts of the aggregate that have been set. If part N
+ of an aggregate has been stored to, bit N should be on. */
+ sbitmap parts_set;
+};
+
struct dse_global_data
{
/* This is the global bitmap for store statements.
@@ -73,6 +95,10 @@ struct dse_global_data
that we want to record, set the bit corresponding to the statement's
unique ID in this bitmap. */
bitmap stores;
+
+ /* A hash table containing the parts of an aggregate which have been
+ stored to. */
+ htab_t aggregate_vardecl;
};


/* We allocate a bitmap-per-block for stores which are encountered
@@ -101,6 +127,7 @@ static void dse_optimize_stmt (struct do
static void dse_record_phis (struct dom_walk_data *, basic_block);
static void dse_finalize_block (struct dom_walk_data *, basic_block);
static void record_voperand_set (bitmap, bitmap *, unsigned int);
+static void dse_record_partial_aggregate_store (tree, struct dse_global_data *);

static unsigned max_stmt_uid; /* Maximal uid of a statement. Uids to phi
nodes are assigned using the versions of
@@ -173,7 +200,7 @@ memory_ssa_name_same (tree *expr_p, int


  /* If we've found a default definition, then there's no problem.  Both
     stores will post-dominate it.  And def_bb will be NULL.  */
-  if (expr == default_def (SSA_NAME_VAR (expr)))
+  if (SSA_NAME_IS_DEFAULT_DEF (expr))
    return NULL_TREE;

  def_stmt = SSA_NAME_DEF_STMT (expr);
@@ -210,6 +237,288 @@ memory_address_same (tree store1, tree s
	  == NULL);
}

+
+/* A helper of dse_optimize_stmt.
+ Given a MODIFY_EXPR in STMT, check that each VDEF has one use, and that
+ one use is another VDEF clobbering the first one.
+
+ Return TRUE if the above conditions are met, otherwise FALSE. */
+
+static bool
+dse_possible_dead_store_p (tree stmt,
+ use_operand_p *first_use_p,
+ use_operand_p *use_p,
+ tree *use_stmt,
+ struct dse_global_data *dse_gd,
+ struct dse_block_local_data *bd)
+{
+ ssa_op_iter op_iter;
+ bool fail = false;
+ def_operand_p var1;
+ vuse_vec_p vv;
+ tree defvar = NULL_TREE, temp;
+ tree prev_defvar = NULL_TREE;
+ stmt_ann_t ann = stmt_ann (stmt);
+
+ /* We want to verify that each virtual definition in STMT has
+ precisely one use and that all the virtual definitions are
+ used by the same single statement. When complete, we
+ want USE_STMT to refer to the one statement which uses
+ all of the virtual definitions from STMT. */
+ *use_stmt = NULL;
+ FOR_EACH_SSA_VDEF_OPERAND (var1, vv, stmt, op_iter)
+ {
+ defvar = DEF_FROM_PTR (var1);
+
+ /* If this virtual def does not have precisely one use, then
+ we will not be able to eliminate STMT. */
+ if (!has_single_use (defvar))
+ {
+ fail = true;
+ break;
+ }
+
+ /* Get the one and only immediate use of DEFVAR. */
+ single_imm_use (defvar, use_p, &temp);
+ gcc_assert (*use_p != NULL_USE_OPERAND_P);
+ *first_use_p = *use_p;
+
+ /* If the immediate use of DEF_VAR is not the same as the
+ previously find immediate uses, then we will not be able
+ to eliminate STMT. */
+ if (*use_stmt == NULL)
+ {
+ *use_stmt = temp;
+ prev_defvar = defvar;
+ }
+ else if (temp != *use_stmt)
+ {
+ /* The immediate use and the previously found immediate use
+ must be the same, except... if they're uses of different
+ parts of the whole. */
+ if (TREE_CODE (defvar) == SSA_NAME
+ && TREE_CODE (SSA_NAME_VAR (defvar)) == STRUCT_FIELD_TAG
+ && TREE_CODE (prev_defvar) == SSA_NAME
+ && TREE_CODE (SSA_NAME_VAR (prev_defvar)) == STRUCT_FIELD_TAG
+ && (SFT_PARENT_VAR (SSA_NAME_VAR (defvar))
+ == SFT_PARENT_VAR (SSA_NAME_VAR (prev_defvar))))
+ ;
+ else
+ {
+ fail = true;
+ break;
+ }
+ }
+ }
+
+ if (fail)
+ {
+ record_voperand_set (dse_gd->stores, &bd->stores, ann->uid);
+ dse_record_partial_aggregate_store (stmt, dse_gd);
+ return false;
+ }
+
+ /* Skip through any PHI nodes we have already seen if the PHI
+ represents the only use of this store.
+
+ Note this does not handle the case where the store has
+ multiple VDEFs which all reach a set of PHI nodes in the same block. */
+ while (*use_p != NULL_USE_OPERAND_P
+ && TREE_CODE (*use_stmt) == PHI_NODE
+ && bitmap_bit_p (dse_gd->stores, get_stmt_uid (*use_stmt)))
+ {
+ /* A PHI node can both define and use the same SSA_NAME if
+ the PHI is at the top of a loop and the PHI_RESULT is
+ a loop invariant and copies have not been fully propagated.
+
+ The safe thing to do is exit assuming no optimization is
+ possible. */
+ if (SSA_NAME_DEF_STMT (PHI_RESULT (*use_stmt)) == *use_stmt)
+ return false;
+
+ /* Skip past this PHI and loop again in case we had a PHI
+ chain. */
+ single_imm_use (PHI_RESULT (*use_stmt), use_p, use_stmt);
+ }
+
+ return true;
+}
+
+
+/* Given a DECL, return its AGGREGATE_VARDECL_D entry. If no entry is
+ found and INSERT is TRUE, add a new entry. */
+
+static struct aggregate_vardecl_d *
+get_aggregate_vardecl (tree decl, struct dse_global_data *dse_gd, bool insert)
+{
+ struct aggregate_vardecl_d av, *av_p;
+ void **slot;
+
+ av.decl = decl;
+ slot = htab_find_slot (dse_gd->aggregate_vardecl, &av, insert ? INSERT : NO_INSERT);
+
+
+ /* Not found, and we don't want to insert. */
+ if (slot == NULL)
+ return NULL;
+
+ /* Create new entry. */
+ if (*slot == NULL)
+ {
+ av_p = XNEW (struct aggregate_vardecl_d);
+ av_p->decl = decl;
+
+ /* Record how many parts the whole has. */
+ if (TREE_CODE (TREE_TYPE (decl)) == COMPLEX_TYPE)
+ av_p->nparts = 2;
+ else if (TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE)
+ {
+ tree fields;
+
+ /* Count the number of fields. */
+ fields = TYPE_FIELDS (TREE_TYPE (decl));
+ av_p->nparts = 0;
+ while (fields)
+ {
+ av_p->nparts++;
+ fields = TREE_CHAIN (fields);
+ }
+ }
+ else
+ abort ();
+
+ av_p->ignore = true;
+ av_p->parts_set = sbitmap_alloc (HOST_BITS_PER_LONG);
+ sbitmap_zero (av_p->parts_set);
+ *slot = av_p;
+ }
+ else
+ av_p = (struct aggregate_vardecl_d *) *slot;
+
+ return av_p;
+}
+
+
+/* If STMT is a partial store into an aggregate, record which part got set. */
+
+static void
+dse_record_partial_aggregate_store (tree stmt, struct dse_global_data *dse_gd)
+{
+ tree lhs, decl;
+ enum tree_code code;
+ struct aggregate_vardecl_d *av_p;
+ int part;
+
+ gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
+
+ lhs = TREE_OPERAND (stmt, 0);
+ code = TREE_CODE (lhs);
+ if (code != IMAGPART_EXPR
+ && code != REALPART_EXPR
+ && code != COMPONENT_REF)
+ return;
+ decl = TREE_OPERAND (lhs, 0);
+ /* Early bail on things like nested COMPONENT_REFs. */
+ if (TREE_CODE (decl) != VAR_DECL)
+ return;
+ /* Early bail on unions. */
+ if (code == COMPONENT_REF
+ && TREE_CODE (TREE_TYPE (TREE_OPERAND (lhs, 0))) != RECORD_TYPE)
+ return;
+ + av_p = get_aggregate_vardecl (decl, dse_gd, /*insert=*/false);
+ /* Run away, this isn't an aggregate we care about. */
+ if (!av_p || av_p->ignore)
+ return;
+
+ switch (code)
+ {
+ case IMAGPART_EXPR:
+ part = 0;
+ break;
+ case REALPART_EXPR:
+ part = 1;
+ break;
+ case COMPONENT_REF:
+ {
+ tree orig_field, fields;
+ tree record_type = TREE_TYPE (TREE_OPERAND (lhs, 0));
+
+ /* Get FIELD_DECL. */
+ orig_field = TREE_OPERAND (lhs, 1);
+
+ /* FIXME: Eeech, do this more efficiently. Perhaps
+ calculate bit/byte offsets. */
+ part = -1;
+ fields = TYPE_FIELDS (record_type);
+ while (fields)
+ {
+ ++part;
+ if (fields == orig_field)
+ break;
+ fields = TREE_CHAIN (fields);
+ }
+ gcc_assert (part >= 0);
+ }
+ break;
+ default:
+ return;
+ }
+
+ /* Record which part was set. */
+ SET_BIT (av_p->parts_set, part);
+}
+
+
+/* Return TRUE if all parts in an AGGREGATE_VARDECL have been set. */
+
+static inline bool
+dse_whole_aggregate_clobbered_p (struct aggregate_vardecl_d *av_p)
+{
+ unsigned int i;
+ sbitmap_iterator sbi;
+ int nbits_set = 0;
+
+ /* Count the number of partial stores (bits set). */
+ EXECUTE_IF_SET_IN_SBITMAP (av_p->parts_set, 0, i, sbi)
+ nbits_set++;
+ return ((unsigned) nbits_set == av_p->nparts);
+}
+
+
+/* Return TRUE if STMT is a store into a whole aggregate whose parts we
+ have already seen and recorded. */
+
+static bool
+dse_partial_kill_p (tree stmt, struct dse_global_data *dse_gd)
+{
+ tree decl;
+ struct aggregate_vardecl_d *av_p;
+
+ /* Make sure this is a store into the whole. */
+ if (TREE_CODE (stmt) == MODIFY_EXPR)
+ {
+ enum tree_code code;
+
+ decl = TREE_OPERAND (stmt, 0);
+ code = TREE_CODE (TREE_TYPE (decl));
+
+ if (code != COMPLEX_TYPE && code != RECORD_TYPE)
+ return false;
+
+ if (TREE_CODE (decl) != VAR_DECL)
+ return false;
+ }
+ else
+ return false;
+
+ av_p = get_aggregate_vardecl (decl, dse_gd, /*insert=*/false);
+ gcc_assert (av_p != NULL);
+
+ return dse_whole_aggregate_clobbered_p (av_p);
+}
+
+
/* Attempt to eliminate dead stores in the statement referenced by BSI.


A dead store is a store into a memory location which will later be
@@ -234,7 +543,7 @@ dse_optimize_stmt (struct dom_walk_data


  /* If this statement has no virtual defs, then there is nothing
     to do.  */
-  if (ZERO_SSA_OPERANDS (stmt, (SSA_OP_VMAYDEF|SSA_OP_VMUSTDEF)))
+  if (ZERO_SSA_OPERANDS (stmt, SSA_OP_VDEF))
    return;

/* We know we have virtual definitions. If this is a MODIFY_EXPR that's
@@ -249,78 +558,14 @@ dse_optimize_stmt (struct dom_walk_data {
use_operand_p first_use_p = NULL_USE_OPERAND_P;
use_operand_p use_p = NULL;
- tree use_stmt, temp;
- tree defvar = NULL_TREE, usevar = NULL_TREE;
- bool fail = false;
- use_operand_p var2;
- def_operand_p var1;
- ssa_op_iter op_iter;
-
- /* We want to verify that each virtual definition in STMT has
- precisely one use and that all the virtual definitions are
- used by the same single statement. When complete, we
- want USE_STMT to refer to the one statement which uses
- all of the virtual definitions from STMT. */
- use_stmt = NULL;
- FOR_EACH_SSA_MUST_AND_MAY_DEF_OPERAND (var1, var2, stmt, op_iter)
- {
- defvar = DEF_FROM_PTR (var1);
- usevar = USE_FROM_PTR (var2);
-
- /* If this virtual def does not have precisely one use, then
- we will not be able to eliminate STMT. */
- if (! has_single_use (defvar))
- {
- fail = true;
- break;
- }
-
- /* Get the one and only immediate use of DEFVAR. */
- single_imm_use (defvar, &use_p, &temp);
- gcc_assert (use_p != NULL_USE_OPERAND_P);
- first_use_p = use_p;
-
- /* If the immediate use of DEF_VAR is not the same as the
- previously find immediate uses, then we will not be able
- to eliminate STMT. */
- if (use_stmt == NULL)
- use_stmt = temp;
- else if (temp != use_stmt)
- {
- fail = true;
- break;
- }
- }
-
- if (fail)
- {
- record_voperand_set (dse_gd->stores, &bd->stores, ann->uid);
- return;
- }
-
- /* Skip through any PHI nodes we have already seen if the PHI
- represents the only use of this store.
-
- Note this does not handle the case where the store has
- multiple V_{MAY,MUST}_DEFs which all reach a set of PHI nodes in the
- same block. */
- while (use_p != NULL_USE_OPERAND_P
- && TREE_CODE (use_stmt) == PHI_NODE
- && bitmap_bit_p (dse_gd->stores, get_stmt_uid (use_stmt)))
- {
- /* A PHI node can both define and use the same SSA_NAME if
- the PHI is at the top of a loop and the PHI_RESULT is
- a loop invariant and copies have not been fully propagated.
+ tree use_stmt;


-	     The safe thing to do is exit assuming no optimization is
-	     possible.  */
-	  if (SSA_NAME_DEF_STMT (PHI_RESULT (use_stmt)) == use_stmt)
-	    return;
+      if (!dse_possible_dead_store_p (stmt, &first_use_p, &use_p, &use_stmt,
+				      dse_gd, bd))
+	return;

-	  /* Skip past this PHI and loop again in case we had a PHI
-	     chain.  */
-	  single_imm_use (PHI_RESULT (use_stmt), &use_p, &use_stmt);
-	}
+      /* If this is a partial store into an aggregate, record it.  */
+      dse_record_partial_aggregate_store (stmt, dse_gd);

/* If we have precisely one immediate use at this point, then we may
have found redundant store. Make sure that the stores are to
@@ -328,13 +573,15 @@ dse_optimize_stmt (struct dom_walk_data SSA-form variables in the address will have the same values. */
if (use_p != NULL_USE_OPERAND_P
&& bitmap_bit_p (dse_gd->stores, get_stmt_uid (use_stmt))
- && operand_equal_p (TREE_OPERAND (stmt, 0),
- TREE_OPERAND (use_stmt, 0), 0)
+ && (operand_equal_p (TREE_OPERAND (stmt, 0),
+ TREE_OPERAND (use_stmt, 0), 0)
+ || dse_partial_kill_p (stmt, dse_gd))
&& memory_address_same (stmt, use_stmt))
{
- /* Make sure we propagate the ABNORMAL bit setting. */
- if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (USE_FROM_PTR (first_use_p)))
- SSA_NAME_OCCURS_IN_ABNORMAL_PHI (usevar) = 1;
+ ssa_op_iter op_iter;
+ def_operand_p var1;
+ vuse_vec_p vv;
+ tree stmt_lhs;


if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -342,12 +589,23 @@ dse_optimize_stmt (struct dom_walk_data print_generic_expr (dump_file, bsi_stmt (bsi), dump_flags);
fprintf (dump_file, "'\n");
}
+
/* Then we need to fix the operand of the consuming stmt. */
- FOR_EACH_SSA_MUST_AND_MAY_DEF_OPERAND (var1, var2, stmt, op_iter)
+ stmt_lhs = USE_FROM_PTR (first_use_p);
+ FOR_EACH_SSA_VDEF_OPERAND (var1, vv, stmt, op_iter)
{
+ tree usevar, temp;
+
single_imm_use (DEF_FROM_PTR (var1), &use_p, &temp);
- SET_USE (use_p, USE_FROM_PTR (var2));
+ gcc_assert (VUSE_VECT_NUM_ELEM (*vv) == 1);
+ usevar = VUSE_ELEMENT_VAR (*vv, 0);
+ SET_USE (use_p, usevar);
+
+ /* Make sure we propagate the ABNORMAL bit setting. */
+ if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (stmt_lhs))
+ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (usevar) = 1;
}
+
/* Remove the dead store. */
bsi_remove (&bsi, true);


@@ -396,6 +654,54 @@ dse_finalize_block (struct dom_walk_data
      }
}

+
+/* Hashing and equality functions for AGGREGATE_VARDECL.  */
+
+static hashval_t
+aggregate_vardecl_hash (const void *p)
+{
+  return htab_hash_pointer
+    ((const void *)((const struct aggregate_vardecl_d *)p)->decl);
+}
+
+static int
+aggregate_vardecl_eq (const void *p1, const void *p2)
+{
+  return ((const struct aggregate_vardecl_d *)p1)->decl
+    == ((const struct aggregate_vardecl_d *)p2)->decl;
+}
+
+
+/* Free memory allocated by one entry in AGGREGATE_VARDECL.  */
+
+static void
+aggregate_vardecl_free (void *p)
+{
+  struct aggregate_vardecl_d *entry = (struct aggregate_vardecl_d *) p;
+  sbitmap_free (entry->parts_set);
+  free (entry);
+}
+
+
+/* Return true if STMT is a store into an entire aggregate.  */
+
+static bool
+aggregate_whole_store_p (tree stmt)
+{
+  if (TREE_CODE (stmt) == MODIFY_EXPR)
+    {
+      tree lhs = TREE_OPERAND (stmt, 0);
+      enum tree_code code = TREE_CODE (TREE_TYPE (lhs));
+
+      if (code == COMPLEX_TYPE || code == RECORD_TYPE)
+	return true;
+    }
+  return false;
+}
+
+
+/* Main entry point.  */
+
static unsigned int
tree_ssa_dse (void)
{
@@ -403,15 +709,40 @@ tree_ssa_dse (void)
  struct dse_global_data dse_gd;
  basic_block bb;

- /* Create a UID for each statement in the function. Ordering of the
- UIDs is not important for this pass. */
+ dse_gd.aggregate_vardecl = + htab_create (37, aggregate_vardecl_hash,
+ aggregate_vardecl_eq, aggregate_vardecl_free);
+
max_stmt_uid = 0;
FOR_EACH_BB (bb)
{
block_stmt_iterator bsi;


      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
-	stmt_ann (bsi_stmt (bsi))->uid = max_stmt_uid++;
+	{
+	  tree stmt = bsi_stmt (bsi);
+
+	  /* Record aggregates which have been stored into as a whole.  */
+	  if (aggregate_whole_store_p (stmt))
+	    {
+	      tree lhs = TREE_OPERAND (stmt, 0);
+	      if (TREE_CODE (lhs) == VAR_DECL)
+		{
+		  struct aggregate_vardecl_d *av_p;
+
+		  av_p = get_aggregate_vardecl (lhs, &dse_gd, /*insert=*/true);
+		  av_p->ignore = false;
+
+		  /* Ignore aggregates with too many parts.  */
+		  if (av_p->nparts > HOST_BITS_PER_LONG)
+		    av_p->ignore = true;
+		}
+	    }
+
+	  /* Create a UID for each statement in the function.
+	     Ordering of the UIDs is not important for this pass.  */
+	  stmt_ann (stmt)->uid = max_stmt_uid++;
+	}
    }

  /* We might consider making this a property of each pass so that it
@@ -437,6 +768,7 @@ tree_ssa_dse (void)

  /* This is the main hash table for the dead store elimination pass.  */
  dse_gd.stores = BITMAP_ALLOC (NULL);
+
  walk_data.global_data = &dse_gd;

  /* Initialize the dominator walker.  */
@@ -448,8 +780,9 @@ tree_ssa_dse (void)
  /* Finalize the dominator walker.  */
  fini_walk_dominator_tree (&walk_data);

-  /* Release the main bitmap.  */
+  /* Release unneeded data.  */
  BITMAP_FREE (dse_gd.stores);
+  htab_delete (dse_gd.aggregate_vardecl);

  /* For now, just wipe the post-dominator information.  */
  free_dominance_info (CDI_POST_DOMINATORS);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]