[RFC] [4.2] Address computation PRE


This patch implements elimination of partially redundant address computations. Some time after writing the summary in http://gcc.gnu.org/ml/gcc/2005-09/msg00821.html, I noticed that GCC is still unable to optimize the case described there, even with current mainline and -fno-cse-skip-blocks -fno-cse-follow-jumps. The reason is that the addresses:

1) are redundant but appear in different blocks;

2) are loaded from GOT, so they take more than one instruction to load, and each instruction depends on the previous one.

Item 2 means that GCSE cannot optimize them, because it is not a cascading optimization. Item 1 means that CSE without path following cannot optimize them either.
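
For concreteness, here is a minimal example of the kind of code involved (my own illustration, not taken from the linked summary):

  extern int x;

  int f (int flag)
  {
    int r = 0;
    if (flag)
      r = x;      /* &x loaded from the GOT here...  */
    return r + x; /* ...and recomputed here.  */
  }

When compiling with -fPIC, each access to x first computes its address; on the path through the `if' body the address is computed twice, which is exactly a partial redundancy.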

Therefore I tried to implement a restricted, but faster and more powerful, form of GCSE: it looks at REG_EQUAL notes to detect address computations, gathers all the instructions that the one with the note depends on, and treats them as a single entity for the purpose of eliminating them.
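
For example, on a target that splits a symbolic address load into a high/lo_sum pair, a chain might look like this (schematic RTL of my own; details vary by target):

  (insn 10 ... (set (reg 100) (high (symbol_ref "x"))))
  (insn 11 ... (set (reg 101) (lo_sum (reg 100) (symbol_ref "x")))
     (expr_list:REG_EQUAL (symbol_ref "x") (nil)))

The pass starts from insn 11, the one carrying the note, and follows the def-use chains backwards to pull insn 10 into the same entity.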

I don't like the idea very much in theory, but in practice I think this patch:

1) also performs a specialized load PRE (for loads from the GOT), of a kind that (unlike e.g. PR23455) is impossible to perform at the GIMPLE level. It is in fact complementary to what can be done on trees, so (contrary to popular expectation, and unlike store motion) load PRE on trees would *not* subsume -fgcse-lm;

2) avoids the only apparent alternative, which is to implement progressive lowering -- not an easy task;

3) gives visible speed improvements even over full-blown CSE, judging from some quick tests, although I implemented it only to recover the performance lost by disabling path following. I measured a 3% improvement on bzip2 -- this was on a laptop, so take the result with more than a grain of salt, but it was extremely consistent;

4) is maybe a hack, but IMHO a well-written one: it is fast, it is relatively small, it uses a sound algorithm (LCM), and its placement of computations is optimal;

5) helps on all architectures (though on x86 only for shared libraries or PIE).

Comments?

Paolo
Index: passes.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/passes.c,v
retrieving revision 2.111
diff -p -u -u -r2.111 passes.c
--- passes.c	9 Sep 2005 00:46:38 -0000	2.111
+++ passes.c	27 Sep 2005 07:35:57 -0000
@@ -595,5 +595,6 @@ init_optimization_passes (void)
   NEXT_PASS (pass_cse);
+  NEXT_PASS (pass_addr_gcse);
   NEXT_PASS (pass_gcse);
   NEXT_PASS (pass_loop_optimize);
   NEXT_PASS (pass_jump_bypass);
   NEXT_PASS (pass_cfg);
Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.1541
diff -p -u -u -r1.1541 Makefile.in
--- Makefile.in	14 Sep 2005 09:26:41 -0000	1.1541
+++ Makefile.in	27 Sep 2005 07:35:59 -0000
@@ -945,8 +945,8 @@ OBJS-common = \
  tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o	   \
  tree-ssa-math-opts.o							   \
  tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o		   \
- alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o	  	   \
- cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o		   \
+ addr-gcse.o alias.o bb-reorder.o bitmap.o builtins.o caller-save.o	   \
+ calls.o cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o	   \
  cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o	   \
  cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o 	   \
  dbxout.o ddg.o tree-ssa-loop-ch.o loop-invariant.o tree-ssa-loop-im.o	   \
@@ -2378,6 +2381,9 @@ reorg.o : reorg.c $(CONFIG_H) $(SYSTEM_H
    $(INSN_ATTR_H) except.h $(RECOG_H) function.h $(FLAGS_H) output.h \
    $(EXPR_H) toplev.h $(PARAMS_H) $(TM_P_H) $(OBSTACK_H) $(RESOURCE_H) \
    timevar.h target.h tree-pass.h
+addr-gcse.o : addr-gcse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
+   insn-config.h df.h $(HASHTAB_H) $(BASIC_BLOCK_H) toplev.h output.h \
+   $(OBSTACK_H) $(RECOG_H) $(TIMEVAR_H) $(TREE_DUMP_H) $(TREE_PASS_H)
 alias.o : alias.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    $(FLAGS_H) hard-reg-set.h $(BASIC_BLOCK_H) $(REGS_H) toplev.h output.h \
    $(ALIAS_H) $(EMIT_RTL_H) $(GGC_H) function.h cselib.h $(TREE_H) $(TM_P_H) \
Index: timevar.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/timevar.def,v
retrieving revision 1.54
diff -p -u -u -r1.54 timevar.def
--- timevar.def	1 Aug 2005 07:47:23 -0000	1.54
+++ timevar.def	27 Sep 2005 07:35:59 -0000
@@ -124,6 +124,7 @@ DEFTIMEVAR (TV_TEMPLATE_INSTANTIATION, "
 DEFTIMEVAR (TV_CSE                   , "CSE")
 DEFTIMEVAR (TV_LOOP                  , "loop analysis")
 DEFTIMEVAR (TV_GCSE                  , "global CSE")
+DEFTIMEVAR (TV_ADDRESS_GCSE          , "redundant addresses")
 DEFTIMEVAR (TV_CPROP1                , "CPROP 1")
 DEFTIMEVAR (TV_PRE                   , "PRE")
 DEFTIMEVAR (TV_HOIST                 , "code hoisting")
/* Global removal of redundant address computations for GNU compiler.
   Copyright (C) 2005 Free Software Foundation, Inc.
   Contributed by Paolo Bonzini.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* This pass performs LCM-based elimination of redundant address
   computations.  Such computations can be made of many instructions
   (for example when position-independent code, TOC addressing or TLS
   are in use), so gcse.c is not able to optimize them successfully
   unless you introduce additional passes.

   This pass looks at REG_EQUAL notes to detect address computations.
   It then gathers all the instructions that the one with the note
   depends on.  All the instructions in such a chain are single set
   instructions, and the regs they define must have a single
   definition and a single use.  The same computation may occur
   several times in a function; the occurrences are detected with a
   hash table.

   Each occurrence, however, stores the result in a different pseudo.
   Before removing redundant computations, we pick one as a `canonical'
   register.  The canonical register is picked more or less randomly;
   however, it should be the first (or only) occurrence of the address
   in a basic block.  This happens quite naturally, and simplifies the
   deletion of redundant copies.

   Each distinct address is handed as a single `entity' to the lazy
   code motion algorithms, and then inserted/deleted according to the
   result of the LCM analysis.  The steps are:

   - insert copies of the sets on the edges suggested by LCM;

   - when LCM suggests deleting the canonical copy, do so (it will be
     inserted elsewhere);

   - when LCM says a non-canonical copy is necessary, modify its last
     insn so that it sets the canonical register.  Other non-canonical
     copies are left alone, and delete_trivially_dead_insns will remove
     them;

   - modify all uses of non-canonical copies, so that they refer to the
     canonical register.

   If there are multiple computations in the same basic block, LCM's
   indications only apply to the first---all the others are obviously
   both redundant and non-canonical, and are treated as such.

   This pass is also a relatively simple example of how to use the LCM
   routines.   */
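
/* As an illustration (a schematic example of mine, with target details
   elided): suppose block 2 computes &X into pseudo 100, and block 4,
   reached from both block 2 and block 3, computes &X again into pseudo
   101.  Then pseudo 100 is the canonical register, LCM requests that
   the computation be inserted on the edge from block 3 to block 4, the
   computation in block 4 is deleted, and every use of pseudo 101 is
   rewritten to refer to pseudo 100.  */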


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "flags.h"

#include "insn-config.h"
#include "obstack.h"
#include "rtl.h"
#include "recog.h"
#include "basic-block.h"
#include "df.h"
#include "hashtab.h"

#include "timevar.h"
#include "tree-dump.h"
#include "tree-pass.h"

/* This function is copied from gcse.c; in GCC 4.2, it should move to
   cfgrtl.c.  */

static rtx
insert_insn_end_bb (rtx pat, basic_block bb)
{
  rtx insn = BB_END (bb);
  rtx new_insn;

  /* If the last insn is a jump, insert PAT in front [taking care to
     handle cc0, etc. properly].  Similarly we need to take care of
     trapping instructions in the presence of non-call exceptions.  */

  if (JUMP_P (insn)
      || (NONJUMP_INSN_P (insn)
          && (!single_succ_p (bb)
              || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
    {
#ifdef HAVE_cc0
      rtx note;
#endif
      /* If this is a jump table, then we can't insert stuff here.  Since
         we know the previous real insn must be the tablejump, we insert
         the new instruction just before the tablejump.  */
      if (GET_CODE (PATTERN (insn)) == ADDR_VEC
          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
        insn = prev_real_insn (insn);

#ifdef HAVE_cc0
      /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
         if cc0 isn't set.  */
      note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
      if (note)
        insn = XEXP (note, 0);
      else
        {
          rtx maybe_cc0_setter = prev_nonnote_insn (insn);
          if (maybe_cc0_setter
              && INSN_P (maybe_cc0_setter)
              && sets_cc0_p (PATTERN (maybe_cc0_setter)))
	    insn = maybe_cc0_setter;
        }
#endif
      /* FIXME: What if something in cc0/jump uses value set in new insn?  */
      new_insn = emit_insn_before_noloc (pat, insn);
    }

  /* Likewise if the last insn is a call, as will happen in the presence
     of exception handling.  */
  else if (CALL_P (insn)
           && (!single_succ_p (bb)
               || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
    {
      /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
         we search backward and place the instructions before the first
         parameter is loaded.  Do this for everyone for consistency and a
         presumption that we'll get better code elsewhere as well.  */

      /* Since different machines initialize their parameter registers
         in different orders, assume nothing.  Collect the set of all
         parameter registers.  */
      insn = find_first_parameter_load (insn, BB_HEAD (bb));

      /* If we found all the parameter loads, then we want to insert
         before the first parameter load.

         If we did not find all the parameter loads, then we might have
         stopped on the head of the block, which could be a CODE_LABEL.
         If we inserted before the CODE_LABEL, then we would be putting
         the insn in the wrong basic block.  In that case, put the insn
         after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
      while (LABEL_P (insn)
             || NOTE_INSN_BASIC_BLOCK_P (insn))
        insn = NEXT_INSN (insn);

      new_insn = emit_insn_before_noloc (pat, insn);
    }
  else
    new_insn = emit_insn_after_noloc (pat, insn);

  return new_insn;
}





/* An obstack for our working variables.  */
static struct obstack gcse_addr_obstack;

/* A table that enables us to look up addr_expr's by their RTX.  */
static htab_t addr_hashtab;

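/* Pseudos holding the canonical copy of each address, and how many
   there are; each one becomes an `entity' for LCM.  */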
static bitmap moved_regs;
static int n_moved_regs;

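/* Def-use chain information, computed by df.c.  */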
static struct df *df;

struct addr_insn {
  /* One instruction in the computation of an address.  */
  rtx insn;

  /* Cached result of single_set for INSN.  */
  rtx set;

  /* The previous and next instruction contributing to the computation
     of this address.  */
  struct addr_insn *prev, *next;
};

struct addr_copy {
  /* The last insn for this occurrence of an address, i.e. the one
     with the REG_EQUAL note.  */
  rtx insn;

  /* Next occurrence of this same address.  */
  struct addr_copy *next;
};

struct addr_expr {
  /* Hash value of VAL, cache for speed.  */
  hashval_t hash;

  /* Address being computed, taken from the REG_EQUAL note.  */
  rtx val;

  /* List of instructions necessary to compute this address.  */
  struct addr_insn *first, *last;

  /* List of other places where the same address is computed.  If several
     copies come from the same basic block, it is important that they are
     ordered as they appear in the instruction stream.  This way, 
     move_address_computations can easily keep the first and discard
     all the others.  */
  struct addr_copy *first_copy, *last_copy;
};

/* Return a hash code for the address X, which is found in a REG_EQUAL note.  */
static inline hashval_t
hash_addr (rtx x)
{
  int do_not_record = false;
  hashval_t result;
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  result = hash_rtx (x, VOIDmode, &do_not_record, NULL, false);
  gcc_assert (!do_not_record);
  return result;
}


/* The hash function for our hash table.  The value is always computed with
   hash_rtx when adding an element; this function just extracts the
   hash value from a struct addr_expr structure.  */

static hashval_t
addr_expr_hash (const void *entry)
{
  const struct addr_expr *v = (const struct addr_expr *) entry;
  return v->hash;
}


/* Allocate a struct addr_expr and the first struct addr_insn, representing
   that INSN is the last of the instructions that compute VAL.  SET is the
   only set in INSN.  */ 
static struct addr_expr *
addr_expr_new (rtx val, rtx insn, rtx set)
{
  struct addr_insn *ai;
  struct addr_expr *ae;

  ai = obstack_alloc (&gcse_addr_obstack, sizeof (struct addr_insn));
  ai->insn = insn;
  ai->set = set;
  ai->prev = ai->next = NULL;

  ae = obstack_alloc (&gcse_addr_obstack, sizeof (struct addr_expr));
  if (GET_CODE (val) == CONST)
    val = XEXP (val, 0);

  ae->hash = hash_addr (val);
  ae->val = val;
  ae->first = ae->last = ai;
  ae->first_copy = ae->last_copy = NULL;
  return ae;
}


/* Record that INSN is a (possibly redundant) computation of the address
   represented by AE.  */
static void
addr_expr_add_copy (struct addr_expr *ae, rtx insn)
{
  struct addr_copy *ac;

  ac = obstack_alloc (&gcse_addr_obstack, sizeof (struct addr_copy));
  ac->insn = insn;
  ac->next = NULL;

  if (ae->first_copy)
    {
      ae->last_copy->next = ac;
      ae->last_copy = ac;
    }
  else
    ae->first_copy = ae->last_copy = ac;
}

/* Return a pointer to a slot where X can be stored.  Create a new slot if
   X is not in the hash table; in that case the returned slot will point
   to NULL.  */

static inline struct addr_expr **
addr_expr_insert (rtx x)
{
  void **slot;
  slot = htab_find_slot_with_hash (addr_hashtab, x, hash_addr (x), INSERT);
  return (struct addr_expr **) slot;
}


/* Return a pointer to a struct addr_expr corresponding to the address
   loaded in the REGNO pseudo.  */

static inline struct addr_expr *
addr_expr_lookup_regno (int regno)
{
  struct ref *def = df->regs[regno].defs->ref;
  rtx note = find_reg_note (DF_REF_INSN (def), REG_EQUAL, NULL_RTX);
  rtx addr = XEXP (note, 0);
  void **slot = htab_find_slot_with_hash (addr_hashtab, addr,
					  hash_addr (addr), NO_INSERT);

  gcc_assert (*slot);
  return (struct addr_expr *) *slot;
}


/* The equality test for our hash table.  The first argument ENTRY is a
   table element (i.e. a struct addr_expr), while the second arg X is an
   rtx.  */

static int
addr_expr_equals_rtx (const void *entry, const void *x_arg)
{
  const struct addr_expr *ae = (const struct addr_expr *) entry;
  rtx x = (rtx) x_arg;

  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  return rtx_equal_p (ae->val, x);
}


/* Return the basic block of AE's chain of insns.  */

static inline basic_block
addr_expr_bb (struct addr_expr *ae)
{
  return BLOCK_FOR_INSN (ae->last->insn);
}


/* Return the REG rtx where AE's chain of insns stores the address.  */

static inline rtx
addr_expr_reg (struct addr_expr *ae)
{
  return SET_DEST (ae->last->set);
}


/* Recursive worker for for_each_rtx, looking for insns that should also
   be entered in the insn chain pointed to by DATA.  Stop the walk if a
   non-readonly or trapping MEM is found, or a REG that has more than
   one DEF or USE.  */

static int
enter_dependent_insns (rtx *px, void *data)
{
  rtx x = *px;

  /* Memory accesses are ok if they are safe.  */
  if (MEM_P (x))
    {
      if (MEM_READONLY_P (x) && MEM_NOTRAP_P (x))
	return 0;
      else
	return 1;
    }

  /* For register accesses we will chain more instructions.  */
  if (REG_P (x))
    {
      struct addr_expr *ae;
      struct addr_insn *ai;
      struct df_link *defs = df->regs[REGNO (x)].defs;
      struct df_link *uses = df->regs[REGNO (x)].uses;
      rtx insn, set;
      if (REGNO (x) < FIRST_PSEUDO_REGISTER)
	{
	  /* For a hard register, if it is set in the function, fail.  */
	  if (defs)
	    return 1;
	}
      else
	{
	  /* For a pseudo register, if it is set or used elsewhere, fail.
	     Else, make the definition part of the chain.  */
          if (defs->next || !uses || uses->next)
	    return 1;

          insn = DF_REF_INSN (defs->ref);
          set = single_set (insn);
          if (!set || !REG_P (SET_DEST (set)))
	    return 1;

          /* The worklist will pick it up.  */
          ae = (struct addr_expr *) data;
          ai = obstack_alloc (&gcse_addr_obstack, sizeof (struct addr_insn));
          ai->insn = insn;
          ai->set = set;
          ai->next = ae->first;
          ai->prev = NULL;
          ae->first->prev = ai;
          ae->first = ai;
	}

      return 0;
    }

  /* For now, make it simple.  More careful analysis of calls will be
     necessary to handle TLS.  Note that a CALL rtx inside an expression
     is not a CALL_INSN, so we must check the code directly.  */
  if (GET_CODE (x) == CALL)
    return 1;

  return 0;
}


/* Create a struct addr_expr starting from INSN and adding insns into the
   chain recursively.  */

static struct addr_expr *
gather_dependent_insns (rtx val, rtx insn, rtx set)
{
  struct addr_insn *worklist;
  struct addr_expr *ae;

  ae = addr_expr_new (val, insn, set);

  /* Build a chain of the instructions computing the constant.  */
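  /* Note that enter_dependent_insns prepends newly found insns at
     AE->FIRST, linking them in through the PREV pointers; walking
     backwards from AE->LAST therefore visits them as well, which makes
     this loop a worklist.  */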
  for (worklist = ae->last; worklist; worklist = worklist->prev)
    if (for_each_rtx (&SET_SRC (worklist->set), enter_dependent_insns, ae))
      {
        obstack_free (&gcse_addr_obstack, ae);
        return NULL;
      }

  return ae;
}


/* Find all address computations and enter them into a hash table.  */

static void
gather_address_computations (void)
{
  basic_block bb;
  rtx insn;

  FOR_EACH_BB (bb)
    FOR_BB_INSNS (bb, insn)
      {
	struct addr_expr *ae, **slot;
	rtx set, note;
	unsigned int regno;

	note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
        if (!note || GET_CODE (XEXP (note, 0)) != SYMBOL_REF)
	  continue;

	set = single_set (insn);
	if (!set || !REG_P (SET_DEST (set)))
	  continue;

	/* There must be a single set of the reg.  */
	regno = REGNO (SET_DEST (set));
	if (df->regs[regno].defs->next)
	  continue;

	/* See if the address was present elsewhere.  */
	slot = addr_expr_insert (XEXP (note, 0));
	if (*slot)
	  {
	    addr_expr_add_copy (*slot, insn);
	    if (dump_file)
	      fprintf (dump_file, "A copy of the address in reg %d is also",
		       REGNO (addr_expr_reg (*slot)));
	  }
	else
	  {
	    ae = gather_dependent_insns (XEXP (note, 0), insn, set);
	    if (!ae)
	      continue;

	    *slot = ae;
	    bitmap_set_bit (moved_regs, regno);
	    n_moved_regs++;
	    if (dump_file)
	      fprintf (dump_file, "Found a new address");
	  }

	if (dump_file)
	  {
	    fprintf (dump_file, " in reg %d\n", regno);
	    print_inline_rtx (dump_file, XEXP (note, 0), 2); 
	    fprintf (dump_file, "\n\n");
	  }
      }
}


/* Insert the necessary copies of AE on the edges in EDGE_LIST, checking the
   N-th bit of every bitmap in INSERT.  */

static bool
insert_address_computations (struct addr_expr *ae, struct edge_list *edge_list,
			     sbitmap *insert, int n)
{
  rtx reg = addr_expr_reg (ae);
  bool need_commit = false;
  int e_index;
  struct addr_insn *ai;

  for (e_index = NUM_EDGES (edge_list) - 1; e_index >= 0; e_index--)
    if (TEST_BIT (insert[e_index], n))
      {
	edge e = INDEX_EDGE (edge_list, e_index);
	
	need_commit = true;

	/* We can't insert anything on an abnormal and critical edge,
	   so we insert the insn at the end of the previous block.  There
	   are several alternatives detailed in Morgan's book, p. 277
	   (sec. 10.5), for handling this situation.  This one is easiest
	   for now.  */

        if (e->flags & EDGE_ABNORMAL)
	  {
	    rtx insn;

	    ai = ae->first;
	    insn = insert_insn_end_bb (copy_rtx (ai->set), e->src);
	    while ((ai = ai->next) != NULL)
	      insn = emit_insn_after_noloc (copy_rtx (ai->set), insn);
	  }
	else
	  {
	    for (ai = ae->first; ai; ai = ai->next)
	      insert_insn_on_edge (copy_rtx (ai->set), e);
	  }

	if (dump_file)
	  fprintf (dump_file, "Reg %d needed on edge between %d and %d\n",
		   REGNO (reg), e->src->index, e->dest->index);
      }
  
  return need_commit;
}


/* Delete unnecessary copies of AE, checking the N-th bit of the
   DELETE bitmap for the basic block that defines AE.  This routine is
   only concerned with the `canonical' definition that is already
   using the correct pseudo.  */

static void
delete_address_computations (struct addr_expr *ae, sbitmap *delete, int n)
{
  basic_block bb = addr_expr_bb (ae);
  struct addr_insn *ai;

  /* Remove the original definition if unnecessary.  */
  if (TEST_BIT (delete[bb->index], n))
    {
      for (ai = ae->first; ai; ai = ai->next)
	{
	  ai->set = copy_rtx (ai->set);
	  delete_insn (ai->insn);
	}
      
      if (dump_file)
	{
	  rtx reg = addr_expr_reg (ae);
	  fprintf (dump_file, "Reg %d not needed in block %d\n",
		   REGNO (reg), bb->index);
	}
    }
  else
    {
      /* Any other definition in this basic block is unnecessary.  */
      SET_BIT (delete[bb->index], n);
    }
}


/* For all the copies of AE, check whether each has to be deleted, using
   the N-th bit of the DELETE bitmaps.  If a copy should be kept, modify
   it to set the same pseudo as the `canonical' definition.

   We don't care about removing unnecessary, non-canonical
   definitions.  delete_trivially_dead_insns will do so, because their
   destination pseudos will not be used anymore.  */

static void
redirect_address_computations (struct addr_expr *ae, sbitmap *delete, int n)
{
  rtx reg = addr_expr_reg (ae);
  struct addr_copy *ac;

  for (ac = ae->first_copy; ac; ac = ac->next)
    {
      int bb_index = BLOCK_FOR_INSN (ac->insn)->index;
      rtx set = single_set (ac->insn);
      int old_regno = REGNO (SET_DEST (set));
      struct df_link *uses;

      if (dump_file)
        fprintf (dump_file, "Reg %d replaced throughout with %d\n",
		 old_regno, REGNO (reg));

      for (uses = df->regs[old_regno].uses; uses; uses = uses->next)
	{
	  bool ok;
	  ok = validate_change (DF_REF_INSN (uses->ref), DF_REF_LOC (uses->ref),
			        reg, false);
	  gcc_assert (ok);
	}

      if (!TEST_BIT (delete[bb_index], n))
	{
	  bool ok;
	  ok = validate_change (ac->insn, &SET_DEST (set), reg, false);
	  gcc_assert (ok);
	  
	  /* Any other definition in this basic block is unnecessary.  */
	  SET_BIT (delete[bb_index], n);
	  
	  if (dump_file)
	    fprintf (dump_file,
		     "Reg %d needed in block %d, diverting insn %d\n",
		     REGNO (reg), bb_index, INSN_UID (ac->insn));
	}
    }
}

/* Run LCM and move around redundant computations.  */

static void
move_address_computations (void)
{
  /* LCM inputs.  */
  sbitmap *antic, *transp, *comp, *kill;

  /* LCM outputs.  */
  sbitmap *insert, *delete;
  struct edge_list *edge_list;

  unsigned int i, n;
  bitmap_iterator bi;
  bool need_commit = false;

  antic = sbitmap_vector_alloc (last_basic_block, n_moved_regs);
  transp = sbitmap_vector_alloc (last_basic_block, n_moved_regs);
  comp = sbitmap_vector_alloc (last_basic_block, n_moved_regs);
  kill = sbitmap_vector_alloc (last_basic_block, n_moved_regs);

  /* These are trivial, as the computed values are ultimately constants.  */
  sbitmap_vector_ones (transp, last_basic_block);
  sbitmap_vector_zero (kill, last_basic_block);

  /* Compute these two from the address occurrences gathered earlier.  */
  sbitmap_vector_zero (antic, last_basic_block);
  sbitmap_vector_zero (comp, last_basic_block);

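  /* Entity numbers are assigned in MOVED_REGS iteration order; the loop
     after pre_edge_lcm walks the bitmap in the same order, so N stays in
     sync with the LCM bitmaps.  */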
  n = 0;
  EXECUTE_IF_SET_IN_BITMAP (moved_regs, 0, i, bi)
    {
      struct addr_expr *ae = addr_expr_lookup_regno (i);
      struct addr_copy *ac;
      basic_block bb = addr_expr_bb (ae);

      if (dump_file)
	fprintf (dump_file, "Register %d computed in basic block %d\n",
		 i, bb->index);

      SET_BIT (comp[bb->index], n);
      SET_BIT (antic[bb->index], n);
      for (ac = ae->first_copy; ac; ac = ac->next)
	{
	  int bb_index = BLOCK_FOR_INSN (ac->insn)->index;
	  if (dump_file)
	    fprintf (dump_file,
		     "A copy of register %d is also in basic block %d\n",
		     i, bb_index);

          SET_BIT (comp[bb_index], n);
          SET_BIT (antic[bb_index], n);
	}
      n++;
    }

  edge_list = pre_edge_lcm (dump_file, n_moved_regs, transp, comp, antic,
                            kill, &insert, &delete);

  /* PRE inputs not needed anymore.  */
  sbitmap_vector_free (antic);
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);
  sbitmap_vector_free (kill);

  n = 0;
  EXECUTE_IF_SET_IN_BITMAP (moved_regs, 0, i, bi)
    {
      struct addr_expr *ae = addr_expr_lookup_regno (i);

      need_commit |= insert_address_computations (ae, edge_list, insert, n);
      delete_address_computations (ae, delete, n);
      redirect_address_computations (ae, delete, n);
      n++;
    }

  if (need_commit)
    commit_edge_insertions ();

  /* Clear PRE outputs.  */
  sbitmap_vector_free (delete);
  sbitmap_vector_free (insert);
  free_edge_list (edge_list);
}



static bool
gate_handle_addr_gcse (void)
{
  return optimize > 0 && flag_gcse_addresses;
}

static void
gcse_addr_main (void)
{
  df = df_init ();
  df_analyze (df, NULL, DF_RD_CHAIN | DF_RU_CHAIN | DF_HARD_REGS);

  gcc_obstack_init (&gcse_addr_obstack);
  addr_hashtab = htab_create (31, addr_expr_hash, addr_expr_equals_rtx, NULL);
  moved_regs = BITMAP_ALLOC (NULL);
  n_moved_regs = 0;

  gather_address_computations ();
  if (n_moved_regs)
    move_address_computations ();

  BITMAP_FREE (moved_regs);
  htab_delete (addr_hashtab);
  obstack_free (&gcse_addr_obstack, NULL);
  df_finish (df);

  if (dump_file)
    fprintf (dump_file, "\n");
  delete_trivially_dead_insns (get_insns (), max_reg_num ());
}

struct tree_opt_pass pass_addr_gcse =
{
  "addr-gcse",                          /* name */
  gate_handle_addr_gcse,                /* gate */
  gcse_addr_main,			/* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_ADDRESS_GCSE,                      /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func | TODO_verify_flow,    /* todo_flags_finish */
  0                                     /* letter */
};

