[PING*2][PATCH][Updated][STAGE2] New interference graph implementation.

Kenneth Zadeck zadeck@naturalbridge.com
Fri Oct 5 14:28:00 GMT 2007


Peter Bergner wrote:
> With Kenny's commit of his new interference graph builder, my new interference
> graph representation patch needed updating due to some of the patched code
> being moved to different files and was changed with Kenny's patch.  The original
> patch was posted during stage2 here:
>
>     http://gcc.gnu.org/ml/gcc-patches/2007-09/msg00529.html
>
> The updated patch and ChangeLog is here.  With help from Kenny, this has
> bootstrapped and regtested with no regressions on powerpc64-linux (ran the
> testsuite in both 32-bit and 64-bit modes), x86-{32,64}-linux and
> ia-64-linux.
>
>   
The only issue that i have with this patch is that there seems to be a
general convention in gcc that functions with names that end in "_p" are
tests, like "bitmap_empty_p".  I think that the two functions,
set_conflict_p and set_conflicts_p should be renamed.

Asside from that, the patch is fine for me.

Kenny

> Is this ok for mainline?
>
> Peter
>
> 	* ra-conflict.c: Include "sparseset.h".
> 	(conflicts): Change to HOST_WIDEST_FAST_INT.
> 	(allocnos_live): Redefine variable as a sparseset.
> 	(SET_ALLOCNO_LIVE, CLEAR_ALLOCNO_LIVE, GET_ALLOCNO_LIVE): Delete macros.
> 	(allocno_row_words): Removed global variable.
> 	(partial_bitnum, max_bitnum, adjacency_pool, adjacency): New variables.
> 	(CONFLICT_BITNUM, CONFLICT_BITNUM_FAST): New defines.
> 	(conflict_p, set_conflict_p, set_conflicts_p): New functions.
> 	(record_one_conflict_between_regnos): Cache allocno values and reuse.
> 	Use set_conflict_p.
> 	(record_one_conflict): Update uses of allocnos_live to use
> 	the sparseset routines.  Use set_conflicts_p.
> 	(mark_reg_store): Likewise.
> 	(set_reg_in_live): Likewise.
> 	(global_conflicts): Update uses of allocnos_live.
> 	Use the new adjacency list to visit an allocno's neighbors
> 	rather than iterating over all possible allocnos.
> 	Call set_conflicts_p to setup conflicts rather than adding
> 	them manually.
> 	* global.c: Comments updated.  
> 	(CONFLICTP): Delete define.
> 	(regno_compare): New function.  Add prototype.
> 	(global_alloc): Sort the allocno to regno mapping according to
> 	which basic blocks the regnos are referenced in.  Modify the
> 	conflict bit matrix to a compressed triangular bitmatrix.
> 	Only allocate the conflict bit matrix and adjacency lists if
> 	we are actually going to allocate something.
> 	(expand_preferences): Use conflict_p.  Update uses of allocnos_live.
> 	(prune_preferences): Use the FOR_EACH_CONFLICT macro to visit an
> 	allocno's neighbors rather than iterating over all possible allocnos.
> 	(mirror_conflicts): Removed function.
> 	(dump_conflicts): Iterate over regnos rather than allocnos so
> 	that all dump output will be sorted by regno number.
> 	Use the FOR_EACH_CONFLICT macro.
> 	* ra.h: Comments updated.
> 	(conflicts): Update prototype to HOST_WIDEST_FAST_INT.
> 	(partial_bitnum, max_bitnum, adjacency, adjacency_pool): Add prototypes.
> 	(ADJACENCY_VEC_LENGTH, FOR_EACH_CONFLICT): New defines.
> 	(adjacency_list_d, adjacency_iterator_d): New types.
> 	(add_neighbor, adjacency_iter_init, adjacency_iter_done,
> 	adjacency_iter_next, regno_basic_block): New static inline functions.
> 	(EXECUTE_IF_SET_IN_ALLOCNO_SET): Removed define.
> 	(conflict_p): Add function prototype.
> 	* sparseset.h, sparseset.c: New files.
> 	* Makefile.in (OBJS-common): Add sparseset.o.
> 	(sparseset.o): New rule.
>
> Index: ra-conflict.c
> ===================================================================
> --- ra-conflict.c	(revision 128990)
> +++ ra-conflict.c	(working copy)
> @@ -41,63 +41,174 @@ along with GCC; see the file COPYING3.  
>  #include "vecprim.h"
>  #include "ra.h"
>  #include "sbitmap.h"
> -
> -/* Test, set or clear bit number I in allocnos_live,
> -   a bit vector indexed by allocno.  */
> -
> -#define SET_ALLOCNO_LIVE(A, I)			\
> -  ((A)[(unsigned) (I) / HOST_BITS_PER_WIDE_INT]				\
> -     |= ((HOST_WIDE_INT) 1 << ((unsigned) (I) % HOST_BITS_PER_WIDE_INT)))
> -
> -#define CLEAR_ALLOCNO_LIVE(A, I)		\
> -  ((A)[(unsigned) (I) / HOST_BITS_PER_WIDE_INT]			\
> -     &= ~((HOST_WIDE_INT) 1 << ((unsigned) (I) % HOST_BITS_PER_WIDE_INT)))
> -
> -#define GET_ALLOCNO_LIVE(A, I)		\
> -  ((A)[(unsigned) (I) / HOST_BITS_PER_WIDE_INT]			\
> -     & ((HOST_WIDE_INT) 1 << ((unsigned) (I) % HOST_BITS_PER_WIDE_INT)))
> +#include "sparseset.h"
>  
>  /* Externs defined in regs.h.  */
>  
>  int max_allocno;
>  struct allocno *allocno;
> -HOST_WIDE_INT *conflicts;
> -int allocno_row_words;
> +HOST_WIDEST_FAST_INT *conflicts;
>  int *reg_allocno;
> +int *partial_bitnum;
> +int max_bitnum;
> +alloc_pool adjacency_pool;
> +adjacency_t **adjacency;
>  
>  typedef struct df_ref * df_ref_t;
>  DEF_VEC_P(df_ref_t);
>  DEF_VEC_ALLOC_P(df_ref_t,heap);
>  
> +/* Macros to determine the bit number within the triangular bit matrix for
> +   the two allocnos Low and HIGH, with LOW strictly less than HIGH.  */
> +
> +#define CONFLICT_BITNUM(I, J) \
> +  (((I) < (J)) ? (partial_bitnum[I] + (J)) : (partial_bitnum[J] + (I)))
> +
> +#define CONFLICT_BITNUM_FAST(I, I_PARTIAL_BITNUM, J) \
> +  (((I) < (J)) ? ((I_PARTIAL_BITNUM) + (J)) : (partial_bitnum[J] + (I)))
> +
> +bool
> +conflict_p (int allocno1, int allocno2)
> +{
> +  int bitnum;
> +  HOST_WIDEST_FAST_INT word, mask;
> +
> +#ifdef ENABLE_CHECKING
> +  int blk1, blk2;
> +
> +  gcc_assert (allocno1 >= 0 && allocno1 < max_allocno);
> +  gcc_assert (allocno2 >= 0 && allocno2 < max_allocno);
> +
> +  blk1 = regno_basic_block (allocno[allocno1].reg);
> +  blk2 = regno_basic_block (allocno[allocno2].reg);
> +  gcc_assert (blk1 == 0 || blk2 == 0 || blk1 == blk2);
> +#endif
> +
> +  if (allocno1 == allocno2)
> +    /* By definition, an allocno does not conflict with itself.  */
> +    return 0;
> +
> +  bitnum = CONFLICT_BITNUM (allocno1, allocno2);
> +
> +#ifdef ENABLE_CHECKING
> +  gcc_assert (bitnum >= 0 && bitnum < max_bitnum);
> +#endif
> +
> +  word = conflicts[bitnum / HOST_BITS_PER_WIDEST_FAST_INT];
> +  mask = (HOST_WIDEST_FAST_INT) 1 << (bitnum % HOST_BITS_PER_WIDEST_FAST_INT);
> +  return (word & mask) != 0;
> +}
> +
> +/* Add conflict edges between ALLOCNO1 and ALLOCNO2.  */
> +
> +static void
> +set_conflict_p (int allocno1, int allocno2)
> +{
> +  int bitnum, index;
> +  HOST_WIDEST_FAST_INT word, mask;
> +
> +#ifdef ENABLE_CHECKING
> +  int blk1, blk2;
> +
> +  gcc_assert (allocno1 >= 0 && allocno1 < max_allocno);
> +  gcc_assert (allocno2 >= 0 && allocno2 < max_allocno);
> +
> +  blk1 = regno_basic_block (allocno[allocno1].reg);
> +  blk2 = regno_basic_block (allocno[allocno2].reg);
> +  gcc_assert (blk1 == 0 || blk2 == 0 || blk1 == blk2);
> +#endif
> +
> +  /* By definition, an allocno does not conflict with itself.  */
> +  if (allocno1 == allocno2)
> +    return;
> +
> +  bitnum = CONFLICT_BITNUM (allocno1, allocno2);
> +
> +#ifdef ENABLE_CHECKING
> +  gcc_assert (bitnum >= 0 && bitnum < max_bitnum);
> +#endif
> +
> +  index = bitnum / HOST_BITS_PER_WIDEST_FAST_INT;
> +  word = conflicts[index];
> +  mask = (HOST_WIDEST_FAST_INT) 1 << (bitnum % HOST_BITS_PER_WIDEST_FAST_INT);
> +
> +  if ((word & mask) == 0)
> +    {
> +      conflicts[index] = word | mask;
> +      add_neighbor (allocno1, allocno2);
> +      add_neighbor (allocno2, allocno1);
> +    }
> +}
> +
> +/* Add conflict edges between ALLOCNO1 and all allocnos currently live.  */
> +
> +static void
> +set_conflicts_p (int allocno1, sparseset live)
> +{
> +  int i;
> +  int bitnum, index;
> +  HOST_WIDEST_FAST_INT word, mask;
> +  int partial_bitnum_allocno1;
> +
> +#ifdef ENABLE_CHECKING
> +  gcc_assert (allocno1 >= 0 && allocno1 < max_allocno);
> +#endif
> +
> +  partial_bitnum_allocno1 = partial_bitnum[allocno1];
> +
> +  EXECUTE_IF_SET_IN_SPARSESET (live, i)
> +  {
> +    /* By definition, an allocno does not conflict with itself.  */
> +    if (allocno1 == i)
> +      continue;
> +
> +#ifdef ENABLE_CHECKING
> +    gcc_assert (i >= 0 && i < max_allocno);
> +#endif
> +
> +    bitnum = CONFLICT_BITNUM_FAST (allocno1, partial_bitnum_allocno1, i);
> +
> +#ifdef ENABLE_CHECKING
> +    gcc_assert (bitnum >= 0 && bitnum < max_bitnum);
> +#endif
> +
> +    index = bitnum / HOST_BITS_PER_WIDEST_FAST_INT;
> +    word = conflicts[index];
> +    mask = (HOST_WIDEST_FAST_INT) 1 << (bitnum % HOST_BITS_PER_WIDEST_FAST_INT);
> +
> +    if ((word & mask) == 0)
> +      {
> +	conflicts[index] = word | mask;
> +	add_neighbor (allocno1, i);
> +	add_neighbor (i, allocno1);
> +      }
> +  }
> +}
> +
> +
>  /* Add a conflict between R1 and R2.  */
>  
>  static void
>  record_one_conflict_between_regnos (enum machine_mode mode1, int r1, 
>  				    enum machine_mode mode2, int r2)
>  {
> +  int allocno1 = reg_allocno[r1];
> +  int allocno2 = reg_allocno[r2];
> +
>    if (dump_file)
>      fprintf (dump_file, "  rocbr adding %d<=>%d\n", r1, r2);
> -  if (reg_allocno[r1] >= 0 && reg_allocno[r2] >= 0)
> -    {
> -      int tr1 = reg_allocno[r1];
> -      int tr2 = reg_allocno[r2];
> -      int ialloc_prod = tr1 * allocno_row_words;
>  
> -      SET_ALLOCNO_LIVE ((&conflicts[ialloc_prod]), tr2);
> -    }
> -  else if (reg_allocno[r1] >= 0)
> +  if (allocno1 >= 0 && allocno2 >= 0)
> +    set_conflict_p (allocno1, allocno2);
> +  else if (allocno1 >= 0)
>      {
> -      int tr1 = reg_allocno[r1];
> -
>        if (r2 < FIRST_PSEUDO_REGISTER)
> -	add_to_hard_reg_set (&allocno[tr1].hard_reg_conflicts, mode2, r2);
> +	add_to_hard_reg_set (&allocno[allocno1].hard_reg_conflicts, mode2, r2);
>      }
> -  else if (reg_allocno[r2] >= 0)
> +  else if (allocno2 >= 0)
>      {
> -      int tr2 = reg_allocno[r2];
> -
>        if (r1 < FIRST_PSEUDO_REGISTER)
> -        add_to_hard_reg_set (&allocno[tr2].hard_reg_conflicts, mode1, r1);
> +        add_to_hard_reg_set (&allocno[allocno2].hard_reg_conflicts, mode1, r1);
>      }
>  
>    /* Now, recursively handle the reg_renumber cases.  */
> @@ -115,7 +226,7 @@ record_one_conflict_between_regnos (enum
>     before calling here.  */
>  
>  static void
> -record_one_conflict (HOST_WIDE_INT *allocnos_live, 
> +record_one_conflict (sparseset allocnos_live, 
>  		     HARD_REG_SET *hard_regs_live, int regno)
>  {
>    int i;
> @@ -123,18 +234,17 @@ record_one_conflict (HOST_WIDE_INT *allo
>    if (regno < FIRST_PSEUDO_REGISTER)
>      /* When a hard register becomes live, record conflicts with live
>         pseudo regs.  */
> -    EXECUTE_IF_SET_IN_ALLOCNO_SET (allocnos_live, i,
> +    EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, i)
>        {
>  	SET_HARD_REG_BIT (allocno[i].hard_reg_conflicts, regno);
>  	if (dump_file)
>  	  fprintf (dump_file, "  roc adding %d<=>%d\n", allocno[i].reg, regno);
> -      });
> +      }
>    else
>      /* When a pseudo-register becomes live, record conflicts first
>         with hard regs, then with other pseudo regs.  */
>      {
>        int ialloc = reg_allocno[regno];
> -      int ialloc_prod = ialloc * allocno_row_words;
>  
>        if (dump_file)
>  	{
> @@ -144,18 +254,16 @@ record_one_conflict (HOST_WIDE_INT *allo
>  		&& !TEST_HARD_REG_BIT (allocno[ialloc].hard_reg_conflicts, i))
>  	      fprintf (dump_file, "%d ", i);
>  	  
> -	  EXECUTE_IF_SET_IN_ALLOCNO_SET (allocnos_live, i,
> +	  EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, i)
>  	    {
> -	      if (!GET_ALLOCNO_LIVE (&conflicts[ialloc_prod], i))
> +	      if (!conflict_p (ialloc, i))
>  		fprintf (dump_file, "%d ", allocno[i].reg);
> -	    });
> +	    }
>  	  fprintf (dump_file, ")\n");
>  	}
>  
>        IOR_HARD_REG_SET (allocno[ialloc].hard_reg_conflicts, *hard_regs_live);
> -
> -      for (i = allocno_row_words - 1; i >= 0; i--)
> -	conflicts[ialloc_prod + i] |= allocnos_live[i];
> +      set_conflicts_p (ialloc, allocnos_live);
>      }
>  }
>  
> @@ -168,7 +276,7 @@ record_one_conflict (HOST_WIDE_INT *allo
>     nothing.  */
>  
>  static void
> -mark_reg_store (HOST_WIDE_INT *allocnos_live, 
> +mark_reg_store (sparseset allocnos_live, 
>  		HARD_REG_SET *hard_regs_live, 
>  		struct df_ref *ref)
>  {
> @@ -340,7 +448,7 @@ ra_init_live_subregs (bool init_value, 
>     set not live even if REG is a subreg.  */
>  
>  inline static void
> -clear_reg_in_live (HOST_WIDE_INT *allocnos_live,
> +clear_reg_in_live (sparseset allocnos_live,
>  		   sbitmap *live_subregs, 
>  		   int *live_subregs_used,
>  		   HARD_REG_SET *hard_regs_live, 
> @@ -359,7 +467,7 @@ clear_reg_in_live (HOST_WIDE_INT *allocn
>  	  unsigned int start = SUBREG_BYTE (reg);
>  	  unsigned int last = start + GET_MODE_SIZE (GET_MODE (reg));
>  
> -	  ra_init_live_subregs (GET_ALLOCNO_LIVE (allocnos_live, allocnum) != 0, 
> +	  ra_init_live_subregs (sparseset_bit_p (allocnos_live, allocnum), 
>  				live_subregs, live_subregs_used, allocnum, reg);
>  
>  	  /* Ignore the paradoxical bits.  */
> @@ -375,19 +483,19 @@ clear_reg_in_live (HOST_WIDE_INT *allocn
>  	  if (sbitmap_empty_p (live_subregs[allocnum]))
>  	    {
>  	      live_subregs_used[allocnum] = 0;
> -	      CLEAR_ALLOCNO_LIVE (allocnos_live, allocnum);
> +	      sparseset_clear_bit (allocnos_live, allocnum);
>  	    }
>  	  else
>  	    /* Set the allocnos live here because that bit has to be
>  	       true to get us to look at the live_subregs fields.  */
> -	    SET_ALLOCNO_LIVE (allocnos_live, allocnum);
> +	    sparseset_set_bit (allocnos_live, allocnum);
>  	}
>        else
>  	{
>  	  /* Resetting the live_subregs_used is effectively saying do not use the 
>  	     subregs because we are writing the whole pseudo.  */
>  	  live_subregs_used[allocnum] = 0;
> -	  CLEAR_ALLOCNO_LIVE (allocnos_live, allocnum);
> +	  sparseset_clear_bit (allocnos_live, allocnum);
>  	}
>      }
>  
> @@ -423,7 +531,7 @@ clear_reg_in_live (HOST_WIDE_INT *allocn
>     set live even if REG is a subreg.  */
>  
>  inline static void
> -set_reg_in_live (HOST_WIDE_INT *allocnos_live, 
> +set_reg_in_live (sparseset allocnos_live, 
>  		 sbitmap *live_subregs, 
>  		 int *live_subregs_used,
>  		 HARD_REG_SET *hard_regs_live, 
> @@ -441,7 +549,7 @@ set_reg_in_live (HOST_WIDE_INT *allocnos
>  	  unsigned int start = SUBREG_BYTE (reg);
>  	  unsigned int last = start + GET_MODE_SIZE (GET_MODE (reg));
>  
> -	  ra_init_live_subregs (GET_ALLOCNO_LIVE (allocnos_live, allocnum) != 0, 
> +	  ra_init_live_subregs (sparseset_bit_p (allocnos_live, allocnum), 
>  				live_subregs, live_subregs_used, allocnum, reg);
>  	  
>  	  /* Ignore the paradoxical bits.  */
> @@ -459,7 +567,7 @@ set_reg_in_live (HOST_WIDE_INT *allocnos
>  	   subregs because we are writing the whole pseudo.  */
>  	  live_subregs_used[allocnum] = 0;
>       
> -      SET_ALLOCNO_LIVE (allocnos_live, allocnum);
> +      sparseset_set_bit (allocnos_live, allocnum);
>      }
>        
>    if (regno >= FIRST_PSEUDO_REGISTER)
> @@ -630,7 +738,7 @@ global_conflicts (void)
>  
>    HARD_REG_SET hard_regs_live;
>    HARD_REG_SET renumbers_live;
> -  HOST_WIDE_INT *allocnos_live;
> +  sparseset allocnos_live;
>    bitmap live = BITMAP_ALLOC (NULL);
>    VEC (df_ref_t, heap) *clobbers = NULL;
>    VEC (df_ref_t, heap) *dying_regs = NULL;
> @@ -654,7 +762,7 @@ global_conflicts (void)
>        fprintf (dump_file, "\n");
>      }
>  
> -  allocnos_live = XNEWVEC (HOST_WIDE_INT, allocno_row_words);
> +  allocnos_live = sparseset_alloc (max_allocno);
>  
>    FOR_EACH_BB (bb)
>      {
> @@ -663,7 +771,7 @@ global_conflicts (void)
>        bitmap_copy (live, DF_LIVE_OUT (bb));
>        df_simulate_artificial_refs_at_end (bb, live);
>  
> -      memset (allocnos_live, 0, allocno_row_words * sizeof (HOST_WIDE_INT));
> +      sparseset_clear (allocnos_live);
>        memset (live_subregs_used, 0, max_allocno * sizeof (int));
>        CLEAR_HARD_REG_SET (hard_regs_live);
>        CLEAR_HARD_REG_SET (renumbers_live);
> @@ -720,11 +828,11 @@ global_conflicts (void)
>  		  fprintf (dump_file, "%d ", i);
>  
>  	      fprintf (dump_file, "] pseudos [");
> -	      EXECUTE_IF_SET_IN_ALLOCNO_SET (allocnos_live, i,
> +	      EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, i)
>  		{
>  		  dump_ref (dump_file, " ", "", regno_reg_rtx[allocno[i].reg],
>  			    allocno[i].reg, live_subregs, live_subregs_used);
> -		});
> +		}
>  	      fprintf (dump_file, "]\n");
>  	    }
>  
> @@ -803,7 +911,7 @@ global_conflicts (void)
>  	     cannot not want to kill the renumbers from the other
>  	     pseudos.  */
>  	  CLEAR_HARD_REG_SET (renumbers_live);
> -	  EXECUTE_IF_SET_IN_ALLOCNO_SET (allocnos_live, i,
> +	  EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, i)
>  	    {
>  	      unsigned int regno = allocno[i].reg;
>  	      int renumber = reg_renumber[regno];
> @@ -811,7 +919,7 @@ global_conflicts (void)
>  	      if (renumber >= 0 && renumber < FIRST_PSEUDO_REGISTER)
>  		set_renumbers_live (&renumbers_live, live_subregs, live_subregs_used, 
>  				    i, renumber);
> -	    }); 
> +	    }
>  					 
>  	  /* Add the uses to the live sets.  Keep track of the regs
>  	     that are dying inside the insn, this set will be useful
> @@ -850,7 +958,7 @@ global_conflicts (void)
>  		      unsigned int start = SUBREG_BYTE (reg);
>  		      unsigned int last = start + GET_MODE_SIZE (GET_MODE (reg));
>  
> -		      ra_init_live_subregs (GET_ALLOCNO_LIVE (allocnos_live, allocnum) != 0, 
> +		      ra_init_live_subregs (sparseset_bit_p (allocnos_live, allocnum), 
>  					    live_subregs, live_subregs_used, allocnum, reg);
>  		      
>  		      /* Ignore the paradoxical bits.  */
> @@ -870,13 +978,13 @@ global_conflicts (void)
>  			  start++;
>  			}
>  		      
> -		      SET_ALLOCNO_LIVE (allocnos_live, allocnum);
> +		      sparseset_set_bit (allocnos_live, allocnum);
>  		      if (renumber >= 0 && renumber < FIRST_PSEUDO_REGISTER)
>  			set_renumbers_live (&renumbers_live, live_subregs, live_subregs_used, 
>  					    allocnum, renumber);
>  		    }
>  		  
> -		  else if (GET_ALLOCNO_LIVE (allocnos_live, allocnum) == 0)
> +		  else if (!sparseset_bit_p (allocnos_live, allocnum))
>  		    {
>  		      if (dump_file)
>  			fprintf (dump_file, "    dying pseudo\n");
> @@ -885,7 +993,7 @@ global_conflicts (void)
>  			 effectively saying do not use the subregs
>  			 because we are reading the whole pseudo.  */
>  		      live_subregs_used[allocnum] = 0;
> -		      SET_ALLOCNO_LIVE (allocnos_live, allocnum);
> +		      sparseset_set_bit (allocnos_live, allocnum);
>  		      if (renumber >= 0 && renumber < FIRST_PSEUDO_REGISTER)
>  			set_renumbers_live (&renumbers_live, live_subregs, live_subregs_used, 
>  					    allocnum, renumber);
> @@ -1087,10 +1195,10 @@ global_conflicts (void)
>  	     because caller-save, fixup_abnormal_edges and possibly the table
>  	     driven EH machinery are not quite ready to handle such regs live
>  	     across such edges.  */
> -	  EXECUTE_IF_SET_IN_ALLOCNO_SET (allocnos_live, i,
> -					 {
> -					   allocno[i].no_stack_reg = 1;
> -					 });
> +	  EXECUTE_IF_SET_IN_SPARSESET (allocnos_live, i)
> +	    {
> +	      allocno[i].no_stack_reg = 1;
> +	    }
>  
>  	  for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
>  	    record_one_conflict (allocnos_live, &hard_regs_live, i);
> Index: global.c
> ===================================================================
> --- global.c	(revision 128990)
> +++ global.c	(working copy)
> @@ -63,26 +63,41 @@ along with GCC; see the file COPYING3.  
>     reg numbers to allocnos and vice versa.
>     max_allocno gets the number of allocnos in use.
>  
> -   2. Allocate a max_allocno by max_allocno conflict bit matrix and
> -   clear it.  This is called "conflict".
> +   2. Allocate a max_allocno by max_allocno compressed triangular conflict
> +   bit matrix (a triangular bit matrix with portions removed for which we
> +   can guarantee there are no conflicts, example: two local pseudos that
> +   live in different basic blocks) and clear it.  This is called "conflict".
> +   Note that for triangular bit matrices, there are two possible equations
> +   for computing the bit number for two allocnos: LOW and HIGH (LOW < HIGH):
> +
> +     1) BITNUM = f(HIGH) + LOW, where
> +       f(HIGH) = (HIGH * (HIGH - 1)) / 2
> +
> +     2) BITNUM = f(LOW) + HIGH, where
> +       f(LOW) = LOW * (max_allocno - LOW) + (LOW * (LOW - 1)) / 2 - LOW - 1
> +
> +   We use the second (and less common) equation as this gives us better
> +   cache locality for local allocnos that are live within the same basic
> +   block.  Also note that f(HIGH) and f(LOW) can be precalculated for all
> +   values of HIGH and LOW, so all that is necessary to compute the bit
> +   number for two allocnos LOW and HIGH is a load followed by an addition.
>  
>     Allocate a max_allocno by FIRST_PSEUDO_REGISTER conflict matrix for
>     conflicts between allocnos and explicit hard register use (which
>     includes use of pseudo-registers allocated by local_alloc).  This
>     is the hard_reg_conflicts inside each allocno.
>  
> -   3. For each basic block
> -    walk forward through the block, recording which
> -    pseudo-registers and which hardware registers are live.
> -    Build the conflict matrix between the pseudo-registers
> -    and another of pseudo-registers versus hardware registers.
> -    Also record the preferred hardware registers
> -    for each pseudo-register.
> +   3. For each basic block, walk backward through the block, recording
> +   which pseudo-registers and which hardware registers are live.
> +   Build the conflict matrix between the pseudo-registers and another of
> +   pseudo-registers versus hardware registers.
>  
> -   4. Sort a table of the allocnos into order of
> -   desirability of the variables.
> +   4. For each basic block, walk backward through the block, recording
> +   the preferred hardware registers for each pseudo-register.
>  
> -   5. Allocate the variables in that order; each if possible into
> +   5. Sort a table of the allocnos into order of desirability of the variables.
> +
> +   6. Allocate the variables in that order; each if possible into
>     a preferred register, else into another register.  */
>  

>  /* A vector of the integers from 0 to max_allocno-1,
> @@ -90,12 +105,6 @@ along with GCC; see the file COPYING3.  
>  
>  static int *allocno_order;
>  
> -/* Two macros to test or store 1 in an element of `conflicts'.  */
> -
> -#define CONFLICTP(I, J) \
> - (conflicts[(I) * allocno_row_words + (unsigned) (J) / HOST_BITS_PER_WIDE_INT]	\
> -  & ((HOST_WIDE_INT) 1 << ((unsigned) (J) % HOST_BITS_PER_WIDE_INT)))
> -
>  /* Set of registers that global-alloc isn't supposed to use.  */
>  
>  static HARD_REG_SET no_global_alloc_regs;
> @@ -206,8 +215,8 @@ compute_regs_asm_clobbered (char *regs_a
>  
>  static HARD_REG_SET eliminable_regset;
>  
> +static int regno_compare (const void *, const void *);
>  static int allocno_compare (const void *, const void *);
> -static void mirror_conflicts (void);
>  static void expand_preferences (void);
>  static void prune_preferences (void);
>  static void set_preferences (void);
> @@ -315,6 +324,8 @@ global_alloc (void)
>  {
>    int retval;
>    size_t i;
> +  int max_blk;
> +  int *num_allocnos_per_blk;
>  
>    compute_regsets (&eliminable_regset, &no_global_alloc_regs);
>  
> @@ -357,9 +368,8 @@ global_alloc (void)
>  
>    reg_allocno = XNEWVEC (int, max_regno);
>  
> -  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> -    reg_allocno[i] = -1;
> -
> +  /* Initially fill the reg_allocno array with regno's...  */
> +  max_blk = 0;
>    max_allocno = 0;
>    for (i = FIRST_PSEUDO_REGISTER; i < (size_t) max_regno; i++)
>      /* Note that reg_live_length[i] < 0 indicates a "constant" reg
> @@ -371,28 +381,88 @@ global_alloc (void)
>  	&& (! current_function_has_nonlocal_label
>  	    || REG_N_CALLS_CROSSED (i) == 0))
>        {
> -	reg_allocno[i] = max_allocno++;
> +	int blk = regno_basic_block (i);
> +	reg_allocno[max_allocno++] = i;
> +	if (blk > max_blk)
> +	  max_blk = blk;
>  	gcc_assert (REG_LIVE_LENGTH (i));
>        }
> -    else
> -      reg_allocno[i] = -1;
>  
>    allocno = XCNEWVEC (struct allocno, max_allocno);
> +  partial_bitnum = XNEWVEC (int, max_allocno);
> +  num_allocnos_per_blk = XCNEWVEC (int, max_blk + 1);
>  
> -  for (i = FIRST_PSEUDO_REGISTER; i < (size_t) max_regno; i++)
> -    if (reg_allocno[i] >= 0)
> -      {
> -	int num = reg_allocno[i];
> -	allocno[num].reg = i;
> -	allocno[num].size = PSEUDO_REGNO_SIZE (i);
> -	allocno[num].calls_crossed += REG_N_CALLS_CROSSED (i);
> -	allocno[num].throwing_calls_crossed
> -	  += REG_N_THROWING_CALLS_CROSSED (i);
> -	allocno[num].n_refs += REG_N_REFS (i);
> -	allocno[num].freq += REG_FREQ (i);
> -	if (allocno[num].live_length < REG_LIVE_LENGTH (i))
> -	  allocno[num].live_length = REG_LIVE_LENGTH (i);
> -      }
> +  /* ...so we can sort them in the order we want them to receive
> +     their allocnos.  */
> +  qsort (reg_allocno, max_allocno, sizeof (int), regno_compare);
> +
> +  for (i = 0; i < (size_t) max_allocno; i++)
> +    {
> +      int regno = reg_allocno[i];
> +      int blk = regno_basic_block (regno);
> +      num_allocnos_per_blk[blk]++;
> +      allocno[i].reg = regno;
> +      allocno[i].size = PSEUDO_REGNO_SIZE (regno);
> +      allocno[i].calls_crossed += REG_N_CALLS_CROSSED (regno);
> +      allocno[i].throwing_calls_crossed
> +	+= REG_N_THROWING_CALLS_CROSSED (regno);
> +      allocno[i].n_refs += REG_N_REFS (regno);
> +      allocno[i].freq += REG_FREQ (regno);
> +      if (allocno[i].live_length < REG_LIVE_LENGTH (regno))
> +	allocno[i].live_length = REG_LIVE_LENGTH (regno);
> +    }
> +
> +  /* The "global" block must contain all allocnos.  */
> +  num_allocnos_per_blk[0] = max_allocno;
> +
> +  /* Now reinitialize the reg_allocno array in terms of the
> +     optimized regno to allocno mapping we created above.  */
> +  for (i = 0; i < (size_t) max_regno; i++)
> +    reg_allocno[i] = -1;
> +
> +  max_bitnum = 0;
> +  for (i = 0; i < (size_t) max_allocno; i++)
> +    {
> +      int regno = allocno[i].reg;
> +      int blk = regno_basic_block (regno);
> +      int row_size = --num_allocnos_per_blk[blk];
> +      reg_allocno[regno] = (int) i;
> +      partial_bitnum[i] = (row_size > 0) ? max_bitnum - ((int) i + 1) : -1;
> +      max_bitnum += row_size;
> +    }
> +
> +#ifdef ENABLE_CHECKING
> +  gcc_assert (max_bitnum <= ((max_allocno * (max_allocno - 1)) / 2));
> +#endif
> +
> +  if (dump_file)
> +    {
> +      int num_bits, num_bytes, actual_bytes;
> +
> +      fprintf (dump_file, "## max_blk:     %d\n", max_blk);
> +      fprintf (dump_file, "## max_regno:   %d\n", max_regno);
> +      fprintf (dump_file, "## max_allocno: %d\n", max_allocno);
> +
> +      num_bits = max_bitnum;
> +      num_bytes = CEIL (num_bits, 8);
> +      actual_bytes = num_bytes;
> +      fprintf (dump_file, "## Compressed triangular bitmatrix size: ");
> +      fprintf (dump_file, "%d bits, %d bytes\n", num_bits, num_bytes);
> +
> +      num_bits = (max_allocno * (max_allocno - 1)) / 2;
> +      num_bytes = CEIL (num_bits, 8);
> +      fprintf (dump_file, "## Standard triangular bitmatrix size:   ");
> +      fprintf (dump_file, "%d bits, %d bytes [%.2f%%]\n",
> +	       num_bits, num_bytes,
> +	       100.0 * ((double) actual_bytes / (double) num_bytes));
> +
> +      num_bits = max_allocno * max_allocno;
> +      num_bytes = CEIL (num_bits, 8);
> +      fprintf (dump_file, "## Square bitmatrix size:                ");
> +      fprintf (dump_file, "%d bits, %d bytes [%.2f%%]\n",
> +	       num_bits, num_bytes,
> +	       100.0 * ((double) actual_bytes / (double) num_bytes));
> +    }
>  
>    /* Calculate amount of usage of each hard reg by pseudos
>       allocated by local-alloc.  This is to see if we want to
> @@ -433,18 +503,26 @@ global_alloc (void)
>  	  fprintf (dump_file, " %d", (int)i);
>        fprintf (dump_file, "\n");
>      }
> -  allocno_row_words = (max_allocno + HOST_BITS_PER_WIDE_INT - 1) / HOST_BITS_PER_WIDE_INT;
>  
> -  /* We used to use alloca here, but the size of what it would try to
> -     allocate would occasionally cause it to exceed the stack limit and
> -     cause unpredictable core dumps.  Some examples were > 2Mb in size.  */
> -  conflicts = XCNEWVEC (HOST_WIDE_INT, max_allocno * allocno_row_words);
> +  conflicts = NULL;
> +  adjacency = NULL;
> +  adjacency_pool = NULL;
>  
>    /* If there is work to be done (at least one reg to allocate),
>       perform global conflict analysis and allocate the regs.  */
>  
>    if (max_allocno > 0)
>      {
> +      /* We used to use alloca here, but the size of what it would try to
> +	 allocate would occasionally cause it to exceed the stack limit and
> +	 cause unpredictable core dumps.  Some examples were > 2Mb in size.  */
> +      conflicts = XCNEWVEC (HOST_WIDEST_FAST_INT,
> +			    CEIL(max_bitnum, HOST_BITS_PER_WIDEST_FAST_INT));
> +
> +      adjacency = XCNEWVEC (adjacency_t *, max_allocno);
> +      adjacency_pool = create_alloc_pool ("global_alloc adjacency list pool",
> +					  sizeof (adjacency_t), 1024);
> +
>        /* Scan all the insns and compute the conflicts among allocnos
>  	 and between allocnos and hard regs.  */
>  
> @@ -460,8 +538,6 @@ global_alloc (void)
>  	 global_conflicts.  */
>        df_set_flags (DF_NO_INSN_RESCAN);
>  
> -      mirror_conflicts ();
> -
>        /* Eliminate conflicts between pseudos and eliminable registers.  If
>  	 the register is not eliminated, the pseudo won't really be able to
>  	 live in the eliminable register, so the conflict doesn't matter.
> @@ -536,6 +612,7 @@ global_alloc (void)
>  	  }
>  
>        free (allocno_order);
> +      free (conflicts);
>      }
>  
>    /* Do the reloads now while the allocno data still exists, so that we can
> @@ -552,12 +629,44 @@ global_alloc (void)
>  
>    /* Clean up.  */
>    free (reg_allocno);
> +  free (num_allocnos_per_blk);
> +  free (partial_bitnum);
>    free (allocno);
> -  free (conflicts);
> +  if (adjacency != NULL)
> +    {
> +      free_alloc_pool (adjacency_pool);
> +      free (adjacency);
> +    }
>  
>    return retval;
>  }
>  
> +/* Sort predicate for ordering the regnos.  We want the regno to allocno
> +   mapping to have the property that all "global" regnos (ie, regnos that
> +   are referenced in more than one basic block) have smaller allocno values
> +   than "local" regnos (ie, regnos referenced in only one basic block).
> +   In addition, for two basic blocks "i" and "j" with i < j, all regnos
> +   local to basic block i should have smaller allocno values than regnos
> +   local to basic block j.
> +   Returns -1 (1) if *v1p should be allocated before (after) *v2p.  */
> +
> +static int
> +regno_compare (const void *v1p, const void *v2p)
> +{
> +  int regno1 = *(const int *)v1p;
> +  int regno2 = *(const int *)v2p;
> +  int blk1 = REG_BASIC_BLOCK (regno1);
> +  int blk2 = REG_BASIC_BLOCK (regno2);
> +
> +  /* Prefer lower numbered basic blocks.  Note that global and unknown
> +     blocks have negative values, giving them high precedence.  */
> +  if (blk1 - blk2)
> +    return blk1 - blk2;
> +
> +  /* If both regs are referenced from the same block, sort by regno.  */
> +  return regno1 - regno2;
> +}
> +
>  /* Sort predicate for ordering the allocnos.
>     Returns -1 (1) if *v1 should be allocated before (after) *v2.  */
>  
> @@ -609,8 +718,8 @@ expand_preferences (void)
>  	if (REG_NOTE_KIND (link) == REG_DEAD
>  	    && REG_P (XEXP (link, 0))
>  	    && reg_allocno[REGNO (XEXP (link, 0))] >= 0
> -	    && ! CONFLICTP (reg_allocno[REGNO (SET_DEST (set))],
> -			    reg_allocno[REGNO (XEXP (link, 0))]))
> +	    && ! conflict_p (reg_allocno[REGNO (SET_DEST (set))],
> +			     reg_allocno[REGNO (XEXP (link, 0))]))
>  	  {
>  	    int a1 = reg_allocno[REGNO (SET_DEST (set))];
>  	    int a2 = reg_allocno[REGNO (XEXP (link, 0))];
> @@ -828,14 +937,14 @@ prune_preferences (void)
>  	 these registers).  */
>        HARD_REG_SET temp, temp2;
>        int allocno2;
> +      adjacency_iter ai;
>  
>        num = allocno_order[i];
>  
>        CLEAR_HARD_REG_SET (temp);
>        CLEAR_HARD_REG_SET (temp2);
>  
> -      EXECUTE_IF_SET_IN_ALLOCNO_SET (conflicts + num * allocno_row_words,
> -				     allocno2,
> +      FOR_EACH_CONFLICT (num, allocno2, ai)
>  	{
>  	  if (allocno_to_order[allocno2] > i)
>  	    {
> @@ -846,7 +955,7 @@ prune_preferences (void)
>  		IOR_HARD_REG_SET (temp2,
>  				  allocno[allocno2].hard_reg_full_preferences);
>  	    }
> -	});
> +	}
>  
>        AND_COMPL_HARD_REG_SET (temp, allocno[num].hard_reg_full_preferences);
>        IOR_HARD_REG_SET (temp, temp2);
> @@ -1167,6 +1276,7 @@ find_reg (int num, HARD_REG_SET losers, 
>      {
>        int lim, j;
>        HARD_REG_SET this_reg;
> +      adjacency_iter ai;
>  
>        /* Yes.  Record it as the hard register of this pseudo-reg.  */
>        reg_renumber[allocno[num].reg] = best_reg;
> @@ -1184,11 +1294,10 @@ find_reg (int num, HARD_REG_SET losers, 
>  	}
>        /* For each other pseudo-reg conflicting with this one,
>  	 mark it as conflicting with the hard regs this one occupies.  */
> -      lim = num;
> -      EXECUTE_IF_SET_IN_ALLOCNO_SET (conflicts + lim * allocno_row_words, j,
> +      FOR_EACH_CONFLICT (num, j, ai)
>  	{
>  	  IOR_HARD_REG_SET (allocno[j].hard_reg_conflicts, this_reg);
> -	});
> +	}
>      }
>  }
>  

> @@ -1224,38 +1333,6 @@ retry_global_alloc (int regno, HARD_REG_
>      }
>  }
>  

> -/* If CONFLICTP (i, j) is true, make sure CONFLICTP (j, i) is also true.  */
> -static void
> -mirror_conflicts (void)
> -{
> -  int i, j;
> -  int rw = allocno_row_words;
> -  int rwb = rw * HOST_BITS_PER_WIDE_INT;
> -  HOST_WIDE_INT *p = conflicts;
> -  HOST_WIDE_INT *q0 = conflicts, *q1, *q2;
> -  unsigned HOST_WIDE_INT mask;
> -
> -  for (i = max_allocno - 1, mask = 1; i >= 0; i--, mask <<= 1)
> -    {
> -      if (! mask)
> -	{
> -	  mask = 1;
> -	  q0++;
> -	}
> -      for (j = allocno_row_words - 1, q1 = q0; j >= 0; j--, q1 += rwb)
> -	{
> -	  unsigned HOST_WIDE_INT word;
> -
> -	  for (word = (unsigned HOST_WIDE_INT) *p++, q2 = q1; word;
> -	       word >>= 1, q2 += rw)
> -	    {
> -	      if (word & 1)
> -		*q2 |= mask;
> -	    }
> -	}
> -    }
> -}
> -

>  /* Indicate that hard register number FROM was eliminated and replaced with
>     an offset from hard register number TO.  The status of hard registers live
>     at the start of a basic block is updated by replacing a use of FROM with
> @@ -1519,6 +1596,7 @@ static void
>  dump_conflicts (FILE *file)
>  {
>    int i;
> +  int regno;
>    int has_preferences;
>    int nregs;
>    nregs = 0;
> @@ -1529,46 +1607,51 @@ dump_conflicts (FILE *file)
>        nregs++;
>      }
>    fprintf (file, ";; %d regs to allocate:", nregs);
> -  for (i = 0; i < max_allocno; i++)
> -    {
> -      int j;
> -      if (reg_renumber[allocno[allocno_order[i]].reg] >= 0)
> -	continue;
> -      fprintf (file, " %d", allocno[allocno_order[i]].reg);
> -      for (j = 0; j < max_regno; j++)
> -	if (reg_allocno[j] == allocno_order[i]
> -	    && j != allocno[allocno_order[i]].reg)
> -	  fprintf (file, "+%d", j);
> -      if (allocno[allocno_order[i]].size != 1)
> -	fprintf (file, " (%d)", allocno[allocno_order[i]].size);
> -    }
> +  for (regno = 0; regno < max_regno; regno++)
> +    if ((i = reg_allocno[regno]) >= 0)
> +      {
> +	int j;
> +	if (reg_renumber[allocno[allocno_order[i]].reg] >= 0)
> +	  continue;
> +	fprintf (file, " %d", allocno[allocno_order[i]].reg);
> +	for (j = 0; j < max_regno; j++)
> +	  if (reg_allocno[j] == allocno_order[i]
> +	      && j != allocno[allocno_order[i]].reg)
> +	    fprintf (file, "+%d", j);
> +	if (allocno[allocno_order[i]].size != 1)
> +	  fprintf (file, " (%d)", allocno[allocno_order[i]].size);
> +      }
>    fprintf (file, "\n");
>  
> -  for (i = 0; i < max_allocno; i++)
> -    {
> -      int j;
> -      fprintf (file, ";; %d conflicts:", allocno[i].reg);
> -      for (j = 0; j < max_allocno; j++)
> -	if (CONFLICTP (j, i))
> -	  fprintf (file, " %d", allocno[j].reg);
> -      for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
> -	if (TEST_HARD_REG_BIT (allocno[i].hard_reg_conflicts, j) && ! fixed_regs[j])
> -	  fprintf (file, " %d", j);
> -      fprintf (file, "\n");
> -
> -      has_preferences = 0;
> -      for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
> -	if (TEST_HARD_REG_BIT (allocno[i].hard_reg_preferences, j))
> -	  has_preferences = 1;
> -
> -      if (! has_preferences)
> -	continue;
> -      fprintf (file, ";; %d preferences:", allocno[i].reg);
> -      for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
> -	if (TEST_HARD_REG_BIT (allocno[i].hard_reg_preferences, j))
> -	  fprintf (file, " %d", j);
> -      fprintf (file, "\n");
> -    }
> +  for (regno = 0; regno < max_regno; regno++)
> +    if ((i = reg_allocno[regno]) >= 0)
> +      {
> +	int j;
> +	adjacency_iter ai;
> +	fprintf (file, ";; %d conflicts:", allocno[i].reg);
> +	FOR_EACH_CONFLICT (i, j, ai)
> +	  {
> +	    fprintf (file, " %d", allocno[j].reg);
> +	  }
> +	for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
> +	  if (TEST_HARD_REG_BIT (allocno[i].hard_reg_conflicts, j)
> +	      && !fixed_regs[j])
> +	    fprintf (file, " %d", j);
> +	fprintf (file, "\n");
> +
> +	has_preferences = 0;
> +	for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
> +	  if (TEST_HARD_REG_BIT (allocno[i].hard_reg_preferences, j))
> +	    has_preferences = 1;
> +
> +	if (!has_preferences)
> +	  continue;
> +	fprintf (file, ";; %d preferences:", allocno[i].reg);
> +	for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
> +	  if (TEST_HARD_REG_BIT (allocno[i].hard_reg_preferences, j))
> +	    fprintf (file, " %d", j);
> +	fprintf (file, "\n");
> +      }
>    fprintf (file, "\n");
>  }
>  
> Index: ra.h
> ===================================================================
> --- ra.h	(revision 128990)
> +++ ra.h	(working copy)
> @@ -85,49 +85,139 @@ extern struct allocno *allocno;
>  
>  extern int max_allocno;
>  
> -/* max_allocno by max_allocno array of bits, recording whether two
> -   allocno's conflict (can't go in the same hardware register).
> +/* max_allocno by max_allocno compressed triangular bit matrix,
> +   recording whether two allocnos conflict (can't go in the same
> +   hardware register).  */
>  
> -   `conflicts' is symmetric after the call to mirror_conflicts.  */
> -
> -extern HOST_WIDE_INT *conflicts;
> -
> -/* Number of ints required to hold max_allocno bits.
> -   This is the length of a row in `conflicts'.  */
> -
> -extern int allocno_row_words;
> +extern HOST_WIDEST_FAST_INT *conflicts;
>  
>  /* Indexed by (pseudo) reg number, gives the allocno, or -1
>     for pseudo registers which are not to be allocated.  */
>  
>  extern int *reg_allocno;
>  
> +/* Precalculated partial bit number in the compressed triangular bit matrix.
> +   For two allocnos, the final bit number is: partial_bitnum[LOW] + HIGH.  */
> +
> +extern int *partial_bitnum;
> +
> +/* Size in bits of the compressed triangular bit matrix.  */
> +
> +extern int max_bitnum;
> +
> +/* The pool to allocate the adjacency list elements from.  */
> +
> +extern alloc_pool adjacency_pool;
> +
> +/* The maximum number of neighbors stored in the neighbors vector before
> +   we have to chain in another vector.  */
> +
> +#define ADJACENCY_VEC_LENGTH 30
> +
> +/* Conflict graph adjacency list.  */
> +
> +typedef struct adjacency_list_d
> +{
> +  int neighbors[ADJACENCY_VEC_LENGTH];
> +  unsigned int index;
> +  struct adjacency_list_d *next;
> +} adjacency_t;
> +
> +extern adjacency_t **adjacency;
> +
> +/* Add NEIGHBOR to ALLOC_NO's adjacency list.  It is assumed the caller
> +   has already determined that NEIGHBOR is not already neighbor by
> +   checking the conflict bit matrix.  */
> +
> +static inline void
> +add_neighbor (int alloc_no, int neighbor)
> +{
> +  adjacency_t *adjlist = adjacency[alloc_no];
> +
> +  if (adjlist == NULL || adjlist->index == ADJACENCY_VEC_LENGTH)
> +    {
> +      adjacency_t *new = pool_alloc (adjacency_pool);
> +      new->index = 0;
> +      new->next = adjlist;
> +      adjlist = new;
> +      adjacency[alloc_no] = adjlist;
> +    }
> +
> +  adjlist->neighbors[adjlist->index++] = neighbor;
> +}
> +
> +/* Iterator for adjacency lists.  */
> +
> +typedef struct adjacency_iterator_d
> +{
> +  adjacency_t *vec;
> +  unsigned int idx;
> +} adjacency_iter;
> +
> +/* Initialize a single adjacency list iterator.  */
> +
> +static inline int
> +adjacency_iter_init (adjacency_iter *ai, int allocno1)
> +{
> +  ai->vec = adjacency[allocno1];
> +  ai->idx = 0;
> +  return ai->vec != NULL;
> +}
> +
> +/* Test whether we have visited all of the neighbors.  */
> +
> +static inline int
> +adjacency_iter_done (adjacency_iter *ai)
> +{
> +  return ai->idx > ai->vec->index;
> +}
> +
> +/* Advance to the next neighbor in AI.  */
> +
> +static inline int
> +adjacency_iter_next (adjacency_iter *ai)
> +{
> +  unsigned int idx = ai->idx;
> +  int neighbor = ai->vec->neighbors[idx++];
> +  if (idx >= ai->vec->index && ai->vec->next != NULL)
> +    {
> +      ai->vec = ai->vec->next;
> +      ai->idx = 0;
> +    }
> +  else
> +    ai->idx = idx;
> +  return neighbor;
> +}
> +
> +/* Return the one basic block regno is used in.  If regno is used
> +   in more than one basic block or if it is unknown which block it
> +   is used in, return 0.  */
> +
> +static inline int
> +regno_basic_block (int regno)
> +{
> +  int block = REG_BASIC_BLOCK (regno);
> +  if (block < 0)
> +    block = 0;
> +  return block;
> +}
> +
>  extern void global_conflicts (void);
>  
>  /* In global.c  */
>  
> -/* For any allocno set in ALLOCNO_SET, set ALLOCNO to that allocno,
> -   and execute CODE.  */
> -#define EXECUTE_IF_SET_IN_ALLOCNO_SET(ALLOCNO_SET, ALLOCNO, CODE)	\
> -do {									\
> -  int i_;								\
> -  int allocno_;								\
> -  HOST_WIDE_INT *p_ = (ALLOCNO_SET);					\
> -									\
> -  for (i_ = allocno_row_words - 1, allocno_ = 0; i_ >= 0;		\
> -       i_--, allocno_ += HOST_BITS_PER_WIDE_INT)			\
> -    {									\
> -      unsigned HOST_WIDE_INT word_ = (unsigned HOST_WIDE_INT) *p_++;	\
> -									\
> -      for ((ALLOCNO) = allocno_; word_; word_ >>= 1, (ALLOCNO)++)	\
> -	{								\
> -	  if (word_ & 1)						\
> -	    {CODE;}							\
> -	}								\
> -    }									\
> -} while (0)
> +/* Macro to visit all of IN_ALLOCNO's neighbors.  Neighbors are
> +   returned in OUT_ALLOCNO for each iteration of the loop.  */
>  
> -extern void ra_init_live_subregs (bool, sbitmap *, int *, int, rtx reg);
> +#define FOR_EACH_CONFLICT(IN_ALLOCNO, OUT_ALLOCNO, ITER)		\
> +  if (!adjacency || !adjacency_iter_init (&(ITER), (IN_ALLOCNO)))	\
> +    ;									\
> +  else									\
> +    for ((OUT_ALLOCNO) = adjacency_iter_next (&(ITER));			\
> +	 !adjacency_iter_done (&(ITER));				\
> +	 (OUT_ALLOCNO) = adjacency_iter_next (&(ITER)))
>  
> +extern void ra_init_live_subregs (bool, sbitmap *, int *, int, rtx);
> +extern bool conflict_p (int, int);
>  
>  #endif /* GCC_RA_H */
> Index: sparseset.c
> ===================================================================
> --- sparseset.c	(revision 0)
> +++ sparseset.c	(revision 0)
> @@ -0,0 +1,232 @@
> +/* SparseSet implementation.
> +   Copyright (C) 2007 Free Software Foundation, Inc.
> +   Contributed by Peter Bergner <bergner@vnet.ibm.com>
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +#include "libiberty.h"
> +#include "sparseset.h"
> +
> +
> +/* Allocate and clear a n_elms SparseSet.  */
> +
> +sparseset
> +sparseset_alloc (SPARSESET_ELT_TYPE n_elms)
> +{
> +  unsigned int n_bytes = sizeof (struct sparseset_def)
> +			 + ((n_elms - 1) * 2 * sizeof (SPARSESET_ELT_TYPE));
> +
> +  sparseset set = (sparseset) xmalloc (n_bytes);
> +  set->dense = &(set->elms[0]);
> +  set->sparse = &(set->elms[n_elms]);
> +  set->size = n_elms;
> +  sparseset_clear (set);
> +  return set;
> +}
> +
> +/* Low level routine not meant for use outside of sparseset.[ch].
> +   Assumes idx1 < s->members and idx2 < s->members.  */
> +
> +static inline void
> +sparseset_swap (sparseset s, SPARSESET_ELT_TYPE idx1, SPARSESET_ELT_TYPE idx2)
> +{
> +  SPARSESET_ELT_TYPE tmp = s->dense[idx2];
> +  sparseset_insert_bit (s, s->dense[idx1], idx2);
> +  sparseset_insert_bit (s, tmp, idx1);
> +}
> +
> +/* Operation: S = S - {e}
> +   Delete e from the set S if it is a member of S.  */
> +
> +void
> +sparseset_clear_bit (sparseset s, SPARSESET_ELT_TYPE e)
> +{
> +  if (sparseset_bit_p (s, e))
> +    {
> +      SPARSESET_ELT_TYPE idx = s->sparse[e];
> +      SPARSESET_ELT_TYPE iter = s->iter;
> +      SPARSESET_ELT_TYPE mem = s->members - 1;
> +
> +      /* If we are iterating over this set and we want to delete a
> +	 member we've already visited, then we swap the element we
> +	 want to delete with the element at the current iteration
> +	 index so that it plays well together with the code below
> +	 that actually removes the element.  */
> +      if (s->iterating && idx <= iter)
> +	{
> +	  if (idx < iter)
> +	    {
> +	      sparseset_swap (s, idx, iter);
> +	      idx = iter;
> +	    }
> +	  s->iter_inc = 0;
> +	}
> +
> +      /* Replace the element we want to delete with the last element
> +	 in the dense array and then decrement s->members, effectively
> +	 removing the element we want to delete.  */
> +      sparseset_insert_bit (s, s->dense[mem], idx);
> +      s->members = mem;
> +    }
> +}
> +
> +/* Operation: D = S
> +   Restrictions: none.  */
> +
> +void
> +sparseset_copy (sparseset d, sparseset s)
> +{
> +  SPARSESET_ELT_TYPE i;
> +
> +  if (d == s)
> +    return;
> +
> +  sparseset_clear (d);
> +  for (i = 0; i < s->members; i++)
> +    sparseset_insert_bit (d, s->dense[i], i);
> +  d->members = s->members;
> +}
> +
> +/* Operation: D = A & B.
> +   Restrictions: none.  */
> +
> +void
> +sparseset_and (sparseset d, sparseset a, sparseset b)
> +{
> +  SPARSESET_ELT_TYPE e;
> +
> +  if (a == b)
> +    {
> +      if (d != a)
> +	sparseset_copy (d, a);
> +      return;
> +    }
> +
> +  if (d == a || d == b)
> +    {
> +      sparseset s = (d == a) ? b : a;
> +
> +      EXECUTE_IF_SET_IN_SPARSESET (d, e)
> +	if (!sparseset_bit_p (s, e))
> +	  sparseset_clear_bit (d, e);
> +    }
> +  else
> +    {
> +      sparseset sml, lrg;
> +
> +      if (sparseset_cardinality (a) < sparseset_cardinality (b))
> +	{
> +	  sml = a;
> +	  lrg = b;
> +	}
> +      else
> +	{
> +	  sml = b;
> +	  lrg = a;
> +	}
> +
> +      sparseset_clear (d);
> +      EXECUTE_IF_SET_IN_SPARSESET (sml, e)
> +	if (sparseset_bit_p (lrg, e))
> +	  sparseset_set_bit (d, e);
> +    }
> +}
> +
> +/* Operation: D = A & ~B.
> +   Restrictions: D != B, unless D == A == B.  */
> +
> +void
> +sparseset_and_compl (sparseset d, sparseset a, sparseset b)
> +{
> +  SPARSESET_ELT_TYPE e;
> +
> +  if (a == b)
> +    {
> +      sparseset_clear (d);
> +      return;
> +    }
> +
> +  gcc_assert (d != b);
> +
> +  if (d == a)
> +    {
> +      if (sparseset_cardinality (d) < sparseset_cardinality (b))
> +	{
> +	  EXECUTE_IF_SET_IN_SPARSESET (d, e)
> +	    if (sparseset_bit_p (b, e))
> +	      sparseset_clear_bit (d, e);
> +	}
> +      else
> +	{
> +	  EXECUTE_IF_SET_IN_SPARSESET (b, e)
> +	    sparseset_clear_bit (d, e);
> +	}
> +    }
> +  else
> +    {
> +      sparseset_clear (d);
> +      EXECUTE_IF_SET_IN_SPARSESET (a, e)
> +	if (!sparseset_bit_p (b, e))
> +	  sparseset_set_bit (d, e);
> +    }
> +}
> +
> +/* Operation: D = A | B.
> +   Restrictions: none.  */
> +
> +void
> +sparseset_ior (sparseset d, sparseset a, sparseset b)
> +{
> +  SPARSESET_ELT_TYPE e;
> +
> +  if (a == b)
> +    sparseset_copy (d, a);
> +  else if (d == b)
> +    {
> +      EXECUTE_IF_SET_IN_SPARSESET (a, e)
> +	sparseset_set_bit (d, e);
> +    }
> +  else
> +    {
> +      if (d != a)
> +        sparseset_copy (d, a);
> +      EXECUTE_IF_SET_IN_SPARSESET (b, e)
> +	sparseset_set_bit (d, e);
> +    }
> +}
> +
> +/* Operation: A == B
> +   Restrictions: none.  */
> +
> +bool
> +sparseset_equal_p (sparseset a, sparseset b)
> +{
> +  SPARSESET_ELT_TYPE e;
> +
> +  if (a == b)
> +    return true;
> +
> +  if (sparseset_cardinality (a) != sparseset_cardinality (b))
> +    return false;
> +
> +  EXECUTE_IF_SET_IN_SPARSESET (a, e)
> +    if (!sparseset_bit_p (b, e))
> +      return false;
> +
> +  return true;
> +}
> +
> Index: sparseset.h
> ===================================================================
> --- sparseset.h	(revision 0)
> +++ sparseset.h	(revision 0)
> @@ -0,0 +1,162 @@
> +/* SparseSet implementation.
> +   Copyright (C) 2007 Free Software Foundation, Inc.
> +   Contributed by Peter Bergner <bergner@vnet.ibm.com>
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +#ifndef GCC_SPARSESET_H
> +#define GCC_SPARSESET_H
> +
> +#include "system.h"
> +#include <assert.h>
> +
> +#define SPARSESET_ELT_BITS ((unsigned) HOST_BITS_PER_WIDEST_FAST_INT)
> +#define SPARSESET_ELT_TYPE unsigned int
> +
> +/* Data Structure used for the SparseSet representation.  */
> +
> +typedef struct sparseset_def
> +{
> +  SPARSESET_ELT_TYPE *dense;	/* Dense array.  */
> +  SPARSESET_ELT_TYPE *sparse; 	/* Sparse array.  */
> +  SPARSESET_ELT_TYPE members;	/* Number of elements.  */
> +  SPARSESET_ELT_TYPE size;	/* Maximum number of elements.  */
> +  SPARSESET_ELT_TYPE iter;	/* Iterator index.  */
> +  unsigned char iter_inc;	/* Iteration increment amount.  */
> +  bool iterating;
> +  SPARSESET_ELT_TYPE elms[2];   /* Combined dense and sparse arrays.  */
> +} *sparseset;
> +
> +#define sparseset_free(MAP)  free(MAP)
> +extern sparseset sparseset_alloc (SPARSESET_ELT_TYPE n_elms);
> +extern void sparseset_clear_bit (sparseset, SPARSESET_ELT_TYPE);
> +extern void sparseset_copy (sparseset, sparseset);
> +extern void sparseset_and (sparseset, sparseset, sparseset);
> +extern void sparseset_and_compl (sparseset, sparseset, sparseset);
> +extern void sparseset_ior (sparseset, sparseset, sparseset);
> +extern bool sparseset_equal_p (sparseset, sparseset);
> +
> +/* Operation: S = {}
> +   Clear the set of all elements.  */
> +
> +static inline void
> +sparseset_clear (sparseset s)
> +{
> +  s->members = 0;
> +  s->iterating = false;
> +}
> +
> +/* Return the number of elements currently in the set.  */
> +
> +static inline SPARSESET_ELT_TYPE
> +sparseset_cardinality (sparseset s)
> +{
> +  return s->members;
> +}
> +
> +/* Return the maximum number of elements this set can hold.  */
> +
> +static inline SPARSESET_ELT_TYPE
> +sparseset_size (sparseset s)
> +{
> +  return s->size;
> +}
> +
> +/* Return true if e is a member of the set S, otherwise return false.  */
> +
> +static inline bool
> +sparseset_bit_p (sparseset s, SPARSESET_ELT_TYPE e)
> +{
> +  SPARSESET_ELT_TYPE idx;
> +
> +  gcc_assert (e < s->size);
> +
> +  idx = s->sparse[e];
> +
> +  return idx < s->members && s->dense[idx] == e;
> +}
> +
> +/* Low level insertion routine not meant for use outside of sparseset.[ch].
> +   Assumes E is valid and not already a member of the set S.  */
> +
> +static inline void
> +sparseset_insert_bit (sparseset s, SPARSESET_ELT_TYPE e, SPARSESET_ELT_TYPE idx)
> +{
> +  s->sparse[e] = idx;
> +  s->dense[idx] = e;
> +}
> +
> +/* Operation: S = S + {e}
> +   Insert E into the set S, if it isn't already a member.  */
> +
> +static inline void
> +sparseset_set_bit (sparseset s, SPARSESET_ELT_TYPE e)
> +{
> +  if (!sparseset_bit_p (s, e))
> +    sparseset_insert_bit (s, e, s->members++);
> +}
> +
> +/* Return and remove an arbitrary element from the set S.  */
> +
> +static inline SPARSESET_ELT_TYPE
> +sparseset_pop (sparseset s)
> +{
> +  SPARSESET_ELT_TYPE mem = s->members;
> +
> +  gcc_assert (mem != 0);
> +
> +  s->members = mem - 1;
> +  return s->dense[mem];
> +}
> +
> +static inline void
> +sparseset_iter_init (sparseset s)
> +{
> +  s->iter = 0;
> +  s->iter_inc = 1;
> +  s->iterating = true;
> +}
> +
> +static inline bool
> +sparseset_iter_p (sparseset s)
> +{
> +  if (s->iterating && s->iter < s->members)
> +    return true;
> +  else
> +    return s->iterating = false;
> +}
> +
> +static inline SPARSESET_ELT_TYPE
> +sparseset_iter_elm (sparseset s)
> +{
> +  return s->dense[s->iter];
> +}
> +
> +static inline void
> +sparseset_iter_next (sparseset s)
> +{
> +  s->iter += s->iter_inc;
> +  s->iter_inc = 1;
> +}
> +
> +#define EXECUTE_IF_SET_IN_SPARSESET(SPARSESET, ITER)			\
> +  for (sparseset_iter_init (SPARSESET);					\
> +       sparseset_iter_p (SPARSESET)					\
> +       && (((ITER) = sparseset_iter_elm (SPARSESET)) || 1);		\
> +       sparseset_iter_next (SPARSESET))
> +
> +#endif /* GCC_SPARSESET_H */
> Index: Makefile.in
> ===================================================================
> --- Makefile.in	(revision 128990)
> +++ Makefile.in	(working copy)
> @@ -1126,6 +1126,7 @@ OBJS-common = \
>  	sdbout.o \
>  	see.o \
>  	simplify-rtx.o \
> +	sparseset.o \
>  	sreal.o \
>  	stack-ptr-mod.o \
>  	stmt.o \
> @@ -1765,6 +1766,7 @@ sbitmap.o: sbitmap.c $(CONFIG_H) $(SYSTE
>      $(FLAGS_H) hard-reg-set.h $(BASIC_BLOCK_H) $(OBSTACK_H)
>  ebitmap.o: ebitmap.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
>  	$(EBITMAP_H)
> +sparseset.o: sparseset.c $(SYSTEM_H) sparseset.h
>  
>  COLLECT2_OBJS = collect2.o tlink.o intl.o version.o
>  COLLECT2_LIBS = @COLLECT2_LIBS@
>   



More information about the Gcc-patches mailing list