This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: RFA: patch for usage of more accurate live information for the global and the reload.


Richard Henderson wrote:

On Mon, May 17, 2004 at 02:20:59PM -0400, Vladimir Makarov wrote:


+ FOR_EACH_BB (bb)
+ {
+ bb->aux = bb_info = xmalloc (sizeof (struct bb_info));



Consider using alloc_aux_for_block(s).




+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ bitmap_set_bit (bb_info->pavin, i);
+ bitmap_set_bit (bb_info->pavout, i);
+ }



You're computing the same set for each block. Consider filling in the bits just once and copying the set for each instance.



+ static void
+ forward_propagate_solution (bool (*eq) (basic_block, basic_block, bool))



This is called once; why pass the eq function?




+ VARRAY_BB_INIT (new_bbs, 1000, "basic blocks for the next iter.");
+ FOR_EACH_BB (bb)
+ {
+ VARRAY_PUSH_BB (bbs, bb);
+ }



Inefficient when number of basic blocks > 1000. You know exactly how many blocks there are to begin.

All the actual algorithm bits look good.


I've just committed the following modified patch, incorporating all your proposals. I tested it again with a bootstrap.

As for the data flow equation solver functions from df.c, I found that the gcc bootstrap would be ~1% slower with them (besides requiring a lot of changes to the patch). So the modified patch still contains the original data flow solver.

Vlad

Index: global.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/global.c,v
retrieving revision 1.100
diff -c -p -r1.100 global.c
*** global.c	4 Feb 2004 19:15:16 -0000	1.100
--- global.c	25 May 2004 19:09:28 -0000
*************** static void set_preference (rtx, rtx);
*** 306,312 ****
--- 306,323 ----
  static void dump_conflicts (FILE *);
  static void reg_becomes_live (rtx, rtx, void *);
  static void reg_dies (int, enum machine_mode, struct insn_chain *);
+ 
+ static void allocate_bb_info (void);
+ static void free_bb_info (void);
+ static void calculate_local_reg_bb_info (void);
+ static void set_up_bb_rts_numbers (void);
+ static int rpost_cmp (const void *, const void *);
+ static bool modify_bb_reg_pav (basic_block, basic_block, bool);
+ static void calculate_reg_pav (void);
+ static void make_accurate_live_analysis (void);
+ 
  
+ 
  /* Perform allocation of pseudo-registers not allocated by local_alloc.
     FILE is a file to output debugging information on,
     or zero if such output is not desired.
*************** global_alloc (FILE *file)
*** 329,334 ****
--- 340,347 ----
    size_t i;
    rtx x;
  
+   make_accurate_live_analysis ();
+ 
    max_allocno = 0;
  
    /* A machine may have certain hard registers that
*************** record_one_conflict (int regno)
*** 1377,1394 ****
  
        IOR_HARD_REG_SET (allocno[ialloc].hard_reg_conflicts, hard_regs_live);
        for (j = allocno_row_words - 1; j >= 0; j--)
! 	{
! #if 0
! 	  int k;
! 	  for (k = 0; k < n_no_conflict_pairs; k++)
! 	    if (! ((j == no_conflict_pairs[k].allocno1
! 		    && ialloc == no_conflict_pairs[k].allocno2)
! 		   ||
! 		   (j == no_conflict_pairs[k].allocno2
! 		    && ialloc == no_conflict_pairs[k].allocno1)))
! #endif /* 0 */
! 	      conflicts[ialloc_prod + j] |= allocnos_live[j];
! 	}
      }
  }
  
--- 1390,1396 ----
  
        IOR_HARD_REG_SET (allocno[ialloc].hard_reg_conflicts, hard_regs_live);
        for (j = allocno_row_words - 1; j >= 0; j--)
! 	conflicts[ialloc_prod + j] |= allocnos_live[j];
      }
  }
  
*************** mark_reg_store (rtx reg, rtx setter, voi
*** 1503,1509 ****
  /* Like mark_reg_set except notice just CLOBBERs; ignore SETs.  */
  
  static void
! mark_reg_clobber (rtx reg, rtx setter, void *data ATTRIBUTE_UNUSED)
  {
    if (GET_CODE (setter) == CLOBBER)
      mark_reg_store (reg, setter, data);
--- 1505,1511 ----
  /* Like mark_reg_set except notice just CLOBBERs; ignore SETs.  */
  
  static void
! mark_reg_clobber (rtx reg, rtx setter, void *data)
  {
    if (GET_CODE (setter) == CLOBBER)
      mark_reg_store (reg, setter, data);
*************** dump_global_regs (FILE *file)
*** 1973,1975 ****
--- 1975,2238 ----
        fprintf (file, " %d", i);
    fprintf (file, "\n\n");
  }
+ 
+ 
+ 
+ /* This page contains code to make live information more accurate.
+    The accurate register liveness at program point P means:
+      o there is a path from P to usage of the register and the
+        register is not redefined or killed on the path.
+      o register at P is partially available, i.e. there is a path from
+        a register definition to the point P and the register is not
+        killed (clobbered) on the path
+ 
+    The standard GCC live information means only the first condition.
+    Without the partial availability, there will be more register
+    conflicts and as a consequence worse register allocation.  The
+    typical example where the information can be different is a
+    register initialized in the loop at the basic block preceding the
+    loop in CFG. */
+ 
+ /* The following structure contains basic block data flow information
+    used to calculate partial availability of registers.  */
+ 
+ struct bb_info
+ {
+   /* The basic block reverse post-order number.  */
+   int rts_number;
+   /* Registers correspondingly killed (clobbered) and defined but not
+      killed afterward in the basic block.  */
+   bitmap killed, avloc;
+   /* Registers partially available correspondingly at the start and
+      end of the basic block.  */
+   bitmap pavin, pavout;
+ };
+ 
+ /* Macros for accessing data flow information of basic blocks.  */
+ 
+ #define BB_INFO(BB) ((struct bb_info *) (BB)->aux)
+ #define BB_INFO_BY_INDEX(N) BB_INFO (BASIC_BLOCK(N))
+ 
+ /* The function allocates the info structures of each basic block.  It
+    also initializes PAVIN and PAVOUT as if all hard registers were
+    partially available.  */
+ 
+ static void
+ allocate_bb_info (void)
+ {
+   int i;
+   basic_block bb;
+   struct bb_info *bb_info;
+   bitmap init;
+ 
+   alloc_aux_for_blocks (sizeof (struct bb_info));
+   init = BITMAP_XMALLOC ();
+   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+     bitmap_set_bit (init, i);
+   FOR_EACH_BB (bb)
+     {
+       bb_info = bb->aux;
+       bb_info->avloc = BITMAP_XMALLOC ();
+       bb_info->killed = BITMAP_XMALLOC ();
+       bb_info->pavin = BITMAP_XMALLOC ();
+       bb_info->pavout = BITMAP_XMALLOC ();
+       bitmap_copy (bb_info->pavin, init);
+       bitmap_copy (bb_info->pavout, init);
+     }
+   BITMAP_XFREE (init);
+ }
+ 
+ /* The function frees the allocated info of all basic blocks.  */
+ 
+ static void
+ free_bb_info (void)
+ {
+   basic_block bb;
+   struct bb_info *bb_info;
+ 
+   FOR_EACH_BB (bb)
+     {
+       bb_info = BB_INFO (bb);
+       BITMAP_XFREE (bb_info->pavout);
+       BITMAP_XFREE (bb_info->pavin);
+       BITMAP_XFREE (bb_info->killed);
+       BITMAP_XFREE (bb_info->avloc);
+     }
+   free_aux_for_blocks ();
+ }
+ 
+ /* The function modifies local info for register REG being changed in
+    SETTER.  DATA is used to pass the current basic block info.  */
+ 
+ static void
+ mark_reg_change (rtx reg, rtx setter, void *data)
+ {
+   int regno;
+   basic_block bb = data;
+   struct bb_info *bb_info = BB_INFO (bb);
+ 
+   if (GET_CODE (reg) == SUBREG)
+     reg = SUBREG_REG (reg);
+ 
+   if (GET_CODE (reg) != REG)
+     return;
+ 
+   regno = REGNO (reg);
+   bitmap_set_bit (bb_info->killed, regno);
+   
+   if (GET_CODE (setter) != CLOBBER)
+     bitmap_set_bit (bb_info->avloc, regno);
+   else
+     bitmap_clear_bit (bb_info->avloc, regno);
+ }
+ 
+ /* The function calculates local info for each basic block.  */
+ 
+ static void
+ calculate_local_reg_bb_info (void)
+ {
+   basic_block bb;
+   rtx insn, bound;
+ 
+   FOR_EACH_BB (bb)
+     {
+       bound = NEXT_INSN (BB_END (bb));
+       for (insn = BB_HEAD (bb); insn != bound; insn = NEXT_INSN (insn))
+ 	if (INSN_P (insn))
+ 	  note_stores (PATTERN (insn), mark_reg_change, bb);
+     }
+ }
+ 
+ /* The function sets up reverse post-order number of each basic
+    block.  */
+ 
+ static void
+ set_up_bb_rts_numbers (void)
+ {
+   int i;
+   int *rts_order;
+   
+   rts_order = xmalloc (sizeof (int) * n_basic_blocks);
+   flow_reverse_top_sort_order_compute (rts_order);
+   for (i = 0; i < n_basic_blocks; i++)
+     BB_INFO_BY_INDEX (rts_order [i])->rts_number = i;
+   free (rts_order);
+ }
+ 
+ /* Compare function for sorting blocks in reverse postorder.  */
+ 
+ static int
+ rpost_cmp (const void *bb1, const void *bb2)
+ {
+   basic_block b1 = *(basic_block *) bb1, b2 = *(basic_block *) bb2;
+ 
+   return BB_INFO (b2)->rts_number - BB_INFO (b1)->rts_number;
+ }
+ 
+ /* The function calculates partial availability of registers.  The
+    function calculates partial availability at the end of basic block
+    BB by propagating partial availability at end of predecessor basic
+    block PRED.  The function returns true if the partial availability
+    at the end of BB has been changed or if CHANGED_P.  We have the
+    following equations:
+ 
+      bb.pavin = empty for entry block | union (pavout of predecessors)
+      bb.pavout = union (bb.pavin - b.killed, bb.avloc)  */
+ 
+ static bool
+ modify_bb_reg_pav (basic_block bb, basic_block pred, bool changed_p)
+ {
+   struct bb_info *bb_info;
+   bitmap bb_pavin, bb_pavout;
+ 
+   bb_info = BB_INFO (bb);
+   bb_pavin = bb_info->pavin;
+   bb_pavout = bb_info->pavout;
+   if (pred->index != ENTRY_BLOCK)
+     bitmap_a_or_b (bb_pavin, bb_pavin, BB_INFO (pred)->pavout);
+   changed_p |= bitmap_union_of_diff (bb_pavout, bb_info->avloc,
+ 				     bb_pavin, bb_info->killed);
+   return changed_p;
+ }
+ 
+ /* The function calculates partial register availability.  */
+ 
+ static void
+ calculate_reg_pav (void)
+ {
+   basic_block bb, succ;
+   edge e;
+   bool changed_p;
+   int i, nel;
+   varray_type bbs, new_bbs, temp;
+   basic_block *bb_array;
+   sbitmap wset;
+ 
+   VARRAY_BB_INIT (bbs, n_basic_blocks, "basic blocks");
+   VARRAY_BB_INIT (new_bbs, n_basic_blocks, "basic blocks for the next iter.");
+   FOR_EACH_BB (bb)
+     {
+       VARRAY_PUSH_BB (bbs, bb);
+     }
+   wset = sbitmap_alloc (n_basic_blocks + 1);
+   while (VARRAY_ACTIVE_SIZE (bbs))
+     {
+       bb_array = &VARRAY_BB (bbs, 0);
+       nel = VARRAY_ACTIVE_SIZE (bbs);
+       qsort (bb_array, nel, sizeof (basic_block), rpost_cmp);
+       sbitmap_zero (wset);
+       for (i = 0; i < nel; i++)
+ 	{
+ 	  bb = bb_array [i];
+ 	  changed_p = 0;
+ 	  for (e = bb->pred; e; e = e->pred_next)
+ 	    changed_p = modify_bb_reg_pav (bb, e->src, changed_p);
+ 	  if (changed_p)
+ 	    for (e = bb->succ; e; e = e->succ_next)
+ 	      {
+ 		succ = e->dest;
+ 		if (succ->index != EXIT_BLOCK && !TEST_BIT (wset, succ->index))
+ 		  {
+ 		    SET_BIT (wset, succ->index);
+ 		    VARRAY_PUSH_BB (new_bbs, succ);
+ 		  }
+ 	      }
+ 	}
+       temp = bbs;
+       bbs = new_bbs;
+       new_bbs = temp;
+       VARRAY_POP_ALL (new_bbs);
+     }
+   sbitmap_free (wset);
+ }
+ 
+ /* The following function makes live information more accurate by
+    modifying global_live_at_start and global_live_at_end of basic
+    blocks.  After the function call a register lives at a program
+    point only if it is initialized on a path from CFG entry to the
+    program point.  The standard GCC life analysis permits registers to
+    live uninitialized. */
+ 
+ static void
+ make_accurate_live_analysis (void)
+ {
+   basic_block bb;
+   struct bb_info *bb_info;
+ 
+   max_regno = max_reg_num ();
+   compact_blocks ();
+   allocate_bb_info ();
+   calculate_local_reg_bb_info ();
+   set_up_bb_rts_numbers ();
+   calculate_reg_pav ();
+   FOR_EACH_BB (bb)
+     {
+       bb_info = BB_INFO (bb);
+       
+       bitmap_a_and_b (bb->global_live_at_start, bb->global_live_at_start,
+ 		      bb_info->pavin);
+       bitmap_a_and_b (bb->global_live_at_end, bb->global_live_at_end,
+ 		      bb_info->pavout);
+     }
+   free_bb_info ();
+ }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]