[lra] patch from Richard Sandiford's review of lra-spills.c and lra-coalesce.c

Vladimir Makarov vmakarov@redhat.com
Thu Oct 11 00:42:00 GMT 2012


   The following patch implements most of Richard's proposals for the
LRA files lra-spills.c and lra-coalesce.c.

   The patch was successfully bootstrapped on x86/x86-64.

   Committed as rev. 192341.

2012-10-10  Vladimir Makarov  <vmakarov@redhat.com>

         * lra-coalesce.c (removed_pseudos_bitmap): Remove.
         (update_live_info): Rewrite without the bitmap.
         (coalescable_pseudo_p): New function.
         (lra_coalesce): Use it.  Rewrite using one loop instead of two
         nested ones.  Use set_noop_p.
         * lra-spills.c: Fix typos in comments.
         (struct slot, assign_mem_slot): Modify comments.
         (pseudo_reg_slot_compare): Ditto.  Don't use MAX. Use
         biggest_mode only.
         (assign_spill_hard_regs): Modify comments.
         (set_jump_crosses): Rename to setjump_crosses.
         (add_pseudo_to_slot): Use first->next instead of
         pseudo_slots[slots[slot_num].regno].next.
         (remove_pseudos): Make it a void function.
         (lra_spill): Change comment.


Index: lra-coalesce.c
===================================================================
--- lra-coalesce.c	(revision 192325)
+++ lra-coalesce.c	(working copy)
@@ -179,34 +179,55 @@ mem_move_p (int regno1, int regno2)
   return reg_renumber[regno1] < 0 && reg_renumber[regno2] < 0;
 }
 
+/* Pseudos used instead of the coalesced pseudos.  */
+static bitmap_head used_pseudos_bitmap;
 
-/* Pseudos which go away after coalescing and pseudos used instead of
-   the removed pseudos.	 */
-static bitmap_head removed_pseudos_bitmap, used_pseudos_bitmap;
-
-/* Set up REMOVED_PSEUDOS_BITMAP and USED_PSEUDOS_BITMAP, and update
-   LR_BITMAP (a BB live info bitmap).  */
+/* Set up USED_PSEUDOS_BITMAP, and update LR_BITMAP (a BB live info
+   bitmap).  */
 static void
 update_live_info (bitmap lr_bitmap)
 {
   unsigned int j;
   bitmap_iterator bi;
 
-  bitmap_clear (&removed_pseudos_bitmap);
   bitmap_clear (&used_pseudos_bitmap);
   EXECUTE_IF_AND_IN_BITMAP (&coalesced_pseudos_bitmap, lr_bitmap,
 			    FIRST_PSEUDO_REGISTER, j, bi)
+    bitmap_set_bit (&used_pseudos_bitmap, first_coalesced_pseudo[j]);
+  if (! bitmap_empty_p (&used_pseudos_bitmap))
     {
-      bitmap_set_bit (&removed_pseudos_bitmap, j);
-      bitmap_set_bit (&used_pseudos_bitmap, first_coalesced_pseudo[j]);
-    }
-  if (! bitmap_empty_p (&removed_pseudos_bitmap))
-    {
-      bitmap_and_compl_into (lr_bitmap, &removed_pseudos_bitmap);
+      bitmap_and_compl_into (lr_bitmap, &coalesced_pseudos_bitmap);
       bitmap_ior_into (lr_bitmap, &used_pseudos_bitmap);
     }
 }
 
+/* Return true if pseudo REGNO can be potentially coalesced.  Use
+   SPLIT_ORIGIN_BITMAP to find pseudos whose live ranges were
+   split.  */
+static bool
+coalescable_pseudo_p (int regno, bitmap split_origin_bitmap)
+{
+  lra_assert (regno >= FIRST_PSEUDO_REGISTER);
+  /* Don't coalesce inheritance pseudos because spilled inheritance
+     pseudos will be removed in subsequent 'undo inheritance'
+     pass.  */
+  return (lra_reg_info[regno].restore_regno < 0
+	  /* We undo splits for spilled pseudos whose live ranges were
+	     split.  So don't coalesce them, it is not necessary and
+	     the undo transformations would be wrong.  */
+	  && ! bitmap_bit_p (split_origin_bitmap, regno)
+	  /* Don't coalesce bound pseudos.  Bound pseudos have their
+	     own rules for finding live ranges.  It is hard to
+	     maintain this info with coalescing and it is not worth
+	     doing it.  */
+	  && ! bitmap_bit_p (&lra_bound_pseudos, regno)
+	  /* We don't want to coalesce regnos with equivalences, at
+	     least without updating this info.  */
+	  && ira_reg_equiv[regno].constant == NULL_RTX
+	  && ira_reg_equiv[regno].memory == NULL_RTX
+	  && ira_reg_equiv[regno].invariant == NULL_RTX);
+}
+
 /* The major function for aggressive pseudo coalescing of moves only
    if the both pseudos were spilled and not bound.  */
 bool
@@ -214,7 +235,7 @@ lra_coalesce (void)
 {
   basic_block bb;
   rtx mv, set, insn, next, *sorted_moves;
-  int i, n, mv_num, sregno, dregno, restore_regno;
+  int i, mv_num, sregno, dregno, restore_regno;
   unsigned int regno;
   int coalesced_moves;
   int max_regno = max_reg_num ();
@@ -249,96 +270,56 @@ lra_coalesce (void)
 	    && (sregno = REGNO (SET_SRC (set))) >= FIRST_PSEUDO_REGISTER
 	    && (dregno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
 	    && mem_move_p (sregno, dregno)
-	    /* Don't coalesce inheritance pseudos because spilled
-	       inheritance pseudos will be removed in subsequent 'undo
-	       inheritance' pass.  */
-	    && lra_reg_info[sregno].restore_regno < 0
-	    && lra_reg_info[dregno].restore_regno < 0
-	    /* We undo splits for spilled pseudos whose live ranges
-	       were split.  So don't coalesce them, it is not
-	       necessary and the undo transformations would be
-	       wrong.  */
-	    && ! bitmap_bit_p (&split_origin_bitmap, sregno)
-	    && ! bitmap_bit_p (&split_origin_bitmap, dregno)
+	    && coalescable_pseudo_p (sregno, &split_origin_bitmap)
+	    && coalescable_pseudo_p (dregno, &split_origin_bitmap)
 	    && ! side_effects_p (set)
-	    /* Don't coalesces bound pseudos.  Bound pseudos has own
-	       rules for finding live ranges.  It is hard to maintain
-	       this info with coalescing and it is not worth to do
-	       it.  */
-	    && ! bitmap_bit_p (&lra_bound_pseudos, sregno)
-	    && ! bitmap_bit_p (&lra_bound_pseudos, dregno)
-	    /* We don't want to coalesce regnos with equivalences,
-	       at least without updating this info.  */
-	    && ira_reg_equiv[sregno].constant == NULL_RTX
-	    && ira_reg_equiv[sregno].memory == NULL_RTX
-	    && ira_reg_equiv[sregno].invariant == NULL_RTX
-	    && ira_reg_equiv[dregno].constant == NULL_RTX
-	    && ira_reg_equiv[dregno].memory == NULL_RTX
-	    && ira_reg_equiv[dregno].invariant == NULL_RTX
 	    && !(lra_intersected_live_ranges_p
 		 (lra_reg_info[sregno].live_ranges,
 		  lra_reg_info[dregno].live_ranges)))
 	  sorted_moves[mv_num++] = insn;
     }
-  bitmap_clear (&removed_pseudos_bitmap);
+  bitmap_clear (&split_origin_bitmap);
   qsort (sorted_moves, mv_num, sizeof (rtx), move_freq_compare_func);
   /* Coalesced copies, most frequently executed first.	*/
   bitmap_initialize (&coalesced_pseudos_bitmap, &reg_obstack);
   bitmap_initialize (&involved_insns_bitmap, &reg_obstack);
-  for (; mv_num != 0;)
+  for (i = 0; i < mv_num; i++)
     {
-      for (i = 0; i < mv_num; i++)
+      mv = sorted_moves[i];
+      set = single_set (mv);
+      lra_assert (set != NULL && REG_P (SET_SRC (set))
+		  && REG_P (SET_DEST (set)));
+      sregno = REGNO (SET_SRC (set));
+      dregno = REGNO (SET_DEST (set));
+      if (first_coalesced_pseudo[sregno] == first_coalesced_pseudo[dregno])
 	{
-	  mv = sorted_moves[i];
-	  set = single_set (mv);
-	  lra_assert (set != NULL && REG_P (SET_SRC (set))
-		      && REG_P (SET_DEST (set)));
-	  sregno = REGNO (SET_SRC (set));
-	  dregno = REGNO (SET_DEST (set));
-	  if (! lra_intersected_live_ranges_p
-		(lra_reg_info[first_coalesced_pseudo[sregno]].live_ranges,
-		 lra_reg_info[first_coalesced_pseudo[dregno]].live_ranges))
-	    {
-	      coalesced_moves++;
-	      if (lra_dump_file != NULL)
-		fprintf
-		  (lra_dump_file,
-		   "	  Coalescing move %i:r%d(%d)-r%d(%d) (freq=%d)\n",
-		   INSN_UID (mv), sregno, ORIGINAL_REGNO (SET_SRC (set)),
-		   dregno, ORIGINAL_REGNO (SET_DEST (set)),
-		   BLOCK_FOR_INSN (mv)->frequency);
-	      bitmap_ior_into (&involved_insns_bitmap,
-			       &lra_reg_info[sregno].insn_bitmap);
-	      bitmap_ior_into (&involved_insns_bitmap,
-			       &lra_reg_info[dregno].insn_bitmap);
-	      merge_pseudos (sregno, dregno);
-	      i++;
-	      break;
-	    }
+	  coalesced_moves++;
+	  if (lra_dump_file != NULL)
+	    fprintf
+	      (lra_dump_file, "      Coalescing move %i:r%d-r%d (freq=%d)\n",
+	       INSN_UID (mv), sregno, dregno,
+	       BLOCK_FOR_INSN (mv)->frequency);
+	  /* We updated involved_insns_bitmap when doing the merge.  */
 	}
-      /* Collect the rest of copies.  */
-      for (n = 0; i < mv_num; i++)
+      else if (!(lra_intersected_live_ranges_p
+		 (lra_reg_info[first_coalesced_pseudo[sregno]].live_ranges,
+		  lra_reg_info[first_coalesced_pseudo[dregno]].live_ranges)))
 	{
-	  mv = sorted_moves[i];
-	  set = single_set (mv);
-	  lra_assert (set != NULL && REG_P (SET_SRC (set))
-		      && REG_P (SET_DEST (set)));
-	  sregno = REGNO (SET_SRC (set));
-	  dregno = REGNO (SET_DEST (set));
-	  if (first_coalesced_pseudo[sregno] != first_coalesced_pseudo[dregno])
-	    sorted_moves[n++] = mv;
-	  else if (lra_dump_file != NULL)
-	    {
-	      coalesced_moves++;
-	      fprintf
-		(lra_dump_file, "      Coalescing move %i:r%d-r%d (freq=%d)\n",
-		 INSN_UID (mv), sregno, dregno,
-		 BLOCK_FOR_INSN (mv)->frequency);
-	    }
+	  coalesced_moves++;
+	  if (lra_dump_file != NULL)
+	    fprintf
+	      (lra_dump_file,
+	       "  Coalescing move %i:r%d(%d)-r%d(%d) (freq=%d)\n",
+	       INSN_UID (mv), sregno, ORIGINAL_REGNO (SET_SRC (set)),
+	       dregno, ORIGINAL_REGNO (SET_DEST (set)),
+	       BLOCK_FOR_INSN (mv)->frequency);
+	  bitmap_ior_into (&involved_insns_bitmap,
+			   &lra_reg_info[sregno].insn_bitmap);
+	  bitmap_ior_into (&involved_insns_bitmap,
+			   &lra_reg_info[dregno].insn_bitmap);
+	  merge_pseudos (sregno, dregno);
 	}
-      mv_num = n;
     }
-  bitmap_initialize (&removed_pseudos_bitmap, &reg_obstack);
   bitmap_initialize (&used_pseudos_bitmap, &reg_obstack);
   FOR_EACH_BB (bb)
     {
@@ -351,10 +332,7 @@ lra_coalesce (void)
 	    if (! substitute (&insn))
 	      continue;
 	    lra_update_insn_regno_info (insn);
-	    if ((set = single_set (insn)) != NULL_RTX
-		&& REG_P (SET_DEST (set)) && REG_P (SET_SRC (set))
-		&& REGNO (SET_SRC (set)) == REGNO (SET_DEST (set))
-		&& ! side_effects_p (set))
+	    if ((set = single_set (insn)) != NULL_RTX && set_noop_p (set))
 	      {
 		/* Coalesced move.  */
 		if (lra_dump_file != NULL)
@@ -364,7 +342,6 @@ lra_coalesce (void)
 	      }
 	  }
     }
-  bitmap_clear (&removed_pseudos_bitmap);
   bitmap_clear (&used_pseudos_bitmap);
   bitmap_clear (&involved_insns_bitmap);
   bitmap_clear (&coalesced_pseudos_bitmap);
Index: lra-spills.c
===================================================================
--- lra-spills.c	(revision 192325)
+++ lra-spills.c	(working copy)
@@ -22,7 +22,7 @@ along with GCC; see the file COPYING3.	I
 /* This file contains code for a pass to change spilled pseudos into
    memory.
 
-   The pass creates necessary stack slots and assign spilled pseudos
+   The pass creates necessary stack slots and assigns spilled pseudos
    to the stack slots in following way:
 
    for all spilled pseudos P most frequently used first do
@@ -43,15 +43,14 @@ along with GCC; see the file COPYING3.	I
    If at least one stack slot was created, we need to run more passes
    because we have new addresses which should be checked and because
    the old address displacements might change and address constraints
-   (or insn memory constraints) might be not satisfied any more.
+   (or insn memory constraints) might not be satisfied any more.
 
    For some targets, the pass can spill some pseudos into hard
    registers of different class (usually into vector registers)
    instead of spilling them into memory if it is possible and
-   profitable.	Spilling GENERAL_REGS pseudo into SSE registers for
-   modern Intel x86/x86-64 processors is an example of such
-   optimization.  And this is actually recommended by Intel
-   optimization guide.
+   profitable.  Spilling GENERAL_REGS pseudo into SSE registers for
+   Intel Corei7 is an example of such optimization.  And this is
+   actually recommended by Intel optimization guide.
 
    The file also contains code for final change of pseudos on hard
    regs correspondingly assigned to them.  */
@@ -101,8 +100,8 @@ struct pseudo_slot
 /* The stack slots for each spilled pseudo.  Indexed by regnos.	 */
 static struct pseudo_slot *pseudo_slots;
 
-/* The structure describes a stack slot which can be used for several
-   spilled pseudos.  */
+/* The structure describes a register or a stack slot which can be
+   used for several spilled pseudos.  */
 struct slot
 {
   /* First pseudo with given stack slot.  */
@@ -122,7 +121,7 @@ struct slot
 };
 
 /* Array containing info about the stack slots.	 The array element is
-   indexed by the stack slot number in the range [0..slost_num).  */
+   indexed by the stack slot number in the range [0..slots_num).  */
 static struct slot *slots;
 /* The number of the stack slots currently existing.  */
 static int slots_num;
@@ -146,16 +145,16 @@ assign_mem_slot (int i)
   
   x = slots[pseudo_slots[i].slot_num].mem;
   
+  /* We can use a slot already allocated because it is guaranteed the
+     slot provides both enough inherent space and enough total
+     space.  */
   if (x)
     ;
   /* Each pseudo has an inherent size which comes from its own mode,
      and a total size which provides room for paradoxical subregs
-     which refer to the pseudo reg in wider modes.
-     
-     We can use a slot already allocated if it provides both enough
-     inherent space and enough total space.  Otherwise, we allocate a
-     new slot, making sure that it has no less inherent space, and no
-     less total space, then the previous slot.	*/
+     which refer to the pseudo reg in wider modes.  We allocate a new
+     slot, making sure that it has enough inherent space and total
+     space.  */
   else
     {
       rtx stack_slot;
@@ -187,8 +186,6 @@ assign_mem_slot (int i)
   if (BYTES_BIG_ENDIAN && inherent_size < total_size)
     adjust += (total_size - inherent_size);
   
-  /* If we have any adjustment to make, or if the stack slot is the
-     wrong mode, make a new stack slot.	 */
   x = adjust_address_nv (x, GET_MODE (regno_reg_rtx[i]), adjust);
   
   /* Set all of the memory attributes as appropriate for a spill.  */
@@ -217,10 +214,18 @@ regno_freq_compare (const void *v1p, con
 # define STACK_GROWS_DOWNWARD 0
 #endif
 
-/* Sort pseudos according their slot numbers putting ones with smaller
-   numbers first, or last when the frame pointer is not needed.	 So
-   pseudos with the first slot will be finally addressed with smaller
-   address displacement.  */
+/* Sort pseudos according to their slots, putting the slots in the order
+   that they should be allocated.  Slots with lower numbers have the highest
+   priority and should get the smallest displacement from the stack or
+   frame pointer (whichever is being used).
+
+   The first allocated slot is always closest to the frame pointer,
+   so prefer lower slot numbers when frame_pointer_needed.  If the stack
+   and frame grow in the same direction, then the first allocated slot is
+   always closest to the initial stack pointer and furthest away from the
+   final stack pointer, so allocate higher numbers first when using the
+   stack pointer in that case.  The reverse is true if the stack and
+   frame grow in opposite directions.  */
 static int
 pseudo_reg_slot_compare (const void *v1p, const void *v2p)
 {
@@ -234,18 +239,17 @@ pseudo_reg_slot_compare (const void *v1p
   if ((diff = slot_num1 - slot_num2) != 0)
     return (frame_pointer_needed
 	    || !FRAME_GROWS_DOWNWARD == STACK_GROWS_DOWNWARD ? diff : -diff);
-  total_size1 = MAX (PSEUDO_REGNO_BYTES (regno1),
-		     GET_MODE_SIZE (lra_reg_info[regno1].biggest_mode));
-  total_size2 = MAX (PSEUDO_REGNO_BYTES (regno2),
-		     GET_MODE_SIZE (lra_reg_info[regno2].biggest_mode));
+  total_size1 = GET_MODE_SIZE (lra_reg_info[regno1].biggest_mode);
+  total_size2 = GET_MODE_SIZE (lra_reg_info[regno2].biggest_mode);
   if ((diff = total_size2 - total_size1) != 0)
     return diff;
   return regno1 - regno2;
 }
 
-/* Assign spill hard registers to N pseudos in PSEUDO_REGNOS.  Put the
-   pseudos which did not get a spill hard register at the beginning of
-   array PSEUDO_REGNOS.	 Return the number of such pseudos.  */
+/* Assign spill hard registers to N pseudos in PSEUDO_REGNOS which is
+   sorted in order of highest frequency first.  Put the pseudos which
+   did not get a spill hard register at the beginning of array
+   PSEUDO_REGNOS.  Return the number of such pseudos.  */
 static int
 assign_spill_hard_regs (int *pseudo_regnos, int n)
 {
@@ -257,7 +261,7 @@ assign_spill_hard_regs (int *pseudo_regn
   basic_block bb;
   HARD_REG_SET conflict_hard_regs;
   bitmap_head ok_insn_bitmap;
-  bitmap set_jump_crosses = regstat_get_setjmp_crosses ();
+  bitmap setjump_crosses = regstat_get_setjmp_crosses ();
   /* Hard registers which can not be used for any purpose at given
      program point because they are unallocatable or already allocated
      for other pseudos.	 */ 
@@ -287,7 +291,7 @@ assign_spill_hard_regs (int *pseudo_regn
     {
       regno = pseudo_regnos[i];
       rclass = lra_get_allocno_class (regno);
-      if (bitmap_bit_p (set_jump_crosses, regno)
+      if (bitmap_bit_p (setjump_crosses, regno)
 	  || (spill_class
 	      = ((enum reg_class)
 		 targetm.spill_class ((reg_class_t) rclass,
@@ -355,7 +359,7 @@ add_pseudo_to_slot (int regno, int slot_
   else
     {
       first = pseudo_slots[regno].first = &pseudo_slots[slots[slot_num].regno];
-      pseudo_slots[regno].next = pseudo_slots[slots[slot_num].regno].next;
+      pseudo_slots[regno].next = first->next;
       first->next = &pseudo_slots[regno];
     }
   pseudo_slots[regno].mem = NULL_RTX;
@@ -408,17 +412,16 @@ assign_stack_slot_num_and_sort_pseudos (
 /* Recursively process LOC in INSN and change spilled pseudos to the
    corresponding memory or spilled hard reg.  Ignore spilled pseudos
    created from the scratches.	*/
-static bool
+static void
 remove_pseudos (rtx *loc, rtx insn)
 {
   int i;
-  bool res;
   rtx hard_reg;
   const char *fmt;
   enum rtx_code code;
 
   if (*loc == NULL_RTX)
-    return false;
+    return;
   code = GET_CODE (*loc);
   if (code == REG && (i = REGNO (*loc)) >= FIRST_PSEUDO_REGISTER
       && lra_get_regno_hard_regno (i) < 0
@@ -430,24 +433,22 @@ remove_pseudos (rtx *loc, rtx insn)
     {
       hard_reg = spill_hard_reg[i];
       *loc = copy_rtx (hard_reg != NULL_RTX ? hard_reg : pseudo_slots[i].mem);
-      return true;
+      return;
     }
 
-  res = false;
   fmt = GET_RTX_FORMAT (code);
   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
     {
       if (fmt[i] == 'e')
-	res = remove_pseudos (&XEXP (*loc, i), insn) || res;
+	remove_pseudos (&XEXP (*loc, i), insn);
       else if (fmt[i] == 'E')
 	{
 	  int j;
 
 	  for (j = XVECLEN (*loc, i) - 1; j >= 0; j--)
-	    res = remove_pseudos (&XVECEXP (*loc, i, j), insn) || res;
+	    remove_pseudos (&XVECEXP (*loc, i, j), insn);
 	}
     }
-  return res;
 }
 
 /* Convert spilled pseudos into their stack slots or spill hard regs,
@@ -506,10 +507,10 @@ lra_need_for_spills_p (void)
   return false;
 }
 
-/* Change spilled pseudos into memory or spill hard regs.  The
-   function put changed insns on the constraint stack (these insns
-   will be considered on the next constraint pass).  The changed insns
-   are all insns in which pseudos were changed.	 */
+/* Change spilled pseudos into memory or spill hard regs.  Put changed
+   insns on the constraint stack (these insns will be considered on
+   the next constraint pass).  The changed insns are all insns in
+   which pseudos were changed.  */
 void
 lra_spill (void)
 {

