
[yara-branch] patch for register pressure relief, coloring improvement, and some speedups


This patch adds register pressure relief (new flag -frelief).  The
algorithm is close to the one described in Morgan's book.  We search
for loops (and basic blocks) with high register pressure and split
cans that live through those loops but are not used there into two
parts: one part (a new can) corresponds to the loops, and the other
is the rest of the original can.  During coloring this lets us give a
hard register to the second part and memory to the first one, which
increases the chance that other cans in the high-pressure loops get a
hard register.
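
For illustration, here is a minimal sketch of the splitting decision
(the types and the AVAILABLE_REGS limit below are simplified
stand-ins, not the actual yara-branch structures):

/* A can living through a loop but unused there can be split; marking
   the new (loop) part for memory frees one hard register inside the
   loop for the remaining cans.  */
#include <stdbool.h>

#define AVAILABLE_REGS 8	/* stand-in for available_class_regs */

typedef struct can
{
  bool loop_part_spilled_p;	/* the new (loop) part should get memory */
} can_t;

typedef struct loop
{
  int reg_pressure;		/* max register pressure inside the loop */
  int n_through;		/* cans living through but unused here */
  can_t **through;		/* those cans */
} loop_t;

/* Split cans living through LOOP until its pressure excess is gone.  */
void
relieve_loop (loop_t *loop)
{
  int i;

  for (i = 0; i < loop->n_through && loop->reg_pressure > AVAILABLE_REGS; i++)
    {
      loop->through[i]->loop_part_spilled_p = true;
      loop->reg_pressure--;
    }
}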

The patch also improves coloring by ordering cans in the buckets so
that cans with higher priority are popped first from the coloring
stack; as a consequence, those cans have a better chance to get a
more preferable hard register (or memory with a smaller displacement).
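
The insertion itself is an ordered insert into the doubly-linked
bucket list.  Roughly (with a simplified can type, and ignoring the
YARA_NREGS_BUCKET_ORDER variant, which also compares how many hard
registers a can needs):

#include <stddef.h>

typedef struct can
{
  int freq;			/* execution frequency of the live range */
  struct can *next, *prev;	/* bucket links */
} can_t;

/* Insert CAN into *BUCKET keeping ascending frequency order, so the
   most frequent cans are pushed onto the coloring stack last and
   therefore popped (colored) first.  */
void
add_to_ordered_bucket (can_t *can, can_t **bucket)
{
  can_t *before = *bucket, *after = NULL;

  while (before != NULL && before->freq <= can->freq)
    {
      after = before;
      before = before->next;
    }
  can->next = before;
  can->prev = after;
  if (after == NULL)
    *bucket = can;
  else
    after->next = can;
  if (before != NULL)
    before->prev = can;
}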

The patch also speeds up building the IR for the allocator by
calculating whether a given operand can use only one hard register
once per insn code (earlier this was recalculated for every RTL
insn).
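
Roughly, the caching looks like this (a sketch: insn_info and
operand_needs_single_reg are simplified stand-ins for the real
insn_op_info and single_reg_operand_class; the op_set_t macros mirror
the ones the patch adds to yara-int.h):

typedef unsigned long op_set_t;

#define SET_OP(ops, n)  ((ops) |= ((op_set_t) 1) << (n))
#define TEST_OP(ops, n) (((ops) >> (n)) & 1)

struct insn_info
{
  int n_operands;
  op_set_t single_reg_op_set;	/* bit n set: operand n needs one reg */
};

extern int operand_needs_single_reg (int icode, int op_num);

/* Compute the mask once when the info for ICODE is created...  */
void
compute_single_reg_ops (int icode, struct insn_info *info)
{
  int i;

  info->single_reg_op_set = 0;
  for (i = 0; i < info->n_operands; i++)
    if (operand_needs_single_reg (icode, i))
      SET_OP (info->single_reg_op_set, i);
}

/* ...and then just test a bit for every RTL insn with that code.  */
int
single_reg_operand_p (const struct insn_info *info, int op_num)
{
  return TEST_OP (info->single_reg_op_set, op_num);
}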

2006-05-03  Vladimir Makarov  <vmakarov@redhat.com>

	* toplev.h (flag_relief): New external declaration.

	* yara-int.h (YARA_NO_FREQ_BUCKET_ORDER,
	YARA_NREGS_BUCKET_ORDER, YARA_BB_RELIEF): New macros.
	(yara_loop_tree_node): New fields allocno_live_at_start,
	can_through, reg_pressure, and reg_pressure_excess.
	(op_set_t): New typedef.
	(MAX_OP_SET_SIZE, ZERO_OP_SET, SET_OP_RANGE, TEST_OP, CLEAR_OP,
	SET_OP, CLEAR_OP_SET, SET_OP_SET, COPY_OP_SET, OR_OP_SET,
	AND_OP_SET, EQ_OP_SET): New macros.
	(insn_op_info): New field single_reg_op_set.

	* yara-color.c (remove_unecessary_can_conflicts): New function.
	(setup_cover_classes_and_reg_costs): Use the function.
	(reg_pressure_live_can_bitmap,
	curr_reg_pressure_calculation_bb_node, curr_reg_pressure,
	curr_reg_pressure_class, high_reg_pressure_subloops,
	subloop_array_size, quiet_subloops,
	reg_pressure_decrease_allocno_bitmap,
	reg_pressure_decrease_allocno_varray): New static variables.
	(set_up_can_through, update_node_reg_pressure, live_allocno,
	die_allocno, calculate_reg_pressure,
	process_can_to_choose_split_can, choose_can_to_split,
	decrease_reg_pressure, perform_split, get_first_loop_bb,
	get_last_loop_bb, reduce_subloop_reg_pressure,
	reg_pressure_excess, loop_reg_pressure_excess_cmp,
	reduce_loop_reg_pressure, reduce_reg_pressure_inside_loop,
	reduce_reg_pressure): New functions.
	(add_can_copy, split_can_by_split_allocno_bitmap): Add calls of
	remove_unecessary_can_conflicts.
	(add_can_to_ordered_bucket): New function.
	(push_globals_to_stack): Use add_can_to_ordered_bucket instead of
	add_can_to_bucket.  Add debug output.
	(pop_globals_from_stack): Add debug output.
	(global_can_alloc): Use add_can_to_ordered_bucket instead of
	add_can_to_bucket.
	(yara_color): Call reduce_reg_pressure.

	* common.opt (frelief): New entry.

	* yara-ir.c (single_reg_allocno_class): Rename to
	single_reg_operand_class.  Change the signature.
	(mark_output_allocno_death): Remove.
	(build_insn_allocno_conflicts): Change the signature.
	(create_loop_tree_nodes): Allocate allocno_live_at_start and
	can_through.
	(finish_loop_tree_nodes): Free allocno_live_at_start and
	can_through.
	(create_insn_info): Set up single_reg_op_set.
	(build_insn_allocno_copy_conflicts): Move code here from
	mark_output_allocno_death.
	(create_bb_allocno, create_edge_allocno): Set up
	allocno_live_at_start.

Index: toplev.h
===================================================================
--- toplev.h	(revision 112203)
+++ toplev.h	(working copy)
@@ -134,6 +134,7 @@ extern int time_report;
 extern int flag_yara;
 extern int flag_optimistic_coalescing;
 extern int flag_extended_coalescing;
+extern int flag_relief;
 extern int flag_rematerialize;
 extern int flag_split;
 
Index: yara-int.h
===================================================================
--- yara-int.h	(revision 113079)
+++ yara-int.h	(working copy)
@@ -32,6 +32,9 @@ Software Foundation, 51 Franklin Street,
 #define YARA_PRIORITY_COLORING 256
 #define YARA_NO_UPDATE_COSTS 512
 #define YARA_NO_BIASED_COLORING 1024
+#define YARA_NO_FREQ_BUCKET_ORDER 2048
+#define YARA_NREGS_BUCKET_ORDER 4096
+#define YARA_BB_RELIEF 8192
 
 
 #ifdef ENABLE_CHECKING
@@ -124,6 +127,17 @@ struct yara_loop_tree_node
   /* Registers referred in the node.  */
   bitmap regno_refs;
 
+  /* Region allocnos living at the start of the corresponding bb or
+     loop.  */
+  bitmap allocno_live_at_start;
+  /* Cans living through the node (but not used there).  */
+  bitmap can_through;
+  /* Maximal register pressure in the given node.  */
+  int reg_pressure [N_REG_CLASSES];
+  /* Temporary field.  Register pressure excess for the current reg
+     class.  */
+  int reg_pressure_excess;
+
   /* Map regno -> allocno for given loop.  */
   allocno_t *regno_allocno_map;
 
@@ -821,6 +835,24 @@ struct can_copy
   struct can_copy *tied_can_copy;
 };
 
+/* Set of insn operands.  */
+typedef unsigned long op_set_t;
+
+#define MAX_OP_SET_SIZE         ((int) sizeof (op_set_t) * CHAR_BIT)
+
+#define ZERO_OP_SET             ((op_set_t) 0)
+#define SET_OP_RANGE(ops, n, m) 			\
+  ((ops) |= ((~ (op_set_t) 0) >> (MAX_OP_SET_SIZE - (m) + (n) - 1)) << (n))
+#define TEST_OP(ops, n)         (((ops) >> (n)) & 1)
+#define CLEAR_OP(ops, n)        ((ops) &= ~ (((op_set_t) 1) << (n)))
+#define SET_OP(ops, n)          ((ops) |= ((op_set_t) 1) << (n))
+#define CLEAR_OP_SET(ops)       ((ops) = 0)
+#define SET_OP_SET(ops)         ((ops) = ~ ZERO_OP_SET)
+#define COPY_OP_SET(to, from)   ((to) = (from))
+#define OR_OP_SET(to, from)     ((to) |= (from))
+#define AND_OP_SET(to, from)    ((to) &= (from))
+#define EQ_OP_SET(as1, as2)     ((as1) == (as2))
+
 /* Information about the insn of given code.  */
 struct insn_op_info
 {
@@ -833,6 +865,9 @@ struct insn_op_info
   /* The following field value is true if there are commutative
      operands in the insn.  */
   bool commutative_op_p;
+  /* If an operand can use only one hard register, the corresponding
+     bit will be set.  */
+  op_set_t single_reg_op_set;
 };
 
 /* The following is a map: insn code -> insn operand info.  */
Index: yara-color.c
===================================================================
--- yara-color.c	(revision 113079)
+++ yara-color.c	(working copy)
@@ -284,6 +284,7 @@ find_reg_class_closure (void)
   yara_assert (ok_p);
   setup_class_translate ();
 }
+
 
 
 /* Minimal costs of usage the current insn alternative operand placed
@@ -948,6 +949,26 @@ setup_can_classes (void)
   yara_free (can_classes_num);
 }
 
+/* The function removes conflict cans of CAN which have a different
+   cover class.  */
+static void
+remove_unecessary_can_conflicts (can_t can)
+{
+  int i, j;
+  enum reg_class cover_class;
+  can_t *can_vec, conflict_can;
+
+  cover_class = CAN_COVER_CLASS (can);
+  if (cover_class == NO_REGS)
+    return;
+  can_vec = CAN_CONFLICT_CAN_VEC (can);
+  yara_assert (can_vec != NULL);
+  for (i = j = 0; (conflict_can = can_vec [j]) != NULL; j++)
+    if (cover_class == CAN_COVER_CLASS (conflict_can))
+      can_vec [i++] = conflict_can;
+  can_vec [i] = NULL;
+}
+
 /* Function setting up reg class preferences and cover class for
    cans.  */
 static void
@@ -960,7 +981,7 @@ setup_cover_classes_and_reg_costs (void)
   enum op_type op_mode;
   basic_block bb;
   allocno_t a;
-  can_t can, conflict_can, *can_vec;
+  can_t can;
   struct insn_op_info *info;
   int *can_class_cost, *can_memory_cost, *cost_ptr, *costs;
 
@@ -1109,23 +1130,554 @@ setup_cover_classes_and_reg_costs (void)
      cans (remember that memory is assigned to slotno not to
      can).  */
   for (i = 0; i < cans_num; i++)
-    {
-      can = cans [i];
-      cover_class = CAN_COVER_CLASS (can);
-      if (cover_class == NO_REGS)
-	continue;
-      can_vec = CAN_CONFLICT_CAN_VEC (can);
-      yara_assert (can_vec != NULL);
-      for (k = j = 0; (conflict_can = can_vec [j]) != NULL; j++)
-	if (cover_class == CAN_COVER_CLASS (conflict_can))
-	  can_vec [k++] = conflict_can;
-      can_vec [k] = NULL;
-    }
+    remove_unecessary_can_conflicts (cans [i]);
   yara_free (can_classes);
   yara_free (can_memory_cost);
   yara_free (can_class_cost);
 }
 
+
+
+/* This page contains code for register pressure relief: we split cans
+   into parts, one of which lives through loops and basic blocks where
+   it is not used and therefore will probably get memory.  */
+
+/* Bitmap of cans currently live.  It is used for reg pressure
+   calculation.  */
+static bitmap reg_pressure_live_can_bitmap;
+
+/* Current bb node being processed for register pressure
+   calculation.  */
+static struct yara_loop_tree_node *curr_reg_pressure_calculation_bb_node;
+
+/* Current reg pressure for reg classes.  Only elements for cover
+   classes are defined.  */
+static int curr_reg_pressure [N_REG_CLASSES];
+
+/* Current register class (cover class) for which we are trying to
+   reduce register pressure.  */
+static enum reg_class curr_reg_pressure_class;
+
+/* Array of high pressure subloops of the loop being considered for
+   register pressure reduction.  */
+static varray_type high_reg_pressure_subloops;
+
+/* This is just the start of the previous varray.  */
+static struct yara_loop_tree_node **subloop_array;
+
+/* The size of the previous array.  */
+static int subloop_array_size;
+
+/* Array of all subloops (of the loop in consideration) through which
+   the can chosen for splitting lives.  */
+static varray_type quiet_subloops;
+
+/* Bitmap of the current can allocnos for splitting.  */
+static bitmap reg_pressure_decrease_allocno_bitmap;
+
+/* Varray for allocnos of splitted can.  */
+static varray_type reg_pressure_decrease_allocno_varray;
+
+/* The function sets up can_through for NODE.  */
+static void
+set_up_can_through (struct yara_loop_tree_node *node)
+{
+  bitmap_iterator bi;
+  bitmap can_through, regno_refs;
+  int i, regno;
+  allocno_t a;
+  can_t can;
+
+  can_through = node->can_through;
+  regno_refs = node->regno_refs;
+  EXECUTE_IF_SET_IN_BITMAP (node->allocno_live_at_start, 0, i, bi)
+    {
+      a = allocnos [i];
+      if ((regno = ALLOCNO_REGNO (a)) < 0 || bitmap_bit_p (regno_refs, regno))
+	continue;
+      can = ALLOCNO_CAN (a);
+      if (can == NULL)
+	continue;
+      bitmap_set_bit (can_through, CAN_NUM (can));
+    }
+}
+
+/* This recursive function updates max pressure for NODE and its
+   parents for the current reg class.  */
+static void
+update_node_reg_pressure (struct yara_loop_tree_node *node)
+{
+  if (curr_reg_pressure [curr_reg_pressure_class]
+      <= node->reg_pressure [curr_reg_pressure_class])
+    return;
+  node->reg_pressure [curr_reg_pressure_class]
+    = curr_reg_pressure [curr_reg_pressure_class];
+  if (node->father != NULL)
+    update_node_reg_pressure (node->father);
+}
+
+/* This function updates living cans and reg pressure, processing the
+   start of allocno A's life.  */
+static void
+live_allocno (allocno_t a)
+{
+  can_t can;
+
+  can = ALLOCNO_CAN (a);
+  if (can == NULL
+      || bitmap_bit_p (reg_pressure_live_can_bitmap, CAN_NUM (can)))
+    return;
+  bitmap_set_bit (reg_pressure_live_can_bitmap, CAN_NUM (can));
+  curr_reg_pressure_class = CAN_COVER_CLASS (can);
+  if (curr_reg_pressure_class == NO_REGS)
+    return;
+  curr_reg_pressure [curr_reg_pressure_class]++;
+  update_node_reg_pressure (curr_reg_pressure_calculation_bb_node);
+}
+
+/* This function updates living cans and reg pressure, processing
+   allocno A's death.  */
+static void
+die_allocno (allocno_t a)
+{
+  can_t can;
+
+  can = ALLOCNO_CAN (a);
+  if (can == NULL
+      || ! bitmap_bit_p (reg_pressure_live_can_bitmap, CAN_NUM (can)))
+    return;
+  bitmap_clear_bit (reg_pressure_live_can_bitmap, CAN_NUM (can));
+  curr_reg_pressure_class = CAN_COVER_CLASS (can);
+  if (curr_reg_pressure_class == NO_REGS)
+    return;
+  curr_reg_pressure [curr_reg_pressure_class]--;
+  yara_assert (curr_reg_pressure [curr_reg_pressure_class] >= 0);
+}
+
+/* This function calculates the max register pressure of all bbs and
+   loops for all cover classes.  It also sets up can_through for the
+   bbs and loops.  */
+static void
+calculate_reg_pressure (void)
+{
+  int i, j, class_num;
+  unsigned uid;
+  rtx insn, bound;
+  allocno_t a;
+  copy_t before_copies, after_copies, cp;
+  bitmap_iterator bi;
+
+  reg_pressure_live_can_bitmap = yara_allocate_bitmap ();
+  for (i = 0; i < n_basic_blocks; i++)
+    {
+      if (i == ENTRY_BLOCK || i == EXIT_BLOCK)
+	continue;
+      for (class_num = 0; class_num < final_reg_class_cover_size; class_num++)
+	yara_bb_nodes [i].reg_pressure [final_reg_class_cover [class_num]] = 0;
+      set_up_can_through (&yara_bb_nodes [i]);
+    }
+  for (i = 0; i < (int) yara_loops.num; i++)
+    {
+      for (class_num = 0; class_num < final_reg_class_cover_size; class_num++)
+	yara_loop_nodes [i].reg_pressure [final_reg_class_cover [class_num]]
+	  = 0;
+      set_up_can_through (&yara_loop_nodes [i]);
+    }
+  for (i = 0; i < n_basic_blocks; i++)
+    {
+      if (i == ENTRY_BLOCK || i == EXIT_BLOCK)
+	continue;
+      curr_reg_pressure_calculation_bb_node = &yara_bb_nodes [i];
+      for (class_num = 0; class_num < final_reg_class_cover_size; class_num++)
+	curr_reg_pressure [final_reg_class_cover [class_num]] = 0;
+      bitmap_clear (reg_pressure_live_can_bitmap);
+      EXECUTE_IF_SET_IN_BITMAP (yara_bb_nodes [i].allocno_live_at_start,
+				0, j, bi)
+	{
+	  live_allocno (allocnos [j]);
+	}
+      bound = NEXT_INSN (BB_END (curr_reg_pressure_calculation_bb_node->bb));
+      for (insn = BB_HEAD (curr_reg_pressure_calculation_bb_node->bb);
+	   insn != bound;
+	   insn = NEXT_INSN (insn))
+	{
+	  if (! INSN_P (insn))
+	    continue;
+	  uid = INSN_UID (insn);
+	  before_copies = before_insn_copies [uid];
+	  after_copies = after_insn_copies [uid];
+	  for (cp = before_copies; cp != NULL; cp = COPY_NEXT_COPY (cp))
+	    {
+	      if ((a = COPY_SRC (cp)) != NULL)
+		die_allocno (a);
+	      live_allocno (COPY_DST (cp));
+	    }
+	  for (cp = after_copies; cp != NULL; cp = COPY_NEXT_COPY (cp))
+	    {
+	      a = COPY_SRC (cp);
+	      if (ALLOCNO_TYPE (a) != INSN_ALLOCNO)
+		live_allocno (a);
+	      else if (INSN_ALLOCNO_EARLY_CLOBBER (a))
+		{
+		  yara_assert (INSN_ALLOCNO_OP_MODE (a) == OP_OUT);
+		  live_allocno (a);
+		}
+	    }
+	  for (cp = before_copies; cp != NULL; cp = COPY_NEXT_COPY (cp))
+	    die_allocno (COPY_DST (cp));
+	  for (cp = after_copies; cp != NULL; cp = COPY_NEXT_COPY (cp))
+	    {
+	      a = COPY_SRC (cp);
+	      if (ALLOCNO_TYPE (a) == INSN_ALLOCNO)
+		live_allocno (a);
+	    }
+	  for (cp = after_copies; cp != NULL; cp = COPY_NEXT_COPY (cp))
+	    {
+	      die_allocno (COPY_SRC (cp));
+	      if (COPY_DST (cp) != NULL)
+		live_allocno (COPY_DST (cp));
+	    }
+	}
+    }
+  yara_free_bitmap (reg_pressure_live_can_bitmap);
+}
+
+/* The function processes all subloops of the loop in consideration,
+   finds how many subloops CAN lives through, and updates
+   *MAX_SUBLOOPS_NUM and *BEST if necessary.  */
+static void
+process_can_to_choose_split_can (can_t can, int *max_subloops_num, can_t *best)
+{
+  int i, subloops_num, can_num;
+
+  can_num = CAN_NUM (can);
+  subloops_num = 0;
+  for (i = subloop_array_size - 1; i >= 0; i--)
+    if (bitmap_bit_p (subloop_array [i]->can_through, can_num))
+      {
+	subloops_num++;
+	if (*max_subloops_num < subloops_num)
+	  {
+	    *max_subloops_num = subloops_num;
+	    *best = can;
+	  }
+      }
+}
+
+/* The function finds the best can to split for SUBLOOP and forms the
+   array of subloops through which the can lives (quiet subloops).  */
+static can_t
+choose_can_to_split (struct yara_loop_tree_node *subloop)
+{
+  bitmap_iterator bi;
+  bitmap can_through;
+  int max_subloops_num, i, best_num;
+  can_t best, can;
+  
+  can_through = subloop->can_through;
+  max_subloops_num = 0;
+  best = NULL;
+  EXECUTE_IF_SET_IN_BITMAP (can_through, 0, i, bi)
+    {
+      can = cans [i];
+      if (CAN_COVER_CLASS (can) == curr_reg_pressure_class)
+	process_can_to_choose_split_can (can, &max_subloops_num, &best);
+    }
+  if (best != NULL)
+    {
+      best_num = CAN_NUM (best);
+      /* Forming quiet subloops for the best.  */
+      VARRAY_POP_ALL (quiet_subloops);
+      for (i = subloop_array_size - 1; i >= 0; i--)
+	if (bitmap_bit_p (subloop_array [i]->can_through, best_num))
+	  VARRAY_PUSH_GENERIC_PTR (quiet_subloops, subloop_array [i]);
+    }
+  return best;
+}
+
+/* The function decreases register pressure for NODE and all internal
+   nodes after CAN was split.  */
+static void
+decrease_reg_pressure (struct yara_loop_tree_node *node, can_t can)
+{
+  struct yara_loop_tree_node *subloop;
+
+  if (node->loop != NULL)
+    for (subloop = node->inner; subloop; subloop = subloop->next)
+      decrease_reg_pressure (subloop, can);
+  /* Don't consider can for the node again.  */
+  bitmap_clear_bit (node->can_through, CAN_NUM (can));
+  yara_assert (curr_reg_pressure_class == CAN_COVER_CLASS (can));
+  node->reg_pressure [curr_reg_pressure_class]--;
+}
+
+/* The function decreases register pressure by splitting CAN.  */
+static void
+perform_split (can_t can)
+{
+  struct yara_loop_tree_node *subloop, **quiet_subloops_array;
+  int i, n, last, freq, quiet_subloops_array_size;
+  allocno_t a, *can_allocnos;
+  can_t new_can, *can_vec, c;
+
+  quiet_subloops_array
+    = (struct yara_loop_tree_node **) &VARRAY_GENERIC_PTR (quiet_subloops, 0);
+  quiet_subloops_array_size = VARRAY_ACTIVE_SIZE (quiet_subloops);
+  bitmap_clear (reg_pressure_decrease_allocno_bitmap);
+  can_allocnos = CAN_ALLOCNOS (can);
+  for (i = 0; i < quiet_subloops_array_size; i++)
+    {
+      subloop = quiet_subloops_array [i];
+      for (n = 0; (a = can_allocnos [n]) != NULL; n++)
+	if (ALLOCNO_TYPE (a) == REGION_ALLOCNO
+	    && REGION_ALLOCNO_NODE (a) == subloop
+	    && ! bitmap_bit_p (reg_pressure_decrease_allocno_bitmap,
+			       ALLOCNO_NUM (a)))
+	  {
+	    bitmap_set_bit (reg_pressure_decrease_allocno_bitmap,
+			    ALLOCNO_NUM (a));
+	    decrease_reg_pressure (subloop, can);
+	    break;
+	  }
+      if (a == NULL)
+	/* Failure: don't consider can for the subloop again.  */
+	bitmap_clear_bit (subloop->can_through, CAN_NUM (can));
+    }
+  /* Actual splitting.  */
+  VARRAY_POP_ALL (reg_pressure_decrease_allocno_varray);
+  for (last = i = 0; (a = can_allocnos [i]) != NULL; i++)
+    {
+      if (bitmap_bit_p (reg_pressure_decrease_allocno_bitmap, ALLOCNO_NUM (a)))
+	VARRAY_PUSH_GENERIC_PTR (reg_pressure_decrease_allocno_varray, a);
+      else
+	can_allocnos [last++] = a;
+    }
+  yara_assert (last != 0);
+  n = VARRAY_ACTIVE_SIZE (reg_pressure_decrease_allocno_varray);
+  if (n == 0)
+    return;
+  can_allocnos [last] = NULL;
+  /* Create a new can.  */
+  new_can = create_can ();
+  CAN_SLOTNO (new_can) = CAN_SLOTNO (can);
+  can_allocnos = CAN_ALLOCNOS (new_can)
+    = yara_allocate ((n + 1) * sizeof (allocno_t));
+  memcpy (can_allocnos,
+	  &VARRAY_GENERIC_PTR (reg_pressure_decrease_allocno_varray, 0),
+	  n * sizeof (allocno_t));
+  can_allocnos [n] = NULL;
+  CAN_MODE (new_can) = CAN_MODE (can);
+  CAN_COVER_CLASS (new_can) = CAN_COVER_CLASS (can);
+  for (i = 0; (a = can_allocnos [i]) != NULL; i++)
+    ALLOCNO_CAN (a) = new_can;
+  freq = can_freq (new_can);
+  CAN_FREQ (new_can) = freq;
+  CAN_FREQ (can) -= freq;
+  yara_assert (CAN_FREQ (can) >= 0);
+  if (CAN_CALL_P (can))
+    {
+      setup_can_call_info (can);
+      setup_can_call_info (new_can);
+    }
+  CAN_GLOBAL_P (new_can) = CAN_GLOBAL_P (can);
+  can_vec = CAN_CONFLICT_CAN_VEC (can);
+  yara_assert (can_vec != NULL);
+  for (i = 0; (c = can_vec [i]) != NULL; i++)
+    {
+      yara_free (CAN_CONFLICT_CAN_VEC (c));
+      CAN_CONFLICT_CAN_VEC (c) = NULL;
+      create_can_conflicts (c);
+      remove_unecessary_can_conflicts (c);
+    }
+  yara_free (can_vec);
+  CAN_CONFLICT_CAN_VEC (can) = NULL;
+  /* ??? can_copies  */
+  /* cover_class_cost, memory_cost is not used by Chaitin??? */
+  /* ??? hard_reg_costs */
+  n = class_hard_regs_num [CAN_COVER_CLASS (new_can)];
+  CAN_HARD_REG_COSTS (new_can) = yara_allocate (sizeof (int) * n);
+  memset (CAN_HARD_REG_COSTS (new_can), 0, sizeof (int) * n);
+  create_can_conflicts (can);
+  remove_unecessary_can_conflicts (can);
+  create_can_conflicts (new_can);
+  remove_unecessary_can_conflicts (new_can);
+  if (yara_dump_file != NULL)
+    {
+      fprintf (yara_dump_file, "+++Splitting %scan#%d into:\n",
+	       (CAN_GLOBAL_P (can) ? "g" : ""), CAN_NUM (can));
+      print_can (yara_dump_file, can);
+      print_can (yara_dump_file, new_can);
+    }
+}
+
+/* The recursive function returns the first basic block of node LOOP
+   (in the loop tree).  */
+static basic_block
+get_first_loop_bb (struct yara_loop_tree_node *loop)
+{
+  if (loop->bb != NULL)
+    return loop->bb;
+  return get_first_loop_bb (loop->inner);
+}
+
+/* The recursive function returns the last basic block of node LOOP
+   (in the loop tree).  */
+static basic_block
+get_last_loop_bb (struct yara_loop_tree_node *loop)
+{
+  if (loop->bb != NULL)
+    return loop->bb;
+  for (loop = loop->inner; loop->next != NULL; loop = loop->next)
+    ;
+  return get_last_loop_bb (loop);
+}
+
+/* The function reduces register pressure for SUBLOOP.  It finds a can
+   living through the subloop and the biggest number of other subloops
+   of the loop in consideration.  */
+static bool
+reduce_subloop_reg_pressure (struct yara_loop_tree_node *subloop)
+{
+  can_t can;
+
+  if (yara_dump_file != NULL)
+    {
+      fprintf (yara_dump_file, "Trying to reduce reg_pressure in ");
+      if (subloop->bb != NULL)
+	fprintf (yara_dump_file, "BB %d:\n", subloop->bb->index);
+      else
+	fprintf (yara_dump_file, "LOOP %d (%d-%d):\n", subloop->loop->num,
+		 get_first_loop_bb (subloop)->index,
+		 get_last_loop_bb (subloop)->index);
+    }
+  can = choose_can_to_split (subloop);
+  if (can == NULL)
+    return false; /* failure */
+  if (yara_dump_file != NULL)
+    {
+      struct yara_loop_tree_node *loop, **quiet_subloops_array;
+      int i, quiet_subloops_array_size;
+
+      fprintf (yara_dump_file, "  Success with can %d -- all region:\n",
+	       CAN_NUM (can));
+      quiet_subloops_array
+	= (struct yara_loop_tree_node **) &VARRAY_GENERIC_PTR (quiet_subloops,
+							       0);
+      quiet_subloops_array_size = VARRAY_ACTIVE_SIZE (quiet_subloops);
+      for (i = quiet_subloops_array_size - 1; i >= 0; i--)
+	{
+	  loop = quiet_subloops_array [i];
+	  if (loop->bb != NULL)
+	    fprintf (yara_dump_file, "    BB %d:\n", loop->bb->index);
+	  else
+	    fprintf (yara_dump_file, "    LOOP %d (%d-%d):\n",
+		     loop->loop->num, get_first_loop_bb (loop)->index,
+		     get_last_loop_bb (loop)->index);
+	}
+    }
+  perform_split (can);
+  return true;
+}
+
+/* The function returns the register pressure excess in loop or block
+   NODE.  */
+static int
+reg_pressure_excess (struct yara_loop_tree_node *node)
+{
+  return (node->reg_pressure [curr_reg_pressure_class]
+	  - available_class_regs [curr_reg_pressure_class]);
+}
+
+/* The function is used to sort loops, putting loops with smaller
+   register pressure excess first.  */
+static int
+loop_reg_pressure_excess_cmp (const void *ptr1, const void *ptr2)
+{
+  struct yara_loop_tree_node *n1 = *(struct yara_loop_tree_node **) ptr1;
+  struct yara_loop_tree_node *n2 = *(struct yara_loop_tree_node **) ptr2;
+
+  return n1->reg_pressure_excess - n2->reg_pressure_excess;
+}
+
+/* The function reduces register pressure in LOOP.  It collects all
+   subloops with high register pressure and then reduces the pressure
+   in them, starting with the subloops with the highest pressure.  */
+static void
+reduce_loop_reg_pressure (struct yara_loop_tree_node *loop)
+{
+  struct yara_loop_tree_node *subloop;
+  int i;
+
+  VARRAY_POP_ALL (high_reg_pressure_subloops);
+  for (subloop = loop->inner; subloop; subloop = subloop->next)
+    if (reg_pressure_excess (subloop) > 0
+	&& ((YARA_PARAMS & YARA_BB_RELIEF) || subloop->loop != NULL))
+      VARRAY_PUSH_GENERIC_PTR (high_reg_pressure_subloops, subloop);
+  for (;;)
+    {
+      subloop_array = ((struct yara_loop_tree_node **)
+		       &VARRAY_GENERIC_PTR (high_reg_pressure_subloops, 0));
+      subloop_array_size = VARRAY_ACTIVE_SIZE (high_reg_pressure_subloops);
+      if (subloop_array_size == 0)
+	break;
+      for (i = 0; i < subloop_array_size; i++)
+	{
+	  subloop = subloop_array [i];
+	  subloop->reg_pressure_excess = reg_pressure_excess (subloop);
+	}
+      qsort (subloop_array, subloop_array_size,
+	     sizeof (struct yara_loop_tree_node *),
+	     loop_reg_pressure_excess_cmp);
+      subloop = VARRAY_TOP_GENERIC_PTR (high_reg_pressure_subloops);
+      if (subloop->reg_pressure_excess == 0)
+	break;
+      if (! reduce_subloop_reg_pressure (subloop))
+	VARRAY_POP (high_reg_pressure_subloops);
+    }
+}
+
+/* This recursive function reduces register pressure in LOOP and all
+   its subloops.  */
+static void
+reduce_reg_pressure_inside_loop (struct yara_loop_tree_node *loop)
+{
+  struct yara_loop_tree_node *subloop;
+
+  if (loop->loop != NULL && reg_pressure_excess (loop) > 0)
+    {
+      reduce_loop_reg_pressure (loop);
+      for (subloop = loop->inner; subloop; subloop = subloop->next)
+	reduce_reg_pressure_inside_loop (subloop);
+    }
+}
+
+/* The function is the start point of register pressure reduction.  */
+static void
+reduce_reg_pressure (void)
+{
+  int class_num;
+
+  calculate_reg_pressure ();
+  VARRAY_GENERIC_PTR_NOGC_INIT (high_reg_pressure_subloops, 100,
+				"subloops with high reg pressure");
+  VARRAY_GENERIC_PTR_NOGC_INIT (quiet_subloops, 100,
+				"subloops in which allocno is not mentioned");
+  VARRAY_GENERIC_PTR_NOGC_INIT (reg_pressure_decrease_allocno_varray,
+				allocnos_num, "allocnos for new can");
+  reg_pressure_decrease_allocno_bitmap = yara_allocate_bitmap ();
+  for (class_num = 0; class_num < final_reg_class_cover_size; class_num++)
+    {
+      curr_reg_pressure_class = final_reg_class_cover [class_num];
+      reduce_reg_pressure_inside_loop (yara_loop_tree_root);
+    }
+  yara_free_bitmap (reg_pressure_decrease_allocno_bitmap);
+  VARRAY_FREE (reg_pressure_decrease_allocno_varray);
+  VARRAY_FREE (quiet_subloops);
+  VARRAY_FREE (high_reg_pressure_subloops);
+}
+
+
+
 static bool
 add_can_copy (can_t can, can_t another_can, bool to_p, int freq)
 {
@@ -1294,10 +1846,10 @@ split_can_by_split_allocno_bitmap (can_t
       yara_free (CAN_CONFLICT_CAN_VEC (c));
       CAN_CONFLICT_CAN_VEC (c) = NULL;
       create_can_conflicts (c);
+      remove_unecessary_can_conflicts (c);
     }
   yara_free (can_vec);
   CAN_CONFLICT_CAN_VEC (can) = NULL;
-  /* ??? internal_cost, dividing_nodes is needed for cluing.  */
   /* ??? can_copies  */
   /* cover_class_cost, memory_cost is not used by Chaitin??? */
   /* ??? hard_reg_costs */
@@ -1305,7 +1857,9 @@ split_can_by_split_allocno_bitmap (can_t
   CAN_HARD_REG_COSTS (new_can) = yara_allocate (sizeof (int) * n);
   memset (CAN_HARD_REG_COSTS (new_can), 0, sizeof (int) * n);
   create_can_conflicts (can);
+  remove_unecessary_can_conflicts (can);
   create_can_conflicts (new_can);
+  remove_unecessary_can_conflicts (new_can);
   if (yara_dump_file != NULL)
     {
       fprintf (yara_dump_file, "+++Splitting %scan#%d into:\n",
@@ -1561,6 +2115,45 @@ add_can_to_bucket (can_t can, can_t *buc
   *bucket_ptr = can;
 }
 
+/* Add CAN to *BUCKET_PTR bucket, maintaining the order according to
+   frequency (if bit YARA_NO_FREQ_BUCKET_ORDER is clear).  CAN should
+   not be in a bucket before the call.  */
+static void
+add_can_to_ordered_bucket (can_t can, can_t *bucket_ptr)
+{
+  can_t before, after;
+  enum reg_class cover_class;
+  int freq, nregs;
+
+  if ((YARA_PARAMS & YARA_NO_FREQ_BUCKET_ORDER))
+    {
+      before = *bucket_ptr;
+      after = NULL;
+    }
+  else
+    {
+      freq = CAN_FREQ (can);
+      cover_class = CAN_COVER_CLASS (can);
+      nregs = reg_class_nregs [cover_class] [CAN_MODE (can)];
+      for (before = *bucket_ptr, after = NULL;
+	   before != NULL;
+	   after = before, before = CAN_NEXT_BUCKET_CAN (before))
+	if (((YARA_PARAMS & YARA_NREGS_BUCKET_ORDER)
+	     && cover_class == CAN_COVER_CLASS (before)
+	     && nregs < reg_class_nregs [cover_class] [CAN_MODE (before)])
+	    || CAN_FREQ (before) > freq)
+	  break;
+    }
+  CAN_NEXT_BUCKET_CAN (can) = before;
+  CAN_PREV_BUCKET_CAN (can) = after;
+  if (after == NULL)
+    *bucket_ptr = can;
+  else
+    CAN_NEXT_BUCKET_CAN (after) = can;
+  if (before != NULL)
+    CAN_PREV_BUCKET_CAN (before) = can;
+}
+
 /* Delete CAN from *BUCKET_PTR bucket.  It should be there before
    the call.  */
 static void
@@ -1631,13 +2224,15 @@ push_globals_to_stack (void)
       if (colorable_can_bucket != NULL)
 	{
 	  can = colorable_can_bucket;
+	  delete_can_from_bucket (can, &colorable_can_bucket);
+	  if (yara_dump_file != NULL)
+	    fprintf (yara_dump_file, "Pushing %d\n", CAN_NUM (can));
 	  cover_class = CAN_COVER_CLASS (can);
 	  if (cover_class != NO_REGS)
 	    size = reg_class_nregs [cover_class] [CAN_MODE (can)];
 	  yara_assert (CAN_LEFT_CONFLICTS_NUM (can)
 		       + reg_class_nregs [cover_class] [CAN_MODE (can)]
 		       <= available_class_regs [cover_class]);
-	  bucket_ptr = &colorable_can_bucket;
 	}
       else
 	{
@@ -1705,8 +2300,10 @@ push_globals_to_stack (void)
 				(bucket_varray, CAN_LEFT_CONFLICTS_NUM (can)));
 		}
 	    }
+	  delete_can_from_bucket (can, bucket_ptr);
+	  if (yara_dump_file != NULL)
+	    fprintf (yara_dump_file, "Pushing %d (potential spill)\n", CAN_NUM (can));
 	}
-      delete_can_from_bucket (can, bucket_ptr);
       CAN_IN_GRAPH_P (can) = false;
       VARRAY_PUSH_GENERIC_PTR (global_stack_varray, can);
       if (cover_class == NO_REGS)
@@ -1731,7 +2328,8 @@ push_globals_to_stack (void)
 	      conflicts_num = CAN_LEFT_CONFLICTS_NUM (conflict_can);
 	      if (conflicts_num + conflict_size
 		  <= available_class_regs [cover_class])
-		add_can_to_bucket (conflict_can, &colorable_can_bucket);
+		add_can_to_ordered_bucket (conflict_can,
+					   &colorable_can_bucket);
 	      else
 		{
 		  if (first_non_empty_bucket_num > conflicts_num)
@@ -1942,19 +2540,26 @@ pop_globals_from_stack (void)
 	  CAN_HARD_REGNO (can) = -1;
 	  continue;
 	}
+      if (yara_dump_file != NULL)
+	fprintf (yara_dump_file, "popping %d", CAN_NUM (can));
       if (choose_global_hard_reg (can))
 	{
+	  if (yara_dump_file != NULL)
+	    fprintf (yara_dump_file, "-- assign reg\n");
 	  CAN_IN_GRAPH_P (can) = true;
 	  update_copy_costs (can);
 	  continue;
 	}
-      yara_assert (CAN_SLOTNO (can) == CAN_NUM (can));
       if (flag_split && split_global_can (can))
 	{
+	  if (yara_dump_file != NULL)
+	    fprintf (yara_dump_file, "-- push back\n");
 	  /* Process CAN again.  */
 	  VARRAY_PUSH_GENERIC_PTR (global_stack_varray, can);
 	  continue;
 	}
+      if (yara_dump_file != NULL)
+	fprintf (yara_dump_file, "-- spill\n");
       /* ??? local alloc for conflicting and preferenced.  */
       CAN_IN_GRAPH_P (can) = true;
       CAN_HARD_REGNO (can) = -1;
@@ -2016,7 +2621,7 @@ global_can_alloc (void)
 	VARRAY_PUSH_GENERIC_PTR (bucket_varray, NULL);
       if (conflict_cans_size + reg_class_nregs [cover_class] [CAN_MODE (can)]
 	  <= available_class_regs [cover_class])
-	add_can_to_bucket (can, &colorable_can_bucket);
+	add_can_to_ordered_bucket (can, &colorable_can_bucket);
       else
 	add_can_to_bucket (can,
 			   (can_t *) &VARRAY_GENERIC_PTR (bucket_varray,
@@ -3220,7 +3825,7 @@ pseudo_reg_copy_cost (copy_t cp)
 	    cost = memory_move_cost [dst_mode]
                                     [REGNO_REG_CLASS (dst_hard_regno)] [1];
 	  else
-	    /* ??? constant lodaing */
+	    /* ??? constant loading */
 	    cost = register_move_cost [dst_mode]
                                       [REGNO_REG_CLASS (dst_hard_regno)]
 	                              [REGNO_REG_CLASS (dst_hard_regno)];
@@ -3261,6 +3866,8 @@ yara_color (void)
   biased_can_bitmap = yara_allocate_bitmap ();
   conflict_can_bitmap = yara_allocate_bitmap ();
   setup_cover_classes_and_reg_costs ();
+  if (flag_relief)
+    reduce_reg_pressure ();
   add_move_costs ();
   if (yara_dump_file != NULL)
     print_cans (yara_dump_file);
Index: common.opt
===================================================================
--- common.opt	(revision 112203)
+++ common.opt	(working copy)
@@ -709,6 +709,10 @@ fregmove
 Common Report Var(flag_regmove)
 Enables a register move optimization
 
+frelief
+Common Report Var(flag_relief)
+Perform register pressure relief
+
 frematerialize
 Common Report Var(flag_rematerialize)
 Perform a register rematerialization
Index: yara-ir.c
===================================================================
--- yara-ir.c	(revision 113079)
+++ yara-ir.c	(working copy)
@@ -192,15 +192,14 @@ static void set_call_info (allocno_t, vo
 static void set_single_hard_reg_allocno_info (allocno_t, void *, int);
 static enum reg_class single_alt_reg_class (const char *,
 					    struct insn_op_info *, int);
-static enum reg_class single_reg_allocno_class (allocno_t);
+static enum reg_class single_reg_operand_class (rtx insn, int);
 
 #ifdef HAVE_ANY_SECONDARY_MOVES
 static void set_copy_conflict (allocno_t, void *, int);
 #endif
-static void mark_output_allocno_death (allocno_t, rtx);
 static void build_insn_allocno_copy_conflicts (copy_t, rtx, bool);
 static void set_call_info (allocno_t, void *, int);
-static void build_insn_allocno_conflicts (rtx);
+static void build_insn_allocno_conflicts (rtx, op_set_t);
 static allocno_t create_region_allocno (int, struct yara_loop_tree_node *,
 					rtx);
 static int before_insn_copy_compare (const void *, const void *);
@@ -904,11 +903,19 @@ create_loop_tree_nodes (void)
   yara_bb_nodes
     = yara_allocate (sizeof (struct yara_loop_tree_node) * bbs_num);
   for (i = 0; i < bbs_num; i++)
-    yara_bb_nodes [i].regno_refs = yara_allocate_bitmap ();
+    {
+      yara_bb_nodes [i].regno_refs = yara_allocate_bitmap ();
+      yara_bb_nodes [i].allocno_live_at_start = yara_allocate_bitmap ();
+      yara_bb_nodes [i].can_through = yara_allocate_bitmap ();
+    }
   yara_loop_nodes = yara_allocate (sizeof (struct yara_loop_tree_node)
 				   * yara_loops.num);
   for (i = 0; i < (int) yara_loops.num; i++)
-    yara_loop_nodes [i].regno_refs = yara_allocate_bitmap ();
+    {
+      yara_loop_nodes [i].regno_refs = yara_allocate_bitmap ();
+      yara_loop_nodes [i].allocno_live_at_start = yara_allocate_bitmap ();
+      yara_loop_nodes [i].can_through = yara_allocate_bitmap ();
+    }
 }
 
 static void
@@ -916,12 +923,20 @@ finish_loop_tree_nodes (void)
 {
   int i;
 
-  for (i = 0; i < bbs_num; i++)
-    yara_free_bitmap (yara_bb_nodes [i].regno_refs);
-  yara_free (yara_bb_nodes);
   for (i = 0; i < (int) yara_loops.num; i++)
-    yara_free_bitmap (yara_loop_nodes [i].regno_refs);
+    {
+      yara_free_bitmap (yara_loop_nodes [i].can_through);
+      yara_free_bitmap (yara_loop_nodes [i].allocno_live_at_start);
+      yara_free_bitmap (yara_loop_nodes [i].regno_refs);
+    }
   yara_free (yara_loop_nodes);
+  for (i = 0; i < bbs_num; i++)
+    {
+      yara_free_bitmap (yara_bb_nodes [i].can_through);
+      yara_free_bitmap (yara_bb_nodes [i].allocno_live_at_start);
+      yara_free_bitmap (yara_bb_nodes [i].regno_refs);
+    }
+  yara_free (yara_bb_nodes);
 }
 
 
@@ -2735,6 +2750,7 @@ create_insn_info (rtx insn)
       = yara_allocate (sizeof (char *) * recog_data.n_operands
 		       * recog_data.n_alternatives);
   info->commutative_op_p = false;
+  CLEAR_OP_SET (info->single_reg_op_set);
   for (i = 0; i < recog_data.n_operands; i++)
     {
       str = yara_allocate (sizeof (char)
@@ -2763,6 +2779,9 @@ create_insn_info (rtx insn)
 	    }
 	}
     }
+  for (i = 0; i < recog_data.n_operands; i++)
+    if (single_reg_operand_class (insn, i) != NO_REGS)
+      SET_OP (info->single_reg_op_set, i);
   return info;
 }
 
@@ -3000,30 +3019,6 @@ set_copy_conflict (allocno_t live_a, voi
 
 #endif
 
-static void
-mark_output_allocno_death (allocno_t a, rtx insn ATTRIBUTE_UNUSED)
-{
-  allocno_t curr_a;
-
-  mark_allocno_death (a);
-  if (ALLOCNO_TYPE (a) != INSN_ALLOCNO)
-    return;
-  yara_assert (INSN_ALLOCNO_OP_MODE (a) == OP_OUT
-	       || INSN_ALLOCNO_OP_MODE (a) == OP_INOUT);
-  /* We assume that each addr allocno can occur at most once in output
-     memory allocno.  */
-  for (curr_a = curr_insn_allocnos;
-       curr_a != NULL;
-       curr_a = INSN_ALLOCNO_NEXT (curr_a))
-    if (INSN_ALLOCNO_ADDR_OUTPUT_ALLOCNO (curr_a) == a)
-      {
-	yara_assert (GET_CODE (*INSN_ALLOCNO_LOC (a)) == MEM);
-        yara_assert (find_post_insn_allocno_copy (curr_a, insn) == NULL);
-	mark_allocno_death (curr_a);
-	INSN_ALLOCNO_ADDR_OUTPUT_ALLOCNO (curr_a) = NULL;
-      }
-}
-
 static bool
 copy_src_p (copy_t list, allocno_t src)
 {
@@ -3039,7 +3034,7 @@ static void
 build_insn_allocno_copy_conflicts (copy_t list, rtx insn, bool after_insn_p)
 {
   int regno;
-  allocno_t src;
+  allocno_t src, curr_a;
   copy_t cp;
   bool no_map_change_p;
 
@@ -3059,8 +3054,27 @@ build_insn_allocno_copy_conflicts (copy_
 	    }
 	  else
 	    {
-	      mark_output_allocno_death (src, insn);
-	      if (regno >= 0 && HARD_REGISTER_NUM_P (regno))
+	      mark_allocno_death (src);
+	      if (regno < 0)
+		{
+		  yara_assert (INSN_ALLOCNO_OP_MODE (src) == OP_OUT
+			       || INSN_ALLOCNO_OP_MODE (src) == OP_INOUT);
+		  /* We assume that each addr allocno can occur at most
+		     once in an output memory allocno.  */
+		  if (GET_CODE (*INSN_ALLOCNO_LOC (src)) == MEM)
+		    for (curr_a = curr_insn_allocnos;
+			 curr_a != NULL;
+			 curr_a = INSN_ALLOCNO_NEXT (curr_a))
+		      if (INSN_ALLOCNO_ADDR_OUTPUT_ALLOCNO (curr_a) == src)
+			{
+			  yara_assert
+			    (find_post_insn_allocno_copy (curr_a, insn)
+			     == NULL);
+			  mark_allocno_death (curr_a);
+			  INSN_ALLOCNO_ADDR_OUTPUT_ALLOCNO (curr_a) = NULL;
+			}
+		}
+	      else if (HARD_REGISTER_NUM_P (regno))
 		{
 		  /* It is a copy of out/inout allocno into hard
 		     register.  */
@@ -3239,21 +3253,21 @@ single_alt_reg_class (const char *constr
    register.  If it is so, the function returns the class of the hard
    register.  Otherwise it returns NO_REGS.  */
 static enum reg_class
-single_reg_allocno_class (allocno_t a)
+single_reg_operand_class (rtx insn, int op_num)
 {
   enum reg_class cl, next_cl;
   struct insn_op_info *info;
-  int i, op_num;
+  int i, n_alts;
+  char **constraints;
 
-  op_num = INSN_ALLOCNO_TYPE (a) - OPERAND_BASE;
-  info = insn_infos [INSN_UID (INSN_ALLOCNO_INSN (a))];
-  if (op_num < 0 || info->n_alts == 0)
+  info = insn_infos [INSN_UID (insn)];
+  if (op_num < 0 || (n_alts = info->n_alts) == 0)
     return NO_REGS;
   cl = NO_REGS;
-  for (i = 0; i < info->n_alts; i++)
+  constraints = &info->op_constraints [op_num * n_alts];
+  for (i = 0; i < n_alts; i++)
     {
-      next_cl = single_alt_reg_class (info->op_constraints
-				      [op_num * info->n_alts + i], info, i);
+      next_cl = single_alt_reg_class (constraints [i], info, i);
       if ((cl != NO_REGS && next_cl != cl)
 	  || available_class_regs [next_cl] > 1)
 	return NO_REGS;
@@ -3262,10 +3276,12 @@ single_reg_allocno_class (allocno_t a)
   return cl;
 }
 
+/* The function creates conflicts for allocnos of INSN whose operands
+   require a single hard register, as given by SINGLE_REG_OP_SET.  */
 static void
-build_insn_allocno_conflicts (rtx insn)
+build_insn_allocno_conflicts (rtx insn, op_set_t single_reg_op_set)
 {
-  int i, hard_regno;
+  int i, hard_regno, op_num;
   allocno_t a;
   rtx link;
 
@@ -3297,16 +3313,20 @@ build_insn_allocno_conflicts (rtx insn)
 			      GET_MODE (output_insn_hard_regs [i]));
       }
 
-  for (a = curr_insn_allocnos; a != NULL; a = INSN_ALLOCNO_NEXT (a))
-    if (INSN_ALLOCNO_OP_MODE (a) == OP_IN)
-      {
-	enum reg_class cl;
-
-	cl = single_reg_allocno_class (a);
-	if (cl != NO_REGS)
+
+  if (! EQ_OP_SET (single_reg_op_set, ZERO_OP_SET))
+    for (a = curr_insn_allocnos; a != NULL; a = INSN_ALLOCNO_NEXT (a))
+      if (INSN_ALLOCNO_OP_MODE (a) == OP_IN
+	  && (op_num = INSN_ALLOCNO_TYPE (a) - OPERAND_BASE) >= 0
+	  && TEST_OP (single_reg_op_set, op_num))
+	{
+	  enum reg_class cl;
+
+	  cl = single_reg_operand_class (insn, op_num);
+	  yara_assert (cl != NO_REGS);
 	  process_live_allocnos (set_single_hard_reg_allocno_info,
 				 (void *) cl);
-      }
+	}
 
   /* Death used allocnos: */
   for (a = curr_insn_allocnos; a != NULL; a = INSN_ALLOCNO_NEXT (a))
@@ -3353,17 +3373,20 @@ build_insn_allocno_conflicts (rtx insn)
 	|| INSN_ALLOCNO_OP_MODE (a) == OP_INOUT)
       mark_allocno_live (a, false);
 
-  for (a = curr_insn_allocnos; a != NULL; a = INSN_ALLOCNO_NEXT (a))
-    if (INSN_ALLOCNO_OP_MODE (a) == OP_OUT
-	|| INSN_ALLOCNO_OP_MODE (a) == OP_INOUT)
-      {
-	enum reg_class cl;
-
-	cl = single_reg_allocno_class (a);
-	if (cl != NO_REGS)
+  if (! EQ_OP_SET (single_reg_op_set, ZERO_OP_SET))
+    for (a = curr_insn_allocnos; a != NULL; a = INSN_ALLOCNO_NEXT (a))
+      if ((INSN_ALLOCNO_OP_MODE (a) == OP_OUT
+	   || INSN_ALLOCNO_OP_MODE (a) == OP_INOUT)
+	  && (op_num = INSN_ALLOCNO_TYPE (a) - OPERAND_BASE) >= 0
+	  && TEST_OP (single_reg_op_set, op_num))
+	{
+	  enum reg_class cl;
+
+	  cl = single_reg_operand_class (insn, op_num);
+	  yara_assert (cl != NO_REGS);
 	  process_live_allocnos (set_single_hard_reg_allocno_info,
 				 (void *) cl);
-      }
+	}
 
   /* Set up hard regs without allocnos */
   for (i = 0; i < output_insn_hard_reg_num; i++)
@@ -3489,6 +3512,7 @@ create_insn_copies (rtx insn)
   allocno_t src, dst;
   enum op_type op_mode;
   rtx x;
+  struct insn_op_info *info;
 
   uid = INSN_UID (insn);
   after.point_type = AFTER_INSN;
@@ -3572,7 +3596,9 @@ create_insn_copies (rtx insn)
   after_insn_copies [INSN_UID (insn)]
     = sort_copy_list (after_insn_copies [INSN_UID (insn)],
 		      after_insn_copy_compare);
-  build_insn_allocno_conflicts (insn);
+  info = insn_infos [INSN_UID (insn)];
+  build_insn_allocno_conflicts
+    (insn, info == NULL ? ZERO_OP_SET : info->single_reg_op_set);
 }
 
 static void
@@ -3962,7 +3988,11 @@ create_bb_allocno (int regno, struct yar
 	      = create_region_allocno (regno, bb_node->father, NULL_RTX);
 	}
       if (start_p)
-	bb_node->regno_allocno_map [regno] = outside_a;
+	{
+	  bb_node->regno_allocno_map [regno] = outside_a;
+	  bitmap_set_bit (bb_node->allocno_live_at_start,
+			  ALLOCNO_NUM (outside_a));
+	}
       add_live_through_allocno (outside_a, start_p, bb_node->bb, NULL);
     }
   else if (start_p)
@@ -3982,6 +4012,7 @@ create_bb_allocno (int regno, struct yar
 	}
       point.point_type = AT_BB_START;
       cp = create_copy (a, outside_a, point, NULL_RTX);
+      bitmap_set_bit (bb_node->allocno_live_at_start, ALLOCNO_NUM (a));
     }
   else
     {
@@ -4094,7 +4125,11 @@ create_edge_allocno (int regno, edge e, 
 	IOR_HARD_REG_SET (ALLOCNO_HARD_REG_CONFLICTS (outside_a),
 			  prohibited_abnormal_edge_hard_regs);
       if (entry_p)
-	loop_node->regno_allocno_map [regno] = outside_a;
+	{
+	  loop_node->regno_allocno_map [regno] = outside_a;
+	  bitmap_set_bit (loop_node->allocno_live_at_start,
+			  ALLOCNO_NUM (outside_a));
+	}
       add_live_through_allocno (outside_a, false, NULL, e);
     }
   else if (entry_p)
@@ -4115,6 +4150,7 @@ create_edge_allocno (int regno, edge e, 
 	  a = outside_a;
 	  loop_node->regno_allocno_map [regno] = a;
 	}
+      bitmap_set_bit (loop_node->allocno_live_at_start, ALLOCNO_NUM (a));
       if (a == outside_a)
 	add_live_through_allocno (outside_a, false, NULL, e);
       else
