[yara-branch] patch for cost calculation and stack compaction improvements and some bug fixes


The following patch mostly

  o improves hard register cost calculation for cans, mainly on
    two-address targets where insns other than moves use explicit
    hard registers (the x86_64 port generates a lot of such insns);
    see the first sketch after this list.

  o implements another, more aggressive way of stack compaction,
    which can be switched on through yara-params.  The default
    compaction is designed to maintain correct displacements.  When
    the whole stack is addressed with in-range displacements, we can
    compact the stack according to the usage frequencies of the
    stack slots (see the second sketch after this list).  This
    technique is switched off for now because I still have no code
    to verify that only valid displacements are used to address
    stack slots.  You can use it for x86 because any displacement is
    valid there.  It is probably safe for x86_64 too.

  o fixes bugs in register cost calculation, register elimination,
    stack slot displacement calculation, allocno coalescing, and
    copy emission, which affected the PPC port and, to a lesser
    extent, the x86_64 port.  I am working on the PPC port now, but
    unlike the x86 and x86_64 ports it is still far from working
    (although about 40% of the SPEC2000 tests run correctly now).

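The second point can be sketched the same way: once we know every
stack reference uses an in-range displacement, slots may be reordered
freely, so the most frequently used slots get the smallest
displacements.  The slot sizes and frequencies below are invented,
and the real compact_stack also has to honor alignment and slot
conflicts, which this toy version ignores.

#include <stdio.h>
#include <stdlib.h>

struct slot { const char *name; int size; int freq; int start; };

/* Sort the hottest slots first.  */
static int
freq_compare (const void *p1, const void *p2)
{
  const struct slot *s1 = p1, *s2 = p2;
  return s2->freq - s1->freq;
}

int
main (void)
{
  struct slot slots[] = {
    { "a", 8, 10, 0 }, { "b", 4, 500, 0 }, { "c", 16, 90, 0 } };
  int i, n = sizeof slots / sizeof *slots, offset = 0;

  qsort (slots, n, sizeof *slots, freq_compare);
  for (i = 0; i < n; i++)
    {
      slots[i].start = offset;  /* the hottest slot gets displacement 0 */
      offset += slots[i].size;
    }
  for (i = 0; i < n; i++)
    printf ("slot %s: freq %d -> displacement %d\n",
            slots[i].name, slots[i].freq, slots[i].start);
  return 0;
}
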
Vlad


2006-06-23  Vladimir Makarov  <vmakarov@redhat.com>


	* yara-int.h (YARA_AGGRESSIVE_STACK_COMPACTION): New macro.
	(get_duplication_allocno): New external function declaration.

	* yara-ir.c (get_duplication_allocno): Make it external.
	(get_move): Check that cans are from different partition sets.
	(make_aggressive_coalescing): Don't coalesce allocnos with
	equivalent locations.
	
	* yara-insn.c (assign_constraint): Check hard register conflicts
	for hard register allocnos.

	* yara-color.c (update_min_op_costs): Take
	CANNOT_CHANGE_MODE_CLASS into account.
	(setup_cover_classes_and_reg_costs): Check undefined costs.
	(decrease_hard_reg_cost): New function.
	(add_move_costs): Use the new function.  Take hard registers in
	insns other than moves into account.

	* yara-final.c (get_eliminate_subst_rtx): Change type of the
	offset.
	(get_slot_start): Check the value of FRAME_GROWS_DOWNWARD, not
	whether it is defined.
	(get_allocno_memory_slot_rtx): Ditto.  Fix the calculation of
	displacement relative to the stack pointer.
	(emit_plus_assign): New function.
	(emit_copy): Use it.
	(add_copy_list): Put copies after jump insn on the CFG edges.

	* yara-trans.c (reserve_stack_memory): Check the value of
	FRAME_GROWS_DOWNWARD, not whether it is defined.
	(find_free_stack_memory, get_stack_memory_start_frame_offset,
	register_slot_start_change, register_memory_slot_usage,
	get_temp_stack_memory_slot_rtx, memory_slot_intersected,
	undo_memory_slot_change): Ditto.
	(compact_stack): Add code for full rearrangement of stack slots.
	(check_elimination_in_addr, eliminate_reg): Traverse the
	elimination chain only for the given register.
	(check_hard_regno_memory_on_constraint): Check possible
	alternatives for all operands.
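
Several of the yara-final.c and yara-trans.c entries above replace
preprocessor conditionals with run-time tests of
FRAME_GROWS_DOWNWARD, which is now treated as an expression with a
value rather than a macro that may or may not be defined.  The
branch shapes below are lifted from reserve_stack_memory; the
fallback #define and the harness around them are only here to make
the sketch compile on its own.

#include <assert.h>
#include <stdio.h>

#ifndef FRAME_GROWS_DOWNWARD
#define FRAME_GROWS_DOWNWARD 1	/* normally comes from target headers */
#endif

static void
reserve (int start, int size)
{
  int begin, bound;

  /* Old style: select the code at preprocessing time with
     #ifdef FRAME_GROWS_DOWNWARD ... #else ... #endif.
     New style: test the value at run time and let the compiler
     fold the dead branch away.  */
  if (FRAME_GROWS_DOWNWARD)
    {
      begin = start - size + 1;
      assert (begin >= 0);
      bound = start + 1;
    }
  else
    {
      begin = start;
      bound = start + size;
    }
  printf ("reserved stack bytes [%d, %d)\n", begin, bound);
}

int
main (void)
{
  reserve (15, 8);
  return 0;
}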



Index: yara-int.h
===================================================================
--- yara-int.h	(revision 114654)
+++ yara-int.h	(working copy)
@@ -34,6 +34,7 @@ Software Foundation, 51 Franklin Street,
 #define YARA_NO_FREQ_BUCKET_ORDER 1024
 #define YARA_NREGS_BUCKET_ORDER 2048
 #define YARA_BB_RELIEF 4096
+#define YARA_AGGRESSIVE_STACK_COMPACTION 8192
 
 
 #ifdef ENABLE_CHECKING
@@ -906,6 +907,7 @@ extern allocno_t insn_allocno (rtx, rtx)
 extern can_t create_can (void);
 extern void setup_can_call_info (can_t);
 extern int can_freq (can_t);
+extern allocno_t get_duplication_allocno (allocno_t, bool);
 extern void print_can (FILE *, can_t);
 extern void debug_can (can_t);
 extern void print_cans (FILE *);
Index: yara-insn.c
===================================================================
--- yara-insn.c	(revision 114654)
+++ yara-insn.c	(working copy)
@@ -765,7 +765,11 @@ assign_constraint (allocno_t a, const ch
 		     && hard_reg_in_set_p (get_allocno_hard_regno
 					   (a, ALLOCNO_REGNO (a)),
 					   ALLOCNO_MODE (a),
-					   reg_class_contents [cl]))
+					   reg_class_contents [cl])
+		     && hard_reg_not_in_set_p (get_allocno_hard_regno
+					       (a, ALLOCNO_REGNO (a)),
+					       ALLOCNO_MODE (a),
+					       ALLOCNO_HARD_REG_CONFLICTS (a)))
 	      {
 		yara_assert (INSN_ALLOCNO_TIED_ALLOCNO (a) == NULL
 			     || INSN_ALLOCNO_ORIGINAL_P (a));
Index: yara-color.c
===================================================================
--- yara-color.c	(revision 114654)
+++ yara-color.c	(working copy)
@@ -402,11 +402,12 @@ static void
 update_min_op_costs (allocno_t a, enum reg_class class, bool mem_p)
 {
   int op_num, cost, i;
-  bool equiv_const_p;
+  bool equiv_const_p, check_mode_change_p;
   enum reg_class cl, *classes;
   enum machine_mode mode;
   enum op_type op_mode;
   can_t can;
+  rtx op, reg;
 
   yara_assert (ALLOCNO_TYPE (a) == INSN_ALLOCNO);
   yara_assert (class != NO_REGS || mem_p);
@@ -444,6 +445,13 @@ update_min_op_costs (allocno_t a, enum r
   yara_assert (cost >= 0);
   if (cost < min_op_memory_cost [op_num])
     min_op_memory_cost [op_num] = cost;
+  reg = op = *INSN_ALLOCNO_LOC (a);
+  check_mode_change_p = false;
+  if (GET_CODE (op) == SUBREG)
+    {
+      SKIP_TO_REG (reg, op);
+      check_mode_change_p = REG_P (reg);
+    }
   for (i = 0; (cl = classes [i]) != NO_REGS; i++)
     {
       if (mem_p)
@@ -451,6 +459,9 @@ update_min_op_costs (allocno_t a, enum r
 		 ? memory_move_cost [mode] [cl] [0] : 0)
 		+ (op_mode == OP_OUT || op_mode == OP_INOUT
 		   ? memory_move_cost [mode] [cl] [1] : 0));
+      else if (check_mode_change_p
+	       && CANNOT_CHANGE_MODE_CLASS (GET_MODE (reg), GET_MODE (op), cl))
+	continue;
       else
 	{
 	  cost = 0;
@@ -1164,16 +1175,16 @@ setup_cover_classes_and_reg_costs (void)
 			  * freq);
 		    costs = &can_class_cost [can_num * N_REG_CLASSES];
 		    for (i = 0; (cl1 = classes [i]) != NO_REGS; i++)
-		      {
-			/* ??? Tables to speed up.  */
-			if ((op_mode == OP_IN || op_mode == OP_INOUT)
-			    && ! class_subset_p [cl1] [cl])
-			  costs [cl1]
-			    += register_move_cost [mode] [cl1] [cl] * freq;
-			if ((op_mode == OP_OUT || op_mode == OP_INOUT)
-			    && ! class_subset_p [cl] [cl1])
-			  costs [cl1]
-			    += register_move_cost [mode] [cl] [cl1] * freq;
+		      if (costs [cl1] != INT_MAX)
+			{
+			  if ((op_mode == OP_IN || op_mode == OP_INOUT)
+			      && ! class_subset_p [cl1] [cl])
+			    costs [cl1]
+			      += register_move_cost [mode] [cl1] [cl] * freq;
+			  if ((op_mode == OP_OUT || op_mode == OP_INOUT)
+			      && ! class_subset_p [cl] [cl1])
+			    costs [cl1]
+			      += register_move_cost [mode] [cl] [cl1] * freq;
 		      }
 		  }
 	      }	
@@ -1821,18 +1832,53 @@ add_can_copies (can_t dst_can, can_t src
   CAN_COPIES (dst_can)->tied_can_copy = CAN_COPIES (src_can);
 }
 
+/* The following function decreases cost of HARD_REGNO for can of
+   allocno A by cost of move to hard register (if to_p) or move from
+   the hard register multiplied by FREQ.  */
+
+static void
+decrease_hard_reg_cost (allocno_t a, int hard_regno, int freq, bool to_p)
+{
+  int i;
+  enum machine_mode mode;
+  enum reg_class class, cover_class;
+  can_t can;
+
+  can = ALLOCNO_CAN (a);
+  if (can == NULL)
+    return;
+  cover_class = CAN_COVER_CLASS (can);
+  if (can == NULL
+      || ! TEST_HARD_REG_BIT (reg_class_contents [cover_class],
+			      hard_regno))
+    return;
+  yara_assert (cover_class != NO_REGS);
+  mode = CAN_MODE (can);
+  class = REGNO_REG_CLASS (hard_regno);
+  if (TEST_HARD_REG_BIT (no_alloc_regs, hard_regno))
+    return;
+  i = class_hard_reg_index [cover_class] [hard_regno];
+  CAN_HARD_REG_COSTS (can) [i]
+    -= freq * (to_p
+	       ? register_move_cost [mode] [class] [cover_class]
+	       : register_move_cost [mode] [cover_class] [class]);
+}
+
+/* To make a better subsequent hard register choice the function
+   stores information about move insns between cans and changes hard
+   register costs of cans which probably get the same hard register as
+   an explicit hard register in a RTL insn usually because of
+   constraints of two-address insn architecture.  */
 static void
 add_move_costs (void)
 {
-  int freq, i, hard_regno;
+  int i, freq, hard_regno;
   rtx insn, bound, set, dst, src;
   basic_block bb;
   allocno_t a, src_a, dst_a;
   copy_t cp;
-  can_t can, src_can, dst_can;
+  can_t src_can, dst_can;
   bool to_p;
-  enum machine_mode mode;
-  enum reg_class class, cover_class;
 
   if (flag_relief)
     {
@@ -1883,25 +1929,7 @@ add_move_costs (void)
 		  }
 		else
 		  continue;
-		can = ALLOCNO_CAN (a);
-		if (can == NULL)
-		  continue;
-		cover_class = CAN_COVER_CLASS (can);
-		if (can == NULL
-		    || ! TEST_HARD_REG_BIT (reg_class_contents [cover_class],
-					    hard_regno))
-		  continue;
-		yara_assert (cover_class != NO_REGS);
-		mode = CAN_MODE (can);
-		class = REGNO_REG_CLASS (hard_regno);
-		if (TEST_HARD_REG_BIT (no_alloc_regs, hard_regno))
-		  continue;
-		i = class_hard_reg_index [cover_class] [hard_regno];
-		CAN_HARD_REG_COSTS (can) [i]
-		  -= freq
-		     * (to_p
-			? register_move_cost [mode] [class] [cover_class]
-			: register_move_cost [mode] [cover_class] [class]);
+		decrease_hard_reg_cost (a, hard_regno, freq, to_p);
 	      }
 	    else
 	      {
@@ -1912,6 +1940,45 @@ add_move_costs (void)
 		add_can_copies (dst_can, src_can, freq);
 	      }
 	  }
+	else
+	  {
+	    allocno_t src, dst;
+	    int src_regno, dst_regno;
+
+	    for (src = insn_allocnos [INSN_UID (insn)];
+		 src != NULL;
+		 src = INSN_ALLOCNO_NEXT (src))
+	      /* How to handle SUBREGs???  */
+	      if (INSN_ALLOCNO_OP_MODE (src) == OP_IN
+		  && REG_P (*INSN_ALLOCNO_LOC (src))
+		  && find_reg_note (insn, REG_DEAD,
+				    *INSN_ALLOCNO_LOC (src)) != NULL_RTX
+		  && (dst = get_duplication_allocno (src, true)) != NULL
+		  && REG_P (*INSN_ALLOCNO_LOC (dst))
+		  && ALLOCNO_MODE (src) == ALLOCNO_MODE (dst))
+		{
+		  src_regno = ALLOCNO_REGNO (src);
+		  dst_regno = ALLOCNO_REGNO (dst);
+		  if (! HARD_REGISTER_NUM_P (src_regno)
+		      && ! HARD_REGISTER_NUM_P (dst_regno))
+		    continue;
+		  if (! HARD_REGISTER_NUM_P (dst_regno))
+		    {
+		      to_p = true;
+		      a = dst;
+		      hard_regno = ALLOCNO_REGNO (src);
+		    }
+		  else if (! HARD_REGISTER_NUM_P (src_regno))
+		    {
+		      to_p = false;
+		      a = src;
+		      hard_regno = ALLOCNO_REGNO (dst);
+		    }
+		  else
+		    continue;
+		  decrease_hard_reg_cost (a, hard_regno, freq, to_p);
+		}
+	  }
     }
 }
 
Index: yara-final.c
===================================================================
--- yara-final.c	(revision 114654)
+++ yara-final.c	(working copy)
@@ -57,7 +57,8 @@ static bool emit_secondary_memory_mode_m
 					     enum machine_mode);
 static void emit_secondary_memory_move (rtx, rtx, copy_t, enum machine_mode,
 					enum machine_mode, bool);
-static rtx get_eliminate_subst_rtx (rtx *, int, int, int);
+static rtx get_eliminate_subst_rtx (rtx *, int, int, HOST_WIDE_INT);
+static void emit_plus_assign (int, allocno_t, rtx, HOST_WIDE_INT);
 static void emit_copy (copy_t);
 static rtx copy_insns (rtx);
 static void emit_insns_at_bb_start (rtx, basic_block);
@@ -521,11 +522,10 @@ get_slot_start (struct memory_slot *slot
 {
   if (slot->mem == NULL_RTX)
     {
-#ifdef FRAME_GROWS_DOWNWARD
-      return slot->start + offset - slot->size + 1;
-#else
-      return slot->start + offset;
-#endif
+      if (FRAME_GROWS_DOWNWARD)
+	return slot->start + offset - slot->size + 1;
+      else
+	return slot->start + offset;
     }
   else
     {
@@ -1673,27 +1673,49 @@ get_allocno_memory_slot_rtx (struct memo
     {
       HOST_WIDE_INT disp;
 
-#ifdef FRAME_GROWS_DOWNWARD
-      if (stack_frame_pointer_can_be_eliminated_p
-	  && obligatory_stack_frame_pointer_elimination_p)
-	disp = (frame_stack_pointer_offset
-		+ final_stack_memory_start_frame_offset
-		+ memory_slot->start - memory_slot->size + 2
-		- final_rounded_slot_memory_size);
-      else
-	disp = (final_stack_memory_start_frame_offset
-		+ frame_hard_frame_pointer_offset - memory_slot->start);
-#else
-      if (stack_frame_pointer_can_be_eliminated_p
-	  && obligatory_stack_frame_pointer_elimination_p)
-	disp = (frame_stack_pointer_offset
-		- final_stack_memory_start_frame_offset
-		+ memory_slot->start + memory_slot->size
-		- final_rounded_slot_memory_size);
+      if (FRAME_GROWS_DOWNWARD)
+	{
+	  if (stack_frame_pointer_can_be_eliminated_p
+	      && obligatory_stack_frame_pointer_elimination_p)
+	    {
+	      /* Prefer slot arrangement with smaller displacement for
+		 slots with smaller start (because they have higher
+		 priority).  */
+	      if (final_stack_memory_start_frame_offset
+		  + frame_stack_pointer_offset <= 1)
+		disp = (final_stack_memory_start_frame_offset
+			+ frame_stack_pointer_offset - memory_slot->start);
+	      else
+		disp = (final_stack_memory_start_frame_offset
+			+ frame_stack_pointer_offset + memory_slot->start
+			- memory_slot->size
+			- final_rounded_slot_memory_size + 2);
+	    }
+	  else
+	    disp = (final_stack_memory_start_frame_offset
+		    + frame_hard_frame_pointer_offset - memory_slot->start);
+	}
       else
-	disp = (final_stack_memory_start_frame_offset
-		+ frame_hard_frame_pointer_offset + memory_slot->start);
-#endif
+	{
+	  if (stack_frame_pointer_can_be_eliminated_p
+	      && obligatory_stack_frame_pointer_elimination_p)
+	    {
+	      /* Prefer slot arrangement with smaller displacement for
+		 slots with smaller start (because they have higher
+		 priority).  */
+	      if (final_stack_memory_start_frame_offset
+		  + frame_stack_pointer_offset >= -1)
+		disp = (final_stack_memory_start_frame_offset
+			+ frame_stack_pointer_offset + memory_slot->start);
+	      else
+		disp = (final_stack_memory_start_frame_offset
+			+ frame_stack_pointer_offset - memory_slot->start
+			- memory_slot->size + final_rounded_slot_memory_size);
+	    }
+	  else
+	    disp = (final_stack_memory_start_frame_offset
+		    + frame_hard_frame_pointer_offset + memory_slot->start);
+	}
       mem = gen_rtx_MEM (mode,
 			 gen_rtx_PLUS
 			 (Pmode,
@@ -1883,7 +1905,7 @@ emit_secondary_memory_move (rtx dst_rtx,
 
 
 static rtx
-get_eliminate_subst_rtx (rtx *loc, int from, int to, int offset)
+get_eliminate_subst_rtx (rtx *loc, int from, int to, HOST_WIDE_INT offset)
 {
   int base_regno, index_regno;
   HOST_WIDE_INT scale;
@@ -1983,6 +2005,30 @@ get_eliminate_subst_rtx (rtx *loc, int f
     }
 }
 
+/* The following function emits correct code assigning hard register
+   value (value of SRC_REG_RTX) plus OFFSET to hard register DST_REGNO
+   which denotes allocno DST.  */
+
+static void
+emit_plus_assign (int dst_regno, allocno_t dst, rtx src_reg_rtx,
+		  HOST_WIDE_INT offset)
+{
+  rtx last = get_last_insn ();
+  rtx dst_reg_rtx, offset_rtx;
+  
+  dst_reg_rtx = gen_allocno_reg_rtx (Pmode, dst_regno, dst);
+  offset_rtx = gen_rtx_CONST_INT (VOIDmode, offset);
+  emit_move (dst_reg_rtx, gen_rtx_PLUS (Pmode, src_reg_rtx, offset_rtx));
+  if (check_insns_added_since (last))
+    return;
+  delete_insns_since (last);
+  emit_move (dst_reg_rtx, offset_rtx);
+  emit_move (dst_reg_rtx, gen_rtx_PLUS (Pmode, dst_reg_rtx, src_reg_rtx));
+  if (check_insns_added_since (last))
+    return;
+  gcc_unreachable ();
+}
+
 static void
 emit_copy (copy_t cp)
 {
@@ -2035,10 +2081,9 @@ emit_copy (copy_t cp)
 	  if (hard_regno < 0)
 	    {
 	      yara_assert (interm_elimination_regno >= 0);
-	      src_rtx = gen_rtx_PLUS (Pmode, elimination_subst_reg,
-				      gen_rtx_CONST_INT (VOIDmode, offset));
-	      dst_rtx
-		= gen_allocno_reg_rtx (Pmode, interm_elimination_regno, dst);
+	      emit_plus_assign (interm_elimination_regno, dst,
+				elimination_subst_reg, offset);
+	      return;
 	    }
 	  else if (interm_elimination_regno < 0)
 	    {
@@ -2052,17 +2097,14 @@ emit_copy (copy_t cp)
 	    }
 	  else if (hard_regno == interm_elimination_regno && regno >= 0)
 	    {
-	      src_rtx = gen_rtx_PLUS (Pmode, elimination_subst_reg,
-				      gen_rtx_CONST_INT (VOIDmode, offset));
-	      dst_rtx = gen_allocno_reg_rtx (Pmode, hard_regno, dst);
+	      emit_plus_assign (hard_regno, dst,
+				elimination_subst_reg, offset);
+	      return;
 	    }
 	  else
 	    {
-	      src_rtx = gen_rtx_PLUS (Pmode, elimination_subst_reg,
-				      gen_rtx_CONST_INT (VOIDmode, offset));
-	      dst_rtx
-		= gen_allocno_reg_rtx (Pmode, interm_elimination_regno, dst);
-	      emit_move (dst_rtx, src_rtx);
+	      emit_plus_assign (interm_elimination_regno, dst,
+				elimination_subst_reg, offset);
 	      dst_rtx = gen_allocno_reg_rtx (Pmode, hard_regno, dst);
 	      if (regno >= 0)
 		src_rtx
@@ -2320,7 +2362,9 @@ add_copy_list (copy_t cp)
   p = COPY_POINT (cp);
   pt = p.point_type;
   jmp = (pt == AT_BB_END && control_flow_insn_p (BB_END (p.u.bb))
-	 ? BB_END (p.u.bb) : NULL_RTX);
+	 ? BB_END (p.u.bb)
+	 : pt == AFTER_INSN && control_flow_insn_p (p.u.insn)
+	 ? p.u.insn : NULL_RTX);
   before_jump_p = true;
   start_sequence ();
 #ifdef ENABLE_YARA_CHECKING
@@ -2354,7 +2398,13 @@ add_copy_list (copy_t cp)
       break;
       
     case AFTER_INSN:
-      emit_insn_after (insns, p.u.insn);
+      if (jmp == NULL_RTX)
+	emit_insn_after (insns, p.u.insn);
+      else
+	{
+	  gcc_assert (! before_jump_p);
+	  emit_insns_at_bb_end (insns, BLOCK_FOR_INSN (jmp), jmp, false);
+	}
       break;
       
     case AT_BB_START:
Index: yara-trans.c
===================================================================
--- yara-trans.c	(revision 114654)
+++ yara-trans.c	(working copy)
@@ -251,14 +251,17 @@ reserve_stack_memory (int start, int siz
   int begin, bound;
 
   yara_assert (start >= 0 && size > 0);
-#ifdef FRAME_GROWS_DOWNWARD
-  begin = start - size + 1;
-  yara_assert (begin >= 0);
-  bound = start + 1;
-#else
-  begin = start;
-  bound = start + size;
-#endif
+  if (FRAME_GROWS_DOWNWARD)
+    {
+      begin = start - size + 1;
+      yara_assert (begin >= 0);
+      bound = start + 1;
+    }
+  else
+    {
+      begin = start;
+      bound = start + size;
+    }
   if (bound >= memory_stack_sbitmap_size)
     {
       memory_stack_sbitmap_size = bound + bound / 2;
@@ -279,32 +282,35 @@ find_free_stack_memory (int size, int al
 
   yara_assert (size > 0 && align > 0);
   start = 0;
-#ifdef FRAME_GROWS_DOWNWARD
-  EXECUTE_IF_SET_IN_SBITMAP (memory_stack_sbitmap, 0, k, sbi)
+  if (FRAME_GROWS_DOWNWARD)
     {
-      start = CEIL_ROUND (k, (unsigned) (align));
-      for (j = 0;
-	   (cont_p = j < size && j + start < memory_stack_sbitmap_size);
-	   j++)
-	if (! TEST_BIT (memory_stack_sbitmap, j + start))
-	  break;
-      if (! cont_p)
-	return start + size - 1;
+      EXECUTE_IF_SET_IN_SBITMAP (memory_stack_sbitmap, 0, k, sbi)
+	{
+	  start = CEIL_ROUND (k, (unsigned) (align));
+	  for (j = 0;
+	       (cont_p = j < size && j + start < memory_stack_sbitmap_size);
+	       j++)
+	    if (! TEST_BIT (memory_stack_sbitmap, j + start))
+	      break;
+	  if (! cont_p)
+	    return start + size - 1;
+	}
+      start += size - 1;
     }
-  start += size - 1;
-#else
-  EXECUTE_IF_SET_IN_SBITMAP (memory_stack_sbitmap, 0, k, sbi)
+  else
     {
-      start = CEIL_ROUND (k, (unsigned) align);
-      for (j = 0;
-	   cont_p = j < size && j + start < memory_stack_sbitmap_size;
-	   j++)
-	if (! TEST_BIT (memory_stack_sbitmap, j + start))
-	  break;
-      if (! cont_p)
-	return start;
+      EXECUTE_IF_SET_IN_SBITMAP (memory_stack_sbitmap, 0, k, sbi)
+	{
+	  start = CEIL_ROUND (k, (unsigned) align);
+	  for (j = 0;
+	       (cont_p = j < size && j + start < memory_stack_sbitmap_size);
+	       j++)
+	    if (! TEST_BIT (memory_stack_sbitmap, j + start))
+	      break;
+	  if (! cont_p)
+	    return start;
+	}
     }
-#endif
   return start;
 }
 
@@ -565,15 +571,18 @@ get_stack_memory_start_frame_offset (voi
   if ((unsigned) align * BITS_PER_UNIT > PREFERRED_STACK_BOUNDARY)
     align = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
   
-#ifdef FRAME_GROWS_DOWNWARD
-  /* We assume that the simulated stack is properly aligned.  It means
-     that the first byte after the cell is aligned too.  */
-  size = CEIL_ROUND (slot_memory_size, (unsigned) align);
-  offset = cfun->x_frame_offset - size;
-#else
-  size = slot_memory_size;
-  offset = cfun->x_frame_offset;
-#endif
+  if (FRAME_GROWS_DOWNWARD)
+    {
+      /* We assume that the simulated stack is properly aligned.  It means
+	 that the first byte after the cell is aligned too.  */
+      size = CEIL_ROUND (slot_memory_size, (unsigned) align);
+      offset = cfun->x_frame_offset - size;
+    }
+  else
+    {
+      size = slot_memory_size;
+      offset = cfun->x_frame_offset;
+    }
 
   /* Calculate how many bytes the start of local variables is off from
      stack alignment.  */
@@ -581,17 +590,19 @@ get_stack_memory_start_frame_offset (voi
   frame_off = STARTING_FRAME_OFFSET % frame_alignment;
   frame_phase = frame_off ? frame_alignment - frame_off : 0;
 
-#ifdef FRAME_GROWS_DOWNWARD
-  offset = (FLOOR_ROUND (offset - frame_phase,
-			 (unsigned HOST_WIDE_INT) align) + frame_phase);
-#else
-  offset = (CEIL_ROUND (offset - frame_phase,
-			(unsigned HOST_WIDE_INT) align) + frame_phase);
-#endif
+  if (FRAME_GROWS_DOWNWARD)
+    {
+      offset = (FLOOR_ROUND (offset - frame_phase,
+			     (unsigned HOST_WIDE_INT) align) + frame_phase);
+    }
+  else
+    {
+      offset = (CEIL_ROUND (offset - frame_phase,
+			    (unsigned HOST_WIDE_INT) align) + frame_phase);
+    }
 
-#ifdef FRAME_GROWS_DOWNWARD
-  offset += size - 1;
-#endif
+  if (FRAME_GROWS_DOWNWARD)
+    offset += size - 1;
 
   /* ??? trunc_int_for_mode */
   return offset + STARTING_FRAME_OFFSET;
@@ -651,13 +662,16 @@ static void
 register_slot_start_change (int new, struct memory_slot *slot)
 {
   log_memory_slot (slot);
-#ifdef FRAME_GROWS_DOWNWARD
-  remove_memory_slot_end (slot->start);
-  add_memory_slot_end (new);
-#else
-  remove_memory_slot_end (slot->start + slot->size - 1);
-  add_memory_slot_end (new + slot->size - 1);
-#endif
+  if (FRAME_GROWS_DOWNWARD)
+    {
+      remove_memory_slot_end (slot->start);
+      add_memory_slot_end (new);
+    }
+  else
+    {
+      remove_memory_slot_end (slot->start + slot->size - 1);
+      add_memory_slot_end (new + slot->size - 1);
+    }
 }
 
 void
@@ -680,11 +694,10 @@ register_memory_slot_usage (struct memor
   if (slot->allocnos_num == 0)
     {
       yara_assert (slot->size > 0);
-#ifdef FRAME_GROWS_DOWNWARD
-      add_memory_slot_end (slot->start);
-#else
-      add_memory_slot_end (slot->start + slot->size - 1);
-#endif
+      if (FRAME_GROWS_DOWNWARD)
+	add_memory_slot_end (slot->start);
+      else
+	add_memory_slot_end (slot->start + slot->size - 1);
     }
   increase_align_count (align);
   slot->allocnos_num++;
@@ -698,11 +711,10 @@ unregister_memory_slot_usage (struct mem
   decrease_align_count (align);
   if (slot->allocnos_num == 0)
     {
-#ifdef FRAME_GROWS_DOWNWARD
-      remove_memory_slot_end (slot->start);
-#else
-      remove_memory_slot_end (slot->start + slot->size - 1);
-#endif
+      if (FRAME_GROWS_DOWNWARD)
+	remove_memory_slot_end (slot->start);
+      else
+	remove_memory_slot_end (slot->start + slot->size - 1);
       free_memory_slot_structure (slot);
     }
 }
@@ -1110,6 +1122,8 @@ compact_stack (void)
   struct memory_slot *slot, *conflict_slot;
   bitmap_iterator bi;
   varray_type slot_memory_can_varray;
+  sbitmap reallocated_can_sbitmap;
+  bool safe_p = (YARA_PARAMS & YARA_AGGRESSIVE_STACK_COMPACTION) == 0;
 
   /* Sort cans to consider more higher cost can moves first.  */
   VARRAY_GENERIC_PTR_NOGC_INIT (slot_memory_can_varray,
@@ -1128,6 +1142,8 @@ compact_stack (void)
     qsort (&VARRAY_GENERIC_PTR (slot_memory_can_varray, 0),
 	   VARRAY_ACTIVE_SIZE (slot_memory_can_varray),
 	   sizeof (can_t), can_compare);
+  reallocated_can_sbitmap = sbitmap_alloc (cans_num);
+  sbitmap_zero (reallocated_can_sbitmap);
   /* Try to move can slots to closer stack start.  */
   for (i = 0; i < (int) VARRAY_ACTIVE_SIZE (slot_memory_can_varray); i++)
     {
@@ -1143,28 +1159,33 @@ compact_stack (void)
       if (vec != NULL)
 	for (j = 0; (n = vec [j]) >= 0; j++)
 	  if ((conflict_slot = can_memory_slots [n]) != NULL
-	      && conflict_slot->mem == NULL_RTX)
+	      && conflict_slot->mem == NULL_RTX
+	      && (safe_p || TEST_BIT (reallocated_can_sbitmap, n)))
 	    reserve_stack_memory (conflict_slot->start, conflict_slot->size);
 #ifdef SECONDARY_MEMORY_NEEDED
-      EXECUTE_IF_SET_IN_BITMAP (secondary_memory_copies, 0, k, bi)
-	{
-	  if (can_copy_conflict_p (can, copies [k]))
-	    {
-	      yara_assert (COPY_CHANGE_ADDR (copies [k]) != NULL);
-	      conflict_slot = COPY_MEMORY_SLOT (copies [k]);
-	      yara_assert (conflict_slot != NULL
-			   && conflict_slot->mem == NULL_RTX);
-	      reserve_stack_memory (conflict_slot->start, conflict_slot->size);
-	    }
-	}
+      if (safe_p)
+	EXECUTE_IF_SET_IN_BITMAP (secondary_memory_copies, 0, k, bi)
+	  {
+	    if (can_copy_conflict_p (can, copies [k]))
+	      {
+		yara_assert (COPY_CHANGE_ADDR (copies [k]) != NULL);
+		conflict_slot = COPY_MEMORY_SLOT (copies [k]);
+		yara_assert (conflict_slot != NULL
+			     && conflict_slot->mem == NULL_RTX);
+		reserve_stack_memory (conflict_slot->start,
+				      conflict_slot->size);
+	      }
+	  }
 #endif
       start = find_free_stack_memory (slot->size, align);
       yara_assert (slot->start >= start);
+      SET_BIT (reallocated_can_sbitmap, CAN_NUM (can));
       if (start == slot->start)
 	continue;
       register_slot_start_change (start, slot);
       slot->start = start;
     }
+  sbitmap_free (reallocated_can_sbitmap);
   VARRAY_FREE (slot_memory_can_varray);
 #ifdef SECONDARY_MEMORY_NEEDED
   /* Try to move slots used for secondary memory closer to the stack
@@ -1344,22 +1365,20 @@ get_temp_stack_memory_slot_rtx (enum mac
       mem = temp_stack_disp_mem [mode];
       offset = (frame_stack_pointer_offset + disp
 		- rounded_slot_memory_size ());
-#ifdef FRAME_GROWS_DOWNWARD
-      offset += get_stack_memory_start_frame_offset () - size + 2;
-#else
-      offset += size - get_stack_memory_start_frame_offset ();
-#endif
+      if (FRAME_GROWS_DOWNWARD)
+	offset += get_stack_memory_start_frame_offset () - size + 2;
+      else
+	offset += size - get_stack_memory_start_frame_offset ();
     }
   else
     {
       mem = temp_hard_frame_disp_mem [mode];
       offset = (get_stack_memory_start_frame_offset ()
 		+ frame_hard_frame_pointer_offset);
-#ifdef FRAME_GROWS_DOWNWARD
-      offset -= disp;
-#else
-      offset += disp;
-#endif
+      if (FRAME_GROWS_DOWNWARD)
+	offset -= disp;
+      else
+	offset += disp;
     }
   XEXP (XEXP (mem, 0), 1) = get_temp_const_int (disp);
   return mem;
@@ -2829,13 +2848,16 @@ memory_slot_intersected (struct memory_s
   if (slot1 == NULL || slot2 == NULL
       || slot1->mem != NULL_RTX || slot2->mem != NULL_RTX)
     return false;
-#ifdef FRAME_GROWS_DOWNWARD
-  start1 = slot1->start - slot1->size + 1;
-  start2 = slot2->start - slot2->size + 1;
-#else
-  start1 = slot1->start;
-  start2 = slot2->start;
-#endif
+  if (FRAME_GROWS_DOWNWARD)
+    {
+      start1 = slot1->start - slot1->size + 1;
+      start2 = slot2->start - slot2->size + 1;
+    }
+  else
+    {
+      start1 = slot1->start;
+      start2 = slot2->start;
+    }
   if (start1 <= start2)
     return start2 < start1 + slot1->size;
   else
@@ -3137,7 +3159,7 @@ check_elimination_in_addr (int change, i
 	  ? GET_MODE (*container_loc) : VOIDmode);
   for (elim = reg_eliminate [*base_p ? base_regno : index_regno];
        elim != NULL;
-       elim = elim->next)
+       elim = elim->from_next)
     {
       offset = elim->offset + addition;
       if (elim->to == STACK_POINTER_REGNUM)
@@ -3282,7 +3304,9 @@ eliminate_reg (allocno_t a)
 	  
 	  yara_assert (XEXP (*container_loc, 0) == *INSN_ALLOCNO_LOC (a));
 	  temp_const_int = XEXP (*container_loc, 1);
-	  for (elim = reg_eliminate [regno]; elim != NULL; elim = elim->next)
+	  for (elim = reg_eliminate [regno];
+	       elim != NULL;
+	       elim = elim->from_next)
 	    {
 	      offset = elim->offset;
 	      if (elim->to == STACK_POINTER_REGNUM)
@@ -3696,13 +3720,16 @@ undo_memory_slot_change (struct memory_s
 
   if (sl->start != slot->start)
     {
-#ifdef FRAME_GROWS_DOWNWARD
-      remove_memory_slot_end (slot->start);
-      add_memory_slot_end (sl->start);
-#else
-      remove_memory_slot_end (slot->start + slot->size - 1);
-      add_memory_slot_end (sl->start + slot->size - 1);
-#endif
+      if (FRAME_GROWS_DOWNWARD)
+	{
+	  remove_memory_slot_end (slot->start);
+	  add_memory_slot_end (sl->start);
+	}
+      else
+	{
+	  remove_memory_slot_end (slot->start + slot->size - 1);
+	  add_memory_slot_end (sl->start + slot->size - 1);
+	}
       slot->start = sl->start;
     }
 }
@@ -3848,13 +3875,13 @@ check_hard_regno_memory_on_constraint (a
   ALLOCNO_USE_EQUIV_CONST_P (a) = saved_use_equiv_const_p;
   ALLOCNO_HARD_REGNO (a) = saved_hard_regno;
   ALLOCNO_MEMORY_SLOT (a) = saved_memory_slot;
+  SET_ALT_SET (temp_alt_set);
   for (curr_a = insn_allocnos [INSN_UID (INSN_ALLOCNO_INSN (a))];
        curr_a != NULL;
        curr_a = INSN_ALLOCNO_NEXT (curr_a))
     if (INSN_ALLOCNO_TYPE (curr_a) >= OPERAND_BASE)
       {
-	COPY_ALT_SET (temp_alt_set, INSN_ALLOCNO_POSSIBLE_ALTS (curr_a));
-	AND_ALT_SET (temp_alt_set, INSN_ALLOCNO_POSSIBLE_ALTS (a));
+	AND_ALT_SET (temp_alt_set, INSN_ALLOCNO_POSSIBLE_ALTS (curr_a));
 	if (EQ_ALT_SET (temp_alt_set, ZERO_ALT_SET))
 	  break;
       }
Index: yara-ir.c
===================================================================
--- yara-ir.c	(revision 114654)
+++ yara-ir.c	(working copy)
@@ -5337,7 +5337,7 @@ struct move_info
   int freq;
 };
 
-static allocno_t
+allocno_t
 get_duplication_allocno (allocno_t a, bool commutative_p)
 {
   int op_num, curr_alt, c, original;
@@ -5392,6 +5392,7 @@ get_move (allocno_t src, allocno_t dst, 
 
   first = union_first [ALLOCNO_NUM (src)];
   second = union_first [ALLOCNO_NUM (dst)];
+  yara_assert (first != second);
   if (ALLOCNO_NUM (first) > ALLOCNO_NUM (second))
     {
       temp = first;
@@ -5541,6 +5542,11 @@ make_aggressive_coalescing (void)
 		     intermediate register.  Even if we fix that I
 		     believe it will not generate a better code.  */
 		  continue;
+		/* Don't coalesce allocnos with equivalent
+		   locations. */
+		if (reg_equiv_memory_loc [ALLOCNO_REGNO (src)]
+		    || reg_equiv_memory_loc [ALLOCNO_REGNO (dst)])
+		  continue;
 		move = get_move (src, dst, BLOCK_FOR_INSN (insn)->frequency);
 		VARRAY_PUSH_GENERIC_PTR (move_varray, move);
 		VARRAY_PUSH_RTX (move_insn_varray, insn);
@@ -5559,8 +5565,15 @@ make_aggressive_coalescing (void)
 		      && (dst = get_duplication_allocno (src, true)) != NULL
 		      && REG_P (*INSN_ALLOCNO_LOC (dst))
 		      && ! HARD_REGISTER_NUM_P (ALLOCNO_REGNO (dst))
-		      && ALLOCNO_MODE (src) == ALLOCNO_MODE (dst))
+		      && ALLOCNO_MODE (src) == ALLOCNO_MODE (dst)
+		      && (union_first [ALLOCNO_NUM (src)]
+			  != union_first [ALLOCNO_NUM (dst)]))
 		    {
+		      /* Don't coalesce allocnos with equivalent
+			 locations. */
+		      if (reg_equiv_memory_loc [ALLOCNO_REGNO (src)]
+			  || reg_equiv_memory_loc [ALLOCNO_REGNO (dst)])
+			continue;
 		      move = get_move (src, dst,
 				       BLOCK_FOR_INSN (insn)->frequency);
 		      VARRAY_PUSH_GENERIC_PTR (move_varray, move);
