This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[yara-branch] patch: Richard Henderson's subreg lowering, Inter-procedural register allocation for YARA, some tuning and cleaning up


This patch adds Richard Henderson's subreg lowering pass
(-flower-subreg, which is enabled by default at -O2 and higher).  I've
added it because it helps the crafty benchmark on x86 when YARA is used.

The patch also adds simple inter-procedural register allocation for
YARA (-fipra).  It means that YARA knows which of the registers that
might be clobbered by a call are really clobbered by that call.  This
information is used to choose a better hard register for a can.  Jan
Hubicka's infrastructure (cgraph) is used for this.

The patch also contains some cleaning up.


2006-05-31 Vladimir Makarov <vmakarov@redhat.com>, Richard Henderson <rth@redhat.com>

* cgraph.c (cgraph_create_node): Initialize function_used_regs.

	* cgraph.h (cgraph_node): Add new member function_used_regs.
	
	* regrename.c (copyprop_hardreg_forward_1): Use
	get_call_invalidated_used_regs.

	* postreload-gcse.c (record_opr_changes,
	reg_set_between_after_reload_p, reg_used_between_after_reload_p):
	Ditto.

* postreload.c (reload_combine, reload_cse_move2add): Ditto.

* rtlanal.c (reg_set_p): Ditto.

* flow.c (propagate_one_insn): Ditto.

* df-scan.c (df_insn_refs_record): Ditto.

* caller-save.c (save_call_clobbered_regs): Ditto.

	* gcse.c (compute_hash_table_work, compute_store_table): Ditto.
	
	* cselib.c (cselib_process_insn): Ditto.
	
	* loop-iv.c (simplify_using_assignment): Ditto.

* sched-deps.c (sched_analyze): Ditto.

	* combine.c (record_dead_and_set_regs): Ditto.
	
	* resource.c (mark_set_resources, mark_target_live_regs): Ditto.

	* var-tracking.c (compute_bb_dataflow, emit_notes_in_bb): Ditto.
	
	* cse.c (invalidate_for_call): Ditto.  Add parameter.
	(cse_insn, invalidate_skipped_block): Pass the insn for
	invalidate_for_call.

* tree-pass.h (pass_lower_subreg): New external definition.

	* final.c (rest_of_handle_final): Set up call_used_regs for the
	cgraph node.

	* toplev.h (flag_lower_subreg, flag_ipra): New external
	definitions.

* rtl.def (CONCATN): New rtl expression.

	* dwarf2out.c (concatn_loc_descriptor): New function.
	(loc_descriptor): Process CONCATN.

* opts.c (flag_ipra, flag_lower_subreg): Set up for -O2.

* timevar.def (TV_LOWER_SUBREG): New definition.

* recog.c (peep2_find_free_register): Set up regs_ever_live.

	* yara-int.h (allocno_common):  Add new member clobbered_regs.
	(ALLOCNO_CLOBBERED_REGS): New macro.
	(can): Remove member spill_p.
	(CAN_SPILL_P): Remove.

	* function.c (get_call, get_call_invalidated_used_regs): New
	functions.

	* function.h (emit_status):  Add new member call_used_regs.
	(get_call_invalidated_used_regs): New external definition.
	
	* yara-color.c (allocated_reg_p): New array.
	(choose_global_hard_reg): Modify cost for cans crossing functions
	and saved cans.
	(pop_globals_from_stack): Initialize allocated_reg_p.
	(assign_global_can_allocnos): Use ALLOCNO_CLOBBERED_REGS.

	* emit-rtl.c (gen_reg_rtx_offset): New function.
	(gen_lowpart_common): Process CONCATN.

* simplify-rtx.c (simplify_subreg): Process CONCATN.

* common.opt (fipra, flower-subreg): New options.

* yara.c (yara): Reset call_used_regs.

* rtl.h (gen_reg_rtx_offset): New external definition.

	* yara-trans.c (check_hard_regno_for_a, assign_one_allocno): Use
	ALLOCNO_CLOBBERED_REGS.
	
	* yara-ir.c (setup_reg_class_nregs): Use CLASS_MAX_NREGS.
	(create_allocno): Clear ALLOCNO_CLOBBERED_REGS.
	(print_allocno): Print ALLOCNO_CLOBBERED_REGS.
	(curr_call_used_function_regs): New static variable.
	(set_call_info): Update curr_call_used_function_regs.
	(build_insn_allocno_conflicts): Use
	get_call_invalidated_used_regs.
	(create_can, print_can): Remove CAN_SPILL_P.

	* Makefile.in (OBJS-common): Add lower-subreg.o.
	(lower-subreg.o): New entry.
	(function.o): Add cgraph.h.
	(yara.o, yara-ir.o): Add cgraph.h and function.h.

* passes.c (pass_lower_subreg): Add new pass.

	* lower-subreg.c: New file.
	

Index: regrename.c
===================================================================
--- regrename.c	(revision 113679)
+++ regrename.c	(working copy)
@@ -1767,9 +1767,14 @@ copyprop_hardreg_forward_1 (basic_block 
     did_replacement:
       /* Clobber call-clobbered registers.  */
       if (CALL_P (insn))
-	for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
-	  if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i))
-	    kill_value_regno (i, 1, vd);
+	{
+	  HARD_REG_SET clobbered_regs;
+	  
+	  get_call_invalidated_used_regs (insn, &clobbered_regs, true);
+	  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+	    if (TEST_HARD_REG_BIT (clobbered_regs, i))
+	      kill_value_regno (i, 1, vd);
+	}
 
       /* Notice stores.  */
       note_stores (PATTERN (insn), kill_set_value, vd);
Index: cgraph.c
===================================================================
--- cgraph.c	(revision 113679)
+++ cgraph.c	(working copy)
@@ -191,6 +191,7 @@ cgraph_create_node (void)
   node->global.estimated_growth = INT_MIN;
   cgraph_nodes = node;
   cgraph_n_nodes++;
+  COPY_HARD_REG_SET (node->function_used_regs, call_used_reg_set);
   return node;
 }
 
Index: cgraph.h
===================================================================
--- cgraph.h	(revision 113679)
+++ cgraph.h	(working copy)
@@ -168,6 +168,11 @@ struct cgraph_node GTY((chain_next ("%h.
      into clone before compiling so the function in original form can be
      inlined later.  This pointer points to the clone.  */
   tree inline_decl;
+
+  /* Call unsaved hard registers really used by the corresponding
+     function (including ones used by functions called by the
+     function).  */
+  HARD_REG_SET function_used_regs;
 };
 
 struct cgraph_edge GTY((chain_next ("%h.next_caller"), chain_prev ("%h.prev_caller")))
Index: postreload-gcse.c
===================================================================
--- postreload-gcse.c	(revision 113679)
+++ postreload-gcse.c	(working copy)
@@ -718,11 +718,14 @@ record_opr_changes (rtx insn)
   if (CALL_P (insn))
     {
       unsigned int regno;
+      HARD_REG_SET clobbered_regs;
+      
+      get_call_invalidated_used_regs (insn, &clobbered_regs, true);
 
       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-	if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
+	if (TEST_HARD_REG_BIT (clobbered_regs, regno))
 	  record_last_reg_set_info (insn, regno);
-
+      
       if (! CONST_OR_PURE_CALL_P (insn))
 	record_last_mem_set_info (insn);
     }
@@ -879,9 +882,15 @@ reg_set_between_after_reload_p (rtx reg,
       {
 	if (set_of (reg, insn) != NULL_RTX)
 	  return insn;
-	if ((CALL_P (insn)
-	      && call_used_regs[REGNO (reg)])
-	    || find_reg_fusage (insn, CLOBBER, reg))
+	if (CALL_P (insn))
+	  {
+	    HARD_REG_SET used_regs;
+	    
+	    get_call_invalidated_used_regs (insn, &used_regs, false);
+	    if (TEST_HARD_REG_BIT (used_regs, REGNO (reg)))
+	      return insn;
+	  }
+	if (find_reg_fusage (insn, CLOBBER, reg))
 	  return insn;
 
 	if (FIND_REG_INC_NOTE (insn, reg))
@@ -911,13 +920,20 @@ reg_used_between_after_reload_p (rtx reg
        insn = NEXT_INSN (insn))
     if (INSN_P (insn))
       {
-	if (reg_overlap_mentioned_p (reg, PATTERN (insn))
-	    || (CALL_P (insn)
-		&& call_used_regs[REGNO (reg)])
-	    || find_reg_fusage (insn, USE, reg)
+	if (reg_overlap_mentioned_p (reg, PATTERN (insn)))
+	  return insn;
+	if (CALL_P (insn))
+	  {
+	    HARD_REG_SET used_regs;
+	    
+	    get_call_invalidated_used_regs (insn, &used_regs, false);
+	    if (TEST_HARD_REG_BIT (used_regs, REGNO (reg)))
+	      return insn;
+	  }
+	if (find_reg_fusage (insn, USE, reg)
 	    || find_reg_fusage (insn, CLOBBER, reg))
 	  return insn;
-
+	
 	if (FIND_REG_INC_NOTE (insn, reg))
 	  return insn;
       }
Index: postreload.c
===================================================================
--- postreload.c	(revision 113679)
+++ postreload.c	(working copy)
@@ -917,9 +917,11 @@ reload_combine (void)
       if (CALL_P (insn))
 	{
 	  rtx link;
+	  HARD_REG_SET used_regs;
 
+	  get_call_invalidated_used_regs (insn, &used_regs, false);
 	  for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
-	    if (call_used_regs[r])
+	    if (TEST_HARD_REG_BIT (used_regs, r))
 	      {
 		reg_state[r].use_index = RELOAD_COMBINE_MAX_USES;
 		reg_state[r].store_ruid = reload_combine_ruid;
@@ -1411,9 +1413,12 @@ reload_cse_move2add (rtx first)
 	 unknown values.  */
       if (CALL_P (insn))
 	{
+	  HARD_REG_SET used_regs;
+
+	  get_call_invalidated_used_regs (insn, &used_regs, false);
 	  for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
 	    {
-	      if (call_used_regs[i])
+	      if (TEST_HARD_REG_BIT (used_regs, i))
 		/* Reset the information about this register.  */
 		reg_set_luid[i] = 0;
 	    }
Index: tree-pass.h
===================================================================
--- tree-pass.h	(revision 113679)
+++ tree-pass.h	(working copy)
@@ -331,6 +331,7 @@ extern struct tree_opt_pass pass_initial
 extern struct tree_opt_pass pass_unshare_all_rtl;
 extern struct tree_opt_pass pass_instantiate_virtual_regs;
 extern struct tree_opt_pass pass_jump2;
+extern struct tree_opt_pass pass_lower_subreg;
 extern struct tree_opt_pass pass_cse;
 extern struct tree_opt_pass pass_gcse;
 extern struct tree_opt_pass pass_jump_bypass;
Index: rtlanal.c
===================================================================
--- rtlanal.c	(revision 113679)
+++ rtlanal.c	(working copy)
@@ -737,16 +737,24 @@ reg_set_p (rtx reg, rtx insn)
 {
   /* We can be passed an insn or part of one.  If we are passed an insn,
      check if a side-effect of the insn clobbers REG.  */
-  if (INSN_P (insn)
-      && (FIND_REG_INC_NOTE (insn, reg)
-	  || (CALL_P (insn)
-	      && ((REG_P (reg)
-		   && REGNO (reg) < FIRST_PSEUDO_REGISTER
-		   && TEST_HARD_REG_BIT (regs_invalidated_by_call,
-					 REGNO (reg)))
-		  || MEM_P (reg)
-		  || find_reg_fusage (insn, CLOBBER, reg)))))
-    return 1;
+  if (INSN_P (insn))
+    {
+      if (FIND_REG_INC_NOTE (insn, reg))
+	return 1;
+      if (CALL_P (insn))
+	{
+	  if (REG_P (reg) && REGNO (reg) < FIRST_PSEUDO_REGISTER)
+	    {
+	      HARD_REG_SET clobbered_regs;
+	    
+	      get_call_invalidated_used_regs (insn, &clobbered_regs, true);
+	      if (TEST_HARD_REG_BIT (clobbered_regs, REGNO (reg)))
+		return 1;
+	    }
+	  if (MEM_P (reg) || find_reg_fusage (insn, CLOBBER, reg))
+	    return 1;
+	}
+    }
 
   return set_of (reg, insn) != NULL_RTX;
 }
Index: final.c
===================================================================
--- final.c	(revision 113679)
+++ final.c	(working copy)
@@ -3902,9 +3902,40 @@ debug_free_queue (void)
 static unsigned int
 rest_of_handle_final (void)
 {
+  int i;
   rtx x;
   const char *fnname;
+  struct cgraph_node *node;
 
+  gcc_assert (cfun->decl != NULL);
+  node = cgraph_node (cfun->decl);
+  if (node != NULL)
+    {
+      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+	if (fixed_regs [i])
+	  SET_HARD_REG_BIT (cfun->emit->call_used_regs, i);
+	else if (call_used_regs [i]
+		 && (regs_ever_live [i]
+#ifdef STACK_REGS
+		     || (i >= FIRST_STACK_REG && i <= LAST_STACK_REG)
+#endif
+		     ))
+	  SET_HARD_REG_BIT (cfun->emit->call_used_regs, i);
+      COPY_HARD_REG_SET (node->function_used_regs, cfun->emit->call_used_regs);
+      if (dump_file != NULL)
+	{
+	  GO_IF_HARD_REG_EQUAL (cfun->emit->call_used_regs,
+				call_used_reg_set, ok);
+	  fprintf (dump_file, "unused unsaved registers: ");
+	  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+	    if (TEST_HARD_REG_BIT (call_used_reg_set, i)
+		&& ! TEST_HARD_REG_BIT (cfun->emit->call_used_regs, i))
+	      fprintf (dump_file, "%s ", reg_names [i]);
+	  fprintf (dump_file, "\n");
+	ok:
+	  ;
+	}
+    }
   /* Get the function's name, as described by its RTL.  This may be
      different from the DECL_NAME name used in the source file.  */
 
Index: toplev.h
===================================================================
--- toplev.h	(revision 113679)
+++ toplev.h	(working copy)
@@ -122,6 +122,7 @@ extern int flag_crossjumping;
 extern int flag_if_conversion;
 extern int flag_if_conversion2;
 extern int flag_keep_static_consts;
+extern int flag_lower_subreg;
 extern int flag_peel_loops;
 extern int flag_rerun_cse_after_loop;
 extern int flag_thread_jumps;
@@ -132,6 +133,7 @@ extern int flag_unswitch_loops;
 extern int flag_cprop_registers;
 extern int time_report;
 extern int flag_yara;
+extern int flag_ipra;
 extern int flag_optimistic_coalescing;
 extern int flag_extended_coalescing;
 extern int flag_relief;
Index: flow.c
===================================================================
--- flow.c	(revision 113679)
+++ flow.c	(working copy)
@@ -1839,7 +1839,8 @@ propagate_one_insn (struct propagate_blo
 	  bool sibcall_p;
 	  rtx note, cond;
 	  int i;
-
+	  HARD_REG_SET clobbered_regs;
+	      
 	  cond = NULL_RTX;
 	  if (GET_CODE (PATTERN (insn)) == COND_EXEC)
 	    cond = COND_EXEC_TEST (PATTERN (insn));
@@ -1869,8 +1870,9 @@ propagate_one_insn (struct propagate_blo
 
 	  sibcall_p = SIBLING_CALL_P (insn);
 	  live_at_end = EXIT_BLOCK_PTR->il.rtl->global_live_at_start;
+	  get_call_invalidated_used_regs (insn, &clobbered_regs, true);
 	  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
-	    if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)
+	    if (TEST_HARD_REG_BIT (clobbered_regs, i)
 		&& ! (sibcall_p
 		      && REGNO_REG_SET_P (live_at_end, i)
 		      && ! refers_to_regno_p (i, i+1,
Index: df-scan.c
===================================================================
--- df-scan.c	(revision 113679)
+++ df-scan.c	(working copy)
@@ -1494,6 +1494,8 @@ df_insn_refs_record (struct dataflow *df
 	    {
 	      bitmap_iterator bi;
 	      unsigned int ui;
+	      HARD_REG_SET clobbered_regs;
+	      
 	      /* Calls may also reference any of the global registers,
 		 so they are recorded as used.  */
 	      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
@@ -1501,9 +1503,13 @@ df_insn_refs_record (struct dataflow *df
 		  df_uses_record (dflow, &regno_reg_rtx[i],
 				  DF_REF_REG_USE, bb, insn, 
 				  0);
+	      get_call_invalidated_used_regs (insn, &clobbered_regs, true);
 	      EXECUTE_IF_SET_IN_BITMAP (df_invalidated_by_call, 0, ui, bi)
-	        df_ref_record (dflow, regno_reg_rtx[ui], &regno_reg_rtx[ui], bb, insn, 
-	                       DF_REF_REG_DEF, DF_REF_CLOBBER, false);
+		if (ui < FIRST_PSEUDO_REGISTER
+		    && TEST_HARD_REG_BIT (clobbered_regs, ui))
+		  df_ref_record (dflow, regno_reg_rtx[ui], &regno_reg_rtx[ui],
+				 bb, insn,
+				 DF_REF_REG_DEF, DF_REF_CLOBBER, false);
 	    }
 	}
 
Index: caller-save.c
===================================================================
--- caller-save.c	(revision 113679)
+++ caller-save.c	(working copy)
@@ -409,7 +409,7 @@ save_call_clobbered_regs (void)
 	  if (code == CALL_INSN && ! find_reg_note (insn, REG_NORETURN, NULL))
 	    {
 	      unsigned regno;
-	      HARD_REG_SET hard_regs_to_save;
+	      HARD_REG_SET hard_regs_to_save, used_regs;
 	      reg_set_iterator rsi;
 
 	      /* Use the register life information in CHAIN to compute which
@@ -459,7 +459,8 @@ save_call_clobbered_regs (void)
 	      AND_COMPL_HARD_REG_SET (hard_regs_to_save, call_fixed_reg_set);
 	      AND_COMPL_HARD_REG_SET (hard_regs_to_save, this_insn_sets);
 	      AND_COMPL_HARD_REG_SET (hard_regs_to_save, hard_regs_saved);
-	      AND_HARD_REG_SET (hard_regs_to_save, call_used_reg_set);
+	      get_call_invalidated_used_regs (insn, &used_regs, false);
+	      AND_HARD_REG_SET (hard_regs_to_save, used_regs);
 
 	      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
 		if (TEST_HARD_REG_BIT (hard_regs_to_save, regno))
Index: cse.c
===================================================================
--- cse.c	(revision 113679)
+++ cse.c	(working copy)
@@ -592,7 +592,7 @@ static void remove_invalid_subreg_refs (
 					enum machine_mode);
 static void rehash_using_reg (rtx);
 static void invalidate_memory (void);
-static void invalidate_for_call (void);
+static void invalidate_for_call (rtx);
 static rtx use_related_value (rtx, struct table_elt *);
 
 static inline unsigned canon_hash (rtx, enum machine_mode);
@@ -2003,21 +2003,23 @@ rehash_using_reg (rtx x)
    register.  Also update their TICK values.  */
 
 static void
-invalidate_for_call (void)
+invalidate_for_call (rtx call_insn)
 {
   unsigned int regno, endregno;
   unsigned int i;
   unsigned hash;
   struct table_elt *p, *next;
   int in_table = 0;
-
+  HARD_REG_SET clobbered_regs;
+  
   /* Go through all the hard registers.  For each that is clobbered in
      a CALL_INSN, remove the register from quantity chains and update
      reg_tick if defined.  Also see if any of these registers is currently
      in the table.  */
 
+  get_call_invalidated_used_regs (call_insn, &clobbered_regs, true);
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-    if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
+    if (TEST_HARD_REG_BIT (clobbered_regs, regno))
       {
 	delete_reg_equiv (regno);
 	if (REG_TICK (regno) >= 0)
@@ -2047,7 +2049,7 @@ invalidate_for_call (void)
 	  endregno = regno + hard_regno_nregs[regno][GET_MODE (p->exp)];
 
 	  for (i = regno; i < endregno; i++)
-	    if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i))
+	    if (TEST_HARD_REG_BIT (clobbered_regs, i))
 	      {
 		remove_from_table (p, hash);
 		break;
@@ -5995,7 +5997,7 @@ cse_insn (rtx insn, rtx libcall_insn)
     {
       if (! CONST_OR_PURE_CALL_P (insn))
 	invalidate_memory ();
-      invalidate_for_call ();
+      invalidate_for_call (insn);
     }
 
   /* Now invalidate everything set by this instruction.
@@ -6581,7 +6583,7 @@ invalidate_skipped_block (rtx start)
 	{
 	  if (! CONST_OR_PURE_CALL_P (insn))
 	    invalidate_memory ();
-	  invalidate_for_call ();
+	  invalidate_for_call (insn);
 	}
 
       invalidate_from_clobbers (PATTERN (insn));
Index: rtl.def
===================================================================
--- rtl.def	(revision 113679)
+++ rtl.def	(working copy)
@@ -383,10 +383,10 @@ DEF_RTL_EXPR(SUBREG, "subreg", "ei", RTX
 
 DEF_RTL_EXPR(STRICT_LOW_PART, "strict_low_part", "e", RTX_EXTRA)
 
-/* (CONCAT a b) represents the virtual concatenation of a and b
-   to make a value that has as many bits as a and b put together.
-   This is used for complex values.  Normally it appears only
-   in DECL_RTLs and during RTL generation, but not in the insn chain.  */
+/* (CONCAT a b) represents the virtual concatenation of a and b to make a
+   value that has as many bits as a and b put together.  This is used for,
+   among other things, complex values.  Normally it appears only in DECL_RTLs
+   and during RTL generation, but not in the insn chain.  */
 DEF_RTL_EXPR(CONCAT, "concat", "ee", RTX_OBJ)
 
 /* A memory location; operand is the address.  The second operand is the
@@ -412,6 +412,13 @@ DEF_RTL_EXPR(SYMBOL_REF, "symbol_ref", "
    pretend to be looking at the entire value and comparing it.  */
 DEF_RTL_EXPR(CC0, "cc0", "", RTX_OBJ)
 
+/* (CONCATN [a1 a2 .. an]) represents the virtual concatenation of all
+   An to make a value.  This is an extension of the CONCAT to larger 
+   numbers of components.  This is used for decomposing large values
+   into register sized components.  Like CONCAT, it should not appear
+   in the insn chain.  */
+DEF_RTL_EXPR (CONCATN, "concatn", "E", RTX_OBJ)
+
 /* ----------------------------------------------------------------------
    Expressions for operators in an rtl pattern
    ---------------------------------------------------------------------- */
Index: dwarf2out.c
===================================================================
--- dwarf2out.c	(revision 113679)
+++ dwarf2out.c	(working copy)
@@ -4089,6 +4089,7 @@ static dw_loc_descr_ref based_loc_descr 
 static int is_based_loc (rtx);
 static dw_loc_descr_ref mem_loc_descriptor (rtx, enum machine_mode mode);
 static dw_loc_descr_ref concat_loc_descriptor (rtx, rtx);
+static dw_loc_descr_ref concatn_loc_descriptor (rtx);
 static dw_loc_descr_ref loc_descriptor (rtx);
 static dw_loc_descr_ref loc_descriptor_from_tree_1 (tree, int);
 static dw_loc_descr_ref loc_descriptor_from_tree (tree);
@@ -8940,6 +8941,31 @@ concat_loc_descriptor (rtx x0, rtx x1)
   return cc_loc_result;
 }
 
+/* Return a descriptor that describes the concatenation of N locations.  */
+
+static dw_loc_descr_ref
+concatn_loc_descriptor (rtx concatn)
+{
+  dw_loc_descr_ref cc_loc_result = NULL;
+  unsigned int i, n = XVECLEN (concatn, 0);
+
+  for (i = 0; i < n; ++i)
+    {
+      dw_loc_descr_ref ref;
+      rtx x = XVECEXP (concatn, 0, i);
+
+      ref = loc_descriptor (x);
+      if (ref == NULL)
+	return NULL;
+
+      add_loc_descr (&cc_loc_result, ref);
+      ref = new_loc_descr (DW_OP_piece, GET_MODE_SIZE (GET_MODE (x)), 0);
+      add_loc_descr (&cc_loc_result, ref);
+    }
+
+  return cc_loc_result;
+}
+
 /* Output a proper Dwarf location descriptor for a variable or parameter
    which is either allocated in a register or in a memory location.  For a
    register, we just generate an OP_REG and the register number.  For a
@@ -8977,6 +9003,10 @@ loc_descriptor (rtx rtl)
       loc_result = concat_loc_descriptor (XEXP (rtl, 0), XEXP (rtl, 1));
       break;
 
+    case CONCATN:
+      loc_result = concatn_loc_descriptor (rtl);
+      break;
+
     case VAR_LOCATION:
       /* Single part.  */
       if (GET_CODE (XEXP (rtl, 1)) != PARALLEL)
Index: opts.c
===================================================================
--- opts.c	(revision 113679)
+++ opts.c	(working copy)
@@ -564,6 +564,8 @@ decode_options (unsigned int argc, const
       flag_gcse = 1;
       flag_expensive_optimizations = 1;
       flag_ipa_type_escape = 1;
+      flag_ipra = 1;
+      flag_lower_subreg = 1;
       flag_rerun_cse_after_loop = 1;
       flag_caller_saves = 1;
       flag_peephole2 = 1;
Index: timevar.def
===================================================================
--- timevar.def	(revision 113679)
+++ timevar.def	(working copy)
@@ -125,6 +125,7 @@ DEFTIMEVAR (TV_OVERLOAD              , "
 DEFTIMEVAR (TV_TEMPLATE_INSTANTIATION, "template instantiation")
 DEFTIMEVAR (TV_EXPAND		     , "expand")
 DEFTIMEVAR (TV_VARCONST              , "varconst")
+DEFTIMEVAR (TV_LOWER_SUBREG          , "lower subreg")
 DEFTIMEVAR (TV_JUMP                  , "jump")
 DEFTIMEVAR (TV_CSE                   , "CSE")
 DEFTIMEVAR (TV_LOOP                  , "loop analysis")
Index: recog.c
===================================================================
--- recog.c	(revision 113679)
+++ recog.c	(working copy)
@@ -2971,7 +2971,10 @@ peep2_find_free_register (int from, int 
       if (success)
 	{
 	  for (j = hard_regno_nregs[regno][mode] - 1; j >= 0; j--)
-	    SET_HARD_REG_BIT (*reg_set, regno + j);
+	    {
+	      regs_ever_live [regno + j] = 1;
+	      SET_HARD_REG_BIT (*reg_set, regno + j);
+	    }
 
 	  /* Start the next search with the next register.  */
 	  if (++raw_regno >= FIRST_PSEUDO_REGISTER)
Index: yara-int.h
===================================================================
--- yara-int.h	(revision 113912)
+++ yara-int.h	(working copy)
@@ -342,6 +342,11 @@ struct allocno_common
 
   /* Frequency of calls which given allocno intersects.  */
   int call_freq;
+  /* The following member is defined if the allocno intersects a call.
+     Hard registers which can not used for given allocno because they
+     might be clobbered by calls inside of hard allocno live
+     range.  */
+  HARD_REG_SET clobbered_regs;
 
   /* Allocno attributes should be logged.  */
   struct allocno_change change;
@@ -484,6 +489,7 @@ union allocno_node
 #define ALLOCNO_HARD_REG_CONFLICTS(A) ((A)->common.hard_reg_conflicts)
 #define ALLOCNO_CALL_CROSS_P(A) ((A)->common.call_cross_p)
 #define ALLOCNO_CALL_FREQ(A) ((A)->common.call_freq)
+#define ALLOCNO_CLOBBERED_REGS(A) ((A)->common.clobbered_regs)
 #define ALLOCNO_CHANGE(A) ((A)->common.change)
 #define ALLOCNO_HARD_REGNO(A) ((A)->common.change.hard_regno)
 #define ALLOCNO_HARD_REGSET(A) ((A)->common.change.hard_regset)
@@ -728,6 +734,7 @@ struct can
   int slotno;
   /* Frequency of calls which given can intersects.  */
   int call_freq;
+
   /* True if an allocno of the can lives through a call.  */
   bool call_p;
   /* True value means than the can was not removed from the
@@ -738,8 +745,6 @@ struct can
   /* True value means that the can is global one, in other words it
      lives in more one BB.  */
   bool global_p;
-  /* True if we already spilled the can during local allocation.  */  
-  bool spill_p;
   /* True if hard register or memory has been assigned to the can.  */
   bool assigned_p;
   /* Mode of allocnos belonging to the can.  */
@@ -774,7 +779,6 @@ struct can
 #define CAN_CALL_P(C) ((C)->call_p)
 #define CAN_IN_GRAPH_P(C) ((C)->in_graph_p)
 #define CAN_GLOBAL_P(C) ((C)->global_p)
-#define CAN_SPILL_P(C) ((C)->spill_p)
 #define CAN_ASSIGNED_P(C) ((C)->assigned_p)
 #define CAN_MODE(C) ((C)->mode)
 #define CAN_COPIES(C) ((C)->can_copies)
Index: function.c
===================================================================
--- function.c	(revision 113679)
+++ function.c	(working copy)
@@ -63,6 +63,7 @@ Software Foundation, 51 Franklin Street,
 #include "tree-gimple.h"
 #include "tree-pass.h"
 #include "predict.h"
+#include "cgraph.h"
 
 #ifndef LOCAL_ALIGNMENT
 #define LOCAL_ALIGNMENT(TYPE, ALIGNMENT) ALIGNMENT
@@ -5577,6 +5578,82 @@ current_function_name (void)
 {
   return lang_hooks.decl_printable_name (cfun->decl, 2);
 }
+
+
+
+/* This recursive function finds and returns CALL expression in X.  */
+static rtx
+get_call (rtx x)
+{
+  int i;
+  rtx call_rtx;
+  const char *fmt;
+  enum rtx_code code = GET_CODE (x);
+
+  /* Ignore registers in memory.  */
+  if (code == CALL)
+    return x;
+
+  fmt = GET_RTX_FORMAT (code);
+  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+    {
+      if (fmt [i] == 'e')
+	{
+	  if ((call_rtx = get_call (XEXP (x, i))) != NULL_RTX)
+	    return call_rtx;
+	}
+      else if (fmt [i] == 'E')
+	{
+	  int j;
+
+	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+	    if ((call_rtx = get_call (XVECEXP (x, i, j))) != NULL_RTX)
+	      return call_rtx;
+	}
+    }
+  return NULL_RTX;
+}
+
+/* This function returns call unsaved registers invalidated (if
+   CLOBBERED_P) or used by function called by INSN through REGS.  */
+void
+get_call_invalidated_used_regs (rtx insn, HARD_REG_SET *regs, bool clobbered_p)
+{
+  rtx x;
+  struct cgraph_node *node;
+  tree decl = NULL;
+
+  gcc_assert (CALL_P (insn));
+  
+  x = get_call (PATTERN (insn));
+  if (x != NULL_RTX)
+    {
+      x = XEXP (x, 0);
+      gcc_assert (GET_CODE (x) == MEM);
+      x = XEXP (x, 0);
+      if (GET_CODE (x) == SYMBOL_REF)
+	decl = SYMBOL_REF_DECL (x);
+    }
+  node = decl == NULL ? NULL : cgraph_node (decl);
+  if (! flag_ipra || node == NULL
+      /* This is a call of the function itself.  We don't know used
+	 register yet.  So take the worst case.  */
+      || node->decl == cfun->decl)
+    {
+      if (clobbered_p)
+	COPY_HARD_REG_SET (*regs, regs_invalidated_by_call);
+      else
+	COPY_HARD_REG_SET (*regs, call_used_reg_set);
+    }
+  else
+    {
+      COPY_HARD_REG_SET (*regs, node->function_used_regs);
+      if (clobbered_p)
+	AND_HARD_REG_SET (*regs, regs_invalidated_by_call);
+    }
+}
+
+
 
 
 static unsigned int
Index: function.h
===================================================================
--- function.h	(revision 113679)
+++ function.h	(working copy)
@@ -23,6 +23,7 @@ Software Foundation, 51 Franklin Street,
 #define GCC_FUNCTION_H
 
 #include "tree.h"
+#include "hard-reg-set.h"
 
 struct var_refs_queue GTY(())
 {
@@ -101,6 +102,10 @@ struct emit_status GTY(())
   /* Indexed by pseudo register number, gives the rtx for that pseudo.
      Allocated in parallel with regno_pointer_align.  */
   rtx * GTY ((length ("%h.x_reg_rtx_no"))) x_regno_reg_rtx;
+
+  /* Call unsaved hard registers really used by given function
+     (including ones used by functions called by given function).  */
+  HARD_REG_SET call_used_regs;
 };
 
 /* For backward compatibility... eventually these should all go away.  */
@@ -559,6 +564,8 @@ extern rtx get_arg_pointer_save_area (st
 /* Returns the name of the current function.  */
 extern const char *current_function_name (void);
 
+extern void get_call_invalidated_used_regs (rtx, HARD_REG_SET *, bool);
+
 extern void do_warn_unused_parameter (tree);
 
 extern bool pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
Index: gcse.c
===================================================================
--- gcse.c	(revision 113679)
+++ gcse.c	(working copy)
@@ -2063,8 +2063,11 @@ compute_hash_table_work (struct hash_tab
 
 	  if (CALL_P (insn))
 	    {
+	      HARD_REG_SET clobbered_regs;
+
+	      get_call_invalidated_used_regs (insn, &clobbered_regs, true);
 	      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-		if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
+		if (TEST_HARD_REG_BIT (clobbered_regs, regno))
 		  record_last_reg_set_info (insn, regno);
 
 	      mark_call (insn);
@@ -5762,8 +5765,11 @@ compute_store_table (void)
 
 	  if (CALL_P (insn))
 	    {
+	      HARD_REG_SET clobbered_regs;
+
+	      get_call_invalidated_used_regs (insn, &clobbered_regs, true);
 	      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-		if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
+		if (TEST_HARD_REG_BIT (clobbered_regs, regno))
 		  {
 		    last_set_in[regno] = INSN_UID (insn);
 		    SET_BIT (reg_set_in_block[bb->index], regno);
@@ -5785,8 +5791,11 @@ compute_store_table (void)
 
 	  if (CALL_P (insn))
 	    {
+	      HARD_REG_SET clobbered_regs;
+
+	      get_call_invalidated_used_regs (insn, &clobbered_regs, true);
 	      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-		if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
+		if (TEST_HARD_REG_BIT (clobbered_regs, regno))
 		  already_set[regno] = 1;
 	    }
 
@@ -5801,8 +5810,11 @@ compute_store_table (void)
 	  note_stores (pat, reg_clear_last_set, last_set_in);
 	  if (CALL_P (insn))
 	    {
+	      HARD_REG_SET clobbered_regs;
+
+	      get_call_invalidated_used_regs (insn, &clobbered_regs, true);
 	      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-		if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno)
+		if (TEST_HARD_REG_BIT (clobbered_regs, regno)
 		    && last_set_in[regno] == INSN_UID (insn))
 		  last_set_in[regno] = 0;
 	    }
Index: yara-color.c
===================================================================
--- yara-color.c	(revision 113912)
+++ yara-color.c	(working copy)
@@ -1868,6 +1868,10 @@ static can_t *saved_conflict_cans;
 static bitmap conflict_can_bitmap;
 static bitmap biased_can_bitmap;
 
+/* Array whose element value is true if the corresponding hard
+   register already allocated for a can.  */
+static bool allocated_reg_p [FIRST_PSEUDO_REGISTER];
+
 /* Function choosing a hard register for CAN.  */
 static bool
 choose_global_hard_reg (can_t can)
@@ -1903,6 +1907,34 @@ choose_global_hard_reg (can_t can)
   best_hard_regno = -1;
   GO_IF_HARD_REG_SUBSET (reg_class_contents [cover_class], conflicting_regs,
 			 fail);
+  if (flag_ipra && call_p)
+    {
+      int freq;
+      allocno_t a, *can_allocnos;
+      HARD_REG_SET clobbered_regs;
+      
+      can_allocnos = CAN_ALLOCNOS (can);
+      for (i = 0; (a = can_allocnos [i]) != NULL; i++)
+	if (ALLOCNO_CALL_CROSS_P (a))
+	  {
+	    freq = ALLOCNO_CALL_FREQ (a);
+	    COPY_HARD_REG_SET (clobbered_regs,
+			       ALLOCNO_CLOBBERED_REGS (a));
+	    for (j = (int) class_hard_regs_num [cover_class] - 1;
+		 j >= 0;
+		 j--)
+	      {
+		hard_regno = class_hard_regs [cover_class] [j];
+		if (TEST_HARD_REG_BIT (clobbered_regs, hard_regno))
+		  {
+		    class = REGNO_REG_CLASS (hard_regno);
+		    costs [j]
+		      += freq * (memory_move_cost [mode] [class] [0]
+				 + memory_move_cost [mode] [class] [1]);
+		  }
+	      }
+	  }
+    }
   min_cost = INT_MAX;
   for (i = 0; i < class_size; i++)
     {
@@ -1910,7 +1942,7 @@ choose_global_hard_reg (can_t can)
       if (hard_reg_not_in_set_p (hard_regno, mode, conflicting_regs))
 	{
 	  cost = costs [i];
-	  if (call_p
+	  if (! flag_ipra && call_p
 	      && ! hard_reg_not_in_set_p (hard_regno, mode, call_used_reg_set))
 	    {
 	      /* ??? If only part is call clobbered.  */
@@ -1919,6 +1951,16 @@ choose_global_hard_reg (can_t can)
 		       * (memory_move_cost [mode] [class] [0]
 			  + memory_move_cost [mode] [class] [1]));
 	    }
+	  if (! allocated_reg_p [hard_regno]
+	      && hard_reg_not_in_set_p (hard_regno, mode, call_used_reg_set))
+	    /* We need to save/restore the register in
+	       epilogue/prologue.  Therefore we increase the cost.  */
+	    {
+	      /* ??? If only part is call clobbered.  */
+	      class = REGNO_REG_CLASS (hard_regno);
+	      cost += (memory_move_cost [mode] [class] [0]
+		       + memory_move_cost [mode] [class] [1]);
+	    }
 	  if (min_cost > cost)
 	    {
 	      min_cost = cost;
@@ -1961,6 +2003,7 @@ choose_global_hard_reg (can_t can)
 				[best_hard_regno] [mode]);
 	    }
 	}
+      allocated_reg_p [best_hard_regno] = true;
     }
   return best_hard_regno >= 0;
 }
@@ -2214,7 +2257,8 @@ push_globals_to_stack (void)
 	    }
 	  delete_can_from_bucket (can, bucket_ptr);
 	  if (yara_dump_file != NULL)
-	    fprintf (yara_dump_file, "Pushing %d (potential spill)\n", CAN_NUM (can));
+	    fprintf (yara_dump_file, "Pushing %d (potential spill)\n",
+		     CAN_NUM (can));
 	}
       CAN_IN_GRAPH_P (can) = false;
       VARRAY_PUSH_GENERIC_PTR (global_stack_varray, can);
@@ -2265,6 +2309,7 @@ pop_globals_from_stack (void)
   int stack_size;
   enum reg_class cover_class;
 
+  memset (allocated_reg_p, 0, sizeof (allocated_reg_p));
   for (;;)
     {
       stack_size = VARRAY_ACTIVE_SIZE (global_stack_varray);
@@ -2480,7 +2525,9 @@ assign_global_can_allocnos (void)
 	    if (hard_regno < 0
 		|| (ALLOCNO_CALL_CROSS_P (a)
 		    && ! hard_reg_not_in_set_p (hard_regno, CAN_MODE (can),
-						call_used_reg_set)))
+						flag_ipra
+						? ALLOCNO_CLOBBERED_REGS (a)
+						: call_used_reg_set)))
 	      {
 		equiv_const = (ALLOCNO_REGNO (a) >= 0
 			       ? reg_equiv_constant [ALLOCNO_REGNO (a)]
Index: emit-rtl.c
===================================================================
--- emit-rtl.c	(revision 113679)
+++ emit-rtl.c	(working copy)
@@ -812,13 +812,12 @@ gen_reg_rtx (enum machine_mode mode)
   return val;
 }
 
-/* Generate a register with same attributes as REG, but offsetted by OFFSET.
+/* Update NEW with same attributes as REG, but offset by OFFSET.
    Do the big endian correction if needed.  */
 
-rtx
-gen_rtx_REG_offset (rtx reg, enum machine_mode mode, unsigned int regno, int offset)
+static void
+update_reg_offset (rtx new, rtx reg, int offset)
 {
-  rtx new = gen_rtx_REG (mode, regno);
   tree decl;
   HOST_WIDE_INT var_size;
 
@@ -860,7 +859,7 @@ gen_rtx_REG_offset (rtx reg, enum machin
   if ((BYTES_BIG_ENDIAN || WORDS_BIG_ENDIAN)
       && decl != NULL
       && offset > 0
-      && GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (mode)
+      && GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (GET_MODE (new))
       && ((var_size = int_size_in_bytes (TREE_TYPE (decl))) > 0
 	  && var_size < GET_MODE_SIZE (GET_MODE (reg))))
     {
@@ -904,6 +903,27 @@ gen_rtx_REG_offset (rtx reg, enum machin
 
   REG_ATTRS (new) = get_reg_attrs (REG_EXPR (reg),
 				   REG_OFFSET (reg) + offset);
+}
+
+/* Generate a register with same attributes as REG, but offset by OFFSET.  */
+
+rtx
+gen_rtx_REG_offset (rtx reg, enum machine_mode mode,
+		    unsigned int regno, int offset)
+{
+  rtx new = gen_rtx_REG (mode, regno);
+  update_reg_offset (new, reg, offset);
+  return new;
+}
+
+/* Generate a new pseudo register with same attributes as REG, but
+   offset by OFFSET.  */
+
+rtx
+gen_reg_rtx_offset (rtx reg, enum machine_mode mode, int offset)
+{
+  rtx new = gen_reg_rtx (mode);
+  update_reg_offset (new, reg, offset);
   return new;
 }
 
@@ -1153,8 +1173,9 @@ gen_lowpart_common (enum machine_mode mo
 	return gen_rtx_fmt_e (GET_CODE (x), mode, XEXP (x, 0));
     }
   else if (GET_CODE (x) == SUBREG || REG_P (x)
-	   || GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR
-	   || GET_CODE (x) == CONST_DOUBLE || GET_CODE (x) == CONST_INT)
+	   || GET_CODE (x) == CONCAT || GET_CODE (x) == CONCATN
+	   || GET_CODE (x) == CONST_VECTOR || GET_CODE (x) == CONST_DOUBLE
+	   || GET_CODE (x) == CONST_INT)
     return simplify_gen_subreg (mode, x, innermode, offset);
 
   /* Otherwise, we can't do this.  */
Index: cselib.c
===================================================================
--- cselib.c	(revision 113679)
+++ cselib.c	(working copy)
@@ -1404,8 +1404,11 @@ cselib_process_insn (rtx insn)
      memory.  */
   if (CALL_P (insn))
     {
+      HARD_REG_SET used_regs;
+      
+      get_call_invalidated_used_regs (insn, &used_regs, false);
       for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
-	if (call_used_regs[i]
+	if (TEST_HARD_REG_BIT (used_regs, i)
 	    || (REG_VALUES (i) && REG_VALUES (i)->elt
 		&& HARD_REGNO_CALL_PART_CLOBBERED (i, 
 		      GET_MODE (REG_VALUES (i)->elt->u.val_rtx))))
Index: simplify-rtx.c
===================================================================
--- simplify-rtx.c	(revision 113679)
+++ simplify-rtx.c	(working copy)
@@ -4573,15 +4573,24 @@ simplify_subreg (enum machine_mode outer
       && GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (GET_MODE (op)))
     return adjust_address_nv (op, outermode, byte);
 
-  /* Handle complex values represented as CONCAT
-     of real and imaginary part.  */
-  if (GET_CODE (op) == CONCAT)
+  /* Handle values represented as CONCAT.  */
+  if (GET_CODE (op) == CONCAT || GET_CODE (op) == CONCATN)
     {
       unsigned int inner_size, final_offset;
       rtx part, res;
 
-      inner_size = GET_MODE_UNIT_SIZE (innermode);
-      part = byte < inner_size ? XEXP (op, 0) : XEXP (op, 1);
+      if (GET_CODE (op) == CONCAT)
+	{
+          inner_size = GET_MODE_SIZE (innermode) / 2;
+	  part = byte < inner_size ? XEXP (op, 0) : XEXP (op, 1);
+	}
+      else
+	{
+	  /* ??? We've got room; perhaps we should store the inner size
+	     of the CONCATN in one of the subsequent unused fields.  */
+	  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
+	  part = XVECEXP (op, 0, byte / inner_size);
+	}
       final_offset = byte % inner_size;
       if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
 	return NULL_RTX;
Index: loop-iv.c
===================================================================
--- loop-iv.c	(revision 113679)
+++ loop-iv.c	(working copy)
@@ -1404,10 +1404,12 @@ simplify_using_assignment (rtx insn, rtx
   if (CALL_P (insn))
     {
       int i;
+      HARD_REG_SET clobbered_regs;
 
       /* Kill all call clobbered registers.  */
+      get_call_invalidated_used_regs (insn, &clobbered_regs, true);
       for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
-	if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i))
+	if (TEST_HARD_REG_BIT (clobbered_regs, i))
 	  SET_REGNO_REG_SET (altered, i);
     }
 
Index: common.opt
===================================================================
--- common.opt	(revision 113912)
+++ common.opt	(working copy)
@@ -539,6 +539,10 @@ fipa-type-escape
 Common Report Var(flag_ipa_type_escape) Init(0)
 Type based escape and alias analysis
 
+fipra
+Common Report Var(flag_ipra) Init(0)
+Inter-procedural register allocation for YARA
+
 fivopts
 Common Report Var(flag_ivopts) Init(1)
 Optimize induction variables on trees
@@ -563,6 +567,10 @@ floop-optimize
 Common
 Does nothing.  Preserved for backward compatibility.
 
+flower-subreg
+Common Report Var(flag_lower_subreg)
+Subreg lowering
+
 fmath-errno
 Common Report Var(flag_errno_math) Init(1)
 Set errno after built-in math functions
Index: sched-deps.c
===================================================================
--- sched-deps.c	(revision 113679)
+++ sched-deps.c	(working copy)
@@ -1512,6 +1512,9 @@ sched_analyze (struct deps *deps, rtx he
 	    }
 	  else
 	    {
+	      HARD_REG_SET clobbered_regs;
+	      
+	      get_call_invalidated_used_regs (insn, &clobbered_regs, true);
 	      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 		/* A call may read and modify global register variables.  */
 		if (global_regs[i])
@@ -1524,7 +1527,7 @@ sched_analyze (struct deps *deps, rtx he
 		   and 'definitely not clobbered', we must include all
 		   partly call-clobbered registers here.  */
 		else if (HARD_REGNO_CALL_PART_CLOBBERED (i, reg_raw_mode[i])
-			 || TEST_HARD_REG_BIT (regs_invalidated_by_call, i))
+			 || TEST_HARD_REG_BIT (clobbered_regs, i))
 		  SET_REGNO_REG_SET (reg_pending_clobbers, i);
 		/* We don't know what set of fixed registers might be used
 		   by the function, but it is certain that the stack pointer
Index: yara.c
===================================================================
--- yara.c	(revision 113714)
+++ yara.c	(working copy)
@@ -45,6 +45,8 @@ Software Foundation, 51 Franklin Street,
 #include "output.h"
 #include "integrate.h"
 #include "yara-int.h"
+#include "cgraph.h"
+#include "function.h"
 
 FILE *yara_dump_file;
 int yara_max_uid; /* before the allocation */
@@ -419,6 +421,7 @@ yara (FILE *f)
   yara_dump_file = f;
   gcc_obstack_init (&yara_obstack);
   bitmap_obstack_initialize (&yara_bitmap_obstack);
+  CLEAR_HARD_REG_SET (cfun->emit->call_used_regs);
   yara_ir_init ();
   yara_trans_init ();
   yara_insn_init ();
Index: rtl.h
===================================================================
--- rtl.h	(revision 113679)
+++ rtl.h	(working copy)
@@ -1477,6 +1477,7 @@ extern int rtx_equal_p (rtx, rtx);
 extern rtvec gen_rtvec_v (int, rtx *);
 extern rtx gen_reg_rtx (enum machine_mode);
 extern rtx gen_rtx_REG_offset (rtx, enum machine_mode, unsigned int, int);
+extern rtx gen_reg_rtx_offset (rtx, enum machine_mode, int);
 extern rtx gen_label_rtx (void);
 extern rtx gen_lowpart_common (enum machine_mode, rtx);
 
Index: combine.c
===================================================================
--- combine.c	(revision 113679)
+++ combine.c	(working copy)
@@ -11003,8 +11003,11 @@ record_dead_and_set_regs (rtx insn)
 
   if (CALL_P (insn))
     {
+      HARD_REG_SET clobbered_regs;
+
+      get_call_invalidated_used_regs (insn, &clobbered_regs, true);
       for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
-	if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i))
+	if (TEST_HARD_REG_BIT (clobbered_regs, i))
 	  {
 	    reg_stat[i].last_set_value = 0;
 	    reg_stat[i].last_set_mode = 0;
Index: yara-trans.c
===================================================================
--- yara-trans.c	(revision 113912)
+++ yara-trans.c	(working copy)
@@ -2132,7 +2132,10 @@ check_hard_regno_for_a (allocno_t a, int
     return false;
   if (ALLOCNO_CALL_CROSS_P (a))
     {
-      COPY_HARD_REG_SET (prohibited_hard_regs, call_used_reg_set);
+      if (flag_ipra)
+	COPY_HARD_REG_SET (prohibited_hard_regs, ALLOCNO_CLOBBERED_REGS (a));
+      else
+	COPY_HARD_REG_SET (prohibited_hard_regs, call_used_reg_set);
       IOR_HARD_REG_SET (prohibited_hard_regs, ALLOCNO_HARD_REG_CONFLICTS (a));
     }
   else
@@ -2520,7 +2523,12 @@ assign_one_allocno (allocno_t a, enum re
  ok:
 #endif
   if (ALLOCNO_CALL_CROSS_P (a))
-    COPY_HARD_REG_SET (prohibited_hard_regs, call_used_reg_set);
+    {
+      if (flag_ipra)
+	COPY_HARD_REG_SET (prohibited_hard_regs, ALLOCNO_CLOBBERED_REGS (a));
+      else
+	COPY_HARD_REG_SET (prohibited_hard_regs, call_used_reg_set);
+    }
   else
     CLEAR_HARD_REG_SET (prohibited_hard_regs);
   IOR_COMPL_HARD_REG_SET (prohibited_hard_regs, possible_regs);
Index: resource.c
===================================================================
--- resource.c	(revision 113679)
+++ resource.c	(working copy)
@@ -662,10 +662,12 @@ mark_set_resources (rtx x, struct resour
       if (mark_type == MARK_SRC_DEST_CALL)
 	{
 	  rtx link;
+	  HARD_REG_SET used_regs;
 
+	  get_call_invalidated_used_regs (x, &used_regs, false);
 	  res->cc = res->memory = 1;
 	  for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
-	    if (call_used_regs[r] || global_regs[r])
+	    if (TEST_HARD_REG_BIT (used_regs, r) || global_regs[r])
 	      SET_HARD_REG_BIT (res->regs, r);
 
 	  for (link = CALL_INSN_FUNCTION_USAGE (x);
@@ -1024,11 +1026,13 @@ mark_target_live_regs (rtx insns, rtx ta
 
 	  if (CALL_P (real_insn))
 	    {
+	      HARD_REG_SET clobbered_regs;
+	    
+	      get_call_invalidated_used_regs (insn, &clobbered_regs, true);
 	      /* CALL clobbers all call-used regs that aren't fixed except
 		 sp, ap, and fp.  Do this before setting the result of the
 		 call live.  */
-	      AND_COMPL_HARD_REG_SET (current_live_regs,
-				      regs_invalidated_by_call);
+	      AND_COMPL_HARD_REG_SET (current_live_regs, clobbered_regs);
 
 	      /* A CALL_INSN sets any global register live, since it may
 		 have been modified by the call.  */
Index: var-tracking.c
===================================================================
--- var-tracking.c	(revision 113679)
+++ var-tracking.c	(working copy)
@@ -1583,9 +1583,15 @@ compute_bb_dataflow (basic_block bb)
       switch (VTI (bb)->mos[i].type)
 	{
 	  case MO_CALL:
-	    for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
-	      if (TEST_HARD_REG_BIT (call_used_reg_set, r))
-		var_regno_delete (out, r);
+	    {
+	      HARD_REG_SET used_regs;
+
+	      get_call_invalidated_used_regs (VTI (bb)->mos[i].insn,
+					      &used_regs, false);
+	      for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
+		if (TEST_HARD_REG_BIT (used_regs, r))
+		  var_regno_delete (out, r);
+	    }
 	    break;
 
 	  case MO_USE:
@@ -2345,9 +2351,11 @@ emit_notes_in_bb (basic_block bb)
 	  case MO_CALL:
 	    {
 	      int r;
+	      HARD_REG_SET used_regs;
 
+	      get_call_invalidated_used_regs (insn, &used_regs, false);
 	      for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
-		if (TEST_HARD_REG_BIT (call_used_reg_set, r))
+		if (TEST_HARD_REG_BIT (used_regs, r))
 		  {
 		    var_regno_delete (&set, r);
 		  }
Index: yara-ir.c
===================================================================
--- yara-ir.c	(revision 113912)
+++ yara-ir.c	(working copy)
@@ -47,6 +47,8 @@ Software Foundation, 51 Franklin Street,
 #include "params.h"
 #include "langhooks.h"
 #include "yara-int.h"
+#include "cgraph.h"
+#include "function.h"
 
 struct loops yara_loops;
 struct yara_loop_tree_node *yara_loop_tree_root;
@@ -259,25 +261,12 @@ int reg_class_nregs [N_REG_CLASSES] [MAX
 static void
 setup_reg_class_nregs (void)
 {
-  int i, m, old, hard_regno;
+  int m;
   enum reg_class cl;
 
-  memset (reg_class_nregs, 0, sizeof (reg_class_nregs));
   for (cl = 0; cl < N_REG_CLASSES; cl++)
     for (m = 0; m < MAX_MACHINE_MODE; m++)
-      {
-	for (i = 0; i < (int) class_hard_regs_num [cl]; i++)
-	  {
-	    hard_regno = class_hard_regs [cl] [i];
-	    old = reg_class_nregs [cl] [m];
-	    reg_class_nregs [cl] [m]
-	      = HARD_REGNO_NREGS (hard_regno, (enum machine_mode) m);
-	    if (old != 0 && old != reg_class_nregs [cl] [m])
-	      break;
-	  }
-	if (i < (int) class_hard_regs_num [cl])
-	  reg_class_nregs [cl] [m] = -1;
-      }
+      reg_class_nregs [cl] [m] = CLASS_MAX_NREGS (cl, m);
 }
 
 /* ??? implement better class SImode instead of HImode for QImode.  */
@@ -780,12 +769,12 @@ contains_eliminable_reg (rtx x)
   fmt = GET_RTX_FORMAT (code);
   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
     {
-      if (fmt[i] == 'e')
+      if (fmt [i] == 'e')
 	{
 	  if (contains_eliminable_reg (XEXP (x, i)))
 	    return true;
 	}
-      else if (fmt[i] == 'E')
+      else if (fmt [i] == 'E')
 	{
 	  int j;
 
@@ -1450,6 +1439,7 @@ create_allocno (enum allocno_type type, 
 #endif
   ALLOCNO_CALL_CROSS_P (a) = false;
   ALLOCNO_CALL_FREQ (a) = 0;
+  CLEAR_HARD_REG_SET (ALLOCNO_CLOBBERED_REGS (a));
   return a;
 }
 
@@ -1596,6 +1586,11 @@ print_allocno (FILE *f, allocno_t a)
     fprintf (f, ", + %d", ALLOCNO_MEMORY_SLOT_OFFSET (a));
   if (ALLOCNO_USE_EQUIV_CONST_P (a))
     fprintf (f, ",\nequiv const ");
+  if (ALLOCNO_CALL_CROSS_P (a))
+    {
+      fprintf (f, "\n Call clobbered regs:");
+      print_hard_reg_set (f, ALLOCNO_CLOBBERED_REGS (a));
+    }
   fprintf (f, "\n Hard reg conflicts:");
   print_hard_reg_set (f, ALLOCNO_HARD_REG_CONFLICTS (a));
   fprintf (f, "\n Allocno conflicts:");
@@ -3113,6 +3108,10 @@ process_non_operand_hard_regs (rtx *loc,
     }
 }
 
+/* Unsaved registers invalidated by function whose call is currently
+   being processed.  */
+static HARD_REG_SET curr_call_used_function_regs;
+
 /* The function sets up call_p and increment call_freq for allocno
    LIVE_A living through call insn given by DATA.  The function is
    called by generic traverse function process_live_allocnos.  */
@@ -3124,6 +3123,8 @@ set_call_info (allocno_t live_a, void *d
 
   ALLOCNO_CALL_CROSS_P (live_a) = true;
   ALLOCNO_CALL_FREQ (live_a) += BLOCK_FOR_INSN (insn)->frequency;
+  IOR_HARD_REG_SET (ALLOCNO_CLOBBERED_REGS (live_a),
+		    curr_call_used_function_regs);
 }
 
 /* The function sets up hard registers conflicting with allocno
@@ -3320,7 +3321,13 @@ build_insn_allocno_conflicts (rtx insn, 
       mark_hard_reg_death (REGNO (XEXP (link, 0)), GET_MODE (XEXP (link, 0)));
 
   if (CALL_P (insn))
-    process_live_allocnos (set_call_info, insn);
+    {
+      get_call_invalidated_used_regs (insn, &curr_call_used_function_regs,
+				      false);
+      IOR_HARD_REG_SET (cfun->emit->call_used_regs,
+			curr_call_used_function_regs);
+      process_live_allocnos (set_call_info, insn);
+    }
 
   /* Set up allocnos:  */
   for (a = curr_insn_allocnos; a != NULL; a = INSN_ALLOCNO_NEXT (a))
@@ -4837,7 +4844,6 @@ create_can (void)
   CAN_CALL_FREQ (can) = 0;
   CAN_CALL_P (can) = false;
   CAN_GLOBAL_P (can) = false || (YARA_PARAMS & YARA_NO_LOCAL_CAN) == 0;
-  CAN_SPILL_P (can) = false;
   CAN_MODE (can) = VOIDmode;
   CAN_HARD_REG_COSTS (can) = NULL;
   CAN_COPIES (can) = NULL;
@@ -5712,14 +5718,14 @@ print_can (FILE *f, can_t can)
   allocno_t a, *can_allocnos;
 
   fprintf
-    (f, "%scan#%d (%s %s(%d,%d) freq %d (call %d) r%d m%d %s%s%sconfl %d) allocnos:\n  ",
+    (f, "%scan#%d (%s %s(%d,%d) freq %d (call %d) r%d m%d %s%sconfl %d) allocnos:\n  ",
      (CAN_GLOBAL_P (can) ? "g" : ""), CAN_NUM (can),
      GET_MODE_NAME (CAN_MODE (can)), reg_class_names [CAN_COVER_CLASS (can)],
      CAN_COVER_CLASS_COST (can), CAN_MEMORY_COST (can),
      CAN_FREQ (can), CAN_CALL_FREQ (can), CAN_HARD_REGNO (can),
      CAN_SLOTNO (can),
      (CAN_CALL_P (can) ? "call " : ""), (CAN_IN_GRAPH_P (can) ? "in " : ""),
-     (CAN_SPILL_P (can) ? "spill " : ""), CAN_LEFT_CONFLICTS_NUM (can));
+     CAN_LEFT_CONFLICTS_NUM (can));
   can_allocnos = CAN_ALLOCNOS (can);
   for (i = 0; (a = can_allocnos [i]) != NULL; i++)
     {
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 113679)
+++ Makefile.in	(working copy)
@@ -751,7 +751,7 @@ RECOG_H = recog.h
 ALIAS_H = alias.h
 EMIT_RTL_H = emit-rtl.h
 FLAGS_H = flags.h options.h
-FUNCTION_H = function.h $(TREE_H)
+FUNCTION_H = function.h $(TREE_H) hard-reg-set.h
 EXPR_H = expr.h insn-config.h $(FUNCTION_H) $(RTL_H) $(FLAGS_H) $(TREE_H) $(MACHMODE_H) $(EMIT_RTL_H)
 OPTABS_H = optabs.h insn-codes.h
 REGS_H = regs.h varray.h $(MACHMODE_H) $(OBSTACK_H) $(BASIC_BLOCK_H) $(FUNCTION_H)
@@ -997,7 +997,8 @@ OBJS-common = \
  lambda-trans.o	lambda-code.o tree-loop-linear.o tree-ssa-sink.o 	   \
  tree-vrp.o tree-stdarg.o tree-cfgcleanup.o tree-ssa-reassoc.o		   \
  tree-ssa-structalias.o tree-object-size.o rtl-factoring.o		   \
- yara.o yara-ir.o yara-trans.o yara-insn.o yara-color.o yara-final.o
+ yara.o yara-ir.o yara-trans.o yara-insn.o yara-color.o yara-final.o       \
+ lower-subreg.o
 
 
 OBJS-md = $(out_object_file)
@@ -2166,7 +2167,7 @@ function.o : function.c $(CONFIG_H) $(SY
    $(OPTABS_H) libfuncs.h $(REGS_H) hard-reg-set.h insn-config.h $(RECOG_H) \
    output.h toplev.h except.h $(HASHTAB_H) $(GGC_H) $(TM_P_H) langhooks.h \
    gt-function.h $(TARGET_H) $(BASIC_BLOCK_H) $(INTEGRATE_H) $(PREDICT_H) \
-   tree-pass.h
+   tree-pass.h $(CGRAPH_H)
 stmt.o : stmt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    $(TREE_H) $(FLAGS_H) $(FUNCTION_H) insn-config.h hard-reg-set.h $(EXPR_H) \
    libfuncs.h except.h $(RECOG_H) toplev.h output.h $(GGC_H) $(TM_P_H) \
@@ -2507,12 +2508,13 @@ yara.o: yara.c $(CONFIG_H) $(SYSTEM_H) c
    $(REGS_H) hard-reg-set.h $(FLAGS_H) $(OBSTACK_H) $(HASHTAB_H) errors.h \
    $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) \
    $(CFGLOOP_H) yara.h yara-int.h $(TIMEVAR_H) tree-pass.h output.h \
-   integrate.h $(GGC_H)
+   integrate.h $(FUNCTION_H) $(CGRAPH_H) $(GGC_H)
 yara-ir.o: yara-ir.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TARGET_H) $(RTL_H) $(TREE_H) insn-codes.h insn-config.h \
    $(OPTABS_H) $(RECOG_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) $(HASHTAB_H) \
-   errors.h $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) langhooks.h\
-   $(CFGLOOP_H) yara.h yara-int.h $(GGC_H) gt-yara-ir.h
+   errors.h $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) langhooks.h \
+   $(CFGLOOP_H) yara.h yara-int.h $(FUNCTION_H) $(CGRAPH_H) $(GGC_H) \
+   gt-yara-ir.h
 yara-trans.o: yara-trans.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TARGET_H) $(RTL_H) insn-codes.h insn-config.h $(OPTABS_H) $(RECOG_H) \
    $(REGS_H) hard-reg-set.h $(FLAGS_H) $(HASHTAB_H) errors.h \
@@ -2627,6 +2629,8 @@ hooks.o: hooks.c $(CONFIG_H) $(SYSTEM_H)
 pretty-print.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h intl.h $(PRETTY_PRINT_H) \
    $(TREE_H)
 errors.o : errors.c $(CONFIG_H) $(SYSTEM_H) errors.h $(BCONFIG_H)
+lower-subreg.o : lower-subreg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+   $(MACHMODE_H) $(RTL_H) bitmap.h
 
 $(out_object_file): $(out_file) $(CONFIG_H) coretypes.h $(TM_H) $(TREE_H) \
    $(RTL_H) $(REGS_H) hard-reg-set.h insn-config.h conditions.h \
Index: passes.c
===================================================================
--- passes.c	(revision 113679)
+++ passes.c	(working copy)
@@ -632,6 +632,7 @@ init_optimization_passes (void)
   NEXT_PASS (pass_unshare_all_rtl);
   NEXT_PASS (pass_instantiate_virtual_regs);
   NEXT_PASS (pass_jump2);
+  NEXT_PASS (pass_lower_subreg);
   NEXT_PASS (pass_cse);
   NEXT_PASS (pass_gcse);
   NEXT_PASS (pass_jump_bypass);
/* Decompose multiword subregs.
   Contributed by Richard Henderson.
   Copyright (C) 2005, 2006 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "machmode.h"
#include "tm.h"
#include "rtl.h"
#include "function.h"
#include "expr.h"
#include "obstack.h"
#include "bitmap.h"
#include "tree-pass.h"
#include "timevar.h"
#include "tm_p.h"


/* Normalize STACK_GROWS_DOWNWARD so that it is always defined, as 1 when
   the target defined it and as 0 otherwise.  After this point it can be
   used as an ordinary value instead of only in #ifdef tests.  */
#ifdef STACK_GROWS_DOWNWARD
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#else
# define STACK_GROWS_DOWNWARD 0
#endif


/* Instantiate the heap-allocated vector-of-bitmap type that is used for
   reg_copy_graph below.  */
DEF_VEC_P(bitmap);
DEF_VEC_ALLOC_P(bitmap,heap);

/* Bit N set if regno N is used in a context in which we can decompose it.
   Bits are set by find_decomposable_subregs and extended along pseudo
   copies by propagate_pseudo_copies.  */
static bitmap decomposable_context;

/* Bit N set if regno N is used in a context in which it cannot
   be decomposed.  Bits are set by find_decomposable_subregs.  */
static bitmap non_decomposable_context;

/* Bit N in element M set if there exists a copy from reg M to reg N.
   Elements are NULL until find_pseudo_copy records the first copy out
   of reg M.  */
static VEC(bitmap,heap) *reg_copy_graph;


/* If INSN is a single set whose destination and source are each either
   an object or a SUBREG, and neither of them is a volatile MEM, return
   that SET.  Otherwise return NULL.  Such insns can always be
   decomposed.  */

static rtx
simple_move (rtx insn)
{
  rtx pattern = single_set (insn);
  rtx sides[2];
  int k;

  if (pattern == NULL)
    return NULL;

  /* The destination and the source are subject to exactly the same
     checks; the destination is examined first.  */
  sides[0] = SET_DEST (pattern);
  sides[1] = SET_SRC (pattern);

  for (k = 0; k < 2; k++)
    {
      rtx operand = sides[k];

      if (GET_CODE (operand) != SUBREG && !OBJECT_P (operand))
	return NULL;
      if (MEM_P (operand) && MEM_VOLATILE_P (operand))
	return NULL;
    }

  return pattern;
}

/* SET is a SET rtx.  If it copies one pseudo register directly into
   another, and the destination's mode is not narrower than a word,
   record the copy in reg_copy_graph by setting the destination's bit in
   the bitmap that belongs to the source.  That bitmap is allocated the
   first time a copy out of the source is seen.  Sets of any other shape
   are ignored.  */

static void
find_pseudo_copy (rtx set)
{
  rtx to = SET_DEST (set);
  rtx from = SET_SRC (set);
  unsigned int to_regno, from_regno;
  bitmap copies;

  /* Only plain register-to-register moves are of interest.  */
  if (!(REG_P (to) && REG_P (from)))
    return;

  to_regno = REGNO (to);
  from_regno = REGNO (from);

  /* Hard registers do not take part in the copy graph.  */
  if (HARD_REGISTER_NUM_P (to_regno) || HARD_REGISTER_NUM_P (from_regno))
    return;

  /* NOTE(review): word-sized destinations are still recorded here; only
     sub-word ones are skipped.  Confirm whether that is intended.  */
  if (GET_MODE_SIZE (GET_MODE (to)) < UNITS_PER_WORD)
    return;

  copies = VEC_index (bitmap, reg_copy_graph, from_regno);
  if (copies == NULL)
    {
      copies = BITMAP_ALLOC (NULL);
      VEC_replace (bitmap, reg_copy_graph, from_regno, copies);
    }
  bitmap_set_bit (copies, to_regno);
}

/* Extend decomposable_context along the edges of reg_copy_graph.

   Starting from the registers already in decomposable_context, gather
   on each round the registers they are copied to, leaving out those in
   non_decomposable_context.  All gathered registers are merged into
   decomposable_context; the ones that were not there before form the
   work list of the next round.  Stop once a round adds nothing new.  */

static void
propagate_pseudo_copies (void)
{
  bitmap worklist = BITMAP_ALLOC (NULL);
  bitmap reached = BITMAP_ALLOC (NULL);

  bitmap_copy (worklist, decomposable_context);

  for (;;)
    {
      bitmap_iterator bi;
      unsigned int regno;

      /* Collect the copy destinations of everything on the work list.  */
      bitmap_clear (reached);
      EXECUTE_IF_SET_IN_BITMAP (worklist, 0, regno, bi)
	{
	  bitmap targets = VEC_index (bitmap, reg_copy_graph, regno);

	  if (targets != NULL)
	    bitmap_ior_and_compl_into (reached, targets,
				       non_decomposable_context);
	}

      /* The next work list must be computed before decomposable_context
	 is updated, since it is defined relative to the old contents.  */
      bitmap_and_compl (worklist, reached, decomposable_context);
      bitmap_ior_into (decomposable_context, reached);

      if (bitmap_empty_p (worklist))
	break;
    }

  BITMAP_FREE (worklist);
  BITMAP_FREE (reached);
}

/* Callback for for_each_rtx.  Look at the expression *PX and record in
   decomposable_context or non_decomposable_context how the pseudo it
   mentions is being used.  SM is the result of simple_move for the
   complete insn; it is only tested for being null.

   Returns -1 when the sub-expressions of *PX must not be visited, and 0
   otherwise.  */

static int
find_decomposable_subregs (rtx *px, void *sm)
{
  rtx x = *px;

  if (GET_CODE (x) == SUBREG)
    {
      rtx inner = SUBREG_REG (x);
      unsigned int regno;
      unsigned int outer_bytes, inner_bytes;
      unsigned int outer_words, inner_words;

      /* We only care about subregs of pseudo registers.  One might hope
	 that nothing else ever shows up, since that would mean someone
	 bypassed simplify_gen_subreg and friends, but no doubt that
	 happens all too often.  */
      if (!REG_P (inner))
	return 0;

      regno = REGNO (inner);
      if (HARD_REGISTER_NUM_P (regno))
	return -1;

      /* Round both the subreg and the register up to whole words.  */
      outer_bytes = GET_MODE_SIZE (GET_MODE (x));
      inner_bytes = GET_MODE_SIZE (GET_MODE (inner));
      outer_words = (outer_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      inner_words = (inner_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

      /* Anything other than a single-word piece of a multi-word register
	 is left for the walk to descend into.  */
      /* ??? This rules out e.g. DImode subregs of TImode values on
	 32-bit targets.  Supporting them would require recording how
	 the pseudo is used and decomposing only when every use splits
	 it into the same number of pieces.  Hopefully this is rare.  */
      if (outer_words != 1 || inner_words <= 1)
	return 0;

      /* A single-word subreg of a multi-word pseudo is a candidate for
	 decomposition.  Returning -1 keeps the walk away from the inner
	 register, which would otherwise be taken for a use we cannot
	 decompose.  */
      /* ??? This flatly assumes that the subreg sits inside an operand
	 and is therefore replaceable.  That may be false if the target
	 plays games with subregs in its patterns.  A better approach
	 might mirror regrename: recognize the insn, iterate over the
	 operands, smash them out and walk the remaining pattern.  */
      bitmap_set_bit (decomposable_context, regno);
      return -1;
    }

  if (REG_P (x))
    {
      unsigned int regno = REGNO (x);

      /* Decomposable uses are caught at the enclosing subreg and never
	 reach this point, so a register seen here is used in a way we
	 cannot decompose, unless the whole insn is a simple move.
	 Single-word pseudos are left out to keep the bitmap small.  */
      if (sm == NULL
	  && !HARD_REGISTER_NUM_P (regno)
	  && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	bitmap_set_bit (non_decomposable_context, regno);
    }

  return 0;
}

/* Decompose pseudo REGNO into word-sized components.  The REG node is
   smashed in place into a CONCATN holding one fresh word_mode pseudo per
   word of the original mode.  Doing this in place ensures that (1)
   something goes wrong quickly if we fail to find a place in which we
   ought to be performing some replacement, and (2) the debug information
   inside the symbol table is automatically kept up to date.

   The entry for REGNO in regno_reg_rtx is cleared.  When dump_file is
   set, the old and new register numbers are written to it.  */

static void
decompose_register (unsigned int regno)
{
  unsigned int i, words;
  rtvec v;
  rtx reg;

  reg = regno_reg_rtx[regno];
  regno_reg_rtx[regno] = NULL;

  /* Number of words needed for the mode, rounding up.  */
  words = GET_MODE_SIZE (GET_MODE (reg));
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Report the old register number now, while REG is still a REG; the
     node is rewritten immediately below.  */
  if (dump_file)
    fprintf (dump_file, "; Splitting reg %u ->", REGNO (reg));

  PUT_CODE (reg, CONCATN);
  XVEC (reg, 0) = v = rtvec_alloc (words);

  /* Each component is a new pseudo that inherits REG's attributes at the
     matching byte offset.  */
  for (i = 0; i < words; ++i)
    RTVEC_ELT (v, i)
      = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);

  if (dump_file)
    {
      for (i = 0; i < words; ++i)
	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
      fputc ('\n', dump_file);
    }
}

/* Return true if X has code CONCAT or CONCATN, which is what a REG node
   looks like once decompose_register has smashed it.  */

static inline bool
resolve_reg_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONCAT:
    case CONCATN:
      return true;

    default:
      return false;
    }
}

/* Return true if X is a SUBREG whose operand satisfies resolve_reg_p.  */

static bool
resolve_subreg_p (rtx x)
{
  return GET_CODE (x) == SUBREG && resolve_reg_p (SUBREG_REG (x));
}

/* Callback for for_each_rtx.  DATA is unused.

   If *PX is a SUBREG of a decomposed register, replace it in place with
   the result of simplify_subreg and return -1 so that the replacement is
   not walked.  If *PX is itself a decomposed register, i.e. one that is
   not wrapped in a SUBREG, return 1.  Return 0 for everything else,
   including a null *PX.  */

static int
resolve_subreg_use (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (x == NULL)
    return 0;

  if (!resolve_subreg_p (x))
    return resolve_reg_p (x) ? 1 : 0;

  /* A (subreg (concat)) can only be something we created ourselves via
     decompose_register, so simplifying it must succeed.  */
  *px = simplify_subreg (GET_MODE (x), SUBREG_REG (x),
			 GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x));
  gcc_assert (*px != NULL);
  return -1;
}

/* If OLD_START carries a REG_LIBCALL note, move that note to the front
   of NEW_START's note list, and make the REG_RETVAL note on the insn the
   REG_LIBCALL note refers to point at NEW_START.  Do nothing if
   OLD_START has no REG_LIBCALL note.  */

static void
move_libcall_note (rtx old_start, rtx new_start)
{
  rtx libcall, retval, last;

  libcall = find_reg_note (old_start, REG_LIBCALL, NULL);
  if (!libcall)
    return;

  remove_note (old_start, libcall);

  /* The REG_LIBCALL note names another insn; look up the REG_RETVAL
     note on that insn.
     NOTE(review): the REG_RETVAL note is used below without a null
     check, so it is presumably guaranteed to exist; confirm.  */
  last = XEXP (libcall, 0);
  retval = find_reg_note (last, REG_RETVAL, NULL);

  /* Reuse the unlinked note node as the new head of NEW_START's list.  */
  XEXP (libcall, 1) = REG_NOTES (new_start);
  REG_NOTES (new_start) = libcall;

  XEXP (retval, 0) = new_start;
}

/* If INSN1 carries a REG_RETVAL note, remove it together with the
   REG_LIBCALL note on the insn it refers to, and strip every
   REG_NO_CONFLICT note from the insns starting at that insn and ending
   just before INSN1.  Do nothing if INSN1 has no REG_RETVAL note.  */

static void
remove_retval_note (rtx insn1)
{
  rtx retval, libcall, first, cur, stale;

  retval = find_reg_note (insn1, REG_RETVAL, NULL);
  if (!retval)
    return;

  first = XEXP (retval, 0);
  libcall = find_reg_note (first, REG_LIBCALL, NULL);

  remove_note (first, libcall);
  remove_note (insn1, retval);

  /* INSN1 itself is not visited by this walk.  */
  cur = first;
  while (cur != insn1)
    {
      /* An insn may hold several such notes; keep going until none is
	 left.  */
      for (stale = find_reg_note (cur, REG_NO_CONFLICT, NULL);
	   stale != NULL;
	   stale = find_reg_note (cur, REG_NO_CONFLICT, NULL))
	remove_note (cur, stale);

      cur = NEXT_INSN (cur);
    }
}

/* Bring the register notes of INSN in line with decomposed registers.

   The REG_EQUAL/REG_EQUIV note, if any, is walked with
   resolve_subreg_use, which rewrites subregs of decomposed registers in
   place.  If that walk reports a decomposed register that is not wrapped
   in a subreg, the note is removed and remove_retval_note is applied to
   INSN.  After that, every REG_NO_CONFLICT note whose operand is a
   decomposed register is unlinked from INSN's note list.  */

static void
resolve_reg_notes (rtx insn)
{
  rtx equiv, *link;

  equiv = find_reg_equal_equiv_note (insn);
  if (equiv != NULL
      && for_each_rtx (&XEXP (equiv, 0), resolve_subreg_use, NULL) != 0)
    {
      remove_note (insn, equiv);
      remove_retval_note (insn);
    }

  /* LINK always addresses the pointer that leads to the current note, so
     a note can be dropped by simply overwriting that pointer.  */
  link = &REG_NOTES (insn);
  while (*link != NULL)
    {
      rtx note = *link;

      if (REG_NOTE_KIND (note) == REG_NO_CONFLICT
	  && resolve_reg_p (XEXP (note, 0)))
	*link = XEXP (note, 1);
      else
	link = &XEXP (note, 1);
    }
}

/* Return true if X is a register that cannot be decomposed.  For a
   pseudo this means it is recorded in non_decomposable_context.  For a
   hard register it means validate_subreg rejects a word_mode subreg of
   it at byte offset UNITS_PER_WORD.  Anything that is not a REG yields
   false.  */

static bool
cannot_decompose_p (rtx x)
{
  unsigned int regno;

  if (!REG_P (x))
    return false;

  regno = REGNO (x);
  if (!HARD_REGISTER_NUM_P (regno))
    return bitmap_bit_p (non_decomposable_context, regno);

  return !validate_subreg (word_mode, GET_MODE (x), x, UNITS_PER_WORD);
}

/* Lower the simple move SET, the pattern of INSN, into word_mode moves
   when its source and/or destination involves a register for which
   resolve_reg_p holds.

   Returns INSN unchanged when neither operand involves such a
   register.  Otherwise a new insn sequence is emitted before INSN and
   the value get_insns () gave for that sequence is returned, so the
   caller continues from there.  INSN itself is deleted unless one of
   the cases below rewrote it in place to act as the final copy.  */

static rtx
resolve_simple_move (rtx set, rtx insn)
{
  rtx dst, src, tmp;
  /* RDP/RSP: the destination/source is, or is a multiword SUBREG of, a
     register for which resolve_reg_p holds.  SDP/SSP: the multiword
     SUBREG case specifically.  DELETE: INSN is fully replaced by the new
     sequence.  */
  bool rdp, rsp, sdp, ssp, delete;
  unsigned int i, words;
  enum machine_mode orig_mode;

  dst = SET_DEST (set);
  src = SET_SRC (set);
  orig_mode = GET_MODE (dst);

  sdp = ssp = false;
  if (GET_CODE (dst) == SUBREG && GET_MODE_SIZE (orig_mode) > UNITS_PER_WORD)
    sdp = rdp = resolve_reg_p (SUBREG_REG (dst));
  else
    rdp = resolve_reg_p (dst);

  if (GET_CODE (src) == SUBREG && GET_MODE_SIZE (orig_mode) > UNITS_PER_WORD)
    ssp = rsp = resolve_reg_p (SUBREG_REG (src));
  else
    rsp = resolve_reg_p (src);

  /* Neither side touches a decomposed register: nothing to do.  */
  if (!rdp && !rsp)
    return insn;

  start_sequence ();

  /* Assume the new sequence replaces INSN until a case below decides
     that INSN has to stay.  */
  delete = true;

  /* SRC is a multiword SUBREG of a decomposed register.  Re-point the
     SUBREG inside INSN at a fresh pseudo and turn the job of this
     function into copying the original inner register into that
     pseudo; INSN is kept and reads the pseudo afterwards.  */
  if (ssp)
    {
      tmp = SUBREG_REG (src);
      orig_mode = GET_MODE (tmp);
      dst = gen_reg_rtx (orig_mode);
      SUBREG_REG (src) = dst;
      src = tmp;
      delete = false;
    }
  /* SRC is not a decomposed register and cannot be split itself: copy
     it whole into a fresh pseudo and split that instead.  */
  if (!rsp && cannot_decompose_p (src))
    {
      tmp = gen_reg_rtx (orig_mode);
      emit_move_insn (tmp, src);
      src = tmp;
    }

  /* DST is a multiword SUBREG of a decomposed register.  Re-point the
     SUBREG at a fresh pseudo, emit the original move into it, and then
     copy that pseudo into the original inner register word by word.
     NOTE(review): if SSP was also set, DST has already been replaced by
     a fresh pseudo above, so SUBREG_REG (dst) here would no longer
     refer to the original destination -- presumably simple_move never
     yields that combination; confirm.  */
  if (sdp)
    {
      tmp = SUBREG_REG (dst);
      orig_mode = GET_MODE (tmp);
      SUBREG_REG (dst) = gen_reg_rtx (orig_mode);
      emit_move_insn (dst, src);
      src = SUBREG_REG (dst);
      dst = tmp;
    }
  /* DST is not a decomposed register and cannot be split: fill a fresh
     pseudo word by word, and keep INSN (with its source replaced) as
     the copy from that pseudo into the real destination.  */
  if (!rdp && cannot_decompose_p (dst))
    {
      dst = gen_reg_rtx (orig_mode);
      SET_SRC (set) = dst;
      delete = false;
    }

  /* Number of words covered by ORIG_MODE, rounded up.  */
  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (push_operand (dst, orig_mode))
    {
      unsigned int j, jinc;

      /* Only whole-word sizes and non-MODIFY push addresses are
	 handled here.  */
      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dst, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dst, 0)) != POST_MODIFY);

      /* J is the index of the word stored by each emitted move; the
	 direction depends on how word order relates to stack growth.
	 JINC is unsigned, so -1 steps J downwards through modular
	 wraparound.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	j = 0, jinc = 1;
      else
	j = words - 1, jinc = -1;

      /* NOTE(review): the result of simplify_subreg is passed straight
	 to emit_move_insn here, whereas the non-push path uses
	 simplify_gen_subreg -- confirm it cannot be NULL for SRC.  */
      for (i = 0; i < words; i++, j += jinc)
	{
	  tmp = copy_rtx (XEXP (dst, 0));
	  tmp = adjust_automodify_address_nv (dst, word_mode, tmp,
					      j * UNITS_PER_WORD);
	  emit_move_insn (tmp, simplify_subreg (word_mode, src, orig_mode,
						j * UNITS_PER_WORD));
	}
    }
  else
    {
      /* Memory operands with auto-increment addresses are not expected
	 on this path.  */
      gcc_assert (!MEM_P (dst)
		  || GET_RTX_CLASS (GET_CODE (XEXP (dst, 0))) != RTX_AUTOINC);
      gcc_assert (!MEM_P (src)
		  || GET_RTX_CLASS (GET_CODE (XEXP (src, 0))) != RTX_AUTOINC);

      /* A pseudo destination gets a CLOBBER emitted ahead of the
	 word-sized stores.  */
      if (REG_P (dst) && !HARD_REGISTER_NUM_P (REGNO (dst)))
	emit_insn (gen_rtx_CLOBBER (VOIDmode, dst));

      /* Copy word I of SRC into word I of DST.  */
      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg (word_mode, dst, orig_mode,
					     UNITS_PER_WORD * i),
			simplify_gen_subreg (word_mode, src, orig_mode,
					     UNITS_PER_WORD * i));
    }

  tmp = get_insns ();
  end_sequence ();

  emit_insn_before (tmp, insn);
  /* When INSN is fully replaced, move its REG_LIBCALL note (if any) to
     the new sequence, dissolve any REG_RETVAL pairing it closes, and
     delete it.  */
  if (delete)
    {
      move_libcall_note (insn, tmp);
      remove_retval_note (insn);
      delete_insn (insn);
    }

  return tmp;
}

/* PAT is the CLOBBER pattern of INSN.  If the clobbered operand is a
   register for which resolve_reg_p holds, turn the single CLOBBER into
   one CLOBBER per word: PAT's operand is replaced by the word at byte
   offset 0, and a new CLOBBER insn is emitted after INSN for each
   remaining word.  The extra insns are emitted from the highest word
   down, each directly after INSN, which leaves them in ascending word
   order.  */

static void
resolve_clobber (rtx pat, rtx insn)
{
  rtx reg, piece;
  enum machine_mode orig_mode;
  unsigned int nwords, idx;

  reg = XEXP (pat, 0);
  if (resolve_reg_p (reg))
    {
      orig_mode = GET_MODE (reg);

      /* Word count of ORIG_MODE, rounded up.  */
      nwords = GET_MODE_SIZE (orig_mode);
      nwords = (nwords + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

      XEXP (pat, 0) = simplify_subreg (word_mode, reg, orig_mode, 0);

      for (idx = nwords - 1; idx != 0; idx--)
	{
	  piece = simplify_subreg (word_mode, reg, orig_mode,
				   idx * UNITS_PER_WORD);
	  emit_insn_after (gen_rtx_CLOBBER (VOIDmode, piece), insn);
	}
    }
}

/* PAT is the USE pattern of INSN.  Delete INSN when the used operand
   satisfies resolve_subreg_p; otherwise leave it alone.  */

static void
resolve_use (rtx pat, rtx insn)
{
  rtx used = XEXP (pat, 0);

  if (!resolve_subreg_p (used))
    return;

  delete_insn (insn);
}

/* Entry point of the subreg lowering pass.

   A first walk over the insn chain collects candidates in
   decomposable_context / non_decomposable_context and records pseudo
   copies.  If any candidate survives, the copies are propagated, each
   surviving register is handed to decompose_register, and a second
   walk rewrites CLOBBERs, USEs, simple moves, remaining subreg uses
   and register notes.  All pass-local data is released before
   returning.  */

void
decompose_multiword_subregs (void)
{
  rtx insn, set;

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);

  /* One bitmap pointer per register number, all initially NULL.  */
  {
    unsigned int max = max_reg_num ();
    reg_copy_graph = VEC_alloc (bitmap, heap, max);
    VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
    memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);
  }

  /* Analysis walk.  Insns whose whole pattern is a CLOBBER or USE are
     ignored here.  */
  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
        if (GET_CODE (PATTERN (insn)) == CLOBBER
	    || GET_CODE (PATTERN (insn)) == USE)
	  continue;

	set = simple_move (insn);
	if (set)
         {
           /* (set cc0 reg) is a comparison instruction and cannot be
              decomposed.  Clear SET so that we recognize this fact when
              we see it in find_decomposable_subregs.  */
           if (CC0_P (SET_DEST (set)))
             set = NULL;
           else
             find_pseudo_copy (set);
         }
        /* SET (possibly NULL) is passed through as the callback's data
           argument.  */
        for_each_rtx (&PATTERN (insn), find_decomposable_subregs, set);
      }

  /* A register seen in a non-decomposable context is dropped from the
     candidate set.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Rewrite walk.  The chain is modified while it is being
	 traversed: resolve_clobber emits insns after INSN,
	 resolve_simple_move emits insns before it and may delete it,
	 and resolve_use may delete it.
	 NOTE(review): after resolve_use deletes INSN the loop still
	 reads NEXT_INSN (insn); this presumably relies on delete_insn
	 leaving that link intact -- confirm.  */
      for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
	{
	  rtx pat;

	  if (!INSN_P (insn))
	    continue;

	  pat = PATTERN (insn);
	  if (GET_CODE (pat) == CLOBBER)
	    resolve_clobber (pat, insn);
	  else if (GET_CODE (pat) == USE)
	    resolve_use (pat, insn);
	  else
	    {
	      set = simple_move (insn);
	      /* INSN may be replaced by the value resolve_simple_move
		 returns; the rest of this iteration and the walk
		 continue from that insn.  */
	      if (set)
		insn = resolve_simple_move (set, insn);
	      for_each_rtx (&PATTERN (insn), resolve_subreg_use, NULL);
	      resolve_reg_notes (insn);
	    }
	}
    }

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);

  /* Free every bitmap that was created in the copy graph, then the
     vector itself.  */
  {
    unsigned int i;
    bitmap b;
    for (i = 0; VEC_iterate (bitmap, reg_copy_graph, i, b); ++i)
      if (b)
	BITMAP_FREE (b);
  }
  VEC_free (bitmap, heap, reg_copy_graph);
}



/* Gate function of the lower-subreg pass: the pass runs only when
   flag_lower_subreg is nonzero.  */
static bool
gate_lower_subreg (void)
{
  return flag_lower_subreg;
}

/* Execute function of the lower-subreg pass: run
   decompose_multiword_subregs.  Always returns 0.  */
static unsigned int
rest_of_handle_lower_subreg (void)
{
  decompose_multiword_subregs ();
  return 0;
}

/* Pass descriptor for the subreg lowering pass.  It is gated by
   gate_lower_subreg, executed by rest_of_handle_lower_subreg and timed
   under TV_LOWER_SUBREG; on finish it requests TODO_dump_func and
   TODO_ggc_collect.  It declares no required, provided or destroyed
   properties.  */
struct tree_opt_pass pass_lower_subreg =
{
  "lower_subreg",                               /* name */
  gate_lower_subreg,                            /* gate */
  rest_of_handle_lower_subreg,		        /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_LOWER_SUBREG,	                        /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func |
  TODO_ggc_collect,                     /* todo_flags_finish */
  'Y'                                   /* letter */
};

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]