This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PATCH RFC: My version of the lower-subreg patch


Various people have expressed interest in the lower-subreg patch over
the past few months, so I'm sending out a copy.  This patch is not
ready to check in.

This is an update of some work done by Richard Henderson a while back.
We look for multi-word pseudo-registers which are always used in a
context where they can be handled as combinations of single-word
registers, and we split them up.  This lets the register allocator
allocate the different parts of the pseudo-register independently.  In
some cases we can eliminate uses of parts of the pseudo-register.
Either way, we generate better code for C code using 'long long'
values on 32-bit systems.

I modified Richard's work by making the pass look at the instruction
operands rather than just at the RTL.  I also modified the register
allocator to do better tracking of conflicts between subregs, which
permits it to allocate some pseudo-registers into the same hard
register in places where it previously thought there was a conflict
(this work should permit getting rid of REG_NO_CONFLICT notes,
although I haven't actually done that here).  I also added several
splits to the i386 backend to expose more places where multi-word
pseudo-registers can be decomposed.  These different parts are
logically separate, and I'll propose them as separate patches.

As I say, this pass is not ready to check in.  The lower-subreg pass
is run twice: once early in the RTL passes, and once after instruction
splitting.  This gives the best results: the first pass permits CSE on
operands of & and | which are fully decomposable, and the second pass
handles operands of + and - which we do not want to decompose before
CSE and loop optimizations.  But the pass is too slow to run twice.

With the new dataflow framework, it should be possible to rework the
pass to look for multi-word pseudo-registers and check all defs and
uses of them, rather than the current approach of looking through all
the insns.  I think that will make the pass fast enough to run twice.
In typical code which does not use 'long long' the pass will just spin
through the pseudo-registers.  I have not implemented this.

The tracking of subreg conflicts in the register allocator is done
using REG_SUBREG_DEAD notes, which is not the approach the new
dataflow framework takes.  It would be better to enhance the dataflow
engine to track subreg usage.  That would also work across basic
blocks--the simple code I have now to create REG_SUBREG_DEAD notes
only works within a single basic block.

Anyhow, I wanted to put this patch out for comments, to see what
people think of it.  I'd be interested in results on machines other
than the i386.

Ian

Index: gcc/reload.c
===================================================================
--- gcc/reload.c	(revision 119834)
+++ gcc/reload.c	(working copy)
@@ -1513,71 +1513,118 @@ push_reload (rtx in, rtx out, rtx *inloc
   if (rld[i].reg_rtx == 0 && in != 0 && hard_regs_live_known)
     {
       rtx note;
-      int regno;
       enum machine_mode rel_mode = inmode;
 
       if (out && GET_MODE_SIZE (outmode) > GET_MODE_SIZE (inmode))
 	rel_mode = outmode;
 
       for (note = REG_NOTES (this_insn); note; note = XEXP (note, 1))
-	if (REG_NOTE_KIND (note) == REG_DEAD
-	    && REG_P (XEXP (note, 0))
-	    && (regno = REGNO (XEXP (note, 0))) < FIRST_PSEUDO_REGISTER
-	    && reg_mentioned_p (XEXP (note, 0), in)
-	    /* Check that we don't use a hardreg for an uninitialized
-	       pseudo.  See also find_dummy_reload().  */
-	    && (ORIGINAL_REGNO (XEXP (note, 0)) < FIRST_PSEUDO_REGISTER
-		|| ! bitmap_bit_p (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end,
-				   ORIGINAL_REGNO (XEXP (note, 0))))
-	    && ! refers_to_regno_for_reload_p (regno,
-					       (regno
-						+ hard_regno_nregs[regno]
-								  [rel_mode]),
-					       PATTERN (this_insn), inloc)
+	{
+	  int regno, byte;
+	  enum machine_mode reg_mode;
+	  bool found;
+
+	  if (REG_NOTE_KIND (note) != REG_DEAD
+	      || !REG_P (XEXP (note, 0)))
+	    continue;
+
+	  regno = REGNO (XEXP (note, 0));
+	  if (!HARD_REGISTER_NUM_P (regno))
+	    continue;
+
+	  reg_mode = GET_MODE (XEXP (note, 0));
+	  if (GET_MODE_SIZE (rel_mode) > GET_MODE_SIZE (reg_mode))
+	    continue;
+
+	  /* Check that we don't use a hardreg for an uninitialized
+	     pseudo.  See also find_dummy_reload.  */
+	  if (!HARD_REGISTER_NUM_P (ORIGINAL_REGNO (XEXP (note, 0)))
+	      && bitmap_bit_p (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end,
+			       ORIGINAL_REGNO (XEXP (note, 0))))
+	    continue;
+
+	  found = false;
+
+	  /* Check each REL_MODE sized chunk of the register being
+	     killed.  */
+	  for (byte = 0;
+	       byte < GET_MODE_SIZE (reg_mode);
+	       byte += GET_MODE_SIZE (rel_mode))
+	    {
+	      int subregno;
+	      unsigned int nregs, offs;
+
+	      if (byte == 0)
+		subregno = regno;
+	      else
+		{
+		  /* subreg_offset_representable_p should probably
+		     handle this case, but as of this writing it gets
+		     an assertion failure.  */
+		  if (GET_MODE_SIZE (reg_mode) % GET_MODE_SIZE (rel_mode) != 0)
+		    break;
+
+		  if (!subreg_offset_representable_p (regno, reg_mode, byte,
+						      rel_mode))
+		    continue;
+		  subregno = subreg_regno_offset (regno, reg_mode, byte,
+						  rel_mode);
+		}
+
+	      if (!HARD_REGNO_MODE_OK (subregno, inmode)
+		  || !HARD_REGNO_MODE_OK (subregno, outmode))
+		continue;
+
+	      nregs = hard_regno_nregs[subregno][rel_mode];
+	      if (!refers_to_regno_for_reload_p (subregno, subregno + nregs,
+						 in, NULL)
+		  || refers_to_regno_for_reload_p (subregno, subregno + nregs,
+						   PATTERN (this_insn), inloc))
+		continue;
+
 	    /* If this is also an output reload, IN cannot be used as
 	       the reload register if it is set in this insn unless IN
 	       is also OUT.  */
-	    && (out == 0 || in == out
-		|| ! hard_reg_set_here_p (regno,
-					  (regno
-					   + hard_regno_nregs[regno]
-							     [rel_mode]),
+	      if (out != NULL_RTX
+		  && in != out
+		  && hard_reg_set_here_p (subregno, subregno + nregs,
 					  PATTERN (this_insn)))
-	    /* ??? Why is this code so different from the previous?
-	       Is there any simple coherent way to describe the two together?
-	       What's going on here.  */
-	    && (in != out
-		|| (GET_CODE (in) == SUBREG
-		    && (((GET_MODE_SIZE (GET_MODE (in)) + (UNITS_PER_WORD - 1))
-			 / UNITS_PER_WORD)
-			== ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))
-			     + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))))
-	    /* Make sure the operand fits in the reg that dies.  */
-	    && (GET_MODE_SIZE (rel_mode)
-		<= GET_MODE_SIZE (GET_MODE (XEXP (note, 0))))
-	    && HARD_REGNO_MODE_OK (regno, inmode)
-	    && HARD_REGNO_MODE_OK (regno, outmode))
-	  {
-	    unsigned int offs;
-	    unsigned int nregs = MAX (hard_regno_nregs[regno][inmode],
-				      hard_regno_nregs[regno][outmode]);
-
-	    for (offs = 0; offs < nregs; offs++)
-	      if (fixed_regs[regno + offs]
-		  || ! TEST_HARD_REG_BIT (reg_class_contents[(int) class],
-					  regno + offs))
-		break;
+		continue;
 
-	    if (offs == nregs
-		&& (! (refers_to_regno_for_reload_p
-		       (regno, (regno + hard_regno_nregs[regno][inmode]),
-				in, (rtx *)0))
-		    || can_reload_into (in, regno, inmode)))
-	      {
-		rld[i].reg_rtx = gen_rtx_REG (rel_mode, regno);
-		break;
-	      }
-	  }
+	      /* ??? Why is this code so different from the previous?
+		 Is there any simple coherent way to describe the two
+		 together?  What's going on here.  */
+	      if (in == out
+		  && (GET_CODE (in) != SUBREG
+		      || (((GET_MODE_SIZE (GET_MODE (in))
+			    + (UNITS_PER_WORD - 1))
+			   / UNITS_PER_WORD)
+			  != ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))
+			       + (UNITS_PER_WORD - 1))
+			      / UNITS_PER_WORD))))
+		continue;
+
+	      for (offs = 0; offs < nregs; offs++)
+		if (fixed_regs[subregno + offs]
+		    || !TEST_HARD_REG_BIT (reg_class_contents[(int) class],
+					   subregno + offs))
+		  break;
+
+	      if (offs == nregs
+		  && (!refers_to_regno_for_reload_p
+		      (subregno, subregno + hard_regno_nregs[regno][inmode],
+		       in, NULL)
+		      || can_reload_into (in, subregno, inmode)))
+		{
+		  rld[i].reg_rtx = gen_rtx_REG (rel_mode, subregno);
+		  found = true;
+		  break;
+		}
+	    }
+
+	  if (found)
+	    break;
+	}
     }
 
   if (out)
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h	(revision 119834)
+++ gcc/tree-pass.h	(working copy)
@@ -332,6 +332,7 @@ extern struct tree_opt_pass pass_instant
 extern struct tree_opt_pass pass_rtl_fwprop;
 extern struct tree_opt_pass pass_rtl_fwprop_addr;
 extern struct tree_opt_pass pass_jump2;
+extern struct tree_opt_pass pass_lower_subreg;
 extern struct tree_opt_pass pass_cse;
 extern struct tree_opt_pass pass_gcse;
 extern struct tree_opt_pass pass_jump_bypass;
@@ -355,6 +356,7 @@ extern struct tree_opt_pass pass_if_afte
 extern struct tree_opt_pass pass_partition_blocks;
 extern struct tree_opt_pass pass_regmove;
 extern struct tree_opt_pass pass_split_all_insns;
+extern struct tree_opt_pass pass_lower_subreg2;
 extern struct tree_opt_pass pass_mode_switching;
 extern struct tree_opt_pass pass_see;
 extern struct tree_opt_pass pass_recompute_reg_usage;
Index: gcc/rtl.def
===================================================================
--- gcc/rtl.def	(revision 119834)
+++ gcc/rtl.def	(working copy)
@@ -382,12 +382,19 @@ DEF_RTL_EXPR(SUBREG, "subreg", "ei", RTX
 
 DEF_RTL_EXPR(STRICT_LOW_PART, "strict_low_part", "e", RTX_EXTRA)
 
-/* (CONCAT a b) represents the virtual concatenation of a and b
-   to make a value that has as many bits as a and b put together.
-   This is used for complex values.  Normally it appears only
-   in DECL_RTLs and during RTL generation, but not in the insn chain.  */
+/* (CONCAT a b) represents the virtual concatenation of a and b to
+   make a value that has as many bits as a and b put together.  This
+   is used for, among other things, complex values.  Normally it
+   appears only in DECL_RTLs and during RTL generation, but not in the
+   insn chain.  */
 DEF_RTL_EXPR(CONCAT, "concat", "ee", RTX_OBJ)
 
+/* (CONCATN [a1 a2 ... an]) represents the virtual concatenation of
+   all An to make a value.  This is an extension of CONCAT to a larger
+   number of components.  Like CONCAT, it should not appear in the
+   insn chain.  Every element of the CONCATN is the same size.  */
+DEF_RTL_EXPR(CONCATN, "concatn", "E", RTX_OBJ)
+
 /* A memory location; operand is the address.  The second operand is the
    alias set to which this MEM belongs.  We use `0' instead of `w' for this
    field so that the field need not be specified in machine descriptions.  */
Index: gcc/global.c
===================================================================
--- gcc/global.c	(revision 119834)
+++ gcc/global.c	(working copy)
@@ -183,6 +183,10 @@ static int allocno_row_words;
  (conflicts[(I) * allocno_row_words + (unsigned) (J) / INT_BITS]	\
   & ((INT_TYPE) 1 << ((unsigned) (J) % INT_BITS)))
 
+#define CLEAR_CONFLICT(I, J) \
+ (conflicts[(I) * allocno_row_words + (unsigned) (J) / INT_BITS]	\
+  &= ~((INT_TYPE) 1 << ((unsigned) (J) % INT_BITS)))
+
 /* For any allocno set in ALLOCNO_SET, set ALLOCNO to that allocno,
    and execute CODE.  */
 #define EXECUTE_IF_SET_IN_ALLOCNO_SET(ALLOCNO_SET, ALLOCNO, CODE)	\
@@ -300,7 +304,7 @@ static void mirror_conflicts (void);
 static void expand_preferences (void);
 static void prune_preferences (void);
 static void find_reg (int, HARD_REG_SET, int, int, int);
-static void record_one_conflict (int);
+static void record_one_conflict (int, int);
 static void record_conflicts (int *, int);
 static void mark_reg_store (rtx, rtx, void *);
 static void mark_reg_clobber (rtx, rtx, void *);
@@ -753,7 +757,7 @@ global_conflicts (void)
 		unsigned int regno = EH_RETURN_DATA_REGNO (i);
 		if (regno == INVALID_REGNUM)
 		  break;
-		record_one_conflict (regno);
+		record_one_conflict (regno, -1);
 	      }
 	  }
 #endif
@@ -779,7 +783,7 @@ global_conflicts (void)
 					       allocno[ax].no_stack_reg = 1;
 					     });
 	      for (ax = FIRST_STACK_REG; ax <= LAST_STACK_REG; ax++)
-		record_one_conflict (ax);
+		record_one_conflict (ax, -1);
 #endif
 
 	      /* No need to record conflicts for call clobbered regs if we have
@@ -788,7 +792,7 @@ global_conflicts (void)
 	      if (! current_function_has_nonlocal_label)
 		for (ax = 0; ax < FIRST_PSEUDO_REGISTER; ax++)
 		  if (call_used_regs [ax])
-		    record_one_conflict (ax);
+		    record_one_conflict (ax, -1);
 	    }
 	}
       }
@@ -829,7 +833,7 @@ global_conflicts (void)
 	      /* Mark any registers clobbered by INSN as live,
 		 so they conflict with the inputs.  */
 
-	      note_stores (PATTERN (insn), mark_reg_clobber, NULL);
+	      note_stores (PATTERN (insn), mark_reg_clobber, insn);
 
 	      /* Mark any registers dead after INSN as dead now.  */
 
@@ -842,12 +846,12 @@ global_conflicts (void)
 		 Clobbers are processed again, so they conflict with
 		 the registers that are set.  */
 
-	      note_stores (PATTERN (insn), mark_reg_store, NULL);
+	      note_stores (PATTERN (insn), mark_reg_store, insn);
 
 #ifdef AUTO_INC_DEC
 	      for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
 		if (REG_NOTE_KIND (link) == REG_INC)
-		  mark_reg_store (XEXP (link, 0), NULL_RTX, NULL);
+		  mark_reg_store (XEXP (link, 0), NULL_RTX, insn);
 #endif
 
 	      /* If INSN has multiple outputs, then any reg that dies here
@@ -1411,19 +1415,32 @@ retry_global_alloc (int regno, HARD_REG_
    and everything currently live.
    REGNO must not be a pseudo reg that was allocated
    by local_alloc; such numbers must be translated through
-   reg_renumber before calling here.  */
+   reg_renumber before calling here.
+   If NO_CONFLICT_REGNO is not -1, it is a register for which we
+   should not record a conflict.  */
 
 static void
-record_one_conflict (int regno)
+record_one_conflict (int regno, int no_conflict_regno)
 {
+  int no_conflict_allocno;
   int j;
 
+  no_conflict_allocno = -1;
+  if (no_conflict_regno != -1)
+    {
+      if (no_conflict_regno >= FIRST_PSEUDO_REGISTER)
+	no_conflict_allocno = reg_allocno[no_conflict_regno];
+      if (reg_renumber[no_conflict_regno] >= 0)
+	no_conflict_regno = reg_renumber[no_conflict_regno];
+    }
+
   if (regno < FIRST_PSEUDO_REGISTER)
     /* When a hard register becomes live,
        record conflicts with live pseudo regs.  */
     EXECUTE_IF_SET_IN_ALLOCNO_SET (allocnos_live, j,
       {
-	SET_HARD_REG_BIT (allocno[j].hard_reg_conflicts, regno);
+	if (j != no_conflict_allocno)
+	  SET_HARD_REG_BIT (allocno[j].hard_reg_conflicts, regno);
       });
   else
     /* When a pseudo-register becomes live,
@@ -1432,10 +1449,24 @@ record_one_conflict (int regno)
     {
       int ialloc = reg_allocno[regno];
       int ialloc_prod = ialloc * allocno_row_words;
+      bool clear;
+
+      if (no_conflict_regno == -1 || !HARD_REGISTER_NUM_P (no_conflict_regno))
+	IOR_HARD_REG_SET (allocno[ialloc].hard_reg_conflicts, hard_regs_live);
+      else
+	{
+	  HARD_REG_SET live;
 
-      IOR_HARD_REG_SET (allocno[ialloc].hard_reg_conflicts, hard_regs_live);
+	  COPY_HARD_REG_SET (live, hard_regs_live);
+	  CLEAR_HARD_REG_BIT (live, no_conflict_regno);
+	  IOR_HARD_REG_SET (allocno[ialloc].hard_reg_conflicts, live);
+	}
+      clear = (no_conflict_allocno != -1
+	       && !CONFLICTP (ialloc, no_conflict_allocno));
       for (j = allocno_row_words - 1; j >= 0; j--)
 	conflicts[ialloc_prod + j] |= allocnos_live[j];
+      if (clear)
+	CLEAR_CONFLICT (ialloc, no_conflict_allocno);
     }
 }
 
@@ -1503,9 +1534,11 @@ mirror_conflicts (void)
    a REG_INC note was found for it).  */
 
 static void
-mark_reg_store (rtx reg, rtx setter, void *data ATTRIBUTE_UNUSED)
+mark_reg_store (rtx reg, rtx setter, void *data)
 {
+  rtx insn = (rtx) data;
   int regno;
+  int no_conflict_regno;
 
   if (GET_CODE (reg) == SUBREG)
     reg = SUBREG_REG (reg);
@@ -1518,6 +1551,19 @@ mark_reg_store (rtx reg, rtx setter, voi
   if (setter && GET_CODE (setter) != CLOBBER)
     set_preference (reg, SET_SRC (setter));
 
+  /* If the source is a subreg which dies in this set, then this set
+     does not itself introduce a conflict with the source.  */
+  no_conflict_regno = -1;
+  if (setter
+      && GET_CODE (setter) != CLOBBER
+      && GET_CODE (SET_SRC (setter)) == SUBREG)
+    {
+      rtx note = find_reg_note (insn, REG_SUBREG_DEAD, NULL_RTX);
+
+      if (note && rtx_equal_p (SET_SRC (setter), XEXP (note, 0)))
+	no_conflict_regno = REGNO (SUBREG_REG (SET_SRC (setter)));
+    }
+
   regno = REGNO (reg);
 
   /* Either this is one of the max_allocno pseudo regs not allocated,
@@ -1527,7 +1573,7 @@ mark_reg_store (rtx reg, rtx setter, voi
       if (reg_allocno[regno] >= 0)
 	{
 	  SET_ALLOCNO_LIVE (reg_allocno[regno]);
-	  record_one_conflict (regno);
+	  record_one_conflict (regno, no_conflict_regno);
 	}
     }
 
@@ -1540,7 +1586,7 @@ mark_reg_store (rtx reg, rtx setter, voi
       int last = regno + hard_regno_nregs[regno][GET_MODE (reg)];
       while (regno < last)
 	{
-	  record_one_conflict (regno);
+	  record_one_conflict (regno, no_conflict_regno);
 	  SET_HARD_REG_BIT (hard_regs_live, regno);
 	  regno++;
 	}
@@ -1577,7 +1623,7 @@ mark_reg_conflicts (rtx reg)
   if (regno >= FIRST_PSEUDO_REGISTER)
     {
       if (reg_allocno[regno] >= 0)
-	record_one_conflict (regno);
+	record_one_conflict (regno, -1);
     }
 
   if (reg_renumber[regno] >= 0)
@@ -1589,7 +1635,7 @@ mark_reg_conflicts (rtx reg)
       int last = regno + hard_regno_nregs[regno][GET_MODE (reg)];
       while (regno < last)
 	{
-	  record_one_conflict (regno);
+	  record_one_conflict (regno, -1);
 	  regno++;
 	}
     }
@@ -1647,7 +1693,7 @@ mark_reg_live_nc (int regno, enum machin
 
 /* Try to set a preference for an allocno to a hard register.
    We are passed DEST and SRC which are the operands of a SET.  It is known
-   that SRC is a register.  If SRC or the first operand of SRC is a register,
+   that DEST is a register.  If SRC or the first operand of SRC is a register,
    try to set a preference.  If one of the two is a hard register and the other
    is a pseudo-register, mark the preference.
 
Index: gcc/dwarf2out.c
===================================================================
--- gcc/dwarf2out.c	(revision 119834)
+++ gcc/dwarf2out.c	(working copy)
@@ -9045,6 +9045,32 @@ concat_loc_descriptor (rtx x0, rtx x1)
   return cc_loc_result;
 }
 
+/* Return a descriptor that describes the concatenation of the N
+   locations in CONCATN, or NULL if any of them has no descriptor.  */
+
+static dw_loc_descr_ref
+concatn_loc_descriptor (rtx concatn)
+{
+  unsigned int i;
+  dw_loc_descr_ref cc_loc_result = NULL;
+  unsigned int n = XVECLEN (concatn, 0);
+
+  for (i = 0; i < n; ++i)
+    {
+      dw_loc_descr_ref ref;
+      rtx x = XVECEXP (concatn, 0, i);
+
+      ref = loc_descriptor (x);
+      if (ref == NULL)
+	return NULL;
+
+      add_loc_descr (&cc_loc_result, ref);
+      add_loc_descr_op_piece (&cc_loc_result, GET_MODE_SIZE (GET_MODE (x)));
+    }
+
+  return cc_loc_result;
+}
+
 /* Output a proper Dwarf location descriptor for a variable or parameter
    which is either allocated in a register or in a memory location.  For a
    register, we just generate an OP_REG and the register number.  For a
@@ -9082,6 +9108,10 @@ loc_descriptor (rtx rtl)
       loc_result = concat_loc_descriptor (XEXP (rtl, 0), XEXP (rtl, 1));
       break;
 
+    case CONCATN:
+      loc_result = concatn_loc_descriptor (rtl);
+      break;
+
     case VAR_LOCATION:
       /* Single part.  */
       if (GET_CODE (XEXP (rtl, 1)) != PARALLEL)
Index: gcc/timevar.def
===================================================================
--- gcc/timevar.def	(revision 119834)
+++ gcc/timevar.def	(working copy)
@@ -128,6 +128,7 @@ DEFTIMEVAR (TV_OVERLOAD              , "
 DEFTIMEVAR (TV_TEMPLATE_INSTANTIATION, "template instantiation")
 DEFTIMEVAR (TV_EXPAND		     , "expand")
 DEFTIMEVAR (TV_VARCONST              , "varconst")
+DEFTIMEVAR (TV_LOWER_SUBREG	     , "lower subreg")
 DEFTIMEVAR (TV_JUMP                  , "jump")
 DEFTIMEVAR (TV_FWPROP                , "forward prop")
 DEFTIMEVAR (TV_CSE                   , "CSE")
Index: gcc/local-alloc.c
===================================================================
--- gcc/local-alloc.c	(revision 119834)
+++ gcc/local-alloc.c	(working copy)
@@ -1852,6 +1852,13 @@ combine_regs (rtx usedreg, rtx setreg, i
   int usize, ssize;
   int sqty;
 
+  /* If we are setting a complete register from a subreg which dies
+     here, then we can tie the complete register to the subreg.  */
+  if (GET_CODE (usedreg) == SUBREG
+      && GET_CODE (setreg) != SUBREG
+      && find_reg_note (insn, REG_SUBREG_DEAD, usedreg))
+    already_dead = 1;
+
   /* Determine the numbers and sizes of registers being used.  If a subreg
      is present that does not change the entire register, don't consider
      this a copy insn.  */
@@ -2513,6 +2520,276 @@ requires_inout (const char *p)
   return num_matching_alts;
 }
 
+/* This is used to pass data to mark_subregs via for_each_rtx.  */
+
+struct mark_subregs_data
+{
+  /* The register number we are looking for.  */
+  unsigned int regno;
+  /* The bitmask we are setting; bit N stands for word N of REGNO.  */
+  unsigned HOST_WIDE_INT *subreg_bitmask;
+  /* Whether we set any elements in SUBREG_BITMASK.  */
+  bool any;
+};
+
+/* This is called via for_each_rtx.  DATA points to a struct
+   mark_subregs_data.  If we find the DATA->REGNO unadorned, we return
+   1.  If we find DATA->REGNO within a SUBREG, we mark the appropriate
+   bits in DATA->SUBREG_BITMASK, where each bit represents a single
+   subreg of size UNITS_PER_WORD.  We set DATA->ANY if we set any of
+   the bits in DATA->SUBREG_BITMASK.  */
+
+static int
+mark_subregs (rtx *px, void *data)
+{
+  rtx x = *px;
+  struct mark_subregs_data *pmsd = (struct mark_subregs_data *) data;
+
+  if (REG_P (x) && REGNO (x) == pmsd->regno)
+    return 1;
+  else if (GET_CODE (x) == SUBREG
+	   && REG_P (SUBREG_REG (x))
+	   && REGNO (SUBREG_REG (x)) == pmsd->regno)
+    {
+      int byte;
+
+      if (SUBREG_BYTE (x) % UNITS_PER_WORD != 0)
+	return 1;
+
+      for (byte = 0; byte < GET_MODE_SIZE (GET_MODE (x));
+	   byte += UNITS_PER_WORD)
+	{
+	  int bit = (SUBREG_BYTE (x) + byte) / UNITS_PER_WORD;
+
+	  if (bit >= HOST_BITS_PER_WIDE_INT)
+	    return 1;
+
+	  *pmsd->subreg_bitmask |= (unsigned HOST_WIDE_INT) 1 << bit;
+	}
+
+      pmsd->any = true;
+      return -1;
+    }
+  else if (GET_CODE (x) == SET)
+    {
+      if (for_each_rtx (&SET_SRC (x), mark_subregs, data) != 0)
+	return 1;
+
+      /* Ignore sets of a register.  We know that the register we care
+	 about dies in this insn, and we don't want to get confused by
+	 an irrelevant SET.  */
+      if (REG_P (SET_DEST (x))
+	  || (GET_CODE (SET_DEST (x)) == SUBREG
+	      && REG_P (SUBREG_REG (SET_DEST (x)))))
+	return -1;
+
+      if (for_each_rtx (&SET_DEST (x), mark_subregs, data) != 0)
+	return 1;
+
+      /* Both operands were scanned above; do not scan them again.  */
+      return -1;
+    }
+  else if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC
+	   && GET_CODE (XEXP (x, 0)) == SUBREG
+	   && REG_P (SUBREG_REG (XEXP (x, 0)))
+	   && REGNO (SUBREG_REG (XEXP (x, 0))) == pmsd->regno)
+    return 1;
+
+  return 0;
+}
+
+/* This is used to pass data to track_subregs via for_each_rtx.  */
+
+struct track_subregs_data
+{
+  /* The insn we are looking at; REG_SUBREG_DEAD notes are added to it.  */
+  rtx insn;
+  /* The current subreg usage bitmasks, indexed by register number.  */
+  unsigned HOST_WIDE_INT *subreg_usage;
+};
+
+/* This is called via for_each_rtx.  DATA points to a struct
+   track_subregs_data.
+
+   For each SUBREG we see that is not set by DATA->INSN, look to see
+   if we are currently tracking changes to it (subreg_usage[regno] !=
+   0).  If we are, it means that the register dies in a following
+   insn, and the bits set in subreg_usage[regno] are the bits which
+   are live at that insn.  If we are using words from this subreg
+   which are not set in subreg_usage[regno], then we know that this is
+   the last insn in which they are used, so they are dead.  Add a
+   REG_SUBREG_DEAD note for those words, and mark the appropriate bits
+   in subreg_usage[regno].
+
+   For each SUBREG which is set, clear the bits in subreg_usage, to
+   indicate that they are not live before this insn.
+
+   For each REG which is used or set outside of a SUBREG, clear the
+   corresponding entry in subreg_usage, to indicate that we can no
+   longer do anything useful with this register.  */
+
+static int
+track_subregs (rtx *px, void *data)
+{
+  rtx x = *px;
+  struct track_subregs_data *ptsd = (struct track_subregs_data *) data;
+
+  if (GET_CODE (x) == SUBREG
+      && REG_P (SUBREG_REG (x))
+      && !HARD_REGISTER_P (SUBREG_REG (x)))
+    {
+      int regno, byte;
+      bool live_later;
+
+      regno = REGNO (SUBREG_REG (x));
+      if (ptsd->subreg_usage[regno] == 0)
+	return -1;
+
+      if (SUBREG_BYTE (x) % UNITS_PER_WORD != 0)
+	{
+	  ptsd->subreg_usage[regno] = 0;
+	  return -1;
+	}
+
+      live_later = false;
+      for (byte = 0; byte < GET_MODE_SIZE (GET_MODE (x));
+	   byte += UNITS_PER_WORD)
+	{
+	  int bit;
+
+	  bit = (SUBREG_BYTE (x) + byte) / UNITS_PER_WORD;
+	  if (bit >= HOST_BITS_PER_WIDE_INT)
+	    {
+	      ptsd->subreg_usage[regno] = 0;
+	      return -1;
+	    }
+
+	  if ((ptsd->subreg_usage[regno]
+	       & ((unsigned HOST_WIDE_INT) 1 << bit))
+	      != 0)
+	    live_later = true;
+	  else
+	    ptsd->subreg_usage[regno] |= (unsigned HOST_WIDE_INT) 1 << bit;
+	}
+
+      if (!live_later)
+	{
+	  /* This register dies in a later insn, and this subreg is
+	     live now, but this subreg is not live at the time that
+	     the register dies.  Add a SUBREG_DEAD note.  */
+	  set_unique_reg_note (ptsd->insn, REG_SUBREG_DEAD, copy_rtx (x));
+	}
+
+      return -1;
+    }
+  else if (GET_CODE (x) == REG)
+    {
+      ptsd->subreg_usage[REGNO (x)] = 0;
+      return -1;
+    }
+  else if (GET_CODE (x) == SET)
+    {
+      /* If the destination is a SUBREG of a REG, stop tracking that
+	 register.  We could in some cases keep tracking it, but we
+	 would have to be careful about how we handle the SET_SRC.
+	 Any other destination is scanned like an ordinary operand.  */
+      if (GET_CODE (SET_DEST (x)) == SUBREG
+	  && REG_P (SUBREG_REG (SET_DEST (x))))
+	ptsd->subreg_usage[REGNO (SUBREG_REG (SET_DEST (x)))] = 0;
+      else
+	for_each_rtx (&SET_DEST (x), track_subregs, data);
+
+      for_each_rtx (&SET_SRC (x), track_subregs, data);
+
+      return -1;
+    }
+  else if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC
+	   && GET_CODE (XEXP (x, 0)) == SUBREG
+	   && REG_P (SUBREG_REG (XEXP (x, 0))))
+    {
+      /* Stop tracking the register inside the auto-modified SUBREG.  */
+      ptsd->subreg_usage[REGNO (SUBREG_REG (XEXP (x, 0)))] = 0;
+      return -1;
+    }
+
+  return 0;
+}
+
+/* Add REG_SUBREG_DEAD notes.  We do this by walking backward through
+   each basic block.  When we see a REG_DEAD note for a
+   pseudo-register which is larger than UNITS_PER_WORD, we record
+   which word sized SUBREGs of that register are used by the insn in
+   which it dies.  For each earlier insn which uses other word sized
+   SUBREGs of that register, we add a REG_SUBREG_DEAD note.  We only
+   do this within basic blocks because we don't expect it to be useful
+   across basic blocks; the lower-subreg pass should have already
+   caught most of those cases anyhow.  We use the REG_SUBREG_DEAD
+   notes primarily to get good conflict information when moving two
+   word_mode registers into a double word_mode register, and vice-versa.  */
+
+static void
+add_subreg_death_notes (void)
+{
+  int cregs;
+  unsigned HOST_WIDE_INT *subreg_usage;
+  basic_block bb;
+
+  /* SUBREG_DEAD notes are only needed for optimizing register
+     allocation, they are not needed for correctness.  */
+  if (!optimize)
+    return;
+
+  cregs = max_reg_num ();
+  subreg_usage = XNEWVEC (unsigned HOST_WIDE_INT, cregs);
+
+  FOR_EACH_BB (bb)
+    {
+      bool any;
+      rtx insn;
+
+      memset (subreg_usage, 0, cregs * sizeof (*subreg_usage));
+      any = false;
+
+      FOR_BB_INSNS_REVERSE (bb, insn)
+	{
+	  rtx note;
+
+	  if (!INSN_P (insn))
+	    continue;
+
+	  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
+	    {
+	      if (REG_NOTE_KIND (note) == REG_DEAD
+		  && !HARD_REGISTER_P (XEXP (note, 0))
+		  && (GET_MODE_SIZE (GET_MODE (XEXP (note, 0)))
+		      > UNITS_PER_WORD))
+		{
+		  struct mark_subregs_data msd;
+
+		  msd.regno = REGNO (XEXP (note, 0));
+		  msd.subreg_bitmask = &subreg_usage[msd.regno];
+		  msd.any = false;
+		  if (for_each_rtx (&PATTERN (insn), mark_subregs, &msd) != 0)
+		    subreg_usage[msd.regno] = 0;
+		  else if (msd.any)
+		    any = true;
+		}
+	    }
+
+	  if (any)
+	    {
+	      struct track_subregs_data tsd;
+
+	      tsd.insn = insn;
+	      tsd.subreg_usage = subreg_usage;
+	      for_each_rtx (&PATTERN (insn), track_subregs, &tsd);
+	    }
+	}
+    }
+
+  free (subreg_usage);
+}
+
 void
 dump_local_alloc (FILE *file)
 {
@@ -2546,6 +2823,7 @@ rest_of_handle_local_alloc (void)
   allocate_initial_values (reg_equiv_memory_loc);
 
   regclass (get_insns (), max_reg_num ());
+  add_subreg_death_notes ();
   rebuild_notes = local_alloc ();
 
   /* Local allocation may have turned an indirect jump into a direct
Index: gcc/lower-subreg.c
===================================================================
--- gcc/lower-subreg.c	(revision 0)
+++ gcc/lower-subreg.c	(revision 0)
@@ -0,0 +1,945 @@
+/* Decompose multiword subregs.
+   Copyright (C) 2006 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>
+		  Ian Lance Taylor <iant@google.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING.  If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "machmode.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "obstack.h"
+#include "timevar.h"
+#include "flags.h"
+#include "insn-config.h"
+#include "basic-block.h"
+#include "recog.h"
+#include "bitmap.h"
+#include "expr.h"
+#include "regs.h"
+#include "tree-pass.h"
+
+#ifdef STACK_GROWS_DOWNWARD
+# undef STACK_GROWS_DOWNWARD
+# define STACK_GROWS_DOWNWARD 1
+#else
+# define STACK_GROWS_DOWNWARD 0
+#endif
+
+DEF_VEC_P (bitmap);
+DEF_VEC_ALLOC_P (bitmap,heap);
+
+/* Decompose multi-word pseudo-registers into individual
+   pseudo-registers when possible.  This is possible when all the uses
+   of a multi-word register are via SUBREG, or are copies of the
+   register to another location.  Breaking apart the register permits
+   more CSE and permits better register allocation.  */
+
+/* Bit N in this bitmap is set if regno N is used in a context in
+   which we can decompose it.  */
+static bitmap decomposable_context;
+
+/* Bit N in this bitmap is set if regno N is used in a context in
+   which it can not be decomposed.  */
+static bitmap non_decomposable_context;
+
+/* Bit N in the bitmap in element M of this array is set if there is a
+   copy from reg M to reg N.  */
+static VEC(bitmap,heap) *reg_copy_graph;
+
+/* If INSN is a single set between two objects, return that SET;
+   such an insn can always be decomposed.  Else return NULL_RTX.  */
+
+static rtx
+simple_move (rtx insn)
+{
+  rtx set = single_set (insn);
+  rtx dest, src;
+
+  if (set == NULL_RTX)
+    return NULL_RTX;
+
+  /* The destination must be an object or a SUBREG, and must not be a
+     volatile memory reference.  */
+  dest = SET_DEST (set);
+  if (!(OBJECT_P (dest) || GET_CODE (dest) == SUBREG)
+      || (MEM_P (dest) && MEM_VOLATILE_P (dest)))
+    return NULL_RTX;
+
+  /* The same goes for the source, which may also be an ASM_OPERANDS.  */
+  src = SET_SRC (set);
+  if (!(OBJECT_P (src)
+	|| GET_CODE (src) == SUBREG
+	|| GET_CODE (src) == ASM_OPERANDS)
+      || (MEM_P (src) && MEM_VOLATILE_P (src)))
+    return NULL_RTX;
+
+  return set;
+}
+
+/* Record in reg_copy_graph that SET copies one multi-word
+   pseudo-register to another.  Return true if SET is such a copy;
+   otherwise record nothing and return false.  */
+
+static bool
+find_pseudo_copy (rtx set)
+{
+  rtx to = SET_DEST (set);
+  rtx from = SET_SRC (set);
+  unsigned int to_regno, from_regno;
+  bitmap copies;
+
+  if (!(REG_P (to) && REG_P (from)))
+    return false;
+
+  to_regno = REGNO (to);
+  from_regno = REGNO (from);
+  if (HARD_REGISTER_NUM_P (to_regno) || HARD_REGISTER_NUM_P (from_regno))
+    return false;
+
+  /* Only multi-word registers are of interest.  */
+  if (GET_MODE_SIZE (GET_MODE (to)) <= UNITS_PER_WORD)
+    return false;
+
+  /* Allocate the bitmap of copy destinations on first use.  */
+  copies = VEC_index (bitmap, reg_copy_graph, from_regno);
+  if (copies == NULL)
+    {
+      copies = BITMAP_ALLOC (NULL);
+      VEC_replace (bitmap, reg_copy_graph, from_regno, copies);
+    }
+  bitmap_set_bit (copies, to_regno);
+  return true;
+}
+
+/* Extend DECOMPOSABLE_CONTEXT transitively along reg_copy_graph: when
+   a register in the set is copied to another register, add that
+   register too.  Registers in NON_DECOMPOSABLE_CONTEXT are never
+   added this way.  */
+
+static void
+propagate_pseudo_copies (void)
+{
+  bitmap worklist = BITMAP_ALLOC (NULL);
+  bitmap reached = BITMAP_ALLOC (NULL);
+
+  bitmap_copy (worklist, decomposable_context);
+  for (;;)
+    {
+      bitmap_iterator iter;
+      unsigned int regno;
+
+      /* Collect every register copied from a worklist member.  */
+      bitmap_clear (reached);
+      EXECUTE_IF_SET_IN_BITMAP (worklist, 0, regno, iter)
+	{
+	  bitmap dests = VEC_index (bitmap, reg_copy_graph, regno);
+	  if (dests != NULL)
+	    bitmap_ior_and_compl_into (reached, dests,
+				       non_decomposable_context);
+	}
+
+      /* Only newly discovered registers need another round.  */
+      bitmap_and_compl (worklist, reached, decomposable_context);
+      bitmap_ior_into (decomposable_context, reached);
+      if (bitmap_empty_p (worklist))
+	break;
+    }
+
+  BITMAP_FREE (worklist);
+  BITMAP_FREE (reached);
+}
+
+/* How an insn moves data.  A pointer to one of these values is
+   passed to find_decomposable_subregs via for_each_rtx.  */
+
+enum classify_move_insn
+{
+  /* Not a simple move from one location to another.  */
+  NOT_SIMPLE_MOVE,
+  /* A simple move from one multi-word pseudo-register to another
+     with no REG_RETVAL note.  */
+  SIMPLE_PSEUDO_REG_MOVE,
+  /* A simple move involving a non-pseudo-register, or from one
+     pseudo-register to another with a REG_RETVAL note.  */
+  SIMPLE_MOVE
+};
+
+/* Called via for_each_rtx; DATA points at the enum classify_move_insn
+   of the insn being scanned.  A single-word SUBREG of a multi-word
+   pseudo-register sets a bit in DECOMPOSABLE_CONTEXT.  An unadorned
+   multi-word pseudo-register sets a bit in DECOMPOSABLE_CONTEXT or
+   NON_DECOMPOSABLE_CONTEXT, or neither, depending on the type of
+   move.  Returns -1 to skip the inside of a SUBREG, otherwise 0.  */
+
+static int
+find_decomposable_subregs (rtx *px, void *data)
+{
+  enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
+  rtx x = *px;
+
+  if (GET_CODE (x) == SUBREG)
+    {
+      rtx inner = SUBREG_REG (x);
+      unsigned int regno, outer_size, inner_size, outer_words, inner_words;
+
+      if (!REG_P (inner))
+	return 0;
+
+      regno = REGNO (inner);
+      if (HARD_REGISTER_NUM_P (regno))
+	return -1;
+
+      outer_size = GET_MODE_SIZE (GET_MODE (x));
+      inner_size = GET_MODE_SIZE (GET_MODE (inner));
+      outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+      inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+      /* We only try to decompose single word subregs of multi-word
+	 registers.  When we find one, we return -1 to avoid iterating
+	 over the inner register.
+
+	 ??? This doesn't allow, e.g., DImode subregs of TImode values
+	 on 32-bit targets.  We would need to record the way the
+	 pseudo-register was used, and only decompose if all the uses
+	 were the same number and size of pieces.  Hopefully this
+	 doesn't happen much.  */
+
+      if (outer_words == 1 && inner_words > 1)
+	{
+	  bitmap_set_bit (decomposable_context, regno);
+	  return -1;
+	}
+    }
+  else if (GET_CODE (x) == REG)
+    {
+      unsigned int regno;
+
+      /* We will see an outer SUBREG before we see the inner REG, so
+	 when we see a plain REG here it means a direct reference to
+	 the register.
+
+	 If this is not a simple copy from one location to another,
+	 then we can not decompose this register.  If this is a simple
+	 copy from one pseudo-register to another, with no REG_RETVAL
+	 note, and the mode is right, then we mark the register as
+	 decomposable.  Otherwise we don't say anything about this
+	 register--it could be decomposed, but whether that would be
+	 profitable depends upon how it is used elsewhere.
+
+	 We only set bits in the bitmap for multi-word
+	 pseudo-registers, since those are the only ones we care about
+	 and it keeps the size of the bitmaps down.  */
+
+      regno = REGNO (x);
+      if (!HARD_REGISTER_NUM_P (regno)
+	  && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
+	{
+	  switch (*pcmi)
+	    {
+	    case NOT_SIMPLE_MOVE:
+	      bitmap_set_bit (non_decomposable_context, regno);
+	      break;
+	    case SIMPLE_PSEUDO_REG_MOVE:
+	      if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
+		bitmap_set_bit (decomposable_context, regno);
+	      break;
+	    case SIMPLE_MOVE:
+	      break;
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    }
+
+  return 0;
+}
+
+/* Decompose REGNO into word-sized components.  We smash the REG node
+   in place.  This ensures that (1) something goes wrong quickly if we
+   fail to make some replacement, and (2) the debug information inside
+   the symbol table is automatically kept up to date.  */
+
+static void
+decompose_register (unsigned int regno)
+{
+  rtx reg;
+  unsigned int words, i;
+  rtvec v;
+
+  if (dump_file)
+    fprintf (dump_file, "; Splitting reg %u ->", regno);
+
+  reg = regno_reg_rtx[regno];
+  regno_reg_rtx[regno] = NULL_RTX;
+
+  words = GET_MODE_SIZE (GET_MODE (reg));
+  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+  /* Create the components while REG is still a REG: gen_reg_rtx_offset
+     reads its REG_EXPR and REG_OFFSET.  Only then smash it.  */
+  v = rtvec_alloc (words);
+  for (i = 0; i < words; ++i)
+    RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
+  PUT_CODE (reg, CONCATN);
+  XVEC (reg, 0) = v;
+
+  /* ??? We should probably call a function in regclass.c for this.  */
+  memset (VEC_index (reg_info_p, reg_n_info, regno), 0, sizeof (reg_info));
+
+  if (dump_file)
+    {
+      for (i = 0; i < words; ++i)
+	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
+      fputc ('\n', dump_file);
+    }
+}
+
+/* Return whether X is a register which has been decomposed, and so
+   must be resolved: decompose_register turns those into CONCATN.  */
+
+static bool
+resolve_reg_p (rtx x)
+{
+  return GET_CODE (x) == CONCATN;
+}
+
+/* Return whether X is a SUBREG whose inner expression is a register
+   which we need to resolve.  Any X which is not a SUBREG yields
+   false.  */
+
+static bool
+resolve_subreg_p (rtx x)
+{
+  return (GET_CODE (x) == SUBREG
+	  && resolve_reg_p (SUBREG_REG (x)));
+}
+
+/* This is called via for_each_rtx, with DATA the insn being changed
+   or NULL when scanning a note.  Queue replacements for SUBREGs of
+   decomposed registers.  */
+
+static int
+resolve_subreg_use (rtx *px, void *data)
+{
+  rtx insn = (rtx) data;
+  rtx x = *px;
+  rtx inner, part;
+
+  if (x == NULL_RTX)
+    return 0;
+
+  if (resolve_reg_p (x))
+    {
+      /* A direct reference to a register which is being decomposed.
+	 This can happen inside notes; return 1 to tell the caller.  */
+      gcc_assert (insn == NULL_RTX);
+      return 1;
+    }
+
+  if (!resolve_subreg_p (x))
+    return 0;
+
+  /* Queue the replacement; -1 skips the operands of the SUBREG.  */
+  inner = SUBREG_REG (x);
+  part = simplify_subreg (GET_MODE (x), inner, GET_MODE (inner),
+			  SUBREG_BYTE (x));
+  gcc_assert (part);
+  validate_change (insn, px, part, 1);
+  return -1;
+}
+
+/* If OLD_START carries a REG_LIBCALL note, transfer the note to
+   NEW_START and make the matching REG_RETVAL note point to NEW_START.  */
+
+static void
+move_libcall_note (rtx old_start, rtx new_start)
+{
+  rtx libcall, retval, last;
+
+  libcall = find_reg_note (old_start, REG_LIBCALL, NULL);
+  if (libcall != NULL_RTX)
+    {
+      remove_note (old_start, libcall);
+      last = XEXP (libcall, 0);
+      retval = find_reg_note (last, REG_RETVAL, NULL);
+
+      XEXP (libcall, 1) = REG_NOTES (new_start);
+      REG_NOTES (new_start) = libcall;
+      XEXP (retval, 0) = new_start;
+    }
+}
+
+/* Remove any REG_RETVAL note on INSN1, the corresponding REG_LIBCALL
+   note, and any REG_NO_CONFLICT notes on the insns from the start of
+   the libcall up to but not including INSN1.  We have decomposed the
+   registers so the non-conflict is now obvious.  */
+
+static void
+remove_retval_note (rtx insn1)
+{
+  rtx retval, libcall, first, cur, note;
+
+  retval = find_reg_note (insn1, REG_RETVAL, NULL);
+  if (retval == NULL_RTX)
+    return;
+
+  /* The REG_RETVAL note points back at the insn which carries the
+     REG_LIBCALL note.  */
+  first = XEXP (retval, 0);
+  libcall = find_reg_note (first, REG_LIBCALL, NULL);
+
+  remove_note (first, libcall);
+  remove_note (insn1, retval);
+
+  /* Strip the REG_NO_CONFLICT notes from FIRST up to INSN1.  */
+  cur = first;
+  while (cur != insn1)
+    {
+      while ((note = find_reg_note (cur, REG_NO_CONFLICT, NULL))
+	     != NULL_RTX)
+	remove_note (cur, note);
+      cur = NEXT_INSN (cur);
+    }
+}
+
+/* Resolve any decomposed registers which appear in register notes on
+   INSN: a REG_EQUAL or REG_EQUIV note which mentions one directly is
+   removed, as are REG_NO_CONFLICT notes for such registers.  */
+
+static void
+resolve_reg_notes (rtx insn)
+{
+  rtx equiv;
+  rtx *link;
+
+  /* A REG_EQUAL or REG_EQUIV note which refers directly to a
+     decomposed register can not be kept; remove it, along with any
+     REG_RETVAL note.  SUBREGs of decomposed registers inside the
+     note are handled by resolve_subreg_use.  */
+  equiv = find_reg_equal_equiv_note (insn);
+  if (equiv != NULL_RTX
+      && for_each_rtx (&XEXP (equiv, 0), resolve_subreg_use, NULL) != 0)
+    {
+      remove_note (insn, equiv);
+      remove_retval_note (insn);
+    }
+
+  /* Walk the note list through a pointer to the link field so that
+     a note can be unlinked in place.  */
+  link = &REG_NOTES (insn);
+  while (*link != NULL_RTX)
+    {
+      rtx note = *link;
+      bool drop;
+
+      /* Only REG_NO_CONFLICT notes which name a decomposed register
+	 are dropped here.  */
+      drop = (REG_NOTE_KIND (note) == REG_NO_CONFLICT
+	      && resolve_reg_p (XEXP (note, 0)));
+
+      if (drop)
+	*link = XEXP (note, 1);
+      else
+	link = &XEXP (note, 1);
+    }
+}
+
+/* Return whether X is a register which can not be split into
+   word_mode pieces.  Anything which is not a REG yields false.  */
+
+static bool
+cannot_decompose_p (rtx x)
+{
+  unsigned int regno;
+
+  if (!REG_P (x))
+    return false;
+
+  regno = REGNO (x);
+  if (!HARD_REGISTER_NUM_P (regno))
+    return bitmap_bit_p (non_decomposable_context, regno);
+  /* A hard register: see whether a word_mode SUBREG of it is valid.  */
+  return !validate_subreg (word_mode, GET_MODE (x), x, UNITS_PER_WORD);
+}
+
+/* Decompose the registers used in a simple move SET within INSN.  If
+   we don't change anything, return INSN; otherwise emit the new moves
+   before INSN, delete INSN, and return the first insn emitted.  */
+
+static rtx
+resolve_simple_move (rtx set, rtx insn)
+{
+  rtx src, dest, real_dest, insns;
+  enum machine_mode orig_mode;
+  unsigned int words;
+  bool pushing;
+
+  src = SET_SRC (set);
+  dest = SET_DEST (set);
+  orig_mode = GET_MODE (dest);
+
+  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  if (words <= 1)
+    return insn;
+
+  start_sequence ();
+
+  /* We have to handle copying from a SUBREG of a decomposed reg where
+     the SUBREG is larger than word size.  Rather than assume that we
+     can take a word_mode SUBREG of the destination, we copy to a new
+     register and then copy that to the destination.  */
+
+  real_dest = NULL_RTX;
+
+  if (GET_CODE (src) == SUBREG && resolve_reg_p (SUBREG_REG (src)))
+    {
+      real_dest = dest;
+      dest = gen_reg_rtx (orig_mode);
+    }
+
+  /* Similarly if we are copying to a SUBREG of a decomposed reg where
+     the SUBREG is larger than word size.  */
+
+  if (GET_CODE (dest) == SUBREG && resolve_reg_p (SUBREG_REG (dest)))
+    {
+      rtx reg, minsn;
+
+      reg = gen_reg_rtx (orig_mode);
+      minsn = emit_move_insn (reg, src);
+      resolve_simple_move (simple_move (minsn), minsn);
+
+      src = reg;
+    }
+
+  /* If we didn't have any big SUBREGS of decomposed registers, and
+     neither side of the move is a register we are decomposing, then
+     we don't have to do anything here.  */
+
+  if (src == SET_SRC (set)
+      && dest == SET_DEST (set)
+      && !resolve_reg_p (src)
+      && !resolve_reg_p (dest))
+    {
+      end_sequence ();
+      return insn;
+    }
+
+  /* If SRC is a register which we can't decompose, or has side
+     effects, we need to move via a temporary register.  */
+
+  if (cannot_decompose_p (src)
+      || side_effects_p (src)
+      || GET_CODE (src) == ASM_OPERANDS)
+    {
+      rtx reg;
+
+      reg = gen_reg_rtx (orig_mode);
+      emit_move_insn (reg, src);
+      src = reg;
+    }
+
+  /* If DEST is a register which we can't decompose, or has side
+     effects, we need to first move to a temporary register.  We
+     handle the common case of pushing an operand directly.  */
+
+  pushing = push_operand (dest, orig_mode);
+  if (cannot_decompose_p (dest)
+      || (side_effects_p (dest) && !pushing))
+    {
+      gcc_assert (real_dest == NULL_RTX);
+      real_dest = dest;
+      dest = gen_reg_rtx (orig_mode);
+    }
+
+  if (pushing)
+    {
+      unsigned int i, j, jinc;
+
+      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
+      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
+      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
+
+      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
+	{
+	  j = 0;
+	  jinc = 1;
+	}
+      else
+	{
+	  j = words - 1;
+	  jinc = -1;  /* Unsigned, so adding it to J steps J down by one.  */
+	}
+
+      for (i = 0; i < words; ++i, j += jinc)
+	{
+	  rtx temp;
+
+	  temp = copy_rtx (XEXP (dest, 0));
+	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
+					       j * UNITS_PER_WORD);
+	  emit_move_insn (temp,
+			  simplify_gen_subreg (word_mode, src, orig_mode,
+					       j * UNITS_PER_WORD));
+	}
+    }
+  else
+    {
+      unsigned int i;
+
+      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
+	emit_insn (gen_rtx_CLOBBER (VOIDmode, dest));
+
+      for (i = 0; i < words; ++i)
+	emit_move_insn (simplify_gen_subreg (word_mode, dest, orig_mode,
+					     i * UNITS_PER_WORD),
+			simplify_gen_subreg (word_mode, src, orig_mode,
+					     i * UNITS_PER_WORD));
+    }
+
+  if (real_dest != NULL_RTX)
+    {
+      rtx minsn;
+
+      minsn = emit_move_insn (real_dest, dest);
+      resolve_simple_move (simple_move (minsn), minsn);
+    }
+
+  insns = get_insns ();
+  end_sequence ();
+
+  emit_insn_before (insns, insn);
+
+  move_libcall_note (insn, insns);
+  remove_retval_note (insn);
+  delete_insn (insn);
+
+  return insns;
+}
+
+/* PAT, the pattern of INSN, is a CLOBBER.  If it clobbers a
+   decomposed register, make it clobber the word at offset 0 and emit
+   CLOBBERs of the other components after INSN.  Return whether we
+   changed something.  */
+
+static bool
+resolve_clobber (rtx pat, rtx insn)
+{
+  rtx reg = XEXP (pat, 0);
+  enum machine_mode mode;
+  unsigned int nwords, i;
+
+  if (!resolve_reg_p (reg))
+    return false;
+
+  mode = GET_MODE (reg);
+  nwords = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+  XEXP (pat, 0) = simplify_subreg (word_mode, reg, mode, 0);
+  /* Each new CLOBBER goes directly after INSN, so emitting from the
+     last word down leaves them in ascending order.  */
+  i = nwords;
+  while (--i > 0)
+    {
+      rtx part = simplify_subreg (word_mode, reg, mode, i * UNITS_PER_WORD);
+      emit_insn_after (gen_rtx_CLOBBER (VOIDmode, part), insn);
+    }
+
+  return true;
+}
+
+/* PAT, the pattern of INSN, is a USE.  A USE of a decomposed register
+   is no longer meaningful, so delete INSN.  Return whether we did.  */
+
+static bool
+resolve_use (rtx pat, rtx insn)
+{
+  rtx used = XEXP (pat, 0);
+
+  if (!resolve_reg_p (used) && !resolve_subreg_p (used))
+    return false;
+  delete_insn (insn);
+  return true;
+}
+
+/* Look for registers which are always accessed via word-sized SUBREGs
+   or via copies.  Decompose these registers into several word-sized
+   pseudo-registers.  UPDATE_LIFE says whether to update life info.  */
+
+static void
+decompose_multiword_subregs (bool update_life)
+{
+  unsigned int max;
+  basic_block bb;
+
+  decomposable_context = BITMAP_ALLOC (NULL);
+  non_decomposable_context = BITMAP_ALLOC (NULL);
+
+  max = max_reg_num ();
+  reg_copy_graph = VEC_alloc (bitmap, heap, max);
+  VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
+  memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);
+
+  FOR_EACH_BB (bb)
+    {
+      rtx insn;
+
+      FOR_BB_INSNS (bb, insn)
+	{
+	  rtx set;
+	  enum classify_move_insn cmi;
+	  int i, n;
+
+	  if (!INSN_P (insn)
+	      || GET_CODE (PATTERN (insn)) == CLOBBER
+	      || GET_CODE (PATTERN (insn)) == USE)
+	    continue;
+
+	  set = simple_move (insn);
+
+	  if (!set)
+	    cmi = NOT_SIMPLE_MOVE;
+	  else
+	    {
+	      bool retval;
+
+	      retval = find_reg_note (insn, REG_RETVAL, NULL_RTX) != NULL_RTX;
+
+	      if (find_pseudo_copy (set) && !retval)
+		cmi = SIMPLE_PSEUDO_REG_MOVE;
+	      else if (retval
+		       && REG_P (SET_SRC (set))
+		       && HARD_REGISTER_P (SET_SRC (set)))
+		{
+		  /* We don't want to decompose an assignment which
+		     copies the value returned by a libcall to a
+		     pseudo-register.  Doing that will lose the RETVAL
+		     note with no real gain.  */
+		  cmi = NOT_SIMPLE_MOVE;
+		}
+	      else
+		cmi = SIMPLE_MOVE;
+	    }
+
+	  recog_memoized (insn);
+	  extract_insn (insn);
+	  n = recog_data.n_operands;
+	  for (i = 0; i < n; ++i)
+	    {
+	      for_each_rtx (&recog_data.operand[i],
+			    find_decomposable_subregs,
+			    &cmi);
+
+	      /* We handle ASM_OPERANDS as a special case to support
+		 things like x86 rdtsc which returns a DImode value.
+		 We can decompose the output, which will certainly be
+		 operand 0, but not the inputs.  */
+
+	      if (cmi == SIMPLE_MOVE
+		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
+		{
+		  gcc_assert (i == 0);
+		  cmi = NOT_SIMPLE_MOVE;
+		}
+	    }
+	}
+    }
+
+  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
+  if (!bitmap_empty_p (decomposable_context))
+    {
+      int hold_no_new_pseudos = no_new_pseudos;
+      int max_regno = max_reg_num ();
+      sbitmap blocks;
+      bitmap_iterator iter;
+      unsigned int regno;
+
+      propagate_pseudo_copies ();
+
+      no_new_pseudos = 0;
+      blocks = sbitmap_alloc (last_basic_block);
+      sbitmap_zero (blocks);
+
+      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
+	decompose_register (regno);
+
+      FOR_EACH_BB (bb)
+	{
+	  rtx insn;
+
+	  FOR_BB_INSNS (bb, insn)
+	    {
+	      rtx next, pat;
+	      bool changed;
+
+	      if (!INSN_P (insn))
+		continue;
+
+	      next = NEXT_INSN (insn);
+	      changed = false;
+
+	      pat = PATTERN (insn);
+	      if (GET_CODE (pat) == CLOBBER)
+		{
+		  if (resolve_clobber (pat, insn))
+		    changed = true;
+		}
+	      else if (GET_CODE (pat) == USE)
+		{
+		  if (resolve_use (pat, insn))
+		    changed = true;
+		}
+	      else
+		{
+		  rtx set;
+		  int i;
+
+		  set = simple_move (insn);
+		  if (set)
+		    {
+		      rtx orig_insn = insn;
+
+		      insn = resolve_simple_move (set, insn);
+		      if (insn != orig_insn)
+			changed = true;
+		    }
+
+		  recog_memoized (insn);
+		  extract_insn (insn);
+		  for (i = recog_data.n_operands - 1; i >= 0; --i)
+		    for_each_rtx (recog_data.operand_loc[i],
+				  resolve_subreg_use,
+				  insn);
+
+		  resolve_reg_notes (insn);
+
+		  if (num_validated_changes () > 0)
+		    {
+		      for (i = recog_data.n_dups - 1; i >= 0; --i)
+			{
+			  rtx *pl = recog_data.dup_loc[i];
+			  int dup_num = recog_data.dup_num[i];
+			  rtx *px = recog_data.operand_loc[dup_num];
+
+			  validate_change (insn, pl, *px, 1);
+			}
+
+		      i = apply_change_group ();
+		      gcc_assert (i);
+
+		      changed = true;
+		    }
+		}
+
+	      if (changed)
+		{
+		  SET_BIT (blocks, bb->index);
+		  reg_scan_update (insn, next, max_regno);
+		}
+	    }
+	}
+
+      no_new_pseudos = hold_no_new_pseudos;
+
+      if (update_life)
+	update_life_info (blocks, UPDATE_LIFE_GLOBAL_RM_NOTES,
+			  PROP_DEATH_NOTES);
+
+      sbitmap_free (blocks);
+    }
+
+  {
+    unsigned int i;
+    bitmap b;
+
+    for (i = 0; VEC_iterate (bitmap, reg_copy_graph, i, b); ++i)
+      if (b)
+	BITMAP_FREE (b);
+  }
+
+  VEC_free (bitmap, heap, reg_copy_graph);  
+
+  BITMAP_FREE (decomposable_context);
+  BITMAP_FREE (non_decomposable_context);
+}
+
+/* Gate function for both lower subreg passes: run when optimizing.  */
+
+static bool
+gate_handle_lower_subreg (void)
+{
+  return optimize > 0;
+}
+
+/* Execute the first lower subreg pass; life info is not updated.  */
+
+static unsigned int
+rest_of_handle_lower_subreg (void)
+{
+  decompose_multiword_subregs (false);
+  return 0;
+}
+
+/* Execute the second lower subreg pass, asking for a life update.  */
+
+static unsigned int
+rest_of_handle_lower_subreg2 (void)
+{
+  decompose_multiword_subregs (true);
+  return 0;
+}
+
+struct tree_opt_pass pass_lower_subreg =
+{
+  "subreg",	                        /* name */
+  gate_handle_lower_subreg,             /* gate */
+  rest_of_handle_lower_subreg,          /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_LOWER_SUBREG,                      /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func |
+  TODO_ggc_collect,                     /* todo_flags_finish */
+  'u'                                   /* letter */
+};
+
+struct tree_opt_pass pass_lower_subreg2 =
+{
+  "subreg2",	                        /* name */
+  gate_handle_lower_subreg,             /* gate */
+  rest_of_handle_lower_subreg2,          /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_LOWER_SUBREG,                      /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func |
+  TODO_ggc_collect,                     /* todo_flags_finish */
+  'U'                                   /* letter */
+};
Index: gcc/emit-rtl.c
===================================================================
--- gcc/emit-rtl.c	(revision 119834)
+++ gcc/emit-rtl.c	(working copy)
@@ -812,13 +812,12 @@ gen_reg_rtx (enum machine_mode mode)
   return val;
 }
 
-/* Generate a register with same attributes as REG, but offsetted by OFFSET.
+/* Update NEW with the same attributes as REG, but offsetted by OFFSET.
    Do the big endian correction if needed.  */
 
-rtx
-gen_rtx_REG_offset (rtx reg, enum machine_mode mode, unsigned int regno, int offset)
+static void
+update_reg_offset (rtx new, rtx reg, int offset)
 {
-  rtx new = gen_rtx_REG (mode, regno);
   tree decl;
   HOST_WIDE_INT var_size;
 
@@ -860,7 +859,7 @@ gen_rtx_REG_offset (rtx reg, enum machin
   if ((BYTES_BIG_ENDIAN || WORDS_BIG_ENDIAN)
       && decl != NULL
       && offset > 0
-      && GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (mode)
+      && GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (GET_MODE (new))
       && ((var_size = int_size_in_bytes (TREE_TYPE (decl))) > 0
 	  && var_size < GET_MODE_SIZE (GET_MODE (reg))))
     {
@@ -904,6 +903,30 @@ gen_rtx_REG_offset (rtx reg, enum machin
 
   REG_ATTRS (new) = get_reg_attrs (REG_EXPR (reg),
 				   REG_OFFSET (reg) + offset);
+}
+
+/* Generate a REG for register number REGNO in mode MODE, with the
+   same attributes as REG, but offsetted by OFFSET.  */
+
+rtx
+gen_rtx_REG_offset (rtx reg, enum machine_mode mode, unsigned int regno,
+		    int offset)
+{
+  rtx new = gen_rtx_REG (mode, regno);
+
+  update_reg_offset (new, reg, offset);
+  return new;
+}
+
+/* Generate a new pseudo-register of mode MODE with the same
+   attributes as REG, but offsetted by OFFSET.  */
+
+rtx
+gen_reg_rtx_offset (rtx reg, enum machine_mode mode, int offset)
+{
+  rtx new = gen_reg_rtx (mode);
+
+  update_reg_offset (new, reg, offset);
   return new;
 }
 
@@ -1153,7 +1176,8 @@ gen_lowpart_common (enum machine_mode mo
 	return gen_rtx_fmt_e (GET_CODE (x), mode, XEXP (x, 0));
     }
   else if (GET_CODE (x) == SUBREG || REG_P (x)
-	   || GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR
+	   || GET_CODE (x) == CONCAT || GET_CODE (x) == CONCATN
+	   || GET_CODE (x) == CONST_VECTOR
 	   || GET_CODE (x) == CONST_DOUBLE || GET_CODE (x) == CONST_INT)
     return simplify_gen_subreg (mode, x, innermode, offset);
 
Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c	(revision 119834)
+++ gcc/simplify-rtx.c	(working copy)
@@ -4644,15 +4644,22 @@ simplify_subreg (enum machine_mode outer
       && GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (GET_MODE (op)))
     return adjust_address_nv (op, outermode, byte);
 
-  /* Handle complex values represented as CONCAT
-     of real and imaginary part.  */
-  if (GET_CODE (op) == CONCAT)
+  /* Handle values represented as CONCAT.  */
+  if (GET_CODE (op) == CONCAT || GET_CODE (op) == CONCATN)
     {
       unsigned int inner_size, final_offset;
       rtx part, res;
 
-      inner_size = GET_MODE_UNIT_SIZE (innermode);
-      part = byte < inner_size ? XEXP (op, 0) : XEXP (op, 1);
+      if (GET_CODE (op) == CONCAT)
+	{
+	  inner_size = GET_MODE_UNIT_SIZE (innermode);
+	  part = byte < inner_size ? XEXP (op, 0) : XEXP (op, 1);
+	}
+      else
+	{
+	  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
+	  part = XVECEXP (op, 0, byte / inner_size);
+	}
       final_offset = byte % inner_size;
       if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
 	return NULL_RTX;
Index: gcc/regclass.c
===================================================================
--- gcc/regclass.c	(revision 119834)
+++ gcc/regclass.c	(working copy)
@@ -859,7 +859,7 @@ static void record_address_regs (enum ma
 #ifdef FORBIDDEN_INC_DEC_CLASSES
 static int auto_inc_dec_reg_p (rtx, enum machine_mode);
 #endif
-static void reg_scan_mark_refs (rtx, rtx, int);
+static void reg_scan_mark_refs (rtx, rtx, int, unsigned int);
 
 /* Wrapper around REGNO_OK_FOR_INDEX_P, to allow pseudo registers.  */
 
@@ -2337,10 +2337,10 @@ reg_scan (rtx f, unsigned int nregs)
 	if (GET_CODE (pat) == PARALLEL
 	    && XVECLEN (pat, 0) > max_parallel)
 	  max_parallel = XVECLEN (pat, 0);
-	reg_scan_mark_refs (pat, insn, 0);
+	reg_scan_mark_refs (pat, insn, 0, 0);
 
 	if (REG_NOTES (insn))
-	  reg_scan_mark_refs (REG_NOTES (insn), insn, 1);
+	  reg_scan_mark_refs (REG_NOTES (insn), insn, 1, 0);
       }
 
   max_parallel += max_set_parallel;
@@ -2348,11 +2348,39 @@ reg_scan (rtx f, unsigned int nregs)
   timevar_pop (TV_REG_SCAN);
 }
 
+/* Update 'regscan' information by looking at the insns from FIRST
+   up to but not including LAST.  Some new REGs have been created,
+   and any REG with number greater than or equal to OLD_MAX_REGNO is
+   such a REG.  We only update information for those.  */
+
+void
+reg_scan_update (rtx first, rtx last, unsigned int old_max_regno)
+{
+  rtx insn, body;
+
+  allocate_reg_info (max_reg_num (), FALSE, FALSE);
+
+  for (insn = first; insn != last; insn = NEXT_INSN (insn))
+    {
+      if (!INSN_P (insn))
+	continue;
+
+      body = PATTERN (insn);
+      if (GET_CODE (body) == PARALLEL && max_parallel < XVECLEN (body, 0))
+	max_parallel = XVECLEN (body, 0);
+      reg_scan_mark_refs (body, insn, 0, old_max_regno);
+      if (REG_NOTES (insn))
+	reg_scan_mark_refs (REG_NOTES (insn), insn, 1, old_max_regno);
+    }
+}
+
 /* X is the expression to scan.  INSN is the insn it appears in.
-   NOTE_FLAG is nonzero if X is from INSN's notes rather than its body.  */
+   NOTE_FLAG is nonzero if X is from INSN's notes rather than its body.
+   We should only record information for REGs with numbers
+   greater than or equal to MIN_REGNO.  */
 
 static void
-reg_scan_mark_refs (rtx x, rtx insn, int note_flag)
+reg_scan_mark_refs (rtx x, rtx insn, int note_flag, unsigned int min_regno)
 {
   enum rtx_code code;
   rtx dest;
@@ -2379,35 +2407,43 @@ reg_scan_mark_refs (rtx x, rtx insn, int
       {
 	unsigned int regno = REGNO (x);
 
-	if (!note_flag)
-	  REGNO_LAST_UID (regno) = INSN_UID (insn);
-	if (REGNO_FIRST_UID (regno) == 0)
-	  REGNO_FIRST_UID (regno) = INSN_UID (insn);
+	if (regno >= min_regno)
+	  {
+	    if (!note_flag)
+	      REGNO_LAST_UID (regno) = INSN_UID (insn);
+	    if (REGNO_FIRST_UID (regno) == 0)
+	      REGNO_FIRST_UID (regno) = INSN_UID (insn);
+	    /* If we are called by reg_scan_update() (indicated by min_regno
+	       being set), we also need to update the reference count.  */
+	    if (min_regno)
+	      REG_N_REFS (regno)++;
+	  }
       }
       break;
 
     case EXPR_LIST:
       if (XEXP (x, 0))
-	reg_scan_mark_refs (XEXP (x, 0), insn, note_flag);
+	reg_scan_mark_refs (XEXP (x, 0), insn, note_flag, min_regno);
       if (XEXP (x, 1))
-	reg_scan_mark_refs (XEXP (x, 1), insn, note_flag);
+	reg_scan_mark_refs (XEXP (x, 1), insn, note_flag, min_regno);
       break;
 
     case INSN_LIST:
       if (XEXP (x, 1))
-	reg_scan_mark_refs (XEXP (x, 1), insn, note_flag);
+	reg_scan_mark_refs (XEXP (x, 1), insn, note_flag, min_regno);
       break;
 
     case CLOBBER:
       {
 	rtx reg = XEXP (x, 0);
-	if (REG_P (reg))
+	if (REG_P (reg)
+	    && REGNO (reg) >= min_regno)
 	  {
 	    REG_N_SETS (REGNO (reg))++;
 	    REG_N_REFS (REGNO (reg))++;
 	  }
 	else if (MEM_P (reg))
-	  reg_scan_mark_refs (XEXP (reg, 0), insn, note_flag);
+	  reg_scan_mark_refs (XEXP (reg, 0), insn, note_flag, min_regno);
       }
       break;
 
@@ -2424,7 +2460,8 @@ reg_scan_mark_refs (rtx x, rtx insn, int
       if (GET_CODE (dest) == PARALLEL)
 	max_set_parallel = MAX (max_set_parallel, XVECLEN (dest, 0) - 1);
 
-      if (REG_P (dest))
+      if (REG_P (dest)
+	  && REGNO (dest) >= min_regno)
 	{
 	  REG_N_SETS (REGNO (dest))++;
 	  REG_N_REFS (REGNO (dest))++;
@@ -2444,6 +2481,7 @@ reg_scan_mark_refs (rtx x, rtx insn, int
 
       if (REG_P (SET_DEST (x))
 	  && REGNO (SET_DEST (x)) >= FIRST_PSEUDO_REGISTER
+	  && REGNO (SET_DEST (x)) >= min_regno
 	  /* If the destination pseudo is set more than once, then other
 	     sets might not be to a pointer value (consider access to a
 	     union in two threads of control in the presence of global
@@ -2504,12 +2542,12 @@ reg_scan_mark_refs (rtx x, rtx insn, int
 	for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
 	  {
 	    if (fmt[i] == 'e')
-	      reg_scan_mark_refs (XEXP (x, i), insn, note_flag);
+	      reg_scan_mark_refs (XEXP (x, i), insn, note_flag, min_regno);
 	    else if (fmt[i] == 'E' && XVEC (x, i) != 0)
 	      {
 		int j;
 		for (j = XVECLEN (x, i) - 1; j >= 0; j--)
-		  reg_scan_mark_refs (XVECEXP (x, i, j), insn, note_flag);
+		  reg_scan_mark_refs (XVECEXP (x, i, j), insn, note_flag, min_regno);
 	      }
 	  }
       }
Index: gcc/rtl.h
===================================================================
--- gcc/rtl.h	(revision 119834)
+++ gcc/rtl.h	(working copy)
@@ -1474,6 +1474,7 @@ extern int rtx_equal_p (rtx, rtx);
 extern rtvec gen_rtvec_v (int, rtx *);
 extern rtx gen_reg_rtx (enum machine_mode);
 extern rtx gen_rtx_REG_offset (rtx, enum machine_mode, unsigned int, int);
+extern rtx gen_reg_rtx_offset (rtx, enum machine_mode, int);
 extern rtx gen_label_rtx (void);
 extern rtx gen_lowpart_common (enum machine_mode, rtx);
 
@@ -2166,6 +2167,7 @@ extern void init_reg_sets (void);
 extern void regclass_init (void);
 extern void regclass (rtx, int);
 extern void reg_scan (rtx, unsigned int);
+extern void reg_scan_update (rtx, rtx, unsigned int);
 extern void fix_register (const char *, int, int);
 extern void init_subregs_of_mode (void);
 extern void record_subregs_of_mode (rtx);
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in	(revision 119834)
+++ gcc/Makefile.in	(working copy)
@@ -1018,7 +1018,7 @@ OBJS-common = \
  lambda-trans.o	lambda-code.o tree-loop-linear.o tree-ssa-sink.o 	   \
  tree-vrp.o tree-stdarg.o tree-cfgcleanup.o tree-ssa-reassoc.o		   \
  tree-ssa-structalias.o tree-object-size.o 				   \
- rtl-factoring.o
+ rtl-factoring.o lower-subreg.o
 
 
 OBJS-md = $(out_object_file)
@@ -2646,6 +2646,8 @@ hooks.o: hooks.c $(CONFIG_H) $(SYSTEM_H)
 pretty-print.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h intl.h $(PRETTY_PRINT_H) \
    $(TREE_H)
 errors.o : errors.c $(CONFIG_H) $(SYSTEM_H) errors.h $(BCONFIG_H)
+lower-subreg.o : lower-subreg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+   $(MACHMODE_H) $(TM_H) $(RTL_H) $(TM_P_H) bitmap.h tree-pass.h
 
 $(out_object_file): $(out_file) $(CONFIG_H) coretypes.h $(TM_H) $(TREE_H) \
    $(RTL_H) $(REGS_H) hard-reg-set.h insn-config.h conditions.h \
Index: gcc/passes.c
===================================================================
--- gcc/passes.c	(revision 119834)
+++ gcc/passes.c	(working copy)
@@ -634,6 +634,7 @@ init_optimization_passes (void)
   NEXT_PASS (pass_unshare_all_rtl);
   NEXT_PASS (pass_instantiate_virtual_regs);
   NEXT_PASS (pass_jump2);
+  NEXT_PASS (pass_lower_subreg);
   NEXT_PASS (pass_cse);
   NEXT_PASS (pass_rtl_fwprop);
   NEXT_PASS (pass_gcse);
@@ -653,6 +654,7 @@ init_optimization_passes (void)
   NEXT_PASS (pass_partition_blocks);
   NEXT_PASS (pass_regmove);
   NEXT_PASS (pass_split_all_insns);
+  NEXT_PASS (pass_lower_subreg2);
   NEXT_PASS (pass_mode_switching);
   NEXT_PASS (pass_see);
   NEXT_PASS (pass_recompute_reg_usage);
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md	(revision 119834)
+++ gcc/config/i386/i386.md	(working copy)
@@ -3274,7 +3274,7 @@
 (define_split
   [(set (match_operand:DI 0 "memory_operand" "")
      (zero_extend:DI (match_dup 0)))]
-  "TARGET_64BIT"
+  ""
   [(set (match_dup 4) (const_int 0))]
   "split_di (&operands[0], 1, &operands[3], &operands[4]);")
 
@@ -3289,13 +3289,15 @@
 
 (define_split
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
-	(zero_extend:DI (match_operand:SI 1 "general_operand" "")))
+	(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && reload_completed
-   && !SSE_REG_P (operands[0]) && !MMX_REG_P (operands[0])"
-  [(set (match_dup 3) (match_dup 1))
-   (set (match_dup 4) (const_int 0))]
-  "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+  "!TARGET_64BIT
+   && (!MEM_P (operands[0]) || !MEM_P (operands[1]))
+   && (!reload_completed
+       || (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0])))"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (const_int 0))]
+  "split_di (&operands[0], 1, &operands[0], &operands[2]);")
 
 (define_insn "zero_extendhidi2"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -4852,7 +4854,40 @@
 	(plus:DI (match_operand:DI 1 "nonimmediate_operand" "")
 		 (match_operand:DI 2 "general_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && reload_completed"
+  "!TARGET_64BIT"
+  [(parallel [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))
+	      (set (match_dup 3)
+		   (plus:SI (plus:SI (match_dup 4) (match_dup 5))
+			    (truncate:SI
+			     (lshiftrt:DI
+			      (plus:DI
+			       (zero_extend:DI (match_dup 1))
+			       (zero_extend:DI (match_dup 2)))
+			      (const_int 32)))))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  split_di (operands + 0, 1, operands + 0, operands + 3);
+  split_di (operands + 1, 1, operands + 1, operands + 4);
+  split_di (operands + 2, 1, operands + 2, operands + 5);
+})
+
+(define_insn_and_split "*adddi3_2"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,rm,r")
+	(plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,0,0")
+		 (match_operand:SI 2 "general_operand" "ri,rm,ri,rm")))
+   (set (match_operand:SI 3 "nonimmediate_operand" "=rm,r,rm,r")
+	(plus:SI (plus:SI (match_operand:SI 4 "general_operand" "3,3,ri,rm")
+			  (match_operand:SI 5 "general_operand" "ri,rm,3,3"))
+		 (truncate:SI
+		  (lshiftrt:DI
+		   (plus:DI
+		    (zero_extend:DI (match_dup 1))
+		    (zero_extend:DI (match_dup 2)))
+		   (const_int 32)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  "#"
+  "&& reload_completed"
   [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)]
 					  UNSPEC_ADD_CARRY))
 	      (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
@@ -4861,9 +4896,16 @@
 				     (match_dup 4))
 			    (match_dup 5)))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "split_di (operands+0, 1, operands+0, operands+3);
-   split_di (operands+1, 1, operands+1, operands+4);
-   split_di (operands+2, 1, operands+2, operands+5);")
+{
+  /* We can only have one pair of commutative operands in an insn, so
+     we commute operands 4 and 5 manually.  */
+  if (rtx_equal_p (operands[5], operands[3]))
+    {
+      rtx tmp = operands[4];
+      operands[4] = operands[5];
+      operands[5] = tmp;
+    }
+})
 
 (define_insn "adddi3_carry_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
@@ -6582,7 +6624,41 @@
 	(minus:DI (match_operand:DI 1 "nonimmediate_operand" "")
 		  (match_operand:DI 2 "general_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && reload_completed"
+  "!TARGET_64BIT"
+  [(parallel [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))
+	      (set (match_dup 3)
+		   (plus:SI (minus:SI (match_dup 4) (match_dup 5))
+			    (truncate:SI
+			     (ashiftrt:DI
+			      (minus:DI
+			       (sign_extend:DI (match_dup 1))
+			       (sign_extend:DI (match_dup 2)))
+			      (const_int 32)))))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  split_di (operands + 0, 1, operands + 0, operands + 3);
+  split_di (operands + 1, 1, operands + 1, operands + 4);
+  split_di (operands + 2, 1, operands + 2, operands + 5);
+})
+
+(define_insn_and_split "*subdi3_2"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	(minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+		  (match_operand:SI 2 "general_operand" "ri,rm")))
+   (set (match_operand:SI 3 "nonimmediate_operand" "=rm,r")
+	(plus:SI
+	 (minus:SI (match_operand:SI 4 "nonimmediate_operand" "3,3")
+		   (match_operand:SI 5 "general_operand" "ri,rm"))
+	 (truncate:SI
+	  (ashiftrt:DI
+	   (minus:DI
+	    (sign_extend:DI (match_dup 1))
+	    (sign_extend:DI (match_dup 2)))
+	   (const_int 32)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  "#"
+  "&& reload_completed"
   [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
 	      (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
    (parallel [(set (match_dup 3)
@@ -6590,9 +6666,7 @@
 			     (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
 				      (match_dup 5))))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "split_di (operands+0, 1, operands+0, operands+3);
-   split_di (operands+1, 1, operands+1, operands+4);
-   split_di (operands+2, 1, operands+2, operands+5);")
+  "")
 
 (define_insn "subdi3_carry_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
@@ -9427,7 +9501,27 @@
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
 	(neg:DI (match_operand:DI 1 "general_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && reload_completed"
+  "!TARGET_64BIT"
+  [(parallel [(set (match_dup 0) (neg:SI (match_dup 2)))
+	      (set (match_dup 1)
+		   (not:SI (minus:SI (match_dup 3)
+				     (eq:SI (match_dup 2) (const_int 0)))))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  split_di (operands + 1, 1, operands + 2, operands + 3);
+  split_di (operands + 0, 1, operands + 0, operands + 1);
+})
+
+(define_insn_and_split "*negdi2_2"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(neg:SI (match_operand:SI 2 "nonimmediate_operand" "0")))
+   (set (match_operand:SI 1 "nonimmediate_operand" "=rm")
+	(not:SI (minus:SI (match_operand:SI 3 "nonimmediate_operand" "1")
+			  (eq:SI (match_dup 2) (const_int 0)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  "#"
+  "&& reload_completed"
   [(parallel
     [(set (reg:CCZ FLAGS_REG)
 	  (compare:CCZ (neg:SI (match_dup 2)) (const_int 0)))
@@ -9442,8 +9536,7 @@
     [(set (match_dup 1)
 	  (neg:SI (match_dup 1)))
      (clobber (reg:CC FLAGS_REG))])]
-  "split_di (operands+1, 1, operands+2, operands+3);
-   split_di (operands+0, 1, operands+0, operands+1);")
+  "")
 
 (define_insn "*negdi2_1_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
@@ -10380,7 +10473,14 @@
 	(ashift:DI (match_operand:DI 1 "ashldi_input_operand" "")
 		   (match_operand:QI 2 "nonmemory_operand" "")))]
   ""
-  "ix86_expand_binary_operator (ASHIFT, DImode, operands); DONE;")
+{
+  if (!TARGET_64BIT
+      && GET_CODE (operands[2]) == CONST_INT
+      && INTVAL (operands[2]) >= 32)
+    FAIL;
+  ix86_expand_binary_operator (ASHIFT, DImode, operands);
+  DONE;
+})
 
 (define_insn "*ashldi3_1_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
@@ -10552,8 +10652,7 @@
 	(ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
 		   (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-		     ? flow2_completed : reload_completed)"
+  "!TARGET_64BIT"
   [(const_int 0)]
   "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;")
 
@@ -11339,7 +11438,14 @@
 	(ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
 		     (match_operand:QI 2 "nonmemory_operand" "")))]
   ""
-  "ix86_expand_binary_operator (ASHIFTRT, DImode, operands); DONE;")
+{
+  if (!TARGET_64BIT
+      && GET_CODE (operands[2]) == CONST_INT
+      && INTVAL (operands[2]) >= 32)
+    FAIL;
+  ix86_expand_binary_operator (ASHIFTRT, DImode, operands);
+  DONE;
+})
 
 (define_insn "*ashrdi3_63_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
@@ -12018,7 +12124,14 @@
 	(lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
 		     (match_operand:QI 2 "nonmemory_operand" "")))]
   ""
-  "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;")
+{
+  if (!TARGET_64BIT
+      && GET_CODE (operands[2]) == CONST_INT
+      && INTVAL (operands[2]) >= 32)
+    FAIL;
+  ix86_expand_binary_operator (LSHIFTRT, DImode, operands);
+  DONE;
+})
 
 (define_insn "*lshrdi3_1_one_bit_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
@@ -12143,8 +12256,7 @@
 	(lshiftrt:DI (match_operand:DI 1 "register_operand" "")
 		     (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-		     ? flow2_completed : reload_completed)"
+  "!TARGET_64BIT"
   [(const_int 0)]
   "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;")
 
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 119834)
+++ gcc/config/i386/i386.c	(working copy)
@@ -9061,7 +9061,7 @@ ix86_expand_clear (rtx dest)
   rtx tmp;
 
   /* We play register width games, which are only valid after reload.  */
-  gcc_assert (reload_completed);
+  gcc_assert (reload_completed || GET_MODE_SIZE (GET_MODE (dest)) >= 4);
 
   /* Avoid HImode and its attendant prefix byte.  */
   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
@@ -12510,10 +12510,27 @@ ix86_split_ashl (rtx *operands, rtx scra
 	  rtx x;
 
 	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
-	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
+	    {
+	      x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode,
+				       operands[2]);
+	      x = gen_rtx_SET (VOIDmode, high[0], x);
+	      if (reload_completed
+		  && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size))
+		emit_insn (x);
+	      else
+		{
+		  rtx y;
+
+		  y = gen_rtx_CLOBBER (VOIDmode,
+				       gen_rtx_REG (CCmode, FLAGS_REG));
+		  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
+		}
+	    }
 	  else
-	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
-	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
+	    {
+	      x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
+	      emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
+	    }
 
 	  emit_insn ((mode == DImode
 		      ? gen_lshrsi3
@@ -17970,6 +17987,15 @@ ix86_modes_tieable_p (enum machine_mode 
       && ix86_tieable_integer_mode_p (mode2))
     return true;
 
+  /* If MODE1 is an integer mode bigger than a word, and MODE2 is an
+     integer mode word-sized or larger, then we can just pick up the
+     relevant pieces.  */
+  if (VALID_INT_MODE_P (mode1)
+      && GET_MODE_SIZE (mode1) > UNITS_PER_WORD
+      && VALID_INT_MODE_P (mode2)
+      && GET_MODE_SIZE (mode2) >= UNITS_PER_WORD)
+    return true;
+
   /* MODE2 being XFmode implies fp stack or general regs, which means we
      can tie any smaller floating point modes to it.  Note that we do not
      tie this with TFmode.  */
Index: gcc/reg-notes.def
===================================================================
--- gcc/reg-notes.def	(revision 119834)
+++ gcc/reg-notes.def	(working copy)
@@ -163,3 +163,7 @@ REG_NOTE (CROSSING_JUMP)
 /* This kind of note is generated at each to `setjmp', and similar
    functions that can return twice.  */
 REG_NOTE (SETJMP)
+
+/* The argument is a SUBREG of a pseudo-register which dies in this
+   insn.  Used during the register allocation pass.  */
+REG_NOTE (SUBREG_DEAD)

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]