This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: HARD_REGNO_MODE_OK_FOR_CLASS Might Be Nice (tm)


On Wed, Dec 21, 2005 at 03:07:21PM -0500, DJ Delorie wrote:
> 
> >  It was because I had decided to expose the registers as %al, %ah,
> > ... %bl, %bh, ... instead of the customary %[e]ax and friends.
> 
> I originally did this for the m32c port (which has hi/low pairs like
> the i386) but discovered that reload always allocates registers in
> UNITS_PER_WORD chunks, and move-by-pieces uses UNITS_PER_WORD chunks,
> so if you have 8 bit registers you end up with 8 bit moves all over
> the place.  If you have 8 bit registers and 16 bit moves, reload
> counts wrong.  I ended up switching to the word-sized register model
> that i386 currently uses, even though it meant worse code generation.
> 
> I seem to recall ranting about it at the time, too.  UNITS_PER_WORD
> must die!  The m32c has four 8 bit registers, two 16 bit registers,
> and five 24 bit registers.  They can be combined to form 8, 16, 24,
> 32, 48, and 64 bit registers.  GCC has no way of expressing that.

Like Bernd, I'm playing with a 16-bit ix86 port where the eight 8-bit
registers are not represented as four 16-bit registers. This means that
reload has to deal with eight 8-bit registers and four 16-bit registers.
With a few patches, reload is able to work with mixed register sizes.
I have rewritten subreg_regno_offset() and subreg_offset_representable_p().
There are two known problems with the rewritten versions:

1) The i386 complex modes with holes are not supported, but I think that
can be added.

2) The i386 backend causes subreg_regno_offset (9, DFmode, 4, SImode) calls
which fail the first assertion. Register 9 is a floating point register. I
have no idea what should be returned in this case.

Index: gcc/rtlanal.c
===================================================================
--- gcc/rtlanal.c	(revision 109766)
+++ gcc/rtlanal.c	(working copy)
@@ -3130,7 +3130,7 @@
   return subreg_lsb_1 (GET_MODE (x), GET_MODE (SUBREG_REG (x)),
 		       SUBREG_BYTE (x));
 }
-
+#if 1
 /* This function returns the regno offset of a subreg expression.
    xregno - A regno of an inner hard subreg_reg (or what will become one).
    xmode  - The mode of xregno.
@@ -3141,6 +3141,42 @@
 subreg_regno_offset (unsigned int xregno, enum machine_mode xmode,
 		     unsigned int offset, enum machine_mode ymode)
 {
+/* (subreg:Ymode (reg:Xmode Xregno) offset) */
+  unsigned int r, units;
+
+  gcc_assert (xregno < FIRST_PSEUDO_REGISTER);
+
+  if (GET_MODE_SIZE (xmode) >= GET_MODE_SIZE (ymode))
+    {
+      for (r = xregno, units = 0;
+           units < offset;
+           units += GET_MODE_SIZE (reg_raw_mode[r]), r ++)
+         ;
+      gcc_assert (units == offset);
+    }
+  else if (offset == 0 && GET_MODE_SIZE (ymode) > reg_raw_mode[xregno]
+	   ? WORDS_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+    {
+      /* Big endian paradoxical subreg. */
+      for (r = xregno, units = 0;
+	   units < GET_MODE_SIZE (ymode);
+	   units += GET_MODE_SIZE (reg_raw_mode[r]), r --)
+	 ;
+      gcc_assert (units == GET_MODE_SIZE (ymode));
+    }
+  else
+    {
+      r = xregno;
+      gcc_assert (offset == 0);
+    }
+  return (r - xregno);
+}
+
+#else /* 0 */
+unsigned int
+subreg_regno_offset (unsigned int xregno, enum machine_mode xmode,
+		     unsigned int offset, enum machine_mode ymode)
+{
   int nregs_xmode, nregs_ymode, nregs_xmode_unit_int;
   int mode_multiple, nregs_multiple;
   int y_offset;
@@ -3194,6 +3230,7 @@
   nregs_multiple =  nregs_xmode / nregs_ymode;
   return (y_offset / (mode_multiple / nregs_multiple)) * nregs_ymode;
 }
+#endif /* 0 */
 
 /* This function returns true when the offset is representable via
    subreg_offset in the given regno.
@@ -3202,10 +3239,42 @@
    offset - The byte offset.
    ymode  - The mode of a top level SUBREG (or what may become one).
    RETURN - Whether the offset is representable.  */
+
 bool
 subreg_offset_representable_p (unsigned int xregno, enum machine_mode xmode,
 			       unsigned int offset, enum machine_mode ymode)
 {
+/* (subreg:Ymode (reg:Xmode Xregno) offset) */
+  unsigned int r, units;
+
+  gcc_assert (xregno < FIRST_PSEUDO_REGISTER);
+
+  if (GET_MODE_SIZE (xmode) >= GET_MODE_SIZE (ymode))
+    {
+      for (r = xregno, units = 0;
+           units < offset;
+           units += GET_MODE_SIZE (reg_raw_mode[r]), r ++)
+         ;
+      return (units == offset);
+    }
+  else if (offset == 0 && GET_MODE_SIZE (ymode) > reg_raw_mode[xregno]
+	   ? WORDS_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+    {
+      /* Big endian paradoxical subreg. */
+      for (r = xregno, units = 0;
+	   units < GET_MODE_SIZE (ymode);
+	   units += GET_MODE_SIZE (reg_raw_mode[r]), r --)
+	 ;
+      return (units == GET_MODE_SIZE (ymode));
+    }
+  return (offset == 0);
+}
+
+#if 0
+bool
+subreg_offset_representable_p (unsigned int xregno, enum machine_mode xmode,
+			       unsigned int offset, enum machine_mode ymode)
+{
   int nregs_xmode, nregs_ymode, nregs_xmode_unit, nregs_xmode_unit_int;
   int mode_multiple, nregs_multiple;
   int y_offset;
@@ -3299,6 +3368,7 @@
 
   return (!(y_offset % (mode_multiple / nregs_multiple)));
 }
+#endif /* 0 */
 
 /* Return the final regno that a subreg expression refers to.  */
 unsigned int


In reload, the way that push_reload() calls find_valid_class() assumes that
HARD_REGNO_NREGS (regno, mode) does not change when regno changes. I had to
rewrite that. Note that I end up with two outer mode parameters. I don't
know if their values ever differ. Also, the original find_valid_class() would
never return anything other than NO_REGS or ALL_REGS. I fixed that too.
Without the patch, reload would run into problems trying to load a
shift count in %si into %cl. With the patch, it uses %cx as an intermediate
register when reloading %cl from a 16-bit register, e.g. "mov %si,%cx" and
"shr %cl,%dx", while reloads from an 8-bit register don't clobber %ch, e.g.
"mov %al,%cl" and "shl %cl,%dx".

Index: gcc/reload.c
===================================================================
--- gcc/reload.c	(revision 109766)
+++ gcc/reload.c	(working copy)
@@ -246,7 +246,7 @@
 				  enum machine_mode, enum reload_type,
 				  enum insn_code *, secondary_reload_info *);
 static enum reg_class find_valid_class (enum machine_mode, enum machine_mode,
-					int, unsigned int);
+					int, unsigned int, enum machine_mode);
 static int reload_inner_reg_of_subreg (rtx, enum machine_mode, int);
 static void push_replacement (rtx *, int, enum machine_mode);
 static void dup_replacements (rtx *, rtx *);
@@ -626,17 +626,20 @@
 
 /* Find the largest class which has at least one register valid in
    mode INNER, and which for every such register, that register number
-   plus N is also valid in OUTER (if in range) and is cheap to move
-   into REGNO.  Such a class must exist.  */
+   plus subreg_regno_offset (regnum, INNER, OFFSET, OUTER2) is also valid
+   in OUTER (if in range) and is cheap to move into REGNO.
+   Such a class must exist.  */
 
 static enum reg_class
 find_valid_class (enum machine_mode outer ATTRIBUTE_UNUSED,
-		  enum machine_mode inner ATTRIBUTE_UNUSED, int n,
-		  unsigned int dest_regno ATTRIBUTE_UNUSED)
+		  enum machine_mode inner ATTRIBUTE_UNUSED, int offset,
+		  unsigned int dest_regno ATTRIBUTE_UNUSED,
+		  enum machine_mode outer2)
 {
   int best_cost = -1;
   int class;
   int regno;
+  int n;
   enum reg_class best_class = NO_REGS;
   enum reg_class dest_class ATTRIBUTE_UNUSED = REGNO_REG_CLASS (dest_regno);
   unsigned int best_size = 0;
@@ -646,11 +649,16 @@
     {
       int bad = 0;
       int good = 0;
-      for (regno = 0; regno < FIRST_PSEUDO_REGISTER - n && ! bad; regno++)
-	if (TEST_HARD_REG_BIT (reg_class_contents[class], regno))
+      unsigned int nregs;
+      for (regno = 0, nregs = hard_regno_nregs[regno][inner];
+	   regno + nregs - 1 < FIRST_PSEUDO_REGISTER && ! bad;
+	   regno++, nregs = hard_regno_nregs[regno][inner])
+	if (TEST_HARD_REG_BIT (reg_class_contents[class], regno)
+	    && TEST_HARD_REG_BIT (reg_class_contents[class], regno + nregs - 1))
 	  {
 	    if (HARD_REGNO_MODE_OK (regno, inner))
 	      {
+		n = subreg_regno_offset (regno, inner, offset, outer2);
 		good = 1;
 		if (! TEST_HARD_REG_BIT (reg_class_contents[class], regno + n)
 		    || ! HARD_REGNO_MODE_OK (regno + n, outer))
@@ -1055,11 +1066,8 @@
       if (REG_P (SUBREG_REG (in)))
 	in_class
 	  = find_valid_class (inmode, GET_MODE (SUBREG_REG (in)),
-			      subreg_regno_offset (REGNO (SUBREG_REG (in)),
-						   GET_MODE (SUBREG_REG (in)),
-						   SUBREG_BYTE (in),
-						   GET_MODE (in)),
-			      REGNO (SUBREG_REG (in)));
+			      SUBREG_BYTE (in), REGNO (SUBREG_REG (in)),
+			      GET_MODE (in));
 
       /* This relies on the fact that emit_reload_insns outputs the
 	 instructions for input reloads of type RELOAD_OTHER in the same
@@ -1149,11 +1157,8 @@
       push_reload (SUBREG_REG (out), SUBREG_REG (out), &SUBREG_REG (out),
 		   &SUBREG_REG (out),
 		   find_valid_class (outmode, GET_MODE (SUBREG_REG (out)),
-				     subreg_regno_offset (REGNO (SUBREG_REG (out)),
-							  GET_MODE (SUBREG_REG (out)),
-							  SUBREG_BYTE (out),
-							  GET_MODE (out)),
-				     REGNO (SUBREG_REG (out))),
+				     SUBREG_BYTE (out),
+				     REGNO (SUBREG_REG (out)), GET_MODE (out)),
 		   VOIDmode, VOIDmode, 0, 0,
 		   opnum, RELOAD_OTHER);
     }


Comments will be appreciated. It would also be very interesting to see
how this works on the m32c, in particular.

Best regards,
Rask Ingemann Lambertsen


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]