This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: HARD_REGNO_MODE_OK_FOR_CLASS Might Be Nice (tm)
- From: Rask Ingemann Lambertsen <rask at sygehus dot dk>
- To: gcc at gcc dot gnu dot org
- Date: Mon, 16 Jan 2006 23:15:29 +0100
- Subject: Re: HARD_REGNO_MODE_OK_FOR_CLASS Might Be Nice (tm)
- References: <20051221182316.GA24293@prism.co.za> <200512212007.jBLK7L1I027074@greed.delorie.com>
On Wed, Dec 21, 2005 at 03:07:21PM -0500, DJ Delorie wrote:
>
> > It was because I had decided to expose the registers as %al, %ah,
> > ... %bl, %bh, ... instead of the customary %[e]ax and friends.
>
> I originally did this for the m32c port (which has hi/low pairs like
> the i386) but discovered that reload always allocates registers in
> UNITS_PER_WORD chunks, and move-by-pieces uses UNITS_PER_WORD chunks,
> so if you have 8 bit registers you end up with 8 bit moves all over
> the place. If you have 8 bit registers and 16 bit moves, reload
> counts wrong. I ended up switching to the word-sized register model
> that i386 currently uses, even though it meant worse code generation.
>
> I seem to recall ranting about it at the time, too. UNITS_PER_WORD
> must die! The m32c has four 8 bit registers, two 16 bit registers,
> and five 24 bit registers. They can be combined to form 8, 16, 24,
> 32, 48, and 64 bit registers. GCC has no way of expressing that.
Like Bernd, I'm playing with a 16-bit ix86 port where the eight 8-bit
registers are not represented as four 16-bit registers. This means that
reload has to deal with eight 8-bit registers and four 16-bit registers.
With a few patches, reload is able to work with mixed register sizes.
I have rewritten subreg_regno_offset() and subreg_offset_representable_p().
There are two known problems with the rewritten versions:
1) The i386 complex modes with holes are not supported, but I think that
support for them can be added.
2) The i386 backend causes subreg_regno_offset (9, DFmode, 4, SImode) calls
which fail the first assertion. Register 9 is a floating point register. I
have no idea what should be returned in this case.
Index: gcc/rtlanal.c
===================================================================
--- gcc/rtlanal.c (revision 109766)
+++ gcc/rtlanal.c (working copy)
@@ -3130,7 +3130,7 @@
return subreg_lsb_1 (GET_MODE (x), GET_MODE (SUBREG_REG (x)),
SUBREG_BYTE (x));
}
-
+#if 1
/* This function returns the regno offset of a subreg expression.
xregno - A regno of an inner hard subreg_reg (or what will become one).
xmode - The mode of xregno.
@@ -3141,6 +3141,42 @@
subreg_regno_offset (unsigned int xregno, enum machine_mode xmode,
unsigned int offset, enum machine_mode ymode)
{
+/* (subreg:Ymode (reg:Xmode Xregno) offset) */
+ unsigned int r, units;
+
+ gcc_assert (xregno < FIRST_PSEUDO_REGISTER);
+
+ if (GET_MODE_SIZE (xmode) >= GET_MODE_SIZE (ymode))
+ {
+ for (r = xregno, units = 0;
+ units < offset;
+ units += GET_MODE_SIZE (reg_raw_mode[r]), r ++)
+ ;
+ gcc_assert (units == offset);
+ }
+ else if (offset == 0 && GET_MODE_SIZE (ymode) > reg_raw_mode[xregno]
+ ? WORDS_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+ {
+ /* Big endian paradoxical subreg. */
+ for (r = xregno, units = 0;
+ units < GET_MODE_SIZE (ymode);
+ units += GET_MODE_SIZE (reg_raw_mode[r]), r --)
+ ;
+ gcc_assert (units == GET_MODE_SIZE (ymode));
+ }
+ else
+ {
+ r = xregno;
+ gcc_assert (offset == 0);
+ }
+ return (r - xregno);
+}
+
+#else /* 0 */
+unsigned int
+subreg_regno_offset (unsigned int xregno, enum machine_mode xmode,
+ unsigned int offset, enum machine_mode ymode)
+{
int nregs_xmode, nregs_ymode, nregs_xmode_unit_int;
int mode_multiple, nregs_multiple;
int y_offset;
@@ -3194,6 +3230,7 @@
nregs_multiple = nregs_xmode / nregs_ymode;
return (y_offset / (mode_multiple / nregs_multiple)) * nregs_ymode;
}
+#endif /* 0 */
/* This function returns true when the offset is representable via
subreg_offset in the given regno.
@@ -3202,10 +3239,42 @@
offset - The byte offset.
ymode - The mode of a top level SUBREG (or what may become one).
RETURN - Whether the offset is representable. */
+
bool
subreg_offset_representable_p (unsigned int xregno, enum machine_mode xmode,
unsigned int offset, enum machine_mode ymode)
{
+/* (subreg:Ymode (reg:Xmode Xregno) offset) */
+ unsigned int r, units;
+
+ gcc_assert (xregno < FIRST_PSEUDO_REGISTER);
+
+ if (GET_MODE_SIZE (xmode) >= GET_MODE_SIZE (ymode))
+ {
+ for (r = xregno, units = 0;
+ units < offset;
+ units += GET_MODE_SIZE (reg_raw_mode[r]), r ++)
+ ;
+ return (units == offset);
+ }
+ else if (offset == 0 && GET_MODE_SIZE (ymode) > reg_raw_mode[xregno]
+ ? WORDS_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+ {
+ /* Big endian paradoxical subreg. */
+ for (r = xregno, units = 0;
+ units < GET_MODE_SIZE (ymode);
+ units += GET_MODE_SIZE (reg_raw_mode[r]), r --)
+ ;
+ return (units == GET_MODE_SIZE (ymode));
+ }
+ return (offset == 0);
+}
+
+#if 0
+bool
+subreg_offset_representable_p (unsigned int xregno, enum machine_mode xmode,
+ unsigned int offset, enum machine_mode ymode)
+{
int nregs_xmode, nregs_ymode, nregs_xmode_unit, nregs_xmode_unit_int;
int mode_multiple, nregs_multiple;
int y_offset;
@@ -3299,6 +3368,7 @@
return (!(y_offset % (mode_multiple / nregs_multiple)));
}
+#endif /* 0 */
/* Return the final regno that a subreg expression refers to. */
unsigned int
In reload, the way that push_reload() calls find_valid_class() assumes that
HARD_REGNO_NREGS (regno, mode) does not change when regno changes. I had to
rewrite that. Note that I end up with two outer mode parameters. I don't
know if their values ever differ. find_valid_class() would never return
anything other than NO_REGS or ALL_REGS. I fixed that too.
Without the patch, reload would run into problems trying to load a
shift count in %si into %cl. With the patch, it uses %cx as an intermediate
register when reloading %cl from a 16-bit register, e.g. "mov %si,%cx" and
"shr %cl,%dx", while reloads from an 8-bit register don't clobber %ch, e.g.
"mov %al,%cl" and "shl %cl,%dx".
Index: gcc/reload.c
===================================================================
--- gcc/reload.c (revision 109766)
+++ gcc/reload.c (working copy)
@@ -246,7 +246,7 @@
enum machine_mode, enum reload_type,
enum insn_code *, secondary_reload_info *);
static enum reg_class find_valid_class (enum machine_mode, enum machine_mode,
- int, unsigned int);
+ int, unsigned int, enum machine_mode);
static int reload_inner_reg_of_subreg (rtx, enum machine_mode, int);
static void push_replacement (rtx *, int, enum machine_mode);
static void dup_replacements (rtx *, rtx *);
@@ -626,17 +626,20 @@
/* Find the largest class which has at least one register valid in
mode INNER, and which for every such register, that register number
- plus N is also valid in OUTER (if in range) and is cheap to move
- into REGNO. Such a class must exist. */
+ plus subreg_regno_offset (regnum, INNER, OFFSET, OUTER2) is also valid
+ in OUTER (if in range) and is cheap to move into REGNO.
+ Such a class must exist. */
static enum reg_class
find_valid_class (enum machine_mode outer ATTRIBUTE_UNUSED,
- enum machine_mode inner ATTRIBUTE_UNUSED, int n,
- unsigned int dest_regno ATTRIBUTE_UNUSED)
+ enum machine_mode inner ATTRIBUTE_UNUSED, int offset,
+ unsigned int dest_regno ATTRIBUTE_UNUSED,
+ enum machine_mode outer2)
{
int best_cost = -1;
int class;
int regno;
+ int n;
enum reg_class best_class = NO_REGS;
enum reg_class dest_class ATTRIBUTE_UNUSED = REGNO_REG_CLASS (dest_regno);
unsigned int best_size = 0;
@@ -646,11 +649,16 @@
{
int bad = 0;
int good = 0;
- for (regno = 0; regno < FIRST_PSEUDO_REGISTER - n && ! bad; regno++)
- if (TEST_HARD_REG_BIT (reg_class_contents[class], regno))
+ unsigned int nregs;
+ for (regno = 0, nregs = hard_regno_nregs[regno][inner];
+ regno + nregs - 1 < FIRST_PSEUDO_REGISTER && ! bad;
+ regno++, nregs = hard_regno_nregs[regno][inner])
+ if (TEST_HARD_REG_BIT (reg_class_contents[class], regno)
+ && TEST_HARD_REG_BIT (reg_class_contents[class], regno + nregs - 1))
{
if (HARD_REGNO_MODE_OK (regno, inner))
{
+ n = subreg_regno_offset (regno, inner, offset, outer2);
good = 1;
if (! TEST_HARD_REG_BIT (reg_class_contents[class], regno + n)
|| ! HARD_REGNO_MODE_OK (regno + n, outer))
@@ -1055,11 +1066,8 @@
if (REG_P (SUBREG_REG (in)))
in_class
= find_valid_class (inmode, GET_MODE (SUBREG_REG (in)),
- subreg_regno_offset (REGNO (SUBREG_REG (in)),
- GET_MODE (SUBREG_REG (in)),
- SUBREG_BYTE (in),
- GET_MODE (in)),
- REGNO (SUBREG_REG (in)));
+ SUBREG_BYTE (in), REGNO (SUBREG_REG (in)),
+ GET_MODE (in));
/* This relies on the fact that emit_reload_insns outputs the
instructions for input reloads of type RELOAD_OTHER in the same
@@ -1149,11 +1157,8 @@
push_reload (SUBREG_REG (out), SUBREG_REG (out), &SUBREG_REG (out),
&SUBREG_REG (out),
find_valid_class (outmode, GET_MODE (SUBREG_REG (out)),
- subreg_regno_offset (REGNO (SUBREG_REG (out)),
- GET_MODE (SUBREG_REG (out)),
- SUBREG_BYTE (out),
- GET_MODE (out)),
- REGNO (SUBREG_REG (out))),
+ SUBREG_BYTE (out),
+ REGNO (SUBREG_REG (out)), GET_MODE (out)),
VOIDmode, VOIDmode, 0, 0,
opnum, RELOAD_OTHER);
}
Comments will be appreciated. It would also be very interesting to see
how this works on the m32c, in particular.
Best regards,
Rask Ingemann Lambertsen