This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] reloading of subregs
- From: Rask Ingemann Lambertsen <rask at sygehus dot dk>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 23 May 2007 22:14:43 +0200
- Subject: [PATCH] reloading of subregs
Fixing testsuite failures on my 16-bit x86 port, I've found
miscompilation of this simple testcase extracted from
gcc.c-torture/execute/20020108-1.c:
long int ashiftsi3_1 (long int x)
{
return x << 1;
}
At -O0, I get (with no ashlsi3 expander):
[pushes registers]
movw %sp, %bp ;# 48 *movhi/1
subw $2, %sp ;# 49 *subhi3/2
movw 12(%bp),%ax ;# 5 *movhi/1
movb $15, %cl ;# 6 *movqi/1
movw %ax, %di ;# 32 *movhi/1
shrw %cl, %di ;# 7 *lshrhi3
movw 14(%bp),%cx ;# 8 *movhi/1
movw 12(%bp),%ax ;# 11 *movhi/1
movw %ax, -2(%bp) ;# 33 *movhi/2
movw %ax, %bx ;# 34 *movhi/1
movw %dx, %si ;# 35 *movhi/1
movw %cx, %dx ;# 38 *movhi/1
shlw $1, %dx ;# 9 *ashlhi3
movw %dx, %si ;# 39 *movhi/1
movw %bx, %ax ;# 36 *movhi/1
movw %si, %dx ;# 37 *movhi/1
orw %di, %dx ;# 10 *iorhi3/6 <- sets high part
movw -2(%bp),%si ;# 40 *movhi/1
shlw $1, %si ;# 12 *ashlhi3 <- sets low part
movw %si, %ax ;# 41 *movhi/1
movw %di, %dx ;# 42 *movhi/1 <- clobbers high part
movw %ax, %si ;# 14 *movhi/1
movw %dx, %di ;# 15 *movhi/1
movw %si, %ax ;# 20 *movhi/1
movw %di, %dx ;# 21 *movhi/1
movw %bp, %sp ;# 52 *movhi/1
[pops registers]
Before reload (lreg dump):
;; Register 20 in 2.
;; Register 21 in 8.
;; Register 22 in 9.
;; Register 23 in 2.
;; Register 24 in 0.
;; Register 25 in 0.
;; Register 26 in 8.
(insn 5 2 6 2 /tmp/ashiftsi3_1.c:3 (set (reg:HI 23 [ x ])
(mem/c/i:HI (plus:HI (reg/f:HI 14 argp)
(const_int 2 [0x2])) [0 x+0 S2 A16])) 9 {*movhi} (nil)
(nil))
(insn 6 5 7 2 /tmp/ashiftsi3_1.c:3 (set (reg:QI 24)
(const_int 15 [0xf])) 11 {*movqi} (nil)
(nil))
(insn 7 6 8 2 /tmp/ashiftsi3_1.c:3 (parallel [
(set (reg:HI 22)
(lshiftrt:HI (reg:HI 23 [ x ])
(reg:QI 24)))
(clobber (reg:CC 13 cc))
]) 355 {*lshrhi3} (nil)
(expr_list:REG_DEAD (reg:HI 23 [ x ])
(expr_list:REG_DEAD (reg:QI 24)
(expr_list:REG_UNUSED (reg:CC 13 cc)
(expr_list:REG_EQUAL (lshiftrt:HI (mem/c/i:HI (plus:HI (reg/f:HI 14 argp)
(const_int 2 [0x2])) [0 x+0 S2 A16])
(const_int 15 [0xf]))
(nil))))))
(insn 8 7 11 2 /tmp/ashiftsi3_1.c:3 (set (reg:HI 25 [ x+2 ])
(mem/c/i:HI (plus:HI (reg/f:HI 14 argp)
(const_int 4 [0x4])) [0 x+2 S2 A16])) 9 {*movhi} (nil)
(nil))
(insn 11 8 13 2 /tmp/ashiftsi3_1.c:3 (set (reg:HI 26 [ x ])
(mem/c/i:HI (plus:HI (reg/f:HI 14 argp)
(const_int 2 [0x2])) [0 x+0 S2 A16])) 9 {*movhi} (nil)
(nil))
(insn 13 11 9 2 /tmp/ashiftsi3_1.c:3 (clobber (reg:SI 20 [ D.1497 ])) -1 (nil)
(insn_list:REG_LIBCALL 12 (nil)))
(insn 9 13 10 2 /tmp/ashiftsi3_1.c:3 (parallel [
(set (subreg:HI (reg:SI 20 [ D.1497 ]) 2)
(ashift:HI (reg:HI 25 [ x+2 ])
(const_int 1 [0x1])))
(clobber (reg:CC 13 cc))
]) 353 {*ashlhi3} (nil)
(expr_list:REG_DEAD (reg:HI 25 [ x+2 ])
(expr_list:REG_UNUSED (reg:CC 13 cc)
(expr_list:REG_EQUAL (ashift:HI (mem/c/i:HI (plus:HI (reg/f:HI 14 argp)
(const_int 4 [0x4])) [0 x+2 S2 A16])
(const_int 1 [0x1]))
(nil)))))
(insn 10 9 12 2 /tmp/ashiftsi3_1.c:3 (parallel [
(set (subreg:HI (reg:SI 20 [ D.1497 ]) 2)
(ior:HI (reg:HI 22)
(subreg:HI (reg:SI 20 [ D.1497 ]) 2)))
(clobber (reg:CC 13 cc))
]) 98 {*iorhi3} (nil)
(expr_list:REG_DEAD (reg:HI 22)
(expr_list:REG_UNUSED (reg:CC 13 cc)
(nil))))
(insn 12 10 16 2 /tmp/ashiftsi3_1.c:3 (parallel [
(set (subreg:HI (reg:SI 20 [ D.1497 ]) 0)
(ashift:HI (reg:HI 26 [ x ])
(const_int 1 [0x1])))
(clobber (reg:CC 13 cc))
]) 353 {*ashlhi3} (nil)
(expr_list:REG_DEAD (reg:HI 26 [ x ])
(expr_list:REG_UNUSED (reg:CC 13 cc)
(insn_list:REG_RETVAL 13 (nil)))))
(insn 16 12 14 2 /tmp/ashiftsi3_1.c:3 (clobber (reg:SI 21 [ <result> ])) -1 (nil)
(nil))
(insn 14 16 15 2 /tmp/ashiftsi3_1.c:3 (set (subreg:HI (reg:SI 21 [ <result> ]) 0)
(subreg:HI (reg:SI 20 [ D.1497 ]) 0)) 9 {*movhi} (nil)
(nil))
(insn 15 14 20 2 /tmp/ashiftsi3_1.c:3 (set (subreg:HI (reg:SI 21 [ <result> ]) 2)
(subreg:HI (reg:SI 20 [ D.1497 ]) 2)) 9 {*movhi} (nil)
(expr_list:REG_DEAD (reg:SI 20 [ D.1497 ])
(nil)))
(insn 20 15 21 2 /tmp/ashiftsi3_1.c:4 (set (reg:HI 2 a [ <result> ])
(subreg:HI (reg:SI 21 [ <result> ]) 0)) 9 {*movhi} (nil)
(nil))
(insn 21 20 27 2 /tmp/ashiftsi3_1.c:4 (set (reg:HI 4 d [+2 ])
(subreg:HI (reg:SI 21 [ <result> ]) 2)) 9 {*movhi} (nil)
(expr_list:REG_DEAD (reg:SI 21 [ <result> ])
(nil)))
(insn 27 21 0 2 /tmp/ashiftsi3_1.c:4 (use (reg/i:SI 2 a)) -1 (nil)
(nil))
Reload produces:
;; Function ashiftsi3_1 (ashiftsi3_1)
Spilling for insn 7.
Spilling for insn 9.
Using reg 6 for reload 0
Spilling for insn 12.
Using reg 8 for reload 0
Register 26 now on stack.
Spilling for insn 7.
Spilling for insn 11.
Using reg 2 for reload 0
Spilling for insn 9.
Using reg 6 for reload 0
Spilling for insn 12.
Using reg 8 for reload 0
Reloads for insn # 7
Reload order: 0
Reload 0: reload_in (HI) = (reg:HI 2 a [orig:23 x ] [23])
reload_out (HI) = (reg:HI 9 di [22])
GENERAL_REGS, RELOAD_OTHER (opnum = 0)
reload_in_reg: (reg:HI 2 a [orig:23 x ] [23])
reload_out_reg: (reg:HI 9 di [22])
reload_reg_rtx: (reg:HI 9 di [22])
Reloads for insn # 11
Reload order: 0
Reload 0: reload_out (HI) = (reg:HI 26 [ x ])
GENERAL_REGS, RELOAD_FOR_OUTPUT (opnum = 0)
reload_out_reg: (reg:HI 26 [ x ])
reload_reg_rtx: (reg:HI 2 a)
Reloads for insn # 9
Reload order: 0 1
Reload 0: reload_in (SI) = (reg:SI 2 a [orig:20 D.1497 ] [20])
reload_out (SI) = (reg:SI 2 a [orig:20 D.1497 ] [20])
SEG_GENERAL_REGS, RELOAD_OTHER (opnum = 0)
reload_in_reg: (reg:SI 2 a [orig:20 D.1497 ] [20])
reload_out_reg: (reg:SI 2 a [orig:20 D.1497 ] [20])
reload_reg_rtx: (reg:SI 6 b)
Reload 1: reload_in (HI) = (reg:HI 0 c [orig:25 x+2 ] [25])
reload_out (HI) = (subreg:HI (reg:SI 2 a [orig:20 D.1497 ] [20]) 2)
GENERAL_REGS, RELOAD_OTHER (opnum = 0)
reload_in_reg: (reg:HI 0 c [orig:25 x+2 ] [25])
reload_out_reg: (subreg:HI (reg:SI 2 a [orig:20 D.1497 ] [20]) 2)
reload_reg_rtx: (reg:HI 4 d)
Reloads for insn # 12
Reload order: 0
Reload 0: reload_in (HI) = (reg:HI 26 [ x ])
reload_out (SI) = (reg:SI 2 a [orig:20 D.1497 ] [20])
GENERAL_REGS, RELOAD_OTHER (opnum = 0)
reload_in_reg: (reg:HI 26 [ x ])
reload_out_reg: (reg:SI 2 a [orig:20 D.1497 ] [20])
reload_reg_rtx: (reg:SI 8 si)
;; Register dispositions:
20 in 2 21 in 8 22 in 9 23 in 2 24 in 0 25 in 0
Note that the first 8 hard regs are 8 bits wide while the rest are 16
bits wide. The problem with insn 12 is that reload thinks it can't use
(subreg:HI (reg:SI 2 a) 0) for reload_out_reg. It happens in push_reload()
from around line 1097 in reload.c:
/* Similarly for paradoxical and problematical SUBREGs on the output.
Note that there is no reason we need worry about the previous value
of SUBREG_REG (out); even if wider than out,
storing in a subreg is entitled to clobber it all
(except in the case of STRICT_LOW_PART,
and in that case the constraint should label it input-output.) */
[...]
|| (REG_P (SUBREG_REG (out))
&& REGNO (SUBREG_REG (out)) < FIRST_PSEUDO_REGISTER
&& ((GET_MODE_SIZE (outmode) <= UNITS_PER_WORD
&& (GET_MODE_SIZE (GET_MODE (SUBREG_REG (out)))
> UNITS_PER_WORD)
&& ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (out)))
/ UNITS_PER_WORD)
!= (int) hard_regno_nregs[REGNO (SUBREG_REG (out))]
[GET_MODE (SUBREG_REG (out))]))
|| ! HARD_REGNO_MODE_OK (subreg_regno (out), outmode)))
push_reload (in=0xb7f833b0, out=0xb7f7578c, inloc=0xb7f758bc, outloc=0xb7f758c8, class=GENERAL_REGS, inmode=HImode, outmode=HImode, strict_low=0, optional=0,
opnum=0, type=RELOAD_OTHER) at ../../../cvssrc/gcc/gcc/reload.c:929
(gdb) call debug_rtx(in)
(reg:HI 8 si [orig:26 x ] [26])
(gdb) call debug_rtx(out)
(subreg:HI (reg:SI 2 a [orig:20 D.1497 ] [20]) 0)
and since we have
GET_MODE_SIZE (GET_MODE (SUBREG_REG (out))) = 4
UNITS_PER_WORD = 2
hard_regno_nregs[2][SImode] = 4 (8-bit registers)
the condition is true and reload "upgrades" the output reload from HImode to
SImode. The output reload from %di:%si to %dx:%ax thus clobbers %dx:
(insn 12 40 41 2 /tmp/ashiftsi3_1.c:3 (parallel [
(set (reg:HI 8 si)
(ashift:HI (reg:HI 8 si)
(const_int 1 [0x1])))
(clobber (reg:CC 13 cc))
]) 353 {*ashlhi3} (nil)
(nil))
(insn 41 12 42 2 /tmp/ashiftsi3_1.c:3 (set (reg:HI 2 a [orig:20 D.1497 ] [20])
(reg:HI 8 si)) 9 {*movhi} (nil)
(nil))
(insn 42 41 14 2 /tmp/ashiftsi3_1.c:3 (set (reg:HI 4 d [ D.1497+2 ])
(reg:HI 9 di [+2 ])) 9 {*movhi} (nil)
(nil))
The root cause of the problem is that reload thinks the subreg is
allowed to clobber the whole inner reg while the rest of the compiler thinks
it isn't. The patch below fixes this particular case, where reload expects
the hard regs to be at least UNITS_PER_WORD wide, but I'm wondering if
perhaps reload should be using subreg_lowpart_p() or something similiar.
With the patch, reload produces the expected HImode reloads:
;; Function ashiftsi3_1 (ashiftsi3_1)
Spilling for insn 7.
Spilling for insn 9.
Spilling for insn 12.
Reloads for insn # 7
Reload order: 0
Reload 0: reload_in (HI) = (reg:HI 2 a [orig:23 x ] [23])
reload_out (HI) = (reg:HI 9 di [22])
GENERAL_REGS, RELOAD_OTHER (opnum = 0)
reload_in_reg: (reg:HI 2 a [orig:23 x ] [23])
reload_out_reg: (reg:HI 9 di [22])
reload_reg_rtx: (reg:HI 9 di [22])
Reloads for insn # 9
Reload order: 0
Reload 0: reload_in (HI) = (reg:HI 0 c [orig:25 x+2 ] [25])
reload_out (HI) = (reg:HI 4 d)
GENERAL_REGS, RELOAD_OTHER (opnum = 0)
reload_in_reg: (reg:HI 0 c [orig:25 x+2 ] [25])
reload_out_reg: (subreg:HI (reg:SI 2 a [orig:20 D.1497 ] [20]) 2)
reload_reg_rtx: (reg:HI 4 d)
Reloads for insn # 12
Reload order: 0
Reload 0: reload_in (HI) = (reg:HI 8 si [orig:26 x ] [26])
reload_out (HI) = (reg:HI 2 a)
GENERAL_REGS, RELOAD_OTHER (opnum = 0)
reload_in_reg: (reg:HI 8 si [orig:26 x ] [26])
reload_out_reg: (subreg:HI (reg:SI 2 a [orig:20 D.1497 ] [20]) 0)
reload_reg_rtx: (reg:HI 2 a)
;; Register dispositions:
20 in 2 21 in 8 22 in 9 23 in 2 24 in 0 25 in 0
26 in 8
[cut]
(insn 12 35 14 2 /tmp/ashiftsi3_1.c:3 (parallel [
(set (reg:HI 2 a)
(ashift:HI (reg:HI 2 a)
(const_int 1 [0x1])))
(clobber (reg:CC 13 cc))
]) 353 {*ashlhi3} (nil)
(nil))
(insn 14 12 15 2 /tmp/ashiftsi3_1.c:3 (set (reg:HI 8 si [orig:21 <result> ] [21])
(reg:HI 2 a [orig:20 D.1497 ] [20])) 9 {*movhi} (nil)
(nil))
The resulting assembly is also much improved:
[pushes registers]
movw %sp, %bp ;# 41 *movhi/1
movw 10(%bp),%ax ;# 5 *movhi/1
movb $15, %cl ;# 6 *movqi/1
movw %ax, %di ;# 32 *movhi/1
shrw %cl, %di ;# 7 *lshrhi3
movw 12(%bp),%cx ;# 8 *movhi/1
movw 10(%bp),%si ;# 11 *movhi/1
movw %cx, %dx ;# 33 *movhi/1
shlw $1, %dx ;# 9 *ashlhi3
orw %di, %dx ;# 10 *iorhi3/6
movw %si, %ax ;# 35 *movhi/1
shlw $1, %ax ;# 12 *ashlhi3
movw %ax, %si ;# 14 *movhi/1
movw %dx, %di ;# 15 *movhi/1
movw %si, %ax ;# 20 *movhi/1
movw %di, %dx ;# 21 *movhi/1
[pops registers]
I have built and tested mipsisa64-unknown-elf, sh-unknown-elf,
v850-unknown-elf and arm-unknown-elf for all default languages as a cross
compiler from i686-pc-linux-gnu. There were no new failures. I also checked
that the target libraries have the same size for each archive member with
and without the patch.
I have bootstrapped and tested i686-pc-linux-gnu for all default
languages. The only change in testresults is a known problem:
-FAIL: gfortran.dg/secnds.f -O3 -g execution test
+FAIL: gfortran.dg/secnds.f -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions execution test
See <URL:http://gcc.gnu.org/bugzilla/show_bug.cgi?id=32057> and
<URL:http://gcc.gnu.org/ml/gcc-patches/2007-05/msg00766.html>. There is no
change in the size of the target libraries' archive members.
Finally, the patch fixes a few failures I was seeing on my 16-bit x86
port:
gcc.c-torture/execute/20020108-1.c execution, -O0
gcc.c-torture/execute/960311-3.c execution, -O0
gcc.dg/builtin-bswap-4.c execution test
Ok for mainline?
(Please commit if it is OK. Thanks.)
:ADDPATCH reload:
2005-05-23 Rask Ingemann Lambertsen <rask@sygehus.dk>
* reload.c (push_reload): Don't reload a subreg just because
its inner reg hard regs are smaller than UNITS_PER_WORD.
(reload_inner_reg_of_subreg): Likewise.
Index: reload.c
===================================================================
--- reload.c (revision 124905)
+++ reload.c (working copy)
@@ -808,13 +808,13 @@ reload_inner_reg_of_subreg (rtx x, enum
return 1;
/* If the outer part is a word or smaller, INNER larger than a
- word and the number of regs for INNER is not the same as the
+ word and the number of regs for INNER is smaller than the
number of words in INNER, then INNER will need reloading. */
return (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
&& output
&& GET_MODE_SIZE (GET_MODE (inner)) > UNITS_PER_WORD
&& ((GET_MODE_SIZE (GET_MODE (inner)) / UNITS_PER_WORD)
- != (int) hard_regno_nregs[REGNO (inner)][GET_MODE (inner)]));
+ > (int) hard_regno_nregs[REGNO (inner)][GET_MODE (inner)]));
}
/* Return nonzero if IN can be reloaded into REGNO with mode MODE without
@@ -1034,7 +1034,7 @@ push_reload (rtx in, rtx out, rtx *inloc
> UNITS_PER_WORD)
&& ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))
/ UNITS_PER_WORD)
- != (int) hard_regno_nregs[REGNO (SUBREG_REG (in))]
+ > (int) hard_regno_nregs[REGNO (SUBREG_REG (in))]
[GET_MODE (SUBREG_REG (in))]))
|| ! HARD_REGNO_MODE_OK (subreg_regno (in), inmode)))
|| (secondary_reload_class (1, class, inmode, in) != NO_REGS
@@ -1127,7 +1127,7 @@ push_reload (rtx in, rtx out, rtx *inloc
> UNITS_PER_WORD)
&& ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (out)))
/ UNITS_PER_WORD)
- != (int) hard_regno_nregs[REGNO (SUBREG_REG (out))]
+ > (int) hard_regno_nregs[REGNO (SUBREG_REG (out))]
[GET_MODE (SUBREG_REG (out))]))
|| ! HARD_REGNO_MODE_OK (subreg_regno (out), outmode)))
|| (secondary_reload_class (0, class, outmode, out) != NO_REGS
--
Rask Ingemann Lambertsen