This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: regrename speedup
- From: Bernd Schmidt <bernds_cb1 at t-online dot de>
- To: "H.J. Lu" <hjl dot tools at gmail dot com>
- Cc: Eric Botcazou <ebotcazou at adacore dot com>, gcc-patches at gcc dot gnu dot org
- Date: Wed, 02 Dec 2009 13:01:09 +0100
- Subject: Re: regrename speedup
- References: <4AD9CEF2.50908@t-online.de> <200910221504.37462.ebotcazou@adacore.com> <4AFC5322.1010308@t-online.de> <6dc9ffc80911271649j76d5a78x15c6cb11683907f7@mail.gmail.com>
H.J. Lu wrote:
> This caused:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42202
Here's a patch. Bootstrapped and regression tested on ia64-linux (the
gcc60 machine), with flag_rename_registers forced to 1 (without this
patch the bootstrap fails under those conditions). Since the machine is
so slow, I haven't yet run a comparison with a clean tree, but the
failures I get are all seen in recent gcc-testresults postings prior to
my regrename patches. The gfortran failures are gone.
FAIL: gcc.dg/builtin-apply4.c execution test
FAIL: gcc.dg/pr34668-1.c (internal compiler error)
FAIL: gcc.dg/pr34668-1.c (test for excess errors)
FAIL: gcc.dg/guality/pr41353-1.c -O3 -fomit-frame-pointer line 39 i == 12
FAIL: gcc.dg/guality/pr41353-1.c -O3 -g line 39 i == 12
FAIL: abi_check
FAIL: libmudflap.c/pass54-frag.c execution test
FAIL: libmudflap.c/pass54-frag.c execution test
FAIL: libmudflap.c/pass54-frag.c (-static) execution test
FAIL: libmudflap.c/pass54-frag.c (-static) execution test
FAIL: libmudflap.c/fail31-frag.c (-O3) crash test
FAIL: libmudflap.c/fail31-frag.c (-O3) output pattern test
FAIL: libmudflap.c/pass45-frag.c (-O3) execution test
FAIL: libmudflap.c/pass45-frag.c (-O3) output pattern test
FAIL: libmudflap.c/pass45-frag.c (-O3) execution test
FAIL: libmudflap.c/pass45-frag.c (-O3) output pattern test
FAIL: libmudflap.c++/pass41-frag.cxx execution test
FAIL: libmudflap.c++/pass41-frag.cxx (-static) execution test
FAIL: libmudflap.c++/pass41-frag.cxx ( -O) execution test
FAIL: libmudflap.c++/pass41-frag.cxx (-O2) execution test
FAIL: libmudflap.c++/pass41-frag.cxx (-O3) execution test
FAIL: libffi.call/cls_longdouble_va.c -O0 -W -Wall output pattern test, is %.1f
FAIL: libffi.call/cls_longdouble_va.c -O2 output pattern test, is %.1f
FAIL: libffi.call/cls_longdouble_va.c -O3 output pattern test, is %.1f
FAIL: libffi.call/cls_longdouble_va.c -Os output pattern test, is %.1f
FAIL: libffi.call/cls_longdouble_va.c -O2 -fomit-frame-pointer output pattern test, is %.1f
FAIL: getlocalvartable output
FAIL: Throw_3 -O3 output - source compiled test
FAIL: Throw_3 -O3 -findirect-dispatch output - source compiled test
The closest matches appear to be
http://gcc.gnu.org/ml/gcc-testresults/2009-11/msg02593.html
http://gcc.gnu.org/ml/gcc-testresults/2009-11/msg02570.html
http://gcc.gnu.org/ml/gcc-testresults/2009-11/msg02537.html
which are about as old as the tree I was testing.
I could install this now, or wait for the clean results (without this and
the previous regrename patch), which should take another 24 hours or so.
Bernd
--
This footer brought to you by insane German lawmakers.
Analog Devices GmbH Wilhelm-Wagenfeld-Str. 6 80807 Muenchen
Sitz der Gesellschaft Muenchen, Registergericht Muenchen HRB 40368
Geschaeftsfuehrer Thomas Wessel, William A. Martin, Margaret Seif
* regrename.c (live_in_chains): New variable.
(verify_reg_tracked): New static function.
(scan_rtx_reg): Update live_in_chains.
(scan_rtx): Only promote sets in COND_EXEC to OP_INOUT if
we're already tracking the reg.
(build_def_use): Likewise. Initialize live_in_chains.
Index: regrename.c
===================================================================
--- regrename.c (revision 154688)
+++ regrename.c (working copy)
@@ -438,6 +438,54 @@ static struct du_head *closed_chains;
static bitmap_head open_chains_set;
static HARD_REG_SET live_hard_regs;
+/* Record the registers being tracked in open_chains. The intersection
+ between this and live_hard_regs is empty. */
+static HARD_REG_SET live_in_chains;
+
+/* Return true if OP is a reg that is being tracked already in some form, i.e. all of its hard registers are in live_hard_regs or all are in live_in_chains.
+ May set fail_current_block if it sees an unhandled case of overlap (only some of OP's hard registers present in one of those sets). */
+
+static bool
+verify_reg_tracked (rtx op)
+{
+ unsigned regno, nregs;
+ bool all_live, all_dead;
+ if (!REG_P (op)) /* Non-register operands are reported as untracked. */
+ return false;
+
+ regno = REGNO (op);
+ nregs = hard_regno_nregs[regno][GET_MODE (op)];
+ all_live = all_dead = true;
+ while (nregs-- > 0) /* Test each of the nregs registers starting at regno against live_hard_regs. */
+ if (TEST_HARD_REG_BIT (live_hard_regs, regno + nregs))
+ all_dead = false;
+ else
+ all_live = false;
+ if (!all_dead && !all_live) /* Mixed result: some bits set, some clear. */
+ {
+ fail_current_block = true;
+ return false;
+ }
+
+ if (all_live) /* Every register was found in live_hard_regs. */
+ return true;
+
+ nregs = hard_regno_nregs[regno][GET_MODE (op)]; /* The first loop consumed nregs; recompute it for the second pass. */
+ all_live = all_dead = true;
+ while (nregs-- > 0) /* Same per-register test, this time against live_in_chains. */
+ if (TEST_HARD_REG_BIT (live_in_chains, regno + nregs))
+ all_dead = false;
+ else
+ all_live = false;
+ if (!all_dead && !all_live) /* Mixed result again: flag the block and report untracked. */
+ {
+ fail_current_block = true;
+ return false;
+ }
+
+ return all_live; /* True if every register was found in live_in_chains, false if none was. */
+}
+
/* Called through note_stores. DATA points to a rtx_code, either SET or
CLOBBER, which tells us which kind of rtx to look at. If we have a
match, record the set register in live_hard_regs and in the hard_conflicts
@@ -495,10 +543,14 @@ scan_rtx_reg (rtx insn, rtx *loc, enum r
mark_conflict (open_chains, head->id);
/* Since we're tracking this as a chain now, remove it from the
- list of conflicting live hard registers. */
+ list of conflicting live hard registers and track it in
+ live_in_chains instead. */
nregs = head->nregs;
while (nregs-- > 0)
- CLEAR_HARD_REG_BIT (live_hard_regs, head->regno + nregs);
+ {
+ SET_HARD_REG_BIT (live_in_chains, head->regno + nregs);
+ CLEAR_HARD_REG_BIT (live_hard_regs, head->regno + nregs);
+ }
COPY_HARD_REG_SET (head->hard_conflicts, live_hard_regs);
bitmap_set_bit (&open_chains_set, head->id);
@@ -583,10 +635,17 @@ scan_rtx_reg (rtx insn, rtx *loc, enum r
if ((action == terminate_dead || action == terminate_write)
&& superset)
{
+ unsigned nregs;
+
head->terminated = 1;
head->next_chain = closed_chains;
closed_chains = head;
bitmap_clear_bit (&open_chains_set, head->id);
+
+ nregs = head->nregs;
+ while (nregs-- > 0)
+ CLEAR_HARD_REG_BIT (live_in_chains, head->regno + nregs);
+
*p = next;
if (dump_file)
fprintf (dump_file,
@@ -803,7 +862,8 @@ scan_rtx (rtx insn, rtx *loc, enum reg_c
case SET:
scan_rtx (insn, &SET_SRC (x), cl, action, OP_IN);
scan_rtx (insn, &SET_DEST (x), cl, action,
- GET_CODE (PATTERN (insn)) == COND_EXEC ? OP_INOUT : OP_OUT);
+ (GET_CODE (PATTERN (insn)) == COND_EXEC
+ && verify_reg_tracked (SET_DEST (x))) ? OP_INOUT : OP_OUT);
return;
case STRICT_LOW_PART:
@@ -829,7 +889,8 @@ scan_rtx (rtx insn, rtx *loc, enum reg_c
case CLOBBER:
scan_rtx (insn, &SET_DEST (x), cl, action,
- GET_CODE (PATTERN (insn)) == COND_EXEC ? OP_INOUT : OP_OUT);
+ (GET_CODE (PATTERN (insn)) == COND_EXEC
+ && verify_reg_tracked (SET_DEST (x))) ? OP_INOUT : OP_OUT);
return;
case EXPR_LIST:
@@ -962,6 +1023,7 @@ build_def_use (basic_block bb)
current_id = 0;
bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
+ CLEAR_HARD_REG_SET (live_in_chains);
REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
{
@@ -1026,7 +1088,8 @@ build_def_use (basic_block bb)
if (matches >= 0)
recog_op_alt[i][alt].cl = recog_op_alt[matches][alt].cl;
if (matches >= 0 || recog_op_alt[i][alt].matched >= 0
- || (predicated && recog_data.operand_type[i] == OP_OUT))
+ || (predicated && recog_data.operand_type[i] == OP_OUT
+ && verify_reg_tracked (recog_data.operand[i])))
{
recog_data.operand_type[i] = OP_INOUT;
if (matches >= 0