This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[patch, CSE] Bit-field insertion optimization
- From: Andrew Stubbs <ams at codesourcery dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 09 Dec 2010 16:45:11 +0000
- Subject: [patch, CSE] Bit-field insertion optimization
The attached patch fixes a bug in which constant assignments to bit-fields
are poorly optimized: each constant is inserted separately instead of all
of them being folded into a single constant. I'm seeing this on ARM, but I
imagine it affects other targets similarly.
The first problem is that CSE cannot determine that the result is
constant because the auto-variable is implicitly initialized. I have
solved this by moving up the init-regs pass to before cse2. It might be
better to move it before cse1, but that's a bigger change, so I'm not sure.
The second problem is that the pattern match for ZERO_EXTRACT requires
that the source operand is an immediate constant, which is never the case on
ARM (and presumably is only the case with a limited range of inputs even
on other targets). I have added code to detect known-constant input
registers.
Test case:
/* Test-case type: four consecutive 5-bit unsigned bit-fields
   (20 bits in total).  */
struct bits
{
unsigned a:5;
unsigned b:5;
unsigned c:5;
unsigned d:5;
};
/* Test-case function: zero-initialize a struct bits, store the constants
   1, 2 and 3 into fields a, b and c, store the argument A into field d,
   and return the struct by value.  Only the store to d depends on the
   argument.  In the ARM "After" listing that follows, the three constant
   stores appear as the single constant 3137 (1 | 2 << 5 | 3 << 10).  */
struct bits
f (unsigned int a)
{
struct bits bits = {0,0,0,0};
/* Three bit-field stores of compile-time constants.  */
bits.a = 1;
bits.b = 2;
bits.c = 3;
/* The only store whose value is not known at compile time.  */
bits.d = a;
return bits;
}
Before, compiled for ARM with "-O2 -mcpu=cortex-a8 -mthumb":
movs r2, #1
movs r3, #0
bfi r3, r2, #0, #5
movs r2, #2
bfi r3, r2, #5, #5
movs r2, #3
bfi r3, r2, #10, #5
bfi r3, r0, #15, #5
mov r0, r3
bx lr
After:
movw r3, #3137
bfi r3, r0, #15, #5
mov r0, r3
bx lr
OK for commit, once stage 1 opens again?
Andrew
2010-12-09 Andrew Stubbs <ams@codesourcery.com>
gcc/
* cse.c (cse_insn): Add support for ZERO_EXTRACT with a register
source operand.
* passes.c (init_optimization_passes): Move initialize_regs
before cse2.
---
src/gcc-mainline/gcc/cse.c | 24 ++++++++++++++++++++----
src/gcc-mainline/gcc/passes.c | 2 +-
2 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/src/gcc-mainline/gcc/cse.c b/src/gcc-mainline/gcc/cse.c
index 3ab6b37..d283040 100644
--- a/src/gcc-mainline/gcc/cse.c
+++ b/src/gcc-mainline/gcc/cse.c
@@ -5036,7 +5036,6 @@ cse_insn (rtx insn)
(set (zero_extract:M2 (reg:M N) (const_int C) (const_int D))
(reg:M2 O)). */
if (GET_CODE (SET_DEST (sets[i].rtl)) == ZERO_EXTRACT
- && CONST_INT_P (trial)
&& CONST_INT_P (XEXP (SET_DEST (sets[i].rtl), 1))
&& CONST_INT_P (XEXP (SET_DEST (sets[i].rtl), 2))
&& REG_P (XEXP (SET_DEST (sets[i].rtl), 0))
@@ -5052,9 +5051,26 @@ cse_insn (rtx insn)
unsigned int dest_hash = HASH (dest_reg, GET_MODE (dest_reg));
struct table_elt *dest_elt
= lookup (dest_reg, dest_hash, GET_MODE (dest_reg));
- rtx dest_cst = NULL;
+ rtx dest_cst = NULL, src_cst = NULL;
- if (dest_elt)
+ if (CONST_INT_P (trial))
+ src_cst = trial;
+ else if (REG_P (trial))
+ {
+ unsigned int src_hash = HASH (trial, GET_MODE (trial));
+ struct table_elt *src_elt
+ = lookup (trial, src_hash, GET_MODE (trial));
+
+ if (src_elt)
+ for (p = src_elt->first_same_value; p; p = p->next_same_value)
+ if (p->is_const && CONST_INT_P (p->exp))
+ {
+ src_cst = p->exp;
+ break;
+ }
+ }
+
+ if (src_cst && dest_elt)
for (p = dest_elt->first_same_value; p; p = p->next_same_value)
if (p->is_const && CONST_INT_P (p->exp))
{
@@ -5076,7 +5092,7 @@ cse_insn (rtx insn)
else
mask = ((HOST_WIDE_INT) 1 << INTVAL (width)) - 1;
val &= ~(mask << shift);
- val |= (INTVAL (trial) & mask) << shift;
+ val |= (INTVAL (src_cst) & mask) << shift;
val = trunc_int_for_mode (val, GET_MODE (dest_reg));
validate_unshare_change (insn, &SET_DEST (sets[i].rtl),
dest_reg, 1);
diff --git a/src/gcc-mainline/gcc/passes.c b/src/gcc-mainline/gcc/passes.c
index 4be61a9..c1c656d 100644
--- a/src/gcc-mainline/gcc/passes.c
+++ b/src/gcc-mainline/gcc/passes.c
@@ -990,11 +990,11 @@ init_optimization_passes (void)
}
NEXT_PASS (pass_web);
NEXT_PASS (pass_rtl_cprop);
+ NEXT_PASS (pass_initialize_regs);
NEXT_PASS (pass_cse2);
NEXT_PASS (pass_rtl_dse1);
NEXT_PASS (pass_rtl_fwprop_addr);
NEXT_PASS (pass_inc_dec);
- NEXT_PASS (pass_initialize_regs);
NEXT_PASS (pass_ud_rtl_dce);
NEXT_PASS (pass_combine);
NEXT_PASS (pass_if_after_combine);