This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] fix PR opt/2391 (mostly ARM)
- From: Adam Nemet <anemet at Lnxw dot com>
- To: gcc-patches at gcc dot gnu dot org, rearnsha at arm dot com
- Date: Fri, 10 Jan 2003 10:56:43 -0800
- Subject: [PATCH] fix PR opt/2391 (mostly ARM)
Hi,
The patch below attempts to fix the long outstanding PR opt/2391. The
testcase triggers exponential compilation time in the instruction
combiner with the ARM backend. This is however not a backend-specific
bug since similar testcases can be built for other backends that would
exhibit the same behavior. (e.g. for i386 replace the a^=a+a with
a=a*a)
The combiner records the last value assigned to each register. This is
done in function record_value_for_reg. If the new value contains the
register itself its previous value will be substituted first and then
the resulting expression will be the new ``last value''. E.g. for the
above expression a = a * a, if a had A as is last value previously the
new last value will be (mult A A). So with every new a = a * a
statement the combiner doubles the size of the rtx that holds the last
value of a. In turn for functions that traverse the last value it
will take twice as much time to complete the expression with every
statement (a = a * a) added to the program.
The solution that I came up with teaches the functions that traverse
last values to recognize expression of forms (op A A) or (op1 A (op2 A
B)) and only traverse the subexpression A once and then reuse the
result.
Boostrapped on i686-pc-gnu-linux. Regtested gcc/g++/g77/objc/libjava
on arm-elf and on arm-elf/-mthumb, with no regressions.
Also in order to check that I didn't change the functionality of the
combiner I compared the bootstrapped compiler (built with -O2) to the
original bootstrapped compiler: combiner.o was the only module that
was different.
Does anyone have a better solution? Other suggestions? OK to apply?
Adam
Index: ChangeLog
from Adam Nemet <anemet@lnxw.com>
PR opt/2391
* combine.c: Fix spelling in comment. Update copyright.
(nonzero_bits2): Rename from nonzero_bits. Add two
arguments. Change calls from nonzero_bits to nonzero_bits1.
(num_sign_bit_copies2): Rename from num_sign_bit_copies. Add two
arguments. Change calls from num_sign_bit_copies to
num_sign_bit_copies1.
(nonzero_bits): New macro.
(num_sign_bit_copies): New macro.
(nonzero_bits1): New function.
(num_sign_bit_copies1): New function.
(update_table_tick): Don't traverse identical subexpression more
than once.
(get_last_value_validate): Likewise.
Index: combine.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/combine.c,v
retrieving revision 1.326
diff -u -p -r1.326 combine.c
--- combine.c 16 Dec 2002 18:19:08 -0000 1.326
+++ combine.c 10 Jan 2003 18:43:31 -0000
@@ -1,6 +1,6 @@
/* Optimize by combining instructions for GNU compiler.
Copyright (C) 1987, 1988, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
- 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+ 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
This file is part of GCC.
@@ -139,6 +139,9 @@ static int max_uid_cuid;
#define UWIDE_SHIFT_LEFT_BY_BITS_PER_WORD(val) \
(((unsigned HOST_WIDE_INT) (val) << (BITS_PER_WORD - 1)) << 1)
+#define nonzero_bits(X, M) nonzero_bits1 (X, M, NULL_RTX, 0)
+#define num_sign_bit_copies(X, M) num_sign_bit_copies1 (X, M, NULL_RTX, 0)
+
/* Maximum register number, which is the size of the tables below. */
static unsigned int combine_max_regno;
@@ -198,7 +201,7 @@ static basic_block this_basic_block;
static sbitmap refresh_blocks;
/* The next group of arrays allows the recording of the last value assigned
- to (hard or pseudo) register n. We use this information to see if a
+ to (hard or pseudo) register n. We use this information to see if an
operation being processed is redundant given a prior operation performed
on the register. For example, an `and' with a constant is redundant if
all the zero bits are already known to be turned off.
@@ -371,8 +374,16 @@ static rtx make_field_assignment PARAMS
static rtx apply_distributive_law PARAMS ((rtx));
static rtx simplify_and_const_int PARAMS ((rtx, enum machine_mode, rtx,
unsigned HOST_WIDE_INT));
-static unsigned HOST_WIDE_INT nonzero_bits PARAMS ((rtx, enum machine_mode));
-static unsigned int num_sign_bit_copies PARAMS ((rtx, enum machine_mode));
+static unsigned HOST_WIDE_INT nonzero_bits1 PARAMS ((rtx, enum machine_mode,
+ rtx,
+ unsigned HOST_WIDE_INT));
+static unsigned HOST_WIDE_INT nonzero_bits2 PARAMS ((rtx, enum machine_mode,
+ rtx,
+ unsigned HOST_WIDE_INT));
+static unsigned int num_sign_bit_copies1 PARAMS ((rtx, enum machine_mode,
+ rtx, unsigned int));
+static unsigned int num_sign_bit_copies2 PARAMS ((rtx, enum machine_mode,
+ rtx, unsigned int));
static int merge_outer_ops PARAMS ((enum rtx_code *, HOST_WIDE_INT *,
enum rtx_code, HOST_WIDE_INT,
enum machine_mode, int *));
@@ -8102,12 +8113,61 @@ simplify_and_const_int (x, mode, varop,
return x;
}
+/* The function nonzero_bits1 is a wrapper around nonzero_bits2. It
+ avoids the exponential behavior in nonzero_bits2 when X has
+ identical subexpressions on the first or on the second level. */
+
+static unsigned HOST_WIDE_INT
+nonzero_bits1 (x, mode, known_x, known_x_ret)
+ rtx x;
+ enum machine_mode mode;
+ rtx known_x;
+ unsigned HOST_WIDE_INT known_x_ret;
+{
+ if (x == known_x)
+ return known_x_ret;
+
+ /* Try to find identical subexpressions. If found call
+ nonzero_bits2 with known_x as the subexpression known_x_ret as
+ the precomputed value for known_x. */
+
+ if (GET_RTX_CLASS (GET_CODE (x)) == '2'
+ || GET_RTX_CLASS (GET_CODE (x)) == 'c')
+ {
+ rtx x0 = XEXP (x, 0);
+ rtx x1 = XEXP (x, 1);
+
+ /* Check the first level. */
+ if (x0 == x1)
+ return
+ nonzero_bits2 (x, mode, x0,
+ nonzero_bits1 (x0, mode, known_x, known_x_ret));
+
+ /* Check the second level. */
+ if ((GET_RTX_CLASS (GET_CODE (x0)) == '2'
+ || GET_RTX_CLASS (GET_CODE (x0)) == 'c')
+ && (x1 == XEXP (x0, 0) || x1 == XEXP (x0, 1)))
+ return
+ nonzero_bits2 (x, mode, x1,
+ nonzero_bits1 (x1, mode, known_x, known_x_ret));
+
+ if ((GET_RTX_CLASS (GET_CODE (x1)) == '2'
+ || GET_RTX_CLASS (GET_CODE (x1)) == 'c')
+ && (x0 == XEXP (x1, 0) || x0 == XEXP (x1, 1)))
+ return
+ nonzero_bits2 (x, mode, x0,
+ nonzero_bits1 (x0, mode, known_x, known_x_ret));
+ }
+
+ return nonzero_bits2 (x, mode, known_x, known_x_ret);
+}
+
/* We let num_sign_bit_copies recur into nonzero_bits as that is useful.
We don't let nonzero_bits recur into num_sign_bit_copies, because that
is less useful. We can't allow both, because that results in exponential
run time recursion. There is a nullstone testcase that triggered
this. This macro avoids accidental uses of num_sign_bit_copies. */
-#define num_sign_bit_copies()
+#define num_sign_bit_copies1()
/* Given an expression, X, compute which bits in X can be nonzero.
We don't care about bits outside of those defined in MODE.
@@ -8116,9 +8176,11 @@ simplify_and_const_int (x, mode, varop,
a shift, AND, or zero_extract, we can do better. */
static unsigned HOST_WIDE_INT
-nonzero_bits (x, mode)
+nonzero_bits2 (x, mode, known_x, known_x_ret)
rtx x;
enum machine_mode mode;
+ rtx known_x;
+ unsigned HOST_WIDE_INT known_x_ret;
{
unsigned HOST_WIDE_INT nonzero = GET_MODE_MASK (mode);
unsigned HOST_WIDE_INT inner_nz;
@@ -8156,7 +8218,7 @@ nonzero_bits (x, mode)
&& GET_MODE_BITSIZE (GET_MODE (x)) <= HOST_BITS_PER_WIDE_INT
&& GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (GET_MODE (x)))
{
- nonzero &= nonzero_bits (x, GET_MODE (x));
+ nonzero &= nonzero_bits1 (x, GET_MODE (x), known_x, known_x_ret);
nonzero |= GET_MODE_MASK (mode) & ~GET_MODE_MASK (GET_MODE (x));
return nonzero;
}
@@ -8238,7 +8300,7 @@ nonzero_bits (x, mode)
| ((HOST_WIDE_INT) (-1)
<< GET_MODE_BITSIZE (GET_MODE (x))));
#endif
- return nonzero_bits (tem, mode) & nonzero;
+ return nonzero_bits1 (tem, mode, known_x, known_x_ret) & nonzero;
}
else if (nonzero_sign_valid && reg_nonzero_bits[REGNO (x)])
{
@@ -8291,7 +8353,7 @@ nonzero_bits (x, mode)
case NEG:
#if 0
- /* Disabled to avoid exponential mutual recursion between nonzero_bits
+ /* Disabled to avoid exponential mutual recursion between nonzero_bits1
and num_sign_bit_copies. */
if (num_sign_bit_copies (XEXP (x, 0), GET_MODE (x))
== GET_MODE_BITSIZE (GET_MODE (x)))
@@ -8304,7 +8366,7 @@ nonzero_bits (x, mode)
case ABS:
#if 0
- /* Disabled to avoid exponential mutual recursion between nonzero_bits
+ /* Disabled to avoid exponential mutual recursion between nonzero_bits1
and num_sign_bit_copies. */
if (num_sign_bit_copies (XEXP (x, 0), GET_MODE (x))
== GET_MODE_BITSIZE (GET_MODE (x)))
@@ -8313,11 +8375,12 @@ nonzero_bits (x, mode)
break;
case TRUNCATE:
- nonzero &= (nonzero_bits (XEXP (x, 0), mode) & GET_MODE_MASK (mode));
+ nonzero &= (nonzero_bits1 (XEXP (x, 0), mode, known_x, known_x_ret)
+ & GET_MODE_MASK (mode));
break;
case ZERO_EXTEND:
- nonzero &= nonzero_bits (XEXP (x, 0), mode);
+ nonzero &= nonzero_bits1 (XEXP (x, 0), mode, known_x, known_x_ret);
if (GET_MODE (XEXP (x, 0)) != VOIDmode)
nonzero &= GET_MODE_MASK (GET_MODE (XEXP (x, 0)));
break;
@@ -8326,7 +8389,7 @@ nonzero_bits (x, mode)
/* If the sign bit is known clear, this is the same as ZERO_EXTEND.
Otherwise, show all the bits in the outer mode but not the inner
may be nonzero. */
- inner_nz = nonzero_bits (XEXP (x, 0), mode);
+ inner_nz = nonzero_bits1 (XEXP (x, 0), mode, known_x, known_x_ret);
if (GET_MODE (XEXP (x, 0)) != VOIDmode)
{
inner_nz &= GET_MODE_MASK (GET_MODE (XEXP (x, 0)));
@@ -8341,19 +8404,21 @@ nonzero_bits (x, mode)
break;
case AND:
- nonzero &= (nonzero_bits (XEXP (x, 0), mode)
- & nonzero_bits (XEXP (x, 1), mode));
+ nonzero &= (nonzero_bits1 (XEXP (x, 0), mode, known_x, known_x_ret)
+ & nonzero_bits1 (XEXP (x, 1), mode, known_x, known_x_ret));
break;
case XOR: case IOR:
case UMIN: case UMAX: case SMIN: case SMAX:
{
- unsigned HOST_WIDE_INT nonzero0 = nonzero_bits (XEXP (x, 0), mode);
+ unsigned HOST_WIDE_INT nonzero0 =
+ nonzero_bits1 (XEXP (x, 0), mode, known_x, known_x_ret);
- /* Don't call nonzero_bits for the second time if it cannot change
+ /* Don't call nonzero_bits1 for the second time if it cannot change
anything. */
if ((nonzero & nonzero0) != nonzero)
- nonzero &= (nonzero0 | nonzero_bits (XEXP (x, 1), mode));
+ nonzero &= (nonzero0 | nonzero_bits1 (XEXP (x, 1), mode,
+ known_x, known_x_ret));
}
break;
@@ -8366,8 +8431,10 @@ nonzero_bits (x, mode)
computing the width (position of the highest-order nonzero bit)
and the number of low-order zero bits for each value. */
{
- unsigned HOST_WIDE_INT nz0 = nonzero_bits (XEXP (x, 0), mode);
- unsigned HOST_WIDE_INT nz1 = nonzero_bits (XEXP (x, 1), mode);
+ unsigned HOST_WIDE_INT nz0 =
+ nonzero_bits1 (XEXP (x, 0), mode, known_x, known_x_ret);
+ unsigned HOST_WIDE_INT nz1 =
+ nonzero_bits1 (XEXP (x, 1), mode, known_x, known_x_ret);
int width0 = floor_log2 (nz0) + 1;
int width1 = floor_log2 (nz1) + 1;
int low0 = floor_log2 (nz0 & -nz0);
@@ -8451,7 +8518,8 @@ nonzero_bits (x, mode)
if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_UNSIGNED_P (x) > 0)
nonzero = (GET_MODE_MASK (GET_MODE (x))
- & nonzero_bits (SUBREG_REG (x), GET_MODE (x)));
+ & nonzero_bits1 (SUBREG_REG (x), GET_MODE (x),
+ known_x, known_x_ret));
/* If the inner mode is a single word for both the host and target
machines, we can compute this from which bits of the inner
@@ -8460,7 +8528,8 @@ nonzero_bits (x, mode)
&& (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x)))
<= HOST_BITS_PER_WIDE_INT))
{
- nonzero &= nonzero_bits (SUBREG_REG (x), mode);
+ nonzero &=
+ nonzero_bits1 (SUBREG_REG (x), mode, known_x, known_x_ret);
#if defined (WORD_REGISTER_OPERATIONS) && defined (LOAD_EXTEND_OP)
/* If this is a typical RISC machine, we only have to worry
@@ -8503,7 +8572,8 @@ nonzero_bits (x, mode)
unsigned int width = GET_MODE_BITSIZE (inner_mode);
int count = INTVAL (XEXP (x, 1));
unsigned HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
- unsigned HOST_WIDE_INT op_nonzero = nonzero_bits (XEXP (x, 0), mode);
+ unsigned HOST_WIDE_INT op_nonzero =
+ nonzero_bits1 (XEXP (x, 0), mode, known_x, known_x_ret);
unsigned HOST_WIDE_INT inner = op_nonzero & mode_mask;
unsigned HOST_WIDE_INT outer = 0;
@@ -8538,8 +8608,8 @@ nonzero_bits (x, mode)
break;
case IF_THEN_ELSE:
- nonzero &= (nonzero_bits (XEXP (x, 1), mode)
- | nonzero_bits (XEXP (x, 2), mode));
+ nonzero &= (nonzero_bits1 (XEXP (x, 1), mode, known_x, known_x_ret)
+ | nonzero_bits1 (XEXP (x, 2), mode, known_x, known_x_ret));
break;
default:
@@ -8550,17 +8620,72 @@ nonzero_bits (x, mode)
}
/* See the macro definition above. */
-#undef num_sign_bit_copies
+#undef num_sign_bit_copies1
+/* The function num_sign_bit_copies1 is a wrapper around
+ num_sign_bit_copies2. It avoids the exponential behavior in
+ num_sign_bit_copies2 when X has identical subexpressions on the
+ first or on the second level. */
+
+static unsigned int
+num_sign_bit_copies1 (x, mode, known_x, known_x_ret)
+ rtx x;
+ enum machine_mode mode;
+ rtx known_x;
+ unsigned int known_x_ret;
+{
+ if (x == known_x)
+ return known_x_ret;
+
+ /* Try to find identical subexpressions. If found call
+ num_sign_bit_copies2 with known_x as the subexpression
+ known_x_ret as the precomputed value for known_x. */
+
+ if (GET_RTX_CLASS (GET_CODE (x)) == '2'
+ || GET_RTX_CLASS (GET_CODE (x)) == 'c')
+ {
+ rtx x0 = XEXP (x, 0);
+ rtx x1 = XEXP (x, 1);
+
+ /* Check the first level. */
+ if (x0 == x1)
+ return
+ num_sign_bit_copies2 (x, mode, x0,
+ num_sign_bit_copies1 (x0, mode, known_x,
+ known_x_ret));
+
+ /* Check the second level. */
+ if ((GET_RTX_CLASS (GET_CODE (x0)) == '2'
+ || GET_RTX_CLASS (GET_CODE (x0)) == 'c')
+ && (x1 == XEXP (x0, 0) || x1 == XEXP (x0, 1)))
+ return
+ num_sign_bit_copies2 (x, mode, x1,
+ num_sign_bit_copies1 (x1, mode, known_x,
+ known_x_ret));
+
+ if ((GET_RTX_CLASS (GET_CODE (x1)) == '2'
+ || GET_RTX_CLASS (GET_CODE (x1)) == 'c')
+ && (x0 == XEXP (x1, 0) || x0 == XEXP (x1, 1)))
+ return
+ num_sign_bit_copies2 (x, mode, x0,
+ num_sign_bit_copies1 (x0, mode, known_x,
+ known_x_ret));
+ }
+
+ return num_sign_bit_copies2 (x, mode, known_x, known_x_ret);
+}
+
/* Return the number of bits at the high-order end of X that are known to
be equal to the sign bit. X will be used in mode MODE; if MODE is
VOIDmode, X will be used in its own mode. The returned value will always
be between 1 and the number of bits in MODE. */
static unsigned int
-num_sign_bit_copies (x, mode)
+num_sign_bit_copies2 (x, mode, known_x, known_x_ret)
rtx x;
enum machine_mode mode;
+ rtx known_x;
+ unsigned int known_x_ret;
{
enum rtx_code code = GET_CODE (x);
unsigned int bitwidth;
@@ -8583,7 +8708,7 @@ num_sign_bit_copies (x, mode)
/* For a smaller object, just ignore the high bits. */
if (bitwidth < GET_MODE_BITSIZE (GET_MODE (x)))
{
- num0 = num_sign_bit_copies (x, GET_MODE (x));
+ num0 = num_sign_bit_copies1 (x, GET_MODE (x), known_x, known_x_ret);
return MAX (1,
num0 - (int) (GET_MODE_BITSIZE (GET_MODE (x)) - bitwidth));
}
@@ -8632,7 +8757,7 @@ num_sign_bit_copies (x, mode)
tem = get_last_value (x);
if (tem != 0)
- return num_sign_bit_copies (tem, mode);
+ return num_sign_bit_copies1 (tem, mode, known_x, known_x_ret);
if (nonzero_sign_valid && reg_sign_bit_copies[REGNO (x)] != 0
&& GET_MODE_BITSIZE (GET_MODE (x)) == bitwidth)
@@ -8665,7 +8790,8 @@ num_sign_bit_copies (x, mode)
if (SUBREG_PROMOTED_VAR_P (x) && ! SUBREG_PROMOTED_UNSIGNED_P (x))
{
- num0 = num_sign_bit_copies (SUBREG_REG (x), mode);
+ num0 =
+ num_sign_bit_copies1 (SUBREG_REG (x), mode, known_x, known_x_ret);
return MAX ((int) bitwidth
- (int) GET_MODE_BITSIZE (GET_MODE (x)) + 1,
num0);
@@ -8674,7 +8800,8 @@ num_sign_bit_copies (x, mode)
/* For a smaller object, just ignore the high bits. */
if (bitwidth <= GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))))
{
- num0 = num_sign_bit_copies (SUBREG_REG (x), VOIDmode);
+ num0 = num_sign_bit_copies1 (SUBREG_REG (x), VOIDmode,
+ known_x, known_x_ret);
return MAX (1, (num0
- (int) (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x)))
- bitwidth)));
@@ -8696,7 +8823,8 @@ num_sign_bit_copies (x, mode)
> GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))
&& LOAD_EXTEND_OP (GET_MODE (SUBREG_REG (x))) == SIGN_EXTEND
&& GET_CODE (SUBREG_REG (x)) == MEM)
- return num_sign_bit_copies (SUBREG_REG (x), mode);
+ return
+ num_sign_bit_copies1 (SUBREG_REG (x), mode, known_x, known_x_ret);
#endif
#endif
break;
@@ -8708,16 +8836,18 @@ num_sign_bit_copies (x, mode)
case SIGN_EXTEND:
return (bitwidth - GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0)))
- + num_sign_bit_copies (XEXP (x, 0), VOIDmode));
+ + num_sign_bit_copies1 (XEXP (x, 0), VOIDmode,
+ known_x, known_x_ret));
case TRUNCATE:
/* For a smaller object, just ignore the high bits. */
- num0 = num_sign_bit_copies (XEXP (x, 0), VOIDmode);
+ num0 =
+ num_sign_bit_copies1 (XEXP (x, 0), VOIDmode, known_x, known_x_ret);
return MAX (1, (num0 - (int) (GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0)))
- bitwidth)));
case NOT:
- return num_sign_bit_copies (XEXP (x, 0), mode);
+ return num_sign_bit_copies1 (XEXP (x, 0), mode, known_x, known_x_ret);
case ROTATE: case ROTATERT:
/* If we are rotating left by a number of bits less than the number
@@ -8727,7 +8857,8 @@ num_sign_bit_copies (x, mode)
&& INTVAL (XEXP (x, 1)) >= 0
&& INTVAL (XEXP (x, 1)) < (int) bitwidth)
{
- num0 = num_sign_bit_copies (XEXP (x, 0), mode);
+ num0 =
+ num_sign_bit_copies1 (XEXP (x, 0), mode, known_x, known_x_ret);
return MAX (1, num0 - (code == ROTATE ? INTVAL (XEXP (x, 1))
: (int) bitwidth - INTVAL (XEXP (x, 1))));
}
@@ -8738,7 +8869,7 @@ num_sign_bit_copies (x, mode)
is known to be positive, the number of sign bit copies is the
same as that of the input. Finally, if the input has just one bit
that might be nonzero, all the bits are copies of the sign bit. */
- num0 = num_sign_bit_copies (XEXP (x, 0), mode);
+ num0 = num_sign_bit_copies1 (XEXP (x, 0), mode, known_x, known_x_ret);
if (bitwidth > HOST_BITS_PER_WIDE_INT)
return num0 > 1 ? num0 - 1 : 1;
@@ -8756,8 +8887,8 @@ num_sign_bit_copies (x, mode)
case SMIN: case SMAX: case UMIN: case UMAX:
/* Logical operations will preserve the number of sign-bit copies.
MIN and MAX operations always return one of the operands. */
- num0 = num_sign_bit_copies (XEXP (x, 0), mode);
- num1 = num_sign_bit_copies (XEXP (x, 1), mode);
+ num0 = num_sign_bit_copies1 (XEXP (x, 0), mode, known_x, known_x_ret);
+ num1 = num_sign_bit_copies1 (XEXP (x, 1), mode, known_x, known_x_ret);
return MIN (num0, num1);
case PLUS: case MINUS:
@@ -8775,8 +8906,8 @@ num_sign_bit_copies (x, mode)
: bitwidth - floor_log2 (nonzero) - 1);
}
- num0 = num_sign_bit_copies (XEXP (x, 0), mode);
- num1 = num_sign_bit_copies (XEXP (x, 1), mode);
+ num0 = num_sign_bit_copies1 (XEXP (x, 0), mode, known_x, known_x_ret);
+ num1 = num_sign_bit_copies1 (XEXP (x, 1), mode, known_x, known_x_ret);
result = MAX (1, MIN (num0, num1) - 1);
#ifdef POINTERS_EXTEND_UNSIGNED
@@ -8798,8 +8929,8 @@ num_sign_bit_copies (x, mode)
to be positive, we must allow for an additional bit since negating
a negative number can remove one sign bit copy. */
- num0 = num_sign_bit_copies (XEXP (x, 0), mode);
- num1 = num_sign_bit_copies (XEXP (x, 1), mode);
+ num0 = num_sign_bit_copies1 (XEXP (x, 0), mode, known_x, known_x_ret);
+ num1 = num_sign_bit_copies1 (XEXP (x, 1), mode, known_x, known_x_ret);
result = bitwidth - (bitwidth - num0) - (bitwidth - num1);
if (result > 0
@@ -8822,17 +8953,19 @@ num_sign_bit_copies (x, mode)
& ((HOST_WIDE_INT) 1 << (bitwidth - 1))) != 0)
return 1;
else
- return num_sign_bit_copies (XEXP (x, 0), mode);
+ return
+ num_sign_bit_copies1 (XEXP (x, 0), mode, known_x, known_x_ret);
case UMOD:
/* The result must be <= the second operand. */
- return num_sign_bit_copies (XEXP (x, 1), mode);
+ return num_sign_bit_copies1 (XEXP (x, 1), mode, known_x, known_x_ret);
case DIV:
/* Similar to unsigned division, except that we have to worry about
the case where the divisor is negative, in which case we have
to add 1. */
- result = num_sign_bit_copies (XEXP (x, 0), mode);
+ result =
+ num_sign_bit_copies1 (XEXP (x, 0), mode, known_x, known_x_ret);
if (result > 1
&& (bitwidth > HOST_BITS_PER_WIDE_INT
|| (nonzero_bits (XEXP (x, 1), mode)
@@ -8842,7 +8975,8 @@ num_sign_bit_copies (x, mode)
return result;
case MOD:
- result = num_sign_bit_copies (XEXP (x, 1), mode);
+ result =
+ num_sign_bit_copies1 (XEXP (x, 1), mode, known_x, known_x_ret);
if (result > 1
&& (bitwidth > HOST_BITS_PER_WIDE_INT
|| (nonzero_bits (XEXP (x, 1), mode)
@@ -8854,7 +8988,7 @@ num_sign_bit_copies (x, mode)
case ASHIFTRT:
/* Shifts by a constant add to the number of bits equal to the
sign bit. */
- num0 = num_sign_bit_copies (XEXP (x, 0), mode);
+ num0 = num_sign_bit_copies1 (XEXP (x, 0), mode, known_x, known_x_ret);
if (GET_CODE (XEXP (x, 1)) == CONST_INT
&& INTVAL (XEXP (x, 1)) > 0)
num0 = MIN ((int) bitwidth, num0 + INTVAL (XEXP (x, 1)));
@@ -8868,12 +9002,12 @@ num_sign_bit_copies (x, mode)
|| INTVAL (XEXP (x, 1)) >= (int) bitwidth)
return 1;
- num0 = num_sign_bit_copies (XEXP (x, 0), mode);
+ num0 = num_sign_bit_copies1 (XEXP (x, 0), mode, known_x, known_x_ret);
return MAX (1, num0 - INTVAL (XEXP (x, 1)));
case IF_THEN_ELSE:
- num0 = num_sign_bit_copies (XEXP (x, 1), mode);
- num1 = num_sign_bit_copies (XEXP (x, 2), mode);
+ num0 = num_sign_bit_copies1 (XEXP (x, 1), mode, known_x, known_x_ret);
+ num1 = num_sign_bit_copies1 (XEXP (x, 2), mode, known_x, known_x_ret);
return MIN (num0, num1);
case EQ: case NE: case GE: case GT: case LE: case LT:
@@ -11325,7 +11459,45 @@ update_table_tick (x)
/* Note that we can't have an "E" in values stored; see
get_last_value_validate. */
if (fmt[i] == 'e')
- update_table_tick (XEXP (x, i));
+ {
+ /* Check for identical subexpressions. If x contains
+ identical subexpression we only have to traverse one of
+ them. */
+ if (i == 0
+ && (GET_RTX_CLASS (code) == '2'
+ || GET_RTX_CLASS (code) == 'c'))
+ {
+ /* Note that at this point x1 has already been
+ processed. */
+ rtx x0 = XEXP (x, 0);
+ rtx x1 = XEXP (x, 1);
+
+ /* If x0 and x1 are identical then there is no need to
+ process x0. */
+ if (x0 == x1)
+ break;
+
+ /* If x0 is identical to a subexpression of x1 then while
+ processing x1, x0 has already been processed. Thus we
+ are done with x. */
+ if ((GET_RTX_CLASS (GET_CODE (x1)) == '2'
+ || GET_RTX_CLASS (GET_CODE (x1)) == 'c')
+ && (x0 == XEXP (x1, 0) || x0 == XEXP (x1, 1)))
+ break;
+
+ /* If x1 is identical to a subexpression of x0 then we
+ still have to process the rest of x0. */
+ if ((GET_RTX_CLASS (GET_CODE (x0)) == '2'
+ || GET_RTX_CLASS (GET_CODE (x0)) == 'c')
+ && (x1 == XEXP (x0, 0) || x1 == XEXP (x0, 1)))
+ {
+ update_table_tick (XEXP (x0, x1 == XEXP (x0, 0) ? 1 : 0));
+ break;
+ }
+ }
+
+ update_table_tick (XEXP (x, i));
+ }
}
/* Record that REG is set to VALUE in insn INSN. If VALUE is zero, we
@@ -11678,11 +11850,52 @@ get_last_value_validate (loc, insn, tick
}
for (i = 0; i < len; i++)
- if ((fmt[i] == 'e'
- && get_last_value_validate (&XEXP (x, i), insn, tick, replace) == 0)
- /* Don't bother with these. They shouldn't occur anyway. */
- || fmt[i] == 'E')
- return 0;
+ {
+ if (fmt[i] == 'e')
+ {
+ /* Check for identical subexpressions. If x contains
+ identical subexpression we only have to traverse one of
+ them. */
+ if (i == 1
+ && (GET_RTX_CLASS (GET_CODE (x)) == '2'
+ || GET_RTX_CLASS (GET_CODE (x)) == 'c'))
+ {
+ /* Note that at this point x0 has already been checked
+ and found valid. */
+ rtx x0 = XEXP (x, 0);
+ rtx x1 = XEXP (x, 1);
+
+ /* If x0 and x1 are identical then x is also valid. */
+ if (x0 == x1)
+ return 1;
+
+ /* If x1 is identical to a subexpression of x0 then
+ while checking x0, x1 has already been checked. Thus
+ it is valid and so as x. */
+ if ((GET_RTX_CLASS (GET_CODE (x0)) == '2'
+ || GET_RTX_CLASS (GET_CODE (x0)) == 'c')
+ && (x1 == XEXP (x0, 0) || x1 == XEXP (x0, 1)))
+ return 1;
+
+ /* If x0 is identical to a subexpression of x1 then x is
+ valid iff the rest of x1 is valid. */
+ if ((GET_RTX_CLASS (GET_CODE (x1)) == '2'
+ || GET_RTX_CLASS (GET_CODE (x1)) == 'c')
+ && (x0 == XEXP (x1, 0) || x0 == XEXP (x1, 1)))
+ return
+ get_last_value_validate (&XEXP (x1,
+ x0 == XEXP (x1, 0) ? 1 : 0),
+ insn, tick, replace);
+ }
+
+ if (get_last_value_validate (&XEXP (x, i), insn, tick,
+ replace) == 0)
+ return 0;
+ }
+ /* Don't bother with these. They shouldn't occur anyway. */
+ else if (fmt[i] == 'E')
+ return 0;
+ }
/* If we haven't found a reason for it to be invalid, it is valid. */
return 1;