On 13/11/10 10:50, Eric Botcazou wrote:
I profiled the pass on spec2000:
-mabi=32 -mabi=64
ee-pass (usr time): 0.70 1.16
total (usr time): 919.30 879.26
ee-pass (%): 0.08 0.13
The pass takes 0.13% or less of the total usr runtime.
For how many hits? What are the numbers with --param ee-max-propagate=0?
Is it necessary to improve the runtime of this pass?
I've already given my opinion about the implementation. The other passes in
the compiler try hard not to rescan everything when a single bit changes; as
currently written, yours doesn't.
Eric,
I've done the following:
- refactored the pass such that it now scans at most twice over all
instructions.
- updated the patch to be applicable to current trunk
- updated the motivating example to a more applicable one (as discussed in
this thread), and added that one as test-case.
- added a part in the header comment illustrating the working of the pass
on the motivating example.
bootstrapped and reg-tested on x86_64 and i686.
build and reg-tested on mips, mips64, and arm.
OK for trunk?
Thanks,
- Tom
2012-07-10 Tom de Vries <tom@codesourcery.com>
* ee.c: New file.
* tree-pass.h (pass_ee): Declare.
* opts.c ( default_options_table): Set flag_ee at -O2.
* timevar.def (TV_EE): New timevar.
* common.opt (fextension-elimination): New option.
* Makefile.in (ee.o): New rule.
* passes.c (pass_ee): Add it.
* gcc.dg/extend-1.c: New test.
* gcc.dg/extend-2.c: Same.
* gcc.dg/extend-2-64.c: Same.
* gcc.dg/extend-3.c: Same.
* gcc.dg/extend-4.c: Same.
* gcc.dg/extend-5.c: Same.
* gcc.target/mips/octeon-bbit-2.c: Make test more robust.
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h (revision 189409)
+++ gcc/tree-pass.h (working copy)
@@ -483,6 +483,7 @@ extern struct gimple_opt_pass pass_fixup
extern struct rtl_opt_pass pass_expand;
extern struct rtl_opt_pass pass_instantiate_virtual_regs;
+extern struct rtl_opt_pass pass_ee;
extern struct rtl_opt_pass pass_rtl_fwprop;
extern struct rtl_opt_pass pass_rtl_fwprop_addr;
extern struct rtl_opt_pass pass_jump;
Index: gcc/testsuite/gcc.target/mips/octeon-bbit-2.c
===================================================================
--- gcc/testsuite/gcc.target/mips/octeon-bbit-2.c (revision 189409)
+++ gcc/testsuite/gcc.target/mips/octeon-bbit-2.c (working copy)
@@ -5,19 +5,19 @@
/* { dg-final { scan-assembler "\tbnel\t" } } */
/* { dg-final { scan-assembler-not "\tbne\t" } } */
-NOMIPS16 int
-f (int n, int i)
+NOMIPS16 long int
+f (long int n, long int i)
{
- int s = 0;
+ long int s = 0;
for (; i & 1; i++)
s += i;
return s;
}
-NOMIPS16 int
-g (int n, int i)
+NOMIPS16 long int
+g (long int n, long int i)
{
- int s = 0;
+ long int s = 0;
for (i = 0; i < n; i++)
s += i;
return s;
Index: gcc/testsuite/gcc.dg/extend-4.c
===================================================================
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/extend-4.c (revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ee" } */
+
+unsigned char f(unsigned int a, int c)
+{
+ unsigned int b = a;
+ if (c)
+ b = a & 0x10ff;
+ return b;
+}
+
+/* { dg-final { scan-rtl-dump-times "_extend:" 1 "ee" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump-times "and:" 0 "ee" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump "redundant extension \[0-9\]+ removed" "ee" { target mips*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "ee" } } */
+
Index: gcc/testsuite/gcc.dg/extend-1.c
===================================================================
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/extend-1.c (revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ee" } */
+
+void f(unsigned char * p, short s, int c, int *z)
+{
+ if (c)
+ *z = 0;
+ *p ^= (unsigned char)s;
+}
+
+/* { dg-final { scan-rtl-dump-times "sign_extend:" 0 "ee" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump-times "redundant extension \[0-9\]+ replaced" 1 "ee" { target mips*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "ee" } } */
Index: gcc/testsuite/gcc.dg/extend-5.c
===================================================================
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/extend-5.c (revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ee" } */
+
+void f (short d[2][2])
+{
+ int d0 = d[0][0] + d[0][1];
+ int d1 = d[1][0] + d[1][1];
+ d[0][0] = d0 + d1;
+ d[0][1] = d0 - d1;
+}
+
+/* { dg-final { scan-rtl-dump-times "redundant extension \[0-9\]+ replaced" 2 "ee" { target mips*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "ee" } } */
Index: gcc/testsuite/gcc.dg/extend-2.c
===================================================================
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/extend-2.c (revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ee" } */
+/* { dg-require-effective-target ilp32 } */
+
+void f(unsigned char * p, short *s, int c)
+{
+ short or = 0;
+ while (c)
+ {
+ or = or | s[c];
+ c --;
+ }
+ *p = (unsigned char)or;
+}
+
+/* { dg-final { scan-rtl-dump-times "zero_extend" 0 "ee" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump-times "sign_extend" 0 "ee" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump-times "redundant extension \[0-9\]+ replaced" 2 "ee" { target mips*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "ee" } } */
+
Index: gcc/testsuite/gcc.dg/extend-2-64.c
===================================================================
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/extend-2-64.c (revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ee -mabi=64" } */
+/* { dg-require-effective-target mips64 } */
+
+void f(unsigned char * p, short *s, int c)
+{
+ short or = 0;
+ while (c)
+ {
+ or = or | s[c];
+ c --;
+ }
+ *p = (unsigned char)or;
+}
+
+/* { dg-final { scan-rtl-dump-times "zero_extend:" 0 "ee" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump-times "sign_extend:" 1 "ee" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump-times "redundant extension \[0-9\]+ replaced" 2 "ee" { target mips*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "ee" } } */
+
Index: gcc/testsuite/gcc.dg/extend-3.c
===================================================================
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/extend-3.c (revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ee -mabi=64" } */
+/* { dg-require-effective-target mips64 } */
+
+unsigned int f(unsigned char byte)
+{
+ return byte << 25;
+}
+
+/* { dg-final { scan-rtl-dump-times "zero_extend:" 0 "ee" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump "redundant extension \[0-9\]+ replaced" "ee" } } */
+/* { dg-final { cleanup-rtl-dump "ee" } } */
+
Index: gcc/opts.c
===================================================================
--- gcc/opts.c (revision 189409)
+++ gcc/opts.c (working copy)
@@ -490,6 +490,7 @@ static const struct default_options defa
{ OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_foptimize_strlen, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_fextension_elimination, NULL, 1 },
/* -O3 optimizations. */
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
Index: gcc/timevar.def
===================================================================
--- gcc/timevar.def (revision 189409)
+++ gcc/timevar.def (working copy)
@@ -201,6 +201,7 @@ DEFTIMEVAR (TV_POST_EXPAND , "post
DEFTIMEVAR (TV_VARCONST , "varconst")
DEFTIMEVAR (TV_LOWER_SUBREG , "lower subreg")
DEFTIMEVAR (TV_JUMP , "jump")
+DEFTIMEVAR (TV_EE , "extension elimination")
DEFTIMEVAR (TV_FWPROP , "forward prop")
DEFTIMEVAR (TV_CSE , "CSE")
DEFTIMEVAR (TV_DCE , "dead code elimination")
Index: gcc/ee.c
===================================================================
--- /dev/null (new file)
+++ gcc/ee.c (revision 0)
@@ -0,0 +1,1190 @@
+/* Redundant extension elimination.
+ Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc.
+ Contributed by Tom de Vries (tom@codesourcery.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/*
+
+ MOTIVATING EXAMPLE
+
+ The motivating example for this pass is the example from PR 40893:
+
+ void f (short d[2][2])
+ {
+ int d0 = d[0][0] + d[0][1];
+ int d1 = d[1][0] + d[1][1];
+ d[0][0] = d0 + d1;
+ d[0][1] = d0 - d1;
+ }
+
+ For MIPS, compilation results in the following insns.
+
+ (set (reg:SI 204)
+ (zero_extend:SI (subreg:HI (reg:SI 213) 2)))
+
+ (set (reg:SI 205)
+ (zero_extend:SI (subreg:HI (reg:SI 216 [ d1 ]) 2)))
+
+ (set (reg:SI 217)
+ (plus:SI (reg:SI 205)
+ (reg:SI 204)))
+
+ (set (reg:SI 218)
+ (minus:SI (reg:SI 204)
+ (reg:SI 205)))
+
+ (set (mem:HI (reg/v/f:SI 210))
+ (subreg:HI (reg:SI 217) 2))
+
+ (set (mem:HI (plus:SI (reg/v/f:SI 210)
+ (const_int 2 [0x2])))
+ (subreg:HI (reg:SI 218) 2))
+
+
+ The pseudos 217 and 218 only use the lower half of pseudos 217 and 218, and
+ are the only uses. And the plus and minus operators belong to the class of
+ operators where a bit in the result is only influenced by same-or-less
+ significant bitss in the operands, so the plus and minus insns only use the
+ lower halves of pseudos 204 and 205. Those are also the only uses of pseudos
+ 204 and 205, so the zero_extends are redundant.
+
+
+ INTENDED EFFECT
+
+ This pass works by removing sign/zero-extensions, or replacing them with
+ regcopies. The idea there is that the regcopy might be eliminated by a later
+ pass. In case the regcopy cannot be eliminated, it might at least be cheaper
+ than the extension.
+
+
+ IMPLEMENTATION
+
+ The pass scans at most two times over all instructions.
+
+ The first scan collects all extensions. If there are no extensions, we're
+ done.
+
+ The second scan registers all uses of a reg in the biggest_use array.
+ Additionally, it registers how the use size of a pseudo is propagated to the
+ operands of the insns defining the pseudo.
+
+ The biggest_use array now contains the size in bits of the biggest use
+ of each reg, which allows us to find redundant extensions.
+
+ If there are still non-redundant extensions left, we use the propagation
+ information in an iterative fashion to improve the biggest_use array, after
+ which we may find more redundant extensions.
+
+ Finally, redundant extensions are deleted or replaced.
+
+ In case that the src and dest reg of the replacement are not of the same size,
+ we do not replace with a normal regcopy, but with a truncate or with the copy
+ of a paradoxical subreg instead.
+
+
+ ILLUSTRATION OF PASS
+
+ The dump of the pass shows us how the pass works on the motivating example.
+
+ We find the 2 extensions:
+ found extension with preserved size 16 defining reg 204
+ found extension with preserved size 16 defining reg 205
+
+ We calculate the biggests uses of a register:
+ biggest_use
+ reg 204: size 32
+ reg 205: size 32
+ reg 217: size 16
+ reg 218: size 16
+
+ We propagate the biggest uses where possible:
+ propagations
+ 205: 32 -> 16
+ 204: 32 -> 16
+ 214: 32 -> 16
+ 215: 32 -> 16
+
+ We conclude that the extensions are redundant:
+ found redundant extension with preserved size 16 defining reg 205
+ found redundant extension with preserved size 16 defining reg 204
+
+ And we replace them with regcopies:
+ (set (reg:SI 204)
+ (reg:SI 213))
+
+ (set (reg:SI 205)
+ (reg:SI 216))
+
+
+ LIMITATIONS
+
+ The scope of the analysis is limited to an extension and its uses. The other
+ type of analysis (related to the defs of the operand of an extension) is not
+ done.
+
+ Furthermore, we do the analysis of biggest use per reg. So when determining
+ whether an extension is redundant, we take all uses of a dest reg into
+ account, also the ones that are not uses of the extension.
+ The consideration is that using use-def chains will give a more precise
+ analysis, but is much more expensive in terms of runtime. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "flags.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "basic-block.h"
+#include "insn-config.h"
+#include "function.h"
+#include "expr.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "toplev.h"
+#include "target.h"
+#include "timevar.h"
+#include "optabs.h"
+#include "insn-codes.h"
+#include "rtlhooks-def.h"
+#include "output.h"
+#include "params.h"
+#include "timevar.h"
+#include "tree-pass.h"
+#include "cgraph.h"
+#include "vec.h"
+
+#define SKIP_REG (-1)
+#define NONE (-1)
+
+/* Number of registers at start of pass. */
+
+static int n_regs;
+
+/* Array to register the biggest use of a reg, in bits. */
+
+static int *biggest_use;
+
+/* Array to register the promoted subregs. */
+
+static VEC (rtx,heap) **promoted_subreg;
+
+/* Array to register for a reg what the last propagated size is. */
+
+static int *propagated_size;
+
+typedef struct use
+{
+ int regno;
+ int size;
+ int offset;
+ rtx *use;
+} use_type;
+
+DEF_VEC_O(use_type);
+DEF_VEC_ALLOC_O(use_type,heap);
+
+/* Vector to register the uses. */
+
+static VEC (use_type,heap) **uses;
+
+typedef struct prop
+{
+ rtx set;
+ int uses_regno;
+ int uses_index;
+} prop_type;
+
+DEF_VEC_O(prop_type);
+DEF_VEC_ALLOC_O(prop_type,heap);
+
+/* Vector to register the propagations. */
+
+static VEC (prop_type,heap) **props;
+
+/* Work list for propragation. */
+
+static VEC (int,heap) *wl;
+
+/* Array to register what regs are in the work list. */
+
+static bool *in_wl;
+
+/* Vector that contains the extensions in the function. */
+
+static VEC (rtx,heap) *extensions;
+
+/* Vector that contains the extensions in the function that are going to be
+ removed or replaced. */
+
+static VEC (rtx,heap) *redundant_extensions;
+
+/* Forward declaration. */
+
+static void note_use (rtx *x, void *data);
+static bool skip_reg_p (int regno);
+static void register_prop (rtx set, use_type *use);
+
+/* Check whether SUBREG is a promoted subreg. */
+
+static bool
+promoted_subreg_p (rtx subreg)
+{
+ return (GET_CODE (subreg) == SUBREG
+ && SUBREG_PROMOTED_VAR_P (subreg));
+}
+
+/* Check whether SUBREG is a promoted subreg for which we cannot reset the
+ promotion. */
+
+static bool
+fixed_promoted_subreg_p (rtx subreg)
+{
+ int mre;
+
+ if (!promoted_subreg_p (subreg))
+ return false;
+
+ mre = targetm.mode_rep_extended (GET_MODE (subreg),
+ GET_MODE (SUBREG_REG (subreg)));
+ return mre != UNKNOWN;
+}
+
+/* Attempt to return the size, reg number and offset of USE in SIZE, REGNO and
+ OFFSET. Return true if successful. */
+
+static bool
+reg_use_p (rtx use, int *size, unsigned int *regno, int *offset)
+{
+ rtx reg;
+
+ if (REG_P (use))
+ {
+ *regno = REGNO (use);
+ *offset = 0;
+ *size = GET_MODE_BITSIZE (GET_MODE (use));
+ return true;
+ }
+ else if (GET_CODE (use) == SUBREG)
+ {
+ reg = SUBREG_REG (use);
+
+ if (!REG_P (reg))
+ return false;
+
+ *regno = REGNO (reg);
+
+ if (paradoxical_subreg_p (use) || fixed_promoted_subreg_p (use))
+ {
+ *offset = 0;
+ *size = GET_MODE_BITSIZE (GET_MODE (reg));
+ }
+ else
+ {
+ *offset = subreg_lsb (use);
+ *size = *offset + GET_MODE_BITSIZE (GET_MODE (use));
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+/* Create a new empty entry in the uses[REGNO] vector. */
+
+static use_type *
+new_use (unsigned int regno)
+{
+ if (uses[regno] == NULL)
+ uses[regno] = VEC_alloc (use_type, heap, 4);
+
+ VEC_safe_push (use_type, heap, uses[regno], NULL);
+
+ return VEC_last (use_type, uses[regno]);
+}
+
+/* Register a USE of reg REGNO with SIZE and OFFSET. */
+
+static use_type *
+register_use (int size, unsigned int regno, int offset, rtx *use)
+{
+ int *current;
+ use_type *p;
+
+ gcc_assert (size >= 0);
+ gcc_assert (regno < (unsigned int)n_regs);
+
+ if (skip_reg_p (regno))
+ return NULL;
+
+ p = new_use (regno);
+ p->regno = regno;
+ p->size = size;
+ p->offset = offset;
+ p->use = use;
+
+ /* Update the bigest use. */
+ current = &biggest_use[regno];
+ *current = MAX (*current, size);
+
+ return p;
+}
+
+/* Handle embedded uses in USE, which is a part of PATTERN. */
+
+static void
+note_embedded_uses (rtx use, rtx pattern)
+{
+ const char *format_ptr;
+ int i, j;
+
+ format_ptr = GET_RTX_FORMAT (GET_CODE (use));
+ for (i = 0; i < GET_RTX_LENGTH (GET_CODE (use)); i++)
+ if (format_ptr[i] == 'e')
+ note_use (&XEXP (use, i), pattern);
+ else if (format_ptr[i] == 'E')
+ for (j = 0; j < XVECLEN (use, i); j++)
+ note_use (&XVECEXP (use, i, j), pattern);
+}
+
+/* Get the set in PATTERN that has USE as its src operand. */
+
+static rtx
+get_set (rtx use, rtx pattern)
+{
+ rtx sub;
+ int i;
+
+ if (GET_CODE (pattern) == SET && SET_SRC (pattern) == use)
+ return pattern;
+
+ if (GET_CODE (pattern) == PARALLEL)
+ for (i = 0; i < XVECLEN (pattern, 0); ++i)
+ {
+ sub = XVECEXP (pattern, 0, i);
+ if (GET_CODE (sub) == SET && SET_SRC (sub) == use)
+ return sub;
+ }
+
+ return NULL_RTX;
+}
+
+/* Handle a restricted op USE with NR_OPERANDS. USE is a part of SET, which is
+ a part of PATTERN. In this context restricted means that a bit in
+ an operand influences only the same bit or more significant bits in the
+ result. The bitwise ops are a subclass, but PLUS is one as well. */
+
+static void
+note_restricted_op_use (rtx set, rtx use, unsigned int nr_operands, rtx pattern)
+{
+ unsigned int i, smallest;
+ int operand_size[2];
+ int operand_offset[2];
+ int used_size;
+ unsigned int operand_regno[2];
+ bool operand_reg[2];
+ bool operand_ignore[2];
+ use_type *p;
+
+ /* Init operand_reg, operand_size, operand_regno and operand_ignore. */
+ for (i = 0; i < nr_operands; ++i)
+ {
+ operand_reg[i] = reg_use_p (XEXP (use, i), &operand_size[i],
+ &operand_regno[i], &operand_offset[i]);
+ operand_ignore[i] = false;
+ }
+
+ /* Handle case of reg and-masked with const. */
+ if (GET_CODE (use) == AND && CONST_INT_P (XEXP (use, 1)) && operand_reg[0])
+ {
+ used_size =
+ HOST_BITS_PER_WIDE_INT - clz_hwi (UINTVAL (XEXP (use, 1)));
+ operand_size[0] = MIN (operand_size[0], used_size);
+ }
+
+ /* Handle case of reg or-masked with const. */
+ if (GET_CODE (use) == IOR && CONST_INT_P (XEXP (use, 1)) && operand_reg[0])
+ {
+ used_size =
+ HOST_BITS_PER_WIDE_INT - clz_hwi (~UINTVAL (XEXP (use, 1)));
+ operand_size[0] = MIN (operand_size[0], used_size);
+ }
+
+ /* Ignore the use of a in 'a = a + b'. */
+ /* TODO: handle GET_CODE ((SET_DEST (set))) == SUBREG. */
+ if (set != NULL_RTX && REG_P (SET_DEST (set)))
+ for (i = 0; i < nr_operands; ++i)
+ operand_ignore[i] = (operand_reg[i]
+ && (REGNO (SET_DEST (set)) == operand_regno[i]));
+
+ /* Handle the case a reg is combined with don't care bits. */
+ if (nr_operands == 2 && operand_reg[0] && operand_reg[1]
+ && operand_size[0] != operand_size[1])
+ {
+ smallest = operand_size[0] > operand_size[1];
+
+ if (paradoxical_subreg_p (XEXP (use, smallest)))
+ operand_size[1 - smallest] = operand_size[smallest];
+ }
+
+ /* Register the operand use, if necessary. */
+ for (i = 0; i < nr_operands; ++i)
+ if (!operand_reg[i])
+ note_use (&XEXP (use, i), pattern);
+ else if (!operand_ignore[i])
+ {
+ p = register_use (operand_size[i], operand_regno[i], operand_offset[i],
+ &XEXP (use, i));
+ register_prop (set, p);
+ }
+}
+
+/* Register promoted SUBREG in promoted_subreg. */
+
+static void
+register_promoted_subreg (rtx subreg)
+{
+ int index = REGNO (SUBREG_REG (subreg));
+
+ if (promoted_subreg[index] == NULL)
+ promoted_subreg[index] = VEC_alloc (rtx, heap, 10);
+
+ VEC_safe_push (rtx, heap, promoted_subreg[index], subreg);
+}
+
+/* Note promoted subregs in X. */
+
+static int
+note_promoted_subreg (rtx *x, void *y ATTRIBUTE_UNUSED)
+{
+ rtx subreg = *x;
+
+ if (promoted_subreg_p (subreg) && !fixed_promoted_subreg_p (subreg)
+ && REG_P (SUBREG_REG (subreg)))
+ register_promoted_subreg (subreg);
+
+ return 0;
+}
+
+/* Handle use X in pattern DATA noted by note_uses. */
+
+static void
+note_use (rtx *x, void *data)
+{
+ rtx use = *x;
+ rtx pattern = (rtx)data;
+ int use_size, use_offset;
+ unsigned int use_regno;
+ rtx set;
+ use_type *p;
+
+ for_each_rtx (x, note_promoted_subreg, NULL);
+
+ set = get_set (use, pattern);
+
+ switch (GET_CODE (use))
+ {
+ case REG:
+ case SUBREG:
+ if (!reg_use_p (use, &use_size, &use_regno, &use_offset))
+ {
+ note_embedded_uses (use, pattern);
+ return;
+ }
+ p = register_use (use_size, use_regno, use_offset, x);
+ register_prop (set, p);
+ return;
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ if (!reg_use_p (XEXP (use, 0), &use_size, &use_regno, &use_offset))
+ {
+ note_embedded_uses (use, pattern);
+ return;
+ }
+ p = register_use (use_size, use_regno, use_offset, x);
+ register_prop (set, p);
+ return;
+ case IOR:
+ case AND:
+ case XOR:
+ case PLUS:
+ case MINUS:
+ note_restricted_op_use (set, use, 2, pattern);
+ return;
+ case NOT:
+ case NEG:
+ note_restricted_op_use (set, use, 1, pattern);
+ return;
+ case ASHIFT:
+ if (!reg_use_p (XEXP (use, 0), &use_size, &use_regno, &use_offset)
+ || !CONST_INT_P (XEXP (use, 1))
+ || INTVAL (XEXP (use, 1)) <= 0
+ || paradoxical_subreg_p (XEXP (use, 0)))
+ {
+ note_embedded_uses (use, pattern);
+ return;
+ }
+ (void)register_use (use_size - INTVAL (XEXP (use, 1)), use_regno,
+ use_offset, x);
+ return;
+ default:
+ note_embedded_uses (use, pattern);
+ return;
+ }
+}
+
+/* Check whether reg REGNO is implicitly used. */
+
+static bool
+implicit_use_p (int regno ATTRIBUTE_UNUSED)
+{
+#ifdef EPILOGUE_USES
+ if (EPILOGUE_USES (regno))
+ return true;
+#endif
+
+#ifdef EH_USES
+ if (EH_USES (regno))
+ return true;
+#endif
+
+ return false;
+}
+
+/* Check whether reg REGNO should be skipped in analysis. */
+
+static bool
+skip_reg_p (int regno)
+{
+ /* TODO: handle hard registers. The problem with hard registers is that
+ a DI use of r0 can mean a 64bit use of r0 and a 32 bit use of r1.
+ We don't handle that properly. */
+ return implicit_use_p (regno) || HARD_REGISTER_NUM_P (regno);
+}
+
+/* Note the uses of argument registers in call INSN. */
+
+static void
+note_call_uses (rtx insn)
+{
+ rtx link, link_expr;
+
+ if (!CALL_P (insn))
+ return;
+
+ for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
+ {
+ link_expr = XEXP (link, 0);
+
+ if (GET_CODE (link_expr) == USE)
+ note_use (&XEXP (link_expr, 0), link);
+ }
+}
+
+/* Dump the biggest uses found. */
+
+static void
+dump_biggest_use (void)
+{
+ int i;
+
+ if (!dump_file)
+ return;
+
+ fprintf (dump_file, "biggest_use:\n");
+
+ for (i = 0; i < n_regs; i++)
+ if (biggest_use[i] > 0)
+ fprintf (dump_file, "reg %d: size %d\n", i, biggest_use[i]);
+
+ fprintf (dump_file, "\n");
+}
+
+/* Calculate the biggest use mode for all regs. */
+
+static void
+calculate_biggest_use (void)
+{
+ basic_block bb;
+ rtx insn;
+
+ /* For all insns, call note_use for each use in insn. */
+ FOR_EACH_BB (bb)
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ note_uses (&PATTERN (insn), note_use, PATTERN (insn));
+
+ if (CALL_P (insn))
+ note_call_uses (insn);
+ }
+
+ dump_biggest_use ();
+}
+
+/* Register a propagation USE in SET in the props vector. */
+
+static void
+register_prop (rtx set, use_type *use)
+{
+ prop_type *p;
+ int regno;
+
+ if (set == NULL_RTX || use == NULL)
+ return;
+
+ if (!REG_P (SET_DEST (set)))
+ return;
+
+ regno = REGNO (SET_DEST (set));
+
+ if (skip_reg_p (regno))
+ return;
+
+ if (props[regno] == NULL)
+ props[regno] = VEC_alloc (prop_type, heap, 4);
+
+ VEC_safe_push (prop_type, heap, props[regno], NULL);
+ p = VEC_last (prop_type, props[regno]);
+ p->set = set;
+ p->uses_regno = use->regno;
+ p->uses_index = VEC_length (use_type, uses[use->regno]) - 1;
+}
+
+/* Add REGNO to the worklist. */
+
+static void
+add_to_wl (int regno)
+{
+ if (in_wl[regno])
+ return;
+
+ if (biggest_use[regno] > 0
+ && biggest_use[regno] == GET_MODE_BITSIZE (PSEUDO_REGNO_MODE (regno)))
+ return;
+
+ if (VEC_empty (prop_type, props[regno]))
+ return;
+
+ if (propagated_size[regno] != NONE
+ && propagated_size[regno] == biggest_use[regno])
+ return;
+
+ VEC_safe_push (int, heap, wl, regno);
+ in_wl[regno] = true;
+}
+
+/* Pop a reg from the worklist and return it. */
+
+static int
+pop_wl (void)
+{
+ int regno = VEC_pop (int, wl);
+ in_wl[regno] = false;
+ return regno;
+}
+
+/* Propagate the use size DEST_SIZE of a reg to use P. */
+
+static int
+propagate_size (int dest_size, use_type *p)
+{
+ if (dest_size == 0)
+ return 0;
+
+ return p->offset + MIN (p->size - p->offset, dest_size);
+}
+
+/* Get the biggest use of REGNO from the uses vector. */
+
+static int
+get_biggest_use (unsigned int regno)
+{
+ int ix;
+ use_type *p;
+ int max = 0;
+
+ gcc_assert (uses[regno] != NULL);
+
+ FOR_EACH_VEC_ELT (use_type, uses[regno], ix, p)
+ max = MAX (max, p->size);
+
+ return max;
+}
+
+/* Propagate the use size DEST_SIZE of a reg to the uses in USE. */
+
+static void
+propagate_to_use (int dest_size, use_type *use)
+{
+ int new_use_size;
+ int prev_biggest_use;
+ int *current;
+
+ new_use_size = propagate_size (dest_size, use);
+
+ if (new_use_size >= use->size)
+ return;
+
+ use->size = new_use_size;
+
+ current = &biggest_use[use->regno];
+
+ prev_biggest_use = *current;
+ *current = get_biggest_use (use->regno);
+
+ if (*current >= prev_biggest_use)
+ return;
+
+ add_to_wl (use->regno);
+
+ if (dump_file)
+ fprintf (dump_file, "%d: %d -> %d\n", use->regno, prev_biggest_use,
+ *current);
+
+}
+
+/* Propagate the biggest use of a reg REGNO to all its uses, and note
+ propagations in NR_PROPAGATIONS. */
+
+static void
+propagate_to_uses (int regno, int *nr_propagations)
+{
+ int ix;
+ prop_type *p;
+
+ gcc_assert (!(propagated_size[regno] == NONE
+ && propagated_size[regno] == biggest_use[regno]));
+
+ FOR_EACH_VEC_ELT (prop_type, props[regno], ix, p)
+ {
+ use_type *use = VEC_index (use_type, uses[p->uses_regno], p->uses_index);
+ propagate_to_use (biggest_use[regno], use);
+ ++(*nr_propagations);
+ }
+
+ propagated_size[regno] = biggest_use[regno];
+}
+
+/* Improve biggest_use array iteratively. */
+
+static void
+propagate (void)
+{
+ int i;
+ int nr_propagations = 0;
+
+ /* Initialize work list. */
+
+ for (i = 0; i < n_regs; ++i)
+ add_to_wl (i);
+
+ /* Work the work list. */
+
+ if (dump_file)
+ fprintf (dump_file, "propagations: \n");
+ while (!VEC_empty (int, wl))
+ propagate_to_uses (pop_wl (), &nr_propagations);
+
+ if (dump_file)
+ fprintf (dump_file, "\nnr_propagations: %d\n\n", nr_propagations);
+}
+
+/* Check whether this is a sign/zero extension. */
+
+static bool
+extension_p (rtx insn, rtx *dest, rtx *inner, int *preserved_size)
+{
+ rtx src, op0;
+
+ /* Detect set of reg. */
+ if (GET_CODE (PATTERN (insn)) != SET)
+ return false;
+
+ src = SET_SRC (PATTERN (insn));
+ *dest = SET_DEST (PATTERN (insn));
+
+ if (!REG_P (*dest))
+ return false;
+
+ /* Detect sign or zero extension. */
+ if (GET_CODE (src) == ZERO_EXTEND || GET_CODE (src) == SIGN_EXTEND
+ || (GET_CODE (src) == AND && CONST_INT_P (XEXP (src, 1))))
+ {
+ op0 = XEXP (src, 0);
+
+ /* Determine amount of least significant bits preserved by operation. */
+ if (GET_CODE (src) == AND)
+ *preserved_size = ctz_hwi (~UINTVAL (XEXP (src, 1)));
+ else
+ *preserved_size = GET_MODE_BITSIZE (GET_MODE (op0));
+
+ if (GET_CODE (op0) == SUBREG)
+ {
+ if (subreg_lsb (op0) != 0)
+ return false;
+
+ *inner = SUBREG_REG (op0);
+
+ if (GET_MODE_CLASS (GET_MODE (*dest))
+ != GET_MODE_CLASS (GET_MODE (*inner)))
+ return false;
+
+ return true;
+ }
+ else if (REG_P (op0))
+ {
+ *inner = op0;
+
+ if (GET_MODE_CLASS (GET_MODE (*dest))
+ != GET_MODE_CLASS (GET_MODE (*inner)))
+ return false;
+
+ return true;
+ }
+ else if (GET_CODE (op0) == TRUNCATE)
+ {
+ *inner = XEXP (op0, 0);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Find extensions and store them in the extensions vector. */
+
+static bool
+find_extensions (void)
+{
+ basic_block bb;
+ rtx insn, dest, inner;
+ int preserved_size;
+
+ /* For all insns, call note_use for each use in insn. */
+ FOR_EACH_BB (bb)
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ if (!extension_p (insn, &dest, &inner, &preserved_size))
+ continue;
+
+ VEC_safe_push (rtx, heap, extensions, insn);
+
+ if (dump_file)
+ fprintf (dump_file,
+ "found extension %u with preserved size %d defining"
+ " reg %d\n",
+ INSN_UID (insn), preserved_size, REGNO (dest));
+ }
+
+ if (dump_file)
+ {
+ if (!VEC_empty (rtx, extensions))
+ fprintf (dump_file, "\n");
+ else
+ fprintf (dump_file, "no extensions found.\n");
+ }
+
+ return !VEC_empty (rtx, extensions);
+}
+
+/* Check whether this is a redundant sign/zero extension. */
+
+static bool
+redundant_extension_p (rtx insn, rtx *dest, rtx *inner, int *preserved_size)
+{
+ int biggest_dest_use;
+
+ if (!extension_p (insn, dest, inner, preserved_size))
+ gcc_unreachable ();
+
+ biggest_dest_use = biggest_use[REGNO (*dest)];
+
+ if (biggest_dest_use == SKIP_REG)
+ return false;
+
+ if (*preserved_size < biggest_dest_use)
+ return false;
+
+ return true;
+}
+
+/* Find the redundant extensions in the extensions vector and move them to the
+ redundant_extensions vector. */
+
+static void
+find_redundant_extensions (void)
+{
+ rtx insn, dest, inner;
+ int ix;
+ bool found = false;
+ int preserved_size;
+
+ FOR_EACH_VEC_ELT_REVERSE (rtx, extensions, ix, insn)
+ if (redundant_extension_p (insn, &dest, &inner, &preserved_size))
+ {
+ VEC_safe_push (rtx, heap, redundant_extensions, insn);
+ VEC_unordered_remove (rtx, extensions, ix);
+
+ if (dump_file)
+ fprintf (dump_file,
+ "found redundant extension %u with preserved size %d"
+ " defining reg %d\n",
+ INSN_UID (insn), preserved_size, REGNO (dest));
+ found = true;
+ }
+
+ if (dump_file && found)
+ fprintf (dump_file, "\n");
+}
+
+/* Reset promotion of subregs or REG. */
+
+static void
+reset_promoted_subreg (rtx reg)
+{
+ int ix;
+ rtx subreg;
+
+ if (promoted_subreg[REGNO (reg)] == NULL)
+ return;
+
+ FOR_EACH_VEC_ELT (rtx, promoted_subreg[REGNO (reg)], ix, subreg)
+ {
+ SUBREG_PROMOTED_UNSIGNED_SET (subreg, 0);
+ SUBREG_PROMOTED_VAR_P (subreg) = 0;
+ }
+
+ VEC_free (rtx, heap, promoted_subreg[REGNO (reg)]);
+}
+
+/* Try to remove or replace the redundant extension INSN which extends INNER and
+ writes to DEST. */
+
+static void
+try_remove_or_replace_extension (rtx insn, rtx dest, rtx inner)
+{
+ rtx cp_src, cp_dest, seq = NULL_RTX, one;
+
+ /* Check whether replacement is needed. */
+ if (dest != inner)
+ {
+ start_sequence ();
+
+ /* Determine the proper replacement operation. */
+ if (GET_MODE (dest) == GET_MODE (inner))
+ {
+ cp_src = inner;
+ cp_dest = dest;
+ }
+ else if (GET_MODE_SIZE (GET_MODE (dest))
+ > GET_MODE_SIZE (GET_MODE (inner)))
+ {
+ emit_clobber (dest);
+ cp_src = inner;
+ cp_dest = gen_lowpart_SUBREG (GET_MODE (inner), dest);
+ }
+ else
+ {
+ cp_src = gen_lowpart_SUBREG (GET_MODE (dest), inner);
+ cp_dest = dest;
+ }
+
+ emit_move_insn (cp_dest, cp_src);
+
+ seq = get_insns ();
+ end_sequence ();
+
+ /* If the replacement is not supported, bail out. */
+ for (one = seq; one != NULL_RTX; one = NEXT_INSN (one))
+ if (recog_memoized (one) < 0 && GET_CODE (PATTERN (one)) != CLOBBER)
+ return;
+
+ /* Insert the replacement. */
+ emit_insn_before (seq, insn);
+ }
+
+ /* Note replacement/removal in the dump. */
+ if (dump_file)
+ {
+ fprintf (dump_file, "redundant extension %u ", INSN_UID (insn));
+ if (dest != inner)
+ fprintf (dump_file, "replaced by %u\n", INSN_UID (seq));
+ else
+ fprintf (dump_file, "removed\n");
+ }
+
+ /* Remove the extension. */
+ delete_insn (insn);
+
+ reset_promoted_subreg (dest);
+}
+
+/* Setup the variables at the start of the pass. */
+
+static void
+init_pass (void)
+{
+ int i;
+
+ biggest_use = XNEWVEC (int, n_regs);
+ promoted_subreg = XCNEWVEC (VEC (rtx,heap) *, n_regs);
+ propagated_size = XNEWVEC (int, n_regs);
+
+ /* Initialize biggest_use for all regs to 0. If a reg is used implicitly, we
+ handle that reg conservatively and set it to SKIP_REG instead. */
+ for (i = 0; i < n_regs; i++)
+ {
+ biggest_use[i] = skip_reg_p (i) ? SKIP_REG : 0;
+ propagated_size[i] = NONE;
+ }
+
+ extensions = VEC_alloc (rtx, heap, 10);
+ redundant_extensions = VEC_alloc (rtx, heap, 10);
+
+ wl = VEC_alloc (int, heap, 50);
+ in_wl = XNEWVEC (bool, n_regs);
+
+ uses = XNEWVEC (typeof (*uses), n_regs);
+ props = XNEWVEC (typeof (*props), n_regs);
+
+ for (i = 0; i < n_regs; ++i)
+ {
+ uses[i] = NULL;
+ props[i] = NULL;
+ in_wl[i] = false;
+ }
+}
+
+/* Find redundant extensions and remove or replace them if possible. */
+
+static void
+remove_redundant_extensions (void)
+{
+ rtx insn, dest, inner;
+ int preserved_size;
+ int ix;
+
+ if (!find_extensions ())
+ return;
+
+ calculate_biggest_use ();
+
+ find_redundant_extensions ();
+
+ if (!VEC_empty (rtx, extensions))
+ {
+ propagate ();
+
+ find_redundant_extensions ();
+ }
+
+ gcc_checking_assert (n_regs == max_reg_num ());
+
+ FOR_EACH_VEC_ELT (rtx, redundant_extensions, ix, insn)
+ {
+ extension_p (insn, &dest, &inner, &preserved_size);
+ try_remove_or_replace_extension (insn, dest, inner);
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "\n");
+}
+
+/* Free the variables at the end of the pass. */
+
+static void
+finish_pass (void)
+{
+ int i;
+
+ XDELETEVEC (propagated_size);
+
+ VEC_free (rtx, heap, extensions);
+ VEC_free (rtx, heap, redundant_extensions);
+
+ VEC_free (int, heap, wl);
+
+ for (i = 0; i < n_regs; ++i)
+ {
+ if (uses[i] != NULL)
+ VEC_free (use_type, heap, uses[i]);
+
+ if (props[i] != NULL)
+ VEC_free (prop_type, heap, props[i]);
+ }
+
+ XDELETEVEC (uses);
+ XDELETEVEC (props);
+ XDELETEVEC (biggest_use);
+
+ for (i = 0; i < n_regs; ++i)
+ if (promoted_subreg[i] != NULL)
+ VEC_free (rtx, heap, promoted_subreg[i]);
+ XDELETEVEC (promoted_subreg);
+}
+
+/* Remove redundant extensions. */
+
+static unsigned int
+rest_of_handle_ee (void)
+{
+ n_regs = max_reg_num ();
+
+ init_pass ();
+ remove_redundant_extensions ();
+ finish_pass ();
+ return 0;
+}
+
+/* Run ee pass when flag_ee is set at optimization level > 0. */
+
+static bool
+gate_handle_ee (void)
+{
+ return (optimize > 0 && flag_ee);
+}
+
+struct rtl_opt_pass pass_ee =
+{
+ {
+ RTL_PASS,
+ "ee", /* name */
+ gate_handle_ee, /* gate */
+ rest_of_handle_ee, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_EE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_ggc_collect |
+ TODO_verify_rtl_sharing, /* todo_flags_finish */
+ }
+};
Index: gcc/common.opt
===================================================================
--- gcc/common.opt (revision 189409)
+++ gcc/common.opt (working copy)
@@ -1067,6 +1067,10 @@ feliminate-dwarf2-dups
Common Report Var(flag_eliminate_dwarf2_dups)
Perform DWARF2 duplicate elimination
+fextension-elimination
+Common Report Var(flag_ee) Init(0) Optimization
+Perform extension elimination
+
fipa-sra
Common Report Var(flag_ipa_sra) Init(0) Optimization
Perform interprocedural reduction of aggregates
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in (revision 189409)
+++ gcc/Makefile.in (working copy)
@@ -1218,6 +1218,7 @@ OBJS = \
dwarf2asm.o \
dwarf2cfi.o \
dwarf2out.o \
+ ee.o \
ebitmap.o \
emit-rtl.o \
et-forest.o \
@@ -2971,6 +2972,12 @@ cprop.o : cprop.c $(CONFIG_H) $(SYSTEM_H
$(TM_P_H) $(PARAMS_H) cselib.h $(EXCEPT_H) $(TREE_H) $(TIMEVAR_H) \
intl.h $(OBSTACK_H) $(TREE_PASS_H) $(DF_H) $(DBGCNT_H) $(TARGET_H) \
$(DF_H) $(CFGLOOP_H)
+ee.o : ee.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
+ hard-reg-set.h $(FLAGS_H) $(BASIC_BLOCK_H) $(FUNCTION_H) output.h \
+ $(DF_H) $(TIMEVAR_H) tree-pass.h $(RECOG_H) $(EXPR_H) \
+ $(REGS_H) $(TREE_H) $(TM_P_H) insn-config.h $(INSN_ATTR_H) $(TOPLEV_H) \
+ $(DIAGNOSTIC_CORE_H) $(TARGET_H) $(OPTABS_H) insn-codes.h rtlhooks-def.h \
+ $(PARAMS_H) $(CGRAPH_H)
gcse.o : gcse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
$(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(GGC_H) \
$(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) toplev.h $(DIAGNOSTIC_CORE_H) \
Index: gcc/passes.c
===================================================================
--- gcc/passes.c (revision 189409)
+++ gcc/passes.c (working copy)
@@ -1552,6 +1552,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_initialize_regs);
NEXT_PASS (pass_ud_rtl_dce);
NEXT_PASS (pass_combine);
+ NEXT_PASS (pass_ee);
NEXT_PASS (pass_if_after_combine);
NEXT_PASS (pass_partition_blocks);
NEXT_PASS (pass_regmove);