Re: [PATCH][1/n] Remove GENERIC stmt combining from SCCVN
- From: Richard Biener <rguenther@suse.de>
- To: gcc-patches@gcc.gnu.org
- Date: Thu, 25 Jun 2015 15:59:57 +0200 (CEST)
- Subject: Re: [PATCH][1/n] Remove GENERIC stmt combining from SCCVN
- References: <alpine.LSU.2.11.1506251514160.26650@zhemvz.fhfr.qr>
On Thu, 25 Jun 2015, Richard Biener wrote:
>
> A first pattern moved from fold-const.c, albeit a very twisted one.
> It triggers in gcc.dg/tree-ssa/pr52631.c.
>
> I've done a nearly 1:1 translation (cut&paste, converting the code
> structure to with/if expressions and then replacing the tree ops
> accordingly).
>
> One might question whether the narrowing case should better be split
> out generically, that is, convert X & CST to a narrowed X if CST
> masks out the upper bits (independent of the operation generating X);
> a sketch of that idea follows the quoted message.
>
> I'll leave any such improvements for followups.
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
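(Not part of the patch: a minimal standalone C sketch of the narrowing
idea quoted above.  When CST is the full mask of some narrower type,
X & CST can be computed by truncating X to that type and widening the
result back, independent of the operation that produced X.  The
function names are made up for illustration.

#include <assert.h>

/* X & CST with CST == (1 << 8) - 1, i.e. the full mask of unsigned char.  */
static unsigned int
mask_via_and (unsigned int x)
{
  return x & 0xff;
}

/* The narrowed form: truncate to unsigned char, widen back.  */
static unsigned int
mask_via_narrowing (unsigned int x)
{
  return (unsigned char) x;
}

int
main (void)
{
  for (unsigned int x = 0; x < 0x20000; x += 0x55)
    assert (mask_via_and (x) == mask_via_narrowing (x));
  return 0;
})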
Applied with the additional testsuite change

* gcc.dg/tree-ssa/pr52631.c: Disable forwprop.

since with forwprop enabled the new match.pd pattern would already
perform the combining before FRE1 runs, and the FRE1 dump scan would
no longer see it.
Index: gcc/testsuite/gcc.dg/tree-ssa/pr52631.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr52631.c (revision 224893)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr52631.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-fre1-details" } */
+/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-fre1-details" } */
unsigned f(unsigned a)
{
> Richard.
>
> 2015-06-25 Richard Biener <rguenther@suse.de>
>
> * fold-const.c (fold_binary_loc): Move simplification of
> (X <<>> C1) & C2 ...
> * match.pd: ... here.
>
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c (revision 224893)
> +++ gcc/fold-const.c (working copy)
> @@ -11516,106 +11516,6 @@ fold_binary_loc (location_t loc,
> return build_int_cst (type, residue & low);
> }
>
> - /* Fold (X << C1) & C2 into (X << C1) & (C2 | ((1 << C1) - 1))
> - (X >> C1) & C2 into (X >> C1) & (C2 | ~((type) -1 >> C1))
> - if the new mask might be further optimized. */
> - if ((TREE_CODE (arg0) == LSHIFT_EXPR
> - || TREE_CODE (arg0) == RSHIFT_EXPR)
> - && TYPE_PRECISION (TREE_TYPE (arg0)) <= HOST_BITS_PER_WIDE_INT
> - && TREE_CODE (arg1) == INTEGER_CST
> - && tree_fits_uhwi_p (TREE_OPERAND (arg0, 1))
> - && tree_to_uhwi (TREE_OPERAND (arg0, 1)) > 0
> - && (tree_to_uhwi (TREE_OPERAND (arg0, 1))
> - < TYPE_PRECISION (TREE_TYPE (arg0))))
> - {
> - unsigned int shiftc = tree_to_uhwi (TREE_OPERAND (arg0, 1));
> - unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg1);
> - unsigned HOST_WIDE_INT newmask, zerobits = 0;
> - tree shift_type = TREE_TYPE (arg0);
> -
> - if (TREE_CODE (arg0) == LSHIFT_EXPR)
> - zerobits = ((((unsigned HOST_WIDE_INT) 1) << shiftc) - 1);
> - else if (TREE_CODE (arg0) == RSHIFT_EXPR
> - && TYPE_PRECISION (TREE_TYPE (arg0))
> - == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (arg0))))
> - {
> - prec = TYPE_PRECISION (TREE_TYPE (arg0));
> - tree arg00 = TREE_OPERAND (arg0, 0);
> - /* See if more bits can be proven as zero because of
> - zero extension. */
> - if (TREE_CODE (arg00) == NOP_EXPR
> - && TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (arg00, 0))))
> - {
> - tree inner_type = TREE_TYPE (TREE_OPERAND (arg00, 0));
> - if (TYPE_PRECISION (inner_type)
> - == GET_MODE_PRECISION (TYPE_MODE (inner_type))
> - && TYPE_PRECISION (inner_type) < prec)
> - {
> - prec = TYPE_PRECISION (inner_type);
> - /* See if we can shorten the right shift. */
> - if (shiftc < prec)
> - shift_type = inner_type;
> - /* Otherwise X >> C1 is all zeros, so we'll optimize
> - it into (X, 0) later on by making sure zerobits
> - is all ones. */
> - }
> - }
> - zerobits = ~(unsigned HOST_WIDE_INT) 0;
> - if (shiftc < prec)
> - {
> - zerobits >>= HOST_BITS_PER_WIDE_INT - shiftc;
> - zerobits <<= prec - shiftc;
> - }
> - /* For arithmetic shift if sign bit could be set, zerobits
> - can contain actually sign bits, so no transformation is
> - possible, unless MASK masks them all away. In that
> - case the shift needs to be converted into logical shift. */
> - if (!TYPE_UNSIGNED (TREE_TYPE (arg0))
> - && prec == TYPE_PRECISION (TREE_TYPE (arg0)))
> - {
> - if ((mask & zerobits) == 0)
> - shift_type = unsigned_type_for (TREE_TYPE (arg0));
> - else
> - zerobits = 0;
> - }
> - }
> -
> - /* ((X << 16) & 0xff00) is (X, 0). */
> - if ((mask & zerobits) == mask)
> - return omit_one_operand_loc (loc, type,
> - build_int_cst (type, 0), arg0);
> -
> - newmask = mask | zerobits;
> - if (newmask != mask && (newmask & (newmask + 1)) == 0)
> - {
> - /* Only do the transformation if NEWMASK is some integer
> - mode's mask. */
> - for (prec = BITS_PER_UNIT;
> - prec < HOST_BITS_PER_WIDE_INT; prec <<= 1)
> - if (newmask == (((unsigned HOST_WIDE_INT) 1) << prec) - 1)
> - break;
> - if (prec < HOST_BITS_PER_WIDE_INT
> - || newmask == ~(unsigned HOST_WIDE_INT) 0)
> - {
> - tree newmaskt;
> -
> - if (shift_type != TREE_TYPE (arg0))
> - {
> - tem = fold_build2_loc (loc, TREE_CODE (arg0), shift_type,
> - fold_convert_loc (loc, shift_type,
> - TREE_OPERAND (arg0, 0)),
> - TREE_OPERAND (arg0, 1));
> - tem = fold_convert_loc (loc, type, tem);
> - }
> - else
> - tem = op0;
> - newmaskt = build_int_cst_type (TREE_TYPE (op1), newmask);
> - if (!tree_int_cst_equal (newmaskt, arg1))
> - return fold_build2_loc (loc, BIT_AND_EXPR, type, tem, newmaskt);
> - }
> - }
> - }
> -
> goto associate;
>
> case RDIV_EXPR:
> Index: gcc/match.pd
> ===================================================================
> --- gcc/match.pd (revision 224893)
> +++ gcc/match.pd (working copy)
> @@ -738,6 +755,97 @@ (define_operator_list swapped_tcc_compar
> && wi::eq_p (wi::lshift (@0, cand), @2))
> (cmp @1 { build_int_cst (TREE_TYPE (@1), cand); })))))
>
> +/* Fold (X << C1) & C2 into (X << C1) & (C2 | ((1 << C1) - 1))
> + (X >> C1) & C2 into (X >> C1) & (C2 | ~((type) -1 >> C1))
> + if the new mask might be further optimized. */
> +(for shift (lshift rshift)
> + (simplify
> + (bit_and (convert?@4 (shift@5 (convert1?@3 @0) INTEGER_CST@1)) INTEGER_CST@2)
> + (if (tree_nop_conversion_p (TREE_TYPE (@4), TREE_TYPE (@5))
> + && TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
> + && tree_fits_uhwi_p (@1)
> + && tree_to_uhwi (@1) > 0
> + && tree_to_uhwi (@1) < TYPE_PRECISION (type))
> + (with
> + {
> + unsigned int shiftc = tree_to_uhwi (@1);
> + unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (@2);
> + unsigned HOST_WIDE_INT newmask, zerobits = 0;
> + tree shift_type = TREE_TYPE (@3);
> + unsigned int prec;
> +
> + if (shift == LSHIFT_EXPR)
> + zerobits = ((((unsigned HOST_WIDE_INT) 1) << shiftc) - 1);
> + else if (shift == RSHIFT_EXPR
> + && (TYPE_PRECISION (shift_type)
> + == GET_MODE_PRECISION (TYPE_MODE (shift_type))))
> + {
> + prec = TYPE_PRECISION (TREE_TYPE (@3));
> + tree arg00 = @0;
> + /* See if more bits can be proven as zero because of
> + zero extension. */
> + if (@3 != @0
> + && TYPE_UNSIGNED (TREE_TYPE (@0)))
> + {
> + tree inner_type = TREE_TYPE (@0);
> + if ((TYPE_PRECISION (inner_type)
> + == GET_MODE_PRECISION (TYPE_MODE (inner_type)))
> + && TYPE_PRECISION (inner_type) < prec)
> + {
> + prec = TYPE_PRECISION (inner_type);
> + /* See if we can shorten the right shift. */
> + if (shiftc < prec)
> + shift_type = inner_type;
> + /* Otherwise X >> C1 is all zeros, so we'll optimize
> + it into (X, 0) later on by making sure zerobits
> + is all ones. */
> + }
> + }
> + zerobits = ~(unsigned HOST_WIDE_INT) 0;
> + if (shiftc < prec)
> + {
> + zerobits >>= HOST_BITS_PER_WIDE_INT - shiftc;
> + zerobits <<= prec - shiftc;
> + }
> + /* For arithmetic shift if sign bit could be set, zerobits
> + can contain actually sign bits, so no transformation is
> + possible, unless MASK masks them all away. In that
> + case the shift needs to be converted into logical shift. */
> + if (!TYPE_UNSIGNED (TREE_TYPE (@3))
> + && prec == TYPE_PRECISION (TREE_TYPE (@3)))
> + {
> + if ((mask & zerobits) == 0)
> + shift_type = unsigned_type_for (TREE_TYPE (@3));
> + else
> + zerobits = 0;
> + }
> + }
> + }
> + /* ((X << 16) & 0xff00) is (X, 0). */
> + (if ((mask & zerobits) == mask)
> + { build_int_cst (type, 0); })
> + (with { newmask = mask | zerobits; }
> + (if (newmask != mask && (newmask & (newmask + 1)) == 0)
> + (with
> + {
> + /* Only do the transformation if NEWMASK is some integer
> + mode's mask. */
> + for (prec = BITS_PER_UNIT;
> + prec < HOST_BITS_PER_WIDE_INT; prec <<= 1)
> + if (newmask == (((unsigned HOST_WIDE_INT) 1) << prec) - 1)
> + break;
> + }
> + (if (prec < HOST_BITS_PER_WIDE_INT
> + || newmask == ~(unsigned HOST_WIDE_INT) 0)
> + (with
> + { tree newmaskt = build_int_cst_type (TREE_TYPE (@2), newmask); }
> + (if (!tree_int_cst_equal (newmaskt, @2))
> + (if (shift_type != TREE_TYPE (@3)
> + && single_use (@4) && single_use (@5))
> + (bit_and (convert (shift:shift_type (convert @3) @1)) { newmaskt; }))
> + (if (shift_type == TREE_TYPE (@3))
> + (bit_and @4 { newmaskt; }))))))))))))
> +
> /* Simplifications of conversions. */
>
> /* Basic strip-useless-type-conversions / strip_nops. */
>
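(For illustration only, not part of the patch: a standalone C check of
the identities the pattern's comment states.  The low C1 bits of
X << C1, and for unsigned X the high C1 bits of X >> C1, are known
zero, so OR-ing them into the mask C2 cannot change the result; the
transform fires when the widened mask becomes an all-ones mask.

#include <assert.h>

int
main (void)
{
  const unsigned int c1 = 4;
  const unsigned int c2 = 0xff0;

  for (unsigned int x = 0; x < 0x10000; x += 0x33)
    {
      /* (X << C1) & C2 == (X << C1) & (C2 | ((1 << C1) - 1)):
         here 0xff0 is widened to 0xfff, an all-ones mask.  */
      assert (((x << c1) & c2) == ((x << c1) & (c2 | ((1u << c1) - 1))));

      /* (X >> C1) & C2 == (X >> C1) & (C2 | ~(~0u >> C1)) for unsigned X.  */
      assert (((x >> c1) & c2) == ((x >> c1) & (c2 | ~(~0u >> c1))));

      /* The (X, 0) case from the pattern: (X << 16) & 0xff00 is always 0,
         since the low 16 bits of X << 16 are zero.  */
      assert (((x << 16) & 0xff00u) == 0);
    }
  return 0;
})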
--
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu, Graham Norton, HRB 21284 (AG Nuernberg)