Re: [PATCH] Compute nonzero_bits from __builtin_unreachable assertions
- From: Richard Biener <rguenther@suse.de>
- To: Jakub Jelinek <jakub@redhat.com>
- Cc: gcc-patches@gcc.gnu.org
- Date: Tue, 29 Oct 2013 15:54:12 +0100 (CET)
- Subject: Re: [PATCH] Compute nonzero_bits from __builtin_unreachable assertions
- References: <20131025091925.GG30970@tucnak.zalov.cz> <20131025173844.GK30970@tucnak.zalov.cz> <20131025221933.GN30970@tucnak.zalov.cz>
On Sat, 26 Oct 2013, Jakub Jelinek wrote:
> Hi!
>
> And here is a patch that allows vectorization without peeling for alignment
> and without a scalar loop for the bound even for fn2, fn3 and fn4 in the
> following testcase, though, as with the range __builtin_unreachable ()
> notes, it is quite fragile: it only works if there are no immediate uses of
> the tested SSA_NAME before the assertion.  Perhaps a more reliable way would
> be to convert those assertions into __builtin_assume_aligned, but that has
> the disadvantage that its first argument is a pointer and it returns a
> pointer, so we'd need to cast integers to pointers and back, or add an
> ASSUME_ALIGNED internal function.
>
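A cast-based variant of the __builtin_assume_aligned workaround described
above might look roughly like the sketch below; the helper name
assume_mult_of_32 and the use of __UINTPTR_TYPE__ are illustrative only,
not part of the patch:

  /* Route an integer through __builtin_assume_aligned: cast to a
     pointer, assert 32-byte alignment, cast back.  The builtin takes
     and returns pointers, hence the round trip mentioned above.  */
  static inline int
  assume_mult_of_32 (int x)
  {
    void *p = (void *) (__UINTPTR_TYPE__) x;
    p = __builtin_assume_aligned (p, 32);
    return (int) (__UINTPTR_TYPE__) p;
  }
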
> Bootstrapped/regtested on x86_64-linux and i686-linux.
Ok.
Thanks,
Richard.
> int a[1024];
>
> void
> fn1 (int x, int y)
> {
>   int i;
>   x &= -32;
>   y &= -32;
>   for (i = x + 32; i < y; i++)
>     a[i]++;
> }
>
> void
> fn2 (int x, int y)
> {
>   int i;
>   if (x & 31)
>     __builtin_unreachable ();
>   if (y & 31)
>     __builtin_unreachable ();
>   for (i = x + 32; i < x + y; i++)
>     a[i]++;
> }
>
> void
> fn3 (int x, int y)
> {
>   int i;
>   if (x % 32)
>     __builtin_unreachable ();
>   if (y % 32)
>     __builtin_unreachable ();
>   for (i = x + 32; i < x + y; i++)
>     a[i]++;
> }
>
> void
> fn4 (int x, int y)
> {
>   int i;
>   if ((x % 32) != 0)
>     __builtin_unreachable ();
>   if ((y % 32) != 0)
>     __builtin_unreachable ();
>   for (i = x + 32; i < x + y; i++)
>     a[i]++;
> }
>
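As a quick way to see the effect, the testcase (saved as, say, test.c)
can be compiled with the vectorizer dump enabled, e.g.

  gcc -O3 -fdump-tree-vect-details -c test.c

and the resulting test.c.*.vect dump inspected for the "vectorized"
notes on the loops in fn2 through fn4 and for the absence of the
alignment-peeling prologue loops.
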
> 2013-10-25  Jakub Jelinek  <jakub@redhat.com>
>
>         * tree-vrp.c (maybe_set_nonzero_bits): New function.
>         (remove_range_assertions): Call it.
>
> --- gcc/tree-vrp.c.jj	2013-10-24 14:32:29.000000000 +0200
> +++ gcc/tree-vrp.c	2013-10-25 21:21:35.183092937 +0200
> @@ -6459,6 +6459,60 @@ check_all_array_refs (void)
>      }
>  }
>
> +/* Handle
> +   _4 = x_3 & 31;
> +   if (_4 != 0)
> +     goto <bb 6>;
> +   else
> +     goto <bb 7>;
> +   <bb 6>:
> +   __builtin_unreachable ();
> +   <bb 7>:
> +   x_5 = ASSERT_EXPR <x_3, ...>;
> +   If x_3 has no other immediate uses (checked by caller),
> +   var is the x_3 var from ASSERT_EXPR, we can clear low 5 bits
> +   from the non-zero bitmask.  */
> +
> +static void
> +maybe_set_nonzero_bits (basic_block bb, tree var)
> +{
> +  edge e = single_pred_edge (bb);
> +  basic_block cond_bb = e->src;
> +  gimple stmt = last_stmt (cond_bb);
> +  tree cst;
> +
> +  if (stmt == NULL
> +      || gimple_code (stmt) != GIMPLE_COND
> +      || gimple_cond_code (stmt) != ((e->flags & EDGE_TRUE_VALUE)
> +                                     ? EQ_EXPR : NE_EXPR)
> +      || TREE_CODE (gimple_cond_lhs (stmt)) != SSA_NAME
> +      || !integer_zerop (gimple_cond_rhs (stmt)))
> +    return;
> +
> +  stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
> +  if (!is_gimple_assign (stmt)
> +      || gimple_assign_rhs_code (stmt) != BIT_AND_EXPR
> +      || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST)
> +    return;
> +  if (gimple_assign_rhs1 (stmt) != var)
> +    {
> +      gimple stmt2;
> +
> +      if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
> +        return;
> +      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
> +      if (!gimple_assign_cast_p (stmt2)
> +          || gimple_assign_rhs1 (stmt2) != var
> +          || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt2))
> +          || (TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (stmt)))
> +              != TYPE_PRECISION (TREE_TYPE (var))))
> +        return;
> +    }
> +  cst = gimple_assign_rhs2 (stmt);
> +  set_nonzero_bits (var, (get_nonzero_bits (var)
> +                          & ~tree_to_double_int (cst)));
> +}
> +
>  /* Convert range assertion expressions into the implied copies and
>     copy propagate away the copies.  Doing the trivial copy propagation
>     here avoids the need to run the full copy propagation pass after
> @@ -6576,8 +6630,11 @@ remove_range_assertions (void)
>                  }
>              }
>            if (ok)
> -            set_range_info (var, SSA_NAME_RANGE_INFO (lhs)->min,
> -                            SSA_NAME_RANGE_INFO (lhs)->max);
> +            {
> +              set_range_info (var, SSA_NAME_RANGE_INFO (lhs)->min,
> +                              SSA_NAME_RANGE_INFO (lhs)->max);
> +              maybe_set_nonzero_bits (bb, var);
> +            }
>          }
>      }
>
>
> Jakub
>
>
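In plain bit arithmetic, the mask update at the end of
maybe_set_nonzero_bits amounts to the following sketch (standalone C,
with uint64_t standing in for the patch's double_int; the function name
update_nonzero_bits is illustrative only):

  #include <stdint.h>

  /* If the guarding condition proves (var & cst) == 0, every bit set
     in cst is known to be zero in var, so those bits can be cleared
     from the "possibly non-zero" mask; e.g. cst == 31 clears the low
     5 bits.  */
  static uint64_t
  update_nonzero_bits (uint64_t nonzero_bits, uint64_t cst)
  {
    return nonzero_bits & ~cst;
  }
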
--
Richard Biener <rguenther@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer