This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: combine permutations in gimple
On Sat, Aug 11, 2012 at 2:25 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Fri, 10 Aug 2012, Marc Glisse wrote:
>
>> this patch detects permutations of permutations and merges them. It also
>> canonicalizes permutations a bit more.
>>
>> There are several issues with this patch:
>>
>> 1) I am not sure we always want to combine permutations. Indeed, someone
>> (user? vectorizer?) may have written 2 permutations to help the backend
>> generate optimal code, whereas it will do a bad job on the complicated
>> combined permutation. At tree level, I am not sure what can be done except
>> possibly restrict the optimization to the case where the combined
>> permutation is the identity. At the RTL level, we could compare what insns
>> are generated, but that's going to be painful (doing anything with
>> permutations at the RTL level is painful).
>>
>> 2) I don't understand how the cleanup works in tree-ssa-forwprop.c. I
>> copied some fold/update/remove lines from other simplifications, but I don't
>> know if they are right.
>>
>> 3) In a first version of the patch, where I had SSA_NAME_DEF_STMT instead
>> of get_prop_source_stmt, I got an ICE in one of the torture vectorization
>> testcases. It happened in expand_expr_real_2, because it asserts that
>> expand_vec_perm will never fail. However, expand_vec_perm does return
>> NULL_RTX sometimes. Here it was in V16QImode, with the permutation
>> {0,0,2,2,4,...,14,14}. maybe_expand_insn can't handle it directly (that's ok
>> I guess), but then expand_vec_perm sees VOIDmode and exits instead of trying
>> other ways. I don't know if there is a latent bug or if (more likely) my
>> patch may produce trees with wrong modes.
>>
>> 4) Is this the right place?
>>
>> This isn't the transformation I am most interested in, but it is a first
>> step to see if the direction is right.
>
>
> Hello,
>
> there was yet another issue with the version I posted: the testcase was
> trivial so I didn't notice that it didn't perform the optimization at all...
>
> Here is a new version. It seems a bit strange to me that there are plenty of
> functions that check for single-use variables, but there isn't any that
> checks for variables used in a single statement (but possibly twice). So I
> may be doing something strange, but it seems to be the natural test here.
>
> Happily passed bootstrap+regtest.
>
> 2012-08-11 Marc Glisse <marc.glisse@inria.fr>
>
>
> gcc/
> * fold-const.c (fold_ternary_loc): Detect identity permutations.
> Canonicalize permutations more.
> * tree-ssa-forwprop.c (is_only_used_in): New function.
> (simplify_permutation): Likewise.
>
> (ssa_forward_propagate_and_combine): Call it.
>
> gcc/testsuite/
> * gcc.dg/tree-ssa/forwprop-19.c: New testcase.
>
> --
> Marc Glisse
>
> Index: gcc/tree-ssa-forwprop.c
> ===================================================================
> --- gcc/tree-ssa-forwprop.c (revision 190311)
> +++ gcc/tree-ssa-forwprop.c (working copy)
> @@ -2569,20 +2569,97 @@ combine_conversions (gimple_stmt_iterato
> gimple_assign_set_rhs_code (stmt, CONVERT_EXPR);
> update_stmt (stmt);
> return remove_prop_source_from_use (op0) ? 2 : 1;
> }
> }
> }
>
> return 0;
> }
>
> +/* Return true if VAR has no nondebug use but in stmt. */
> +static bool
> +is_only_used_in (const_tree var, const_gimple stmt)
> +{
> + const ssa_use_operand_t *const ptr0 = &(SSA_NAME_IMM_USE_NODE (var));
> + const ssa_use_operand_t *ptr = ptr0->next;
> +
> + for (; ptr != ptr0; ptr = ptr->next)
> + {
> + const_gimple use_stmt = USE_STMT (ptr);
> + if (is_gimple_debug (use_stmt))
> + continue;
> + if (use_stmt != stmt)
> + return false;
> + }
> + return true;
> +}
if (!single_imm_use (var, &use, &use_stmt) || use_stmt != stmt)
instead of the above.
> +/* Combine two shuffles in a row. Returns 1 if there were any changes
> + made, 2 if cfg-cleanup needs to run. Else it returns 0. */
> +
> +static int
> +simplify_permutation (gimple_stmt_iterator *gsi)
> +{
> + gimple stmt = gsi_stmt (*gsi);
> + gimple def_stmt;
> + tree op0, op1, op2, op3, mask;
> + enum tree_code code = gimple_assign_rhs_code (stmt);
> + enum tree_code code2;
> + location_t loc = gimple_location (stmt);
> +
> + gcc_checking_assert (code == VEC_PERM_EXPR);
> +
> + op0 = gimple_assign_rhs1 (stmt);
> + op1 = gimple_assign_rhs2 (stmt);
> + op2 = gimple_assign_rhs3 (stmt);
> +
> + if (TREE_CODE (op0) != SSA_NAME)
> + return 0;
> +
> + if (TREE_CODE (op2) != VECTOR_CST)
> + return 0;
> +
> + def_stmt = SSA_NAME_DEF_STMT (op0);
> + if (!def_stmt || !is_gimple_assign (def_stmt)
> + || !can_propagate_from (def_stmt)
> + || !is_only_used_in (op0, stmt))
Or rather than the above, simply check
|| !has_single_use (op0)
here.
> + return 0;
> +
> + /* Check that it is only used here. We cannot use has_single_use
> + since the expression is using it twice itself... */
Ah ... so then
|| num_imm_uses (op0) != 2
> + code2 = gimple_assign_rhs_code (def_stmt);
> +
> + /* Two consecutive shuffles. */
> + if (code2 == VEC_PERM_EXPR)
> + {
> + op3 = gimple_assign_rhs3 (def_stmt);
> + if (TREE_CODE (op3) != VECTOR_CST)
> + return 0;
> + mask = fold_build3_loc (loc, VEC_PERM_EXPR, TREE_TYPE (op3),
> + op3, op3, op2);
> + gcc_assert (TREE_CODE (mask) == VECTOR_CST);
which means you can use
mask = fold_ternary (loc, ...);
Ok with that changes.
Thanks,
Richard.
> + gimple_assign_set_rhs1 (stmt, gimple_assign_rhs1 (def_stmt));
> + gimple_assign_set_rhs2 (stmt, gimple_assign_rhs2 (def_stmt));
> + gimple_assign_set_rhs3 (stmt, mask);
> + fold_stmt_inplace (gsi);
> + update_stmt (stmt);
> + return remove_prop_source_from_use (op0) ? 2 : 1;
> + }
> +
> + return false;
> +}
> +
> /* Main entry point for the forward propagation and statement combine
> optimizer. */
>
> static unsigned int
> ssa_forward_propagate_and_combine (void)
> {
> basic_block bb;
> unsigned int todoflags = 0;
>
> cfg_changed = false;
> @@ -2731,20 +2808,27 @@ ssa_forward_propagate_and_combine (void)
> changed = associate_pointerplus (&gsi);
> else if (CONVERT_EXPR_CODE_P (code)
> || code == FLOAT_EXPR
> || code == FIX_TRUNC_EXPR)
> {
> int did_something = combine_conversions (&gsi);
> if (did_something == 2)
> cfg_changed = true;
> changed = did_something != 0;
> }
> + else if (code == VEC_PERM_EXPR)
> + {
> + int did_something = simplify_permutation (&gsi);
> + if (did_something == 2)
> + cfg_changed = true;
> + changed = did_something != 0;
> + }
> break;
> }
>
> case GIMPLE_SWITCH:
> changed = simplify_gimple_switch (stmt);
> break;
>
> case GIMPLE_COND:
> {
> int did_something;
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c (revision 190311)
> +++ gcc/fold-const.c (working copy)
> @@ -14148,58 +14148,96 @@ fold_ternary_loc (location_t loc, enum t
> return fold_build2_loc (loc, PLUS_EXPR, type,
> const_binop (MULT_EXPR, arg0, arg1), arg2);
> if (integer_zerop (arg2))
> return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
>
> return fold_fma (loc, type, arg0, arg1, arg2);
>
> case VEC_PERM_EXPR:
> if (TREE_CODE (arg2) == VECTOR_CST)
> {
> - unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
> + unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask;
> unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
> tree t;
> bool need_mask_canon = false;
> + bool all_in_vec0 = true;
> + bool all_in_vec1 = true;
> + bool maybe_identity = true;
> + bool single_arg = (op0 == op1);
> + bool changed = false;
>
> + mask = single_arg ? (nelts - 1) : (2 * nelts - 1);
> gcc_assert (nelts == VECTOR_CST_NELTS (arg2));
> for (i = 0; i < nelts; i++)
> {
> tree val = VECTOR_CST_ELT (arg2, i);
> if (TREE_CODE (val) != INTEGER_CST)
> return NULL_TREE;
>
> - sel[i] = TREE_INT_CST_LOW (val) & (2 * nelts - 1);
> + sel[i] = TREE_INT_CST_LOW (val) & mask;
> if (TREE_INT_CST_HIGH (val)
> || ((unsigned HOST_WIDE_INT)
> TREE_INT_CST_LOW (val) != sel[i]))
> need_mask_canon = true;
> +
> + if (sel[i] < nelts)
> + all_in_vec1 = false;
> + else
> + all_in_vec0 = false;
> +
> + if ((sel[i] & (nelts-1)) != i)
> + maybe_identity = false;
> + }
> +
> + if (maybe_identity)
> + {
> + if (all_in_vec0)
> + return op0;
> + if (all_in_vec1)
> + return op1;
> }
>
> if ((TREE_CODE (arg0) == VECTOR_CST
> || TREE_CODE (arg0) == CONSTRUCTOR)
> && (TREE_CODE (arg1) == VECTOR_CST
> || TREE_CODE (arg1) == CONSTRUCTOR))
> {
> t = fold_vec_perm (type, arg0, arg1, sel);
> if (t != NULL_TREE)
> return t;
> }
>
> + if (all_in_vec0)
> + op1 = op0;
> + else if (all_in_vec1)
> + {
> + op0 = op1;
> + for (i = 0; i < nelts; i++)
> + sel[i] -= nelts;
> + need_mask_canon = true;
> + }
> +
> + if (op0 == op1 && !single_arg)
> + changed = true;
> +
> if (need_mask_canon && arg2 == op2)
> {
> tree *tsel = XALLOCAVEC (tree, nelts);
> tree eltype = TREE_TYPE (TREE_TYPE (arg2));
> for (i = 0; i < nelts; i++)
> tsel[i] = build_int_cst (eltype, sel[i]);
> - t = build_vector (TREE_TYPE (arg2), tsel);
> - return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, t);
> + op2 = build_vector (TREE_TYPE (arg2), tsel);
> + changed = true;
> }
> +
> + if (changed)
> + return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2);
> }
> return NULL_TREE;
>
> default:
> return NULL_TREE;
> } /* switch (code) */
> }
>
> /* Perform constant folding and related simplification of EXPR.
> The related simplifications include x*1 => x, x*0 => 0, etc.,
> Index: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c (revision 0)
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-forwprop2" } */
> +
> +typedef int vec __attribute__((vector_size (2 * sizeof (int))));
> +void f (vec *x1, vec *x2)
> +{
> + vec m = { 3, 1 };
> + vec n = { 1, 8 };
> + vec y = __builtin_shuffle (*x1, *x2, n);
> + vec z = __builtin_shuffle (y, m);
> + *x1 = z;
> +}
> +
> +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 0, 0 }" "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "forwprop2" } } */
> +/* { dg-final { scan-tree-dump-times "x2" 1 "forwprop2" } } */
> +/* { dg-final { cleanup-tree-dump "forwprop2" } } */
>
> Property changes on: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c
> ___________________________________________________________________
> Added: svn:eol-style
> + native
> Added: svn:keywords
> + Author Date Id Revision URL
>
>