This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fold VEC_PERM_EXPR/VEC_INTERLEAVE*EXPR/VEC_EXTRACT*EXPR with VECTOR_CST/CONSTRUCTOR arguments (PR tree-optimization/51074, take 2)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Henderson <rth at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Fri, 11 Nov 2011 16:24:54 +0100
- Subject: [PATCH] Fold VEC_PERM_EXPR/VEC_INTERLEAVE*EXPR/VEC_EXTRACT*EXPR with VECTOR_CST/CONSTRUCTOR arguments (PR tree-optimization/51074, take 2)
- References: <20111110190945.GM27242@tyan-ft48-01.lab.bos.redhat.com> <4EBC2D75.5080700@redhat.com>
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
On Thu, Nov 10, 2011 at 12:00:53PM -0800, Richard Henderson wrote:
> VEC_PERM_EXPR is explicitly modulo. Don't fail, mask.
Here is an updated patch.
In addition to the creation of subroutines this performs the permutation
folding using an unsigned char array for the selector and folds VEC_PERM_EXPR
with out of bounds constant indices to masked ones if the first two
arguments aren't constants.
The valid_gimple_rhs_p change was needed because otherwise the VEC_PERM_EXPR
to VEC_PERM_EXPR with masked indices folding wasn't applied, and
the tree-vect-generic change was needed because if for whatever reason
the VEC_PERM_EXPR folding doesn't happen, lower_vec_perm wasn't masking it
and the i386 target hook would ICE on it.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2011-11-11 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/51074
* fold-const.c (vec_cst_ctor_to_array, fold_vec_perm): New functions.
(fold_binary_loc): Handle VEC_EXTRACT_EVEN_EXPR,
VEC_EXTRACT_ODD_EXPR, VEC_INTERLEAVE_HIGH_EXPR and
VEC_INTERLEAVE_LOW_EXPR with VECTOR_CST or CONSTRUCTOR operands.
(fold_ternary_loc): Handle VEC_PERM_EXPR with VECTOR_CST or
CONSTRUCTOR operands.
* tree-ssa-propagate.c (valid_gimple_rhs_p): Handle ternary
expressions.
* tree-vect-generic.c (lower_vec_perm): Mask sel_int elements
to 0 .. 2 * elements - 1.
--- gcc/fold-const.c.jj 2011-11-10 18:08:54.328438535 +0100
+++ gcc/fold-const.c 2011-11-11 11:23:09.393186463 +0100
@@ -9528,6 +9528,86 @@ get_pointer_modulus_and_residue (tree ex
return 1;
}
+/* Helper function for fold_vec_perm. Store elements of VECTOR_CST or
+ CONSTRUCTOR ARG into array ELTS and return true if successful. */
+
+static bool
+vec_cst_ctor_to_array (tree arg, tree *elts)
+{
+ unsigned int nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)), i;
+
+ if (TREE_CODE (arg) == VECTOR_CST)
+ {
+ tree t;
+
+ for (i = 0, t = TREE_VECTOR_CST_ELTS (arg);
+ i < nelts && t; i++, t = TREE_CHAIN (t))
+ elts[i] = TREE_VALUE (t);
+ if (t)
+ return false;
+ }
+ else if (TREE_CODE (arg) == CONSTRUCTOR)
+ {
+ constructor_elt *elt;
+
+ FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (arg), i, elt)
+ if (i >= nelts)
+ return false;
+ else
+ elts[i] = elt->value;
+ }
+ else
+ return false;
+ for (; i < nelts; i++)
+ elts[i]
+ = fold_convert (TREE_TYPE (TREE_TYPE (arg)), integer_zero_node);
+ return true;
+}
+
+/* Attempt to fold vector permutation of ARG0 and ARG1 vectors using SEL
+ selector. Return the folded VECTOR_CST or CONSTRUCTOR if successful,
+ NULL_TREE otherwise. */
+
+static tree
+fold_vec_perm (tree type, tree arg0, tree arg1, const unsigned char *sel)
+{
+ unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+ tree *elts;
+ bool need_ctor = false;
+
+ gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts
+ && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) == nelts);
+ if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type)
+ || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type))
+ return NULL_TREE;
+
+ elts = XALLOCAVEC (tree, nelts * 3);
+ if (!vec_cst_ctor_to_array (arg0, elts)
+ || !vec_cst_ctor_to_array (arg1, elts + nelts))
+ return NULL_TREE;
+
+ for (i = 0; i < nelts; i++)
+ {
+ if (!CONSTANT_CLASS_P (elts[sel[i]]))
+ need_ctor = true;
+ elts[i + 2 * nelts] = unshare_expr (elts[sel[i]]);
+ }
+
+ if (need_ctor)
+ {
+ VEC(constructor_elt,gc) *v = VEC_alloc (constructor_elt, gc, nelts);
+ for (i = 0; i < nelts; i++)
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[2 * nelts + i]);
+ return build_constructor (type, v);
+ }
+ else
+ {
+ tree vals = NULL_TREE;
+ for (i = 0; i < nelts; i++)
+ vals = tree_cons (NULL_TREE, elts[3 * nelts - i - 1], vals);
+ return build_vector (type, vals);
+ }
+}
/* Fold a binary expression of code CODE and type TYPE with operands
OP0 and OP1. LOC is the location of the resulting expression.
@@ -13381,6 +13461,41 @@ fold_binary_loc (location_t loc,
/* An ASSERT_EXPR should never be passed to fold_binary. */
gcc_unreachable ();
+ case VEC_EXTRACT_EVEN_EXPR:
+ case VEC_EXTRACT_ODD_EXPR:
+ case VEC_INTERLEAVE_HIGH_EXPR:
+ case VEC_INTERLEAVE_LOW_EXPR:
+ if ((TREE_CODE (arg0) == VECTOR_CST
+ || TREE_CODE (arg0) == CONSTRUCTOR)
+ && (TREE_CODE (arg1) == VECTOR_CST
+ || TREE_CODE (arg1) == CONSTRUCTOR))
+ {
+ unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+ unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
+
+ for (i = 0; i < nelts; i++)
+ switch (code)
+ {
+ case VEC_EXTRACT_EVEN_EXPR:
+ sel[i] = i * 2;
+ break;
+ case VEC_EXTRACT_ODD_EXPR:
+ sel[i] = i * 2 + 1;
+ break;
+ case VEC_INTERLEAVE_HIGH_EXPR:
+ sel[i] = (i + nelts) / 2 + ((i & 1) ? nelts : 0);
+ break;
+ case VEC_INTERLEAVE_LOW_EXPR:
+ sel[i] = i / 2 + ((i & 1) ? nelts : 0);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ return fold_vec_perm (type, arg0, arg1, sel);
+ }
+ return NULL_TREE;
+
default:
return NULL_TREE;
} /* switch (code) */
@@ -13767,6 +13882,55 @@ fold_ternary_loc (location_t loc, enum t
return fold_fma (loc, type, arg0, arg1, arg2);
+ case VEC_PERM_EXPR:
+ if (TREE_CODE (arg2) == VECTOR_CST)
+ {
+ unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+ unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
+ tree t;
+ bool need_mask_canon = false;
+
+ gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2)));
+ for (i = 0, t = TREE_VECTOR_CST_ELTS (arg2);
+ i < nelts && t; i++, t = TREE_CHAIN (t))
+ {
+ if (TREE_CODE (TREE_VALUE (t)) != INTEGER_CST)
+ return NULL_TREE;
+
+ sel[i] = TREE_INT_CST_LOW (TREE_VALUE (t)) & (2 * nelts - 1);
+ if (TREE_INT_CST_HIGH (TREE_VALUE (t))
+ || ((unsigned HOST_WIDE_INT)
+ TREE_INT_CST_LOW (TREE_VALUE (t)) != sel[i]))
+ need_mask_canon = true;
+ }
+ if (t)
+ return NULL_TREE;
+ for (; i < nelts; i++)
+ sel[i] = 0;
+
+ if ((TREE_CODE (arg0) == VECTOR_CST
+ || TREE_CODE (arg0) == CONSTRUCTOR)
+ && (TREE_CODE (arg1) == VECTOR_CST
+ || TREE_CODE (arg1) == CONSTRUCTOR))
+ {
+ t = fold_vec_perm (type, arg0, arg1, sel);
+ if (t != NULL_TREE)
+ return t;
+ }
+
+ if (need_mask_canon && arg2 == op2)
+ {
+ tree list = NULL_TREE, eltype = TREE_TYPE (TREE_TYPE (arg2));
+ for (i = 0; i < nelts; i++)
+ list = tree_cons (NULL_TREE,
+ build_int_cst (eltype, sel[nelts - i - 1]),
+ list);
+ t = build_vector (TREE_TYPE (arg2), list);
+ return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, t);
+ }
+ }
+ return NULL_TREE;
+
default:
return NULL_TREE;
} /* switch (code) */
--- gcc/tree-ssa-propagate.c.jj 2011-11-10 14:57:09.000000000 +0100
+++ gcc/tree-ssa-propagate.c 2011-11-11 12:08:56.949178176 +0100
@@ -602,6 +602,16 @@ valid_gimple_rhs_p (tree expr)
break;
default:
+ if (get_gimple_rhs_class (code) == GIMPLE_TERNARY_RHS)
+ {
+ if (((code == VEC_COND_EXPR || code == COND_EXPR)
+ ? !is_gimple_condexpr (TREE_OPERAND (expr, 0))
+ : !is_gimple_val (TREE_OPERAND (expr, 0)))
+ || !is_gimple_val (TREE_OPERAND (expr, 1))
+ || !is_gimple_val (TREE_OPERAND (expr, 2)))
+ return false;
+ break;
+ }
return false;
}
break;
--- gcc/tree-vect-generic.c.jj 2011-10-27 18:39:43.000000000 +0200
+++ gcc/tree-vect-generic.c 2011-11-11 11:41:39.775591369 +0100
@@ -649,7 +649,7 @@ lower_vec_perm (gimple_stmt_iterator *gs
tree vals = TREE_VECTOR_CST_ELTS (mask);
for (i = 0; i < elements; ++i, vals = TREE_CHAIN (vals))
- sel_int[i] = TREE_INT_CST_LOW (TREE_VALUE (vals));
+ sel_int[i] = TREE_INT_CST_LOW (TREE_VALUE (vals)) & (2 * elements - 1);
if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
return;
Jakub