This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[RFC] Consider lrotate const rotation in vectorizer
- From: "Kewen.Lin" <linkw at linux dot ibm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Jakub Jelinek <jakub at redhat dot com>, Richard Biener <richard dot guenther at gmail dot com>, richard dot sandiford at arm dot com, Segher Boessenkool <segher at kernel dot crashing dot org>, Bill Schmidt <wschmidt at linux dot ibm dot com>
- Date: Tue, 16 Jul 2019 16:45:19 +0800
- Subject: [RFC] Consider lrotate const rotation in vectorizer
- References: <232a38b1-76c2-476d-1be0-a1958e5624bb@linux.ibm.com> <20190715085929.GO2125@tucnak> <d2ccc831-c805-c7b8-5a90-cb3e5ee5ed8b@linux.ibm.com>
Hi all,
Based on the previous comments (thank you!), I tried to update the
handling in the expander and vectorizer. The middle-end optimizes lrotate
with a const rotation count to rrotate all the time, which makes the
vectorizer fail to vectorize if rrotate isn't supported on the target. We
can at least teach it about const rotation counts; the cost should be the same?
At the same time, the expander already tries to use the opposite
rotation optab for scalars; we can teach it to deal with vectors as well.
Is it on the right track and reasonable?
Thanks,
Kewen
--------------
One informal patch to help describe this new idea:
diff --git a/gcc/optabs.c b/gcc/optabs.c
index a0e361b8bfe..ebebb0ad145 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -1273,6 +1273,7 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1,
if (mclass == MODE_VECTOR_INT)
{
optab otheroptab = unknown_optab;
+ optab otheroptab1 = unknown_optab;
if (binoptab == ashl_optab)
otheroptab = vashl_optab;
@@ -1281,23 +1282,50 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1,
else if (binoptab == lshr_optab)
otheroptab = vlshr_optab;
else if (binoptab == rotl_optab)
- otheroptab = vrotl_optab;
+ {
+ otheroptab = vrotl_optab;
+ otheroptab1 = vrotr_optab;
+ }
else if (binoptab == rotr_optab)
- otheroptab = vrotr_optab;
+ {
+ otheroptab = vrotr_optab;
+ otheroptab1 = vrotl_optab;
+ }
+
+ bool other_ok = (otheroptab && (icode = optab_handler (otheroptab, mode)) != CODE_FOR_nothing);
+ bool other1_ok = false;
+ if (!other_ok && otheroptab1)
+ other1_ok
+ = ((icode = optab_handler (otheroptab1, mode)) != CODE_FOR_nothing)
+ && SCALAR_INT_MODE_P (GET_MODE_INNER (mode));
- if (otheroptab
- && (icode = optab_handler (otheroptab, mode)) != CODE_FOR_nothing)
+ if (other_ok || other1_ok)
{
/* The scalar may have been extended to be too wide. Truncate
it back to the proper size to fit in the broadcast vector. */
scalar_mode inner_mode = GET_MODE_INNER (mode);
- if (!CONST_INT_P (op1)
- && (GET_MODE_BITSIZE (as_a <scalar_int_mode> (GET_MODE (op1)))
+ rtx newop1 = op1;
+ if (other1_ok)
+ {
+ unsigned int bits = GET_MODE_PRECISION (inner_mode);
+
+ if (CONST_INT_P (op1))
+ newop1 = gen_int_shift_amount (int_mode, bits - INTVAL (op1));
+ else if (targetm.shift_truncation_mask (int_mode) == bits - 1)
+ newop1 = negate_rtx (GET_MODE (op1), op1);
+ else
+ newop1 = expand_binop (GET_MODE (op1), sub_optab,
+ gen_int_mode (bits, GET_MODE (op1)), op1,
+ NULL_RTX, unsignedp, OPTAB_DIRECT);
+ }
+ if (!CONST_INT_P (newop1)
+ && (GET_MODE_BITSIZE (as_a<scalar_int_mode> (GET_MODE (newop1)))
> GET_MODE_BITSIZE (inner_mode)))
- op1 = force_reg (inner_mode,
- simplify_gen_unary (TRUNCATE, inner_mode, op1,
- GET_MODE (op1)));
- rtx vop1 = expand_vector_broadcast (mode, op1);
+ newop1 = force_reg (inner_mode,
+ simplify_gen_unary (TRUNCATE, inner_mode,
+ newop1, GET_MODE (newop1)));
+
+ rtx vop1 = expand_vector_broadcast (mode, newop1);
if (vop1)
{
temp = expand_binop_directly (icode, mode, otheroptab, op0, vop1,
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index ff952d6f464..c05ce1acba4 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -2039,6 +2039,15 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
if (optab1
&& optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
return NULL;
+ /* The middle-end canonicalizes LROTATE to RROTATE with a const rotation
+ count, so let's try LROTATE as well.  */
+ if (rhs_code == RROTATE_EXPR && TREE_CODE(oprnd1) == INTEGER_CST)
+ {
+ optab1 = optab_for_tree_code (LROTATE_EXPR, vectype, optab_vector);
+ if (optab1
+ && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
+ return NULL;
+ }
if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
{
@@ -2046,6 +2055,14 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
if (optab2
&& optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
return NULL;
+ if (rhs_code == RROTATE_EXPR && TREE_CODE(oprnd1) == INTEGER_CST)
+ {
+ optab2 = optab_for_tree_code (LROTATE_EXPR, vectype, optab_scalar);
+ if (optab2
+ && optab_handler (optab2, TYPE_MODE (vectype))
+ != CODE_FOR_nothing)
+ return NULL;
+ }
}
/* If vector/vector or vector/scalar shifts aren't supported by the target,
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 21046931243..9e1a2f971a1 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5665,6 +5665,12 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
if (!scalar_shift_arg)
{
optab = optab_for_tree_code (code, vectype, optab_vector);
+
+ if (TREE_CODE (op1) == INTEGER_CST && code == RROTATE_EXPR
+ && !(optab
+ && optab_handler (optab, TYPE_MODE (vectype))
+ != CODE_FOR_nothing))
+ optab = optab_for_tree_code (LROTATE_EXPR, vectype, optab_vector);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vector/vector shift/rotate found.\n");
@@ -5696,7 +5702,10 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
else
{
optab = optab_for_tree_code (code, vectype, optab_vector);
- if (optab
+ if (TREE_CODE (op1) == INTEGER_CST && code == RROTATE_EXPR
+ && !(optab && optab_handler (optab, TYPE_MODE (vectype))))
+ optab = optab_for_tree_code (LROTATE_EXPR, vectype, optab_vector);
+ if (optab
&& (optab_handler (optab, TYPE_MODE (vectype))
!= CODE_FOR_nothing))
{