This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[17/17] Extend can_duplicate_and_interleave_p to mixed-size vectors
- From: Richard Sandiford <richard dot sandiford at arm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 05 Nov 2019 11:16:16 +0000
- Subject: [17/17] Extend can_duplicate_and_interleave_p to mixed-size vectors
- References: <mpto8y59fe7.fsf@arm.com>
This patch makes can_duplicate_and_interleave_p cope with mixtures of
vector sizes, by using queries based on get_vectype_for_scalar_type
instead of directly querying GET_MODE_SIZE (vinfo->vector_mode).
int_mode_for_size is now the first check we do for a candidate mode,
so it seemed better to restrict it to MAX_FIXED_MODE_SIZE. This avoids
unnecessary work and avoids trying to create scalar types that the
target might not support.
This is the final patch in the series. As before, each patch was tested
individually on aarch64-linux-gnu and the series as a whole on x86_64-linux-gnu.
2019-11-04 Richard Sandiford <richard.sandiford@arm.com>
gcc/
* tree-vectorizer.h (can_duplicate_and_interleave_p): Take an
element type rather than an element mode.
* tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise.
Use get_vectype_for_scalar_type to query the natural types
for a given element type rather than basing everything on
GET_MODE_SIZE (vinfo->vector_mode). Limit int_mode_for_size
query to MAX_FIXED_MODE_SIZE.
(duplicate_and_interleave): Update call accordingly.
* tree-vect-loop.c (vectorizable_reduction): Likewise.
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h 2019-11-05 11:08:12.521631453 +0000
+++ gcc/tree-vectorizer.h 2019-11-05 11:14:42.786884473 +0000
@@ -1779,8 +1779,7 @@ extern void vect_get_slp_defs (slp_tree,
extern bool vect_slp_bb (basic_block);
extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
-extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int,
- machine_mode,
+extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
unsigned int * = NULL,
tree * = NULL, tree * = NULL);
extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c 2019-11-05 11:08:12.517631481 +0000
+++ gcc/tree-vect-slp.c 2019-11-05 11:14:42.786884473 +0000
@@ -265,7 +265,7 @@ vect_get_place_in_interleaving_chain (st
return -1;
}
-/* Check whether it is possible to load COUNT elements of type ELT_MODE
+/* Check whether it is possible to load COUNT elements of type ELT_TYPE
using the method implemented by duplicate_and_interleave. Return true
if so, returning the number of intermediate vectors in *NVECTORS_OUT
(if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
@@ -273,26 +273,37 @@ vect_get_place_in_interleaving_chain (st
bool
can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
- machine_mode elt_mode,
- unsigned int *nvectors_out,
+ tree elt_type, unsigned int *nvectors_out,
tree *vector_type_out,
tree *permutes)
{
- poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
- poly_int64 nelts;
+ tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
+ if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
+ return false;
+
+ machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
+ poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
unsigned int nvectors = 1;
for (;;)
{
scalar_int_mode int_mode;
poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
- if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts)
- && int_mode_for_size (elt_bits, 0).exists (&int_mode))
+ if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
{
+ /* Get the natural vector type for this SLP group size. */
tree int_type = build_nonstandard_integer_type
(GET_MODE_BITSIZE (int_mode), 1);
- tree vector_type = build_vector_type (int_type, nelts);
- if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
- {
+ tree vector_type
+ = get_vectype_for_scalar_type (vinfo, int_type, count);
+ if (vector_type
+ && VECTOR_MODE_P (TYPE_MODE (vector_type))
+ && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
+ GET_MODE_SIZE (base_vector_mode)))
+ {
+ /* Try fusing consecutive sequences of COUNT / NVECTORS elements
+ together into elements of type INT_TYPE and using the result
+ to build NVECTORS vectors. */
+ poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
vec_perm_builder sel1 (nelts, 2, 3);
vec_perm_builder sel2 (nelts, 2, 3);
poly_int64 half_nelts = exact_div (nelts, 2);
@@ -492,7 +503,7 @@ vect_get_and_check_slp_defs (vec_info *v
&& !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
&& (TREE_CODE (type) == BOOLEAN_TYPE
|| !can_duplicate_and_interleave_p (vinfo, stmts.length (),
- TYPE_MODE (type))))
+ type)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3551,7 +3562,7 @@ duplicate_and_interleave (vec_info *vinf
unsigned int nvectors = 1;
tree new_vector_type;
tree permutes[2];
- if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
+ if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
&nvectors, &new_vector_type,
permutes))
gcc_unreachable ();
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c 2019-11-05 10:57:41.658071173 +0000
+++ gcc/tree-vect-loop.c 2019-11-05 11:14:42.782884501 +0000
@@ -6288,10 +6288,9 @@ vectorizable_reduction (stmt_vec_info st
that value needs to be repeated for every instance of the
statement within the initial vector. */
unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
- scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out));
if (!neutral_op
&& !can_duplicate_and_interleave_p (loop_vinfo, group_size,
- elt_mode))
+ TREE_TYPE (vectype_out)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,