[17/17] Extend can_duplicate_and_interleave_p to mixed-size vectors
Richard Biener
richard.guenther@gmail.com
Thu Nov 14 12:23:00 GMT 2019
On Tue, Nov 5, 2019 at 9:45 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> This patch makes can_duplicate_and_interleave_p cope with mixtures of
> vector sizes, by using queries based on get_vectype_for_scalar_type
> instead of directly querying GET_MODE_SIZE (vinfo->vector_mode).
>
> int_mode_for_size is now the first check we do for a candidate mode,
> so it seemed better to restrict it to MAX_FIXED_MODE_SIZE. This avoids
> unnecessary work and avoids trying to create scalar types that the
> target might not support.
>
> This is the final patch in the series. As before, each patch was tested
> individually on aarch64-linux-gnu and the series as a whole on x86_64-linux-gnu.
OK.
Thanks,
Richard.
>
> 2019-11-04 Richard Sandiford <richard.sandiford@arm.com>
>
> gcc/
> * tree-vectorizer.h (can_duplicate_and_interleave_p): Take an
> element type rather than an element mode.
> * tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise.
> Use get_vectype_for_scalar_type to query the natural types
> for a given element type rather than basing everything on
> GET_MODE_SIZE (vinfo->vector_mode). Limit int_mode_for_size
> query to MAX_FIXED_MODE_SIZE.
> (duplicate_and_interleave): Update call accordingly.
> * tree-vect-loop.c (vectorizable_reduction): Likewise.
>
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h 2019-11-05 11:08:12.521631453 +0000
> +++ gcc/tree-vectorizer.h 2019-11-05 11:14:42.786884473 +0000
> @@ -1779,8 +1779,7 @@ extern void vect_get_slp_defs (slp_tree,
> extern bool vect_slp_bb (basic_block);
> extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
> extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
> -extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int,
> - machine_mode,
> +extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
> unsigned int * = NULL,
> tree * = NULL, tree * = NULL);
> extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
> Index: gcc/tree-vect-slp.c
> ===================================================================
> --- gcc/tree-vect-slp.c 2019-11-05 11:08:12.517631481 +0000
> +++ gcc/tree-vect-slp.c 2019-11-05 11:14:42.786884473 +0000
> @@ -265,7 +265,7 @@ vect_get_place_in_interleaving_chain (st
> return -1;
> }
>
> -/* Check whether it is possible to load COUNT elements of type ELT_MODE
> +/* Check whether it is possible to load COUNT elements of type ELT_TYPE
> using the method implemented by duplicate_and_interleave. Return true
> if so, returning the number of intermediate vectors in *NVECTORS_OUT
> (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
> @@ -273,26 +273,37 @@ vect_get_place_in_interleaving_chain (st
>
> bool
> can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
> - machine_mode elt_mode,
> - unsigned int *nvectors_out,
> + tree elt_type, unsigned int *nvectors_out,
> tree *vector_type_out,
> tree *permutes)
> {
> - poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
> - poly_int64 nelts;
> + tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
> + if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
> + return false;
> +
> + machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
> + poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
> unsigned int nvectors = 1;
> for (;;)
> {
> scalar_int_mode int_mode;
> poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
> - if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts)
> - && int_mode_for_size (elt_bits, 0).exists (&int_mode))
> + if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
> {
> + /* Get the natural vector type for this SLP group size. */
> tree int_type = build_nonstandard_integer_type
> (GET_MODE_BITSIZE (int_mode), 1);
> - tree vector_type = build_vector_type (int_type, nelts);
> - if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
> - {
> + tree vector_type
> + = get_vectype_for_scalar_type (vinfo, int_type, count);
> + if (vector_type
> + && VECTOR_MODE_P (TYPE_MODE (vector_type))
> + && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
> + GET_MODE_SIZE (base_vector_mode)))
> + {
> + /* Try fusing consecutive sequences of COUNT / NVECTORS elements
> + together into elements of type INT_TYPE and using the result
> + to build NVECTORS vectors. */
> + poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
> vec_perm_builder sel1 (nelts, 2, 3);
> vec_perm_builder sel2 (nelts, 2, 3);
> poly_int64 half_nelts = exact_div (nelts, 2);
> @@ -492,7 +503,7 @@ vect_get_and_check_slp_defs (vec_info *v
> && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
> && (TREE_CODE (type) == BOOLEAN_TYPE
> || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
> - TYPE_MODE (type))))
> + type)))
> {
> if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -3551,7 +3562,7 @@ duplicate_and_interleave (vec_info *vinf
> unsigned int nvectors = 1;
> tree new_vector_type;
> tree permutes[2];
> - if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
> + if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
> &nvectors, &new_vector_type,
> permutes))
> gcc_unreachable ();
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c 2019-11-05 10:57:41.658071173 +0000
> +++ gcc/tree-vect-loop.c 2019-11-05 11:14:42.782884501 +0000
> @@ -6288,10 +6288,9 @@ vectorizable_reduction (stmt_vec_info st
> that value needs to be repeated for every instance of the
> statement within the initial vector. */
> unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
> - scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out));
> if (!neutral_op
> && !can_duplicate_and_interleave_p (loop_vinfo, group_size,
> - elt_mode))
> + TREE_TYPE (vectype_out)))
> {
> if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
More information about the Gcc-patches
mailing list