[17/17] Extend can_duplicate_and_interleave_p to mixed-size vectors

Richard Biener richard.guenther@gmail.com
Thu Nov 14 12:23:00 GMT 2019


On Tue, Nov 5, 2019 at 9:45 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> This patch makes can_duplicate_and_interleave_p cope with mixtures of
> vector sizes, by using queries based on get_vectype_for_scalar_type
> instead of directly querying GET_MODE_SIZE (vinfo->vector_mode).
>
> int_mode_for_size is now the first check we do for a candidate mode,
> so it seemed better to restrict it to MAX_FIXED_MODE_SIZE.  This avoids
> unnecessary work and avoids trying to create scalar types that the
> target might not support.
>
> This is the final patch in the series.  As before, each patch was tested
> individually on aarch64-linux-gnu and the series as a whole on
> x86_64-linux-gnu.

OK.

Thanks,
Richard.

>
> 2019-11-04  Richard Sandiford  <richard.sandiford@arm.com>
>
> gcc/
>         * tree-vectorizer.h (can_duplicate_and_interleave_p): Take an
>         element type rather than an element mode.
>         * tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise.
>         Use get_vectype_for_scalar_type to query the natural types
>         for a given element type rather than basing everything on
>         GET_MODE_SIZE (vinfo->vector_mode).  Limit int_mode_for_size
>         query to MAX_FIXED_MODE_SIZE.
>         (duplicate_and_interleave): Update call accordingly.
>         * tree-vect-loop.c (vectorizable_reduction): Likewise.
>
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h       2019-11-05 11:08:12.521631453 +0000
> +++ gcc/tree-vectorizer.h       2019-11-05 11:14:42.786884473 +0000
> @@ -1779,8 +1779,7 @@ extern void vect_get_slp_defs (slp_tree,
>  extern bool vect_slp_bb (basic_block);
>  extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
>  extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
> -extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int,
> -                                           machine_mode,
> +extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
>                                             unsigned int * = NULL,
>                                             tree * = NULL, tree * = NULL);
>  extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
> Index: gcc/tree-vect-slp.c
> ===================================================================
> --- gcc/tree-vect-slp.c 2019-11-05 11:08:12.517631481 +0000
> +++ gcc/tree-vect-slp.c 2019-11-05 11:14:42.786884473 +0000
> @@ -265,7 +265,7 @@ vect_get_place_in_interleaving_chain (st
>    return -1;
>  }
>
> -/* Check whether it is possible to load COUNT elements of type ELT_MODE
> +/* Check whether it is possible to load COUNT elements of type ELT_TYPE
>     using the method implemented by duplicate_and_interleave.  Return true
>     if so, returning the number of intermediate vectors in *NVECTORS_OUT
>     (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
> @@ -273,26 +273,37 @@ vect_get_place_in_interleaving_chain (st
>
>  bool
>  can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
> -                               machine_mode elt_mode,
> -                               unsigned int *nvectors_out,
> +                               tree elt_type, unsigned int *nvectors_out,
>                                 tree *vector_type_out,
>                                 tree *permutes)
>  {
> -  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
> -  poly_int64 nelts;
> +  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
> +  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
> +    return false;
> +
> +  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
> +  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
>    unsigned int nvectors = 1;
>    for (;;)
>      {
>        scalar_int_mode int_mode;
>        poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
> -      if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts)
> -         && int_mode_for_size (elt_bits, 0).exists (&int_mode))
> +      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
>         {
> +         /* Get the natural vector type for this SLP group size.  */
>           tree int_type = build_nonstandard_integer_type
>             (GET_MODE_BITSIZE (int_mode), 1);
> -         tree vector_type = build_vector_type (int_type, nelts);
> -         if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
> -           {
> +         tree vector_type
> +           = get_vectype_for_scalar_type (vinfo, int_type, count);
> +         if (vector_type
> +             && VECTOR_MODE_P (TYPE_MODE (vector_type))
> +             && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
> +                          GET_MODE_SIZE (base_vector_mode)))
> +           {
> +             /* Try fusing consecutive sequences of COUNT / NVECTORS elements
> +                together into elements of type INT_TYPE and using the result
> +                to build NVECTORS vectors.  */
> +             poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
>               vec_perm_builder sel1 (nelts, 2, 3);
>               vec_perm_builder sel2 (nelts, 2, 3);
>               poly_int64 half_nelts = exact_div (nelts, 2);
> @@ -492,7 +503,7 @@ vect_get_and_check_slp_defs (vec_info *v
>               && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
>               && (TREE_CODE (type) == BOOLEAN_TYPE
>                   || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
> -                                                     TYPE_MODE (type))))
> +                                                     type)))
>             {
>               if (dump_enabled_p ())
>                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -3551,7 +3562,7 @@ duplicate_and_interleave (vec_info *vinf
>    unsigned int nvectors = 1;
>    tree new_vector_type;
>    tree permutes[2];
> -  if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
> +  if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
>                                        &nvectors, &new_vector_type,
>                                        permutes))
>      gcc_unreachable ();
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c        2019-11-05 10:57:41.658071173 +0000
> +++ gcc/tree-vect-loop.c        2019-11-05 11:14:42.782884501 +0000
> @@ -6288,10 +6288,9 @@ vectorizable_reduction (stmt_vec_info st
>          that value needs to be repeated for every instance of the
>          statement within the initial vector.  */
>        unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
> -      scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out));
>        if (!neutral_op
>           && !can_duplicate_and_interleave_p (loop_vinfo, group_size,
> -                                             elt_mode))
> +                                             TREE_TYPE (vectype_out)))
>         {
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,



More information about the Gcc-patches mailing list