This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [5/7] Move the fix for PR65518


On Wed, Jun 15, 2016 at 10:52 AM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> This patch moves the fix for PR65518 to the code that checks whether
> load-and-permute operations are supported.   If the group size is
> greater than the vectorisation factor, it would still be possible
> to fall back to elementwise loads (as for strided groups) rather
> than fail vectorisation entirely.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Ok.

Thanks,
Richard.

> Thanks,
> Richard
>
>
> gcc/
>         * tree-vectorizer.h (vect_grouped_load_supported): Add a
>         single_element_p parameter.
>         * tree-vect-data-refs.c (vect_grouped_load_supported): Likewise.
>         Check the PR65518 case here rather than in vectorizable_load.
>         * tree-vect-loop.c (vect_analyze_loop_2): Update call accordingly.
>         * tree-vect-stmts.c (vectorizable_load): Likewise.
>
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h
> +++ gcc/tree-vectorizer.h
> @@ -1069,7 +1069,7 @@ extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, gimple *,
>  extern tree vect_create_destination_var (tree, tree);
>  extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
>  extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT);
> -extern bool vect_grouped_load_supported (tree, unsigned HOST_WIDE_INT);
> +extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
>  extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT);
>  extern void vect_permute_store_chain (vec<tree> ,unsigned int, gimple *,
>                                      gimple_stmt_iterator *, vec<tree> *);
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c
> +++ gcc/tree-vect-data-refs.c
> @@ -5131,14 +5131,31 @@ vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi,
>
>  /* Function vect_grouped_load_supported.
>
> -   Returns TRUE if even and odd permutations are supported,
> -   and FALSE otherwise.  */
> +   COUNT is the size of the load group (the number of statements plus the
> +   number of gaps).  SINGLE_ELEMENT_P is true if there is actually
> +   only one statement, with a gap of COUNT - 1.
> +
> +   Returns true if a suitable permute exists.  */
>
>  bool
> -vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
> +vect_grouped_load_supported (tree vectype, bool single_element_p,
> +                            unsigned HOST_WIDE_INT count)
>  {
>    machine_mode mode = TYPE_MODE (vectype);
>
> +  /* If this is single-element interleaving with an element distance
> +     that leaves unused vector loads around punt - we at least create
> +     very sub-optimal code in that case (and blow up memory,
> +     see PR65518).  */
> +  if (single_element_p && count > TYPE_VECTOR_SUBPARTS (vectype))
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "single-element interleaving not supported "
> +                        "for not adjacent vector loads\n");
> +      return false;
> +    }
> +
>    /* vect_permute_load_chain requires the group size to be equal to 3 or
>       be a power of two.  */
>    if (count != 3 && exact_log2 (count) == -1)
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c
> +++ gcc/tree-vect-loop.c
> @@ -2148,10 +2148,12 @@ again:
>         {
>           vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
>           vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
> +         bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo);
>           size = STMT_VINFO_GROUP_SIZE (vinfo);
>           vectype = STMT_VINFO_VECTYPE (vinfo);
>           if (! vect_load_lanes_supported (vectype, size)
> -             && ! vect_grouped_load_supported (vectype, size))
> +             && ! vect_grouped_load_supported (vectype, single_element_p,
> +                                               size))
>             return false;
>         }
>      }
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c
> +++ gcc/tree-vect-stmts.c
> @@ -6298,31 +6298,20 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>
>        first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
>        group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
> +      bool single_element_p = (first_stmt == stmt
> +                              && !GROUP_NEXT_ELEMENT (stmt_info));
>
>        if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
>         {
>           if (vect_load_lanes_supported (vectype, group_size))
>             load_lanes_p = true;
> -         else if (!vect_grouped_load_supported (vectype, group_size))
> +         else if (!vect_grouped_load_supported (vectype, single_element_p,
> +                                                group_size))
>             return false;
>         }
>
> -      /* If this is single-element interleaving with an element distance
> -         that leaves unused vector loads around punt - we at least create
> -        very sub-optimal code in that case (and blow up memory,
> -        see PR65518).  */
> -      if (first_stmt == stmt
> -         && !GROUP_NEXT_ELEMENT (stmt_info))
> +      if (single_element_p)
>         {
> -         if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
> -           {
> -             if (dump_enabled_p ())
> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                                "single-element interleaving not supported "
> -                                "for not adjacent vector loads\n");
> -             return false;
> -           }
> -
>           /* Single-element interleaving requires peeling for gaps.  */
>           gcc_assert (GROUP_GAP (stmt_info));
>         }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]