[PATCH] SLP: support entire BB.

Richard Biener richard.guenther@gmail.com
Mon Aug 3 10:29:36 GMT 2020


On Fri, Jul 31, 2020 at 12:30 PM Martin Liška <mliska@suse.cz> wrote:
>
> Hey.
>
> Motivation of the patch is to allow vectorization of an entire BB.
> So far we bail out a sub-BB region when we reach a stmt for which
> vect_find_stmt_data_reference returns false. That's replaced with
> recoding of groups of the data references.
>
> We can newly vectorize code like:
>
> void foo();
> void bar (int i, double *a, double *b)
> {
>    double tem1 = a[2*i];
>    double tem2 = 2*a[2*i+1];
>    foo ();
>    b[2*i] = 2*tem1;
>    b[2*i+1] = tem2;
> }
>
> into:
>    <bb 2> [local count: 1073741824]:
>    _1 = i_12(D) * 2;
>    _2 = (long unsigned int) _1;
>    _3 = _2 * 8;
>    _4 = a_13(D) + _3;
>    vect_tem1_15.5_24 = MEM <vector(2) double> [(double *)_4];
>    vect__10.6_25 = vect_tem1_15.5_24 * { 2.0e+0, 2.0e+0 };
>    foo ();
>    _9 = b_18(D) + _3;
>    MEM <vector(2) double> [(double *)_9] = vect__10.6_25;
>    return;
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

OK with some minor changes, see below

> Thoughs?
> Martin
>
> gcc/ChangeLog:
>
>         * tree-vect-data-refs.c (dr_group_sort_cmp): Work on
>         data_ref_pair.
>         (vect_analyze_data_ref_accesses): Work on groups.
>         (vect_find_stmt_data_reference): Add group_id argument and fill
>         up dataref_groups vector.
>         * tree-vect-loop.c (vect_get_datarefs_in_loop): Pass new
>         arguments.
>         (vect_analyze_loop_2): Likewise.
>         * tree-vect-slp.c (vect_slp_analyze_bb_1): Pass argument.
>         (vect_slp_bb_region): Likewise.
>         (vect_slp_region): Likewise.
>         (vect_slp_bb):Work on the entire BB.
>         * tree-vectorizer.h (vect_analyze_data_ref_accesses): Add new
>         argument.
>         (vect_find_stmt_data_reference): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/vect/bb-slp-38.c: Adjust pattern as now we only process
>         a single vectorization and now 2 partial.
>         * gcc.dg/vect/bb-slp-45.c: New test.
> ---
>   gcc/testsuite/gcc.dg/vect/bb-slp-38.c |  2 +-
>   gcc/testsuite/gcc.dg/vect/bb-slp-45.c | 36 ++++++++++++
>   gcc/tree-vect-data-refs.c             | 63 +++++++++++++-------
>   gcc/tree-vect-loop.c                  |  5 +-
>   gcc/tree-vect-slp.c                   | 82 ++++++++++-----------------
>   gcc/tree-vectorizer.h                 |  5 +-
>   6 files changed, 118 insertions(+), 75 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-45.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c
> index 59aec54fffd..9c57ea3c2c1 100644
> --- a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c
> +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c
> @@ -41,4 +41,4 @@ int main()
>   }
>
>   /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm } } } */
> -/* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" { target vect_perm } } } */
> +/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" { target vect_perm } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-45.c b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c
> new file mode 100644
> index 00000000000..4107a34cb93
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c
> @@ -0,0 +1,36 @@
> +/* { dg-require-effective-target vect_double } */
> +
> +#include "tree-vect.h"
> +
> +extern void abort (void);
> +
> +double a[8], b[8];
> +int x;
> +
> +void __attribute__((noinline,noclone))
> +bar (void)
> +{
> +  x = 1;
> +}
> +
> +void __attribute__((noinline,noclone))
> +foo(int i)
> +{
> +  double tem1 = a[2*i];
> +  double tem2 = 2*a[2*i+1];
> +  bar ();
> +  b[2*i] = 2*tem1;
> +  b[2*i+1] = tem2;
> +}
> +
> +int main()
> +{
> +  int i;
> +  check_vect ();
> +  for (i = 0; i < 8; ++i)
> +    b[i] = i;
> +  foo (2);
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */
> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
> index a78ae61d1b0..36e10ff3619 100644
> --- a/gcc/tree-vect-data-refs.c
> +++ b/gcc/tree-vect-data-refs.c
> @@ -2757,14 +2757,18 @@ vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
>     return vect_analyze_group_access (vinfo, dr_info);
>   }
>
> +typedef std::pair<data_reference_p, int> data_ref_pair;
> +
>   /* Compare two data-references DRA and DRB to group them into chunks
>      suitable for grouping.  */
>
>   static int
>   dr_group_sort_cmp (const void *dra_, const void *drb_)
>   {
> -  data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
> -  data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
> +  data_ref_pair dra_pair = *(data_ref_pair *)const_cast<void *>(dra_);
> +  data_ref_pair drb_pair = *(data_ref_pair *)const_cast<void *>(drb_);
> +  data_reference_p dra = dra_pair.first;
> +  data_reference_p drb = drb_pair.first;
>     int cmp;
>
>     /* Stabilize sort.  */
> @@ -2772,10 +2776,13 @@ dr_group_sort_cmp (const void *dra_, const void *drb_)
>       return 0;
>
>     /* DRs in different loops never belong to the same group.  */

Comment needs to be adjusted to mention basic-block instead of loop

> -  loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
> -  loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
> -  if (loopa != loopb)
> -    return loopa->num < loopb->num ? -1 : 1;
> +  int bb_index1 = gimple_bb (DR_STMT (dra))->index;
> +  int bb_index2 = gimple_bb (DR_STMT (drb))->index;
> +  if (bb_index1 != bb_index2)
> +    return bb_index1 < bb_index2 ? -1 : 1;
> +
> +  if (dra_pair.second != drb_pair.second)
> +    return dra_pair.second < drb_pair.second ? -1 : 1;

Please add a comment that this compares the DR group
(not obvious because it says .second)

>
>     /* Ordering of DRs according to base.  */
>     cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
> @@ -2881,11 +2888,11 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
>      FORNOW: handle only arrays and pointer accesses.  */
>
>   opt_result
> -vect_analyze_data_ref_accesses (vec_info *vinfo)
> +vect_analyze_data_ref_accesses (vec_info *vinfo,
> +                               vec<int> *dataref_groups)
>   {
>     unsigned int i;
>     vec<data_reference_p> datarefs = vinfo->shared->datarefs;
> -  struct data_reference *dr;
>
>     DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");
>
> @@ -2895,14 +2902,21 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
>     /* Sort the array of datarefs to make building the interleaving chains
>        linear.  Don't modify the original vector's order, it is needed for
>        determining what dependencies are reversed.  */
> -  vec<data_reference_p> datarefs_copy = datarefs.copy ();
> +  vec<data_ref_pair> datarefs_copy;
> +  datarefs_copy.create (datarefs.length ());
> +  for (unsigned i = 0; i < datarefs.length (); i++)
> +    {
> +      int group_id = dataref_groups ? (*dataref_groups)[i] : 0;
> +      datarefs_copy.safe_push (data_ref_pair (datarefs[i], group_id));

quick_push

> +    }
>     datarefs_copy.qsort (dr_group_sort_cmp);
>     hash_set<stmt_vec_info> to_fixup;
>
>     /* Build the interleaving chains.  */
>     for (i = 0; i < datarefs_copy.length () - 1;)
>       {
> -      data_reference_p dra = datarefs_copy[i];
> +      data_reference_p dra = datarefs_copy[i].first;
> +      int dra_group_id = datarefs_copy[i].second;
>         dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
>         stmt_vec_info stmtinfo_a = dr_info_a->stmt;
>         stmt_vec_info lastinfo = NULL;
> @@ -2914,7 +2928,8 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
>         }
>         for (i = i + 1; i < datarefs_copy.length (); ++i)
>         {
> -         data_reference_p drb = datarefs_copy[i];
> +         data_reference_p drb = datarefs_copy[i].first;
> +         int drb_group_id = datarefs_copy[i].second;
>           dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
>           stmt_vec_info stmtinfo_b = dr_info_b->stmt;
>           if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
> @@ -2927,10 +2942,14 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
>              matters we can push those to a worklist and re-iterate
>              over them.  The we can just skip ahead to the next DR here.  */
>
> -         /* DRs in a different loop should not be put into the same
> +         /* DRs in a different BBs should not be put into the same
>              interleaving group.  */
> -         if (gimple_bb (DR_STMT (dra))->loop_father
> -             != gimple_bb (DR_STMT (drb))->loop_father)
> +         int bb_index1 = gimple_bb (DR_STMT (dra))->index;
> +         int bb_index2 = gimple_bb (DR_STMT (drb))->index;
> +         if (bb_index1 != bb_index2)
> +           break;
> +
> +         if (dra_group_id != drb_group_id)
>             break;
>
>           /* Check that the data-refs have same first location (except init)
> @@ -2977,7 +2996,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
>           HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
>           HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
>           HOST_WIDE_INT init_prev
> -           = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
> +           = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1].first));
>           gcc_assert (init_a <= init_b
>                       && init_a <= init_prev
>                       && init_prev <= init_b);
> @@ -2985,7 +3004,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
>           /* Do not place the same access in the interleaving chain twice.  */
>           if (init_b == init_prev)
>             {
> -             gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
> +             gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1].first))
>                           < gimple_uid (DR_STMT (drb)));
>               /* Simply link in duplicates and fix up the chain below.  */
>             }
> @@ -3098,9 +3117,10 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
>         to_fixup.add (newgroup);
>       }
>
> -  FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
> +  data_ref_pair *dr_pair;
> +  FOR_EACH_VEC_ELT (datarefs_copy, i, dr_pair)
>       {
> -      dr_vec_info *dr_info = vinfo->lookup_dr (dr);
> +      dr_vec_info *dr_info = vinfo->lookup_dr (dr_pair->first);
>         if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
>           && !vect_analyze_data_ref_access (vinfo, dr_info))
>         {
> @@ -3991,7 +4011,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
>
>   opt_result
>   vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
> -                              vec<data_reference_p> *datarefs)
> +                              vec<data_reference_p> *datarefs,
> +                              vec<int> *dataref_groups, int group_id)

You are always passing NULL here so simply avoid this and the following changes.

OK with those changes.
Thanks,
Richard.

>   {
>     /* We can ignore clobbers for dataref analysis - they are removed during
>        loop vectorization and BB vectorization checks dependences with a
> @@ -4118,6 +4139,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
>                       newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
>                       free_data_ref (dr);
>                       datarefs->safe_push (newdr);
> +                     if (dataref_groups)
> +                       dataref_groups->safe_push (group_id);
>                       return opt_result::success ();
>                     }
>                 }
> @@ -4127,6 +4150,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
>       }
>
>     datarefs->safe_push (dr);
> +  if (dataref_groups)
> +    dataref_groups->safe_push (group_id);
>     return opt_result::success ();
>   }
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 185019c3305..2326d670bee 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -1865,7 +1865,8 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
>         if (is_gimple_debug (stmt))
>           continue;
>         ++(*n_stmts);
> -       opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs);
> +       opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs,
> +                                                       NULL, 0);
>         if (!res)
>           {
>             if (is_gimple_call (stmt) && loop->safelen)
> @@ -2087,7 +2088,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
>     /* Analyze the access patterns of the data-refs in the loop (consecutive,
>        complex, etc.). FORNOW: Only handle consecutive access pattern.  */
>
> -  ok = vect_analyze_data_ref_accesses (loop_vinfo);
> +  ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
>     if (!ok)
>       {
>         if (dump_enabled_p ())
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index 72192b5a813..167db076454 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -3217,7 +3217,8 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
>      region.  */
>
>   static bool
> -vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
> +vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
> +                      vec<int> *dataref_groups)
>   {
>     DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
>
> @@ -3239,7 +3240,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
>         return false;
>       }
>
> -  if (!vect_analyze_data_ref_accesses (bb_vinfo))
> +  if (!vect_analyze_data_ref_accesses (bb_vinfo, dataref_groups))
>       {
>        if (dump_enabled_p ())
>          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -3348,10 +3349,11 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
>      given by DATAREFS.  */
>
>   static bool
> -vect_slp_bb_region (gimple_stmt_iterator region_begin,
> -                   gimple_stmt_iterator region_end,
> -                   vec<data_reference_p> datarefs,
> -                   unsigned int n_stmts)
> +vect_slp_region (gimple_stmt_iterator region_begin,
> +                gimple_stmt_iterator region_end,
> +                vec<data_reference_p> datarefs,
> +                vec<int> *dataref_groups,
> +                unsigned int n_stmts)
>   {
>     bb_vec_info bb_vinfo;
>     auto_vector_modes vector_modes;
> @@ -3378,7 +3380,7 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin,
>         bb_vinfo->shared->check_datarefs ();
>         bb_vinfo->vector_mode = next_vector_mode;
>
> -      if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
> +      if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, dataref_groups)
>           && dbg_cnt (vect_slp))
>         {
>           if (dump_enabled_p ())
> @@ -3473,45 +3475,30 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin,
>   bool
>   vect_slp_bb (basic_block bb)
>   {
> -  gimple_stmt_iterator gsi;
> -  bool any_vectorized = false;
> -
> -  gsi = gsi_after_labels (bb);
> -  while (!gsi_end_p (gsi))
> +  vec<data_reference_p> datarefs = vNULL;
> +  vec<int> dataref_groups = vNULL;
> +  int insns = 0;
> +  int current_group = 0;
> +  gimple_stmt_iterator region_begin = gsi_start_nondebug_after_labels_bb (bb);
> +  gimple_stmt_iterator region_end = gsi_last_bb (bb);
> +  if (!gsi_end_p (region_end))
> +    gsi_next (&region_end);
> +
> +  for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
> +       gsi_next (&gsi))
>       {
> -      gimple_stmt_iterator region_begin = gsi;
> -      vec<data_reference_p> datarefs = vNULL;
> -      int insns = 0;
> -
> -      for (; !gsi_end_p (gsi); gsi_next (&gsi))
> -       {
> -         gimple *stmt = gsi_stmt (gsi);
> -         if (is_gimple_debug (stmt))
> -           {
> -             /* Skip leading debug stmts.  */
> -             if (gsi_stmt (region_begin) == stmt)
> -               gsi_next (&region_begin);
> -             continue;
> -           }
> -         insns++;
> -
> -         if (gimple_location (stmt) != UNKNOWN_LOCATION)
> -           vect_location = stmt;
> +      gimple *stmt = gsi_stmt (gsi);
> +      if (is_gimple_debug (stmt))
> +       continue;
>
> -         if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
> -           break;
> -       }
> -      if (gsi_end_p (region_begin))
> -       break;
> +      insns++;
>
> -      /* Skip leading unhandled stmts.  */
> -      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
> -       {
> -         gsi_next (&gsi);
> -         continue;
> -       }
> +      if (gimple_location (stmt) != UNKNOWN_LOCATION)
> +       vect_location = stmt;
>
> -      gimple_stmt_iterator region_end = gsi;
> +      if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs,
> +                                         &dataref_groups, current_group))
> +       ++current_group;
>
>         if (insns > param_slp_max_insns_in_bb)
>         {
> @@ -3520,17 +3507,10 @@ vect_slp_bb (basic_block bb)
>                              "not vectorized: too many instructions in "
>                              "basic block.\n");
>         }
> -      else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
> -       any_vectorized = true;
> -
> -      if (gsi_end_p (region_end))
> -       break;
> -
> -      /* Skip the unhandled stmt.  */
> -      gsi_next (&gsi);
>       }
>
> -  return any_vectorized;
> +  return vect_slp_region (region_begin, region_end, datarefs,
> +                         &dataref_groups, insns);
>   }
>
>
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 5466c78c20b..21881a9390c 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1917,14 +1917,15 @@ extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);
>   extern opt_result vect_enhance_data_refs_alignment (loop_vec_info);
>   extern opt_result vect_analyze_data_refs_alignment (loop_vec_info);
>   extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
> -extern opt_result vect_analyze_data_ref_accesses (vec_info *);
> +extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
>   extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
>   extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
>                                       tree, int, internal_fn *, tree *);
>   extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
>                                        gather_scatter_info *);
>   extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
> -                                                vec<data_reference_p> *);
> +                                                vec<data_reference_p> *,
> +                                                vec<int> *, int);
>   extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *);
>   extern void vect_record_base_alignments (vec_info *);
>   extern tree vect_create_data_ref_ptr (vec_info *,
> --
> 2.27.0
>


More information about the Gcc-patches mailing list