[06/11] Handle VMAT_INVARIANT separately

Richard Biener richard.guenther@gmail.com
Wed Aug 1 12:52:00 GMT 2018


On Mon, Jul 30, 2018 at 1:41 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Invariant loads were handled as a variation on the code for contiguous
> loads.  We detected whether they were invariant or not as a byproduct of
> creating the vector pointer ivs: vect_create_data_ref_ptr passed back an
> inv_p to say whether the pointer was invariant.
>
> But vectorised invariant loads just keep the original scalar load,
> so this meant that detecting invariant loads had the side-effect of
> creating an unwanted vector pointer iv.  The placement of the code
> also meant that we'd create a vector load and then not use the result.
> In principle this is wrong code, since there's no guarantee that there's
> a vector's worth of accessible data at that address, but we rely on DCE
> to get rid of the load before any harm is done.
>
> E.g., for an invariant load in an inner loop (which seems like the more
> common use case for this code), we'd create:
>
>    vectp_a.6_52 = &a + 4;
>
>    # vectp_a.5_53 = PHI <vectp_a.5_54(9), vectp_a.6_52(2)>
>
>    # vectp_a.5_55 = PHI <vectp_a.5_53(3), vectp_a.5_56(10)>
>
>    vect_next_a_11.7_57 = MEM[(int *)vectp_a.5_55];
>    next_a_11 = a[_1];
>    vect_cst__58 = {next_a_11, next_a_11, next_a_11, next_a_11};
>
>    vectp_a.5_56 = vectp_a.5_55 + 4;
>
>    vectp_a.5_54 = vectp_a.5_53 + 0;
>
> whereas all we want is:
>
>    next_a_11 = a[_1];
>    vect_cst__58 = {next_a_11, next_a_11, next_a_11, next_a_11};
>
> This patch moves the handling to its own block and makes
> vect_create_data_ref_ptr assert (when creating a full iv) that the
> address isn't invariant.
>
> The ncopies handling is unfortunate, but it is a preexisting issue.
> Richi's suggestion of using a vector of vector statements would
> let us reuse one statement for all copies.

OK.

Richard.

>
> 2018-07-30  Richard Sandiford  <richard.sandiford@arm.com>
>
> gcc/
>         * tree-vectorizer.h (vect_create_data_ref_ptr): Remove inv_p
>         parameter.
>         * tree-vect-data-refs.c (vect_create_data_ref_ptr): Likewise.
>         When creating an iv, assert that the step is not known to be zero.
>         (vect_setup_realignment): Update call accordingly.
>         * tree-vect-stmts.c (vectorizable_store): Likewise.
>         (vectorizable_load): Likewise.  Handle VMAT_INVARIANT separately.
>
> Index: gcc/tree-vectorizer.h
> ===================================================================
> *** gcc/tree-vectorizer.h       2018-07-30 12:32:29.586506669 +0100
> --- gcc/tree-vectorizer.h       2018-07-30 12:40:13.000000000 +0100
> *************** extern bool vect_analyze_data_refs (vec_
> *** 1527,1533 ****
>   extern void vect_record_base_alignments (vec_info *);
>   extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, tree,
>                                       tree *, gimple_stmt_iterator *,
> !                                     gimple **, bool, bool *,
>                                       tree = NULL_TREE, tree = NULL_TREE);
>   extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *,
>                              stmt_vec_info, tree);
> --- 1527,1533 ----
>   extern void vect_record_base_alignments (vec_info *);
>   extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, tree,
>                                       tree *, gimple_stmt_iterator *,
> !                                     gimple **, bool,
>                                       tree = NULL_TREE, tree = NULL_TREE);
>   extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *,
>                              stmt_vec_info, tree);
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> *** gcc/tree-vect-data-refs.c   2018-07-30 12:32:26.214536374 +0100
> --- gcc/tree-vect-data-refs.c   2018-07-30 12:32:32.546480596 +0100
> *************** vect_create_addr_base_for_vector_ref (st
> *** 4674,4689 ****
>
>         Return the increment stmt that updates the pointer in PTR_INCR.
>
> !    3. Set INV_P to true if the access pattern of the data reference in the
> !       vectorized loop is invariant.  Set it to false otherwise.
> !
> !    4. Return the pointer.  */
>
>   tree
>   vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type,
>                           struct loop *at_loop, tree offset,
>                           tree *initial_address, gimple_stmt_iterator *gsi,
> !                         gimple **ptr_incr, bool only_init, bool *inv_p,
>                           tree byte_offset, tree iv_step)
>   {
>     const char *base_name;
> --- 4674,4686 ----
>
>         Return the increment stmt that updates the pointer in PTR_INCR.
>
> !    3. Return the pointer.  */
>
>   tree
>   vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type,
>                           struct loop *at_loop, tree offset,
>                           tree *initial_address, gimple_stmt_iterator *gsi,
> !                         gimple **ptr_incr, bool only_init,
>                           tree byte_offset, tree iv_step)
>   {
>     const char *base_name;
> *************** vect_create_data_ref_ptr (stmt_vec_info
> *** 4705,4711 ****
>     bool insert_after;
>     tree indx_before_incr, indx_after_incr;
>     gimple *incr;
> -   tree step;
>     bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
>
>     gcc_assert (iv_step != NULL_TREE
> --- 4702,4707 ----
> *************** vect_create_data_ref_ptr (stmt_vec_info
> *** 4726,4739 ****
>         *ptr_incr = NULL;
>       }
>
> -   /* Check the step (evolution) of the load in LOOP, and record
> -      whether it's invariant.  */
> -   step = vect_dr_behavior (dr_info)->step;
> -   if (integer_zerop (step))
> -     *inv_p = true;
> -   else
> -     *inv_p = false;
> -
>     /* Create an expression for the first address accessed by this load
>        in LOOP.  */
>     base_name = get_name (DR_BASE_ADDRESS (dr));
> --- 4722,4727 ----
> *************** vect_create_data_ref_ptr (stmt_vec_info
> *** 4849,4863 ****
>       aptr = aggr_ptr_init;
>     else
>       {
>         if (iv_step == NULL_TREE)
>         {
> !         /* The step of the aggregate pointer is the type size.  */
>           iv_step = TYPE_SIZE_UNIT (aggr_type);
> !         /* One exception to the above is when the scalar step of the load in
> !            LOOP is zero. In this case the step here is also zero.  */
> !         if (*inv_p)
> !           iv_step = size_zero_node;
> !         else if (tree_int_cst_sgn (step) == -1)
>             iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
>         }
>
> --- 4837,4853 ----
>       aptr = aggr_ptr_init;
>     else
>       {
> +       /* Accesses to invariant addresses should be handled specially
> +        by the caller.  */
> +       tree step = vect_dr_behavior (dr_info)->step;
> +       gcc_assert (!integer_zerop (step));
> +
>         if (iv_step == NULL_TREE)
>         {
> !         /* The step of the aggregate pointer is the type size,
> !            negated for downward accesses.  */
>           iv_step = TYPE_SIZE_UNIT (aggr_type);
> !         if (tree_int_cst_sgn (step) == -1)
>             iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
>         }
>
> *************** vect_setup_realignment (stmt_vec_info st
> *** 5462,5468 ****
>     gphi *phi_stmt;
>     tree msq = NULL_TREE;
>     gimple_seq stmts = NULL;
> -   bool inv_p;
>     bool compute_in_loop = false;
>     bool nested_in_vect_loop = false;
>     struct loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
> --- 5452,5457 ----
> *************** vect_setup_realignment (stmt_vec_info st
> *** 5556,5562 ****
>         vec_dest = vect_create_destination_var (scalar_dest, vectype);
>         ptr = vect_create_data_ref_ptr (stmt_info, vectype,
>                                       loop_for_initial_load, NULL_TREE,
> !                                     &init_addr, NULL, &inc, true, &inv_p);
>         if (TREE_CODE (ptr) == SSA_NAME)
>         new_temp = copy_ssa_name (ptr);
>         else
> --- 5545,5551 ----
>         vec_dest = vect_create_destination_var (scalar_dest, vectype);
>         ptr = vect_create_data_ref_ptr (stmt_info, vectype,
>                                       loop_for_initial_load, NULL_TREE,
> !                                     &init_addr, NULL, &inc, true);
>         if (TREE_CODE (ptr) == SSA_NAME)
>         new_temp = copy_ssa_name (ptr);
>         else
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> *** gcc/tree-vect-stmts.c       2018-07-30 12:32:29.586506669 +0100
> --- gcc/tree-vect-stmts.c       2018-07-30 12:40:14.000000000 +0100
> *************** vectorizable_store (stmt_vec_info stmt_i
> *** 6254,6260 ****
>     unsigned int group_size, i;
>     vec<tree> oprnds = vNULL;
>     vec<tree> result_chain = vNULL;
> -   bool inv_p;
>     tree offset = NULL_TREE;
>     vec<tree> vec_oprnds = vNULL;
>     bool slp = (slp_node != NULL);
> --- 6254,6259 ----
> *************** vectorizable_store (stmt_vec_info stmt_i
> *** 7018,7039 ****
>             {
>               dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
>               dataref_offset = build_int_cst (ref_type, 0);
> -             inv_p = false;
>             }
>           else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> !           {
> !             vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
> !                                          &dataref_ptr, &vec_offset);
> !             inv_p = false;
> !           }
>           else
>             dataref_ptr
>               = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
>                                           simd_lane_access_p ? loop : NULL,
>                                           offset, &dummy, gsi, &ptr_incr,
> !                                         simd_lane_access_p, &inv_p,
> !                                         NULL_TREE, bump);
> !         gcc_assert (bb_vinfo || !inv_p);
>         }
>         else
>         {
> --- 7017,7032 ----
>             {
>               dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
>               dataref_offset = build_int_cst (ref_type, 0);
>             }
>           else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> !           vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
> !                                        &dataref_ptr, &vec_offset);
>           else
>             dataref_ptr
>               = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
>                                           simd_lane_access_p ? loop : NULL,
>                                           offset, &dummy, gsi, &ptr_incr,
> !                                         simd_lane_access_p, NULL_TREE, bump);
>         }
>         else
>         {
> *************** vectorizable_load (stmt_vec_info stmt_in
> *** 7419,7425 ****
>     bool grouped_load = false;
>     stmt_vec_info first_stmt_info;
>     stmt_vec_info first_stmt_info_for_drptr = NULL;
> -   bool inv_p;
>     bool compute_in_loop = false;
>     struct loop *at_loop;
>     int vec_num;
> --- 7412,7417 ----
> *************** vectorizable_load (stmt_vec_info stmt_in
> *** 7669,7674 ****
> --- 7661,7723 ----
>         return true;
>       }
>
> +   if (memory_access_type == VMAT_INVARIANT)
> +     {
> +       gcc_assert (!grouped_load && !mask && !bb_vinfo);
> +       /* If we have versioned for aliasing or the loop doesn't
> +        have any data dependencies that would preclude this,
> +        then we are sure this is a loop invariant load and
> +        thus we can insert it on the preheader edge.  */
> +       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
> +                     && !nested_in_vect_loop
> +                     && hoist_defs_of_uses (stmt_info, loop));
> +       if (hoist_p)
> +       {
> +         gassign *stmt = as_a <gassign *> (stmt_info->stmt);
> +         if (dump_enabled_p ())
> +           {
> +             dump_printf_loc (MSG_NOTE, vect_location,
> +                              "hoisting out of the vectorized loop: ");
> +             dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
> +           }
> +         scalar_dest = copy_ssa_name (scalar_dest);
> +         tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
> +         gsi_insert_on_edge_immediate
> +           (loop_preheader_edge (loop),
> +            gimple_build_assign (scalar_dest, rhs));
> +       }
> +       /* These copies are all equivalent, but currently the representation
> +        requires a separate STMT_VINFO_VEC_STMT for each one.  */
> +       prev_stmt_info = NULL;
> +       gimple_stmt_iterator gsi2 = *gsi;
> +       gsi_next (&gsi2);
> +       for (j = 0; j < ncopies; j++)
> +       {
> +         stmt_vec_info new_stmt_info;
> +         if (hoist_p)
> +           {
> +             new_temp = vect_init_vector (stmt_info, scalar_dest,
> +                                          vectype, NULL);
> +             gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
> +             new_stmt_info = vinfo->add_stmt (new_stmt);
> +           }
> +         else
> +           {
> +             new_temp = vect_init_vector (stmt_info, scalar_dest,
> +                                          vectype, &gsi2);
> +             new_stmt_info = vinfo->lookup_def (new_temp);
> +           }
> +         if (slp)
> +           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
> +         else if (j == 0)
> +           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
> +         else
> +           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
> +         prev_stmt_info = new_stmt_info;
> +       }
> +       return true;
> +     }
> +
>     if (memory_access_type == VMAT_ELEMENTWISE
>         || memory_access_type == VMAT_STRIDED_SLP)
>       {
> *************** vectorizable_load (stmt_vec_info stmt_in
> *** 8177,8183 ****
>             {
>               dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
>               dataref_offset = build_int_cst (ref_type, 0);
> -             inv_p = false;
>             }
>           else if (first_stmt_info_for_drptr
>                    && first_stmt_info != first_stmt_info_for_drptr)
> --- 8226,8231 ----
> *************** vectorizable_load (stmt_vec_info stmt_in
> *** 8186,8192 ****
>                 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
>                                             aggr_type, at_loop, offset, &dummy,
>                                             gsi, &ptr_incr, simd_lane_access_p,
> !                                           &inv_p, byte_offset, bump);
>               /* Adjust the pointer by the difference to first_stmt.  */
>               data_reference_p ptrdr
>                 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
> --- 8234,8240 ----
>                 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
>                                             aggr_type, at_loop, offset, &dummy,
>                                             gsi, &ptr_incr, simd_lane_access_p,
> !                                           byte_offset, bump);
>               /* Adjust the pointer by the difference to first_stmt.  */
>               data_reference_p ptrdr
>                 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
> *************** vectorizable_load (stmt_vec_info stmt_in
> *** 8199,8214 ****
>                                              stmt_info, diff);
>             }
>           else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> !           {
> !             vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
> !                                          &dataref_ptr, &vec_offset);
> !             inv_p = false;
> !           }
>           else
>             dataref_ptr
>               = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
>                                           offset, &dummy, gsi, &ptr_incr,
> !                                         simd_lane_access_p, &inv_p,
>                                           byte_offset, bump);
>           if (mask)
>             vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
> --- 8247,8259 ----
>                                              stmt_info, diff);
>             }
>           else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> !           vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
> !                                        &dataref_ptr, &vec_offset);
>           else
>             dataref_ptr
>               = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
>                                           offset, &dummy, gsi, &ptr_incr,
> !                                         simd_lane_access_p,
>                                           byte_offset, bump);
>           if (mask)
>             vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
> *************** vectorizable_load (stmt_vec_info stmt_in
> *** 8492,8538 ****
>                     }
>                 }
>
> -             /* 4. Handle invariant-load.  */
> -             if (inv_p && !bb_vinfo)
> -               {
> -                 gcc_assert (!grouped_load);
> -                 /* If we have versioned for aliasing or the loop doesn't
> -                    have any data dependencies that would preclude this,
> -                    then we are sure this is a loop invariant load and
> -                    thus we can insert it on the preheader edge.  */
> -                 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
> -                     && !nested_in_vect_loop
> -                     && hoist_defs_of_uses (stmt_info, loop))
> -                   {
> -                     gassign *stmt = as_a <gassign *> (stmt_info->stmt);
> -                     if (dump_enabled_p ())
> -                       {
> -                         dump_printf_loc (MSG_NOTE, vect_location,
> -                                          "hoisting out of the vectorized "
> -                                          "loop: ");
> -                         dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
> -                       }
> -                     tree tem = copy_ssa_name (scalar_dest);
> -                     gsi_insert_on_edge_immediate
> -                       (loop_preheader_edge (loop),
> -                        gimple_build_assign (tem,
> -                                             unshare_expr
> -                                               (gimple_assign_rhs1 (stmt))));
> -                     new_temp = vect_init_vector (stmt_info, tem,
> -                                                  vectype, NULL);
> -                     new_stmt = SSA_NAME_DEF_STMT (new_temp);
> -                     new_stmt_info = vinfo->add_stmt (new_stmt);
> -                   }
> -                 else
> -                   {
> -                     gimple_stmt_iterator gsi2 = *gsi;
> -                     gsi_next (&gsi2);
> -                     new_temp = vect_init_vector (stmt_info, scalar_dest,
> -                                                  vectype, &gsi2);
> -                     new_stmt_info = vinfo->lookup_def (new_temp);
> -                   }
> -               }
> -
>               if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
>                 {
>                   tree perm_mask = perm_mask_for_reverse (vectype);
> --- 8537,8542 ----



More information about the Gcc-patches mailing list