Re: [06/11] Handle VMAT_INVARIANT separately
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, richard dot sandiford at arm dot com
- Date: Wed, 1 Aug 2018 14:52:34 +0200
- Subject: Re: [06/11] Handle VMAT_INVARIANT separately
- References: <874lghez1a.fsf@arm.com> <87effldk8n.fsf@arm.com>
On Mon, Jul 30, 2018 at 1:41 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Invariant loads were handled as a variation on the code for contiguous
> loads. We detected whether they were invariant or not as a byproduct of
> creating the vector pointer ivs: vect_create_data_ref_ptr passed back an
> inv_p to say whether the pointer was invariant.
>
> But vectorised invariant loads just keep the original scalar load,
> so this meant that detecting invariant loads had the side-effect of
> creating an unwanted vector pointer iv. The placement of the code
> also meant that we'd create a vector load and then not use the result.
> In principle this is wrong code, since there's no guarantee that there's
> a vector's worth of accessible data at that address, but we rely on DCE
> to get rid of the load before any harm is done.
>
> E.g., for an invariant load in an inner loop (which seems like the more
> common use case for this code), we'd create:
>
> vectp_a.6_52 = &a + 4;
>
> # vectp_a.5_53 = PHI <vectp_a.5_54(9), vectp_a.6_52(2)>
>
> # vectp_a.5_55 = PHI <vectp_a.5_53(3), vectp_a.5_56(10)>
>
> vect_next_a_11.7_57 = MEM[(int *)vectp_a.5_55];
> next_a_11 = a[_1];
> vect_cst__58 = {next_a_11, next_a_11, next_a_11, next_a_11};
>
> vectp_a.5_56 = vectp_a.5_55 + 4;
>
> vectp_a.5_54 = vectp_a.5_53 + 0;
>
> whereas all we want is:
>
> next_a_11 = a[_1];
> vect_cst__58 = {next_a_11, next_a_11, next_a_11, next_a_11};
>
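As a concrete illustration (a sketch only; the function and array names below are invented for this example, not taken from the patch or its testsuite), the kind of source that gives an invariant load in an inner loop is something like:

    /* The load a[i] does not vary in the inner j loop, so when that loop
       is vectorized the scalar load can stay as-is and only the
       broadcast {next_a, next_a, next_a, next_a} needs to be vector code.  */
    void
    f (int *restrict out, int *restrict a, int n, int m)
    {
      for (int i = 0; i < n; i++)
        for (int j = 0; j < m; j++)
          out[i * m + j] = a[i];
    }
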
> This patch moves the handling to its own block and makes
> vect_create_data_ref_ptr assert (when creating a full iv) that the
> address isn't invariant.
>
> The ncopies handling is unfortunate, but it's a preexisting issue.
> Richi's suggestion of using a vector of vector statements would
> let us reuse one statement for all copies.
OK.
Richard.
>
> 2018-07-30 Richard Sandiford <richard.sandiford@arm.com>
>
> gcc/
> * tree-vectorizer.h (vect_create_data_ref_ptr): Remove inv_p
> parameter.
> * tree-vect-data-refs.c (vect_create_data_ref_ptr): Likewise.
> When creating an iv, assert that the step is not known to be zero.
> (vect_setup_realignment): Update call accordingly.
> * tree-vect-stmts.c (vectorizable_store): Likewise.
> (vectorizable_load): Likewise. Handle VMAT_INVARIANT separately.
>
> Index: gcc/tree-vectorizer.h
> ===================================================================
> *** gcc/tree-vectorizer.h 2018-07-30 12:32:29.586506669 +0100
> --- gcc/tree-vectorizer.h 2018-07-30 12:40:13.000000000 +0100
> *************** extern bool vect_analyze_data_refs (vec_
> *** 1527,1533 ****
> extern void vect_record_base_alignments (vec_info *);
> extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, tree,
> tree *, gimple_stmt_iterator *,
> ! gimple **, bool, bool *,
> tree = NULL_TREE, tree = NULL_TREE);
> extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *,
> stmt_vec_info, tree);
> --- 1527,1533 ----
> extern void vect_record_base_alignments (vec_info *);
> extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, tree,
> tree *, gimple_stmt_iterator *,
> ! gimple **, bool,
> tree = NULL_TREE, tree = NULL_TREE);
> extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *,
> stmt_vec_info, tree);
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> *** gcc/tree-vect-data-refs.c 2018-07-30 12:32:26.214536374 +0100
> --- gcc/tree-vect-data-refs.c 2018-07-30 12:32:32.546480596 +0100
> *************** vect_create_addr_base_for_vector_ref (st
> *** 4674,4689 ****
>
> Return the increment stmt that updates the pointer in PTR_INCR.
>
> ! 3. Set INV_P to true if the access pattern of the data reference in the
> ! vectorized loop is invariant. Set it to false otherwise.
> !
> ! 4. Return the pointer. */
>
> tree
> vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type,
> struct loop *at_loop, tree offset,
> tree *initial_address, gimple_stmt_iterator *gsi,
> ! gimple **ptr_incr, bool only_init, bool *inv_p,
> tree byte_offset, tree iv_step)
> {
> const char *base_name;
> --- 4674,4686 ----
>
> Return the increment stmt that updates the pointer in PTR_INCR.
>
> ! 3. Return the pointer. */
>
> tree
> vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type,
> struct loop *at_loop, tree offset,
> tree *initial_address, gimple_stmt_iterator *gsi,
> ! gimple **ptr_incr, bool only_init,
> tree byte_offset, tree iv_step)
> {
> const char *base_name;
> *************** vect_create_data_ref_ptr (stmt_vec_info
> *** 4705,4711 ****
> bool insert_after;
> tree indx_before_incr, indx_after_incr;
> gimple *incr;
> - tree step;
> bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
>
> gcc_assert (iv_step != NULL_TREE
> --- 4702,4707 ----
> *************** vect_create_data_ref_ptr (stmt_vec_info
> *** 4726,4739 ****
> *ptr_incr = NULL;
> }
>
> - /* Check the step (evolution) of the load in LOOP, and record
> - whether it's invariant. */
> - step = vect_dr_behavior (dr_info)->step;
> - if (integer_zerop (step))
> - *inv_p = true;
> - else
> - *inv_p = false;
> -
> /* Create an expression for the first address accessed by this load
> in LOOP. */
> base_name = get_name (DR_BASE_ADDRESS (dr));
> --- 4722,4727 ----
> *************** vect_create_data_ref_ptr (stmt_vec_info
> *** 4849,4863 ****
> aptr = aggr_ptr_init;
> else
> {
> if (iv_step == NULL_TREE)
> {
> ! /* The step of the aggregate pointer is the type size. */
> iv_step = TYPE_SIZE_UNIT (aggr_type);
> ! /* One exception to the above is when the scalar step of the load in
> ! LOOP is zero. In this case the step here is also zero. */
> ! if (*inv_p)
> ! iv_step = size_zero_node;
> ! else if (tree_int_cst_sgn (step) == -1)
> iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
> }
>
> --- 4837,4853 ----
> aptr = aggr_ptr_init;
> else
> {
> + /* Accesses to invariant addresses should be handled specially
> + by the caller. */
> + tree step = vect_dr_behavior (dr_info)->step;
> + gcc_assert (!integer_zerop (step));
> +
> if (iv_step == NULL_TREE)
> {
> ! /* The step of the aggregate pointer is the type size,
> ! negated for downward accesses. */
> iv_step = TYPE_SIZE_UNIT (aggr_type);
> ! if (tree_int_cst_sgn (step) == -1)
> iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
> }
>
> *************** vect_setup_realignment (stmt_vec_info st
> *** 5462,5468 ****
> gphi *phi_stmt;
> tree msq = NULL_TREE;
> gimple_seq stmts = NULL;
> - bool inv_p;
> bool compute_in_loop = false;
> bool nested_in_vect_loop = false;
> struct loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
> --- 5452,5457 ----
> *************** vect_setup_realignment (stmt_vec_info st
> *** 5556,5562 ****
> vec_dest = vect_create_destination_var (scalar_dest, vectype);
> ptr = vect_create_data_ref_ptr (stmt_info, vectype,
> loop_for_initial_load, NULL_TREE,
> ! &init_addr, NULL, &inc, true, &inv_p);
> if (TREE_CODE (ptr) == SSA_NAME)
> new_temp = copy_ssa_name (ptr);
> else
> --- 5545,5551 ----
> vec_dest = vect_create_destination_var (scalar_dest, vectype);
> ptr = vect_create_data_ref_ptr (stmt_info, vectype,
> loop_for_initial_load, NULL_TREE,
> ! &init_addr, NULL, &inc, true);
> if (TREE_CODE (ptr) == SSA_NAME)
> new_temp = copy_ssa_name (ptr);
> else
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> *** gcc/tree-vect-stmts.c 2018-07-30 12:32:29.586506669 +0100
> --- gcc/tree-vect-stmts.c 2018-07-30 12:40:14.000000000 +0100
> *************** vectorizable_store (stmt_vec_info stmt_i
> *** 6254,6260 ****
> unsigned int group_size, i;
> vec<tree> oprnds = vNULL;
> vec<tree> result_chain = vNULL;
> - bool inv_p;
> tree offset = NULL_TREE;
> vec<tree> vec_oprnds = vNULL;
> bool slp = (slp_node != NULL);
> --- 6254,6259 ----
> *************** vectorizable_store (stmt_vec_info stmt_i
> *** 7018,7039 ****
> {
> dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
> dataref_offset = build_int_cst (ref_type, 0);
> - inv_p = false;
> }
> else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> ! {
> ! vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
> ! &dataref_ptr, &vec_offset);
> ! inv_p = false;
> ! }
> else
> dataref_ptr
> = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
> simd_lane_access_p ? loop : NULL,
> offset, &dummy, gsi, &ptr_incr,
> ! simd_lane_access_p, &inv_p,
> ! NULL_TREE, bump);
> ! gcc_assert (bb_vinfo || !inv_p);
> }
> else
> {
> --- 7017,7032 ----
> {
> dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
> dataref_offset = build_int_cst (ref_type, 0);
> }
> else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> ! vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
> ! &dataref_ptr, &vec_offset);
> else
> dataref_ptr
> = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
> simd_lane_access_p ? loop : NULL,
> offset, &dummy, gsi, &ptr_incr,
> ! simd_lane_access_p, NULL_TREE, bump);
> }
> else
> {
> *************** vectorizable_load (stmt_vec_info stmt_in
> *** 7419,7425 ****
> bool grouped_load = false;
> stmt_vec_info first_stmt_info;
> stmt_vec_info first_stmt_info_for_drptr = NULL;
> - bool inv_p;
> bool compute_in_loop = false;
> struct loop *at_loop;
> int vec_num;
> --- 7412,7417 ----
> *************** vectorizable_load (stmt_vec_info stmt_in
> *** 7669,7674 ****
> --- 7661,7723 ----
> return true;
> }
>
> + if (memory_access_type == VMAT_INVARIANT)
> + {
> + gcc_assert (!grouped_load && !mask && !bb_vinfo);
> + /* If we have versioned for aliasing or the loop doesn't
> + have any data dependencies that would preclude this,
> + then we are sure this is a loop invariant load and
> + thus we can insert it on the preheader edge. */
> + bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
> + && !nested_in_vect_loop
> + && hoist_defs_of_uses (stmt_info, loop));
> + if (hoist_p)
> + {
> + gassign *stmt = as_a <gassign *> (stmt_info->stmt);
> + if (dump_enabled_p ())
> + {
> + dump_printf_loc (MSG_NOTE, vect_location,
> + "hoisting out of the vectorized loop: ");
> + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
> + }
> + scalar_dest = copy_ssa_name (scalar_dest);
> + tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
> + gsi_insert_on_edge_immediate
> + (loop_preheader_edge (loop),
> + gimple_build_assign (scalar_dest, rhs));
> + }
> + /* These copies are all equivalent, but currently the representation
> + requires a separate STMT_VINFO_VEC_STMT for each one. */
> + prev_stmt_info = NULL;
> + gimple_stmt_iterator gsi2 = *gsi;
> + gsi_next (&gsi2);
> + for (j = 0; j < ncopies; j++)
> + {
> + stmt_vec_info new_stmt_info;
> + if (hoist_p)
> + {
> + new_temp = vect_init_vector (stmt_info, scalar_dest,
> + vectype, NULL);
> + gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
> + new_stmt_info = vinfo->add_stmt (new_stmt);
> + }
> + else
> + {
> + new_temp = vect_init_vector (stmt_info, scalar_dest,
> + vectype, &gsi2);
> + new_stmt_info = vinfo->lookup_def (new_temp);
> + }
> + if (slp)
> + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
> + else if (j == 0)
> + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
> + else
> + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
> + prev_stmt_info = new_stmt_info;
> + }
> + return true;
> + }
> +
> if (memory_access_type == VMAT_ELEMENTWISE
> || memory_access_type == VMAT_STRIDED_SLP)
> {
> *************** vectorizable_load (stmt_vec_info stmt_in
> *** 8177,8183 ****
> {
> dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
> dataref_offset = build_int_cst (ref_type, 0);
> - inv_p = false;
> }
> else if (first_stmt_info_for_drptr
> && first_stmt_info != first_stmt_info_for_drptr)
> --- 8226,8231 ----
> *************** vectorizable_load (stmt_vec_info stmt_in
> *** 8186,8192 ****
> = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
> aggr_type, at_loop, offset, &dummy,
> gsi, &ptr_incr, simd_lane_access_p,
> ! &inv_p, byte_offset, bump);
> /* Adjust the pointer by the difference to first_stmt. */
> data_reference_p ptrdr
> = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
> --- 8234,8240 ----
> = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
> aggr_type, at_loop, offset, &dummy,
> gsi, &ptr_incr, simd_lane_access_p,
> ! byte_offset, bump);
> /* Adjust the pointer by the difference to first_stmt. */
> data_reference_p ptrdr
> = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
> *************** vectorizable_load (stmt_vec_info stmt_in
> *** 8199,8214 ****
> stmt_info, diff);
> }
> else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> ! {
> ! vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
> ! &dataref_ptr, &vec_offset);
> ! inv_p = false;
> ! }
> else
> dataref_ptr
> = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
> offset, &dummy, gsi, &ptr_incr,
> ! simd_lane_access_p, &inv_p,
> byte_offset, bump);
> if (mask)
> vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
> --- 8247,8259 ----
> stmt_info, diff);
> }
> else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> ! vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
> ! &dataref_ptr, &vec_offset);
> else
> dataref_ptr
> = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
> offset, &dummy, gsi, &ptr_incr,
> ! simd_lane_access_p,
> byte_offset, bump);
> if (mask)
> vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
> *************** vectorizable_load (stmt_vec_info stmt_in
> *** 8492,8538 ****
> }
> }
>
> - /* 4. Handle invariant-load. */
> - if (inv_p && !bb_vinfo)
> - {
> - gcc_assert (!grouped_load);
> - /* If we have versioned for aliasing or the loop doesn't
> - have any data dependencies that would preclude this,
> - then we are sure this is a loop invariant load and
> - thus we can insert it on the preheader edge. */
> - if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
> - && !nested_in_vect_loop
> - && hoist_defs_of_uses (stmt_info, loop))
> - {
> - gassign *stmt = as_a <gassign *> (stmt_info->stmt);
> - if (dump_enabled_p ())
> - {
> - dump_printf_loc (MSG_NOTE, vect_location,
> - "hoisting out of the vectorized "
> - "loop: ");
> - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
> - }
> - tree tem = copy_ssa_name (scalar_dest);
> - gsi_insert_on_edge_immediate
> - (loop_preheader_edge (loop),
> - gimple_build_assign (tem,
> - unshare_expr
> - (gimple_assign_rhs1 (stmt))));
> - new_temp = vect_init_vector (stmt_info, tem,
> - vectype, NULL);
> - new_stmt = SSA_NAME_DEF_STMT (new_temp);
> - new_stmt_info = vinfo->add_stmt (new_stmt);
> - }
> - else
> - {
> - gimple_stmt_iterator gsi2 = *gsi;
> - gsi_next (&gsi2);
> - new_temp = vect_init_vector (stmt_info, scalar_dest,
> - vectype, &gsi2);
> - new_stmt_info = vinfo->lookup_def (new_temp);
> - }
> - }
> -
> if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
> {
> tree perm_mask = perm_mask_for_reverse (vectype);
> --- 8537,8542 ----