This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Re: [2/7] Clean up vectorizer load/store costs
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, richard dot sandiford at arm dot com
- Date: Wed, 15 Jun 2016 15:22:35 +0200
- Subject: Re: [2/7] Clean up vectorizer load/store costs
- Authentication-results: sourceware.org; auth=none
- References: <87d1nin8hz dot fsf at e105548-lin dot cambridge dot arm dot com> <874m8un8f8 dot fsf at e105548-lin dot cambridge dot arm dot com>
On Wed, Jun 15, 2016 at 10:49 AM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> Add a bit more commentary and try to make the structure more obvious.
> The horrendous:
>
> if (grouped_access_p
> && represents_group_p
> && !store_lanes_p
> && !STMT_VINFO_STRIDED_P (stmt_info)
> && !slp_node)
>
> checks go away in patch 6.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
Ok.
Thanks,
Richard.
> Thanks,
> Richard
>
>
> gcc/
> * tree-vect-stmts.c (vect_cost_group_size): Delete.
> (vect_model_store_cost): Avoid calling it. Use first_stmt_p
> variable to indicate when once-per-group costs are being used.
> (vect_model_load_cost): Likewise. Fix comment and misindented code.
>
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c
> +++ gcc/tree-vect-stmts.c
> @@ -865,24 +865,6 @@ vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
> "prologue_cost = %d .\n", inside_cost, prologue_cost);
> }
>
> -/* Function vect_cost_group_size
> -
> - For grouped load or store, return the group_size only if it is the first
> - load or store of a group, else return 1. This ensures that group size is
> - only returned once per group. */
> -
> -static int
> -vect_cost_group_size (stmt_vec_info stmt_info)
> -{
> - gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
> -
> - if (first_stmt == STMT_VINFO_STMT (stmt_info))
> - return GROUP_SIZE (stmt_info);
> -
> - return 1;
> -}
> -
> -
> /* Function vect_model_store_cost
>
> Models cost for stores. In the case of grouped accesses, one access
> @@ -895,47 +877,43 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
> stmt_vector_for_cost *prologue_cost_vec,
> stmt_vector_for_cost *body_cost_vec)
> {
> - int group_size;
> unsigned int inside_cost = 0, prologue_cost = 0;
> - struct data_reference *first_dr;
> - gimple *first_stmt;
> + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> + gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
> + bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
>
> if (dt == vect_constant_def || dt == vect_external_def)
> prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
> stmt_info, 0, vect_prologue);
>
> - /* Grouped access? */
> - if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
> + /* Grouped stores update all elements in the group at once,
> + so we want the DR for the first statement. */
> + if (!slp_node && grouped_access_p)
> {
> - if (slp_node)
> - {
> - first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
> - group_size = 1;
> - }
> - else
> - {
> - first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
> - group_size = vect_cost_group_size (stmt_info);
> - }
> -
> - first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
> - }
> - /* Not a grouped access. */
> - else
> - {
> - group_size = 1;
> - first_dr = STMT_VINFO_DATA_REF (stmt_info);
> + first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
> + dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
> }
>
> + /* True if we should include any once-per-group costs as well as
> + the cost of the statement itself. For SLP we only get called
> + once per group anyhow. */
> + bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
> +
> /* We assume that the cost of a single store-lanes instruction is
> equivalent to the cost of GROUP_SIZE separate stores. If a grouped
> access is instead being provided by a permute-and-store operation,
> - include the cost of the permutes. */
> - if (!store_lanes_p && group_size > 1
> - && !STMT_VINFO_STRIDED_P (stmt_info))
> + include the cost of the permutes.
> +
> + For SLP, the caller has already counted the permutation, if any. */
> + if (grouped_access_p
> + && first_stmt_p
> + && !store_lanes_p
> + && !STMT_VINFO_STRIDED_P (stmt_info)
> + && !slp_node)
> {
> /* Uses a high and low interleave or shuffle operations for each
> needed permute. */
> + int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
> int nstmts = ncopies * ceil_log2 (group_size) * group_size;
> inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
> stmt_info, 0, vect_body);
> @@ -957,7 +935,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
> scalar_store, stmt_info, 0, vect_body);
> }
> else
> - vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
> + vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
>
> if (STMT_VINFO_STRIDED_P (stmt_info))
> inside_cost += record_stmt_cost (body_cost_vec,
> @@ -1026,8 +1004,8 @@ vect_get_store_cost (struct data_reference *dr, int ncopies,
>
> /* Function vect_model_load_cost
>
> - Models cost for loads. In the case of grouped accesses, the last access
> - has the overhead of the grouped access attributed to it. Since unaligned
> + Models cost for loads. In the case of grouped accesses, one access has
> + the overhead of the grouped access attributed to it. Since unaligned
> accesses are supported for loads, we also account for the costs of the
> access scheme chosen. */
>
> @@ -1037,34 +1015,39 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
> stmt_vector_for_cost *prologue_cost_vec,
> stmt_vector_for_cost *body_cost_vec)
> {
> - int group_size;
> - gimple *first_stmt;
> - struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
> + gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
> + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> unsigned int inside_cost = 0, prologue_cost = 0;
> + bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
>
> - /* Grouped accesses? */
> - first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
> - if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
> + /* Grouped loads read all elements in the group at once,
> + so we want the DR for the first statement. */
> + if (!slp_node && grouped_access_p)
> {
> - group_size = vect_cost_group_size (stmt_info);
> - first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
> - }
> - /* Not a grouped access. */
> - else
> - {
> - group_size = 1;
> - first_dr = dr;
> + first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
> + dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
> }
>
> + /* True if we should include any once-per-group costs as well as
> + the cost of the statement itself. For SLP we only get called
> + once per group anyhow. */
> + bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
> +
> /* We assume that the cost of a single load-lanes instruction is
> equivalent to the cost of GROUP_SIZE separate loads. If a grouped
> access is instead being provided by a load-and-permute operation,
> - include the cost of the permutes. */
> - if (!load_lanes_p && group_size > 1
> - && !STMT_VINFO_STRIDED_P (stmt_info))
> + include the cost of the permutes.
> +
> + For SLP, the caller has already counted the permutation, if any. */
> + if (grouped_access_p
> + && first_stmt_p
> + && !load_lanes_p
> + && !STMT_VINFO_STRIDED_P (stmt_info)
> + && !slp_node)
> {
> /* Uses an even and odd extract operations or shuffle operations
> for each needed permute. */
> + int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
> int nstmts = ncopies * ceil_log2 (group_size) * group_size;
> inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
> stmt_info, 0, vect_body);
> @@ -1086,14 +1069,12 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
> scalar_load, stmt_info, 0, vect_body);
> }
> else
> - vect_get_load_cost (first_dr, ncopies,
> - ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
> - || group_size > 1 || slp_node),
> + vect_get_load_cost (dr, ncopies, first_stmt_p,
> &inside_cost, &prologue_cost,
> prologue_cost_vec, body_cost_vec, true);
> if (STMT_VINFO_STRIDED_P (stmt_info))
> - inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
> - stmt_info, 0, vect_body);
> + inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
> + stmt_info, 0, vect_body);
>
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,