This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Ping: [PATCH, 4.6] Backport fixes for PR50031, PR50969
- From: "William J. Schmidt" <wschmidt at linux dot vnet dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: bergner at vnet dot ibm dot com
- Date: Thu, 16 Feb 2012 07:17:23 -0600
- Subject: Ping: [PATCH, 4.6] Backport fixes for PR50031, PR50969
- References: <1328907521.18863.6.camel@gnopaine>
Greetings,
Given the recent discussion on getting 4.6 cleaned up, I thought I'd
check back on this one. Thanks!
Bill
On Fri, 2012-02-10 at 14:58 -0600, William J. Schmidt wrote:
> This patch backports the two recent trunk fixes for powerpc64
> vectorization degradations. The fixes are largely identical to their
> 4.7 counterparts except that (a) the logic for
> STMT_VINFO_PATTERN_DEF_SEQ does not apply in 4.6, and (b) the changes to
> vectorizable_conversion in 4.7 correspond to changes in
> vectorizable_type_demotion and vectorizable_type_promotion in 4.6.
>
> Bootstrapped and tested for regressions and performance for
> powerpc64-linux. OK to commit after the trunk patch has a few days of
> burn-in?
>
> Thanks,
> Bill
>
>
> 2012-02-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
> Ira Rosen <irar@il.ibm.com>
>
> PR tree-optimization/50031
> PR tree-optimization/50969
> * targhooks.c (default_builtin_vectorization_cost): Handle
> vec_promote_demote.
> * target.h (enum vect_cost_for_stmt): Add vec_promote_demote.
> * tree-vect-loop.c (vect_get_single_scalar_iteraion_cost): Handle
> all types of reduction and pattern statements.
> (vect_estimate_min_profitable_iters): Likewise.
> * tree-vect-stmts.c (vect_model_promotion_demotion_cost): New function.
> (vect_model_store_cost): Use vec_perm rather than vector_stmt for
> statement cost.
> (vect_model_load_cost): Likewise.
> (vect_get_load_cost): Likewise; add dump logic for explicit realigns.
> (vectorizable_type_demotion): Call vect_model_promotion_demotion_cost.
> (vectorizable_type_promotion): Likewise.
> * config/spu/spu.c (spu_builtin_vectorization_cost): Handle
> vec_promote_demote.
> * config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise.
> * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Update
> vec_perm for VSX and handle vec_promote_demote.
>
>
> Index: gcc/targhooks.c
> ===================================================================
> --- gcc/targhooks.c (revision 184047)
> +++ gcc/targhooks.c (working copy)
> @@ -529,6 +529,7 @@ default_builtin_vectorization_cost (enum vect_cost
> case scalar_to_vec:
> case cond_branch_not_taken:
> case vec_perm:
> + case vec_promote_demote:
> return 1;
>
> case unaligned_load:
> Index: gcc/target.h
> ===================================================================
> --- gcc/target.h (revision 184047)
> +++ gcc/target.h (working copy)
> @@ -128,7 +128,8 @@ enum vect_cost_for_stmt
> scalar_to_vec,
> cond_branch_not_taken,
> cond_branch_taken,
> - vec_perm
> + vec_perm,
> + vec_promote_demote
> };
>
> /* Sets of optimization levels at which an option may be enabled by
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c (revision 184047)
> +++ gcc/tree-vect-loop.c (working copy)
> @@ -2104,7 +2104,8 @@ vect_get_single_scalar_iteraion_cost (loop_vec_inf
> if (stmt_info
> && !STMT_VINFO_RELEVANT_P (stmt_info)
> && (!STMT_VINFO_LIVE_P (stmt_info)
> - || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
> + || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
> + && !STMT_VINFO_IN_PATTERN_P (stmt_info))
> continue;
>
> if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
> @@ -2251,11 +2252,19 @@ vect_estimate_min_profitable_iters (loop_vec_info
> {
> gimple stmt = gsi_stmt (si);
> stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> +
> + if (STMT_VINFO_IN_PATTERN_P (stmt_info))
> + {
> + stmt = STMT_VINFO_RELATED_STMT (stmt_info);
> + stmt_info = vinfo_for_stmt (stmt);
> + }
> +
> /* Skip stmts that are not vectorized inside the loop. */
> if (!STMT_VINFO_RELEVANT_P (stmt_info)
> && (!STMT_VINFO_LIVE_P (stmt_info)
> - || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
> + || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
> continue;
> +
> vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
> /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
> some of the "outside" costs are generated inside the outer-loop. */
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c (revision 184047)
> +++ gcc/tree-vect-stmts.c (working copy)
> @@ -623,6 +623,46 @@ vect_model_simple_cost (stmt_vec_info stmt_info, i
> }
>
>
> +/* Model cost for type demotion and promotion operations. PWR is normally
> + zero for single-step promotions and demotions. It will be one if
> + two-step promotion/demotion is required, and so on. Each additional
> + step doubles the number of instructions required. */
> +
> +static void
> +vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
> + enum vect_def_type *dt, int pwr)
> +{
> + int i, tmp;
> + int inside_cost = 0, outside_cost = 0, single_stmt_cost;
> +
> + /* The SLP costs were already calculated during SLP tree build. */
> + if (PURE_SLP_STMT (stmt_info))
> + return;
> +
> + single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
> + for (i = 0; i < pwr + 1; i++)
> + {
> + tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
> + (i + 1) : i;
> + inside_cost += vect_pow2 (tmp) * single_stmt_cost;
> + }
> +
> + /* FORNOW: Assuming maximum 2 args per stmts. */
> + for (i = 0; i < 2; i++)
> + {
> + if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
> + outside_cost += vect_get_stmt_cost (vector_stmt);
> + }
> +
> + if (vect_print_dump_info (REPORT_COST))
> + fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
> + "outside_cost = %d .", inside_cost, outside_cost);
> +
> + /* Set the costs in STMT_INFO. */
> + stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
> + stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
> +}
> +
> /* Function vect_cost_strided_group_size
>
> For strided load or store, return the group_size only if it is the first
> @@ -691,7 +731,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, in
> {
> /* Uses a high and low interleave operation for each needed permute. */
> inside_cost = ncopies * exact_log2(group_size) * group_size
> - * vect_get_stmt_cost (vector_stmt);
> + * vect_get_stmt_cost (vec_perm);
>
> if (vect_print_dump_info (REPORT_COST))
> fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
> @@ -795,7 +835,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int
> {
> /* Uses an even and odd extract operations for each needed permute. */
> inside_cost = ncopies * exact_log2(group_size) * group_size
> - * vect_get_stmt_cost (vector_stmt);
> + * vect_get_stmt_cost (vec_perm);
>
> if (vect_print_dump_info (REPORT_COST))
> fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
> @@ -855,7 +895,7 @@ vect_get_load_cost (struct data_reference *dr, int
> case dr_explicit_realign:
> {
> *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
> - + vect_get_stmt_cost (vector_stmt));
> + + vect_get_stmt_cost (vec_perm));
>
> /* FIXME: If the misalignment remains fixed across the iterations of
> the containing loop, the following cost should be added to the
> @@ -863,6 +903,9 @@ vect_get_load_cost (struct data_reference *dr, int
> if (targetm.vectorize.builtin_mask_for_load)
> *inside_cost += vect_get_stmt_cost (vector_stmt);
>
> + if (vect_print_dump_info (REPORT_COST))
> + fprintf (vect_dump, "vect_model_load_cost: explicit realign");
> +
> break;
> }
> case dr_explicit_realign_optimized:
> @@ -886,7 +929,12 @@ vect_get_load_cost (struct data_reference *dr, int
> }
>
> *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
> - + vect_get_stmt_cost (vector_stmt));
> + + vect_get_stmt_cost (vec_perm));
> +
> + if (vect_print_dump_info (REPORT_COST))
> + fprintf (vect_dump,
> + "vect_model_load_cost: explicit realign optimized");
> +
> break;
> }
>
> @@ -2919,7 +2967,7 @@ vectorizable_type_demotion (gimple stmt, gimple_st
> STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
> if (vect_print_dump_info (REPORT_DETAILS))
> fprintf (vect_dump, "=== vectorizable_demotion ===");
> - vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
> + vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
> return true;
> }
>
> @@ -3217,7 +3265,7 @@ vectorizable_type_promotion (gimple stmt, gimple_s
> STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
> if (vect_print_dump_info (REPORT_DETAILS))
> fprintf (vect_dump, "=== vectorizable_promotion ===");
> - vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
> + vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
> return true;
> }
>
> Index: gcc/config/spu/spu.c
> ===================================================================
> --- gcc/config/spu/spu.c (revision 184047)
> +++ gcc/config/spu/spu.c (working copy)
> @@ -6794,6 +6794,7 @@ spu_builtin_vectorization_cost (enum vect_cost_for
> case scalar_to_vec:
> case cond_branch_not_taken:
> case vec_perm:
> + case vec_promote_demote:
> return 1;
>
> case scalar_store:
> Index: gcc/config/i386/i386.c
> ===================================================================
> --- gcc/config/i386/i386.c (revision 184047)
> +++ gcc/config/i386/i386.c (working copy)
> @@ -32816,7 +32816,8 @@ ix86_builtin_vectorization_cost (enum vect_cost_fo
> return ix86_cost->cond_not_taken_branch_cost;
>
> case vec_perm:
> - return 1;
> + case vec_promote_demote:
> + return ix86_cost->vec_stmt_cost;
>
> default:
> gcc_unreachable ();
> Index: gcc/config/rs6000/rs6000.c
> ===================================================================
> --- gcc/config/rs6000/rs6000.c (revision 184047)
> +++ gcc/config/rs6000/rs6000.c (working copy)
> @@ -3695,12 +3695,23 @@ rs6000_builtin_vectorization_cost (enum vect_cost_
> case vec_to_scalar:
> case scalar_to_vec:
> case cond_branch_not_taken:
> - case vec_perm:
> return 1;
>
> case cond_branch_taken:
> return 3;
>
> + case vec_perm:
> + if (TARGET_VSX)
> + return 4;
> + else
> + return 1;
> +
> + case vec_promote_demote:
> + if (TARGET_VSX)
> + return 5;
> + else
> + return 1;
> +
> case unaligned_load:
> if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
> {
>