This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: RFA: vectorize reductions on minus_expr [v2]


On Fri, May 7, 2010 at 5:26 PM, Michael Matz <matz@suse.de> wrote:
> Hello,
>
> On Thu, 6 May 2010, Steven Bosscher wrote:
>
>> > +  if (code == MINUS_EXPR)
>> > +    {
> ...
>
>> Deserves a big fat comment on why we do this, IMVHO.
>
> Agreed.  I've also made the in-place modification conditional on an
> argument, so that the uses in gcc_assert certainly won't change code,
> renamed the global function to vect_force_simple_reduction and added some
> comments.
>
> Regstrapping on x86_64-linux in progress.  Okay for trunk if it passes?

Ok.

Thanks,
Richard.

>
> Ciao,
> Michael.
> --
>         * tree-ssa-reassoc.c (undistribute_ops_list): Use create_tmp_reg.
>         (can_reassociate_p): Use FLOAT_TYPE_P.
>         * tree-vectorizer.h (vect_is_simple_reduction): Rename to ...
>         (vect_force_simple_reduction): ... this.
>         * tree-parloops.c (gather_scalar_reductions): Use
>         vect_force_simple_reduction.
>         * tree-vect-loop.c (vect_is_simple_reduction_1): Rename from
>         vect_is_simple_reduction, add modify argument, if true rewrite
>         "a-b" into "a+(-b)".
>         (vect_is_simple_reduction, vect_force_simple_reduction): New
>         functions.
>         (vect_analyze_scalar_cycles_1): Use vect_force_simple_reduction.
>
> testsuite/
>         * gcc.dg/vect/fast-math-vect-reduc-8.c: New test.
>
> Index: tree-ssa-reassoc.c
> ===================================================================
> --- tree-ssa-reassoc.c ?(revision 159105)
> +++ tree-ssa-reassoc.c ?(working copy)
> @@ -1165,7 +1165,7 @@ undistribute_ops_list (enum tree_code op
> ? ? ? ? ? ? ?fprintf (dump_file, "Building (");
> ? ? ? ? ? ? ?print_generic_expr (dump_file, oe1->op, 0);
> ? ? ? ? ? ?}
> - ? ? ? ? tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
> + ? ? ? ? tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
> ? ? ? ? ?add_referenced_var (tmpvar);
> ? ? ? ? ?zero_one_operation (&oe1->op, c->oecode, c->op);
> ? ? ? ? ?EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
> @@ -1840,7 +1840,7 @@ can_reassociate_p (tree op)
> ? tree type = TREE_TYPE (op);
> ? if (INTEGRAL_TYPE_P (type)
> ? ? ? || NON_SAT_FIXED_POINT_TYPE_P (type)
> - ? ? ?|| (flag_associative_math && SCALAR_FLOAT_TYPE_P (type)))
> + ? ? ?|| (flag_associative_math && FLOAT_TYPE_P (type)))
> ? ? return true;
> ? return false;
> ?}
> Index: tree-vectorizer.h
> ===================================================================
> --- tree-vectorizer.h ? (revision 159105)
> +++ tree-vectorizer.h ? (working copy)
> @@ -846,7 +846,7 @@ extern tree vect_create_addr_base_for_ve
> ?/* In tree-vect-loop.c. ?*/
> ?/* FORNOW: Used in tree-parloops.c. ?*/
> ?extern void destroy_loop_vec_info (loop_vec_info, bool);
> -extern gimple vect_is_simple_reduction (loop_vec_info, gimple, bool, bool *);
> +extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *);
> ?/* Drive for loop analysis stage. ?*/
> ?extern loop_vec_info vect_analyze_loop (struct loop *);
> ?/* Drive for loop transformation stage. ?*/
> Index: tree-parloops.c
> ===================================================================
> --- tree-parloops.c ? ? (revision 159105)
> +++ tree-parloops.c ? ? (working copy)
> @@ -64,7 +64,7 @@ along with GCC; see the file COPYING3.
>
> ?/*
> ? Reduction handling:
> - ?currently we use vect_is_simple_reduction() to detect reduction patterns.
> + ?currently we use vect_force_simple_reduction() to detect reduction patterns.
> ? The code transformation will be introduced by an example.
>
>
> @@ -1745,7 +1745,9 @@ gather_scalar_reductions (loop_p loop, h
> ? ? ? if (!simple_iv (loop, loop, res, &iv, true)
> ? ? ? ?&& simple_loop_info)
> ? ? ? ?{
> - ? ? ? ? ? gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
> + ? ? ? ? ? gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? phi, true,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &double_reduc);
> ? ? ? ? ? if (reduc_stmt && !double_reduc)
> ? ? ? ? ? ? ? build_new_reduction (reduction_list, reduc_stmt, phi);
> ? ? ? ? }
> Index: tree-vect-loop.c
> ===================================================================
> --- tree-vect-loop.c ? ?(revision 159105)
> +++ tree-vect-loop.c ? ?(working copy)
> @@ -513,8 +513,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_i
> ? ? ? gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
>
> ? ? ? nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
> - ? ? ?reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi, !nested_cycle,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &double_reduc);
> + ? ? ?reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &double_reduc);
> ? ? ? if (reduc_stmt)
> ? ? ? ? {
> ? ? ? ? ? if (double_reduc)
> @@ -1584,7 +1584,7 @@ report_vect_op (gimple stmt, const char
> ?}
>
>
> -/* Function vect_is_simple_reduction
> +/* Function vect_is_simple_reduction_1
>
> ? ?(1) Detect a cross-iteration def-use cycle that represents a simple
> ? ?reduction computation. We look for the following pattern:
> @@ -1612,18 +1612,23 @@ report_vect_op (gimple stmt, const char
> ? ? ?a1 = phi < a0, a2 >
> ? ? ?inner loop (def of a3)
> ? ? ?a2 = phi < a3 >
> +
> +   If MODIFY is true it tries also to rework the code in-place to enable
> +   detection of more reduction patterns.  For the time being we rewrite
> +   "res -= RHS" into "res += -RHS" when it seems worthwhile.
> ?*/
>
> -gimple
> -vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
> - ? ? ? ? ? ? ? ? ? ? ? ? ?bool check_reduction, bool *double_reduc)
> +static gimple
> +vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? bool check_reduction, bool *double_reduc,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? bool modify)
> ?{
> ? struct loop *loop = (gimple_bb (phi))->loop_father;
> ? struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
> ? edge latch_e = loop_latch_edge (loop);
> ? tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
> ? gimple def_stmt, def1 = NULL, def2 = NULL;
> - ?enum tree_code code;
> + ?enum tree_code orig_code, code;
> ? tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
> ? tree type;
> ? int nloop_uses;
> @@ -1743,7 +1748,14 @@ vect_is_simple_reduction (loop_vec_info
> ? ? ? return NULL;
> ? ? }
>
> - ?code = gimple_assign_rhs_code (def_stmt);
> + ?code = orig_code = gimple_assign_rhs_code (def_stmt);
> +
> + ?/* We can handle "res -= x[i]", which is non-associative by
> + ? ? simply rewriting this into "res += -x[i]". ?Avoid changing
> + ? ? gimple instruction for the first simple tests and only do this
> + ? ? if we're allowed to change code at all. ?*/
> + ?if (code == MINUS_EXPR && modify)
> + ? ?code = PLUS_EXPR;
>
> ? if (check_reduction
> ? ? ? && (!commutative_tree_code (code) || !associative_tree_code (code)))
> @@ -1863,6 +1875,24 @@ vect_is_simple_reduction (loop_vec_info
> ? ? ? return NULL;
> ? ? }
>
> + ?/* If we detected "res -= x[i]" earlier, rewrite it into
> + ? ? "res += -x[i]" now. ?If this turns out to be useless reassoc
> + ? ? will clean it up again. ?*/
> + ?if (orig_code == MINUS_EXPR)
> + ? ?{
> + ? ? ?tree rhs = gimple_assign_rhs2 (def_stmt);
> + ? ? ?tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
> + ? ? ?gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?rhs, NULL);
> + ? ? ?gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
> + ? ? ?set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? loop_info, NULL));
> + ? ? ?gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
> + ? ? ?gimple_assign_set_rhs2 (def_stmt, negrhs);
> + ? ? ?gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
> + ? ? ?update_stmt (def_stmt);
> + ? ?}
> +
> ? /* Reduction is safe. We're dealing with one of the following:
> ? ? ?1) integer arithmetic and no trapv
> ? ? ?2) floating point arithmetic, and special flags permit this optimization
> @@ -1940,6 +1970,28 @@ vect_is_simple_reduction (loop_vec_info
> ? ? }
> ?}
>
> +/* Wrapper around vect_is_simple_reduction_1, that won't modify code
> +   in-place.  Arguments as there.  */
> +
> +static gimple
> +vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
> + ? ? ? ? ? ? ? ? ? ? ? ? ?bool check_reduction, bool *double_reduc)
> +{
> + ?return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?double_reduc, false);
> +}
> +
> +/* Wrapper around vect_is_simple_reduction_1, which will modify code
> +   in-place if it enables detection of more reductions.  Arguments
> +   as there.  */
> +
> +gimple
> +vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
> + ? ? ? ? ? ? ? ? ? ? ? ? ?bool check_reduction, bool *double_reduc)
> +{
> + ?return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?double_reduc, true);
> +}
>
> ?/* Function vect_estimate_min_profitable_iters
>
> Index: testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c
> ===================================================================
> --- testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c ? ? ?(revision 0)
> +++ testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c ? ? ?(revision 0)
> @@ -0,0 +1,23 @@
> +/* { dg-require-effective-target vect_float } */
> +/* { dg-do compile } */
> +
> +#include "tree-vect.h"
> +
> +extern float x[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> +extern float y[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> +extern float z[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> +
> +float f (unsigned n)
> +{
> + ?float ret = 0.0;
> + ?unsigned i;
> + ?for (i = 0; i < n; i++)
> + ? ?{
> + ? ? ?float diff = x[i] - y[i];
> + ? ? ?ret -= diff * diff * z[i];
> + ? ?}
> + ?return ret;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]