This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: rfa: vectorize strided loads [2/2] [PR 18437]


On Tue, Apr 10, 2012 at 3:46 PM, Michael Matz <matz@suse.de> wrote:
> Hi,
>
> and this implements generally strided loads where the stride is a
> loop-invariant (constant or ssa-name). ?We only do so if the load can't be
> handled by interleaving groups. ?The implementation is fairly straight
> forward:
>
> ? ? ? ? for (i = 0; i < n; i += stride)
> ? ? ? ? ? ... = array[i];
>
> is transformed into:
>
> ? ? ? ? for (j = 0; ; j += VF*stride)
> ? ? ? ? ? tmp1 = array[j];
> ? ? ? ? ? tmp2 = array[j + stride];
> ? ? ? ? ? ...
> ? ? ? ? ? vectemp = {tmp1, tmp2, ...}
>
> (of course variously adjusted for component number)
>
> The nice thing is that by such separate loads we don't need to care for
> alignment (if the old access was aligned, so will be the new as the access
> size doesn't change).
>
> This is one more step in vectorizing the same loops as ICC. ?polyhedron
> has such a case in rnflow, where it helps quite much:
>
> Without patch:
>
> ? Benchmark ? Compile ?Executable ? Ave Run ?Number ? Estim
> ? ? ? ?Name ? ?(secs) ? ? (bytes) ? ?(secs) Repeats ? Err %
> ? --------- ? ------- ?---------- ? ------- ------- ?------
> ? ? ? ? ?ac ? ? ?3.71 ? ? 3960337 ? ? 10.06 ? ? ? 2 ?0.0368
> ? ? ?aermod ? ? 75.30 ? ? 5594185 ? ? 19.30 ? ? ? 5 ?0.7102
> ? ? ? ? air ? ? 10.30 ? ? 4832222 ? ? ?6.59 ? ? ? 2 ?0.0653
> ? ?capacita ? ? ?7.23 ? ? 4185227 ? ? 57.25 ? ? ? 2 ?0.0968
> ? ? channel ? ? ?2.05 ? ? 4658532 ? ? ?2.70 ? ? ? 5 ?4.4701
> ? ? ? doduc ? ? 14.55 ? ? 4237850 ? ? 23.31 ? ? ? 2 ?0.0768
> ? ? fatigue ? ? ?6.64 ? ? 4127554 ? ? ?7.48 ? ? ? 5 ?0.3063
> ? ? gas_dyn ? ? 13.25 ? ? 4113557 ? ? ?2.99 ? ? ? 5 ?4.5063
> ? ? ?induct ? ? ?7.23 ? ? 4338356 ? ? ?8.85 ? ? ? 5 ?0.9927
> ? ? ? linpk ? ? ?1.24 ? ? 3927350 ? ? 10.37 ? ? ? 5 ?5.1174
> ? ? ? ?mdbx ? ? ?5.51 ? ? 4053841 ? ? 12.95 ? ? ? 5 ?0.0956
> ? ? ? ? ?nf ? ? 10.69 ? ? 4062276 ? ? 11.44 ? ? ? 5 ?0.2727
> ? ? protein ? ? 33.04 ? ? 5011924 ? ? 39.53 ? ? ? 5 ?0.2328
> ? ? ?rnflow ? ? 25.35 ? ? 4238651 ? ? 31.78 ? ? ? 2 ?0.0673
> ? ?test_fpu ? ? 12.24 ? ? 4184939 ? ? ?9.00 ? ? ? 5 ?0.1157
> ? ? ? ?tfft ? ? ?1.15 ? ? 3976710 ? ? ?3.95 ? ? ? 3 ?0.0736
>
> Geometric Mean Execution Time = ? ? ?11.21 seconds
>
> With patch:
>
> ? Benchmark ? Compile ?Executable ? Ave Run ?Number ? Estim
> ? ? ? ?Name ? ?(secs) ? ? (bytes) ? ?(secs) Repeats ? Err %
> ? --------- ? ------- ?---------- ? ------- ------- ?------
> ? ? ? ? ?ac ? ? ?3.91 ? ? 3960337 ? ? 10.34 ? ? ? 5 ?0.3661
> ? ? ?aermod ? ? 76.44 ? ? 5598281 ? ? 19.03 ? ? ? 5 ?1.3394
> ? ? ? ? air ? ? 11.52 ? ? 4832222 ? ? ?6.75 ? ? ? 5 ?0.9347
> ? ?capacita ? ? ?7.78 ? ? 4189323 ? ? 56.33 ? ? ? 2 ?0.0976
> ? ? channel ? ? ?2.14 ? ? 4658532 ? ? ?2.59 ? ? ? 5 ?0.6640
> ? ? ? doduc ? ? 14.61 ? ? 4237850 ? ? 23.41 ? ? ? 5 ?0.2974
> ? ? fatigue ? ? ?6.62 ? ? 4127554 ? ? ?7.44 ? ? ? 5 ?0.1082
> ? ? gas_dyn ? ? 13.14 ? ? 4113557 ? ? ?2.82 ? ? ? 5 ?0.5253
> ? ? ?induct ? ? ?7.26 ? ? 4338356 ? ? ?8.49 ? ? ? 2 ?0.0082
> ? ? ? linpk ? ? ?1.23 ? ? 3927350 ? ? ?9.78 ? ? ? 2 ?0.0705
> ? ? ? ?mdbx ? ? ?5.47 ? ? 4053841 ? ? 12.90 ? ? ? 2 ?0.0601
> ? ? ? ? ?nf ? ? 10.67 ? ? 4062276 ? ? 11.33 ? ? ? 2 ?0.0004
> ? ? protein ? ? 32.81 ? ? 5011924 ? ? 39.48 ? ? ? 2 ?0.0893
> ? ? ?rnflow ? ? 26.57 ? ? 4246843 ? ? 26.70 ? ? ? 5 ?0.0915
> ? ?test_fpu ? ? 13.29 ? ? 4193131 ? ? ?8.82 ? ? ? 5 ?0.2136
> ? ? ? ?tfft ? ? ?1.14 ? ? 3976710 ? ? ?3.95 ? ? ? 5 ?0.1753
>
> Geometric Mean Execution Time = ? ? ?10.95 seconds
>
> I.e. for rnflow from 31.78 to 26.70 seconds, nearly 20% better.
>
> Regstrapped together with [1/2] on x86_64-linux, all langs+Ada, no
> regressions. ?Okay for trunk?

Ok with ...

>
> Ciao,
> Michael.
>
> ? ? ? ?PR tree-optimization/18437
>
> ? ? ? ?* tree-vectorizer.h (_stmt_vec_info.stride_load_p): New member.
> ? ? ? ?(STMT_VINFO_STRIDE_LOAD_P): New accessor.
> ? ? ? ?(vect_check_strided_load): Declare.
> ? ? ? ?* tree-vect-data-refs.c (vect_check_strided_load): New function.
> ? ? ? ?(vect_analyze_data_refs): Use it to accept strided loads.
> ? ? ? ?* tree-vect-stmts.c (vectorizable_load): Ditto and handle them.
>
> testsuite/
> ? ? ? ?* gfortran.dg/vect/rnflow-trs2a2.f90: New test.
>
> Index: tree-vectorizer.h
> ===================================================================
> --- tree-vectorizer.h.orig ? ? ?2012-04-10 13:19:18.000000000 +0200
> +++ tree-vectorizer.h ? 2012-04-10 13:21:19.000000000 +0200
> @@ -545,6 +545,7 @@ typedef struct _stmt_vec_info {
>
> ? /* For loads only, true if this is a gather load. ?*/
> ? bool gather_p;
> + ?bool stride_load_p;
> ?} *stmt_vec_info;
>
> ?/* Access Functions. ?*/
> @@ -559,6 +560,7 @@ typedef struct _stmt_vec_info {
> ?#define STMT_VINFO_VECTORIZABLE(S) ? ? ? ? (S)->vectorizable
> ?#define STMT_VINFO_DATA_REF(S) ? ? ? ? ? ? (S)->data_ref_info
> ?#define STMT_VINFO_GATHER_P(S) ? ? ? ? ? ?(S)->gather_p
> +#define STMT_VINFO_STRIDE_LOAD_P(S) ? ? ? (S)->stride_load_p
>
> ?#define STMT_VINFO_DR_BASE_ADDRESS(S) ? ? ?(S)->dr_base_address
> ?#define STMT_VINFO_DR_INIT(S) ? ? ? ? ? ? ?(S)->dr_init
> @@ -875,6 +877,7 @@ extern bool vect_analyze_data_ref_access
> ?extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
> ?extern tree vect_check_gather (gimple, loop_vec_info, tree *, tree *,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int *);
> +extern bool vect_check_strided_load (gimple, loop_vec_info, tree *, tree *);
> ?extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *);
> ?extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tree *, gimple_stmt_iterator *,
> Index: tree-vect-data-refs.c
> ===================================================================
> --- tree-vect-data-refs.c.orig ?2012-04-10 13:18:35.000000000 +0200
> +++ tree-vect-data-refs.c ? ? ? 2012-04-10 13:21:19.000000000 +0200
> @@ -2690,6 +2690,53 @@ vect_check_gather (gimple stmt, loop_vec
> ? return decl;
> ?}
>
> +/* Check wether a non-affine load in STMT (being in the loop referred to
> + ? in LOOP_VINFO) is suitable for handling as strided load. ?That is the case
> + ? if its address is a simple induction variable. ?If so return the base
> + ? of that induction variable in *BASEP and the (loop-invariant) step
> + ? in *STEPP, both only when that pointer is non-zero.
> +
> + ? This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
> + ? base pointer) only. ?*/
> +
> +bool
> +vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
> + ? ? ? ? ? ? ? ? ? ? ? ?tree *stepp)
> +{
> + ?struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> + ?stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> + ?struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> + ?tree base, off;
> + ?affine_iv iv;
> +
> + ?base = DR_REF (dr);
> +
> + ?if (TREE_CODE (base) == ARRAY_REF)
> + ? ?{
> + ? ? ?off = TREE_OPERAND (base, 1);
> + ? ? ?base = TREE_OPERAND (base, 0);
> + ? ?}
> + ?else if (TREE_CODE (base) == MEM_REF)
> + ? ?{
> + ? ? ?off = TREE_OPERAND (base, 0);
> + ? ? ?base = TREE_OPERAND (base, 1);
> + ? ?}
> + ?else
> + ? ?return false;
> +
> + ?if (TREE_CODE (off) != SSA_NAME)
> + ? ?return false;
> +
> + ?if (!expr_invariant_in_loop_p (loop, base)
> + ? ? ?|| !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
> + ? ?return false;
> +
> + ?if (basep)
> + ? ?*basep = iv.base;
> + ?if (stepp)
> + ? ?*stepp = iv.step;
> + ?return true;
> +}
>
> ?/* Function vect_analyze_data_refs.
>
> @@ -3090,16 +3137,21 @@ vect_analyze_data_refs (loop_vec_info lo
> ? ? ? ? ?VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
> ? ? ? ? ?struct data_dependence_relation *ddr, *newddr;
> ? ? ? ? ?bool bad = false;
> + ? ? ? ? bool strided_load = false;
> ? ? ? ? ?tree off;
> ? ? ? ? ?VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
>
> - ? ? ? ? if (!vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL)
> - ? ? ? ? ? ? || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
> + ? ? ? ? strided_load = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
> + ? ? ? ? gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
> + ? ? ? ? if (gather
> + ? ? ? ? ? ? && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
> + ? ? ? ? ? gather = false;
> + ? ? ? ? if (!gather && !strided_load)
> ? ? ? ? ? ?{
> ? ? ? ? ? ? ?if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> ? ? ? ? ? ? ? ?{
> ? ? ? ? ? ? ? ? ?fprintf (vect_dump,
> - ? ? ? ? ? ? ? ? ? ? ? ? ?"not vectorized: not suitable for gather ");
> + ? ? ? ? ? ? ? ? ? ? ? ? ?"not vectorized: not suitable for gather/strided load ");
> ? ? ? ? ? ? ? ? ?print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
> ? ? ? ? ? ? ? ?}
> ? ? ? ? ? ? ?return false;
> @@ -3152,13 +3204,16 @@ vect_analyze_data_refs (loop_vec_info lo
> ? ? ? ? ? ? ? ?{
> ? ? ? ? ? ? ? ? ?fprintf (vect_dump,
> ? ? ? ? ? ? ? ? ? ? ? ? ? "not vectorized: data dependence conflict"
> - ? ? ? ? ? ? ? ? ? ? ? ? ?" prevents gather");
> + ? ? ? ? ? ? ? ? ? ? ? ? ?" prevents gather/strided load");
> ? ? ? ? ? ? ? ? ?print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
> ? ? ? ? ? ? ? ?}
> ? ? ? ? ? ? ?return false;
> ? ? ? ? ? ?}
>
> - ? ? ? ? STMT_VINFO_GATHER_P (stmt_info) = true;
> + ? ? ? ? if (gather)
> + ? ? ? ? ? STMT_VINFO_GATHER_P (stmt_info) = true;
> + ? ? ? ? else if (strided_load)
> + ? ? ? ? ? STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
> ? ? ? ?}
> ? ? }
>
> Index: tree-vect-stmts.c
> ===================================================================
> --- tree-vect-stmts.c.orig ? ? ?2012-04-10 13:18:35.000000000 +0200
> +++ tree-vect-stmts.c ? 2012-04-10 13:21:19.000000000 +0200
> @@ -4224,6 +4224,7 @@ vectorizable_load (gimple stmt, gimple_s
> ? tree aggr_type;
> ? tree gather_base = NULL_TREE, gather_off = NULL_TREE;
> ? tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
> + ?tree stride_base, stride_step;
> ? int gather_scale = 1;
> ? enum vect_def_type gather_dt = vect_unknown_def_type;
>
> @@ -4357,6 +4358,10 @@ vectorizable_load (gimple stmt, gimple_s
> ? ? ? ? ?return false;
> ? ? ? ?}
> ? ? }
> + ?else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> + ? ?{
> + ? ? ?vect_check_strided_load (stmt, loop_vinfo, &stride_base, &stride_step);
> + ? ?}
>
> ? if (!vec_stmt) /* transformation not required. ?*/
> ? ? {
> @@ -4520,6 +4525,104 @@ vectorizable_load (gimple stmt, gimple_s
> ? ? ? ? ? ?STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
> ? ? ? ? ?else
> ? ? ? ? ? ?STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
> + ? ? ? ? prev_stmt_info = vinfo_for_stmt (new_stmt);
> + ? ? ? }
> + ? ? ?return true;
> + ? ?}
> + ?else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> + ? ?{
> + ? ? ?gimple_stmt_iterator incr_gsi;
> + ? ? ?bool insert_after;
> + ? ? ?gimple incr;
> + ? ? ?tree offvar;
> + ? ? ?tree ref = DR_REF (dr);
> + ? ? ?tree ivstep;
> + ? ? ?tree running_off;
> + ? ? ?VEC(constructor_elt, gc) *v = NULL;
> + ? ? ?gimple_seq stmts = NULL;
> +
> + ? ? ?gcc_assert (stride_base && stride_step);
> +
> + ? ? ?/* For a load with loop-invariant (but other than power-of-2)
> + ? ? ? ? stride (i.e. not a grouped access) like so:
> +
> + ? ? ? ? ?for (i = 0; i < n; i += stride)
> + ? ? ? ? ? ?... = array[i];
> +
> + ? ? ? ?we generate a new induction variable and new accesses to
> + ? ? ? ?form a new vector (or vectors, depending on ncopies):
> +
> + ? ? ? ? ?for (j = 0; ; j += VF*stride)
> + ? ? ? ? ? ?tmp1 = array[j];
> + ? ? ? ? ? ?tmp2 = array[j + stride];
> + ? ? ? ? ? ?...
> + ? ? ? ? ? ?vectemp = {tmp1, tmp2, ...}
> + ? ? ? ? */
> +
> + ? ? ?ivstep = stride_step;
> + ? ? ?ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? build_int_cst_type (TREE_TYPE (ivstep), vf));

Use build_int_cst.

> +
> + ? ? ?standard_iv_increment_position (loop, &incr_gsi, &insert_after);
> +
> + ? ? ?create_iv (stride_base, ivstep, NULL,
> + ? ? ? ? ? ? ? ?loop, &incr_gsi, insert_after,
> + ? ? ? ? ? ? ? ?&offvar, NULL);
> + ? ? ?incr = gsi_stmt (incr_gsi);
> + ? ? ?set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
> +
> + ? ? ?stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
> + ? ? ?if (stmts)
> + ? ? ? gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
> +
> + ? ? ?prev_stmt_info = NULL;
> + ? ? ?running_off = offvar;
> + ? ? ?for (j = 0; j < ncopies; j++)
> + ? ? ? {
> + ? ? ? ? tree vec_inv;
> +
> + ? ? ? ? v = VEC_alloc (constructor_elt, gc, nunits);
> + ? ? ? ? for (i = 0; i < nunits; i++)
> + ? ? ? ? ? {
> + ? ? ? ? ? ? tree newref, newoff;
> + ? ? ? ? ? ? gimple incr;
> + ? ? ? ? ? ? if (TREE_CODE (ref) == ARRAY_REF)
> + ? ? ? ? ? ? ? newref = build4 (ARRAY_REF, TREE_TYPE (ref),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?unshare_expr (TREE_OPERAND (ref, 0)),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?running_off,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?NULL_TREE, NULL_TREE);
> + ? ? ? ? ? ? else
> + ? ? ? ? ? ? ? newref = build2 (MEM_REF, TREE_TYPE (ref),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?running_off,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?TREE_OPERAND (ref, 1));
> +
> + ? ? ? ? ? ? newref = force_gimple_operand_gsi (gsi, newref, true,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?NULL_TREE, true,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?GSI_SAME_STMT);
> + ? ? ? ? ? ? CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
> + ? ? ? ? ? ? newoff = SSA_NAME_VAR (running_off);
> + ? ? ? ? ? ? if (POINTER_TYPE_P (TREE_TYPE (newoff)))
> + ? ? ? ? ? ? ? incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?running_off, stride_step);
> + ? ? ? ? ? ? else
> + ? ? ? ? ? ? ? incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?running_off, stride_step);
> + ? ? ? ? ? ? newoff = make_ssa_name (newoff, incr);
> + ? ? ? ? ? ? gimple_assign_set_lhs (incr, newoff);
> + ? ? ? ? ? ? vect_finish_stmt_generation (stmt, incr, gsi);
> +
> + ? ? ? ? ? ? running_off = newoff;
> + ? ? ? ? ? }
> +
> + ? ? ? ? vec_inv = build_constructor (vectype, v);
> + ? ? ? ? new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
> + ? ? ? ? new_stmt = SSA_NAME_DEF_STMT (new_temp);
> + ? ? ? ? mark_symbols_for_renaming (new_stmt);

This should not be necessary - in fact please manually set the proper
VUSE here.

Thanks,
Richard.

> +
> + ? ? ? ? if (j == 0)
> + ? ? ? ? ? STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
> + ? ? ? ? else
> + ? ? ? ? ? STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
> ? ? ? ? ?prev_stmt_info = vinfo_for_stmt (new_stmt);
> ? ? ? ?}
> ? ? ? return true;
> Index: testsuite/gfortran.dg/vect/rnflow-trs2a2.f90
> ===================================================================
> --- testsuite/gfortran.dg/vect/rnflow-trs2a2.f90 ? ? ? ?(revision 0)
> +++ testsuite/gfortran.dg/vect/rnflow-trs2a2.f90 ? ? ? ?(revision 0)
> @@ -0,0 +1,33 @@
> +! { dg-do compile }
> +! { dg-require-effective-target vect_double }
> +
> + ? ? ?function trs2a2 (j, k, u, d, m)
> +! ? ? ?matrice de transition intermediaire, partant de k sans descendre
> +! ? ? ?sous j. R = IjU(I-Ik)DIj, avec Ii = deltajj, j >= i.
> +! ? ? ?alternative: trs2a2 = 0
> +! ? ? ? ? ? ? ? ? ? trs2a2 (j:k-1, j:k-1) = matmul (utrsft (j:k-1,j:k-1),
> +! ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? dtrsft (j:k-1,j:k-1))
> +!
> + ? ? ?real, dimension (1:m,1:m) :: trs2a2 ?! resultat
> + ? ? ?real, dimension (1:m,1:m) :: u, d ? ?! matrices utrsft, dtrsft
> + ? ? ?integer, intent (in) ? ? ?:: j, k, m ! niveaux vallee pic
> +!
> +!##### following line replaced by Prentice to make less system dependent
> +! ? ? ?real (kind = kind (1.0d0)) :: dtmp
> + ? ? ?real (kind = selected_real_kind (10,50)) :: dtmp
> +!
> + ? ? ?trs2a2 = 0.0
> + ? ? ?do iclw1 = j, k - 1
> + ? ? ? ? do iclw2 = j, k - 1
> + ? ? ? ? ? ?dtmp = 0.0d0
> + ? ? ? ? ? ?do iclww = j, k - 1
> + ? ? ? ? ? ? ? dtmp = dtmp + u (iclw1, iclww) * d (iclww, iclw2)
> + ? ? ? ? ? ?enddo
> + ? ? ? ? ? ?trs2a2 (iclw1, iclw2) = dtmp
> + ? ? ? ? enddo
> + ? ? ?enddo
> + ? ? ?return
> + ? ? ?end function trs2a2
> +
> +! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" ?} }
> +! { dg-final { cleanup-tree-dump "vect" } }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]