This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Gather vectorization (PR tree-optimization/50789, take 2)


On Fri, Nov 4, 2011 at 2:22 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> On Fri, Nov 04, 2011 at 12:21:49PM +0100, Richard Guenther wrote:
>> Ok.  I guess it's ok to use builtins for now - I didn't think of
>> the memory reference issue ;)
>
> Based on IRC discussion I'm posting an updated patch (both former
> patches (base and incremental) in one).
>
> I'm now using expr_invariant_in_loop_p instead of chrec_contains*,
> which nicely handles also the is_gimple_min_invariant case,
> and I've added several comments and fixed the MEM_REF offset
> folding.  Smoke tested on the *gather* testcases, will do full
> bootstrap/regtest soon.

Ok for the vectorizer pieces, I'll defer to x86 maintainers for the target bits.

Thanks,
Richard.

> 2011-11-04  Jakub Jelinek  <jakub@redhat.com>
>
> ? ? ? ?PR tree-optimization/50789
> ? ? ? ?* tree-vect-stmts.c (process_use): Add force argument, avoid
> ? ? ? ?exist_non_indexing_operands_for_use_p check if true.
> ? ? ? ?(vect_mark_stmts_to_be_vectorized): Adjust callers. ?Handle
> ? ? ? ?STMT_VINFO_GATHER_P.
> ? ? ? ?(gen_perm_mask): New function.
> ? ? ? ?(perm_mask_for_reverse): Use it.
>        (reverse_vec_elements): Rename to...
>        (permute_vec_elements): ... this.  Add Y and MASK_VEC arguments,
>        generalize for any permutations.
> ? ? ? ?(vectorizable_load): Adjust caller. ?Handle STMT_VINFO_GATHER_P.
> ? ? ? ?* target.def (TARGET_VECTORIZE_BUILTIN_GATHER): New hook.
> ? ? ? ?* doc/tm.texi.in (TARGET_VECTORIZE_BUILTIN_GATHER): Document it.
> ? ? ? ?* doc/tm.texi: Regenerate.
> ? ? ? ?* tree-data-ref.c (initialize_data_dependence_relation,
> ? ? ? ?compute_self_dependence): No longer static.
> ? ? ? ?* tree-data-ref.h (initialize_data_dependence_relation,
> ? ? ? ?compute_self_dependence): New prototypes.
> ? ? ? ?* tree-vect-data-refs.c (vect_check_gather): New function.
> ? ? ? ?(vect_analyze_data_refs): Detect possible gather load data
> ? ? ? ?refs.
> ? ? ? ?* tree-vectorizer.h (struct _stmt_vec_info): Add gather_p field.
> ? ? ? ?(STMT_VINFO_GATHER_P): Define.
> ? ? ? ?(vect_check_gather): New prototype.
> ? ? ? ?* config/i386/i386-builtin-types.def: Add types for alternate
> ? ? ? ?gather builtins.
> ? ? ? ?* config/i386/sse.md (AVXMODE48P_DI): Remove.
> ? ? ? ?(VEC_GATHER_MODE): Rename mode_attr to...
> ? ? ? ?(VEC_GATHER_IDXSI): ... this.
> ? ? ? ?(VEC_GATHER_IDXDI, VEC_GATHER_SRCDI): New mode_attrs.
> ? ? ? ?(avx2_gathersi<mode>, *avx2_gathersi<mode>): Use <VEC_GATHER_IDXSI>
> ? ? ? ?instead of <VEC_GATHER_MODE>.
>        (avx2_gatherdi<mode>): Use <VEC_GATHER_IDXDI> instead of
>        <AVXMODE48P_DI> and <VEC_GATHER_SRCDI> instead of VEC_GATHER_MODE
>        on src and mask operands.
> ? ? ? ?(*avx2_gatherdi<mode>): Likewise. ?Use VEC_GATHER_MODE iterator
> ? ? ? ?instead of AVXMODE48P_DI.
> ? ? ? ?(avx2_gatherdi<mode>256, *avx2_gatherdi<mode>256): Removed.
> ? ? ? ?* config/i386/i386.c (enum ix86_builtins): Add
> ? ? ? ?IX86_BUILTIN_GATHERALTSIV4DF, IX86_BUILTIN_GATHERALTDIV8SF,
> ? ? ? ?IX86_BUILTIN_GATHERALTSIV4DI and IX86_BUILTIN_GATHERALTDIV8SI.
> ? ? ? ?(ix86_init_mmx_sse_builtins): Create those builtins.
> ? ? ? ?(ix86_expand_builtin): Handle those builtins and adjust expansions
> ? ? ? ?of other gather builtins.
> ? ? ? ?(ix86_vectorize_builtin_gather): New function.
> ? ? ? ?(TARGET_VECTORIZE_BUILTIN_GATHER): Define.
>
> ? ? ? ?* gcc.target/i386/avx2-gather-1.c: New test.
> ? ? ? ?* gcc.target/i386/avx2-gather-2.c: New test.
> ? ? ? ?* gcc.target/i386/avx2-gather-3.c: New test.
> ? ? ? ?* gcc.target/i386/avx2-gather-4.c: New test.
>
> --- gcc/tree-vect-stmts.c.jj ? ?2011-11-04 08:52:19.000000000 +0100
> +++ gcc/tree-vect-stmts.c ? ? ? 2011-11-04 08:54:11.000000000 +0100
> @@ -332,6 +332,8 @@ exist_non_indexing_operands_for_use_p (t
> ? ?- LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
> ? ? ?that defined USE. ?This is done by calling mark_relevant and passing it
> ? ? ?the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
> + ? - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
> + ? ? be performed.
>
> ? ?Outputs:
> ? ?Generally, LIVE_P and RELEVANT are used to define the liveness and
> @@ -351,7 +353,8 @@ exist_non_indexing_operands_for_use_p (t
>
> ?static bool
> ?process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
> - ? ? ? ? ? ?enum vect_relevant relevant, VEC(gimple,heap) **worklist)
> + ? ? ? ? ? ?enum vect_relevant relevant, VEC(gimple,heap) **worklist,
> + ? ? ? ? ? ?bool force)
> ?{
> ? struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> ? stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
> @@ -363,7 +366,7 @@ process_use (gimple stmt, tree use, loop
>
> ? /* case 1: we are only interested in uses that need to be vectorized. ?Uses
> ? ? ?that are used for address computation are not considered relevant. ?*/
> - ?if (!exist_non_indexing_operands_for_use_p (use, stmt))
> + ?if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
> ? ? ?return true;
>
> ? if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
> @@ -646,7 +649,7 @@ vect_mark_stmts_to_be_vectorized (loop_v
> ? ? ? ? ? ? break;
> ? ? ? ? }
>
> - ? ? ?if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
> + ? ? ?if (is_pattern_stmt_p (stmt_vinfo))
> ? ? ? ? {
> ? ? ? ? ? /* Pattern statements are not inserted into the code, so
> ? ? ? ? ? ? ?FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
> @@ -660,9 +663,9 @@ vect_mark_stmts_to_be_vectorized (loop_v
> ? ? ? ? ? ? ?if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
> ? ? ? ? ? ? ? ?{
> ? ? ? ? ? ? ? ? ?if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? live_p, relevant, &worklist)
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? live_p, relevant, &worklist, false)
> ? ? ? ? ? ? ? ? ? ? ?|| !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?live_p, relevant, &worklist))
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?live_p, relevant, &worklist, false))
> ? ? ? ? ? ? ? ? ? ?{
> ? ? ? ? ? ? ? ? ? ? ?VEC_free (gimple, heap, worklist);
> ? ? ? ? ? ? ? ? ? ? ?return false;
> @@ -673,7 +676,7 @@ vect_mark_stmts_to_be_vectorized (loop_v
> ? ? ? ? ? ? ? ? {
> ? ? ? ? ? ? ? ? ?op = gimple_op (stmt, i);
> ? ? ? ? ? ? ? ? ? if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?&worklist))
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &worklist, false))
> ? ? ? ? ? ? ? ? ? ? {
> ? ? ? ? ? ? ? ? ? ? ? VEC_free (gimple, heap, worklist);
> ? ? ? ? ? ? ? ? ? ? ? return false;
> @@ -686,7 +689,7 @@ vect_mark_stmts_to_be_vectorized (loop_v
> ? ? ? ? ? ? ? ? {
> ? ? ? ? ? ? ? ? ? tree arg = gimple_call_arg (stmt, i);
> ? ? ? ? ? ? ? ? ? if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?&worklist))
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &worklist, false))
> ? ? ? ? ? ? ? ? ? ? {
> ? ? ? ? ? ? ? ? ? ? ? VEC_free (gimple, heap, worklist);
> ? ? ? ? ? ? ? ? ? ? ? return false;
> @@ -699,12 +702,25 @@ vect_mark_stmts_to_be_vectorized (loop_v
> ? ? ? ? ? {
> ? ? ? ? ? ? tree op = USE_FROM_PTR (use_p);
> ? ? ? ? ? ? if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?&worklist))
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? &worklist, false))
> ? ? ? ? ? ? ? {
> ? ? ? ? ? ? ? ? VEC_free (gimple, heap, worklist);
> ? ? ? ? ? ? ? ? return false;
> ? ? ? ? ? ? ? }
> ? ? ? ? ? }
> +
> + ? ? ?if (STMT_VINFO_GATHER_P (stmt_vinfo))
> + ? ? ? {
> + ? ? ? ? tree off;
> + ? ? ? ? tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
> + ? ? ? ? gcc_assert (decl);
> + ? ? ? ? if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? &worklist, true))
> + ? ? ? ? ? {
> + ? ? ? ? ? ? VEC_free (gimple, heap, worklist);
> + ? ? ? ? ? ? return false;
> + ? ? ? ? ? }
> + ? ? ? }
> ? ? } /* while worklist */
>
> ? VEC_free (gimple, heap, worklist);
> @@ -4142,23 +4158,17 @@ vectorizable_store (gimple stmt, gimple_
> ? return true;
> ?}
>
> -/* Given a vector type VECTYPE returns a builtin DECL to be used
> - ? for vector permutation and returns the mask that implements
> - ? reversal of the vector elements. ?If that is impossible to do,
> - ? returns NULL. ?*/
> +/* Given a vector type VECTYPE and permutation SEL returns
> + ? the VECTOR_CST mask that implements the permutation of the
> + ? vector elements. ?If that is impossible to do, returns NULL. ?*/
>
> ?static tree
> -perm_mask_for_reverse (tree vectype)
> +gen_perm_mask (tree vectype, unsigned char *sel)
> ?{
> ? tree mask_elt_type, mask_type, mask_vec;
> ? int i, nunits;
> - ?unsigned char *sel;
>
> ? nunits = TYPE_VECTOR_SUBPARTS (vectype);
> - ?sel = XALLOCAVEC (unsigned char, nunits);
> -
> - ?for (i = 0; i < nunits; ++i)
> - ? ?sel[i] = nunits - 1 - i;
>
> ? if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
> ? ? return NULL;
> @@ -4169,33 +4179,52 @@ perm_mask_for_reverse (tree vectype)
> ? mask_type = get_vectype_for_scalar_type (mask_elt_type);
>
> ? mask_vec = NULL;
> - ?for (i = 0; i < nunits; i++)
> - ? ?mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, i), mask_vec);
> + ?for (i = nunits - 1; i >= 0; i--)
> + ? ?mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
> + ? ? ? ? ? ? ? ? ? ? ? ? mask_vec);
> ? mask_vec = build_vector (mask_type, mask_vec);
>
> ? return mask_vec;
> ?}
>
> -/* Given a vector variable X, that was generated for the scalar LHS of
> - ? STMT, generate instructions to reverse the vector elements of X,
> - ? insert them a *GSI and return the permuted vector variable. ?*/
> +/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
> + ? reversal of the vector elements. ?If that is impossible to do,
> + ? returns NULL. ?*/
>
> ?static tree
> -reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
> +perm_mask_for_reverse (tree vectype)
> +{
> + ?int i, nunits;
> + ?unsigned char *sel;
> +
> + ?nunits = TYPE_VECTOR_SUBPARTS (vectype);
> + ?sel = XALLOCAVEC (unsigned char, nunits);
> +
> + ?for (i = 0; i < nunits; ++i)
> + ? ?sel[i] = nunits - 1 - i;
> +
> + ?return gen_perm_mask (vectype, sel);
> +}
> +
> +/* Given a vector variable X and Y, that was generated for the scalar
> + ? STMT, generate instructions to permute the vector elements of X and Y
> + ? using permutation mask MASK_VEC, insert them at *GSI and return the
> + ? permuted vector variable. ?*/
> +
> +static tree
> +permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
> + ? ? ? ? ? ? ? ? ? ? gimple_stmt_iterator *gsi)
> ?{
> ? tree vectype = TREE_TYPE (x);
> - ?tree mask_vec, perm_dest, data_ref;
> + ?tree perm_dest, data_ref;
> ? gimple perm_stmt;
>
> - ?mask_vec = perm_mask_for_reverse (vectype);
> -
> ? perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
> + ?data_ref = make_ssa_name (perm_dest, NULL);
>
> ? /* Generate the permute statement. ?*/
> - ?perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?x, x, mask_vec);
> - ?data_ref = make_ssa_name (perm_dest, perm_stmt);
> - ?gimple_set_lhs (perm_stmt, data_ref);
> + ?perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?x, y, mask_vec);
> ? vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>
> ? return data_ref;
> @@ -4254,6 +4283,10 @@ vectorizable_load (gimple stmt, gimple_s
> ? bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
> ? int vf;
> ? tree aggr_type;
> + ?tree gather_base = NULL_TREE, gather_off = NULL_TREE;
> + ?tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
> + ?int gather_scale = 1;
> + ?enum vect_def_type gather_dt = vect_unknown_def_type;
>
> ? if (loop_vinfo)
> ? ? {
> @@ -4334,7 +4367,7 @@ vectorizable_load (gimple stmt, gimple_s
> ? ? {
> ? ? ? strided_load = true;
> ? ? ? /* FORNOW */
> - ? ? ?gcc_assert (! nested_in_vect_loop);
> + ? ? ?gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
>
> ? ? ? first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
> ? ? ? if (!slp && !PURE_SLP_STMT (stmt_info))
> @@ -4349,7 +4382,7 @@ vectorizable_load (gimple stmt, gimple_s
>
> ? if (negative)
> ? ? {
> - ? ? ?gcc_assert (!strided_load);
> + ? ? ?gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
> ? ? ? alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
> ? ? ? if (alignment_support_scheme != dr_aligned
> ? ? ? ? ?&& alignment_support_scheme != dr_unaligned_supported)
> @@ -4366,6 +4399,23 @@ vectorizable_load (gimple stmt, gimple_s
> ? ? ? ?}
> ? ? }
>
> + ?if (STMT_VINFO_GATHER_P (stmt_info))
> + ? ?{
> + ? ? ?gimple def_stmt;
> + ? ? ?tree def;
> + ? ? ?gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?&gather_off, &gather_scale);
> + ? ? ?gcc_assert (gather_decl);
> + ? ? ?if (!vect_is_simple_use_1 (gather_off, loop_vinfo, bb_vinfo,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?&def_stmt, &def, &gather_dt,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?&gather_off_vectype))
> + ? ? ? {
> + ? ? ? ? if (vect_print_dump_info (REPORT_DETAILS))
> + ? ? ? ? ? fprintf (vect_dump, "gather index use not simple.");
> + ? ? ? ? return false;
> + ? ? ? }
> + ? ?}
> +
> ? if (!vec_stmt) /* transformation not required. ?*/
> ? ? {
> ? ? ? STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
> @@ -4378,6 +4428,161 @@ vectorizable_load (gimple stmt, gimple_s
>
> ? /** Transform. ?**/
>
> + ?if (STMT_VINFO_GATHER_P (stmt_info))
> + ? ?{
> + ? ? ?tree vec_oprnd0 = NULL_TREE, op;
> + ? ? ?tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
> + ? ? ?tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
> + ? ? ?tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
> + ? ? ?edge pe = loop_preheader_edge (loop);
> + ? ? ?gimple_seq seq;
> + ? ? ?basic_block new_bb;
> + ? ? ?enum { NARROW, NONE, WIDEN } modifier;
> + ? ? ?int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
> +
> + ? ? ?if (nunits == gather_off_nunits)
> + ? ? ? modifier = NONE;
> + ? ? ?else if (nunits == gather_off_nunits / 2)
> + ? ? ? {
> + ? ? ? ? unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
> + ? ? ? ? modifier = WIDEN;
> +
> + ? ? ? ? for (i = 0; i < gather_off_nunits; ++i)
> + ? ? ? ? ? sel[i] = i | nunits;
> +
> + ? ? ? ? perm_mask = gen_perm_mask (gather_off_vectype, sel);
> + ? ? ? ? gcc_assert (perm_mask != NULL_TREE);
> + ? ? ? }
> + ? ? ?else if (nunits == gather_off_nunits * 2)
> + ? ? ? {
> + ? ? ? ? unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
> + ? ? ? ? modifier = NARROW;
> +
> + ? ? ? ? for (i = 0; i < nunits; ++i)
> + ? ? ? ? ? sel[i] = i < gather_off_nunits
> + ? ? ? ? ? ? ? ? ? ?? i : i + nunits - gather_off_nunits;
> +
> + ? ? ? ? perm_mask = gen_perm_mask (vectype, sel);
> + ? ? ? ? gcc_assert (perm_mask != NULL_TREE);
> + ? ? ? ? ncopies *= 2;
> + ? ? ? }
> + ? ? ?else
> + ? ? ? gcc_unreachable ();
> +
> + ? ? ?rettype = TREE_TYPE (TREE_TYPE (gather_decl));
> + ? ? ?srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
> + ? ? ?ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
> + ? ? ?idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
> + ? ? ?masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
> + ? ? ?scaletype = TREE_VALUE (arglist);
> + ? ? ?gcc_checking_assert (types_compatible_p (srctype, rettype)
> + ? ? ? ? ? ? ? ? ? ? ? ? ?&& types_compatible_p (srctype, masktype));
> +
> + ? ? ?vec_dest = vect_create_destination_var (scalar_dest, vectype);
> +
> + ? ? ?ptr = fold_convert (ptrtype, gather_base);
> + ? ? ?if (!is_gimple_min_invariant (ptr))
> + ? ? ? {
> + ? ? ? ? ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
> + ? ? ? ? new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> + ? ? ? ? gcc_assert (!new_bb);
> + ? ? ? }
> +
> + ? ? ?/* Currently we support only unconditional gather loads,
> + ? ? ? ?so mask should be all ones. ?*/
> + ? ? ?if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
> + ? ? ? mask = build_int_cst (TREE_TYPE (masktype), -1);
> + ? ? ?else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
> + ? ? ? {
> + ? ? ? ? REAL_VALUE_TYPE r;
> + ? ? ? ? long tmp[6];
> + ? ? ? ? for (j = 0; j < 6; ++j)
> + ? ? ? ? ? tmp[j] = -1;
> + ? ? ? ? real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
> + ? ? ? ? mask = build_real (TREE_TYPE (masktype), r);
> + ? ? ? }
> + ? ? ?else
> + ? ? ? gcc_unreachable ();
> + ? ? ?mask = build_vector_from_val (masktype, mask);
> + ? ? ?mask = vect_init_vector (stmt, mask, masktype, NULL);
> +
> + ? ? ?scale = build_int_cst (scaletype, gather_scale);
> +
> + ? ? ?prev_stmt_info = NULL;
> + ? ? ?for (j = 0; j < ncopies; ++j)
> + ? ? ? {
> + ? ? ? ? if (modifier == WIDEN && (j & 1))
> + ? ? ? ? ? op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?perm_mask, stmt, gsi);
> + ? ? ? ? else if (j == 0)
> + ? ? ? ? ? op = vec_oprnd0
> + ? ? ? ? ? ? = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
> + ? ? ? ? else
> + ? ? ? ? ? op = vec_oprnd0
> + ? ? ? ? ? ? = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
> +
> + ? ? ? ? if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
> + ? ? ? ? ? {
> + ? ? ? ? ? ? gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
> + ? ? ? ? ? ? ? ? ? ? ? ? == TYPE_VECTOR_SUBPARTS (idxtype));
> + ? ? ? ? ? ? var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
> + ? ? ? ? ? ? add_referenced_var (var);
> + ? ? ? ? ? ? var = make_ssa_name (var, NULL);
> + ? ? ? ? ? ? op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
> + ? ? ? ? ? ? new_stmt
> + ? ? ? ? ? ? ? = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? op, NULL_TREE);
> + ? ? ? ? ? ? vect_finish_stmt_generation (stmt, new_stmt, gsi);
> + ? ? ? ? ? ? op = var;
> + ? ? ? ? ? }
> +
> + ? ? ? ? new_stmt
> + ? ? ? ? ? = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
> +
> + ? ? ? ? if (!useless_type_conversion_p (vectype, rettype))
> + ? ? ? ? ? {
> + ? ? ? ? ? ? gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
> + ? ? ? ? ? ? ? ? ? ? ? ? == TYPE_VECTOR_SUBPARTS (rettype));
> + ? ? ? ? ? ? var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
> + ? ? ? ? ? ? add_referenced_var (var);
> + ? ? ? ? ? ? op = make_ssa_name (var, new_stmt);
> + ? ? ? ? ? ? gimple_call_set_lhs (new_stmt, op);
> + ? ? ? ? ? ? vect_finish_stmt_generation (stmt, new_stmt, gsi);
> + ? ? ? ? ? ? var = make_ssa_name (vec_dest, NULL);
> + ? ? ? ? ? ? op = build1 (VIEW_CONVERT_EXPR, vectype, op);
> + ? ? ? ? ? ? new_stmt
> + ? ? ? ? ? ? ? = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? NULL_TREE);
> + ? ? ? ? ? }
> + ? ? ? ? else
> + ? ? ? ? ? {
> + ? ? ? ? ? ? var = make_ssa_name (vec_dest, new_stmt);
> + ? ? ? ? ? ? gimple_call_set_lhs (new_stmt, var);
> + ? ? ? ? ? }
> +
> + ? ? ? ? vect_finish_stmt_generation (stmt, new_stmt, gsi);
> +
> + ? ? ? ? if (modifier == NARROW)
> + ? ? ? ? ? {
> + ? ? ? ? ? ? if ((j & 1) == 0)
> + ? ? ? ? ? ? ? {
> + ? ? ? ? ? ? ? ? prev_res = var;
> + ? ? ? ? ? ? ? ? continue;
> + ? ? ? ? ? ? ? }
> + ? ? ? ? ? ? var = permute_vec_elements (prev_res, var,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? perm_mask, stmt, gsi);
> + ? ? ? ? ? ? new_stmt = SSA_NAME_DEF_STMT (var);
> + ? ? ? ? ? }
> +
> + ? ? ? ? if (prev_stmt_info == NULL)
> + ? ? ? ? ? STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
> + ? ? ? ? else
> + ? ? ? ? ? STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
> + ? ? ? ? prev_stmt_info = vinfo_for_stmt (new_stmt);
> + ? ? ? }
> + ? ? ?return true;
> + ? ?}
> +
> ? if (strided_load)
> ? ? {
> ? ? ? first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
> @@ -4769,7 +4974,9 @@ vectorizable_load (gimple stmt, gimple_s
>
> ? ? ? ? ? ? ?if (negative)
> ? ? ? ? ? ? ? ?{
> - ? ? ? ? ? ? ? ? new_temp = reverse_vec_elements (new_temp, stmt, gsi);
> + ? ? ? ? ? ? ? ? tree perm_mask = perm_mask_for_reverse (vectype);
> + ? ? ? ? ? ? ? ? new_temp = permute_vec_elements (new_temp, new_temp,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?perm_mask, stmt, gsi);
> ? ? ? ? ? ? ? ? ?new_stmt = SSA_NAME_DEF_STMT (new_temp);
> ? ? ? ? ? ? ? ?}
>
> --- gcc/target.def.jj ? 2011-11-04 08:52:19.000000000 +0100
> +++ gcc/target.def ? ? ?2011-11-04 08:53:13.000000000 +0100
> @@ -1021,6 +1021,14 @@ DEFHOOK
> ?(void),
> ?default_autovectorize_vector_sizes)
>
> +/* Target builtin that implements vector gather operation. ?*/
> +DEFHOOK
> +(builtin_gather,
> + "",
> + tree,
> + (const_tree mem_vectype, const_tree index_type, int scale),
> + NULL)
> +
> ?HOOK_VECTOR_END (vectorize)
>
> ?#undef HOOK_PREFIX
> --- gcc/tree-data-ref.c.jj ? ? ?2011-11-04 08:52:19.000000000 +0100
> +++ gcc/tree-data-ref.c 2011-11-04 08:53:13.000000000 +0100
> @@ -1351,13 +1351,11 @@ dr_may_alias_p (const struct data_refere
> ? return refs_may_alias_p (addr_a, addr_b);
> ?}
>
> -static void compute_self_dependence (struct data_dependence_relation *);
> -
> ?/* Initialize a data dependence relation between data accesses A and
> ? ?B. ?NB_LOOPS is the number of loops surrounding the references: the
> ? ?size of the classic distance/direction vectors. ?*/
>
> -static struct data_dependence_relation *
> +struct data_dependence_relation *
> ?initialize_data_dependence_relation (struct data_reference *a,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct data_reference *b,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? VEC (loop_p, heap) *loop_nest)
> @@ -4121,7 +4119,7 @@ compute_affine_dependence (struct data_d
> ?/* This computes the dependence relation for the same data
> ? ?reference into DDR. ?*/
>
> -static void
> +void
> ?compute_self_dependence (struct data_dependence_relation *ddr)
> ?{
> ? unsigned int i;
> --- gcc/tree-data-ref.h.jj ? ? ?2011-11-04 08:52:19.000000000 +0100
> +++ gcc/tree-data-ref.h 2011-11-04 13:22:28.000000000 +0100
> @@ -1,5 +1,5 @@
> ?/* Data references and dependences detectors.
> - ? Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
> + ? Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
> ? ?Free Software Foundation, Inc.
> ? ?Contributed by Sebastian Pop <pop@cri.ensmp.fr>
>
> @@ -423,6 +423,9 @@ extern bool graphite_find_data_reference
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? VEC (data_reference_p, heap) **);
> ?struct data_reference *create_data_ref (loop_p, loop_p, tree, gimple, bool);
> ?extern bool find_loop_nest (struct loop *, VEC (loop_p, heap) **);
> +extern struct data_dependence_relation *initialize_data_dependence_relation
> + ? ? (struct data_reference *, struct data_reference *, VEC (loop_p, heap) *);
> +extern void compute_self_dependence (struct data_dependence_relation *);
> ?extern void compute_all_dependences (VEC (data_reference_p, heap) *,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? VEC (ddr_p, heap) **, VEC (loop_p, heap) *,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? bool);
> --- gcc/doc/tm.texi.in.jj ? ? ? 2011-11-04 08:52:19.000000000 +0100
> +++ gcc/doc/tm.texi.in ?2011-11-04 08:53:13.000000000 +0100
> @@ -5696,6 +5696,14 @@ mode returned by @code{TARGET_VECTORIZE_
> ?The default is zero which means to not iterate over other vector sizes.
> ?@end deftypefn
>
> +@hook TARGET_VECTORIZE_BUILTIN_GATHER
> +Target builtin that implements vector gather operation. ?@var{mem_vectype}
> +is the vector type of the load and @var{index_type} is scalar type of
> +the index, scaled by @var{scale}.
> +The default is @code{NULL_TREE} which means to not vectorize gather
> +loads.
> +@end deftypefn
> +
> ?@node Anchored Addresses
> ?@section Anchored Addresses
> ?@cindex anchored addresses
> --- gcc/doc/tm.texi.jj ?2011-11-04 08:52:19.000000000 +0100
> +++ gcc/doc/tm.texi ? ? 2011-11-04 08:53:13.000000000 +0100
> @@ -5758,6 +5758,14 @@ mode returned by @code{TARGET_VECTORIZE_
> ?The default is zero which means to not iterate over other vector sizes.
> ?@end deftypefn
>
> +@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_GATHER (const_tree @var{mem_vectype}, const_tree @var{index_type}, int @var{scale})
> +Target builtin that implements vector gather operation. ?@var{mem_vectype}
> +is the vector type of the load and @var{index_type} is scalar type of
> +the index, scaled by @var{scale}.
> +The default is @code{NULL_TREE} which means to not vectorize gather
> +loads.
> +@end deftypefn
> +
> ?@node Anchored Addresses
> ?@section Anchored Addresses
> ?@cindex anchored addresses
> --- gcc/tree-vect-data-refs.c.jj ? ? ? ?2011-11-04 08:52:57.000000000 +0100
> +++ gcc/tree-vect-data-refs.c ? 2011-11-04 14:10:17.000000000 +0100
> @@ -2497,6 +2497,199 @@ vect_prune_runtime_alias_test_list (loop
> ? return true;
> ?}
>
> +/* Check whether a non-affine read in stmt is suitable for gather load
> + ? and if so, return a builtin decl for that operation. ?*/
> +
> +tree
> +vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
> + ? ? ? ? ? ? ? ? ?tree *offp, int *scalep)
> +{
> + ?HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
> + ?struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> + ?stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> + ?struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> + ?tree offtype = NULL_TREE;
> + ?tree decl, base, off;
> + ?enum machine_mode pmode;
> + ?int punsignedp, pvolatilep;
> +
> + ?/* The gather builtins need address of the form
> + ? ? loop_invariant + vector * {1, 2, 4, 8}
> + ? ? or
> + ? ? loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
> + ? ? Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
> + ? ? of loop invariants/SSA_NAMEs defined in the loop, with casts,
> + ? ? multiplications and additions in it. ?To get a vector, we need
> + ? ? a single SSA_NAME that will be defined in the loop and will
> + ? ? contain everything that is not loop invariant and that can be
> +     vectorized.  The following code attempts to find such a preexisting
> + ? ? SSA_NAME OFF and put the loop invariants into a tree BASE
> + ? ? that can be gimplified before the loop. ?*/
> + ?base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? &pmode, &punsignedp, &pvolatilep, false);
> + ?gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
> +
> + ?if (TREE_CODE (base) == MEM_REF)
> + ? ?{
> + ? ? ?if (!integer_zerop (TREE_OPERAND (base, 1)))
> + ? ? ? {
> + ? ? ? ? if (off == NULL_TREE)
> + ? ? ? ? ? {
> + ? ? ? ? ? ? double_int moff = mem_ref_offset (base);
> + ? ? ? ? ? ? off = double_int_to_tree (sizetype, moff);
> + ? ? ? ? ? }
> + ? ? ? ? else
> + ? ? ? ? ? off = size_binop (PLUS_EXPR, off,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? fold_convert (sizetype, TREE_OPERAND (base, 1)));
> + ? ? ? }
> + ? ? ?base = TREE_OPERAND (base, 0);
> + ? ?}
> + ?else
> + ? ?base = build_fold_addr_expr (base);
> +
> + ?if (off == NULL_TREE)
> + ? ?off = size_zero_node;
> +
> + ?/* If base is not loop invariant, either off is 0, then we start with just
> + ? ? the constant offset in the loop invariant BASE and continue with base
> + ? ? as OFF, otherwise give up.
> + ? ? We could handle that case by gimplifying the addition of base + off
> + ? ? into some SSA_NAME and use that as off, but for now punt. ?*/
> + ?if (!expr_invariant_in_loop_p (loop, base))
> + ? ?{
> + ? ? ?if (!integer_zerop (off))
> + ? ? ? return NULL_TREE;
> + ? ? ?off = base;
> + ? ? ?base = size_int (pbitpos / BITS_PER_UNIT);
> + ? ?}
> + ?/* Otherwise put base + constant offset into the loop invariant BASE
> + ? ? and continue with OFF. ?*/
> + ?else
> + ? ?{
> + ? ? ?base = fold_convert (sizetype, base);
> + ? ? ?base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
> + ? ?}
> +
> + ?/* OFF at this point may be either a SSA_NAME or some tree expression
> + ? ? from get_inner_reference. ?Try to peel off loop invariants from it
> + ? ? into BASE as long as possible. ?*/
> + ?STRIP_NOPS (off);
> + ?while (offtype == NULL_TREE)
> + ? ?{
> + ? ? ?enum tree_code code;
> + ? ? ?tree op0, op1, add = NULL_TREE;
> +
> + ? ? ?if (TREE_CODE (off) == SSA_NAME)
> + ? ? ? {
> + ? ? ? ? gimple def_stmt = SSA_NAME_DEF_STMT (off);
> +
> + ? ? ? ? if (expr_invariant_in_loop_p (loop, off))
> + ? ? ? ? ? return NULL_TREE;
> +
> + ? ? ? ? if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
> + ? ? ? ? ? break;
> +
> + ? ? ? ? op0 = gimple_assign_rhs1 (def_stmt);
> + ? ? ? ? code = gimple_assign_rhs_code (def_stmt);
> + ? ? ? ? op1 = gimple_assign_rhs2 (def_stmt);
> + ? ? ? }
> + ? ? ?else
> + ? ? ? {
> + ? ? ? ? if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
> + ? ? ? ? ? return NULL_TREE;
> + ? ? ? ? code = TREE_CODE (off);
> + ? ? ? ? extract_ops_from_tree (off, &code, &op0, &op1);
> + ? ? ? }
> + ? ? ?switch (code)
> + ? ? ? {
> + ? ? ? case POINTER_PLUS_EXPR:
> + ? ? ? case PLUS_EXPR:
> + ? ? ? ? if (expr_invariant_in_loop_p (loop, op0))
> + ? ? ? ? ? {
> + ? ? ? ? ? ? add = op0;
> + ? ? ? ? ? ? off = op1;
> + ? ? ? ? ? do_add:
> + ? ? ? ? ? ? add = fold_convert (sizetype, add);
> + ? ? ? ? ? ? if (scale != 1)
> + ? ? ? ? ? ? ? add = size_binop (MULT_EXPR, add, size_int (scale));
> + ? ? ? ? ? ? base = size_binop (PLUS_EXPR, base, add);
> + ? ? ? ? ? ? continue;
> + ? ? ? ? ? }
> + ? ? ? ? if (expr_invariant_in_loop_p (loop, op1))
> + ? ? ? ? ? {
> + ? ? ? ? ? ? add = op1;
> + ? ? ? ? ? ? off = op0;
> + ? ? ? ? ? ? goto do_add;
> + ? ? ? ? ? }
> + ? ? ? ? break;
> + ? ? ? case MINUS_EXPR:
> + ? ? ? ? if (expr_invariant_in_loop_p (loop, op1))
> + ? ? ? ? ? {
> + ? ? ? ? ? ? add = fold_convert (sizetype, op1);
> + ? ? ? ? ? ? add = size_binop (MINUS_EXPR, size_zero_node, add);
> + ? ? ? ? ? ? off = op0;
> + ? ? ? ? ? ? goto do_add;
> + ? ? ? ? ? }
> + ? ? ? ? break;
> + ? ? ? case MULT_EXPR:
> + ? ? ? ? if (scale == 1 && host_integerp (op1, 0))
> + ? ? ? ? ? {
> + ? ? ? ? ? ? scale = tree_low_cst (op1, 0);
> + ? ? ? ? ? ? off = op0;
> + ? ? ? ? ? ? continue;
> + ? ? ? ? ? }
> + ? ? ? ? break;
> + ? ? ? case SSA_NAME:
> + ? ? ? ? off = op0;
> + ? ? ? ? continue;
> + ? ? ? CASE_CONVERT:
> + ? ? ? ? if (!POINTER_TYPE_P (TREE_TYPE (op0))
> + ? ? ? ? ? ? && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
> + ? ? ? ? ? break;
> + ? ? ? ? if (TYPE_PRECISION (TREE_TYPE (op0))
> + ? ? ? ? ? ? == TYPE_PRECISION (TREE_TYPE (off)))
> + ? ? ? ? ? {
> + ? ? ? ? ? ? off = op0;
> + ? ? ? ? ? ? continue;
> + ? ? ? ? ? }
> + ? ? ? ? if (TYPE_PRECISION (TREE_TYPE (op0))
> + ? ? ? ? ? ? < TYPE_PRECISION (TREE_TYPE (off)))
> + ? ? ? ? ? {
> + ? ? ? ? ? ? off = op0;
> + ? ? ? ? ? ? offtype = TREE_TYPE (off);
> + ? ? ? ? ? ? STRIP_NOPS (off);
> + ? ? ? ? ? ? continue;
> + ? ? ? ? ? }
> + ? ? ? ? break;
> + ? ? ? default:
> + ? ? ? ? break;
> + ? ? ? }
> + ? ? ?break;
> + ? ?}
> +
> + ?/* If at the end OFF still isn't a SSA_NAME or isn't
> + ? ? defined in the loop, punt. ?*/
> + ?if (TREE_CODE (off) != SSA_NAME
> + ? ? ?|| expr_invariant_in_loop_p (loop, off))
> + ? ?return NULL_TREE;
> +
> + ?if (offtype == NULL_TREE)
> + ? ?offtype = TREE_TYPE (off);
> +
> + ?decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?offtype, scale);
> + ?if (decl == NULL_TREE)
> + ? ?return NULL_TREE;
> +
> + ?if (basep)
> + ? ?*basep = base;
> + ?if (offp)
> + ? ?*offp = off;
> + ?if (scalep)
> + ? ?*scalep = scale;
> + ?return decl;
> +}
> +
>
> ?/* Function vect_analyze_data_refs.
>
> @@ -2573,6 +2766,7 @@ vect_analyze_data_refs (loop_vec_info lo
> ? ? ? gimple stmt;
> ? ? ? stmt_vec_info stmt_info;
> ? ? ? tree base, offset, init;
> + ? ? ?bool gather = false;
> ? ? ? int vf;
>
> ? ? ? if (!dr || !DR_REF (dr))
> @@ -2594,22 +2788,51 @@ vect_analyze_data_refs (loop_vec_info lo
>
> ? ? ? /* Check that analysis of the data-ref succeeded. ?*/
> ? ? ? if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
> - ? ? ? ? ?|| !DR_STEP (dr))
> + ? ? ? ? || !DR_STEP (dr))
> ? ? ? ? {
> - ? ? ? ? ?if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> - ? ? ? ? ? ?{
> - ? ? ? ? ? ? ?fprintf (vect_dump, "not vectorized: data ref analysis failed ");
> - ? ? ? ? ? ? ?print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
> - ? ? ? ? ? ?}
> + ? ? ? ? /* If target supports vector gather loads, see if they can't
> + ? ? ? ? ? ?be used. ?*/
> + ? ? ? ? if (loop_vinfo
> + ? ? ? ? ? ? && DR_IS_READ (dr)
> + ? ? ? ? ? ? && !TREE_THIS_VOLATILE (DR_REF (dr))
> + ? ? ? ? ? ? && targetm.vectorize.builtin_gather != NULL
> + ? ? ? ? ? ? && !nested_in_vect_loop_p (loop, stmt))
> + ? ? ? ? ? {
> + ? ? ? ? ? ? struct data_reference *newdr
> + ? ? ? ? ? ? ? = create_data_ref (NULL, loop_containing_stmt (stmt),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?DR_REF (dr), stmt, true);
> + ? ? ? ? ? ? gcc_assert (newdr != NULL && DR_REF (newdr));
> + ? ? ? ? ? ? if (DR_BASE_ADDRESS (newdr)
> + ? ? ? ? ? ? ? ? && DR_OFFSET (newdr)
> + ? ? ? ? ? ? ? ? && DR_INIT (newdr)
> + ? ? ? ? ? ? ? ? && DR_STEP (newdr)
> + ? ? ? ? ? ? ? ? && integer_zerop (DR_STEP (newdr)))
> + ? ? ? ? ? ? ? {
> + ? ? ? ? ? ? ? ? dr = newdr;
> + ? ? ? ? ? ? ? ? gather = true;
> + ? ? ? ? ? ? ? }
> + ? ? ? ? ? ? else
> + ? ? ? ? ? ? ? free_data_ref (newdr);
> + ? ? ? ? ? }
>
> - ? ? ? ? ?if (bb_vinfo)
> - ? ? ? ? ? ?{
> - ? ? ? ? ? ? ?STMT_VINFO_VECTORIZABLE (stmt_info) = false;
> - ? ? ? ? ? ? ?stop_bb_analysis = true;
> - ? ? ? ? ? ? ?continue;
> - ? ? ? ? ? ?}
> + ? ? ? ? if (!gather)
> + ? ? ? ? ? {
> + ? ? ? ? ? ? if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> + ? ? ? ? ? ? ? {
> + ? ? ? ? ? ? ? ? fprintf (vect_dump, "not vectorized: data ref analysis "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "failed ");
> + ? ? ? ? ? ? ? ? print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
> + ? ? ? ? ? ? ? }
> +
> + ? ? ? ? ? ? if (bb_vinfo)
> + ? ? ? ? ? ? ? {
> + ? ? ? ? ? ? ? ? STMT_VINFO_VECTORIZABLE (stmt_info) = false;
> + ? ? ? ? ? ? ? ? stop_bb_analysis = true;
> + ? ? ? ? ? ? ? ? continue;
> + ? ? ? ? ? ? ? }
>
> - ? ? ? ? ?return false;
> + ? ? ? ? ? ? return false;
> + ? ? ? ? ? }
> ? ? ? ? }
>
> ? ? ? if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
> @@ -2625,7 +2848,9 @@ vect_analyze_data_refs (loop_vec_info lo
> ? ? ? ? ? ? ? continue;
> ? ? ? ? ? ? }
>
> - ? ? ? ? ? return false;
> + ? ? ? ? if (gather)
> + ? ? ? ? ? free_data_ref (dr);
> + ? ? ? ? return false;
> ? ? ? ? }
>
> ? ? ? if (TREE_THIS_VOLATILE (DR_REF (dr)))
> @@ -2666,6 +2891,8 @@ vect_analyze_data_refs (loop_vec_info lo
> ? ? ? ? ? ? ? continue;
> ? ? ? ? ? ? }
>
> + ? ? ? ? if (gather)
> + ? ? ? ? ? free_data_ref (dr);
> ? ? ? ? ? return false;
> ? ? ? ? }
>
> @@ -2791,6 +3018,8 @@ vect_analyze_data_refs (loop_vec_info lo
> ? ? ? ? ? ? ? continue;
> ? ? ? ? ? ? }
>
> + ? ? ? ? if (gather)
> + ? ? ? ? ? free_data_ref (dr);
> ? ? ? ? ? return false;
> ? ? ? ? }
>
> @@ -2818,8 +3047,13 @@ vect_analyze_data_refs (loop_vec_info lo
> ? ? ? ? ? ? ? stop_bb_analysis = true;
> ? ? ? ? ? ? ? continue;
> ? ? ? ? ? ? }
> - ? ? ? ? ?else
> - ? ? ? ? ? ?return false;
> +
> + ? ? ? ? if (gather)
> + ? ? ? ? ? {
> + ? ? ? ? ? ? STMT_VINFO_DATA_REF (stmt_info) = NULL;
> + ? ? ? ? ? ? free_data_ref (dr);
> + ? ? ? ? ? }
> + ? ? ? ? return false;
> ? ? ? ? }
>
> ? ? ? /* Adjust the minimal vectorization factor according to the
> @@ -2827,6 +3061,86 @@ vect_analyze_data_refs (loop_vec_info lo
> ? ? ? vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
> ? ? ? if (vf > *min_vf)
> ? ? ? ?*min_vf = vf;
> +
> + ? ? ?if (gather)
> + ? ? ? {
> + ? ? ? ? unsigned int j, k, n;
> + ? ? ? ? struct data_reference *olddr
> + ? ? ? ? ? = VEC_index (data_reference_p, datarefs, i);
> + ? ? ? ? VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
> + ? ? ? ? struct data_dependence_relation *ddr, *newddr;
> + ? ? ? ? bool bad = false;
> + ? ? ? ? tree off;
> + ? ? ? ? VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
> +
> +         if (!vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL)
> +             || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
> +           {
> +             if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> +               {
> +                 fprintf (vect_dump,
> +                          "not vectorized: not suitable for gather ");
> +                 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
> +               }
> +             /* DR is the data-ref created for the gather attempt; it has
> +                not been stored into DATAREFS yet, so it must be released
> +                here, as the earlier failure path does.  Clear the
> +                statement's pointer first so it does not dangle.  */
> +             STMT_VINFO_DATA_REF (stmt_info) = NULL;
> +             free_data_ref (dr);
> +             return false;
> +           }
> +
> + ? ? ? ? n = VEC_length (data_reference_p, datarefs) - 1;
> + ? ? ? ? for (j = 0, k = i - 1; j < i; j++)
> + ? ? ? ? ? {
> + ? ? ? ? ? ? ddr = VEC_index (ddr_p, ddrs, k);
> + ? ? ? ? ? ? gcc_assert (DDR_B (ddr) == olddr);
> + ? ? ? ? ? ? newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? nest);
> + ? ? ? ? ? ? VEC_replace (ddr_p, ddrs, k, newddr);
> + ? ? ? ? ? ? free_dependence_relation (ddr);
> + ? ? ? ? ? ? if (!bad
> + ? ? ? ? ? ? ? ? && DR_IS_WRITE (DDR_A (newddr))
> + ? ? ? ? ? ? ? ? && DDR_ARE_DEPENDENT (newddr) != chrec_known)
> + ? ? ? ? ? ? ? bad = true;
> + ? ? ? ? ? ? k += --n;
> + ? ? ? ? ? }
> +
> + ? ? ? ? k++;
> + ? ? ? ? n = k + VEC_length (data_reference_p, datarefs) - i - 1;
> + ? ? ? ? for (; k < n; k++)
> + ? ? ? ? ? {
> + ? ? ? ? ? ? ddr = VEC_index (ddr_p, ddrs, k);
> + ? ? ? ? ? ? gcc_assert (DDR_A (ddr) == olddr);
> + ? ? ? ? ? ? newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? nest);
> + ? ? ? ? ? ? VEC_replace (ddr_p, ddrs, k, newddr);
> + ? ? ? ? ? ? free_dependence_relation (ddr);
> + ? ? ? ? ? ? if (!bad
> + ? ? ? ? ? ? ? ? && DR_IS_WRITE (DDR_B (newddr))
> + ? ? ? ? ? ? ? ? && DDR_ARE_DEPENDENT (newddr) != chrec_known)
> + ? ? ? ? ? ? ? bad = true;
> + ? ? ? ? ? }
> +
> + ? ? ? ? k = VEC_length (ddr_p, ddrs)
> + ? ? ? ? ? ? - VEC_length (data_reference_p, datarefs) + i;
> + ? ? ? ? ddr = VEC_index (ddr_p, ddrs, k);
> + ? ? ? ? gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
> + ? ? ? ? newddr = initialize_data_dependence_relation (dr, dr, nest);
> + ? ? ? ? compute_self_dependence (newddr);
> + ? ? ? ? VEC_replace (ddr_p, ddrs, k, newddr);
> + ? ? ? ? free_dependence_relation (ddr);
> + ? ? ? ? VEC_replace (data_reference_p, datarefs, i, dr);
> +
> + ? ? ? ? if (bad)
> + ? ? ? ? ? {
> + ? ? ? ? ? ? if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> + ? ? ? ? ? ? ? {
> + ? ? ? ? ? ? ? ? fprintf (vect_dump,
> + ? ? ? ? ? ? ? ? ? ? ? ? ?"not vectorized: data dependence conflict"
> + ? ? ? ? ? ? ? ? ? ? ? ? ?" prevents gather");
> + ? ? ? ? ? ? ? ? print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
> + ? ? ? ? ? ? ? }
> + ? ? ? ? ? ? return false;
> + ? ? ? ? ? }
> +
> + ? ? ? ? STMT_VINFO_GATHER_P (stmt_info) = true;
> + ? ? ? }
> ? ? }
>
> ? return true;
> --- gcc/tree-vectorizer.h.jj ? ?2011-11-04 08:52:19.000000000 +0100
> +++ gcc/tree-vectorizer.h ? ? ? 2011-11-04 08:53:13.000000000 +0100
> @@ -535,6 +535,9 @@ typedef struct _stmt_vec_info {
> ? /* Is this statement vectorizable or should it be skipped in (partial)
> ? ? ?vectorization. ?*/
> ? bool vectorizable;
> +
> + ?/* For loads only, true if this is a gather load. ?*/
> + ?bool gather_p;
> ?} *stmt_vec_info;
>
> ?/* Access Functions. ?*/
> @@ -548,6 +551,7 @@ typedef struct _stmt_vec_info {
> ?#define STMT_VINFO_VEC_STMT(S) ? ? ? ? ? ? (S)->vectorized_stmt
> ?#define STMT_VINFO_VECTORIZABLE(S) ? ? ? ? (S)->vectorizable
> ?#define STMT_VINFO_DATA_REF(S) ? ? ? ? ? ? (S)->data_ref_info
> +#define STMT_VINFO_GATHER_P(S) ? ? ? ? ? ?(S)->gather_p
>
> ?#define STMT_VINFO_DR_BASE_ADDRESS(S) ? ? ?(S)->dr_base_address
> ?#define STMT_VINFO_DR_INIT(S) ? ? ? ? ? ? ?(S)->dr_init
> @@ -858,6 +862,8 @@ extern bool vect_analyze_data_refs_align
> ?extern bool vect_verify_datarefs_alignment (loop_vec_info, bb_vec_info);
> ?extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info);
> ?extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
> +extern tree vect_check_gather (gimple, loop_vec_info, tree *, tree *,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int *);
> ?extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *);
> ?extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tree *, gimple_stmt_iterator *,
> --- gcc/config/i386/i386-builtin-types.def.jj ? 2011-11-04 08:52:19.000000000 +0100
> +++ gcc/config/i386/i386-builtin-types.def ? ? ?2011-11-04 08:53:13.000000000 +0100
> @@ -432,20 +432,24 @@ DEF_FUNCTION_TYPE (V8QI, QI, QI, QI, QI,
>
> ?DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE, V4SI, V2DF, INT)
> ?DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V4SI, V4DF, INT)
> +DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V8SI, V4DF, INT)
> ?DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE, V2DI, V2DF, INT)
> ?DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V4DI, V4DF, INT)
> ?DEF_FUNCTION_TYPE (V4SF, V4SF, PCFLOAT, V4SI, V4SF, INT)
> ?DEF_FUNCTION_TYPE (V8SF, V8SF, PCFLOAT, V8SI, V8SF, INT)
> ?DEF_FUNCTION_TYPE (V4SF, V4SF, PCFLOAT, V2DI, V4SF, INT)
> ?DEF_FUNCTION_TYPE (V4SF, V4SF, PCFLOAT, V4DI, V4SF, INT)
> +DEF_FUNCTION_TYPE (V8SF, V8SF, PCFLOAT, V4DI, V8SF, INT)
> ?DEF_FUNCTION_TYPE (V2DI, V2DI, PCINT64, V4SI, V2DI, INT)
> ?DEF_FUNCTION_TYPE (V4DI, V4DI, PCINT64, V4SI, V4DI, INT)
> +DEF_FUNCTION_TYPE (V4DI, V4DI, PCINT64, V8SI, V4DI, INT)
> ?DEF_FUNCTION_TYPE (V2DI, V2DI, PCINT64, V2DI, V2DI, INT)
> ?DEF_FUNCTION_TYPE (V4DI, V4DI, PCINT64, V4DI, V4DI, INT)
> ?DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V4SI, V4SI, INT)
> ?DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V8SI, V8SI, INT)
> ?DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V2DI, V4SI, INT)
> ?DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V4DI, V4SI, INT)
> +DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V4DI, V8SI, INT)
>
> ?DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND)
> ?DEF_FUNCTION_TYPE_ALIAS (V4DF_FTYPE_V4DF, ROUND)
> --- gcc/config/i386/sse.md.jj ? 2011-11-04 08:52:19.000000000 +0100
> +++ gcc/config/i386/sse.md ? ? ?2011-11-04 12:48:16.000000000 +0100
> @@ -316,14 +316,6 @@ (define_mode_attr i128
> ?;; Mix-n-match
> ?(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
>
> -(define_mode_iterator AVXMODE48P_DI
> - ? ? ? ? ? ? ? ? ? ? [V2DI V2DF V4DI V4DF V4SF V4SI])
> -(define_mode_attr AVXMODE48P_DI
> - ? ? ? ? ? ? ? ? ? ? [(V2DI "V2DI") (V2DF "V2DI")
> - ? ? ? ? ? ? ? ? ? ? ?(V4DI "V4DI") (V4DF "V4DI")
> - ? ? ? ? ? ? ? ? ? ? ?(V4SI "V2DI") (V4SF "V2DI")
> - ? ? ? ? ? ? ? ? ? ? ?(V8SI "V4DI") (V8SF "V4DI")])
> -
> ?(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
>
> ?;; Mapping of immediate bits for blend instructions
> @@ -12516,11 +12508,21 @@ (define_insn "vcvtps2ph256"
> ?;; For gather* insn patterns
> ?(define_mode_iterator VEC_GATHER_MODE
> ? ? ? ? ? ? ? ? ? ? ?[V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
> -(define_mode_attr VEC_GATHER_MODE
> +(define_mode_attr VEC_GATHER_IDXSI
> ? ? ? ? ? ? ? ? ? ? ?[(V2DI "V4SI") (V2DF "V4SI")
> ? ? ? ? ? ? ? ? ? ? ? (V4DI "V4SI") (V4DF "V4SI")
> ? ? ? ? ? ? ? ? ? ? ? (V4SI "V4SI") (V4SF "V4SI")
> ? ? ? ? ? ? ? ? ? ? ? (V8SI "V8SI") (V8SF "V8SI")])
> +(define_mode_attr VEC_GATHER_IDXDI
> + ? ? ? ? ? ? ? ? ? ? [(V2DI "V2DI") (V2DF "V2DI")
> + ? ? ? ? ? ? ? ? ? ? ?(V4DI "V4DI") (V4DF "V4DI")
> + ? ? ? ? ? ? ? ? ? ? ?(V4SI "V2DI") (V4SF "V2DI")
> + ? ? ? ? ? ? ? ? ? ? ?(V8SI "V4DI") (V8SF "V4DI")])
> +(define_mode_attr VEC_GATHER_SRCDI
> + ? ? ? ? ? ? ? ? ? ? [(V2DI "V2DI") (V2DF "V2DF")
> + ? ? ? ? ? ? ? ? ? ? ?(V4DI "V4DI") (V4DF "V4DF")
> + ? ? ? ? ? ? ? ? ? ? ?(V4SI "V4SI") (V4SF "V4SF")
> + ? ? ? ? ? ? ? ? ? ? ?(V8SI "V4SI") (V8SF "V4SF")])
>
> ?(define_expand "avx2_gathersi<mode>"
> ? [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
> @@ -12529,7 +12531,8 @@ (define_expand "avx2_gathersi<mode>"
> ? ? ? ? ? ? ? ? ? ? ?(mem:<ssescalarmode>
> ? ? ? ? ? ? ? ? ? ? ? ?(match_par_dup 7
> ? ? ? ? ? ? ? ? ? ? ? ? ?[(match_operand 2 "vsib_address_operand" "")
> - ? ? ? ? ? ? ? ? ? ? ? ? ?(match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
> + ? ? ? ? ? ? ? ? ? ? ? ? ?(match_operand:<VEC_GATHER_IDXSI>
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? 3 "register_operand" "")
> ? ? ? ? ? ? ? ? ? ? ? ? ? (match_operand:SI 5 "const1248_operand " "")]))
> ? ? ? ? ? ? ? ? ? ? ?(mem:BLK (scratch))
> ? ? ? ? ? ? ? ? ? ? ?(match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
> @@ -12549,7 +12552,7 @@ (define_insn "*avx2_gathersi<mode>"
> ? ? ? ? ? (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
> ? ? ? ? ? ? [(unspec:P
> ? ? ? ? ? ? ? ?[(match_operand:P 3 "vsib_address_operand" "p")
> - ? ? ? ? ? ? ? ?(match_operand:<VEC_GATHER_MODE> 4 "register_operand" "x")
> + ? ? ? ? ? ? ? ?(match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
> ? ? ? ? ? ? ? ? (match_operand:SI 6 "const1248_operand" "n")]
> ? ? ? ? ? ? ? ?UNSPEC_VSIBADDR)])
> ? ? ? ? ? (mem:BLK (scratch))
> @@ -12565,14 +12568,16 @@ (define_insn "*avx2_gathersi<mode>"
> ?(define_expand "avx2_gatherdi<mode>"
> ? [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
> ? ? ? ? ? ? ? ? ? (unspec:VEC_GATHER_MODE
> - ? ? ? ? ? ? ? ? ? ?[(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
> + ? ? ? ? ? ? ? ? ? ?[(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
> ? ? ? ? ? ? ? ? ? ? ?(mem:<ssescalarmode>
> ? ? ? ? ? ? ? ? ? ? ? ?(match_par_dup 7
> ? ? ? ? ? ? ? ? ? ? ? ? ?[(match_operand 2 "vsib_address_operand" "")
> - ? ? ? ? ? ? ? ? ? ? ? ? ?(match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
> + ? ? ? ? ? ? ? ? ? ? ? ? ?(match_operand:<VEC_GATHER_IDXDI>
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? 3 "register_operand" "")
> ? ? ? ? ? ? ? ? ? ? ? ? ? (match_operand:SI 5 "const1248_operand " "")]))
> ? ? ? ? ? ? ? ? ? ? ?(mem:BLK (scratch))
> - ? ? ? ? ? ? ? ? ? ? (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
> + ? ? ? ? ? ? ? ? ? ? (match_operand:<VEC_GATHER_SRCDI>
> + ? ? ? ? ? ? ? ? ? ? ? 4 "register_operand" "")]
> ? ? ? ? ? ? ? ? ? ? UNSPEC_GATHER))
> ? ? ? ? ? ? ?(clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
> ? "TARGET_AVX2"
> @@ -12583,63 +12588,21 @@ (define_expand "avx2_gatherdi<mode>"
> ?})
>
> ?(define_insn "*avx2_gatherdi<mode>"
> - ?[(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=&x")
> - ? ? ? (unspec:AVXMODE48P_DI
> - ? ? ? ? [(match_operand:AVXMODE48P_DI 2 "register_operand" "0")
> + ?[(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
> + ? ? ? (unspec:VEC_GATHER_MODE
> + ? ? ? ? [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
> ? ? ? ? ? (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
> ? ? ? ? ? ? [(unspec:P
> ? ? ? ? ? ? ? ?[(match_operand:P 3 "vsib_address_operand" "p")
> - ? ? ? ? ? ? ? ?(match_operand:<AVXMODE48P_DI> 4 "register_operand" "x")
> + ? ? ? ? ? ? ? ?(match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
> ? ? ? ? ? ? ? ? (match_operand:SI 6 "const1248_operand" "n")]
> ? ? ? ? ? ? ? ?UNSPEC_VSIBADDR)])
> ? ? ? ? ? (mem:BLK (scratch))
> - ? ? ? ? ?(match_operand:AVXMODE48P_DI 5 "register_operand" "1")]
> + ? ? ? ? ?(match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
> ? ? ? ? ?UNSPEC_GATHER))
> - ? (clobber (match_scratch:AVXMODE48P_DI 1 "=&x"))]
> - ?"TARGET_AVX2"
> - ?"v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
> - ?[(set_attr "type" "ssemov")
> - ? (set_attr "prefix" "vex")
> - ? (set_attr "mode" "<sseinsnmode>")])
> -
> -;; Special handling for VEX.256 with float arguments
> -;; since there're still xmms as operands
> -(define_expand "avx2_gatherdi<mode>256"
> - ?[(parallel [(set (match_operand:VI4F_128 0 "register_operand" "")
> - ? ? ? ? ? ? ? ? ?(unspec:VI4F_128
> - ? ? ? ? ? ? ? ? ? ?[(match_operand:VI4F_128 1 "register_operand" "")
> - ? ? ? ? ? ? ? ? ? ? (mem:<ssescalarmode>
> - ? ? ? ? ? ? ? ? ? ? ? (match_par_dup 7
> - ? ? ? ? ? ? ? ? ? ? ? ? [(match_operand 2 "vsib_address_operand" "")
> - ? ? ? ? ? ? ? ? ? ? ? ? ?(match_operand:V4DI 3 "register_operand" "")
> - ? ? ? ? ? ? ? ? ? ? ? ? ?(match_operand:SI 5 "const1248_operand " "")]))
> - ? ? ? ? ? ? ? ? ? ? (mem:BLK (scratch))
> - ? ? ? ? ? ? ? ? ? ? (match_operand:VI4F_128 4 "register_operand" "")]
> - ? ? ? ? ? ? ? ? ? ?UNSPEC_GATHER))
> - ? ? ? ? ? ? (clobber (match_scratch:VI4F_128 6 ""))])]
> - ?"TARGET_AVX2"
> -{
> - ?operands[7]
> - ? ?= gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[5]), UNSPEC_VSIBADDR);
> -})
> -
> -(define_insn "*avx2_gatherdi<mode>256"
> - ?[(set (match_operand:VI4F_128 0 "register_operand" "=x")
> - ? ? ? (unspec:VI4F_128
> - ? ? ? ? [(match_operand:VI4F_128 2 "register_operand" "0")
> - ? ? ? ? ?(match_operator:<ssescalarmode> 7 "vsib_mem_operator"
> - ? ? ? ? ? ?[(unspec:P
> - ? ? ? ? ? ? ? [(match_operand:P 3 "vsib_address_operand" "p")
> - ? ? ? ? ? ? ? ?(match_operand:V4DI 4 "register_operand" "x")
> - ? ? ? ? ? ? ? ?(match_operand:SI 6 "const1248_operand" "n")]
> - ? ? ? ? ? ? ? UNSPEC_VSIBADDR)])
> - ? ? ? ? ?(mem:BLK (scratch))
> - ? ? ? ? ?(match_operand:VI4F_128 5 "register_operand" "1")]
> - ? ? ? ? UNSPEC_GATHER))
> - ? (clobber (match_scratch:VI4F_128 1 "=&x"))]
> + ? (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
> ? "TARGET_AVX2"
> - ?"v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
> + ?"v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
> ? [(set_attr "type" "ssemov")
> ? ?(set_attr "prefix" "vex")
> ? ?(set_attr "mode" "<sseinsnmode>")])
> --- gcc/config/i386/i386.c.jj ? 2011-11-04 08:52:19.000000000 +0100
> +++ gcc/config/i386/i386.c ? ? ?2011-11-04 12:48:16.000000000 +0100
> @@ -25105,6 +25105,13 @@ enum ix86_builtins
> ? IX86_BUILTIN_GATHERDIV4SI,
> ? IX86_BUILTIN_GATHERDIV8SI,
>
> + ?/* Alternate 4 element gather for the vectorizer where
> + ? ? all operands are 32-byte wide. ?*/
> + ?IX86_BUILTIN_GATHERALTSIV4DF,
> + ?IX86_BUILTIN_GATHERALTDIV8SF,
> + ?IX86_BUILTIN_GATHERALTSIV4DI,
> + ?IX86_BUILTIN_GATHERALTDIV8SI,
> +
> ? /* TFmode support builtins. ?*/
> ? IX86_BUILTIN_INFQ,
> ? IX86_BUILTIN_HUGE_VALQ,
> @@ -26883,6 +26890,22 @@ ix86_init_mmx_sse_builtins (void)
> ? ? ? ? ? ? ? V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
> ? ? ? ? ? ? ? IX86_BUILTIN_GATHERDIV8SI);
>
> + ?def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
> + ? ? ? ? ? ? ?V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
> + ? ? ? ? ? ? ?IX86_BUILTIN_GATHERALTSIV4DF);
> +
> + ?def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
> + ? ? ? ? ? ? ?V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
> + ? ? ? ? ? ? ?IX86_BUILTIN_GATHERALTDIV8SF);
> +
> + ?def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
> + ? ? ? ? ? ? ?V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
> + ? ? ? ? ? ? ?IX86_BUILTIN_GATHERALTSIV4DI);
> +
> + ?def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
> + ? ? ? ? ? ? ?V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
> + ? ? ? ? ? ? ?IX86_BUILTIN_GATHERALTDIV8SI);
> +
> ? /* MMX access to the vec_init patterns. ?*/
> ? def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
> ? ? ? ? ? ? ? ? ? ? V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
> @@ -28869,7 +28892,7 @@ rdrand_step:
> ? ? ? icode = CODE_FOR_avx2_gatherdiv4sf;
> ? ? ? goto gather_gen;
> ? ? case IX86_BUILTIN_GATHERDIV8SF:
> - ? ? ?icode = CODE_FOR_avx2_gatherdiv4sf256;
> + ? ? ?icode = CODE_FOR_avx2_gatherdiv8sf;
> ? ? ? goto gather_gen;
> ? ? case IX86_BUILTIN_GATHERSIV2DI:
> ? ? ? icode = CODE_FOR_avx2_gathersiv2di;
> @@ -28893,7 +28916,20 @@ rdrand_step:
> ? ? ? icode = CODE_FOR_avx2_gatherdiv4si;
> ? ? ? goto gather_gen;
> ? ? case IX86_BUILTIN_GATHERDIV8SI:
> - ? ? ?icode = CODE_FOR_avx2_gatherdiv4si256;
> + ? ? ?icode = CODE_FOR_avx2_gatherdiv8si;
> + ? ? ?goto gather_gen;
> +    /* The ALT builtins take all-32-byte operands but expand through the
> +       regular patterns; gather_gen narrows the over-wide index (SI
> +       variants) or src/mask (DI variants) to its low half first.  */
> +    case IX86_BUILTIN_GATHERALTSIV4DF:
> +      icode = CODE_FOR_avx2_gathersiv4df;
> +      goto gather_gen;
> +    case IX86_BUILTIN_GATHERALTDIV8SF:
> +      icode = CODE_FOR_avx2_gatherdiv8sf;
> +      goto gather_gen;
> +    case IX86_BUILTIN_GATHERALTSIV4DI:
> +      /* Integer elements: must use the V4DI pattern, not the V4DF one.  */
> +      icode = CODE_FOR_avx2_gathersiv4di;
> +      goto gather_gen;
> +    case IX86_BUILTIN_GATHERALTDIV8SI:
> +      icode = CODE_FOR_avx2_gatherdiv8si;
> +      goto gather_gen;
>
> ? ? gather_gen:
> ? ? ? arg0 = CALL_EXPR_ARG (exp, 0);
> @@ -28912,8 +28948,39 @@ rdrand_step:
> ? ? ? mode3 = insn_data[icode].operand[4].mode;
> ? ? ? mode4 = insn_data[icode].operand[5].mode;
>
> - ? ? ?if (target == NULL_RTX)
> - ? ? ? target = gen_reg_rtx (insn_data[icode].operand[0].mode);
> + ? ? ?if (target == NULL_RTX
> + ? ? ? ? || GET_MODE (target) != insn_data[icode].operand[0].mode)
> + ? ? ? subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
> + ? ? ?else
> + ? ? ? subtarget = target;
> +
> + ? ? ?if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
> + ? ? ? ? || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
> + ? ? ? {
> + ? ? ? ? rtx half = gen_reg_rtx (V4SImode);
> + ? ? ? ? if (!nonimmediate_operand (op2, V8SImode))
> + ? ? ? ? ? op2 = copy_to_mode_reg (V8SImode, op2);
> + ? ? ? ? emit_insn (gen_vec_extract_lo_v8si (half, op2));
> + ? ? ? ? op2 = half;
> + ? ? ? }
> +      else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
> +              || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
> +       {
> +         /* The insn takes only a 16-byte src (op0) and mask (op3), while
> +            the ALT builtin passes 32-byte vectors: extract the low half
> +            of each.  */
> +         rtx (*gen) (rtx, rtx);
> +         rtx half = gen_reg_rtx (mode0);
> +         if (mode0 == V4SFmode)
> +           gen = gen_vec_extract_lo_v8sf;
> +         else
> +           gen = gen_vec_extract_lo_v8si;
> +         if (!nonimmediate_operand (op0, GET_MODE (op0)))
> +           op0 = copy_to_mode_reg (GET_MODE (op0), op0);
> +         emit_insn (gen (half, op0));
> +         op0 = half;
> +         if (!nonimmediate_operand (op3, GET_MODE (op3)))
> +           op3 = copy_to_mode_reg (GET_MODE (op3), op3);
> +         /* Use a fresh pseudo for the mask; reusing HALF would overwrite
> +            the src value that op0 now refers to.  */
> +         half = gen_reg_rtx (mode0);
> +         emit_insn (gen (half, op3));
> +         op3 = half;
> +       }
>
> ? ? ? /* Force memory operand only with base register here. ?But we
> ? ? ? ? don't want to do it on memory operand for other builtin
> @@ -28935,10 +29002,26 @@ rdrand_step:
> ? ? ? ? ? error ("last argument must be scale 1, 2, 4, 8");
> ? ? ? ? ? return const0_rtx;
> ? ? ? ?}
> - ? ? ?pat = GEN_FCN (icode) (target, op0, op1, op2, op3, op4);
> + ? ? ?pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
> ? ? ? if (! pat)
> ? ? ? ?return const0_rtx;
> ? ? ? emit_insn (pat);
> +
> + ? ? ?if (fcode == IX86_BUILTIN_GATHERDIV8SF
> + ? ? ? ? || fcode == IX86_BUILTIN_GATHERDIV8SI)
> + ? ? ? {
> + ? ? ? ? enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? V4SFmode : V4SImode;
> + ? ? ? ? if (target == NULL_RTX)
> + ? ? ? ? ? target = gen_reg_rtx (tmode);
> + ? ? ? ? if (tmode == V4SFmode)
> + ? ? ? ? ? emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
> + ? ? ? ? else
> + ? ? ? ? ? emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
> + ? ? ? }
> + ? ? ?else
> + ? ? ? target = subtarget;
> +
> ? ? ? return target;
>
> ? ? default:
> @@ -29443,6 +29526,73 @@ ix86_veclibabi_acml (enum built_in_funct
> ? return new_fndecl;
> ?}
>
> +/* Return the decl of a builtin that implements a gather load of a vector
> +   of type MEM_VECTYPE, indexed by elements of scalar type INDEX_TYPE and
> +   scaled by SCALE.  Return NULL_TREE if no such builtin is available.  */
> +
> +static tree
> +ix86_vectorize_builtin_gather (const_tree mem_vectype,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?const_tree index_type, int scale)
> +{
> + ?bool si;
> + ?enum ix86_builtins code;
> +
> + ?if (! TARGET_AVX2)
> + ? ?return NULL_TREE;
> +
> + ?if ((TREE_CODE (index_type) != INTEGER_TYPE
> + ? ? ? && !POINTER_TYPE_P (index_type))
> + ? ? ?|| (TYPE_MODE (index_type) != SImode
> + ? ? ? ? && TYPE_MODE (index_type) != DImode))
> + ? ?return NULL_TREE;
> +
> + ?if (TYPE_PRECISION (index_type) > POINTER_SIZE)
> + ? ?return NULL_TREE;
> +
> + ?/* v*gather* insn sign extends index to pointer mode. ?*/
> + ?if (TYPE_PRECISION (index_type) < POINTER_SIZE
> + ? ? ?&& TYPE_UNSIGNED (index_type))
> + ? ?return NULL_TREE;
> +
> + ?if (scale <= 0
> + ? ? ?|| scale > 8
> + ? ? ?|| (scale & (scale - 1)) != 0)
> + ? ?return NULL_TREE;
> +
> + ?si = TYPE_MODE (index_type) == SImode;
> + ?switch (TYPE_MODE (mem_vectype))
> + ? ?{
> + ? ?case V2DFmode:
> + ? ? ?code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
> + ? ? ?break;
> + ? ?case V4DFmode:
> + ? ? ?code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
> + ? ? ?break;
> + ? ?case V2DImode:
> + ? ? ?code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
> + ? ? ?break;
> + ? ?case V4DImode:
> + ? ? ?code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
> + ? ? ?break;
> + ? ?case V4SFmode:
> + ? ? ?code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
> + ? ? ?break;
> + ? ?case V8SFmode:
> + ? ? ?code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
> + ? ? ?break;
> + ? ?case V4SImode:
> + ? ? ?code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
> + ? ? ?break;
> + ? ?case V8SImode:
> + ? ? ?code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
> + ? ? ?break;
> + ? ?default:
> + ? ? ?return NULL_TREE;
> + ? ?}
> +
> + ?return ix86_builtins[code];
> +}
> +
> ?/* Returns a code for a target-specific builtin that implements
> ? ?reciprocal of the function, or NULL_TREE if not available. ?*/
>
> @@ -37642,6 +37792,9 @@ ix86_autovectorize_vector_sizes (void)
> ?#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
> ? ix86_builtin_vectorized_function
>
> +#undef TARGET_VECTORIZE_BUILTIN_GATHER
> +#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
> +
> ?#undef TARGET_BUILTIN_RECIPROCAL
> ?#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
>
> --- gcc/testsuite/gcc.target/i386/avx2-gather-1.c.jj ? ?2011-11-04 08:53:13.000000000 +0100
> +++ gcc/testsuite/gcc.target/i386/avx2-gather-1.c ? ? ? 2011-11-04 08:53:13.000000000 +0100
> @@ -0,0 +1,215 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx2 } */
> +/* { dg-options "-O3 -mavx2" } */
> +
> +#include "avx2-check.h"
> +
> +#define N 1024
> +float vf1[N+16], vf2[N];
> +double vd1[N+16], vd2[N];
> +int k[N];
> +long l[N];
> +short n[N];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (void)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?vf2[i] = vf1[k[i]];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (void)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?n[i] = (int) vf1[k[i]];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f3 (int x)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?vf2[i] = vf1[k[i] + x];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f4 (int x)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?n[i] = (int) vf1[k[i] + x];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f5 (void)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?vd2[i] = vd1[k[i]];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f6 (void)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?n[i] = (int) vd1[k[i]];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (int x)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?vd2[i] = vd1[k[i] + x];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (int x)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?n[i] = (int) vd1[k[i] + x];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f9 (void)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?vf2[i] = vf1[l[i]];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f10 (void)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?n[i] = (int) vf1[l[i]];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f11 (long x)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?vf2[i] = vf1[l[i] + x];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f12 (long x)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?n[i] = (int) vf1[l[i] + x];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f13 (void)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?vd2[i] = vd1[l[i]];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f14 (void)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?n[i] = (int) vd1[l[i]];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f15 (long x)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?vd2[i] = vd1[l[i] + x];
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f16 (long x)
> +{
> + ?int i;
> + ?for (i = 0; i < N; i++)
> + ? ?n[i] = (int) vd1[l[i] + x];
> +}
> +
> +static void
> +avx2_test (void)
> +{
> + ?int i;
> +
> + ?for (i = 0; i < N + 16; i++)
> + ? ?{
> + ? ? ?asm ("");
> + ? ? ?vf1[i] = 17.0f + i;
> + ? ? ?vd1[i] = 19.0 + i;
> + ? ?}
> + ?for (i = 0; i < N; i++)
> + ? ?{
> + ? ? ?asm ("");
> + ? ? ?k[i] = (i * 731) & (N - 1);
> + ? ? ?l[i] = (i * 657) & (N - 1);
> + ? ?}
> +
> + ?f1 ();
> + ?f2 ();
> + ?for (i = 0; i < N; i++)
> + ? ?if (vf2[i] != ((i * 731) & (N - 1)) + 17
> + ? ? ? || n[i] != ((i * 731) & (N - 1)) + 17)
> + ? ? ?abort ();
> +
> + ?f3 (12);
> + ?f4 (14);
> + ?for (i = 0; i < N; i++)
> + ? ?if (vf2[i] != ((i * 731) & (N - 1)) + 17 + 12
> + ? ? ? || n[i] != ((i * 731) & (N - 1)) + 17 + 14)
> + ? ? ?abort ();
> +
> + ?f5 ();
> + ?f6 ();
> + ?for (i = 0; i < N; i++)
> + ? ?if (vd2[i] != ((i * 731) & (N - 1)) + 19
> + ? ? ? || n[i] != ((i * 731) & (N - 1)) + 19)
> + ? ? ?abort ();
> +
> + ?f7 (7);
> + ?f8 (9);
> + ?for (i = 0; i < N; i++)
> + ? ?if (vd2[i] != ((i * 731) & (N - 1)) + 19 + 7
> + ? ? ? || n[i] != ((i * 731) & (N - 1)) + 19 + 9)
> + ? ? ?abort ();
> +
> + ?f9 ();
> + ?f10 ();
> + ?for (i = 0; i < N; i++)
> + ? ?if (vf2[i] != ((i * 657) & (N - 1)) + 17
> + ? ? ? || n[i] != ((i * 657) & (N - 1)) + 17)
> + ? ? ?abort ();
> +
> + ?f11 (2);
> + ?f12 (4);
> + ?for (i = 0; i < N; i++)
> + ? ?if (vf2[i] != ((i * 657) & (N - 1)) + 17 + 2
> + ? ? ? || n[i] != ((i * 657) & (N - 1)) + 17 + 4)
> + ? ? ?abort ();
> +
> + ?f13 ();
> + ?f14 ();
> + ?for (i = 0; i < N; i++)
> + ? ?if (vd2[i] != ((i * 657) & (N - 1)) + 19
> + ? ? ? || n[i] != ((i * 657) & (N - 1)) + 19)
> + ? ? ?abort ();
> +
> + ?f15 (13);
> + ?f16 (15);
> + ?for (i = 0; i < N; i++)
> + ? ?if (vd2[i] != ((i * 657) & (N - 1)) + 19 + 13
> + ? ? ? || n[i] != ((i * 657) & (N - 1)) + 19 + 15)
> + ? ? ?abort ();
> +}
> --- gcc/testsuite/gcc.target/i386/avx2-gather-2.c.jj ? ?2011-11-04 08:53:13.000000000 +0100
> +++ gcc/testsuite/gcc.target/i386/avx2-gather-2.c ? ? ? 2011-11-04 08:53:13.000000000 +0100
> @@ -0,0 +1,7 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
> +
> +#include "avx2-gather-1.c"
> +
> +/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 16 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> --- gcc/testsuite/gcc.target/i386/avx2-gather-3.c.jj ? ?2011-11-04 08:53:13.000000000 +0100
> +++ gcc/testsuite/gcc.target/i386/avx2-gather-3.c ? ? ? 2011-11-04 08:53:13.000000000 +0100
> @@ -0,0 +1,167 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx2 } */
> +/* { dg-options "-O3 -mavx2 -ffast-math" } */
> +
> +#include "avx2-check.h"
> +
> +#define N 1024
> +float f[N];
> +double d[N];
> +int k[N];
> +float *l[N];
> +double *n[N];
> +int **m[N];
> +long **o[N];
> +long q[N];
> +long *r[N];
> +int *s[N];
> +
> +__attribute__((noinline, noclone)) float
> +f1 (void)
> +{
> + ?int i;
> + ?float g = 0.0;
> + ?for (i = 0; i < N / 2; i++)
> + ? ?g += f[k[i]];
> + ?return g;
> +}
> +
> +__attribute__((noinline, noclone)) float
> +f2 (float *p)
> +{
> + ?int i;
> + ?float g = 0.0;
> + ?for (i = 0; i < N / 2; i++)
> + ? ?g += p[k[i]];
> + ?return g;
> +}
> +
> +__attribute__((noinline, noclone)) float
> +f3 (void)
> +{
> +  int i;
> +  float g = 0.0;
> +  for (i = 0; i < N / 2; i++)
> +    g += *l[i];
> +  return g;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f4 (void)
> +{
> +  int i;
> +  int g = 0;
> +  for (i = 0; i < N / 2; i++)
> +    g += **m[i];
> +  return g;
> +}
> +
> +__attribute__((noinline, noclone)) double
> +f5 (void)
> +{
> +  int i;
> +  double g = 0.0;
> +  for (i = 0; i < N / 2; i++)
> +    g += d[k[i]];
> +  return g;
> +}
> +
> +__attribute__((noinline, noclone)) double
> +f6 (double *p)
> +{
> +  int i;
> +  double g = 0.0;
> +  for (i = 0; i < N / 2; i++)
> +    g += p[k[i]];
> +  return g;
> +}
> +
> +__attribute__((noinline, noclone)) double
> +f7 (void)
> +{
> +  int i;
> +  double g = 0.0;
> +  for (i = 0; i < N / 2; i++)
> +    g += *n[i];
> +  return g;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f8 (void)
> +{
> +  int i;
> +  int g = 0;
> +  for (i = 0; i < N / 2; i++)
> +    g += **o[i];
> +  return g;
> +}
> +
> +__attribute__((noinline, noclone)) float
> +f9 (void)
> +{
> +  int i;
> +  float g = 0.0;
> +  for (i = 0; i < N / 2; i++)
> +    g += f[q[i]];
> +  return g;
> +}
> +
> +__attribute__((noinline, noclone)) float
> +f10 (float *p)
> +{
> +  int i;
> +  float g = 0.0;
> +  for (i = 0; i < N / 2; i++)
> +    g += p[q[i]];
> +  return g;
> +}
> +
> +__attribute__((noinline, noclone)) double
> +f11 (void)
> +{
> +  int i;
> +  double g = 0.0;
> +  for (i = 0; i < N / 2; i++)
> +    g += d[q[i]];
> +  return g;
> +}
> +
> +__attribute__((noinline, noclone)) double
> +f12 (double *p)
> +{
> +  int i;
> +  double g = 0.0;
> +  for (i = 0; i < N / 2; i++)
> +    g += p[q[i]];
> +  return g;
> +}
> +
> +static void
> +avx2_test (void)
> +{
> +  int i;
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      asm ("");
> +      f[i] = -256.0f + i;
> +      d[i] = -258.0 + i;
> +      k[i] = (i * 731) & (N - 1);
> +      q[i] = (i * 657) & (N - 1);
> +      l[i] = &f[(i * 239) & (N - 1)];
> +      n[i] = &d[(i * 271) & (N - 1)];
> +      r[i] = &q[(i * 323) & (N - 1)];
> +      s[i] = &k[(i * 565) & (N - 1)];
> +      m[i] = &s[(i * 13) & (N - 1)];
> +      o[i] = &r[(i * 19) & (N - 1)];
> +    }
> +
> +  if (f1 () != 136448.0f || f2 (f) != 136448.0f || f3 () != 130304.0)
> +    abort ();
> +  if (f4 () != 261376 || f5 () != 135424.0 || f6 (d) != 135424.0)
> +    abort ();
> +  if (f7 () != 129280.0 || f8 () != 259840L || f9 () != 130816.0f)
> +    abort ();
> +  if (f10 (f) != 130816.0f || f11 () != 129792.0 || f12 (d) != 129792.0)
> +    abort ();
> +}
> --- gcc/testsuite/gcc.target/i386/avx2-gather-4.c.jj    2011-11-04 08:54:11.000000000 +0100
> +++ gcc/testsuite/gcc.target/i386/avx2-gather-4.c       2011-11-04 08:54:11.000000000 +0100
> @@ -0,0 +1,38 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx2 } */
> +/* { dg-options "-O3 -mavx2" } */
> +
> +#include "avx2-check.h"
> +
> +#define N 1024
> +int a[N], b[N], c[N], d[N];
> +
> +__attribute__((noinline, noclone)) void
> +foo (float *__restrict p, float *__restrict q, float *__restrict r,
> +     long s1, long s2, long s3)
> +{
> +  int i;
> +  for (i = 0; i < N; i++)
> +    p[i] = q[a[i] * s1 + b[i] * s2 + s3] * r[c[i] * s1 + d[i] * s2 + s3];
> +}
> +
> +static void
> +avx2_test (void)
> +{
> +  int i;
> +  float e[N], f[N], g[N];
> +  for (i = 0; i < N; i++)
> +    {
> +      a[i] = (i * 7) & (N / 8 - 1);
> +      b[i] = (i * 13) & (N / 8 - 1);
> +      c[i] = (i * 23) & (N / 8 - 1);
> +      d[i] = (i * 5) & (N / 8 - 1);
> +      e[i] = 16.5 + i;
> +      f[i] = 127.5 - i;
> +    }
> +  foo (g, e, f, 3, 2, 4);
> +  for (i = 0; i < N; i++)
> +    if (g[i] != (float) ((20.5 + a[i] * 3 + b[i] * 2)
> +                        * (123.5 - c[i] * 3 - d[i] * 2)))
> +      abort ();
> +}
>
>
>        Jakub
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]