This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH][4.6] [2/2] Handle multiple vector sizes with AVX
- From: Ira Rosen <IRAR at il dot ibm dot com>
- To: Richard Guenther <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Sun, 7 Mar 2010 09:33:50 +0200
- Subject: Re: [PATCH][4.6] [2/2] Handle multiple vector sizes with AVX
- References: <alpine.LNX.2.00.1003031434530.4650@zhemvz.fhfr.qr>
Richard Guenther <rguenther@suse.de> wrote on 03/03/2010 03:54:15 PM:
> This adjusts the vectorizer to handle multiple vector sizes as
> supported by AVX.
>
> The main parts of this patch are
>
> 1) Separate analysis of datarefs and stmts from committing to
> vector types
>
> 2) Fixup remaining calls to get_vectype_for_scalar_type to get
> the correct vector type
>
> I mostly concentrated on loop vectorization and only made SLP
> work as far as the testsuite or SPEC is concerned. Also the
> pattern recognizer commits to vector types too early. Thus likely
> I will have to dissect the analysis phase some more.
Yes, I think, since pattern recognition checks for exact target support of
the patterns, changing VF afterwards may be problematic.
> Index: trunk/gcc/tree-vect-data-refs.c
> ===================================================================
> *** trunk.orig/gcc/tree-vect-data-refs.c 2010-03-02 17:57:08.000000000
+0100
> --- trunk/gcc/tree-vect-data-refs.c 2010-03-02 18:05:48.000000000 +0100
...
> *************** vect_analyze_data_ref_dependence (struct
> *** 595,611 ****
> if (vect_print_dump_info (REPORT_DR_DETAILS))
> fprintf (vect_dump, "dependence distance = %d.", dist);
>
> ! /* Same loop iteration. */
> ! if (dist % vectorization_factor == 0 && dra_size == drb_size)
> {
> /* Two references with distance zero have the same alignment. */
> VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS
> (stmtinfo_a), drb);
> VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS
> (stmtinfo_b), dra);
> if (vect_print_dump_info (REPORT_ALIGNMENT))
> fprintf (vect_dump, "accesses have the same alignment.");
> if (vect_print_dump_info (REPORT_DR_DETAILS))
> {
> ! fprintf (vect_dump, "dependence distance modulo vf == 0
between ");
> print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
> fprintf (vect_dump, " and ");
> print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
> --- 596,616 ----
> if (vect_print_dump_info (REPORT_DR_DETAILS))
> fprintf (vect_dump, "dependence distance = %d.", dist);
>
> ! /* ??? Why was that dist % vectorization_factor == 0? Only for
> ! dist == 0 we have to record a rw dependence? Alignment
I think you are right, the code is incorrect and we need to record
read-write dependence for dist == 0.
> ! stuff is handled in vect_analyze_data_ref_group after we
> ! determinded the final vectorization factor. */
> ! if (dist == 0 && dra_size == drb_size)
> {
> /* Two references with distance zero have the same alignment. */
> VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS
> (stmtinfo_a), drb);
> VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS
> (stmtinfo_b), dra);
Why do you collect same alignment pairs here (for the case of dist == 0),
instead of collecting them with the rest in vect_analyze_data_ref_group?
...
>
> + /* Function vect_analyze_data_ref_group.
> +
> + Update group and alignment relations according to the chosen
> + vectorization factor. */
> +
> + static void
> + vect_analyze_data_ref_group (struct data_dependence_relation *ddr,
> + loop_vec_info loop_vinfo)
I think that the name of this function is confusing. We use a term "group"
for groups of strided accesses. Maybe vect_find_same_alignment_drs is
better?
...
*************** vect_determine_vectorization_factor (loo
...
> else
> {
> ! int stmt_desired_vf = 0;
> !
> ! gcc_assert (!is_pattern_stmt_p (stmt_info));
> !
> ! /* Iterate over all scalar types of the stmt operands and
> ! determine the minimal and maximal vectorization factors. */
> ! for (j = 0; j < gimple_num_ops (stmt); ++j)
> ! {
> ! tree op = gimple_op (stmt, j);
> ! if (!op
> ! || TYPE_P (op))
> ! continue;
> !
> ! scalar_type = TREE_TYPE (op);
> ! vectype = get_vectype_for_scalar_type_1 (scalar_type,
> ! *min_vf, true);
> ! if (!vectype
> ! || (int)TYPE_VECTOR_SUBPARTS (vectype) > *max_vf)
> ! {
> ! if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> ! {
> ! fprintf (vect_dump,
> ! "not vectorized: unsupported data-type ");
> ! print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
> ! }
> ! return false;
> ! }
> !
> ! nunits = TYPE_VECTOR_SUBPARTS (vectype);
> ! if (nunits > *min_vf)
> ! {
> ! *min_vf = nunits;
> ! if (vect_print_dump_info (REPORT_DETAILS))
> ! fprintf (vect_dump, "increasing minimal vectorization "
> ! "factor to %d\n", *min_vf);
> ! }
> !
> ! vectype = get_vectype_for_scalar_type (scalar_type, *max_vf);
> ! nunits = TYPE_VECTOR_SUBPARTS (vectype);
> ! if (desired_vf == 0
> ! || nunits > desired_vf)
> ! {
> ! desired_vf = nunits;
> ! if (vect_print_dump_info (REPORT_DETAILS))
> ! fprintf (vect_dump, "increasing desired vectorization "
> ! "factor to %d\n", desired_vf);
> ! }
> ! if (stmt_desired_vf == 0
> ! || nunits > stmt_desired_vf)
> ! stmt_desired_vf = nunits;
Looks like stmt_desired_vf is unused.
> ! }
> ! }
> ! }
> ! }
> !
> ! /* If we do not support the desired vectorization factor, adjust it.
*/
> ! if (desired_vf > *max_vf)
> ! desired_vf = *max_vf;
> !
> ! /* Restrict the vectorization factor to a known loop-trip count. */
> ! if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> ! && ((int)LOOP_VINFO_INT_NITERS (loop_vinfo) < desired_vf))
> ! {
> ! desired_vf = LOOP_VINFO_INT_NITERS (loop_vinfo);
> ! if (vect_print_dump_info (REPORT_DETAILS))
> ! fprintf (vect_dump, "reducing desired vectorization factor to "
> ! "knwon loop-trip count %d.", desired_vf);
Typo: knwon -> known.
> ! }
> !
> ! /* If we didn't have any interesting statements that shows what we
> ! desire, use the maximal (??? for now minimal) vectorization
factor. */
> ! if (desired_vf == 0
> ! || desired_vf < *min_vf)
> ! desired_vf = *min_vf;
> !
> ! if (desired_vf == 0
> ! || *min_vf > *max_vf)
> ! {
> ! if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> ! fprintf (vect_dump, "not vectorized: unsupported data-type");
> ! return false;
> ! }
> !
> ! /* TODO: Analyze cost. Decide if worth while to vectorize. */
> ! if (vect_print_dump_info (REPORT_DETAILS))
> ! fprintf (vect_dump, "vectorization factor = %d", desired_vf);
> !
> ! /* The vectorization factor is now determined. */
> ! LOOP_VINFO_VECT_FACTOR (loop_vinfo) = desired_vf;
> !
> ! return true;
> ! }
> !
It would be nice to gave some general explanation in this function, like
what desired_vf, min_vf, max_vf stand for and how they are computed...
A clarification question: for AVX in case there are both ints and floats in
the loop, vf will be 8 and all the int statements will have two copies,
right?
...
> *************** vect_analyze_loop (struct loop *loop)
> *** 1410,1428 ****
> return NULL;
> }
>
> ! /* Analyze the alignment of the data-refs in the loop.
> ! Fail if a data reference is found that cannot be vectorized. */
>
> ! ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
> ! if (!ok)
> {
> if (vect_print_dump_info (REPORT_DETAILS))
> ! fprintf (vect_dump, "bad data alignment.");
> destroy_loop_vec_info (loop_vinfo, true);
> return NULL;
> }
>
> ! ok = vect_determine_vectorization_factor (loop_vinfo);
> if (!ok)
> {
> if (vect_print_dump_info (REPORT_DETAILS))
> --- 1570,1593 ----
> return NULL;
> }
>
> ! /* Analyze data dependences between the data-refs in the loop
> ! and the maximal possible vectorization factor.
> ! FORNOW: fail at the first data dependence that we encounter. */
>
> ! ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
> ! if (!ok
> ! || max_vf < min_vf)
> {
> if (vect_print_dump_info (REPORT_DETAILS))
> ! fprintf (vect_dump, "bad data dependence.");
> destroy_loop_vec_info (loop_vinfo, true);
> return NULL;
> }
> + if (vect_print_dump_info (REPORT_DETAILS))
> + fprintf (vect_dump, "maximum vectorization factor %i.", max_vf);
>
> ! /* Determine the vectorization factor. */
> ! ok = vect_determine_vectorization_factor (loop_vinfo, &min_vf,
&max_vf);
Is there a reason to pass min_vf and max_vf by reference? (They are not
used after this call).
Thanks,
Ira