[patch] Data dependence analysis for basic block SLP
Ira Rosen
IRAR@il.ibm.com
Wed Sep 8 07:57:00 GMT 2010
Sebastian Pop <sebpop@gmail.com> wrote on 03/09/2010 08:45:22 PM:
> On Thu, Sep 2, 2010 at 01:05, Ira Rosen <IRAR@il.ibm.com> wrote:
> >
> > Hi,
> >
> > This patch implements data dependence analysis for basic block
> > vectorization.
> >
> > For data dependence testing in basic block vectorization it's enough to
> > check that all the loads in the basic block are before all the stores.
> > Since we create vector loads before the first load and vector stores
after
> > the last store, we don't change the order of memory accesses.
> >
> > Bootstrapped and tested on powerpc64-suse-linux.
> > Committed.
> >
> > Ira
> >
> > ChangeLog:
> >
> > * tree-vectorizer.h (get_later_stmt): New function.
> > (vect_analyze_data_ref_dependences): Add argument.
> > * tree-vect-loop.c (vect_analyze_loop): Update call to
> > vect_analyze_data_ref_dependences.
> > * tree-vect-data-refs.c (vect_drs_dependent_in_basic_block):
> > New function.
> > (vect_analyze_data_ref_dependence): Add argument for basic block
> > dependencies. Check dependencies in basic block vectorization.
>
> Point space space new sentence. This appears several times below.
> Please use "contrib/check_GNU_style.sh patch.diff" to check the patch.
OK, I'll prepare a cleanup patch to address your style comments (and maybe
additional places if I can find them).
>
> Dot, space, space, new sentence.
> patch.diff:92:+ /* pin and pout may alias. But since all the loads
> are before the first
> patch.diff:140:+ /* pin and pout may alias, and loads and stores are
> mixed. The basic
> patch.diff:247:+ check into different basic blocks. Expect
> vectorization if
> patch.diff:353:+ /* Check that the data-refs have same bases and
> offsets. If not, we
> patch.diff:382:+ /* We have a read-write dependence. Check that the
> load is before the
> patch.diff:386:+ corresponding scalar store. So the order of the
> acceses is preserved
>
> There should be exactly one space between function name and parentheses.
> patch.diff:160:+ abort();
> patch.diff:221:+ abort();
>
> > (vect_analyze_data_ref_dependences): Add argument and update
call to
> > vect_analyze_data_ref_dependences.
> > * tree-vect-slp.c (vect_find_last_store_in_slp_instance): New.
> > (vect_bb_vectorizable_with_dependencies): New.
> > (vect_slp_analyze_bb): Check dependencies in basic block.
> > (vect_schedule_slp_instance): Insert stores before the last
store in
> > SLP instance.
> >
> > testsuite/ChangeLog:
> >
> > * gcc.dg/vect/bb-slp-8.c: Separate the interesting part and the
> > check into different basic blocks. Expect vectorization if
misaligned
> > stores are supported.
> > * gcc.dg/vect/bb-slp-8a.c: New test.
> > * gcc.dg/vect/bb-slp-8b.c: New test.
> >
> > Index: testsuite/gcc.dg/vect/bb-slp-8.c
> > ===================================================================
> > --- testsuite/gcc.dg/vect/bb-slp-8.c (revision 163732)
> > +++ testsuite/gcc.dg/vect/bb-slp-8.c (working copy)
> > @@ -15,7 +15,8 @@ main1 (unsigned int x, unsigned int y, u
> > int i;
> > unsigned int a0, a1, a2, a3;
> >
> > - /* pin and pout may alias. */
> > + /* pin and pout may alias. But since all the loads are before the
first
> > store
> > + the basic block is vectorizable. */
> > a0 = *pin++ + 23;
> > a1 = *pin++ + 142;
> > a2 = *pin++ + 2;
> > @@ -26,6 +27,9 @@ main1 (unsigned int x, unsigned int y, u
> > *pout++ = a2 * x;
> > *pout++ = a3 * y;
> >
> > + if (i)
> > + __asm__ volatile ("" : : : "memory");
> > +
> > /* Check results. */
> > if (out[0] != (in[0] + 23) * x
> > || out[1] != (in[1] + 142) * y
> > @@ -45,6 +49,6 @@ int main (void)
> > return 0;
> > }
> >
> > -/* { dg-final { scan-tree-dump-times "basic block vectorized using
SLP" 0
> > "slp" } } */
> > +/* { dg-final { scan-tree-dump-times "basic block vectorized using
SLP" 1
> > "slp" { target vect_hw_misalign } } } */
> > /* { dg-final { cleanup-tree-dump "slp" } } */
> > Index: testsuite/gcc.dg/vect/bb-slp-8a.c
> > ===================================================================
> > --- testsuite/gcc.dg/vect/bb-slp-8a.c (revision 0)
> > +++ testsuite/gcc.dg/vect/bb-slp-8a.c (revision 0)
> > @@ -0,0 +1,53 @@
> > +/* { dg-require-effective-target vect_int } */
> > +
> > +#include <stdarg.h>
> > +#include <stdio.h>
> > +#include "tree-vect.h"
> > +
> > +#define N 16
> > +
> > +unsigned int out[N];
> > +unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
> > +
> > +__attribute__ ((noinline)) int
> > +main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int
> > *pout)
> > +{
> > + int i;
> > + unsigned int a0, a1, a2, a3;
> > +
> > + /* pin and pout may alias, and loads and stores are mixed. The basic
> > block
> > + cannot be vectorized. */
> > + a0 = *pin++ + 23;
> > + *pout++ = a0 * x;
> > + a1 = *pin++ + 142;
> > + *pout++ = a1 * y;
> > + a2 = *pin++ + 2;
> > + *pout++ = a2 * x;
> > + a3 = *pin++ + 31;
> > + *pout++ = a3 * y;
> > +
> > + if (i)
> > + __asm__ volatile ("" : : : "memory");
> > +
> > + /* Check results. */
> > + if (out[0] != (in[0] + 23) * x
> > + || out[1] != (in[1] + 142) * y
> > + || out[2] != (in[2] + 2) * x
> > + || out[3] != (in[3] + 31) * y)
> > + abort();
> > +
> > + return 0;
> > +}
> > +
> > +int main (void)
> > +{
> > + check_vect ();
> > +
> > + main1 (2, 3, &in[0], &out[0]);
> > +
> > + return 0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "basic block vectorized using
SLP" 0
> > "slp" } } */
> > +/* { dg-final { cleanup-tree-dump "slp" } } */
> > +
> > Index: testsuite/gcc.dg/vect/bb-slp-8b.c
> > ===================================================================
> > --- testsuite/gcc.dg/vect/bb-slp-8b.c (revision 0)
> > +++ testsuite/gcc.dg/vect/bb-slp-8b.c (revision 0)
> > @@ -0,0 +1,55 @@
> > +/* { dg-require-effective-target vect_int } */
> > +
> > +#include <stdarg.h>
> > +#include <stdio.h>
> > +#include "tree-vect.h"
> > +
> > +#define N 16
> > +
> > +unsigned int out[N];
> > +unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
> > +
> > +__attribute__ ((noinline)) int
> > +main1 (unsigned int x, unsigned int y)
> > +{
> > + int i;
> > + unsigned int a0, a1, a2, a3;
> > + unsigned int *pin = &in[0];
> > + unsigned int *pout = &out[0];
> > +
> > + /* pin and pout are different, so despite the fact that loads and
stores
> > + are mixed the basic block is vectorizable. */
> > + a0 = *pin++ + 23;
> > + *pout++ = a0 * x;
> > + a1 = *pin++ + 142;
> > + *pout++ = a1 * y;
> > + a2 = *pin++ + 2;
> > + *pout++ = a2 * x;
> > + a3 = *pin++ + 31;
> > + *pout++ = a3 * y;
> > +
> > + if (i)
> > + __asm__ volatile ("" : : : "memory");
> > +
> > + /* Check results. */
> > + if (out[0] != (in[0] + 23) * x
> > + || out[1] != (in[1] + 142) * y
> > + || out[2] != (in[2] + 2) * x
> > + || out[3] != (in[3] + 31) * y)
> > + abort();
> > +
> > + return 0;
> > +}
> > +
> > +int main (void)
> > +{
> > + check_vect ();
> > +
> > + main1 (2, 3);
> > +
> > + return 0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "basic block vectorized using
SLP" 1
> > "slp" { target vect_hw_misalign } } } */
> > +/* { dg-final { cleanup-tree-dump "slp" } } */
> > +
> > Index: testsuite/ChangeLog
> > ===================================================================
> > --- testsuite/ChangeLog (revision 163732)
> > +++ testsuite/ChangeLog (working copy)
> > @@ -1,3 +1,11 @@
> > +2010-09-01 Ira Rosen <irar@il.ibm.com>
> > +
> > + * gcc.dg/vect/bb-slp-8.c: Separate the interesting part and the
> > + check into different basic blocks. Expect vectorization if
> > misaligned
> > + stores are supported.
> > + * gcc.dg/vect/bb-slp-8a.c: New test.
> > + * gcc.dg/vect/bb-slp-8b.c: New test.
> > +
> > 2010-09-01 Richard Guenther <rguenther@suse.de>
> >
> > * gcc.dg/vect/vect-outer-fir.c: Adjust.
> > Index: tree-vectorizer.h
> > ===================================================================
> > --- tree-vectorizer.h (revision 163732)
> > +++ tree-vectorizer.h (working copy)
> > @@ -633,6 +633,32 @@ get_earlier_stmt (gimple stmt1, gimple s
> > return stmt2;
> > }
> >
> > +static inline gimple
> > +get_later_stmt (gimple stmt1, gimple stmt2)
>
> Please add documentation before each function.
>
> > +{
> > + unsigned int uid1, uid2;
> > +
> > + if (stmt1 == NULL)
> > + return stmt2;
> > +
> > + if (stmt2 == NULL)
> > + return stmt1;
> > +
> > + uid1 = gimple_uid (stmt1);
> > + uid2 = gimple_uid (stmt2);
> > +
> > + if (uid1 == 0 || uid2 == 0)
> > + return NULL;
> > +
> > + gcc_assert (uid1 <= VEC_length (vec_void_p, stmt_vec_info_vec));
> > + gcc_assert (uid2 <= VEC_length (vec_void_p, stmt_vec_info_vec));
> > +
> > + if (uid1 > uid2)
> > + return stmt1;
> > + else
> > + return stmt2;
> > +}
> > +
> > static inline bool
> > is_pattern_stmt_p (stmt_vec_info stmt_info)
>
> Same here, and also elsewhere in the same file, static inline
> functions should be documented.
>
> > {
> > @@ -776,7 +802,7 @@ extern enum dr_alignment_support vect_su
> > extern tree vect_get_smallest_scalar_type (gimple, HOST_WIDE_INT *,
> > HOST_WIDE_INT *);
> > extern bool vect_analyze_data_ref_dependences (loop_vec_info,
bb_vec_info,
> > - int *);
> > + int *, bool *);
> > extern bool vect_enhance_data_refs_alignment (loop_vec_info);
> > extern bool vect_analyze_data_refs_alignment (loop_vec_info,
bb_vec_info);
> > extern bool vect_verify_datarefs_alignment (loop_vec_info,
bb_vec_info);
> > Index: tree-vect-loop.c
> > ===================================================================
> > --- tree-vect-loop.c (revision 163732)
> > +++ tree-vect-loop.c (working copy)
> > @@ -1378,7 +1378,7 @@ vect_analyze_loop_operations (loop_vec_i
> > loop_vec_info
> > vect_analyze_loop (struct loop *loop)
> > {
> > - bool ok;
> > + bool ok, dummy;
> > loop_vec_info loop_vinfo;
> > int max_vf = MAX_VECTORIZATION_FACTOR;
> > int min_vf = 2;
> > @@ -1444,7 +1444,7 @@ vect_analyze_loop (struct loop *loop)
> > the dependences.
> > FORNOW: fail at the first data dependence that we encounter. */
> >
> > - ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
> > + ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf,
> > &dummy);
> > if (!ok
> > || max_vf < min_vf)
> > {
> > Index: tree-vect-data-refs.c
> > ===================================================================
> > --- tree-vect-data-refs.c (revision 163732)
> > +++ tree-vect-data-refs.c (working copy)
> > @@ -322,6 +322,64 @@ vect_equal_offsets (tree offset1, tree o
> > }
> >
> >
> > +/* Check dependence between DRA and DRB for basic block vectorization.
*/
> > +
>
> This comment is not enough: what does this function return?
> What does this function check for?
>
> > +static bool
> > +vect_drs_dependent_in_basic_block (struct data_reference *dra,
> > + struct data_reference *drb)
> > +{
>
> Why isn't this function defined in tree-data-ref.c: it looks like it
> should be independent of vectorizer's data structures.
We can make it independent from vectorizer's data structures, but I am not
sure about the meaning of dependence. This analysis assumes that the loads
can only go up and the stores can only go down. So I don't know if it
should be an API for general use...
Thanks for the DR_WRITE patch.
Ira
>
> > + HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
> > + gimple earlier_stmt;
> > +
> > + /* We only call this function for pairs of loads and stores, but we
> > verify
> > + it here. */
> > + if (DR_IS_READ (dra) == DR_IS_READ (drb))
> > + {
> > + if (DR_IS_READ (dra))
> > + return false;
> > + else
> > + return true;
> > + }
> > +
> > + /* Check that the data-refs have same bases and offsets. If not, we
> > can't
> > + determine if they are dependent. */
> > + if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
> > + && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
> > + || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
> > + || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
> > + != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
> > + || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
> > + return true;
> > +
> > + /* Check the types. */
> > + type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
> > (dra))));
> > + type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
> > (drb))));
> > +
> > + if (type_size_a != type_size_b
> > + || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
> > + TREE_TYPE (DR_REF (drb))))
> > + return true;
> > +
> > + init_a = TREE_INT_CST_LOW (DR_INIT (dra));
> > + init_b = TREE_INT_CST_LOW (DR_INIT (drb));
> > +
> > + /* Two different locations - no dependence. */
> > + if (init_a != init_b)
> > + return false;
> > +
> > + /* We have a read-write dependence. Check that the load is before
the
> > store.
> > + When we vectorize basic blocks, vector load can be only before
> > + corresponding scalar load, and vector store can be only after its
> > + corresponding scalar store. So the order of the acceses is
preserved
> > in
> > + case the load is before the store. */
> > + earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
> > + if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt
(earlier_stmt))))
> > + return false;
> > +
> > + return true;
>
> You may want to just
> return DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt
(earlier_stmt)));
>
> I am testing a patch to replace all the "!DR_IS_READ" with
> DR_IS_WRITE: see the attached patch.
>
> I think we can implement something independent of STMT_VINFO_DATA_REF,
> like a predicate function dr_is_raw (Read After Write) and return the
> result of that.
>
> > +}
> > +
> > +
> > /* Function vect_check_interleaving.
> >
> > Check if DRA and DRB are a part of interleaving. In case they are,
> > insert
> > @@ -495,7 +553,8 @@ vect_mark_for_runtime_alias_test (ddr_p
> >
> > static bool
> > vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr,
> > - loop_vec_info loop_vinfo, int
*max_vf)
> > + loop_vec_info loop_vinfo, int
*max_vf,
> > + bool *data_dependence_in_bb)
> > {
> > unsigned int i;
> > struct loop *loop = NULL;
> > @@ -554,10 +613,14 @@ vect_analyze_data_ref_dependence (struct
> > print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
> > }
> >
> > - /* Mark the statements as unvectorizable. */
> > - STMT_VINFO_VECTORIZABLE (stmtinfo_a) = false;
> > - STMT_VINFO_VECTORIZABLE (stmtinfo_b) = false;
> > -
> > + /* We do not vectorize basic blocks with write-write
dependencies.
> > */
> > + if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
> > + return true;
> > +
> > + /* We deal with read-write dependencies in basic blocks later
(by
> > + verifying that all the loads in the basic block are before
all
> > the
> > + stores). */
> > + *data_dependence_in_bb = true;
> > return false;
> > }
> >
> > @@ -577,7 +640,12 @@ vect_analyze_data_ref_dependence (struct
> > print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
> > }
> >
> > - return true;
> > + /* Do not vectorize basic blcoks with write-write dependences.
*/
> > + if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
> > + return true;
> > +
> > + /* Check if this dependence is allowed in basic block
vectorization.
> > */
> > + return vect_drs_dependent_in_basic_block (dra, drb);
> > }
> >
> > /* Loop-based vectorization and known data dependence. */
> > @@ -678,7 +746,8 @@ vect_analyze_data_ref_dependence (struct
> >
> > bool
> > vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
> > - bb_vec_info bb_vinfo, int *max_vf)
> > + bb_vec_info bb_vinfo, int *max_vf,
> > + bool *data_dependence_in_bb)
> > {
> > unsigned int i;
> > VEC (ddr_p, heap) *ddrs = NULL;
> > @@ -693,7 +762,8 @@ vect_analyze_data_ref_dependences (loop_
> > ddrs = BB_VINFO_DDRS (bb_vinfo);
> >
> > FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
> > - if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
> > + if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
> > + data_dependence_in_bb))
> > return false;
> >
> > return true;
> > Index: tree-vect-slp.c
> > ===================================================================
> > --- tree-vect-slp.c (revision 163732)
> > +++ tree-vect-slp.c (working copy)
> > @@ -1082,6 +1082,24 @@ vect_find_first_load_in_slp_instance (sl
> > }
> >
> >
> > +/* Find the last store in SLP INSTANCE. */
> > +static gimple
> > +vect_find_last_store_in_slp_instance (slp_instance instance)
>
> Please add a new line between comment and function definition.
>
> > +{
> > + int i;
> > + slp_tree node;
> > + gimple last_store = NULL, store;
> > +
> > + node = SLP_INSTANCE_TREE (instance);
> > + for (i = 0;
> > + VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
> > + i++)
> > + last_store = get_later_stmt (store, last_store);
> > +
> > + return last_store;
> > +}
> > +
> > +
> > /* Analyze an SLP instance starting from a group of strided stores.
Call
> > vect_build_slp_tree to build a tree of packed stmts if possible.
> > Return FALSE if it's impossible to SLP any stmt in the loop. */
> > @@ -1503,6 +1521,42 @@ vect_slp_analyze_operations (bb_vec_info
> > return true;
> > }
> >
> > +/* Check if loads and stores are mixed in the basic block (in that
> > + case if we are not sure that the accesses differ, we can't
vectorize
> > the
> > + basic block). Also return FALSE in case that there is statement
marked
> > as
> > + not vectorizable. */
> > +
> > +static bool
> > +vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
> > +{
> > + basic_block bb = BB_VINFO_BB (bb_vinfo);
> > + gimple_stmt_iterator si;
> > + bool detected_store = false;
> > + gimple stmt;
> > + struct data_reference *dr;
> > +
> > + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
> > + {
> > + stmt = gsi_stmt (si);
> > +
> > + /* We can't allow not analyzed statements, since they may
contain
> > data
> > + accesses. */
> > + if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
> > + return false;
> > +
> > + if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
> > + continue;
> > +
> > + dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
> > + if (DR_IS_READ (dr) && detected_store)
> > + return false;
> > +
> > + if (!DR_IS_READ (dr))
> > + detected_store = true;
> > + }
> > +
> > + return true;
> > +}
> >
> > /* Check if vectorization of the basic block is profitable. */
> >
> > @@ -1585,6 +1639,8 @@ vect_slp_analyze_bb (basic_block bb)
> > gimple_stmt_iterator gsi;
> > int min_vf = 2;
> > int max_vf = MAX_VECTORIZATION_FACTOR;
> > + bool data_dependence_in_bb = false;
> > +
> >
> > if (vect_print_dump_info (REPORT_DETAILS))
> > fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
> > @@ -1632,8 +1688,11 @@ vect_slp_analyze_bb (basic_block bb)
> > return NULL;
> > }
> >
> > - if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
> > - || min_vf > max_vf)
> > + if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
> > + &data_dependence_in_bb)
> > + || min_vf > max_vf
> > + || (data_dependence_in_bb
> > + && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
> > {
> > if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> > fprintf (vect_dump, "not vectorized: unhandled data dependence
"
> > @@ -2420,6 +2479,14 @@ vect_schedule_slp_instance (slp_tree nod
> > else
> > si = gsi_for_stmt (stmt);
> >
> > + /* Stores should be inserted just before the last store. */
> > + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
> > + && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
> > + {
> > + gimple last_store = vect_find_last_store_in_slp_instance
(instance);
> > + si = gsi_for_stmt (last_store);
> > + }
> > +
> > is_store = vect_transform_stmt (stmt, &si, &strided_store, node,
> > instance);
> > return is_store;
> > }
> >
> >
> [attachment "0001-Use-DR_IS_WRITE-instead-of-DR_IS_READ.patch"
> deleted by Ira Rosen/Haifa/IBM]
More information about the Gcc-patches
mailing list