This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Add DR_BASE_ALIGNMENT


On Wed, Jun 28, 2017 at 3:40 PM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> This patch records the base alignment in data_reference, to avoid the
> second-guessing that was previously done in vect_compute_data_ref_alignment.
> It also makes vect_analyze_data_refs use dr_analyze_innermost, instead
> of having an almost-copy of the same code.
>
> I'd originally tried to do the second part as a standalone patch,
> but on its own it caused us to miscompute the base alignment (due to
> the second-guessing not quite working).  This was previously latent
> because the old code set STMT_VINFO_DR_ALIGNED_TO to a byte value,
> whereas it should have been bits.
>
> After the previous patch, the only thing that dr_analyze_innermost
> read from the dr was the DR_REF.  I thought it would be better to pass
> that in and make data_reference write-only.  This means that callers
> like vect_analyze_data_refs don't have to set any fields first
> (and thus not worry about *which* fields they should set).
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
>
> Richard
>
>
> 2017-06-28  Richard Sandiford  <richard.sandiford@linaro.org>
>
> gcc/
>         * tree-data-ref.h (data_reference): Add a base_alignment field.
>         (DR_BASE_ALIGNMENT): New macro.
>         (dr_analyze_innermost): Add a tree argument.
>         * tree-data-ref.c: Include builtins.h.
>         (dr_analyze_innermost): Take the tree reference as argument.
>         Set up DR_BASE_ALIGNMENT.
>         (create_data_ref): Update call accordingly.
>         * tree-predcom.c (find_looparound_phi): Likewise.
>         * tree-vectorizer.h (_stmt_vec_info): Add dr_base_alignment.
>         (STMT_VINFO_DR_BASE_ALIGNMENT): New macro.
>         * tree-vect-data-refs.c: Include tree-cfg.h.
>         (vect_compute_data_ref_alignment): Use DR_BASE_ALIGNMENT instead
>         of calculating an alignment here.
>         (vect_analyze_data_refs): Use dr_analyze_innermost.  Record the
>         base alignment in STMT_VINFO_DR_BASE_ALIGNMENT.
>
> Index: gcc/tree-data-ref.h
> ===================================================================
> --- gcc/tree-data-ref.h 2017-06-26 19:41:19.549571836 +0100
> +++ gcc/tree-data-ref.h 2017-06-28 14:26:19.651051322 +0100
> @@ -119,6 +119,10 @@ struct data_reference
>    /* True when the data reference is in RHS of a stmt.  */
>    bool is_read;
>
> +  /* The alignment of INNERMOST.base_address, in bits.  This is logically
> +     part of INNERMOST, but is kept here to avoid unnecessary padding.  */
> +  unsigned int base_alignment;
> +

But then it would be nice to have dr_analyze_innermost take a struct
innermost_loop_behavior *
only.  That way the vectorizer copy for the outer loop behavior can
just be a innermost_loop_behavior
sub-struct as well and predcom wouldn't need to invent an interesting
DR_STMT either.  The
DR_ accessors wouldn't work on that but I guess they could be made work with

inline tree dr_base_address (struct data_reference *dr) { return
dr->innermost.base_address; }
inline tree dr_base_address (struct innermost_loop_behavior *p) {
return p->base_address; }
#define DR_BASE_ADDRESS(DR) (dr_base_address (dr))

anyway, that's implementation detail ;)

>    /* Behavior of the memory reference in the innermost loop.  */
>    struct innermost_loop_behavior innermost;
>
> @@ -139,6 +143,7 @@ #define DR_NUM_DIMENSIONS(DR)      DR_AC
>  #define DR_IS_READ(DR)             (DR)->is_read
>  #define DR_IS_WRITE(DR)            (!DR_IS_READ (DR))
>  #define DR_BASE_ADDRESS(DR)        (DR)->innermost.base_address
> +#define DR_BASE_ALIGNMENT(DR)      (DR)->base_alignment
>  #define DR_OFFSET(DR)              (DR)->innermost.offset
>  #define DR_INIT(DR)                (DR)->innermost.init
>  #define DR_STEP(DR)                (DR)->innermost.step
> @@ -322,7 +327,7 @@ #define DDR_DIST_VECT(DDR, I) \
>  #define DDR_REVERSED_P(DDR) (DDR)->reversed_p
>
>
> -bool dr_analyze_innermost (struct data_reference *, struct loop *);
> +bool dr_analyze_innermost (struct data_reference *, tree, struct loop *);
>  extern bool compute_data_dependences_for_loop (struct loop *, bool,
>                                                vec<loop_p> *,
>                                                vec<data_reference_p> *,
> Index: gcc/tree-data-ref.c
> ===================================================================
> --- gcc/tree-data-ref.c 2017-06-28 14:26:12.946306736 +0100
> +++ gcc/tree-data-ref.c 2017-06-28 14:26:19.651051322 +0100
> @@ -94,6 +94,7 @@ Software Foundation; either version 3, o
>  #include "dumpfile.h"
>  #include "tree-affine.h"
>  #include "params.h"
> +#include "builtins.h"
>
>  static struct datadep_stats
>  {
> @@ -749,7 +750,7 @@ canonicalize_base_object_address (tree a
>    return build_fold_addr_expr (TREE_OPERAND (addr, 0));
>  }
>
> -/* Analyze the behavior of memory reference DR.  There are two modes:
> +/* Analyze the behavior of memory reference REF.  There are two modes:
>
>     - BB analysis.  In this case we simply split the address into base,
>       init and offset components, without reference to any containing loop.
> @@ -767,12 +768,13 @@ canonicalize_base_object_address (tree a
>     dummy outermost loop.  In other cases perform loop analysis.
>
>     Return true if the analysis succeeded and store the results in DR if so.
> -   BB analysis can only fail for bitfield or reversed-storage accesses.  */
> +   BB analysis can only fail for bitfield or reversed-storage accesses.
> +
> +   Note that DR is purely an output; the contents on input don't matter.  */
>
>  bool
> -dr_analyze_innermost (struct data_reference *dr, struct loop *loop)
> +dr_analyze_innermost (struct data_reference *dr, tree ref, struct loop *loop)
>  {
> -  tree ref = DR_REF (dr);
>    HOST_WIDE_INT pbitsize, pbitpos;
>    tree base, poffset;
>    machine_mode pmode;
> @@ -802,6 +804,7 @@ dr_analyze_innermost (struct data_refere
>        return false;
>      }
>
> +  unsigned int base_alignment = get_object_alignment (base);

I think you want to use get_obect_alignment_1 to also track an known
misalignment so you can add...

>    if (TREE_CODE (base) == MEM_REF)
>      {
>        if (!integer_zerop (TREE_OPERAND (base, 1)))
> @@ -858,16 +861,31 @@ dr_analyze_innermost (struct data_refere
>
>    init = ssize_int (pbitpos / BITS_PER_UNIT);
>    split_constant_offset (base_iv.base, &base_iv.base, &dinit);
> -  init =  size_binop (PLUS_EXPR, init, dinit);
> +
> +  /* If the stripped offset has a lower alignment than the original base,
> +     we need to adjust the alignment of the new base down to match.  */
> +  unsigned int dinit_bits_low = ((unsigned int) TREE_INT_CST_LOW (dinit)
> +                                * BITS_PER_UNIT);

... dinit * BITS_PER_UNIT to it and then do the following on the misalign value.
That should get you slightly better results in some cases.

Ok with that change with or without the suggested above refactoring (not
sure if it will really make things so much nicer).

Thanks,
Richard.

> +  if (dinit_bits_low != 0)
> +    base_alignment = MIN (base_alignment, dinit_bits_low & -dinit_bits_low);
> +  init = size_binop (PLUS_EXPR, init, dinit);
> +
>    split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
> -  init =  size_binop (PLUS_EXPR, init, dinit);
> +  init = size_binop (PLUS_EXPR, init, dinit);
>
>    step = size_binop (PLUS_EXPR,
>                      fold_convert (ssizetype, base_iv.step),
>                      fold_convert (ssizetype, offset_iv.step));
>
> -  DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
> +  base = canonicalize_base_object_address (base_iv.base);
> +
> +  /* See if get_pointer_alignment can guarantee a higher alignment than
> +     the one we calculated above.  */
> +  unsigned int alt_alignment = get_pointer_alignment (base);
> +  base_alignment = MAX (base_alignment, alt_alignment);
>
> +  DR_BASE_ADDRESS (dr) = base;
> +  DR_BASE_ALIGNMENT (dr) = base_alignment;
>    DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
>    DR_INIT (dr) = init;
>    DR_STEP (dr) = step;
> @@ -1071,7 +1089,7 @@ create_data_ref (loop_p nest, loop_p loo
>    DR_REF (dr) = memref;
>    DR_IS_READ (dr) = is_read;
>
> -  dr_analyze_innermost (dr, nest != NULL ? loop : NULL);
> +  dr_analyze_innermost (dr, memref, nest != NULL ? loop : NULL);
>    dr_analyze_indices (dr, nest, loop);
>    dr_analyze_alias (dr);
>
> Index: gcc/tree-predcom.c
> ===================================================================
> --- gcc/tree-predcom.c  2017-05-18 07:51:11.896799736 +0100
> +++ gcc/tree-predcom.c  2017-06-28 14:26:19.651051322 +0100
> @@ -1149,7 +1149,7 @@ find_looparound_phi (struct loop *loop,
>    memset (&init_dr, 0, sizeof (struct data_reference));
>    DR_REF (&init_dr) = init_ref;
>    DR_STMT (&init_dr) = phi;
> -  if (!dr_analyze_innermost (&init_dr, loop))
> +  if (!dr_analyze_innermost (&init_dr, init_ref, loop))
>      return NULL;
>
>    if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h       2017-06-26 19:41:19.549571836 +0100
> +++ gcc/tree-vectorizer.h       2017-06-28 14:26:19.653051245 +0100
> @@ -553,7 +553,8 @@ typedef struct _stmt_vec_info {
>    struct data_reference *data_ref_info;
>
>    /* Information about the data-ref relative to this loop
> -     nest (the loop that is being considered for vectorization).  */
> +     nest (the loop that is being considered for vectorization).
> +     See also dr_base_alignment.  */
>    tree dr_base_address;
>    tree dr_init;
>    tree dr_offset;
> @@ -652,6 +653,10 @@ typedef struct _stmt_vec_info {
>
>    /* The number of scalar stmt references from active SLP instances.  */
>    unsigned int num_slp_uses;
> +
> +  /* Logically belongs with dr_base_address etc., but is kept here to
> +     avoid unnecessary padding.  */
> +  unsigned int dr_base_alignment;
>  } *stmt_vec_info;
>
>  /* Information about a gather/scatter call.  */
> @@ -711,6 +716,7 @@ #define STMT_VINFO_DR_INIT(S)
>  #define STMT_VINFO_DR_OFFSET(S)            (S)->dr_offset
>  #define STMT_VINFO_DR_STEP(S)              (S)->dr_step
>  #define STMT_VINFO_DR_ALIGNED_TO(S)        (S)->dr_aligned_to
> +#define STMT_VINFO_DR_BASE_ALIGNMENT(S)    (S)->dr_base_alignment
>
>  #define STMT_VINFO_IN_PATTERN_P(S)         (S)->in_pattern_p
>  #define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c   2017-06-28 14:25:58.811888377 +0100
> +++ gcc/tree-vect-data-refs.c   2017-06-28 14:26:19.652051284 +0100
> @@ -50,6 +50,7 @@ Software Foundation; either version 3, o
>  #include "expr.h"
>  #include "builtins.h"
>  #include "params.h"
> +#include "tree-cfg.h"
>
>  /* Return true if load- or store-lanes optab OPTAB is implemented for
>     COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
> @@ -672,6 +673,7 @@ vect_compute_data_ref_alignment (struct
>    tree aligned_to;
>    tree step;
>    unsigned HOST_WIDE_INT alignment;
> +  unsigned int base_alignment;
>
>    if (dump_enabled_p ())
>      dump_printf_loc (MSG_NOTE, vect_location,
> @@ -687,6 +689,7 @@ vect_compute_data_ref_alignment (struct
>      misalign = DR_INIT (dr);
>    aligned_to = DR_ALIGNED_TO (dr);
>    base_addr = DR_BASE_ADDRESS (dr);
> +  base_alignment = DR_BASE_ALIGNMENT (dr);
>    vectype = STMT_VINFO_VECTYPE (stmt_info);
>
>    /* In case the dataref is in an inner-loop of the loop that is being
> @@ -708,6 +711,7 @@ vect_compute_data_ref_alignment (struct
>           misalign = STMT_VINFO_DR_INIT (stmt_info);
>           aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
>           base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
> +         base_alignment = STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info);
>          }
>        else
>         {
> @@ -738,40 +742,6 @@ vect_compute_data_ref_alignment (struct
>         }
>      }
>
> -  /* To look at alignment of the base we have to preserve an inner MEM_REF
> -     as that carries alignment information of the actual access.  */
> -  base = ref;
> -  while (handled_component_p (base))
> -    base = TREE_OPERAND (base, 0);
> -  unsigned int base_alignment = 0;
> -  unsigned HOST_WIDE_INT base_bitpos;
> -  get_object_alignment_1 (base, &base_alignment, &base_bitpos);
> -  /* As data-ref analysis strips the MEM_REF down to its base operand
> -     to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to
> -     adjust things to make base_alignment valid as the alignment of
> -     DR_BASE_ADDRESS.  */
> -  if (TREE_CODE (base) == MEM_REF)
> -    {
> -      /* Note all this only works if DR_BASE_ADDRESS is the same as
> -        MEM_REF operand zero, otherwise DR/SCEV analysis might have factored
> -        in other offsets.  We need to rework DR to compute the alingment
> -        of DR_BASE_ADDRESS as long as all information is still available.  */
> -      if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))
> -       {
> -         base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;
> -         base_bitpos &= (base_alignment - 1);
> -       }
> -      else
> -       base_bitpos = BITS_PER_UNIT;
> -    }
> -  if (base_bitpos != 0)
> -    base_alignment = base_bitpos & -base_bitpos;
> -  /* Also look at the alignment of the base address DR analysis
> -     computed.  */
> -  unsigned int base_addr_alignment = get_pointer_alignment (base_addr);
> -  if (base_addr_alignment > base_alignment)
> -    base_alignment = base_addr_alignment;
> -
>    if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
>      DR_VECT_AUX (dr)->base_element_aligned = true;
>
> @@ -3560,100 +3530,35 @@ vect_analyze_data_refs (vec_info *vinfo,
>          the outer-loop.  */
>        if (loop && nested_in_vect_loop_p (loop, stmt))
>         {
> -         tree outer_step, outer_base, outer_init;
> -         HOST_WIDE_INT pbitsize, pbitpos;
> -         tree poffset;
> -         machine_mode pmode;
> -         int punsignedp, preversep, pvolatilep;
> -         affine_iv base_iv, offset_iv;
> -         tree dinit;
> -
>           /* Build a reference to the first location accessed by the
> -            inner-loop: *(BASE+INIT).  (The first location is actually
> -            BASE+INIT+OFFSET, but we add OFFSET separately later).  */
> -          tree inner_base = build_fold_indirect_ref
> -                                (fold_build_pointer_plus (base, init));
> +            inner loop: *(BASE + INIT + OFFSET).  By construction,
> +            this address must be invariant in the inner loop, so we
> +            can consider it as being used in the outer loop.  */
> +         tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
> +                                         init, offset);
> +         tree init_addr = fold_build_pointer_plus (base, init_offset);
> +         tree init_ref = build_fold_indirect_ref (init_addr);
>
>           if (dump_enabled_p ())
>             {
>               dump_printf_loc (MSG_NOTE, vect_location,
> -                               "analyze in outer-loop: ");
> -             dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
> +                               "analyze in outer loop: ");
> +             dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);
>               dump_printf (MSG_NOTE, "\n");
>             }
>
> -         outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
> -                                           &poffset, &pmode, &punsignedp,
> -                                           &preversep, &pvolatilep);
> -         gcc_assert (outer_base != NULL_TREE);
> -
> -         if (pbitpos % BITS_PER_UNIT != 0)
> -           {
> -             if (dump_enabled_p ())
> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                                 "failed: bit offset alignment.\n");
> -             return false;
> -           }
> -
> -         if (preversep)
> -           {
> -             if (dump_enabled_p ())
> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                                "failed: reverse storage order.\n");
> -             return false;
> -           }
> -
> -         outer_base = build_fold_addr_expr (outer_base);
> -         if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
> -                          &base_iv, false))
> -           {
> -             if (dump_enabled_p ())
> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                                 "failed: evolution of base is not affine.\n");
> -             return false;
> -           }
> -
> -         if (offset)
> -           {
> -             if (poffset)
> -               poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
> -                                       poffset);
> -             else
> -               poffset = offset;
> -           }
> -
> -         if (!poffset)
> -           {
> -             offset_iv.base = ssize_int (0);
> -             offset_iv.step = ssize_int (0);
> -           }
> -         else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
> -                               &offset_iv, false))
> -           {
> -             if (dump_enabled_p ())
> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                                 "evolution of offset is not affine.\n");
> -             return false;
> -           }
> +         data_reference init_dr;
> +         if (!dr_analyze_innermost (&init_dr, init_ref, loop))
> +           /* dr_analyze_innermost already explained the faiure.  */
> +           return false;
>
> -         outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
> -         split_constant_offset (base_iv.base, &base_iv.base, &dinit);
> -         outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
> -         split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
> -         outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
> -
> -         outer_step = size_binop (PLUS_EXPR,
> -                               fold_convert (ssizetype, base_iv.step),
> -                               fold_convert (ssizetype, offset_iv.step));
> -
> -         STMT_VINFO_DR_STEP (stmt_info) = outer_step;
> -         /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
> -         STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
> -         STMT_VINFO_DR_INIT (stmt_info) = outer_init;
> -         STMT_VINFO_DR_OFFSET (stmt_info) =
> -                               fold_convert (ssizetype, offset_iv.base);
> -         STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
> -                               size_int (highest_pow2_factor (offset_iv.base));
> +         STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = DR_BASE_ADDRESS (&init_dr);
> +         STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info)
> +           = DR_BASE_ALIGNMENT (&init_dr);
> +         STMT_VINFO_DR_STEP (stmt_info) = DR_STEP (&init_dr);
> +         STMT_VINFO_DR_INIT (stmt_info) = DR_INIT (&init_dr);
> +         STMT_VINFO_DR_OFFSET (stmt_info) = DR_OFFSET (&init_dr);
> +         STMT_VINFO_DR_ALIGNED_TO (stmt_info) = DR_ALIGNED_TO (&init_dr);
>
>            if (dump_enabled_p ())
>             {


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]