Fw: [patch] [4.2 projects] vectorize strided accesses (4 patches)

Ira Rosen IRAR@il.ibm.com
Tue Sep 5 09:11:00 GMT 2006


Hi,

Thanks for reviewing.

>
> [ Apologies for the long delay ]
>
> > +     case VEC_EXTRACT_EVEN_EXPR:
> > +       pp_string (buffer, " VEC_EXTRACT_EVEN_EXPR < ");
> > +       dump_generic_node (buffer, TREE_OPERAND (node, 0), spc,
> flags, false);
> > +       pp_string (buffer, " , ");
> >
> s/" , "/", "/
>
>
> > !             print_generic_expr (dump_file, comp_ref, TDF_SLIM);
> > !             fprintf (dump_file, "\n");
> >
> If you use print_generic_stmt, you won't need to emit the \n separately.

O.K.

>
> > *************** create_data_ref (tree memref, tree stmt,
> > *** 1812,1817 ****
> > --- 1834,1865 ----
> >
> >     type_size = fold_convert (ssizetype, TYPE_SIZE_UNIT (TREE_TYPE
> (DR_REF (dr))));
> >
> > +   /* Extract CONSTANT and INVARIANT from OFFSET.  */
> > +   /* Remove cast from OFFSET and restore it for INVARIANT part.  */
> > +   orig_offset = offset;
> > +   STRIP_NOPS (offset);
> > +   if (offset != orig_offset)
> > +     type = TREE_TYPE (orig_offset);
> > +   analyze_offset (offset, &invariant, &constant);
> > +   if (type && invariant)
> > +     invariant = fold_convert (type, invariant);
> > +
> > +   /* Put CONSTANT part of OFFSET in DR_INIT and INVARIANT in
> DR_OFFSET field
> > +      of DR.  */
> > +   if (constant)
> > +     {
> > +       DR_INIT (dr) = fold_convert (ssizetype, constant);
> > +       init_cond = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (constant),
> > +                 constant, type_size);
> > +     }
> > +   else
> > +     DR_INIT (dr) = init_cond = ssize_int (0);;
> > +
> s/;;/;/
>
> >           }
> >
> >         if (TREE_CODE (opnd1) == ARRAY_REF
> > !           || TREE_CODE (opnd1) == INDIRECT_REF)
> >           {
> >             dr = create_data_ref (opnd1, stmt, true);
> >             if (dr)
> > --- 3733,3740 ----
> >           }
> >
> >         if (TREE_CODE (opnd1) == ARRAY_REF
> > !           || TREE_CODE (opnd1) == INDIRECT_REF
> > !                     || TREE_CODE (opnd1) == COMPONENT_REF)
> >
> What about ALIGN_INDIRECT_REF and MISALIGNED_INDIRECT_REF?  Could those
> be handled?

Probably they could. We will think it over. It is a data ref analysis issue
though,
it should be in a different patch, I think.


>
> > Index: tree-data-ref.h
> > ===================================================================
> > *** tree-data-ref.h   (revision 111108)
> > --- tree-data-ref.h   (working copy)
> > *************** Software Foundation, 51 Franklin Street,
> > *** 25,31 ****
> >   #include "lambda.h"
> >
> >   /** {base_address + offset + init} is the first location
> accessed by data-ref
> > !       in the loop, and step is the stride of data-ref in the
> loop in bytes;
> >         e.g.:
> >
> >                          Example 1                      Example 2
> > --- 25,33 ----
> >   #include "lambda.h"
> >
> >   /** {base_address + offset + init} is the first location
> accessed by data-ref
> > !       in the loop, offset is an invariant part of the initial
> offset and init is
> >
> s/offset and init/offset, init/

I am sorry, I don't understand your remark. Maybe I can rephrase in the
following way:

/**
The first location accessed by data-ref in the loop is the address of
data-ref's
base (BASE_ADDRESS) plus the initial offset from the base. We divide the
initial offset
into two parts: loop invariant offset (OFFSET) and constant offset (INIT).
STEP is the stride of data-ref in the loop in bytes.

                       Example 1                      Example 2
      data-ref         a[j].b[i][j]                   a + x + 16B (a is
int*)

First location info:
      base_address     &a                             a
      offset           j_0*D_j + i_0*D_i              x
      init             C_b + C_a                      16
      step             D_j                            4
      access_fn        NULL                           {16, +, 1}

Base object info:
      base_object      a                              NULL
      access_fn        <access_fns of indexes of b>   NULL

  **/

>
> > --- 65,75 ----
> >   static bool vect_can_advance_ivs_p (loop_vec_info);
> >   static void vect_update_misalignment_for_peel
> >     (struct data_reference *, struct data_reference *, int npeel);
> > ! static void vect_check_interleaving
> > !   (struct data_reference *, struct data_reference *);
> > ! static void vect_update_interleaving_chain
> > !   (struct data_reference *, struct data_reference *);
> > ! static bool vect_equal_offsets (tree, tree);
> >
> No forward declarations for static functions, please.

O.K.

>
> >   /* Function vect_determine_vectorization_factor
> >
> > *************** vect_determine_vectorization_factor (loo
> > *** 187,195 ****
> >             if (vect_print_dump_info (REPORT_DETAILS))
> >               fprintf (vect_dump, "nunits = %d", nunits);
> >
> > !      if (!vectorization_factor
> > !               || (nunits > vectorization_factor))
> > !             vectorization_factor = nunits;
> >           }
> >       }
> >
> > --- 192,201 ----
> >             if (vect_print_dump_info (REPORT_DETAILS))
> >               fprintf (vect_dump, "nunits = %d", nunits);
> >
> > !           if (!vectorization_factor
> > !          || (nunits > vectorization_factor))
> > !        vectorization_factor = nunits;
> > !
> >           }
> >       }
> >
> > *************** vect_analyze_scalar_cycles (loop_vec_inf
> > *** 559,564 ****
> > --- 565,865 ----
> >   }
> >
> >
> > + /* Function vect_insert_into_interleaving_chain.
> > +
> > +    Insert DRA into the interleaving chain of DRB according to DRA's
INIT.
> > + */
> > +
> Closing comment should be '.  */'

O.K.

>
> > + static void
> > + vect_insert_into_interleaving_chain (struct data_reference *dra,
> > +                  struct data_reference *drb)
> > + {
> > +   tree prev, next, next_init;
> > +   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
> > +   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
> > +
> > +   prev = DR_GROUP_FIRST_DR (stmtinfo_b);
> > +   next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
> > +   while (next)
> > +     {
> > +       next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt
(next)));
> > +       if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
> >
> Are we positive at this point that DR_INIT will always have
> INTEGER_CSTs?  This happens a few times in other functions.

Yes we are. When a DR is created in create_data_ref the constant part of
the offset
is put in DR_INIT.


>
> > +       next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
> > +     }
> > +   /* We got to the end of the list. Insert here. */
> >
> Vertical spacing after closing brace.

O.K.

>
> > +   /* 3. DRA is a part of a chain and DRB is not.  */
> > +   if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR
(stmtinfo_b))
> > +     {
> > +       tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
> > +       tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
> > +                            old_first_stmt)));
> > +       tree tmp;
> > +
> > +       if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
> > +    {
> > +      /* DRB's init is smaller than the init of the stmt
> previously mark as
> >
> s/previously mark/previously marked/
>
> Either that, or you need to rephrase the comment because it's a bit hard
> to read.

It should be "previously marked".


>
>
> > +
> > +
> > + /* Function vect_equal_offsets.
> > +
> > +    Check if OFFSET1 and OFFSET2 are identical expressions.
> > + */
> > +
> Closing comment should be '.  */'

O.K.

> > +   if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) > 0)
> > +     {
> > +       /* If init_a == init_b + the size of the type * k, we have
> an interleaving,
> > +     and DRB is accessed before DRA.  */
> > +       inits_diff = fold_build2 (MINUS_EXPR, TREE_TYPE (type_size_a),
> > +             DR_INIT (dra), DR_INIT (drb));
> > +       diff_mod_size = fold_build2 (TRUNC_MOD_EXPR, TREE_TYPE
> (type_size_a),
> > +                inits_diff, type_size_a);
> > +
> > +       if (tree_int_cst_compare (inits_diff, DR_STEP (dra)) > 0)
> > +          return;
> > +
> Hmm, wouldn't it be more efficient to compute this using HOST_WIDE_INTs?

O.K. I will rewrite this part.


>
> > +       if (!tree_int_cst_compare (diff_mod_size, ssize_int (0)))
> > +    {
> > +      vect_update_interleaving_chain (drb, dra);
> > +      if (vect_print_dump_info (REPORT_DR_DETAILS))
> > +        {
> > +          fprintf (vect_dump, "Detected interleaving ");
> > +          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
> > +          fprintf (vect_dump, " and ");
> > +          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
> > +        }
> > +      return;
> > +    }
> > +     }
> > +   else
> > +     {
> > +       /* If init_b == init_a + the size of the type * k, we have an
> > +     interleaving, and DRA is accessed before DRB.  */
> > +       inits_diff = fold_build2 (MINUS_EXPR, TREE_TYPE (type_size_a),
> > +             DR_INIT (drb), DR_INIT (dra));
> > +       diff_mod_size = fold_build2 (TRUNC_MOD_EXPR, TREE_TYPE
> (type_size_a),
> > +                inits_diff, type_size_a);
> > +
> > +       if (tree_int_cst_compare (inits_diff, DR_STEP (dra)) > 0)
> > +          return;
> > +
> Likewise.

O.K.

> > !       gcc_assert (DR_MISALIGNMENT (dr)/dr_size ==
> > !         DR_MISALIGNMENT (dr_peel)/dr_peel_size);
> >
> Spacing around '/'.
>
> > +          elem_size = UNITS_PER_SIMD_WORD/vf;
> > +          mis_in_elements = DR_MISALIGNMENT (dr)/elem_size;
> > +
> Likewise.

O.K.

>
>
> >   static void update_vuses_to_preheader (tree, struct loop*);
> >   static void vect_create_epilog_for_reduction (tree, tree, enum
> tree_code, tree);
> >   static tree get_initial_def_for_reduction (tree, tree, tree *);
> > + static bool vect_transform_strided_load (tree, VEC(tree,heap) *, int,
> > +                 block_stmt_iterator *);
> > + static bool vect_permute_load_chain (VEC(tree,heap) *, unsigned
> int, tree,
> > +                  block_stmt_iterator *,
> > +                  VEC(tree,heap) **);
> > + static bool vect_strided_load_supported (tree);
> >
> Forward declaration of static functions.

Will be removed.

>
> > + /* Function vect_strided_load_supported.
> > +
> > +    Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are
supported,
> > +    and FALSE otherwise.
> > +
> > + */
> > +
> Closing comment '.  */'

O.K.

>
>
> > +   for (i = 0; i < exact_log2(length); i++)
> > +     {
> > +       for (j = 0; j < length; j+=2)
> >
> Spacing around += and ().

O.K.

>
> > +    {
> > +      first_vect = VEC_index(tree, dr_chain, j);
> > +      second_vect = VEC_index(tree, dr_chain, j+1);
> > +
> > +      /* data_ref = permute_even (first_data_ref, second_data_ref);
*/
> > +      perm_dest = create_tmp_var (vectype, "vect_perm_even");
> > +      add_referenced_tmp_var (perm_dest);
> > +
> > +      perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
> > +                build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
> > +                   first_vect, second_vect));
> > +
> > +      data_ref = make_ssa_name (perm_dest, perm_stmt);
> > +      TREE_OPERAND (perm_stmt, 0) = data_ref;
> > +      vect_finish_stmt_generation (stmt, perm_stmt, bsi);
> > +
> > +      /* CHECKME.  */
> >
> What is there in this loop to check?
>
> > +      FOR_EACH_SSA_USE_OPERAND (use_p, perm_stmt, iter,
> > +                 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
> > +        {
> > +          tree op = USE_FROM_PTR (use_p);
> > +          if (TREE_CODE (op) == SSA_NAME)
> > +       op = SSA_NAME_VAR (op);
> >
> You should never find an SSA name in a recently built statement.  Why
> not just call mark_new_vars_to_rename()?

O.K.


>
> > +      /* CHECKME.  */
> > +      FOR_EACH_SSA_USE_OPERAND (use_p, perm_stmt, iter,
> > +                 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
> > +        {
> > +          tree op = USE_FROM_PTR (use_p);
> > +          if (TREE_CODE (op) == SSA_NAME)
> > +       op = SSA_NAME_VAR (op);
> > +          mark_sym_for_renaming (op);
> > +        }
> > +
> Likewise.
>
> > --- 2395,2402 ----
> >       return false;
> >
> >     op = TREE_OPERAND (stmt, 1);
> > !   if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF
> > !       && !DR_GROUP_FIRST_DR (stmt_info))
> >
> Align vertically.

O.K.

>
> > *************** vectorizable_load (tree stmt, block_stmt
> > *** 2182,2187 ****
> > --- 2493,2532 ----
> >        information we recorded in RELATED_STMT field is used to
vectorize
> >        stmt S2.  */
> >
> > +   /* In case of interleaving (non-unit strided access):
> > +
> > +         S1:  x2 = &base + 2
> > +         S2:  x0 = &base
> > +         S3:  x1 = &base + 1
> > +         S4:  x3 = &base + 3
> > +
> > +      We create vectorized loads starting from base address (the
> access of the
> > +      first stmt in the chain (S2 in the above example):
> > +
> Unbalanced nested brackets.  Re-phrase.

      Vectorized loads are created in the order of memory accesses
      starting from the access of the first stmt of the chain:

Is this more clear?

>
> > +         VS1: vx0 = &base
> > +    VS2: vx1 = &base + vec_size*1
> > +    VS3: vx3 = &base + vec_size*2
> > +    VS4: vx4 = &base + vec_size*3
> > +
> > +      Then permutation statements are generated:
> > +
> > +         VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
> > +         VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
> > +    ...
> > +
> > +      And they are put in STMT_VINFO_VEC_STMT of the
> corresponding scalar stmts
> > +      (the order of the data-refs in the output of
vect_permute_load_chain
> > +      corresponds to the order of scalar stmts in the
> interleaving chain - see
> > +      the documentaion of vect_permute_load_chain()).
> > +      The generation of permutation stmts and recording them in
> > +      STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
> > +
> > +      In case of both multiple types and interleaving, above
> vector loads and
> >
> s/above vector loads/the vector loads and permutation stmts above/

O.K.

>
>
> > +     case VEC_INTERLEAVE_HIGH_EXPR:
> > +       pp_string (buffer, " VEC_INTERLEAVE_HIGH_EXPR < ");
> > +       dump_generic_node (buffer, TREE_OPERAND (node, 0), spc,
> flags, false);
> > +       pp_string (buffer, " , ");
> >
> s/" , "/", "/


O.K.

>
> > +   if (!interleave_high_optab || !interleave_low_optab)
> > +     {
> > +       if (vect_print_dump_info (REPORT_DETAILS))
> > +    fprintf (vect_dump, "no optab for interleave.");
> > +       return false;
> > +     }
> > +   if (interleave_high_optab->handlers[(int) mode].insn_code
> > +       == CODE_FOR_nothing
> > +       || interleave_low_optab->handlers[(int) mode].insn_code
> > +       == CODE_FOR_nothing)
> > +     {
> > +       if (vect_print_dump_info (REPORT_DETAILS))
> > +    fprintf (vect_dump, "interleave op not supported by target.");
> > +       return false;
> > +     }
> > +   return true;
> > + }
> > +
> >
> Vertical spacing to separate if() bodies.

O.K.

>
> > !          /* The original store is deleted so the same SSA_NAMEs
> can be used.
> > !           */
> > !          FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter,
SSA_OP_VMAYDEF)
> > !       {
> > !         SSA_NAME_DEF_STMT (def) = new_stmt;
> > !         mark_sym_for_renaming (SSA_NAME_VAR (def));
> > !       }
> > !
> mark_new_vars_to_rename()?

O.K.

>
> > +       /* Gaps are supported only for loads. STEP must be a
> multiple of the type
> > +        size.  The size of the group must be a power of 2.  */
> > +       if (DR_IS_READ (dr)
> > +         && !tree_int_cst_compare (fold_build2 (TRUNC_MOD_EXPR,
> > +                                                TREE_TYPE (step),
step,
> > +                                                TYPE_SIZE_UNIT
> (scalar_type)),
> > +                                   ssize_int (0))
> >
> Again.  Why not compute with HWIs?
>

I will do that.

>
> The patch is missing .texi documentation on the new tree codes and
optabs.
>

Will be added.


Is it O.K. for mainline 4.3 with those changes?

Thanks,
Ira








More information about the Gcc-patches mailing list