This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



Re: [PATCH, vec-tails 05/10] Check if loop can be masked


On Thu, May 19, 2016 at 9:42 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> Hi,
>
> This patch introduces analysis to determine if loop can be masked
> (compute LOOP_VINFO_CAN_BE_MASKED and LOOP_VINFO_REQUIRED_MASKS)
> and compute how much masking costs.

Maybe in a different patch, but it looks like you assume that, say, a
division does not need masking (a masked-off lane could still trap on a
division by zero, for example).

Code-generation-wise we'd add a new iv starting with

 iv = { 0, 1, 2, 3 };

and the mask is computed by comparing that against {niter, niter, niter, niter}?

So if we need masks for different vector element counts we could also add
additional IVs rather than "widening"/"shortening" the comparison result.
cond-expr reduction uses this kind of IV as well, which is a chance to share
some code (eventually).
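
To make that concrete, here is a rough sketch of how such a masked loop
would behave for VF == 4, with plain arrays standing in for vector registers
(the function and the copy operation are made up for illustration; this is
not the GIMPLE the patch would emit):

  void
  masked_copy (int *dst, const int *src, unsigned niter)
  {
    unsigned iv[4] = { 0, 1, 2, 3 };
    for (unsigned i = 0; i < niter; i += 4)
      {
        int mask[4];
        /* The mask is the comparison of the IV against
           { niter, niter, niter, niter }.  */
        for (int l = 0; l < 4; l++)
          mask[l] = iv[l] < niter;
        /* Loads and stores only execute in the active lanes.  */
        for (int l = 0; l < 4; l++)
          if (mask[l])
            dst[i + l] = src[i + l];
        /* Bump the IV by VF.  */
        for (int l = 0; l < 4; l++)
          iv[l] += 4;
      }
  }

An additional IV for a different element count (say 8) would just be another
such iv/mask pair instead of packing or unpacking the 4-element comparison
result.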

You look at TREE_TYPE of LOOP_VINFO_NITERS (loop_vinfo) - I don't think
this is meaningful (if it is, then only by accident).  I think you should look
at the control IV itself, possibly its value range, to determine the smallest
possible type to use.

Finally we have a related missed optimization opportunity, namely avoiding
peeling for gaps if we mask the last load of the group (profitability depends
on the overhead of such masking, of course, as it would be done in the main
vectorized loop).
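
For instance (an illustrative example, not taken from the patch), a grouped
access with a gap such as

  void
  gap_example (int *a, const int *b, int n)
  {
    for (int i = 0; i < n; i++)
      a[i] = b[2 * i];   /* group of two with a gap: b[2*i + 1] is never read  */
  }

is vectorized with loads that also cover the unused b[2*i + 1] elements, so
the last vector load can touch memory past the last element the scalar loop
reads; currently that forces peeling for gaps, while masking that last load
would keep the iteration in the main vectorized loop.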

Richard.

> Thanks,
> Ilya
> --
> gcc/
>
> 2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>
>
>         * tree-vect-loop.c: Include insn-config.h and recog.h.
>         (vect_check_required_masks_widening): New.
>         (vect_check_required_masks_narrowing): New.
>         (vect_get_masking_iv_elems): New.
>         (vect_get_masking_iv_type): New.
>         (vect_get_extreme_masks): New.
>         (vect_check_required_masks): New.
>         (vect_analyze_loop_operations): Add vect_check_required_masks
>         call to compute LOOP_VINFO_CAN_BE_MASKED.
>         (vect_analyze_loop_2): Initialize LOOP_VINFO_CAN_BE_MASKED and
>         LOOP_VINFO_NEED_MASKING before starting over.
>         (vectorizable_reduction): Compute LOOP_VINFO_CAN_BE_MASKED and
>         masking cost.
>         * tree-vect-stmts.c (can_mask_load_store): New.
>         (vect_model_load_masking_cost): New.
>         (vect_model_store_masking_cost): New.
>         (vect_model_simple_masking_cost): New.
>         (vectorizable_mask_load_store): Compute LOOP_VINFO_CAN_BE_MASKED
>         and masking cost.
>         (vectorizable_simd_clone_call): Likewise.
>         (vectorizable_store): Likewise.
>         (vectorizable_load): Likewise.
>         (vect_stmt_should_be_masked_for_epilogue): New.
>         (vect_add_required_mask_for_stmt): New.
>         (vect_analyze_stmt): Compute LOOP_VINFO_CAN_BE_MASKED.
>         * tree-vectorizer.h (vect_model_load_masking_cost): New.
>         (vect_model_store_masking_cost): New.
>         (vect_model_simple_masking_cost): New.
>
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index e25a0ce..31360d3 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -31,6 +31,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-pass.h"
>  #include "ssa.h"
>  #include "optabs-tree.h"
> +#include "insn-config.h"
> +#include "recog.h"             /* FIXME: for insn_data */
>  #include "diagnostic-core.h"
>  #include "fold-const.h"
>  #include "stor-layout.h"
> @@ -1601,6 +1603,266 @@ vect_update_vf_for_slp (loop_vec_info loop_vinfo)
>                      vectorization_factor);
>  }
>
> +/* Function vect_check_required_masks_widening.
> +
> +   Return 1 if vector mask of type MASK_TYPE can be widened
> +   to a type having REQ_ELEMS elements in a single vector.  */
> +
> +static bool
> +vect_check_required_masks_widening (loop_vec_info loop_vinfo,
> +                                   tree mask_type, unsigned req_elems)
> +{
> +  unsigned mask_elems = TYPE_VECTOR_SUBPARTS (mask_type);
> +
> +  gcc_assert (mask_elems > req_elems);
> +
> +  /* Don't convert if it requires too many intermediate steps.  */
> +  int steps = exact_log2 (mask_elems / req_elems);
> +  if (steps > MAX_INTERM_CVT_STEPS + 1)
> +    return false;
> +
> +  /* Check we have conversion support for given mask mode.  */
> +  machine_mode mode = TYPE_MODE (mask_type);
> +  insn_code icode = optab_handler (vec_unpacks_lo_optab, mode);
> +  if (icode == CODE_FOR_nothing
> +      || optab_handler (vec_unpacks_hi_optab, mode) == CODE_FOR_nothing)
> +    return false;
> +
> +  /* Make recursive call for multi-step conversion.  */
> +  if (steps > 1)
> +    {
> +      mask_elems = mask_elems >> 1;
> +      mask_type = build_truth_vector_type (mask_elems, current_vector_size);
> +      if (TYPE_MODE (mask_type) != insn_data[icode].operand[0].mode)
> +       return false;
> +
> +      if (!vect_check_required_masks_widening (loop_vinfo, mask_type,
> +                                              req_elems))
> +       return false;
> +    }
> +  else
> +    {
> +      mask_type = build_truth_vector_type (req_elems, current_vector_size);
> +      if (TYPE_MODE (mask_type) != insn_data[icode].operand[0].mode)
> +       return false;
> +    }
> +
> +  return true;
> +}
> +
> +/* Function vect_check_required_masks_narrowing.
> +
> +   Return 1 if vector mask of type MASK_TYPE can be narrowed
> +   to a type having REQ_ELEMS elements in a single vector.  */
> +
> +static bool
> +vect_check_required_masks_narrowing (loop_vec_info loop_vinfo,
> +                                    tree mask_type, unsigned req_elems)
> +{
> +  unsigned mask_elems = TYPE_VECTOR_SUBPARTS (mask_type);
> +
> +  gcc_assert (req_elems > mask_elems);
> +
> +  /* Don't convert if it requires too many intermediate steps.  */
> +  int steps = exact_log2 (req_elems / mask_elems);
> +  if (steps > MAX_INTERM_CVT_STEPS + 1)
> +    return false;
> +
> +  /* Check we have conversion support for given mask mode.  */
> +  machine_mode mode = TYPE_MODE (mask_type);
> +  insn_code icode = optab_handler (vec_pack_trunc_optab, mode);
> +  if (icode == CODE_FOR_nothing)
> +    return false;
> +
> +  /* Make recursive call for multi-step conversion.  */
> +  if (steps > 1)
> +    {
> +      mask_elems = mask_elems << 1;
> +      mask_type = build_truth_vector_type (mask_elems, current_vector_size);
> +      if (TYPE_MODE (mask_type) != insn_data[icode].operand[0].mode)
> +       return false;
> +
> +      if (!vect_check_required_masks_narrowing (loop_vinfo, mask_type,
> +                                               req_elems))
> +       return false;
> +    }
> +  else
> +    {
> +      mask_type = build_truth_vector_type (req_elems, current_vector_size);
> +      if (TYPE_MODE (mask_type) != insn_data[icode].operand[0].mode)
> +       return false;
> +    }
> +
> +  return true;
> +}
> +
> +/* Function vect_get_masking_iv_elems.
> +
> +   Return a number of elements in IV used for loop masking.  */
> +static int
> +vect_get_masking_iv_elems (loop_vec_info loop_vinfo)
> +{
> +  tree iv_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
> +  tree iv_vectype = get_vectype_for_scalar_type (iv_type);
> +
> +  /* We extend IV type in case it is not big enough to
> +     fill full vector.  */
> +  return MIN ((int)TYPE_VECTOR_SUBPARTS (iv_vectype),
> +             LOOP_VINFO_VECT_FACTOR (loop_vinfo));
> +}
> +
> +/* Function vect_get_masking_iv_type.
> +
> +   Return a type of IV used for loop masking.  */
> +static tree
> +vect_get_masking_iv_type (loop_vec_info loop_vinfo)
> +{
> +  tree iv_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
> +  tree iv_vectype = get_vectype_for_scalar_type (iv_type);
> +  unsigned vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
> +
> +  if (TYPE_VECTOR_SUBPARTS (iv_vectype) <= vf)
> +    return iv_vectype;
> +
> +  unsigned elem_size = current_vector_size * BITS_PER_UNIT / vf;
> +  iv_type = build_nonstandard_integer_type (elem_size, TYPE_UNSIGNED (iv_type));
> +
> +  return get_vectype_for_scalar_type (iv_type);
> +}
> +
> +/* Function vect_get_extreme_masks.
> +
> +   Determine minimum and maximum number of elements in masks
> +   required for masking a loop described by LOOP_VINFO.
> +   Computed values are returned in MIN_MASK_ELEMS and
> +   MAX_MASK_ELEMS. */
> +
> +static void
> +vect_get_extreme_masks (loop_vec_info loop_vinfo,
> +                       unsigned *min_mask_elems,
> +                       unsigned *max_mask_elems)
> +{
> +  unsigned required_masks = LOOP_VINFO_REQUIRED_MASKS (loop_vinfo);
> +  unsigned elems = 1;
> +
> +  *min_mask_elems = *max_mask_elems = vect_get_masking_iv_elems (loop_vinfo);
> +
> +  while (required_masks)
> +    {
> +      if (required_masks & 1)
> +       {
> +         if (elems < *min_mask_elems)
> +           *min_mask_elems = elems;
> +         if (elems > *max_mask_elems)
> +           *max_mask_elems = elems;
> +       }
> +      elems = elems << 1;
> +      required_masks = required_masks >> 1;
> +    }
> +}
> +
> +/* Function vect_check_required_masks.
> +
> +   For given LOOP_VINFO check all required masks can be computed
> +   and add computation cost into loop cost data.  */
> +
> +static void
> +vect_check_required_masks (loop_vec_info loop_vinfo)
> +{
> +  if (!LOOP_VINFO_REQUIRED_MASKS (loop_vinfo))
> +    return;
> +
> +  /* Firstly check we have a proper comparison to get
> +     an initial mask.  */
> +  tree iv_vectype = vect_get_masking_iv_type (loop_vinfo);
> +  unsigned iv_elems = TYPE_VECTOR_SUBPARTS (iv_vectype);
> +
> +  tree mask_type = build_same_sized_truth_vector_type (iv_vectype);
> +
> +  if (!expand_vec_cmp_expr_p (iv_vectype, mask_type))
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "cannot be masked: required vector comparison "
> +                        "is not supported.\n");
> +      LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +      return;
> +    }
> +
> +  int cmp_copies  = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / iv_elems;
> +  /* Add cost of initial iv values creation.  */
> +  add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), cmp_copies,
> +                scalar_to_vec, NULL, 0, vect_masking_prologue);
> +  /* Add cost of upper bound and step values creation.  It is the same
> +     for all copies.  */
> +  add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), 2,
> +                scalar_to_vec, NULL, 0, vect_masking_prologue);
> +  /* Add cost of vector comparisons.  */
> +  add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), cmp_copies,
> +                vector_stmt, NULL, 0, vect_masking_body);
> +  /* Add cost of iv increment.  */
> +  add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), cmp_copies,
> +                vector_stmt, NULL, 0, vect_masking_body);
> +
> +
> +  /* Now check the widest and the narrowest masks.
> +     All intermediate values are obtained while
> +     computing extreme values.  */
> +  unsigned min_mask_elems = 0;
> +  unsigned max_mask_elems = 0;
> +
> +  vect_get_extreme_masks (loop_vinfo, &min_mask_elems, &max_mask_elems);
> +
> +  if (min_mask_elems < iv_elems)
> +    {
> +      /* Check mask widening is available.  */
> +      if (!vect_check_required_masks_widening (loop_vinfo, mask_type,
> +                                              min_mask_elems))
> +       {
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                            "cannot be masked: required mask widening "
> +                            "is not supported.\n");
> +         LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +         return;
> +       }
> +
> +      /* Add widening cost.  We have totally (2^N - 1) vectors
> +        we need to widen per each original vector, where N is
> +        a number of conversion steps.  Each widening requires
> +        two extracts.  */
> +      int steps = exact_log2 (iv_elems / min_mask_elems);
> +      int conversions = cmp_copies * 2 * ((1 << steps) - 1);
> +      add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo),
> +                    conversions, vec_promote_demote,
> +                    NULL, 0, vect_masking_body);
> +    }
> +
> +  if (max_mask_elems > iv_elems)
> +    {
> +      if (!vect_check_required_masks_narrowing (loop_vinfo, mask_type,
> +                                               max_mask_elems))
> +       {
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                            "cannot be masked: required mask narrowing "
> +                            "is not supported.\n");
> +         LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +         return;
> +       }
> +
> +      /* Add narrowing cost.  We have totally (2^N - 1) vector
> +        narrowings per each resulting vector, where N is
> +        a number of conversion steps.  */
> +      int steps = exact_log2 (max_mask_elems / iv_elems);
> +      int results = cmp_copies * iv_elems / max_mask_elems;
> +      int conversions = results * ((1 << steps) - 1);
> +      add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo),
> +                    conversions, vec_promote_demote,
> +                    NULL, 0, vect_masking_body);
> +    }
> +}
> +
>  /* Function vect_analyze_loop_operations.
>
>     Scan the loop stmts and make sure they are all vectorizable.  */
> @@ -1759,6 +2021,12 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
>        return false;
>      }
>
> +  /* If all statements can be masked then we also need
> +     to check that we can compute the required masks and
> +     compute their cost.  */
> +  if (LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +    vect_check_required_masks (loop_vinfo);
> +
>    return true;
>  }
>
> @@ -2232,6 +2500,8 @@ again:
>    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
>    LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
>    LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
> +  LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = true;
> +  LOOP_VINFO_NEED_MASKING (loop_vinfo) = false;
>
>    goto start_over;
>  }
> @@ -5424,6 +5694,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
>        outer_loop = loop;
>        loop = loop->inner;
>        nested_cycle = true;
> +      LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
>      }
>
>    /* 1. Is vectorizable reduction?  */
> @@ -5623,6 +5894,18 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
>
>    gcc_assert (ncopies >= 1);
>
> +  if (slp_node || PURE_SLP_STMT (stmt_info) || code == COND_EXPR
> +      || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
> +      || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
> +        == INTEGER_INDUC_COND_REDUCTION)
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "cannot be masked: unsupported conditional "
> +                        "reduction\n");
> +      LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +    }
> +
>    vec_mode = TYPE_MODE (vectype_in);
>
>    if (code == COND_EXPR)
> @@ -5900,6 +6183,19 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
>           return false;
>         }
>      }
> +  if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +    {
> +      /* Check that masking of reduction is supported.  */
> +      tree mask_vtype = build_same_sized_truth_vector_type (vectype_out);
> +      if (!expand_vec_cond_expr_p (vectype_out, mask_vtype))
> +       {
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                            "cannot be masked: required vector conditional "
> +                            "expression is not supported.\n");
> +         LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +       }
> +    }
>
>    if (!vec_stmt) /* transformation not required.  */
>      {
> @@ -5908,6 +6204,10 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
>                                          reduc_index))
>          return false;
>        STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
> +
> +      if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +       vect_model_simple_masking_cost (stmt_info, ncopies);
> +
>        return true;
>      }
>
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 9ab4af4..91ebe5a 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -48,6 +48,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-vectorizer.h"
>  #include "builtins.h"
>  #include "internal-fn.h"
> +#include "tree-ssa-loop-ivopts.h"
>
>  /* For lang_hooks.types.type_for_mode.  */
>  #include "langhooks.h"
> @@ -535,6 +536,38 @@ process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
>    return true;
>  }
>
> +/* Return true if STMT can be converted to masked form.  */
> +
> +static bool
> +can_mask_load_store (gimple *stmt)
> +{
> +  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> +  tree vectype, mask_vectype;
> +  tree lhs, ref;
> +
> +  if (!stmt_info)
> +    return false;
> +  lhs = gimple_assign_lhs (stmt);
> +  ref = (TREE_CODE (lhs) == SSA_NAME) ? gimple_assign_rhs1 (stmt) : lhs;
> +  if (may_be_nonaddressable_p (ref))
> +    return false;
> +  vectype = STMT_VINFO_VECTYPE (stmt_info);
> +  mask_vectype = build_same_sized_truth_vector_type (vectype);
> +  if (!can_vec_mask_load_store_p (TYPE_MODE (vectype),
> +                                 TYPE_MODE (mask_vectype),
> +                                 gimple_assign_load_p (stmt)))
> +    {
> +      if (dump_enabled_p ())
> +       {
> +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                          "Statement can't be masked.\n");
> +         dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
> +       }
> +
> +       return false;
> +    }
> +  return true;
> +}
>
>  /* Function vect_mark_stmts_to_be_vectorized.
>
> @@ -1193,6 +1226,52 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
>      }
>  }
>
> +/* Function vect_model_load_masking_cost.
> +
> +   Models cost for memory load masking.  */
> +
> +void
> +vect_model_load_masking_cost (stmt_vec_info stmt_info, int ncopies)
> +{
> +  if (gimple_code (stmt_info->stmt) == GIMPLE_CALL)
> +    add_stmt_masking_cost (stmt_info->vinfo->target_cost_data,
> +                          ncopies, vector_mask_load, stmt_info, false,
> +                          vect_masking_body);
> +  else
> +    add_stmt_masking_cost (stmt_info->vinfo->target_cost_data,
> +                          ncopies, vector_load, stmt_info, false,
> +                          vect_masking_body);
> +}
> +
> +/* Function vect_model_store_masking_cost.
> +
> +   Models cost for memory store masking.  */
> +
> +void
> +vect_model_store_masking_cost (stmt_vec_info stmt_info, int ncopies)
> +{
> +  if (gimple_code (stmt_info->stmt) == GIMPLE_CALL)
> +    add_stmt_masking_cost (stmt_info->vinfo->target_cost_data,
> +                          ncopies, vector_mask_store, stmt_info, false,
> +                          vect_masking_body);
> +  else
> +    add_stmt_masking_cost (stmt_info->vinfo->target_cost_data,
> +                          ncopies, vector_store, stmt_info, false,
> +                          vect_masking_body);
> +}
> +
> +/* Function vect_model_simple_masking_cost.
> +
> +   Models cost for statement masking.  Return estimated cost.  */
> +
> +void
> +vect_model_simple_masking_cost (stmt_vec_info stmt_info, int ncopies)
> +{
> +  add_stmt_masking_cost (stmt_info->vinfo->target_cost_data,
> +                        ncopies, vector_stmt, stmt_info, false,
> +                        vect_masking_body);
> +}
> +
>  /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
>     the loop preheader for the vectorized stmt STMT.  */
>
> @@ -1791,6 +1870,20 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>                && !useless_type_conversion_p (vectype, rhs_vectype)))
>      return false;
>
> +  if (LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +    {
> +      /* Check that mask conjunction is supported.  */
> +      optab tab;
> +      tab = optab_for_tree_code (BIT_AND_EXPR, vectype, optab_default);
> +      if (!tab || optab_handler (tab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
> +       {
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                            "cannot be masked: unsupported mask operation\n");
> +         LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +       }
> +    }
> +
>    if (!vec_stmt) /* transformation not required.  */
>      {
>        STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
> @@ -1799,6 +1892,15 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>                                NULL, NULL, NULL);
>        else
>         vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
> +
> +      if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +       {
> +         if (is_store)
> +           vect_model_store_masking_cost (stmt_info, ncopies);
> +         else
> +           vect_model_load_masking_cost (stmt_info, ncopies);
> +       }
> +
>        return true;
>      }
>
> @@ -2795,6 +2897,18 @@ vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
>    if (slp_node || PURE_SLP_STMT (stmt_info))
>      return false;
>
> +  /* Masked clones are not yet supported.  But we allow
> +     calls which may be just called with no mask.  */
> +  if (!(gimple_call_flags (stmt) & ECF_CONST)
> +      || (gimple_call_flags (stmt) & ECF_LOOPING_CONST_OR_PURE))
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "cannot be masked: non-const call "
> +                        "(masked calls are not supported)\n");
> +      LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +    }
> +
>    /* Process function arguments.  */
>    nargs = gimple_call_num_args (stmt);
>
> @@ -5335,6 +5449,14 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>                                  "negative step and reversing not supported.\n");
>               return false;
>             }
> +         if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +           {
> +             LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +             if (dump_enabled_p ())
> +               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                                "cannot be masked: negative step"
> +                                " is not supported.");
> +           }
>         }
>      }
>
> @@ -5343,6 +5465,15 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>        grouped_store = true;
>        first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
>        group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
> +      if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +       {
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                            "cannot be masked: grouped access"
> +                            " is not supported." );
> +         LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +      }
> +
>        if (!slp
>           && !PURE_SLP_STMT (stmt_info)
>           && !STMT_VINFO_STRIDED_P (stmt_info))
> @@ -5398,6 +5529,44 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>                               "scatter index use not simple.");
>           return false;
>         }
> +      if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +       {
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                            "cannot be masked: gather/scatter is"
> +                            " not supported.");
> +         LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +       }
> +    }
> +
> +  if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo)
> +      && STMT_VINFO_STRIDED_P (stmt_info))
> +    {
> +      LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "cannot be masked: strided store is not"
> +                        " supported.\n");
> +    }
> +
> +  if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo)
> +      && integer_zerop (nested_in_vect_loop_p (loop, stmt)
> +                       ? STMT_VINFO_DR_STEP (stmt_info)
> +                       : DR_STEP (dr)))
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "cannot be masked: invariant store.\n");
> +      LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +    }
> +
> +  if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo)
> +      && !can_mask_load_store (stmt))
> +    {
> +      LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "cannot be masked: unsupported mask store.\n");
>      }
>
>    if (!vec_stmt) /* transformation not required.  */
> @@ -5407,6 +5576,9 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>        if (!PURE_SLP_STMT (stmt_info))
>         vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
>                                NULL, NULL, NULL);
> +      if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +       vect_model_store_masking_cost (stmt_info, ncopies);
> +
>        return true;
>      }
>
> @@ -6312,6 +6484,15 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>        grouped_load = true;
>        /* FORNOW */
>        gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
> +      /* Not yet supported.  */
> +      if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +       {
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                            "cannot be masked: grouped access is not"
> +                            " supported.");
> +         LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +      }
>
>        first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
>
> @@ -6358,6 +6539,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>             }
>
>           LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
> +         LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
>         }
>
>        if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
> @@ -6423,6 +6605,16 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>        gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
>                                                &gather_off, &gather_scale);
>        gcc_assert (gather_decl);
> +      if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +       {
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                           "cannot be masked: gather/scatter is not"
> +                           " supported.\n");
> +         LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +       }
> +
> +
>        if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
>                                &gather_off_vectype))
>         {
> @@ -6434,6 +6626,15 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>      }
>    else if (STMT_VINFO_STRIDED_P (stmt_info))
>      {
> +      if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +       {
> +         LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                            "cannot be masked: strided load is not"
> +                            " supported.\n");
> +       }
> +
>        if ((grouped_load
>            && (slp || PURE_SLP_STMT (stmt_info)))
>           && (group_size > nunits
> @@ -6485,9 +6686,35 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>                                   "\n");
>               return false;
>             }
> +         if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +           {
> +             if (dump_enabled_p ())
> +               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                               "cannot be masked: negative step "
> +                                "for masking.\n");
> +             LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +           }
>         }
>      }
>
> +  if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo)
> +      && integer_zerop (nested_in_vect_loop
> +                       ? STMT_VINFO_DR_STEP (stmt_info)
> +                       : DR_STEP (dr)))
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_NOTE, vect_location,
> +                        "allow invariant load for masked loop.\n");
> +    }
> +  else if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo)
> +          && !can_mask_load_store (stmt))
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "cannot be masked: unsupported masked load.\n");
> +      LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +    }
> +
>    if (!vec_stmt) /* transformation not required.  */
>      {
>        STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
> @@ -6495,6 +6722,9 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>        if (!PURE_SLP_STMT (stmt_info))
>         vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
>                               NULL, NULL, NULL);
> +      if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
> +       vect_model_load_masking_cost (stmt_info, ncopies);
> +
>        return true;
>      }
>
> @@ -7891,6 +8121,43 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
>    return true;
>  }
>
> +/* Return true if vector version of STMT should be masked
> +   in a vectorized loop epilogue (considering usage of the
> +   same VF as for main loop).  */
> +
> +static bool
> +vect_stmt_should_be_masked_for_epilogue (gimple *stmt)
> +{
> +  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> +
> +  /* We should mask all statements accessing memory.  */
> +  if (STMT_VINFO_DATA_REF (stmt_info))
> +    return true;
> +
> +  /* We should also mask all reductions.  */
> +  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
> +      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
> +    return true;
> +
> +  return false;
> +}
> +
> +/* Add a mask required to mask STMT to LOOP_VINFO_REQUIRED_MASKS.  */
> +
> +static void
> +vect_add_required_mask_for_stmt (gimple *stmt)
> +{
> +  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> +  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> +  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> +  unsigned HOST_WIDE_INT nelems = TYPE_VECTOR_SUBPARTS (vectype);
> +  int bit_no = exact_log2 (nelems);
> +
> +  gcc_assert (bit_no >= 0);
> +
> +  LOOP_VINFO_REQUIRED_MASKS (loop_vinfo) |= (1 << bit_no);
> +}
> +
>  /* Make sure the statement is vectorizable.  */
>
>  bool
> @@ -7898,6 +8165,7 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
>  {
>    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
>    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
> +  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
>    enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
>    bool ok;
>    tree scalar_type, vectype;
> @@ -8064,6 +8332,10 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
>        STMT_VINFO_VECTYPE (stmt_info) = vectype;
>     }
>
> +  /* Masking is not supported for SLP yet.  */
> +  if (loop_vinfo && node)
> +    LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
> +
>    if (STMT_VINFO_RELEVANT_P (stmt_info))
>      {
>        gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
> @@ -8123,6 +8395,11 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
>        return false;
>      }
>
> +  if (loop_vinfo
> +      && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo)
> +      && vect_stmt_should_be_masked_for_epilogue (stmt))
> +    vect_add_required_mask_for_stmt (stmt);
> +
>    if (bb_vinfo)
>      return true;
>
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index d3450b6..86c5371 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1033,6 +1033,9 @@ extern void vect_model_store_cost (stmt_vec_info, int, bool,
>  extern void vect_model_load_cost (stmt_vec_info, int, bool, slp_tree,
>                                   stmt_vector_for_cost *,
>                                   stmt_vector_for_cost *);
> +extern void vect_model_load_masking_cost (stmt_vec_info, int);
> +extern void vect_model_store_masking_cost (stmt_vec_info, int);
> +extern void vect_model_simple_masking_cost (stmt_vec_info, int);
>  extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
>                                   enum vect_cost_for_stmt, stmt_vec_info,
>                                   int, enum vect_cost_model_location);

