This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH][gimple-linterchange] Rewrite compute_access_stride
- From: "Bin.Cheng" <amker dot cheng at gmail dot com>
- To: Richard Biener <rguenther at suse dot de>
- Cc: gcc-patches List <gcc-patches at gcc dot gnu dot org>, Bin Cheng <bin dot cheng at arm dot com>
- Date: Fri, 1 Dec 2017 12:54:09 +0000
- Subject: Re: [PATCH][gimple-linterchange] Rewrite compute_access_stride
- Authentication-results: sourceware.org; auth=none
- References: <alpine.LSU.2.20.1712011325250.12252@zhemvz.fhfr.qr>
On Fri, Dec 1, 2017 at 12:31 PM, Richard Biener <rguenther@suse.de> wrote:
>
> This is the access stride computation change. Apart from the
> stride extraction I adjusted the cost model to handle non-constant
> strides by checking if either is a multiple of the other and
> simply fail interchanging if it's the wrong way around for one
> ref or if the simple method using multiple_of_p fails to determine
> either case.
>
> This still handles the bwaves case.
>
> I think we want additional testcases with variable strides for each
> case we add - I believe this is the most conservative way to treat
> variable strides.
>
> It may be inconsistent with the constant stride handling where you
> allow for many OK DRs to outweigh a few not OK DRs, but as it
> worked for bwaves it must be good enough ;)
>
> Tested on x86_64-unknown-linux-gnu (just the interchange testcases).
>
> Currently running a bootstrap with -O3 -g -floop-interchange.
>
> Ok for the branch?
Ok. This actually is closer to the motivation: a simple/conservative cost
model that only transforms code when it's known to be good.
I will check the impact on the number of interchange in spec.
Thanks,
bin
>
> Richard.
>
> 2017-12-01 Richard Biener <rguenther@suse.de>
>
> * gimple-loop-interchange.cc (estimate_val_by_simplify_replace):
> Remove.
> (compute_access_stride): Rewrite using instantiate_scev,
> remove constant substitution.
> (should_interchange_loops): Adjust for non-constant strides.
>
> Index: gcc/gimple-loop-interchange.cc
> ===================================================================
> --- gcc/gimple-loop-interchange.cc (revision 255303)
> +++ gcc/gimple-loop-interchange.cc (working copy)
> @@ -1325,42 +1325,6 @@ tree_loop_interchange::move_code_to_inne
> }
> }
>
> -/* Estimate and return the value of EXPR by replacing variables in EXPR
> - with CST_TREE and simplifying. */
> -
> -static tree
> -estimate_val_by_simplify_replace (tree expr, tree cst_tree)
> -{
> - unsigned i, n;
> - tree ret = NULL_TREE, e, se;
> -
> - if (!expr)
> - return NULL_TREE;
> -
> - /* Do not bother to replace constants. */
> - if (CONSTANT_CLASS_P (expr))
> - return expr;
> -
> - if (!EXPR_P (expr))
> - return cst_tree;
> -
> - n = TREE_OPERAND_LENGTH (expr);
> - for (i = 0; i < n; i++)
> - {
> - e = TREE_OPERAND (expr, i);
> - se = estimate_val_by_simplify_replace (e, cst_tree);
> - if (e == se)
> - continue;
> -
> - if (!ret)
> - ret = copy_node (expr);
> -
> - TREE_OPERAND (ret, i) = se;
> - }
> -
> - return (ret ? fold (ret) : expr);
> -}
> -
> /* Given data reference DR in LOOP_NEST, the function computes DR's access
> stride at each level of loop from innermost LOOP to outer. On success,
> it saves access stride at each level loop in a vector which is pointed
> @@ -1388,44 +1352,31 @@ compute_access_stride (struct loop *loop
>
> tree ref = DR_REF (dr);
> tree scev_base = build_fold_addr_expr (ref);
> - tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref));
> - tree niters = build_int_cst (sizetype, AVG_LOOP_NITER);
> - access_size = fold_build2 (MULT_EXPR, sizetype, niters, access_size);
> -
> - do {
> - tree scev_fn = analyze_scalar_evolution (loop, scev_base);
> - if (chrec_contains_undetermined (scev_fn)
> - || chrec_contains_symbols_defined_in_loop (scev_fn, loop->num))
> - break;
> -
> - if (TREE_CODE (scev_fn) != POLYNOMIAL_CHREC)
> - {
> - scev_base = scev_fn;
> - strides->safe_push (build_int_cst (sizetype, 0));
> - continue;
> - }
> -
> - scev_base = CHREC_LEFT (scev_fn);
> - if (tree_contains_chrecs (scev_base, NULL))
> - break;
> -
> - tree scev_step = fold_convert (sizetype, CHREC_RIGHT (scev_fn));
> -
> - enum ev_direction scev_dir = scev_direction (scev_fn);
> - /* Estimate if step isn't constant. */
> - if (scev_dir == EV_DIR_UNKNOWN)
> - {
> - scev_step = estimate_val_by_simplify_replace (scev_step, niters);
> - if (TREE_CODE (scev_step) != INTEGER_CST
> - || tree_int_cst_lt (scev_step, access_size))
> - scev_step = access_size;
> - }
> - /* Compute absolute value of scev step. */
> - else if (scev_dir == EV_DIR_DECREASES)
> - scev_step = fold_build1 (NEGATE_EXPR, sizetype, scev_step);
> -
> - strides->safe_push (scev_step);
> - } while (loop != loop_nest && (loop = loop_outer (loop)) != NULL);
> + tree scev = analyze_scalar_evolution (loop, scev_base);
> + scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
> + if (! chrec_contains_undetermined (scev))
> + {
> + tree sl = scev;
> + struct loop *expected = loop;
> + while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
> + {
> + struct loop *sl_loop = get_chrec_loop (sl);
> + while (sl_loop != expected)
> + {
> + strides->safe_push (size_int (0));
> + expected = loop_outer (expected);
> + }
> + strides->safe_push (CHREC_RIGHT (sl));
> + sl = CHREC_LEFT (sl);
> + expected = loop_outer (expected);
> + }
> + if (! tree_contains_chrecs (sl, NULL))
> + while (expected != loop_outer (loop_nest))
> + {
> + strides->safe_push (size_int (0));
> + expected = loop_outer (expected);
> + }
> + }
>
> dr->aux = strides;
> }
> @@ -1538,6 +1489,9 @@ should_interchange_loops (unsigned i_idx
> struct data_reference *dr;
> bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
> widest_int iloop_strides = 0, oloop_strides = 0;
> + unsigned num_unresolved_drs = 0;
> + unsigned num_resolved_ok_drs = 0;
> + unsigned num_resolved_not_ok_drs = 0;
>
> if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
> fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
> @@ -1546,28 +1500,42 @@ should_interchange_loops (unsigned i_idx
> {
> vec<tree> *stride = DR_ACCESS_STRIDE (dr);
> tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
> - gcc_assert (TREE_CODE (iloop_stride) == INTEGER_CST);
> - gcc_assert (TREE_CODE (oloop_stride) == INTEGER_CST);
>
> bool subloop_stride_p = false;
> /* Data ref can't be invariant or sequential access at current loop if
> its address changes with respect to any subloops. */
> for (j = i_idx + 1; j < stride->length (); ++j)
> - if (integer_nonzerop ((*stride)[j]))
> + if (!integer_zerop ((*stride)[j]))
> {
> subloop_stride_p = true;
> break;
> }
>
> - if (integer_nonzerop (iloop_stride))
> - iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
> - else if (!subloop_stride_p)
> - num_old_inv_drs++;
> -
> - if (integer_nonzerop (oloop_stride))
> - oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
> - else if (!subloop_stride_p)
> - num_new_inv_drs++;
> + if (integer_zerop (iloop_stride))
> + {
> + if (!subloop_stride_p)
> + num_old_inv_drs++;
> + }
> + if (integer_zerop (oloop_stride))
> + {
> + if (!subloop_stride_p)
> + num_new_inv_drs++;
> + }
> +
> + if (TREE_CODE (iloop_stride) == INTEGER_CST
> + && TREE_CODE (oloop_stride) == INTEGER_CST)
> + {
> + iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
> + oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
> + }
> + else if (multiple_of_p (TREE_TYPE (iloop_stride),
> + iloop_stride, oloop_stride))
> + num_resolved_ok_drs++;
> + else if (multiple_of_p (TREE_TYPE (iloop_stride),
> + oloop_stride, iloop_stride))
> + num_resolved_not_ok_drs++;
> + else
> + num_unresolved_drs++;
>
> /* Data ref can't be sequential access if its address changes in sub
> loop. */
> @@ -1581,10 +1549,12 @@ should_interchange_loops (unsigned i_idx
> interchange. Note invariant is considered sequential here. */
> tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
> if (all_seq_dr_before_p
> - && wi::gtu_p (wi::to_wide (iloop_stride), wi::to_wide (access_size)))
> + && ! (integer_zerop (iloop_stride)
> + || operand_equal_p (access_size, iloop_stride, 0)))
> all_seq_dr_before_p = false;
> if (all_seq_dr_after_p
> - && wi::gtu_p (wi::to_wide (oloop_stride), wi::to_wide (access_size)))
> + && ! (integer_zerop (oloop_stride)
> + || operand_equal_p (access_size, oloop_stride, 0)))
> all_seq_dr_after_p = false;
> }
>
> @@ -1601,8 +1571,17 @@ should_interchange_loops (unsigned i_idx
> fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
> all_seq_dr_before_p ? "true" : "false",
> all_seq_dr_after_p ? "true" : "false");
> + fprintf (dump_file, "OK to interchage with variable strides: %d\n",
> + num_resolved_ok_drs);
> + fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
> + num_resolved_not_ok_drs);
> + fprintf (dump_file, "Variable strides we cannot decide: %d\n",
> + num_unresolved_drs);
> }
>
> + if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
> + return false;
> +
> /* We use different stride comparison ratio for interchanging innermost
> two loops or not. The idea is to be conservative in interchange for
> the innermost loops. */