This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH][gimple-linterchange] Rewrite compute_access_stride
- From: "Bin.Cheng" <amker dot cheng at gmail dot com>
- To: Richard Biener <rguenther at suse dot de>
- Cc: gcc-patches List <gcc-patches at gcc dot gnu dot org>, Bin Cheng <bin dot cheng at arm dot com>
- Date: Fri, 1 Dec 2017 12:54:09 +0000
- Subject: Re: [PATCH][gimple-linterchange] Rewrite compute_access_stride
- Authentication-results: sourceware.org; auth=none
- References: <alpine.LSU.2.20.1712011325250.12252@zhemvz.fhfr.qr>
On Fri, Dec 1, 2017 at 12:31 PM, Richard Biener <rguenther@suse.de> wrote:
>
> This is the access stride computation change. Apart from the
> stride extraction I adjusted the cost model to handle non-constant
> strides by checking if either is a multiple of the other and
> simply fail interchanging if it's the wrong way around for one
> ref or if the simple method using multiple_of_p fails to determine
> either case.
>
> This still handles the bwaves case.
>
> I think we want additional testcases with variable strides for each
> case we add - I believe this is the most conservative way to treat
> variable strides.
>
> It may be inconsistent with the constant stride handling where you
> allow for many OK DRs to outweigh a few not OK DRs, but as it
> worked for bwaves it must be good enough ;)
>
> Tested on x86_64-unknown-linux-gnu (just the interchange testcases).
>
> Currently running a bootstrap with -O3 -g -floop-interchange.
>
> Ok for the branch?
Ok. This actually is closer to the motivation: a simple/conservative cost
model that only transforms code when it's known to be good.
I will check the impact on the number of interchange in spec.
Thanks,
bin
>
> Richard.
>
> 2017-12-01 Richard Biener <rguenther@suse.de>
>
> * gimple-loop-interchange.cc (estimate_val_by_simplify_replace):
> Remove.
> (compute_access_stride): Rewrite using instantiate_scev,
> remove constant substitution.
> (should_interchange_loops): Adjust for non-constant strides.
>
> Index: gcc/gimple-loop-interchange.cc
> ===================================================================
> --- gcc/gimple-loop-interchange.cc (revision 255303)
> +++ gcc/gimple-loop-interchange.cc (working copy)
> @@ -1325,42 +1325,6 @@ tree_loop_interchange::move_code_to_inne
> }
> }
>
> -/* Estimate and return the value of EXPR by replacing variables in EXPR
> - with CST_TREE and simplifying. */
> -
> -static tree
> -estimate_val_by_simplify_replace (tree expr, tree cst_tree)
> -{
> - unsigned i, n;
> - tree ret = NULL_TREE, e, se;
> -
> - if (!expr)
> - return NULL_TREE;
> -
> - /* Do not bother to replace constants. */
> - if (CONSTANT_CLASS_P (expr))
> - return expr;
> -
> - if (!EXPR_P (expr))
> - return cst_tree;
> -
> - n = TREE_OPERAND_LENGTH (expr);
> - for (i = 0; i < n; i++)
> - {
> - e = TREE_OPERAND (expr, i);
> - se = estimate_val_by_simplify_replace (e, cst_tree);
> - if (e == se)
> - continue;
> -
> - if (!ret)
> - ret = copy_node (expr);
> -
> - TREE_OPERAND (ret, i) = se;
> - }
> -
> - return (ret ? fold (ret) : expr);
> -}
> -
> /* Given data reference DR in LOOP_NEST, the function computes DR's access
> stride at each level of loop from innermost LOOP to outer. On success,
> it saves access stride at each level loop in a vector which is pointed
> @@ -1388,44 +1352,31 @@ compute_access_stride (struct loop *loop
>
> tree ref = DR_REF (dr);
> tree scev_base = build_fold_addr_expr (ref);
> - tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref));
> - tree niters = build_int_cst (sizetype, AVG_LOOP_NITER);
> - access_size = fold_build2 (MULT_EXPR, sizetype, niters, access_size);
> -
> - do {
> - tree scev_fn = analyze_scalar_evolution (loop, scev_base);
> - if (chrec_contains_undetermined (scev_fn)
> - || chrec_contains_symbols_defined_in_loop (scev_fn, loop->num))
> - break;
> -
> - if (TREE_CODE (scev_fn) != POLYNOMIAL_CHREC)
> - {
> - scev_base = scev_fn;
> - strides->safe_push (build_int_cst (sizetype, 0));
> - continue;
> - }
> -
> - scev_base = CHREC_LEFT (scev_fn);
> - if (tree_contains_chrecs (scev_base, NULL))
> - break;
> -
> - tree scev_step = fold_convert (sizetype, CHREC_RIGHT (scev_fn));
> -
> - enum ev_direction scev_dir = scev_direction (scev_fn);
> - /* Estimate if step isn't constant. */
> - if (scev_dir == EV_DIR_UNKNOWN)
> - {
> - scev_step = estimate_val_by_simplify_replace (scev_step, niters);
> - if (TREE_CODE (scev_step) != INTEGER_CST
> - || tree_int_cst_lt (scev_step, access_size))
> - scev_step = access_size;
> - }
> - /* Compute absolute value of scev step. */
> - else if (scev_dir == EV_DIR_DECREASES)
> - scev_step = fold_build1 (NEGATE_EXPR, sizetype, scev_step);
> -
> - strides->safe_push (scev_step);
> - } while (loop != loop_nest && (loop = loop_outer (loop)) != NULL);
> + tree scev = analyze_scalar_evolution (loop, scev_base);
> + scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
> + if (! chrec_contains_undetermined (scev))
> + {
> + tree sl = scev;
> + struct loop *expected = loop;
> + while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
> + {
> + struct loop *sl_loop = get_chrec_loop (sl);
> + while (sl_loop != expected)
> + {
> + strides->safe_push (size_int (0));
> + expected = loop_outer (expected);
> + }
> + strides->safe_push (CHREC_RIGHT (sl));
> + sl = CHREC_LEFT (sl);
> + expected = loop_outer (expected);
> + }
> + if (! tree_contains_chrecs (sl, NULL))
> + while (expected != loop_outer (loop_nest))
> + {
> + strides->safe_push (size_int (0));
> + expected = loop_outer (expected);
> + }
> + }
>
> dr->aux = strides;
> }
> @@ -1538,6 +1489,9 @@ should_interchange_loops (unsigned i_idx
> struct data_reference *dr;
> bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
> widest_int iloop_strides = 0, oloop_strides = 0;
> + unsigned num_unresolved_drs = 0;
> + unsigned num_resolved_ok_drs = 0;
> + unsigned num_resolved_not_ok_drs = 0;
>
> if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
> fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
> @@ -1546,28 +1500,42 @@ should_interchange_loops (unsigned i_idx
> {
> vec<tree> *stride = DR_ACCESS_STRIDE (dr);
> tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
> - gcc_assert (TREE_CODE (iloop_stride) == INTEGER_CST);
> - gcc_assert (TREE_CODE (oloop_stride) == INTEGER_CST);
>
> bool subloop_stride_p = false;
> /* Data ref can't be invariant or sequential access at current loop if
> its address changes with respect to any subloops. */
> for (j = i_idx + 1; j < stride->length (); ++j)
> - if (integer_nonzerop ((*stride)[j]))
> + if (!integer_zerop ((*stride)[j]))
> {
> subloop_stride_p = true;
> break;
> }
>
> - if (integer_nonzerop (iloop_stride))
> - iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
> - else if (!subloop_stride_p)
> - num_old_inv_drs++;
> -
> - if (integer_nonzerop (oloop_stride))
> - oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
> - else if (!subloop_stride_p)
> - num_new_inv_drs++;
> + if (integer_zerop (iloop_stride))
> + {
> + if (!subloop_stride_p)
> + num_old_inv_drs++;
> + }
> + if (integer_zerop (oloop_stride))
> + {
> + if (!subloop_stride_p)
> + num_new_inv_drs++;
> + }
> +
> + if (TREE_CODE (iloop_stride) == INTEGER_CST
> + && TREE_CODE (oloop_stride) == INTEGER_CST)
> + {
> + iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
> + oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
> + }
> + else if (multiple_of_p (TREE_TYPE (iloop_stride),
> + iloop_stride, oloop_stride))
> + num_resolved_ok_drs++;
> + else if (multiple_of_p (TREE_TYPE (iloop_stride),
> + oloop_stride, iloop_stride))
> + num_resolved_not_ok_drs++;
> + else
> + num_unresolved_drs++;
>
> /* Data ref can't be sequential access if its address changes in sub
> loop. */
> @@ -1581,10 +1549,12 @@ should_interchange_loops (unsigned i_idx
> interchange. Note invariant is considered sequential here. */
> tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
> if (all_seq_dr_before_p
> - && wi::gtu_p (wi::to_wide (iloop_stride), wi::to_wide (access_size)))
> + && ! (integer_zerop (iloop_stride)
> + || operand_equal_p (access_size, iloop_stride, 0)))
> all_seq_dr_before_p = false;
> if (all_seq_dr_after_p
> - && wi::gtu_p (wi::to_wide (oloop_stride), wi::to_wide (access_size)))
> + && ! (integer_zerop (oloop_stride)
> + || operand_equal_p (access_size, oloop_stride, 0)))
> all_seq_dr_after_p = false;
> }
>
> @@ -1601,8 +1571,17 @@ should_interchange_loops (unsigned i_idx
> fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
> all_seq_dr_before_p ? "true" : "false",
> all_seq_dr_after_p ? "true" : "false");
> + fprintf (dump_file, "OK to interchage with variable strides: %d\n",
> + num_resolved_ok_drs);
> + fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
> + num_resolved_not_ok_drs);
> + fprintf (dump_file, "Variable strides we cannot decide: %d\n",
> + num_unresolved_drs);
> }
>
> + if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
> + return false;
> +
> /* We use different stride comparison ratio for interchanging innermost
> two loops or not. The idea is to be conservative in interchange for
> the innermost loops. */