This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Fix PR32377: can't determine dependence
- From: "Sebastian Pop" <sebpop at gmail dot com>
- To: "GCC Patches" <gcc-patches at gcc dot gnu dot org>
- Date: Wed, 31 Oct 2007 08:46:13 -0500
- Subject: Fix PR32377: can't determine dependence
Hi,
We used to fail to provide a useful representation of a dependence
relation in the case where the number of iterations was not known, or
was a symbolic expression: the number of iterations is used to compute
the last conflicting iteration, but is not needed for computing the
first conflicting iteration and the next ones. This patch rewrites a
part of the dependence analysis test that was stopping the computation
too early, for providing more precise dependence relations.
Regstrapped on amd64-linux, committed to trunk.
Sebastian
--
AMD - GNU Tools
* tree-data-ref.c (compute_overlap_steps_for_affine_univar): Make it
work also for unknown number of iterations.
(analyze_subscript_affine_affine): Clean up. Don't fail when the
number of iterations is not known.
email:sebpop@gmail.com
branch:trunk
revision:HEAD
configure:
make:
check:
Index: gcc/testsuite/gfortran.dg/vect/pr32377.f90
===================================================================
--- gcc/testsuite/gfortran.dg/vect/pr32377.f90 (revision 0)
+++ gcc/testsuite/gfortran.dg/vect/pr32377.f90 (revision 0)
@@ -0,0 +1,15 @@
+! { dg-do compile }
+! { dg-require-effective-target vect_float }
+
+subroutine s243(ntimes,ld,n,ctime,dtime,a,b,c,d,e,aa,bb,cc)
+
+ integer ntimes,ld,n,i,nl
+ real a(n),b(n),c(n),d(n),e(n),aa(ld,n),bb(ld,n),cc(ld,n)
+ real t1,t2,chksum,ctime,dtime,cs1d
+ b(:n-1)= b(:n-1)+(c(:n-1)+e(:n-1))*d(:n-1)
+ a(:n-1)= b(:n-1)+a(2:n)*d(:n-1)
+ return
+end subroutine s243
+
+! { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } }
+! { dg-final { cleanup-tree-dump "vect" } }
Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c (revision 129775)
+++ gcc/tree-data-ref.c (working copy)
@@ -1819,9 +1819,15 @@ compute_overlap_steps_for_affine_univar
step_overlaps_a = step_b / gcd_steps_a_b;
step_overlaps_b = step_a / gcd_steps_a_b;
- tau2 = FLOOR_DIV (niter, step_overlaps_a);
- tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
- last_conflict = tau2;
+ if (niter > 0)
+ {
+ tau2 = FLOOR_DIV (niter, step_overlaps_a);
+ tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
+ last_conflict = tau2;
+ *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
+ }
+ else
+ *last_conflicts = chrec_dont_know;
*overlaps_a = affine_fn_univar (integer_zero_node, dim,
build_int_cst (NULL_TREE,
@@ -1829,7 +1835,6 @@ compute_overlap_steps_for_affine_univar
*overlaps_b = affine_fn_univar (integer_zero_node, dim,
build_int_cst (NULL_TREE,
step_overlaps_b));
- *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
}
else
@@ -1985,7 +1990,6 @@ analyze_subscript_affine_affine (tree ch
{
unsigned nb_vars_a, nb_vars_b, dim;
HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
- HOST_WIDE_INT tau1, tau2;
lambda_matrix A, U, S;
if (eq_evolutions_p (chrec_a, chrec_b))
@@ -2043,18 +2047,7 @@ analyze_subscript_affine_affine (tree ch
false);
niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
false);
- if (niter_a < 0 || niter_b < 0)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
- *overlaps_a = conflict_fn_not_known ();
- *overlaps_b = conflict_fn_not_known ();
- *last_conflicts = chrec_dont_know;
- goto end_analyze_subs_aa;
- }
-
niter = MIN (niter_a, niter_b);
-
step_a = int_cst_value (CHREC_RIGHT (chrec_a));
step_b = int_cst_value (CHREC_RIGHT (chrec_b));
@@ -2138,31 +2131,7 @@ analyze_subscript_affine_affine (tree ch
| x0 = i0 + i1 * t,
| y0 = j0 + j1 * t. */
-
- HOST_WIDE_INT i0, j0, i1, j1;
-
- /* X0 and Y0 are the first iterations for which there is a
- dependence. X0, Y0 are two solutions of the Diophantine
- equation: chrec_a (X0) = chrec_b (Y0). */
- HOST_WIDE_INT x0, y0;
- HOST_WIDE_INT niter, niter_a, niter_b;
-
- niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
- false);
- niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
- false);
-
- if (niter_a < 0 || niter_b < 0)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
- *overlaps_a = conflict_fn_not_known ();
- *overlaps_b = conflict_fn_not_known ();
- *last_conflicts = chrec_dont_know;
- goto end_analyze_subs_aa;
- }
-
- niter = MIN (niter_a, niter_b);
+ HOST_WIDE_INT i0, j0, i1, j1;
i0 = U[0][0] * gamma / gcd_alpha_beta;
j0 = U[0][1] * gamma / gcd_alpha_beta;
@@ -2179,80 +2148,72 @@ analyze_subscript_affine_affine (tree ch
*overlaps_a = conflict_fn_no_dependence ();
*overlaps_b = conflict_fn_no_dependence ();
*last_conflicts = integer_zero_node;
+ goto end_analyze_subs_aa;
}
- else
+ if (i1 > 0 && j1 > 0)
{
- if (i1 > 0)
- {
- tau1 = CEIL (-i0, i1);
- tau2 = FLOOR_DIV (niter - i0, i1);
+ HOST_WIDE_INT niter_a = estimated_loop_iterations_int
+ (get_chrec_loop (chrec_a), false);
+ HOST_WIDE_INT niter_b = estimated_loop_iterations_int
+ (get_chrec_loop (chrec_b), false);
+ HOST_WIDE_INT niter = MIN (niter_a, niter_b);
+
+ /* (X0, Y0) is a solution of the Diophantine equation:
+ "chrec_a (X0) = chrec_b (Y0)". */
+ HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
+ CEIL (-j0, j1));
+ HOST_WIDE_INT x0 = i1 * tau1 + i0;
+ HOST_WIDE_INT y0 = j1 * tau1 + j0;
+
+ /* (X1, Y1) is the smallest positive solution of the eq
+ "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
+ first conflict occurs. */
+ HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
+ HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
+ HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
- if (j1 > 0)
+ if (niter > 0)
+ {
+ HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
+ FLOOR_DIV (niter - j0, j1));
+ HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
+
+ /* If the overlap occurs outside of the bounds of the
+ loop, there is no dependence. */
+ if (x1 > niter || y1 > niter)
{
- int last_conflict, min_multiple;
- tau1 = MAX (tau1, CEIL (-j0, j1));
- tau2 = MIN (tau2, FLOOR_DIV (niter - j0, j1));
-
- x0 = i1 * tau1 + i0;
- y0 = j1 * tau1 + j0;
-
- /* At this point (x0, y0) is one of the
- solutions to the Diophantine equation. The
- next step has to compute the smallest
- positive solution: the first conflicts. */
- min_multiple = MIN (x0 / i1, y0 / j1);
- x0 -= i1 * min_multiple;
- y0 -= j1 * min_multiple;
-
- tau1 = (x0 - i0)/i1;
- last_conflict = tau2 - tau1;
-
- /* If the overlap occurs outside of the bounds of the
- loop, there is no dependence. */
- if (x0 > niter || y0 > niter)
- {
- *overlaps_a = conflict_fn_no_dependence ();
- *overlaps_b = conflict_fn_no_dependence ();
- *last_conflicts = integer_zero_node;
- }
- else
- {
- *overlaps_a
- = conflict_fn (1,
- affine_fn_univar (build_int_cst (NULL_TREE, x0),
- 1,
- build_int_cst (NULL_TREE, i1)));
- *overlaps_b
- = conflict_fn (1,
- affine_fn_univar (build_int_cst (NULL_TREE, y0),
- 1,
- build_int_cst (NULL_TREE, j1)));
- *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
- }
+ *overlaps_a = conflict_fn_no_dependence ();
+ *overlaps_b = conflict_fn_no_dependence ();
+ *last_conflicts = integer_zero_node;
+ goto end_analyze_subs_aa;
}
else
- {
- /* FIXME: For the moment, the upper bound of the
- iteration domain for j is not checked. */
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
- *overlaps_a = conflict_fn_not_known ();
- *overlaps_b = conflict_fn_not_known ();
- *last_conflicts = chrec_dont_know;
- }
+ *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
}
-
else
- {
- /* FIXME: For the moment, the upper bound of the
- iteration domain for i is not checked. */
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
- *overlaps_a = conflict_fn_not_known ();
- *overlaps_b = conflict_fn_not_known ();
- *last_conflicts = chrec_dont_know;
- }
+ *last_conflicts = chrec_dont_know;
+
+ *overlaps_a
+ = conflict_fn (1,
+ affine_fn_univar (build_int_cst (NULL_TREE, x1),
+ 1,
+ build_int_cst (NULL_TREE, i1)));
+ *overlaps_b
+ = conflict_fn (1,
+ affine_fn_univar (build_int_cst (NULL_TREE, y1),
+ 1,
+ build_int_cst (NULL_TREE, j1)));
+ }
+ else
+ {
+ /* FIXME: For the moment, the upper bound of the
+ iteration domain for i and j is not checked. */
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
+ *overlaps_a = conflict_fn_not_known ();
+ *overlaps_b = conflict_fn_not_known ();
+ *last_conflicts = chrec_dont_know;
}
}
else
@@ -2264,7 +2225,6 @@ analyze_subscript_affine_affine (tree ch
*last_conflicts = chrec_dont_know;
}
}
-
else
{
if (dump_file && (dump_flags & TDF_DETAILS))