This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[patch, fortran] Remove bounds checking in DO loops for inline matmul
- From: Thomas Koenig <tkoenig at netcologne dot de>
- To: "fortran at gcc dot gnu dot org" <fortran at gcc dot gnu dot org>, gcc-patches <gcc-patches at gcc dot gnu dot org>
- Date: Sun, 10 Jun 2018 14:50:56 +0200
- Subject: [patch, fortran] Remove bounds checking in DO loops for inline matmul
Hello world,
the attached patch was motivated by the desire to reduce the runtime
of programs with -fcheck=bounds with (some) optimization, in order
to make debugging less time consuming.
This patch removes the run-time bounds checks with -fcheck=bounds
in the DO loops created by matmul inlining. The Necessary checks are
already performed outside the loop, so the inner tests are not needed.
OK for trunk?
Regards
Thomas
2018-06-10 Thomas Koenig <tkoenig@gcc.gnu.org>
* gfortran.h (gfc_expr): Add no_bounds_check field.
* frontend-passes.c (get_array_inq_function): Set no_bounds_check
on function and function argument.
(inline_matmul_assign): Set no_bounds_check on zero expression
and on lhs of zero expression.
Also handle A1B2 case if realloc on assigment is active.
* trans-array.c (gfc_conv_array_ref): Don't do range checking
if expr has no_bounds_check set.
(gfc_conv_expr_descriptor): Set no_bounds_check on ss if expr
has it set.
* trans-expr.c (gfc_trans_assignment_1): Set no_bounds_check
on lss and lss if the corresponding expressions have it set.
2018-06-10 Thomas Koenig <tkoenig@gcc.gnu.org>
* gfortran.dg/inline_matmul_23.f90: New test.
! { dg-do compile }
! { dg-options "-Og -fcheck=bounds -fdump-tree-optimized" }
! Check that bounds checking is done only before the matrix
! multiplication.
module y
contains
subroutine x(a,b,c)
real, dimension(:,:) :: a, b, c
c = matmul(a,b)
end subroutine x
end module y
! { dg-final { scan-tree-dump-times "_runtime_error" 3 "optimized" } }
Index: gfortran.h
===================================================================
--- gfortran.h (Revision 261245)
+++ gfortran.h (Arbeitskopie)
@@ -2144,6 +2144,10 @@ typedef struct gfc_expr
/* Will require finalization after use. */
unsigned int must_finalize : 1;
+ /* Set this if no range check should be performed on this expression. */
+
+ unsigned int no_bounds_check : 1;
+
/* If an expression comes from a Hollerith constant or compile-time
evaluation of a transfer statement, it may have a prescribed target-
memory representation, and these cannot always be backformed from
Index: frontend-passes.c
===================================================================
--- frontend-passes.c (Revision 261248)
+++ frontend-passes.c (Arbeitskopie)
@@ -2938,9 +2938,14 @@ get_array_inq_function (gfc_isym_id id, gfc_expr *
gfc_index_integer_kind);
ec = gfc_copy_expr (e);
+
+ /* No bounds checking, this will be done before the loops if -fcheck=bounds
+ is in effect. */
+ ec->no_bounds_check = 1;
fcn = gfc_build_intrinsic_call (current_ns, id, name, e->where, 3,
ec, dim_arg, kind);
gfc_simplify_expr (fcn, 0);
+ fcn->no_bounds_check = 1;
return fcn;
}
@@ -3645,6 +3650,9 @@ scalarized_expr (gfc_expr *e_in, gfc_expr **index,
}
}
+ /* Bounds checking will be done before the loops if -fcheck=bounds
+ is in effect. */
+ e->no_bounds_check = 1;
return e;
}
@@ -3832,7 +3840,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt
m_case = A1B2;
}
}
-
+
if (m_case == none)
return 0;
@@ -3911,10 +3919,13 @@ inline_matmul_assign (gfc_code **c, int *walk_subt
next_code_point = &if_limit->block->next;
}
+ zero_e->no_bounds_check = 1;
+
assign_zero = XCNEW (gfc_code);
assign_zero->op = EXEC_ASSIGN;
assign_zero->loc = co->loc;
assign_zero->expr1 = gfc_copy_expr (expr1);
+ assign_zero->expr1->no_bounds_check = 1;
assign_zero->expr2 = zero_e;
/* Handle the reallocation, if needed. */
@@ -3926,20 +3937,33 @@ inline_matmul_assign (gfc_code **c, int *walk_subt
bounds checking, the rest will be allocated. Also check this
for A2B1. */
- if ((gfc_option.rtcheck & GFC_RTCHECK_BOUNDS) && (m_case == A2B2 || m_case == A2B1))
+ if (gfc_option.rtcheck & GFC_RTCHECK_BOUNDS)
{
gfc_code *test;
- gfc_expr *a2, *b1;
+ if (m_case == A2B2 || m_case == A2B1)
+ {
+ gfc_expr *a2, *b1;
- a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
- b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
- test = runtime_error_ne (b1, a2, "Dimension of array B incorrect "
- "in MATMUL intrinsic: Is %ld, should be %ld");
- *next_code_point = test;
- next_code_point = &test->next;
+ a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
+ b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
+ test = runtime_error_ne (b1, a2, "Dimension of array B incorrect "
+ "in MATMUL intrinsic: Is %ld, should be %ld");
+ *next_code_point = test;
+ next_code_point = &test->next;
+ }
+ else if (m_case == A1B2)
+ {
+ gfc_expr *a1, *b1;
+
+ a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
+ b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
+ test = runtime_error_ne (b1, a1, "Dimension of array B incorrect "
+ "in MATMUL intrinsic: Is %ld, should be %ld");
+ *next_code_point = test;
+ next_code_point = &test->next;
+ }
}
-
lhs_alloc = matmul_lhs_realloc (expr1, matrix_a, matrix_b, m_case);
*next_code_point = lhs_alloc;
Index: trans-array.c
===================================================================
--- trans-array.c (Revision 261348)
+++ trans-array.c (Arbeitskopie)
@@ -3583,7 +3583,7 @@ gfc_conv_array_ref (gfc_se * se, gfc_array_ref * a
gfc_conv_expr_type (&indexse, ar->start[n], gfc_array_index_type);
gfc_add_block_to_block (&se->pre, &indexse.pre);
- if (gfc_option.rtcheck & GFC_RTCHECK_BOUNDS)
+ if ((gfc_option.rtcheck & GFC_RTCHECK_BOUNDS) && ! expr->no_bounds_check)
{
/* Check array bounds. */
tree cond;
@@ -7181,6 +7181,9 @@ gfc_conv_expr_descriptor (gfc_se *se, gfc_expr *ex
/* The right-hand side of a pointer assignment mustn't use a temporary. */
gcc_assert (!se->direct_byref);
+ /* Do we need bounds checking or not? */
+ ss->no_bounds_check = expr->no_bounds_check;
+
/* Setup the scalarizing loops and bounds. */
gfc_conv_ss_startstride (&loop);
Index: trans-expr.c
===================================================================
--- trans-expr.c (Revision 261348)
+++ trans-expr.c (Arbeitskopie)
@@ -9991,6 +9991,8 @@ gfc_trans_assignment_1 (gfc_expr * expr1, gfc_expr
|| expr2->value.function.isym->conversion)))
lss->is_alloc_lhs = 1;
}
+ else
+ lss->no_bounds_check = expr1->no_bounds_check;
rss = NULL;
@@ -10045,6 +10047,7 @@ gfc_trans_assignment_1 (gfc_expr * expr1, gfc_expr
if (is_poly_assign && expr2->rank == 0 && !UNLIMITED_POLY (expr2))
rss->info->type = GFC_SS_REFERENCE;
+ rss->no_bounds_check = expr2->no_bounds_check;
/* Associate the SS with the loop. */
gfc_add_ss_to_loop (&loop, lss);
gfc_add_ss_to_loop (&loop, rss);