This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[gfortran,patch] PR fortran/28163: Don't call library function for string copying
- From: FX Coudert <fxcoudert at gmail dot com>
- To: gfortran <fortran at gcc dot gnu dot org>, patch <gcc-patches at gcc dot gnu dot org>
- Date: Sun, 25 Jun 2006 14:44:53 +0200
- Subject: [gfortran,patch] PR fortran/28163: Don't call library function for string copying
:ADDPATCH fortran:
Currently, gfortran generates a library call for every string copy. This
is a major performance bottleneck: replacing these calls with inline code
and direct calls to memmove/memset, as the attached patch does, gives a
27% speedup in execution time for aermod (from the Polyhedron benchmark
suite).
I'd welcome a careful review of this patch. In particular, I think the
following lines
+ tmp4 = gfc_chainon_list (tmp4, build_int_cst
+ (gfc_get_int_type (gfc_c_int_kind), ' '));
are wrong for cross-compilers, as the host's character code for ' ' may
differ from the target's (e.g. an ASCII host building for an EBCDIC
target). I don't know how to do any better, so I'd welcome hints on this
one.
Bootstrapped and regtested on i686-linux.
OK for mainline?
FX
Index: trans-expr.c
===================================================================
--- trans-expr.c (revision 114972)
+++ trans-expr.c (working copy)
@@ -2196,6 +2196,11 @@
tree dsc;
tree ssc;
tree cond;
+ tree cond2;
+ tree tmp2;
+ tree tmp3;
+ tree tmp4;
+ stmtblock_t tempblock;
/* Deal with single character specially. */
dsc = gfc_to_single_character (dlen, dest);
@@ -2206,15 +2211,62 @@
return;
}
+ /* Do nothing if the destination length is zero. */
cond = fold_build2 (GT_EXPR, boolean_type_node, dlen,
build_int_cst (gfc_charlen_type_node, 0));
- tmp = NULL_TREE;
- tmp = gfc_chainon_list (tmp, dlen);
- tmp = gfc_chainon_list (tmp, dest);
- tmp = gfc_chainon_list (tmp, slen);
- tmp = gfc_chainon_list (tmp, src);
- tmp = build_function_call_expr (gfor_fndecl_copy_string, tmp);
+ /* The following code was previously in _gfortran_copy_string:
+
+ // The two strings may overlap so we use memmove.
+ void
+ copy_string (GFC_INTEGER_4 destlen, char * dest,
+ GFC_INTEGER_4 srclen, const char * src)
+ {
+ if (srclen >= destlen)
+ {
+ // This will truncate if too long.
+ memmove (dest, src, destlen);
+ }
+ else
+ {
+ memmove (dest, src, srclen);
+ // Pad with spaces.
+ memset (&dest[srclen], ' ', destlen - srclen);
+ }
+ }
+
+ We're now doing it here for better optimization, but the logic
+ is the same. */
+
+ /* Truncate string if source is too long. */
+ cond2 = fold_build2 (GE_EXPR, boolean_type_node, slen, dlen);
+ tmp2 = gfc_chainon_list (NULL_TREE, dest);
+ tmp2 = gfc_chainon_list (tmp2, src);
+ tmp2 = gfc_chainon_list (tmp2, dlen);
+ tmp2 = build_function_call_expr (built_in_decls[BUILT_IN_MEMMOVE], tmp2);
+
+ /* Else copy and pad with spaces. */
+ tmp3 = gfc_chainon_list (NULL_TREE, dest);
+ tmp3 = gfc_chainon_list (tmp3, src);
+ tmp3 = gfc_chainon_list (tmp3, slen);
+ tmp3 = build_function_call_expr (built_in_decls[BUILT_IN_MEMMOVE], tmp3);
+
+ tmp4 = fold_build2 (PLUS_EXPR, pchar_type_node, dest,
+ fold_convert (pchar_type_node, slen));
+ tmp4 = gfc_chainon_list (NULL_TREE, tmp4);
+ tmp4 = gfc_chainon_list (tmp4, build_int_cst
+ (gfc_get_int_type (gfc_c_int_kind), ' '));
+ tmp4 = gfc_chainon_list (tmp4, fold_build2 (MINUS_EXPR, TREE_TYPE(dlen),
+ dlen, slen));
+ tmp4 = build_function_call_expr (built_in_decls[BUILT_IN_MEMSET], tmp4);
+
+ gfc_init_block (&tempblock);
+ gfc_add_expr_to_block (&tempblock, tmp3);
+ gfc_add_expr_to_block (&tempblock, tmp4);
+ tmp3 = gfc_finish_block (&tempblock);
+
+ /* The whole copy_string function is there. */
+ tmp = fold_build3 (COND_EXPR, void_type_node, cond2, tmp2, tmp3);
tmp = fold_build3 (COND_EXPR, void_type_node, cond, tmp, build_empty_stmt ());
gfc_add_expr_to_block (block, tmp);
}
Index: trans.h
===================================================================
--- trans.h (revision 114972)
+++ trans.h (working copy)
@@ -508,7 +508,6 @@
extern GTY(()) tree gfor_fndecl_math_exponent16;
/* String functions. */
-extern GTY(()) tree gfor_fndecl_copy_string;
extern GTY(()) tree gfor_fndecl_compare_string;
extern GTY(()) tree gfor_fndecl_concat_string;
extern GTY(()) tree gfor_fndecl_string_len_trim;
Index: trans-decl.c
===================================================================
--- trans-decl.c (revision 114972)
+++ trans-decl.c (working copy)
@@ -121,7 +121,6 @@
/* String functions. */
-tree gfor_fndecl_copy_string;
tree gfor_fndecl_compare_string;
tree gfor_fndecl_concat_string;
tree gfor_fndecl_string_len_trim;
@@ -1938,13 +1937,6 @@
tree gfc_c_int_type_node = gfc_get_int_type (gfc_c_int_kind);
/* String functions. */
- gfor_fndecl_copy_string =
- gfc_build_library_function_decl (get_identifier (PREFIX("copy_string")),
- void_type_node,
- 4,
- gfc_charlen_type_node, pchar_type_node,
- gfc_charlen_type_node, pchar_type_node);
-
gfor_fndecl_compare_string =
gfc_build_library_function_decl (get_identifier (PREFIX("compare_string")),
gfc_int4_type_node,
2006-06-25 Francois-Xavier Coudert <coudert@clipper.ens.fr>
PR fortran/28163
* trans-expr.c (gfc_trans_string_copy): Generate code instead of
calling a library function.
* trans-decl.c (gfc_build_intrinsic_function_decls): Don't
create gfor_fndecl_copy_string.
* trans.h (gfor_fndecl_copy_string): Remove declaration.