This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gfortran,patch] PR fortran/28163: Don't call library function for string copying


:ADDPATCH fortran:

Currently, gfortran generates library calls for string copying. This is a major performance bottleneck: removing it and replacing these calls by direct code and calls to memmove/memset, as does the attached patch, gives a 27% speedup in execution time for aermod (from the polyhedron benchmark).

I'd welcome a careful review of this patch. In particular, I think the following lines

+  tmp4 = gfc_chainon_list (tmp4, build_int_cst
+                           (gfc_get_int_type (gfc_c_int_kind), ' '));

are wrong for cross-compilers, as the host ' ' may not be the same as the target ' '. But I don't know how to do anything better, so I'd welcome hints on this one.

Bootstrapped and regtested on i686-linux.

OK for mainline?

FX
Index: trans-expr.c
===================================================================
--- trans-expr.c	(revision 114972)
+++ trans-expr.c	(working copy)
@@ -2196,6 +2196,11 @@
   tree dsc;
   tree ssc;
   tree cond;
+  tree cond2;
+  tree tmp2;
+  tree tmp3;
+  tree tmp4;
+  stmtblock_t tempblock;
 
   /* Deal with single character specially.  */
   dsc = gfc_to_single_character (dlen, dest);
@@ -2206,15 +2211,62 @@
       return;
     }
 
+  /* Do nothing if the destination length is zero.  */
   cond = fold_build2 (GT_EXPR, boolean_type_node, dlen,
 		      build_int_cst (gfc_charlen_type_node, 0));
 
-  tmp = NULL_TREE;
-  tmp = gfc_chainon_list (tmp, dlen);
-  tmp = gfc_chainon_list (tmp, dest);
-  tmp = gfc_chainon_list (tmp, slen);
-  tmp = gfc_chainon_list (tmp, src);
-  tmp = build_function_call_expr (gfor_fndecl_copy_string, tmp);
+  /* The following code was previously in _gfortran_copy_string:
+
+       // The two strings may overlap so we use memmove.
+       void
+       copy_string (GFC_INTEGER_4 destlen, char * dest,
+                    GFC_INTEGER_4 srclen, const char * src)
+       {
+         if (srclen >= destlen)
+           {
+             // This will truncate if too long.
+             memmove (dest, src, destlen);
+           }
+         else
+           {
+             memmove (dest, src, srclen);
+             // Pad with spaces.
+             memset (&dest[srclen], ' ', destlen - srclen);
+           }
+       }
+
+     We're now doing it here for better optimization, but the logic
+     is the same.  */
+  
+  /* Truncate string if source is too long.  */
+  cond2 = fold_build2 (GE_EXPR, boolean_type_node, slen, dlen);
+  tmp2 = gfc_chainon_list (NULL_TREE, dest);
+  tmp2 = gfc_chainon_list (tmp2, src);
+  tmp2 = gfc_chainon_list (tmp2, dlen);
+  tmp2 = build_function_call_expr (built_in_decls[BUILT_IN_MEMMOVE], tmp2);
+
+  /* Else copy and pad with spaces.  */
+  tmp3 = gfc_chainon_list (NULL_TREE, dest);
+  tmp3 = gfc_chainon_list (tmp3, src);
+  tmp3 = gfc_chainon_list (tmp3, slen);
+  tmp3 = build_function_call_expr (built_in_decls[BUILT_IN_MEMMOVE], tmp3);
+
+  tmp4 = fold_build2 (PLUS_EXPR, pchar_type_node, dest,
+		      fold_convert (pchar_type_node, slen));
+  tmp4 = gfc_chainon_list (NULL_TREE, tmp4);
+  tmp4 = gfc_chainon_list (tmp4, build_int_cst
+				   (gfc_get_int_type (gfc_c_int_kind), ' '));
+  tmp4 = gfc_chainon_list (tmp4, fold_build2 (MINUS_EXPR, TREE_TYPE(dlen),
+					      dlen, slen));
+  tmp4 = build_function_call_expr (built_in_decls[BUILT_IN_MEMSET], tmp4);
+
+  gfc_init_block (&tempblock);
+  gfc_add_expr_to_block (&tempblock, tmp3);
+  gfc_add_expr_to_block (&tempblock, tmp4);
+  tmp3 = gfc_finish_block (&tempblock);
+
+  /* The whole copy_string function is there.  */
+  tmp = fold_build3 (COND_EXPR, void_type_node, cond2, tmp2, tmp3);
   tmp = fold_build3 (COND_EXPR, void_type_node, cond, tmp, build_empty_stmt ());
   gfc_add_expr_to_block (block, tmp);
 }
Index: trans.h
===================================================================
--- trans.h	(revision 114972)
+++ trans.h	(working copy)
@@ -508,7 +508,6 @@
 extern GTY(()) tree gfor_fndecl_math_exponent16;
 
 /* String functions.  */
-extern GTY(()) tree gfor_fndecl_copy_string;
 extern GTY(()) tree gfor_fndecl_compare_string;
 extern GTY(()) tree gfor_fndecl_concat_string;
 extern GTY(()) tree gfor_fndecl_string_len_trim;
Index: trans-decl.c
===================================================================
--- trans-decl.c	(revision 114972)
+++ trans-decl.c	(working copy)
@@ -121,7 +121,6 @@
 
 /* String functions.  */
 
-tree gfor_fndecl_copy_string;
 tree gfor_fndecl_compare_string;
 tree gfor_fndecl_concat_string;
 tree gfor_fndecl_string_len_trim;
@@ -1938,13 +1937,6 @@
   tree gfc_c_int_type_node = gfc_get_int_type (gfc_c_int_kind);
 
   /* String functions.  */
-  gfor_fndecl_copy_string =
-    gfc_build_library_function_decl (get_identifier (PREFIX("copy_string")),
-				     void_type_node,
-				     4,
-				     gfc_charlen_type_node, pchar_type_node,
-				     gfc_charlen_type_node, pchar_type_node);
-
   gfor_fndecl_compare_string =
     gfc_build_library_function_decl (get_identifier (PREFIX("compare_string")),
 				     gfc_int4_type_node,
2006-06-25  Francois-Xavier Coudert  <coudert@clipper.ens.fr>

	PR fortran/28163
	* trans-expr.c (gfc_trans_string_copy): Generate code instead of
	calling a library function.
	* trans-decl.c (gfc_build_intrinsic_function_decls): Don't
	create gfor_fndecl_copy_string.
	* trans.h (gfor_fndecl_copy_string): Remove prototype.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]