Take: character(2) :: c character(1) :: a character(1) :: b c = a//b end ---- We get: _gfortran_concat_string (2, str.1, 1, &a, 1, &b); _gfortran_copy_string (2, &c, 2, str.1); We should be able to get: _gfortran_concat_string (2, &c, 1, &a, 1, &b); instead, and get rid of the temporary variable str.1.
Confirmed.
Here's another patch for concat, to have the front-end generate the code itself instead of calling a library function. It still generates the extra temporary, but can optimize other things. Index: trans-expr.c =================================================================== --- trans-expr.c (revision 121506) +++ trans-expr.c (working copy) @@ -934,6 +934,36 @@ } +/* Helper for gfc_trans_array_copy, gfc_trans_array_constructor_copy + and gfc_conv_concat_op that constructs the call to __builtin_memcpy. */ + +static tree +gfc_build_memcpy_call (tree dst, tree src, tree len) +{ + tree tmp, args; + + /* Convert arguments to the correct types. */ + if (!POINTER_TYPE_P (TREE_TYPE (dst))) + dst = gfc_build_addr_expr (pvoid_type_node, dst); + else + dst = fold_convert (pvoid_type_node, dst); + + if (!POINTER_TYPE_P (TREE_TYPE (src))) + src = gfc_build_addr_expr (pvoid_type_node, src); + else + src = fold_convert (pvoid_type_node, src); + + len = fold_convert (size_type_node, len); + + /* Construct call to __builtin_memcpy. */ + args = build_tree_list (NULL_TREE, len); + args = tree_cons (NULL_TREE, src, args); + args = tree_cons (NULL_TREE, dst, args); + tmp = build_function_call_expr (built_in_decls[BUILT_IN_MEMCPY], args); + return fold_convert (void_type_node, tmp); +} + + /* Handle a string concatenation operation. A temporary will be allocated to hold the result. 
*/ @@ -942,11 +972,15 @@ { gfc_se lse; gfc_se rse; - tree len; + tree destlen; tree type; - tree var; + tree dest; tree args; tree tmp; + tree d; + tree dlen; + tree cond1, cond2, action1, action2, expr_block1, expr_block2; + stmtblock_t block1, block2; gcc_assert (expr->value.op.op1->ts.type == BT_CHARACTER && expr->value.op.op2->ts.type == BT_CHARACTER); @@ -962,34 +996,92 @@ gfc_add_block_to_block (&se->pre, &rse.pre); type = gfc_get_character_type (expr->ts.kind, expr->ts.cl); - len = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); - if (len == NULL_TREE) - { - len = fold_build2 (PLUS_EXPR, TREE_TYPE (lse.string_length), - lse.string_length, rse.string_length); - } + destlen = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); + if (destlen == NULL_TREE) + destlen = fold_build2 (PLUS_EXPR, TREE_TYPE (lse.string_length), + lse.string_length, rse.string_length); type = build_pointer_type (type); - var = gfc_conv_string_tmp (se, type, len); + dest = gfc_conv_string_tmp (se, type, destlen); - /* Do the actual concatenation. */ - args = NULL_TREE; - args = gfc_chainon_list (args, len); - args = gfc_chainon_list (args, var); - args = gfc_chainon_list (args, lse.string_length); - args = gfc_chainon_list (args, lse.expr); - args = gfc_chainon_list (args, rse.string_length); - args = gfc_chainon_list (args, rse.expr); - tmp = build_function_call_expr (gfor_fndecl_concat_string, args); + /* Do the actual concatenation. The original code in libgfortran was: + + void + concat_string (GFC_INTEGER_4 destlen, char * dest, + GFC_INTEGER_4 len1, const char * s1, + GFC_INTEGER_4 len2, const char * s2) + { + if (len1 >= destlen) + { + memcpy (dest, s1, destlen); + return; + } + memcpy (dest, s1, len1); + dest += len1; + destlen -= len1; + + if (len2 >= destlen) + { + memcpy (dest, s2, destlen); + } + else + { + memcpy (dest, s2, len2); + memset (&dest[len2], ' ', destlen - len2); + } + } + + And that's exactly what we do here. 
+ */ + + cond1 = fold_build2 (GE_EXPR, boolean_type_node, lse.string_length, destlen); + action1 = gfc_build_memcpy_call (dest, lse.expr, destlen); + + gfc_start_block (&block1); + tmp = gfc_build_memcpy_call (dest, lse.expr, lse.string_length); + gfc_add_expr_to_block (&block1, tmp); + dlen = gfc_create_var (TREE_TYPE (destlen), "dlen"); + d = gfc_create_var (pchar_type_node, "d"); + tmp = fold_build2 (MINUS_EXPR, TREE_TYPE (dlen), destlen, lse.string_length); + gfc_add_modify_expr (&block1, dlen, tmp); + tmp = fold_build2 (PLUS_EXPR, pchar_type_node, dest, + fold_convert (pchar_type_node, lse.string_length)); + gfc_add_modify_expr (&block1, d, tmp); + + gfc_init_block (&block2); + tmp = gfc_build_memcpy_call (d, rse.expr, rse.string_length); + gfc_add_expr_to_block (&block2, tmp); + + /* Call to memset. */ + args = fold_build2 (PLUS_EXPR, pchar_type_node, d, + fold_convert (pchar_type_node, rse.string_length)); + args = gfc_chainon_list (NULL_TREE, args); + args = gfc_chainon_list (args, build_int_cst + (gfc_get_int_type (gfc_c_int_kind), + lang_hooks.to_target_charset (' '))); + args = gfc_chainon_list (args, fold_build2 (MINUS_EXPR, TREE_TYPE(dlen), + dlen, rse.string_length)); + tmp = build_function_call_expr (built_in_decls[BUILT_IN_MEMSET], args); + + gfc_add_expr_to_block (&block2, tmp); + expr_block2 = gfc_finish_block (&block2); + + cond2 = fold_build2 (GE_EXPR, boolean_type_node, rse.string_length, dlen); + action2 = gfc_build_memcpy_call (d, rse.expr, dlen); + tmp = fold_build3 (COND_EXPR, void_type_node, cond2, action2, expr_block2); + gfc_add_expr_to_block (&block1, tmp); + expr_block1 = gfc_finish_block (&block1); + + tmp = fold_build3 (COND_EXPR, void_type_node, cond1, action1, expr_block1); gfc_add_expr_to_block (&se->pre, tmp); /* Add the cleanup for the operands. 
*/ gfc_add_block_to_block (&se->pre, &rse.post); gfc_add_block_to_block (&se->pre, &lse.post); - se->expr = var; - se->string_length = len; + se->expr = dest; + se->string_length = destlen; } /* Translates an op expression. Common (binary) cases are handled by this @@ -3622,36 +3713,6 @@ } -/* Helper for gfc_trans_array_copy and gfc_trans_array_constructor_copy - that constructs the call to __builtin_memcpy. */ - -static tree -gfc_build_memcpy_call (tree dst, tree src, tree len) -{ - tree tmp, args; - - /* Convert arguments to the correct types. */ - if (!POINTER_TYPE_P (TREE_TYPE (dst))) - dst = gfc_build_addr_expr (pvoid_type_node, dst); - else - dst = fold_convert (pvoid_type_node, dst); - - if (!POINTER_TYPE_P (TREE_TYPE (src))) - src = gfc_build_addr_expr (pvoid_type_node, src); - else - src = fold_convert (pvoid_type_node, src); - - len = fold_convert (size_type_node, len); - - /* Construct call to __builtin_memcpy. */ - args = build_tree_list (NULL_TREE, len); - args = tree_cons (NULL_TREE, src, args); - args = tree_cons (NULL_TREE, dst, args); - tmp = build_function_call_expr (built_in_decls[BUILT_IN_MEMCPY], args); - return fold_convert (void_type_node, tmp); -} - - /* Try to efficiently translate dst(:) = src(:). Return NULL if this can't be done. EXPR1 is the destination/lhs and EXPR2 is the source/rhs, both are gfc_full_array_ref_p which have been checked for
Created attachment 13136 [details] Updated version of the patch in comment 2
The test case from comment#1 could be done as c(1:1) = a c(2:2) = b in the front end optimization pass. I'll give it a shot.
Still present at r218462.
Working on other stuff, unassigning for now.
This now seems to be _gfortran_concat_string (2, &str.0, 1, &a, 1, &b); __builtin_memmove (&c, &str.0, 2); so it still has a temporary, AFAICT.
No more temporary, AFAICT: https://godbolt.org/z/o8fYE1nej If written as a proper function: function c(a, b) character(2) :: c character(1) :: a character(1) :: b c = a//b end we get: c_: push rbx mov r9, rcx mov rbx, rdi mov rcx, rdx mov r8d, 1 mov edx, 1 mov edi, 2 sub rsp, 16 lea rsi, [rsp+14] call _gfortran_concat_string movzx eax, WORD PTR [rsp+14] mov WORD PTR [rbx], ax add rsp, 16 pop rbx ret ... and likewise for the original testcase.