This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
- From: Martin Liška <mliska at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Jakub Jelinek <jakub at redhat dot com>, "H.J. Lu" <hjl dot tools at gmail dot com>
- Date: Mon, 12 Mar 2018 09:57:09 +0100
- Subject: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
- Authentication-results: sourceware.org; auth=none
Hi.
This is fix for the PR that introduces a new target macro. Using the macro
one can say that a target has a fast mempcpy and thus it's preferred to be used
if possible.
Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
I also tested on x86_64-linux-gnu.
Ready to be installed?
Martin
gcc/ChangeLog:
2018-03-08 Martin Liska <mliska@suse.cz>
PR middle-end/81657
* builtins.c (expand_builtin_memory_copy_args): Add new
arguments.
* config/i386/i386.h (TARGET_HAS_FAST_MEMPCPY_ROUTINE):
New macro.
* defaults.h (TARGET_HAS_FAST_MEMPCPY_ROUTINE): Likewise.
* doc/tm.texi: Likewise.
* doc/tm.texi.in: Likewise.
* expr.c (compare_by_pieces): Add support for bail out.
(emit_block_move_hints): Likewise.
* expr.h (emit_block_move_hints): Add new arguments.
gcc/testsuite/ChangeLog:
2018-03-09 Martin Liska <mliska@suse.cz>
PR middle-end/81657
* gcc.dg/string-opt-1.c: Adjust test to run only on non-x86
target.
---
gcc/builtins.c | 13 ++++++++++++-
gcc/config/i386/i386.h | 3 +++
gcc/defaults.h | 7 +++++++
gcc/doc/tm.texi | 5 +++++
gcc/doc/tm.texi.in | 5 +++++
gcc/expr.c | 16 +++++++++++++++-
gcc/expr.h | 4 +++-
gcc/testsuite/gcc.dg/string-opt-1.c | 4 ++--
8 files changed, 52 insertions(+), 5 deletions(-)
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 85affa74510..c2ca36934f7 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -3651,13 +3651,24 @@ expand_builtin_memory_copy_args (tree dest, tree src, tree len,
src_mem = get_memory_rtx (src, len);
set_mem_align (src_mem, src_align);
+ bool is_move_done;
+
/* Copy word part most expediently. */
dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx,
CALL_EXPR_TAILCALL (exp)
&& (endp == 0 || target == const0_rtx)
? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL,
expected_align, expected_size,
- min_size, max_size, probable_max_size);
+ min_size, max_size, probable_max_size,
+ TARGET_HAS_FAST_MEMPCPY_ROUTINE
+ && endp == 1,
+ &is_move_done);
+
+ /* Bail out when a mempcpy call would be expanded as libcall and when
+ we have a target that provides a fast implementation
+ of mempcpy routine. */
+ if (!is_move_done)
+ return NULL_RTX;
if (dest_addr == 0)
{
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index e43edd77b56..8744d706fd7 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1914,6 +1914,9 @@ typedef struct ix86_args {
#define CLEAR_RATIO(speed) ((speed) ? MIN (6, ix86_cost->move_ratio) : 2)
+/* C library provides fast implementation of mempcpy function. */
+#define TARGET_HAS_FAST_MEMPCPY_ROUTINE 1
+
/* Define if shifts truncate the shift count which implies one can
omit a sign-extension or zero-extension of a shift count.
diff --git a/gcc/defaults.h b/gcc/defaults.h
index 78a08a33f12..2e5caac8dcd 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -1340,6 +1340,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define SET_RATIO(speed) MOVE_RATIO (speed)
#endif
+/* By default do not generate libcall to mempcpy and rather use
+ libcall to memcpy and adjustment of return value. */
+
+#ifndef TARGET_HAS_FAST_MEMPCPY_ROUTINE
+#define TARGET_HAS_FAST_MEMPCPY_ROUTINE 0
+#endif
+
/* Supply a default definition of STACK_SAVEAREA_MODE for emit_stack_save.
Normally move_insn, so Pmode stack pointer. */
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index bd8b917ba82..0c8a3f3298c 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6627,6 +6627,11 @@ optimized for speed rather than size.
If you don't define this, it defaults to the value of @code{MOVE_RATIO}.
@end defmac
+@defmac TARGET_HAS_FAST_MEMPCPY_ROUTINE
+By default do not generate libcall to mempcpy and rather use
+libcall to memcpy and adjustment of return value.
+@end defmac
+
@defmac USE_LOAD_POST_INCREMENT (@var{mode})
A C expression used to determine whether a load postincrement is a good
thing to use for a given mode. Defaults to the value of
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index b0207146e8c..e7ef85ab78e 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4560,6 +4560,11 @@ optimized for speed rather than size.
If you don't define this, it defaults to the value of @code{MOVE_RATIO}.
@end defmac
+@defmac TARGET_HAS_FAST_MEMPCPY_ROUTINE
+By default do not generate libcall to mempcpy and rather use
+libcall to memcpy and adjustment of return value.
+@end defmac
+
@defmac USE_LOAD_POST_INCREMENT (@var{mode})
A C expression used to determine whether a load postincrement is a good
thing to use for a given mode. Defaults to the value of
diff --git a/gcc/expr.c b/gcc/expr.c
index 00660293f72..b6c13652d79 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -1554,6 +1554,8 @@ compare_by_pieces (rtx arg0, rtx arg1, unsigned HOST_WIDE_INT len,
MIN_SIZE is the minimal size of block to move
MAX_SIZE is the maximal size of block to move, if it can not be represented
in unsigned HOST_WIDE_INT, than it is mask of all ones.
+ If BAIL_OUT_LIBCALL is set true, do not emit library call and set
+ *IS_MOVE_DONE to false.
Return the address of the new block, if memcpy is called and returns it,
0 otherwise. */
@@ -1563,12 +1565,17 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
unsigned int expected_align, HOST_WIDE_INT expected_size,
unsigned HOST_WIDE_INT min_size,
unsigned HOST_WIDE_INT max_size,
- unsigned HOST_WIDE_INT probable_max_size)
+ unsigned HOST_WIDE_INT probable_max_size,
+ bool bail_out_libcall, bool *is_move_done)
{
bool may_use_call;
rtx retval = 0;
unsigned int align;
+ /* When not doing a bail out, we always emit a memory move. */
+ if (is_move_done)
+ *is_move_done = true;
+
gcc_assert (size);
if (CONST_INT_P (size) && INTVAL (size) == 0)
return 0;
@@ -1625,6 +1632,13 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
&& ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))
&& ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (y)))
{
+ if (bail_out_libcall)
+ {
+ if (is_move_done)
+ *is_move_done = false;
+ return retval;
+ }
+
/* Since x and y are passed to a libcall, mark the corresponding
tree EXPR as addressable. */
tree y_expr = MEM_EXPR (y);
diff --git a/gcc/expr.h b/gcc/expr.h
index b3d523bcb24..023bc5aec47 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -110,7 +110,9 @@ extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
unsigned int, HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
- unsigned HOST_WIDE_INT);
+ unsigned HOST_WIDE_INT,
+ bool bail_out_libcall = false,
+ bool *is_move_done = NULL);
extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool,
by_pieces_constfn, void *);
extern bool emit_storent_insn (rtx to, rtx from);
diff --git a/gcc/testsuite/gcc.dg/string-opt-1.c b/gcc/testsuite/gcc.dg/string-opt-1.c
index 2f060732bf0..851c8b04a33 100644
--- a/gcc/testsuite/gcc.dg/string-opt-1.c
+++ b/gcc/testsuite/gcc.dg/string-opt-1.c
@@ -48,5 +48,5 @@ main (void)
return 0;
}
-/* { dg-final { scan-assembler-not "\<mempcpy\>" } } */
-/* { dg-final { scan-assembler "memcpy" } } */
+/* { dg-final { scan-assembler-not "\<mempcpy\>" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-assembler "memcpy" { target { ! { i?86-*-* x86_64-*-* } } } } } */