This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).


On 03/28/2018 04:31 PM, Jakub Jelinek wrote:
On Wed, Mar 28, 2018 at 04:18:45PM +0200, Martin Liška wrote:
2018-03-14  Martin Liska  <mliska@suse.cz>

	PR middle-end/81657
	* builtins.c (expand_builtin_memory_copy_args): Handle situation
	when libc library provides a fast mempcpy implementation/
	* config/i386/i386-protos.h (gnu_libc_func_speed): New.
	* config/i386/i386.c (enum libc_speed): Likewise.
	(ix86_libc_func_speed): Likewise.
	(TARGET_LIBC_FUNC_SPEED): Likewise.
	* coretypes.h (enum libc_speed): Likewise.
	* doc/tm.texi: Document new target hook.
	* doc/tm.texi.in: Likewise.
	* expr.c (emit_block_move_hints): Handle libc bail out argument.
	* expr.h (emit_block_move_hints): Add new parameters.
	* target.def: Add new hook.
	* targhooks.c (enum libc_speed): New enum.
	(default_libc_func_speed): Provide a default hook
	implementation.
	* targhooks.h (default_libc_func_speed): Likewise.

gcc/testsuite/ChangeLog:

2018-03-14  Martin Liska  <mliska@suse.cz>

	* gcc.dg/string-opt-1.c: Adjust scans for i386 and glibc target
	and others.
    /* Copy word part most expediently.  */
+  bool libcall_avoided = false;
    dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx,
  				     CALL_EXPR_TAILCALL (exp)
  				     && (endp == 0 || target == const0_rtx)
  				     ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL,
  				     expected_align, expected_size,
-				     min_size, max_size, probable_max_size);
+				     min_size, max_size, probable_max_size,
+				     avoid_libcall ? &libcall_avoided: NULL);

Missing space before :.

Fixed.

+
+  if (libcall_avoided)
+    return NULL_RTX;
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -47,6 +47,8 @@ extern void ix86_reset_previous_fndecl (void);
extern bool ix86_using_red_zone (void); +extern enum libc_speed gnu_libc_func_speed (int fn);

Here you declare gnu_libc_func_speed.

--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2735,6 +2735,27 @@ ix86_using_red_zone (void)
  	  && (!cfun->machine->has_local_indirect_jump
  	      || cfun->machine->indirect_branch_type == indirect_branch_keep));
  }
+
+/* This hook determines whether a function from libc has a fast implementation
+   FN is present at the runtime.  We override it for i386 and glibc C library
+   as this combination provides fast implementation of mempcpy function.  */
+
+enum libc_speed
+ix86_libc_func_speed (int fn)

But define a different function.

You are right.


+{
+  enum built_in_function f = (built_in_function)fn;
+
+  if (!OPTION_GLIBC)
+    return LIBC_UNKNOWN_SPEED;

OPTION_GLIBC is only defined if linux.h is included, so I think you break
all other x86 targets this way.

That said the proper way is probably to define the function in linux64.c
(should we also use it on i386, or nobody cares?). And corresponding declaration
will be in config/linux-protos.h.

The hook should have linux in the name,
perhaps defined only in config/i386/linux.h and redefined in i386.c through
#ifdef SUBTARGET_LIBC_FUNC_SPEED
#undef TARGET_LIBC_FUNC_SPEED
#define TARGET_LIBC_FUNC_SPEED SUBTARGET_LIBC_FUNC_SPEED
#endif
or something similar.

I adopted this suggested mechanism.
Thanks for review.

Martin


Otherwise LGTM, but please get approval also from Richi (for the generic part) and
Uros (for the backend side).

	Jakub


>From 4378fa93aafac3d28eaae48c9a41a2f2ef5e1d18 Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Wed, 14 Mar 2018 09:44:18 +0100
Subject: [PATCH] Introduce new libc_func_speed target hook (PR
 middle-end/81657).

gcc/ChangeLog:

2018-03-14  Martin Liska  <mliska@suse.cz>

	PR middle-end/81657
	* builtins.c (expand_builtin_memory_copy_args): Handle situation
	when libc library provides a fast mempcpy implementation/
	* config/linux-protos.h (ix86_linux_libc_func_speed): New.
	* config/linux.c (ix86_linux_libc_func_speed): Likewise.
	(TARGET_LIBC_FUNC_SPEED): Likewise.
	* config/i386/linux64.h (SUBTARGET_LIBC_FUNC_SPEED): Define
	macro.
	* coretypes.h (enum libc_speed): Likewise.
	* doc/tm.texi: Document new target hook.
	* doc/tm.texi.in: Likewise.
	* expr.c (emit_block_move_hints): Handle libc bail out argument.
	* expr.h (emit_block_move_hints): Add new parameters.
	* target.def: Add new hook.
	* targhooks.c (enum libc_speed): New enum.
	(default_libc_func_speed): Provide a default hook
	implementation.
	* targhooks.h (default_libc_func_speed): Likewise.

gcc/testsuite/ChangeLog:

2018-03-28  Martin Liska  <mliska@suse.cz>

	* gcc.dg/string-opt-1.c:

gcc/testsuite/ChangeLog:

2018-03-14  Martin Liska  <mliska@suse.cz>

	* gcc.dg/string-opt-1.c: Adjust scans for i386 and glibc target
	and others.
---
 gcc/builtins.c                      | 15 ++++++++++++++-
 gcc/config/i386/i386.c              |  5 +++++
 gcc/config/i386/linux64.h           |  2 ++
 gcc/config/linux-protos.h           |  1 +
 gcc/config/linux.c                  | 21 +++++++++++++++++++++
 gcc/coretypes.h                     |  7 +++++++
 gcc/doc/tm.texi                     |  4 ++++
 gcc/doc/tm.texi.in                  |  1 +
 gcc/expr.c                          | 11 ++++++++++-
 gcc/expr.h                          |  3 ++-
 gcc/target.def                      |  7 +++++++
 gcc/targhooks.c                     |  9 +++++++++
 gcc/targhooks.h                     |  1 +
 gcc/testsuite/gcc.dg/string-opt-1.c |  5 +++--
 14 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 487d9d58db2..98ee3fb272d 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -3651,13 +3651,26 @@ expand_builtin_memory_copy_args (tree dest, tree src, tree len,
   src_mem = get_memory_rtx (src, len);
   set_mem_align (src_mem, src_align);
 
+  /* emit_block_move_hints can generate a library call to memcpy function.
+     In situations when a libc library provides fast implementation
+     of mempcpy, then it's better to call mempcpy directly.  */
+  bool avoid_libcall
+    = (endp == 1
+       && targetm.libc_func_speed ((int)BUILT_IN_MEMPCPY) == LIBC_FAST_SPEED
+       && target != const0_rtx);
+
   /* Copy word part most expediently.  */
+  bool libcall_avoided = false;
   dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx,
 				     CALL_EXPR_TAILCALL (exp)
 				     && (endp == 0 || target == const0_rtx)
 				     ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL,
 				     expected_align, expected_size,
-				     min_size, max_size, probable_max_size);
+				     min_size, max_size, probable_max_size,
+				     avoid_libcall ? &libcall_avoided : NULL);
+
+  if (libcall_avoided)
+    return NULL_RTX;
 
   if (dest_addr == 0)
     {
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b4f6aec1434..2471ff7b99a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -52105,6 +52105,11 @@ ix86_run_selftests (void)
 #undef TARGET_WARN_PARAMETER_PASSING_ABI
 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
 
+#ifdef SUBTARGET_LIBC_FUNC_SPEED
+#undef TARGET_LIBC_FUNC_SPEED
+#define TARGET_LIBC_FUNC_SPEED SUBTARGET_LIBC_FUNC_SPEED
+#endif
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h
index f2d913e30ac..d855f5cc239 100644
--- a/gcc/config/i386/linux64.h
+++ b/gcc/config/i386/linux64.h
@@ -37,3 +37,5 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define MUSL_DYNAMIC_LINKER64 "/lib/ld-musl-x86_64.so.1"
 #undef MUSL_DYNAMIC_LINKERX32
 #define MUSL_DYNAMIC_LINKERX32 "/lib/ld-musl-x32.so.1"
+
+#define SUBTARGET_LIBC_FUNC_SPEED ix86_linux_libc_func_speed
diff --git a/gcc/config/linux-protos.h b/gcc/config/linux-protos.h
index 9da8dd7ecaa..b7284735366 100644
--- a/gcc/config/linux-protos.h
+++ b/gcc/config/linux-protos.h
@@ -20,3 +20,4 @@ along with GCC; see the file COPYING3.  If not see
 extern bool linux_has_ifunc_p (void);
 
 extern bool linux_libc_has_function (enum function_class fn_class);
+extern enum libc_speed ix86_linux_libc_func_speed (int fn);
diff --git a/gcc/config/linux.c b/gcc/config/linux.c
index bc06e54a73d..b878d51e4b4 100644
--- a/gcc/config/linux.c
+++ b/gcc/config/linux.c
@@ -37,3 +37,24 @@ linux_libc_has_function (enum function_class fn_class)
 
   return false;
 }
+
+/* This hook determines whether a function from libc has a fast implementation
+   FN is present at the runtime.  We override it for i386 and glibc C library
+   as this combination provides fast implementation of mempcpy function.  */
+
+enum libc_speed
+ix86_linux_libc_func_speed (int fn)
+{
+  enum built_in_function f = (built_in_function)fn;
+
+  if (!OPTION_GLIBC)
+    return LIBC_UNKNOWN_SPEED;
+
+  switch (f)
+    {
+      case BUILT_IN_MEMPCPY:
+	return LIBC_FAST_SPEED;
+      default:
+	return LIBC_UNKNOWN_SPEED;
+    }
+}
diff --git a/gcc/coretypes.h b/gcc/coretypes.h
index 283b4eb33fe..fe618f708f4 100644
--- a/gcc/coretypes.h
+++ b/gcc/coretypes.h
@@ -384,6 +384,13 @@ enum excess_precision_type
   EXCESS_PRECISION_TYPE_FAST
 };
 
+enum libc_speed
+{
+  LIBC_FAST_SPEED,
+  LIBC_SLOW_SPEED,
+  LIBC_UNKNOWN_SPEED
+};
+
 /* Support for user-provided GGC and PCH markers.  The first parameter
    is a pointer to a pointer, the second a cookie.  */
 typedef void (*gt_pointer_operator) (void *, void *);
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index bd8b917ba82..0f7c91a22c4 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5501,6 +5501,10 @@ macro, a reasonable default is used.
 This hook determines whether a function from a class of functions
 @var{fn_class} is present at the runtime.
 @end deftypefn
+@deftypefn {Target Hook} libc_speed TARGET_LIBC_FUNC_SPEED (int @var{fn})
+This hook determines whether a function from libc has a fast implementation
+@var{fn} is present at the runtime.
+@end deftypefn
 
 @defmac NEXT_OBJC_RUNTIME
 Set this macro to 1 to use the "NeXT" Objective-C message sending conventions
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index b0207146e8c..4bb2998a8a1 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -3933,6 +3933,7 @@ macro, a reasonable default is used.
 @end defmac
 
 @hook TARGET_LIBC_HAS_FUNCTION
+@hook TARGET_LIBC_FUNC_SPEED
 
 @defmac NEXT_OBJC_RUNTIME
 Set this macro to 1 to use the "NeXT" Objective-C message sending conventions
diff --git a/gcc/expr.c b/gcc/expr.c
index 00660293f72..f3bd698bc4d 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -1554,6 +1554,8 @@ compare_by_pieces (rtx arg0, rtx arg1, unsigned HOST_WIDE_INT len,
    MIN_SIZE is the minimal size of block to move
    MAX_SIZE is the maximal size of block to move, if it can not be represented
    in unsigned HOST_WIDE_INT, than it is mask of all ones.
+   If BAIL_OUT_LIBCALL is non-null, do not emit library call and assign
+   true to the pointer when move is not done.
 
    Return the address of the new block, if memcpy is called and returns it,
    0 otherwise.  */
@@ -1563,7 +1565,8 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
 		       unsigned int expected_align, HOST_WIDE_INT expected_size,
 		       unsigned HOST_WIDE_INT min_size,
 		       unsigned HOST_WIDE_INT max_size,
-		       unsigned HOST_WIDE_INT probable_max_size)
+		       unsigned HOST_WIDE_INT probable_max_size,
+		       bool *bail_out_libcall)
 {
   bool may_use_call;
   rtx retval = 0;
@@ -1625,6 +1628,12 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
 	   && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))
 	   && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (y)))
     {
+      if (bail_out_libcall)
+	{
+	  *bail_out_libcall = true;
+	  return retval;
+	}
+
       /* Since x and y are passed to a libcall, mark the corresponding
 	 tree EXPR as addressable.  */
       tree y_expr = MEM_EXPR (y);
diff --git a/gcc/expr.h b/gcc/expr.h
index b3d523bcb24..c2bf87fd14e 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -110,7 +110,8 @@ extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
 			          unsigned int, HOST_WIDE_INT,
 				  unsigned HOST_WIDE_INT,
 				  unsigned HOST_WIDE_INT,
-				  unsigned HOST_WIDE_INT);
+				  unsigned HOST_WIDE_INT,
+				  bool *bail_out_libcall = NULL);
 extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool,
 				 by_pieces_constfn, void *);
 extern bool emit_storent_insn (rtx to, rtx from);
diff --git a/gcc/target.def b/gcc/target.def
index c5b2a1e7e71..3bbddc82776 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2639,6 +2639,13 @@ DEFHOOK
  bool, (enum function_class fn_class),
  default_libc_has_function)
 
+DEFHOOK
+(libc_func_speed,
+ "This hook determines whether a function from libc has a fast implementation\n\
+@var{fn} is present at the runtime.",
+ libc_speed, (int fn),
+ default_libc_func_speed)
+
 /* True if new jumps cannot be created, to replace existing ones or
    not, at the current point in the compilation.  */
 DEFHOOK
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index fafcc6c5196..6e44f6f79cf 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1642,6 +1642,15 @@ no_c99_libc_has_function (enum function_class fn_class ATTRIBUTE_UNUSED)
   return false;
 }
 
+/* This hook determines whether a function from libc has a fast implementation
+   FN is present at the runtime.  */
+
+enum libc_speed
+default_libc_func_speed (int)
+{
+  return LIBC_UNKNOWN_SPEED;
+}
+
 tree
 default_builtin_tm_load_store (tree ARG_UNUSED (type))
 {
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 8a4393f2ba4..7508673ad0a 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -205,6 +205,7 @@ extern bool default_have_conditional_execution (void);
 extern bool default_libc_has_function (enum function_class);
 extern bool no_c99_libc_has_function (enum function_class);
 extern bool gnu_libc_has_function (enum function_class);
+extern enum libc_speed default_libc_func_speed (int);
 
 extern tree default_builtin_tm_load_store (tree);
 
diff --git a/gcc/testsuite/gcc.dg/string-opt-1.c b/gcc/testsuite/gcc.dg/string-opt-1.c
index 2f060732bf0..7faaadcbb1f 100644
--- a/gcc/testsuite/gcc.dg/string-opt-1.c
+++ b/gcc/testsuite/gcc.dg/string-opt-1.c
@@ -48,5 +48,6 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-assembler-not "\<mempcpy\>" } } */
-/* { dg-final { scan-assembler "memcpy" } } */
+/* { dg-final { scan-assembler-not "\<mempcpy\>" { target { ! { i?86-*-gnu* x86_64-*-gnu* i?86-*-linux* x86_64-*-linux* } } } } } */
+/* { dg-final { scan-assembler "memcpy" { target  { ! { i?86-*-gnu* x86_64-*-gnu* i?86-*-linux* x86_64-*-linux* } } } } } */
+/* { dg-final { scan-assembler "mempcpy" { target  { i?86-*-gnu* x86_64-*-gnu* i?86-*-linux* x86_64-*-linux* } } } } */
-- 
2.16.2


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]