[PATCH 3/4] S/390: Unroll mvc loop for memcpy with small constant lengths.

Andreas Krebbel krebbel@linux.vnet.ibm.com
Tue Jan 3 09:44:00 GMT 2017


See the memset unrolling patch for the rationale.  The very same
applies to memcpy calls with small constant lengths: the MVC loop is
unrolled when that is shorter than emitting the loop.

gcc/ChangeLog:

2017-01-03  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* config/s390/s390.c (s390_expand_movmem): Unroll MVC loop for
	small constant length operands.

gcc/testsuite/ChangeLog:

2017-01-03  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* gcc.target/s390/memcpy-1.c: New test.
---
 gcc/config/s390/s390.c                   | 21 +++++++++++--
 gcc/testsuite/gcc.target/s390/memcpy-1.c | 53 ++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/memcpy-1.c

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 1266f45..9bd98eb 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -5246,10 +5246,25 @@ s390_expand_movmem (rtx dst, rtx src, rtx len)
       && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
     return false;
 
-  if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
+  /* Expand memcpy for constant length operands without a loop if it
+     is shorter that way.
+
+     With a constant length argument a memcpy loop (without pfd) is
+     36 bytes, which is the size of 6 mvc instructions.  */
+  if (GET_CODE (len) == CONST_INT
+      && INTVAL (len) >= 0
+      && INTVAL (len) <= 256 * 6
+      && (!TARGET_MVCLE || INTVAL (len) <= 256))
     {
-      if (INTVAL (len) > 0)
-        emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
+      HOST_WIDE_INT o, l;
+
+      for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
+	{
+	  rtx newdst = adjust_address (dst, BLKmode, o);
+	  rtx newsrc = adjust_address (src, BLKmode, o);
+	  emit_insn (gen_movmem_short (newdst, newsrc,
+				       GEN_INT (l > 256 ? 255 : l - 1)));
+	}
     }
 
   else if (TARGET_MVCLE)
diff --git a/gcc/testsuite/gcc.target/s390/memcpy-1.c b/gcc/testsuite/gcc.target/s390/memcpy-1.c
new file mode 100644
index 0000000..58c1b49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/memcpy-1.c
@@ -0,0 +1,53 @@
+/* Make sure that short memcpy's with constant length are emitted
+   without loop statements.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch" } */
+
+/* 3 MVCs */
+void
+*memcpy1(void *dest, const void *src)
+{
+  return __builtin_memcpy (dest, src, 700);
+}
+
+/* NOP */
+void
+*memcpy2(void *dest, const void *src)
+{
+  return __builtin_memcpy (dest, src, 0);
+}
+
+/* 1 MVC */
+void
+*memcpy3(void *dest, const void *src)
+{
+  return __builtin_memcpy (dest, src, 256);
+}
+
+/* 2 MVCs */
+void
+*memcpy4(void *dest, const void *src)
+{
+  return __builtin_memcpy (dest, src, 512);
+}
+
+/* 3 MVCs */
+void
+*memcpy5(void *dest, const void *src)
+{
+  return __builtin_memcpy (dest, src, 768);
+}
+
+/* Loop with 2 MVCs */
+void
+*memcpy6(void *dest, const void *src)
+{
+  return __builtin_memcpy (dest, src, 1537);
+}
+
+/* memcpy6 uses a loop - check for the two load address instructions
+   used to increment src and dest.  */
+/* { dg-final { scan-assembler-times "la" 2 } } */
+
+/* { dg-final { scan-assembler-times "mvc" 11 } } */
-- 
2.9.1



More information about the Gcc-patches mailing list