This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Use TImode for piecewise move in 64-bit mode


Use TImode for piecewise move in 64-bit mode.  When a vector register
is used for a piecewise move, we don't increase stack_alignment_needed,
since a vector register spill isn't required for a piecewise move.  Since
stack_realign_needed is set to true by checking stack_alignment_estimated,
which is set by pseudo vector register usage, we also need to check
stack_realign_needed to eliminate the frame pointer.

Tested on x86-64.  OK for trunk?

H.J.
---
gcc/

	* config/i386/i386.c (ix86_finalize_stack_realign_flags): Also
	check stack_realign_needed for stack realignment.
	* config/i386/i386.h (MOVE_MAX_PIECES): Set to 16 in 64-bit mode
	if unaligned SSE load and store are optimal.

gcc/testsuite/

	* gcc.target/i386/pieces-memcpy-1.c: New test.
	* gcc.target/i386/pieces-memcpy-2.c: Likewise.
	* gcc.target/i386/pieces-memcpy-3.c: Likewise.
	* gcc.target/i386/pieces-memcpy-4.c: Likewise.
	* gcc.target/i386/pieces-memcpy-5.c: Likewise.
	* gcc.target/i386/pieces-memcpy-6.c: Likewise.
---
 gcc/config/i386/i386.c                          | 11 +++++++++--
 gcc/config/i386/i386.h                          |  6 +++++-
 gcc/testsuite/gcc.target/i386/pieces-memcpy-1.c | 17 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pieces-memcpy-2.c | 17 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pieces-memcpy-3.c | 17 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c | 17 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c | 17 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c | 17 +++++++++++++++++
 8 files changed, 116 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 93eaab1..60dc160 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13286,8 +13286,15 @@ ix86_finalize_stack_realign_flags (void)
   /* If the only reason for frame_pointer_needed is that we conservatively
      assumed stack realignment might be needed, but in the end nothing that
      needed the stack alignment had been spilled, clear frame_pointer_needed
-     and say we don't need stack realignment.  */
-  if (stack_realign
+     and say we don't need stack realignment.
+
+     When a vector register is used for a piecewise move or store, we
+     don't increase stack_alignment_needed, as no register spill is
+     required for it.  Since stack_realign_needed is set to true by
+     checking stack_alignment_estimated, which is updated by pseudo
+     vector register usage, we also need to check stack_realign_needed
+     to eliminate the frame pointer.  */
+  if ((stack_realign || crtl->stack_realign_needed)
       && frame_pointer_needed
       && crtl->is_leaf
       && flag_omit_frame_pointer
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9b66264..24db855 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1951,7 +1951,11 @@ typedef struct ix86_args {
 /* MOVE_MAX_PIECES is the number of bytes at a time which we can
    move efficiently, as opposed to  MOVE_MAX which is the maximum
    number of bytes we can move with a single instruction.  */
-#define MOVE_MAX_PIECES UNITS_PER_WORD
+#define MOVE_MAX_PIECES \
+  ((TARGET_64BIT \
+    && TARGET_SSE2 \
+    && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
+    && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) ? 16 : UNITS_PER_WORD)
 
 /* If a memory-to-memory move would take MOVE_RATIO or more simple
    move-instruction pairs, we will do a movmem or libcall instead.
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-1.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-1.c
new file mode 100644
index 0000000..adc0aa8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 64);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-2.c
new file mode 100644
index 0000000..c52c1d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-3.c
new file mode 100644
index 0000000..c532bbd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-3.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 17);
+}
+
+/* { dg-final { scan-assembler-times "movdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c
new file mode 100644
index 0000000..4ef763d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 18);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c
new file mode 100644
index 0000000..2687560
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 19);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
new file mode 100644
index 0000000..a205f83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
-- 
2.7.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]