This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, rs6000] Allow memmove/memset builtin expansion to use unaligned VSX on p8/p9


This patch allows the use of unaligned VSX loads/stores for builtin
expansion of memset and of block moves (memcpy/memmove) on p8/p9.
Performance of unaligned VSX instructions is good on these processors.

OK for trunk if bootstrap/regtest on ppc64le passes?

2017-12-13  Aaron Sawdey  <acsawdey@linux.vnet.ibm.com>

	* config/rs6000/rs6000-string.c (expand_block_move): Allow the use of
	unaligned VSX load/store on P8/P9.
	(expand_block_clear): Allow the use of unaligned VSX load/store on P8/P9.


-- 
Aaron Sawdey, Ph.D.  acsawdey@linux.vnet.ibm.com
050-2/C113  (507) 253-7520 home: 507/263-0782
IBM Linux Technology Center - PPC Toolchain
Index: gcc/config/rs6000/rs6000-string.c
===================================================================
--- gcc/config/rs6000/rs6000-string.c	(revision 255585)
+++ gcc/config/rs6000/rs6000-string.c	(working copy)
@@ -67,11 +67,14 @@
   if (bytes <= 0)
     return 1;
 
+  bool isP8 = (rs6000_cpu == PROCESSOR_POWER8);
+  bool isP9 = (rs6000_cpu == PROCESSOR_POWER9);
+
   /* Use the builtin memset after a point, to avoid huge code bloat.
      When optimize_size, avoid any significant code bloat; calling
      memset is about 4 instructions, so allow for one instruction to
      load zero and three to do clearing.  */
-  if (TARGET_ALTIVEC && align >= 128)
+  if (TARGET_ALTIVEC && (align >= 128 || isP8 || isP9))
     clear_step = 16;
   else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
     clear_step = 8;
@@ -88,7 +91,7 @@
       machine_mode mode = BLKmode;
       rtx dest;
 
-      if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
+      if (bytes >= 16 && TARGET_ALTIVEC && (align >= 128 || isP8 || isP9))
 	{
 	  clear_bytes = 16;
 	  mode = V4SImode;
@@ -1247,6 +1250,9 @@
   if (bytes > rs6000_block_move_inline_limit)
     return 0;
 
+  bool isP8 = (rs6000_cpu == PROCESSOR_POWER8);
+  bool isP9 = (rs6000_cpu == PROCESSOR_POWER9);
+
   for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
     {
       union {
@@ -1258,7 +1264,7 @@
 
       /* Altivec first, since it will be faster than a string move
 	 when it applies, and usually not significantly larger.  */
-      if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
+      if (TARGET_ALTIVEC && bytes >= 16 && (isP8 || isP9 || align >= 128))
 	{
 	  move_bytes = 16;
 	  mode = V4SImode;

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]