[PATCH] dse: Remove partial load after full store for high part access[PR71309]

Xiong Hu Luo luoxhu@linux.ibm.com
Tue Jul 21 10:54:27 GMT 2020


This patch could optimize (works for char/short/int/void*):

6: r119:TI=[r118:DI+0x10]
7: [r118:DI]=r119:TI
8: r121:DI=[r118:DI+0x8]

=>

6: r119:TI=[r118:DI+0x10]
16: r122:DI=r119:TI#8

Final ASM will be as below without partial load after full store(stxv+ld):
  ld 10,16(3)
  mr 9,3
  ld 3,24(3)
  std 10,0(9)
  std 3,8(9)
  blr

It could achieve ~25% performance improvement for typical cases on
Power9.  Bootstrap and regression tested on Power9-LE.

BTW, for AArch64, one ldr is replaced by mov with this patch, though
no performance change observerd...

ldp     x2, x3, [x0, 16]
stp     x2, x3, [x0]
ldr     x0, [x0, 8]

=>

mov     x1, x0
ldp     x2, x0, [x0, 16]
stp     x2, x0, [x1]

gcc/ChangeLog:

2020-07-21  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/71309
	* dse.c (get_stored_val): Use subreg before extract if shifting
	from high part.

gcc/testsuite/ChangeLog:

2020-07-21  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/71309
	* gcc.target/powerpc/pr71309.c: New test.
	* gcc.target/powerpc/fold-vec-extract-short.p7.c: Add -mbig.
---
 gcc/dse.c                                     | 26 ++++++++++++---
 .../powerpc/fold-vec-extract-short.p7.c       |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr71309.c    | 33 +++++++++++++++++++
 3 files changed, 56 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr71309.c

diff --git a/gcc/dse.c b/gcc/dse.c
index bbe792e48e8..13f952ee5ff 100644
--- a/gcc/dse.c
+++ b/gcc/dse.c
@@ -1855,7 +1855,7 @@ get_stored_val (store_info *store_info, machine_mode read_mode,
 {
   machine_mode store_mode = GET_MODE (store_info->mem);
   poly_int64 gap;
-  rtx read_reg;
+  rtx read_reg = NULL;
 
   /* To get here the read is within the boundaries of the write so
      shift will never be negative.  Start out with the shift being in
@@ -1872,9 +1872,27 @@ get_stored_val (store_info *store_info, machine_mode read_mode,
     {
       poly_int64 shift = gap * BITS_PER_UNIT;
       poly_int64 access_size = GET_MODE_SIZE (read_mode) + gap;
-      read_reg = find_shift_sequence (access_size, store_info, read_mode,
-				      shift, optimize_bb_for_speed_p (bb),
-				      require_cst);
+      rtx rhs_subreg = NULL;
+
+      if (known_eq (GET_MODE_BITSIZE (store_mode), shift * 2))
+	{
+	  scalar_int_mode inner_mode = smallest_int_mode_for_size (shift);
+	  poly_uint64 sub_off
+	    = ((!BYTES_BIG_ENDIAN)
+		 ? GET_MODE_SIZE (store_mode) - GET_MODE_SIZE (inner_mode)
+		 : 0);
+
+	  rhs_subreg = simplify_gen_subreg (inner_mode, store_info->rhs,
+					    store_mode, sub_off);
+	  if (rhs_subreg)
+	    read_reg
+	      = extract_low_bits (read_mode, inner_mode, copy_rtx (rhs_subreg));
+	}
+
+      if (read_reg == NULL)
+	read_reg
+	  = find_shift_sequence (access_size, store_info, read_mode, shift,
+				 optimize_bb_for_speed_p (bb), require_cst);
     }
   else if (store_mode == BLKmode)
     {
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p7.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p7.c
index 8616e7b11ad..b5cefe7dc12 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p7.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p7.c
@@ -3,7 +3,7 @@
 
 /* { dg-do compile { target { powerpc*-*-linux* } } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-mdejagnu-cpu=power7 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power7 -O2 -mbig" } */
 
 // six tests total. Targeting P7 BE.
 // p7 (be) vars:                 li, addi,              stxvw4x, rldic, addi, lhax/lhzx
diff --git a/gcc/testsuite/gcc.target/powerpc/pr71309.c b/gcc/testsuite/gcc.target/powerpc/pr71309.c
new file mode 100644
index 00000000000..94d727a8ed9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr71309.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+
+#define TYPE void*
+#define TYPE2 void*
+
+struct path {
+    TYPE2 mnt;
+    TYPE dentry;
+};
+
+struct nameidata {
+    struct path path;
+    struct path root;
+};
+
+__attribute__ ((noinline))
+TYPE foo(struct nameidata *nd)
+{
+  TYPE d;
+  TYPE2 d2;
+
+  nd->path = nd->root;
+  d = nd->path.dentry;
+  d2 = nd->path.mnt;
+  return d;
+}
+
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv\M} } } */
+/* { dg-final { scan-assembler-times {\mld\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */
-- 
2.27.0.90.geebb51ba8c



More information about the Gcc-patches mailing list