[gcc(refs/users/meissner/heads/work027)] PowerPC: PR target/81594
Michael Meissner
meissner@gcc.gnu.org
Thu Nov 12 14:14:37 GMT 2020
https://gcc.gnu.org/g:c348267ee39224e80c60e075910fc8d9ad3e33d4
commit c348267ee39224e80c60e075910fc8d9ad3e33d4
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Nov 12 09:13:40 2020 -0500
PowerPC: PR target/81594
gcc/
2020-11-12 Michael Meissner <meissner@linux.ibm.com>
PR target/81594
* config/rs6000/predicates.md (ds_form_memory): New predicate.
* config/rs6000/vsx.md (concatv2di_store): New insn.
(dupv2di_store): New insn.
gcc/testsuite/
2020-11-12 Michael Meissner <meissner@linux.ibm.com>
PR target/81594
* gcc.target/powerpc/pr81594.c: New test.
Diff:
---
gcc/config/rs6000/predicates.md | 42 +++++++++++++++
gcc/config/rs6000/vsx.md | 84 ++++++++++++++++++++++++++++++
gcc/testsuite/gcc.target/powerpc/pr81594.c | 61 ++++++++++++++++++++++
3 files changed, 187 insertions(+)
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..048763633bc 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1876,3 +1876,45 @@
{
return address_is_prefixed (XEXP (op, 0), mode, NON_PREFIXED_DEFAULT);
})
+
+;; Return true if the operand is a valid memory operand with an offsettable
+;; address that can be split into 2 sub-addresses, each of which is a valid
+;; DS-form address (bottom 2 bits of the offset are 0). This is used to
+;; optimize creating a vector of two DImode elements and then storing the
+;; vector. We want to eliminate the direct moves from GPRs to form the vector
+;; and do the store directly from the GPRs.
+
+(define_predicate "ds_form_memory"
+ (match_code "mem")
+{
+ /* The operand must first of all be a valid memory operand for MODE. */
+ if (!memory_operand (op, mode))
+ return false;
+
+ rtx addr = XEXP (op, 0);
+
+ /* A bare register (or subreg) address is accepted without further checks. */
+ if (REG_P (addr) || SUBREG_P (addr))
+ return true;
+
+ /* Otherwise the only accepted shape is base register + constant offset. */
+ if (GET_CODE (addr) != PLUS)
+ return false;
+
+ if (!base_reg_operand (XEXP (addr, 0), Pmode))
+ return false;
+
+ rtx offset = XEXP (addr, 1);
+ if (!CONST_INT_P (offset))
+ return false;
+
+ HOST_WIDE_INT value = INTVAL (offset);
+
+ /* With prefixed addressing no low-bit test is made; only the 34-bit range
+ check is applied, with the size of a DImode passed as the extra amount
+ (presumably so that the second address, 8 bytes further on, is covered
+ as well -- confirm against the macro definition). */
+ if (TARGET_PREFIXED)
+ return SIGNED_34BIT_OFFSET_EXTRA_P (value, GET_MODE_SIZE (DImode));
+
+ /* If we don't support prefixed addressing, ensure that the two addresses
+ created would each be valid for doing a STD instruction (which is a
+ DS-form instruction that requires the bottom 2 bits to be 0). */
+ if ((value & 0x3) != 0)
+ return false;
+
+ /* Same kind of range check as in the prefixed case, using the 16-bit
+ variant of the macro. */
+ return SIGNED_16BIT_OFFSET_EXTRA_P (value, GET_MODE_SIZE (DImode));
+})
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 947631d83ee..1ef9e9b95fc 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -2981,6 +2981,90 @@
}
[(set_attr "type" "vecperm")])
+;; If the only use for a VEC_CONCAT is to store 2 64-bit values, replace it
+;; with two stores. Only do this on DImode, since it saves doing 1 direct move
+;; on power9, and 2 direct moves + XXPERMDI on power8 to form the vector so we
+;; can do a vector store. This typically shows up with -O3 where two stores
+;; are combined into a vector.
+;;
+;; Typically DFmode would generate XXPERMDI and a vector store. Benchmarks
+;; like Spec show that is typically the same speed or faster than doing the two
+;; scalar DFmode stores.
+(define_insn_and_split "*concatv2di_store"
+ [(set (match_operand:V2DI 0 "memory_operand" "=m,m,m,m")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "gpc_reg_operand" "r,wa,r,wa")
+ (match_operand:DI 2 "gpc_reg_operand" "r,wa,wa,r")))
+ ;; Operand 3 is only used by the split code below, as the replacement
+ ;; address register when the original address cannot be used as is.
+ (clobber (match_scratch:DI 3 "=&b,&b,&b,&b"))]
+ "TARGET_DIRECT_MOVE_64BIT"
+ ;; The "#" template means there is no assembler output for this insn; it
+ ;; has to be split into the two stores below.
+ "#"
+ "&& 1"
+ ;; Operands 4-7 do not appear in the insn pattern; the C code below sets them.
+ [(set (match_dup 4)
+ (match_dup 5))
+ (set (match_dup 6)
+ (match_dup 7))]
+{
+ rtx mem = operands[0];
+
+ /* If the address can't be used directly for both stores, copy it to the
+ temporary base register. */
+ if (!ds_form_memory (mem, V2DImode))
+ {
+ rtx old_addr = XEXP (mem, 0);
+ rtx new_addr = operands[3];
+ /* The scratch operand may still be a SCRATCH rtx rather than a register
+ (presumably when this runs before register allocation); use a new
+ pseudo register in that case. */
+ if (GET_CODE (new_addr) == SCRATCH)
+ new_addr = gen_reg_rtx (Pmode);
+
+ /* NOTE(review): the new address is equivalent to the old one, so
+ replace_equiv_address may be preferable to change_address in order to
+ keep the MEM attributes -- confirm. */
+ emit_move_insn (new_addr, old_addr);
+ mem = change_address (mem, VOIDmode, new_addr);
+ }
+
+ /* Because we are creating scalar stores, we don't have to swap the order
+ of the elements and then swap the stores to get the right order on
+ little endian systems. */
+ /* First store: operand 1 at byte offset 0. Second store: operand 2 at
+ byte offset 8. */
+ operands[4] = adjust_address (mem, DImode, 0);
+ operands[5] = operands[1];
+ operands[6] = adjust_address (mem, DImode, 8);
+ operands[7] = operands[2];
+}
+ ;; The "type" attribute lists one value per constraint alternative.
+ ;; NOTE(review): the length of 8 does not appear to account for the extra
+ ;; address-copy insn emitted above -- confirm whether that matters.
+ [(set_attr "length" "8")
+ (set_attr "type" "store,fpstore,fpstore,store")])
+
+;; Optimize creating a vector with 2 duplicate DImode elements and storing it.
+(define_insn_and_split "*dupv2di_store"
+ [(set (match_operand:V2DI 0 "memory_operand" "=m,m")
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "gpc_reg_operand" "r,wa")))
+ ;; Operand 2 is only used by the split code below, as the replacement
+ ;; address register when the original address cannot be used as is.
+ (clobber (match_scratch:DI 2 "=&b,&b"))]
+ "TARGET_DIRECT_MOVE_64BIT"
+ ;; The "#" template means there is no assembler output for this insn; it
+ ;; has to be split into the two stores below.
+ "#"
+ "&& 1"
+ ;; Both stores write operand 1; operands 3 and 4 are the two destination
+ ;; memory references set up by the C code below.
+ [(set (match_dup 3)
+ (match_dup 1))
+ (set (match_dup 4)
+ (match_dup 1))]
+{
+ rtx mem = operands[0];
+
+ /* If the address can't be used directly for both stores, copy it to the
+ temporary base register. */
+ if (!ds_form_memory (mem, V2DImode))
+ {
+ rtx old_addr = XEXP (mem, 0);
+ rtx new_addr = operands[2];
+ /* The scratch operand may still be a SCRATCH rtx rather than a register
+ (presumably when this runs before register allocation); use a new
+ pseudo register in that case. */
+ if (GET_CODE (new_addr) == SCRATCH)
+ new_addr = gen_reg_rtx (Pmode);
+
+ /* NOTE(review): the new address is equivalent to the old one, so
+ replace_equiv_address may be preferable to change_address in order to
+ keep the MEM attributes -- confirm. */
+ emit_move_insn (new_addr, old_addr);
+ mem = change_address (mem, VOIDmode, new_addr);
+ }
+
+ /* The two DImode destinations are at byte offsets 0 and 8 of the original
+ vector memory reference. */
+ operands[3] = adjust_address (mem, DImode, 0);
+ operands[4] = adjust_address (mem, DImode, 8);
+}
+ ;; The "type" attribute lists one value per constraint alternative.
+ ;; NOTE(review): the length of 8 does not appear to account for the extra
+ ;; address-copy insn emitted above -- confirm whether that matters.
+ [(set_attr "length" "8")
+ (set_attr "type" "store,fpstore")])
+
;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles. This is used to initialize a V4SF vector with 4 floats
diff --git a/gcc/testsuite/gcc.target/powerpc/pr81594.c b/gcc/testsuite/gcc.target/powerpc/pr81594.c
new file mode 100644
index 00000000000..35a9749db38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr81594.c
@@ -0,0 +1,61 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+
+/* PR target/81594. Optimize creating a vector of two 64-bit elements and
+ then storing it: the vector should be written with two separate stores. */
+
+/* Store the vector { a, b } to *p (no offset from the pointer). */
+void
+store_v2di_0 (vector unsigned long long *p,
+ unsigned long long a,
+ unsigned long long b)
+{
+ *p = (vector unsigned long long) { a, b };
+}
+
+/* Store the vector { a, b } to p[4], i.e. at a small non-zero offset
+ (4 vectors of two 64-bit elements = 64 bytes) from the pointer. */
+void
+store_v2di_4 (vector unsigned long long *p,
+ unsigned long long a,
+ unsigned long long b)
+{
+ p[4] = (vector unsigned long long) { a, b };
+}
+
+/* Store a vector with both elements set to A to *p (no offset from the
+ pointer). The vector literal uses the same element type as *p, like the
+ other functions in this test. */
+void
+store_v2di_splat_0 (vector unsigned long long *p, unsigned long long a)
+{
+ *p = (vector unsigned long long) { a, a };
+}
+
+/* Store a vector with both elements set to A to p[8], i.e. at a small
+ non-zero offset (8 vectors of two 64-bit elements = 128 bytes) from the
+ pointer. */
+void
+store_v2di_splat_8 (vector unsigned long long *p, unsigned long long a)
+{
+ p[8] = (vector unsigned long long) { a, a };
+}
+
+/* 2047 is the largest index that can be used with DS-form instructions: the
+ two 64-bit halves are at byte offsets 2047 * 16 = 32752 and 32760, which
+ both still fit in a signed 16-bit offset. */
+void
+store_v2di_2047 (vector unsigned long long *p,
+ unsigned long long a,
+ unsigned long long b)
+{
+ p[2047] = (vector unsigned long long) { a, b };
+}
+
+/* 2048 will require the constant to be loaded because we can't use a pair of
+ DS-form instructions (the byte offset 2048 * 16 = 32768 no longer fits in a
+ signed 16-bit offset). If we have prefixed addressing, a prefixed form
+ will be generated instead. Two separate stores should still be issued. */
+void
+store_v2di_2048 (vector unsigned long long *p,
+ unsigned long long a,
+ unsigned long long b)
+{
+ p[2048] = (vector unsigned long long) { a, b };
+}
+
+/* { dg-final { scan-assembler-not {\mstxv\M} } } */
+/* { dg-final { scan-assembler-not {\mstxvx\M} } } */
+/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
+/* { dg-final { scan-assembler-not {\mmtvsrd\M} } } */
+/* { dg-final { scan-assembler-not {\mmtvsrdd\M} } } */
+/* { dg-final { scan-assembler-not {\mxxpermdi\M} } } */
More information about the Gcc-cvs
mailing list