[PATCH] rs6000: Enhance lowpart/highpart DI->SF by mtvsrws/mtvsrd
Jiufu Guo
guojiufu@linux.ibm.com
Fri Feb 17 01:34:05 GMT 2023
Hi,
Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611823.html
This patch does not define a new insn for mtvsrws, but uses the existing one.
As mentioned in PR108338, on p9, we could use mtvsrws to implement
the bitcast from SI#0 to SF (or lowpart DI to SF).
For code:
*(long long*)buff = di;
float f = *(float*)(buff);
We currently generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of
"mtvsrws 1,3 ; xscvspdpn 1,1".
This patch updates this, and also enhances the bitcast from highpart
DI to SF.
Bootstrap and regtests pass on ppc64{,le}.
Is this ok for trunk?
BR,
Jeff (Jiufu)
PR target/108338
gcc/ChangeLog:
* config/rs6000/predicates.md (lowpart_subreg_operator): New
define_predicate.
* config/rs6000/rs6000.md (any_rshift): New code_iterator.
(movsf_from_si): Update to generate mtvsrws.
(movsf_from_si2): Rename to...
(movsf_from_si2_<code>): ... this.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/pr108338.c: New test.
---
gcc/config/rs6000/predicates.md | 5 +++
gcc/config/rs6000/rs6000.md | 34 +++++++++++------
gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++
3 files changed, 70 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..e57c9d99c6b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address"
else
return false;
})
+
+(define_predicate "lowpart_subreg_operator"
+ (and (match_code "subreg")
+ (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
+ == SUBREG_BYTE (op)")))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4a7812fa592..74b1c9cee6a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8200,13 +8200,24 @@ (define_insn_and_split "movsf_from_si"
{
rtx op0 = operands[0];
rtx op1 = operands[1];
- rtx op2 = operands[2];
- rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
- /* Move SF value to upper 32-bits for xscvspdpn. */
- emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
- emit_insn (gen_p8_mtvsrd_sf (op0, op2));
- emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ if (TARGET_P9_VECTOR)
+ {
+ rtx op0_v = gen_rtx_REG (V4SImode, REGNO (op0));
+ emit_insn (gen_vsx_splat_v4si (op0_v, op1));
+ emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ }
+ else
+ {
+ rtx op2 = operands[2];
+ rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
+
+ /* Move SF value to upper 32-bits for xscvspdpn. */
+ emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+ emit_insn (gen_p8_mtvsrd_sf (op0, op2));
+ emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ }
+
DONE;
}
[(set_attr "length"
@@ -8219,18 +8230,19 @@ (define_insn_and_split "movsf_from_si"
"*, *, p9v, p8v, *, *,
p8v, p8v, p8v, *")])
+(define_code_iterator any_rshift [ashiftrt lshiftrt])
+
;; For extracting high part element from DImode register like:
;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
;; split it before reload with "and mask" to avoid generating shift right
;; 32 bit then shift left 32 bit.
-(define_insn_and_split "movsf_from_si2"
+(define_insn_and_split "movsf_from_si2_<code>"
[(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
(unspec:SF
- [(subreg:SI
- (ashiftrt:DI
+ [(match_operator:SI 3 "lowpart_subreg_operator"
+ [(any_rshift:DI
(match_operand:DI 1 "input_operand" "r")
- (const_int 32))
- 0)]
+ (const_int 32))])]
UNSPEC_SF_FROM_SI))
(clobber (match_scratch:DI 2 "=r"))]
"TARGET_NO_SF_SUBREG"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
new file mode 100644
index 00000000000..2438dc13f41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
@@ -0,0 +1,42 @@
+// { dg-do run }
+// { dg-options "-O2 -save-temps" }
+
+float __attribute__ ((noipa)) sf_from_di_off0 (long long l)
+{
+ char buff[16];
+ *(long long*)buff = l;
+ float f = *(float*)(buff);
+ return f;
+}
+
+float __attribute__ ((noipa)) sf_from_di_off4 (long long l)
+{
+ char buff[16];
+ *(long long*)buff = l;
+ float f = *(float*)(buff + 4);
+ return f;
+}
+
+/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
+
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
+
+union di_sf_sf
+{
+ struct {float f1; float f2;};
+ long long l;
+};
+
+int main()
+{
+ union di_sf_sf v;
+ v.f1 = 1.0f;
+ v.f2 = 2.0f;
+ if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f )
+ __builtin_abort ();
+ return 0;
+}
--
2.31.1
More information about the Gcc-patches
mailing list