[PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
Kong, Lingling
lingling.kong@intel.com
Wed Nov 24 06:24:43 GMT 2021
Hi,
vcvtph2ps and vcvtps2ph should be used to convert between _Float16 and SFmode with -mf16c. This patch adds define_expand patterns extendhfsf2 and truncsfhf2 for TARGET_F16C.
The temporary vector register is zeroed before the conversion, and movhi_internal and ix86_can_change_mode_class are updated accordingly.
OK for master?
gcc/ChangeLog:
PR target/102811
* config/i386/i386.c (ix86_can_change_mode_class): SSE2 can load 16bit data
to sse register via pinsrw.
* config/i386/i386.md (extendhfsf2): Add extendhfsf2 for f16c.
(extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2 and extendhfdf2.
extendhfdf2 is enabled only for TARGET_AVX512FP16.
(*extendhf<mode>2): Rename from extendhf<mode>2.
(truncsfhf2): Likewise.
(truncdfhf2): Likewise.
(*trunc<mode>hf2): Likewise.
gcc/testsuite/ChangeLog:
PR target/102811
* gcc.target/i386/pr90773-21.c: Adjust scan-assembler pattern for the
optimized movhi_internal, which turns vmovd + movw into vpextrw.
* gcc.target/i386/pr90773-23.c: Ditto.
* gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
---
gcc/config/i386/i386.c | 5 +-
gcc/config/i386/i386.md | 74 +++++++++++++++++--
.../i386/avx512vl-vcvtps2ph-pr102811.c | 11 +++
gcc/testsuite/gcc.target/i386/pr90773-21.c | 2 +-
gcc/testsuite/gcc.target/i386/pr90773-23.c | 2 +-
5 files changed, 83 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e94efdf39fb..4b813533961 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
disallow a change to these modes, reload will assume it's ok to
drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
the vec_dupv4hi pattern.
- NB: AVX512FP16 supports vmovw which can load 16bit data to sse
- register. */
- int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
+ NB: SSE2 can load 16bit data to sse register via pinsrw. */
+ int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
if (GET_MODE_SIZE (from) < mov_size)
return false;
}
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6eb9de81921..6ee264f1151 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2525,6 +2525,16 @@
case TYPE_SSEMOV:
return ix86_output_ssemov (insn, operands);
+ case TYPE_SSELOG:
+ if (SSE_REG_P (operands[0]))
+ return MEM_P (operands[1])
+ ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
+ : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+ else
+ return MEM_P (operands[1])
+ ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
+ : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
+
case TYPE_MSKLOG:
if (operands[1] == const0_rtx)
return "kxorw\t%0, %0, %0";
@@ -2540,13 +2550,17 @@
}
}
[(set (attr "isa")
- (cond [(eq_attr "alternative" "9,10,11,12,13")
- (const_string "avx512fp16")
+ (cond [(eq_attr "alternative" "9,10,11,12")
+ (const_string "sse2")
+ (eq_attr "alternative" "13")
+ (const_string "sse4")
]
(const_string "*")))
(set (attr "type")
(cond [(eq_attr "alternative" "9,10,11,12,13")
- (const_string "ssemov")
+ (if_then_else (match_test "TARGET_AVX512FP16")
+ (const_string "ssemov")
+ (const_string "sselog"))
(eq_attr "alternative" "4,5,6,7")
(const_string "mskmov")
(eq_attr "alternative" "8")
@@ -4574,8 +4588,32 @@
emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
})
-(define_insn "extendhf<mode>2"
- [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
+(define_expand "extendhfsf2"
+ [(set (match_operand:SF 0 "register_operand")
+ (float_extend:SF
+ (match_operand:HF 1 "nonimmediate_operand")))]
+ "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+{
+ if (!TARGET_AVX512FP16)
+ {
+ rtx res = gen_reg_rtx (V4SFmode);
+ rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
+
+ ix86_expand_vector_set (false, tmp, operands[1], 0);
+ emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
+ emit_move_insn (operands[0], gen_lowpart (SFmode, res));
+ DONE;
+ }
+})
+
+(define_expand "extendhfdf2"
+ [(set (match_operand:DF 0 "register_operand")
+ (float_extend:DF
+ (match_operand:HF 1 "nonimmediate_operand")))]
+ "TARGET_AVX512FP16")
+
+(define_insn "*extendhf<mode>2"
+ [(set (match_operand:MODEF 0 "register_operand" "=v")
(float_extend:MODEF
(match_operand:HF 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512FP16"
@@ -4766,7 +4804,31 @@
;; Conversion from {SF,DF}mode to HFmode.
-(define_insn "trunc<mode>hf2"
+(define_expand "truncsfhf2"
+ [(set (match_operand:HF 0 "register_operand")
+ (float_truncate:HF
+ (match_operand:SF 1 "nonimmediate_operand")))]
+ "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+ {
+ if (!TARGET_AVX512FP16)
+ {
+ rtx res = gen_reg_rtx (V8HFmode);
+ rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
+
+ ix86_expand_vector_set (false, tmp, operands[1], 0);
+ emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
+ emit_move_insn (operands[0], gen_lowpart (HFmode, res));
+ DONE;
+ }
+ })
+
+(define_expand "truncdfhf2"
+ [(set (match_operand:HF 0 "register_operand")
+ (float_truncate:HF
+ (match_operand:DF 1 "nonimmediate_operand")))]
+ "TARGET_AVX512FP16")
+
+(define_insn "*trunc<mode>hf2"
[(set (match_operand:HF 0 "register_operand" "=v")
(float_truncate:HF
(match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
new file mode 100644
index 00000000000..dfbfb167953
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
+/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
+_Float16 test (_Float16 a, _Float16 b)
+{
+ return a + b;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
index 5bbb387a3ea..0d620fff83c 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
@@ -10,4 +10,4 @@ foo (int c)
}
/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*, 32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
index ca4a86f30b8..b7369e802e1 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
@@ -10,4 +10,4 @@ foo (void)
}
/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
--
2.18.1
More information about the Gcc-patches
mailing list