[PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]

Wed Nov 24 07:05:26 GMT 2021


>-----Original Message-----
>From: Kong, Lingling <lingling.kong@intel.com>
>Sent: Wednesday, November 24, 2021 2:25 PM
>To: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org
>Cc: Kong, Lingling <lingling.kong@intel.com>
>Subject: RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert
>_Float16 to SFmode with -mf16c [PR 102811]
>
>Hi,
>
>vcvtph2ps and vcvtps2ph should be used to convert between _Float16 and
>SFmode with -mf16c.  This patch adds define_expand patterns extendhfsf2
>and truncsfhf2 for TARGET_F16C; the temporary vector register is zeroed
>before the conversion.  It also updates movhi_internal and
>ix86_can_change_mode_class.
>
>OK for master?
>
>gcc/ChangeLog:
>
>	PR target/102811
>	* config/i386/i386.c (ix86_can_change_mode_class): SSE2 can load
>	16bit data to sse register via pinsrw.
>	* config/i386/i386.md (extendhfsf2): Add extendhfsf2 for f16c.
>	(extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2,
>	extendhfdf2.  extendhfdf2 only for target_avx512fp16.
>	(*extendhf<mode>2): Rename from extendhf<mode>2.
>	(truncsfhf2): Add truncsfhf2 for f16c.
>	(truncdfhf2): Split trunc<mode>hf2 into separate truncsfhf2,
>	truncdfhf2.  truncdfhf2 only for target_avx512fp16.
>	(*trunc<mode>hf2): Rename from trunc<mode>hf2.
>
>gcc/testsuite/ChangeLog:
>
>	PR target/102811
>	* gcc.target/i386/pr90773-21.c: Adjust scan-assembler pattern to
>	accept pextrw as well as movw, since movhi_internal now optimizes
>	vmovd + movw to vpextrw.
>	* gcc.target/i386/pr90773-23.c: Ditto.
>	* gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
>---
> gcc/config/i386/i386.c                        |  5 +-
> gcc/config/i386/i386.md                       | 74 +++++++++++++++++--
> .../i386/avx512vl-vcvtps2ph-pr102811.c        | 11 +++
> gcc/testsuite/gcc.target/i386/pr90773-21.c    |  2 +-
> gcc/testsuite/gcc.target/i386/pr90773-23.c    |  2 +-
> 5 files changed, 83 insertions(+), 11 deletions(-)  create mode 100644
>gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
>diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index
>e94efdf39fb..4b813533961 100644
>--- a/gcc/config/i386/i386.c
>+++ b/gcc/config/i386/i386.c
>@@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode
>from, machine_mode to,
> 	 disallow a change to these modes, reload will assume it's ok to
> 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
> 	 the vec_dupv4hi pattern.
>-	 NB: AVX512FP16 supports vmovw which can load 16bit data to sse
>-	 register.  */
>-      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ?
>2 : 4;
>+	 NB: SSE2 can load 16bit data to sse register via pinsrw.  */
>+      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 :
>+4;
>       if (GET_MODE_SIZE (from) < mov_size)
> 	return false;
>     }
>diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index
>6eb9de81921..6ee264f1151 100644
>--- a/gcc/config/i386/i386.md
>+++ b/gcc/config/i386/i386.md
>@@ -2525,6 +2525,16 @@
>     case TYPE_SSEMOV:
>       return ix86_output_ssemov (insn, operands);
>
>+    case TYPE_SSELOG:
>+      if (SSE_REG_P (operands[0]))
>+	return MEM_P (operands[1])
>+	  ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
>+	  : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
>+      else
>+	return MEM_P (operands[1])
>+	  ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
>+	  : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
>+
>     case TYPE_MSKLOG:
>       if (operands[1] == const0_rtx)
> 	return "kxorw\t%0, %0, %0";
>@@ -2540,13 +2550,17 @@
>     }
> }
>   [(set (attr "isa")
>-	(cond [(eq_attr "alternative" "9,10,11,12,13")
>-		  (const_string "avx512fp16")
>+	(cond [(eq_attr "alternative" "9,10,11,12")
>+		  (const_string "sse2")
>+	       (eq_attr "alternative" "13")
>+		  (const_string "sse4")
> 	       ]
> 	       (const_string "*")))
>    (set (attr "type")
>      (cond [(eq_attr "alternative" "9,10,11,12,13")
>-	      (const_string "ssemov")
>+	      (if_then_else (match_test "TARGET_AVX512FP16")
>+		(const_string "ssemov")
>+		(const_string "sselog"))
> 	    (eq_attr "alternative" "4,5,6,7")
> 	      (const_string "mskmov")
> 	    (eq_attr "alternative" "8")
>@@ -4574,8 +4588,32 @@
>   emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
> })
>
>-(define_insn "extendhf<mode>2"
>-  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
>+(define_expand "extendhfsf2"
>+  [(set (match_operand:SF 0 "register_operand")
>+	(float_extend:SF
>+	  (match_operand:HF 1 "nonimmediate_operand")))]
>+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
>+{
>+  if (!TARGET_AVX512FP16)
>+    {
>+      rtx res = gen_reg_rtx (V4SFmode);
>+      rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
>+
>+      ix86_expand_vector_set (false, tmp, operands[1], 0);
>+      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
>+      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
>+      DONE;
>+    }
>+})
>+
>+(define_expand "extendhfdf2"
>+  [(set (match_operand:DF 0 "register_operand")
>+	(float_extend:DF
>+	  (match_operand:HF 1 "nonimmediate_operand")))]
>+  "TARGET_AVX512FP16")
>+
>+(define_insn "*extendhf<mode>2"
>+  [(set (match_operand:MODEF 0 "register_operand" "=v")
>         (float_extend:MODEF
> 	  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
>   "TARGET_AVX512FP16"
>@@ -4766,7 +4804,31 @@
>
> ;; Conversion from {SF,DF}mode to HFmode.
>
>-(define_insn "trunc<mode>hf2"
>+(define_expand "truncsfhf2"
>+  [(set (match_operand:HF 0 "register_operand")
>+	(float_truncate:HF
>+	  (match_operand:SF 1 "nonimmediate_operand")))]
>+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
>+  {
>+    if (!TARGET_AVX512FP16)
>+    {
>+      rtx res = gen_reg_rtx (V8HFmode);
>+      rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
>+
>+      ix86_expand_vector_set (false, tmp, operands[1], 0);
>+      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT
>(4)));
>+      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
>+      DONE;
>+    }
>+  })
>+
>+(define_expand "truncdfhf2"
>+  [(set (match_operand:HF 0 "register_operand")
>+	(float_truncate:HF
>+	  (match_operand:DF 1 "nonimmediate_operand")))]
>+  "TARGET_AVX512FP16")
>+
>+(define_insn "*trunc<mode>hf2"
>   [(set (match_operand:HF 0 "register_operand" "=v")
>        (float_truncate:HF
>          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff --git
>a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>new file mode 100644
>index 00000000000..dfbfb167953
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>@@ -0,0 +1,11 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
>+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
>+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
>+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
>+/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
>+/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
>+_Float16 test (_Float16 a, _Float16 b)
>+{
>+  return a + b;
>+}
>diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c
>b/gcc/testsuite/gcc.target/i386/pr90773-21.c
>index 5bbb387a3ea..0d620fff83c 100644
>--- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
>+++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
>@@ -10,4 +10,4 @@ foo (int c)
> }
>
> /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+,
>\\(%\[\^,\]+\\)" 1 } } */
>-/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } }
>*/
>+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*,
>+32\\(%\[\^,\]+\\)" 1 } } */
>diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c
>b/gcc/testsuite/gcc.target/i386/pr90773-23.c
>index ca4a86f30b8..b7369e802e1 100644
>--- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
>+++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
>@@ -10,4 +10,4 @@ foo (void)
> }
>
> /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+,
>\\(%\[\^,\]+\\)" 1 } } */
>-/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } }
>*/
>+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+,
>+32\\(%\[\^,\]+\\)" 1 } } */
>--
>2.18.1