[PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]

Tue Nov 16 08:48:47 GMT 2021

On Tue, Nov 16, 2021 at 9:15 AM Kong, Lingling via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
>
> OK for master?

No, this is the wrong approach. There can be invalid values in the
high elements of the vector, so these should be cleared before
conversion.

Please see the attached (unfinished) patch and use it as a starting
point. Please note that we can now allow 2-byte values in SSE
registers, so movhi_internal and ix86_can_change_mode_class should be
updated accordingly.

Uros.
>
> gcc/ChangeLog:
>
>         PR target/102811
>         * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c.
>         (extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2, extendhfdf2.
>         (truncsfhf2): Likewise.
>         (truncdfhf2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/102811
>         * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
> ---
>  gcc/config/i386/i386.md                       | 48 +++++++++++++++----
>  .../i386/avx512vl-vcvtps2ph-pr102811.c        | 10 ++++
>  2 files changed, 49 insertions(+), 9 deletions(-)  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6eb9de81921..c5415475342 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -4574,15 +4574,30 @@
>    emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
>  })
>
> -(define_insn "extendhf<mode>2"
> -  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
> -        (float_extend:MODEF
> +(define_insn "extendhfsf2"
> +  [(set (match_operand:SF 0 "register_operand" "=v")
> +       (float_extend:SF
> +         (match_operand:HF 1 "nonimmediate_operand" "vm")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +{
> +  if (TARGET_AVX512FP16)
> +    return "vcvtsh2ss\t{%1, %0, %0|%0, %0, %1}";
> +  else
> +    return "vcvtph2ps\t{%1, %0|%0, %1}"; }
> +  [(set_attr "type" "ssecvt")
> +   (set_attr "prefix" "maybe_evex")
> +   (set_attr "mode" "SF")])
> +
> +(define_insn "extendhfdf2"
> +  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=v")
> +       (float_extend:DF
>           (match_operand:HF 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512FP16"
> -  "vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}"
> +  "vcvtsh2sd\t{%1, %0, %0|%0, %0, %1}"
>    [(set_attr "type" "ssecvt")
>     (set_attr "prefix" "evex")
> -   (set_attr "mode" "<MODE>")])
> +   (set_attr "mode" "DF")])
>
>
>  (define_expand "extend<mode>xf2"
> @@ -4766,12 +4781,27 @@
>
>  ;; Conversion from {SF,DF}mode to HFmode.
>
> -(define_insn "trunc<mode>hf2"
> +(define_insn "truncsfhf2"
> +  [(set (match_operand:HF 0 "register_operand" "=v")
> +       (float_truncate:HF
> +         (match_operand:SF 1 "nonimmediate_operand" "vm")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +  {
> +    if (TARGET_AVX512FP16)
> +      return "vcvtss2sh\t{%1, %d0|%d0, %1}";
> +    else
> +      return "vcvtps2ph\t{0, %1, %0|%0, %1, 0}";
> +  }
> +  [(set_attr "type" "ssecvt")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "HF")])
> +
> +(define_insn "truncdfhf2"
>    [(set (match_operand:HF 0 "register_operand" "=v")
> -       (float_truncate:HF
> -         (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
> +       (float_truncate:HF
> +         (match_operand:DF 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512FP16"
> -  "vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}"
> +  "vcvtsd2sh\t{%1, %d0|%d0, %1}"
>    [(set_attr "type" "ssecvt")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "HF")])
> diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> new file mode 100644
> index 00000000000..ab44a304a03
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
> +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
> +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
> +/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
> +_Float16 test (_Float16 a, _Float16 b)
> +{
> +  return a + b;
> +}
> --
> 2.18.1
>
-------------- next part --------------

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9cc903e826b..21a3a45d22c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19462,9 +19462,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
 	 disallow a change to these modes, reload will assume it's ok to
 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
 	 the vec_dupv4hi pattern.
-	 NB: AVX512FP16 supports vmovw which can load 16bit data to sse
-	 register.  */
-      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
+	 NB: SSE2 can load 16bit data to sse register via pinsrw.  */
+      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
       if (GET_MODE_SIZE (from) < mov_size)
 	return false;
     }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e733a40fc90..27e3772c138 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2540,8 +2540,10 @@
     }
 }
   [(set (attr "isa")
-	(cond [(eq_attr "alternative" "9,10,11,12,13")
-		  (const_string "avx512fp16")
+	(cond [(eq_attr "alternative" "9,10,11,12")
+		  (const_string "sse2")
+	       (eq_attr "alternative" "13")
+		 (const_string "sse4")
 	       ]
 	       (const_string "*")))
    (set (attr "type")
@@ -4574,8 +4576,42 @@
   emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
 })
 
-(define_insn "extendhf<mode>2"
-  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
+(define_expand "extendhfsf2"
+  [(set (match_operand:SF 0 "register_operand")
+        (float_extend:SF
+	  (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_F16C || TARGET_AVX512FP16"
+{
+  if (!TARGET_AVX512FP16)
+    {
+#if 0
+      rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
+      rtx op = lowpart_subreg (V8HFmode,
+			       force_reg (HFmode, operands[1]), HFmode);
+      rtx res = lowpart_subreg (V4SFmode, operands[0], SFmode);
+      rtx tmp = gen_reg_rtx (V8HFmode);
+
+      emit_insn (gen_sse4_1_pblendph (tmp, op, zero, const1_rtx));
+#endif
+
+      rtx res = gen_reg_rtx (V4SFmode);
+      rtx tmp = gen_reg_rtx (V8HFmode);
+
+      ix86_expand_vector_set (false, tmp, operands[1], 0);
+      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
+      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
+      DONE;
+    }
+})
+
+(define_expand "extendhfdf2"
+  [(set (match_operand:DF 0 "register_operand")
+        (float_extend:DF
+	  (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*extendhf<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "=v")
         (float_extend:MODEF
 	  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512FP16"
@@ -4584,7 +4620,6 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
-
 (define_expand "extend<mode>xf2"
   [(set (match_operand:XF 0 "nonimmediate_operand")
         (float_extend:XF (match_operand:MODEF 1 "general_operand")))]
@@ -4766,7 +4801,31 @@
 
 ;; Conversion from {SF,DF}mode to HFmode.
 
-(define_insn "trunc<mode>hf2"
+(define_expand "truncsfhf2"
+  [(set (match_operand:HF 0 "register_operand" "=v")
+       (float_truncate:HF
+         (match_operand:SF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_F16C || TARGET_AVX512FP16"
+{
+  if (!TARGET_AVX512FP16)
+    {
+      rtx res = gen_reg_rtx (V8HFmode);
+      rtx tmp = gen_reg_rtx (V4SFmode);
+
+      ix86_expand_vector_set (false, tmp, operands[1], 0);
+      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
+      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
+      DONE;
+    }
+})
+
+(define_expand "truncdfhf2"
+  [(set (match_operand:HF 0 "register_operand" "=v")
+       (float_truncate:HF
+         (match_operand:DF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*trunc<mode>hf2"
   [(set (match_operand:HF 0 "register_operand" "=v")
        (float_truncate:HF
          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index fbf056bf9e6..7125801724f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9969,6 +9969,28 @@
 	   ]
 	   (symbol_ref "true")))])
 
+(define_insn "*vec_set<mode>_0"
+  [(set (match_operand:V8_128 0 "register_operand" "=v,v,x,x,x,x,x,x")
+	(vec_merge:V8_128
+	  (vec_duplicate:V8_128
+	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,x,r,m,x"))
+	  (match_operand:V8_128 1 "reg_or_0_operand" "C,C,0,0,0,x,x,x")
+	  (const_int 1)))]
+  "TARGET_SSE2"
+  "@
+   vmovw\t{%k2, %x0|%x0, %k2}
+   vmovw\t{%2, %x0|%x0, %2}
+   pinsr<sseintmodesuffix>\t{$0, %k2, %0|%0, %k2, $0}
+   pinsr<sseintmodesuffix>\t{$0, %2, %0|%0, %2, $0}
+   pblendw\t{$1, %2, %0|%0, %2, $1}
+   vpinsr<sseintmodesuffix>\t{$0, %k2, %1, %0|%0, %1, %k2, $0}
+   vpinsr<sseintmodesuffix>\t{$0, %2, %1, %0|%0, %1, %2, $0}
+   vpblendw\t{$1, %2, %1, %0|%0, %1, %2, $1}"
+  [(set_attr "isa" "avx512fp16,avx512fp16,noavx,noavx,sse4_noavx,avx,avx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "HF")])
+
 ;; vmovw clears also the higer bits
 (define_insn "vec_set<mode>_0"
   [(set (match_operand:VI2F 0 "register_operand" "=v,v")