[PATCH 2/6] [i386] Enable _Float16 type for TARGET_SSE2 and above.

liuhongt hongtao.liu@intel.com
Mon Aug 2 06:31:12 GMT 2021


gcc/ChangeLog:

	* config/i386/i386-modes.def (FLOAT_MODE): Define ieee HFmode.
	* config/i386/i386.c (enum x86_64_reg_class): Add
	X86_64_SSEHF_CLASS.
	(merge_classes): Handle X86_64_SSEHF_CLASS.
	(examine_argument): Ditto.
	(construct_container): Ditto.
	(classify_argument): Ditto, and set HFmode/HCmode to
	X86_64_SSEHF_CLASS.
	(function_value_32): Return _Float16/Complex Float16 by
	%xmm0.
	(function_value_64): Return _Float16/Complex Float16 by SSE
	register.
	(ix86_print_operand): Handle CONST_DOUBLE HFmode.
	(ix86_secondary_reload): Require gpr as intermediate register
	to store _Float16 from sse register when sse4 is not
	available.
	(ix86_libgcc_floating_mode_supported_p): Enable _Float16 under
	sse2.
	(ix86_scalar_mode_supported_p): Ditto.
	(TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P): Defined.
	* config/i386/i386.h (VALID_SSE2_REG_MODE): Add HFmode.
	(VALID_INT_MODE_P): Add HFmode and HCmode.
	* config/i386/i386.md (*pushhf_rex64): New define_insn.
	(*pushhf): Ditto.
	(*movhf_internal): Ditto.
	* doc/extend.texi (Half-Precision Floating Point): Document
	_Float16 for x86.
	* emit-rtl.c (validate_subreg): Allow (subreg:SI (reg:HF) 0)
	which is used by extract_bit_field but not backends.

gcc/lto/ChangeLog:

	* lto-lang.c (lto_type_for_mode): Return float16_type_node
	when mode == TYPE_MODE (float16_type_node).

gcc/testsuite/ChangeLog:

	* gcc.target/i386/sse2-float16-1.c: New test.
	* gcc.target/i386/sse2-float16-2.c: Ditto.
	* gcc.target/i386/sse2-float16-3.c: Ditto.
	* gcc.target/i386/float16-5.c: Ditto.
---
 gcc/config/i386/i386-modes.def                |   1 +
 gcc/config/i386/i386.c                        |  91 +++++++++++++-
 gcc/config/i386/i386.h                        |   3 +-
 gcc/config/i386/i386.md                       | 118 +++++++++++++++++-
 gcc/doc/extend.texi                           |  13 ++
 gcc/emit-rtl.c                                |   5 +
 gcc/lto/lto-lang.c                            |   3 +
 gcc/testsuite/gcc.target/i386/float16-5.c     |  12 ++
 .../gcc.target/i386/sse2-float16-1.c          |   8 ++
 .../gcc.target/i386/sse2-float16-2.c          |  16 +++
 .../gcc.target/i386/sse2-float16-3.c          |  12 ++
 11 files changed, 274 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/float16-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-3.c

diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 4e7014be034..9232f59a925 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
 
 FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_intel_96_format);
 FLOAT_MODE (TF, 16, ieee_quad_format);
+FLOAT_MODE (HF, 2, ieee_half_format);
 
 /* In ILP32 mode, XFmode has size 12 and alignment 4.
    In LP64 mode, XFmode has size and alignment 16.  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ff96134fb37..7979e240426 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -387,6 +387,7 @@ enum x86_64_reg_class
     X86_64_INTEGER_CLASS,
     X86_64_INTEGERSI_CLASS,
     X86_64_SSE_CLASS,
+    X86_64_SSEHF_CLASS,
     X86_64_SSESF_CLASS,
     X86_64_SSEDF_CLASS,
     X86_64_SSEUP_CLASS,
@@ -2023,8 +2024,10 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
     return X86_64_MEMORY_CLASS;
 
   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
-  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
-      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
+  if ((class1 == X86_64_INTEGERSI_CLASS
+       && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
+      || (class2 == X86_64_INTEGERSI_CLASS
+	  && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
     return X86_64_INTEGERSI_CLASS;
   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
@@ -2178,6 +2181,8 @@ classify_argument (machine_mode mode, const_tree type,
 	    /* The partial classes are now full classes.  */
 	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
 	      subclasses[0] = X86_64_SSE_CLASS;
+	    if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
+	      subclasses[0] = X86_64_SSE_CLASS;
 	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
 		&& !((bit_offset % 64) == 0 && bytes == 4))
 	      subclasses[0] = X86_64_INTEGER_CLASS;
@@ -2350,6 +2355,12 @@ classify_argument (machine_mode mode, const_tree type,
       gcc_unreachable ();
     case E_CTImode:
       return 0;
+    case E_HFmode:
+      if (!(bit_offset % 64))
+	classes[0] = X86_64_SSEHF_CLASS;
+      else
+	classes[0] = X86_64_SSE_CLASS;
+      return 1;
     case E_SFmode:
       if (!(bit_offset % 64))
 	classes[0] = X86_64_SSESF_CLASS;
@@ -2367,6 +2378,15 @@ classify_argument (machine_mode mode, const_tree type,
       classes[0] = X86_64_SSE_CLASS;
       classes[1] = X86_64_SSEUP_CLASS;
       return 2;
+    case E_HCmode:
+      classes[0] = X86_64_SSE_CLASS;
+      if (!(bit_offset % 64))
+	return 1;
+      else
+	{
+	  classes[1] = X86_64_SSEHF_CLASS;
+	  return 2;
+	}
     case E_SCmode:
       classes[0] = X86_64_SSE_CLASS;
       if (!(bit_offset % 64))
@@ -2481,6 +2501,7 @@ examine_argument (machine_mode mode, const_tree type, int in_return,
 	(*int_nregs)++;
 	break;
       case X86_64_SSE_CLASS:
+      case X86_64_SSEHF_CLASS:
       case X86_64_SSESF_CLASS:
       case X86_64_SSEDF_CLASS:
 	(*sse_nregs)++;
@@ -2580,13 +2601,14 @@ construct_container (machine_mode mode, machine_mode orig_mode,
 
   /* First construct simple cases.  Avoid SCmode, since we want to use
      single register to pass this type.  */
-  if (n == 1 && mode != SCmode)
+  if (n == 1 && mode != SCmode && mode != HCmode)
     switch (regclass[0])
       {
       case X86_64_INTEGER_CLASS:
       case X86_64_INTEGERSI_CLASS:
 	return gen_rtx_REG (mode, intreg[0]);
       case X86_64_SSE_CLASS:
+      case X86_64_SSEHF_CLASS:
       case X86_64_SSESF_CLASS:
       case X86_64_SSEDF_CLASS:
 	if (mode != BLKmode)
@@ -2683,6 +2705,14 @@ construct_container (machine_mode mode, machine_mode orig_mode,
 				   GEN_INT (i*8));
 	    intreg++;
 	    break;
+	  case X86_64_SSEHF_CLASS:
+	    exp [nexps++]
+	      = gen_rtx_EXPR_LIST (VOIDmode,
+				   gen_rtx_REG (HFmode,
+						GET_SSE_REGNO (sse_regno)),
+				   GEN_INT (i*8));
+	    sse_regno++;
+	    break;
 	  case X86_64_SSESF_CLASS:
 	    exp [nexps++]
 	      = gen_rtx_EXPR_LIST (VOIDmode,
@@ -3903,6 +3933,19 @@ function_value_32 (machine_mode orig_mode, machine_mode mode,
     /* Most things go in %eax.  */
     regno = AX_REG;
 
+  /* Return _Float16/_Complex _Float16 by sse register.  */
+  if (mode == HFmode)
+    regno = FIRST_SSE_REG;
+  if (mode == HCmode)
+    {
+      rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
+      XVECEXP (ret, 0, 0)
+	= gen_rtx_EXPR_LIST (VOIDmode,
+			     gen_rtx_REG (SImode, FIRST_SSE_REG),
+			     GEN_INT (0));
+      return ret;
+    }
+
   /* Override FP return register with %xmm0 for local functions when
      SSE math is enabled or for functions with sseregparm attribute.  */
   if ((fn || fntype) && (mode == SFmode || mode == DFmode))
@@ -3939,6 +3982,8 @@ function_value_64 (machine_mode orig_mode, machine_mode mode,
 
       switch (mode)
 	{
+	case E_HFmode:
+	case E_HCmode:
 	case E_SFmode:
 	case E_SCmode:
 	case E_DFmode:
@@ -13411,6 +13456,15 @@ ix86_print_operand (FILE *file, rtx x, int code)
 	  (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
     }
 
+  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
+    {
+      long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
+			       REAL_MODE_FORMAT (HFmode));
+      if (ASSEMBLER_DIALECT == ASM_ATT)
+	putc ('$', file);
+      fprintf (file, "0x%04x", (unsigned int) l);
+    }
+
   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
     {
       long l;
@@ -18928,6 +18982,16 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
       return NO_REGS;
     }
 
+  /* Require movement to gpr, and then store to memory.  */
+  if (mode == HFmode
+      && !TARGET_SSE4_1
+      && SSE_CLASS_P (rclass)
+      && !in_p && MEM_P (x))
+    {
+      sri->extra_cost = 1;
+      return GENERAL_REGS;
+    }
+
   /* This condition handles corner case where an expression involving
      pointers gets vectorized.  We're trying to use the address of a
      stack slot as a vector initializer.
@@ -21555,10 +21619,27 @@ ix86_scalar_mode_supported_p (scalar_mode mode)
     return default_decimal_float_supported_p ();
   else if (mode == TFmode)
     return true;
+  else if (mode == HFmode && TARGET_SSE2)
+    return true;
   else
     return default_scalar_mode_supported_p (mode);
 }
 
+/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE if
+   MODE is HFmode and SSE2 is enabled, else punt to the generic hook.  */
+
+static bool
+ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
+{
+  /* NB: Report HFmode as supported only when TARGET_SSE2 holds, which
+     matches the HFmode check in ix86_scalar_mode_supported_p.  Every
+     other mode, and HFmode without SSE2, is answered by
+     default_libgcc_floating_mode_supported_p.  */
+  return ((mode == HFmode && TARGET_SSE2)
+	  ? true
+	  : default_libgcc_floating_mode_supported_p (mode));
+}
+
 /* Implements target hook vector_mode_supported_p.  */
 static bool
 ix86_vector_mode_supported_p (machine_mode mode)
@@ -23820,6 +23901,10 @@ ix86_run_selftests (void)
 #undef TARGET_SCALAR_MODE_SUPPORTED_P
 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
 
+#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
+#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P	\
+ix86_libgcc_floating_mode_supported_p
+
 #undef TARGET_VECTOR_MODE_SUPPORTED_P
 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
 
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 0c2c93daf32..b1e66ee192e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1018,7 +1018,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
    || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode	\
-   || (MODE) == V2DImode || (MODE) == DFmode)
+   || (MODE) == V2DImode || (MODE) == DFmode || (MODE) == HFmode)
 
 #define VALID_SSE_REG_MODE(MODE)					\
   ((MODE) == V1TImode || (MODE) == TImode				\
@@ -1047,6 +1047,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == CQImode || (MODE) == CHImode				\
    || (MODE) == CSImode || (MODE) == CDImode				\
    || (MODE) == SDmode || (MODE) == DDmode				\
+   || (MODE) == HFmode || (MODE) == HCmode				\
    || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode	\
    || (TARGET_64BIT							\
        && ((MODE) == TImode || (MODE) == CTImode			\
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8b809c49fe0..d475347172d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1222,6 +1222,9 @@ (define_mode_iterator MODEF [SF DF])
 ;; All x87 floating point modes
 (define_mode_iterator X87MODEF [SF DF XF])
 
+;; All x87 floating point modes plus HF
+(define_mode_iterator X87MODEFH [SF DF XF HF])
+
 ;; All SSE floating point modes
 (define_mode_iterator SSEMODEF [SF DF TF])
 (define_mode_attr ssevecmodef [(SF "V4SF") (DF "V2DF") (TF "TF")])
@@ -3130,6 +3133,32 @@ (define_split
   operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
 })
 
+(define_insn "*pushhf_rex64"
+  [(set (match_operand:HF 0 "push_operand" "=X,X")
+	(match_operand:HF 1 "nonmemory_no_elim_operand" "r,x"))]
+  "TARGET_64BIT"
+{
+  /* Anything else should be already split before reg-stack.  */
+  gcc_assert (which_alternative == 0);
+  return "push{q}\t%q1";
+}
+  [(set_attr "isa"  "*,sse4")
+   (set_attr "type" "push,multi")
+   (set_attr "mode" "DI,TI")])
+
+(define_insn "*pushhf"
+  [(set (match_operand:HF 0 "push_operand" "=X,X")
+	(match_operand:HF 1 "general_no_elim_operand" "rmF,x"))]
+  "!TARGET_64BIT"
+{
+  /* Anything else should be already split before reg-stack.  */
+  gcc_assert (which_alternative == 0);
+  return "push{l}\t%k1";
+}
+  [(set_attr "isa"  "*,sse4")
+   (set_attr "type" "push,multi")
+   (set_attr "mode" "SI,TI")])
+
 (define_insn "*pushsf_rex64"
   [(set (match_operand:SF 0 "push_operand" "=X,X,X")
 	(match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))]
@@ -3158,10 +3187,11 @@ (define_insn "*pushsf"
    (set_attr "unit" "i387,*,*")
    (set_attr "mode" "SF,SI,SF")])
 
+(define_mode_iterator MODESH [SF HF])
 ;; %%% Kill this when call knows how to work this out.
 (define_split
-  [(set (match_operand:SF 0 "push_operand")
-	(match_operand:SF 1 "any_fp_register_operand"))]
+  [(set (match_operand:MODESH 0 "push_operand")
+	(match_operand:MODESH 1 "any_fp_register_operand"))]
   "reload_completed"
   [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
    (set (match_dup 0) (match_dup 1))]
@@ -3209,8 +3239,8 @@ (define_expand "movtf"
   "ix86_expand_move (TFmode, operands); DONE;")
 
 (define_expand "mov<mode>"
-  [(set (match_operand:X87MODEF 0 "nonimmediate_operand")
-	(match_operand:X87MODEF 1 "general_operand"))]
+  [(set (match_operand:X87MODEFH 0 "nonimmediate_operand")
+	(match_operand:X87MODEFH 1 "general_operand"))]
   ""
   "ix86_expand_move (<MODE>mode, operands); DONE;")
 
@@ -3646,6 +3676,86 @@ (define_insn "*movsf_internal"
 	   ]
 	   (const_string "*")))])
 
+(define_insn "*movhf_internal"
+ [(set (match_operand:HF 0 "nonimmediate_operand"
+	 "=?r,?m,v,v,?r,m,?v,v")
+       (match_operand:HF 1 "general_operand"
+	 "rmF,rF,C,v, v,v, r,m"))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+  && (lra_in_progress
+      || reload_completed
+      || !CONST_DOUBLE_P (operands[1])
+      || (TARGET_SSE && TARGET_SSE_MATH
+	  && standard_sse_constant_p (operands[1], HFmode) == 1)
+      || memory_operand (operands[0], HFmode))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOV:
+      return "mov{w}\t{%1, %0|%0, %1}";
+
+    case TYPE_SSELOG1:
+      return standard_sse_constant_opcode (insn, operands);
+
+    case TYPE_SSEMOV:
+      return ix86_output_ssemov (insn, operands);
+
+    case TYPE_SSELOG:
+      if (SSE_REG_P (operands[0]))
+	return MEM_P (operands[1])
+	       ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
+	       : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+      else
+	return MEM_P (operands[1])
+	       ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
+	       : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set (attr "isa")
+	(cond [(eq_attr "alternative" "2,3,4,6,7")
+		 (const_string "sse2")
+	       (eq_attr "alternative" "5")
+		 (const_string "sse4")
+	      ]
+	      (const_string "*")))
+   (set (attr "type")
+	(cond [(eq_attr "alternative" "0,1")
+		 (const_string "imov")
+	       (eq_attr "alternative" "2")
+		 (const_string "sselog1")
+	       (eq_attr "alternative" "4,5,6,7")
+		 (const_string "sselog")
+	      ]
+	      (const_string "ssemov")))
+   (set (attr "memory")
+	(cond [(eq_attr "alternative" "4,6")
+		 (const_string "none")
+	       (eq_attr "alternative" "5")
+		 (const_string "store")
+	       (eq_attr "alternative" "7")
+		 (const_string "load")
+	      ]
+	      (const_string "*")))
+   (set (attr "prefix")
+	(cond [(eq_attr "alternative" "0,1")
+		 (const_string "orig")
+	      ]
+	      (const_string "maybe_vex")))
+   (set (attr "mode")
+	(cond [(eq_attr "alternative" "0,1")
+		 (const_string "HI")
+	       (eq_attr "alternative" "2")
+		 (const_string "V4SF")
+	       (eq_attr "alternative" "4,5,6,7")
+		 (const_string "TI")
+	       (eq_attr "alternative" "3")
+		 (const_string "SF")
+	      ]
+	      (const_string "*")))])
+
 (define_split
   [(set (match_operand 0 "any_fp_register_operand")
 	(match_operand 1 "memory_operand"))]
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index b83cd4919bb..f42fd633725 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -1102,6 +1102,7 @@ typedef _Complex float __attribute__((mode(IC))) _Complex_ibm128;
 @section Half-Precision Floating Point
 @cindex half-precision floating point
 @cindex @code{__fp16} data type
+@cindex @code{_Float16} data type
 
 On ARM and AArch64 targets, GCC supports half-precision (16-bit) floating
 point via the @code{__fp16} type defined in the ARM C Language Extensions.
@@ -1150,6 +1151,18 @@ calls.
 It is recommended that portable code use the @code{_Float16} type defined
 by ISO/IEC TS 18661-3:2015.  @xref{Floating Types}.
 
+On x86 targets with @code{target("sse2")} and above, GCC supports half-precision
+(16-bit) floating point via the @code{_Float16} type defined by ISO/IEC TS
+18661-3:2015.  For C++, x86 provides a built-in type named @code{_Float16}
+that uses the same data format as in C.
+
+Without @option{-mavx512fp16}, the @code{_Float16} type is storage only: all
+operations are performed by software emulation and @code{float}
+instructions.  The default behavior for @code{FLT_EVAL_METHOD} is to keep
+the intermediate result of the operation at 32-bit precision.  This may lead
+to inconsistent behavior between software emulation and AVX512-FP16
+instructions.
+
 @node Decimal Float
 @section Decimal Floating Types
 @cindex decimal floating types
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index ff3b4449b37..775ee397836 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -928,6 +928,11 @@ validate_subreg (machine_mode omode, machine_mode imode,
      fix them all.  */
   if (omode == word_mode)
     ;
+  /* ??? Similarly to (subreg:DI (reg:SF)), also allow (subreg:SI (reg:HF))
+     here.  Though extract_bit_field is the culprit here, not the backends.  */
+  else if (known_gt (regsize, osize) && known_gt (osize, isize)
+	   && FLOAT_MODE_P (imode) && INTEGRAL_MODE_P (omode))
+    ;
   /* ??? Similarly, e.g. with (subreg:DF (reg:TI)).  Though store_bit_field
      is the culprit here, and not the backends.  */
   else if (known_ge (osize, regsize) && known_ge (isize, osize))
diff --git a/gcc/lto/lto-lang.c b/gcc/lto/lto-lang.c
index c13c7e45ac1..92f499643b5 100644
--- a/gcc/lto/lto-lang.c
+++ b/gcc/lto/lto-lang.c
@@ -992,6 +992,9 @@ lto_type_for_mode (machine_mode mode, int unsigned_p)
     return unsigned_p ? unsigned_intTI_type_node : intTI_type_node;
 #endif
 
+  if (float16_type_node && mode == TYPE_MODE (float16_type_node))
+    return float16_type_node;
+
   if (mode == TYPE_MODE (float_type_node))
     return float_type_node;
 
diff --git a/gcc/testsuite/gcc.target/i386/float16-5.c b/gcc/testsuite/gcc.target/i386/float16-5.c
new file mode 100644
index 00000000000..ebc0af1490b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/float16-5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2" } */
+_Float16
+foo (int a)
+{
+  union {
+    int a;
+    _Float16 b;
+  }c;
+  c.a = a;
+  return c.b;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
new file mode 100644
index 00000000000..1b645eb499d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-sse2" } */
+
+_Float16/* { dg-error "is not supported on this target" } */
+foo (_Float16 x) /* { dg-error "is not supported on this target" } */
+{
+  return x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-2.c b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c
new file mode 100644
index 00000000000..3da7683fc31
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-avx512f" } */
+
+union flt
+{
+  _Float16 flt;
+  short s;
+};
+
+_Float16
+foo (union flt x)
+{
+  return x.flt;
+}
+
+/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-3.c b/gcc/testsuite/gcc.target/i386/sse2-float16-3.c
new file mode 100644
index 00000000000..60ff9d4ab80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-float16-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-avx512f" } */
+
+#include<complex.h>
+
+_Complex _Float16
+foo (_Complex _Float16 x)
+{
+  return x;
+}
+
+/* { dg-final { scan-assembler {(?n)movd[\t ].*%xmm0} } } */
-- 
2.27.0



More information about the Gcc-patches mailing list