This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 5/14][AArch64] Add basic fp16 support


This adds basic support for moving __fp16 values around, passing and returning, and operating on them by promoting to 32-bit floats. Also a few scalar testcases.

Note I've not got an fmov (immediate) variant, because there is no 'fmov h<n>, ...' - the only way to load a 16-bit immediate is to reinterpret the bit pattern into some other type. Vector MOVs are turned off for the same reason. If this is practical it can follow in a separate patch.

My reading of ACLE suggests the type name to use is __fp16, rather than __builtin_aarch64_simd_hf. I can use the latter if that's preferable?

The int<->__fp16 conversions are a little odd; the generated assembly is:

int_to_f16:
	scvtf	d0, w0
	fcvt	h0, d0
	ret

int_from_f16:
	fcvt	s0, h0
	fcvtzs	w0, s0
	ret

The spec is silent on the absence or existence of intermediate rounding steps; however, I don't think this matters: float32_t offers so many more significand bits than __fp16 that any integer which fits into the range of an __fp16 (i.e. does not overflow to infinity) can be expressed exactly as a float32_t without any loss of precision. So I think the above are OK. (If they can be optimized, that can follow in a later patch.)

Note that unlike ARM, where we support both IEEE and Alternative formats (and, somewhat-awkwardly, format-agnostic code too), here we are settling on IEEE format always. Technically, we should output an EABI attribute saying which format we are using here, however, aarch64 asm does not support the .eabi-attribute directive yet, so it seems reasonable to leave this while there is only one possible format.

Bootstrapped + check-gcc on aarch64-none-linux-gnu.

gcc/ChangeLog:

	* config/aarch64/aarch64-builtins.c (aarch64_fp16_type_node): New.
	(aarch64_init_builtins): Make aarch64_fp16_type_node, use for __fp16.

	* config/aarch64/aarch64-modes.def: Add HFmode.

	* config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define
	__ARM_FP16_FORMAT_IEEE and __ARM_FP16_ARGS. Set bit 1 of __ARM_FP.

	* config/aarch64/aarch64.c (aarch64_init_libfuncs,
	aarch64_promoted_type): New.

	(aarch64_float_const_representable_p): Disable HFmode.
	(aarch64_mangle_type): Mangle half-precision floats to "Dh".
	(TARGET_PROMOTED_TYPE): Define to aarch64_promoted_type.
	(TARGET_INIT_LIBFUNCS): Define to aarch64_init_libfuncs.

	* config/aarch64/aarch64.md (mov<mode>): Include HFmode using GPF_F16.
	(movhf_aarch64, extendhfsf2, extendhfdf2, truncsfhf2, truncdfhf2): New.

	* config/aarch64/iterators.md (GPF_F16): New.


gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/f16_convs_1.c: New test.
	* gcc.target/aarch64/f16_convs_2.c: New test.
	* gcc.target/aarch64/f16_movs_1.c: New test.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 87f1ac2ec1e3c774782c567b20c673802ae90d99..5a7b112bd1fe77826bfb84383c86dceb6b1521e3 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -453,6 +453,9 @@ static struct aarch64_simd_type_info aarch64_simd_types [] = {
 };
 #undef ENTRY
 
+/* This type is not SIMD-specific; it is the user-visible __fp16.  */
+static tree aarch64_fp16_type_node = NULL_TREE;
+
 static tree aarch64_simd_intOI_type_node = NULL_TREE;
 static tree aarch64_simd_intEI_type_node = NULL_TREE;
 static tree aarch64_simd_intCI_type_node = NULL_TREE;
@@ -862,6 +865,12 @@ aarch64_init_builtins (void)
     = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
 			    AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
 
+  aarch64_fp16_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (aarch64_fp16_type_node) = 16;
+  layout_type (aarch64_fp16_type_node);
+
+  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
+
   if (TARGET_SIMD)
     aarch64_init_simd_builtins ();
   if (TARGET_CRC32)
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index b17b90d90601ae0a631a78560da743720c4638ce..c30059b632fa8cb7fd9071917d3f581f0966a86d 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -36,6 +36,10 @@ CC_MODE (CC_DLTU);
 CC_MODE (CC_DGEU);
 CC_MODE (CC_DGTU);
 
+/* Half-precision floating point for arm_neon.h float16_t.  */
+FLOAT_MODE (HF, 2, 0);
+ADJUST_FLOAT_FORMAT (HF, &ieee_half_format);
+
 /* Vector modes.  */
 VECTOR_MODES (INT, 8);        /*       V8QI V4HI V2SI.  */
 VECTOR_MODES (INT, 16);       /* V16QI V8HI V4SI V2DI.  */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index bf59e40a64459f6daddef47a5f5214adfd92d9b6..67c37ebc0e06d22e524322e5a82b6bcde550bd93 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -57,7 +57,9 @@
       if (TARGET_FLOAT)                                         \
         {                                                       \
           builtin_define ("__ARM_FEATURE_FMA");                 \
-          builtin_define_with_int_value ("__ARM_FP", 0x0C);     \
+	  builtin_define_with_int_value ("__ARM_FP", 0x0E);     \
+	  builtin_define ("__ARM_FP16_FORMAT_IEEE");		\
+	  builtin_define ("__ARM_FP16_ARGS");			\
         }                                                       \
       if (TARGET_SIMD)                                          \
         {                                                       \
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b923fdb08a8e653570e51cf516dc551955961704..44956cf0276ed7b1369d1816f472bad61ac421b1 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -8058,6 +8058,10 @@ aarch64_mangle_type (const_tree type)
   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
     return "St9__va_list";
 
+  /* Half-precision float.  */
+  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
+    return "Dh";
+
   /* Mangle AArch64-specific internal types.  TYPE_NAME is non-NULL_TREE for
      builtin types.  */
   if (TYPE_NAME (type) != NULL)
@@ -9251,6 +9255,33 @@ aarch64_start_file (void)
   default_file_start();
 }
 
+static void
+aarch64_init_libfuncs (void)
+{
+   /* Half-precision float operations.  The compiler handles all operations
+     with NULL libfuncs by converting to SFmode.  */
+
+  /* Conversions.  */
+  set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
+  set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
+
+  /* Arithmetic.  */
+  set_optab_libfunc (add_optab, HFmode, NULL);
+  set_optab_libfunc (sdiv_optab, HFmode, NULL);
+  set_optab_libfunc (smul_optab, HFmode, NULL);
+  set_optab_libfunc (neg_optab, HFmode, NULL);
+  set_optab_libfunc (sub_optab, HFmode, NULL);
+
+  /* Comparisons.  */
+  set_optab_libfunc (eq_optab, HFmode, NULL);
+  set_optab_libfunc (ne_optab, HFmode, NULL);
+  set_optab_libfunc (lt_optab, HFmode, NULL);
+  set_optab_libfunc (le_optab, HFmode, NULL);
+  set_optab_libfunc (ge_optab, HFmode, NULL);
+  set_optab_libfunc (gt_optab, HFmode, NULL);
+  set_optab_libfunc (unord_optab, HFmode, NULL);
+}
+
 /* Target hook for c_mode_for_suffix.  */
 static machine_mode
 aarch64_c_mode_for_suffix (char suffix)
@@ -9289,7 +9320,8 @@ aarch64_float_const_representable_p (rtx x)
   if (!CONST_DOUBLE_P (x))
     return false;
 
-  if (GET_MODE (x) == VOIDmode)
+  /* We don't support HFmode constants yet.  */
+  if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
     return false;
 
   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
@@ -11230,6 +11262,14 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
   return true;
 }
 
+/* Implement TARGET_PROMOTED_TYPE to promote float16 to 32 bits.  */
+static tree
+aarch64_promoted_type (const_tree t)
+{
+  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+    return float_type_node;
+  return NULL_TREE;
+}
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST aarch64_address_cost
 
@@ -11384,6 +11424,9 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
 #undef TARGET_SCHED_REASSOCIATION_WIDTH
 #define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
 
+#undef TARGET_PROMOTED_TYPE
+#define TARGET_PROMOTED_TYPE aarch64_promoted_type
+
 #undef TARGET_SECONDARY_RELOAD
 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
 
@@ -11476,6 +11519,8 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
   aarch64_vectorize_vec_perm_const_ok
 
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
 
 #undef TARGET_FIXED_CONDITION_CODE_REGS
 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 1f4169ee76e7f3f321e5ed7a4d0f08b54ee3bf17..0851f6949adb69bf23221e811230fae08749887c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -976,8 +976,8 @@
 })
 
 (define_expand "mov<mode>"
-  [(set (match_operand:GPF 0 "nonimmediate_operand" "")
-	(match_operand:GPF 1 "general_operand" ""))]
+  [(set (match_operand:GPF_F16 0 "nonimmediate_operand" "")
+	(match_operand:GPF_F16 1 "general_operand" ""))]
   ""
   "
     if (!TARGET_FLOAT)
@@ -991,6 +991,26 @@
   "
 )
 
+(define_insn "*movhf_aarch64"
+  [(set (match_operand:HF 0 "nonimmediate_operand" "=w, ?r,w,w,m,r,m ,r")
+	(match_operand:HF 1 "general_operand"      "?rY, w,w,m,w,m,rY,r"))]
+  "TARGET_FLOAT && (register_operand (operands[0], HFmode)
+    || register_operand (operands[1], HFmode))"
+  "@
+   mov\\t%0.h[0], %w1
+   umov\\t%w0, %1.h[0]
+   mov\\t%0.h[0], %1.h[0]
+   ldr\\t%h0, %1
+   str\\t%h1, %0
+   ldrh\\t%w0, %1
+   strh\\t%w1, %0
+   mov\\t%w0, %w1"
+  [(set_attr "type" "neon_from_gp,neon_to_gp,fmov,\
+                     f_loads,f_stores,load1,store1,mov_reg")
+   (set_attr "simd" "yes,yes,yes,*,*,*,*,*")
+   (set_attr "fp"   "*,*,*,yes,yes,*,*,*")]
+)
+
 (define_insn "*movsf_aarch64"
   [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
 	(match_operand:SF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
@@ -3882,6 +3902,22 @@
   [(set_attr "type" "f_cvt")]
 )
 
+(define_insn "extendhfsf2"
+  [(set (match_operand:SF 0 "register_operand" "=w")
+        (float_extend:SF (match_operand:HF 1 "register_operand" "w")))]
+  "TARGET_FLOAT"
+  "fcvt\\t%s0, %h1"
+  [(set_attr "type" "f_cvt")]
+)
+
+(define_insn "extendhfdf2"
+  [(set (match_operand:DF 0 "register_operand" "=w")
+        (float_extend:DF (match_operand:HF 1 "register_operand" "w")))]
+  "TARGET_FLOAT"
+  "fcvt\\t%d0, %h1"
+  [(set_attr "type" "f_cvt")]
+)
+
 (define_insn "truncdfsf2"
   [(set (match_operand:SF 0 "register_operand" "=w")
         (float_truncate:SF (match_operand:DF 1 "register_operand" "w")))]
@@ -3890,6 +3926,22 @@
   [(set_attr "type" "f_cvt")]
 )
 
+(define_insn "truncsfhf2"
+  [(set (match_operand:HF 0 "register_operand" "=w")
+        (float_truncate:HF (match_operand:SF 1 "register_operand" "w")))]
+  "TARGET_FLOAT"
+  "fcvt\\t%h0, %s1"
+  [(set_attr "type" "f_cvt")]
+)
+
+(define_insn "truncdfhf2"
+  [(set (match_operand:HF 0 "register_operand" "=w")
+        (float_truncate:HF (match_operand:DF 1 "register_operand" "w")))]
+  "TARGET_FLOAT"
+  "fcvt\\t%h0, %d1"
+  [(set_attr "type" "f_cvt")]
+)
+
 (define_insn "fix_trunc<GPF:mode><GPI:mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
         (fix:GPI (match_operand:GPF 1 "register_operand" "w")))]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 65a2849155c9b331dc6179853501f0a6207d1773..a8b782b887ee914bd2399807d2ccfdf4a8e6433b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -38,6 +38,9 @@
 ;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes)
 (define_mode_iterator GPF [SF DF])
 
+;; Iterator for General Purpose Float regs, inc float16_t.
+(define_mode_iterator GPF_F16 [HF SF DF])
+
 ;; Integer vector modes.
 (define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/f16_convs_1.c b/gcc/testsuite/gcc.target/aarch64/f16_convs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..d4e7c02db5e99068c9ddba1b5635e8904bf19e2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/f16_convs_1.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define EPSILON 0.0001
+
+__fp16
+convert_f32_to_f16 (float in)
+{
+  return in;
+}
+
+float
+convert_f16_to_f32 (__fp16 in)
+{
+  return in;
+}
+
+int
+main (int argc, char **argv)
+{
+  __fp16 in1 = convert_f32_to_f16 (3.14159f);
+  __fp16 in2 = convert_f32_to_f16 (2.718f);
+
+  /* Do the addition on __fp16's (implicitly converts both operands to
+     float32, adds, converts back to f16, then we convert back to f32).  */
+  float32_t result1 = convert_f16_to_f32 (in1 + in2);
+
+  /* Do the addition on float32's (we convert both operands to f32, and add,
+     as above, but skip the final conversion f32 -> f16 -> f32).  */
+  float32_t result2 = convert_f16_to_f32 (in1) + convert_f16_to_f32 (in2);
+
+  if (__builtin_fabs (result2 - result1) > EPSILON)
+    abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/f16_convs_2.c b/gcc/testsuite/gcc.target/aarch64/f16_convs_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..3421daef13ff1992775e8c4299623be9779ac45c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/f16_convs_2.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define EPSILON 0.0001
+
+__fp16
+convert_to_f16 (int in)
+{
+  return in;
+}
+
+int
+convert_from_f16 (__fp16 in)
+{
+  return in;
+}
+
+int
+main (int argc, char **argv)
+{
+  __fp16 in1 = convert_to_f16 (3);
+  __fp16 in2 = convert_to_f16 (2);
+
+  /* Do the addition on __fp16's (implicitly converts both operands to
+     float32, adds, converts back to f16, then we convert to int).  */
+  int result1 = convert_from_f16 (in1 + in2);
+
+  /* Do the addition on int's (we convert both operands directly to int, add,
+     and we're done).  */
+  int result2 = convert_from_f16 (in1) + convert_from_f16 (in2);
+
+  if (__builtin_abs (result2 - result1) > EPSILON)
+    abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/f16_movs_1.c b/gcc/testsuite/gcc.target/aarch64/f16_movs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..6cb80866790c5c40a59d22f2bbbfce41ae5f07d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/f16_movs_1.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-fno-inline -O2" } */
+
+#include <arm_neon.h>
+
+__fp16
+func2 (__fp16 a, __fp16 b)
+{
+  return b;
+}
+
+int
+main (int argc, char **argv)
+{
+  __fp16 array[16];
+  int i;
+
+  for (i = 0; i < sizeof (array) / sizeof (array[0]); i++)
+    array[i] = i;
+
+  array[0] = func2 (array[1], array[2]);
+
+  __builtin_printf ("%f\n", array[0]); /* { dg-output "2.0" } */
+
+  return 0;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]