[Patch 14/17] [libgcc, ARM] Generalise float-to-half conversion function.

James Greenhalgh james.greenhalgh@arm.com
Wed Nov 23 18:18:00 GMT 2016


On Wed, Nov 16, 2016 at 04:38:17PM +0000, Kyrill Tkachov wrote:
> Hi James,
> diff --git a/libgcc/config/arm/fp16.c b/libgcc/config/arm/fp16.c
> index 39c863c..ba89796 100644
> --- a/libgcc/config/arm/fp16.c
> +++ b/libgcc/config/arm/fp16.c
> @@ -22,40 +22,74 @@
>     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
>     <http://www.gnu.org/licenses/>.  */
> +struct format
> +{
> +  /* Number of bits.  */
> +  unsigned long long size;
> +  /* Exponent bias.  */
> +  unsigned long long bias;
> +  /* Exponent width in bits.  */
> +  unsigned long long exponent;
> +  /* Significand precision in explicitly stored bits.  */
> +  unsigned long long significand;
> +};
> +
> +static const struct format
> +binary32 =
> +{
> +  32,   /* size.  */
> +  127,  /* bias.  */
> +  8,    /* exponent.  */
> +  23    /* significand.  */
> +};
> +
>  static inline unsigned short
> -__gnu_f2h_internal(unsigned int a, int ieee)
> +__gnu_float2h_internal (const struct format* fmt,
> +			unsigned long long a, int ieee)
>  {
> -  unsigned short sign = (a >> 16) & 0x8000;
> -  int aexp = (a >> 23) & 0xff;
> -  unsigned int mantissa = a & 0x007fffff;
> -  unsigned int mask;
> -  unsigned int increment;
> +  unsigned long long point = 1ULL << fmt->significand;;
> 
> Trailing ';'.
> 
> <...>
> @@ -93,7 +127,13 @@ __gnu_f2h_internal(unsigned int a, int ieee)
>    /* We leave the leading 1 in the mantissa, and subtract one
>       from the exponent bias to compensate.  */
> -  return sign | (((aexp + 14) << 10) + (mantissa >> 13));
> +  return sign | (((aexp + 14) << 10) + (mantissa >> (fmt->significand - 10)));
> +}
> 
> I suppose I'm not very familiar with the soft-fp code, but I don't see at a glance how
> the comment relates to the operation it sits above (where is the 'one' being subtracted
> from the bias?). If you want to improve that comment, or give me a quick explanation of why
> the code does what it says it does, it would be appreciated.
> 
> I've gone through the generalisation and it looks correct to me.
> So given that you have put this through the testing you say you did this is ok with the nits
> above addressed.

Hi Kyrill,

Thanks. In the end this is what I committed.

James

diff --git a/libgcc/config/arm/fp16.c b/libgcc/config/arm/fp16.c
index 39c863c..76f7327 100644
--- a/libgcc/config/arm/fp16.c
+++ b/libgcc/config/arm/fp16.c
@@ -22,40 +22,74 @@
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+struct format
+{
+  /* Number of bits.  */
+  unsigned long long size;
+  /* Exponent bias.  */
+  unsigned long long bias;
+  /* Exponent width in bits.  */
+  unsigned long long exponent;
+  /* Significand precision in explicitly stored bits.  */
+  unsigned long long significand;
+};
+
+static const struct format
+binary32 =
+{
+  32,   /* size.  */
+  127,  /* bias.  */
+  8,    /* exponent.  */
+  23    /* significand.  */
+};
+
 static inline unsigned short
-__gnu_f2h_internal(unsigned int a, int ieee)
+__gnu_float2h_internal (const struct format* fmt,
+			unsigned long long a, int ieee)
 {
-  unsigned short sign = (a >> 16) & 0x8000;
-  int aexp = (a >> 23) & 0xff;
-  unsigned int mantissa = a & 0x007fffff;
-  unsigned int mask;
-  unsigned int increment;
+  unsigned long long point = 1ULL << fmt->significand;
+  unsigned short sign = (a >> (fmt->size - 16)) & 0x8000;
+  int aexp;
+  unsigned long long mantissa;
+  unsigned long long mask;
+  unsigned long long increment;
+
+  /* Get the exponent and mantissa encodings.  */
+  mantissa = a & (point - 1);
 
-  if (aexp == 0xff)
+  mask = (1 << fmt->exponent) - 1;
+  aexp = (a >> fmt->significand) & mask;
+
+  /* Infinity, NaN and alternative format special case.  */
+  if (((unsigned int) aexp) == mask)
     {
       if (!ieee)
 	return sign;
       if (mantissa == 0)
 	return sign | 0x7c00;	/* Infinity.  */
       /* Remaining cases are NaNs.  Convert SNaN to QNaN.  */
-      return sign | 0x7e00 | (mantissa >> 13);
+      return sign | 0x7e00 | (mantissa >> (fmt->significand - 10));
     }
 
+  /* Zero.  */
   if (aexp == 0 && mantissa == 0)
     return sign;
 
-  aexp -= 127;
+  /* Construct the exponent and mantissa.  */
+  aexp -= fmt->bias;
+
+  /* Decimal point is immediately after the significand.  */
+  mantissa |= point;
 
-  /* Decimal point between bits 22 and 23.  */
-  mantissa |= 0x00800000;
   if (aexp < -14)
     {
-      mask = 0x00ffffff;
+      mask = point | (point - 1);
+      /* Minimum exponent for half-precision is 2^-24.  */
       if (aexp >= -25)
 	mask >>= 25 + aexp;
     }
   else
-    mask = 0x00001fff;
+    mask = (point - 1) >> 10;
 
   /* Round.  */
   if (mantissa & mask)
@@ -64,8 +98,8 @@ __gnu_f2h_internal(unsigned int a, int ieee)
       if ((mantissa & mask) == increment)
 	increment = mantissa & (increment << 1);
       mantissa += increment;
-      if (mantissa >= 0x01000000)
-       	{
+      if (mantissa >= (point << 1))
+	{
 	  mantissa >>= 1;
 	  aexp++;
 	}
@@ -91,9 +125,29 @@ __gnu_f2h_internal(unsigned int a, int ieee)
       aexp = -14;
     }
 
-  /* We leave the leading 1 in the mantissa, and subtract one
-     from the exponent bias to compensate.  */
-  return sign | (((aexp + 14) << 10) + (mantissa >> 13));
+  /* Encode the final 16-bit floating-point value.
+
+     This is formed of the sign bit, the bias-adjusted exponent, and the
+     calculated mantissa, with the following caveats:
+
+     1.  The mantissa calculated after rounding could have a leading 1.
+	 To compensate for this, subtract one from the exponent bias (15)
+	 before adding it to the calculated exponent.
+     2.  When we were calculating rounding, we left the mantissa with the
+	 number of bits of the source operand; it needs to be reduced to ten
+	 bits (+1 for the aforementioned leading 1) by shifting right by
+	 the number of bits in the source mantissa - 10.
+     3.  To ensure the leading 1 in the mantissa is applied to the exponent
+	 we need to add the mantissa rather than apply a bitwise "or"
+	 to it.  */
+
+  return sign | (((aexp + 14) << 10) + (mantissa >> (fmt->significand - 10)));
+}
+
+static inline unsigned short
+__gnu_f2h_internal (unsigned int a, int ieee)
+{
+  return __gnu_float2h_internal (&binary32, (unsigned long long) a, ieee);
 }
 
 unsigned int



More information about the Gcc-patches mailing list