This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Patch to speed up fp-bit.c: int-to-float conversions

From: "Joseph S. Myers" <joseph at codesourcery dot com>
To: gcc-patches at gcc dot gnu dot org
Date: Mon, 21 Nov 2005 18:45:13 +0000 (UTC)
Subject: Patch to speed up fp-bit.c: int-to-float conversions
This patch, for mainline and csl-ppc4xx-branch, speeds up
integer-to-floating conversions in fp-bit.c and fixes a bug whereby
libcalls for unsigned-to-floating conversions would never be used
although some of the relevant libgcc functions were defined by fp-bit.c
and the ARM assembly implementation (and there was dead code to use
such functions).

fp-bit.c's functions si_to_float and usi_to_float inefficiently
shifted numbers one bit at a time instead of using __builtin_clz to
determine the number of bits to shift.  This patch makes them use
__builtin_clz.  There was a latent bug in usi_to_float which would
have caused incorrect rounding in some cases (such as in the testcase)
if the function had ever been called; this patch corrects that bug
since the function is no longer dead.

The code in expand_float following the comment

  /* No hardware instruction available; call a library routine.  */

allows for unsigned conversions

      convert_optab tab = unsignedp ? ufloat_optab : sfloat_optab;

but previously this code was unreachable in the unsignedp case -
unsigned conversions would have been converted to signed.  Where both
signed and unsigned conversions require calling a library function,
calling the signed one for an unsigned conversion is not optimally
efficient.  This patch makes the fallback that converts as signed and
then adjusts the result apply only if a target pattern for the signed
conversion is available.  This in turn reveals that ufloat_optab was
never initialized, so this patch initializes it.  Some libgcc functions
which didn't previously exist may now be
needed for the unsigned conversions, so they are added (libgcc2.c
implementations being based on the ones there for signed conversions).
Note that the names are prefixed "__floatun" whereas the instruction
patterns use "floatuns", because the "__floatun" convention is already
established by the pre-existing implementations of some of these
functions.  Because those pre-existing implementations are unused, it
should also be possible if desired to rename them to use "__floatuns".

Benchmarking with a synthetic testcase

volatile FLOAT f;
int main(void) { INT i; for (i = 0; i < 10000000; i++) f = i; return 0; }

(where FLOAT and INT were defined appropriately on the command line),
on powerpc-ibm-linux-gnu --with-cpu=440, shows performance
improvements from

sidf 9.282
sisf 1.859
usidf 10.027
usisf 1.911

(csl-ppc4xx-branch before this patch) to

sidf 1.581
sisf 1.146
usidf 1.637
usisf 1.167

with the patched code.  Other benchmarks also show significant
improvements from this change.

Tested with no regressions, cross-compiling to powerpc-ibm-linux-gnu
--with-cpu=440, mainline and csl-ppc4xx-branch.  Committed to
csl-ppc4xx-branch.  OK to commit to mainline?

2005-11-21  Joseph S. Myers  <joseph@codesourcery.com>

	* config/fp-bit.c (clzusi): New function.
	(si_to_float, usi_to_float): Use it to compute proper shift.
	(usi_to_float): Preserve guard bits when shifting right.
	* libgcc-std.ver (GCC_4.2.0): New version.
	* libgcc2.c (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): New functions.
	* libgcc2.h (__floatundixf, __floatunditf, __floatundidf,
	__floatundisf): Declare.
	* mklibgcc.in (lib2funcs): Add _floatundidf, _floatundisf,
	_floatundixf, and _floatunditf.
	* optabs.c (expand_float): If target does not define a pattern for
	signed or unsigned conversion, use an unsigned libcall instead of
	a signed one.
	(init_optabs): Initialize ufloat_optab.

testsuite:
2005-11-21  Joseph S. Myers  <joseph@codesourcery.com>

	* gcc.c-torture/execute/floatunsisf-1.c: New test.

diff -rupN GCC.orig/gcc/config/fp-bit.c GCC.new/gcc/config/fp-bit.c
--- GCC.orig/gcc/config/fp-bit.c	2005-10-28 23:34:08.000000000 +0000
+++ GCC.new/gcc/config/fp-bit.c	2005-11-18 21:45:21.000000000 +0000
@@ -186,6 +186,22 @@ flip_sign ( fp_number_type *  x)
   x->sign = !x->sign;
 }
 
+/* Count leading zeroes in N.  */
+INLINE
+static int
+clzusi (USItype n)
+{
+  extern int __clzsi2 (USItype);
+  if (sizeof (USItype) == sizeof (unsigned int))
+    return __builtin_clz (n);
+  else if (sizeof (USItype) == sizeof (unsigned long))
+    return __builtin_clzl (n);
+  else if (sizeof (USItype) == sizeof (unsigned long long))
+    return __builtin_clzll (n);
+  else
+    return __clzsi2 (n);
+}
+
 extern FLO_type pack_d ( fp_number_type * );
 
 #if defined(L_pack_df) || defined(L_pack_sf) || defined(L_pack_tf)
@@ -1334,6 +1350,8 @@ si_to_float (SItype arg_a)
     }
   else
     {
+      USItype uarg;
+      int shift;
       in.normal_exp = FRACBITS + NGARDS;
       if (in.sign) 
 	{
@@ -1343,15 +1361,17 @@ si_to_float (SItype arg_a)
 	    {
 	      return (FLO_type)(- MAX_SI_INT - 1);
 	    }
-	  in.fraction.ll = (-arg_a);
+	  uarg = (-arg_a);
 	}
       else
-	in.fraction.ll = arg_a;
+	uarg = arg_a;
 
-      while (in.fraction.ll < ((fractype)1 << (FRACBITS + NGARDS)))
+      in.fraction.ll = uarg;
+      shift = clzusi (uarg) - (BITS_PER_SI - 1 - FRACBITS - NGARDS);
+      if (shift > 0)
 	{
-	  in.fraction.ll <<= 1;
-	  in.normal_exp -= 1;
+	  in.fraction.ll <<= shift;
+	  in.normal_exp -= shift;
 	}
     }
   return pack_d (&in);
@@ -1371,19 +1391,23 @@ usi_to_float (USItype arg_a)
     }
   else
     {
+      int shift;
       in.class = CLASS_NUMBER;
       in.normal_exp = FRACBITS + NGARDS;
       in.fraction.ll = arg_a;
 
-      while (in.fraction.ll > ((fractype)1 << (FRACBITS + NGARDS)))
-        {
-          in.fraction.ll >>= 1;
-          in.normal_exp += 1;
-        }
-      while (in.fraction.ll < ((fractype)1 << (FRACBITS + NGARDS)))
+      shift = clzusi (arg_a) - (BITS_PER_SI - 1 - FRACBITS - NGARDS);
+      if (shift < 0)
+	{
+	  fractype guard = in.fraction.ll & (((fractype)1 << -shift) - 1);
+	  in.fraction.ll >>= -shift;
+	  in.fraction.ll |= (guard != 0);
+	  in.normal_exp -= shift;
+	}
+      else if (shift > 0)
 	{
-	  in.fraction.ll <<= 1;
-	  in.normal_exp -= 1;
+	  in.fraction.ll <<= shift;
+	  in.normal_exp -= shift;
 	}
     }
   return pack_d (&in);
diff -rupN GCC.orig/gcc/libgcc-std.ver GCC.new/gcc/libgcc-std.ver
--- GCC.orig/gcc/libgcc-std.ver	2005-10-28 23:34:12.000000000 +0000
+++ GCC.new/gcc/libgcc-std.ver	2005-11-18 16:24:59.000000000 +0000
@@ -252,3 +252,20 @@ GCC_4.0.0 {
   __mulxc3
   __multc3
 }
+
+%inherit GCC_4.2.0 GCC_4.0.0
+GCC_4.2.0 {
+  # unsigned-to-floating conversions
+  __floatunsisf
+  __floatunsidf
+  __floatunsixf
+  __floatunsitf
+  __floatundidf
+  __floatundisf
+  __floatundixf
+  __floatunditf
+  __floatuntidf
+  __floatuntisf
+  __floatuntixf
+  __floatuntitf
+}
diff -rupN GCC.orig/gcc/libgcc2.c GCC.new/gcc/libgcc2.c
--- GCC.orig/gcc/libgcc2.c	2005-10-28 23:34:11.000000000 +0000
+++ GCC.new/gcc/libgcc2.c	2005-11-18 16:39:52.000000000 +0000
@@ -1323,6 +1323,17 @@ __floatdixf (DWtype u)
 }
 #endif
 
+#if defined(L_floatundixf) && LIBGCC2_HAS_XF_MODE
+XFtype
+__floatundixf (UDWtype u)
+{
+  XFtype d = (UWtype) (u >> W_TYPE_SIZE);
+  d *= Wtype_MAXp1_F;
+  d += (UWtype)u;
+  return d;
+}
+#endif
+
 #if defined(L_floatditf) && LIBGCC2_HAS_TF_MODE
 TFtype
 __floatditf (DWtype u)
@@ -1334,6 +1345,17 @@ __floatditf (DWtype u)
 }
 #endif
 
+#if defined(L_floatunditf) && LIBGCC2_HAS_TF_MODE
+TFtype
+__floatunditf (UDWtype u)
+{
+  TFtype d = (UWtype) (u >> W_TYPE_SIZE);
+  d *= Wtype_MAXp1_F;
+  d += (UWtype)u;
+  return d;
+}
+#endif
+
 #if defined(L_floatdidf) && LIBGCC2_HAS_DF_MODE
 DFtype
 __floatdidf (DWtype u)
@@ -1345,6 +1367,17 @@ __floatdidf (DWtype u)
 }
 #endif
 
+#if defined(L_floatundidf) && LIBGCC2_HAS_DF_MODE
+DFtype
+__floatundidf (UDWtype u)
+{
+  DFtype d = (UWtype) (u >> W_TYPE_SIZE);
+  d *= Wtype_MAXp1_F;
+  d += (UWtype)u;
+  return d;
+}
+#endif
+
 #if defined(L_floatdisf) && LIBGCC2_HAS_SF_MODE
 #define DI_SIZE (W_TYPE_SIZE * 2)
 #define SF_SIZE FLT_MANT_DIG
@@ -1433,6 +1466,87 @@ __floatdisf (DWtype u)
 }
 #endif
 
+#if defined(L_floatundisf) && LIBGCC2_HAS_SF_MODE
+#define DI_SIZE (W_TYPE_SIZE * 2)
+#define SF_SIZE FLT_MANT_DIG
+
+SFtype
+__floatundisf (UDWtype u)
+{
+#if SF_SIZE >= W_TYPE_SIZE
+  /* When the word size is small, we never get any rounding error.  */
+  SFtype f = (UWtype) (u >> W_TYPE_SIZE);
+  f *= Wtype_MAXp1_F;
+  f += (UWtype)u;
+  return f;
+#elif LIBGCC2_HAS_DF_MODE
+
+#if LIBGCC2_DOUBLE_TYPE_SIZE == 64
+#define DF_SIZE DBL_MANT_DIG
+#elif LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 64
+#define DF_SIZE LDBL_MANT_DIG
+#else
+# error
+#endif
+
+#define REP_BIT ((UDWtype) 1 << (DI_SIZE - DF_SIZE))
+
+  /* Protect against double-rounding error.
+     Represent any low-order bits, that might be truncated by a bit that
+     won't be lost.  The bit can go in anywhere below the rounding position
+     of the SFmode.  A fixed mask and bit position handles all usual
+     configurations.  It doesn't handle the case of 128-bit DImode, however.  */
+  if (DF_SIZE < DI_SIZE
+      && DF_SIZE > (DI_SIZE - DF_SIZE + SF_SIZE))
+    {
+      if (u >= ((UDWtype) 1 << DF_SIZE))
+	{
+	  if ((UDWtype) u & (REP_BIT - 1))
+	    {
+	      u &= ~ (REP_BIT - 1);
+	      u |= REP_BIT;
+	    }
+	}
+    }
+
+  /* Do the calculation in DFmode so that we don't lose any of the
+     precision of the high word while multiplying it.  */
+  DFtype f = (UWtype) (u >> W_TYPE_SIZE);
+  f *= Wtype_MAXp1_F;
+  f += (UWtype)u;
+  return (SFtype) f;
+#else
+  /* Finally, the word size is larger than the number of bits in SFmode,
+     and we've got no DFmode.  The only way to avoid double rounding is
+     to special case the extraction.  */
+
+  /* If there are no high bits set, fall back to one conversion.  */
+  if ((UWtype)u == u)
+    return (SFtype)(UWtype)u;
+
+  /* Otherwise, find the power of two.  */
+  UWtype hi = u >> W_TYPE_SIZE;
+
+  UWtype count, shift;
+  count_leading_zeros (count, hi);
+
+  shift = W_TYPE_SIZE - count;
+
+  /* Shift down the most significant bits.  */
+  hi = u >> shift;
+
+  /* If we lost any nonzero bits, set the lsb to ensure correct rounding.  */
+  if (u & ((1 << shift) - 1))
+    hi |= 1;
+
+  /* Convert the one word of data, and rescale.  */
+  SFtype f = hi;
+  f *= (UWtype)1 << shift;
+  return f;
+#endif
+}
+#endif
+
 #if defined(L_fixunsxfsi) && LIBGCC2_HAS_XF_MODE
 /* Reenable the normal types, in case limits.h needs them.  */
 #undef char
diff -rupN GCC.orig/gcc/libgcc2.h GCC.new/gcc/libgcc2.h
--- GCC.orig/gcc/libgcc2.h	2005-10-28 23:34:11.000000000 +0000
+++ GCC.new/gcc/libgcc2.h	2005-11-18 16:30:58.000000000 +0000
@@ -238,6 +238,10 @@ typedef int word_type __attribute__ ((mo
 #define __floatditf	__NDW(float,tf)
 #define __floatdidf	__NDW(float,df)
 #define __floatdisf	__NDW(float,sf)
+#define __floatundixf	__NDW(floatun,xf)
+#define __floatunditf	__NDW(floatun,tf)
+#define __floatundidf	__NDW(floatun,df)
+#define __floatundisf	__NDW(floatun,sf)
 #define __fixunsxfSI	__NW(fixunsxf,)
 #define __fixunstfSI	__NW(fixunstf,)
 #define __fixunsdfSI	__NW(fixunsdf,)
@@ -318,6 +322,7 @@ extern SItype __negvsi2 (SItype);
 #if LIBGCC2_HAS_SF_MODE
 extern DWtype __fixsfdi (SFtype);
 extern SFtype __floatdisf (DWtype);
+extern SFtype __floatundisf (UDWtype);
 extern UWtype __fixunssfSI (SFtype);
 extern DWtype __fixunssfDI (SFtype);
 extern SFtype __powisf2 (SFtype, int);
@@ -327,6 +332,7 @@ extern SCtype __mulsc3 (SFtype, SFtype, 
 #if LIBGCC2_HAS_DF_MODE
 extern DWtype __fixdfdi (DFtype);
 extern DFtype __floatdidf (DWtype);
+extern DFtype __floatundidf (UDWtype);
 extern UWtype __fixunsdfSI (DFtype);
 extern DWtype __fixunsdfDI (DFtype);
 extern DFtype __powidf2 (DFtype, int);
@@ -338,6 +344,7 @@ extern DCtype __muldc3 (DFtype, DFtype, 
 extern DWtype __fixxfdi (XFtype);
 extern DWtype __fixunsxfDI (XFtype);
 extern XFtype __floatdixf (DWtype);
+extern XFtype __floatundixf (UDWtype);
 extern UWtype __fixunsxfSI (XFtype);
 extern XFtype __powixf2 (XFtype, int);
 extern XCtype __divxc3 (XFtype, XFtype, XFtype, XFtype);
@@ -348,6 +355,7 @@ extern XCtype __mulxc3 (XFtype, XFtype, 
 extern DWtype __fixunstfDI (TFtype);
 extern DWtype __fixtfdi (TFtype);
 extern TFtype __floatditf (DWtype);
+extern TFtype __floatunditf (UDWtype);
 extern TFtype __powitf2 (TFtype, int);
 extern TCtype __divtc3 (TFtype, TFtype, TFtype, TFtype);
 extern TCtype __multc3 (TFtype, TFtype, TFtype, TFtype);
diff -rupN GCC.orig/gcc/mklibgcc.in GCC.new/gcc/mklibgcc.in
--- GCC.orig/gcc/mklibgcc.in	2005-11-16 15:08:51.000000000 +0000
+++ GCC.new/gcc/mklibgcc.in	2005-11-18 16:27:14.000000000 +0000
@@ -63,7 +63,7 @@ lib2funcs='_muldi3 _negdi2 _lshrdi3 _ash
 	_ffssi2 _ffsdi2 _clz _clzsi2 _clzdi2 _ctzsi2 _ctzdi2 _popcount_tab
 	_popcountsi2 _popcountdi2 _paritysi2 _paritydi2 _powisf2 _powidf2
 	_powixf2 _powitf2 _mulsc3 _muldc3 _mulxc3 _multc3 _divsc3 _divdc3
-	_divxc3 _divtc3'
+	_divxc3 _divtc3 _floatundidf _floatundisf _floatundixf _floatunditf'
 
 # Disable SHLIB_LINK if shared libgcc not enabled.
 if [ "@enable_shared@" = "no" ]; then
diff -rupN GCC.orig/gcc/optabs.c GCC.new/gcc/optabs.c
--- GCC.orig/gcc/optabs.c	2005-11-01 01:36:48.000000000 +0000
+++ GCC.new/gcc/optabs.c	2005-11-18 02:32:12.000000000 +0000
@@ -4310,6 +4310,7 @@ expand_float (rtx to, rtx from, int unsi
   enum insn_code icode;
   rtx target = to;
   enum machine_mode fmode, imode;
+  bool can_do_signed = false;
 
   /* Crash now, because we won't be able to decide which mode to use.  */
   gcc_assert (GET_MODE (from) != VOIDmode);
@@ -4331,8 +4332,14 @@ expand_float (rtx to, rtx from, int unsi
 	  continue;
 
 	icode = can_float_p (fmode, imode, unsignedp);
-	if (icode == CODE_FOR_nothing && imode != GET_MODE (from) && unsignedp)
-	  icode = can_float_p (fmode, imode, 0), doing_unsigned = 0;
+	if (icode == CODE_FOR_nothing && unsignedp)
+	  {
+	    enum insn_code scode = can_float_p (fmode, imode, 0);
+	    if (scode != CODE_FOR_nothing)
+	      can_do_signed = true;
+	    if (imode != GET_MODE (from))
+	      icode = scode, doing_unsigned = 0;
+	  }
 
 	if (icode != CODE_FOR_nothing)
 	  {
@@ -4353,7 +4360,7 @@ expand_float (rtx to, rtx from, int unsi
 
   /* Unsigned integer, and no way to convert directly.
      Convert as signed, then conditionally adjust the result.  */
-  if (unsignedp)
+  if (unsignedp && can_do_signed)
     {
       rtx label = gen_label_rtx ();
       rtx temp;
@@ -5231,6 +5238,8 @@ init_optabs (void)
   /* Conversions.  */
   init_interclass_conv_libfuncs (sfloat_optab, "float",
 				 MODE_INT, MODE_FLOAT);
+  init_interclass_conv_libfuncs (ufloat_optab, "floatun",
+				 MODE_INT, MODE_FLOAT);
   init_interclass_conv_libfuncs (sfix_optab, "fix",
 				 MODE_FLOAT, MODE_INT);
   init_interclass_conv_libfuncs (ufix_optab, "fixuns",
diff -rupN GCC.orig/gcc/testsuite/gcc.c-torture/execute/floatunsisf-1.c GCC.new/gcc/testsuite/gcc.c-torture/execute/floatunsisf-1.c
--- GCC.orig/gcc/testsuite/gcc.c-torture/execute/floatunsisf-1.c	1970-01-01 00:00:00.000000000 +0000
+++ GCC.new/gcc/testsuite/gcc.c-torture/execute/floatunsisf-1.c	2005-11-18 11:13:33.000000000 +0000
@@ -0,0 +1,21 @@
+/* The fp-bit.c function __floatunsisf had a latent bug where guard bits
+   could be lost leading to incorrect rounding.  */
+/* Origin: Joseph Myers <joseph@codesourcery.com> */
+
+extern void abort (void);
+extern void exit (int);
+#if __INT_MAX__ >= 0x7fffffff
+volatile unsigned u = 0x80000081;
+#else
+volatile unsigned long u = 0x80000081;
+#endif
+volatile float f1, f2;
+int
+main (void)
+{
+  f1 = (float) u;
+  f2 = (float) 0x80000081;
+  if (f1 != f2)
+    abort ();
+  exit (0);
+}

-- 
Joseph S. Myers               http://www.srcf.ucam.org/~jsm28/gcc/
    jsm@polyomino.org.uk (personal mail)
    joseph@codesourcery.com (CodeSourcery mail)
    jsm28@gcc.gnu.org (Bugzilla assignments and CCs)
Follow-Ups:
- Re: Patch to speed up fp-bit.c: int-to-float conversions
  - From: Geoffrey Keating
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]