This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RFC: fp printing speedup, take 2


OK, the first version I posted had a bug from migrating the code into
the library.  This version works perfectly on linux x86.  

This code replaces printf-based floating point printing with the core
printing routine from gcc-2.95.  It passes make check with no new
regressions as well as the floating point torture test I posted
earlier and another as well.

The first version I posted is here.

http://gcc.gnu.org/ml/libstdc++/2003-11/msg00002.html

As mentioned in that message, the patch needs configury work for other
platforms, but is otherwise complete.

Jerry Quinn


? src/floatconv.cc
Index: config/linker-map.gnu
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/config/linker-map.gnu,v
retrieving revision 1.49
diff -u -w -r1.49 linker-map.gnu
--- config/linker-map.gnu	11 Nov 2003 20:09:06 -0000	1.49
+++ config/linker-map.gnu	14 Nov 2003 06:46:42 -0000
@@ -159,6 +159,8 @@
 
     # std::__convert_to_v
     _ZSt14__convert_to_v*;
+    _ZSt14__dec_exponentI*;
+    _ZSt6__dtoaI*;
 
     # stub functions from libmath
     sinf;
Index: include/bits/locale_facets.tcc
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/include/bits/locale_facets.tcc,v
retrieving revision 1.144
diff -u -w -r1.144 locale_facets.tcc
--- include/bits/locale_facets.tcc	9 Nov 2003 19:15:25 -0000	1.144
+++ include/bits/locale_facets.tcc	14 Nov 2003 06:46:43 -0000
@@ -36,6 +36,7 @@
 #pragma GCC system_header
 
 #include <limits>    		// For numeric_limits
+#include <cmath>		// For log10
 #include <typeinfo>  		// For bad_cast.
 #include <bits/streambuf_iterator.h>
 
@@ -843,6 +844,178 @@
 	return std::__write(__s, __cs, __len);
       } 
 
+  // fixed point
+  template<typename _CharT>
+    int
+    __float_fixed_to_char(_CharT* __sbuf, const _CharT* __atoms,
+			  _CharT __dpchar, const char* __buf, int __decp, int __prec,
+			  ios_base::fmtflags __flags, bool __pad)
+    {
+      _CharT* __sb = __sbuf;
+      const _CharT* __digits = __atoms + __num_base::_S_odigits;
+      bool __showpoint = __flags & ios_base::showpoint;
+
+      if (__decp <= 0) {
+	// Decimal point is to the left of the significant digits.  Insert 0's.
+	*__sbuf++= __digits[0];
+
+	// We need a decimal point
+	if (__prec || __showpoint)
+	  *__sbuf++= __dpchar;
+
+	int __len = __sbuf - __sb;
+	// Insert leading 0's
+	for (int __i = __decp; __i < 0; __i++)
+	  *__sbuf++= __digits[0]; // '0';
+
+	// Copy over digits
+	while (*__buf)
+	  *__sbuf++ = __digits[(*__buf++) - '0'];
+
+	// Add trimmed 0's.  dtoa sometimes trims trailing 0's.
+	if (__pad || __showpoint)
+	  while (__sbuf - __sb-__len < __prec)
+	    *__sbuf++= __digits[0]; // '0';
+	return __sbuf - __sb;
+      }
+
+      // decp > 0
+      while (*__buf) {
+	if (__decp-- == 0)		// If decp < 0, this will never fire
+	  *__sbuf++= __dpchar;
+	*__sbuf++= __digits[(*__buf++) - '0'];
+      }
+      // 0's may have been dropped at the end
+      while (__decp > 0) {
+	*__sbuf++= __digits[0];		       // Insert integer 0's
+	__decp--;
+      }
+      // decp now <= 0
+      if (!__decp) {
+	if (__showpoint || (__prec && __pad)) {
+	  *__sbuf++= __dpchar;
+	  while (__decp-- > -__prec) *__sbuf++= __digits[0];
+	}
+      }
+      else if (__pad || __showpoint) {		       // decp < 0
+	while (__decp-- > -__prec) *__sbuf++= __digits[0];
+      }
+
+      return __sbuf - __sb;
+    }
+
+  // Convert output of __dtoa to scientific form and write finished string
+  // into __sbuf.  __buf contains a null-terminated digit string.  __decp is
+  // the index of the digit in __buf that the decimal point comes before.
+  template<typename _CharT>
+    int
+    __float_scientific_to_char(_CharT* __sbuf, const _CharT* __atoms,
+			       _CharT __dpchar, const char* __buf, int __decp, int __prec,
+			       ios_base::fmtflags __flags, bool __pad)
+    {
+      const _CharT* __digits = __atoms + __num_base::_S_odigits;
+      _CharT* __sb = __sbuf;
+      const char* __bp = __buf;
+      bool __showpoint = __flags & ios_base::showpoint;
+
+      // Insert first digit and decimal point
+      if (!__buf[0])
+	*__sbuf++ = __digits[0];	// handle 0
+      else
+	*__sbuf++= __digits[(*__bp++) - '0'];
+      if ((__prec && *__bp) || __showpoint)
+	*__sbuf++= __dpchar;
+
+      // Insert digits after decimal point
+      int __j = 0;
+      for (; *__bp; __j++, __bp++ )
+	*__sbuf++= __digits[*__bp - '0'];
+      // Apparently dtoa will sometimes not add the final 0 after rounding
+      // But only if we want all the 0's.  __pad sets this.
+      for (; (__pad || __flags & ios_base::showpoint) && __j < __prec; __j++)
+	*__sbuf++ = __digits[0];
+
+      // Insert exponent symbol and sign
+      *__sbuf++ = (__flags & ios_base::uppercase) ? __atoms[__num_base::_S_oE]
+	: __atoms[__num_base::_S_oe];
+      __decp--;
+      if (__decp < 0) {
+	*__sbuf++ = __atoms[__num_base::_S_ominus];
+	__decp = -__decp;
+      }
+      else *__sbuf++ = __atoms[__num_base::_S_oplus];
+
+      // Write the exponent.  Large enough for ieee quad (15 bit exponent)
+      char __tbuf[6];
+      char *__tb = __tbuf + 5;
+      *__tb-- = 0;
+      while (__decp) {
+	*__tb-- = '0' + (__decp % 10);
+	__decp /= 10;
+      }
+      // We need at least 2 digits in exponent.  Pad if it's missing
+      while (__tb - __tbuf > 2) *__tb-- = '0';
+      while (*++__tb)
+	*__sbuf++ = __digits[*__tb - '0'];
+
+      return __sbuf - __sb;
+    }
+
+  template<typename _T>
+    int __dtoa(char*, _T, int, int, int, bool);
+
+  template<typename _CharT, typename _ValueT>
+    inline int
+    __float_to_char(_CharT* __buf, int __bufsize, const _CharT* __atoms,
+		    _CharT __dpchar, int __mode, _ValueT __v, int __exp,
+		    int __prec, ios_base::fmtflags __flags)
+    {
+      char __lbuf[__bufsize];
+      int __decp;
+
+      // Insert sign
+      int __len = 0;
+      if (__v < 0)
+	{
+	  *__buf++ = __atoms[__num_base::_S_ominus];
+	  __len = 1;
+	}
+      else if (__flags & ios_base::showpos)
+	{
+	  __len = 1;
+	  *__buf++ = __atoms[__num_base::_S_oplus];
+	}
+      
+
+      // Dispatch number conversion to the correct function
+      if (__mode == 0)
+	{
+	  __decp = __dtoa(__lbuf, __v, __exp, 3, __prec, false);
+	  __len +=__float_fixed_to_char(__buf, __atoms, __dpchar, &__lbuf[0],
+					__decp, __prec, __flags, true);
+	}
+      else if (__mode == 1)
+	{
+	  __decp = __dtoa(__lbuf, __v, __exp, 2, __prec+1, false);
+	  __len += __float_scientific_to_char(__buf, __atoms, __dpchar,
+					      &__lbuf[0], __decp, __prec,
+					      __flags, true);
+	}
+      else
+	{
+	  __decp = __dtoa(__lbuf, __v, __exp, 2, __prec, true);
+	  if (__decp <= -4 || __decp > __prec)
+	    __len += __float_scientific_to_char(__buf, __atoms, __dpchar,
+						&__lbuf[0], __decp, __prec-1,
+						__flags, false);
+	  else
+	    __len += __float_fixed_to_char(__buf, __atoms, __dpchar, &__lbuf[0],
+					   __decp, __prec-__decp,
+					   __flags, false);
+	}
+      return __len;
+    }
+
   template<typename _CharT, typename _OutIter>
     void
     num_put<_CharT, _OutIter>::
@@ -868,16 +1041,14 @@
       __len = __newlen;
     }
 
-  // The following code uses snprintf (or sprintf(), when
-  // _GLIBCXX_USE_C99 is not defined) to convert floating point values
-  // for insertion into a stream.  An optimization would be to replace
-  // them with code that works directly on a wide buffer and then use
-  // __pad to do the padding.  It would be good to replace them anyway
-  // to gain back the efficiency that C++ provides by knowing up front
-  // the type of the values to insert.  Also, sprintf is dangerous
-  // since may lead to accidental buffer overruns.  This
-  // implementation follows the C++ standard fairly directly as
-  // outlined in 22.2.2.2 [lib.locale.num.put]
+  template<typename _T>
+    int
+    __dec_exponent(_T d, int &special, bool &sign);
+
+
+  // This implementation now uses a dtoa conversion that does fp arithmetic
+  // when possible, falling back to big int when required.  Currently, numbers
+  // with < 15 bits of precision and not too large integers get optimized.
   template<typename _CharT, typename _OutIter>
     template<typename _ValueT>
       _OutIter
@@ -911,58 +1082,80 @@
 	// Long enough for the max format spec.
 	char __fbuf[16];
 
-#ifdef _GLIBCXX_USE_C99
-	// First try a buffer perhaps big enough (for sure sufficient
-	// for non-ios_base::fixed outputs)
-	int __cs_size = __max_digits * 3;
-	char* __cs = static_cast<char*>(__builtin_alloca(__cs_size));
-
-	_S_format_float(__io, __fbuf, __mod);
-	__len = std::__convert_from_v(__cs, __cs_size, __fbuf, __v,
-				      _S_get_c_locale(), __prec);
-
-	// If the buffer was not large enough, try again with the correct size.
-	if (__len >= __cs_size)
-	  {
-	    __cs_size = __len + 1; 
-	    __cs = static_cast<char*>(__builtin_alloca(__cs_size));
-	    __len = std::__convert_from_v(__cs, __cs_size, __fbuf, __v,
-					  _S_get_c_locale(), __prec);
-	  }
-#else
-	// Consider the possibility of long ios_base::fixed outputs
-	const bool __fixed = __io.flags() & ios_base::fixed;
-	const int __max_exp = numeric_limits<_ValueT>::max_exponent10;
-
-	// The size of the output string is computed as follows.
 	// ios_base::fixed outputs may need up to __max_exp+1 chars
 	// for the integer part + up to __max_digits chars for the
 	// fractional part + 3 chars for sign, decimal point, '\0'. On
 	// the other hand, for non-fixed outputs __max_digits*3 chars
 	// are largely sufficient.
-	const int __cs_size = __fixed ? __max_exp + __max_digits + 4 
-	                              : __max_digits * 3;
-	char* __cs = static_cast<char*>(__builtin_alloca(__cs_size));
-
-	_S_format_float(__io, __fbuf, __mod);
-	__len = std::__convert_from_v(__cs, 0, __fbuf, __v, 
-				      _S_get_c_locale(), __prec);
-#endif
-
-      // [22.2.2.2.2] Stage 2, convert to char_type, using correct
-      // numpunct.decimal_point() values for '.' and adding grouping.
+	int __special;
+	bool __sign;
+	_CharT* __ws;
+	const int __exponent = __dec_exponent(__v, __special, __sign);
+	if (__special)
+	  {
       const ctype<_CharT>& __ctype = use_facet<ctype<_CharT> >(__loc);
+	    const char* __cs;
+	    __ws = static_cast<_CharT*>(__builtin_alloca(sizeof(_CharT) * 4));
+	    if (__special == 1)
+	      {
 
-      _CharT* __ws = static_cast<_CharT*>(__builtin_alloca(sizeof(_CharT) 
-							   * __len));
+		if (__sign)
+		  __cs = (__io.flags() & ios_base::uppercase) ? "-INF" : "-inf";
+		else if (__io.flags() & ios_base::showpos)
+		  __cs = (__io.flags() & ios_base::uppercase) ? "+INF" : "+inf";
+		else
+		  __cs = (__io.flags() & ios_base::uppercase) ? "INF" : "inf";
+	      }
+	    else		// special == 2
+	      __cs = (__io.flags() & ios_base::uppercase) ? "NAN" : "nan";
+
+	    __len = strlen(__cs);
       __ctype.widen(__cs, __cs + __len, __ws);
+      	  }
+	else
+	  {
+	    int __decp;
+	    int __cs_size;
+	    int __mode;
+
+	    // Fixed needs space to hold all digits to left of decimal point,
+	    // plus up to __prec digits to the right.  The other formats need
+	    // __prec digits plus sign, decimal point, exponent char and sign,
+	    // and exponent itself.
+	    if (__io.flags() & ios_base::fixed)
+	      {
+		// %f style
+		__cs_size = (__exponent > 0 ? __exponent : 0) + 2 + __prec;
+		__mode = 0;
+	      }
+	    else if (__io.flags() & ios_base::scientific)
+	      {
+		// %e style
+		const int __max_exp_len =
+		  int(log10(numeric_limits<_ValueT>::max_exponent10)) + 1;
+		__cs_size = __prec + 4 + __max_exp_len;
+		__mode = 1;
+	      }
+	    else
+	      {
+		// %g style 
+		const int __max_exp_len =
+		  int(log10(numeric_limits<_ValueT>::max_exponent10)) + 1;
+		if (!__prec) __prec = 1;
+		__cs_size = __prec + 4 + __max_exp_len;
+		__mode = 2;
+	      }
       
-      // Replace decimal point.
-      const _CharT __cdec = __ctype.widen('.');
-      const _CharT __dec = __lc->_M_decimal_point;
-      const _CharT* __p;
-      if (__p = char_traits<_CharT>::find(__ws, __len, __cdec))
-	__ws[__p - __ws] = __dec;
+	    __ws = static_cast<_CharT*>(__builtin_alloca(sizeof(_CharT) * __cs_size));
+	    __len = __float_to_char(__ws, __cs_size, &__lc->_M_atoms_out[0],
+				    __lc->_M_decimal_point, __mode, __v,
+				    __exponent, __prec, __io.flags());
+      
+	  }
+
+      // [22.2.2.2.2] Stage 2, convert to char_type, using correct
+      // numpunct.decimal_point() values for '.' and adding grouping.  The
+      // conversion happened in __float_to_char().
 
       // Add grouping, if necessary. 
       if (__lc->_M_use_grouping)
@@ -971,6 +1164,8 @@
 	  // number of digits, but no more.
 	  _CharT* __ws2 = static_cast<_CharT*>(__builtin_alloca(sizeof(_CharT) 
 								* __len * 2));
+	  const _CharT* __p =
+	    char_traits<_CharT>::find(__ws, __len, __lc->_M_decimal_point);
 	  _M_group_float(__lc->_M_grouping, __lc->_M_thousands_sep, __p,
 			 __ws2, __ws, __len);
 	  __ws = __ws2;
Index: src/Makefile.am
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/src/Makefile.am,v
retrieving revision 1.137
diff -u -w -r1.137 Makefile.am
--- src/Makefile.am	11 Nov 2003 20:09:11 -0000	1.137
+++ src/Makefile.am	14 Nov 2003 06:46:43 -0000
@@ -96,6 +96,7 @@
 	ctype.cc \
 	debug.cc \
 	demangle.cc \
+	floatconv.cc \
 	functexcept.cc \
 	globals_locale.cc \
 	globals_io.cc \
Index: src/Makefile.in
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/src/Makefile.in,v
retrieving revision 1.191
diff -u -w -r1.191 Makefile.in
--- src/Makefile.in	11 Nov 2003 20:09:12 -0000	1.191
+++ src/Makefile.in	14 Nov 2003 06:46:44 -0000
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.7.8 from Makefile.am.
+# Makefile.in generated by automake 1.7.7 from Makefile.am.
 # @configure_input@
 
 # Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
@@ -253,6 +253,7 @@
 	ctype.cc \
 	debug.cc \
 	demangle.cc \
+	floatconv.cc \
 	functexcept.cc \
 	globals_locale.cc \
 	globals_io.cc \
@@ -362,7 +363,7 @@
 	time_members.lo
 am__objects_2 = basic_file.lo c++locale.lo
 am__objects_3 = codecvt.lo complex_io.lo ctype.lo debug.lo demangle.lo \
-	functexcept.lo globals_locale.lo globals_io.lo ios.lo \
+	floatconv.lo functexcept.lo globals_locale.lo globals_io.lo ios.lo \
 	ios_failure.lo ios_init.lo ios_locale.lo limits.lo locale.lo \
 	locale_init.lo locale_facets.lo localename.lo stdexcept.lo \
 	stl_tree.lo strstream.lo allocator-inst.lo concept-inst.lo \
@@ -573,6 +574,7 @@
 
 distclean: distclean-am
 	-rm -f Makefile
+
 distclean-am: clean-am distclean-compile distclean-generic \
 	distclean-libtool distclean-tags
 
@@ -596,6 +598,7 @@
 
 maintainer-clean: maintainer-clean-am
 	-rm -f Makefile
+
 maintainer-clean-am: distclean-am maintainer-clean-generic
 
 mostlyclean: mostlyclean-am
--- src/floatconv.cc.old	1969-12-31 19:00:00.000000000 -0500
+++ src/floatconv.cc	2003-11-13 01:11:09.000000000 -0500
@@ -0,0 +1,1902 @@
+// Numeric printing support -*- C++ -*-
+
+// Copyright (C) 1993, 1994, 2003
+// Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 2, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING.  If not, write to the Free
+// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+// USA.
+
+// As a special exception, you may use this file as part of a free software
+// library without restriction.  Specifically, if other files instantiate
+// templates or use macros or inline functions from this file, or you compile
+// this file and link it with other files to produce an executable, this
+// file does not by itself cause the resulting executable to be covered by
+// the GNU General Public License.  This exception does not however
+// invalidate any other reasons why the executable file might be covered by
+// the GNU General Public License.
+
+// Original copyright below
+
+/* JQ -- long double support is guaranteed in c++ */
+
+/* 
+Copyright (C) 1993, 1994 Free Software Foundation
+
+This file is part of the GNU IO Library.  This library is free
+software; you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this library; see the file COPYING.  If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+As a special exception, if you link this library with files
+compiled with a GNU compiler to produce an executable, this does not cause
+the resulting executable to be covered by the GNU General Public License.
+This exception does not however invalidate any other reasons why
+the executable file might be covered by the GNU General Public License. */
+
+// #include <libioP.h>
+#define __USE_DTOA
+#ifdef __USE_DTOA
+/****************************************************************
+ *
+ * The author of this software is David M. Gay.
+ *
+ * Copyright (c) 1991 by AT&T.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHOR NOR AT&T MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ *
+ ***************************************************************/
+
+/* Some cleaning up by Per Bothner, bothner@cygnus.com, 1992, 1993.
+   Re-written to not need static variables
+   (except result, result_k, HIWORD, LOWORD). */
+
+/* Note that the checking of _DOUBLE_IS_32BITS is for use with the
+   cross targets that employ the newlib ieeefp.h header.  -- brendan */
+
+/* Please send bug reports to
+        David M. Gay
+        AT&T Bell Laboratories, Room 2C-463
+        600 Mountain Avenue
+        Murray Hill, NJ 07974-2070
+        U.S.A.
+        dmg@research.att.com or research!dmg
+ */
+
+/* strtod for IEEE-, VAX-, and IBM-arithmetic machines.
+ *
+ * This strtod returns a nearest machine number to the input decimal
+ * string (or sets errno to ERANGE).  With IEEE arithmetic, ties are
+ * broken by the IEEE round-even rule.  Otherwise ties are broken by
+ * biased rounding (add half and chop).
+ *
+ * Inspired loosely by William D. Clinger's paper "How to Read Floating
+ * Point Numbers Accurately" [Proc. ACM SIGPLAN '90, pp. 92-101].
+ *
+ * Modifications:
+ *
+ *      1. We only require IEEE, IBM, or VAX double-precision
+ *              arithmetic (not IEEE double-extended).
+ *      2. We get by with floating-point arithmetic in a case that
+ *              Clinger missed -- when we're computing d * 10^n
+ *              for a small integer d and the integer n is not too
+ *              much larger than 22 (the maximum integer k for which
+ *              we can represent 10^k exactly), we may be able to
+ *              compute (d*10^k) * 10^(e-k) with just one roundoff.
+ *      3. Rather than a bit-at-a-time adjustment of the binary
+ *              result in the hard case, we use floating-point
+ *              arithmetic to determine the adjustment to within
+ *              one bit; only in really hard cases do we need to
+ *              compute a second residual.
+ *      4. Because of 3., we don't need a large table of powers of 10
+ *              for ten-to-e (just some small tables, e.g. of 10^k
+ *              for 0 <= k <= 22).
+ */
+
+/*
+ * #define IEEE for IEEE floating-point arithmetic
+ * #define IBM for IBM mainframe-style floating-point arithmetic.
+ * #define VAX for VAX-style floating-point arithmetic.
+ * #define Unsigned_Shifts if >> does treats its left operand as unsigned.
+ * #define Check_FLT_ROUNDS if FLT_ROUNDS can assume the values 2 or 3.
+ */
+
+// XXX JQ convert a few macros to inlines
+// Remove DOUBLE_IS_32BITS - doesn't seem to be any support for it.
+// So we assume that double is 64bit.
+
+#include <cstdio>
+#ifdef DEBUG
+#define Bug(x) {fprintf(stderr, "%s\n", x); exit(1);}
+#endif
+#include <new>
+#include <cstring>
+#include <cmath>		// log, floor
+#include <limits>
+#include <iostream>
+
+namespace std {
+
+//////////////////////////////////////////////////////////////////////
+// Stuff that needs to be configured by libstdc++
+//
+// Unsigned_Shifts
+// 32bit unsigned type
+// 32bit signed type
+// FLOAT_WORDS_BIG_ENDIAN - took the name from real.c
+// IEEE, IBM, VAX
+// IEEE_QUAD if long double is quad rather than extended
+
+#ifdef Unsigned_Shifts
+#define Sign_Extend(a,b) if (b < 0) a |= 0xffff0000;
+#else
+#define Sign_Extend(a,b) /*no-op*/
+#endif
+
+//XXX need uint32_t here somehow
+typedef unsigned _G_uint32_t;
+typedef int _G_int32_t;
+
+typedef _G_uint32_t unsigned32;
+
+template<typename _T> class fptype {};
+
+#define IEEE
+// #define IBM
+// #define VAX
+
+// #define FLOAT_WORDS_BIG_ENDIAN
+
+
+/* #define P DBL_MANT_DIG */
+/* Ten_pmax = floor(mant_size*log(2)/log(5)) */
+/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */
+/* Quick_max = floor((mant_size-1)*log(FLT_RADIX)/log(10) - 1) */
+/* Int_max = floor(mant_size*log(FLT_RADIX)/log(10) - 1) */
+
+typedef std::numeric_limits<double> nld;
+/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */
+static const int Bletch_dbl = (1 << __builtin_ffs(nld::max_exponent10)) / 16;
+static const int Quick_max_dbl
+  = int(floor((nld::digits-1) * log(double(nld::radix))/log(10) - 1));
+// For secondary fast codepath
+static const int Int_max_dbl
+  = int(floor(nld::digits * log(nld::radix)/log(10) - 1));
+
+typedef std::numeric_limits<long double> nlld;
+/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */
+static const int Bletch_ldbl = (1 << __builtin_ffs(nlld::max_exponent10)) / 16;
+static const int Quick_max_ldbl
+  = int(floor((nlld::digits-1) * log(double(nlld::radix))/log(10) - 1));
+// For secondary fast codepath
+static const int Int_max_ldbl
+  = int(floor(nlld::digits * log(nlld::radix)/log(10) - 1));
+
+// Attempt to wrap up float differences
+#ifdef IEEE
+//	flt	dbl		ldbl (96)	ldbl(128)
+// sign:  1	1		1		1
+// exp:   8	11		15,16(empty)	15
+// mant: 23	20,32		32,32		16,32,32,32
+// qnan: 1,rest of mantissa
+// bias: 0x7f	0x3ff		0x3fff		0x3fff
+
+// IEEE DOUBLE
+// sgn:1 exp:11 mant0:20 mant1:32
+// exp==0 is denorm (biased)
+// exp==2047(0x7ff) mant=0, +-inf
+// exp==2047 mant!=0, +-nan, top bit of mant set is qnan (except mips)
+// exp - 1023 (0x3ff) is unbiased exponent
+// number is 1.mantissa e exp-0x3ff
+// the 1 is implicit (since we're binary)
+
+// IEEE EXTENDED (80 bit)
+// empty:16 sgn:1 exp:15 mant0:32 mant1:32
+// exp==0 is denorm (biased)
+// exp==32767(0x7fff) mant=0, +-inf
+// exp==32767 mant!=0, +-nan, top bit of mant set is qnan (except mips)
+// exp - 16383 is unbiased exponent
+// 0 and denormalized numbers have a zero in the top bit of mant0.  All
+// others have a 1 bit.
+
+// IEEE QUAD
+// sgn:1 exp:15 mant0:16 mant1:32 mant2:32 mant3:32
+// exp==0 is denorm (biased)
+// exp==32767(0x7fff) mant=0, +-inf
+// exp==32767 mant!=0, +-nan, top bit of mant set is qnan (except mips)
+// exp - 16383 is unbiased exponent
+
+// IEEE double format
+template<>
+class fptype<double>
+{
+  union {
+    double d;
+    unsigned32 b[2];
+  } x;
+
+  // Handle endianness
+#if FLOAT_WORDS_BIG_ENDIAN
+  static const int hi = 0;	// high order word
+  static const int lo = 1;	// lo order word
+#else
+  static const int hi = 1;
+  static const int lo = 0;
+#endif
+
+public:
+
+  static const int sign_bit = 0x80000000; // mask to get sign bit from hi word
+  static const int bias = 1023;	// exponent bias
+  static const int exp_mask = 0x7ff00000; // mask to leave just exp bits in hi word
+  static const int exp_shift = 20; // amt to shift hi word to get exp at bottom
+  static const int exp_base = 0x100000;	// First bit of exponent
+  static const int mant_mask = 0xfffff;	// mask to get mantissa bits in hi word
+  static const int mant_size = 53; // Number of bits in the mantissa
+  static const int implicit_bit = 0x100000; // implicit top bit of mantissa
+
+  static const int log2p = 1;
+
+  static const int Bletch;
+  static const int Quick_max;
+  static const int Int_max;
+
+  // Constructor and conversion operators
+  fptype() { }
+  fptype(double d) { x.d = d; }
+  fptype& operator=(double d) { x.d = d; return *this;}
+  operator double() const { return x.d; }
+
+  /// Features ///
+  static bool has_denorm() { return (nld::has_denorm != std::denorm_absent); }
+
+  /// Tests ///
+  bool isnonzeromant() const { return x.b[lo] || x.b[hi] & mant_mask; }
+  bool iszeromant() const { return x.b[lo]==0 && (x.b[hi] & mant_mask) == 0; }
+
+  // equiv to isnan && isinf, but faster
+  bool isspecial() const { return x.b[hi] & exp_mask == exp_mask; }
+  bool isnan() const { return bexp() == 0x7ff && isnonzeromant();}
+  bool isinf() const { return bexp() == 0x7ff && iszeromant();}
+
+  bool isneg() const { return (x.b[hi] & sign_bit) != 0; }
+  bool isdenorm() const { return has_denorm() && bexp() == 0; }
+  bool iszero() const { return x.b[lo] == 0 && (x.b[hi] & ~sign_bit) == 0; }
+  // Return true if d is a normalized power of 2
+  bool ispwr2() const
+  {
+    return !x.b[lo] && !(x.b[hi] & mant_mask) &&
+      (!has_denorm() || x.b[hi] & exp_mask); // Check that this isn't a denorm
+  }
+
+
+  /// Twiddle sign bit ///
+  void setneg() { x.b[hi] |= sign_bit; }
+  void clearneg() { x.b[hi] &= ~sign_bit; }
+
+  /// Exponent ///
+  long exp() const { return bexp() - bias; } 
+  // biased exponent
+  long bexp() const { return (x.b[hi] & exp_mask) >> exp_shift; } 
+  // Set biased exponent to 0, i.e. 1.0e+00
+  void zeroexp() { x.b[hi] = (x.b[hi] & mant_mask) | (bias << exp_shift); }
+  // Add v to the exponent
+  void add_to_exp(int v) { x.b[hi] += v * exp_base; }
+
+  /// Mantissa ///
+  long mant0() const { return x.b[hi] & mant_mask; }
+  int mantsize() const { return 2; }
+  long mant(int i=1) const { return x.b[lo]; }
+  // Return most significant mantissa bits, including implicit top bit, unless denorm.
+  long imant0() const
+  {
+    if (x.b[hi] & exp_mask) return (mant0() | implicit_bit);
+    else return mant0();
+  }
+  // Return mantissa bits with bit i as the most significant
+  unsigned32 mantissa_denorm(int i) {
+    return (i > 32)  ? x.b[hi] << (64 - i) | x.b[lo] >> (i - 32)
+      : x.b[lo] << (32 - i);
+  }
+
+  double val() const { return x.d; }
+
+  void normalize() {}
+};
+
+const int fptype<double>::Bletch = Bletch_dbl;
+const int fptype<double>::Quick_max = Quick_max_dbl;
+const int fptype<double>::Int_max = Int_max_dbl;
+
+// Currently only ieee extended and quad are covered
+template<>
+class fptype<long double>
+{
+  union {
+    long double d;
+    unsigned32 b[sizeof(long double)/sizeof(unsigned32)];
+  } x;
+
+  static const int wordlen = sizeof(long double)/sizeof(unsigned32);
+
+  // Handle endianness
+#if FLOAT_WORDS_BIG_ENDIAN
+  static const int hi = 0;	// high order word
+  static const int lo = wordlen-1;	// lo order word
+  static const int idx[] = { 0, 1, 2, 3 };
+#else
+  static const int hi = wordlen-1;
+  static const int lo = 0;
+  static const int idx[4];
+#endif
+
+public:
+
+  static const int sign_bit = 0x8000; // mask to get sign bit from hi word (LE)
+  static const int bias = 16383;	// exponent bias
+  static const int exp_mask = 0x7fff; // mask to leave just exp bits in hi word (LE)
+  static const int exp_shift = 0; // amt to shift hi word to get exp at bottom
+  static const int exp_base = 0x1;	// First bit of exponent
+  // Number of bits in the mantissa for long double.  64 for extended, 112 for quad.
+  static const int mant_size = std::numeric_limits<long double>::digits;
+
+#if IEEE_QUAD
+  static const int implicit_bit = 0x10000; // implicit top bit of mantissa
+  static const int mant_mask = 0xffff;	// mask to get mantissa bits in hi word
+#else
+  static const int implicit_bit = 0;
+  static const int mant_mask = 0x0;	// mask to get mantissa bits in hi word
+#endif
+
+  static const int log2p = 1;
+
+  static const int Bletch;
+  static const int Quick_max;
+  static const int Int_max;
+
+  // Constructor and conversion operators
+  fptype() {}
+  fptype(long double d) { x.d = d; }
+  fptype& operator=(long double d) { x.d = d; return *this;}
+  operator long double() const { return x.d; }
+
+  /// Features ///
+  static bool has_denorm() { return (nld::has_denorm != std::denorm_absent); }
+
+  /// Tests ///
+  bool isnonzeromant() const
+  {
+    bool ret = (mant0()) != 0;
+    for (int i=0; i < wordlen-1; i++)
+      ret = (ret || (x.b[idx[i]]));
+    return ret;
+  }
+  bool iszeromant() const { return !isnonzeromant(); }
+
+  // equiv to isnan && isinf, but faster
+  bool isspecial() const { return x.b[hi] & exp_mask == exp_mask; }
+  bool isnan() const { return bexp() == 0x7ff && isnonzeromant();}
+  bool isinf() const { return bexp() == 0x7ff && iszeromant();}
+
+  bool isneg() const { return (x.b[hi] & sign_bit) != 0; }
+  bool isdenorm() const { return has_denorm() && bexp() == 0; }
+#if IEEE_QUAD
+  bool iszero() const {
+    return x.b[idx[1]] == 0 && x.b[idx[2]] == 0 && x.b[idx[3]] == 0 &&
+      (x.b[hi] & ~sign_bit) == 0;
+  }
+#else
+  bool iszero() const {
+    return x.b[idx[1]] == 0 && x.b[idx[2]] == 0 && (x.b[hi] & ~sign_bit) == 0;
+  }
+#endif
+  // Return true if d is a normalized power of 2
+  bool ispwr2() const
+  {
+#if IEEE_QUAD
+    return !x.b[idx[1]] && !x.b[idx[2]] && !x.b[idx[3]]
+      && !(x.b[hi] & mant_mask) &&
+      (!has_denorm() || x.b[hi] & exp_mask); // Check that this isn't a denorm
+#else
+    return !x.b[idx[1]] && !x.b[idx[2]] &&
+      (!has_denorm() || x.b[hi] & exp_mask); // Check that this isn't a denorm
+#endif
+  }
+
+  /// Twiddle sign bit ///
+  void setneg() { x.b[hi] |= sign_bit; }
+  void clearneg() { x.b[hi] &= ~sign_bit; }
+
+  /// Exponent ///
+  long exp() const { return bexp() - bias; } 
+  // biased exponent
+  long bexp() const { return (x.b[hi] & exp_mask) >> exp_shift; } 
+  // Set biased exponent to 0, i.e. 1.0e+00
+  void zeroexp() {
+#if IEEE_QUAD
+    x.b[hi] = (mant0()) | (bias << exp_shift);
+#else
+    x.b[hi] = (bias << exp_shift);
+#endif
+ }
+  // Add v to the exponent
+  void add_to_exp(int v) { x.b[hi] += v * exp_base; }
+
+  /// Mantissa ///
+#if IEEE_QUAD
+  long mant0() const { return x.b[hi] & mant_mask; }
+  int mantsize() const { return wordlen; }
+  // Return i'th 32 bit word of mantissa.  Only for 1-n
+  long mant(int i) const { return x.b[idx[i]]; }
+#else
+  long mant0() const { return x.b[idx[1]]; }
+  int mantsize() const { return wordlen-1; }
+  long mant(int i) const { return x.b[idx[i+1]]; }
+#endif
+  // Return most significant mantissa bits, including implicit top bit
+  long imant0() const
+  {
+    if (x.b[hi] & exp_mask) return (mant0() | implicit_bit);
+    else return mant0();
+  }
+  // Return top 32 mantissa bits with bit i as the most significant
+  unsigned32 mantissa_denorm(int i) {
+#if IEEE_QUAD
+    // total 112 bits
+    if (i > 96) return x.b[hi] << (128 - i) | x.b[idx[1]] >> (i - 96);
+    else if (i > 64) return x.b[idx[1]] << (96 - i) | x.b[idx[2]] >> (i - 64);
+    else if (i > 32) return x.b[idx[2]] << (64 - i) | x.b[idx[3]] >> (i - 32);
+    else return x.b[idx[3]] << (32 - i);
+#else
+    // total 64 bits
+    if (i > 32) return x.b[idx[1]] << (64 - i) | x.b[idx[2]] >> (i - 32);
+    else return x.b[idx[2]] << (32 - i);
+#endif
+  }
+
+  double val() const { return x.d; }
+
+  void normalize() {}
+};
+
+const int fptype<long double>::idx[4]
+= { wordlen-1, wordlen-2, wordlen-3, wordlen-4 };
+
+const int fptype<long double>::Bletch = Bletch_ldbl;
+const int fptype<long double>::Quick_max = Quick_max_ldbl;
+const int fptype<long double>::Int_max = Int_max_ldbl;
+
+#else
+#ifdef IBM
+// IBM floating point
+
+// i370 format double
+// sign:1 exp:7 mant0:24 mant1:32
+// (exp-64) * 4 is unbiased exp
+// no denorm, no nan, no inf
+template<>
+class fptype<double>
+{
+  union {
+    double d;
+    unsigned32 b[2];
+  } x;
+
+public:
+
+  static const int sign_bit = 0x80000000; // mask to get sign bit from hi word
+  static const int bias = 65;		     // Exponent bias
+  static const int exp_mask = 0x7f000000;    // mask for exp
+  static const int exp_shift = 24;	     // amt to shift to get sign+exp
+  static const int exp_base = 0x1000000;     // First bit of exponent
+  static const int mant_mask = 0xffffff;
+  static const int mant_size = 14;	     // Number of bits in the mantissa (but not really)
+  static const int top_bit = 0x1000000;	     // top bit of mantissa
+
+  static const int log2p = 4;
+
+  static const int Bletch;
+  static const int Quick_max;
+  static const int Int_max;
+
+  /// Features ///
+  // should be false for IBM
+  static bool has_denorm() { return (nld::has_denorm != std::denorm_absent); }
+
+  bool isspecial() const { return false; }
+  bool isnan() const { return false; }
+  bool isinf() const { return false; }
+  bool isneg() const { return (x.b[hi] & sign_bit) != 0; }
+  bool isdenorm() const { return false; }
+  bool iszero() const {}
+  // Return true if d is a normalized power of 2
+  bool ispwr2() const
+  { return !x.b[lo] && !(x.b[hi] & (mant_mask - top_bit)); }
+
+  /// Twiddle sign bit ///
+  void setneg() { x.b[hi] |= sign_bit; }
+  void clearneg() { x.b[hi] &= ~sign_bit; }
+
+  /// Exponent ///
+  long exp() const { return bexp() - bias; }
+  // biased exponent
+  long bexp() const { return (x.b[hi] & exp_mask) >> exp_shift; } 
+  // Set biased exponent to 0, i.e. 1.0e+00
+  void zeroexp() { b[hi] = (b[hi] & mant_mask) | (bias << exp_shift); }
+  // Add v to the exponent
+  void add_to_exp(int v) { x.b[hi] += v * exp_base; }
+
+  /// Mantissa ///
+  long mant0() const { return x.b[hi] & mant_mask; }
+  int mantsize() const { return 2; }
+  long mant(int i) const { return x.b[lo]; }
+  // Return most significant mantissa bits.  IBM has no implicit top bit
+  long imant0() const { return mant0(); }
+  // Return mantissa bits with bit i as the most significant
+  unsigned32 mantissa_denorm(int i) { return 0; }	// Unused
+
+  double val() const { return x.d; }
+
+  void normalize() { x.d += 0; }
+};
+
+const int fptype<double>::Bletch = Bletch_dbl;
+const int fptype<double>::Quick_max = Quick_max_dbl;
+const int fptype<double>::Int_max = Int_max_dbl;
+
+#else
+// VAX floating point
+
+// VAX D (64 bit)
+// mant1:16 sign:1 exp:8 mant0:7 mant3:16 mant2:16
+// exp - 129 is unbiased exp
+
+template<>
+class fptype<double>
+{
+  union {
+    double d;
+    unsigned32 b[2];
+  } x;
+
+  static const int hi = 0;	// Always little-endian
+
+  long wswap(long w) { return ((w & 0xffff) << 16) | ((w & 0xffff0000) >> 16);}
+
+public:
+
+  static const int sign_bit = 0x8000; 		// sign bit mask
+  static const int bias = 129;			// Exponent bias
+  static const int exp_mask = 0x7f80; 		// Exp mask for F
+  static const int exp_shift = 7;	 // Amt to shift to get sgn+exp in F,D
+  static const int exp_base = 0x80;		// First bit of exponent
+  static const int mant_mask = 0xffff007f; 	// mant mask in F,D
+  static const int mant_size = 56		// bits in the mantissa
+  static const int implicit_bit = 0x800000;    // implicit top bit of mantissa
+
+  static const int log2p = 1;
+
+  static const int Bletch;
+  static const int Quick_max;
+  static const int Int_max;
+
+  /// Features ///
+  // should be false for VAX
+  static bool has_denorm() { return (nld::has_denorm != std::denorm_absent); }
+
+  // equiv to isnan && isinf, but faster
+  bool isspecial() const { return isnan(); }
+  bool isnan() const { return (x.b[hi] & 0x8000) != 0;}
+  bool isinf() const { return false;}
+  bool isneg() const { return (x.b[hi] & sign_bit) != 0; }
+  bool isdenorm() const { return false; }
+  bool iszero() const { return (x.b[hi] & (sign_bit | exp_mask)) == 0; }
+  // Return true if d is a normalized power of 2
+  bool ispwr2() const
+  { return !x.b[lo] && !(x.b[hi] & mant_mask); }
+
+  /// Twiddle sign bit ///
+  void setneg() { x.b[hi] |= sign_bit; }
+  void clearneg() { x.b[hi] &= ~sign_bit; }
+
+  /// Exponent ///
+  long exp() const { return bexp() - bias; }
+  // biased exponent
+  long bexp() const { return (x.b[hi] & exp_mask) >> exp_shift; } 
+  // Set biased exponent to 0, i.e. 1.0e+00
+  void zeroexp() { b[hi] = (b[hi] & mant_mask) | (bias << exp_shift); }
+  // Add v to the exponent
+  void add_to_exp(int v) { x.b[hi] += v * exp_base; }
+
+  /// Mantissa ///
+  long mant0() const { return wswap(x.b[hi] & mant_mask); }
+  long mant(int i) const { return wswap(x.b[lo]); }
+  // Return most significant mantissa bits.  VAX always has implicit top bit
+  long imant0() const { return mant0() | implicit_bit; }
+  // Return mantissa bits with bit i as the most significant
+  unsigned32 mantissa_denorm(int i) { return 0; } // Unused
+
+  double val() const { return x.d; }
+
+  void normalize() {}
+};
+
+const int fptype<double>::Bletch = Bletch_dbl;
+const int fptype<double>::Quick_max = Quick_max_dbl;
+const int fptype<double>::Int_max = Int_max_dbl;
+
+#endif
+#endif
+
+// Shift y so lowest bit is 1 and return the number of bits y was
+// shifted.
+static int
+lo0bits (unsigned32 *y)
+{
+  int ret = __builtin_ctz(*y);
+  *y = *y >> ret;
+  return ret;
+}
+
+
+/* (1<<BIGINT_MINIMUM_K) is the minimum number of words to allocate
+   in a Bigint.  dtoa usually manages with 1<<2, and has not been
+   known to need more than 1<<3.  */
+
+#define BIGINT_MINIMUM_K 3
+
+struct Bigint {
+  int k;		/* Parameter given to Balloc(k) */  // power of 2 for space
+  int maxwds;		/* Allocated space: equals 1<<k. */ // number of 32bit words
+  int wds;		/* Current length. */
+  bool on_stack;	/* 1 if stack-allocated. */
+  bool sign;		/* false if value is positive or zero; true if negative. */
+  unsigned32 x[1<<BIGINT_MINIMUM_K]; /* Actually: x[maxwds] */
+
+  Bigint()
+    : k(BIGINT_MINIMUM_K), maxwds(1<<BIGINT_MINIMUM_K), wds(0),
+      on_stack(0), sign(0)
+  { }
+
+  Bigint(int new_k, int new_maxwds)
+    : k(new_k), maxwds(new_maxwds), wds(0), on_stack(0), sign(0)
+  { }
+
+
+  void
+  Bcopy(Bigint *y)
+  {
+    unsigned32 *xp, *yp;
+    int i = y->wds;
+    sign = y->sign;
+    wds = i;
+    for (xp = x, yp = y->x; --i >= 0; )
+      *xp++ = *yp++;
+  }
+
+  // Set contents to 0
+  void clear() { memset(&x[0], 0, wds * sizeof(unsigned32)); }
+
+};
+
+#define BIGINT_HEADER_SIZE \
+  (sizeof(Bigint) - (1<<BIGINT_MINIMUM_K) * sizeof(unsigned32))
+
+
+class bigint
+{
+  Bigint* rep;
+  Bigint bg;
+
+  friend int cmp(const bigint& a, const bigint& b);
+
+public:
+  // Constructors and destructor
+  bigint() : rep(&bg) { bg.on_stack = 1; }
+  bigint(Bigint* bi) : rep(bi) {}
+  ~bigint() {}
+
+  bigint& operator=(bigint& i) { rep = i.rep; return *this; }
+  bigint& operator=(Bigint* i) { rep = i; return *this; }
+  bigint& operator=(int i) {
+    resize(1);
+    rep->x[0] = i;
+    rep->wds = 1;
+    return *this;
+  }
+
+  int order() const { return rep->k; }
+  int size() const { return rep->wds; }
+  int capacity() const { return rep->maxwds; }
+
+  void resize(int k)
+  {
+    if (rep->k >= k) return;
+    Bigint* nv = rep;
+    balloc(k);
+    rep->Bcopy(nv);
+    if (!nv->on_stack) delete[] (char*)nv;
+  }
+
+  unsigned32 operator[](int idx) const { return rep->x[idx]; }
+  unsigned32& operator[](int idx) { return rep->x[idx]; }
+
+  unsigned32 back() const { return rep->x[size()-1]; }
+  unsigned32& back() { return rep->x[size()-1]; }
+
+  // Set contents to 0
+  void clear() { rep->clear(); }
+
+  /* Allocate a Bigint with '1<<k' big digits. */
+  void balloc(int k)
+  {
+    int x;
+
+    if (k < BIGINT_MINIMUM_K)
+      k = BIGINT_MINIMUM_K;
+
+    x = 1 << k;
+    char* space = new char[BIGINT_HEADER_SIZE + x * sizeof(unsigned32)];
+    rep = new (space) Bigint(k, x);
+  }
+
+  void swap(bigint& b) { Bigint* tmp = b.rep; b.rep = rep; rep = tmp; }
+
+  Bigint* get() { return rep; }
+  const Bigint* get() const { return rep; }
+
+  bool isneg() const { return rep->sign != 0; }
+
+  template<typename _T>
+  void d2b(_T d, _G_int32_t *e, _G_int32_t *bits);
+  void mult(bigint& ar, bigint& br);
+  bigint& multadd(int m, int a);
+  bigint& pow5mult(int k);
+  void lshift(int k);
+  bigint& diff(bigint& ba, bigint& bb);
+  int quorem(const bigint &S);
+};
+
+
+// Transfers the significant bits of d's mantissa, including any implicit top
+// bit to this bigint.  As a result, the lsb of x[0] will be 1.  Returns
+// significant number of bits in *bits.  Returns a modified exponent such that
+// the decimal point is to the right of the lsb of x[0].
+template<typename _T>
+void
+bigint::d2b(_T d, _G_int32_t *e, _G_int32_t *bits)
+{
+  // Shift d fraction until bottom bit is 1.  Store fraction into result
+  // (which may be 1 or 2 ints).  Store e adjusted for the shift of d.
+  // bits is the number of bits d was shifted.
+  int de;			// Exponent of d
+  int i, k;
+  unsigned32 *x, y, z;
+
+  resize(1);
+  x = rep->x;
+
+  fptype<_T> fd = d;
+  z = fd.imant0();		// Mantissa plus implicit top 1
+  de = fd.bexp();		// Exponent part of d
+//   y = fd.mant1();		// Low order mantissa bits
+
+  // Shift fraction so bit 0 is 1, store into x[0] (and x[1] if
+  // needed).  k is distance the fraction was shifted.
+
+  int mantsize = fd.mantsize();
+
+  // Set j to the lowest order word with significant bits
+  int j;
+  int shift = 0;
+
+  // Find lowest non-zero mantissa word
+  for (j=mantsize-1; j>0 ; j--)
+    if ((y=fd.mant(j)) != 0) break;
+
+  int skipped = mantsize - j - 1; // Number of words == 0
+
+  // Transfer words to array
+  int idx = 0;
+  for (; j>0 ; j--)
+    x[idx++] = fd.mant(j);
+  x[idx++] = z;
+
+  // Right shift data so the lsb of x[0] is 1.
+  shift = lo0bits(&x[0]);
+  for (int m=1; m < idx; m++) {
+    // Need to test because 0x1c5ad3 << 32 != 0.  I guess this is just
+    // undefined behavior.  If a bug, it's present in 2.95, 3.3, 3.4.
+    if (shift) {
+      x[m-1] |= x[m] << (32-shift);
+      x[m] >>= shift;
+    }
+  }
+
+  while (!x[idx-1]) idx--;	// in case upper words are 0 for denorm
+  i = rep->wds = idx;
+
+  k = shift + 32 * skipped;
+
+//   // original code
+//   if (y) {
+//     if ((k = lo0bits(&y))) {
+//       x[0] = y | z << (32 - k);
+//       z >>= k;
+//     }
+//     else
+//       x[0] = y;
+//     i = rep->wds = (x[1] = z) ? 2 : 1;
+//   }
+//   else {
+// #ifdef DEBUG
+//     if (!z)
+//       Bug("Zero passed to d2b");
+// #endif
+//     k = lo0bits(&z);
+//     x[0] = z;
+//     i = rep->wds = 1;
+//     k += 32;
+//   }
+
+  // Adjust exponent for bias and fraction shift
+  if (fd.isdenorm()) {
+    // denorm case
+    *e = de - fptype<_T>::bias - (fptype<_T>::mant_size-1) + 1 + k;
+    *bits = 32*i - __builtin_clz(x[i-1]);
+  }
+  else {
+#ifdef IBM
+    *e = (de - fptype<_T>::bias - (fptype<_T>::mant_size-1) << 2) + k;
+    *bits = 4*P + 8 - k - __builtin_clz(fd.mant0());
+#else
+    *e = de - fptype<_T>::bias - (fptype<_T>::mant_size-1) + k;
+    *bits = fptype<_T>::mant_size - k;
+#endif
+  }
+}
+
+void
+bigint::mult(bigint& ar, bigint& br)
+{
+  int k, wa, wb, wc;
+  unsigned32* x;
+  unsigned32 carry, y, z;
+  unsigned32 *xa, *xae;	// ptr to a->x, end of x array
+  unsigned32 *xb, *xbe;	// ptr to b->x, end of x array
+  unsigned32 *xc, *xc0;
+  unsigned32 z2;
+  Bigint* a;
+  Bigint* b;
+  if (ar.size() < br.size()) {
+    a = br.get();
+    b = ar.get();
+  }
+  else {
+    a = ar.get();
+    b = br.get();
+  }
+  k = a->k;
+  wa = a->wds;		// current bytes in ar
+  wb = b->wds;		// current bytes in br
+  wc = wa + wb;
+  if (wc > a->maxwds)
+    k++;			// double storage in dest
+  resize(k);
+  for(x = rep->x, xa = x + wc; x < xa; x++)
+    *x = 0;
+  xa = a->x;
+  xae = xa + wa;
+  xb = b->x;
+  xbe = xb + wb;
+  xc0 = rep->x;
+
+  // iterate over words in b
+  for(; xb < xbe; xb++, xc0++) {
+    if ((y = *xb & 0xffff)) {	// grab bottom 16 bits of xb
+      x = xa;
+      xc = xc0;
+      carry = 0;
+      do {
+	// repeated 16x16 into 32 multiplications
+	z = (*x & 0xffff) * y + (*xc & 0xffff) + carry;
+	carry = z >> 16;
+	z2 = (*x++ >> 16) * y + (*xc >> 16) + carry;
+	carry = z2 >> 16;
+	*xc++ = z2 << 16 | z & 0xffff;
+      }
+      while(x < xae);
+      *xc = carry;
+    }
+    if ((y = *xb >> 16)) {
+      x = xa;
+      xc = xc0;
+      carry = 0;
+      z2 = *xc;
+      do {
+	z = (*x & 0xffff) * y + (*xc >> 16) + carry;
+	carry = z >> 16;
+	*xc++ = z << 16 | z2 & 0xffff;
+	z2 = (*x++ >> 16) * y + (*xc & 0xffff) + carry;
+	carry = z2 >> 16;
+      }
+      while(x < xae);
+      *xc = z2;
+    }
+  }
+  for(xc0 = rep->x, xc = xc0 + wc; wc > 0 && !*--xc; --wc) ;
+  rep->wds = wc;
+}
+
+/* Return b*m+a.  b is modified.
+   Assumption:  0xFFFF*m+a fits in 32 bits. */
+
+bigint&
+bigint::multadd(int m, int a)
+{
+  int i, wds;
+  unsigned32 *x, y;
+  unsigned32 xi, z;
+
+  wds = rep->wds;
+  x = rep->x;
+  i = 0;
+  do {
+    xi = *x;
+    y = (xi & 0xffff) * m + a;
+    z = (xi >> 16) * m + (y >> 16);
+    a = (int)(z >> 16);
+    *x++ = (z << 16) + (y & 0xffff);
+  }
+  while(++i < wds);
+  if (a) {
+    if (wds >= rep->maxwds)
+      resize(rep->k+1);
+    rep->x[wds++] = a;
+    rep->wds = wds;
+  }
+  return *this;
+}
+
+/* Returns b*(5**k).  b is modified. */
+/* Re-written by Per Bothner to not need a static list. */
+
+bigint&
+bigint::pow5mult(int k)
+{
+  static int p05[6] = { 5, 25, 125, 625, 3125, 15625 };
+
+  for (; k > 6; k -= 6)
+    multadd(15625, 0); /* b *= 5**6 */
+  if (k != 0)
+    multadd(p05[k-1], 0);
+  return *this;
+}
+
+
+/* Re-written by Per Bothner so shift can be in place. */
+
+void
+bigint::lshift(int k)
+{
+  int i;
+  unsigned32 *x, *x1, *xe;
+  int old_wds = rep->wds;
+  int n = k >> 5;
+  int k1 = rep->k;
+  int n1 = n + old_wds + 1;
+
+  if (k == 0)
+    return;
+
+  // compute k where 2^k is num words needed
+  for(i = rep->maxwds; n1 > i; i <<= 1)
+    k1++;
+  resize(k1);
+
+  xe = rep->x; /* Source limit */
+  x = xe + old_wds; /* Source pointer */
+  x1 = x + n; /* Destination pointer */
+  if (k &= 0x1f) {
+    int k1 = 32 - k;
+    unsigned32 z = *--x;
+    if ((*x1 = (z >> k1)) != 0) {
+      ++n1;
+    }
+    while (x > xe) {
+      unsigned32 w = *--x;
+      *--x1 = (z << k) | (w >> k1);
+      z = w;
+    }
+    *--x1 = z << k;
+  }
+  else
+    // lshift % 32 = 0, just copy the words to the left.
+    do {
+      *--x1 = *--x;
+    } while(x > xe);
+  while (x1 > xe)
+    *--x1 = 0;
+  rep->wds = n1 - 1;
+}
+
+/* Do: c = a-b. */
+
+bigint&
+bigint::diff(bigint& ba, bigint& bb)
+{
+  const Bigint* a;
+  const Bigint* b;
+  int i, wa, wb;
+  _G_int32_t borrow, y; /* We need signed shifts here. */
+  const unsigned32 *xa, *xae, *xb, *xbe;
+  unsigned32 *xc;
+  _G_int32_t z;
+
+  i = cmp(ba,bb);
+  if (!i) {
+    resize(0);
+    rep->wds = 1;
+    rep->x[0] = 0;
+    return *this;
+  }
+
+  a = ba.get();
+  b = bb.get();
+  if (i < 0) {
+    const Bigint *tmp = a;
+    a = b;
+    b = tmp;
+    i = 1;
+  }
+  else
+    i = 0;
+  resize(a->k);
+  rep->sign = i;
+  wa = a->wds;
+  xa = a->x;			// Start of a array
+  xae = xa + wa;		// End of a array
+  wb = b->wds;
+  xb = b->x;			// Start of b array
+  xbe = xb + wb;		// End of b array
+  xc = rep->x;
+  borrow = 0;
+  do {
+    y = (*xa & 0xffff) - (*xb & 0xffff) + borrow;
+    borrow = y >> 16;
+    Sign_Extend(borrow, y);
+    z = (*xa++ >> 16) - (*xb++ >> 16) + borrow;
+    borrow = z >> 16;
+    Sign_Extend(borrow, z);
+    *xc++ = z << 16 | y & 0xffff;
+  }
+  while(xb < xbe);
+  while(xa < xae) {
+    y = (*xa & 0xffff) + borrow;
+    borrow = y >> 16;
+    Sign_Extend(borrow, y);
+    z = (*xa++ >> 16) + borrow;
+    borrow = z >> 16;
+    Sign_Extend(borrow, z);
+    *xc++ = z << 16 | y & 0xffff;
+  }
+  while(!*--xc)
+    wa--;
+  rep->wds = wa;
+  return *this;
+}
+
+// Return quotient of *this/S.  this = remainder.
+// Assumes that returned quotient is < 9, i.e. this->wds == S->wds
+// 
+int
+bigint::quorem(const bigint &S)
+{
+  int n;
+  _G_int32_t borrow, y;
+  unsigned32 carry, q, ys;
+  unsigned32 *bx, *bxe;
+  const unsigned32 *sx, *sxe;
+  _G_int32_t z;
+  unsigned32 si, zs;
+
+  n = S.size();
+#ifdef DEBUG
+  /*debug*/ if (rep->wds > n)
+    /*debug*/       Bug("oversize b in quorem");
+#endif
+  if (rep->wds < n)
+    return 0;
+  sx = S.rep->x;
+  sxe = sx + --n;
+  bx = rep->x;
+  bxe = bx + n;
+  q = *bxe / (*sxe + 1);  /* ensure q <= true quotient */
+#ifdef DEBUG
+  /*debug*/ if (q > 9)
+    /*debug*/       Bug("oversized quotient in quorem");
+#endif
+  if (q) {
+    borrow = 0;
+    carry = 0;
+    do {
+      si = *sx++;
+      ys = (si & 0xffff) * q + carry;
+      zs = (si >> 16) * q + (ys >> 16);
+      carry = zs >> 16;
+      y = (*bx & 0xffff) - (ys & 0xffff) + borrow;
+      borrow = y >> 16;
+      Sign_Extend(borrow, y);
+      z = (*bx >> 16) - (zs & 0xffff) + borrow;
+      borrow = z >> 16;
+      Sign_Extend(borrow, z);
+      *bx++ = z << 16 | y & 0xffff;
+    }
+    while(sx <= sxe);
+    if (!*bxe) {
+      bx = rep->x;
+      while(--bxe > bx && !*bxe)
+	--n;
+      rep->wds = n;
+    }
+  }
+  if (cmp(*this, S) >= 0) {
+    q++;
+    borrow = 0;
+    carry = 0;
+    bx = rep->x;
+    sx = S.rep->x;
+    do {
+      si = *sx++;
+      ys = (si & 0xffff) + carry;
+      zs = (si >> 16) + (ys >> 16);
+      carry = zs >> 16;
+      y = (*bx & 0xffff) - (ys & 0xffff) + borrow;
+      borrow = y >> 16;
+      Sign_Extend(borrow, y);
+      z = (*bx >> 16) - (zs & 0xffff) + borrow;
+      borrow = z >> 16;
+      Sign_Extend(borrow, z);
+      *bx++ = z << 16 | y & 0xffff;
+    }
+    while(sx <= sxe);
+    bx = rep->x;
+    bxe = bx + n;
+    if (!*bxe) {
+      while(--bxe > bx && !*bxe)
+	--n;
+      rep->wds = n;
+    }
+  }
+  return q;
+}
+
+
+int
+cmp(const bigint& a, const bigint& b)
+{
+  int i = a.size();
+  int j = b.size();
+#ifdef DEBUG
+  if (i > 1 && !a[i-1])
+    Bug("cmp called with a->x[a->wds-1] == 0");
+  if (j > 1 && !b[j-1])
+    Bug("cmp called with b->x[b->wds-1] == 0");
+#endif
+  if (i -= j)
+    return i;
+  while (j--) {
+    if (a[j] != b[j])
+      return a[j] < b[j] ? -1 : 1;
+  }
+  return 0;
+}
+
+
+template
+void
+bigint::d2b<double>(double d, _G_int32_t *e, _G_int32_t *bits);
+
+template
+void
+bigint::d2b<long double>(long double d, _G_int32_t *e, _G_int32_t *bits);
+
+
+// Enough entries to handle long double with 112-bit mantissa
+static const double
+tens[] = {
+                1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
+                1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+                1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
+                1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
+                1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49
+};
+/* Ten_pmax = floor(mant_size*log(2)/log(5)) */
+// IEEE & IBM double is 22
+// VAX double is 24
+// IEEE quad is 48
+// This is the number of elements in tens[] to use.
+static const int Ten_pmax = int(fptype<double>::mant_size * log(2)/log(5));
+
+#ifdef IEEE
+static const double bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 };
+#define n_bigtens 5
+static const double bigtensl[] = { 1e16, 1e32, 1e64, 1e128, 1e256, 1e512,
+				   1e1024, 1e2048, 1e4096 };
+#define n_bigtensl 9
+#else
+#ifdef IBM
+static const double bigtens[] = { 1e16, 1e32, 1e64 };
+#define n_bigtens 3
+#else  // VAX
+/* Also used for the case when !_DOUBLE_IS_32BITS.  */
+static const double bigtens[] = { 1e16, 1e32 };
+#define n_bigtens 2
+#endif
+#endif
+
+template<typename _T>
+_T bigten(int i);
+
+template<>
+static inline double bigten<double>(int i) { return bigtens[i]; }
+
+template<>
+static inline long double bigten<long double>(int i) { return bigtensl[i]; }
+
+template<typename _T>
+_T maxbigten();
+
+template<>
+static inline double maxbigten<double>()
+{ return bigtens[n_bigtens-1]; }
+
+template<>
+static inline long double maxbigten<long double>()
+{ return bigtensl[n_bigtensl-1]; }
+
+
+
+// dec_exponent performs the first portion of the conversion to a string.  It
+// computes an upper bound on the decimal exponent that can be used to size a
+// buffer to print into.  It must be passed back into dtoa.
+//
+// This will only be used for double and long double
+template<typename _T>
+int
+__dec_exponent(_T d, int &special, bool &sign)
+{
+  /*     Arguments ndigits, decpt, sign are similar to those
+	 of ecvt and fcvt; trailing zeros are suppressed from
+	 the returned string.  If not null, *rve is set to point
+	 to the end of the return value.  If d is +-Infinity or NaN,
+	 then *decpt is set to 9999.
+
+	 special returns true if the number is nan, infinity, or 0.
+	 type contains -1 for nan, 0 for 0, and 1 for infinity.  This allows
+	 the calling routine to handle the special cases.
+  */
+
+  fptype<_T> fd = d;
+
+  _G_int32_t i;
+  _G_int32_t k;			// Holds base 10 exponent
+
+  _T d2, ds;
+
+  // Save sign value into user var
+  if (fd.isneg()) {
+    /* set sign for everything, including 0's and NaNs */
+    sign = true;
+    fd.clearneg();		// clear sign bit
+  }
+  else
+    sign = false;
+
+  // Handle IEEE and VAX infinity and nan.  Apparently IBM has none.
+  if (fd.isinf())
+    {
+      special = 1;
+      return 8;
+    }
+  if (fd.isnan())
+    {
+      special = 2;
+      return 3;
+    }
+
+  special = 0;
+
+  fd.normalize();		// only for IBM
+  if (fd.iszero()) return 0;
+
+  d = fd;
+
+  // i = exponent(d) unbiased
+  fd = d;
+  i = (int)fd.bexp();
+
+  if (fd.isdenorm()) {
+    /* d is denormalized */
+    bigint b;			// Gets significant bits of frac(d)
+    _G_int32_t bbits, be;
+
+    // be is adjusted exponent, bbits is significant bits in b.  b
+    // contains significant bits of fraction of d. (if bbits is integer, be is new exp)
+    //   b = d2b(b, d, &be, &bbits);
+    b.d2b(d, &be, &bbits);
+
+    i = bbits + be + (fptype<_T>::bias + (fptype<_T>::mant_size-1) - 1);
+    fptype<_T> ftmp = fd.mantissa_denorm(i);
+    ftmp.add_to_exp(-31);	/* adjust exponent */
+    d2 = ftmp;
+    i -= (fptype<_T>::bias + (fptype<_T>::mant_size-1) - 1) + 1;
+  }
+  else {
+    // Forcibly sets unbiased exponent to 0
+    fd.zeroexp();
+    d2 = fd;
+    i -= fptype<_T>::bias;
+
+#ifdef IBM
+    // IBM-specific adjustment
+    // j can be 11 - (>8)
+    // This seems to break if none of the top 3 bits are set.
+    if (j = 11 - __builtin_clz(fd.mant0()))
+      d2 /= 1 << j;
+    i <<= 2;
+    i += j;
+#endif
+  }
+
+  /* Now i is the unbiased base-2 exponent. */
+
+  // Compute integer base 10 exponent into k
+
+  /* log(x)       ~=~ log(1.5) + (x-1.5)/1.5
+   * log10(x)      =  log(x) / log(10)
+   *              ~=~ log(1.5)/log(10) + (x-1.5)/(1.5*log(10))
+   * log10(d) = i*log(2)/log(10) + log10(d2)
+   *
+   * This suggests computing an approximation k to log10(d) by
+   *
+   * k = i*0.301029995663981
+   *      + ( (d2-1.5)*0.289529654602168 + 0.176091259055681 );
+   *
+   * We want k to be too large rather than too small.
+   * The error in the first-order Taylor series approximation
+   * is in our favor, so we just round up the constant enough
+   * to compensate for any error in the multiplication of
+   * (i) by 0.301029995663981; since |i| <= 1077,
+   * and 1077 * 0.30103 * 2^-52 ~=~ 7.2e-14,
+   * adding 1e-13 to the constant term more than suffices.
+   * Hence we adjust the constant term to 0.1760912590558.
+   * (We could get a more accurate k by invoking log10,
+   *  but this is probably not worthwhile.)
+   *
+   * (From Richard Henderson)
+   * The comment should be updated for long double.  I guess the
+   * overestimate is still ok, since while |i| <= 16881 is much
+   * larger, 2^-64 and 2^-113 are much smaller.
+   */
+  ds = (d2-1.5)*0.289529654602168 + 0.1760912590558 + i*0.301029995663981;
+  k = (int)ds;
+  if (ds < 0. && ds != k)
+    k--;    /* want k = floor(ds) */
+  return k;
+}
+
+template
+int
+__dec_exponent<double>(double d, int &special, bool &sign);
+
+template
+int
+__dec_exponent<long double>(long double d, int &special, bool &sign);
+
+/* dtoa for IEEE arithmetic (dmg): convert double to ASCII string.
+ *
+ * Inspired by "How to Print Floating-Point Numbers Accurately" by
+ * Guy L. Steele, Jr. and Jon L. White [Proc. ACM SIGPLAN '90, pp. 92-101].
+ *
+ * Modifications:
+ *      1. Rather than iterating, we use a simple numeric overestimate
+ *         to determine k = floor(log10(d)).  We scale relevant
+ *         quantities using O(log2(k)) rather than O(k) multiplications.
+ *      2. For some modes > 2 (corresponding to ecvt and fcvt), we don't
+ *         try to generate digits strictly left to right.  Instead, we
+ *         compute with fewer bits and propagate the carry if necessary
+ *         when rounding the final digit up.  This is often faster.
+ *      3. Under the assumption that input will be rounded nearest,
+ *         mode 0 renders 1e23 as 1e23 rather than 9.999999999999999e22.
+ *         That is, we allow equality in stopping tests when the
+ *         round-nearest rule will give the same floating-point value
+ *         as would satisfaction of the stopping test with strict
+ *         inequality.
+ *      4. We remove common factors of powers of 2 from relevant
+ *         quantities.
+ *      5. When converting floating-point integers less than 1e16,
+ *         we use floating-point arithmetic rather than resorting
+ *         to multiple-precision integers.
+ *      6. When asked to produce fewer than 15 digits, we first try
+ *         to get by with floating-point arithmetic; we resort to
+ *         multiple-precision integer arithmetic only if we cannot
+ *         guarantee that the floating-point calculation has given
+ *         the correctly rounded result.  For k requested digits and
+ *         "uniformly" distributed input, the probability is
+ *         something like 10^(k-15) that we must resort to the long
+ *         calculation.
+ */
+
+template<typename _T>
+int
+__dtoa(char* buf, _T d, int exponent, int mode, int ndigits, bool trim)
+{
+  // buf is a buffer whose size was computed using dec_exponent(), so as to be
+  // large enough to hold the number if needed.
+
+  // exponent is the value computed by dec_exponent
+
+  // trim is a flag that says whether trailing zeros should be trimmed off
+
+  // decpt returns the actual decimal point location.  It will often be
+  // exponent, but may be adjusted in some cases.
+
+  /*     Arguments ndigits, decpt, sign are similar to those of ecvt and fcvt;
+	 trailing zeros are suppressed from the returned string.  If d is
+	 +-Infinity or NaN, then *decpt is set to 9999.
+
+	 mode:
+	 0 ==> shortest string that yields d when read in
+	       and rounded to nearest.
+	 1 ==> like 0, but with Steele & White stopping rule;
+	       e.g. with IEEE P754 arithmetic , mode 0 gives
+	       1e23 whereas mode 1 gives 9.999999999999999e22.
+	 2 ==> max(1,ndigits) significant digits.  This gives a
+	       return value similar to that of ecvt, except
+	       that trailing zeros are suppressed.
+	 3 ==> through ndigits past the decimal point.  This
+	       gives a return value similar to that from fcvt,
+	       except that trailing zeros are suppressed, and
+	       ndigits can be negative.
+
+Leftright modes have been removed.
+	 4-9 should give the same return values as 2-3, i.e.,
+	       4 <= mode <= 9 ==> same return as mode
+	       2 + (mode & 1).  These modes are mainly for
+	       debugging; often they run slower but sometimes
+	       faster than modes 2-3.
+	 4,5,8,9 ==> left-to-right digit generation.
+	 6-9 ==> don't try fast floating-point estimate
+	 	(if applicable).
+
+	 Values of mode other than 0-9 are treated as mode 0.
+
+	 Sufficient space is allocated to the return value
+	 to hold the suppressed trailing zeros.
+  */
+
+  fptype<_T> fd = d;
+
+  _G_int32_t bbits, b2, b5, be, dig, i, ilim, ilim1,
+    j, j1;
+  _G_int32_t k;			// Holds base 10 exponent
+  _G_int32_t k_check, m2, m5, s2, s5, try_quick;
+  _G_int32_t L;
+  int denorm;
+  bigint b;			// Gets significant bits of frac(d)
+  bigint S;
+  _T ds, eps;
+  fptype<_T> fpeps;
+  char *s, *s0, *stmp;
+
+  fd.clearneg();		// clear sign bit
+
+  fd.normalize();		// only for IBM
+
+  // Handle 0 for all cases.
+  // XXX deal with trim
+  if (!fd) {
+    strcpy(buf, "0");
+    return 1;
+  }
+
+  d = fd;
+
+  // be is exponent adjusted to move lowest 1 bit of mantissa to the 0th bit
+  // and place the decimal point to the right of it.  bbits is significant
+  // bits of b's mantissa after shifting.  b contains significant bits of
+  // fraction of d. (if bbits is integer, be is new exp)
+  b.d2b(d, &be, &bbits);
+
+  // i = exponent(d) unbiased
+  fd = d;
+  i = (int)fd.bexp();
+
+  if (fd.has_denorm() && i == 0) {
+    /* d is denormalized */
+    i = bbits + be + (fptype<_T>::bias + (fptype<_T>::mant_size-1) - 1);
+    i -= (fptype<_T>::bias + (fptype<_T>::mant_size-1) - 1) + 1;
+    denorm = 1;
+  }
+  else {
+    i -= fptype<_T>::bias;
+#ifdef IBM
+    j = 11 - __builtin_clz(fd.mant0());
+    i <<= 2;
+    i += j;
+#endif
+    denorm = 0;
+  }
+
+  /* Now i is the unbiased base-2 exponent. */
+
+  // Compute integer base 10 exponent into k
+
+  // k is a bound on the integer base 10 exponent of d.  We may need to tweak
+  // it a little.
+  k = exponent;
+  k_check = 1;
+  if (k >= 0 && k < Ten_pmax) {
+    if (d < tens[k])
+      k--;
+    k_check = 0;
+  }
+
+  j = bbits - i - 1;		// mantissa bits w/ exp=0
+  if (j >= 0) {
+    b2 = 0;
+    s2 = j;
+  }
+  else {
+    b2 = -j;
+    s2 = 0;
+  }
+  if (k >= 0) {
+    b5 = 0;
+    s5 = k;
+    s2 += k;
+  }
+  else {
+    b2 -= k;
+    b5 = -k;
+    s5 = 0;
+  }
+  if (mode < 0 || mode > 9)
+    mode = 0;
+  try_quick = 1;
+  if (mode > 5) {
+    mode -= 4;
+    try_quick = 0;
+  }
+
+//   leftright = 1;
+  switch(mode) {
+  case 0:
+  case 1:
+    ilim = ilim1 = -1;
+    i = 18;
+    ndigits = 0;
+    break;
+  case 2:
+//     leftright = 0;
+    /* no break */
+  case 4:
+    if (ndigits <= 0)
+      ndigits = 1;
+    ilim = ilim1 = i = ndigits;
+    break;
+  case 3:
+//     leftright = 0;
+    /* no break */
+  case 5:
+    i = ndigits + k + 1;
+    ilim = i;
+    ilim1 = i - 1;
+    if (i <= 0)
+      i = 1;
+  }
+  /* i is now an upper bound of the number of digits to generate. */
+
+  s = s0 = buf;
+
+  if (ilim >= 0 && ilim <= fptype<_T>::Quick_max && try_quick) {
+
+    /* Try to get by with floating-point arithmetic. */
+
+    _T d2 = d;
+    _G_int32_t i = 0;
+    _G_int32_t k0 = k;
+    _G_int32_t ilim0 = ilim;
+    _G_int32_t ieps = 2; /* conservative */
+
+    if (k > 0) {
+      ds = tens[k&0xf];		// 1e0 -> 1e15
+      j = k >> 4;
+      if (j & fptype<_T>::Bletch) {
+	/* prevent overflows */
+	// n_bigtens
+	j &= fptype<_T>::Bletch - 1;
+	d /= maxbigten<_T>();
+	ieps++;
+      }
+      for(; j; j >>= 1, i++)
+	if (j & 1) {
+	  ieps++;
+	  ds *= bigten<_T>(i);
+	}
+      d /= ds;
+    }
+    else if ((j1 = -k)) {
+      d *= tens[j1 & 0xf];
+      for(j = j1 >> 4; j; j >>= 1, i++)
+	if (j & 1) {
+	  ieps++;
+	  d *= bigten<_T>(i);
+	}
+    }
+    if (k_check && d < 1. && ilim > 0) {
+      if (ilim1 <= 0)
+	goto fast_failed;
+      ilim = ilim1;
+      k--;
+      d *= 10.;
+      ieps++;
+    }
+    eps = ieps*d + 7.;
+    fpeps = eps;
+    fpeps.add_to_exp(-(fptype<_T>::mant_size-1));
+    eps = fpeps;
+    if (ilim == 0) {
+      d -= 5.;
+      if (d > eps)
+	goto one_digit;
+      if (d < -eps)
+	goto no_digits;
+      goto fast_failed;
+    }
+
+    /* Generate ilim digits, then fix them up. */
+    eps *= tens[ilim-1];
+    for(i = 1;; i++, d *= 10.) {
+      L = (_G_int32_t)d;
+      d -= L;
+      *s++ = '0' + (int)L;
+      if (i == ilim) {
+	stmp = s;		// This should be under bump_up, but there's a
+				// codegen bug, I think
+	if (d > 0.5 + eps)
+	  goto bump_up;
+	else if (d < 0.5 - eps) {
+	  // Strips off trailing 0's
+	  if (trim) {
+	    while(*--s == '0');
+	    s++;
+	  }
+	  goto ret1;
+	}
+	break;
+      }
+    }
+  fast_failed:
+    s = s0;
+    d = d2;
+    k = k0;
+    ilim = ilim0;
+  }
+
+  /* Do we have a "small" integer? */
+
+  if (be >= 0 && k <= fptype<_T>::Int_max) {
+    /* Yes. */
+    ds = tens[k];
+    if (ndigits < 0 && ilim <= 0) {
+      if (ilim < 0 || d <= 5*ds)
+	goto no_digits;
+      goto one_digit;
+    }
+    for(i = 1;; i++) {
+      L = (_G_int32_t)(d / ds);
+      d -= L*ds;
+#ifdef Check_FLT_ROUNDS
+      /* If FLT_ROUNDS == 2, L will usually be high by 1 */
+      if (d < 0) {
+	L--;
+	d += ds;
+      }
+#endif
+      *s++ = '0' + (int)L;
+      if (i == ilim) {
+	d += d;
+	if (d > ds || (d == ds && L & 1)) {
+	  stmp = s;		// This should be under bump_up, but there's a
+				// codegen bug in 3.4 as of 9/4/03, I think.
+	bump_up:
+	  while( *--s == '9' ) {
+	    *s='0';
+	    if (s == s0) {
+	      k++;
+	      *s = '0';
+	      break;
+	    }
+	  }
+	  ++*s++;
+	  if (!trim) s = stmp;
+	}
+	break;
+      }
+      if (!(d *= 10.))
+	break;
+    }
+    goto ret1;
+  }
+
+//    return 0;			// XXX bail out to avoid mp int calcs
+
+  m2 = b2;
+  m5 = b5;
+  if (m2 > 0 && s2 > 0) {
+    i = m2 < s2 ? m2 : s2;
+    b2 -= i;
+    m2 -= i;
+    s2 -= i;
+  }
+  if (b5 > 0) {
+    b.pow5mult(b5);
+  }
+  S = 1;
+  if (s5 > 0)
+    S.pow5mult(s5);
+
+  /* Check for special case that d is a normalized power of 2. */
+
+  if (mode < 2) {
+    fd = d;
+    if (fd.ispwr2()) {
+      /* The special case */
+      b2 += fptype<_T>::log2p;
+      s2 += fptype<_T>::log2p;
+    }
+  }
+
+  /* Arrange for convenient computation of quotients:
+   * shift left if necessary so divisor has 4 leading 0 bits.
+   *
+   * Perhaps we should just compute leading 28 bits of S once
+   * and for all and pass them and a shift to quorem, so it
+   * can do shifts and ors to compute the numerator for q.
+   */
+  if ((i = ((s5 ? 32 - __builtin_clz(S.back()) : 1) + s2) & 0x1f))
+    i = 32 - i;
+  if (i > 4) {
+    i -= 4;
+    b2 += i;
+    m2 += i;
+    s2 += i;
+  }
+  else if (i < 4) {
+    i += 28;
+    b2 += i;
+    m2 += i;
+    s2 += i;
+  }
+  if (b2 > 0)
+    b.lshift(b2);
+  if (s2 > 0)
+    S.lshift(s2);
+  if (k_check) {
+    if (cmp(b,S) < 0) {
+      k--;
+      b.multadd(10, 0);  /* we botched the k estimate */
+      ilim = ilim1;
+    }
+  }
+  if (ilim <= 0 && mode > 2) {
+    if (ilim < 0 || cmp(b,S.multadd(5,0)) <= 0) {
+      /* no digits, fcvt style */
+    no_digits:
+      k = -1 - ndigits;
+      goto ret;
+    }
+  one_digit:
+    *s++ = '1';
+    k++;
+    goto ret;
+  }
+  for(i = 1;; i++) {
+    *s++ = dig = b.quorem(S) + '0';
+    if (i >= ilim)
+      break;
+    b.multadd(10, 0);
+  }
+
+  /* Round off last digit */
+ 
+  b.lshift(1);
+  j = cmp(b, S);
+  if (j > 0 || (j == 0 && dig & 1)) {
+  roundoff:
+    stmp = s;
+    while(*--s == '9') {
+      *s = '0';
+      if (s == s0) {
+	k++;
+	*s++ = '1';
+	goto ret;
+      }
+    }
+    ++*s++;
+    if (!trim) s = stmp;
+  }
+  else {
+    if (trim) {
+      while(*--s == '0');
+      s++;
+    }
+  }
+ ret:
+ ret1:
+  *s = 0;
+  return k + 1;
+}
+
+// Instantiations
+template
+  int
+  __dtoa<double>(char* buf, double d, int exponent, int mode, int ndigits,
+		 bool trim);
+template
+  int
+  __dtoa<long double>(char* buf, long double d, int exponent, int mode,
+		      int ndigits, bool trim);
+
+template
+int std::__float_scientific_to_char<char>(char*, char const*, char,
+					  const char*, int, int,
+					  std::_Ios_Fmtflags, bool);
+template
+int std::__float_fixed_to_char<wchar_t>(wchar_t*, wchar_t const*, wchar_t,
+					const char*, int, int,
+					std::_Ios_Fmtflags, bool);
+template
+int std::__float_scientific_to_char<wchar_t>(wchar_t*, wchar_t const*,
+					     wchar_t, const char*, int, int,
+					     std::_Ios_Fmtflags, bool);
+template
+int std::__float_fixed_to_char<char>(char*, char const*, char, const char*,
+				     int, int, std::_Ios_Fmtflags, bool);
+
+#endif /* __USE_DTOA */
+
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]