This is the mail archive of the
libstdc++@gcc.gnu.org
mailing list for the libstdc++ project.
RFC: fp printing speedup, take 2
- From: Jerry Quinn <jlquinn at optonline dot net>
- To: libstdc++ at gcc dot gnu dot org
- Date: Fri, 14 Nov 2003 01:58:27 -0500
- Subject: RFC: fp printing speedup, take 2
OK, the first version I posted had a bug from migrating the code into
the library. This version works perfectly on linux x86.
This code replaces printf-based floating point printing with the core
printing routine from gcc-2.95. It passes make check with no new
regressions as well as the floating point torture test I posted
earlier and another as well.
The first version I posted is here.
http://gcc.gnu.org/ml/libstdc++/2003-11/msg00002.html
As mentioned in that message, the patch needs configury work for other
platforms, but is otherwise complete.
Jerry Quinn
? src/floatconv.cc
Index: config/linker-map.gnu
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/config/linker-map.gnu,v
retrieving revision 1.49
diff -u -w -r1.49 linker-map.gnu
--- config/linker-map.gnu 11 Nov 2003 20:09:06 -0000 1.49
+++ config/linker-map.gnu 14 Nov 2003 06:46:42 -0000
@@ -159,6 +159,8 @@
# std::__convert_to_v
_ZSt14__convert_to_v*;
+ _ZSt14__dec_exponentI*;
+ _ZSt6__dtoaI*;
# stub functions from libmath
sinf;
Index: include/bits/locale_facets.tcc
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/include/bits/locale_facets.tcc,v
retrieving revision 1.144
diff -u -w -r1.144 locale_facets.tcc
--- include/bits/locale_facets.tcc 9 Nov 2003 19:15:25 -0000 1.144
+++ include/bits/locale_facets.tcc 14 Nov 2003 06:46:43 -0000
@@ -36,6 +36,7 @@
#pragma GCC system_header
#include <limits> // For numeric_limits
+#include <cmath> // For log10
#include <typeinfo> // For bad_cast.
#include <bits/streambuf_iterator.h>
@@ -843,6 +844,178 @@
return std::__write(__s, __cs, __len);
}
+ // fixed point
+ template<typename _CharT>
+ int
+ __float_fixed_to_char(_CharT* __sbuf, const _CharT* __atoms,
+ _CharT __dpchar, const char* __buf, int __decp, int __prec,
+ ios_base::fmtflags __flags, bool __pad)
+ {
+ _CharT* __sb = __sbuf;
+ const _CharT* __digits = __atoms + __num_base::_S_odigits;
+ bool __showpoint = __flags & ios_base::showpoint;
+
+ if (__decp <= 0) {
+ // Decimal point is to the left of the significant digits. Insert 0's.
+ *__sbuf++= __digits[0];
+
+ // We need a decimal point
+ if (__prec || __showpoint)
+ *__sbuf++= __dpchar;
+
+ int __len = __sbuf - __sb;
+ // Insert leading 0's
+ for (int __i = __decp; __i < 0; __i++)
+ *__sbuf++= __digits[0]; // '0';
+
+ // Copy over digits
+ while (*__buf)
+ *__sbuf++ = __digits[(*__buf++) - '0'];
+
+ // Add trimmed 0's. dtoa sometimes trims trailing 0's.
+ if (__pad || __showpoint)
+ while (__sbuf - __sb-__len < __prec)
+ *__sbuf++= __digits[0]; // '0';
+ return __sbuf - __sb;
+ }
+
+ // decp > 0
+ while (*__buf) {
+ if (__decp-- == 0) // If decp < 0, this will never fire
+ *__sbuf++= __dpchar;
+ *__sbuf++= __digits[(*__buf++) - '0'];
+ }
+ // 0's may have been dropped at the end
+ while (__decp > 0) {
+ *__sbuf++= __digits[0]; // Insert integer 0's
+ __decp--;
+ }
+ // decp now <= 0
+ if (!__decp) {
+ if (__showpoint || (__prec && __pad)) {
+ *__sbuf++= __dpchar;
+ while (__decp-- > -__prec) *__sbuf++= __digits[0];
+ }
+ }
+ else if (__pad || __showpoint) { // decp < 0
+ while (__decp-- > -__prec) *__sbuf++= __digits[0];
+ }
+
+ return __sbuf - __sb;
+ }
+
+ // Convert output of __dtoa to scientific form and write finished string
+ // into __sbuf. __buf contains a null-terminated digit string. __decp is
+ // the index of the digit in __buf that the decimal point comes before.
+ template<typename _CharT>
+ int
+ __float_scientific_to_char(_CharT* __sbuf, const _CharT* __atoms,
+ _CharT __dpchar, const char* __buf, int __decp, int __prec,
+ ios_base::fmtflags __flags, bool __pad)
+ {
+ const _CharT* __digits = __atoms + __num_base::_S_odigits;
+ _CharT* __sb = __sbuf;
+ const char* __bp = __buf;
+ bool __showpoint = __flags & ios_base::showpoint;
+
+ // Insert first digit and decimal point
+ if (!__buf[0])
+ *__sbuf++ = __digits[0]; // handle 0
+ else
+ *__sbuf++= __digits[(*__bp++) - '0'];
+ if ((__prec && *__bp) || __showpoint)
+ *__sbuf++= __dpchar;
+
+ // Insert digits after decimal point
+ int __j = 0;
+ for (; *__bp; __j++, __bp++ )
+ *__sbuf++= __digits[*__bp - '0'];
+ // Apparently dtoa will sometimes not add the final 0 after rounding
+ // But only if we want all the 0's. __pad sets this.
+ for (; (__pad || __flags & ios_base::showpoint) && __j < __prec; __j++)
+ *__sbuf++ = __digits[0];
+
+ // Insert exponent symbol and sign
+ *__sbuf++ = (__flags & ios_base::uppercase) ? __atoms[__num_base::_S_oE]
+ : __atoms[__num_base::_S_oe];
+ __decp--;
+ if (__decp < 0) {
+ *__sbuf++ = __atoms[__num_base::_S_ominus];
+ __decp = -__decp;
+ }
+ else *__sbuf++ = __atoms[__num_base::_S_oplus];
+
+ // Write the exponent. Large enough for ieee quad (15 bit exponent)
+ char __tbuf[6];
+ char *__tb = __tbuf + 5;
+ *__tb-- = 0;
+ while (__decp) {
+ *__tb-- = '0' + (__decp % 10);
+ __decp /= 10;
+ }
+ // We need at least 2 digits in exponent. Pad if it's missing
+ while (__tb - __tbuf > 2) *__tb-- = '0';
+ while (*++__tb)
+ *__sbuf++ = __digits[*__tb - '0'];
+
+ return __sbuf - __sb;
+ }
+
+ template<typename _T>
+ int __dtoa(char*, _T, int, int, int, bool);
+
+ template<typename _CharT, typename _ValueT>
+ inline int
+ __float_to_char(_CharT* __buf, int __bufsize, const _CharT* __atoms,
+ _CharT __dpchar, int __mode, _ValueT __v, int __exp,
+ int __prec, ios_base::fmtflags __flags)
+ {
+ char __lbuf[__bufsize];
+ int __decp;
+
+ // Insert sign
+ int __len = 0;
+ if (__v < 0)
+ {
+ *__buf++ = __atoms[__num_base::_S_ominus];
+ __len = 1;
+ }
+ else if (__flags & ios_base::showpos)
+ {
+ __len = 1;
+ *__buf++ = __atoms[__num_base::_S_oplus];
+ }
+
+
+ // Dispatch number conversion to the correct function
+ if (__mode == 0)
+ {
+ __decp = __dtoa(__lbuf, __v, __exp, 3, __prec, false);
+ __len +=__float_fixed_to_char(__buf, __atoms, __dpchar, &__lbuf[0],
+ __decp, __prec, __flags, true);
+ }
+ else if (__mode == 1)
+ {
+ __decp = __dtoa(__lbuf, __v, __exp, 2, __prec+1, false);
+ __len += __float_scientific_to_char(__buf, __atoms, __dpchar,
+ &__lbuf[0], __decp, __prec,
+ __flags, true);
+ }
+ else
+ {
+ __decp = __dtoa(__lbuf, __v, __exp, 2, __prec, true);
+ if (__decp <= -4 || __decp > __prec)
+ __len += __float_scientific_to_char(__buf, __atoms, __dpchar,
+ &__lbuf[0], __decp, __prec-1,
+ __flags, false);
+ else
+ __len += __float_fixed_to_char(__buf, __atoms, __dpchar, &__lbuf[0],
+ __decp, __prec-__decp,
+ __flags, false);
+ }
+ return __len;
+ }
+
template<typename _CharT, typename _OutIter>
void
num_put<_CharT, _OutIter>::
@@ -868,16 +1041,14 @@
__len = __newlen;
}
- // The following code uses snprintf (or sprintf(), when
- // _GLIBCXX_USE_C99 is not defined) to convert floating point values
- // for insertion into a stream. An optimization would be to replace
- // them with code that works directly on a wide buffer and then use
- // __pad to do the padding. It would be good to replace them anyway
- // to gain back the efficiency that C++ provides by knowing up front
- // the type of the values to insert. Also, sprintf is dangerous
- // since may lead to accidental buffer overruns. This
- // implementation follows the C++ standard fairly directly as
- // outlined in 22.2.2.2 [lib.locale.num.put]
+ template<typename _T>
+ int
+ __dec_exponent(_T d, int &special, bool &sign);
+
+
+ // This implementation now uses a dtoa conversion that does fp arithmetic
+ // when possible, falling back to big int when required. Currently, numbers
+ // with < 15 bits of precision and not too large integers get optimized.
template<typename _CharT, typename _OutIter>
template<typename _ValueT>
_OutIter
@@ -911,58 +1082,80 @@
// Long enough for the max format spec.
char __fbuf[16];
-#ifdef _GLIBCXX_USE_C99
- // First try a buffer perhaps big enough (for sure sufficient
- // for non-ios_base::fixed outputs)
- int __cs_size = __max_digits * 3;
- char* __cs = static_cast<char*>(__builtin_alloca(__cs_size));
-
- _S_format_float(__io, __fbuf, __mod);
- __len = std::__convert_from_v(__cs, __cs_size, __fbuf, __v,
- _S_get_c_locale(), __prec);
-
- // If the buffer was not large enough, try again with the correct size.
- if (__len >= __cs_size)
- {
- __cs_size = __len + 1;
- __cs = static_cast<char*>(__builtin_alloca(__cs_size));
- __len = std::__convert_from_v(__cs, __cs_size, __fbuf, __v,
- _S_get_c_locale(), __prec);
- }
-#else
- // Consider the possibility of long ios_base::fixed outputs
- const bool __fixed = __io.flags() & ios_base::fixed;
- const int __max_exp = numeric_limits<_ValueT>::max_exponent10;
-
- // The size of the output string is computed as follows.
// ios_base::fixed outputs may need up to __max_exp+1 chars
// for the integer part + up to __max_digits chars for the
// fractional part + 3 chars for sign, decimal point, '\0'. On
// the other hand, for non-fixed outputs __max_digits*3 chars
// are largely sufficient.
- const int __cs_size = __fixed ? __max_exp + __max_digits + 4
- : __max_digits * 3;
- char* __cs = static_cast<char*>(__builtin_alloca(__cs_size));
-
- _S_format_float(__io, __fbuf, __mod);
- __len = std::__convert_from_v(__cs, 0, __fbuf, __v,
- _S_get_c_locale(), __prec);
-#endif
-
- // [22.2.2.2.2] Stage 2, convert to char_type, using correct
- // numpunct.decimal_point() values for '.' and adding grouping.
+ int __special;
+ bool __sign;
+ _CharT* __ws;
+ const int __exponent = __dec_exponent(__v, __special, __sign);
+ if (__special)
+ {
const ctype<_CharT>& __ctype = use_facet<ctype<_CharT> >(__loc);
+ const char* __cs;
+ __ws = static_cast<_CharT*>(__builtin_alloca(sizeof(_CharT) * 4));
+ if (__special == 1)
+ {
- _CharT* __ws = static_cast<_CharT*>(__builtin_alloca(sizeof(_CharT)
- * __len));
+ if (__sign)
+ __cs = (__io.flags() & ios_base::uppercase) ? "-INF" : "-inf";
+ else if (__io.flags() & ios_base::showpos)
+ __cs = (__io.flags() & ios_base::uppercase) ? "+INF" : "+inf";
+ else
+ __cs = (__io.flags() & ios_base::uppercase) ? "INF" : "inf";
+ }
+ else // special == 2
+ __cs = (__io.flags() & ios_base::uppercase) ? "NAN" : "nan";
+
+ __len = strlen(__cs);
__ctype.widen(__cs, __cs + __len, __ws);
+ }
+ else
+ {
+ int __decp;
+ int __cs_size;
+ int __mode;
+
+ // Fixed needs space to hold all digits to left of decimal point,
+ // plus up to __prec digits to the right. The other formats need
+ // __prec digits plus sign, decimal point, exponent char and sign,
+ // and exponent itself.
+ if (__io.flags() & ios_base::fixed)
+ {
+ // %f style
+ __cs_size = (__exponent > 0 ? __exponent : 0) + 2 + __prec;
+ __mode = 0;
+ }
+ else if (__io.flags() & ios_base::scientific)
+ {
+ // %e style
+ const int __max_exp_len =
+ int(log10(numeric_limits<_ValueT>::max_exponent10)) + 1;
+ __cs_size = __prec + 4 + __max_exp_len;
+ __mode = 1;
+ }
+ else
+ {
+ // %g style
+ const int __max_exp_len =
+ int(log10(numeric_limits<_ValueT>::max_exponent10)) + 1;
+ if (!__prec) __prec = 1;
+ __cs_size = __prec + 4 + __max_exp_len;
+ __mode = 2;
+ }
- // Replace decimal point.
- const _CharT __cdec = __ctype.widen('.');
- const _CharT __dec = __lc->_M_decimal_point;
- const _CharT* __p;
- if (__p = char_traits<_CharT>::find(__ws, __len, __cdec))
- __ws[__p - __ws] = __dec;
+ __ws = static_cast<_CharT*>(__builtin_alloca(sizeof(_CharT) * __cs_size));
+ __len = __float_to_char(__ws, __cs_size, &__lc->_M_atoms_out[0],
+ __lc->_M_decimal_point, __mode, __v,
+ __exponent, __prec, __io.flags());
+
+ }
+
+ // [22.2.2.2.2] Stage 2, convert to char_type, using correct
+ // numpunct.decimal_point() values for '.' and adding grouping. The
+ // conversion happened in __float_to_char().
// Add grouping, if necessary.
if (__lc->_M_use_grouping)
@@ -971,6 +1164,8 @@
// number of digits, but no more.
_CharT* __ws2 = static_cast<_CharT*>(__builtin_alloca(sizeof(_CharT)
* __len * 2));
+ const _CharT* __p =
+ char_traits<_CharT>::find(__ws, __len, __lc->_M_decimal_point);
_M_group_float(__lc->_M_grouping, __lc->_M_thousands_sep, __p,
__ws2, __ws, __len);
__ws = __ws2;
Index: src/Makefile.am
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/src/Makefile.am,v
retrieving revision 1.137
diff -u -w -r1.137 Makefile.am
--- src/Makefile.am 11 Nov 2003 20:09:11 -0000 1.137
+++ src/Makefile.am 14 Nov 2003 06:46:43 -0000
@@ -96,6 +96,7 @@
ctype.cc \
debug.cc \
demangle.cc \
+ floatconv.cc \
functexcept.cc \
globals_locale.cc \
globals_io.cc \
Index: src/Makefile.in
===================================================================
RCS file: /cvsroot/gcc/gcc/libstdc++-v3/src/Makefile.in,v
retrieving revision 1.191
diff -u -w -r1.191 Makefile.in
--- src/Makefile.in 11 Nov 2003 20:09:12 -0000 1.191
+++ src/Makefile.in 14 Nov 2003 06:46:44 -0000
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.7.8 from Makefile.am.
+# Makefile.in generated by automake 1.7.7 from Makefile.am.
# @configure_input@
# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
@@ -253,6 +253,7 @@
ctype.cc \
debug.cc \
demangle.cc \
+ floatconv.cc \
functexcept.cc \
globals_locale.cc \
globals_io.cc \
@@ -362,7 +363,7 @@
time_members.lo
am__objects_2 = basic_file.lo c++locale.lo
am__objects_3 = codecvt.lo complex_io.lo ctype.lo debug.lo demangle.lo \
- functexcept.lo globals_locale.lo globals_io.lo ios.lo \
+ floatconv.lo functexcept.lo globals_locale.lo globals_io.lo ios.lo \
ios_failure.lo ios_init.lo ios_locale.lo limits.lo locale.lo \
locale_init.lo locale_facets.lo localename.lo stdexcept.lo \
stl_tree.lo strstream.lo allocator-inst.lo concept-inst.lo \
@@ -573,6 +574,7 @@
distclean: distclean-am
-rm -f Makefile
+
distclean-am: clean-am distclean-compile distclean-generic \
distclean-libtool distclean-tags
@@ -596,6 +598,7 @@
maintainer-clean: maintainer-clean-am
-rm -f Makefile
+
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
--- src/floatconv.cc.old 1969-12-31 19:00:00.000000000 -0500
+++ src/floatconv.cc 2003-11-13 01:11:09.000000000 -0500
@@ -0,0 +1,1902 @@
+// Numeric printing support -*- C++ -*-
+
+// Copyright (C) 1993, 1994, 2003
+// Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 2, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING. If not, write to the Free
+// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+// USA.
+
+// As a special exception, you may use this file as part of a free software
+// library without restriction. Specifically, if other files instantiate
+// templates or use macros or inline functions from this file, or you compile
+// this file and link it with other files to produce an executable, this
+// file does not by itself cause the resulting executable to be covered by
+// the GNU General Public License. This exception does not however
+// invalidate any other reasons why the executable file might be covered by
+// the GNU General Public License.
+
+// Original copyright below
+
+/* JQ -- long double support is guaranteed in c++ */
+
+/*
+Copyright (C) 1993, 1994 Free Software Foundation
+
+This file is part of the GNU IO Library. This library is free
+software; you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this library; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+As a special exception, if you link this library with files
+compiled with a GNU compiler to produce an executable, this does not cause
+the resulting executable to be covered by the GNU General Public License.
+This exception does not however invalidate any other reasons why
+the executable file might be covered by the GNU General Public License. */
+
+// #include <libioP.h>
+#define __USE_DTOA
+#ifdef __USE_DTOA
+/****************************************************************
+ *
+ * The author of this software is David M. Gay.
+ *
+ * Copyright (c) 1991 by AT&T.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR AT&T MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ *
+ ***************************************************************/
+
+/* Some cleaning up by Per Bothner, bothner@cygnus.com, 1992, 1993.
+ Re-written to not need static variables
+ (except result, result_k, HIWORD, LOWORD). */
+
+/* Note that the checking of _DOUBLE_IS_32BITS is for use with the
+ cross targets that employ the newlib ieeefp.h header. -- brendan */
+
+/* Please send bug reports to
+ David M. Gay
+ AT&T Bell Laboratories, Room 2C-463
+ 600 Mountain Avenue
+ Murray Hill, NJ 07974-2070
+ U.S.A.
+ dmg@research.att.com or research!dmg
+ */
+
+/* strtod for IEEE-, VAX-, and IBM-arithmetic machines.
+ *
+ * This strtod returns a nearest machine number to the input decimal
+ * string (or sets errno to ERANGE). With IEEE arithmetic, ties are
+ * broken by the IEEE round-even rule. Otherwise ties are broken by
+ * biased rounding (add half and chop).
+ *
+ * Inspired loosely by William D. Clinger's paper "How to Read Floating
+ * Point Numbers Accurately" [Proc. ACM SIGPLAN '90, pp. 92-101].
+ *
+ * Modifications:
+ *
+ * 1. We only require IEEE, IBM, or VAX double-precision
+ * arithmetic (not IEEE double-extended).
+ * 2. We get by with floating-point arithmetic in a case that
+ * Clinger missed -- when we're computing d * 10^n
+ * for a small integer d and the integer n is not too
+ * much larger than 22 (the maximum integer k for which
+ * we can represent 10^k exactly), we may be able to
+ * compute (d*10^k) * 10^(e-k) with just one roundoff.
+ * 3. Rather than a bit-at-a-time adjustment of the binary
+ * result in the hard case, we use floating-point
+ * arithmetic to determine the adjustment to within
+ * one bit; only in really hard cases do we need to
+ * compute a second residual.
+ * 4. Because of 3., we don't need a large table of powers of 10
+ * for ten-to-e (just some small tables, e.g. of 10^k
+ * for 0 <= k <= 22).
+ */
+
+/*
+ * #define IEEE for IEEE floating-point arithmetic
+ * #define IBM for IBM mainframe-style floating-point arithmetic.
+ * #define VAX for VAX-style floating-point arithmetic.
+ * #define Unsigned_Shifts if >> does treats its left operand as unsigned.
+ * #define Check_FLT_ROUNDS if FLT_ROUNDS can assume the values 2 or 3.
+ */
+
+// XXX JQ convert a few macros to inlines
+// Remove DOUBLE_IS_32BITS - doesn't seem to be any support for it.
+// So we assume that double is 64bit.
+
+#include <cstdio>
+#ifdef DEBUG
+#define Bug(x) {fprintf(stderr, "%s\n", x); exit(1);}
+#endif
+#include <new>
+#include <cstring>
+#include <cmath> // log, floor
+#include <limits>
+#include <iostream>
+
+namespace std {
+
+//////////////////////////////////////////////////////////////////////
+// Stuff that needs to be configured by libstdc++
+//
+// Unsigned_Shifts
+// 32bit unsigned type
+// 32bit signed type
+// FLOAT_WORDS_BIG_ENDIAN - took the name from real.c
+// IEEE, IBM, VAX
+// IEEE_QUAD if long double is quad rather than extended
+
+#ifdef Unsigned_Shifts
+#define Sign_Extend(a,b) if (b < 0) a |= 0xffff0000;
+#else
+#define Sign_Extend(a,b) /*no-op*/
+#endif
+
+//XXX need uint32_t here somehow
+typedef unsigned _G_uint32_t;
+typedef int _G_int32_t;
+
+typedef _G_uint32_t unsigned32;
+
+template<typename _T> class fptype {};
+
+#define IEEE
+// #define IBM
+// #define VAX
+
+// #define FLOAT_WORDS_BIG_ENDIAN
+
+
+/* #define P DBL_MANT_DIG */
+/* Ten_pmax = floor(mant_size*log(2)/log(5)) */
+/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */
+/* Quick_max = floor((mant_size-1)*log(FLT_RADIX)/log(10) - 1) */
+/* Int_max = floor(mant_size*log(FLT_RADIX)/log(10) - 1) */
+
+typedef std::numeric_limits<double> nld;
+/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */
+static const int Bletch_dbl = (1 << __builtin_ffs(nld::max_exponent10)) / 16;
+static const int Quick_max_dbl
+ = int(floor((nld::digits-1) * log(double(nld::radix))/log(10) - 1));
+// For secondary fast codepath
+static const int Int_max_dbl
+ = int(floor(nld::digits * log(nld::radix)/log(10) - 1));
+
+typedef std::numeric_limits<long double> nlld;
+/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */
+static const int Bletch_ldbl = (1 << __builtin_ffs(nlld::max_exponent10)) / 16;
+static const int Quick_max_ldbl
+ = int(floor((nlld::digits-1) * log(double(nlld::radix))/log(10) - 1));
+// For secondary fast codepath
+static const int Int_max_ldbl
+ = int(floor(nlld::digits * log(nlld::radix)/log(10) - 1));
+
+// Attempt to wrap up float differences
+#ifdef IEEE
+// flt dbl ldbl (96) ldbl(128)
+// sign: 1 1 1 1
+// exp: 8 11 15,16(empty) 15
+// mant: 23 20,32 32,32 16,32,32,32
+// qnan: 1,rest of mantissa
+// bias: 0x7f 0x3ff 0x3fff 0x3fff
+
+// IEEE DOUBLE
+// sgn:1 exp:11 mant0:20 mant1:32
+// exp==0 is denorm (biased)
+// exp==2047(0x7ff) mant=0, +-inf
+// exp==2047 mant!=0, +-nan, top bit of mant set is qnan (except mips)
+// exp - 1023 (0x3ff) is unbiased exponent
+// number is 1.mantissa e exp-0x3ff
+// the 1 is implicit (since we're binary)
+
+// IEEE EXTENDED (80 bit)
+// empty:16 sgn:1 exp:15 mant0:32 mant1:32
+// exp==0 is denorm (biased)
+// exp==32767(0x7fff) mant=0, +-inf
+// exp==32767 mant!=0, +-nan, top bit of mant set is qnan (except mips)
+// exp - 16383 is unbiased exponent
+// 0 and denormalized numbers have a zero in the top bit of mant0. All
+// others have a 1 bit.
+
+// IEEE QUAD
+// sgn:1 exp:15 mant0:16 mant1:32 mant2:32 mant3:32
+// exp==0 is denorm (biased)
+// exp==32767(0x7fff) mant=0, +-inf
+// exp==32767 mant!=0, +-nan, top bit of mant set is qnan (except mips)
+// exp - 16383 is unbiased exponent
+
+// IEEE double format
+template<>
+class fptype<double>
+{
+ union {
+ double d;
+ unsigned32 b[2];
+ } x;
+
+ // Handle endianness
+#if FLOAT_WORDS_BIG_ENDIAN
+ static const int hi = 0; // high order word
+ static const int lo = 1; // lo order word
+#else
+ static const int hi = 1;
+ static const int lo = 0;
+#endif
+
+public:
+
+ static const int sign_bit = 0x80000000; // mask to get sign bit from hi word
+ static const int bias = 1023; // exponent bias
+ static const int exp_mask = 0x7ff00000; // mask to leave just exp bits in hi word
+ static const int exp_shift = 20; // amt to shift hi word to get exp at bottom
+ static const int exp_base = 0x100000; // First bit of exponent
+ static const int mant_mask = 0xfffff; // mask to get mantissa bits in hi word
+ static const int mant_size = 53; // Number of bits in the mantissa
+ static const int implicit_bit = 0x100000; // implicit top bit of mantissa
+
+ static const int log2p = 1;
+
+ static const int Bletch;
+ static const int Quick_max;
+ static const int Int_max;
+
+ // Constructor and conversion operators
+ fptype() { }
+ fptype(double d) { x.d = d; }
+ fptype& operator=(double d) { x.d = d; return *this;}
+ operator double() const { return x.d; }
+
+ /// Features ///
+ static bool has_denorm() { return (nld::has_denorm != std::denorm_absent); }
+
+ /// Tests ///
+ bool isnonzeromant() const { return x.b[lo] || x.b[hi] & mant_mask; }
+ bool iszeromant() const { return x.b[lo]==0 && (x.b[hi] & mant_mask) == 0; }
+
+ // equiv to isnan && isinf, but faster
+ bool isspecial() const { return x.b[hi] & exp_mask == exp_mask; }
+ bool isnan() const { return bexp() == 0x7ff && isnonzeromant();}
+ bool isinf() const { return bexp() == 0x7ff && iszeromant();}
+
+ bool isneg() const { return (x.b[hi] & sign_bit) != 0; }
+ bool isdenorm() const { return has_denorm() && bexp() == 0; }
+ bool iszero() const { return x.b[lo] == 0 && (x.b[hi] & ~sign_bit) == 0; }
+ // Return true if d is a normalized power of 2
+ bool ispwr2() const
+ {
+ return !x.b[lo] && !(x.b[hi] & mant_mask) &&
+ (!has_denorm() || x.b[hi] & exp_mask); // Check that this isn't a denorm
+ }
+
+
+ /// Twiddle sign bit ///
+ void setneg() { x.b[hi] |= sign_bit; }
+ void clearneg() { x.b[hi] &= ~sign_bit; }
+
+ /// Exponent ///
+ long exp() const { return bexp() - bias; }
+ // biased exponent
+ long bexp() const { return (x.b[hi] & exp_mask) >> exp_shift; }
+ // Set biased exponent to 0, i.e. 1.0e+00
+ void zeroexp() { x.b[hi] = (x.b[hi] & mant_mask) | (bias << exp_shift); }
+ // Add v to the exponent
+ void add_to_exp(int v) { x.b[hi] += v * exp_base; }
+
+ /// Mantissa ///
+ long mant0() const { return x.b[hi] & mant_mask; }
+ int mantsize() const { return 2; }
+ long mant(int i=1) const { return x.b[lo]; }
+ // Return most significant mantissa bits, including implicit top bit, unless denorm.
+ long imant0() const
+ {
+ if (x.b[hi] & exp_mask) return (mant0() | implicit_bit);
+ else return mant0();
+ }
+ // Return mantissa bits with bit i as the most significant
+ unsigned32 mantissa_denorm(int i) {
+ return (i > 32) ? x.b[hi] << (64 - i) | x.b[lo] >> (i - 32)
+ : x.b[lo] << (32 - i);
+ }
+
+ double val() const { return x.d; }
+
+ void normalize() {}
+};
+
+const int fptype<double>::Bletch = Bletch_dbl;
+const int fptype<double>::Quick_max = Quick_max_dbl;
+const int fptype<double>::Int_max = Int_max_dbl;
+
+// Currently only ieee extended and quad are covered
+template<>
+class fptype<long double>
+{
+ union {
+ long double d;
+ unsigned32 b[sizeof(long double)/sizeof(unsigned32)];
+ } x;
+
+ static const int wordlen = sizeof(long double)/sizeof(unsigned32);
+
+ // Handle endianness
+#if FLOAT_WORDS_BIG_ENDIAN
+ static const int hi = 0; // high order word
+ static const int lo = wordlen-1; // lo order word
+ static const int idx[] = { 0, 1, 2, 3 };
+#else
+ static const int hi = wordlen-1;
+ static const int lo = 0;
+ static const int idx[4];
+#endif
+
+public:
+
+ static const int sign_bit = 0x8000; // mask to get sign bit from hi word (LE)
+ static const int bias = 16383; // exponent bias
+ static const int exp_mask = 0x7fff; // mask to leave just exp bits in hi word (LE)
+ static const int exp_shift = 0; // amt to shift hi word to get exp at bottom
+ static const int exp_base = 0x1; // First bit of exponent
+ // Number of bits in the mantissa for long double. 64 for extended, 112 for quad.
+ static const int mant_size = std::numeric_limits<long double>::digits;
+
+#if IEEE_QUAD
+ static const int implicit_bit = 0x10000; // implicit top bit of mantissa
+ static const int mant_mask = 0xffff; // mask to get mantissa bits in hi word
+#else
+ static const int implicit_bit = 0;
+ static const int mant_mask = 0x0; // mask to get mantissa bits in hi word
+#endif
+
+ static const int log2p = 1;
+
+ static const int Bletch;
+ static const int Quick_max;
+ static const int Int_max;
+
+ // Constructor and conversion operators
+ fptype() {}
+ fptype(long double d) { x.d = d; }
+ fptype& operator=(long double d) { x.d = d; return *this;}
+ operator long double() const { return x.d; }
+
+ /// Features ///
+ static bool has_denorm() { return (nld::has_denorm != std::denorm_absent); }
+
+ /// Tests ///
+ bool isnonzeromant() const
+ {
+ bool ret = (mant0()) != 0;
+ for (int i=0; i < wordlen-1; i++)
+ ret = (ret || (x.b[idx[i]]));
+ return ret;
+ }
+ bool iszeromant() const { return !isnonzeromant(); }
+
+ // equiv to isnan && isinf, but faster
+ bool isspecial() const { return x.b[hi] & exp_mask == exp_mask; }
+ bool isnan() const { return bexp() == 0x7ff && isnonzeromant();}
+ bool isinf() const { return bexp() == 0x7ff && iszeromant();}
+
+ bool isneg() const { return (x.b[hi] & sign_bit) != 0; }
+ bool isdenorm() const { return has_denorm() && bexp() == 0; }
+#if IEEE_QUAD
+ bool iszero() const {
+ return x.b[idx[1]] == 0 && x.b[idx[2]] == 0 && x.b[idx[3]] == 0 &&
+ (x.b[hi] & ~sign_bit) == 0;
+ }
+#else
+ bool iszero() const {
+ return x.b[idx[1]] == 0 && x.b[idx[2]] == 0 && (x.b[hi] & ~sign_bit) == 0;
+ }
+#endif
+ // Return true if d is a normalized power of 2
+ bool ispwr2() const
+ {
+#if IEEE_QUAD
+ return !x.b[idx[1]] && !x.b[idx[2]] && !x.b[idx[3]]
+ && !(x.b[hi] & mant_mask) &&
+ (!has_denorm() || x.b[hi] & exp_mask); // Check that this isn't a denorm
+#else
+ return !x.b[idx[1]] && !x.b[idx[2]] &&
+ (!has_denorm() || x.b[hi] & exp_mask); // Check that this isn't a denorm
+#endif
+ }
+
+ /// Twiddle sign bit ///
+ void setneg() { x.b[hi] |= sign_bit; }
+ void clearneg() { x.b[hi] &= ~sign_bit; }
+
+ /// Exponent ///
+ long exp() const { return bexp() - bias; }
+ // biased exponent
+ long bexp() const { return (x.b[hi] & exp_mask) >> exp_shift; }
+ // Set biased exponent to 0, i.e. 1.0e+00
+ void zeroexp() {
+#if IEEE_QUAD
+ x.b[hi] = (mant0()) | (bias << exp_shift);
+#else
+ x.b[hi] = (bias << exp_shift);
+#endif
+ }
+ // Add v to the exponent
+ void add_to_exp(int v) { x.b[hi] += v * exp_base; }
+
+ /// Mantissa ///
+#if IEEE_QUAD
+ long mant0() const { return x.b[hi] & mant_mask; }
+ int mantsize() const { return wordlen; }
+ // Return i'th 32 bit word of mantissa. Only for 1-n
+ long mant(int i) const { return x.b[idx[i]]; }
+#else
+ long mant0() const { return x.b[idx[1]]; }
+ int mantsize() const { return wordlen-1; }
+ long mant(int i) const { return x.b[idx[i+1]]; }
+#endif
+ // Return most significant mantissa bits, including implicit top bit
+ long imant0() const
+ {
+ if (x.b[hi] & exp_mask) return (mant0() | implicit_bit);
+ else return mant0();
+ }
+ // Return top 32 mantissa bits with bit i as the most significant
+ unsigned32 mantissa_denorm(int i) {
+#if IEEE_QUAD
+ // total 112 bits
+ if (i > 96) return x.b[hi] << (128 - i) | x.b[idx[1]] >> (i - 96);
+ else if (i > 64) return x.b[idx[1]] << (96 - i) | x.b[idx[2]] >> (i - 64);
+ else if (i > 32) return x.b[idx[2]] << (64 - i) | x.b[idx[3]] >> (i - 32);
+ else return x.b[idx[3]] << (32 - i);
+#else
+ // total 64 bits
+ if (i > 32) return x.b[idx[1]] << (64 - i) | x.b[idx[2]] >> (i - 32);
+ else return x.b[idx[2]] << (32 - i);
+#endif
+ }
+
+ double val() const { return x.d; }
+
+ void normalize() {}
+};
+
+const int fptype<long double>::idx[4]
+= { wordlen-1, wordlen-2, wordlen-3, wordlen-4 };
+
+const int fptype<long double>::Bletch = Bletch_ldbl;
+const int fptype<long double>::Quick_max = Quick_max_ldbl;
+const int fptype<long double>::Int_max = Int_max_ldbl;
+
+#else
+#ifdef IBM
+// IBM floating point
+
+// i370 format double
+// sign:1 exp:7 mant0:24 mant1:32
+// (exp-64) * 4 is unbiased exp
+// no denorm, no nan, no inf
+template<>
+class fptype<double>
+{
+ union {
+ double d;
+ unsigned32 b[2];
+ } x;
+
+public:
+
+ static const int sign_bit = 0x80000000; // mask to get sign bit from hi word
+ static const int bias = 65; // Exponent bias
+ static const int exp_mask = 0x7f000000; // mask for exp
+ static const int exp_shift = 24; // amt to shift to get sign+exp
+ static const int exp_base = 0x1000000; // First bit of exponent
+ static const int mant_mask = 0xffffff;
+ static const int mant_size = 14; // Number of bits in the mantissa (but not really)
+ static const int top_bit = 0x1000000; // top bit of mantissa
+
+ static const int log2p = 4;
+
+ static const int Bletch;
+ static const int Quick_max;
+ static const int Int_max;
+
+ /// Features ///
+ // should be false for IBM
+ static bool has_denorm() { return (nld::has_denorm != std::denorm_absent); }
+
+ bool isspecial() const { return false; }
+ bool isnan() const { return false; }
+ bool isinf() const { return false; }
+ bool isneg() const { return (x.b[hi] & sign_bit) != 0; }
+ bool isdenorm() const { return false; }
+ bool iszero() const {}
+ // Return true if d is a normalized power of 2
+ bool ispwr2() const
+ { return !x.b[lo] && !(x.b[hi] & (mant_mask - top_bit)); }
+
+ /// Twiddle sign bit ///
+ void setneg() { x.b[hi] |= sign_bit; }
+ void clearneg() { x.b[hi] &= ~sign_bit; }
+
+ /// Exponent ///
+ long exp() const { return bexp() - bias; }
+ // biased exponent
+ long bexp() const { return (x.b[hi] & exp_mask) >> exp_shift; }
+ // Set biased exponent to 0, i.e. 1.0e+00
+ void zeroexp() { b[hi] = (b[hi] & mant_mask) | (bias << exp_shift); }
+ // Add v to the exponent
+ void add_to_exp(int v) { x.b[hi] += v * exp_base; }
+
+ /// Mantissa ///
+ long mant0() const { return x.b[hi] & mant_mask; }
+ int mantsize() const { return 2; }
+ long mant(int i) const { return x.b[lo]; }
+ // Return most significant mantissa bits. IBM has no implicit top bit
+ long imant0() const { return mant0(); }
+ // Return mantissa bits with bit i as the most significant
+ unsigned32 mantissa_denorm(int i) { return 0; } // Unused
+
+ double val() const { return x.d; }
+
+ void normalize() { x.d += 0; }
+};
+
+const int fptype<double>::Bletch = Bletch_dbl;
+const int fptype<double>::Quick_max = Quick_max_dbl;
+const int fptype<double>::Int_max = Int_max_dbl;
+
+#else
+// VAX floating point
+
+// VAX D (64 bit)
+// mant1:16 sign:1 exp:8 mant0:7 mant3:16 mant2:16
+// exp - 129 is unbiased exp
+
+template<>
+class fptype<double>
+{
+ union {
+ double d;
+ unsigned32 b[2];
+ } x;
+
+ static const int hi = 0; // Always little-endian
+
+ long wswap(long w) { return ((w & 0xffff) << 16) | ((w & 0xffff0000) >> 16);}
+
+public:
+
+ static const int sign_bit = 0x8000; // sign bit mask
+ static const int bias = 129; // Exponent bias
+ static const int exp_mask = 0x7f80; // Exp mask for F
+ static const int exp_shift = 7; // Amt to shift to get sgn+exp in F,D
+ static const int exp_base = 0x80; // First bit of exponent
+ static const int mant_mask = 0xffff007f; // mant mask in F,D
+ static const int mant_size = 56 // bits in the mantissa
+ static const int implicit_bit = 0x800000; // implicit top bit of mantissa
+
+ static const int log2p = 1;
+
+ static const int Bletch;
+ static const int Quick_max;
+ static const int Int_max;
+
+ /// Features ///
+ // should be false for VAX
+ static bool has_denorm() { return (nld::has_denorm != std::denorm_absent); }
+
+ // equiv to isnan && isinf, but faster
+ bool isspecial() const { return isnan(); }
+ bool isnan() const { return (x.b[hi] & 0x8000) != 0;}
+ bool isinf() const { return false;}
+ bool isneg() const { return (x.b[hi] & sign_bit) != 0; }
+ bool isdenorm() const { return false; }
+ bool iszero() const { return (x.b[hi] & (sign_bit | exp_mask)) == 0; }
+ // Return true if d is a normalized power of 2
+ bool ispwr2() const
+ { return !x.b[lo] && !(x.b[hi] & mant_mask); }
+
+ /// Twiddle sign bit ///
+ void setneg() { x.b[hi] |= sign_bit; }
+ void clearneg() { x.b[hi] &= ~sign_bit; }
+
+ /// Exponent ///
+ long exp() const { return bexp() - bias; }
+ // biased exponent
+ long bexp() const { return (x.b[hi] & exp_mask) >> exp_shift; }
+ // Set biased exponent to 0, i.e. 1.0e+00
+ void zeroexp() { b[hi] = (b[hi] & mant_mask) | (bias << exp_shift); }
+ // Add v to the exponent
+ void add_to_exp(int v) { x.b[hi] += v * exp_base; }
+
+ /// Mantissa ///
+ long mant0() const { return wswap(x.b[hi] & mant_mask); }
+ long mant(int i) const { return wswap(x.b[lo]); }
+ // Return most significant mantissa bits. VAX always has implicit top bit
+ long imant0() const { return mant0() | implicit_bit; }
+ // Return mantissa bits with bit i as the most significant
+ unsigned32 mantissa_denorm(int i) { return 0; } // Unused
+
+ double val() const { return x.d; }
+
+ void normalize() {}
+};
+
+const int fptype<double>::Bletch = Bletch_dbl;
+const int fptype<double>::Quick_max = Quick_max_dbl;
+const int fptype<double>::Int_max = Int_max_dbl;
+
+#endif
+#endif
+
+// Shift y so lowest bit is 1 and return the number of bits y was
+// shifted.
+static int
+lo0bits (unsigned32 *y)
+{
+ int ret = __builtin_ctz(*y);
+ *y = *y >> ret;
+ return ret;
+}
+
+
+/* (1<<BIGINT_MINIMUM_K) is the minimum number of words to allocate
+ in a Bigint. dtoa usually manages with 1<<2, and has not been
+ known to need more than 1<<3. */
+
+#define BIGINT_MINIMUM_K 3
+
+struct Bigint {
+ int k; /* Parameter given to Balloc(k) */ // power of 2 for space
+ int maxwds; /* Allocated space: equals 1<<k. */ // number of 32bit words
+ int wds; /* Current length. */
+ bool on_stack; /* 1 if stack-allocated. */
+ bool sign; /* false if value is positive or zero; true if negative. */
+ unsigned32 x[1<<BIGINT_MINIMUM_K]; /* Actually: x[maxwds] */
+
+ Bigint()
+ : k(BIGINT_MINIMUM_K), maxwds(1<<BIGINT_MINIMUM_K), wds(0),
+ on_stack(0), sign(0)
+ { }
+
+ Bigint(int new_k, int new_maxwds)
+ : k(new_k), maxwds(new_maxwds), wds(0), on_stack(0), sign(0)
+ { }
+
+
+ void
+ Bcopy(Bigint *y)
+ {
+ unsigned32 *xp, *yp;
+ int i = y->wds;
+ sign = y->sign;
+ wds = i;
+ for (xp = x, yp = y->x; --i >= 0; )
+ *xp++ = *yp++;
+ }
+
+ // Set contents to 0
+ void clear() { memset(&x[0], 0, wds * sizeof(unsigned32)); }
+
+};
+
+#define BIGINT_HEADER_SIZE \
+ (sizeof(Bigint) - (1<<BIGINT_MINIMUM_K) * sizeof(unsigned32))
+
+
+class bigint
+{
+ Bigint* rep;
+ Bigint bg;
+
+ friend int cmp(const bigint& a, const bigint& b);
+
+public:
+ // Constructors and destructor
+ bigint() : rep(&bg) { bg.on_stack = 1; }
+ bigint(Bigint* bi) : rep(bi) {}
+ ~bigint() {}
+
+ bigint& operator=(bigint& i) { rep = i.rep; return *this; }
+ bigint& operator=(Bigint* i) { rep = i; return *this; }
+ bigint& operator=(int i) {
+ resize(1);
+ rep->x[0] = i;
+ rep->wds = 1;
+ return *this;
+ }
+
+ int order() const { return rep->k; }
+ int size() const { return rep->wds; }
+ int capacity() const { return rep->maxwds; }
+
+ void resize(int k)
+ {
+ if (rep->k >= k) return;
+ Bigint* nv = rep;
+ balloc(k);
+ rep->Bcopy(nv);
+ if (!nv->on_stack) delete[] (char*)nv;
+ }
+
+ unsigned32 operator[](int idx) const { return rep->x[idx]; }
+ unsigned32& operator[](int idx) { return rep->x[idx]; }
+
+ unsigned32 back() const { return rep->x[size()-1]; }
+ unsigned32& back() { return rep->x[size()-1]; }
+
+ // Set contents to 0
+ void clear() { rep->clear(); }
+
+ /* Allocate a Bigint with '1<<k' big digits. */
+ void balloc(int k)
+ {
+ int x;
+
+ if (k < BIGINT_MINIMUM_K)
+ k = BIGINT_MINIMUM_K;
+
+ x = 1 << k;
+ char* space = new char[BIGINT_HEADER_SIZE + x * sizeof(unsigned32)];
+ rep = new (space) Bigint(k, x);
+ }
+
+ void swap(bigint& b) { Bigint* tmp = b.rep; b.rep = rep; rep = tmp; }
+
+ Bigint* get() { return rep; }
+ const Bigint* get() const { return rep; }
+
+ bool isneg() const { return rep->sign != 0; }
+
+ template<typename _T>
+ void d2b(_T d, _G_int32_t *e, _G_int32_t *bits);
+ void mult(bigint& ar, bigint& br);
+ bigint& multadd(int m, int a);
+ bigint& pow5mult(int k);
+ void lshift(int k);
+ bigint& diff(bigint& ba, bigint& bb);
+ int quorem(const bigint &S);
+};
+
+
+// Transfers the significant bits of d's mantissa, including any implicit top
+// bit to this bigint. As a result, the lsb of x[0] will be 1. Returns
+// significant number of bits in *bits. Returns a modified exponent such that
+// the decimal point is to the right of the lsb of x[0].
+template<typename _T>
+void
+bigint::d2b(_T d, _G_int32_t *e, _G_int32_t *bits)
+{
+ // Shift d fraction until bottom bit is 1. Store fraction into result
+ // (which may be 1 or 2 ints). Store e adjusted for the shift of d.
+ // bits is the number of bits d was shifted.
+ int de; // Exponent of d
+ int i, k;
+ unsigned32 *x, y, z;
+
+ resize(1);
+ x = rep->x;
+
+ fptype<_T> fd = d;
+ z = fd.imant0(); // Mantissa plus implicit top 1
+ de = fd.bexp(); // Exponent part of d
+// y = fd.mant1(); // Low order mantissa bits
+
+ // Shift fraction so bit 0 is 1, store into x[0] (and x[1] if
+ // needed). k is distance the fraction was shifted.
+
+ int mantsize = fd.mantsize();
+
+ // Set j to the lowest order word with significant bits
+ int j;
+ int shift = 0;
+
+ // Find lowest non-zero mantissa word
+ for (j=mantsize-1; j>0 ; j--)
+ if ((y=fd.mant(j)) != 0) break;
+
+ int skipped = mantsize - j - 1; // Number of words == 0
+
+ // Transfer words to array
+ int idx = 0;
+ for (; j>0 ; j--)
+ x[idx++] = fd.mant(j);
+ x[idx++] = z;
+
+ // Right shift data so the lsb of x[0] is 1.
+ shift = lo0bits(&x[0]);
+ for (int m=1; m < idx; m++) {
+ // Need to test because 0x1c5ad3 << 32 != 0. I guess this is just
+ // undefined behavior. If a bug, it's present in 2.95, 3.3, 3.4.
+ if (shift) {
+ x[m-1] |= x[m] << (32-shift);
+ x[m] >>= shift;
+ }
+ }
+
+ while (!x[idx-1]) idx--; // in case upper words are 0 for denorm
+ i = rep->wds = idx;
+
+ k = shift + 32 * skipped;
+
+// // original code
+// if (y) {
+// if ((k = lo0bits(&y))) {
+// x[0] = y | z << (32 - k);
+// z >>= k;
+// }
+// else
+// x[0] = y;
+// i = rep->wds = (x[1] = z) ? 2 : 1;
+// }
+// else {
+// #ifdef DEBUG
+// if (!z)
+// Bug("Zero passed to d2b");
+// #endif
+// k = lo0bits(&z);
+// x[0] = z;
+// i = rep->wds = 1;
+// k += 32;
+// }
+
+ // Adjust exponent for bias and fraction shift
+ if (fd.isdenorm()) {
+ // denorm case
+ *e = de - fptype<_T>::bias - (fptype<_T>::mant_size-1) + 1 + k;
+ *bits = 32*i - __builtin_clz(x[i-1]);
+ }
+ else {
+#ifdef IBM
+ *e = (de - fptype<_T>::bias - (fptype<_T>::mant_size-1) << 2) + k;
+ *bits = 4*P + 8 - k - __builtin_clz(fd.mant0());
+#else
+ *e = de - fptype<_T>::bias - (fptype<_T>::mant_size-1) + k;
+ *bits = fptype<_T>::mant_size - k;
+#endif
+ }
+}
+
+void
+bigint::mult(bigint& ar, bigint& br)
+{
+ int k, wa, wb, wc;
+ unsigned32* x;
+ unsigned32 carry, y, z;
+ unsigned32 *xa, *xae; // ptr to a->x, end of x array
+ unsigned32 *xb, *xbe; // ptr to b->x, end of x array
+ unsigned32 *xc, *xc0;
+ unsigned32 z2;
+ Bigint* a;
+ Bigint* b;
+ if (ar.size() < br.size()) {
+ a = br.get();
+ b = ar.get();
+ }
+ else {
+ a = ar.get();
+ b = br.get();
+ }
+ k = a->k;
+ wa = a->wds; // current bytes in ar
+ wb = b->wds; // current bytes in br
+ wc = wa + wb;
+ if (wc > a->maxwds)
+ k++; // double storage in dest
+ resize(k);
+ for(x = rep->x, xa = x + wc; x < xa; x++)
+ *x = 0;
+ xa = a->x;
+ xae = xa + wa;
+ xb = b->x;
+ xbe = xb + wb;
+ xc0 = rep->x;
+
+ // iterate over words in b
+ for(; xb < xbe; xb++, xc0++) {
+ if ((y = *xb & 0xffff)) { // grab bottom 16 bits of xb
+ x = xa;
+ xc = xc0;
+ carry = 0;
+ do {
+ // repeated 16x16 into 32 multiplications
+ z = (*x & 0xffff) * y + (*xc & 0xffff) + carry;
+ carry = z >> 16;
+ z2 = (*x++ >> 16) * y + (*xc >> 16) + carry;
+ carry = z2 >> 16;
+ *xc++ = z2 << 16 | z & 0xffff;
+ }
+ while(x < xae);
+ *xc = carry;
+ }
+ if ((y = *xb >> 16)) {
+ x = xa;
+ xc = xc0;
+ carry = 0;
+ z2 = *xc;
+ do {
+ z = (*x & 0xffff) * y + (*xc >> 16) + carry;
+ carry = z >> 16;
+ *xc++ = z << 16 | z2 & 0xffff;
+ z2 = (*x++ >> 16) * y + (*xc & 0xffff) + carry;
+ carry = z2 >> 16;
+ }
+ while(x < xae);
+ *xc = z2;
+ }
+ }
+ for(xc0 = rep->x, xc = xc0 + wc; wc > 0 && !*--xc; --wc) ;
+ rep->wds = wc;
+}
+
+/* Return b*m+a. b is modified.
+ Assumption: 0xFFFF*m+a fits in 32 bits. */
+
+bigint&
+bigint::multadd(int m, int a)
+{
+ int i, wds;
+ unsigned32 *x, y;
+ unsigned32 xi, z;
+
+ wds = rep->wds;
+ x = rep->x;
+ i = 0;
+ do {
+ xi = *x;
+ y = (xi & 0xffff) * m + a;
+ z = (xi >> 16) * m + (y >> 16);
+ a = (int)(z >> 16);
+ *x++ = (z << 16) + (y & 0xffff);
+ }
+ while(++i < wds);
+ if (a) {
+ if (wds >= rep->maxwds)
+ resize(rep->k+1);
+ rep->x[wds++] = a;
+ rep->wds = wds;
+ }
+ return *this;
+}
+
+/* Returns b*(5**k). b is modified. */
+/* Re-written by Per Bothner to not need a static list. */
+
+bigint&
+bigint::pow5mult(int k)
+{
+ static int p05[6] = { 5, 25, 125, 625, 3125, 15625 };
+
+ for (; k > 6; k -= 6)
+ multadd(15625, 0); /* b *= 5**6 */
+ if (k != 0)
+ multadd(p05[k-1], 0);
+ return *this;
+}
+
+
+/* Re-written by Per Bothner so shift can be in place. */
+
+void
+bigint::lshift(int k)
+{
+ int i;
+ unsigned32 *x, *x1, *xe;
+ int old_wds = rep->wds;
+ int n = k >> 5;
+ int k1 = rep->k;
+ int n1 = n + old_wds + 1;
+
+ if (k == 0)
+ return;
+
+ // compute k where 2^k is num words needed
+ for(i = rep->maxwds; n1 > i; i <<= 1)
+ k1++;
+ resize(k1);
+
+ xe = rep->x; /* Source limit */
+ x = xe + old_wds; /* Source pointer */
+ x1 = x + n; /* Destination pointer */
+ if (k &= 0x1f) {
+ int k1 = 32 - k;
+ unsigned32 z = *--x;
+ if ((*x1 = (z >> k1)) != 0) {
+ ++n1;
+ }
+ while (x > xe) {
+ unsigned32 w = *--x;
+ *--x1 = (z << k) | (w >> k1);
+ z = w;
+ }
+ *--x1 = z << k;
+ }
+ else
+ // lshift % 32 = 0, just copy the words to the left.
+ do {
+ *--x1 = *--x;
+ } while(x > xe);
+ while (x1 > xe)
+ *--x1 = 0;
+ rep->wds = n1 - 1;
+}
+
+/* Do: c = a-b. */
+
+bigint&
+bigint::diff(bigint& ba, bigint& bb)
+{
+ const Bigint* a;
+ const Bigint* b;
+ int i, wa, wb;
+ _G_int32_t borrow, y; /* We need signed shifts here. */
+ const unsigned32 *xa, *xae, *xb, *xbe;
+ unsigned32 *xc;
+ _G_int32_t z;
+
+ i = cmp(ba,bb);
+ if (!i) {
+ resize(0);
+ rep->wds = 1;
+ rep->x[0] = 0;
+ return *this;
+ }
+
+ a = ba.get();
+ b = bb.get();
+ if (i < 0) {
+ const Bigint *tmp = a;
+ a = b;
+ b = tmp;
+ i = 1;
+ }
+ else
+ i = 0;
+ resize(a->k);
+ rep->sign = i;
+ wa = a->wds;
+ xa = a->x; // Start of a array
+ xae = xa + wa; // End of a array
+ wb = b->wds;
+ xb = b->x; // Start of b array
+ xbe = xb + wb; // End of b array
+ xc = rep->x;
+ borrow = 0;
+ do {
+ y = (*xa & 0xffff) - (*xb & 0xffff) + borrow;
+ borrow = y >> 16;
+ Sign_Extend(borrow, y);
+ z = (*xa++ >> 16) - (*xb++ >> 16) + borrow;
+ borrow = z >> 16;
+ Sign_Extend(borrow, z);
+ *xc++ = z << 16 | y & 0xffff;
+ }
+ while(xb < xbe);
+ while(xa < xae) {
+ y = (*xa & 0xffff) + borrow;
+ borrow = y >> 16;
+ Sign_Extend(borrow, y);
+ z = (*xa++ >> 16) + borrow;
+ borrow = z >> 16;
+ Sign_Extend(borrow, z);
+ *xc++ = z << 16 | y & 0xffff;
+ }
+ while(!*--xc)
+ wa--;
+ rep->wds = wa;
+ return *this;
+}
+
+// Return quotient of *this/S. this = remainder.
+// Assumes that returned quotient is < 9, i.e. this->wds == S->wds
+//
+int
+bigint::quorem(const bigint &S)
+{
+ int n;
+ _G_int32_t borrow, y;
+ unsigned32 carry, q, ys;
+ unsigned32 *bx, *bxe;
+ const unsigned32 *sx, *sxe;
+ _G_int32_t z;
+ unsigned32 si, zs;
+
+ n = S.size();
+#ifdef DEBUG
+ /*debug*/ if (rep->wds > n)
+ /*debug*/ Bug("oversize b in quorem");
+#endif
+ if (rep->wds < n)
+ return 0;
+ sx = S.rep->x;
+ sxe = sx + --n;
+ bx = rep->x;
+ bxe = bx + n;
+ q = *bxe / (*sxe + 1); /* ensure q <= true quotient */
+#ifdef DEBUG
+ /*debug*/ if (q > 9)
+ /*debug*/ Bug("oversized quotient in quorem");
+#endif
+ if (q) {
+ borrow = 0;
+ carry = 0;
+ do {
+ si = *sx++;
+ ys = (si & 0xffff) * q + carry;
+ zs = (si >> 16) * q + (ys >> 16);
+ carry = zs >> 16;
+ y = (*bx & 0xffff) - (ys & 0xffff) + borrow;
+ borrow = y >> 16;
+ Sign_Extend(borrow, y);
+ z = (*bx >> 16) - (zs & 0xffff) + borrow;
+ borrow = z >> 16;
+ Sign_Extend(borrow, z);
+ *bx++ = z << 16 | y & 0xffff;
+ }
+ while(sx <= sxe);
+ if (!*bxe) {
+ bx = rep->x;
+ while(--bxe > bx && !*bxe)
+ --n;
+ rep->wds = n;
+ }
+ }
+ if (cmp(*this, S) >= 0) {
+ q++;
+ borrow = 0;
+ carry = 0;
+ bx = rep->x;
+ sx = S.rep->x;
+ do {
+ si = *sx++;
+ ys = (si & 0xffff) + carry;
+ zs = (si >> 16) + (ys >> 16);
+ carry = zs >> 16;
+ y = (*bx & 0xffff) - (ys & 0xffff) + borrow;
+ borrow = y >> 16;
+ Sign_Extend(borrow, y);
+ z = (*bx >> 16) - (zs & 0xffff) + borrow;
+ borrow = z >> 16;
+ Sign_Extend(borrow, z);
+ *bx++ = z << 16 | y & 0xffff;
+ }
+ while(sx <= sxe);
+ bx = rep->x;
+ bxe = bx + n;
+ if (!*bxe) {
+ while(--bxe > bx && !*bxe)
+ --n;
+ rep->wds = n;
+ }
+ }
+ return q;
+}
+
+
+int
+cmp(const bigint& a, const bigint& b)
+{
+ int i = a.size();
+ int j = b.size();
+#ifdef DEBUG
+ if (i > 1 && !a[i-1])
+ Bug("cmp called with a->x[a->wds-1] == 0");
+ if (j > 1 && !b[j-1])
+ Bug("cmp called with b->x[b->wds-1] == 0");
+#endif
+ if (i -= j)
+ return i;
+ while (j--) {
+ if (a[j] != b[j])
+ return a[j] < b[j] ? -1 : 1;
+ }
+ return 0;
+}
+
+
+template
+void
+bigint::d2b<double>(double d, _G_int32_t *e, _G_int32_t *bits);
+
+template
+void
+bigint::d2b<long double>(long double d, _G_int32_t *e, _G_int32_t *bits);
+
+
+// Enough entries to handle long double with 112-bit mantissa
+static const double
+tens[] = {
+ 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
+ 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+ 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
+ 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
+ 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49
+};
+/* Ten_pmax = floor(mant_size*log(2)/log(5)) */
+// IEEE & IBM double is 22
+// VAX double is 24
+// IEEE quad is 48
+// This is the number of elements in tens[] to use.
+static const int Ten_pmax = int(fptype<double>::mant_size * log(2)/log(5));
+
+#ifdef IEEE
+static const double bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 };
+#define n_bigtens 5
+static const double bigtensl[] = { 1e16, 1e32, 1e64, 1e128, 1e256, 1e512,
+ 1e1024, 1e2048, 1e4096 };
+#define n_bigtensl 9
+#else
+#ifdef IBM
+static const double bigtens[] = { 1e16, 1e32, 1e64 };
+#define n_bigtens 3
+#else // VAX
+/* Also used for the case when !_DOUBLE_IS_32BITS. */
+static const double bigtens[] = { 1e16, 1e32 };
+#define n_bigtens 2
+#endif
+#endif
+
+template<typename _T>
+_T bigten(int i);
+
+template<>
+static inline double bigten<double>(int i) { return bigtens[i]; }
+
+template<>
+static inline long double bigten<long double>(int i) { return bigtensl[i]; }
+
+template<typename _T>
+_T maxbigten();
+
+template<>
+static inline double maxbigten<double>()
+{ return bigtens[n_bigtens-1]; }
+
+template<>
+static inline long double maxbigten<long double>()
+{ return bigtensl[n_bigtensl-1]; }
+
+
+
+// dec_exponent performs the first portion of the conversion to a string. It
+// computes an upper bound on the decimal exponent that can be used to size a
+// buffer to print into. It must be passed back into dtoa.
+//
+// This will only be used for double and long double
+template<typename _T>
+int
+__dec_exponent(_T d, int &special, bool &sign)
+{
+ /* Arguments ndigits, decpt, sign are similar to those
+ of ecvt and fcvt; trailing zeros are suppressed from
+ the returned string. If not null, *rve is set to point
+ to the end of the return value. If d is +-Infinity or NaN,
+ then *decpt is set to 9999.
+
+ special returns true if the number is nan, infinity, or 0.
+ type contains -1 for nan, 0 for 0, and 1 for infinity. This allows
+ the calling routine to handle the special cases.
+ */
+
+ fptype<_T> fd = d;
+
+ _G_int32_t i;
+ _G_int32_t k; // Holds base 10 exponent
+
+ _T d2, ds;
+
+ // Save sign value into user var
+ if (fd.isneg()) {
+ /* set sign for everything, including 0's and NaNs */
+ sign = true;
+ fd.clearneg(); // clear sign bit
+ }
+ else
+ sign = false;
+
+ // Handle IEEE and VAX infinity and nan. Apparently IBM has none.
+ if (fd.isinf())
+ {
+ special = 1;
+ return 8;
+ }
+ if (fd.isnan())
+ {
+ special = 2;
+ return 3;
+ }
+
+ special = 0;
+
+ fd.normalize(); // only for IBM
+ if (fd.iszero()) return 0;
+
+ d = fd;
+
+ // i = exponent(d) unbiased
+ fd = d;
+ i = (int)fd.bexp();
+
+ if (fd.isdenorm()) {
+ /* d is denormalized */
+ bigint b; // Gets significant bits of frac(d)
+ _G_int32_t bbits, be;
+
+ // be is adjusted exponent, bbits is significant bits in b. b
+ // contains significant bits of fraction of d. (if bbits is integer, be is new exp)
+ // b = d2b(b, d, &be, &bbits);
+ b.d2b(d, &be, &bbits);
+
+ i = bbits + be + (fptype<_T>::bias + (fptype<_T>::mant_size-1) - 1);
+ fptype<_T> ftmp = fd.mantissa_denorm(i);
+ ftmp.add_to_exp(-31); /* adjust exponent */
+ d2 = ftmp;
+ i -= (fptype<_T>::bias + (fptype<_T>::mant_size-1) - 1) + 1;
+ }
+ else {
+ // Forcibly sets unbiased exponent to 0
+ fd.zeroexp();
+ d2 = fd;
+ i -= fptype<_T>::bias;
+
+#ifdef IBM
+ // IBM-specific adjustment
+ // j can be 11 - (>8)
+ // This seems to break if none of the top 3 bits are set.
+ if (j = 11 - __builtin_clz(fd.mant0()))
+ d2 /= 1 << j;
+ i <<= 2;
+ i += j;
+#endif
+ }
+
+ /* Now i is the unbiased base-2 exponent. */
+
+ // Compute integer base 10 exponent into k
+
+ /* log(x) ~=~ log(1.5) + (x-1.5)/1.5
+ * log10(x) = log(x) / log(10)
+ * ~=~ log(1.5)/log(10) + (x-1.5)/(1.5*log(10))
+ * log10(d) = i*log(2)/log(10) + log10(d2)
+ *
+ * This suggests computing an approximation k to log10(d) by
+ *
+ * k = i*0.301029995663981
+ * + ( (d2-1.5)*0.289529654602168 + 0.176091259055681 );
+ *
+ * We want k to be too large rather than too small.
+ * The error in the first-order Taylor series approximation
+ * is in our favor, so we just round up the constant enough
+ * to compensate for any error in the multiplication of
+ * (i) by 0.301029995663981; since |i| <= 1077,
+ * and 1077 * 0.30103 * 2^-52 ~=~ 7.2e-14,
+ * adding 1e-13 to the constant term more than suffices.
+ * Hence we adjust the constant term to 0.1760912590558.
+ * (We could get a more accurate k by invoking log10,
+ * but this is probably not worthwhile.)
+ *
+ * (From Richard Henderson)
+ * The comment should be updated for long double. I guess the
+ * overestimate is still ok, since while |i| <= 16881 is much
+ * larger, 2^-64 and 2^-113 are much smaller.
+ */
+ ds = (d2-1.5)*0.289529654602168 + 0.1760912590558 + i*0.301029995663981;
+ k = (int)ds;
+ if (ds < 0. && ds != k)
+ k--; /* want k = floor(ds) */
+ return k;
+}
+
+template
+int
+__dec_exponent<double>(double d, int &special, bool &sign);
+
+template
+int
+__dec_exponent<long double>(long double d, int &special, bool &sign);
+
+/* dtoa for IEEE arithmetic (dmg): convert double to ASCII string.
+ *
+ * Inspired by "How to Print Floating-Point Numbers Accurately" by
+ * Guy L. Steele, Jr. and Jon L. White [Proc. ACM SIGPLAN '90, pp. 92-101].
+ *
+ * Modifications:
+ * 1. Rather than iterating, we use a simple numeric overestimate
+ * to determine k = floor(log10(d)). We scale relevant
+ * quantities using O(log2(k)) rather than O(k) multiplications.
+ * 2. For some modes > 2 (corresponding to ecvt and fcvt), we don't
+ * try to generate digits strictly left to right. Instead, we
+ * compute with fewer bits and propagate the carry if necessary
+ * when rounding the final digit up. This is often faster.
+ * 3. Under the assumption that input will be rounded nearest,
+ * mode 0 renders 1e23 as 1e23 rather than 9.999999999999999e22.
+ * That is, we allow equality in stopping tests when the
+ * round-nearest rule will give the same floating-point value
+ * as would satisfaction of the stopping test with strict
+ * inequality.
+ * 4. We remove common factors of powers of 2 from relevant
+ * quantities.
+ * 5. When converting floating-point integers less than 1e16,
+ * we use floating-point arithmetic rather than resorting
+ * to multiple-precision integers.
+ * 6. When asked to produce fewer than 15 digits, we first try
+ * to get by with floating-point arithmetic; we resort to
+ * multiple-precision integer arithmetic only if we cannot
+ * guarantee that the floating-point calculation has given
+ * the correctly rounded result. For k requested digits and
+ * "uniformly" distributed input, the probability is
+ * something like 10^(k-15) that we must resort to the long
+ * calculation.
+ */
+
+template<typename _T>
+int
+__dtoa(char* buf, _T d, int exponent, int mode, int ndigits, bool trim)
+{
+ // buf is a buffer whose size was computed using dec_exponent(), so as to be
+ // large enough to hold the number if needed.
+
+ // exponent is the value computed by dec_exponent
+
+ // trim is a flag that says whether trailing zeros should be trimmed off
+
+ // decpt returns the actual decimal point location. It will often be
+ // exponent, but may be adjusted in some cases.
+
+ /* Arguments ndigits, decpt, sign are similar to those of ecvt and fcvt;
+ trailing zeros are suppressed from the returned string. If d is
+ +-Infinity or NaN, then *decpt is set to 9999.
+
+ mode:
+ 0 ==> shortest string that yields d when read in
+ and rounded to nearest.
+ 1 ==> like 0, but with Steele & White stopping rule;
+ e.g. with IEEE P754 arithmetic , mode 0 gives
+ 1e23 whereas mode 1 gives 9.999999999999999e22.
+ 2 ==> max(1,ndigits) significant digits. This gives a
+ return value similar to that of ecvt, except
+ that trailing zeros are suppressed.
+ 3 ==> through ndigits past the decimal point. This
+ gives a return value similar to that from fcvt,
+ except that trailing zeros are suppressed, and
+ ndigits can be negative.
+
+Leftright modes have been removed.
+ 4-9 should give the same return values as 2-3, i.e.,
+ 4 <= mode <= 9 ==> same return as mode
+ 2 + (mode & 1). These modes are mainly for
+ debugging; often they run slower but sometimes
+ faster than modes 2-3.
+ 4,5,8,9 ==> left-to-right digit generation.
+ 6-9 ==> don't try fast floating-point estimate
+ (if applicable).
+
+ Values of mode other than 0-9 are treated as mode 0.
+
+ Sufficient space is allocated to the return value
+ to hold the suppressed trailing zeros.
+ */
+
+ fptype<_T> fd = d;
+
+ _G_int32_t bbits, b2, b5, be, dig, i, ilim, ilim1,
+ j, j1;
+ _G_int32_t k; // Holds base 10 exponent
+ _G_int32_t k_check, m2, m5, s2, s5, try_quick;
+ _G_int32_t L;
+ int denorm;
+ bigint b; // Gets significant bits of frac(d)
+ bigint S;
+ _T ds, eps;
+ fptype<_T> fpeps;
+ char *s, *s0, *stmp;
+
+ fd.clearneg(); // clear sign bit
+
+ fd.normalize(); // only for IBM
+
+ // Handle 0 for all cases.
+ // XXX deal with trim
+ if (!fd) {
+ strcpy(buf, "0");
+ return 1;
+ }
+
+ d = fd;
+
+ // be is exponent adjusted to move lowest 1 bit of mantissa to the 0th bit
+ // and place the decimal point to the right of it. bbits is significant
+ // bits of b's mantissa after shifting. b contains significant bits of
+ // fraction of d. (if bbits is integer, be is new exp)
+ b.d2b(d, &be, &bbits);
+
+ // i = exponent(d) unbiased
+ fd = d;
+ i = (int)fd.bexp();
+
+ if (fd.has_denorm() && i == 0) {
+ /* d is denormalized */
+ i = bbits + be + (fptype<_T>::bias + (fptype<_T>::mant_size-1) - 1);
+ i -= (fptype<_T>::bias + (fptype<_T>::mant_size-1) - 1) + 1;
+ denorm = 1;
+ }
+ else {
+ i -= fptype<_T>::bias;
+#ifdef IBM
+ j = 11 - __builtin_clz(fd.mant0());
+ i <<= 2;
+ i += j;
+#endif
+ denorm = 0;
+ }
+
+ /* Now i is the unbiased base-2 exponent. */
+
+ // Compute integer base 10 exponent into k
+
+ // k is a bound on the integer base 10 exponent of d. We may need to tweak
+ // it a little.
+ k = exponent;
+ k_check = 1;
+ if (k >= 0 && k < Ten_pmax) {
+ if (d < tens[k])
+ k--;
+ k_check = 0;
+ }
+
+ j = bbits - i - 1; // mantissa bits w/ exp=0
+ if (j >= 0) {
+ b2 = 0;
+ s2 = j;
+ }
+ else {
+ b2 = -j;
+ s2 = 0;
+ }
+ if (k >= 0) {
+ b5 = 0;
+ s5 = k;
+ s2 += k;
+ }
+ else {
+ b2 -= k;
+ b5 = -k;
+ s5 = 0;
+ }
+ if (mode < 0 || mode > 9)
+ mode = 0;
+ try_quick = 1;
+ if (mode > 5) {
+ mode -= 4;
+ try_quick = 0;
+ }
+
+// leftright = 1;
+ switch(mode) {
+ case 0:
+ case 1:
+ ilim = ilim1 = -1;
+ i = 18;
+ ndigits = 0;
+ break;
+ case 2:
+// leftright = 0;
+ /* no break */
+ case 4:
+ if (ndigits <= 0)
+ ndigits = 1;
+ ilim = ilim1 = i = ndigits;
+ break;
+ case 3:
+// leftright = 0;
+ /* no break */
+ case 5:
+ i = ndigits + k + 1;
+ ilim = i;
+ ilim1 = i - 1;
+ if (i <= 0)
+ i = 1;
+ }
+ /* i is now an upper bound of the number of digits to generate. */
+
+ s = s0 = buf;
+
+ if (ilim >= 0 && ilim <= fptype<_T>::Quick_max && try_quick) {
+
+ /* Try to get by with floating-point arithmetic. */
+
+ _T d2 = d;
+ _G_int32_t i = 0;
+ _G_int32_t k0 = k;
+ _G_int32_t ilim0 = ilim;
+ _G_int32_t ieps = 2; /* conservative */
+
+ if (k > 0) {
+ ds = tens[k&0xf]; // 1e0 -> 1e15
+ j = k >> 4;
+ if (j & fptype<_T>::Bletch) {
+ /* prevent overflows */
+ // n_bigtens
+ j &= fptype<_T>::Bletch - 1;
+ d /= maxbigten<_T>();
+ ieps++;
+ }
+ for(; j; j >>= 1, i++)
+ if (j & 1) {
+ ieps++;
+ ds *= bigten<_T>(i);
+ }
+ d /= ds;
+ }
+ else if ((j1 = -k)) {
+ d *= tens[j1 & 0xf];
+ for(j = j1 >> 4; j; j >>= 1, i++)
+ if (j & 1) {
+ ieps++;
+ d *= bigten<_T>(i);
+ }
+ }
+ if (k_check && d < 1. && ilim > 0) {
+ if (ilim1 <= 0)
+ goto fast_failed;
+ ilim = ilim1;
+ k--;
+ d *= 10.;
+ ieps++;
+ }
+ eps = ieps*d + 7.;
+ fpeps = eps;
+ fpeps.add_to_exp(-(fptype<_T>::mant_size-1));
+ eps = fpeps;
+ if (ilim == 0) {
+ d -= 5.;
+ if (d > eps)
+ goto one_digit;
+ if (d < -eps)
+ goto no_digits;
+ goto fast_failed;
+ }
+
+ /* Generate ilim digits, then fix them up. */
+ eps *= tens[ilim-1];
+ for(i = 1;; i++, d *= 10.) {
+ L = (_G_int32_t)d;
+ d -= L;
+ *s++ = '0' + (int)L;
+ if (i == ilim) {
+ stmp = s; // This should be under bump_up, but there's a
+ // codegen bug, I think
+ if (d > 0.5 + eps)
+ goto bump_up;
+ else if (d < 0.5 - eps) {
+ // Strips off trailing 0's
+ if (trim) {
+ while(*--s == '0');
+ s++;
+ }
+ goto ret1;
+ }
+ break;
+ }
+ }
+ fast_failed:
+ s = s0;
+ d = d2;
+ k = k0;
+ ilim = ilim0;
+ }
+
+ /* Do we have a "small" integer? */
+
+ if (be >= 0 && k <= fptype<_T>::Int_max) {
+ /* Yes. */
+ ds = tens[k];
+ if (ndigits < 0 && ilim <= 0) {
+ if (ilim < 0 || d <= 5*ds)
+ goto no_digits;
+ goto one_digit;
+ }
+ for(i = 1;; i++) {
+ L = (_G_int32_t)(d / ds);
+ d -= L*ds;
+#ifdef Check_FLT_ROUNDS
+ /* If FLT_ROUNDS == 2, L will usually be high by 1 */
+ if (d < 0) {
+ L--;
+ d += ds;
+ }
+#endif
+ *s++ = '0' + (int)L;
+ if (i == ilim) {
+ d += d;
+ if (d > ds || (d == ds && L & 1)) {
+ stmp = s; // This should be under bump_up, but there's a
+ // codegen bug in 3.4 as of 9/4/03, I think.
+ bump_up:
+ while( *--s == '9' ) {
+ *s='0';
+ if (s == s0) {
+ k++;
+ *s = '0';
+ break;
+ }
+ }
+ ++*s++;
+ if (!trim) s = stmp;
+ }
+ break;
+ }
+ if (!(d *= 10.))
+ break;
+ }
+ goto ret1;
+ }
+
+// return 0; // XXX bail out to avoid mp int calcs
+
+ m2 = b2;
+ m5 = b5;
+ if (m2 > 0 && s2 > 0) {
+ i = m2 < s2 ? m2 : s2;
+ b2 -= i;
+ m2 -= i;
+ s2 -= i;
+ }
+ if (b5 > 0) {
+ b.pow5mult(b5);
+ }
+ S = 1;
+ if (s5 > 0)
+ S.pow5mult(s5);
+
+ /* Check for special case that d is a normalized power of 2. */
+
+ if (mode < 2) {
+ fd = d;
+ if (fd.ispwr2()) {
+ /* The special case */
+ b2 += fptype<_T>::log2p;
+ s2 += fptype<_T>::log2p;
+ }
+ }
+
+ /* Arrange for convenient computation of quotients:
+ * shift left if necessary so divisor has 4 leading 0 bits.
+ *
+ * Perhaps we should just compute leading 28 bits of S once
+ * and for all and pass them and a shift to quorem, so it
+ * can do shifts and ors to compute the numerator for q.
+ */
+ if ((i = ((s5 ? 32 - __builtin_clz(S.back()) : 1) + s2) & 0x1f))
+ i = 32 - i;
+ if (i > 4) {
+ i -= 4;
+ b2 += i;
+ m2 += i;
+ s2 += i;
+ }
+ else if (i < 4) {
+ i += 28;
+ b2 += i;
+ m2 += i;
+ s2 += i;
+ }
+ if (b2 > 0)
+ b.lshift(b2);
+ if (s2 > 0)
+ S.lshift(s2);
+ if (k_check) {
+ if (cmp(b,S) < 0) {
+ k--;
+ b.multadd(10, 0); /* we botched the k estimate */
+ ilim = ilim1;
+ }
+ }
+ if (ilim <= 0 && mode > 2) {
+ if (ilim < 0 || cmp(b,S.multadd(5,0)) <= 0) {
+ /* no digits, fcvt style */
+ no_digits:
+ k = -1 - ndigits;
+ goto ret;
+ }
+ one_digit:
+ *s++ = '1';
+ k++;
+ goto ret;
+ }
+ for(i = 1;; i++) {
+ *s++ = dig = b.quorem(S) + '0';
+ if (i >= ilim)
+ break;
+ b.multadd(10, 0);
+ }
+
+ /* Round off last digit */
+
+ b.lshift(1);
+ j = cmp(b, S);
+ if (j > 0 || (j == 0 && dig & 1)) {
+ roundoff:
+ stmp = s;
+ while(*--s == '9') {
+ *s = '0';
+ if (s == s0) {
+ k++;
+ *s++ = '1';
+ goto ret;
+ }
+ }
+ ++*s++;
+ if (!trim) s = stmp;
+ }
+ else {
+ if (trim) {
+ while(*--s == '0');
+ s++;
+ }
+ }
+ ret:
+ ret1:
+ *s = 0;
+ return k + 1;
+}
+
+// Instantiations
+template
+ int
+ __dtoa<double>(char* buf, double d, int exponent, int mode, int ndigits,
+ bool trim);
+template
+ int
+ __dtoa<long double>(char* buf, long double d, int exponent, int mode,
+ int ndigits, bool trim);
+
+template
+int std::__float_scientific_to_char<char>(char*, char const*, char,
+ const char*, int, int,
+ std::_Ios_Fmtflags, bool);
+template
+int std::__float_fixed_to_char<wchar_t>(wchar_t*, wchar_t const*, wchar_t,
+ const char*, int, int,
+ std::_Ios_Fmtflags, bool);
+template
+int std::__float_scientific_to_char<wchar_t>(wchar_t*, wchar_t const*,
+ wchar_t, const char*, int, int,
+ std::_Ios_Fmtflags, bool);
+template
+int std::__float_fixed_to_char<char>(char*, char const*, char, const char*,
+ int, int, std::_Ios_Fmtflags, bool);
+
+#endif /* __USE_DTOA */
+
+}