Performance improvement for std::to_chars(char* first, char* last, /* integer-type */ value, int base = 10 );
Ehrnsperger, Markus
markus_ehrnsperger@yahoo.de
Tue Jul 30 05:21:15 GMT 2024
On 2024-07-29 12:16, Jonathan Wakely wrote:
> On Mon, 29 Jul 2024 at 10:45, Jonathan Wakely <jwakely.gcc@gmail.com> wrote:
>> On Mon, 29 Jul 2024 at 09:42, Ehrnsperger, Markus
>> <markus_ehrnsperger@yahoo.de> wrote:
>>> Hi,
>>>
>>>
>>> I'm attaching two files:
>>>
>>> 1.: to_chars10.h:
>>>
>>> This is intended to be included in libstdc++ / gcc to achieve performance improvements. It is an implementation of
>>>
>>> to_chars10(char* first, char* last, /* integer-type */ value);
>>>
>>> Parameters are identical to std::to_chars(char* first, char* last, /* integer-type */ value, int base = 10 ); . It only works for base == 10.
>>>
>>> If it is included in libstdc++, to_chars10(...) could be renamed to std::to_chars(char* first, char* last, /* integer-type */ value) to provide an overload for the default base = 10
>> Thanks for the email. This isn't in the form of a patch that we can
>> accept as-is, although I see that the license is compatible with
>> libstdc++, so if you are looking to contribute it then that could be
>> done either by assigning copyright to the FSF or under the DCO terms.
>> See https://gcc.gnu.org/contribute.html#legal for more details.
>>
>> I haven't looked at the code in detail, but is it a similar approach
>> to https://jk-jeon.github.io/posts/2022/02/jeaiii-algorithm/ ?
>> How does it compare to the performance of that algorithm?
>>
>> I have an incomplete implementation of that algorithm for libstdc++
>> somewhere, but I haven't looked at it for a while.
> I took a closer look and the reinterpret_casts worried me, so I tried
> your test code with UBsan. There are a number of errors that would
> need to be fixed before we would consider using this code.
Attached are new versions of to_chars10.cpp, to_chars10.h and the new
file itoa_better_y.h
Changes:
- I removed all reinterpret_casts, and tested with -fsanitize=undefined
- I added itoa_better_y.h from
https://jk-jeon.github.io/posts/2022/02/jeaiii-algorithm/ to the
performance test.
Note: There is only one line in the benchmark test for itoa_better_y due
to limited features of itoa_better_y:
Benchmarking random unsigned 32 bit itoa_better_y ...
to_chars10.h: Signed-off-by: Markus Ehrnsperger
<Markus_Ehrnsperger@yahoo.de>
The other files are only for performance tests.
>
>
>>
>>> 2.: to_chars10.cpp:
>>>
>>> This is a test program for to_chars10 verifying the correctness of the results, and measuring the performance. The actual performance improvement is system dependent, so please test on your own system.
>>>
>>> On my system the performance improvement is about factor two, my results are:
>>>
>>>
>>> Test int8_t verifying to_chars10 = std::to_chars ... OK
>>> Test uint8_t verifying to_chars10 = std::to_chars ... OK
>>> Test int16_t verifying to_chars10 = std::to_chars ... OK
>>> Test uint16_t verifying to_chars10 = std::to_chars ... OK
>>> Test int32_t verifying to_chars10 = std::to_chars ... OK
>>> Test uint32_t verifying to_chars10 = std::to_chars ... OK
>>> Test int64_t verifying to_chars10 = std::to_chars ... OK
>>> Test uint64_t verifying to_chars10 = std::to_chars ... OK
>>>
>>> Benchmarking test case tested method ... time (lower is better)
>>> Benchmarking random unsigned 64 bit to_chars10 ... 0.00957
>>> Benchmarking random unsigned 64 bit std::to_chars ... 0.01854
>>> Benchmarking random signed 64 bit to_chars10 ... 0.01018
>>> Benchmarking random signed 64 bit std::to_chars ... 0.02297
>>> Benchmarking random unsigned 32 bit to_chars10 ... 0.00620
>>> Benchmarking random unsigned 32 bit std::to_chars ... 0.01275
>>> Benchmarking random signed 32 bit to_chars10 ... 0.00783
>>> Benchmarking random signed 32 bit std::to_chars ... 0.01606
>>> Benchmarking random unsigned 16 bit to_chars10 ... 0.00536
>>> Benchmarking random unsigned 16 bit std::to_chars ... 0.00871
>>> Benchmarking random signed 16 bit to_chars10 ... 0.00664
>>> Benchmarking random signed 16 bit std::to_chars ... 0.01154
>>> Benchmarking random unsigned 08 bit to_chars10 ... 0.00393
>>> Benchmarking random unsigned 08 bit std::to_chars ... 0.00626
>>> Benchmarking random signed 08 bit to_chars10 ... 0.00465
>>> Benchmarking random signed 08 bit std::to_chars ... 0.01089
>>>
>>>
>>> Thanks, Markus
>>>
>>>
>>>
-------------- next part --------------
// code from https://jk-jeon.github.io/posts/2022/02/jeaiii-algorithm/
static constexpr char radix_100_table[] = {
'0', '0', '0', '1', '0', '2', '0', '3', '0', '4',
'0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
'1', '0', '1', '1', '1', '2', '1', '3', '1', '4',
'1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
'2', '0', '2', '1', '2', '2', '2', '3', '2', '4',
'2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
'3', '0', '3', '1', '3', '2', '3', '3', '3', '4',
'3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
'4', '0', '4', '1', '4', '2', '4', '3', '4', '4',
'4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
'5', '0', '5', '1', '5', '2', '5', '3', '5', '4',
'5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
'6', '0', '6', '1', '6', '2', '6', '3', '6', '4',
'6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
'7', '0', '7', '1', '7', '2', '7', '3', '7', '4',
'7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
'8', '0', '8', '1', '8', '2', '8', '3', '8', '4',
'8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
'9', '0', '9', '1', '9', '2', '9', '3', '9', '4',
'9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
};
char* itoa_better_y(std::uint32_t n, char* buffer) {
std::uint64_t prod;
auto get_next_two_digits = [&]() {
prod = std::uint32_t(prod) * std::uint64_t(100);
return int(prod >> 32);
};
auto print_1 = [&](int digit) {
buffer[0] = char(digit + '0');
buffer += 1;
};
auto print_2 = [&] (int two_digits) {
std::memcpy(buffer, radix_100_table + two_digits * 2, 2);
buffer += 2;
};
auto print = [&](std::uint64_t magic_number, int extra_shift, auto remaining_count) {
prod = n * magic_number;
prod >>= extra_shift;
auto two_digits = int(prod >> 32);
if (two_digits < 10) {
print_1(two_digits);
for (int i = 0; i < remaining_count; ++i) {
print_2(get_next_two_digits());
}
}
else {
print_2(two_digits);
for (int i = 0; i < remaining_count; ++i) {
print_2(get_next_two_digits());
}
}
};
if (n < 100) {
if (n < 10) {
// 1 digit.
print_1(n);
}
else {
// 2 digit.
print_2(n);
}
}
else {
if (n < 100'0000) {
if (n < 1'0000) {
// 3 or 4 digits.
// 42949673 = ceil(2^32 / 10^2)
print(42949673, 0, std::integral_constant<int, 1>{});
}
else {
// 5 or 6 digits.
// 429497 = ceil(2^32 / 10^4)
print(429497, 0, std::integral_constant<int, 2>{});
}
}
else {
if (n < 1'0000'0000) {
// 7 or 8 digits.
// 281474978 = ceil(2^48 / 10^6) + 1
print(281474978, 16, std::integral_constant<int, 3>{});
}
else {
if (n < 10'0000'0000) {
// 9 digits.
// 1441151882 = ceil(2^57 / 10^8) + 1
prod = n * std::uint64_t(1441151882);
prod >>= 25;
print_1(int(prod >> 32));
print_2(get_next_two_digits());
print_2(get_next_two_digits());
print_2(get_next_two_digits());
print_2(get_next_two_digits());
}
else {
// 10 digits.
// 1441151881 = ceil(2^57 / 10^8)
prod = n * std::uint64_t(1441151881);
prod >>= 25;
print_2(int(prod >> 32));
print_2(get_next_two_digits());
print_2(get_next_two_digits());
print_2(get_next_two_digits());
print_2(get_next_two_digits());
}
}
}
}
return buffer;
}
-------------- next part --------------
// g++ -std=c++17 -O3 -g -Wall to_chars10.cpp (performance test)
// g++ -std=c++17 -O3 -g -Wall -fsanitize=undefined to_chars10.cpp (test of code correctness)
/*
Copyright (C) Markus Ehrnsperger. All rights reserved.
Licence: GNU General Public License version 3
This program does:
- check correctness of to_chars10
- compare performance of to_chars10 with std::to_chars
Note: Part of the code is copied / modifies from https://github.com/miloyip/itoa-benchmark
*/
#include "to_chars10.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <iostream>
#include <iomanip>
#include <cstring>
#include <chrono>
#include <random>
#include <charconv>
#include <algorithm>
#include "itoa_better_y.h"
const unsigned kIterationPerDigit = 100000;
const unsigned kIterationForRandom = 100;
const unsigned kTrial = 10;
template <typename T>
struct Traits { };
template <>
struct Traits<uint8_t> {
enum { kBufferSize = 3 };
enum { kMaxDigit = 3 };
static uint8_t Negate(uint8_t x) { return x; };
};
template <>
struct Traits<int8_t> {
enum { kBufferSize = 4 };
enum { kMaxDigit = 3 };
static int8_t Negate(int8_t x) { return -x; };
};
template <>
struct Traits<uint16_t> {
enum { kBufferSize = 5 };
enum { kMaxDigit = 5 };
static uint16_t Negate(uint16_t x) { return x; };
};
template <>
struct Traits<int16_t> {
enum { kBufferSize = 6 };
enum { kMaxDigit = 5 };
static int16_t Negate(int16_t x) { return -x; };
};
template <>
struct Traits<uint32_t> {
enum { kBufferSize = 10 };
enum { kMaxDigit = 10 };
static uint32_t Negate(uint32_t x) { return x; };
};
template <>
struct Traits<int32_t> {
enum { kBufferSize = 11 };
enum { kMaxDigit = 10 };
static int32_t Negate(int32_t x) { return -x; };
};
template <>
struct Traits<uint64_t> {
enum { kBufferSize = 20 };
enum { kMaxDigit = 20 };
static uint64_t Negate(uint64_t x) { return x; };
};
template <>
struct Traits<int64_t> {
enum { kBufferSize = 20 };
enum { kMaxDigit = 19 };
static int64_t Negate(int64_t x) { return -x; };
};
template <typename T>
void VerifyValue(T value, std::to_chars_result(*f)(char*, char*, T, int), std::to_chars_result(*g)(char*, char*, T, int), const char* test, const char* fname, const char* gname) {
char buffer_f[Traits<T>::kBufferSize];
char buffer_g[Traits<T>::kBufferSize];
std::to_chars_result r_f = f(buffer_f, buffer_f+sizeof(buffer_f), value, 10);
std::to_chars_result r_g = g(buffer_g, buffer_g+sizeof(buffer_g), value, 10);
if (r_f.ec != r_g.ec) {
std::cout << "\nError: " << fname << " -> " << std::make_error_code(r_f.ec).message();
std::cout << ", " << gname << " -> " << std::make_error_code(r_g.ec).message() << "\n";
std::cout << "Value " << +value << ", sizeof(buffer) " << sizeof(buffer_f) << "\n";
std::cout << "Test " << test << "\n";
exit(0);
}
if (r_f.ec == std::errc()) {
if (std::string_view(buffer_f, r_f.ptr-buffer_f) != std::string_view(buffer_g, r_g.ptr-buffer_g)) {
std::cout << "\nError: " << fname << " -> " << std::string_view(buffer_f, r_f.ptr-buffer_f);
std::cout << ", " << gname << " -> " << std::string_view(buffer_g, r_g.ptr-buffer_g) << "\n";
std::cout << "Value " << +value << ", sizeof(buffer) " << sizeof(buffer_f) << "\n";
std::cout << "Test " << test << "\n";
exit(0);
}
}
}
template <typename T>
void Verify(std::to_chars_result(*f)(char*, char*, T, int), std::to_chars_result(*g)(char*, char*, T, int), const char* test, const char* fname, const char* gname) {
std::cout << "Test " << test << " verifying " << fname << " = " << gname << " ... ";
// Boundary cases
VerifyValue<T>(0, f, g, test, fname, gname);
VerifyValue<T>(std::numeric_limits<T>::min(), f, g, test, fname, gname);
VerifyValue<T>(std::numeric_limits<T>::max(), f, g, test, fname, gname);
// 2^n - 1, 2^n, 10^n - 1, 10^n until overflow
for (uint32_t power = 2; power <= 10; power += 8) {
T i = 1, last;
do {
VerifyValue<T>(i - 1, f, g, test, fname, gname);
VerifyValue<T>(i, f, g, test, fname, gname);
if (std::numeric_limits<T>::min() < 0) {
VerifyValue<T>(Traits<T>::Negate(i), f, g, test, fname, gname);
VerifyValue<T>(Traits<T>::Negate(i + 1), f, g, test, fname, gname);
}
if (i >= std::numeric_limits<T>::max() / (T)power) break;
last = i;
i *= power;
} while (last < i);
}
std::cout << "OK\n";
}
template <class T>
class RandomData {
public:
static T* GetData() {
static RandomData singleton;
return singleton.mData;
}
static const size_t kCountPerDigit = 1000;
static const size_t kCount = kCountPerDigit * Traits<T>::kMaxDigit;
private:
RandomData() :
mData(new T[kCount])
{
T* p = mData;
T start = 1;
for (int digit = 1; digit <= Traits<T>::kMaxDigit; digit++) {
T end = (digit == Traits<T>::kMaxDigit) ? std::numeric_limits<T>::max() : start * 10;
T v = start;
T sign = 1;
for (size_t i = 0; i < kCountPerDigit; i++) {
*p++ = v * sign;
sign = Traits<T>::Negate(sign);
if (++v == end)
v = start;
}
start = end;
}
std::random_device rd;
std::mt19937 g(rd());
std::shuffle(mData, mData + kCount, g);
}
~RandomData() {
delete[] mData;
}
T* mData;
};
template <typename T>
void BenchRandom(std::to_chars_result(*f)(char*, char*, T, int), const char* type) {
printf("Benchmarking random %-20s ... ", type);
char buffer[Traits<T>::kBufferSize];
T* data = RandomData<T>::GetData();
size_t n = RandomData<T>::kCount;
double duration = std::numeric_limits<double>::max();
for (unsigned trial = 0; trial < kTrial; trial++) {
std::chrono::time_point<std::chrono::high_resolution_clock> begin;
std::chrono::duration<double> timeNeeded;
begin = std::chrono::high_resolution_clock::now();
for (unsigned iteration = 0; iteration < kIterationForRandom; iteration++)
for (size_t i = 0; i < n; i++)
f(buffer, buffer+Traits<T>::kBufferSize, data[i], 10);
timeNeeded = std::chrono::high_resolution_clock::now() - begin;
duration = std::min(duration, timeNeeded.count() );
}
duration *= 1e6 / (kIterationForRandom * n); // convert to nano second per operation
printf("%8.5f\n", duration);
}
template<typename T, std::enable_if_t<std::is_integral_v<T>, bool> = true>
inline std::to_chars_result to_chars10(char* first, char* last, T value, int base) {
return to_chars10(first, last, value);
}
template<typename T, std::enable_if_t<std::is_integral_v<T>, bool> = true>
inline std::to_chars_result itoa_better_y(char* first, char* last, T value, int base) {
if (__builtin_expect(to_chars10_internal::to_chars10_range_check(first, last, value), true))
return to_chars10_internal::to_chars_success(itoa_better_y(value, first));
return to_chars10_internal::to_chars_error(last);
}
int main()
{
Verify<int8_t>(to_chars10, std::to_chars, " int8_t", "to_chars10", "std::to_chars");
Verify<uint8_t>(to_chars10, std::to_chars, " uint8_t", "to_chars10", "std::to_chars");
Verify<uint8_t>(itoa_better_y, std::to_chars, " uint8_t", "itoa_better_y", "std::to_chars");
Verify<int16_t>(to_chars10, std::to_chars, " int16_t", "to_chars10", "std::to_chars");
Verify<uint16_t>(to_chars10, std::to_chars, "uint16_t", "to_chars10", "std::to_chars");
Verify<uint16_t>(itoa_better_y, std::to_chars, "uint16_t", "itoa_better_y", "std::to_chars");
Verify<int32_t>(to_chars10, std::to_chars, " int32_t", "to_chars10", "std::to_chars");
Verify<uint32_t>(to_chars10, std::to_chars, "uint32_t", "to_chars10", "std::to_chars");
Verify<uint32_t>(itoa_better_y, std::to_chars, "uint32_t", "itoa_better_y", "std::to_chars");
Verify<int64_t>(to_chars10, std::to_chars, " int64_t", "to_chars10", "std::to_chars");
Verify<uint64_t>(to_chars10, std::to_chars, "uint64_t", "to_chars10", "std::to_chars");
std::cout << "\n";
std::cout << "Benchmarking test case tested method ... time (lower is better)\n";
BenchRandom<uint64_t>(to_chars10 , "unsigned 64 bit to_chars10 ");
BenchRandom<uint64_t>(std::to_chars , "unsigned 64 bit std::to_chars ");
BenchRandom<int64_t>(to_chars10 , " signed 64 bit to_chars10 ");
BenchRandom<int64_t>(std::to_chars , " signed 64 bit std::to_chars ");
BenchRandom<uint32_t>(to_chars10 , "unsigned 32 bit to_chars10 ");
BenchRandom<uint32_t>(itoa_better_y , "unsigned 32 bit itoa_better_y ");
BenchRandom<uint32_t>(std::to_chars , "unsigned 32 bit std::to_chars ");
BenchRandom<int32_t>(to_chars10 , " signed 32 bit to_chars10 ");
BenchRandom<int32_t>(std::to_chars , " signed 32 bit std::to_chars ");
BenchRandom<uint16_t>(to_chars10 , "unsigned 16 bit to_chars10 ");
BenchRandom<uint16_t>(std::to_chars , "unsigned 16 bit std::to_chars ");
BenchRandom<int16_t>(to_chars10 , " signed 16 bit to_chars10 ");
BenchRandom<int16_t>(std::to_chars , " signed 16 bit std::to_chars ");
BenchRandom<uint8_t>(to_chars10 , "unsigned 08 bit to_chars10 ");
BenchRandom<uint8_t>(std::to_chars , "unsigned 08 bit std::to_chars ");
BenchRandom<int8_t>(to_chars10 , " signed 08 bit to_chars10 ");
BenchRandom<int8_t>(std::to_chars , " signed 08 bit std::to_chars ");
}
-------------- next part --------------
/*
Copyright (C) Markus Ehrnsperger. All rights reserved.
Licence: GNU General Public License version 3, with the GCC RUNTIME LIBRARY EXCEPTION
Usage:
use
res = to_chars10(first, last, value);
instead of
res = std::to_chars(first, last, value, 10);
same parameters, return values, ... . So, identical but faster
*/
#ifndef TO_CHARS10_H
#define TO_CHARS10_H
#include <cstdint>
#include <cstring>
#include <charconv>
#include <endian.h>
namespace to_chars10_internal {
/*
Naming schema
char *itoaN_M(char *b, uintXX_t i): ==================================
- write between N and M digits to b.
- if less than N digits are needed for i, left fill with '0'
- return the one-past-the-end pointer of the digits written
- even if b is returned (because i == 0 && N == 0), b[0] might change.
- i < 10^M must be valid. This is not checked
*/
alignas(2) static const char digits_100[200] = {
'0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
'1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
'2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
'3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9',
'4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9',
'5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9',
'6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9',
'7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9',
'8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9',
'9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
};
// ======== 0-2 digits ========================================================
// max uint16_t 65535 -> large enough for 1-2 digits
inline char *itoa0_2(char *b, uint16_t i, bool write_ten) {
// write 0 to 2 chars to b. 0 <= i < 100
// if write_ten == true, write the higher(i/10) digit
// return b+write_ten
// note: if the lower(i%10) digit is needed, the caller has to increase b.
const char *src = digits_100 + 2*i;
*b = *src;
b += write_ten;
*b = *++src;
return b;
}
inline char *itoa0_2(char *b, uint16_t i) {
// write 0 to 2 chars to b and return the new position, 0 <= i < 100
return itoa0_2(b, i, i>9) + (i != 0);
}
inline char *itoa1_2(char *b, uint16_t i) {
// write 1 or 2 chars to b and return the new position, 0 <= i < 100
return itoa0_2(b, i, i>9) + 1;
}
// ======== 1-4 digits ========================================================
// max uint16_t 65535 -> large enough for 1-4 digits
inline char *itoa1_4(char *b, uint16_t i) {
// write 1 to 4 chars to b and return the new position, 0 <= i < 10^4
uint16_t q = ((uint32_t)i * 5243U) >> 19; // q = i/100;
b = itoa0_2(b, q);
return itoa0_2(b, i - q*100, i>9) + 1;
}
inline char *itoa3_4(char *b, uint16_t i) {
// write 3 to 4 chars to b and return the new position, 0 <= i < 10^4
// Left-fill with 0.
uint16_t q = ((uint32_t)i * 5243U) >> 19; // q = i/100;
b = itoa1_2(b, q);
memcpy(b, digits_100 + ((i - q*100) << 1), 2);
return b+2;
}
inline char *itoa4_4(char *b, uint16_t i) {
// write exactly 4 chars to b. Left-fill with 0. i < 10^4
uint16_t q = ((uint32_t)i * 5243U) >> 19; // q = i/100; i < 43699
memcpy(b, digits_100 + (q << 1), 2);
b += 2;
memcpy(b, digits_100 + ((i - q*100) << 1), 2);
return b+2;
}
// ======== 1-8 digits ========================================================
// max uint32_t 4294967295 (10 digits) -> large enough for 1-8 digits
inline char *itoa5_8(char *b, uint32_t i) {
// write 5 to 8 chars to b and return the new position, 0 <= i < 10^8
// Left-fill with 0.
uint32_t q = i/10000;
b = itoa1_4(b, q);
return itoa4_4(b, i - q*10000);
}
inline char *itoa1_8(char *b, uint32_t i) {
// write 1 to 8 chars to b and return the new position, 0 <= i < 10^8
uint32_t q = i/1000000;
uint32_t j = i - q*1000000;
b = itoa0_2(b, q);
q = j/10000;
j = j - q*10000;
b = itoa0_2(b, q, i>99999) + (i>9999);
q = (j * 5243U) >> 19; // q = j/100;
b = itoa0_2(b, q, i>999) + (i>99);
return itoa0_2(b, j - q*100, i>9) + 1;
}
inline char *itoa8_8(char *b, uint32_t i) {
// write exactly 8 chars to b. Left-fill with 0. i < 10^8 (required, but not checked)
for (int j = 6; j > 0; j-=2) {
uint32_t q = i/100;
memcpy(b+j, digits_100 + ((i - q*100) << 1), 2);
i = q;
}
memcpy(b, digits_100 + (i<< 1), 2);
return b+8;
}
// ======== 8-16 digits ========================================================
template<typename T> inline char *itoa9_16(char *b, T i) {
// write 9 to 16 chars to b and return the new position, 10^8 <= i < 10^16
T q = i/100000000;
b = itoa1_8(b, q);
return itoa8_8(b, i - q*100000000);
}
// ======== implementation of itoa(char *b, T i), depending on type T =========
// max uint8_t 256
template<typename T, std::enable_if_t<sizeof(T) == 1, bool> = true, std::enable_if_t<std::is_unsigned_v<T>, bool> = true>
inline char *itoa(char *b, T i) {
// write 1 to 3 digits to b and return the new position
if (i < 100) return itoa1_2(b, i);
T q = i/100; // 256/100 = 2
*b++ = q + '0';
memcpy(b, digits_100 + ((i - q*100) << 1), 2);
return b+2;
}
// max uint16_t 65535 5 digits
template<typename T, std::enable_if_t<sizeof(T) == 2, bool> = true, std::enable_if_t<std::is_unsigned_v<T>, bool> = true>
inline char *itoa(char *b, T i) {
// write 1 to 5 digits to b and return the new position
if (i < 100) return itoa1_2(b, i);
if (i < 10000) return itoa3_4(b, i);
T q = i/10000; // 65535/10000 = 6
*b++ = q + '0';
return itoa4_4(b, i - q*10000);
}
// max uint32_t 4294967295 (10 digits)
template<typename T, std::enable_if_t<sizeof(T) <= 4, bool> = true, std::enable_if_t<sizeof(T) >= 3, bool> = true, std::enable_if_t<std::is_unsigned_v<T>, bool> = true>
inline char *itoa(char *b, T i) {
// write 1 to 10 digits to b and return the new position
if (i < 100) return itoa1_2(b, i);
if (i < 10000) return itoa3_4(b, i);
if (i < 100000000) return itoa5_8(b, i);
T q = i/100000000; // 4294967295/100000000 = 42;
b = itoa1_2(b, q);
return itoa8_8(b, i - q*100000000);
}
// max uint64_t 18446744073709551615 (20 digits)
template<typename T, std::enable_if_t<sizeof(T) >= 5, bool> = true, std::enable_if_t<std::is_unsigned_v<T>, bool> = true>
inline char *itoa(char *b, T i) {
// write 1 to 20 digits to b and return the new position
if (i < 100) return itoa1_2(b, i);
if (i < 10000) return itoa3_4(b, i);
if (i < 100000000) return itoa5_8(b, i);
if (i < 10000000000000000) return itoa9_16(b, i);
T q = i/10000000000000000; // 18446744073709551615/10000000000000000 = 1844
b = itoa1_4(b, q);
i = i - q*10000000000000000; // now i up to 16 digits
q = i/100000000;
itoa8_8(b , q);
itoa8_8(b + 8, i - q*100000000);
return b+16;
}
template<typename T, std::enable_if_t<std::is_signed_v<T>, bool> = true>
inline char *itoa(char *b, T i) {
// write 1 to 20 chars to b and return the new position
typedef std::make_unsigned_t<T> TU;
bool minus = i < 0;
TU u = (TU)(i) - 2 * (TU)(i) * minus;
*b = '-';
return itoa(b + minus, u);
}
// ======== verify if the provided range is large enough =========
// max_int[0] = 0
// max_int[N>0] = largest integer number that can be presented with N digits
static const uint64_t max_int[20] = {
0,
9,
99,
999,
9999,
99999,
999999,
9999999,
99999999,
999999999,
9999999999,
99999999999,
999999999999,
9999999999999,
99999999999999,
999999999999999,
9999999999999999,
99999999999999999,
999999999999999999,
9999999999999999999u };
// ===== max_chars_for_to_chars10(T value) ===============================
// return the maximum number of charaters that will be used by to_chars10
// for any value of data type T
template<typename T, std::enable_if_t<sizeof(T) == 1, bool> = true, std::enable_if_t<std::is_unsigned_v<T>, bool> = true>
inline std::ptrdiff_t max_chars_for_to_chars10(T value) {
return 3; // 256
}
template<typename T, std::enable_if_t<sizeof(T) == 1, bool> = true, std::enable_if_t<std::is_signed_v<T>, bool> = true>
inline std::ptrdiff_t max_chars_for_to_chars10(T value) {
return 4; // -128
}
template<typename T, std::enable_if_t<sizeof(T) == 2, bool> = true, std::enable_if_t<std::is_unsigned_v<T>, bool> = true>
inline std::ptrdiff_t max_chars_for_to_chars10(T value) {
return 5; // 65535
}
template<typename T, std::enable_if_t<sizeof(T) == 2, bool> = true, std::enable_if_t<std::is_signed_v<T>, bool> = true>
inline std::ptrdiff_t max_chars_for_to_chars10(T value) {
return 6; // -32768
}
template<typename T, std::enable_if_t<sizeof(T) == 4, bool> = true, std::enable_if_t<std::is_unsigned_v<T>, bool> = true>
inline std::ptrdiff_t max_chars_for_to_chars10(T value) {
return 10; // 4294967295
}
template<typename T, std::enable_if_t<sizeof(T) == 4, bool> = true, std::enable_if_t<std::is_signed_v<T>, bool> = true>
inline std::ptrdiff_t max_chars_for_to_chars10(T value) {
return 11; // -2147483648
}
template<typename T, std::enable_if_t<sizeof(T) == 8, bool> = true>
inline std::ptrdiff_t max_chars_for_to_chars10(T value) {
return 20;
}
inline std::to_chars_result to_chars_error(char* last) {
std::to_chars_result res;
res.ec = std::errc::value_too_large;
res.ptr = last;
return res;
}
inline std::to_chars_result to_chars_success(char* last) {
std::to_chars_result res;
res.ec = std::errc();
res.ptr = last;
return res;
}
template<typename T, std::enable_if_t<std::is_integral_v<T>, bool> = true>
inline bool to_chars10_range_check(char* first, char* last, T value) {
// return true if [first, last) is large enough for value
if (__builtin_expect(last - first >= max_chars_for_to_chars10(value), true)) return true;
if (__builtin_expect(last <= first, false)) return false;
if (value >= 0) {
// last - first > 0
return (uint64_t)value <= max_int[last - first];
} else {
// value != 0, && last - first - 1 < 19
return (int64_t)value >= -(int64_t)max_int[last - first - 1];
}
}
} // end of namespace to_chars10_internal
template<typename T, std::enable_if_t<std::is_integral_v<T>, bool> = true>
inline std::to_chars_result to_chars10(char* first, char* last, T value) {
// same as std::to_chars(char* first, char* last, T value, 10)
if (__builtin_expect(to_chars10_internal::to_chars10_range_check(first, last, value), true))
return to_chars10_internal::to_chars_success(to_chars10_internal::itoa(first, value));
return to_chars10_internal::to_chars_error(last);
}
#endif // TO_CHARS10_H
More information about the Libstdc++
mailing list