include/boost/json/detail/ryu/impl/d2s.ipp

TLA	Baseline
	1		// Copyright 2018 Ulf Adams	1		// Copyright 2018 Ulf Adams
	2		//	2		//
	3		// The contents of this file may be used under the terms of the Apache License,	3		// The contents of this file may be used under the terms of the Apache License,
	4		// Version 2.0.	4		// Version 2.0.
	5		//	5		//
	6		// (See accompanying file LICENSE-Apache or copy at	6		// (See accompanying file LICENSE-Apache or copy at
	7		// http://www.apache.org/licenses/LICENSE-2.0)	7		// http://www.apache.org/licenses/LICENSE-2.0)
	8		//	8		//
	9		// Alternatively, the contents of this file may be used under the terms of	9		// Alternatively, the contents of this file may be used under the terms of
	10		// the Boost Software License, Version 1.0.	10		// the Boost Software License, Version 1.0.
	11		// (See accompanying file LICENSE-Boost or copy at	11		// (See accompanying file LICENSE-Boost or copy at
	12		// https://www.boost.org/LICENSE_1_0.txt)	12		// https://www.boost.org/LICENSE_1_0.txt)
	13		//	13		//
	14		// Unless required by applicable law or agreed to in writing, this software	14		// Unless required by applicable law or agreed to in writing, this software
	15		// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY	15		// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	16		// KIND, either express or implied.	16		// KIND, either express or implied.
	17			17
	18		// Runtime compiler options:	18		// Runtime compiler options:
	19		// -DRYU_DEBUG Generate verbose debugging output to stdout.	19		// -DRYU_DEBUG Generate verbose debugging output to stdout.
	20		//	20		//
	21		// -DRYU_ONLY_64_BIT_OPS Avoid using uint128_t or 64-bit intrinsics. Slower,	21		// -DRYU_ONLY_64_BIT_OPS Avoid using uint128_t or 64-bit intrinsics. Slower,
	22		// depending on your compiler.	22		// depending on your compiler.
	23		//	23		//
	24		// -DRYU_OPTIMIZE_SIZE Use smaller lookup tables. Instead of storing every	24		// -DRYU_OPTIMIZE_SIZE Use smaller lookup tables. Instead of storing every
	25		// required power of 5, only store every 26th entry, and compute	25		// required power of 5, only store every 26th entry, and compute
	26		// intermediate values with a multiplication. This reduces the lookup table	26		// intermediate values with a multiplication. This reduces the lookup table
	27		// size by about 10x (only one case, and only double) at the cost of some	27		// size by about 10x (only one case, and only double) at the cost of some
	28		// performance. Currently requires MSVC intrinsics.	28		// performance. Currently requires MSVC intrinsics.
	29			29
	30		/*	30		/*
	31		This is a derivative work	31		This is a derivative work
	32		*/	32		*/
	33			33
	34		#ifndef BOOST_JSON_DETAIL_RYU_IMPL_D2S_IPP	34		#ifndef BOOST_JSON_DETAIL_RYU_IMPL_D2S_IPP
	35		#define BOOST_JSON_DETAIL_RYU_IMPL_D2S_IPP	35		#define BOOST_JSON_DETAIL_RYU_IMPL_D2S_IPP
	36			36
	37		#include <boost/json/detail/ryu/ryu.hpp>	37		#include <boost/json/detail/ryu/ryu.hpp>
	38		#include <cstdlib>	38		#include <cstdlib>
	39		#include <cstring>	39		#include <cstring>
	40			40
	41		#ifdef RYU_DEBUG	41		#ifdef RYU_DEBUG
	42		#include <stdio.h>	42		#include <stdio.h>
	43		#endif	43		#endif
	44			44
	45		// ABSL avoids uint128_t on Win32 even if __SIZEOF_INT128__ is defined.	45		// ABSL avoids uint128_t on Win32 even if __SIZEOF_INT128__ is defined.
	46		// Let's do the same for now.	46		// Let's do the same for now.
	47		#if defined(__SIZEOF_INT128__) && !defined(_MSC_VER) && !defined(RYU_ONLY_64_BIT_OPS)	47		#if defined(__SIZEOF_INT128__) && !defined(_MSC_VER) && !defined(RYU_ONLY_64_BIT_OPS)
	48		#define BOOST_JSON_RYU_HAS_UINT128	48		#define BOOST_JSON_RYU_HAS_UINT128
	49		#elif defined(_MSC_VER) && !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64)	49		#elif defined(_MSC_VER) && !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64)
	50		#define BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS	50		#define BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS
	51		#endif	51		#endif
	52			52
	53		#include <boost/json/detail/ryu/detail/common.hpp>	53		#include <boost/json/detail/ryu/detail/common.hpp>
	54		#include <boost/json/detail/ryu/detail/digit_table.hpp>	54		#include <boost/json/detail/ryu/detail/digit_table.hpp>
	55		#include <boost/json/detail/ryu/detail/d2s.hpp>	55		#include <boost/json/detail/ryu/detail/d2s.hpp>
	56		#include <boost/json/detail/ryu/detail/d2s_intrinsics.hpp>	56		#include <boost/json/detail/ryu/detail/d2s_intrinsics.hpp>
	57			57
	58		namespace boost {	58		namespace boost {
	59		namespace json {	59		namespace json {
	60		namespace detail {	60		namespace detail {
	61			61
	62		namespace ryu {	62		namespace ryu {
	63		namespace detail {	63		namespace detail {
	64			64
	65		// We need a 64x128-bit multiplication and a subsequent 128-bit shift.	65		// We need a 64x128-bit multiplication and a subsequent 128-bit shift.
	66		// Multiplication:	66		// Multiplication:
	67		// The 64-bit factor is variable and passed in, the 128-bit factor comes	67		// The 64-bit factor is variable and passed in, the 128-bit factor comes
	68		// from a lookup table. We know that the 64-bit factor only has 55	68		// from a lookup table. We know that the 64-bit factor only has 55
	69		// significant bits (i.e., the 9 topmost bits are zeros). The 128-bit	69		// significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
	70		// factor only has 124 significant bits (i.e., the 4 topmost bits are	70		// factor only has 124 significant bits (i.e., the 4 topmost bits are
	71		// zeros).	71		// zeros).
	72		// Shift:	72		// Shift:
	73		// In principle, the multiplication result requires 55 + 124 = 179 bits to	73		// In principle, the multiplication result requires 55 + 124 = 179 bits to
	74		// represent. However, we then shift this value to the right by j, which is	74		// represent. However, we then shift this value to the right by j, which is
	75		// at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64	75		// at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64
	76		// bits. This means that we only need the topmost 64 significant bits of	76		// bits. This means that we only need the topmost 64 significant bits of
	77		// the 64x128-bit multiplication.	77		// the 64x128-bit multiplication.
	78		//	78		//
	79		// There are several ways to do this:	79		// There are several ways to do this:
	80		// 1. Best case: the compiler exposes a 128-bit type.	80		// 1. Best case: the compiler exposes a 128-bit type.
	81		// We perform two 64x64-bit multiplications, add the higher 64 bits of the	81		// We perform two 64x64-bit multiplications, add the higher 64 bits of the
	82		// lower result to the higher result, and shift by j - 64 bits.	82		// lower result to the higher result, and shift by j - 64 bits.
	83		//	83		//
	84		// We explicitly cast from 64-bit to 128-bit, so the compiler can tell	84		// We explicitly cast from 64-bit to 128-bit, so the compiler can tell
	85		// that these are only 64-bit inputs, and can map these to the best	85		// that these are only 64-bit inputs, and can map these to the best
	86		// possible sequence of assembly instructions.	86		// possible sequence of assembly instructions.
	87		// x64 machines happen to have matching assembly instructions for	87		// x64 machines happen to have matching assembly instructions for
	88		// 64x64-bit multiplications and 128-bit shifts.	88		// 64x64-bit multiplications and 128-bit shifts.
	89		//	89		//
	90		// 2. Second best case: the compiler exposes intrinsics for the x64 assembly	90		// 2. Second best case: the compiler exposes intrinsics for the x64 assembly
	91		// instructions mentioned in 1.	91		// instructions mentioned in 1.
	92		//	92		//
	93		// 3. We only have 64x64 bit instructions that return the lower 64 bits of	93		// 3. We only have 64x64 bit instructions that return the lower 64 bits of
	94		// the result, i.e., we have to use plain C.	94		// the result, i.e., we have to use plain C.
	95		// Our inputs are less than the full width, so we have three options:	95		// Our inputs are less than the full width, so we have three options:
	96		// a. Ignore this fact and just implement the intrinsics manually.	96		// a. Ignore this fact and just implement the intrinsics manually.
	97		// b. Split both into 31-bit pieces, which guarantees no internal overflow,	97		// b. Split both into 31-bit pieces, which guarantees no internal overflow,
	98		// but requires extra work upfront (unless we change the lookup table).	98		// but requires extra work upfront (unless we change the lookup table).
	99		// c. Split only the first factor into 31-bit pieces, which also guarantees	99		// c. Split only the first factor into 31-bit pieces, which also guarantees
	100		// no internal overflow, but requires extra work since the intermediate	100		// no internal overflow, but requires extra work since the intermediate
	101		// results are not perfectly aligned.	101		// results are not perfectly aligned.
	102		#if defined(BOOST_JSON_RYU_HAS_UINT128)	102		#if defined(BOOST_JSON_RYU_HAS_UINT128)
	103			103
	104		// Best case: use 128-bit type.	104		// Best case: use 128-bit type.
	105		inline	105		inline
	106		std::uint64_t	106		std::uint64_t
HIT CBC	107	786	mulShift(	107	786	mulShift(
	108		const std::uint64_t m,	108		const std::uint64_t m,
	109		const std::uint64_t* const mul,	109		const std::uint64_t* const mul,
	110		const std::int32_t j) noexcept	110		const std::int32_t j) noexcept
	111		{	111		{
HIT CBC	112	786	const uint128_t b0 = ((uint128_t) m) * mul[0];	112	786	const uint128_t b0 = ((uint128_t) m) * mul[0];
HIT CBC	113	786	const uint128_t b2 = ((uint128_t) m) * mul[1];	113	786	const uint128_t b2 = ((uint128_t) m) * mul[1];
HIT CBC	114	786	return (std::uint64_t) (((b0 >> 64) + b2) >> (j - 64));	114	786	return (std::uint64_t) (((b0 >> 64) + b2) >> (j - 64));
	115		}	115		}
	116			116
	117		inline	117		inline
	118		uint64_t	118		uint64_t
HIT CBC	119	262	mulShiftAll(	119	262	mulShiftAll(
	120		const std::uint64_t m,	120		const std::uint64_t m,
	121		const std::uint64_t* const mul,	121		const std::uint64_t* const mul,
	122		std::int32_t const j,	122		std::int32_t const j,
	123		std::uint64_t* const vp,	123		std::uint64_t* const vp,
	124		std::uint64_t* const vm,	124		std::uint64_t* const vm,
	125		const std::uint32_t mmShift) noexcept	125		const std::uint32_t mmShift) noexcept
	126		{	126		{
	127		// m <<= 2;	127		// m <<= 2;
	128		// uint128_t b0 = ((uint128_t) m) * mul[0]; // 0	128		// uint128_t b0 = ((uint128_t) m) * mul[0]; // 0
	129		// uint128_t b2 = ((uint128_t) m) * mul[1]; // 64	129		// uint128_t b2 = ((uint128_t) m) * mul[1]; // 64
	130		//	130		//
	131		// uint128_t hi = (b0 >> 64) + b2;	131		// uint128_t hi = (b0 >> 64) + b2;
	132		// uint128_t lo = b0 & 0xffffffffffffffffull;	132		// uint128_t lo = b0 & 0xffffffffffffffffull;
	133		// uint128_t factor = (((uint128_t) mul[1]) << 64) + mul[0];	133		// uint128_t factor = (((uint128_t) mul[1]) << 64) + mul[0];
	134		// uint128_t vpLo = lo + (factor << 1);	134		// uint128_t vpLo = lo + (factor << 1);
	135		// *vp = (std::uint64_t) ((hi + (vpLo >> 64)) >> (j - 64));	135		// *vp = (std::uint64_t) ((hi + (vpLo >> 64)) >> (j - 64));
	136		// uint128_t vmLo = lo - (factor << mmShift);	136		// uint128_t vmLo = lo - (factor << mmShift);
	137		// *vm = (std::uint64_t) ((hi + (vmLo >> 64) - (((uint128_t) 1ull) << 64)) >> (j - 64));	137		// *vm = (std::uint64_t) ((hi + (vmLo >> 64) - (((uint128_t) 1ull) << 64)) >> (j - 64));
	138		// return (std::uint64_t) (hi >> (j - 64));	138		// return (std::uint64_t) (hi >> (j - 64));
HIT CBC	139	262	vp = mulShift(4 m + 2, mul, j);	139	262	vp = mulShift(4 m + 2, mul, j);
HIT CBC	140	262	vm = mulShift(4 m - 1 - mmShift, mul, j);	140	262	vm = mulShift(4 m - 1 - mmShift, mul, j);
HIT CBC	141	262	return mulShift(4 * m, mul, j);	141	262	return mulShift(4 * m, mul, j);
	142		}	142		}
	143			143
	144		#elif defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS)	144		#elif defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS)
	145			145
	146		inline	146		inline
	147		std::uint64_t	147		std::uint64_t
	148		mulShift(	148		mulShift(
	149		const std::uint64_t m,	149		const std::uint64_t m,
	150		const std::uint64_t* const mul,	150		const std::uint64_t* const mul,
	151		const std::int32_t j) noexcept	151		const std::int32_t j) noexcept
	152		{	152		{
	153		// m is maximum 55 bits	153		// m is maximum 55 bits
	154		std::uint64_t high1; // 128	154		std::uint64_t high1; // 128
	155		std::uint64_t const low1 = umul128(m, mul[1], &high1); // 64	155		std::uint64_t const low1 = umul128(m, mul[1], &high1); // 64
	156		std::uint64_t high0; // 64	156		std::uint64_t high0; // 64
	157		umul128(m, mul[0], &high0); // 0	157		umul128(m, mul[0], &high0); // 0
	158		std::uint64_t const sum = high0 + low1;	158		std::uint64_t const sum = high0 + low1;
	159		if (sum < high0)	159		if (sum < high0)
	160		++high1; // overflow into high1	160		++high1; // overflow into high1
	161		return shiftright128(sum, high1, j - 64);	161		return shiftright128(sum, high1, j - 64);
	162		}	162		}
	163			163
	164		inline	164		inline
	165		std::uint64_t	165		std::uint64_t
	166		mulShiftAll(	166		mulShiftAll(
	167		const std::uint64_t m,	167		const std::uint64_t m,
	168		const std::uint64_t* const mul,	168		const std::uint64_t* const mul,
	169		const std::int32_t j,	169		const std::int32_t j,
	170		std::uint64_t* const vp,	170		std::uint64_t* const vp,
	171		std::uint64_t* const vm,	171		std::uint64_t* const vm,
	172		const std::uint32_t mmShift) noexcept	172		const std::uint32_t mmShift) noexcept
	173		{	173		{
	174		vp = mulShift(4 m + 2, mul, j);	174		vp = mulShift(4 m + 2, mul, j);
	175		vm = mulShift(4 m - 1 - mmShift, mul, j);	175		vm = mulShift(4 m - 1 - mmShift, mul, j);
	176		return mulShift(4 * m, mul, j);	176		return mulShift(4 * m, mul, j);
	177		}	177		}
	178			178
	179		#else // !defined(BOOST_JSON_RYU_HAS_UINT128) && !defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS)	179		#else // !defined(BOOST_JSON_RYU_HAS_UINT128) && !defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS)
	180			180
	181		inline	181		inline
	182		std::uint64_t	182		std::uint64_t
	183		mulShiftAll(	183		mulShiftAll(
	184		std::uint64_t m,	184		std::uint64_t m,
	185		const std::uint64_t* const mul,	185		const std::uint64_t* const mul,
	186		const std::int32_t j,	186		const std::int32_t j,
	187		std::uint64_t* const vp,	187		std::uint64_t* const vp,
	188		std::uint64_t* const vm,	188		std::uint64_t* const vm,
	189		const std::uint32_t mmShift)	189		const std::uint32_t mmShift)
	190		{	190		{
	191		m <<= 1;	191		m <<= 1;
	192		// m is maximum 55 bits	192		// m is maximum 55 bits
	193		std::uint64_t tmp;	193		std::uint64_t tmp;
	194		std::uint64_t const lo = umul128(m, mul[0], &tmp);	194		std::uint64_t const lo = umul128(m, mul[0], &tmp);
	195		std::uint64_t hi;	195		std::uint64_t hi;
	196		std::uint64_t const mid = tmp + umul128(m, mul[1], &hi);	196		std::uint64_t const mid = tmp + umul128(m, mul[1], &hi);
	197		hi += mid < tmp; // overflow into hi	197		hi += mid < tmp; // overflow into hi
	198			198
	199		const std::uint64_t lo2 = lo + mul[0];	199		const std::uint64_t lo2 = lo + mul[0];
	200		const std::uint64_t mid2 = mid + mul[1] + (lo2 < lo);	200		const std::uint64_t mid2 = mid + mul[1] + (lo2 < lo);
	201		const std::uint64_t hi2 = hi + (mid2 < mid);	201		const std::uint64_t hi2 = hi + (mid2 < mid);
	202		*vp = shiftright128(mid2, hi2, (std::uint32_t)(j - 64 - 1));	202		*vp = shiftright128(mid2, hi2, (std::uint32_t)(j - 64 - 1));
	203			203
	204		if (mmShift == 1)	204		if (mmShift == 1)
	205		{	205		{
	206		const std::uint64_t lo3 = lo - mul[0];	206		const std::uint64_t lo3 = lo - mul[0];
	207		const std::uint64_t mid3 = mid - mul[1] - (lo3 > lo);	207		const std::uint64_t mid3 = mid - mul[1] - (lo3 > lo);
	208		const std::uint64_t hi3 = hi - (mid3 > mid);	208		const std::uint64_t hi3 = hi - (mid3 > mid);
	209		*vm = shiftright128(mid3, hi3, (std::uint32_t)(j - 64 - 1));	209		*vm = shiftright128(mid3, hi3, (std::uint32_t)(j - 64 - 1));
	210		}	210		}
	211		else	211		else
	212		{	212		{
	213		const std::uint64_t lo3 = lo + lo;	213		const std::uint64_t lo3 = lo + lo;
	214		const std::uint64_t mid3 = mid + mid + (lo3 < lo);	214		const std::uint64_t mid3 = mid + mid + (lo3 < lo);
	215		const std::uint64_t hi3 = hi + hi + (mid3 < mid);	215		const std::uint64_t hi3 = hi + hi + (mid3 < mid);
	216		const std::uint64_t lo4 = lo3 - mul[0];	216		const std::uint64_t lo4 = lo3 - mul[0];
	217		const std::uint64_t mid4 = mid3 - mul[1] - (lo4 > lo3);	217		const std::uint64_t mid4 = mid3 - mul[1] - (lo4 > lo3);
	218		const std::uint64_t hi4 = hi3 - (mid4 > mid3);	218		const std::uint64_t hi4 = hi3 - (mid4 > mid3);
	219		*vm = shiftright128(mid4, hi4, (std::uint32_t)(j - 64));	219		*vm = shiftright128(mid4, hi4, (std::uint32_t)(j - 64));
	220		}	220		}
	221			221
	222		return shiftright128(mid, hi, (std::uint32_t)(j - 64 - 1));	222		return shiftright128(mid, hi, (std::uint32_t)(j - 64 - 1));
	223		}	223		}
	224			224
	225		#endif // BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS	225		#endif // BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS
	226			226
	227		inline	227		inline
	228		std::uint32_t	228		std::uint32_t
HIT CBC	229	538	decimalLength17(	229	538	decimalLength17(
	230		const std::uint64_t v)	230		const std::uint64_t v)
	231		{	231		{
	232		// This is slightly faster than a loop.	232		// This is slightly faster than a loop.
	233		// The average output length is 16.38 digits, so we check high-to-low.	233		// The average output length is 16.38 digits, so we check high-to-low.
	234		// Function precondition: v is not an 18, 19, or 20-digit number.	234		// Function precondition: v is not an 18, 19, or 20-digit number.
	235		// (17 digits are sufficient for round-tripping.)	235		// (17 digits are sufficient for round-tripping.)
HIT CBC	236	538	BOOST_ASSERT(v < 100000000000000000L);	236	538	BOOST_ASSERT(v < 100000000000000000L);
HIT CBC	237	538	if (v >= 10000000000000000L) { return 17; }	237	538	if (v >= 10000000000000000L) { return 17; }
HIT CBC	238	528	if (v >= 1000000000000000L) { return 16; }	238	528	if (v >= 1000000000000000L) { return 16; }
HIT CBC	239	509	if (v >= 100000000000000L) { return 15; }	239	509	if (v >= 100000000000000L) { return 15; }
HIT CBC	240	505	if (v >= 10000000000000L) { return 14; }	240	505	if (v >= 10000000000000L) { return 14; }
HIT CBC	241	500	if (v >= 1000000000000L) { return 13; }	241	500	if (v >= 1000000000000L) { return 13; }
HIT CBC	242	494	if (v >= 100000000000L) { return 12; }	242	494	if (v >= 100000000000L) { return 12; }
HIT CBC	243	489	if (v >= 10000000000L) { return 11; }	243	489	if (v >= 10000000000L) { return 11; }
HIT CBC	244	484	if (v >= 1000000000L) { return 10; }	244	484	if (v >= 1000000000L) { return 10; }
HIT CBC	245	474	if (v >= 100000000L) { return 9; }	245	474	if (v >= 100000000L) { return 9; }
HIT CBC	246	467	if (v >= 10000000L) { return 8; }	246	467	if (v >= 10000000L) { return 8; }
HIT CBC	247	461	if (v >= 1000000L) { return 7; }	247	461	if (v >= 1000000L) { return 7; }
HIT CBC	248	455	if (v >= 100000L) { return 6; }	248	455	if (v >= 100000L) { return 6; }
HIT CBC	249	450	if (v >= 10000L) { return 5; }	249	450	if (v >= 10000L) { return 5; }
HIT CBC	250	445	if (v >= 1000L) { return 4; }	250	445	if (v >= 1000L) { return 4; }
HIT CBC	251	439	if (v >= 100L) { return 3; }	251	439	if (v >= 100L) { return 3; }
HIT CBC	252	421	if (v >= 10L) { return 2; }	252	421	if (v >= 10L) { return 2; }
HIT CBC	253	415	return 1;	253	415	return 1;
	254		}	254		}
	255			255
	256		// A floating decimal representing m * 10^e.	256		// A floating decimal representing m * 10^e.
	257		struct floating_decimal_64	257		struct floating_decimal_64
	258		{	258		{
	259		std::uint64_t mantissa;	259		std::uint64_t mantissa;
	260		// Decimal exponent's range is -324 to 308	260		// Decimal exponent's range is -324 to 308
	261		// inclusive, and can fit in a short if needed.	261		// inclusive, and can fit in a short if needed.
	262		std::int32_t exponent;	262		std::int32_t exponent;
	263		};	263		};
	264			264
	265		inline	265		inline
	266		floating_decimal_64	266		floating_decimal_64
HIT CBC	267	262	d2d(	267	262	d2d(
	268		const std::uint64_t ieeeMantissa,	268		const std::uint64_t ieeeMantissa,
	269		const std::uint32_t ieeeExponent)	269		const std::uint32_t ieeeExponent)
	270		{	270		{
	271		std::int32_t e2;	271		std::int32_t e2;
	272		std::uint64_t m2;	272		std::uint64_t m2;
HIT CBC	273	262	if (ieeeExponent == 0)	273	262	if (ieeeExponent == 0)
	274		{	274		{
	275		// We subtract 2 so that the bounds computation has 2 additional bits.	275		// We subtract 2 so that the bounds computation has 2 additional bits.
HIT CBC	276	15	e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;	276	15	e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
HIT CBC	277	15	m2 = ieeeMantissa;	277	15	m2 = ieeeMantissa;
	278		}	278		}
	279		else	279		else
	280		{	280		{
HIT CBC	281	247	e2 = (std::int32_t)ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;	281	247	e2 = (std::int32_t)ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
HIT CBC	282	247	m2 = (1ull << DOUBLE_MANTISSA_BITS) \| ieeeMantissa;	282	247	m2 = (1ull << DOUBLE_MANTISSA_BITS) \| ieeeMantissa;
	283		}	283		}
HIT CBC	284	262	const bool even = (m2 & 1) == 0;	284	262	const bool even = (m2 & 1) == 0;
HIT CBC	285	262	const bool acceptBounds = even;	285	262	const bool acceptBounds = even;
	286			286
	287		#ifdef RYU_DEBUG	287		#ifdef RYU_DEBUG
	288		printf("-> %" PRIu64 " * 2^%d\n", m2, e2 + 2);	288		printf("-> %" PRIu64 " * 2^%d\n", m2, e2 + 2);
	289		#endif	289		#endif
	290			290
	291		// Step 2: Determine the interval of valid decimal representations.	291		// Step 2: Determine the interval of valid decimal representations.
HIT CBC	292	262	const std::uint64_t mv = 4 * m2;	292	262	const std::uint64_t mv = 4 * m2;
	293		// Implicit bool -> int conversion. True is 1, false is 0.	293		// Implicit bool -> int conversion. True is 1, false is 0.
HIT CBC	294	262	const std::uint32_t mmShift = ieeeMantissa != 0 \|\| ieeeExponent <= 1;	294	262	const std::uint32_t mmShift = ieeeMantissa != 0 \|\| ieeeExponent <= 1;
	295		// We would compute mp and mm like this:	295		// We would compute mp and mm like this:
	296		// uint64_t mp = 4 * m2 + 2;	296		// uint64_t mp = 4 * m2 + 2;
	297		// uint64_t mm = mv - 1 - mmShift;	297		// uint64_t mm = mv - 1 - mmShift;
	298			298
	299		// Step 3: Convert to a decimal power base using 128-bit arithmetic.	299		// Step 3: Convert to a decimal power base using 128-bit arithmetic.
	300		std::uint64_t vr, vp, vm;	300		std::uint64_t vr, vp, vm;
	301		std::int32_t e10;	301		std::int32_t e10;
HIT CBC	302	262	bool vmIsTrailingZeros = false;	302	262	bool vmIsTrailingZeros = false;
HIT CBC	303	262	bool vrIsTrailingZeros = false;	303	262	bool vrIsTrailingZeros = false;
HIT CBC	304	262	if (e2 >= 0) {	304	262	if (e2 >= 0) {
	305		// I tried special-casing q == 0, but there was no effect on performance.	305		// I tried special-casing q == 0, but there was no effect on performance.
	306		// This expression is slightly faster than max(0, log10Pow2(e2) - 1).	306		// This expression is slightly faster than max(0, log10Pow2(e2) - 1).
HIT CBC	307	128	const std::uint32_t q = log10Pow2(e2) - (e2 > 3);	307	128	const std::uint32_t q = log10Pow2(e2) - (e2 > 3);
HIT CBC	308	128	e10 = (std::int32_t)q;	308	128	e10 = (std::int32_t)q;
HIT CBC	309	128	const std::int32_t k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int32_t)q) - 1;	309	128	const std::int32_t k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int32_t)q) - 1;
HIT CBC	310	128	const std::int32_t i = -e2 + (std::int32_t)q + k;	310	128	const std::int32_t i = -e2 + (std::int32_t)q + k;
	311		#if defined(BOOST_JSON_RYU_OPTIMIZE_SIZE)	311		#if defined(BOOST_JSON_RYU_OPTIMIZE_SIZE)
	312		uint64_t pow5[2];	312		uint64_t pow5[2];
	313		double_computeInvPow5(q, pow5);	313		double_computeInvPow5(q, pow5);
	314		vr = mulShiftAll(m2, pow5, i, &vp, &vm, mmShift);	314		vr = mulShiftAll(m2, pow5, i, &vp, &vm, mmShift);
	315		#else	315		#else
HIT CBC	316	128	vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT()[q], i, &vp, &vm, mmShift);	316	128	vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT()[q], i, &vp, &vm, mmShift);
	317		#endif	317		#endif
	318		#ifdef RYU_DEBUG	318		#ifdef RYU_DEBUG
	319		printf("%" PRIu64 " * 2^%d / 10^%u\n", mv, e2, q);	319		printf("%" PRIu64 " * 2^%d / 10^%u\n", mv, e2, q);
	320		printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);	320		printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
	321		#endif	321		#endif
HIT CBC	322	128	if (q <= 21)	322	128	if (q <= 21)
	323		{	323		{
	324		// This should use q <= 22, but I think 21 is also safe. Smaller values	324		// This should use q <= 22, but I think 21 is also safe. Smaller values
	325		// may still be safe, but it's more difficult to reason about them.	325		// may still be safe, but it's more difficult to reason about them.
	326		// Only one of mp, mv, and mm can be a multiple of 5, if any.	326		// Only one of mp, mv, and mm can be a multiple of 5, if any.
HIT CBC	327	114	const std::uint32_t mvMod5 = ((std::uint32_t)mv) - 5 * ((std::uint32_t)div5(mv));	327	114	const std::uint32_t mvMod5 = ((std::uint32_t)mv) - 5 * ((std::uint32_t)div5(mv));
HIT CBC	328	114	if (mvMod5 == 0)	328	114	if (mvMod5 == 0)
	329		{	329		{
HIT CBC	330	86	vrIsTrailingZeros = multipleOfPowerOf5(mv, q);	330	86	vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
	331		}	331		}
HIT CBC	332	28	else if (acceptBounds)	332	28	else if (acceptBounds)
	333		{	333		{
	334		// Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q	334		// Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q
	335		// <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q	335		// <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q
	336		// <=> true && pow5Factor(mm) >= q, since e2 >= q.	336		// <=> true && pow5Factor(mm) >= q, since e2 >= q.
HIT CBC	337	11	vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q);	337	11	vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q);
	338		}	338		}
	339		else	339		else
	340		{	340		{
	341		// Same as min(e2 + 1, pow5Factor(mp)) >= q.	341		// Same as min(e2 + 1, pow5Factor(mp)) >= q.
HIT CBC	342	17	vp -= multipleOfPowerOf5(mv + 2, q);	342	17	vp -= multipleOfPowerOf5(mv + 2, q);
	343		}	343		}
	344		}	344		}
	345		}	345		}
	346		else	346		else
	347		{	347		{
	348		// This expression is slightly faster than max(0, log10Pow5(-e2) - 1).	348		// This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
HIT CBC	349	134	const std::uint32_t q = log10Pow5(-e2) - (-e2 > 1);	349	134	const std::uint32_t q = log10Pow5(-e2) - (-e2 > 1);
HIT CBC	350	134	e10 = (std::int32_t)q + e2;	350	134	e10 = (std::int32_t)q + e2;
HIT CBC	351	134	const std::int32_t i = -e2 - (std::int32_t)q;	351	134	const std::int32_t i = -e2 - (std::int32_t)q;
HIT CBC	352	134	const std::int32_t k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;	352	134	const std::int32_t k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;
HIT CBC	353	134	const std::int32_t j = (std::int32_t)q - k;	353	134	const std::int32_t j = (std::int32_t)q - k;
	354		#if defined(BOOST_JSON_RYU_OPTIMIZE_SIZE)	354		#if defined(BOOST_JSON_RYU_OPTIMIZE_SIZE)
	355		std::uint64_t pow5[2];	355		std::uint64_t pow5[2];
	356		double_computePow5(i, pow5);	356		double_computePow5(i, pow5);
	357		vr = mulShiftAll(m2, pow5, j, &vp, &vm, mmShift);	357		vr = mulShiftAll(m2, pow5, j, &vp, &vm, mmShift);
	358		#else	358		#else
HIT CBC	359	134	vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT()[i], j, &vp, &vm, mmShift);	359	134	vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT()[i], j, &vp, &vm, mmShift);
	360		#endif	360		#endif
	361		#ifdef RYU_DEBUG	361		#ifdef RYU_DEBUG
	362		printf("%" PRIu64 " * 5^%d / 10^%u\n", mv, -e2, q);	362		printf("%" PRIu64 " * 5^%d / 10^%u\n", mv, -e2, q);
	363		printf("%u %d %d %d\n", q, i, k, j);	363		printf("%u %d %d %d\n", q, i, k, j);
	364		printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);	364		printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
	365		#endif	365		#endif
HIT CBC	366	134	if (q <= 1)	366	134	if (q <= 1)
	367		{	367		{
	368		// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.	368		// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
	369		// mv = 4 * m2, so it always has at least two trailing 0 bits.	369		// mv = 4 * m2, so it always has at least two trailing 0 bits.
HIT CBC	370	3	vrIsTrailingZeros = true;	370	3	vrIsTrailingZeros = true;
HIT CBC	371	3	if (acceptBounds)	371	3	if (acceptBounds)
	372		{	372		{
	373		// mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.	373		// mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
HIT CBC	374	3	vmIsTrailingZeros = mmShift == 1;	374	3	vmIsTrailingZeros = mmShift == 1;
	375		}	375		}
	376		else	376		else
	377		{	377		{
	378		// mp = mv + 2, so it always has at least one trailing 0 bit.	378		// mp = mv + 2, so it always has at least one trailing 0 bit.
MIS UBC	379	✗	--vp;	379	✗	--vp;
	380		}	380		}
	381		}	381		}
HIT CBC	382	131	else if (q < 63)	382	131	else if (q < 63)
	383		{	383		{
	384		// TODO(ulfjack): Use a tighter bound here.	384		// TODO(ulfjack): Use a tighter bound here.
	385		// We want to know if the full product has at least q trailing zeros.	385		// We want to know if the full product has at least q trailing zeros.
	386		// We need to compute min(p2(mv), p5(mv) - e2) >= q	386		// We need to compute min(p2(mv), p5(mv) - e2) >= q
	387		// <=> p2(mv) >= q && p5(mv) - e2 >= q	387		// <=> p2(mv) >= q && p5(mv) - e2 >= q
	388		// <=> p2(mv) >= q (because -e2 >= q)	388		// <=> p2(mv) >= q (because -e2 >= q)
HIT CBC	389	96	vrIsTrailingZeros = multipleOfPowerOf2(mv, q);	389	96	vrIsTrailingZeros = multipleOfPowerOf2(mv, q);
	390		#ifdef RYU_DEBUG	390		#ifdef RYU_DEBUG
	391		printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");	391		printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
	392		#endif	392		#endif
	393		}	393		}
	394		}	394		}
	395		#ifdef RYU_DEBUG	395		#ifdef RYU_DEBUG
	396		printf("e10=%d\n", e10);	396		printf("e10=%d\n", e10);
	397		printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);	397		printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
	398		printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false");	398		printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false");
	399		printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");	399		printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
	400		#endif	400		#endif
	401			401
	402		// Step 4: Find the shortest decimal representation in the interval of valid representations.	402		// Step 4: Find the shortest decimal representation in the interval of valid representations.
HIT CBC	403	262	std::int32_t removed = 0;	403	262	std::int32_t removed = 0;
HIT CBC	404	262	std::uint8_t lastRemovedDigit = 0;	404	262	std::uint8_t lastRemovedDigit = 0;
	405		std::uint64_t output;	405		std::uint64_t output;
	406		// On average, we remove ~2 digits.	406		// On average, we remove ~2 digits.
HIT CBC	407	262	if (vmIsTrailingZeros \|\| vrIsTrailingZeros)	407	262	if (vmIsTrailingZeros \|\| vrIsTrailingZeros)
	408		{	408		{
	409		// General case, which happens rarely (~0.7%).	409		// General case, which happens rarely (~0.7%).
	410		for (;;)	410		for (;;)
	411		{	411		{
HIT CBC	412	1663	const std::uint64_t vpDiv10 = div10(vp);	412	1663	const std::uint64_t vpDiv10 = div10(vp);
HIT CBC	413	1663	const std::uint64_t vmDiv10 = div10(vm);	413	1663	const std::uint64_t vmDiv10 = div10(vm);
HIT CBC	414	1663	if (vpDiv10 <= vmDiv10)	414	1663	if (vpDiv10 <= vmDiv10)
HIT CBC	415	94	break;	415	94	break;
HIT CBC	416	1569	const std::uint32_t vmMod10 = ((std::uint32_t)vm) - 10 * ((std::uint32_t)vmDiv10);	416	1569	const std::uint32_t vmMod10 = ((std::uint32_t)vm) - 10 * ((std::uint32_t)vmDiv10);
HIT CBC	417	1569	const std::uint64_t vrDiv10 = div10(vr);	417	1569	const std::uint64_t vrDiv10 = div10(vr);
HIT CBC	418	1569	const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);	418	1569	const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);
HIT CBC	419	1569	vmIsTrailingZeros &= vmMod10 == 0;	419	1569	vmIsTrailingZeros &= vmMod10 == 0;
HIT CBC	420	1569	vrIsTrailingZeros &= lastRemovedDigit == 0;	420	1569	vrIsTrailingZeros &= lastRemovedDigit == 0;
HIT CBC	421	1569	lastRemovedDigit = (uint8_t)vrMod10;	421	1569	lastRemovedDigit = (uint8_t)vrMod10;
HIT CBC	422	1569	vr = vrDiv10;	422	1569	vr = vrDiv10;
HIT CBC	423	1569	vp = vpDiv10;	423	1569	vp = vpDiv10;
HIT CBC	424	1569	vm = vmDiv10;	424	1569	vm = vmDiv10;
HIT CBC	425	1569	++removed;	425	1569	++removed;
HIT CBC	426	1569	}	426	1569	}
	427		#ifdef RYU_DEBUG	427		#ifdef RYU_DEBUG
	428		printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);	428		printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
	429		printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false");	429		printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false");
	430		#endif	430		#endif
HIT CBC	431	94	if (vmIsTrailingZeros)	431	94	if (vmIsTrailingZeros)
	432		{	432		{
	433		for (;;)	433		for (;;)
	434		{	434		{
HIT CBC	435	3	const std::uint64_t vmDiv10 = div10(vm);	435	3	const std::uint64_t vmDiv10 = div10(vm);
HIT CBC	436	3	const std::uint32_t vmMod10 = ((std::uint32_t)vm) - 10 * ((std::uint32_t)vmDiv10);	436	3	const std::uint32_t vmMod10 = ((std::uint32_t)vm) - 10 * ((std::uint32_t)vmDiv10);
HIT CBC	437	3	if (vmMod10 != 0)	437	3	if (vmMod10 != 0)
HIT CBC	438	2	break;	438	2	break;
HIT CBC	439	1	const std::uint64_t vpDiv10 = div10(vp);	439	1	const std::uint64_t vpDiv10 = div10(vp);
HIT CBC	440	1	const std::uint64_t vrDiv10 = div10(vr);	440	1	const std::uint64_t vrDiv10 = div10(vr);
HIT CBC	441	1	const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);	441	1	const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);
HIT CBC	442	1	vrIsTrailingZeros &= lastRemovedDigit == 0;	442	1	vrIsTrailingZeros &= lastRemovedDigit == 0;
HIT CBC	443	1	lastRemovedDigit = (uint8_t)vrMod10;	443	1	lastRemovedDigit = (uint8_t)vrMod10;
HIT CBC	444	1	vr = vrDiv10;	444	1	vr = vrDiv10;
HIT CBC	445	1	vp = vpDiv10;	445	1	vp = vpDiv10;
HIT CBC	446	1	vm = vmDiv10;	446	1	vm = vmDiv10;
HIT CBC	447	1	++removed;	447	1	++removed;
HIT CBC	448	1	}	448	1	}
	449		}	449		}
	450		#ifdef RYU_DEBUG	450		#ifdef RYU_DEBUG
	451		printf("%" PRIu64 " %d\n", vr, lastRemovedDigit);	451		printf("%" PRIu64 " %d\n", vr, lastRemovedDigit);
	452		printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");	452		printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
	453		#endif	453		#endif
HIT CBC	454	94	if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)	454	94	if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)
	455		{	455		{
	456		// Round even if the exact number is .....50..0.	456		// Round even if the exact number is .....50..0.
HIT CBC	457	1	lastRemovedDigit = 4;	457	1	lastRemovedDigit = 4;
	458		}	458		}
	459		// We need to take vr + 1 if vr is outside bounds or we need to round up.	459		// We need to take vr + 1 if vr is outside bounds or we need to round up.
HIT CBC	460	94	output = vr + ((vr == vm && (!acceptBounds \|\| !vmIsTrailingZeros)) \|\| lastRemovedDigit >= 5);	460	94	output = vr + ((vr == vm && (!acceptBounds \|\| !vmIsTrailingZeros)) \|\| lastRemovedDigit >= 5);
HIT CBC	461	94	}	461	94	}
	462		else	462		else
	463		{	463		{
	464		// Specialized for the common case (~99.3%). Percentages below are relative to this.	464		// Specialized for the common case (~99.3%). Percentages below are relative to this.
HIT CBC	465	168	bool roundUp = false;	465	168	bool roundUp = false;
HIT CBC	466	168	const std::uint64_t vpDiv100 = div100(vp);	466	168	const std::uint64_t vpDiv100 = div100(vp);
HIT CBC	467	168	const std::uint64_t vmDiv100 = div100(vm);	467	168	const std::uint64_t vmDiv100 = div100(vm);
HIT CBC	468	168	if (vpDiv100 > vmDiv100)	468	168	if (vpDiv100 > vmDiv100)
	469		{	469		{
	470		// Optimization: remove two digits at a time (~86.2%).	470		// Optimization: remove two digits at a time (~86.2%).
HIT CBC	471	161	const std::uint64_t vrDiv100 = div100(vr);	471	161	const std::uint64_t vrDiv100 = div100(vr);
HIT CBC	472	161	const std::uint32_t vrMod100 = ((std::uint32_t)vr) - 100 * ((std::uint32_t)vrDiv100);	472	161	const std::uint32_t vrMod100 = ((std::uint32_t)vr) - 100 * ((std::uint32_t)vrDiv100);
HIT CBC	473	161	roundUp = vrMod100 >= 50;	473	161	roundUp = vrMod100 >= 50;
HIT CBC	474	161	vr = vrDiv100;	474	161	vr = vrDiv100;
HIT CBC	475	161	vp = vpDiv100;	475	161	vp = vpDiv100;
HIT CBC	476	161	vm = vmDiv100;	476	161	vm = vmDiv100;
HIT CBC	477	161	removed += 2;	477	161	removed += 2;
	478		}	478		}
	479		// Loop iterations below (approximately), without optimization above:	479		// Loop iterations below (approximately), without optimization above:
	480		// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%	480		// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
	481		// Loop iterations below (approximately), with optimization above:	481		// Loop iterations below (approximately), with optimization above:
	482		// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%	482		// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
	483		for (;;)	483		for (;;)
	484		{	484		{
HIT CBC	485	2256	const std::uint64_t vpDiv10 = div10(vp);	485	2256	const std::uint64_t vpDiv10 = div10(vp);
HIT CBC	486	2256	const std::uint64_t vmDiv10 = div10(vm);	486	2256	const std::uint64_t vmDiv10 = div10(vm);
HIT CBC	487	2256	if (vpDiv10 <= vmDiv10)	487	2256	if (vpDiv10 <= vmDiv10)
HIT CBC	488	168	break;	488	168	break;
HIT CBC	489	2088	const std::uint64_t vrDiv10 = div10(vr);	489	2088	const std::uint64_t vrDiv10 = div10(vr);
HIT CBC	490	2088	const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);	490	2088	const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);
HIT CBC	491	2088	roundUp = vrMod10 >= 5;	491	2088	roundUp = vrMod10 >= 5;
HIT CBC	492	2088	vr = vrDiv10;	492	2088	vr = vrDiv10;
HIT CBC	493	2088	vp = vpDiv10;	493	2088	vp = vpDiv10;
HIT CBC	494	2088	vm = vmDiv10;	494	2088	vm = vmDiv10;
HIT CBC	495	2088	++removed;	495	2088	++removed;
HIT CBC	496	2088	}	496	2088	}
	497		#ifdef RYU_DEBUG	497		#ifdef RYU_DEBUG
	498		printf("%" PRIu64 " roundUp=%s\n", vr, roundUp ? "true" : "false");	498		printf("%" PRIu64 " roundUp=%s\n", vr, roundUp ? "true" : "false");
	499		printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");	499		printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
	500		#endif	500		#endif

1

2

//

2

//

3

// The contents of this file may be used under the terms of the Apache License,

3

// The contents of this file may be used under the terms of the Apache License,

4

// Version 2.0.

4

// Version 2.0.

5

//

5

//

6

// (See accompanying file LICENSE-Apache or copy at

6

// (See accompanying file LICENSE-Apache or copy at

7

// http://www.apache.org/licenses/LICENSE-2.0)

7

// http://www.apache.org/licenses/LICENSE-2.0)

8

//

8

//

9

// Alternatively, the contents of this file may be used under the terms of

9

// Alternatively, the contents of this file may be used under the terms of

10

// the Boost Software License, Version 1.0.

10

// the Boost Software License, Version 1.0.

11

// (See accompanying file LICENSE-Boost or copy at

11

// (See accompanying file LICENSE-Boost or copy at

12

// https://www.boost.org/LICENSE_1_0.txt)

12

// https://www.boost.org/LICENSE_1_0.txt)

13

//

13

//

14

// Unless required by applicable law or agreed to in writing, this software

14

// Unless required by applicable law or agreed to in writing, this software

15

// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

15

// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

16

// KIND, either express or implied.

16

// KIND, either express or implied.

17

18

// Runtime compiler options:

18

// Runtime compiler options:

19

// -DRYU_DEBUG Generate verbose debugging output to stdout.

19

// -DRYU_DEBUG Generate verbose debugging output to stdout.

20

//

20

//

21

// -DRYU_ONLY_64_BIT_OPS Avoid using uint128_t or 64-bit intrinsics. Slower,

21

// -DRYU_ONLY_64_BIT_OPS Avoid using uint128_t or 64-bit intrinsics. Slower,

22

// depending on your compiler.

22

// depending on your compiler.

23

//

23

//

24

// -DRYU_OPTIMIZE_SIZE Use smaller lookup tables. Instead of storing every

24

// -DRYU_OPTIMIZE_SIZE Use smaller lookup tables. Instead of storing every

25

// required power of 5, only store every 26th entry, and compute

25

// required power of 5, only store every 26th entry, and compute

26

// intermediate values with a multiplication. This reduces the lookup table

26

// intermediate values with a multiplication. This reduces the lookup table

27

// size by about 10x (only one case, and only double) at the cost of some

27

// size by about 10x (only one case, and only double) at the cost of some

28

// performance. Currently requires MSVC intrinsics.

28

// performance. Currently requires MSVC intrinsics.

29

30

/*

30

/*

31

This is a derivative work

31

This is a derivative work

32

*/

32

*/

33

34

#ifndef BOOST_JSON_DETAIL_RYU_IMPL_D2S_IPP

34

#ifndef BOOST_JSON_DETAIL_RYU_IMPL_D2S_IPP

35

#define BOOST_JSON_DETAIL_RYU_IMPL_D2S_IPP

35

#define BOOST_JSON_DETAIL_RYU_IMPL_D2S_IPP

36

37

#include <boost/json/detail/ryu/ryu.hpp>

37

#include <boost/json/detail/ryu/ryu.hpp>

38

#include <cstdlib>

38

#include <cstdlib>

39

#include <cstring>

39

#include <cstring>

40

41

#ifdef RYU_DEBUG

41

#ifdef RYU_DEBUG

42

#include <stdio.h>

42

#include <stdio.h>

43

#endif

43

#endif

44

45

// ABSL avoids uint128_t on Win32 even if __SIZEOF_INT128__ is defined.

45

// ABSL avoids uint128_t on Win32 even if __SIZEOF_INT128__ is defined.

46

// Let's do the same for now.

46

// Let's do the same for now.

47

#if defined(__SIZEOF_INT128__) && !defined(_MSC_VER) && !defined(RYU_ONLY_64_BIT_OPS)

47

#if defined(__SIZEOF_INT128__) && !defined(_MSC_VER) && !defined(RYU_ONLY_64_BIT_OPS)

48

#define BOOST_JSON_RYU_HAS_UINT128

48

#define BOOST_JSON_RYU_HAS_UINT128

49

#elif defined(_MSC_VER) && !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64)

49

#elif defined(_MSC_VER) && !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64)

50

#define BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS

50

#define BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS

51

#endif

51

#endif

52

53

#include <boost/json/detail/ryu/detail/common.hpp>

53

#include <boost/json/detail/ryu/detail/common.hpp>

54

#include <boost/json/detail/ryu/detail/digit_table.hpp>

54

#include <boost/json/detail/ryu/detail/digit_table.hpp>

55

#include <boost/json/detail/ryu/detail/d2s.hpp>

55

#include <boost/json/detail/ryu/detail/d2s.hpp>

56

#include <boost/json/detail/ryu/detail/d2s_intrinsics.hpp>

56

#include <boost/json/detail/ryu/detail/d2s_intrinsics.hpp>

57

58

namespace boost {

58

namespace boost {

59

namespace json {

59

namespace json {

60

namespace detail {

60

namespace detail {

61

62

namespace ryu {

62

namespace ryu {

63

namespace detail {

63

namespace detail {

64

65

// We need a 64x128-bit multiplication and a subsequent 128-bit shift.

65

// We need a 64x128-bit multiplication and a subsequent 128-bit shift.

66

// Multiplication:

66

// Multiplication:

67

// The 64-bit factor is variable and passed in, the 128-bit factor comes

67

// The 64-bit factor is variable and passed in, the 128-bit factor comes

68

// from a lookup table. We know that the 64-bit factor only has 55

68

// from a lookup table. We know that the 64-bit factor only has 55

69

// significant bits (i.e., the 9 topmost bits are zeros). The 128-bit

69

// significant bits (i.e., the 9 topmost bits are zeros). The 128-bit

70

// factor only has 124 significant bits (i.e., the 4 topmost bits are

70

// factor only has 124 significant bits (i.e., the 4 topmost bits are

71

// zeros).

71

// zeros).

72

// Shift:

72

// Shift:

73

// In principle, the multiplication result requires 55 + 124 = 179 bits to

73

// In principle, the multiplication result requires 55 + 124 = 179 bits to

74

// represent. However, we then shift this value to the right by j, which is

74

// represent. However, we then shift this value to the right by j, which is

75

// at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64

75

// at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64

76

// bits. This means that we only need the topmost 64 significant bits of

76

// bits. This means that we only need the topmost 64 significant bits of

77

// the 64x128-bit multiplication.

77

// the 64x128-bit multiplication.

78

//

78

//

79

// There are several ways to do this:

79

// There are several ways to do this:

80

// 1. Best case: the compiler exposes a 128-bit type.

80

// 1. Best case: the compiler exposes a 128-bit type.

81

// We perform two 64x64-bit multiplications, add the higher 64 bits of the

81

// We perform two 64x64-bit multiplications, add the higher 64 bits of the

82

// lower result to the higher result, and shift by j - 64 bits.

82

// lower result to the higher result, and shift by j - 64 bits.

83

//

83

//

84

// We explicitly cast from 64-bit to 128-bit, so the compiler can tell

84

// We explicitly cast from 64-bit to 128-bit, so the compiler can tell

85

// that these are only 64-bit inputs, and can map these to the best

85

// that these are only 64-bit inputs, and can map these to the best

86

// possible sequence of assembly instructions.

86

// possible sequence of assembly instructions.

87

// x64 machines happen to have matching assembly instructions for

87

// x64 machines happen to have matching assembly instructions for

88

// 64x64-bit multiplications and 128-bit shifts.

88

// 64x64-bit multiplications and 128-bit shifts.

89

//

89

//

90

// 2. Second best case: the compiler exposes intrinsics for the x64 assembly

90

// 2. Second best case: the compiler exposes intrinsics for the x64 assembly

91

// instructions mentioned in 1.

91

// instructions mentioned in 1.

92

//

92

//

93

// 3. We only have 64x64 bit instructions that return the lower 64 bits of

93

// 3. We only have 64x64 bit instructions that return the lower 64 bits of

94

// the result, i.e., we have to use plain C.

94

// the result, i.e., we have to use plain C.

95

// Our inputs are less than the full width, so we have three options:

95

// Our inputs are less than the full width, so we have three options:

96

// a. Ignore this fact and just implement the intrinsics manually.

96

// a. Ignore this fact and just implement the intrinsics manually.

97

// b. Split both into 31-bit pieces, which guarantees no internal overflow,

97

// b. Split both into 31-bit pieces, which guarantees no internal overflow,

98

// but requires extra work upfront (unless we change the lookup table).

98

// but requires extra work upfront (unless we change the lookup table).

99

// c. Split only the first factor into 31-bit pieces, which also guarantees

99

// c. Split only the first factor into 31-bit pieces, which also guarantees

100

// no internal overflow, but requires extra work since the intermediate

100

// no internal overflow, but requires extra work since the intermediate

101

// results are not perfectly aligned.

101

// results are not perfectly aligned.

102

#if defined(BOOST_JSON_RYU_HAS_UINT128)

102

#if defined(BOOST_JSON_RYU_HAS_UINT128)

103

104

// Best case: use 128-bit type.

104

// Best case: use 128-bit type.

105

inline

105

inline

106

std::uint64_t

106

std::uint64_t

HIT CBC

107

786

mulShift(

107

786

mulShift(

108

const std::uint64_t m,

108

const std::uint64_t m,

109

const std::uint64_t* const mul,

109

const std::uint64_t* const mul,

110

const std::int32_t j) noexcept

110

const std::int32_t j) noexcept

111

{

111

{

HIT CBC

112

786

const uint128_t b0 = ((uint128_t) m) * mul[0];

112

786

const uint128_t b0 = ((uint128_t) m) * mul[0];

HIT CBC

113

786

const uint128_t b2 = ((uint128_t) m) * mul[1];

113

786

const uint128_t b2 = ((uint128_t) m) * mul[1];

HIT CBC

114

786

return (std::uint64_t) (((b0 >> 64) + b2) >> (j - 64));

114

786

return (std::uint64_t) (((b0 >> 64) + b2) >> (j - 64));

115

}

115

}

116

117

inline

117

inline

118

uint64_t

118

uint64_t

HIT CBC

119

262

mulShiftAll(

119

262

mulShiftAll(

120

const std::uint64_t m,

120

const std::uint64_t m,

121

const std::uint64_t* const mul,

121

const std::uint64_t* const mul,

122

std::int32_t const j,

122

std::int32_t const j,

123

std::uint64_t* const vp,

123

std::uint64_t* const vp,

124

std::uint64_t* const vm,

124

std::uint64_t* const vm,

125

const std::uint32_t mmShift) noexcept

125

const std::uint32_t mmShift) noexcept

126

{

126

{

127

// m <<= 2;

127

// m <<= 2;

128

// uint128_t b0 = ((uint128_t) m) * mul[0]; // 0

128

// uint128_t b0 = ((uint128_t) m) * mul[0]; // 0

129

// uint128_t b2 = ((uint128_t) m) * mul[1]; // 64

129

// uint128_t b2 = ((uint128_t) m) * mul[1]; // 64

130

//

130

//

131

// uint128_t hi = (b0 >> 64) + b2;

131

// uint128_t hi = (b0 >> 64) + b2;

132

// uint128_t lo = b0 & 0xffffffffffffffffull;

132

// uint128_t lo = b0 & 0xffffffffffffffffull;

133

// uint128_t factor = (((uint128_t) mul[1]) << 64) + mul[0];

133

// uint128_t factor = (((uint128_t) mul[1]) << 64) + mul[0];

134

// uint128_t vpLo = lo + (factor << 1);

134

// uint128_t vpLo = lo + (factor << 1);

135

// *vp = (std::uint64_t) ((hi + (vpLo >> 64)) >> (j - 64));

135

// *vp = (std::uint64_t) ((hi + (vpLo >> 64)) >> (j - 64));

136

// uint128_t vmLo = lo - (factor << mmShift);

136

// uint128_t vmLo = lo - (factor << mmShift);

137

// *vm = (std::uint64_t) ((hi + (vmLo >> 64) - (((uint128_t) 1ull) << 64)) >> (j - 64));

137

// *vm = (std::uint64_t) ((hi + (vmLo >> 64) - (((uint128_t) 1ull) << 64)) >> (j - 64));

138

// return (std::uint64_t) (hi >> (j - 64));

138

// return (std::uint64_t) (hi >> (j - 64));

HIT CBC

139

262

*vp = mulShift(4 * m + 2, mul, j);

139

262

*vp = mulShift(4 * m + 2, mul, j);

HIT CBC

140

262

*vm = mulShift(4 * m - 1 - mmShift, mul, j);

140

262

*vm = mulShift(4 * m - 1 - mmShift, mul, j);

HIT CBC

141

262

return mulShift(4 * m, mul, j);

141

262

return mulShift(4 * m, mul, j);

142

}

142

}

143

144

#elif defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS)

144

#elif defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS)

145

146

inline

146

inline

147

std::uint64_t

147

std::uint64_t

148

mulShift(

148

mulShift(

149

const std::uint64_t m,

149

const std::uint64_t m,

150

const std::uint64_t* const mul,

150

const std::uint64_t* const mul,

151

const std::int32_t j) noexcept

151

const std::int32_t j) noexcept

152

{

152

{

153

// m is maximum 55 bits

153

// m is maximum 55 bits

154

std::uint64_t high1; // 128

154

std::uint64_t high1; // 128

155

std::uint64_t const low1 = umul128(m, mul[1], &high1); // 64

155

std::uint64_t const low1 = umul128(m, mul[1], &high1); // 64

156

std::uint64_t high0; // 64

156

std::uint64_t high0; // 64

157

umul128(m, mul[0], &high0); // 0

157

umul128(m, mul[0], &high0); // 0

158

std::uint64_t const sum = high0 + low1;

158

std::uint64_t const sum = high0 + low1;

159

if (sum < high0)

159

if (sum < high0)

160

++high1; // overflow into high1

160

++high1; // overflow into high1

161

return shiftright128(sum, high1, j - 64);

161

return shiftright128(sum, high1, j - 64);

162

}

162

}

163

164

inline

164

inline

165

std::uint64_t

165

std::uint64_t

166

mulShiftAll(

166

mulShiftAll(

167

const std::uint64_t m,

167

const std::uint64_t m,

168

const std::uint64_t* const mul,

168

const std::uint64_t* const mul,

169

const std::int32_t j,

169

const std::int32_t j,

170

std::uint64_t* const vp,

170

std::uint64_t* const vp,

171

std::uint64_t* const vm,

171

std::uint64_t* const vm,

172

const std::uint32_t mmShift) noexcept

172

const std::uint32_t mmShift) noexcept

173

{

173

{

174

*vp = mulShift(4 * m + 2, mul, j);

174

*vp = mulShift(4 * m + 2, mul, j);

175

*vm = mulShift(4 * m - 1 - mmShift, mul, j);

175

*vm = mulShift(4 * m - 1 - mmShift, mul, j);

176

return mulShift(4 * m, mul, j);

176

return mulShift(4 * m, mul, j);

177

}

177

}

178

179

#else // !defined(BOOST_JSON_RYU_HAS_UINT128) && !defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS)

179

#else // !defined(BOOST_JSON_RYU_HAS_UINT128) && !defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS)

180

181

inline

181

inline

182

std::uint64_t

182

std::uint64_t

183

mulShiftAll(

183

mulShiftAll(

184

std::uint64_t m,

184

std::uint64_t m,

185

const std::uint64_t* const mul,

185

const std::uint64_t* const mul,

186

const std::int32_t j,

186

const std::int32_t j,

187

std::uint64_t* const vp,

187

std::uint64_t* const vp,

188

std::uint64_t* const vm,

188

std::uint64_t* const vm,

189

const std::uint32_t mmShift)

189

const std::uint32_t mmShift)

190

{

190

{

191

m <<= 1;

191

m <<= 1;

192

// m is maximum 55 bits

192

// m is maximum 55 bits

193

std::uint64_t tmp;

193

std::uint64_t tmp;

194

std::uint64_t const lo = umul128(m, mul[0], &tmp);

194

std::uint64_t const lo = umul128(m, mul[0], &tmp);

195

std::uint64_t hi;

195

std::uint64_t hi;

196

std::uint64_t const mid = tmp + umul128(m, mul[1], &hi);

196

std::uint64_t const mid = tmp + umul128(m, mul[1], &hi);

197

hi += mid < tmp; // overflow into hi

197

hi += mid < tmp; // overflow into hi

198

199

const std::uint64_t lo2 = lo + mul[0];

199

const std::uint64_t lo2 = lo + mul[0];

200

const std::uint64_t mid2 = mid + mul[1] + (lo2 < lo);

200

const std::uint64_t mid2 = mid + mul[1] + (lo2 < lo);

201

const std::uint64_t hi2 = hi + (mid2 < mid);

201

const std::uint64_t hi2 = hi + (mid2 < mid);

202

*vp = shiftright128(mid2, hi2, (std::uint32_t)(j - 64 - 1));

202

*vp = shiftright128(mid2, hi2, (std::uint32_t)(j - 64 - 1));

203

204

if (mmShift == 1)

204

if (mmShift == 1)

205

{

205

{

206

const std::uint64_t lo3 = lo - mul[0];

206

const std::uint64_t lo3 = lo - mul[0];

207

const std::uint64_t mid3 = mid - mul[1] - (lo3 > lo);

207

const std::uint64_t mid3 = mid - mul[1] - (lo3 > lo);

208

const std::uint64_t hi3 = hi - (mid3 > mid);

208

const std::uint64_t hi3 = hi - (mid3 > mid);

209

*vm = shiftright128(mid3, hi3, (std::uint32_t)(j - 64 - 1));

209

*vm = shiftright128(mid3, hi3, (std::uint32_t)(j - 64 - 1));

210

}

210

}

211

else

211

else

212

{

212

{

213

const std::uint64_t lo3 = lo + lo;

213

const std::uint64_t lo3 = lo + lo;

214

const std::uint64_t mid3 = mid + mid + (lo3 < lo);

214

const std::uint64_t mid3 = mid + mid + (lo3 < lo);

215

const std::uint64_t hi3 = hi + hi + (mid3 < mid);

215

const std::uint64_t hi3 = hi + hi + (mid3 < mid);

216

const std::uint64_t lo4 = lo3 - mul[0];

216

const std::uint64_t lo4 = lo3 - mul[0];

217

const std::uint64_t mid4 = mid3 - mul[1] - (lo4 > lo3);

217

const std::uint64_t mid4 = mid3 - mul[1] - (lo4 > lo3);

218

const std::uint64_t hi4 = hi3 - (mid4 > mid3);

218

const std::uint64_t hi4 = hi3 - (mid4 > mid3);

219

*vm = shiftright128(mid4, hi4, (std::uint32_t)(j - 64));

219

*vm = shiftright128(mid4, hi4, (std::uint32_t)(j - 64));

220

}

220

}

221

222

return shiftright128(mid, hi, (std::uint32_t)(j - 64 - 1));

222

return shiftright128(mid, hi, (std::uint32_t)(j - 64 - 1));

223

}

223

}

224

225

#endif // BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS

225

#endif // BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS

226

227

inline

227

inline

228

std::uint32_t

228

std::uint32_t

HIT CBC

229

538

decimalLength17(

229

538

decimalLength17(

230

const std::uint64_t v)

230

const std::uint64_t v)

231

{

231

{

232

// This is slightly faster than a loop.

232

// This is slightly faster than a loop.

233

// The average output length is 16.38 digits, so we check high-to-low.

233

// The average output length is 16.38 digits, so we check high-to-low.

234

// Function precondition: v is not an 18, 19, or 20-digit number.

234

// Function precondition: v is not an 18, 19, or 20-digit number.

235

// (17 digits are sufficient for round-tripping.)

235

// (17 digits are sufficient for round-tripping.)

HIT CBC

236

538

BOOST_ASSERT(v < 100000000000000000L);

236

538

BOOST_ASSERT(v < 100000000000000000L);

HIT CBC

237

538

if (v >= 10000000000000000L) { return 17; }

237

538

if (v >= 10000000000000000L) { return 17; }

HIT CBC

238

528

if (v >= 1000000000000000L) { return 16; }

238

528

if (v >= 1000000000000000L) { return 16; }

HIT CBC

239

509

if (v >= 100000000000000L) { return 15; }

239

509

if (v >= 100000000000000L) { return 15; }

HIT CBC

240

505

if (v >= 10000000000000L) { return 14; }

240

505

if (v >= 10000000000000L) { return 14; }

HIT CBC

241

500

if (v >= 1000000000000L) { return 13; }

241

500

if (v >= 1000000000000L) { return 13; }

HIT CBC

242

494

if (v >= 100000000000L) { return 12; }

242

494

if (v >= 100000000000L) { return 12; }

HIT CBC

243

489

if (v >= 10000000000L) { return 11; }

243

489

if (v >= 10000000000L) { return 11; }

HIT CBC

244

484

if (v >= 1000000000L) { return 10; }

244

484

if (v >= 1000000000L) { return 10; }

HIT CBC

245

474

if (v >= 100000000L) { return 9; }

245

474

if (v >= 100000000L) { return 9; }

HIT CBC

246

467

if (v >= 10000000L) { return 8; }

246

467

if (v >= 10000000L) { return 8; }

HIT CBC

247

461

if (v >= 1000000L) { return 7; }

247

461

if (v >= 1000000L) { return 7; }

HIT CBC

248

455

if (v >= 100000L) { return 6; }

248

455

if (v >= 100000L) { return 6; }

HIT CBC

249

450

if (v >= 10000L) { return 5; }

249

450

if (v >= 10000L) { return 5; }

HIT CBC

250

445

if (v >= 1000L) { return 4; }

250

445

if (v >= 1000L) { return 4; }

HIT CBC

251

439

if (v >= 100L) { return 3; }

251

439

if (v >= 100L) { return 3; }

HIT CBC

252

421

if (v >= 10L) { return 2; }

252

421

if (v >= 10L) { return 2; }

HIT CBC

253

415

return 1;

253

415

return 1;

254

}

254

}

255

256

// A floating decimal representing m * 10^e.

256

// A floating decimal representing m * 10^e.

257

struct floating_decimal_64

257

struct floating_decimal_64

258

{

258

{

259

std::uint64_t mantissa;

259

std::uint64_t mantissa;

260

// Decimal exponent's range is -324 to 308

260

// Decimal exponent's range is -324 to 308

261

// inclusive, and can fit in a short if needed.

261

// inclusive, and can fit in a short if needed.

262

std::int32_t exponent;

262

std::int32_t exponent;

263

};

263

};

264

265

inline

265

inline

266

floating_decimal_64

266

floating_decimal_64

HIT CBC

267

262

d2d(

267

262

d2d(

268

const std::uint64_t ieeeMantissa,

268

const std::uint64_t ieeeMantissa,

269

const std::uint32_t ieeeExponent)

269

const std::uint32_t ieeeExponent)

270

{

270

{

271

std::int32_t e2;

271

std::int32_t e2;

272

std::uint64_t m2;

272

std::uint64_t m2;

HIT CBC

273

262

if (ieeeExponent == 0)

273

262

if (ieeeExponent == 0)

274

{

274

{

275

// We subtract 2 so that the bounds computation has 2 additional bits.

275

// We subtract 2 so that the bounds computation has 2 additional bits.

HIT CBC

276

15

e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;

276

15

e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;

HIT CBC

277

15

m2 = ieeeMantissa;

277

15

m2 = ieeeMantissa;

278

}

278

}

279

else

279

else

280

{

280

{

HIT CBC

281

247

e2 = (std::int32_t)ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;

281

247

e2 = (std::int32_t)ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;

HIT CBC

282

247

m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;

282

247

m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;

283

}

283

}

HIT CBC

284

262

const bool even = (m2 & 1) == 0;

284

262

const bool even = (m2 & 1) == 0;

HIT CBC

285

262

const bool acceptBounds = even;

285

262

const bool acceptBounds = even;

286

287

#ifdef RYU_DEBUG

287

#ifdef RYU_DEBUG

288

printf("-> %" PRIu64 " * 2^%d\n", m2, e2 + 2);

288

printf("-> %" PRIu64 " * 2^%d\n", m2, e2 + 2);

289

#endif

289

#endif

290

291

// Step 2: Determine the interval of valid decimal representations.

291

// Step 2: Determine the interval of valid decimal representations.

HIT CBC

292

262

const std::uint64_t mv = 4 * m2;

292

262

const std::uint64_t mv = 4 * m2;

293

// Implicit bool -> int conversion. True is 1, false is 0.

293

// Implicit bool -> int conversion. True is 1, false is 0.

HIT CBC

294

262

const std::uint32_t mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;

294

262

const std::uint32_t mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;

295

// We would compute mp and mm like this:

295

// We would compute mp and mm like this:

296

// uint64_t mp = 4 * m2 + 2;

296

// uint64_t mp = 4 * m2 + 2;

297

// uint64_t mm = mv - 1 - mmShift;

297

// uint64_t mm = mv - 1 - mmShift;

298

299

// Step 3: Convert to a decimal power base using 128-bit arithmetic.

299

// Step 3: Convert to a decimal power base using 128-bit arithmetic.

300

std::uint64_t vr, vp, vm;

300

std::uint64_t vr, vp, vm;

301

std::int32_t e10;

301

std::int32_t e10;

HIT CBC

302

262

bool vmIsTrailingZeros = false;

302

262

bool vmIsTrailingZeros = false;

HIT CBC

303

262

bool vrIsTrailingZeros = false;

303

262

bool vrIsTrailingZeros = false;

HIT CBC

304

262

if (e2 >= 0) {

304

262

if (e2 >= 0) {

305

// I tried special-casing q == 0, but there was no effect on performance.

305

// I tried special-casing q == 0, but there was no effect on performance.

306

// This expression is slightly faster than max(0, log10Pow2(e2) - 1).

306

// This expression is slightly faster than max(0, log10Pow2(e2) - 1).

HIT CBC

307

128

const std::uint32_t q = log10Pow2(e2) - (e2 > 3);

307

128

const std::uint32_t q = log10Pow2(e2) - (e2 > 3);

HIT CBC

308

128

e10 = (std::int32_t)q;

308

128

e10 = (std::int32_t)q;

HIT CBC

309

128

const std::int32_t k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int32_t)q) - 1;

309

128

const std::int32_t k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int32_t)q) - 1;

HIT CBC

310

128

const std::int32_t i = -e2 + (std::int32_t)q + k;

310

128

const std::int32_t i = -e2 + (std::int32_t)q + k;

311

#if defined(BOOST_JSON_RYU_OPTIMIZE_SIZE)

311

#if defined(BOOST_JSON_RYU_OPTIMIZE_SIZE)

312

uint64_t pow5[2];

312

uint64_t pow5[2];

313

double_computeInvPow5(q, pow5);

313

double_computeInvPow5(q, pow5);

314

vr = mulShiftAll(m2, pow5, i, &vp, &vm, mmShift);

314

vr = mulShiftAll(m2, pow5, i, &vp, &vm, mmShift);

315

#else

315

#else

HIT CBC

316

128

vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT()[q], i, &vp, &vm, mmShift);

316

128

vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT()[q], i, &vp, &vm, mmShift);

317

#endif

317

#endif

318

#ifdef RYU_DEBUG

318

#ifdef RYU_DEBUG

319

printf("%" PRIu64 " * 2^%d / 10^%u\n", mv, e2, q);

319

printf("%" PRIu64 " * 2^%d / 10^%u\n", mv, e2, q);

320

printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);

320

printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);

321

#endif

321

#endif

HIT CBC

322

128

if (q <= 21)

322

128

if (q <= 21)

323

{

323

{

324

// This should use q <= 22, but I think 21 is also safe. Smaller values

324

// This should use q <= 22, but I think 21 is also safe. Smaller values

325

// may still be safe, but it's more difficult to reason about them.

325

// may still be safe, but it's more difficult to reason about them.

326

// Only one of mp, mv, and mm can be a multiple of 5, if any.

326

// Only one of mp, mv, and mm can be a multiple of 5, if any.

HIT CBC

327

114

const std::uint32_t mvMod5 = ((std::uint32_t)mv) - 5 * ((std::uint32_t)div5(mv));

327

114

const std::uint32_t mvMod5 = ((std::uint32_t)mv) - 5 * ((std::uint32_t)div5(mv));

HIT CBC

328

114

if (mvMod5 == 0)

328

114

if (mvMod5 == 0)

329

{

329

{

HIT CBC

330

86

vrIsTrailingZeros = multipleOfPowerOf5(mv, q);

330

86

vrIsTrailingZeros = multipleOfPowerOf5(mv, q);

331

}

331

}

HIT CBC

332

28

else if (acceptBounds)

332

28

else if (acceptBounds)

333

{

333

{

334

// Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q

334

// Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q

335

// <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q

335

// <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q

336

// <=> true && pow5Factor(mm) >= q, since e2 >= q.

336

// <=> true && pow5Factor(mm) >= q, since e2 >= q.

HIT CBC

337

11

vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q);

337

11

vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q);

338

}

338

}

339

else

339

else

340

{

340

{

341

// Same as min(e2 + 1, pow5Factor(mp)) >= q.

341

// Same as min(e2 + 1, pow5Factor(mp)) >= q.

HIT CBC

342

17

vp -= multipleOfPowerOf5(mv + 2, q);

342

17

vp -= multipleOfPowerOf5(mv + 2, q);

343

}

343

}

344

}

344

}

345

}

345

}

346

else

346

else

347

{

347

{

348

// This expression is slightly faster than max(0, log10Pow5(-e2) - 1).

348

// This expression is slightly faster than max(0, log10Pow5(-e2) - 1).

HIT CBC

349

134

const std::uint32_t q = log10Pow5(-e2) - (-e2 > 1);

349

134

const std::uint32_t q = log10Pow5(-e2) - (-e2 > 1);

HIT CBC

350

134

e10 = (std::int32_t)q + e2;

350

134

e10 = (std::int32_t)q + e2;

HIT CBC

351

134

const std::int32_t i = -e2 - (std::int32_t)q;

351

134

const std::int32_t i = -e2 - (std::int32_t)q;

HIT CBC

352

134

const std::int32_t k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;

352

134

const std::int32_t k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;

HIT CBC

353

134

const std::int32_t j = (std::int32_t)q - k;

353

134

const std::int32_t j = (std::int32_t)q - k;

354

#if defined(BOOST_JSON_RYU_OPTIMIZE_SIZE)

354

#if defined(BOOST_JSON_RYU_OPTIMIZE_SIZE)

355

std::uint64_t pow5[2];

355

std::uint64_t pow5[2];

356

double_computePow5(i, pow5);

356

double_computePow5(i, pow5);

357

vr = mulShiftAll(m2, pow5, j, &vp, &vm, mmShift);

357

vr = mulShiftAll(m2, pow5, j, &vp, &vm, mmShift);

358

#else

358

#else

HIT CBC

359

134

vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT()[i], j, &vp, &vm, mmShift);

359

134

vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT()[i], j, &vp, &vm, mmShift);

360

#endif

360

#endif

361

#ifdef RYU_DEBUG

361

#ifdef RYU_DEBUG

362

printf("%" PRIu64 " * 5^%d / 10^%u\n", mv, -e2, q);

362

printf("%" PRIu64 " * 5^%d / 10^%u\n", mv, -e2, q);

363

printf("%u %d %d %d\n", q, i, k, j);

363

printf("%u %d %d %d\n", q, i, k, j);

364

printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);

364

printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);

365

#endif

365

#endif

HIT CBC

366

134

if (q <= 1)

366

134

if (q <= 1)

367

{

367

{

368

// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.

368

// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.

369

// mv = 4 * m2, so it always has at least two trailing 0 bits.

369

// mv = 4 * m2, so it always has at least two trailing 0 bits.

HIT CBC

370

3

vrIsTrailingZeros = true;

370

3

vrIsTrailingZeros = true;

HIT CBC

371

3

if (acceptBounds)

371

3

if (acceptBounds)

372

{

372

{

373

// mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.

373

// mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.

HIT CBC

374

3

vmIsTrailingZeros = mmShift == 1;

374

3

vmIsTrailingZeros = mmShift == 1;

375

}

375

}

376

else

376

else

377

{

377

{

378

// mp = mv + 2, so it always has at least one trailing 0 bit.

378

// mp = mv + 2, so it always has at least one trailing 0 bit.

MIS UBC

379

✗

--vp;

379

✗

--vp;

380

}

380

}

381

}

381

}

HIT CBC

382

131

else if (q < 63)

382

131

else if (q < 63)

383

{

383

{

384

// TODO(ulfjack): Use a tighter bound here.

384

// TODO(ulfjack): Use a tighter bound here.

385

// We want to know if the full product has at least q trailing zeros.

385

// We want to know if the full product has at least q trailing zeros.

386

// We need to compute min(p2(mv), p5(mv) - e2) >= q

386

// We need to compute min(p2(mv), p5(mv) - e2) >= q

387

// <=> p2(mv) >= q && p5(mv) - e2 >= q

387

// <=> p2(mv) >= q && p5(mv) - e2 >= q

388

// <=> p2(mv) >= q (because -e2 >= q)

388

// <=> p2(mv) >= q (because -e2 >= q)

HIT CBC

389

96

vrIsTrailingZeros = multipleOfPowerOf2(mv, q);

389

96

vrIsTrailingZeros = multipleOfPowerOf2(mv, q);

390

#ifdef RYU_DEBUG

390

#ifdef RYU_DEBUG

391

printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");

391

printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");

392

#endif

392

#endif

393

}

393

}

394

}

394

}

395

#ifdef RYU_DEBUG

395

#ifdef RYU_DEBUG

396

printf("e10=%d\n", e10);

396

printf("e10=%d\n", e10);

397

printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);

397

printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);

398

printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false");

398

printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false");

399

printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");

399

printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");

400

#endif

400

#endif

401

402

// Step 4: Find the shortest decimal representation in the interval of valid representations.

402

// Step 4: Find the shortest decimal representation in the interval of valid representations.

HIT CBC

403

262

std::int32_t removed = 0;

403

262

std::int32_t removed = 0;

HIT CBC

404

262

std::uint8_t lastRemovedDigit = 0;

404

262

std::uint8_t lastRemovedDigit = 0;

405

std::uint64_t output;

405

std::uint64_t output;

406

// On average, we remove ~2 digits.

406

// On average, we remove ~2 digits.

HIT CBC

407

262

if (vmIsTrailingZeros || vrIsTrailingZeros)

407

262

if (vmIsTrailingZeros || vrIsTrailingZeros)

408

{

408

{

409

// General case, which happens rarely (~0.7%).

409

// General case, which happens rarely (~0.7%).

410

for (;;)

410

for (;;)

411

{

411

{

HIT CBC

412

1663

const std::uint64_t vpDiv10 = div10(vp);

412

1663

const std::uint64_t vpDiv10 = div10(vp);

HIT CBC

413

1663

const std::uint64_t vmDiv10 = div10(vm);

413

1663

const std::uint64_t vmDiv10 = div10(vm);

HIT CBC

414

1663

if (vpDiv10 <= vmDiv10)

414

1663

if (vpDiv10 <= vmDiv10)

HIT CBC

415

94

break;

415

94

break;

HIT CBC

416

1569

const std::uint32_t vmMod10 = ((std::uint32_t)vm) - 10 * ((std::uint32_t)vmDiv10);

416

1569

const std::uint32_t vmMod10 = ((std::uint32_t)vm) - 10 * ((std::uint32_t)vmDiv10);

HIT CBC

417

1569

const std::uint64_t vrDiv10 = div10(vr);

417

1569

const std::uint64_t vrDiv10 = div10(vr);

HIT CBC

418

1569

const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);

418

1569

const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);

HIT CBC

419

1569

vmIsTrailingZeros &= vmMod10 == 0;

419

1569

vmIsTrailingZeros &= vmMod10 == 0;

HIT CBC

420

1569

vrIsTrailingZeros &= lastRemovedDigit == 0;

420

1569

vrIsTrailingZeros &= lastRemovedDigit == 0;

HIT CBC

421

1569

lastRemovedDigit = (uint8_t)vrMod10;

421

1569

lastRemovedDigit = (uint8_t)vrMod10;

HIT CBC

422

1569

vr = vrDiv10;

422

1569

vr = vrDiv10;

HIT CBC

423

1569

vp = vpDiv10;

423

1569

vp = vpDiv10;

HIT CBC

424

1569

vm = vmDiv10;

424

1569

vm = vmDiv10;

HIT CBC

425

1569

++removed;

425

1569

++removed;

HIT CBC

426

1569

}

426

1569

}

427

#ifdef RYU_DEBUG

427

#ifdef RYU_DEBUG

428

printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);

428

printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);

429

printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false");

429

printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false");

430

#endif

430

#endif

HIT CBC

431

94

if (vmIsTrailingZeros)

431

94

if (vmIsTrailingZeros)

432

{

432

{

433

for (;;)

433

for (;;)

434

{

434

{

HIT CBC

435

3

const std::uint64_t vmDiv10 = div10(vm);

435

3

const std::uint64_t vmDiv10 = div10(vm);

HIT CBC

436

3

const std::uint32_t vmMod10 = ((std::uint32_t)vm) - 10 * ((std::uint32_t)vmDiv10);

436

3

const std::uint32_t vmMod10 = ((std::uint32_t)vm) - 10 * ((std::uint32_t)vmDiv10);

HIT CBC

437

3

if (vmMod10 != 0)

437

3

if (vmMod10 != 0)

HIT CBC

438

2

break;

438

2

break;

HIT CBC

439

1

const std::uint64_t vpDiv10 = div10(vp);

439

1

const std::uint64_t vpDiv10 = div10(vp);

HIT CBC

440

1

const std::uint64_t vrDiv10 = div10(vr);

440

1

const std::uint64_t vrDiv10 = div10(vr);

HIT CBC

441

1

const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);

441

1

const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);

HIT CBC

442

1

vrIsTrailingZeros &= lastRemovedDigit == 0;

442

1

vrIsTrailingZeros &= lastRemovedDigit == 0;

HIT CBC

443

1

lastRemovedDigit = (uint8_t)vrMod10;

443

1

lastRemovedDigit = (uint8_t)vrMod10;

HIT CBC

444

1

vr = vrDiv10;

444

1

vr = vrDiv10;

HIT CBC

445

1

vp = vpDiv10;

445

1

vp = vpDiv10;

HIT CBC

446

1

vm = vmDiv10;

446

1

vm = vmDiv10;

HIT CBC

447

1

++removed;

447

1

++removed;

HIT CBC

448

1

}

448

1

}

449

}

449

}

450

#ifdef RYU_DEBUG

450

#ifdef RYU_DEBUG

451

printf("%" PRIu64 " %d\n", vr, lastRemovedDigit);

451

printf("%" PRIu64 " %d\n", vr, lastRemovedDigit);

452

printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");

452

printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");

453

#endif

453

#endif

HIT CBC

454

94

if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)

454

94

if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)

455

{

455

{

456

// Round even if the exact number is .....50..0.

456

// Round even if the exact number is .....50..0.

HIT CBC

457

1

lastRemovedDigit = 4;

457

1

lastRemovedDigit = 4;

458

}

458

}

459

// We need to take vr + 1 if vr is outside bounds or we need to round up.

459

// We need to take vr + 1 if vr is outside bounds or we need to round up.

HIT CBC

460

94

output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);

460

94

output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);

HIT CBC

461

94

}

461

94

}

462

else

462

else

463

{

463

{

464

// Specialized for the common case (~99.3%). Percentages below are relative to this.

464

// Specialized for the common case (~99.3%). Percentages below are relative to this.

HIT CBC

465

168

bool roundUp = false;

465

168

bool roundUp = false;

HIT CBC

466

168

const std::uint64_t vpDiv100 = div100(vp);

466

168

const std::uint64_t vpDiv100 = div100(vp);

HIT CBC

467

168

const std::uint64_t vmDiv100 = div100(vm);

467

168

const std::uint64_t vmDiv100 = div100(vm);

HIT CBC

468

168

if (vpDiv100 > vmDiv100)

468

168

if (vpDiv100 > vmDiv100)

469

{

469

{

470

// Optimization: remove two digits at a time (~86.2%).

470

// Optimization: remove two digits at a time (~86.2%).

HIT CBC

471

161

const std::uint64_t vrDiv100 = div100(vr);

471

161

const std::uint64_t vrDiv100 = div100(vr);

HIT CBC

472

161

const std::uint32_t vrMod100 = ((std::uint32_t)vr) - 100 * ((std::uint32_t)vrDiv100);

472

161

const std::uint32_t vrMod100 = ((std::uint32_t)vr) - 100 * ((std::uint32_t)vrDiv100);

HIT CBC

473

161

roundUp = vrMod100 >= 50;

473

161

roundUp = vrMod100 >= 50;

HIT CBC

474

161

vr = vrDiv100;

474

161

vr = vrDiv100;

HIT CBC

475

161

vp = vpDiv100;

475

161

vp = vpDiv100;

HIT CBC

476

161

vm = vmDiv100;

476

161

vm = vmDiv100;

HIT CBC

477

161

removed += 2;

477

161

removed += 2;

478

}

478

}

479

// Loop iterations below (approximately), without optimization above:

479

// Loop iterations below (approximately), without optimization above:

480

// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%

480

// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%

481

// Loop iterations below (approximately), with optimization above:

481

// Loop iterations below (approximately), with optimization above:

482

// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%

482

// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%

483

for (;;)

483

for (;;)

484

{

484

{

HIT CBC

485

2256

const std::uint64_t vpDiv10 = div10(vp);

485

2256

const std::uint64_t vpDiv10 = div10(vp);

HIT CBC

486

2256

const std::uint64_t vmDiv10 = div10(vm);

486

2256

const std::uint64_t vmDiv10 = div10(vm);

HIT CBC

487

2256

if (vpDiv10 <= vmDiv10)

487

2256

if (vpDiv10 <= vmDiv10)

HIT CBC

488

168

break;

488

168

break;

HIT CBC

489

2088

const std::uint64_t vrDiv10 = div10(vr);

489

2088

const std::uint64_t vrDiv10 = div10(vr);

HIT CBC

490

2088

const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);

490

2088

const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);

HIT CBC

491

2088

roundUp = vrMod10 >= 5;

491

2088

roundUp = vrMod10 >= 5;

HIT CBC

492

2088

vr = vrDiv10;

492

2088

vr = vrDiv10;

HIT CBC

493

2088

vp = vpDiv10;

493

2088

vp = vpDiv10;

HIT CBC

494

2088

vm = vmDiv10;

494

2088

vm = vmDiv10;

HIT CBC

495

2088

++removed;

495

2088

++removed;

HIT CBC

496

2088

}

496

2088

}

497

#ifdef RYU_DEBUG

497

#ifdef RYU_DEBUG

498

printf("%" PRIu64 " roundUp=%s\n", vr, roundUp ? "true" : "false");

498

printf("%" PRIu64 " roundUp=%s\n", vr, roundUp ? "true" : "false");

499

printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");

499

printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");

500

#endif

500

#endif

501

// We need to take vr + 1 if vr is outside bounds or we need to round up.

501

// We need to take vr + 1 if vr is outside bounds or we need to round up.

HIT CBC

502

168

output = vr + (vr == vm || roundUp);

502

168

output = vr + (vr == vm || roundUp);

503

}

503

}

HIT CBC

504

262

const std::int32_t exp = e10 + removed;

504

262

const std::int32_t exp = e10 + removed;

505

506

#ifdef RYU_DEBUG

506

#ifdef RYU_DEBUG

507

printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);

507

printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);

508

printf("O=%" PRIu64 "\n", output);

508

printf("O=%" PRIu64 "\n", output);

509

printf("EXP=%d\n", exp);

509

printf("EXP=%d\n", exp);

510

#endif

510

#endif

511

512

floating_decimal_64 fd;

512

floating_decimal_64 fd;

HIT CBC

513

262

fd.exponent = exp;

513

262

fd.exponent = exp;

HIT CBC

514

262

fd.mantissa = output;

514

262

fd.mantissa = output;

HIT CBC

515

262

return fd;

515

262

return fd;

516

}

516

}

517

518

inline

518

inline

519

int

519

int

HIT CBC

520

538

to_chars(

520

538

to_chars(

521

const floating_decimal_64 v,

521

const floating_decimal_64 v,

522

const bool sign,

522

const bool sign,

523

char* const result)

523

char* const result)

524

{

524

{

525

// Step 5: Print the decimal representation.

525

// Step 5: Print the decimal representation.

HIT CBC

526

538

int index = 0;

526

538

int index = 0;

HIT CBC

527

538

if (sign)

527

538

if (sign)

HIT CBC

528

129

result[index++] = '-';

528

129

result[index++] = '-';

529

HIT CBC

530

538

std::uint64_t output = v.mantissa;

530

538

std::uint64_t output = v.mantissa;

HIT CBC

531

538

std::uint32_t const olength = decimalLength17(output);

531

538

std::uint32_t const olength = decimalLength17(output);

532

533

#ifdef RYU_DEBUG

533

#ifdef RYU_DEBUG

534

printf("DIGITS=%" PRIu64 "\n", v.mantissa);

534

printf("DIGITS=%" PRIu64 "\n", v.mantissa);

535

printf("OLEN=%u\n", olength);

535

printf("OLEN=%u\n", olength);

536

printf("EXP=%u\n", v.exponent + olength);

536

printf("EXP=%u\n", v.exponent + olength);

537

#endif

537

#endif

538

539

// Print the decimal digits.

539

// Print the decimal digits.

540

// The following code is equivalent to:

540

// The following code is equivalent to:

541

// for (uint32_t i = 0; i < olength - 1; ++i) {

541

// for (uint32_t i = 0; i < olength - 1; ++i) {

542

// const uint32_t c = output % 10; output /= 10;

542

// const uint32_t c = output % 10; output /= 10;

543

// result[index + olength - i] = (char) ('0' + c);

543

// result[index + olength - i] = (char) ('0' + c);

544

// }

544

// }

545

// result[index] = '0' + output % 10;

545

// result[index] = '0' + output % 10;

546

HIT CBC

547

538

std::uint32_t i = 0;

547

538

std::uint32_t i = 0;

548

// We prefer 32-bit operations, even on 64-bit platforms.

548

// We prefer 32-bit operations, even on 64-bit platforms.

549

// We have at most 17 digits, and uint32_t can store 9 digits.

549

// We have at most 17 digits, and uint32_t can store 9 digits.

550

// If output doesn't fit into uint32_t, we cut off 8 digits,

550

// If output doesn't fit into uint32_t, we cut off 8 digits,

551

// so the rest will fit into uint32_t.

551

// so the rest will fit into uint32_t.

HIT CBC

552

538

if ((output >> 32) != 0)

552

538

if ((output >> 32) != 0)

553

{

553

{

554

// Expensive 64-bit division.

554

// Expensive 64-bit division.

HIT CBC

555

59

std::uint64_t const q = div1e8(output);

555

59

std::uint64_t const q = div1e8(output);

HIT CBC

556

59

std::uint32_t output2 = ((std::uint32_t)output) - 100000000 * ((std::uint32_t)q);

556

59

std::uint32_t output2 = ((std::uint32_t)output) - 100000000 * ((std::uint32_t)q);

HIT CBC

557

59

output = q;

557

59

output = q;

558

HIT CBC

559

59

const std::uint32_t c = output2 % 10000;

559

59

const std::uint32_t c = output2 % 10000;

HIT CBC

560

59

output2 /= 10000;

560

59

output2 /= 10000;

HIT CBC

561

59

const std::uint32_t d = output2 % 10000;

561

59

const std::uint32_t d = output2 % 10000;

HIT CBC

562

59

const std::uint32_t c0 = (c % 100) << 1;

562

59

const std::uint32_t c0 = (c % 100) << 1;

HIT CBC

563

59

const std::uint32_t c1 = (c / 100) << 1;

563

59

const std::uint32_t c1 = (c / 100) << 1;

HIT CBC

564

59

const std::uint32_t d0 = (d % 100) << 1;

564

59

const std::uint32_t d0 = (d % 100) << 1;

HIT CBC

565

59

const std::uint32_t d1 = (d / 100) << 1;

565

59

const std::uint32_t d1 = (d / 100) << 1;

HIT CBC

566

59

std::memcpy(result + index + olength - i - 1, DIGIT_TABLE() + c0, 2);

566

59

std::memcpy(result + index + olength - i - 1, DIGIT_TABLE() + c0, 2);

HIT CBC

567

59

std::memcpy(result + index + olength - i - 3, DIGIT_TABLE() + c1, 2);

567

59

std::memcpy(result + index + olength - i - 3, DIGIT_TABLE() + c1, 2);

HIT CBC

568

59

std::memcpy(result + index + olength - i - 5, DIGIT_TABLE() + d0, 2);

568

59

std::memcpy(result + index + olength - i - 5, DIGIT_TABLE() + d0, 2);

HIT CBC

569

59

std::memcpy(result + index + olength - i - 7, DIGIT_TABLE() + d1, 2);

569

59

std::memcpy(result + index + olength - i - 7, DIGIT_TABLE() + d1, 2);

HIT CBC

570

59

i += 8;

570

59

i += 8;

571

}

571

}

HIT CBC

572

538

uint32_t output2 = (std::uint32_t)output;

572

538

uint32_t output2 = (std::uint32_t)output;

HIT CBC

573

638

while (output2 >= 10000)

573

638

while (output2 >= 10000)

574

{

574

{

575

#ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=38217

575

#ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=38217

576

const uint32_t c = output2 - 10000 * (output2 / 10000);

576

const uint32_t c = output2 - 10000 * (output2 / 10000);

577

#else

577

#else

HIT CBC

578

100

const uint32_t c = output2 % 10000;

578

100

const uint32_t c = output2 % 10000;

579

#endif

579

#endif

HIT CBC

580

100

output2 /= 10000;

580

100

output2 /= 10000;

HIT CBC

581

100

const uint32_t c0 = (c % 100) << 1;

581

100

const uint32_t c0 = (c % 100) << 1;

HIT CBC

582

100

const uint32_t c1 = (c / 100) << 1;

582

100

const uint32_t c1 = (c / 100) << 1;

HIT CBC

583

100

memcpy(result + index + olength - i - 1, DIGIT_TABLE() + c0, 2);

583

100

memcpy(result + index + olength - i - 1, DIGIT_TABLE() + c0, 2);

HIT CBC

584

100

memcpy(result + index + olength - i - 3, DIGIT_TABLE() + c1, 2);

584

100

memcpy(result + index + olength - i - 3, DIGIT_TABLE() + c1, 2);

HIT CBC

585

100

i += 4;

585

100

i += 4;

586

}

586

}

HIT CBC

587

538

if (output2 >= 100) {

587

538

if (output2 >= 100) {

HIT CBC

588

69

const uint32_t c = (output2 % 100) << 1;

588

69

const uint32_t c = (output2 % 100) << 1;

HIT CBC

589

69

output2 /= 100;

589

69

output2 /= 100;

HIT CBC

590

69

memcpy(result + index + olength - i - 1, DIGIT_TABLE() + c, 2);

590

69

memcpy(result + index + olength - i - 1, DIGIT_TABLE() + c, 2);

HIT CBC

591

69

i += 2;

591

69

i += 2;

592

}

592

}

HIT CBC

593

538

if (output2 >= 10) {

593

538

if (output2 >= 10) {

HIT CBC

594

62

const uint32_t c = output2 << 1;

594

62

const uint32_t c = output2 << 1;

595

// We can't use memcpy here: the decimal dot goes between these two digits.

595

// We can't use memcpy here: the decimal dot goes between these two digits.

HIT CBC

596

62

result[index + olength - i] = DIGIT_TABLE()[c + 1];

596

62

result[index + olength - i] = DIGIT_TABLE()[c + 1];

HIT CBC

597

62

result[index] = DIGIT_TABLE()[c];

597

62

result[index] = DIGIT_TABLE()[c];

598

}

598

}

599

else {

599

else {

HIT CBC

600

476

result[index] = (char)('0' + output2);

600

476

result[index] = (char)('0' + output2);

601

}

601

}

602

603

// Print decimal point if needed.

603

// Print decimal point if needed.

HIT CBC

604

538

if (olength > 1) {

604

538

if (olength > 1) {

HIT CBC

605

123

result[index + 1] = '.';

605

123

result[index + 1] = '.';

HIT CBC

606

123

index += olength + 1;

606

123

index += olength + 1;

607

}

607

}

608

else {

608

else {

HIT CBC

609

415

++index;

609

415

++index;

610

}

610

}

611

612

// Print the exponent.

612

// Print the exponent.

HIT CBC

613

538

result[index++] = 'E';

613

538

result[index++] = 'E';

HIT CBC

614

538

int32_t exp = v.exponent + (int32_t)olength - 1;

614

538

int32_t exp = v.exponent + (int32_t)olength - 1;

HIT CBC

615

538

if (exp < 0) {

615

538

if (exp < 0) {

HIT CBC

616

92

result[index++] = '-';

616

92

result[index++] = '-';

HIT CBC

617

92

exp = -exp;

617

92

exp = -exp;

618

}

618

}

619

HIT CBC

620

538

if (exp >= 100) {

620

538

if (exp >= 100) {

HIT CBC

621

33

const int32_t c = exp % 10;

621

33

const int32_t c = exp % 10;

HIT CBC

622

33

memcpy(result + index, DIGIT_TABLE() + 2 * (exp / 10), 2);

622

33

memcpy(result + index, DIGIT_TABLE() + 2 * (exp / 10), 2);

HIT CBC

623

33

result[index + 2] = (char)('0' + c);

623

33

result[index + 2] = (char)('0' + c);

HIT CBC

624

33

index += 3;

624

33

index += 3;

625

}

625

}

HIT CBC

626

505

else if (exp >= 10) {

626

505

else if (exp >= 10) {

HIT CBC

627

180

memcpy(result + index, DIGIT_TABLE() + 2 * exp, 2);

627

180

memcpy(result + index, DIGIT_TABLE() + 2 * exp, 2);

HIT CBC

628

180

index += 2;

628

180

index += 2;

629

}

629

}

630

else {

630

else {

HIT CBC

631

325

result[index++] = (char)('0' + exp);

631

325

result[index++] = (char)('0' + exp);

632

}

632

}

633

HIT CBC

634

538

return index;

634

538

return index;

635

}

635

}

636

HIT CBC

637

538

static inline bool d2d_small_int(const uint64_t ieeeMantissa, const uint32_t ieeeExponent,

637

538

static inline bool d2d_small_int(const uint64_t ieeeMantissa, const uint32_t ieeeExponent,

638

floating_decimal_64* const v) {

638

floating_decimal_64* const v) {

HIT CBC

639

538

const uint64_t m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;

639

538

const uint64_t m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;

HIT CBC

640

538

const int32_t e2 = (int32_t) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;

640

538

const int32_t e2 = (int32_t) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;

641

HIT CBC

642

538

if (e2 > 0) {

642

538

if (e2 > 0) {

643

// f = m2 * 2^e2 >= 2^53 is an integer.

643

// f = m2 * 2^e2 >= 2^53 is an integer.

644

// Ignore this case for now.

644

// Ignore this case for now.

HIT CBC

645

131

return false;

645

131

return false;

646

}

646

}

647

HIT CBC

648

407

if (e2 < -52) {

648

407

if (e2 < -52) {

649

// f < 1.

649

// f < 1.

HIT CBC

650

92

return false;

650

92

return false;

651

}

651

}

652

653

// Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52: 1 <= f = m2 / 2^-e2 < 2^53.

653

// Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52: 1 <= f = m2 / 2^-e2 < 2^53.

654

// Test if the lower -e2 bits of the significand are 0, i.e. whether the fraction is 0.

654

// Test if the lower -e2 bits of the significand are 0, i.e. whether the fraction is 0.

HIT CBC

655

315

const uint64_t mask = (1ull << -e2) - 1;

655

315

const uint64_t mask = (1ull << -e2) - 1;

HIT CBC

656

315

const uint64_t fraction = m2 & mask;

656

315

const uint64_t fraction = m2 & mask;

HIT CBC

657

315

if (fraction != 0) {

657

315

if (fraction != 0) {

HIT CBC

658

39

return false;

658

39

return false;

659

}

659

}

660

661

// f is an integer in the range [1, 2^53).

661

// f is an integer in the range [1, 2^53).

662

// Note: mantissa might contain trailing (decimal) 0's.

662

// Note: mantissa might contain trailing (decimal) 0's.

663

// Note: since 2^53 < 10^16, there is no need to adjust decimalLength17().

663

// Note: since 2^53 < 10^16, there is no need to adjust decimalLength17().

HIT CBC

664

276

v->mantissa = m2 >> -e2;

664

276

v->mantissa = m2 >> -e2;

HIT CBC

665

276

v->exponent = 0;

665

276

v->exponent = 0;

HIT CBC

666

276

return true;

666

276

return true;

667

}

667

}

668

669

} // detail

669

} // detail

670

671

int

671

int

HIT CBC

672

609

d2s_buffered_n(

672

609

d2s_buffered_n(

673

double f,

673

double f,

674

char* result,

674

char* result,

675

bool allow_infinity_and_nan) noexcept

675

bool allow_infinity_and_nan) noexcept

676

{

676

{

677

using namespace detail;

677

using namespace detail;

678

// Step 1: Decode the floating-point number, and unify normalized and subnormal cases.

678

// Step 1: Decode the floating-point number, and unify normalized and subnormal cases.

HIT CBC

679

609

std::uint64_t const bits = double_to_bits(f);

679

609

std::uint64_t const bits = double_to_bits(f);

680

681

#ifdef RYU_DEBUG

681

#ifdef RYU_DEBUG

682

printf("IN=");

682

printf("IN=");

683

for (std::int32_t bit = 63; bit >= 0; --bit) {

683

for (std::int32_t bit = 63; bit >= 0; --bit) {

684

printf("%d", (int)((bits >> bit) & 1));

684

printf("%d", (int)((bits >> bit) & 1));

685

}

685

}

686

printf("\n");

686

printf("\n");

687

#endif

687

#endif

688

689

// Decode bits into sign, mantissa, and exponent.

689

// Decode bits into sign, mantissa, and exponent.

HIT CBC

690

609

const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;

690

609

const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;

HIT CBC

691

609

const std::uint64_t ieeeMantissa = bits & ((1ull << DOUBLE_MANTISSA_BITS) - 1);

691

609

const std::uint64_t ieeeMantissa = bits & ((1ull << DOUBLE_MANTISSA_BITS) - 1);

HIT CBC

692

609

const std::uint32_t ieeeExponent = (std::uint32_t)((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1));

692

609

const std::uint32_t ieeeExponent = (std::uint32_t)((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1));

693

// Case distinction; exit early for the easy cases.

693

// Case distinction; exit early for the easy cases.

HIT CBC

694

609

if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0)) {

694

609

if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0)) {

695

// We changed how special numbers are output by default

695

// We changed how special numbers are output by default

HIT CBC

696

71

if (allow_infinity_and_nan)

696

71

if (allow_infinity_and_nan)

HIT CBC

697

11

return copy_special_str(result, ieeeSign, ieeeExponent != 0, ieeeMantissa != 0);

697

11

return copy_special_str(result, ieeeSign, ieeeExponent != 0, ieeeMantissa != 0);

698

else

698

else

HIT CBC

699

60

return copy_special_str_conforming(result, ieeeSign, ieeeExponent != 0, ieeeMantissa != 0);

699

60

return copy_special_str_conforming(result, ieeeSign, ieeeExponent != 0, ieeeMantissa != 0);

700

701

}

701

}

702

703

floating_decimal_64 v;

703

floating_decimal_64 v;

HIT CBC

704

538

const bool isSmallInt = d2d_small_int(ieeeMantissa, ieeeExponent, &v);

704

538

const bool isSmallInt = d2d_small_int(ieeeMantissa, ieeeExponent, &v);

HIT CBC

705

538

if (isSmallInt) {

705

538

if (isSmallInt) {

706

// For small integers in the range [1, 2^53), v.mantissa might contain trailing (decimal) zeros.

706

// For small integers in the range [1, 2^53), v.mantissa might contain trailing (decimal) zeros.

707

// For scientific notation we need to move these zeros into the exponent.

707

// For scientific notation we need to move these zeros into the exponent.

708

// (This is not needed for fixed-point notation, so it might be beneficial to trim

708

// (This is not needed for fixed-point notation, so it might be beneficial to trim

709

// trailing zeros in to_chars only if needed - once fixed-point notation output is implemented.)

709

// trailing zeros in to_chars only if needed - once fixed-point notation output is implemented.)

710

for (;;) {

710

for (;;) {

HIT CBC

711

698

std::uint64_t const q = div10(v.mantissa);

711

698

std::uint64_t const q = div10(v.mantissa);

HIT CBC

712

698

std::uint32_t const r = ((std::uint32_t) v.mantissa) - 10 * ((std::uint32_t) q);

712

698

std::uint32_t const r = ((std::uint32_t) v.mantissa) - 10 * ((std::uint32_t) q);

HIT CBC

713

698

if (r != 0)

713

698

if (r != 0)

HIT CBC

714

276

break;

714

276

break;

HIT CBC

715

422

v.mantissa = q;

715

422

v.mantissa = q;

HIT CBC

716

422

++v.exponent;

716

422

++v.exponent;

HIT CBC

717

422

}

717

422

}

718

}

718

}

719

else {

719

else {

HIT CBC

720

262

v = d2d(ieeeMantissa, ieeeExponent);

720

262

v = d2d(ieeeMantissa, ieeeExponent);

721

}

721

}

722

HIT CBC

723

538

return to_chars(v, ieeeSign, result);

723

538

return to_chars(v, ieeeSign, result);

724

}

724

}

725

726

} // ryu

726

} // ryu

727

728

} // detail

728

} // detail

729

} // namespace json

729

} // namespace json

730

} // namespace boost

730

} // namespace boost

731

732

#endif

732

#endif