99.56% Lines (224/225) 100.00% Functions (7/7)
TLA Baseline Branch
Line Hits Code Line Hits Code
1   // Copyright 2018 Ulf Adams 1   // Copyright 2018 Ulf Adams
2   // 2   //
3   // The contents of this file may be used under the terms of the Apache License, 3   // The contents of this file may be used under the terms of the Apache License,
4   // Version 2.0. 4   // Version 2.0.
5   // 5   //
6   // (See accompanying file LICENSE-Apache or copy at 6   // (See accompanying file LICENSE-Apache or copy at
7   // http://www.apache.org/licenses/LICENSE-2.0) 7   // http://www.apache.org/licenses/LICENSE-2.0)
8   // 8   //
9   // Alternatively, the contents of this file may be used under the terms of 9   // Alternatively, the contents of this file may be used under the terms of
10   // the Boost Software License, Version 1.0. 10   // the Boost Software License, Version 1.0.
11   // (See accompanying file LICENSE-Boost or copy at 11   // (See accompanying file LICENSE-Boost or copy at
12   // https://www.boost.org/LICENSE_1_0.txt) 12   // https://www.boost.org/LICENSE_1_0.txt)
13   // 13   //
14   // Unless required by applicable law or agreed to in writing, this software 14   // Unless required by applicable law or agreed to in writing, this software
15   // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15   // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   // KIND, either express or implied. 16   // KIND, either express or implied.
17   17  
18   // Runtime compiler options: 18   // Runtime compiler options:
19   // -DRYU_DEBUG Generate verbose debugging output to stdout. 19   // -DRYU_DEBUG Generate verbose debugging output to stdout.
20   // 20   //
21   // -DRYU_ONLY_64_BIT_OPS Avoid using uint128_t or 64-bit intrinsics. Slower, 21   // -DRYU_ONLY_64_BIT_OPS Avoid using uint128_t or 64-bit intrinsics. Slower,
22   // depending on your compiler. 22   // depending on your compiler.
23   // 23   //
24   // -DRYU_OPTIMIZE_SIZE Use smaller lookup tables. Instead of storing every 24   // -DRYU_OPTIMIZE_SIZE Use smaller lookup tables. Instead of storing every
25   // required power of 5, only store every 26th entry, and compute 25   // required power of 5, only store every 26th entry, and compute
26   // intermediate values with a multiplication. This reduces the lookup table 26   // intermediate values with a multiplication. This reduces the lookup table
27   // size by about 10x (only one case, and only double) at the cost of some 27   // size by about 10x (only one case, and only double) at the cost of some
28   // performance. Currently requires MSVC intrinsics. 28   // performance. Currently requires MSVC intrinsics.
29   29  
30   /* 30   /*
31   This is a derivative work 31   This is a derivative work
32   */ 32   */
33   33  
34   #ifndef BOOST_JSON_DETAIL_RYU_IMPL_D2S_IPP 34   #ifndef BOOST_JSON_DETAIL_RYU_IMPL_D2S_IPP
35   #define BOOST_JSON_DETAIL_RYU_IMPL_D2S_IPP 35   #define BOOST_JSON_DETAIL_RYU_IMPL_D2S_IPP
36   36  
37   #include <boost/json/detail/ryu/ryu.hpp> 37   #include <boost/json/detail/ryu/ryu.hpp>
38   #include <cstdlib> 38   #include <cstdlib>
39   #include <cstring> 39   #include <cstring>
40   40  
41   #ifdef RYU_DEBUG 41   #ifdef RYU_DEBUG
42   #include <stdio.h> 42   #include <stdio.h>
43   #endif 43   #endif
44   44  
45   // ABSL avoids uint128_t on Win32 even if __SIZEOF_INT128__ is defined. 45   // ABSL avoids uint128_t on Win32 even if __SIZEOF_INT128__ is defined.
46   // Let's do the same for now. 46   // Let's do the same for now.
47   #if defined(__SIZEOF_INT128__) && !defined(_MSC_VER) && !defined(RYU_ONLY_64_BIT_OPS) 47   #if defined(__SIZEOF_INT128__) && !defined(_MSC_VER) && !defined(RYU_ONLY_64_BIT_OPS)
48   #define BOOST_JSON_RYU_HAS_UINT128 48   #define BOOST_JSON_RYU_HAS_UINT128
49   #elif defined(_MSC_VER) && !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64) 49   #elif defined(_MSC_VER) && !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64)
50   #define BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS 50   #define BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS
51   #endif 51   #endif
52   52  
53   #include <boost/json/detail/ryu/detail/common.hpp> 53   #include <boost/json/detail/ryu/detail/common.hpp>
54   #include <boost/json/detail/ryu/detail/digit_table.hpp> 54   #include <boost/json/detail/ryu/detail/digit_table.hpp>
55   #include <boost/json/detail/ryu/detail/d2s.hpp> 55   #include <boost/json/detail/ryu/detail/d2s.hpp>
56   #include <boost/json/detail/ryu/detail/d2s_intrinsics.hpp> 56   #include <boost/json/detail/ryu/detail/d2s_intrinsics.hpp>
57   57  
58   namespace boost { 58   namespace boost {
59   namespace json { 59   namespace json {
60   namespace detail { 60   namespace detail {
61   61  
62   namespace ryu { 62   namespace ryu {
63   namespace detail { 63   namespace detail {
64   64  
65   // We need a 64x128-bit multiplication and a subsequent 128-bit shift. 65   // We need a 64x128-bit multiplication and a subsequent 128-bit shift.
66   // Multiplication: 66   // Multiplication:
67   // The 64-bit factor is variable and passed in, the 128-bit factor comes 67   // The 64-bit factor is variable and passed in, the 128-bit factor comes
68   // from a lookup table. We know that the 64-bit factor only has 55 68   // from a lookup table. We know that the 64-bit factor only has 55
69   // significant bits (i.e., the 9 topmost bits are zeros). The 128-bit 69   // significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
70   // factor only has 124 significant bits (i.e., the 4 topmost bits are 70   // factor only has 124 significant bits (i.e., the 4 topmost bits are
71   // zeros). 71   // zeros).
72   // Shift: 72   // Shift:
73   // In principle, the multiplication result requires 55 + 124 = 179 bits to 73   // In principle, the multiplication result requires 55 + 124 = 179 bits to
74   // represent. However, we then shift this value to the right by j, which is 74   // represent. However, we then shift this value to the right by j, which is
75   // at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64 75   // at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64
76   // bits. This means that we only need the topmost 64 significant bits of 76   // bits. This means that we only need the topmost 64 significant bits of
77   // the 64x128-bit multiplication. 77   // the 64x128-bit multiplication.
78   // 78   //
79   // There are several ways to do this: 79   // There are several ways to do this:
80   // 1. Best case: the compiler exposes a 128-bit type. 80   // 1. Best case: the compiler exposes a 128-bit type.
81   // We perform two 64x64-bit multiplications, add the higher 64 bits of the 81   // We perform two 64x64-bit multiplications, add the higher 64 bits of the
82   // lower result to the higher result, and shift by j - 64 bits. 82   // lower result to the higher result, and shift by j - 64 bits.
83   // 83   //
84   // We explicitly cast from 64-bit to 128-bit, so the compiler can tell 84   // We explicitly cast from 64-bit to 128-bit, so the compiler can tell
85   // that these are only 64-bit inputs, and can map these to the best 85   // that these are only 64-bit inputs, and can map these to the best
86   // possible sequence of assembly instructions. 86   // possible sequence of assembly instructions.
87   // x64 machines happen to have matching assembly instructions for 87   // x64 machines happen to have matching assembly instructions for
88   // 64x64-bit multiplications and 128-bit shifts. 88   // 64x64-bit multiplications and 128-bit shifts.
89   // 89   //
90   // 2. Second best case: the compiler exposes intrinsics for the x64 assembly 90   // 2. Second best case: the compiler exposes intrinsics for the x64 assembly
91   // instructions mentioned in 1. 91   // instructions mentioned in 1.
92   // 92   //
93   // 3. We only have 64x64 bit instructions that return the lower 64 bits of 93   // 3. We only have 64x64 bit instructions that return the lower 64 bits of
94   // the result, i.e., we have to use plain C. 94   // the result, i.e., we have to use plain C.
95   // Our inputs are less than the full width, so we have three options: 95   // Our inputs are less than the full width, so we have three options:
96   // a. Ignore this fact and just implement the intrinsics manually. 96   // a. Ignore this fact and just implement the intrinsics manually.
97   // b. Split both into 31-bit pieces, which guarantees no internal overflow, 97   // b. Split both into 31-bit pieces, which guarantees no internal overflow,
98   // but requires extra work upfront (unless we change the lookup table). 98   // but requires extra work upfront (unless we change the lookup table).
99   // c. Split only the first factor into 31-bit pieces, which also guarantees 99   // c. Split only the first factor into 31-bit pieces, which also guarantees
100   // no internal overflow, but requires extra work since the intermediate 100   // no internal overflow, but requires extra work since the intermediate
101   // results are not perfectly aligned. 101   // results are not perfectly aligned.
102   #if defined(BOOST_JSON_RYU_HAS_UINT128) 102   #if defined(BOOST_JSON_RYU_HAS_UINT128)
103   103  
104   // Best case: use 128-bit type. 104   // Best case: use 128-bit type.
105   inline 105   inline
106   std::uint64_t 106   std::uint64_t
HITCBC 107   786 mulShift( 107   786 mulShift(
108   const std::uint64_t m, 108   const std::uint64_t m,
109   const std::uint64_t* const mul, 109   const std::uint64_t* const mul,
110   const std::int32_t j) noexcept 110   const std::int32_t j) noexcept
111   { 111   {
HITCBC 112   786 const uint128_t b0 = ((uint128_t) m) * mul[0]; 112   786 const uint128_t b0 = ((uint128_t) m) * mul[0];
HITCBC 113   786 const uint128_t b2 = ((uint128_t) m) * mul[1]; 113   786 const uint128_t b2 = ((uint128_t) m) * mul[1];
HITCBC 114   786 return (std::uint64_t) (((b0 >> 64) + b2) >> (j - 64)); 114   786 return (std::uint64_t) (((b0 >> 64) + b2) >> (j - 64));
115   } 115   }
116   116  
117   inline 117   inline
118   uint64_t 118   uint64_t
HITCBC 119   262 mulShiftAll( 119   262 mulShiftAll(
120   const std::uint64_t m, 120   const std::uint64_t m,
121   const std::uint64_t* const mul, 121   const std::uint64_t* const mul,
122   std::int32_t const j, 122   std::int32_t const j,
123   std::uint64_t* const vp, 123   std::uint64_t* const vp,
124   std::uint64_t* const vm, 124   std::uint64_t* const vm,
125   const std::uint32_t mmShift) noexcept 125   const std::uint32_t mmShift) noexcept
126   { 126   {
127   // m <<= 2; 127   // m <<= 2;
128   // uint128_t b0 = ((uint128_t) m) * mul[0]; // 0 128   // uint128_t b0 = ((uint128_t) m) * mul[0]; // 0
129   // uint128_t b2 = ((uint128_t) m) * mul[1]; // 64 129   // uint128_t b2 = ((uint128_t) m) * mul[1]; // 64
130   // 130   //
131   // uint128_t hi = (b0 >> 64) + b2; 131   // uint128_t hi = (b0 >> 64) + b2;
132   // uint128_t lo = b0 & 0xffffffffffffffffull; 132   // uint128_t lo = b0 & 0xffffffffffffffffull;
133   // uint128_t factor = (((uint128_t) mul[1]) << 64) + mul[0]; 133   // uint128_t factor = (((uint128_t) mul[1]) << 64) + mul[0];
134   // uint128_t vpLo = lo + (factor << 1); 134   // uint128_t vpLo = lo + (factor << 1);
135   // *vp = (std::uint64_t) ((hi + (vpLo >> 64)) >> (j - 64)); 135   // *vp = (std::uint64_t) ((hi + (vpLo >> 64)) >> (j - 64));
136   // uint128_t vmLo = lo - (factor << mmShift); 136   // uint128_t vmLo = lo - (factor << mmShift);
137   // *vm = (std::uint64_t) ((hi + (vmLo >> 64) - (((uint128_t) 1ull) << 64)) >> (j - 64)); 137   // *vm = (std::uint64_t) ((hi + (vmLo >> 64) - (((uint128_t) 1ull) << 64)) >> (j - 64));
138   // return (std::uint64_t) (hi >> (j - 64)); 138   // return (std::uint64_t) (hi >> (j - 64));
HITCBC 139   262 *vp = mulShift(4 * m + 2, mul, j); 139   262 *vp = mulShift(4 * m + 2, mul, j);
HITCBC 140   262 *vm = mulShift(4 * m - 1 - mmShift, mul, j); 140   262 *vm = mulShift(4 * m - 1 - mmShift, mul, j);
HITCBC 141   262 return mulShift(4 * m, mul, j); 141   262 return mulShift(4 * m, mul, j);
142   } 142   }
143   143  
144   #elif defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS) 144   #elif defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS)
145   145  
146   inline 146   inline
147   std::uint64_t 147   std::uint64_t
148   mulShift( 148   mulShift(
149   const std::uint64_t m, 149   const std::uint64_t m,
150   const std::uint64_t* const mul, 150   const std::uint64_t* const mul,
151   const std::int32_t j) noexcept 151   const std::int32_t j) noexcept
152   { 152   {
153   // m is maximum 55 bits 153   // m is maximum 55 bits
154   std::uint64_t high1; // 128 154   std::uint64_t high1; // 128
155   std::uint64_t const low1 = umul128(m, mul[1], &high1); // 64 155   std::uint64_t const low1 = umul128(m, mul[1], &high1); // 64
156   std::uint64_t high0; // 64 156   std::uint64_t high0; // 64
157   umul128(m, mul[0], &high0); // 0 157   umul128(m, mul[0], &high0); // 0
158   std::uint64_t const sum = high0 + low1; 158   std::uint64_t const sum = high0 + low1;
159   if (sum < high0) 159   if (sum < high0)
160   ++high1; // overflow into high1 160   ++high1; // overflow into high1
161   return shiftright128(sum, high1, j - 64); 161   return shiftright128(sum, high1, j - 64);
162   } 162   }
163   163  
164   inline 164   inline
165   std::uint64_t 165   std::uint64_t
166   mulShiftAll( 166   mulShiftAll(
167   const std::uint64_t m, 167   const std::uint64_t m,
168   const std::uint64_t* const mul, 168   const std::uint64_t* const mul,
169   const std::int32_t j, 169   const std::int32_t j,
170   std::uint64_t* const vp, 170   std::uint64_t* const vp,
171   std::uint64_t* const vm, 171   std::uint64_t* const vm,
172   const std::uint32_t mmShift) noexcept 172   const std::uint32_t mmShift) noexcept
173   { 173   {
174   *vp = mulShift(4 * m + 2, mul, j); 174   *vp = mulShift(4 * m + 2, mul, j);
175   *vm = mulShift(4 * m - 1 - mmShift, mul, j); 175   *vm = mulShift(4 * m - 1 - mmShift, mul, j);
176   return mulShift(4 * m, mul, j); 176   return mulShift(4 * m, mul, j);
177   } 177   }
178   178  
179   #else // !defined(BOOST_JSON_RYU_HAS_UINT128) && !defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS) 179   #else // !defined(BOOST_JSON_RYU_HAS_UINT128) && !defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS)
180   180  
181   inline 181   inline
182   std::uint64_t 182   std::uint64_t
183   mulShiftAll( 183   mulShiftAll(
184   std::uint64_t m, 184   std::uint64_t m,
185   const std::uint64_t* const mul, 185   const std::uint64_t* const mul,
186   const std::int32_t j, 186   const std::int32_t j,
187   std::uint64_t* const vp, 187   std::uint64_t* const vp,
188   std::uint64_t* const vm, 188   std::uint64_t* const vm,
189   const std::uint32_t mmShift) 189   const std::uint32_t mmShift)
190   { 190   {
191   m <<= 1; 191   m <<= 1;
192   // m is maximum 55 bits 192   // m is maximum 55 bits
193   std::uint64_t tmp; 193   std::uint64_t tmp;
194   std::uint64_t const lo = umul128(m, mul[0], &tmp); 194   std::uint64_t const lo = umul128(m, mul[0], &tmp);
195   std::uint64_t hi; 195   std::uint64_t hi;
196   std::uint64_t const mid = tmp + umul128(m, mul[1], &hi); 196   std::uint64_t const mid = tmp + umul128(m, mul[1], &hi);
197   hi += mid < tmp; // overflow into hi 197   hi += mid < tmp; // overflow into hi
198   198  
199   const std::uint64_t lo2 = lo + mul[0]; 199   const std::uint64_t lo2 = lo + mul[0];
200   const std::uint64_t mid2 = mid + mul[1] + (lo2 < lo); 200   const std::uint64_t mid2 = mid + mul[1] + (lo2 < lo);
201   const std::uint64_t hi2 = hi + (mid2 < mid); 201   const std::uint64_t hi2 = hi + (mid2 < mid);
202   *vp = shiftright128(mid2, hi2, (std::uint32_t)(j - 64 - 1)); 202   *vp = shiftright128(mid2, hi2, (std::uint32_t)(j - 64 - 1));
203   203  
204   if (mmShift == 1) 204   if (mmShift == 1)
205   { 205   {
206   const std::uint64_t lo3 = lo - mul[0]; 206   const std::uint64_t lo3 = lo - mul[0];
207   const std::uint64_t mid3 = mid - mul[1] - (lo3 > lo); 207   const std::uint64_t mid3 = mid - mul[1] - (lo3 > lo);
208   const std::uint64_t hi3 = hi - (mid3 > mid); 208   const std::uint64_t hi3 = hi - (mid3 > mid);
209   *vm = shiftright128(mid3, hi3, (std::uint32_t)(j - 64 - 1)); 209   *vm = shiftright128(mid3, hi3, (std::uint32_t)(j - 64 - 1));
210   } 210   }
211   else 211   else
212   { 212   {
213   const std::uint64_t lo3 = lo + lo; 213   const std::uint64_t lo3 = lo + lo;
214   const std::uint64_t mid3 = mid + mid + (lo3 < lo); 214   const std::uint64_t mid3 = mid + mid + (lo3 < lo);
215   const std::uint64_t hi3 = hi + hi + (mid3 < mid); 215   const std::uint64_t hi3 = hi + hi + (mid3 < mid);
216   const std::uint64_t lo4 = lo3 - mul[0]; 216   const std::uint64_t lo4 = lo3 - mul[0];
217   const std::uint64_t mid4 = mid3 - mul[1] - (lo4 > lo3); 217   const std::uint64_t mid4 = mid3 - mul[1] - (lo4 > lo3);
218   const std::uint64_t hi4 = hi3 - (mid4 > mid3); 218   const std::uint64_t hi4 = hi3 - (mid4 > mid3);
219   *vm = shiftright128(mid4, hi4, (std::uint32_t)(j - 64)); 219   *vm = shiftright128(mid4, hi4, (std::uint32_t)(j - 64));
220   } 220   }
221   221  
222   return shiftright128(mid, hi, (std::uint32_t)(j - 64 - 1)); 222   return shiftright128(mid, hi, (std::uint32_t)(j - 64 - 1));
223   } 223   }
224   224  
225   #endif // BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS 225   #endif // BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS
226   226  
227   inline 227   inline
228   std::uint32_t 228   std::uint32_t
HITCBC 229   538 decimalLength17( 229   538 decimalLength17(
230   const std::uint64_t v) 230   const std::uint64_t v)
231   { 231   {
232   // This is slightly faster than a loop. 232   // This is slightly faster than a loop.
233   // The average output length is 16.38 digits, so we check high-to-low. 233   // The average output length is 16.38 digits, so we check high-to-low.
234   // Function precondition: v is not an 18, 19, or 20-digit number. 234   // Function precondition: v is not an 18, 19, or 20-digit number.
235   // (17 digits are sufficient for round-tripping.) 235   // (17 digits are sufficient for round-tripping.)
HITCBC 236   538 BOOST_ASSERT(v < 100000000000000000L); 236   538 BOOST_ASSERT(v < 100000000000000000L);
HITCBC 237   538 if (v >= 10000000000000000L) { return 17; } 237   538 if (v >= 10000000000000000L) { return 17; }
HITCBC 238   528 if (v >= 1000000000000000L) { return 16; } 238   528 if (v >= 1000000000000000L) { return 16; }
HITCBC 239   509 if (v >= 100000000000000L) { return 15; } 239   509 if (v >= 100000000000000L) { return 15; }
HITCBC 240   505 if (v >= 10000000000000L) { return 14; } 240   505 if (v >= 10000000000000L) { return 14; }
HITCBC 241   500 if (v >= 1000000000000L) { return 13; } 241   500 if (v >= 1000000000000L) { return 13; }
HITCBC 242   494 if (v >= 100000000000L) { return 12; } 242   494 if (v >= 100000000000L) { return 12; }
HITCBC 243   489 if (v >= 10000000000L) { return 11; } 243   489 if (v >= 10000000000L) { return 11; }
HITCBC 244   484 if (v >= 1000000000L) { return 10; } 244   484 if (v >= 1000000000L) { return 10; }
HITCBC 245   474 if (v >= 100000000L) { return 9; } 245   474 if (v >= 100000000L) { return 9; }
HITCBC 246   467 if (v >= 10000000L) { return 8; } 246   467 if (v >= 10000000L) { return 8; }
HITCBC 247   461 if (v >= 1000000L) { return 7; } 247   461 if (v >= 1000000L) { return 7; }
HITCBC 248   455 if (v >= 100000L) { return 6; } 248   455 if (v >= 100000L) { return 6; }
HITCBC 249   450 if (v >= 10000L) { return 5; } 249   450 if (v >= 10000L) { return 5; }
HITCBC 250   445 if (v >= 1000L) { return 4; } 250   445 if (v >= 1000L) { return 4; }
HITCBC 251   439 if (v >= 100L) { return 3; } 251   439 if (v >= 100L) { return 3; }
HITCBC 252   421 if (v >= 10L) { return 2; } 252   421 if (v >= 10L) { return 2; }
HITCBC 253   415 return 1; 253   415 return 1;
254   } 254   }
255   255  
256   // A floating decimal representing m * 10^e. 256   // A floating decimal representing m * 10^e.
257   struct floating_decimal_64 257   struct floating_decimal_64
258   { 258   {
259   std::uint64_t mantissa; 259   std::uint64_t mantissa;
260   // Decimal exponent's range is -324 to 308 260   // Decimal exponent's range is -324 to 308
261   // inclusive, and can fit in a short if needed. 261   // inclusive, and can fit in a short if needed.
262   std::int32_t exponent; 262   std::int32_t exponent;
263   }; 263   };
264   264  
265   inline 265   inline
266   floating_decimal_64 266   floating_decimal_64
HITCBC 267   262 d2d( 267   262 d2d(
268   const std::uint64_t ieeeMantissa, 268   const std::uint64_t ieeeMantissa,
269   const std::uint32_t ieeeExponent) 269   const std::uint32_t ieeeExponent)
270   { 270   {
271   std::int32_t e2; 271   std::int32_t e2;
272   std::uint64_t m2; 272   std::uint64_t m2;
HITCBC 273   262 if (ieeeExponent == 0) 273   262 if (ieeeExponent == 0)
274   { 274   {
275   // We subtract 2 so that the bounds computation has 2 additional bits. 275   // We subtract 2 so that the bounds computation has 2 additional bits.
HITCBC 276   15 e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2; 276   15 e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
HITCBC 277   15 m2 = ieeeMantissa; 277   15 m2 = ieeeMantissa;
278   } 278   }
279   else 279   else
280   { 280   {
HITCBC 281   247 e2 = (std::int32_t)ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2; 281   247 e2 = (std::int32_t)ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
HITCBC 282   247 m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa; 282   247 m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
283   } 283   }
HITCBC 284   262 const bool even = (m2 & 1) == 0; 284   262 const bool even = (m2 & 1) == 0;
HITCBC 285   262 const bool acceptBounds = even; 285   262 const bool acceptBounds = even;
286   286  
287   #ifdef RYU_DEBUG 287   #ifdef RYU_DEBUG
288   printf("-> %" PRIu64 " * 2^%d\n", m2, e2 + 2); 288   printf("-> %" PRIu64 " * 2^%d\n", m2, e2 + 2);
289   #endif 289   #endif
290   290  
291   // Step 2: Determine the interval of valid decimal representations. 291   // Step 2: Determine the interval of valid decimal representations.
HITCBC 292   262 const std::uint64_t mv = 4 * m2; 292   262 const std::uint64_t mv = 4 * m2;
293   // Implicit bool -> int conversion. True is 1, false is 0. 293   // Implicit bool -> int conversion. True is 1, false is 0.
HITCBC 294   262 const std::uint32_t mmShift = ieeeMantissa != 0 || ieeeExponent <= 1; 294   262 const std::uint32_t mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
295   // We would compute mp and mm like this: 295   // We would compute mp and mm like this:
296   // uint64_t mp = 4 * m2 + 2; 296   // uint64_t mp = 4 * m2 + 2;
297   // uint64_t mm = mv - 1 - mmShift; 297   // uint64_t mm = mv - 1 - mmShift;
298   298  
299   // Step 3: Convert to a decimal power base using 128-bit arithmetic. 299   // Step 3: Convert to a decimal power base using 128-bit arithmetic.
300   std::uint64_t vr, vp, vm; 300   std::uint64_t vr, vp, vm;
301   std::int32_t e10; 301   std::int32_t e10;
HITCBC 302   262 bool vmIsTrailingZeros = false; 302   262 bool vmIsTrailingZeros = false;
HITCBC 303   262 bool vrIsTrailingZeros = false; 303   262 bool vrIsTrailingZeros = false;
HITCBC 304   262 if (e2 >= 0) { 304   262 if (e2 >= 0) {
305   // I tried special-casing q == 0, but there was no effect on performance. 305   // I tried special-casing q == 0, but there was no effect on performance.
306   // This expression is slightly faster than max(0, log10Pow2(e2) - 1). 306   // This expression is slightly faster than max(0, log10Pow2(e2) - 1).
HITCBC 307   128 const std::uint32_t q = log10Pow2(e2) - (e2 > 3); 307   128 const std::uint32_t q = log10Pow2(e2) - (e2 > 3);
HITCBC 308   128 e10 = (std::int32_t)q; 308   128 e10 = (std::int32_t)q;
HITCBC 309   128 const std::int32_t k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int32_t)q) - 1; 309   128 const std::int32_t k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int32_t)q) - 1;
HITCBC 310   128 const std::int32_t i = -e2 + (std::int32_t)q + k; 310   128 const std::int32_t i = -e2 + (std::int32_t)q + k;
311   #if defined(BOOST_JSON_RYU_OPTIMIZE_SIZE) 311   #if defined(BOOST_JSON_RYU_OPTIMIZE_SIZE)
312   uint64_t pow5[2]; 312   uint64_t pow5[2];
313   double_computeInvPow5(q, pow5); 313   double_computeInvPow5(q, pow5);
314   vr = mulShiftAll(m2, pow5, i, &vp, &vm, mmShift); 314   vr = mulShiftAll(m2, pow5, i, &vp, &vm, mmShift);
315   #else 315   #else
HITCBC 316   128 vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT()[q], i, &vp, &vm, mmShift); 316   128 vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT()[q], i, &vp, &vm, mmShift);
317   #endif 317   #endif
318   #ifdef RYU_DEBUG 318   #ifdef RYU_DEBUG
319   printf("%" PRIu64 " * 2^%d / 10^%u\n", mv, e2, q); 319   printf("%" PRIu64 " * 2^%d / 10^%u\n", mv, e2, q);
320   printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm); 320   printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
321   #endif 321   #endif
HITCBC 322   128 if (q <= 21) 322   128 if (q <= 21)
323   { 323   {
324   // This should use q <= 22, but I think 21 is also safe. Smaller values 324   // This should use q <= 22, but I think 21 is also safe. Smaller values
325   // may still be safe, but it's more difficult to reason about them. 325   // may still be safe, but it's more difficult to reason about them.
326   // Only one of mp, mv, and mm can be a multiple of 5, if any. 326   // Only one of mp, mv, and mm can be a multiple of 5, if any.
HITCBC 327   114 const std::uint32_t mvMod5 = ((std::uint32_t)mv) - 5 * ((std::uint32_t)div5(mv)); 327   114 const std::uint32_t mvMod5 = ((std::uint32_t)mv) - 5 * ((std::uint32_t)div5(mv));
HITCBC 328   114 if (mvMod5 == 0) 328   114 if (mvMod5 == 0)
329   { 329   {
HITCBC 330   86 vrIsTrailingZeros = multipleOfPowerOf5(mv, q); 330   86 vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
331   } 331   }
HITCBC 332   28 else if (acceptBounds) 332   28 else if (acceptBounds)
333   { 333   {
334   // Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q 334   // Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q
335   // <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q 335   // <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q
336   // <=> true && pow5Factor(mm) >= q, since e2 >= q. 336   // <=> true && pow5Factor(mm) >= q, since e2 >= q.
HITCBC 337   11 vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q); 337   11 vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q);
338   } 338   }
339   else 339   else
340   { 340   {
341   // Same as min(e2 + 1, pow5Factor(mp)) >= q. 341   // Same as min(e2 + 1, pow5Factor(mp)) >= q.
HITCBC 342   17 vp -= multipleOfPowerOf5(mv + 2, q); 342   17 vp -= multipleOfPowerOf5(mv + 2, q);
343   } 343   }
344   } 344   }
345   } 345   }
346   else 346   else
347   { 347   {
348   // This expression is slightly faster than max(0, log10Pow5(-e2) - 1). 348   // This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
HITCBC 349   134 const std::uint32_t q = log10Pow5(-e2) - (-e2 > 1); 349   134 const std::uint32_t q = log10Pow5(-e2) - (-e2 > 1);
HITCBC 350   134 e10 = (std::int32_t)q + e2; 350   134 e10 = (std::int32_t)q + e2;
HITCBC 351   134 const std::int32_t i = -e2 - (std::int32_t)q; 351   134 const std::int32_t i = -e2 - (std::int32_t)q;
HITCBC 352   134 const std::int32_t k = pow5bits(i) - DOUBLE_POW5_BITCOUNT; 352   134 const std::int32_t k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;
HITCBC 353   134 const std::int32_t j = (std::int32_t)q - k; 353   134 const std::int32_t j = (std::int32_t)q - k;
354   #if defined(BOOST_JSON_RYU_OPTIMIZE_SIZE) 354   #if defined(BOOST_JSON_RYU_OPTIMIZE_SIZE)
355   std::uint64_t pow5[2]; 355   std::uint64_t pow5[2];
356   double_computePow5(i, pow5); 356   double_computePow5(i, pow5);
357   vr = mulShiftAll(m2, pow5, j, &vp, &vm, mmShift); 357   vr = mulShiftAll(m2, pow5, j, &vp, &vm, mmShift);
358   #else 358   #else
HITCBC 359   134 vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT()[i], j, &vp, &vm, mmShift); 359   134 vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT()[i], j, &vp, &vm, mmShift);
360   #endif 360   #endif
361   #ifdef RYU_DEBUG 361   #ifdef RYU_DEBUG
362   printf("%" PRIu64 " * 5^%d / 10^%u\n", mv, -e2, q); 362   printf("%" PRIu64 " * 5^%d / 10^%u\n", mv, -e2, q);
363   printf("%u %d %d %d\n", q, i, k, j); 363   printf("%u %d %d %d\n", q, i, k, j);
364   printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm); 364   printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
365   #endif 365   #endif
HITCBC 366   134 if (q <= 1) 366   134 if (q <= 1)
367   { 367   {
368   // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits. 368   // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
369   // mv = 4 * m2, so it always has at least two trailing 0 bits. 369   // mv = 4 * m2, so it always has at least two trailing 0 bits.
HITCBC 370   3 vrIsTrailingZeros = true; 370   3 vrIsTrailingZeros = true;
HITCBC 371   3 if (acceptBounds) 371   3 if (acceptBounds)
372   { 372   {
373   // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1. 373   // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
HITCBC 374   3 vmIsTrailingZeros = mmShift == 1; 374   3 vmIsTrailingZeros = mmShift == 1;
375   } 375   }
376   else 376   else
377   { 377   {
378   // mp = mv + 2, so it always has at least one trailing 0 bit. 378   // mp = mv + 2, so it always has at least one trailing 0 bit.
MISUBC 379   --vp; 379   --vp;
380   } 380   }
381   } 381   }
HITCBC 382   131 else if (q < 63) 382   131 else if (q < 63)
383   { 383   {
384   // TODO(ulfjack): Use a tighter bound here. 384   // TODO(ulfjack): Use a tighter bound here.
385   // We want to know if the full product has at least q trailing zeros. 385   // We want to know if the full product has at least q trailing zeros.
386   // We need to compute min(p2(mv), p5(mv) - e2) >= q 386   // We need to compute min(p2(mv), p5(mv) - e2) >= q
387   // <=> p2(mv) >= q && p5(mv) - e2 >= q 387   // <=> p2(mv) >= q && p5(mv) - e2 >= q
388   // <=> p2(mv) >= q (because -e2 >= q) 388   // <=> p2(mv) >= q (because -e2 >= q)
HITCBC 389   96 vrIsTrailingZeros = multipleOfPowerOf2(mv, q); 389   96 vrIsTrailingZeros = multipleOfPowerOf2(mv, q);
390   #ifdef RYU_DEBUG 390   #ifdef RYU_DEBUG
391   printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false"); 391   printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
392   #endif 392   #endif
393   } 393   }
394   } 394   }
395   #ifdef RYU_DEBUG 395   #ifdef RYU_DEBUG
396   printf("e10=%d\n", e10); 396   printf("e10=%d\n", e10);
397   printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm); 397   printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
398   printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false"); 398   printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false");
399   printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false"); 399   printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
400   #endif 400   #endif
401   401  
402   // Step 4: Find the shortest decimal representation in the interval of valid representations. 402   // Step 4: Find the shortest decimal representation in the interval of valid representations.
HITCBC 403   262 std::int32_t removed = 0; 403   262 std::int32_t removed = 0;
HITCBC 404   262 std::uint8_t lastRemovedDigit = 0; 404   262 std::uint8_t lastRemovedDigit = 0;
405   std::uint64_t output; 405   std::uint64_t output;
406   // On average, we remove ~2 digits. 406   // On average, we remove ~2 digits.
HITCBC 407   262 if (vmIsTrailingZeros || vrIsTrailingZeros) 407   262 if (vmIsTrailingZeros || vrIsTrailingZeros)
408   { 408   {
409   // General case, which happens rarely (~0.7%). 409   // General case, which happens rarely (~0.7%).
410   for (;;) 410   for (;;)
411   { 411   {
HITCBC 412   1663 const std::uint64_t vpDiv10 = div10(vp); 412   1663 const std::uint64_t vpDiv10 = div10(vp);
HITCBC 413   1663 const std::uint64_t vmDiv10 = div10(vm); 413   1663 const std::uint64_t vmDiv10 = div10(vm);
HITCBC 414   1663 if (vpDiv10 <= vmDiv10) 414   1663 if (vpDiv10 <= vmDiv10)
HITCBC 415   94 break; 415   94 break;
HITCBC 416   1569 const std::uint32_t vmMod10 = ((std::uint32_t)vm) - 10 * ((std::uint32_t)vmDiv10); 416   1569 const std::uint32_t vmMod10 = ((std::uint32_t)vm) - 10 * ((std::uint32_t)vmDiv10);
HITCBC 417   1569 const std::uint64_t vrDiv10 = div10(vr); 417   1569 const std::uint64_t vrDiv10 = div10(vr);
HITCBC 418   1569 const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10); 418   1569 const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);
HITCBC 419   1569 vmIsTrailingZeros &= vmMod10 == 0; 419   1569 vmIsTrailingZeros &= vmMod10 == 0;
HITCBC 420   1569 vrIsTrailingZeros &= lastRemovedDigit == 0; 420   1569 vrIsTrailingZeros &= lastRemovedDigit == 0;
HITCBC 421   1569 lastRemovedDigit = (uint8_t)vrMod10; 421   1569 lastRemovedDigit = (uint8_t)vrMod10;
HITCBC 422   1569 vr = vrDiv10; 422   1569 vr = vrDiv10;
HITCBC 423   1569 vp = vpDiv10; 423   1569 vp = vpDiv10;
HITCBC 424   1569 vm = vmDiv10; 424   1569 vm = vmDiv10;
HITCBC 425   1569 ++removed; 425   1569 ++removed;
HITCBC 426   1569 } 426   1569 }
427   #ifdef RYU_DEBUG 427   #ifdef RYU_DEBUG
428   printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm); 428   printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
429   printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false"); 429   printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false");
430   #endif 430   #endif
HITCBC 431   94 if (vmIsTrailingZeros) 431   94 if (vmIsTrailingZeros)
432   { 432   {
433   for (;;) 433   for (;;)
434   { 434   {
HITCBC 435   3 const std::uint64_t vmDiv10 = div10(vm); 435   3 const std::uint64_t vmDiv10 = div10(vm);
HITCBC 436   3 const std::uint32_t vmMod10 = ((std::uint32_t)vm) - 10 * ((std::uint32_t)vmDiv10); 436   3 const std::uint32_t vmMod10 = ((std::uint32_t)vm) - 10 * ((std::uint32_t)vmDiv10);
HITCBC 437   3 if (vmMod10 != 0) 437   3 if (vmMod10 != 0)
HITCBC 438   2 break; 438   2 break;
HITCBC 439   1 const std::uint64_t vpDiv10 = div10(vp); 439   1 const std::uint64_t vpDiv10 = div10(vp);
HITCBC 440   1 const std::uint64_t vrDiv10 = div10(vr); 440   1 const std::uint64_t vrDiv10 = div10(vr);
HITCBC 441   1 const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10); 441   1 const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);
HITCBC 442   1 vrIsTrailingZeros &= lastRemovedDigit == 0; 442   1 vrIsTrailingZeros &= lastRemovedDigit == 0;
HITCBC 443   1 lastRemovedDigit = (uint8_t)vrMod10; 443   1 lastRemovedDigit = (uint8_t)vrMod10;
HITCBC 444   1 vr = vrDiv10; 444   1 vr = vrDiv10;
HITCBC 445   1 vp = vpDiv10; 445   1 vp = vpDiv10;
HITCBC 446   1 vm = vmDiv10; 446   1 vm = vmDiv10;
HITCBC 447   1 ++removed; 447   1 ++removed;
HITCBC 448   1 } 448   1 }
449   } 449   }
450   #ifdef RYU_DEBUG 450   #ifdef RYU_DEBUG
451   printf("%" PRIu64 " %d\n", vr, lastRemovedDigit); 451   printf("%" PRIu64 " %d\n", vr, lastRemovedDigit);
452   printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false"); 452   printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
453   #endif 453   #endif
HITCBC 454   94 if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) 454   94 if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)
455   { 455   {
456   // Round even if the exact number is .....50..0. 456   // Round even if the exact number is .....50..0.
HITCBC 457   1 lastRemovedDigit = 4; 457   1 lastRemovedDigit = 4;
458   } 458   }
459   // We need to take vr + 1 if vr is outside bounds or we need to round up. 459   // We need to take vr + 1 if vr is outside bounds or we need to round up.
HITCBC 460   94 output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5); 460   94 output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
HITCBC 461   94 } 461   94 }
462   else 462   else
463   { 463   {
464   // Specialized for the common case (~99.3%). Percentages below are relative to this. 464   // Specialized for the common case (~99.3%). Percentages below are relative to this.
HITCBC 465   168 bool roundUp = false; 465   168 bool roundUp = false;
HITCBC 466   168 const std::uint64_t vpDiv100 = div100(vp); 466   168 const std::uint64_t vpDiv100 = div100(vp);
HITCBC 467   168 const std::uint64_t vmDiv100 = div100(vm); 467   168 const std::uint64_t vmDiv100 = div100(vm);
HITCBC 468   168 if (vpDiv100 > vmDiv100) 468   168 if (vpDiv100 > vmDiv100)
469   { 469   {
470   // Optimization: remove two digits at a time (~86.2%). 470   // Optimization: remove two digits at a time (~86.2%).
HITCBC 471   161 const std::uint64_t vrDiv100 = div100(vr); 471   161 const std::uint64_t vrDiv100 = div100(vr);
HITCBC 472   161 const std::uint32_t vrMod100 = ((std::uint32_t)vr) - 100 * ((std::uint32_t)vrDiv100); 472   161 const std::uint32_t vrMod100 = ((std::uint32_t)vr) - 100 * ((std::uint32_t)vrDiv100);
HITCBC 473   161 roundUp = vrMod100 >= 50; 473   161 roundUp = vrMod100 >= 50;
HITCBC 474   161 vr = vrDiv100; 474   161 vr = vrDiv100;
HITCBC 475   161 vp = vpDiv100; 475   161 vp = vpDiv100;
HITCBC 476   161 vm = vmDiv100; 476   161 vm = vmDiv100;
HITCBC 477   161 removed += 2; 477   161 removed += 2;
478   } 478   }
479   // Loop iterations below (approximately), without optimization above: 479   // Loop iterations below (approximately), without optimization above:
480   // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02% 480   // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
481   // Loop iterations below (approximately), with optimization above: 481   // Loop iterations below (approximately), with optimization above:
482   // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02% 482   // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
483   for (;;) 483   for (;;)
484   { 484   {
HITCBC 485   2256 const std::uint64_t vpDiv10 = div10(vp); 485   2256 const std::uint64_t vpDiv10 = div10(vp);
HITCBC 486   2256 const std::uint64_t vmDiv10 = div10(vm); 486   2256 const std::uint64_t vmDiv10 = div10(vm);
HITCBC 487   2256 if (vpDiv10 <= vmDiv10) 487   2256 if (vpDiv10 <= vmDiv10)
HITCBC 488   168 break; 488   168 break;
HITCBC 489   2088 const std::uint64_t vrDiv10 = div10(vr); 489   2088 const std::uint64_t vrDiv10 = div10(vr);
HITCBC 490   2088 const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10); 490   2088 const std::uint32_t vrMod10 = ((std::uint32_t)vr) - 10 * ((std::uint32_t)vrDiv10);
HITCBC 491   2088 roundUp = vrMod10 >= 5; 491   2088 roundUp = vrMod10 >= 5;
HITCBC 492   2088 vr = vrDiv10; 492   2088 vr = vrDiv10;
HITCBC 493   2088 vp = vpDiv10; 493   2088 vp = vpDiv10;
HITCBC 494   2088 vm = vmDiv10; 494   2088 vm = vmDiv10;
HITCBC 495   2088 ++removed; 495   2088 ++removed;
HITCBC 496   2088 } 496   2088 }
497   #ifdef RYU_DEBUG 497   #ifdef RYU_DEBUG
498   printf("%" PRIu64 " roundUp=%s\n", vr, roundUp ? "true" : "false"); 498   printf("%" PRIu64 " roundUp=%s\n", vr, roundUp ? "true" : "false");
499   printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false"); 499   printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
500   #endif 500   #endif
501   // We need to take vr + 1 if vr is outside bounds or we need to round up. 501   // We need to take vr + 1 if vr is outside bounds or we need to round up.
HITCBC 502   168 output = vr + (vr == vm || roundUp); 502   168 output = vr + (vr == vm || roundUp);
503   } 503   }
HITCBC 504   262 const std::int32_t exp = e10 + removed; 504   262 const std::int32_t exp = e10 + removed;
505   505  
506   #ifdef RYU_DEBUG 506   #ifdef RYU_DEBUG
507   printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm); 507   printf("V+=%" PRIu64 "\nV =%" PRIu64 "\nV-=%" PRIu64 "\n", vp, vr, vm);
508   printf("O=%" PRIu64 "\n", output); 508   printf("O=%" PRIu64 "\n", output);
509   printf("EXP=%d\n", exp); 509   printf("EXP=%d\n", exp);
510   #endif 510   #endif
511   511  
512   floating_decimal_64 fd; 512   floating_decimal_64 fd;
HITCBC 513   262 fd.exponent = exp; 513   262 fd.exponent = exp;
HITCBC 514   262 fd.mantissa = output; 514   262 fd.mantissa = output;
HITCBC 515   262 return fd; 515   262 return fd;
516   } 516   }
517   517  
518   inline 518   inline
519   int 519   int
HITCBC 520   538 to_chars( 520   538 to_chars(
521   const floating_decimal_64 v, 521   const floating_decimal_64 v,
522   const bool sign, 522   const bool sign,
523   char* const result) 523   char* const result)
524   { 524   {
525   // Step 5: Print the decimal representation. 525   // Step 5: Print the decimal representation.
HITCBC 526   538 int index = 0; 526   538 int index = 0;
HITCBC 527   538 if (sign) 527   538 if (sign)
HITCBC 528   129 result[index++] = '-'; 528   129 result[index++] = '-';
529   529  
HITCBC 530   538 std::uint64_t output = v.mantissa; 530   538 std::uint64_t output = v.mantissa;
HITCBC 531   538 std::uint32_t const olength = decimalLength17(output); 531   538 std::uint32_t const olength = decimalLength17(output);
532   532  
533   #ifdef RYU_DEBUG 533   #ifdef RYU_DEBUG
534   printf("DIGITS=%" PRIu64 "\n", v.mantissa); 534   printf("DIGITS=%" PRIu64 "\n", v.mantissa);
535   printf("OLEN=%u\n", olength); 535   printf("OLEN=%u\n", olength);
536   printf("EXP=%u\n", v.exponent + olength); 536   printf("EXP=%u\n", v.exponent + olength);
537   #endif 537   #endif
538   538  
539   // Print the decimal digits. 539   // Print the decimal digits.
540   // The following code is equivalent to: 540   // The following code is equivalent to:
541   // for (uint32_t i = 0; i < olength - 1; ++i) { 541   // for (uint32_t i = 0; i < olength - 1; ++i) {
542   // const uint32_t c = output % 10; output /= 10; 542   // const uint32_t c = output % 10; output /= 10;
543   // result[index + olength - i] = (char) ('0' + c); 543   // result[index + olength - i] = (char) ('0' + c);
544   // } 544   // }
545   // result[index] = '0' + output % 10; 545   // result[index] = '0' + output % 10;
546   546  
HITCBC 547   538 std::uint32_t i = 0; 547   538 std::uint32_t i = 0;
548   // We prefer 32-bit operations, even on 64-bit platforms. 548   // We prefer 32-bit operations, even on 64-bit platforms.
549   // We have at most 17 digits, and uint32_t can store 9 digits. 549   // We have at most 17 digits, and uint32_t can store 9 digits.
550   // If output doesn't fit into uint32_t, we cut off 8 digits, 550   // If output doesn't fit into uint32_t, we cut off 8 digits,
551   // so the rest will fit into uint32_t. 551   // so the rest will fit into uint32_t.
HITCBC 552   538 if ((output >> 32) != 0) 552   538 if ((output >> 32) != 0)
553   { 553   {
554   // Expensive 64-bit division. 554   // Expensive 64-bit division.
HITCBC 555   59 std::uint64_t const q = div1e8(output); 555   59 std::uint64_t const q = div1e8(output);
HITCBC 556   59 std::uint32_t output2 = ((std::uint32_t)output) - 100000000 * ((std::uint32_t)q); 556   59 std::uint32_t output2 = ((std::uint32_t)output) - 100000000 * ((std::uint32_t)q);
HITCBC 557   59 output = q; 557   59 output = q;
558   558  
HITCBC 559   59 const std::uint32_t c = output2 % 10000; 559   59 const std::uint32_t c = output2 % 10000;
HITCBC 560   59 output2 /= 10000; 560   59 output2 /= 10000;
HITCBC 561   59 const std::uint32_t d = output2 % 10000; 561   59 const std::uint32_t d = output2 % 10000;
HITCBC 562   59 const std::uint32_t c0 = (c % 100) << 1; 562   59 const std::uint32_t c0 = (c % 100) << 1;
HITCBC 563   59 const std::uint32_t c1 = (c / 100) << 1; 563   59 const std::uint32_t c1 = (c / 100) << 1;
HITCBC 564   59 const std::uint32_t d0 = (d % 100) << 1; 564   59 const std::uint32_t d0 = (d % 100) << 1;
HITCBC 565   59 const std::uint32_t d1 = (d / 100) << 1; 565   59 const std::uint32_t d1 = (d / 100) << 1;
HITCBC 566   59 std::memcpy(result + index + olength - i - 1, DIGIT_TABLE() + c0, 2); 566   59 std::memcpy(result + index + olength - i - 1, DIGIT_TABLE() + c0, 2);
HITCBC 567   59 std::memcpy(result + index + olength - i - 3, DIGIT_TABLE() + c1, 2); 567   59 std::memcpy(result + index + olength - i - 3, DIGIT_TABLE() + c1, 2);
HITCBC 568   59 std::memcpy(result + index + olength - i - 5, DIGIT_TABLE() + d0, 2); 568   59 std::memcpy(result + index + olength - i - 5, DIGIT_TABLE() + d0, 2);
HITCBC 569   59 std::memcpy(result + index + olength - i - 7, DIGIT_TABLE() + d1, 2); 569   59 std::memcpy(result + index + olength - i - 7, DIGIT_TABLE() + d1, 2);
HITCBC 570   59 i += 8; 570   59 i += 8;
571   } 571   }
HITCBC 572   538 uint32_t output2 = (std::uint32_t)output; 572   538 uint32_t output2 = (std::uint32_t)output;
HITCBC 573   638 while (output2 >= 10000) 573   638 while (output2 >= 10000)
574   { 574   {
575   #ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=38217 575   #ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=38217
576   const uint32_t c = output2 - 10000 * (output2 / 10000); 576   const uint32_t c = output2 - 10000 * (output2 / 10000);
577   #else 577   #else
HITCBC 578   100 const uint32_t c = output2 % 10000; 578   100 const uint32_t c = output2 % 10000;
579   #endif 579   #endif
HITCBC 580   100 output2 /= 10000; 580   100 output2 /= 10000;
HITCBC 581   100 const uint32_t c0 = (c % 100) << 1; 581   100 const uint32_t c0 = (c % 100) << 1;
HITCBC 582   100 const uint32_t c1 = (c / 100) << 1; 582   100 const uint32_t c1 = (c / 100) << 1;
HITCBC 583   100 memcpy(result + index + olength - i - 1, DIGIT_TABLE() + c0, 2); 583   100 memcpy(result + index + olength - i - 1, DIGIT_TABLE() + c0, 2);
HITCBC 584   100 memcpy(result + index + olength - i - 3, DIGIT_TABLE() + c1, 2); 584   100 memcpy(result + index + olength - i - 3, DIGIT_TABLE() + c1, 2);
HITCBC 585   100 i += 4; 585   100 i += 4;
586   } 586   }
HITCBC 587   538 if (output2 >= 100) { 587   538 if (output2 >= 100) {
HITCBC 588   69 const uint32_t c = (output2 % 100) << 1; 588   69 const uint32_t c = (output2 % 100) << 1;
HITCBC 589   69 output2 /= 100; 589   69 output2 /= 100;
HITCBC 590   69 memcpy(result + index + olength - i - 1, DIGIT_TABLE() + c, 2); 590   69 memcpy(result + index + olength - i - 1, DIGIT_TABLE() + c, 2);
HITCBC 591   69 i += 2; 591   69 i += 2;
592   } 592   }
HITCBC 593   538 if (output2 >= 10) { 593   538 if (output2 >= 10) {
HITCBC 594   62 const uint32_t c = output2 << 1; 594   62 const uint32_t c = output2 << 1;
595   // We can't use memcpy here: the decimal dot goes between these two digits. 595   // We can't use memcpy here: the decimal dot goes between these two digits.
HITCBC 596   62 result[index + olength - i] = DIGIT_TABLE()[c + 1]; 596   62 result[index + olength - i] = DIGIT_TABLE()[c + 1];
HITCBC 597   62 result[index] = DIGIT_TABLE()[c]; 597   62 result[index] = DIGIT_TABLE()[c];
598   } 598   }
599   else { 599   else {
HITCBC 600   476 result[index] = (char)('0' + output2); 600   476 result[index] = (char)('0' + output2);
601   } 601   }
602   602  
603   // Print decimal point if needed. 603   // Print decimal point if needed.
HITCBC 604   538 if (olength > 1) { 604   538 if (olength > 1) {
HITCBC 605   123 result[index + 1] = '.'; 605   123 result[index + 1] = '.';
HITCBC 606   123 index += olength + 1; 606   123 index += olength + 1;
607   } 607   }
608   else { 608   else {
HITCBC 609   415 ++index; 609   415 ++index;
610   } 610   }
611   611  
612   // Print the exponent. 612   // Print the exponent.
HITCBC 613   538 result[index++] = 'E'; 613   538 result[index++] = 'E';
HITCBC 614   538 int32_t exp = v.exponent + (int32_t)olength - 1; 614   538 int32_t exp = v.exponent + (int32_t)olength - 1;
HITCBC 615   538 if (exp < 0) { 615   538 if (exp < 0) {
HITCBC 616   92 result[index++] = '-'; 616   92 result[index++] = '-';
HITCBC 617   92 exp = -exp; 617   92 exp = -exp;
618   } 618   }
619   619  
HITCBC 620   538 if (exp >= 100) { 620   538 if (exp >= 100) {
HITCBC 621   33 const int32_t c = exp % 10; 621   33 const int32_t c = exp % 10;
HITCBC 622   33 memcpy(result + index, DIGIT_TABLE() + 2 * (exp / 10), 2); 622   33 memcpy(result + index, DIGIT_TABLE() + 2 * (exp / 10), 2);
HITCBC 623   33 result[index + 2] = (char)('0' + c); 623   33 result[index + 2] = (char)('0' + c);
HITCBC 624   33 index += 3; 624   33 index += 3;
625   } 625   }
HITCBC 626   505 else if (exp >= 10) { 626   505 else if (exp >= 10) {
HITCBC 627   180 memcpy(result + index, DIGIT_TABLE() + 2 * exp, 2); 627   180 memcpy(result + index, DIGIT_TABLE() + 2 * exp, 2);
HITCBC 628   180 index += 2; 628   180 index += 2;
629   } 629   }
630   else { 630   else {
HITCBC 631   325 result[index++] = (char)('0' + exp); 631   325 result[index++] = (char)('0' + exp);
632   } 632   }
633   633  
HITCBC 634   538 return index; 634   538 return index;
635   } 635   }
636   636  
HITCBC 637   538 static inline bool d2d_small_int(const uint64_t ieeeMantissa, const uint32_t ieeeExponent, 637   538 static inline bool d2d_small_int(const uint64_t ieeeMantissa, const uint32_t ieeeExponent,
638   floating_decimal_64* const v) { 638   floating_decimal_64* const v) {
HITCBC 639   538 const uint64_t m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa; 639   538 const uint64_t m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
HITCBC 640   538 const int32_t e2 = (int32_t) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS; 640   538 const int32_t e2 = (int32_t) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
641   641  
HITCBC 642   538 if (e2 > 0) { 642   538 if (e2 > 0) {
643   // f = m2 * 2^e2 >= 2^53 is an integer. 643   // f = m2 * 2^e2 >= 2^53 is an integer.
644   // Ignore this case for now. 644   // Ignore this case for now.
HITCBC 645   131 return false; 645   131 return false;
646   } 646   }
647   647  
HITCBC 648   407 if (e2 < -52) { 648   407 if (e2 < -52) {
649   // f < 1. 649   // f < 1.
HITCBC 650   92 return false; 650   92 return false;
651   } 651   }
652   652  
653   // Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52: 1 <= f = m2 / 2^-e2 < 2^53. 653   // Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52: 1 <= f = m2 / 2^-e2 < 2^53.
654   // Test if the lower -e2 bits of the significand are 0, i.e. whether the fraction is 0. 654   // Test if the lower -e2 bits of the significand are 0, i.e. whether the fraction is 0.
HITCBC 655   315 const uint64_t mask = (1ull << -e2) - 1; 655   315 const uint64_t mask = (1ull << -e2) - 1;
HITCBC 656   315 const uint64_t fraction = m2 & mask; 656   315 const uint64_t fraction = m2 & mask;
HITCBC 657   315 if (fraction != 0) { 657   315 if (fraction != 0) {
HITCBC 658   39 return false; 658   39 return false;
659   } 659   }
660   660  
661   // f is an integer in the range [1, 2^53). 661   // f is an integer in the range [1, 2^53).
662   // Note: mantissa might contain trailing (decimal) 0's. 662   // Note: mantissa might contain trailing (decimal) 0's.
663   // Note: since 2^53 < 10^16, there is no need to adjust decimalLength17(). 663   // Note: since 2^53 < 10^16, there is no need to adjust decimalLength17().
HITCBC 664   276 v->mantissa = m2 >> -e2; 664   276 v->mantissa = m2 >> -e2;
HITCBC 665   276 v->exponent = 0; 665   276 v->exponent = 0;
HITCBC 666   276 return true; 666   276 return true;
667   } 667   }
668   668  
669   } // detail 669   } // detail
670   670  
671   int 671   int
HITCBC 672   609 d2s_buffered_n( 672   609 d2s_buffered_n(
673   double f, 673   double f,
674   char* result, 674   char* result,
675   bool allow_infinity_and_nan) noexcept 675   bool allow_infinity_and_nan) noexcept
676   { 676   {
677   using namespace detail; 677   using namespace detail;
678   // Step 1: Decode the floating-point number, and unify normalized and subnormal cases. 678   // Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
HITCBC 679   609 std::uint64_t const bits = double_to_bits(f); 679   609 std::uint64_t const bits = double_to_bits(f);
680   680  
681   #ifdef RYU_DEBUG 681   #ifdef RYU_DEBUG
682   printf("IN="); 682   printf("IN=");
683   for (std::int32_t bit = 63; bit >= 0; --bit) { 683   for (std::int32_t bit = 63; bit >= 0; --bit) {
684   printf("%d", (int)((bits >> bit) & 1)); 684   printf("%d", (int)((bits >> bit) & 1));
685   } 685   }
686   printf("\n"); 686   printf("\n");
687   #endif 687   #endif
688   688  
689   // Decode bits into sign, mantissa, and exponent. 689   // Decode bits into sign, mantissa, and exponent.
HITCBC 690   609 const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0; 690   609 const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
HITCBC 691   609 const std::uint64_t ieeeMantissa = bits & ((1ull << DOUBLE_MANTISSA_BITS) - 1); 691   609 const std::uint64_t ieeeMantissa = bits & ((1ull << DOUBLE_MANTISSA_BITS) - 1);
HITCBC 692   609 const std::uint32_t ieeeExponent = (std::uint32_t)((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1)); 692   609 const std::uint32_t ieeeExponent = (std::uint32_t)((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1));
693   // Case distinction; exit early for the easy cases. 693   // Case distinction; exit early for the easy cases.
HITCBC 694   609 if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0)) { 694   609 if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0)) {
695   // We changed how special numbers are output by default 695   // We changed how special numbers are output by default
HITCBC 696   71 if (allow_infinity_and_nan) 696   71 if (allow_infinity_and_nan)
HITCBC 697   11 return copy_special_str(result, ieeeSign, ieeeExponent != 0, ieeeMantissa != 0); 697   11 return copy_special_str(result, ieeeSign, ieeeExponent != 0, ieeeMantissa != 0);
698   else 698   else
HITCBC 699   60 return copy_special_str_conforming(result, ieeeSign, ieeeExponent != 0, ieeeMantissa != 0); 699   60 return copy_special_str_conforming(result, ieeeSign, ieeeExponent != 0, ieeeMantissa != 0);
700   700  
701   } 701   }
702   702  
703   floating_decimal_64 v; 703   floating_decimal_64 v;
HITCBC 704   538 const bool isSmallInt = d2d_small_int(ieeeMantissa, ieeeExponent, &v); 704   538 const bool isSmallInt = d2d_small_int(ieeeMantissa, ieeeExponent, &v);
HITCBC 705   538 if (isSmallInt) { 705   538 if (isSmallInt) {
706   // For small integers in the range [1, 2^53), v.mantissa might contain trailing (decimal) zeros. 706   // For small integers in the range [1, 2^53), v.mantissa might contain trailing (decimal) zeros.
707   // For scientific notation we need to move these zeros into the exponent. 707   // For scientific notation we need to move these zeros into the exponent.
708   // (This is not needed for fixed-point notation, so it might be beneficial to trim 708   // (This is not needed for fixed-point notation, so it might be beneficial to trim
709   // trailing zeros in to_chars only if needed - once fixed-point notation output is implemented.) 709   // trailing zeros in to_chars only if needed - once fixed-point notation output is implemented.)
710   for (;;) { 710   for (;;) {
HITCBC 711   698 std::uint64_t const q = div10(v.mantissa); 711   698 std::uint64_t const q = div10(v.mantissa);
HITCBC 712   698 std::uint32_t const r = ((std::uint32_t) v.mantissa) - 10 * ((std::uint32_t) q); 712   698 std::uint32_t const r = ((std::uint32_t) v.mantissa) - 10 * ((std::uint32_t) q);
HITCBC 713   698 if (r != 0) 713   698 if (r != 0)
HITCBC 714   276 break; 714   276 break;
HITCBC 715   422 v.mantissa = q; 715   422 v.mantissa = q;
HITCBC 716   422 ++v.exponent; 716   422 ++v.exponent;
HITCBC 717   422 } 717   422 }
718   } 718   }
719   else { 719   else {
HITCBC 720   262 v = d2d(ieeeMantissa, ieeeExponent); 720   262 v = d2d(ieeeMantissa, ieeeExponent);
721   } 721   }
722   722  
HITCBC 723   538 return to_chars(v, ieeeSign, result); 723   538 return to_chars(v, ieeeSign, result);
724   } 724   }
725   725  
726   } // ryu 726   } // ryu
727   727  
728   } // detail 728   } // detail
729   } // namespace json 729   } // namespace json
730   } // namespace boost 730   } // namespace boost
731   731  
732   #endif 732   #endif