97.81% Lines (134/137) 100.00% Functions (6/6)
TLA Baseline Branch
Line Hits Code Line Hits Code
1   // 1   //
2   // Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com), 2   // Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com),
3   // Vinnie Falco (vinnie.falco@gmail.com) 3   // Vinnie Falco (vinnie.falco@gmail.com)
4   // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com) 4   // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
5   // 5   //
6   // Distributed under the Boost Software License, Version 1.0. (See accompanying 6   // Distributed under the Boost Software License, Version 1.0. (See accompanying
7   // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7   // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
8   // 8   //
9   // Official repository: https://github.com/boostorg/json 9   // Official repository: https://github.com/boostorg/json
10   // 10   //
11   11  
12   #ifndef BOOST_JSON_DETAIL_SSE2_HPP 12   #ifndef BOOST_JSON_DETAIL_SSE2_HPP
13   #define BOOST_JSON_DETAIL_SSE2_HPP 13   #define BOOST_JSON_DETAIL_SSE2_HPP
14   14  
15   #include <boost/json/detail/config.hpp> 15   #include <boost/json/detail/config.hpp>
16   #include <boost/json/detail/utf8.hpp> 16   #include <boost/json/detail/utf8.hpp>
17   #include <cstddef> 17   #include <cstddef>
18   #include <cstring> 18   #include <cstring>
19   #ifdef BOOST_JSON_USE_SSE2 19   #ifdef BOOST_JSON_USE_SSE2
20   # include <emmintrin.h> 20   # include <emmintrin.h>
21   # include <xmmintrin.h> 21   # include <xmmintrin.h>
22   # ifdef _MSC_VER 22   # ifdef _MSC_VER
23   # include <intrin.h> 23   # include <intrin.h>
24   # endif 24   # endif
25   #endif 25   #endif
26   26  
27   namespace boost { 27   namespace boost {
28   namespace json { 28   namespace json {
29   namespace detail { 29   namespace detail {
30   30  
31   #ifdef BOOST_JSON_USE_SSE2 31   #ifdef BOOST_JSON_USE_SSE2
32   32  
// Returns a pointer to the first byte in [p, end) that is a double
// quote (0x22), a backslash, or a control character (below 0x20),
// or `end` if the range contains no such byte.
//
// This primary definition performs no UTF-8 validation and does not
// consult the AllowBadUTF8 template parameter; bytes >= 0x80 are
// skipped like any other ordinary character.
template<bool AllowBadUTF8>
inline
const char*
count_valid(
    char const* p,
    const char* end) noexcept
{
    __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const q2 = _mm_set1_epi8( '\\' );   // '\\'
    __m128i const q3 = _mm_set1_epi8( 0x1F );   // largest control character

    // vector loop: only entered while a full 16-byte chunk remains
    while(end - p >= 16)
    {
        __m128i v1 = _mm_loadu_si128( reinterpret_cast<__m128i const*>(p) );
        __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); // quote
        __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash
        __m128i v4 = _mm_or_si128( v2, v3 );   // combine quotes and backslash
        // unsigned min(v, 0x1F) == v exactly when v <= 0x1F
        __m128i v5 = _mm_min_epu8( v1, q3 );
        __m128i v6 = _mm_cmpeq_epi8( v5, v1 ); // controls
        __m128i v7 = _mm_or_si128( v4, v6 );   // combine with control

        // one bit per byte; bit i is set when byte i matched
        int w = _mm_movemask_epi8( v7 );

        if( w != 0 )
        {
            // index of the lowest set bit == offset of the first match
            int m;
#if defined(__GNUC__) || defined(__clang__)
            m = __builtin_ffs( w ) - 1;
#else
            unsigned long index;
            _BitScanForward( &index, static_cast<unsigned long>(w) );
            m = static_cast<int>(index);
#endif
            return p + m;
        }

        p += 16;
    }

    // scalar loop for the remaining (fewer than 16) bytes
    while(p != end)
    {
        const unsigned char c = *p;
        if(c == '\x22' || c == '\\' || c < 0x20)
            break;
        ++p;
    }

    return p;
}
82   82  
83   template<> 83   template<>
84   inline 84   inline
85   const char* 85   const char*
HITCBC 86   162602 count_valid<false>( 86   162602 count_valid<false>(
87   char const* p, 87   char const* p,
88   const char* end) noexcept 88   const char* end) noexcept
89   { 89   {
HITCBC 90   162602 __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"' 90   162602 __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
HITCBC 91   162602 __m128i const q2 = _mm_set1_epi8( '\\' ); 91   162602 __m128i const q2 = _mm_set1_epi8( '\\' );
HITCBC 92   162602 __m128i const q3 = _mm_set1_epi8( 0x20 ); 92   162602 __m128i const q3 = _mm_set1_epi8( 0x20 );
93   93  
HITCBC 94   12202131 while(end - p >= 16) 94   12202131 while(end - p >= 16)
95   { 95   {
HITCBC 96   12090929 __m128i v1 = _mm_loadu_si128( (__m128i const*)p ); 96   12090929 __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
97   97  
HITCBC 98   12090929 __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); 98   12090929 __m128i v2 = _mm_cmpeq_epi8( v1, q1 );
HITCBC 99   12090929 __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); 99   12090929 __m128i v3 = _mm_cmpeq_epi8( v1, q2 );
HITCBC 100   12090929 __m128i v4 = _mm_cmplt_epi8( v1, q3 ); 100   12090929 __m128i v4 = _mm_cmplt_epi8( v1, q3 );
101   101  
HITCBC 102   12090929 __m128i v5 = _mm_or_si128( v2, v3 ); 102   12090929 __m128i v5 = _mm_or_si128( v2, v3 );
HITCBC 103   12090929 __m128i v6 = _mm_or_si128( v5, v4 ); 103   12090929 __m128i v6 = _mm_or_si128( v5, v4 );
104   104  
HITCBC 105   12090929 int w = _mm_movemask_epi8( v6 ); 105   12090929 int w = _mm_movemask_epi8( v6 );
106   106  
HITCBC 107   12090929 if( w != 0 ) 107   12090929 if( w != 0 )
108   { 108   {
109   int m; 109   int m;
110   #if defined(__GNUC__) || defined(__clang__) 110   #if defined(__GNUC__) || defined(__clang__)
HITCBC 111   51400 m = __builtin_ffs( w ) - 1; 111   51400 m = __builtin_ffs( w ) - 1;
112   #else 112   #else
113   unsigned long index; 113   unsigned long index;
114   _BitScanForward( &index, w ); 114   _BitScanForward( &index, w );
115   m = index; 115   m = index;
116   #endif 116   #endif
HITCBC 117   51400 p += m; 117   51400 p += m;
HITCBC 118   51400 break; 118   51400 break;
119   } 119   }
120   120  
HITCBC 121   12039529 p += 16; 121   12039529 p += 16;
122   } 122   }
123   123  
HITCBC 124   479946 while(p != end) 124   479946 while(p != end)
125   { 125   {
HITCBC 126   449775 const unsigned char c = *p; 126   449775 const unsigned char c = *p;
HITCBC 127   449775 if(c == '\x22' || c == '\\' || c < 0x20) 127   449775 if(c == '\x22' || c == '\\' || c < 0x20)
128   break; 128   break;
HITCBC 129   320806 if(c < 0x80) 129   320806 if(c < 0x80)
130   { 130   {
HITCBC 131   307616 ++p; 131   307616 ++p;
HITCBC 132   307616 continue; 132   307616 continue;
133   } 133   }
134   // validate utf-8 134   // validate utf-8
HITCBC 135   13190 uint16_t first = classify_utf8(c); 135   13190 uint16_t first = classify_utf8(c);
HITCBC 136   13190 uint8_t len = first & 0xFF; 136   13190 uint8_t len = first & 0xFF;
HITCBC 137   13190 if(BOOST_JSON_UNLIKELY(end - p < len)) 137   13190 if(BOOST_JSON_UNLIKELY(end - p < len))
HITCBC 138   1905 break; 138   1905 break;
HITCBC 139   11285 if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first))) 139   11285 if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
HITCBC 140   1557 break; 140   1557 break;
HITCBC 141   9728 p += len; 141   9728 p += len;
142   } 142   }
143   143  
HITCBC 144   162602 return p; 144   162602 return p;
145   } 145   }
146   146  
147   #else 147   #else
148   148  
// Portable (non-SSE2) version.
//
// Returns a pointer to the first byte in [p, end) that is a double
// quote (0x22), a backslash, or a control character (below 0x20),
// or `end` if the range contains no such byte.
//
// This primary definition performs no UTF-8 validation and does not
// consult the AllowBadUTF8 template parameter.
template<bool AllowBadUTF8>
inline
char const*
count_valid(
    char const* p,
    char const* end) noexcept
{
    while(p != end)
    {
        // compare as unsigned so bytes >= 0x80 are not mistaken
        // for control characters
        const unsigned char c = *p;
        if(c == '\x22' || c == '\\' || c < 0x20)
            break;
        ++p;
    }

    return p;
}
165   165  
166   template<> 166   template<>
167   inline 167   inline
168   char const* 168   char const*
169   count_valid<false>( 169   count_valid<false>(
170   char const* p, 170   char const* p,
171   char const* end) noexcept 171   char const* end) noexcept
172   { 172   {
173   while(p != end) 173   while(p != end)
174   { 174   {
175   const unsigned char c = *p; 175   const unsigned char c = *p;
176   if(c == '\x22' || c == '\\' || c < 0x20) 176   if(c == '\x22' || c == '\\' || c < 0x20)
177   break; 177   break;
178   if(c < 0x80) 178   if(c < 0x80)
179   { 179   {
180   ++p; 180   ++p;
181   continue; 181   continue;
182   } 182   }
183   // validate utf-8 183   // validate utf-8
184   uint16_t first = classify_utf8(c); 184   uint16_t first = classify_utf8(c);
185   uint8_t len = first & 0xFF; 185   uint8_t len = first & 0xFF;
186   if(BOOST_JSON_UNLIKELY(end - p < len)) 186   if(BOOST_JSON_UNLIKELY(end - p < len))
187   break; 187   break;
188   if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first))) 188   if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
189   break; 189   break;
190   p += len; 190   p += len;
191   } 191   }
192   192  
193   return p; 193   return p;
194   } 194   }
195   195  
196   #endif 196   #endif
197   197  
198   // KRYSTIAN NOTE: does not stop to validate 198   // KRYSTIAN NOTE: does not stop to validate
199   // count_unescaped 199   // count_unescaped
200   200  
201   #ifdef BOOST_JSON_USE_SSE2 201   #ifdef BOOST_JSON_USE_SSE2
202   202  
// Counts leading bytes of [s, s + n) that are neither a double quote
// (0x22), a backslash, nor a control character (below 0x20).
//
// Only whole 16-byte chunks are examined: a trailing remainder of
// fewer than 16 bytes is never scanned, so the result can be smaller
// than the true length of the unescaped prefix. No UTF-8 validation
// is performed.
inline
std::size_t
count_unescaped(
    char const* s,
    std::size_t n) noexcept
{
    __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const q2 = _mm_set1_epi8( '\\' );   // '\\'
    __m128i const q3 = _mm_set1_epi8( 0x1F );   // largest control character

    char const * s0 = s;

    while( n >= 16 )
    {
        __m128i v1 = _mm_loadu_si128( reinterpret_cast<__m128i const*>(s) );
        __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); // quote
        __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash
        __m128i v4 = _mm_or_si128( v2, v3 );   // combine quotes and backslash
        // unsigned min(v, 0x1F) == v exactly when v <= 0x1F
        __m128i v5 = _mm_min_epu8( v1, q3 );
        __m128i v6 = _mm_cmpeq_epi8( v5, v1 ); // controls
        __m128i v7 = _mm_or_si128( v4, v6 );   // combine with control

        // one bit per byte; bit i is set when byte i matched
        int w = _mm_movemask_epi8( v7 );

        if( w != 0 )
        {
            // index of the lowest set bit == offset of the first match
            int m;
#if defined(__GNUC__) || defined(__clang__)
            m = __builtin_ffs( w ) - 1;
#else
            unsigned long index;
            _BitScanForward( &index, static_cast<unsigned long>(w) );
            m = static_cast<int>(index);
#endif

            s += m;
            break;
        }

        s += 16;
        n -= 16;
    }

    return static_cast<std::size_t>(s - s0);
}
249   249  
250   #else 250   #else
251   251  
// Fallback used when BOOST_JSON_USE_SSE2 is not defined: no scanning
// is done and zero is always returned, whatever the arguments.
inline
std::size_t
count_unescaped(
    char const*,
    std::size_t) noexcept
{
    return 0;
}
260   260  
261   #endif 261   #endif
262   262  
263   // count_digits 263   // count_digits
264   264  
265   #ifdef BOOST_JSON_USE_SSE2 265   #ifdef BOOST_JSON_USE_SSE2
266   266  
// Returns the number of leading ASCII digits ('0'..'9') among the
// 16 bytes starting at p, in the range [0, 16].
//
// assumes p..p+15 are valid (all 16 bytes are loaded unconditionally)
inline int count_digits( char const* p ) noexcept
{
    __m128i v1 = _mm_loadu_si128( reinterpret_cast<__m128i const*>(p) );
    // '0'..'9' (48..57) plus 70 maps onto 118..127, the top of the
    // signed 8-bit range; every other byte value wraps to something
    // below 118, so the signed compare marks exactly the non-digits
    v1 = _mm_add_epi8(v1, _mm_set1_epi8(70));
    v1 = _mm_cmplt_epi8(v1, _mm_set1_epi8(118));

    // one bit per byte; bit i is set when byte i is not a digit
    int m = _mm_movemask_epi8(v1);

    int n;

    if( m == 0 )
    {
        // all 16 bytes are digits
        n = 16;
    }
    else
    {
        // index of the lowest set bit == offset of the first non-digit
#if defined(__GNUC__) || defined(__clang__)
        n = __builtin_ffs( m ) - 1;
#else
        unsigned long index;
        _BitScanForward( &index, static_cast<unsigned long>(m) );
        n = static_cast<int>(index);
#endif
    }

    return n;
}
295   295  
296   #else 296   #else
297   297  
// Portable (non-SSE2) version.
//
// Returns the number of leading ASCII digits ('0'..'9') among the
// 16 bytes starting at p, in the range [0, 16]. Reading stops at the
// first non-digit.
//
// assumes p..p+15 are valid
inline int count_digits( char const* p ) noexcept
{
    int n = 0;

    for( ; n < 16; ++n )
    {
        // after the subtraction, anything that is not '0'..'9'
        // becomes a value greater than 9 in unsigned arithmetic
        unsigned char const d = *p++ - '0';
        if(d > 9) break;
    }

    return n;
}
311   311  
312   #endif 312   #endif
313   313  
314   // parse_unsigned 314   // parse_unsigned
315   315  
HITCBC 316   2019313 inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept 316   2019313 inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
317   { 317   {
HITCBC 318   10064473 while( n >= 4 ) 318   10064473 while( n >= 4 )
319   { 319   {
320   // faster on on clang for x86, 320   // faster on on clang for x86,
321   // slower on gcc 321   // slower on gcc
322   #ifdef __clang__ 322   #ifdef __clang__
323   r = r * 10 + p[0] - '0'; 323   r = r * 10 + p[0] - '0';
324   r = r * 10 + p[1] - '0'; 324   r = r * 10 + p[1] - '0';
325   r = r * 10 + p[2] - '0'; 325   r = r * 10 + p[2] - '0';
326   r = r * 10 + p[3] - '0'; 326   r = r * 10 + p[3] - '0';
327   #else 327   #else
328   uint32_t v; 328   uint32_t v;
HITCBC 329   8045160 std::memcpy( &v, p, 4 ); 329   8045160 std::memcpy( &v, p, 4 );
HITCBC 330   8045160 endian::native_to_little_inplace(v); 330   8045160 endian::native_to_little_inplace(v);
331   331  
HITCBC 332   8045160 v -= 0x30303030; 332   8045160 v -= 0x30303030;
333   333  
HITCBC 334   8045160 unsigned w0 = v & 0xFF; 334   8045160 unsigned w0 = v & 0xFF;
HITCBC 335   8045160 unsigned w1 = (v >> 8) & 0xFF; 335   8045160 unsigned w1 = (v >> 8) & 0xFF;
HITCBC 336   8045160 unsigned w2 = (v >> 16) & 0xFF; 336   8045160 unsigned w2 = (v >> 16) & 0xFF;
HITCBC 337   8045160 unsigned w3 = (v >> 24); 337   8045160 unsigned w3 = (v >> 24);
338   338  
HITCBC 339   8045160 r = (((r * 10 + w0) * 10 + w1) * 10 + w2) * 10 + w3; 339   8045160 r = (((r * 10 + w0) * 10 + w1) * 10 + w2) * 10 + w3;
340   #endif 340   #endif
HITCBC 341   8045160 p += 4; 341   8045160 p += 4;
HITCBC 342   8045160 n -= 4; 342   8045160 n -= 4;
343   } 343   }
344   344  
HITCBC 345   2019313 switch( n ) 345   2019313 switch( n )
346   { 346   {
HITCBC 347   2010658 case 0: 347   2010658 case 0:
HITCBC 348   2010658 break; 348   2010658 break;
HITCBC 349   5484 case 1: 349   5484 case 1:
HITCBC 350   5484 r = r * 10 + p[0] - '0'; 350   5484 r = r * 10 + p[0] - '0';
HITCBC 351   5484 break; 351   5484 break;
HITCBC 352   1714 case 2: 352   1714 case 2:
HITCBC 353   1714 r = r * 10 + p[0] - '0'; 353   1714 r = r * 10 + p[0] - '0';
HITCBC 354   1714 r = r * 10 + p[1] - '0'; 354   1714 r = r * 10 + p[1] - '0';
HITCBC 355   1714 break; 355   1714 break;
HITCBC 356   1457 case 3: 356   1457 case 3:
HITCBC 357   1457 r = r * 10 + p[0] - '0'; 357   1457 r = r * 10 + p[0] - '0';
HITCBC 358   1457 r = r * 10 + p[1] - '0'; 358   1457 r = r * 10 + p[1] - '0';
HITCBC 359   1457 r = r * 10 + p[2] - '0'; 359   1457 r = r * 10 + p[2] - '0';
HITCBC 360   1457 break; 360   1457 break;
361   } 361   }
HITCBC 362   2019313 return r; 362   2019313 return r;
363   } 363   }
364   364  
365   // KRYSTIAN: this function is unused 365   // KRYSTIAN: this function is unused
366   // count_leading 366   // count_leading
367   367  
368   /* 368   /*
369   #ifdef BOOST_JSON_USE_SSE2 369   #ifdef BOOST_JSON_USE_SSE2
370   370  
371   // assumes p..p+15 371   // assumes p..p+15
372   inline std::size_t count_leading( char const * p, char ch ) noexcept 372   inline std::size_t count_leading( char const * p, char ch ) noexcept
373   { 373   {
374   __m128i const q1 = _mm_set1_epi8( ch ); 374   __m128i const q1 = _mm_set1_epi8( ch );
375   375  
376   __m128i v = _mm_loadu_si128( (__m128i const*)p ); 376   __m128i v = _mm_loadu_si128( (__m128i const*)p );
377   377  
378   __m128i w = _mm_cmpeq_epi8( v, q1 ); 378   __m128i w = _mm_cmpeq_epi8( v, q1 );
379   379  
380   int m = _mm_movemask_epi8( w ) ^ 0xFFFF; 380   int m = _mm_movemask_epi8( w ) ^ 0xFFFF;
381   381  
382   std::size_t n; 382   std::size_t n;
383   383  
384   if( m == 0 ) 384   if( m == 0 )
385   { 385   {
386   n = 16; 386   n = 16;
387   } 387   }
388   else 388   else
389   { 389   {
390   #if defined(__GNUC__) || defined(__clang__) 390   #if defined(__GNUC__) || defined(__clang__)
391   n = __builtin_ffs( m ) - 1; 391   n = __builtin_ffs( m ) - 1;
392   #else 392   #else
393   unsigned long index; 393   unsigned long index;
394   _BitScanForward( &index, m ); 394   _BitScanForward( &index, m );
395   n = index; 395   n = index;
396   #endif 396   #endif
397   } 397   }
398   398  
399   return n; 399   return n;
400   } 400   }
401   401  
402   #else 402   #else
403   403  
404   // assumes p..p+15 404   // assumes p..p+15
405   inline std::size_t count_leading( char const * p, char ch ) noexcept 405   inline std::size_t count_leading( char const * p, char ch ) noexcept
406   { 406   {
407   std::size_t n = 0; 407   std::size_t n = 0;
408   408  
409   for( ; n < 16 && *p == ch; ++p, ++n ); 409   for( ; n < 16 && *p == ch; ++p, ++n );
410   410  
411   return n; 411   return n;
412   } 412   }
413   413  
414   #endif 414   #endif
415   */ 415   */
416   416  
417   // count_whitespace 417   // count_whitespace
418   418  
419   #ifdef BOOST_JSON_USE_SSE2 419   #ifdef BOOST_JSON_USE_SSE2
420   420  
// Returns a pointer to the first byte in [p, end) that is not one of
// space, '\t', '\r' or '\n', or `end` if every byte is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    if( p == end )
    {
        return p;
    }

    // quick exit: all four whitespace characters are <= 0x20, so a
    // larger first byte means there is nothing to skip
    if( static_cast<unsigned char>( *p ) > 0x20 )
    {
        return p;
    }

    __m128i const q1 = _mm_set1_epi8( ' ' );
    __m128i const q2 = _mm_set1_epi8( '\n' );
    __m128i const q3 = _mm_set1_epi8( 4 ); // '\t' | 4 == '\r'
    __m128i const q4 = _mm_set1_epi8( '\r' );

    // vector loop: only entered while a full 16-byte chunk remains
    while( end - p >= 16 )
    {
        __m128i v0 = _mm_loadu_si128( reinterpret_cast<__m128i const*>(p) );

        // space or newline
        __m128i w0 = _mm_or_si128(
            _mm_cmpeq_epi8( v0, q1 ),
            _mm_cmpeq_epi8( v0, q2 ));
        // setting bit 2 maps both '\t' (0x09) and '\r' (0x0D) onto
        // '\r', so one comparison covers the two of them
        __m128i v1 = _mm_or_si128( v0, q3 );
        __m128i w1 = _mm_cmpeq_epi8( v1, q4 );
        __m128i w2 = _mm_or_si128( w0, w1 );

        // invert the 16-bit mask: bit i is set when byte i is NOT
        // whitespace
        int m = _mm_movemask_epi8( w2 ) ^ 0xFFFF;

        if( m != 0 )
        {
            // index of the lowest set bit == offset of the first
            // non-whitespace byte
#if defined(__GNUC__) || defined(__clang__)
            std::size_t c = static_cast<std::size_t>( __builtin_ffs( m ) - 1 );
#else
            unsigned long index;
            _BitScanForward( &index, static_cast<unsigned long>(m) );
            std::size_t c = index;
#endif

            p += c;
            return p;
        }

        p += 16;
    }

    // scalar loop for the remaining (fewer than 16) bytes
    while( p != end )
    {
        if( *p != ' ' && *p != '\t' && *p != '\r' && *p != '\n' )
        {
            return p;
        }

        ++p;
    }

    return p;
}
480   480  
481   /* 481   /*
482   482  
483   // slightly faster on msvc-14.2, slightly slower on clang-win 483   // slightly faster on msvc-14.2, slightly slower on clang-win
484   484  
485   inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept 485   inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept
486   { 486   {
487   char const * p0 = p; 487   char const * p0 = p;
488   488  
489   while( n > 0 ) 489   while( n > 0 )
490   { 490   {
491   char ch = *p; 491   char ch = *p;
492   492  
493   if( ch == '\n' || ch == '\r' ) 493   if( ch == '\n' || ch == '\r' )
494   { 494   {
495   ++p; 495   ++p;
496   --n; 496   --n;
497   continue; 497   continue;
498   } 498   }
499   499  
500   if( ch != ' ' && ch != '\t' ) 500   if( ch != ' ' && ch != '\t' )
501   { 501   {
502   break; 502   break;
503   } 503   }
504   504  
505   ++p; 505   ++p;
506   --n; 506   --n;
507   507  
508   while( n >= 16 ) 508   while( n >= 16 )
509   { 509   {
510   std::size_t n2 = count_leading( p, ch ); 510   std::size_t n2 = count_leading( p, ch );
511   511  
512   p += n2; 512   p += n2;
513   n -= n2; 513   n -= n2;
514   514  
515   if( n2 < 16 ) 515   if( n2 < 16 )
516   { 516   {
517   break; 517   break;
518   } 518   }
519   } 519   }
520   } 520   }
521   521  
522   return p - p0; 522   return p - p0;
523   } 523   }
524   */ 524   */
525   525  
526   #else 526   #else
527   527  
// Portable (non-SSE2) version.
//
// Returns a pointer to the first byte in [p, end) that is not one of
// space, '\n', '\r' or '\t', or `end` if every byte is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    for(; p != end; ++p)
    {
        char const c = *p;
        if( c != ' ' && c != '\n' && c != '\r' && c != '\t' ) break;
    }

    return p;
}
539   539  
540   #endif 540   #endif
541   541  
542   } // detail 542   } // detail
543   } // namespace json 543   } // namespace json
544   } // namespace boost 544   } // namespace boost
545   545  
546   #endif 546   #endif