97.81% Lines (134/137)
100.00% Functions (6/6)
| TLA | Baseline | Branch | ||||||
|---|---|---|---|---|---|---|---|---|
| Line | Hits | Code | Line | Hits | Code | |||
| 1 | // | 1 | // | |||||
| 2 | // Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com), | 2 | // Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com), | |||||
| 3 | // Vinnie Falco (vinnie.falco@gmail.com) | 3 | // Vinnie Falco (vinnie.falco@gmail.com) | |||||
| 4 | // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com) | 4 | // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com) | |||||
| 5 | // | 5 | // | |||||
| 6 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | 6 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |||||
| 7 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | 7 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |||||
| 8 | // | 8 | // | |||||
| 9 | // Official repository: https://github.com/boostorg/json | 9 | // Official repository: https://github.com/boostorg/json | |||||
| 10 | // | 10 | // | |||||
| 11 | 11 | |||||||
| 12 | #ifndef BOOST_JSON_DETAIL_SSE2_HPP | 12 | #ifndef BOOST_JSON_DETAIL_SSE2_HPP | |||||
| 13 | #define BOOST_JSON_DETAIL_SSE2_HPP | 13 | #define BOOST_JSON_DETAIL_SSE2_HPP | |||||
| 14 | 14 | |||||||
| 15 | #include <boost/json/detail/config.hpp> | 15 | #include <boost/json/detail/config.hpp> | |||||
| 16 | #include <boost/json/detail/utf8.hpp> | 16 | #include <boost/json/detail/utf8.hpp> | |||||
| 17 | #include <cstddef> | 17 | #include <cstddef> | |||||
| 18 | #include <cstring> | 18 | #include <cstring> | |||||
| 19 | #ifdef BOOST_JSON_USE_SSE2 | 19 | #ifdef BOOST_JSON_USE_SSE2 | |||||
| 20 | # include <emmintrin.h> | 20 | # include <emmintrin.h> | |||||
| 21 | # include <xmmintrin.h> | 21 | # include <xmmintrin.h> | |||||
| 22 | # ifdef _MSC_VER | 22 | # ifdef _MSC_VER | |||||
| 23 | # include <intrin.h> | 23 | # include <intrin.h> | |||||
| 24 | # endif | 24 | # endif | |||||
| 25 | #endif | 25 | #endif | |||||
| 26 | 26 | |||||||
| 27 | namespace boost { | 27 | namespace boost { | |||||
| 28 | namespace json { | 28 | namespace json { | |||||
| 29 | namespace detail { | 29 | namespace detail { | |||||
| 30 | 30 | |||||||
| 31 | #ifdef BOOST_JSON_USE_SSE2 | 31 | #ifdef BOOST_JSON_USE_SSE2 | |||||
| 32 | 32 | |||||||
| 33 | template<bool AllowBadUTF8> | 33 | template<bool AllowBadUTF8> | |||||
| 34 | inline | 34 | inline | |||||
| 35 | const char* | 35 | const char* | |||||
| HITCBC | 36 | 2177 | count_valid( | 36 | 2177 | count_valid( | ||
| 37 | char const* p, | 37 | char const* p, | |||||
| 38 | const char* end) noexcept | 38 | const char* end) noexcept | |||||
| 39 | { | 39 | { | |||||
| HITCBC | 40 | 2177 | __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"' | 40 | 2177 | __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"' | ||
| HITCBC | 41 | 2177 | __m128i const q2 = _mm_set1_epi8( '\\' ); // '\\' | 41 | 2177 | __m128i const q2 = _mm_set1_epi8( '\\' ); // '\\' | ||
| HITCBC | 42 | 2177 | __m128i const q3 = _mm_set1_epi8( 0x1F ); | 42 | 2177 | __m128i const q3 = _mm_set1_epi8( 0x1F ); | ||
| 43 | 43 | |||||||
| HITCBC | 44 | 2415 | while(end - p >= 16) | 44 | 2415 | while(end - p >= 16) | ||
| 45 | { | 45 | { | |||||
| HITCBC | 46 | 924 | __m128i v1 = _mm_loadu_si128( (__m128i const*)p ); | 46 | 924 | __m128i v1 = _mm_loadu_si128( (__m128i const*)p ); | ||
| HITCBC | 47 | 924 | __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); // quote | 47 | 924 | __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); // quote | ||
| HITCBC | 48 | 924 | __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash | 48 | 924 | __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash | ||
| HITCBC | 49 | 924 | __m128i v4 = _mm_or_si128( v2, v3 ); // combine quotes and backslash | 49 | 924 | __m128i v4 = _mm_or_si128( v2, v3 ); // combine quotes and backslash | ||
| HITCBC | 50 | 924 | __m128i v5 = _mm_min_epu8( v1, q3 ); | 50 | 924 | __m128i v5 = _mm_min_epu8( v1, q3 ); | ||
| HITCBC | 51 | 924 | __m128i v6 = _mm_cmpeq_epi8( v5, v1 ); // controls | 51 | 924 | __m128i v6 = _mm_cmpeq_epi8( v5, v1 ); // controls | ||
| HITCBC | 52 | 924 | __m128i v7 = _mm_or_si128( v4, v6 ); // combine with control | 52 | 924 | __m128i v7 = _mm_or_si128( v4, v6 ); // combine with control | ||
| 53 | 53 | |||||||
| HITCBC | 54 | 924 | int w = _mm_movemask_epi8( v7 ); | 54 | 924 | int w = _mm_movemask_epi8( v7 ); | ||
| 55 | 55 | |||||||
| HITCBC | 56 | 924 | if( w != 0 ) | 56 | 924 | if( w != 0 ) | ||
| 57 | { | 57 | { | |||||
| 58 | int m; | 58 | int m; | |||||
| 59 | #if defined(__GNUC__) || defined(__clang__) | 59 | #if defined(__GNUC__) || defined(__clang__) | |||||
| HITCBC | 60 | 686 | m = __builtin_ffs( w ) - 1; | 60 | 686 | m = __builtin_ffs( w ) - 1; | ||
| 61 | #else | 61 | #else | |||||
| 62 | unsigned long index; | 62 | unsigned long index; | |||||
| 63 | _BitScanForward( &index, w ); | 63 | _BitScanForward( &index, w ); | |||||
| 64 | m = index; | 64 | m = index; | |||||
| 65 | #endif | 65 | #endif | |||||
| HITCBC | 66 | 686 | return p + m; | 66 | 686 | return p + m; | ||
| 67 | } | 67 | } | |||||
| 68 | 68 | |||||||
| HITCBC | 69 | 238 | p += 16; | 69 | 238 | p += 16; | ||
| 70 | } | 70 | } | |||||
| 71 | 71 | |||||||
| HITCBC | 72 | 3738 | while(p != end) | 72 | 3738 | while(p != end) | ||
| 73 | { | 73 | { | |||||
| HITCBC | 74 | 3689 | const unsigned char c = *p; | 74 | 3689 | const unsigned char c = *p; | ||
| HITCBC | 75 | 3689 | if(c == '\x22' || c == '\\' || c < 0x20) | 75 | 3689 | if(c == '\x22' || c == '\\' || c < 0x20) | ||
| 76 | break; | 76 | break; | |||||
| HITCBC | 77 | 2247 | ++p; | 77 | 2247 | ++p; | ||
| 78 | } | 78 | } | |||||
| 79 | 79 | |||||||
| HITCBC | 80 | 1491 | return p; | 80 | 1491 | return p; | ||
| 81 | } | 81 | } | |||||
| 82 | 82 | |||||||
| 83 | template<> | 83 | template<> | |||||
| 84 | inline | 84 | inline | |||||
| 85 | const char* | 85 | const char* | |||||
| HITCBC | 86 | 162602 | count_valid<false>( | 86 | 162602 | count_valid<false>( | ||
| 87 | char const* p, | 87 | char const* p, | |||||
| 88 | const char* end) noexcept | 88 | const char* end) noexcept | |||||
| 89 | { | 89 | { | |||||
| HITCBC | 90 | 162602 | __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"' | 90 | 162602 | __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"' | ||
| HITCBC | 91 | 162602 | __m128i const q2 = _mm_set1_epi8( '\\' ); | 91 | 162602 | __m128i const q2 = _mm_set1_epi8( '\\' ); | ||
| HITCBC | 92 | 162602 | __m128i const q3 = _mm_set1_epi8( 0x20 ); | 92 | 162602 | __m128i const q3 = _mm_set1_epi8( 0x20 ); | ||
| 93 | 93 | |||||||
| HITCBC | 94 | 12202131 | while(end - p >= 16) | 94 | 12202131 | while(end - p >= 16) | ||
| 95 | { | 95 | { | |||||
| HITCBC | 96 | 12090929 | __m128i v1 = _mm_loadu_si128( (__m128i const*)p ); | 96 | 12090929 | __m128i v1 = _mm_loadu_si128( (__m128i const*)p ); | ||
| 97 | 97 | |||||||
| HITCBC | 98 | 12090929 | __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); | 98 | 12090929 | __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); | ||
| HITCBC | 99 | 12090929 | __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); | 99 | 12090929 | __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); | ||
| HITCBC | 100 | 12090929 | __m128i v4 = _mm_cmplt_epi8( v1, q3 ); | 100 | 12090929 | __m128i v4 = _mm_cmplt_epi8( v1, q3 ); | ||
| 101 | 101 | |||||||
| HITCBC | 102 | 12090929 | __m128i v5 = _mm_or_si128( v2, v3 ); | 102 | 12090929 | __m128i v5 = _mm_or_si128( v2, v3 ); | ||
| HITCBC | 103 | 12090929 | __m128i v6 = _mm_or_si128( v5, v4 ); | 103 | 12090929 | __m128i v6 = _mm_or_si128( v5, v4 ); | ||
| 104 | 104 | |||||||
| HITCBC | 105 | 12090929 | int w = _mm_movemask_epi8( v6 ); | 105 | 12090929 | int w = _mm_movemask_epi8( v6 ); | ||
| 106 | 106 | |||||||
| HITCBC | 107 | 12090929 | if( w != 0 ) | 107 | 12090929 | if( w != 0 ) | ||
| 108 | { | 108 | { | |||||
| 109 | int m; | 109 | int m; | |||||
| 110 | #if defined(__GNUC__) || defined(__clang__) | 110 | #if defined(__GNUC__) || defined(__clang__) | |||||
| HITCBC | 111 | 51400 | m = __builtin_ffs( w ) - 1; | 111 | 51400 | m = __builtin_ffs( w ) - 1; | ||
| 112 | #else | 112 | #else | |||||
| 113 | unsigned long index; | 113 | unsigned long index; | |||||
| 114 | _BitScanForward( &index, w ); | 114 | _BitScanForward( &index, w ); | |||||
| 115 | m = index; | 115 | m = index; | |||||
| 116 | #endif | 116 | #endif | |||||
| HITCBC | 117 | 51400 | p += m; | 117 | 51400 | p += m; | ||
| HITCBC | 118 | 51400 | break; | 118 | 51400 | break; | ||
| 119 | } | 119 | } | |||||
| 120 | 120 | |||||||
| HITCBC | 121 | 12039529 | p += 16; | 121 | 12039529 | p += 16; | ||
| 122 | } | 122 | } | |||||
| 123 | 123 | |||||||
| HITCBC | 124 | 479946 | while(p != end) | 124 | 479946 | while(p != end) | ||
| 125 | { | 125 | { | |||||
| HITCBC | 126 | 449775 | const unsigned char c = *p; | 126 | 449775 | const unsigned char c = *p; | ||
| HITCBC | 127 | 449775 | if(c == '\x22' || c == '\\' || c < 0x20) | 127 | 449775 | if(c == '\x22' || c == '\\' || c < 0x20) | ||
| 128 | break; | 128 | break; | |||||
| HITCBC | 129 | 320806 | if(c < 0x80) | 129 | 320806 | if(c < 0x80) | ||
| 130 | { | 130 | { | |||||
| HITCBC | 131 | 307616 | ++p; | 131 | 307616 | ++p; | ||
| HITCBC | 132 | 307616 | continue; | 132 | 307616 | continue; | ||
| 133 | } | 133 | } | |||||
| 134 | // validate utf-8 | 134 | // validate utf-8 | |||||
| HITCBC | 135 | 13190 | uint16_t first = classify_utf8(c); | 135 | 13190 | uint16_t first = classify_utf8(c); | ||
| HITCBC | 136 | 13190 | uint8_t len = first & 0xFF; | 136 | 13190 | uint8_t len = first & 0xFF; | ||
| HITCBC | 137 | 13190 | if(BOOST_JSON_UNLIKELY(end - p < len)) | 137 | 13190 | if(BOOST_JSON_UNLIKELY(end - p < len)) | ||
| HITCBC | 138 | 1905 | break; | 138 | 1905 | break; | ||
| HITCBC | 139 | 11285 | if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first))) | 139 | 11285 | if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first))) | ||
| HITCBC | 140 | 1557 | break; | 140 | 1557 | break; | ||
| HITCBC | 141 | 9728 | p += len; | 141 | 9728 | p += len; | ||
| 142 | } | 142 | } | |||||
| 143 | 143 | |||||||
| HITCBC | 144 | 162602 | return p; | 144 | 162602 | return p; | ||
| 145 | } | 145 | } | |||||
| 146 | 146 | |||||||
| 147 | #else | 147 | #else | |||||
| 148 | 148 | |||||||
| 149 | template<bool AllowBadUTF8> | 149 | template<bool AllowBadUTF8> | |||||
| 150 | char const* | 150 | char const* | |||||
| 151 | count_valid( | 151 | count_valid( | |||||
| 152 | char const* p, | 152 | char const* p, | |||||
| 153 | char const* end) noexcept | 153 | char const* end) noexcept | |||||
| 154 | { | 154 | { | |||||
| 155 | while(p != end) | 155 | while(p != end) | |||||
| 156 | { | 156 | { | |||||
| 157 | const unsigned char c = *p; | 157 | const unsigned char c = *p; | |||||
| 158 | if(c == '\x22' || c == '\\' || c < 0x20) | 158 | if(c == '\x22' || c == '\\' || c < 0x20) | |||||
| 159 | break; | 159 | break; | |||||
| 160 | ++p; | 160 | ++p; | |||||
| 161 | } | 161 | } | |||||
| 162 | 162 | |||||||
| 163 | return p; | 163 | return p; | |||||
| 164 | } | 164 | } | |||||
| 165 | 165 | |||||||
| 166 | template<> | 166 | template<> | |||||
| 167 | inline | 167 | inline | |||||
| 168 | char const* | 168 | char const* | |||||
| 169 | count_valid<false>( | 169 | count_valid<false>( | |||||
| 170 | char const* p, | 170 | char const* p, | |||||
| 171 | char const* end) noexcept | 171 | char const* end) noexcept | |||||
| 172 | { | 172 | { | |||||
| 173 | while(p != end) | 173 | while(p != end) | |||||
| 174 | { | 174 | { | |||||
| 175 | const unsigned char c = *p; | 175 | const unsigned char c = *p; | |||||
| 176 | if(c == '\x22' || c == '\\' || c < 0x20) | 176 | if(c == '\x22' || c == '\\' || c < 0x20) | |||||
| 177 | break; | 177 | break; | |||||
| 178 | if(c < 0x80) | 178 | if(c < 0x80) | |||||
| 179 | { | 179 | { | |||||
| 180 | ++p; | 180 | ++p; | |||||
| 181 | continue; | 181 | continue; | |||||
| 182 | } | 182 | } | |||||
| 183 | // validate utf-8 | 183 | // validate utf-8 | |||||
| 184 | uint16_t first = classify_utf8(c); | 184 | uint16_t first = classify_utf8(c); | |||||
| 185 | uint8_t len = first & 0xFF; | 185 | uint8_t len = first & 0xFF; | |||||
| 186 | if(BOOST_JSON_UNLIKELY(end - p < len)) | 186 | if(BOOST_JSON_UNLIKELY(end - p < len)) | |||||
| 187 | break; | 187 | break; | |||||
| 188 | if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first))) | 188 | if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first))) | |||||
| 189 | break; | 189 | break; | |||||
| 190 | p += len; | 190 | p += len; | |||||
| 191 | } | 191 | } | |||||
| 192 | 192 | |||||||
| 193 | return p; | 193 | return p; | |||||
| 194 | } | 194 | } | |||||
| 195 | 195 | |||||||
| 196 | #endif | 196 | #endif | |||||
| 197 | 197 | |||||||
| 198 | // KRYSTIAN NOTE: does not stop to validate | 198 | // KRYSTIAN NOTE: does not stop to validate | |||||
| 199 | // count_unescaped | 199 | // count_unescaped | |||||
| 200 | 200 | |||||||
| 201 | #ifdef BOOST_JSON_USE_SSE2 | 201 | #ifdef BOOST_JSON_USE_SSE2 | |||||
| 202 | 202 | |||||||
| 203 | inline | 203 | inline | |||||
| 204 | size_t | 204 | size_t | |||||
| HITCBC | 205 | 34441 | count_unescaped( | 205 | 34441 | count_unescaped( | ||
| 206 | char const* s, | 206 | char const* s, | |||||
| 207 | size_t n) noexcept | 207 | size_t n) noexcept | |||||
| 208 | { | 208 | { | |||||
| 209 | 209 | |||||||
| HITCBC | 210 | 34441 | __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"' | 210 | 34441 | __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"' | ||
| HITCBC | 211 | 34441 | __m128i const q2 = _mm_set1_epi8( '\\' ); // '\\' | 211 | 34441 | __m128i const q2 = _mm_set1_epi8( '\\' ); // '\\' | ||
| HITCBC | 212 | 34441 | __m128i const q3 = _mm_set1_epi8( 0x1F ); | 212 | 34441 | __m128i const q3 = _mm_set1_epi8( 0x1F ); | ||
| 213 | 213 | |||||||
| HITCBC | 214 | 34441 | char const * s0 = s; | 214 | 34441 | char const * s0 = s; | ||
| 215 | 215 | |||||||
| HITCBC | 216 | 4096152 | while( n >= 16 ) | 216 | 4096152 | while( n >= 16 ) | ||
| 217 | { | 217 | { | |||||
| HITCBC | 218 | 4061711 | __m128i v1 = _mm_loadu_si128( (__m128i const*)s ); | 218 | 4061711 | __m128i v1 = _mm_loadu_si128( (__m128i const*)s ); | ||
| HITCBC | 219 | 4061711 | __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); // quote | 219 | 4061711 | __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); // quote | ||
| HITCBC | 220 | 4061711 | __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash | 220 | 4061711 | __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash | ||
| HITCBC | 221 | 4061711 | __m128i v4 = _mm_or_si128( v2, v3 ); // combine quotes and backslash | 221 | 4061711 | __m128i v4 = _mm_or_si128( v2, v3 ); // combine quotes and backslash | ||
| HITCBC | 222 | 4061711 | __m128i v5 = _mm_min_epu8( v1, q3 ); | 222 | 4061711 | __m128i v5 = _mm_min_epu8( v1, q3 ); | ||
| HITCBC | 223 | 4061711 | __m128i v6 = _mm_cmpeq_epi8( v5, v1 ); // controls | 223 | 4061711 | __m128i v6 = _mm_cmpeq_epi8( v5, v1 ); // controls | ||
| HITCBC | 224 | 4061711 | __m128i v7 = _mm_or_si128( v4, v6 ); // combine with control | 224 | 4061711 | __m128i v7 = _mm_or_si128( v4, v6 ); // combine with control | ||
| 225 | 225 | |||||||
| HITCBC | 226 | 4061711 | int w = _mm_movemask_epi8( v7 ); | 226 | 4061711 | int w = _mm_movemask_epi8( v7 ); | ||
| 227 | 227 | |||||||
| HITCBC | 228 | 4061711 | if( w != 0 ) | 228 | 4061711 | if( w != 0 ) | ||
| 229 | { | 229 | { | |||||
| 230 | int m; | 230 | int m; | |||||
| 231 | #if defined(__GNUC__) || defined(__clang__) | 231 | #if defined(__GNUC__) || defined(__clang__) | |||||
| MISUBC | 232 | ✗ | m = __builtin_ffs( w ) - 1; | 232 | ✗ | m = __builtin_ffs( w ) - 1; | ||
| 233 | #else | 233 | #else | |||||
| 234 | unsigned long index; | 234 | unsigned long index; | |||||
| 235 | _BitScanForward( &index, w ); | 235 | _BitScanForward( &index, w ); | |||||
| 236 | m = index; | 236 | m = index; | |||||
| 237 | #endif | 237 | #endif | |||||
| 238 | 238 | |||||||
| MISUBC | 239 | ✗ | s += m; | 239 | ✗ | s += m; | ||
| MISUBC | 240 | ✗ | break; | 240 | ✗ | break; | ||
| 241 | } | 241 | } | |||||
| 242 | 242 | |||||||
| HITCBC | 243 | 4061711 | s += 16; | 243 | 4061711 | s += 16; | ||
| HITCBC | 244 | 4061711 | n -= 16; | 244 | 4061711 | n -= 16; | ||
| 245 | } | 245 | } | |||||
| 246 | 246 | |||||||
| HITCBC | 247 | 34441 | return s - s0; | 247 | 34441 | return s - s0; | ||
| 248 | } | 248 | } | |||||
| 249 | 249 | |||||||
| 250 | #else | 250 | #else | |||||
| 251 | 251 | |||||||
| 252 | inline | 252 | inline | |||||
| 253 | std::size_t | 253 | std::size_t | |||||
| 254 | count_unescaped( | 254 | count_unescaped( | |||||
| 255 | char const*, | 255 | char const*, | |||||
| 256 | std::size_t) noexcept | 256 | std::size_t) noexcept | |||||
| 257 | { | 257 | { | |||||
| 258 | return 0; | 258 | return 0; | |||||
| 259 | } | 259 | } | |||||
| 260 | 260 | |||||||
| 261 | #endif | 261 | #endif | |||||
| 262 | 262 | |||||||
| 263 | // count_digits | 263 | // count_digits | |||||
| 264 | 264 | |||||||
| 265 | #ifdef BOOST_JSON_USE_SSE2 | 265 | #ifdef BOOST_JSON_USE_SSE2 | |||||
| 266 | 266 | |||||||
| 267 | // assumes p..p+15 are valid | 267 | // assumes p..p+15 are valid | |||||
| HITCBC | 268 | 2024516 | inline int count_digits( char const* p ) noexcept | 268 | 2024516 | inline int count_digits( char const* p ) noexcept | ||
| 269 | { | 269 | { | |||||
| HITCBC | 270 | 2024516 | __m128i v1 = _mm_loadu_si128( (__m128i const*)p ); | 270 | 2024516 | __m128i v1 = _mm_loadu_si128( (__m128i const*)p ); | ||
| HITCBC | 271 | 4049032 | v1 = _mm_add_epi8(v1, _mm_set1_epi8(70)); | 271 | 4049032 | v1 = _mm_add_epi8(v1, _mm_set1_epi8(70)); | ||
| HITCBC | 272 | 4049032 | v1 = _mm_cmplt_epi8(v1, _mm_set1_epi8(118)); | 272 | 4049032 | v1 = _mm_cmplt_epi8(v1, _mm_set1_epi8(118)); | ||
| 273 | 273 | |||||||
| HITCBC | 274 | 2024516 | int m = _mm_movemask_epi8(v1); | 274 | 2024516 | int m = _mm_movemask_epi8(v1); | ||
| 275 | 275 | |||||||
| 276 | int n; | 276 | int n; | |||||
| 277 | 277 | |||||||
| HITCBC | 278 | 2024516 | if( m == 0 ) | 278 | 2024516 | if( m == 0 ) | ||
| 279 | { | 279 | { | |||||
| HITCBC | 280 | 2012400 | n = 16; | 280 | 2012400 | n = 16; | ||
| 281 | } | 281 | } | |||||
| 282 | else | 282 | else | |||||
| 283 | { | 283 | { | |||||
| 284 | #if defined(__GNUC__) || defined(__clang__) | 284 | #if defined(__GNUC__) || defined(__clang__) | |||||
| HITCBC | 285 | 12116 | n = __builtin_ffs( m ) - 1; | 285 | 12116 | n = __builtin_ffs( m ) - 1; | ||
| 286 | #else | 286 | #else | |||||
| 287 | unsigned long index; | 287 | unsigned long index; | |||||
| 288 | _BitScanForward( &index, m ); | 288 | _BitScanForward( &index, m ); | |||||
| 289 | n = static_cast<int>(index); | 289 | n = static_cast<int>(index); | |||||
| 290 | #endif | 290 | #endif | |||||
| 291 | } | 291 | } | |||||
| 292 | 292 | |||||||
| HITCBC | 293 | 2024516 | return n; | 293 | 2024516 | return n; | ||
| 294 | } | 294 | } | |||||
| 295 | 295 | |||||||
| 296 | #else | 296 | #else | |||||
| 297 | 297 | |||||||
| 298 | // assumes p..p+15 are valid | 298 | // assumes p..p+15 are valid | |||||
| 299 | inline int count_digits( char const* p ) noexcept | 299 | inline int count_digits( char const* p ) noexcept | |||||
| 300 | { | 300 | { | |||||
| 301 | int n = 0; | 301 | int n = 0; | |||||
| 302 | 302 | |||||||
| 303 | for( ; n < 16; ++n ) | 303 | for( ; n < 16; ++n ) | |||||
| 304 | { | 304 | { | |||||
| 305 | unsigned char const d = *p++ - '0'; | 305 | unsigned char const d = *p++ - '0'; | |||||
| 306 | if(d > 9) break; | 306 | if(d > 9) break; | |||||
| 307 | } | 307 | } | |||||
| 308 | 308 | |||||||
| 309 | return n; | 309 | return n; | |||||
| 310 | } | 310 | } | |||||
| 311 | 311 | |||||||
| 312 | #endif | 312 | #endif | |||||
| 313 | 313 | |||||||
| 314 | // parse_unsigned | 314 | // parse_unsigned | |||||
| 315 | 315 | |||||||
| HITCBC | 316 | 2019313 | inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept | 316 | 2019313 | inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept | ||
| 317 | { | 317 | { | |||||
| HITCBC | 318 | 10064473 | while( n >= 4 ) | 318 | 10064473 | while( n >= 4 ) | ||
| 319 | { | 319 | { | |||||
| 320 | // faster on clang for x86, | 320 | // faster on clang for x86, | |||||
| 321 | // slower on gcc | 321 | // slower on gcc | |||||
| 322 | #ifdef __clang__ | 322 | #ifdef __clang__ | |||||
| 323 | r = r * 10 + p[0] - '0'; | 323 | r = r * 10 + p[0] - '0'; | |||||
| 324 | r = r * 10 + p[1] - '0'; | 324 | r = r * 10 + p[1] - '0'; | |||||
| 325 | r = r * 10 + p[2] - '0'; | 325 | r = r * 10 + p[2] - '0'; | |||||
| 326 | r = r * 10 + p[3] - '0'; | 326 | r = r * 10 + p[3] - '0'; | |||||
| 327 | #else | 327 | #else | |||||
| 328 | uint32_t v; | 328 | uint32_t v; | |||||
| HITCBC | 329 | 8045160 | std::memcpy( &v, p, 4 ); | 329 | 8045160 | std::memcpy( &v, p, 4 ); | ||
| HITCBC | 330 | 8045160 | endian::native_to_little_inplace(v); | 330 | 8045160 | endian::native_to_little_inplace(v); | ||
| 331 | 331 | |||||||
| HITCBC | 332 | 8045160 | v -= 0x30303030; | 332 | 8045160 | v -= 0x30303030; | ||
| 333 | 333 | |||||||
| HITCBC | 334 | 8045160 | unsigned w0 = v & 0xFF; | 334 | 8045160 | unsigned w0 = v & 0xFF; | ||
| HITCBC | 335 | 8045160 | unsigned w1 = (v >> 8) & 0xFF; | 335 | 8045160 | unsigned w1 = (v >> 8) & 0xFF; | ||
| HITCBC | 336 | 8045160 | unsigned w2 = (v >> 16) & 0xFF; | 336 | 8045160 | unsigned w2 = (v >> 16) & 0xFF; | ||
| HITCBC | 337 | 8045160 | unsigned w3 = (v >> 24); | 337 | 8045160 | unsigned w3 = (v >> 24); | ||
| 338 | 338 | |||||||
| HITCBC | 339 | 8045160 | r = (((r * 10 + w0) * 10 + w1) * 10 + w2) * 10 + w3; | 339 | 8045160 | r = (((r * 10 + w0) * 10 + w1) * 10 + w2) * 10 + w3; | ||
| 340 | #endif | 340 | #endif | |||||
| HITCBC | 341 | 8045160 | p += 4; | 341 | 8045160 | p += 4; | ||
| HITCBC | 342 | 8045160 | n -= 4; | 342 | 8045160 | n -= 4; | ||
| 343 | } | 343 | } | |||||
| 344 | 344 | |||||||
| HITCBC | 345 | 2019313 | switch( n ) | 345 | 2019313 | switch( n ) | ||
| 346 | { | 346 | { | |||||
| HITCBC | 347 | 2010658 | case 0: | 347 | 2010658 | case 0: | ||
| HITCBC | 348 | 2010658 | break; | 348 | 2010658 | break; | ||
| HITCBC | 349 | 5484 | case 1: | 349 | 5484 | case 1: | ||
| HITCBC | 350 | 5484 | r = r * 10 + p[0] - '0'; | 350 | 5484 | r = r * 10 + p[0] - '0'; | ||
| HITCBC | 351 | 5484 | break; | 351 | 5484 | break; | ||
| HITCBC | 352 | 1714 | case 2: | 352 | 1714 | case 2: | ||
| HITCBC | 353 | 1714 | r = r * 10 + p[0] - '0'; | 353 | 1714 | r = r * 10 + p[0] - '0'; | ||
| HITCBC | 354 | 1714 | r = r * 10 + p[1] - '0'; | 354 | 1714 | r = r * 10 + p[1] - '0'; | ||
| HITCBC | 355 | 1714 | break; | 355 | 1714 | break; | ||
| HITCBC | 356 | 1457 | case 3: | 356 | 1457 | case 3: | ||
| HITCBC | 357 | 1457 | r = r * 10 + p[0] - '0'; | 357 | 1457 | r = r * 10 + p[0] - '0'; | ||
| HITCBC | 358 | 1457 | r = r * 10 + p[1] - '0'; | 358 | 1457 | r = r * 10 + p[1] - '0'; | ||
| HITCBC | 359 | 1457 | r = r * 10 + p[2] - '0'; | 359 | 1457 | r = r * 10 + p[2] - '0'; | ||
| HITCBC | 360 | 1457 | break; | 360 | 1457 | break; | ||
| 361 | } | 361 | } | |||||
| HITCBC | 362 | 2019313 | return r; | 362 | 2019313 | return r; | ||
| 363 | } | 363 | } | |||||
| 364 | 364 | |||||||
| 365 | // KRYSTIAN: this function is unused | 365 | // KRYSTIAN: this function is unused | |||||
| 366 | // count_leading | 366 | // count_leading | |||||
| 367 | 367 | |||||||
| 368 | /* | 368 | /* | |||||
| 369 | #ifdef BOOST_JSON_USE_SSE2 | 369 | #ifdef BOOST_JSON_USE_SSE2 | |||||
| 370 | 370 | |||||||
| 371 | // assumes p..p+15 | 371 | // assumes p..p+15 | |||||
| 372 | inline std::size_t count_leading( char const * p, char ch ) noexcept | 372 | inline std::size_t count_leading( char const * p, char ch ) noexcept | |||||
| 373 | { | 373 | { | |||||
| 374 | __m128i const q1 = _mm_set1_epi8( ch ); | 374 | __m128i const q1 = _mm_set1_epi8( ch ); | |||||
| 375 | 375 | |||||||
| 376 | __m128i v = _mm_loadu_si128( (__m128i const*)p ); | 376 | __m128i v = _mm_loadu_si128( (__m128i const*)p ); | |||||
| 377 | 377 | |||||||
| 378 | __m128i w = _mm_cmpeq_epi8( v, q1 ); | 378 | __m128i w = _mm_cmpeq_epi8( v, q1 ); | |||||
| 379 | 379 | |||||||
| 380 | int m = _mm_movemask_epi8( w ) ^ 0xFFFF; | 380 | int m = _mm_movemask_epi8( w ) ^ 0xFFFF; | |||||
| 381 | 381 | |||||||
| 382 | std::size_t n; | 382 | std::size_t n; | |||||
| 383 | 383 | |||||||
| 384 | if( m == 0 ) | 384 | if( m == 0 ) | |||||
| 385 | { | 385 | { | |||||
| 386 | n = 16; | 386 | n = 16; | |||||
| 387 | } | 387 | } | |||||
| 388 | else | 388 | else | |||||
| 389 | { | 389 | { | |||||
| 390 | #if defined(__GNUC__) || defined(__clang__) | 390 | #if defined(__GNUC__) || defined(__clang__) | |||||
| 391 | n = __builtin_ffs( m ) - 1; | 391 | n = __builtin_ffs( m ) - 1; | |||||
| 392 | #else | 392 | #else | |||||
| 393 | unsigned long index; | 393 | unsigned long index; | |||||
| 394 | _BitScanForward( &index, m ); | 394 | _BitScanForward( &index, m ); | |||||
| 395 | n = index; | 395 | n = index; | |||||
| 396 | #endif | 396 | #endif | |||||
| 397 | } | 397 | } | |||||
| 398 | 398 | |||||||
| 399 | return n; | 399 | return n; | |||||
| 400 | } | 400 | } | |||||
| 401 | 401 | |||||||
| 402 | #else | 402 | #else | |||||
| 403 | 403 | |||||||
| 404 | // assumes p..p+15 | 404 | // assumes p..p+15 | |||||
| 405 | inline std::size_t count_leading( char const * p, char ch ) noexcept | 405 | inline std::size_t count_leading( char const * p, char ch ) noexcept | |||||
| 406 | { | 406 | { | |||||
| 407 | std::size_t n = 0; | 407 | std::size_t n = 0; | |||||
| 408 | 408 | |||||||
| 409 | for( ; n < 16 && *p == ch; ++p, ++n ); | 409 | for( ; n < 16 && *p == ch; ++p, ++n ); | |||||
| 410 | 410 | |||||||
| 411 | return n; | 411 | return n; | |||||
| 412 | } | 412 | } | |||||
| 413 | 413 | |||||||
| 414 | #endif | 414 | #endif | |||||
| 415 | */ | 415 | */ | |||||
| 416 | 416 | |||||||
| 417 | // count_whitespace | 417 | // count_whitespace | |||||
| 418 | 418 | |||||||
| 419 | #ifdef BOOST_JSON_USE_SSE2 | 419 | #ifdef BOOST_JSON_USE_SSE2 | |||||
| 420 | 420 | |||||||
| HITCBC | 421 | 4701007 | inline const char* count_whitespace( char const* p, const char* end ) noexcept | 421 | 4701007 | inline const char* count_whitespace( char const* p, const char* end ) noexcept | ||
| 422 | { | 422 | { | |||||
| HITCBC | 423 | 4701007 | if( p == end ) | 423 | 4701007 | if( p == end ) | ||
| 424 | { | 424 | { | |||||
| HITCBC | 425 | 2133783 | return p; | 425 | 2133783 | return p; | ||
| 426 | } | 426 | } | |||||
| 427 | 427 | |||||||
| HITCBC | 428 | 2567224 | if( static_cast<unsigned char>( *p ) > 0x20 ) | 428 | 2567224 | if( static_cast<unsigned char>( *p ) > 0x20 ) | ||
| 429 | { | 429 | { | |||||
| HITCBC | 430 | 2484431 | return p; | 430 | 2484431 | return p; | ||
| 431 | } | 431 | } | |||||
| 432 | 432 | |||||||
| HITCBC | 433 | 82793 | __m128i const q1 = _mm_set1_epi8( ' ' ); | 433 | 82793 | __m128i const q1 = _mm_set1_epi8( ' ' ); | ||
| HITCBC | 434 | 82793 | __m128i const q2 = _mm_set1_epi8( '\n' ); | 434 | 82793 | __m128i const q2 = _mm_set1_epi8( '\n' ); | ||
| HITCBC | 435 | 82793 | __m128i const q3 = _mm_set1_epi8( 4 ); // '\t' \| 4 == '\r' | 435 | 82793 | __m128i const q3 = _mm_set1_epi8( 4 ); // '\t' \| 4 == '\r' | ||
| HITCBC | 436 | 82793 | __m128i const q4 = _mm_set1_epi8( '\r' ); | 436 | 82793 | __m128i const q4 = _mm_set1_epi8( '\r' ); | ||
| 437 | 437 | |||||||
| HITCBC | 438 | 183300 | while( end - p >= 16 ) | 438 | 183300 | while( end - p >= 16 ) | ||
| 439 | { | 439 | { | |||||
| HITCBC | 440 | 105374 | __m128i v0 = _mm_loadu_si128( (__m128i const*)p ); | 440 | 105374 | __m128i v0 = _mm_loadu_si128( (__m128i const*)p ); | ||
| 441 | 441 | |||||||
| HITCBC | 442 | 316122 | __m128i w0 = _mm_or_si128( | 442 | 316122 | __m128i w0 = _mm_or_si128( | ||
| 443 | _mm_cmpeq_epi8( v0, q1 ), | 443 | _mm_cmpeq_epi8( v0, q1 ), | |||||
| 444 | _mm_cmpeq_epi8( v0, q2 )); | 444 | _mm_cmpeq_epi8( v0, q2 )); | |||||
| HITCBC | 445 | 105374 | __m128i v1 = _mm_or_si128( v0, q3 ); | 445 | 105374 | __m128i v1 = _mm_or_si128( v0, q3 ); | ||
| HITCBC | 446 | 105374 | __m128i w1 = _mm_cmpeq_epi8( v1, q4 ); | 446 | 105374 | __m128i w1 = _mm_cmpeq_epi8( v1, q4 ); | ||
| HITCBC | 447 | 105374 | __m128i w2 = _mm_or_si128( w0, w1 ); | 447 | 105374 | __m128i w2 = _mm_or_si128( w0, w1 ); | ||
| 448 | 448 | |||||||
| HITCBC | 449 | 105374 | int m = _mm_movemask_epi8( w2 ) ^ 0xFFFF; | 449 | 105374 | int m = _mm_movemask_epi8( w2 ) ^ 0xFFFF; | ||
| 450 | 450 | |||||||
| HITCBC | 451 | 105374 | if( m != 0 ) | 451 | 105374 | if( m != 0 ) | ||
| 452 | { | 452 | { | |||||
| 453 | #if defined(__GNUC__) || defined(__clang__) | 453 | #if defined(__GNUC__) || defined(__clang__) | |||||
| HITCBC | 454 | 4867 | std::size_t c = __builtin_ffs( m ) - 1; | 454 | 4867 | std::size_t c = __builtin_ffs( m ) - 1; | ||
| 455 | #else | 455 | #else | |||||
| 456 | unsigned long index; | 456 | unsigned long index; | |||||
| 457 | _BitScanForward( &index, m ); | 457 | _BitScanForward( &index, m ); | |||||
| 458 | std::size_t c = index; | 458 | std::size_t c = index; | |||||
| 459 | #endif | 459 | #endif | |||||
| 460 | 460 | |||||||
| HITCBC | 461 | 4867 | p += c; | 461 | 4867 | p += c; | ||
| HITCBC | 462 | 4867 | return p; | 462 | 4867 | return p; | ||
| 463 | } | 463 | } | |||||
| 464 | 464 | |||||||
| HITCBC | 465 | 100507 | p += 16; | 465 | 100507 | p += 16; | ||
| 466 | } | 466 | } | |||||
| 467 | 467 | |||||||
| HITCBC | 468 | 462520 | while( p != end ) | 468 | 462520 | while( p != end ) | ||
| 469 | { | 469 | { | |||||
| HITCBC | 470 | 401661 | if( *p != ' ' && *p != '\t' && *p != '\r' && *p != '\n' ) | 470 | 401661 | if( *p != ' ' && *p != '\t' && *p != '\r' && *p != '\n' ) | ||
| 471 | { | 471 | { | |||||
| HITCBC | 472 | 17067 | return p; | 472 | 17067 | return p; | ||
| 473 | } | 473 | } | |||||
| 474 | 474 | |||||||
| HITCBC | 475 | 384594 | ++p; | 475 | 384594 | ++p; | ||
| 476 | } | 476 | } | |||||
| 477 | 477 | |||||||
| HITCBC | 478 | 60859 | return p; | 478 | 60859 | return p; | ||
| 479 | } | 479 | } | |||||
| 480 | 480 | |||||||
| 481 | /* | 481 | /* | |||||
| 482 | 482 | |||||||
| 483 | // slightly faster on msvc-14.2, slightly slower on clang-win | 483 | // slightly faster on msvc-14.2, slightly slower on clang-win | |||||
| 484 | 484 | |||||||
| 485 | inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept | 485 | inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept | |||||
| 486 | { | 486 | { | |||||
| 487 | char const * p0 = p; | 487 | char const * p0 = p; | |||||
| 488 | 488 | |||||||
| 489 | while( n > 0 ) | 489 | while( n > 0 ) | |||||
| 490 | { | 490 | { | |||||
| 491 | char ch = *p; | 491 | char ch = *p; | |||||
| 492 | 492 | |||||||
| 493 | if( ch == '\n' || ch == '\r' ) | 493 | if( ch == '\n' || ch == '\r' ) | |||||
| 494 | { | 494 | { | |||||
| 495 | ++p; | 495 | ++p; | |||||
| 496 | --n; | 496 | --n; | |||||
| 497 | continue; | 497 | continue; | |||||
| 498 | } | 498 | } | |||||
| 499 | 499 | |||||||
| 500 | if( ch != ' ' && ch != '\t' ) | 500 | if( ch != ' ' && ch != '\t' ) | |||||
| 501 | { | 501 | { | |||||
| 502 | break; | 502 | break; | |||||
| 503 | } | 503 | } | |||||
| 504 | 504 | |||||||
| 505 | ++p; | 505 | ++p; | |||||
| 506 | --n; | 506 | --n; | |||||
| 507 | 507 | |||||||
| 508 | while( n >= 16 ) | 508 | while( n >= 16 ) | |||||
| 509 | { | 509 | { | |||||
| 510 | std::size_t n2 = count_leading( p, ch ); | 510 | std::size_t n2 = count_leading( p, ch ); | |||||
| 511 | 511 | |||||||
| 512 | p += n2; | 512 | p += n2; | |||||
| 513 | n -= n2; | 513 | n -= n2; | |||||
| 514 | 514 | |||||||
| 515 | if( n2 < 16 ) | 515 | if( n2 < 16 ) | |||||
| 516 | { | 516 | { | |||||
| 517 | break; | 517 | break; | |||||
| 518 | } | 518 | } | |||||
| 519 | } | 519 | } | |||||
| 520 | } | 520 | } | |||||
| 521 | 521 | |||||||
| 522 | return p - p0; | 522 | return p - p0; | |||||
| 523 | } | 523 | } | |||||
| 524 | */ | 524 | */ | |||||
| 525 | 525 | |||||||
| 526 | #else | 526 | #else | |||||
| 527 | 527 | |||||||
| 528 | inline const char* count_whitespace( char const* p, const char* end ) noexcept | 528 | inline const char* count_whitespace( char const* p, const char* end ) noexcept | |||||
| 529 | { | 529 | { | |||||
| 530 | 530 | |||||||
| 531 | for(; p != end; ++p) | 531 | for(; p != end; ++p) | |||||
| 532 | { | 532 | { | |||||
| 533 | char const c = *p; | 533 | char const c = *p; | |||||
| 534 | if( c != ' ' && c != '\n' && c != '\r' && c != '\t' ) break; | 534 | if( c != ' ' && c != '\n' && c != '\r' && c != '\t' ) break; | |||||
| 535 | } | 535 | } | |||||
| 536 | 536 | |||||||
| 537 | return p; | 537 | return p; | |||||
| 538 | } | 538 | } | |||||
| 539 | 539 | |||||||
| 540 | #endif | 540 | #endif | |||||
| 541 | 541 | |||||||
| 542 | } // detail | 542 | } // detail | |||||
| 543 | } // namespace json | 543 | } // namespace json | |||||
| 544 | } // namespace boost | 544 | } // namespace boost | |||||
| 545 | 545 | |||||||
| 546 | #endif | 546 | #endif | |||||