100.00% Lines (62/62)
100.00% Functions (10/10)
| TLA | Baseline | Branch | ||||||
|---|---|---|---|---|---|---|---|---|
| Line | Hits | Code | Line | Hits | Code | |||
| 1 | // | 1 | // | |||||
| 2 | // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com) | 2 | // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com) | |||||
| 3 | // | 3 | // | |||||
| 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |||||
| 5 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | 5 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |||||
| 6 | // | 6 | // | |||||
| 7 | // Official repository: https://github.com/boostorg/json | 7 | // Official repository: https://github.com/boostorg/json | |||||
| 8 | // | 8 | // | |||||
| 9 | 9 | |||||||
| 10 | #ifndef BOOST_JSON_DETAIL_UTF8_HPP | 10 | #ifndef BOOST_JSON_DETAIL_UTF8_HPP | |||||
| 11 | #define BOOST_JSON_DETAIL_UTF8_HPP | 11 | #define BOOST_JSON_DETAIL_UTF8_HPP | |||||
| 12 | 12 | |||||||
| 13 | #include <boost/endian/conversion.hpp> | 13 | #include <boost/endian/conversion.hpp> | |||||
| 14 | #include <boost/json/detail/config.hpp> | 14 | #include <boost/json/detail/config.hpp> | |||||
| 15 | 15 | |||||||
| 16 | #include <cstddef> | 16 | #include <cstddef> | |||||
| 17 | #include <cstring> | 17 | #include <cstring> | |||||
| 18 | #include <cstdint> | 18 | #include <cstdint> | |||||
| 19 | 19 | |||||||
| 20 | namespace boost { | 20 | namespace boost { | |||||
| 21 | namespace json { | 21 | namespace json { | |||||
| 22 | namespace detail { | 22 | namespace detail { | |||||
| 23 | 23 | |||||||
| 24 | template<int N> | 24 | template<int N> | |||||
| 25 | std::uint32_t | 25 | std::uint32_t | |||||
| HITCBC | 26 | 21733 | load_little_endian(void const* p) | 26 | 21733 | load_little_endian(void const* p) | ||
| 27 | { | 27 | { | |||||
| HITCBC | 28 | 21733 | std::uint32_t v = 0; | 28 | 21733 | std::uint32_t v = 0; | ||
| HITCBC | 29 | 21733 | std::memcpy(&v, p, N); | 29 | 21733 | std::memcpy(&v, p, N); | ||
| HITCBC | 30 | 21733 | endian::little_to_native_inplace(v); | 30 | 21733 | endian::little_to_native_inplace(v); | ||
| HITCBC | 31 | 21733 | return v; | 31 | 21733 | return v; | ||
| 32 | } | 32 | } | |||||
| 33 | 33 | |||||||
// Classify the lead byte of a multi-byte UTF-8 sequence.
//
// The result packs two pieces of information:
//   low byte  - total length of the sequence in bytes (2, 3 or 4)
//   high byte - which range the second byte must fall in
//
//   0x000 = invalid
//   0x102 = 2 bytes, second byte [80, BF]
//   0x203 = 3 bytes, second byte [A0, BF]
//   0x303 = 3 bytes, second byte [80, BF]
//   0x403 = 3 bytes, second byte [80, 9F]
//   0x504 = 4 bytes, second byte [90, BF]
//   0x604 = 4 bytes, second byte [80, BF]
//   0x704 = 4 bytes, second byte [80, 8F]
//
// Only the low seven bits of `c` select the table entry, so the
// table describes bytes 0x80-0xFF. The result is meaningful only
// when `c` has its high bit set; an ASCII byte aliases the entry of
// the byte 0x80 above it.
inline
std::uint16_t
classify_utf8(char c) noexcept
{
    static constexpr std::uint16_t lead_class[128]
    {
        // 80-BF: continuation bytes, never a valid lead byte
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,

        // C0-C1: invalid, C2-DF: two-byte leads
        0x000, 0x000, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
        0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
        0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
        0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
        // E0-EF: three-byte leads (E0 and ED restrict the second byte)
        0x203, 0x303, 0x303, 0x303, 0x303, 0x303, 0x303, 0x303,
        0x303, 0x303, 0x303, 0x303, 0x303, 0x403, 0x303, 0x303,
        // F0-F4: four-byte leads (F0 and F4 restrict the second byte),
        // F5-FF: invalid
        0x504, 0x604, 0x604, 0x604, 0x704, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
    };
    // Masking first keeps the index in [0, 127] even when plain
    // char is signed and `c` is negative.
    return lead_class[static_cast<unsigned char>(c & 0x7F)];
}
| 68 | 68 | |||||||
| 69 | inline | 69 | inline | |||||
| 70 | bool | 70 | bool | |||||
| HITCBC | 71 | 13177 | is_valid_utf8(const char* p, uint16_t first) | 71 | 13177 | is_valid_utf8(const char* p, uint16_t first) | ||
| 72 | { | 72 | { | |||||
| 73 | uint32_t v; | 73 | uint32_t v; | |||||
| HITCBC | 74 | 13177 | switch(first >> 8) | 74 | 13177 | switch(first >> 8) | ||
| 75 | { | 75 | { | |||||
| HITCBC | 76 | 362 | default: | 76 | 362 | default: | ||
| HITCBC | 77 | 362 | return false; | 77 | 362 | return false; | ||
| 78 | 78 | |||||||
| 79 | // 2 bytes, second byte [80, BF] | 79 | // 2 bytes, second byte [80, BF] | |||||
| HITCBC | 80 | 2348 | case 1: | 80 | 2348 | case 1: | ||
| HITCBC | 81 | 2348 | v = load_little_endian<2>(p); | 81 | 2348 | v = load_little_endian<2>(p); | ||
| HITCBC | 82 | 2348 | return (v & 0xC000) == 0x8000; | 82 | 2348 | return (v & 0xC000) == 0x8000; | ||
| 83 | 83 | |||||||
| 84 | // 3 bytes, second byte [A0, BF] | 84 | // 3 bytes, second byte [A0, BF] | |||||
| HITCBC | 85 | 665 | case 2: | 85 | 665 | case 2: | ||
| HITCBC | 86 | 665 | v = load_little_endian<3>(p); | 86 | 665 | v = load_little_endian<3>(p); | ||
| HITCBC | 87 | 665 | return (v & 0xC0E000) == 0x80A000; | 87 | 665 | return (v & 0xC0E000) == 0x80A000; | ||
| 88 | 88 | |||||||
| 89 | // 3 bytes, second byte [80, BF] | 89 | // 3 bytes, second byte [80, BF] | |||||
| HITCBC | 90 | 3882 | case 3: | 90 | 3882 | case 3: | ||
| HITCBC | 91 | 3882 | v = load_little_endian<3>(p); | 91 | 3882 | v = load_little_endian<3>(p); | ||
| HITCBC | 92 | 3882 | return (v & 0xC0C000) == 0x808000; | 92 | 3882 | return (v & 0xC0C000) == 0x808000; | ||
| 93 | 93 | |||||||
| 94 | // 3 bytes, second byte [80, 9F] | 94 | // 3 bytes, second byte [80, 9F] | |||||
| HITCBC | 95 | 725 | case 4: | 95 | 725 | case 4: | ||
| HITCBC | 96 | 725 | v = load_little_endian<3>(p); | 96 | 725 | v = load_little_endian<3>(p); | ||
| HITCBC | 97 | 725 | return (v & 0xC0E000) == 0x808000; | 97 | 725 | return (v & 0xC0E000) == 0x808000; | ||
| 98 | 98 | |||||||
| 99 | // 4 bytes, second byte [90, BF] | 99 | // 4 bytes, second byte [90, BF] | |||||
| HITCBC | 100 | 1310 | case 5: | 100 | 1310 | case 5: | ||
| HITCBC | 101 | 1310 | v = load_little_endian<4>(p); | 101 | 1310 | v = load_little_endian<4>(p); | ||
| HITCBC | 102 | 1310 | return (v & 0xC0C0FF00) + 0x7F7F7000 <= 0x2F00; | 102 | 1310 | return (v & 0xC0C0FF00) + 0x7F7F7000 <= 0x2F00; | ||
| 103 | 103 | |||||||
| 104 | // 4 bytes, second byte [80, BF] | 104 | // 4 bytes, second byte [80, BF] | |||||
| HITCBC | 105 | 2346 | case 6: | 105 | 2346 | case 6: | ||
| HITCBC | 106 | 2346 | v = load_little_endian<4>(p); | 106 | 2346 | v = load_little_endian<4>(p); | ||
| HITCBC | 107 | 2346 | return (v & 0xC0C0C000) == 0x80808000; | 107 | 2346 | return (v & 0xC0C0C000) == 0x80808000; | ||
| 108 | 108 | |||||||
| 109 | // 4 bytes, second byte [80, 8F] | 109 | // 4 bytes, second byte [80, 8F] | |||||
| HITCBC | 110 | 1539 | case 7: | 110 | 1539 | case 7: | ||
| HITCBC | 111 | 1539 | v = load_little_endian<4>(p); | 111 | 1539 | v = load_little_endian<4>(p); | ||
| HITCBC | 112 | 1539 | return (v & 0xC0C0F000) == 0x80808000; | 112 | 1539 | return (v & 0xC0C0F000) == 0x80808000; | ||
| 113 | } | 113 | } | |||||
| 114 | } | 114 | } | |||||
| 115 | 115 | |||||||
| 116 | class utf8_sequence | 116 | class utf8_sequence | |||||
| 117 | { | 117 | { | |||||
| 118 | char seq_[4]; | 118 | char seq_[4]; | |||||
| 119 | uint16_t first_; | 119 | uint16_t first_; | |||||
| 120 | uint8_t size_; | 120 | uint8_t size_; | |||||
| 121 | 121 | |||||||
| 122 | public: | 122 | public: | |||||
| 123 | void | 123 | void | |||||
| HITCBC | 124 | 3466 | save( | 124 | 3466 | save( | ||
| 125 | const char* p, | 125 | const char* p, | |||||
| 126 | std::size_t remain) noexcept | 126 | std::size_t remain) noexcept | |||||
| 127 | { | 127 | { | |||||
| HITCBC | 128 | 3466 | first_ = classify_utf8(*p ); | 128 | 3466 | first_ = classify_utf8(*p ); | ||
| HITCBC | 129 | 3466 | if(remain >= length()) | 129 | 3466 | if(remain >= length()) | ||
| HITCBC | 130 | 1560 | size_ = length(); | 130 | 1560 | size_ = length(); | ||
| 131 | else | 131 | else | |||||
| HITCBC | 132 | 1906 | size_ = static_cast<uint8_t>(remain); | 132 | 1906 | size_ = static_cast<uint8_t>(remain); | ||
| HITCBC | 133 | 3466 | std::memcpy(seq_, p, size_); | 133 | 3466 | std::memcpy(seq_, p, size_); | ||
| HITCBC | 134 | 3466 | } | 134 | 3466 | } | ||
| 135 | 135 | |||||||
| 136 | uint8_t | 136 | uint8_t | |||||
| HITCBC | 137 | 21338 | length() const noexcept | 137 | 21338 | length() const noexcept | ||
| 138 | { | 138 | { | |||||
| HITCBC | 139 | 21338 | return first_ & 0xFF; | 139 | 21338 | return first_ & 0xFF; | ||
| 140 | } | 140 | } | |||||
| 141 | 141 | |||||||
| 142 | bool | 142 | bool | |||||
| HITCBC | 143 | 3469 | complete() const noexcept | 143 | 3469 | complete() const noexcept | ||
| 144 | { | 144 | { | |||||
| HITCBC | 145 | 3469 | return size_ >= length(); | 145 | 3469 | return size_ >= length(); | ||
| 146 | } | 146 | } | |||||
| 147 | 147 | |||||||
| 148 | // returns true if complete | 148 | // returns true if complete | |||||
| 149 | bool | 149 | bool | |||||
| HITCBC | 150 | 1864 | append( | 150 | 1864 | append( | ||
| 151 | const char* p, | 151 | const char* p, | |||||
| 152 | std::size_t remain) noexcept | 152 | std::size_t remain) noexcept | |||||
| 153 | { | 153 | { | |||||
| HITCBC | 154 | 1864 | if(BOOST_JSON_UNLIKELY(needed() == 0)) | 154 | 1864 | if(BOOST_JSON_UNLIKELY(needed() == 0)) | ||
| HITCBC | 155 | 1 | return true; | 155 | 1 | return true; | ||
| HITCBC | 156 | 1863 | if(BOOST_JSON_LIKELY(remain >= needed())) | 156 | 1863 | if(BOOST_JSON_LIKELY(remain >= needed())) | ||
| 157 | { | 157 | { | |||||
| HITCBC | 158 | 1862 | std::memcpy( | 158 | 1862 | std::memcpy( | ||
| HITCBC | 159 | 1862 | seq_ + size_, p, needed()); | 159 | 1862 | seq_ + size_, p, needed()); | ||
| HITCBC | 160 | 1862 | size_ = length(); | 160 | 1862 | size_ = length(); | ||
| HITCBC | 161 | 1862 | return true; | 161 | 1862 | return true; | ||
| 162 | } | 162 | } | |||||
| HITCBC | 163 | 1 | if(BOOST_JSON_LIKELY(remain > 0)) | 163 | 1 | if(BOOST_JSON_LIKELY(remain > 0)) | ||
| 164 | { | 164 | { | |||||
| HITCBC | 165 | 1 | std::memcpy(seq_ + size_, p, remain); | 165 | 1 | std::memcpy(seq_ + size_, p, remain); | ||
| HITCBC | 166 | 1 | size_ += static_cast<uint8_t>(remain); | 166 | 1 | size_ += static_cast<uint8_t>(remain); | ||
| 167 | } | 167 | } | |||||
| HITCBC | 168 | 1 | return false; | 168 | 1 | return false; | ||
| 169 | } | 169 | } | |||||
| 170 | 170 | |||||||
| 171 | const char* | 171 | const char* | |||||
| HITCBC | 172 | 1658 | data() const noexcept | 172 | 1658 | data() const noexcept | ||
| 173 | { | 173 | { | |||||
| HITCBC | 174 | 1658 | return seq_; | 174 | 1658 | return seq_; | ||
| 175 | } | 175 | } | |||||
| 176 | 176 | |||||||
| 177 | uint8_t | 177 | uint8_t | |||||
| HITCBC | 178 | 7457 | needed() const noexcept | 178 | 7457 | needed() const noexcept | ||
| 179 | { | 179 | { | |||||
| HITCBC | 180 | 7457 | return length() - size_; | 180 | 7457 | return length() - size_; | ||
| 181 | } | 181 | } | |||||
| 182 | 182 | |||||||
| 183 | bool | 183 | bool | |||||
| HITCBC | 184 | 1866 | valid() const noexcept | 184 | 1866 | valid() const noexcept | ||
| 185 | { | 185 | { | |||||
| HITCBC | 186 | 1866 | BOOST_ASSERT(size_ >= length()); | 186 | 1866 | BOOST_ASSERT(size_ >= length()); | ||
| HITCBC | 187 | 1866 | return is_valid_utf8(seq_, first_); | 187 | 1866 | return is_valid_utf8(seq_, first_); | ||
| 188 | } | 188 | } | |||||
| 189 | }; | 189 | }; | |||||
| 190 | 190 | |||||||
| 191 | } // detail | 191 | } // detail | |||||
| 192 | } // namespace json | 192 | } // namespace json | |||||
| 193 | } // namespace boost | 193 | } // namespace boost | |||||
| 194 | 194 | |||||||
| 195 | #endif | 195 | #endif | |||||