100.00% Lines (62/62) 100.00% Functions (10/10)
TLA Baseline Branch
Line Hits Code Line Hits Code
1   // 1   //
2   // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com) 2   // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
3   // 3   //
4   // Distributed under the Boost Software License, Version 1.0. (See accompanying 4   // Distributed under the Boost Software License, Version 1.0. (See accompanying
5   // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 5   // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6   // 6   //
7   // Official repository: https://github.com/boostorg/json 7   // Official repository: https://github.com/boostorg/json
8   // 8   //
9   9  
10   #ifndef BOOST_JSON_DETAIL_UTF8_HPP 10   #ifndef BOOST_JSON_DETAIL_UTF8_HPP
11   #define BOOST_JSON_DETAIL_UTF8_HPP 11   #define BOOST_JSON_DETAIL_UTF8_HPP
12   12  
13   #include <boost/endian/conversion.hpp> 13   #include <boost/endian/conversion.hpp>
14   #include <boost/json/detail/config.hpp> 14   #include <boost/json/detail/config.hpp>
15   15  
16   #include <cstddef> 16   #include <cstddef>
17   #include <cstring> 17   #include <cstring>
18   #include <cstdint> 18   #include <cstdint>
19   19  
20   namespace boost { 20   namespace boost {
21   namespace json { 21   namespace json {
22   namespace detail { 22   namespace detail {
23   23  
24   template<int N> 24   template<int N>
25   std::uint32_t 25   std::uint32_t
HITCBC 26   21733 load_little_endian(void const* p) 26   21733 load_little_endian(void const* p)
27   { 27   {
HITCBC 28   21733 std::uint32_t v = 0; 28   21733 std::uint32_t v = 0;
HITCBC 29   21733 std::memcpy(&v, p, N); 29   21733 std::memcpy(&v, p, N);
HITCBC 30   21733 endian::little_to_native_inplace(v); 30   21733 endian::little_to_native_inplace(v);
HITCBC 31   21733 return v; 31   21733 return v;
32   } 32   }
33   33  
// Classify a byte as the lead byte of a multi-byte UTF-8 sequence.
//
// The result packs two fields into one 16-bit code:
//   - low byte : total length of the sequence in bytes (0 if invalid)
//   - high byte: selector for the range the second byte must fall in
//
// Only the low seven bits of `c` select the table entry, so a byte
// below 0x80 yields the same code as that byte with its high bit set.
inline
std::uint16_t
classify_utf8(char c)
{
    constexpr std::uint16_t inv = 0x000; // invalid lead byte
    constexpr std::uint16_t two = 0x102; // 2 bytes, second byte [80, BF]
    constexpr std::uint16_t tE0 = 0x203; // 3 bytes, second byte [A0, BF]
    constexpr std::uint16_t tri = 0x303; // 3 bytes, second byte [80, BF]
    constexpr std::uint16_t tED = 0x403; // 3 bytes, second byte [80, 9F]
    constexpr std::uint16_t qF0 = 0x504; // 4 bytes, second byte [90, BF]
    constexpr std::uint16_t qua = 0x604; // 4 bytes, second byte [80, BF]
    constexpr std::uint16_t qF4 = 0x704; // 4 bytes, second byte [80, 8F]

    // Entry i describes the byte (0x80 | i).
    static constexpr std::uint16_t table[128] =
    {
        // 0x80 - 0xBF
        inv, inv, inv, inv, inv, inv, inv, inv,
        inv, inv, inv, inv, inv, inv, inv, inv,
        inv, inv, inv, inv, inv, inv, inv, inv,
        inv, inv, inv, inv, inv, inv, inv, inv,
        inv, inv, inv, inv, inv, inv, inv, inv,
        inv, inv, inv, inv, inv, inv, inv, inv,
        inv, inv, inv, inv, inv, inv, inv, inv,
        inv, inv, inv, inv, inv, inv, inv, inv,

        // 0xC0 - 0xDF
        inv, inv, two, two, two, two, two, two,
        two, two, two, two, two, two, two, two,
        two, two, two, two, two, two, two, two,
        two, two, two, two, two, two, two, two,

        // 0xE0 - 0xEF
        tE0, tri, tri, tri, tri, tri, tri, tri,
        tri, tri, tri, tri, tri, tED, tri, tri,

        // 0xF0 - 0xFF
        qF0, qua, qua, qua, qF4, inv, inv, inv,
        inv, inv, inv, inv, inv, inv, inv, inv,
    };

    unsigned const slot = static_cast<unsigned char>(c) & 0x7Fu;
    return table[slot];
}
68   68  
69   inline 69   inline
70   bool 70   bool
HITCBC 71   13177 is_valid_utf8(const char* p, uint16_t first) 71   13177 is_valid_utf8(const char* p, uint16_t first)
72   { 72   {
73   uint32_t v; 73   uint32_t v;
HITCBC 74   13177 switch(first >> 8) 74   13177 switch(first >> 8)
75   { 75   {
HITCBC 76   362 default: 76   362 default:
HITCBC 77   362 return false; 77   362 return false;
78   78  
79   // 2 bytes, second byte [80, BF] 79   // 2 bytes, second byte [80, BF]
HITCBC 80   2348 case 1: 80   2348 case 1:
HITCBC 81   2348 v = load_little_endian<2>(p); 81   2348 v = load_little_endian<2>(p);
HITCBC 82   2348 return (v & 0xC000) == 0x8000; 82   2348 return (v & 0xC000) == 0x8000;
83   83  
84   // 3 bytes, second byte [A0, BF] 84   // 3 bytes, second byte [A0, BF]
HITCBC 85   665 case 2: 85   665 case 2:
HITCBC 86   665 v = load_little_endian<3>(p); 86   665 v = load_little_endian<3>(p);
HITCBC 87   665 return (v & 0xC0E000) == 0x80A000; 87   665 return (v & 0xC0E000) == 0x80A000;
88   88  
89   // 3 bytes, second byte [80, BF] 89   // 3 bytes, second byte [80, BF]
HITCBC 90   3882 case 3: 90   3882 case 3:
HITCBC 91   3882 v = load_little_endian<3>(p); 91   3882 v = load_little_endian<3>(p);
HITCBC 92   3882 return (v & 0xC0C000) == 0x808000; 92   3882 return (v & 0xC0C000) == 0x808000;
93   93  
94   // 3 bytes, second byte [80, 9F] 94   // 3 bytes, second byte [80, 9F]
HITCBC 95   725 case 4: 95   725 case 4:
HITCBC 96   725 v = load_little_endian<3>(p); 96   725 v = load_little_endian<3>(p);
HITCBC 97   725 return (v & 0xC0E000) == 0x808000; 97   725 return (v & 0xC0E000) == 0x808000;
98   98  
99   // 4 bytes, second byte [90, BF] 99   // 4 bytes, second byte [90, BF]
HITCBC 100   1310 case 5: 100   1310 case 5:
HITCBC 101   1310 v = load_little_endian<4>(p); 101   1310 v = load_little_endian<4>(p);
HITCBC 102   1310 return (v & 0xC0C0FF00) + 0x7F7F7000 <= 0x2F00; 102   1310 return (v & 0xC0C0FF00) + 0x7F7F7000 <= 0x2F00;
103   103  
104   // 4 bytes, second byte [80, BF] 104   // 4 bytes, second byte [80, BF]
HITCBC 105   2346 case 6: 105   2346 case 6:
HITCBC 106   2346 v = load_little_endian<4>(p); 106   2346 v = load_little_endian<4>(p);
HITCBC 107   2346 return (v & 0xC0C0C000) == 0x80808000; 107   2346 return (v & 0xC0C0C000) == 0x80808000;
108   108  
109   // 4 bytes, second byte [80, 8F] 109   // 4 bytes, second byte [80, 8F]
HITCBC 110   1539 case 7: 110   1539 case 7:
HITCBC 111   1539 v = load_little_endian<4>(p); 111   1539 v = load_little_endian<4>(p);
HITCBC 112   1539 return (v & 0xC0C0F000) == 0x80808000; 112   1539 return (v & 0xC0C0F000) == 0x80808000;
113   } 113   }
114   } 114   }
115   115  
116   class utf8_sequence 116   class utf8_sequence
117   { 117   {
118   char seq_[4]; 118   char seq_[4];
119   uint16_t first_; 119   uint16_t first_;
120   uint8_t size_; 120   uint8_t size_;
121   121  
122   public: 122   public:
123   void 123   void
HITCBC 124   3466 save( 124   3466 save(
125   const char* p, 125   const char* p,
126   std::size_t remain) noexcept 126   std::size_t remain) noexcept
127   { 127   {
HITCBC 128   3466 first_ = classify_utf8(*p ); 128   3466 first_ = classify_utf8(*p );
HITCBC 129   3466 if(remain >= length()) 129   3466 if(remain >= length())
HITCBC 130   1560 size_ = length(); 130   1560 size_ = length();
131   else 131   else
HITCBC 132   1906 size_ = static_cast<uint8_t>(remain); 132   1906 size_ = static_cast<uint8_t>(remain);
HITCBC 133   3466 std::memcpy(seq_, p, size_); 133   3466 std::memcpy(seq_, p, size_);
HITCBC 134   3466 } 134   3466 }
135   135  
136   uint8_t 136   uint8_t
HITCBC 137   21338 length() const noexcept 137   21338 length() const noexcept
138   { 138   {
HITCBC 139   21338 return first_ & 0xFF; 139   21338 return first_ & 0xFF;
140   } 140   }
141   141  
142   bool 142   bool
HITCBC 143   3469 complete() const noexcept 143   3469 complete() const noexcept
144   { 144   {
HITCBC 145   3469 return size_ >= length(); 145   3469 return size_ >= length();
146   } 146   }
147   147  
148   // returns true if complete 148   // returns true if complete
149   bool 149   bool
HITCBC 150   1864 append( 150   1864 append(
151   const char* p, 151   const char* p,
152   std::size_t remain) noexcept 152   std::size_t remain) noexcept
153   { 153   {
HITCBC 154   1864 if(BOOST_JSON_UNLIKELY(needed() == 0)) 154   1864 if(BOOST_JSON_UNLIKELY(needed() == 0))
HITCBC 155   1 return true; 155   1 return true;
HITCBC 156   1863 if(BOOST_JSON_LIKELY(remain >= needed())) 156   1863 if(BOOST_JSON_LIKELY(remain >= needed()))
157   { 157   {
HITCBC 158   1862 std::memcpy( 158   1862 std::memcpy(
HITCBC 159   1862 seq_ + size_, p, needed()); 159   1862 seq_ + size_, p, needed());
HITCBC 160   1862 size_ = length(); 160   1862 size_ = length();
HITCBC 161   1862 return true; 161   1862 return true;
162   } 162   }
HITCBC 163   1 if(BOOST_JSON_LIKELY(remain > 0)) 163   1 if(BOOST_JSON_LIKELY(remain > 0))
164   { 164   {
HITCBC 165   1 std::memcpy(seq_ + size_, p, remain); 165   1 std::memcpy(seq_ + size_, p, remain);
HITCBC 166   1 size_ += static_cast<uint8_t>(remain); 166   1 size_ += static_cast<uint8_t>(remain);
167   } 167   }
HITCBC 168   1 return false; 168   1 return false;
169   } 169   }
170   170  
171   const char* 171   const char*
HITCBC 172   1658 data() const noexcept 172   1658 data() const noexcept
173   { 173   {
HITCBC 174   1658 return seq_; 174   1658 return seq_;
175   } 175   }
176   176  
177   uint8_t 177   uint8_t
HITCBC 178   7457 needed() const noexcept 178   7457 needed() const noexcept
179   { 179   {
HITCBC 180   7457 return length() - size_; 180   7457 return length() - size_;
181   } 181   }
182   182  
183   bool 183   bool
HITCBC 184   1866 valid() const noexcept 184   1866 valid() const noexcept
185   { 185   {
HITCBC 186   1866 BOOST_ASSERT(size_ >= length()); 186   1866 BOOST_ASSERT(size_ >= length());
HITCBC 187   1866 return is_valid_utf8(seq_, first_); 187   1866 return is_valid_utf8(seq_, first_);
188   } 188   }
189   }; 189   };
190   190  
191   } // detail 191   } // detail
192   } // namespace json 192   } // namespace json
193   } // namespace boost 193   } // namespace boost
194   194  
195   #endif 195   #endif