9 #ifndef UTF8_VALIDATOR_HPP
10 #define UTF8_VALIDATOR_HPP
14 namespace utf8_validator {
// Decoder state in which no multi-byte sequence is pending. validator starts
// in this state, and decode() tests against it to tell a fresh byte from a
// byte that continues an earlier one.
16 static const unsigned int UTF8_ACCEPT = 0;
// Decoder state that the callers of decode() in this file treat as failure.
17 static const unsigned int UTF8_REJECT = 1;
// Lookup table that drives decode(). It is indexed in two ways:
//   utf8d[byte]                   -> a small "type" value for that byte
//   utf8d[256 + state*16 + type]  -> the next decoder state
// so the first 256 entries are per-byte types and everything after them is
// a transition table with 16 entries per state.
19 static const uint8_t utf8d[] = {
// Entries 0..255: byte value -> type.
20 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
21 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
22 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
23 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
24 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
25 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
26 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
27 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3,
28 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,
// Entries 256 onward: (state, type) -> next state, 16 entries per state.
29 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1,
30 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1,
31 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,
32 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1,
33 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
// Advances the decoder by one input byte.
//   state - in/out: current decoder state, overwritten with the next state
//   codep - in/out: code point accumulated so far, updated with this byte
//   byte  - the input byte to consume
// NOTE(review): the return type and return statement are not visible in this
// excerpt. Callers compare the result with UTF8_REJECT, so it presumably
// yields the updated *state - confirm.
37 decode(uint32_t* state, uint32_t* codep, uint8_t
byte) {
// Type of this byte, read from the first 256 entries of utf8d.
38 uint32_t type = utf8d[byte];
// When *state is not UTF8_ACCEPT, shift the accumulated value left by six
// and OR in the low six bits of this byte. When it is UTF8_ACCEPT, start
// over and keep only the bits of the byte selected by (0xff >> type).
40 *codep = (*state != UTF8_ACCEPT) ?
41 (
byte & 0x3fu) | (*codep << 6) :
42 (0xff >> type) & (byte);
// Next state: transition entries start at offset 256, 16 per state.
44 *state = utf8d[256 + *state*16 + type];
// Constructs a validator in the UTF8_ACCEPT state with a zero code point.
54 validator() : m_state(UTF8_ACCEPT),m_codepoint(0) {}
// Feeds `byte` to the namespace-level decode(), which updates m_state and
// m_codepoint in place. The branch is taken when the result is UTF8_REJECT.
// NOTE(review): the enclosing method signature and the branch body are not
// visible in this excerpt. The explicit utf8_validator:: qualification
// presumably avoids the member template that is also named decode - confirm.
62 if (utf8_validator::decode(&m_state,&m_codepoint,
byte) == UTF8_REJECT) {
// Passes each element of [b, e) through the namespace-level decode(), with
// the decoder state held in the members m_state and m_codepoint.
//   iterator_type - any type usable with !=, postfix ++ and unary * as done
//                   in the loop below; *i is passed as the byte argument
//   b, e          - start and one-past-the-end of the range to check
// NOTE(review): the body of the UTF8_REJECT branch and the final return are
// not visible in this excerpt. The bool result presumably reports whether a
// reject state was reached - confirm.
68 template <
typename iterator_type>
75 bool decode (iterator_type b, iterator_type e) {
76 for (iterator_type i = b; i != e; i++) {
77 if (utf8_validator::decode(&m_state,&m_codepoint,*i) == UTF8_REJECT) {
// NOTE(review): the signatures of the functions enclosing the two statements
// below are not visible in this excerpt.
// Yields true only while the decoder state equals UTF8_ACCEPT.
89 return m_state == UTF8_ACCEPT;
// Puts the decoder state back to UTF8_ACCEPT.
94 m_state = UTF8_ACCEPT;
// Checks the contents of the string `s` by running its whole character range
// through v.decode(); the branch below is taken when that call returns false.
// NOTE(review): the declaration of `v` and the return statements are not
// visible in this excerpt. `v` is presumably a validator - confirm.
105 inline bool validate(
const std::string& s) {
107 if (!v.decode(s.begin(),s.end())) {
115 #endif // UTF8_VALIDATOR_HPP