stdex
Additional custom algorithms, or algorithms not covered by Standard C++
Loading...
Searching...
No Matches
parser.hpp
1/*
2 SPDX-License-Identifier: MIT
3 Copyright © 2023-2024 Amebis
4*/
5
6#pragma once
7
8#include "assert.hpp"
9#include "compat.hpp"
10#include "endian.hpp"
11#include "interval.hpp"
12#include "memory.hpp"
13#include "sgml.hpp"
14#include "string.hpp"
15#include <stdarg.h>
16#include <stdint.h>
17#include <math.h>
18#if defined(_WIN32)
19#include <winsock2.h>
20#if _MSC_VER >= 1300
21#include <ws2ipdef.h>
22#endif
23#include <ws2tcpip.h>
24#else
25#include <netinet/in.h>
26#endif
27#include <limits>
28#include <list>
29#include <locale>
30#include <memory>
31#include <set>
32#include <string_view>
33#include <string>
34
35#if defined(_MSC_VER)
36#pragma warning(push)
37#pragma warning(disable: 4100)
38#elif defined(__GNUC__)
39#pragma GCC diagnostic push
40#pragma GCC diagnostic ignored "-Wunknown-pragmas"
41#pragma GCC diagnostic ignored "-Wunused-parameter"
42#endif
43
// Defines the binary operator X (T op T, T op underlying, underlying op T)
// and the matching compound assignment X= for the scoped enum T.
// The compound operator name is built with token pasting (X##=): a plain
// `X=` expands to two separate tokens (e.g. `|` `=`) on conforming
// preprocessors, which does not name the `|=` operator.
#define ENUM_FLAG_OPERATOR(T,X) \
inline T operator X (const T lhs, const T rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T operator X (const T lhs, const std::underlying_type_t<T> rhs) { return static_cast<T>(static_cast<std::underlying_type_t<T>>(lhs) X rhs); } \
inline T operator X (const std::underlying_type_t<T> lhs, const T rhs) { return static_cast<T>(lhs X static_cast<std::underlying_type_t<T>>(rhs)); } \
inline T& operator X##= (T& lhs, const T rhs) { return lhs = lhs X rhs; } \
inline T& operator X##= (T& lhs, const std::underlying_type_t<T> rhs) { return lhs = lhs X rhs; }

// Declares `enum class T : type` together with the ~, |, ^ and & operator
// families. The macro ends with an open `enum class T : type`, so the caller
// supplies the enumerator list: ENUM_FLAGS(my_flags, int) { a = 1, b = 2 };
#define ENUM_FLAGS(T, type) \
enum class T : type; \
inline T operator ~ (T t) { return static_cast<T>(~static_cast<std::underlying_type_t<T>>(t)); } \
ENUM_FLAG_OPERATOR(T,|) \
ENUM_FLAG_OPERATOR(T,^) \
ENUM_FLAG_OPERATOR(T,&) \
enum class T : type
57
58#if defined(_WIN32)
59#elif defined(__APPLE__)
60#define s6_words __u6_addr.__u6_addr16
61#else
62#define s6_words s6_addr16
63#endif
64
65namespace stdex
66{
67 namespace parser
68 {
/// No special matching behaviour.
inline constexpr int match_default = 0;
/// Compare characters case-insensitively (parsers lower-case both sides via the locale's ctype facet).
inline constexpr int match_case_insensitive = 0x1;
/// Let line-break characters count as whitespace in the space parsers.
inline constexpr int match_multiline = 0x2;
75
79 template <class T>
81 {
82 public:
83 basic_parser(_In_ const std::locale& locale = std::locale()) : m_locale(locale) {}
84 virtual ~basic_parser() {}
85
86 bool search(
87 _In_reads_or_z_opt_(end) const T* text,
88 _In_ size_t start = 0,
89 _In_ size_t end = SIZE_MAX,
90 _In_ int flags = match_default)
91 {
92 for (size_t i = start; i < end && text[i]; i++)
93 if (match(text, i, end, flags))
94 return true;
95 return false;
96 }
97
98 bool match(
99 _In_reads_or_z_opt_(end) const T* text,
100 _In_ size_t start = 0,
101 _In_ size_t end = SIZE_MAX,
102 _In_ int flags = match_default)
103 {
104 return do_match(text, start, end, flags);
105 }
106
107 bool match(
108 _In_ const std::basic_string_view<T, std::char_traits<T>> text,
109 _In_ size_t start = 0,
110 _In_ size_t end = SIZE_MAX,
111 _In_ int flags = match_default)
112 {
113 return match(text.data(), start, std::min<size_t>(end, text.size()), flags);
114 }
115
116 virtual void invalidate()
117 {
118 this->interval.invalidate();
119 }
120
122
123 protected:
124 virtual bool do_match(
125 _In_reads_or_z_opt_(end) const T* text,
126 _In_ size_t start = 0,
127 _In_ size_t end = SIZE_MAX,
128 _In_ int flags = match_default) = 0;
129
131 template <class T_out = wchar_t>
132 const T_out* next_sgml_cp(_In_ const char* text, _In_ size_t start, _In_ size_t end, _Out_ size_t& chr_end, _Out_ T_out(&buf)[5])
133 {
134 if (text[start] == '&') {
135 // Potential entity start
136 const auto& ctype = std::use_facet<std::ctype<T>>(m_locale);
137 for (chr_end = start + 1;; chr_end++) {
138 if (chr_end >= end || text[chr_end] == 0) {
139 // Unterminated entity
140 break;
141 }
142 if (text[chr_end] == ';') {
143 // Entity end
144 utf32_t buf32[2];
145 size_t n = chr_end - start - 1;
146 auto entity_w = utf32_to_wstr(sgml2uni(text + start + 1, n, buf32), buf);
147 if (entity_w) {
148 chr_end++;
149 return entity_w;
150 }
151 // Unknown entity.
152 break;
153 }
154 else if (text[chr_end] == '&' || ctype.is(ctype.space, text[chr_end])) {
155 // This char cannot possibly be a part of entity.
156 break;
157 }
158 }
159 }
160 buf[0] = text[start];
161 buf[1] = 0;
162 chr_end = start + 1;
163 return buf;
164 }
166
167 std::locale m_locale;
168 };
169
170 using parser = basic_parser<char>;
171 using wparser = basic_parser<wchar_t>;
172#ifdef _UNICODE
173 using tparser = wparser;
174#else
175 using tparser = parser;
176#endif
177 using sgml_parser = basic_parser<char>;
178
182 template <class T>
183 class basic_noop : public basic_parser<T>
184 {
185 protected:
186 virtual bool do_match(
187 _In_reads_or_z_opt_(end) const T* text,
188 _In_ size_t start = 0,
189 _In_ size_t end = SIZE_MAX,
190 _In_ int flags = match_default)
191 {
192 stdex_assert(text || start >= end);
193 if (start < end && text[start]) {
194 this->interval.start = this->interval.end = start;
195 return true;
196 }
197 this->interval.invalidate();
198 return false;
199 }
200 };
201
202 using noop = basic_noop<char>;
204#ifdef _UNICODE
205 using tnoop = wnoop;
206#else
207 using tnoop = noop;
208#endif
210
214 template <class T>
215 class basic_any_cu : public basic_parser<T>
216 {
217 public:
218 basic_any_cu(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
219
220 protected:
221 virtual bool do_match(
222 _In_reads_or_z_opt_(end) const T* text,
223 _In_ size_t start = 0,
224 _In_ size_t end = SIZE_MAX,
225 _In_ int flags = match_default)
226 {
227 stdex_assert(text || start >= end);
228 if (start < end && text[start]) {
229 this->interval.end = (this->interval.start = start) + 1;
230 return true;
231 }
232 this->interval.invalidate();
233 return false;
234 }
235 };
236
239#ifdef _UNICODE
240 using tany_cu = wany_cu;
241#else
242 using tany_cu = any_cu;
243#endif
244
248 class sgml_any_cp : public basic_any_cu<char>
249 {
250 public:
251 sgml_any_cp(_In_ const std::locale& locale = std::locale()) : basic_any_cu<char>(locale) {}
252
253 protected:
254 virtual bool do_match(
255 _In_reads_or_z_(end) const char* text,
256 _In_ size_t start = 0,
257 _In_ size_t end = SIZE_MAX,
258 _In_ int flags = match_default)
259 {
260 stdex_assert(text || start >= end);
261 if (start < end && text[start]) {
262 if (text[start] == '&') {
263 // SGML entity
264 const auto& ctype = std::use_facet<std::ctype<char>>(m_locale);
265 for (this->interval.end = start + 1; this->interval.end < end && text[this->interval.end]; this->interval.end++)
266 if (text[this->interval.end] == ';') {
267 this->interval.end++;
268 this->interval.start = start;
269 return true;
270 }
271 else if (text[this->interval.end] == '&' || ctype.is(ctype.space, text[this->interval.end]))
272 break;
273 // Unterminated entity
274 }
275 this->interval.end = (this->interval.start = start) + 1;
276 return true;
277 }
278 this->interval.invalidate();
279 return false;
280 }
281 };
282
286 template <class T>
287 class basic_cu : public basic_parser<T>
288 {
289 public:
290 basic_cu(T chr, bool invert = false, _In_ const std::locale& locale = std::locale()) :
292 m_chr(chr),
293 m_invert(invert)
294 {}
295
296 protected:
297 virtual bool do_match(
298 _In_reads_or_z_opt_(end) const T* text,
299 _In_ size_t start = 0,
300 _In_ size_t end = SIZE_MAX,
301 _In_ int flags = match_default)
302 {
303 stdex_assert(text || start >= end);
304 if (start < end && text[start]) {
305 bool r;
306 if (flags & match_case_insensitive) {
307 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
308 r = ctype.tolower(text[start]) == ctype.tolower(m_chr);
309 }
310 else
311 r = text[start] == m_chr;
312 if ((r && !m_invert) || (!r && m_invert)) {
313 this->interval.end = (this->interval.start = start) + 1;
314 return true;
315 }
316 }
317 this->interval.invalidate();
318 return false;
319 }
320
321 T m_chr;
322 bool m_invert;
323 };
324
325 using cu = basic_cu<char>;
326 using wcu = basic_cu<wchar_t>;
327#ifdef _UNICODE
328 using tcu = wcu;
329#else
330 using tcu = cu;
331#endif
332
336 class sgml_cp : public sgml_parser
337 {
338 public:
339 sgml_cp(const char* chr, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
341 m_invert(invert)
342 {
343 stdex_assert(chr || !count);
344 wchar_t buf[5];
345 size_t chr_end;
346 m_chr.assign(count ? next_sgml_cp(chr, 0, count, chr_end, buf) : L"");
347 }
348
349 protected:
350 virtual bool do_match(
351 _In_reads_or_z_(end) const char* text,
352 _In_ size_t start = 0,
353 _In_ size_t end = SIZE_MAX,
354 _In_ int flags = match_default)
355 {
356 stdex_assert(text || start >= end);
357 if (start < end && text[start]) {
358 wchar_t buf[5];
359 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
360 bool r = ((flags & match_case_insensitive) ?
361 stdex::strnicmp(chr, stdex::strlen(chr), m_chr.data(), m_chr.size(), m_locale) :
362 stdex::strncmp(chr, stdex::strlen(chr), m_chr.data(), m_chr.size())) == 0;
363 if ((r && !m_invert) || (!r && m_invert)) {
364 this->interval.start = start;
365 return true;
366 }
367 }
368 this->interval.invalidate();
369 return false;
370 }
371
372 std::wstring m_chr;
373 bool m_invert;
374 };
375
379 template <class T>
380 class basic_space_cu : public basic_parser<T>
381 {
382 public:
383 basic_space_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
385 m_invert(invert)
386 {}
387
388 protected:
389 virtual bool do_match(
390 _In_reads_or_z_opt_(end) const T* text,
391 _In_ size_t start = 0,
392 _In_ size_t end = SIZE_MAX,
393 _In_ int flags = match_default)
394 {
395 stdex_assert(text || start >= end);
396 if (start < end && text[start]) {
397 bool r =
398 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
399 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space, text[start]);
400 if ((r && !m_invert) || (!r && m_invert)) {
401 this->interval.end = (this->interval.start = start) + 1;
402 return true;
403 }
404 }
405 this->interval.invalidate();
406 return false;
407 }
408
409 bool m_invert;
410 };
411
414#ifdef _UNICODE
415 using tspace_cu = wspace_cu;
416#else
417 using tspace_cu = space_cu;
418#endif
419
423 class sgml_space_cp : public basic_space_cu<char>
424 {
425 public:
426 sgml_space_cp(_In_ bool invert = false, _In_ const std::locale& locale = std::locale()) :
428 {}
429
430 protected:
431 virtual bool do_match(
432 _In_reads_or_z_(end) const char* text,
433 _In_ size_t start = 0,
434 _In_ size_t end = SIZE_MAX,
435 _In_ int flags = match_default)
436 {
437 stdex_assert(text || start >= end);
438 if (start < end && text[start]) {
439 wchar_t buf[5];
440 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
441 const wchar_t* chr_end = chr + stdex::strlen(chr);
442 bool r =
443 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
444 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space, chr, chr_end) == chr_end;
445 if ((r && !m_invert) || (!r && m_invert)) {
446 this->interval.start = start;
447 return true;
448 }
449 }
450
451 this->interval.invalidate();
452 return false;
453 }
454 };
455
459 template <class T>
460 class basic_punct_cu : public basic_parser<T>
461 {
462 public:
463 basic_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
465 m_invert(invert)
466 {}
467
468 protected:
469 virtual bool do_match(
470 _In_reads_or_z_opt_(end) const T* text,
471 _In_ size_t start = 0,
472 _In_ size_t end = SIZE_MAX,
473 _In_ int flags = match_default)
474 {
475 stdex_assert(text || start >= end);
476 if (start < end && text[start]) {
477 bool r = std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::punct, text[start]);
478 if ((r && !m_invert) || (!r && m_invert)) {
479 this->interval.end = (this->interval.start = start) + 1;
480 return true;
481 }
482 }
483 this->interval.invalidate();
484 return false;
485 }
486
487 bool m_invert;
488 };
489
492#ifdef _UNICODE
493 using tpunct_cu = wpunct_cu;
494#else
495 using tpunct_cu = punct_cu;
496#endif
497
501 class sgml_punct_cp : public basic_punct_cu<char>
502 {
503 public:
504 sgml_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
506 {}
507
508 protected:
509 virtual bool do_match(
510 _In_reads_or_z_(end) const char* text,
511 _In_ size_t start = 0,
512 _In_ size_t end = SIZE_MAX,
513 _In_ int flags = match_default)
514 {
515 stdex_assert(text || start >= end);
516 if (start < end && text[start]) {
517 wchar_t buf[5];
518 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
519 const wchar_t* chr_end = chr + stdex::strlen(chr);
520 bool r = std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::punct, chr, chr_end) == chr_end;
521 if ((r && !m_invert) || (!r && m_invert)) {
522 this->interval.start = start;
523 return true;
524 }
525 }
526 this->interval.invalidate();
527 return false;
528 }
529 };
530
534 template <class T>
536 {
537 public:
538 basic_space_or_punct_cu(bool invert = false, _In_ const std::locale& locale = std::locale()) :
540 m_invert(invert)
541 {}
542
543 protected:
544 virtual bool do_match(
545 _In_reads_or_z_opt_(end) const T* text,
546 _In_ size_t start = 0,
547 _In_ size_t end = SIZE_MAX,
548 _In_ int flags = match_default)
549 {
550 stdex_assert(text || start >= end);
551 if (start < end && text[start]) {
552 bool r =
553 ((flags & match_multiline) || !stdex::islbreak(text[start])) &&
554 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::space | std::ctype_base::punct, text[start]);
555 if ((r && !m_invert) || (!r && m_invert)) {
556 this->interval.end = (this->interval.start = start) + 1;
557 return true;
558 }
559 }
560 this->interval.invalidate();
561 return false;
562 }
563
564 bool m_invert;
565 };
566
569#ifdef _UNICODE
571#else
573#endif
574
579 {
580 public:
581 sgml_space_or_punct_cp(bool invert = false, _In_ const std::locale& locale = std::locale()) :
583 {}
584
585 protected:
586 virtual bool do_match(
587 _In_reads_or_z_(end) const char* text,
588 _In_ size_t start = 0,
589 _In_ size_t end = SIZE_MAX,
590 _In_ int flags = match_default)
591 {
592 stdex_assert(text || start >= end);
593 if (start < end && text[start]) {
594 wchar_t buf[5];
595 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
596 const wchar_t* chr_end = chr + stdex::strlen(chr);
597 bool r =
598 ((flags & match_multiline) || !stdex::islbreak(chr, SIZE_MAX)) &&
599 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::space | std::ctype_base::punct, chr, chr_end) == chr_end;
600 if ((r && !m_invert) || (!r && m_invert)) {
601 this->interval.start = start;
602 return true;
603 }
604 }
605 this->interval.invalidate();
606 return false;
607 }
608 };
609
613 template <class T>
614 class basic_bol : public basic_parser<T>
615 {
616 public:
617 basic_bol(bool invert = false) : m_invert(invert) {}
618
619 protected:
620 virtual bool do_match(
621 _In_reads_or_z_opt_(end) const T* text,
622 _In_ size_t start = 0,
623 _In_ size_t end = SIZE_MAX,
624 _In_ int flags = match_default)
625 {
626 stdex_assert(text || !end);
627 stdex_assert(text || start >= end);
628 bool r = start == 0 || (start <= end && stdex::islbreak(text[start - 1]));
629 if ((r && !m_invert) || (!r && m_invert)) {
630 this->interval.end = this->interval.start = start;
631 return true;
632 }
633 this->interval.invalidate();
634 return false;
635 }
636
637 bool m_invert;
638 };
639
640 using bol = basic_bol<char>;
641 using wbol = basic_bol<wchar_t>;
642#ifdef _UNICODE
643 using tbol = wbol;
644#else
645 using tbol = bol;
646#endif
648
652 template <class T>
653 class basic_eol : public basic_parser<T>
654 {
655 public:
656 basic_eol(bool invert = false) : m_invert(invert) {}
657
658 protected:
659 virtual bool do_match(
660 _In_reads_or_z_opt_(end) const T* text,
661 _In_ size_t start = 0,
662 _In_ size_t end = SIZE_MAX,
663 _In_ int flags = match_default)
664 {
665 stdex_assert(text || start >= end);
666 bool r = start >= end || !text[start] || stdex::islbreak(text[start]);
667 if ((r && !m_invert) || (!r && m_invert)) {
668 this->interval.end = this->interval.start = start;
669 return true;
670 }
671 this->interval.invalidate();
672 return false;
673 }
674
675 bool m_invert;
676 };
677
678 using eol = basic_eol<char>;
679 using weol = basic_eol<wchar_t>;
680#ifdef _UNICODE
681 using teol = weol;
682#else
683 using teol = eol;
684#endif
686
687 template <class T>
688 class basic_set : public basic_parser<T>
689 {
690 public:
691 basic_set(bool invert = false, _In_ const std::locale& locale = std::locale()) :
693 hit_offset(SIZE_MAX),
694 m_invert(invert)
695 {}
696
697 virtual void invalidate()
698 {
699 hit_offset = SIZE_MAX;
701 }
702
703 size_t hit_offset;
704
705 protected:
706 virtual bool do_match(
707 _In_reads_or_z_opt_(end) const T* text,
708 _In_ size_t start = 0,
709 _In_ size_t end = SIZE_MAX,
710 _In_ int flags = match_default) = 0;
711
712 bool m_invert;
713 };
714
718 template <class T>
719 class basic_cu_set : public basic_set<T>
720 {
721 public:
723 _In_reads_or_z_(count) const T* set,
724 _In_ size_t count = SIZE_MAX,
725 _In_ bool invert = false,
726 _In_ const std::locale& locale = std::locale()) :
727 basic_set<T>(invert, locale)
728 {
729 if (set)
730 m_set.assign(set, set + stdex::strnlen(set, count));
731 }
732
733 protected:
734 virtual bool do_match(
735 _In_reads_or_z_opt_(end) const T* text,
736 _In_ size_t start = 0,
737 _In_ size_t end = SIZE_MAX,
738 _In_ int flags = match_default)
739 {
740 stdex_assert(text || start >= end);
741 if (start < end && text[start]) {
742 const T* set = m_set.data();
743 size_t r = (flags & match_case_insensitive) ?
744 stdex::strnichr(set, m_set.size(), text[start], this->m_locale) :
745 stdex::strnchr(set, m_set.size(), text[start]);
746 if ((r != stdex::npos && !this->m_invert) || (r == stdex::npos && this->m_invert)) {
747 this->hit_offset = r;
748 this->interval.end = (this->interval.start = start) + 1;
749 return true;
750 }
751 }
752 this->hit_offset = SIZE_MAX;
753 this->interval.invalidate();
754 return false;
755 }
756
757 std::basic_string<T> m_set;
758 };
759
762#ifdef _UNICODE
763 using tcu_set = wcu_set;
764#else
765 using tcu_set = cu_set;
766#endif
767
771 class sgml_cp_set : public basic_set<char>
772 {
773 public:
774 sgml_cp_set(const char* set, size_t count = SIZE_MAX, bool invert = false, _In_ const std::locale& locale = std::locale()) :
775 basic_set<char>(invert, locale)
776 {
777 if (set)
778 m_set = sgml2str(set, count);
779 }
780
781 protected:
782 virtual bool do_match(
783 _In_reads_or_z_(end) const char* text,
784 _In_ size_t start = 0,
785 _In_ size_t end = SIZE_MAX,
786 _In_ int flags = match_default)
787 {
788 stdex_assert(text || start >= end);
789 if (start < end && text[start]) {
790 wchar_t buf[5];
791 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
792 const wchar_t* set = m_set.data();
793 size_t r = (flags & match_case_insensitive) ?
794 stdex::strnistr(set, m_set.size(), chr, m_locale) :
795 stdex::strnstr(set, m_set.size(), chr);
796 if ((r != stdex::npos && !m_invert) || (r == stdex::npos && m_invert)) {
797 hit_offset = r;
798 this->interval.start = start;
799 return true;
800 }
801 }
802 hit_offset = SIZE_MAX;
803 this->interval.invalidate();
804 return false;
805 }
806
807 std::wstring m_set;
808 };
809
813 template <class T>
814 class basic_string : public basic_parser<T>
815 {
816 public:
818 _In_reads_or_z_(count) const T* str,
819 _In_ size_t count = SIZE_MAX,
820 _In_ const std::locale& locale = std::locale()) :
822 m_str(str, str + stdex::strnlen(str, count))
823 {}
824
825 protected:
826 virtual bool do_match(
827 _In_reads_or_z_opt_(end) const T* text,
828 _In_ size_t start = 0,
829 _In_ size_t end = SIZE_MAX,
830 _In_ int flags = match_default)
831 {
832 stdex_assert(text || start >= end);
833 size_t
834 m = m_str.size(),
835 n = std::min<size_t>(end - start, m);
836 bool r = ((flags & match_case_insensitive) ?
837 stdex::strnicmp(text + start, n, m_str.data(), m, this->m_locale) :
838 stdex::strncmp(text + start, n, m_str.data(), m)) == 0;
839 if (r) {
840 this->interval.end = (this->interval.start = start) + n;
841 return true;
842 }
843 this->interval.invalidate();
844 return false;
845 }
846
847 std::basic_string<T> m_str;
848 };
849
852#ifdef _UNICODE
853 using tstring = wstring;
854#else
855 using tstring = string;
856#endif
857
862 {
863 public:
864 sgml_string(const char* str, size_t count = SIZE_MAX, _In_ const std::locale& locale = std::locale()) :
866 m_str(sgml2str(str, count))
867 {}
868
869 protected:
870 virtual bool do_match(
871 _In_reads_or_z_(end) const char* text,
872 _In_ size_t start = 0,
873 _In_ size_t end = SIZE_MAX,
874 _In_ int flags = match_default)
875 {
876 stdex_assert(text || start >= end);
877 const wchar_t* str = m_str.data();
878 const bool case_insensitive = flags & match_case_insensitive ? true : false;
879 const auto& ctype = std::use_facet<std::ctype<wchar_t>>(m_locale);
880 for (this->interval.end = start;;) {
881 if (!*str) {
882 this->interval.start = start;
883 return true;
884 }
885 if (this->interval.end >= end || !text[this->interval.end]) {
886 this->interval.invalidate();
887 return false;
888 }
889 wchar_t buf[5];
890 const wchar_t* chr = next_sgml_cp(text, this->interval.end, end, this->interval.end, buf);
891 for (; *chr; ++str, ++chr) {
892 if (!*str ||
893 (case_insensitive ? ctype.tolower(*str) != ctype.tolower(*chr) : *str != *chr))
894 {
895 this->interval.invalidate();
896 return false;
897 }
898 }
899 }
900 }
901
902 std::wstring m_str;
903 };
904
908 template <class T>
910 {
911 public:
912 basic_iterations(const std::shared_ptr<basic_parser<T>>& el, size_t min_iterations = 0, size_t max_iterations = SIZE_MAX, bool greedy = true) :
913 m_el(el),
914 m_min_iterations(min_iterations),
915 m_max_iterations(max_iterations),
916 m_greedy(greedy)
917 {}
918
919 protected:
920 virtual bool do_match(
921 _In_reads_or_z_opt_(end) const T* text,
922 _In_ size_t start = 0,
923 _In_ size_t end = SIZE_MAX,
924 _In_ int flags = match_default)
925 {
926 stdex_assert(text || start >= end);
927 this->interval.start = this->interval.end = start;
928 for (size_t i = 0; ; i++) {
929 if ((!m_greedy && i >= m_min_iterations) || i >= m_max_iterations)
930 return true;
931 if (!m_el->match(text, this->interval.end, end, flags)) {
932 if (i >= m_min_iterations)
933 return true;
934 break;
935 }
936 if (m_el->interval.end == this->interval.end) {
937 // Element did match, but the matching interval was empty. Quit instead of spinning.
938 return true;
939 }
940 this->interval.end = m_el->interval.end;
941 }
942 this->interval.invalidate();
943 return false;
944 }
945
946 std::shared_ptr<basic_parser<T>> m_el;
949 bool m_greedy;
950 };
951
954#ifdef _UNICODE
955 using titerations = witerations;
956#else
957 using titerations = iterations;
958#endif
960
964 template <class T>
966 {
967 protected:
968 parser_collection(_In_ const std::locale& locale) : basic_parser<T>(locale) {}
969
970 public:
972 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el,
973 _In_ size_t count,
974 _In_ const std::locale& locale = std::locale()) :
976 {
977 stdex_assert(el || !count);
978 m_collection.reserve(count);
979 for (size_t i = 0; i < count; i++)
980 m_collection.push_back(el[i]);
981 }
982
984 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
985 _In_ const std::locale& locale = std::locale()) :
987 m_collection(std::move(collection))
988 {}
989
990 virtual void invalidate()
991 {
992 for (auto& el : m_collection)
993 el->invalidate();
995 }
996
997 protected:
998 std::vector<std::shared_ptr<basic_parser<T>>> m_collection;
999 };
1000
1004 template <class T>
1006 {
1007 public:
1009 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1010 _In_ size_t count = 0,
1011 _In_ const std::locale& locale = std::locale()) :
1012 parser_collection<T>(el, count, locale)
1013 {}
1014
1016 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1017 _In_ const std::locale& locale = std::locale()) :
1018 parser_collection<T>(std::move(collection), locale)
1019 {}
1020
1021 protected:
1022 virtual bool do_match(
1023 _In_reads_or_z_opt_(end) const T* text,
1024 _In_ size_t start = 0,
1025 _In_ size_t end = SIZE_MAX,
1026 _In_ int flags = match_default)
1027 {
1028 stdex_assert(text || start >= end);
1029 this->interval.end = start;
1030 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i) {
1031 if (!(*i)->match(text, this->interval.end, end, flags)) {
1032 for (++i; i != this->m_collection.end(); ++i)
1033 (*i)->invalidate();
1034 this->interval.invalidate();
1035 return false;
1036 }
1037 this->interval.end = (*i)->interval.end;
1038 }
1039 this->interval.start = start;
1040 return true;
1041 }
1042 };
1043
1046#ifdef _UNICODE
1047 using tsequence = wsequence;
1048#else
1049 using tsequence = sequence;
1050#endif
1052
1056 template <class T>
1058 {
1059 protected:
1060 basic_branch(_In_ const std::locale& locale) :
1062 hit_offset(SIZE_MAX)
1063 {}
1064
1065 public:
1067 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1068 _In_ size_t count = 0,
1069 _In_ const std::locale& locale = std::locale()) :
1070 parser_collection<T>(el, count, locale),
1071 hit_offset(SIZE_MAX)
1072 {}
1073
1075 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1076 _In_ const std::locale& locale = std::locale()) :
1077 parser_collection<T>(std::move(collection), locale),
1078 hit_offset(SIZE_MAX)
1079 {}
1080
1081 virtual void invalidate()
1082 {
1083 hit_offset = SIZE_MAX;
1085 }
1086
1087 size_t hit_offset;
1088
1089 protected:
1090 virtual bool do_match(
1091 _In_reads_or_z_opt_(end) const T* text,
1092 _In_ size_t start = 0,
1093 _In_ size_t end = SIZE_MAX,
1094 _In_ int flags = match_default)
1095 {
1096 stdex_assert(text || start >= end);
1097 hit_offset = 0;
1098 for (auto i = this->m_collection.begin(); i != this->m_collection.end(); ++i, ++hit_offset) {
1099 if ((*i)->match(text, start, end, flags)) {
1100 this->interval = (*i)->interval;
1101 for (++i; i != this->m_collection.end(); ++i)
1102 (*i)->invalidate();
1103 return true;
1104 }
1105 }
1106 hit_offset = SIZE_MAX;
1107 this->interval.invalidate();
1108 return false;
1109 }
1110 };
1111
1112 using branch = basic_branch<char>;
1114#ifdef _UNICODE
1115 using tbranch = wbranch;
1116#else
1117 using tbranch = branch;
1118#endif
1120
1124 template <class T, class T_parser = basic_string<T>>
1126 {
1127 public:
1129 _In_reads_(count) const T* str_z = nullptr,
1130 _In_ size_t count = 0,
1131 _In_ const std::locale& locale = std::locale()) :
1133 {
1134 build(str_z, count);
1135 }
1136
1137 basic_string_branch(_In_z_ const T* str, ...) :
1138 basic_branch<T>(std::locale())
1139 {
1140 va_list params;
1141 va_start(params, str);
1142 build(str, params);
1143 va_end(params);
1144 }
1145
1146 basic_string_branch(_In_ const std::locale& locale, _In_z_ const T* str, ...) :
1148 {
1149 va_list params;
1150 va_start(params, str);
1151 build(str, params);
1152 va_end(params);
1153 }
1154
1155 protected:
1156 void build(_In_reads_(count) const T* str_z, _In_ size_t count)
1157 {
1158 stdex_assert(str_z || !count);
1159 if (count) {
1160 size_t offset, n;
1161 for (
1162 offset = n = 0;
1163 offset < count && str_z[offset];
1164 offset += stdex::strnlen(str_z + offset, count - offset) + 1, ++n);
1165 this->m_collection.reserve(n);
1166 for (
1167 offset = 0;
1168 offset < count && str_z[offset];
1169 offset += stdex::strnlen(str_z + offset, count - offset) + 1)
1170 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str_z + offset, count - offset, this->m_locale)));
1171 }
1172 }
1173
1174 void build(_In_z_ const T* str, _In_ va_list params)
1175 {
1176 const T* p;
1177 for (
1178 this->m_collection.push_back(std::move(std::make_shared<T_parser>(str, SIZE_MAX, this->m_locale)));
1179 (p = va_arg(params, const T*)) != nullptr;
1180 this->m_collection.push_back(std::move(std::make_shared<T_parser>(p, SIZE_MAX, this->m_locale))));
1181 }
1182 };
1183
1186#ifdef _UNICODE
1188#else
1190#endif
1192
1196 template <class T>
1198 {
1199 public:
1201 _In_count_(count) const std::shared_ptr<basic_parser<T>>* el = nullptr,
1202 _In_ size_t count = 0,
1203 _In_ const std::locale& locale = std::locale()) :
1204 parser_collection<T>(el, count, locale)
1205 {}
1206
1208 _Inout_ std::vector<std::shared_ptr<basic_parser<T>>>&& collection,
1209 _In_ const std::locale& locale = std::locale()) :
1210 parser_collection<T>(std::move(collection), locale)
1211 {}
1212
1213 protected:
1214 virtual bool do_match(
1215 _In_reads_or_z_opt_(end) const T* text,
1216 _In_ size_t start = 0,
1217 _In_ size_t end = SIZE_MAX,
1218 _In_ int flags = match_default)
1219 {
1220 stdex_assert(text || start >= end);
1221 for (auto& el : this->m_collection)
1222 el->invalidate();
1223 if (match_recursively(text, start, end, flags)) {
1224 this->interval.start = start;
1225 return true;
1226 }
1227 this->interval.invalidate();
1228 return false;
1229 }
1230
1231 bool match_recursively(
1232 _In_reads_or_z_opt_(end) const T* text,
1233 _In_ size_t start = 0,
1234 _In_ size_t end = SIZE_MAX,
1235 _In_ int flags = match_default)
1236 {
1237 bool all_matched = true;
1238 for (auto& el : this->m_collection) {
1239 if (!el->interval) {
1240 // Element was not matched in permutatuion yet.
1241 all_matched = false;
1242 if (el->match(text, start, end, flags)) {
1243 // Element matched for the first time.
1244 if (match_recursively(text, el->interval.end, end, flags)) {
1245 // Rest of the elements matched too.
1246 return true;
1247 }
1248 el->invalidate();
1249 }
1250 }
1251 }
1252 if (all_matched) {
1253 this->interval.end = start;
1254 return true;
1255 }
1256 return false;
1257 }
1258 };
1259
1262#ifdef _UNICODE
1263 using tpermutation = wpermutation;
1264#else
1265 using tpermutation = permutation;
1266#endif
1268
1272 template <class T>
1273 class basic_integer : public basic_parser<T>
1274 {
1275 public:
1276 basic_integer(_In_ const std::locale& locale = std::locale()) :
1278 value(0)
1279 {}
1280
1281 virtual void invalidate()
1282 {
1283 value = 0;
1285 }
1286
1287 public:
1288 size_t value;
1289 };
1290
1294 template <class T>
1296 {
1297 public:
1299 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1300 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1301 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1302 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1303 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1304 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1305 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1306 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1307 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1308 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1309 _In_ const std::locale& locale = std::locale()) :
1311 m_digit_0(digit_0),
1312 m_digit_1(digit_1),
1313 m_digit_2(digit_2),
1314 m_digit_3(digit_3),
1315 m_digit_4(digit_4),
1316 m_digit_5(digit_5),
1317 m_digit_6(digit_6),
1318 m_digit_7(digit_7),
1319 m_digit_8(digit_8),
1320 m_digit_9(digit_9)
1321 {}
1322
1323 protected:
1324 virtual bool do_match(
1325 _In_reads_or_z_opt_(end) const T* text,
1326 _In_ size_t start = 0,
1327 _In_ size_t end = SIZE_MAX,
1328 _In_ int flags = match_default)
1329 {
1330 stdex_assert(text || start >= end);
1331 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1332 size_t dig;
1333 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1334 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1335 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1336 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1337 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1338 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1339 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1340 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1341 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1342 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1343 else break;
1344 this->value = this->value * 10 + dig;
1345 }
1346 if (start < this->interval.end) {
1347 this->interval.start = start;
1348 return true;
1349 }
1350 this->interval.invalidate();
1351 return false;
1352 }
1353
1354 std::shared_ptr<basic_parser<T>>
1355 m_digit_0,
1356 m_digit_1,
1357 m_digit_2,
1358 m_digit_3,
1359 m_digit_4,
1360 m_digit_5,
1361 m_digit_6,
1362 m_digit_7,
1363 m_digit_8,
1364 m_digit_9;
1365 };
1366
1369#ifdef _UNICODE
1370 using tinteger10 = winteger10;
1371#else
1372 using tinteger10 = integer10;
1373#endif
1375
1379 template <class T>
1381 {
1382 public:
1384 _In_ const std::shared_ptr<basic_integer10<T>>& digits,
1385 _In_ const std::shared_ptr<basic_set<T>>& separator,
1386 _In_ const std::locale& locale = std::locale()) :
1388 digit_count(0),
1389 has_separators(false),
1390 m_digits(digits),
1391 m_separator(separator)
1392 {}
1393
1394 virtual void invalidate()
1395 {
1396 digit_count = 0;
1397 has_separators = false;
1399 }
1400
1403
1404 protected:
1405 virtual bool do_match(
1406 _In_reads_or_z_opt_(end) const T* text,
1407 _In_ size_t start = 0,
1408 _In_ size_t end = SIZE_MAX,
1409 _In_ int flags = match_default)
1410 {
1411 stdex_assert(text || start >= end);
1412 if (m_digits->match(text, start, end, flags)) {
1413 // Leading part match.
1414 this->value = m_digits->value;
1415 digit_count = m_digits->interval.size();
1416 has_separators = false;
1417 this->interval.start = start;
1418 this->interval.end = m_digits->interval.end;
1419 if (m_digits->interval.size() <= 3) {
1420 // Maybe separated with thousand separators?
1421 size_t hit_offset = SIZE_MAX;
1422 while (m_separator->match(text, this->interval.end, end, flags) &&
1423 (hit_offset == SIZE_MAX || hit_offset == m_separator->hit_offset) && // All separators must be the same, no mixing.
1424 m_digits->match(text, m_separator->interval.end, end, flags) &&
1425 m_digits->interval.size() == 3)
1426 {
1427 // Thousand separator and three-digit integer followed.
1428 this->value = this->value * 1000 + m_digits->value;
1429 digit_count += 3;
1430 has_separators = true;
1431 this->interval.end = m_digits->interval.end;
1432 hit_offset = m_separator->hit_offset;
1433 }
1434 }
1435
1436 return true;
1437 }
1438 this->value = 0;
1439 this->interval.invalidate();
1440 return false;
1441 }
1442
1443 std::shared_ptr<basic_integer10<T>> m_digits;
1444 std::shared_ptr<basic_set<T>> m_separator;
1445 };
1446
// Aliases of basic_integer10ts for narrow (char) and wide (wchar_t) text.
1447 using integer10ts = basic_integer10ts<char>;
1448 using winteger10ts = basic_integer10ts<wchar_t>;
// tinteger10ts selects the wide variant when _UNICODE is defined, the narrow one otherwise.
1449#ifdef _UNICODE
1450 using tinteger10ts = winteger10ts;
1451#else
1452 using tinteger10ts = integer10ts;
1453#endif
// Resolves to the same type as integer10ts; presumably meant for SGML-encoded text - confirm with callers.
1454 using sgml_integer10ts = basic_integer10ts<char>;
1455
1459 template <class T>
1461 {
1462 public:
1464 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
1465 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1466 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
1467 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
1468 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
1469 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1470 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
1471 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
1472 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
1473 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
1474 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1475 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
1476 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
1477 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
1478 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
1479 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
1480 _In_ const std::locale& locale = std::locale()) :
1482 m_digit_0(digit_0),
1483 m_digit_1(digit_1),
1484 m_digit_2(digit_2),
1485 m_digit_3(digit_3),
1486 m_digit_4(digit_4),
1487 m_digit_5(digit_5),
1488 m_digit_6(digit_6),
1489 m_digit_7(digit_7),
1490 m_digit_8(digit_8),
1491 m_digit_9(digit_9),
1492 m_digit_10(digit_10),
1493 m_digit_11(digit_11),
1494 m_digit_12(digit_12),
1495 m_digit_13(digit_13),
1496 m_digit_14(digit_14),
1497 m_digit_15(digit_15)
1498 {}
1499
1500 protected:
1501 virtual bool do_match(
1502 _In_reads_or_z_opt_(end) const T* text,
1503 _In_ size_t start = 0,
1504 _In_ size_t end = SIZE_MAX,
1505 _In_ int flags = match_default)
1506 {
1507 stdex_assert(text || start >= end);
1508 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end];) {
1509 size_t dig;
1510 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; this->interval.end = m_digit_0->interval.end; }
1511 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; this->interval.end = m_digit_1->interval.end; }
1512 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; this->interval.end = m_digit_2->interval.end; }
1513 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; this->interval.end = m_digit_3->interval.end; }
1514 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; this->interval.end = m_digit_4->interval.end; }
1515 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; this->interval.end = m_digit_5->interval.end; }
1516 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; this->interval.end = m_digit_6->interval.end; }
1517 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; this->interval.end = m_digit_7->interval.end; }
1518 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; this->interval.end = m_digit_8->interval.end; }
1519 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; this->interval.end = m_digit_9->interval.end; }
1520 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; this->interval.end = m_digit_10->interval.end; }
1521 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; this->interval.end = m_digit_11->interval.end; }
1522 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; this->interval.end = m_digit_12->interval.end; }
1523 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; this->interval.end = m_digit_13->interval.end; }
1524 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; this->interval.end = m_digit_14->interval.end; }
1525 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; this->interval.end = m_digit_15->interval.end; }
1526 else break;
1527 this->value = this->value * 16 + dig;
1528 }
1529 if (start < this->interval.end) {
1530 this->interval.start = start;
1531 return true;
1532 }
1533 this->interval.invalidate();
1534 return false;
1535 }
1536
1537 std::shared_ptr<basic_parser<T>>
1538 m_digit_0,
1539 m_digit_1,
1540 m_digit_2,
1541 m_digit_3,
1542 m_digit_4,
1543 m_digit_5,
1544 m_digit_6,
1545 m_digit_7,
1546 m_digit_8,
1547 m_digit_9,
1548 m_digit_10,
1549 m_digit_11,
1550 m_digit_12,
1551 m_digit_13,
1552 m_digit_14,
1553 m_digit_15;
1554 };
1555
1558#ifdef _UNICODE
1559 using tinteger16 = winteger16;
1560#else
1561 using tinteger16 = integer16;
1562#endif
1564
1568 template <class T>
1570 {
1571 public:
1573 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
1574 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
1575 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
1576 _In_ const std::shared_ptr<basic_parser<T>>& digit_50,
1577 _In_ const std::shared_ptr<basic_parser<T>>& digit_100,
1578 _In_ const std::shared_ptr<basic_parser<T>>& digit_500,
1579 _In_ const std::shared_ptr<basic_parser<T>>& digit_1000,
1580 _In_ const std::shared_ptr<basic_parser<T>>& digit_5000,
1581 _In_ const std::shared_ptr<basic_parser<T>>& digit_10000,
1582 _In_ const std::locale& locale = std::locale()) :
1584 m_digit_1(digit_1),
1585 m_digit_5(digit_5),
1586 m_digit_10(digit_10),
1587 m_digit_50(digit_50),
1588 m_digit_100(digit_100),
1589 m_digit_500(digit_500),
1590 m_digit_1000(digit_1000),
1591 m_digit_5000(digit_5000),
1592 m_digit_10000(digit_10000)
1593 {}
1594
1595 protected:
1596 virtual bool do_match(
1597 _In_reads_or_z_opt_(end) const T* text,
1598 _In_ size_t start = 0,
1599 _In_ size_t end = SIZE_MAX,
1600 _In_ int flags = match_default)
1601 {
1602 stdex_assert(text || start >= end);
1603 size_t
1604 dig[5] = { SIZE_MAX, SIZE_MAX, SIZE_MAX, SIZE_MAX, SIZE_MAX },
1605 end2;
1606
1607 for (this->interval.end = start, this->value = 0; this->interval.end < end && text[this->interval.end]; dig[3] = dig[2], dig[2] = dig[1], dig[1] = dig[0], this->interval.end = end2) {
1608 if (m_digit_1 && m_digit_1->match(text, this->interval.end, end, flags)) { dig[0] = 1; end2 = m_digit_1->interval.end; }
1609 else if (m_digit_5 && m_digit_5->match(text, this->interval.end, end, flags)) { dig[0] = 5; end2 = m_digit_5->interval.end; }
1610 else if (m_digit_10 && m_digit_10->match(text, this->interval.end, end, flags)) { dig[0] = 10; end2 = m_digit_10->interval.end; }
1611 else if (m_digit_50 && m_digit_50->match(text, this->interval.end, end, flags)) { dig[0] = 50; end2 = m_digit_50->interval.end; }
1612 else if (m_digit_100 && m_digit_100->match(text, this->interval.end, end, flags)) { dig[0] = 100; end2 = m_digit_100->interval.end; }
1613 else if (m_digit_500 && m_digit_500->match(text, this->interval.end, end, flags)) { dig[0] = 500; end2 = m_digit_500->interval.end; }
1614 else if (m_digit_1000 && m_digit_1000->match(text, this->interval.end, end, flags)) { dig[0] = 1000; end2 = m_digit_1000->interval.end; }
1615 else if (m_digit_5000 && m_digit_5000->match(text, this->interval.end, end, flags)) { dig[0] = 5000; end2 = m_digit_5000->interval.end; }
1616 else if (m_digit_10000 && m_digit_10000->match(text, this->interval.end, end, flags)) { dig[0] = 10000; end2 = m_digit_10000->interval.end; }
1617 else break;
1618
1619 // Store first digit.
1620 if (dig[4] == SIZE_MAX) dig[4] = dig[0];
1621
1622 if (dig[3] == dig[2] && dig[2] == dig[1] && dig[1] == dig[0] && dig[0] != dig[4]) {
1623 // Same digit repeated four times. No-go, unless first digit. E.g. XIIII vs. XIV. MMMMMCD allowed, IIII also...
1624 break;
1625 }
1626 if (dig[0] <= dig[1]) {
1627 // Digit is less or equal previous one: add.
1628 this->value += dig[0];
1629 }
1630 else if (
1631 (dig[1] == 1 && (dig[0] == 5 || dig[0] == 10)) ||
1632 (dig[1] == 10 && (dig[0] == 50 || dig[0] == 100)) ||
1633 (dig[1] == 100 && (dig[0] == 500 || dig[0] == 1000)) ||
1634 (dig[1] == 1000 && (dig[0] == 5000 || dig[0] == 10000)))
1635 {
1636 // Digit is up to two orders bigger than previous one: subtract. But...
1637 if (dig[2] < dig[0]) {
1638 // Digit is also bigger than pre-previous one. E.g. VIX (V < X => invalid)
1639 break;
1640 }
1641 this->value -= dig[1]; // Cancel addition in the previous step.
1642 dig[0] -= dig[1]; // Combine last two digits.
1643 dig[1] = dig[2]; // The true previous digit is now pre-previous one. :)
1644 dig[2] = dig[3]; // The true pre-previous digit is now pre-pre-previous one. :)
1645 this->value += dig[0]; // Add combined value.
1646 }
1647 else {
1648 // New digit is too big than the previous one. E.g. VX (V < X => invalid)
1649 break;
1650 }
1651 }
1652 if (this->value) {
1653 this->interval.start = start;
1654 return true;
1655 }
1656 this->interval.invalidate();
1657 return false;
1658 }
1659
1660 std::shared_ptr<basic_parser<T>>
1661 m_digit_1,
1662 m_digit_5,
1663 m_digit_10,
1664 m_digit_50,
1665 m_digit_100,
1666 m_digit_500,
1667 m_digit_1000,
1668 m_digit_5000,
1669 m_digit_10000;
1670 };
1671
1674#ifdef _UNICODE
1676#else
1678#endif
1680
1684 template <class T>
1686 {
1687 public:
1689 _In_ const std::shared_ptr<basic_parser<T>>& _numerator,
1690 _In_ const std::shared_ptr<basic_parser<T>>& _fraction_line,
1691 _In_ const std::shared_ptr<basic_parser<T>>& _denominator,
1692 _In_ const std::locale& locale = std::locale()) :
1694 numerator(_numerator),
1695 fraction_line(_fraction_line),
1696 denominator(_denominator)
1697 {}
1698
1699 virtual void invalidate()
1700 {
1701 numerator->invalidate();
1702 fraction_line->invalidate();
1703 denominator->invalidate();
1705 }
1706
1707 std::shared_ptr<basic_parser<T>> numerator;
1708 std::shared_ptr<basic_parser<T>> fraction_line;
1709 std::shared_ptr<basic_parser<T>> denominator;
1710
1711 protected:
1712 virtual bool do_match(
1713 _In_reads_or_z_opt_(end) const T* text,
1714 _In_ size_t start = 0,
1715 _In_ size_t end = SIZE_MAX,
1716 _In_ int flags = match_default)
1717 {
1718 stdex_assert(text || start >= end);
1719 if (numerator->match(text, start, end, flags) &&
1720 fraction_line->match(text, numerator->interval.end, end, flags) &&
1721 denominator->match(text, fraction_line->interval.end, end, flags))
1722 {
1723 this->interval.start = start;
1724 this->interval.end = denominator->interval.end;
1725 return true;
1726 }
1727 numerator->invalidate();
1728 fraction_line->invalidate();
1729 denominator->invalidate();
1730 this->interval.invalidate();
1731 return false;
1732 }
1733 };
1734
1737#ifdef _UNICODE
1738 using tfraction = wfraction;
1739#else
1740 using tfraction = fraction;
1741#endif
1743
1747 template <class T>
1748 class basic_score : public basic_parser<T>
1749 {
1750 public:
1752 _In_ const std::shared_ptr<basic_parser<T>>& _home,
1753 _In_ const std::shared_ptr<basic_parser<T>>& _separator,
1754 _In_ const std::shared_ptr<basic_parser<T>>& _guest,
1755 _In_ const std::shared_ptr<basic_parser<T>>& space,
1756 _In_ const std::locale& locale = std::locale()) :
1758 home(_home),
1759 separator(_separator),
1760 guest(_guest),
1761 m_space(space)
1762 {}
1763
1764 virtual void invalidate()
1765 {
1766 home->invalidate();
1767 separator->invalidate();
1768 guest->invalidate();
1770 }
1771
1772 std::shared_ptr<basic_parser<T>> home;
1773 std::shared_ptr<basic_parser<T>> separator;
1774 std::shared_ptr<basic_parser<T>> guest;
1775
1776 protected:
1777 virtual bool do_match(
1778 _In_reads_or_z_opt_(end) const T* text,
1779 _In_ size_t start = 0,
1780 _In_ size_t end = SIZE_MAX,
1781 _In_ int flags = match_default)
1782 {
1783 stdex_assert(text || start >= end);
1784 this->interval.end = start;
1785
1786 const int space_match_flags = flags & ~match_multiline; // Spaces in score must never be broken in new line.
1787
1788 if (home->match(text, this->interval.end, end, flags))
1789 this->interval.end = home->interval.end;
1790 else
1791 goto end;
1792
1793 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1794
1795 if (separator->match(text, this->interval.end, end, flags))
1796 this->interval.end = separator->interval.end;
1797 else
1798 goto end;
1799
1800 for (; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1801
1802 if (guest->match(text, this->interval.end, end, flags))
1803 this->interval.end = guest->interval.end;
1804 else
1805 goto end;
1806
1807 this->interval.start = start;
1808 return true;
1809
1810 end:
1811 home->invalidate();
1812 separator->invalidate();
1813 guest->invalidate();
1814 this->interval.invalidate();
1815 return false;
1816 }
1817
1818 std::shared_ptr<basic_parser<T>> m_space;
1819 };
1820
1821 using score = basic_score<char>;
1823#ifdef _UNICODE
1824 using tscore = wscore;
1825#else
1826 using tscore = score;
1827#endif
1829
1833 template <class T>
1835 {
1836 public:
1838 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1839 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1840 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1841 _In_ const std::shared_ptr<basic_parser<T>>& _number,
1842 _In_ const std::locale& locale = std::locale()) :
1844 positive_sign(_positive_sign),
1845 negative_sign(_negative_sign),
1846 special_sign(_special_sign),
1847 number(_number)
1848 {}
1849
1850 virtual void invalidate()
1851 {
1852 if (positive_sign) positive_sign->invalidate();
1853 if (negative_sign) negative_sign->invalidate();
1854 if (special_sign) special_sign->invalidate();
1855 number->invalidate();
1857 }
1858
1859 std::shared_ptr<basic_parser<T>> positive_sign;
1860 std::shared_ptr<basic_parser<T>> negative_sign;
1861 std::shared_ptr<basic_parser<T>> special_sign;
1862 std::shared_ptr<basic_parser<T>> number;
1863
1864 protected:
1865 virtual bool do_match(
1866 _In_reads_or_z_opt_(end) const T* text,
1867 _In_ size_t start = 0,
1868 _In_ size_t end = SIZE_MAX,
1869 _In_ int flags = match_default)
1870 {
1871 stdex_assert(text || start >= end);
1872 this->interval.end = start;
1873 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1874 this->interval.end = positive_sign->interval.end;
1875 if (negative_sign) negative_sign->invalidate();
1876 if (special_sign) special_sign->invalidate();
1877 }
1878 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1879 this->interval.end = negative_sign->interval.end;
1880 if (positive_sign) positive_sign->invalidate();
1881 if (special_sign) special_sign->invalidate();
1882 }
1883 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1884 this->interval.end = special_sign->interval.end;
1885 if (positive_sign) positive_sign->invalidate();
1886 if (negative_sign) negative_sign->invalidate();
1887 }
1888 else {
1889 if (positive_sign) positive_sign->invalidate();
1890 if (negative_sign) negative_sign->invalidate();
1891 if (special_sign) special_sign->invalidate();
1892 }
1893 if (number->match(text, this->interval.end, end, flags)) {
1894 this->interval.start = start;
1895 this->interval.end = number->interval.end;
1896 return true;
1897 }
1898 if (positive_sign) positive_sign->invalidate();
1899 if (negative_sign) negative_sign->invalidate();
1900 if (special_sign) special_sign->invalidate();
1901 number->invalidate();
1902 this->interval.invalidate();
1903 return false;
1904 }
1905 };
1906
// Aliases of basic_signed_numeral for narrow (char) and wide (wchar_t) text.
1907 using signed_numeral = basic_signed_numeral<char>;
1908 using wsigned_numeral = basic_signed_numeral<wchar_t>;
// tsigned_numeral selects the wide variant when _UNICODE is defined, the narrow one otherwise.
1909#ifdef _UNICODE
1910 using tsigned_numeral = wsigned_numeral;
1911#else
1912 using tsigned_numeral = signed_numeral;
1913#endif
// Resolves to the same type as signed_numeral; presumably meant for SGML-encoded text - confirm with callers.
1914 using sgml_signed_numeral = basic_signed_numeral<char>;
1915
1919 template <class T>
1921 {
1922 public:
1924 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
1925 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
1926 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
1927 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
1928 _In_ const std::shared_ptr<basic_parser<T>>& space,
1929 _In_ const std::shared_ptr<basic_parser<T>>& _fraction,
1930 _In_ const std::locale& locale = std::locale()) :
1932 positive_sign(_positive_sign),
1933 negative_sign(_negative_sign),
1934 special_sign(_special_sign),
1935 integer(_integer),
1936 fraction(_fraction),
1937 m_space(space)
1938 {}
1939
1940 virtual void invalidate()
1941 {
1942 if (positive_sign) positive_sign->invalidate();
1943 if (negative_sign) negative_sign->invalidate();
1944 if (special_sign) special_sign->invalidate();
1945 integer->invalidate();
1946 fraction->invalidate();
1948 }
1949
1950 std::shared_ptr<basic_parser<T>> positive_sign;
1951 std::shared_ptr<basic_parser<T>> negative_sign;
1952 std::shared_ptr<basic_parser<T>> special_sign;
1953 std::shared_ptr<basic_parser<T>> integer;
1954 std::shared_ptr<basic_parser<T>> fraction;
1955
1956 protected:
1957 virtual bool do_match(
1958 _In_reads_or_z_opt_(end) const T* text,
1959 _In_ size_t start = 0,
1960 _In_ size_t end = SIZE_MAX,
1961 _In_ int flags = match_default)
1962 {
1963 stdex_assert(text || start >= end);
1964 this->interval.end = start;
1965
1966 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
1967 this->interval.end = positive_sign->interval.end;
1968 if (negative_sign) negative_sign->invalidate();
1969 if (special_sign) special_sign->invalidate();
1970 }
1971 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
1972 this->interval.end = negative_sign->interval.end;
1973 if (positive_sign) positive_sign->invalidate();
1974 if (special_sign) special_sign->invalidate();
1975 }
1976 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
1977 this->interval.end = special_sign->interval.end;
1978 if (positive_sign) positive_sign->invalidate();
1979 if (negative_sign) negative_sign->invalidate();
1980 }
1981 else {
1982 if (positive_sign) positive_sign->invalidate();
1983 if (negative_sign) negative_sign->invalidate();
1984 if (special_sign) special_sign->invalidate();
1985 }
1986
1987 // Check for <integer> <fraction>
1988 const int space_match_flags = flags & ~match_multiline; // Spaces in fractions must never be broken in new line.
1989 if (integer->match(text, this->interval.end, end, flags) &&
1990 m_space->match(text, integer->interval.end, end, space_match_flags))
1991 {
1992 for (this->interval.end = m_space->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
1993 if (fraction->match(text, this->interval.end, end, flags)) {
1994 this->interval.start = start;
1995 this->interval.end = fraction->interval.end;
1996 return true;
1997 }
1998 fraction->invalidate();
1999 this->interval.start = start;
2000 this->interval.end = integer->interval.end;
2001 return true;
2002 }
2003
2004 // Check for <fraction>
2005 if (fraction->match(text, this->interval.end, end, flags)) {
2006 integer->invalidate();
2007 this->interval.start = start;
2008 this->interval.end = fraction->interval.end;
2009 return true;
2010 }
2011
2012 // Check for <integer>
2013 if (integer->match(text, this->interval.end, end, flags)) {
2014 fraction->invalidate();
2015 this->interval.start = start;
2016 this->interval.end = integer->interval.end;
2017 return true;
2018 }
2019
2020 if (positive_sign) positive_sign->invalidate();
2021 if (negative_sign) negative_sign->invalidate();
2022 if (special_sign) special_sign->invalidate();
2023 integer->invalidate();
2024 fraction->invalidate();
2025 this->interval.invalidate();
2026 return false;
2027 }
2028
2029 std::shared_ptr<basic_parser<T>> m_space;
2030 };
2031
// Aliases of basic_mixed_numeral for narrow (char) and wide (wchar_t) text.
2032 using mixed_numeral = basic_mixed_numeral<char>;
2033 using wmixed_numeral = basic_mixed_numeral<wchar_t>;
// tmixed_numeral selects the wide variant when _UNICODE is defined, the narrow one otherwise.
2034#ifdef _UNICODE
2035 using tmixed_numeral = wmixed_numeral;
2036#else
2037 using tmixed_numeral = mixed_numeral;
2038#endif
// Resolves to the same type as mixed_numeral; presumably meant for SGML-encoded text - confirm with callers.
2039 using sgml_mixed_numeral = basic_mixed_numeral<char>;
2040
2044 template <class T>
2046 {
2047 public:
2049 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2050 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2051 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2052 _In_ const std::shared_ptr<basic_integer<T>>& _integer,
2053 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2054 _In_ const std::shared_ptr<basic_integer<T>>& _decimal,
2055 _In_ const std::shared_ptr<basic_parser<T>>& _exponent_symbol,
2056 _In_ const std::shared_ptr<basic_parser<T>>& _positive_exp_sign,
2057 _In_ const std::shared_ptr<basic_parser<T>>& _negative_exp_sign,
2058 _In_ const std::shared_ptr<basic_integer<T>>& _exponent,
2059 _In_ const std::locale& locale = std::locale()) :
2061 positive_sign(_positive_sign),
2062 negative_sign(_negative_sign),
2063 special_sign(_special_sign),
2064 integer(_integer),
2065 decimal_separator(_decimal_separator),
2066 decimal(_decimal),
2067 exponent_symbol(_exponent_symbol),
2068 positive_exp_sign(_positive_exp_sign),
2069 negative_exp_sign(_negative_exp_sign),
2070 exponent(_exponent),
2071 value(std::numeric_limits<double>::quiet_NaN())
2072 {}
2073
2074 virtual void invalidate()
2075 {
2076 if (positive_sign) positive_sign->invalidate();
2077 if (negative_sign) negative_sign->invalidate();
2078 if (special_sign) special_sign->invalidate();
2079 integer->invalidate();
2080 decimal_separator->invalidate();
2081 decimal->invalidate();
2082 if (exponent_symbol) exponent_symbol->invalidate();
2083 if (positive_exp_sign) positive_exp_sign->invalidate();
2084 if (negative_exp_sign) negative_exp_sign->invalidate();
2085 if (exponent) exponent->invalidate();
2086 value = std::numeric_limits<double>::quiet_NaN();
2088 }
2089
2090 std::shared_ptr<basic_parser<T>> positive_sign;
2091 std::shared_ptr<basic_parser<T>> negative_sign;
2092 std::shared_ptr<basic_parser<T>> special_sign;
2093 std::shared_ptr<basic_integer<T>> integer;
2094 std::shared_ptr<basic_parser<T>> decimal_separator;
2095 std::shared_ptr<basic_integer<T>> decimal;
2096 std::shared_ptr<basic_parser<T>> exponent_symbol;
2097 std::shared_ptr<basic_parser<T>> positive_exp_sign;
2098 std::shared_ptr<basic_parser<T>> negative_exp_sign;
2099 std::shared_ptr<basic_integer<T>> exponent;
2100 double value;
2101
2102 protected:
2103 virtual bool do_match(
2104 _In_reads_or_z_opt_(end) const T* text,
2105 _In_ size_t start = 0,
2106 _In_ size_t end = SIZE_MAX,
2107 _In_ int flags = match_default)
2108 {
2109 stdex_assert(text || start >= end);
2110 this->interval.end = start;
2111
2112 if (positive_sign && positive_sign->match(text, this->interval.end, end, flags)) {
2113 this->interval.end = positive_sign->interval.end;
2114 if (negative_sign) negative_sign->invalidate();
2115 if (special_sign) special_sign->invalidate();
2116 }
2117 else if (negative_sign && negative_sign->match(text, this->interval.end, end, flags)) {
2118 this->interval.end = negative_sign->interval.end;
2119 if (positive_sign) positive_sign->invalidate();
2120 if (special_sign) special_sign->invalidate();
2121 }
2122 else if (special_sign && special_sign->match(text, this->interval.end, end, flags)) {
2123 this->interval.end = special_sign->interval.end;
2124 if (positive_sign) positive_sign->invalidate();
2125 if (negative_sign) negative_sign->invalidate();
2126 }
2127 else {
2128 if (positive_sign) positive_sign->invalidate();
2129 if (negative_sign) negative_sign->invalidate();
2130 if (special_sign) special_sign->invalidate();
2131 }
2132
2133 if (integer->match(text, this->interval.end, end, flags))
2134 this->interval.end = integer->interval.end;
2135
2136 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2137 decimal->match(text, decimal_separator->interval.end, end, flags))
2138 this->interval.end = decimal->interval.end;
2139 else {
2140 decimal_separator->invalidate();
2141 decimal->invalidate();
2142 }
2143
2144 if (integer->interval.empty() &&
2145 decimal->interval.empty())
2146 {
2147 // No integer part, no decimal part.
2148 if (positive_sign) positive_sign->invalidate();
2149 if (negative_sign) negative_sign->invalidate();
2150 if (special_sign) special_sign->invalidate();
2151 integer->invalidate();
2152 decimal_separator->invalidate();
2153 decimal->invalidate();
2154 if (exponent_symbol) exponent_symbol->invalidate();
2155 if (positive_exp_sign) positive_exp_sign->invalidate();
2156 if (negative_exp_sign) negative_exp_sign->invalidate();
2157 if (exponent) exponent->invalidate();
2158 this->interval.invalidate();
2159 return false;
2160 }
2161
2162 if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2163 ((positive_exp_sign && positive_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2164 exponent && exponent->match(text, positive_exp_sign->interval.end, end, flags)) ||
2165 (exponent && exponent->match(text, exponent_symbol->interval.end, end, flags))))
2166 {
2167 this->interval.end = exponent->interval.end;
2168 if (negative_exp_sign) negative_exp_sign->invalidate();
2169 }
2170 else if (exponent_symbol && exponent_symbol->match(text, this->interval.end, end, flags) &&
2171 negative_exp_sign && negative_exp_sign->match(text, exponent_symbol->interval.end, end, flags) &&
2172 exponent && exponent->match(text, negative_exp_sign->interval.end, end, flags))
2173 {
2174 this->interval.end = exponent->interval.end;
2175 if (positive_exp_sign) positive_exp_sign->invalidate();
2176 }
2177 else {
2178 if (exponent_symbol) exponent_symbol->invalidate();
2179 if (positive_exp_sign) positive_exp_sign->invalidate();
2180 if (negative_exp_sign) negative_exp_sign->invalidate();
2181 if (exponent) exponent->invalidate();
2182 }
2183
2184 value = (double)integer->value;
2185 if (decimal->interval)
2186 value += (double)decimal->value * pow(10.0, -(double)decimal->interval.size());
2187 if (negative_sign && negative_sign->interval)
2188 value = -value;
2189 if (exponent && exponent->interval) {
2190 double e = (double)exponent->value;
2191 if (negative_exp_sign && negative_exp_sign->interval)
2192 e = -e;
2193 value *= pow(10.0, e);
2194 }
2195
2196 this->interval.start = start;
2197 return true;
2198 }
2199 };
2200
// Aliases of basic_scientific_numeral for narrow (char) and wide (wchar_t) text.
2201 using scientific_numeral = basic_scientific_numeral<char>;
2202 using wscientific_numeral = basic_scientific_numeral<wchar_t>;
// tscientific_numeral selects the wide variant when _UNICODE is defined, the narrow one otherwise.
2203#ifdef _UNICODE
2204 using tscientific_numeral = wscientific_numeral;
2205#else
2206 using tscientific_numeral = scientific_numeral;
2207#endif
// Resolves to the same type as scientific_numeral; presumably meant for SGML-encoded text - confirm with callers.
2208 using sgml_scientific_numeral = basic_scientific_numeral<char>;
2209
2213 template <class T>
2215 {
2216 public:
2218 _In_ const std::shared_ptr<basic_parser<T>>& _positive_sign,
2219 _In_ const std::shared_ptr<basic_parser<T>>& _negative_sign,
2220 _In_ const std::shared_ptr<basic_parser<T>>& _special_sign,
2221 _In_ const std::shared_ptr<basic_parser<T>>& _currency,
2222 _In_ const std::shared_ptr<basic_parser<T>>& _integer,
2223 _In_ const std::shared_ptr<basic_parser<T>>& _decimal_separator,
2224 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
2225 _In_ const std::locale& locale = std::locale()) :
2227 positive_sign(_positive_sign),
2228 negative_sign(_negative_sign),
2229 special_sign(_special_sign),
2230 currency(_currency),
2231 integer(_integer),
2232 decimal_separator(_decimal_separator),
2233 decimal(_decimal)
2234 {}
2235
2236 virtual void invalidate()
2237 {
2238 if (positive_sign) positive_sign->invalidate();
2239 if (negative_sign) negative_sign->invalidate();
2240 if (special_sign) special_sign->invalidate();
2241 currency->invalidate();
2242 integer->invalidate();
2243 decimal_separator->invalidate();
2244 decimal->invalidate();
2246 }
2247
2248 std::shared_ptr<basic_parser<T>> positive_sign;
2249 std::shared_ptr<basic_parser<T>> negative_sign;
2250 std::shared_ptr<basic_parser<T>> special_sign;
2251 std::shared_ptr<basic_parser<T>> currency;
2252 std::shared_ptr<basic_parser<T>> integer;
2253 std::shared_ptr<basic_parser<T>> decimal_separator;
2254 std::shared_ptr<basic_parser<T>> decimal;
2255
2256 protected:
2257 virtual bool do_match(
2258 _In_reads_or_z_opt_(end) const T* text,
2259 _In_ size_t start = 0,
2260 _In_ size_t end = SIZE_MAX,
2261 _In_ int flags = match_default)
2262 {
2263 stdex_assert(text || start >= end);
2264 this->interval.end = start;
2265
2266 if (positive_sign->match(text, this->interval.end, end, flags)) {
2267 this->interval.end = positive_sign->interval.end;
2268 if (negative_sign) negative_sign->invalidate();
2269 if (special_sign) special_sign->invalidate();
2270 }
2271 else if (negative_sign->match(text, this->interval.end, end, flags)) {
2272 this->interval.end = negative_sign->interval.end;
2273 if (positive_sign) positive_sign->invalidate();
2274 if (special_sign) special_sign->invalidate();
2275 }
2276 else if (special_sign->match(text, this->interval.end, end, flags)) {
2277 this->interval.end = special_sign->interval.end;
2278 if (positive_sign) positive_sign->invalidate();
2279 if (negative_sign) negative_sign->invalidate();
2280 }
2281 else {
2282 if (positive_sign) positive_sign->invalidate();
2283 if (negative_sign) negative_sign->invalidate();
2284 if (special_sign) special_sign->invalidate();
2285 }
2286
2287 if (currency->match(text, this->interval.end, end, flags))
2288 this->interval.end = currency->interval.end;
2289 else {
2290 if (positive_sign) positive_sign->invalidate();
2291 if (negative_sign) negative_sign->invalidate();
2292 if (special_sign) special_sign->invalidate();
2293 integer->invalidate();
2294 decimal_separator->invalidate();
2295 decimal->invalidate();
2296 this->interval.invalidate();
2297 return false;
2298 }
2299
2300 if (integer->match(text, this->interval.end, end, flags))
2301 this->interval.end = integer->interval.end;
2302 if (decimal_separator->match(text, this->interval.end, end, flags) &&
2303 decimal->match(text, decimal_separator->interval.end, end, flags))
2304 this->interval.end = decimal->interval.end;
2305 else {
2306 decimal_separator->invalidate();
2307 decimal->invalidate();
2308 }
2309
2310 if (integer->interval.empty() &&
2311 decimal->interval.empty())
2312 {
2313 // No integer part, no decimal part.
2314 if (positive_sign) positive_sign->invalidate();
2315 if (negative_sign) negative_sign->invalidate();
2316 if (special_sign) special_sign->invalidate();
2317 currency->invalidate();
2318 integer->invalidate();
2319 decimal_separator->invalidate();
2320 decimal->invalidate();
2321 this->interval.invalidate();
2322 return false;
2323 }
2324
2325 this->interval.start = start;
2326 return true;
2327 }
2328 };
2329
2330 using monetary_numeral = basic_monetary_numeral<char>;
2331 using wmonetary_numeral = basic_monetary_numeral<wchar_t>;
2332#ifdef _UNICODE
2333 using tmonetary_numeral = wmonetary_numeral;
2334#else
2335 using tmonetary_numeral = monetary_numeral;
2336#endif
2337 using sgml_monetary_numeral = basic_monetary_numeral<char>;
2338
2342 template <class T>
2344 {
2345 public:
2347 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2348 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2349 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2350 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2351 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2352 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2353 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2354 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2355 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2356 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2357 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2358 _In_ const std::locale& locale = std::locale()) :
2360 m_digit_0(digit_0),
2361 m_digit_1(digit_1),
2362 m_digit_2(digit_2),
2363 m_digit_3(digit_3),
2364 m_digit_4(digit_4),
2365 m_digit_5(digit_5),
2366 m_digit_6(digit_6),
2367 m_digit_7(digit_7),
2368 m_digit_8(digit_8),
2369 m_digit_9(digit_9),
2370 m_separator(separator)
2371 {
2372 value.s_addr = 0;
2373 }
2374
// Marks all four address components as unmatched by giving each one
// start = 1 and end = 0 (start past end), and clears the parsed address.
// NOTE(review): listing line 2386 is absent from this excerpt - confirm against
// the upstream header which statement precedes the closing brace.
2375 virtual void invalidate()
2376 {
2377 components[0].start = 1;
2378 components[0].end = 0;
2379 components[1].start = 1;
2380 components[1].end = 0;
2381 components[2].start = 1;
2382 components[2].end = 0;
2383 components[3].start = 1;
2384 components[3].end = 0;
2385 value.s_addr = 0;
2387 }
2388
2390 struct in_addr value;
2391
2392 protected:
2393 virtual bool do_match(
2394 _In_reads_or_z_opt_(end) const T* text,
2395 _In_ size_t start = 0,
2396 _In_ size_t end = SIZE_MAX,
2397 _In_ int flags = match_default)
2398 {
2399 stdex_assert(text || start >= end);
2400 this->interval.end = start;
2401 value.s_addr = 0;
2402
2403 size_t i;
2404 for (i = 0; i < 4; i++) {
2405 if (i) {
2406 if (m_separator->match(text, this->interval.end, end, flags))
2407 this->interval.end = m_separator->interval.end;
2408 else
2409 goto error;
2410 }
2411
2412 components[i].start = this->interval.end;
2413 bool is_empty = true;
2414 size_t x;
2415 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2416 size_t dig, digit_end;
2417 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2418 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2419 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2420 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2421 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2422 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2423 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2424 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2425 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2426 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2427 else break;
2428 size_t x_n = x * 10 + dig;
2429 if (x_n <= 255) {
2430 x = x_n;
2431 this->interval.end = digit_end;
2432 is_empty = false;
2433 }
2434 else
2435 break;
2436 }
2437 if (is_empty)
2438 goto error;
2439 components[i].end = this->interval.end;
2440 value.s_addr = (value.s_addr << 8) | (uint8_t)x;
2441 }
2442 if (i < 4)
2443 goto error;
2444
2445 HE2BE(reinterpret_cast<uint32_t&>(value.s_addr));
2446 this->interval.start = start;
2447 return true;
2448
2449 error:
2450 invalidate();
2451 return false;
2452 }
2453
2454 std::shared_ptr<basic_parser<T>>
2455 m_digit_0,
2456 m_digit_1,
2457 m_digit_2,
2458 m_digit_3,
2459 m_digit_4,
2460 m_digit_5,
2461 m_digit_6,
2462 m_digit_7,
2463 m_digit_8,
2464 m_digit_9;
2465 std::shared_ptr<basic_parser<T>> m_separator;
2466 };
2467
2468 using ipv4_address = basic_ipv4_address<char>;
2469 using wipv4_address = basic_ipv4_address<wchar_t>;
2470#ifdef _UNICODE
2471 using tipv4_address = wipv4_address;
2472#else
2473 using tipv4_address = ipv4_address;
2474#endif
2475 using sgml_ipv4_address = basic_ipv4_address<char>;
2476
2480 template <class T>
2482 {
2483 public:
// Constructs the parser; the locale is forwarded to the basic_parser<T> base.
2484 basic_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2485
2486 protected:
2487 virtual bool do_match(
2488 _In_reads_or_z_opt_(end) const T* text,
2489 _In_ size_t start = 0,
2490 _In_ size_t end = SIZE_MAX,
2491 _In_ int flags = match_default)
2492 {
2493 stdex_assert(text || start >= end);
2494 if (start < end && text[start]) {
2495 if (text[start] == '-' ||
2496 text[start] == '_' ||
2497 text[start] == ':' ||
2498 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2499 {
2500 this->interval.end = (this->interval.start = start) + 1;
2501 return true;
2502 }
2503 }
2504 this->interval.invalidate();
2505 return false;
2506 }
2507 };
2508
2511#ifdef _UNICODE
2513#else
2515#endif
2516
2521 {
2522 public:
// Constructs the parser; the locale is forwarded to the sgml_parser base.
2523 sgml_ipv6_scope_id_char(_In_ const std::locale& locale = std::locale()) : sgml_parser(locale) {}
2524
2525 protected:
2526 virtual bool do_match(
2527 _In_reads_or_z_(end) const char* text,
2528 _In_ size_t start = 0,
2529 _In_ size_t end = SIZE_MAX,
2530 _In_ int flags = match_default)
2531 {
2532 stdex_assert(text || start >= end);
2533 if (start < end && text[start]) {
2534 wchar_t buf[5];
2535 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2536 const wchar_t* chr_end = chr + stdex::strlen(chr);
2537 if (((chr[0] == L'-' ||
2538 chr[0] == L'_' ||
2539 chr[0] == L':') && chr[1] == 0) ||
2540 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2541 {
2542 this->interval.start = start;
2543 return true;
2544 }
2545 }
2546 this->interval.invalidate();
2547 return false;
2548 }
2549 };
2550
2554 template <class T>
2556 {
2557 public:
2559 _In_ const std::shared_ptr<basic_parser<T>>& digit_0,
2560 _In_ const std::shared_ptr<basic_parser<T>>& digit_1,
2561 _In_ const std::shared_ptr<basic_parser<T>>& digit_2,
2562 _In_ const std::shared_ptr<basic_parser<T>>& digit_3,
2563 _In_ const std::shared_ptr<basic_parser<T>>& digit_4,
2564 _In_ const std::shared_ptr<basic_parser<T>>& digit_5,
2565 _In_ const std::shared_ptr<basic_parser<T>>& digit_6,
2566 _In_ const std::shared_ptr<basic_parser<T>>& digit_7,
2567 _In_ const std::shared_ptr<basic_parser<T>>& digit_8,
2568 _In_ const std::shared_ptr<basic_parser<T>>& digit_9,
2569 _In_ const std::shared_ptr<basic_parser<T>>& digit_10,
2570 _In_ const std::shared_ptr<basic_parser<T>>& digit_11,
2571 _In_ const std::shared_ptr<basic_parser<T>>& digit_12,
2572 _In_ const std::shared_ptr<basic_parser<T>>& digit_13,
2573 _In_ const std::shared_ptr<basic_parser<T>>& digit_14,
2574 _In_ const std::shared_ptr<basic_parser<T>>& digit_15,
2575 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2576 _In_ const std::shared_ptr<basic_parser<T>>& scope_id_separator = nullptr,
2577 _In_ const std::shared_ptr<basic_parser<T>>& _scope_id = nullptr,
2578 _In_ const std::locale& locale = std::locale()) :
2580 m_digit_0(digit_0),
2581 m_digit_1(digit_1),
2582 m_digit_2(digit_2),
2583 m_digit_3(digit_3),
2584 m_digit_4(digit_4),
2585 m_digit_5(digit_5),
2586 m_digit_6(digit_6),
2587 m_digit_7(digit_7),
2588 m_digit_8(digit_8),
2589 m_digit_9(digit_9),
2590 m_digit_10(digit_10),
2591 m_digit_11(digit_11),
2592 m_digit_12(digit_12),
2593 m_digit_13(digit_13),
2594 m_digit_14(digit_14),
2595 m_digit_15(digit_15),
2596 m_separator(separator),
2597 m_scope_id_separator(scope_id_separator),
2598 scope_id(_scope_id)
2599 {
2600 memset(&value, 0, sizeof(value));
2601 }
2602
// Marks all eight address components as unmatched by giving each one
// start = 1 and end = 0 (start past end), zeroes the parsed address and
// invalidates the scope ID parser when one was supplied.
// NOTE(review): listing line 2623 is absent from this excerpt - confirm against
// the upstream header which statement precedes the closing brace.
2603 virtual void invalidate()
2604 {
2605 components[0].start = 1;
2606 components[0].end = 0;
2607 components[1].start = 1;
2608 components[1].end = 0;
2609 components[2].start = 1;
2610 components[2].end = 0;
2611 components[3].start = 1;
2612 components[3].end = 0;
2613 components[4].start = 1;
2614 components[4].end = 0;
2615 components[5].start = 1;
2616 components[5].end = 0;
2617 components[6].start = 1;
2618 components[6].end = 0;
2619 components[7].start = 1;
2620 components[7].end = 0;
2621 memset(&value, 0, sizeof(value));
2622 if (scope_id) scope_id->invalidate();
2624 }
2625
2627 struct in6_addr value;
2628 std::shared_ptr<basic_parser<T>> scope_id;
2629
2630 protected:
2631 virtual bool do_match(
2632 _In_reads_or_z_opt_(end) const T* text,
2633 _In_ size_t start = 0,
2634 _In_ size_t end = SIZE_MAX,
2635 _In_ int flags = match_default)
2636 {
2637 stdex_assert(text || start >= end);
2638 this->interval.end = start;
2639 memset(&value, 0, sizeof(value));
2640
2641 size_t i, compaction_i = SIZE_MAX, compaction_start = start;
2642 for (i = 0; i < 8; i++) {
2643 bool is_empty = true;
2644
2645 if (m_separator->match(text, this->interval.end, end, flags)) {
2646 // : found
2647 this->interval.end = m_separator->interval.end;
2648 if (m_separator->match(text, this->interval.end, end, flags)) {
2649 // :: found
2650 if (compaction_i == SIZE_MAX) {
2651 // Zero compaction start
2652 compaction_i = i;
2653 compaction_start = m_separator->interval.start;
2654 this->interval.end = m_separator->interval.end;
2655 }
2656 else {
2657 // More than one zero compaction
2658 break;
2659 }
2660 }
2661 else if (!i) {
2662 // Leading : found
2663 goto error;
2664 }
2665 }
2666 else if (i) {
2667 // : missing
2668 break;
2669 }
2670
2671 components[i].start = this->interval.end;
2672 size_t x;
2673 for (x = 0; this->interval.end < end && text[this->interval.end];) {
2674 size_t dig, digit_end;
2675 if (m_digit_0->match(text, this->interval.end, end, flags)) { dig = 0; digit_end = m_digit_0->interval.end; }
2676 else if (m_digit_1->match(text, this->interval.end, end, flags)) { dig = 1; digit_end = m_digit_1->interval.end; }
2677 else if (m_digit_2->match(text, this->interval.end, end, flags)) { dig = 2; digit_end = m_digit_2->interval.end; }
2678 else if (m_digit_3->match(text, this->interval.end, end, flags)) { dig = 3; digit_end = m_digit_3->interval.end; }
2679 else if (m_digit_4->match(text, this->interval.end, end, flags)) { dig = 4; digit_end = m_digit_4->interval.end; }
2680 else if (m_digit_5->match(text, this->interval.end, end, flags)) { dig = 5; digit_end = m_digit_5->interval.end; }
2681 else if (m_digit_6->match(text, this->interval.end, end, flags)) { dig = 6; digit_end = m_digit_6->interval.end; }
2682 else if (m_digit_7->match(text, this->interval.end, end, flags)) { dig = 7; digit_end = m_digit_7->interval.end; }
2683 else if (m_digit_8->match(text, this->interval.end, end, flags)) { dig = 8; digit_end = m_digit_8->interval.end; }
2684 else if (m_digit_9->match(text, this->interval.end, end, flags)) { dig = 9; digit_end = m_digit_9->interval.end; }
2685 else if (m_digit_10->match(text, this->interval.end, end, flags)) { dig = 10; digit_end = m_digit_10->interval.end; }
2686 else if (m_digit_11->match(text, this->interval.end, end, flags)) { dig = 11; digit_end = m_digit_11->interval.end; }
2687 else if (m_digit_12->match(text, this->interval.end, end, flags)) { dig = 12; digit_end = m_digit_12->interval.end; }
2688 else if (m_digit_13->match(text, this->interval.end, end, flags)) { dig = 13; digit_end = m_digit_13->interval.end; }
2689 else if (m_digit_14->match(text, this->interval.end, end, flags)) { dig = 14; digit_end = m_digit_14->interval.end; }
2690 else if (m_digit_15->match(text, this->interval.end, end, flags)) { dig = 15; digit_end = m_digit_15->interval.end; }
2691 else break;
2692 size_t x_n = x * 16 + dig;
2693 if (x_n <= 0xffff) {
2694 x = x_n;
2695 this->interval.end = digit_end;
2696 is_empty = false;
2697 }
2698 else
2699 break;
2700 }
2701 if (is_empty) {
2702 if (compaction_i != SIZE_MAX) {
2703 // Zero compaction active: no sweat.
2704 break;
2705 }
2706 goto error;
2707 }
2708 components[i].end = this->interval.end;
2709 HE2BE(reinterpret_cast<uint16_t&>(this->value.s6_words[i]));
2710 }
2711
2712 if (compaction_i != SIZE_MAX) {
2713 // Align components right due to zero compaction.
2714 size_t j, k;
2715 for (j = 8, k = i; k > compaction_i;) {
2716 this->value.s6_words[--j] = this->value.s6_words[--k];
2717 components[j] = components[k];
2718 }
2719 for (; j > compaction_i;) {
2720 this->value.s6_words[--j] = 0;
2721 components[j].start =
2722 components[j].end = compaction_start;
2723 }
2724 }
2725 else if (i < 8)
2726 goto error;
2727
2728 if (m_scope_id_separator && m_scope_id_separator->match(text, this->interval.end, end, flags) &&
2729 scope_id && scope_id->match(text, m_scope_id_separator->interval.end, end, flags))
2730 this->interval.end = scope_id->interval.end;
2731 else if (scope_id)
2732 scope_id->invalidate();
2733
2734 this->interval.start = start;
2735 return true;
2736
2737 error:
2738 invalidate();
2739 return false;
2740 }
2741
2742 std::shared_ptr<basic_parser<T>>
2743 m_digit_0,
2744 m_digit_1,
2745 m_digit_2,
2746 m_digit_3,
2747 m_digit_4,
2748 m_digit_5,
2749 m_digit_6,
2750 m_digit_7,
2751 m_digit_8,
2752 m_digit_9,
2753 m_digit_10,
2754 m_digit_11,
2755 m_digit_12,
2756 m_digit_13,
2757 m_digit_14,
2758 m_digit_15;
2759 std::shared_ptr<basic_parser<T>> m_separator, m_scope_id_separator;
2760 };
2761
2762 using ipv6_address = basic_ipv6_address<char>;
2763 using wipv6_address = basic_ipv6_address<wchar_t>;
2764#ifdef _UNICODE
2765 using tipv6_address = wipv6_address;
2766#else
2767 using tipv6_address = ipv6_address;
2768#endif
2769 using sgml_ipv6_address = basic_ipv6_address<char>;
2770
2774 template <class T>
2776 {
2777 public:
2779 _In_ bool allow_idn,
2780 _In_ const std::locale& locale = std::locale()) :
2782 m_allow_idn(allow_idn),
2783 allow_on_edge(true)
2784 {}
2785
2787
2788 protected:
2789 virtual bool do_match(
2790 _In_reads_or_z_opt_(end) const T* text,
2791 _In_ size_t start = 0,
2792 _In_ size_t end = SIZE_MAX,
2793 _In_ int flags = match_default)
2794 {
2795 stdex_assert(text || start >= end);
2796 if (start < end && text[start]) {
2797 if (('A' <= text[start] && text[start] <= 'Z') ||
2798 ('a' <= text[start] && text[start] <= 'z') ||
2799 ('0' <= text[start] && text[start] <= '9'))
2800 allow_on_edge = true;
2801 else if (text[start] == '-')
2802 allow_on_edge = false;
2803 else if (m_allow_idn && std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2804 allow_on_edge = true;
2805 else {
2806 this->interval.invalidate();
2807 return false;
2808 }
2809 this->interval.end = (this->interval.start = start) + 1;
2810 return true;
2811 }
2812 this->interval.invalidate();
2813 return false;
2814 }
2815
2816 bool m_allow_idn;
2817 };
2818
2819 using dns_domain_char = basic_dns_domain_char<char>;
2820 using wdns_domain_char = basic_dns_domain_char<wchar_t>;
2821#ifdef _UNICODE
2822 using tdns_domain_char = wdns_domain_char;
2823#else
2824 using tdns_domain_char = dns_domain_char;
2825#endif
2826
2831 {
2832 public:
2834 _In_ bool allow_idn,
2835 _In_ const std::locale& locale = std::locale()) :
2837 {}
2838
2839 protected:
2840 virtual bool do_match(
2841 _In_reads_or_z_(end) const char* text,
2842 _In_ size_t start = 0,
2843 _In_ size_t end = SIZE_MAX,
2844 _In_ int flags = match_default)
2845 {
2846 stdex_assert(text || start >= end);
2847 if (start < end && text[start]) {
2848 wchar_t buf[5];
2849 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
2850 const wchar_t* chr_end = chr + stdex::strlen(chr);
2851 if ((('A' <= chr[0] && chr[0] <= 'Z') ||
2852 ('a' <= chr[0] && chr[0] <= 'z') ||
2853 ('0' <= chr[0] && chr[0] <= '9')) && chr[1] == 0)
2854 allow_on_edge = true;
2855 else if (chr[0] == '-' && chr[1] == 0)
2856 allow_on_edge = false;
2857 else if (m_allow_idn && std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
2858 allow_on_edge = true;
2859 else {
2860 this->interval.invalidate();
2861 return false;
2862 }
2863 this->interval.start = start;
2864 return true;
2865 }
2866 this->interval.invalidate();
2867 return false;
2868 }
2869 };
2870
2874 template <class T>
2876 {
2877 public:
2879 _In_ bool allow_absolute,
2880 _In_ const std::shared_ptr<basic_dns_domain_char<T>>& domain_char,
2881 _In_ const std::shared_ptr<basic_parser<T>>& separator,
2882 _In_ const std::locale& locale = std::locale()) :
2884 m_allow_absolute(allow_absolute),
2885 m_domain_char(domain_char),
2886 m_separator(separator)
2887 {}
2888
2889 protected:
// Matches a DNS name: a run of labels built from m_domain_char characters
// and joined by m_separator. At most 127 labels are scanned.
//
// text  - text to match against
// start - index where matching begins
// end   - index where matching must stop
// flags - matching flags, passed unchanged to the sub-parsers
//
// Returns true with interval spanning the accepted name, false with interval
// invalidated when not even one label could be started.
2890 virtual bool do_match(
2891 _In_reads_or_z_opt_(end) const T* text,
2892 _In_ size_t start = 0,
2893 _In_ size_t end = SIZE_MAX,
2894 _In_ int flags = match_default)
2895 {
2896 stdex_assert(text || start >= end);
// i is the scan cursor; interval.end trails it and only advances over
// characters that may legally end the name.
2897 size_t i = start, count;
2898 for (count = 0; i < end && text[i] && count < 127; count++) {
// A label must begin with a character that is allowed on the edge.
2899 if (m_domain_char->match(text, i, end, flags) &&
2900 m_domain_char->allow_on_edge)
2901 {
2902 // Domain start
2903 this->interval.end = i = m_domain_char->interval.end;
2904 while (i < end && text[i]) {
// A separator ends the label only when the previously matched character
// was allowed on the edge (allow_on_edge still holds its classification).
2905 if (m_domain_char->allow_on_edge &&
2906 m_separator->match(text, i, end, flags))
2907 {
2908 // Domain end
// With m_allow_absolute the separator becomes part of the match; otherwise
// the match ends before it while scanning continues after it.
2909 if (m_allow_absolute)
2910 this->interval.end = i = m_separator->interval.end;
2911 else {
2912 this->interval.end = i;
2913 i = m_separator->interval.end;
2914 }
2915 break;
2916 }
2917 if (m_domain_char->match(text, i, end, flags)) {
// Characters not allowed on the edge advance the cursor without extending
// the match, so they are dropped if the name ends right after them.
2918 if (m_domain_char->allow_on_edge)
2919 this->interval.end = i = m_domain_char->interval.end;
2920 else
2921 i = m_domain_char->interval.end;
2922 }
2923 else {
// Neither a separator nor a domain character: the name ends here.
2924 this->interval.start = start;
2925 return true;
2926 }
2927 }
2928 }
2929 else
2930 break;
2931 }
// At least one completed label counts as a match.
2932 if (count) {
2933 this->interval.start = start;
2934 return true;
2935 }
2936 this->interval.invalidate();
2937 return false;
2938 }
2939
2941 std::shared_ptr<basic_dns_domain_char<T>> m_domain_char;
2942 std::shared_ptr<basic_parser<T>> m_separator;
2943 };
2944
2947#ifdef _UNICODE
2948 using tdns_name = wdns_name;
2949#else
2950 using tdns_name = dns_name;
2951#endif
2953
2957 template <class T>
2959 {
2960 public:
// Constructs the parser; the locale is forwarded to the basic_parser<T> base.
2961 basic_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
2962
2963 protected:
2964 virtual bool do_match(
2965 _In_reads_or_z_opt_(end) const T* text,
2966 _In_ size_t start = 0,
2967 _In_ size_t end = SIZE_MAX,
2968 _In_ int flags = match_default)
2969 {
2970 stdex_assert(text || start >= end);
2971 if (start < end && text[start]) {
2972 if (text[start] == '-' ||
2973 text[start] == '.' ||
2974 text[start] == '_' ||
2975 text[start] == '~' ||
2976 text[start] == '%' ||
2977 text[start] == '!' ||
2978 text[start] == '$' ||
2979 text[start] == '&' ||
2980 text[start] == '\'' ||
2981 //text[start] == '(' ||
2982 //text[start] == ')' ||
2983 text[start] == '*' ||
2984 text[start] == '+' ||
2985 text[start] == ',' ||
2986 text[start] == ';' ||
2987 text[start] == '=' ||
2988 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
2989 {
2990 this->interval.end = (this->interval.start = start) + 1;
2991 return true;
2992 }
2993 }
2994 this->interval.invalidate();
2995 return false;
2996 }
2997 };
2998
3001#ifdef _UNICODE
3003#else
3005#endif
3006
3011 {
3012 public:
// Constructs the parser; the locale is forwarded to the basic_url_username_char<char> base.
3013 sgml_url_username_char(_In_ const std::locale& locale = std::locale()) : basic_url_username_char<char>(locale) {}
3014
3015 protected:
3016 virtual bool do_match(
3017 _In_reads_or_z_(end) const char* text,
3018 _In_ size_t start = 0,
3019 _In_ size_t end = SIZE_MAX,
3020 _In_ int flags = match_default)
3021 {
3022 stdex_assert(text || start >= end);
3023 if (start < end && text[start]) {
3024 wchar_t buf[5];
3025 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3026 const wchar_t* chr_end = chr + stdex::strlen(chr);
3027 if (((chr[0] == L'-' ||
3028 chr[0] == L'.' ||
3029 chr[0] == L'_' ||
3030 chr[0] == L'~' ||
3031 chr[0] == L'%' ||
3032 chr[0] == L'!' ||
3033 chr[0] == L'$' ||
3034 chr[0] == L'&' ||
3035 chr[0] == L'\'' ||
3036 //chr[0] == L'(' ||
3037 //chr[0] == L')' ||
3038 chr[0] == L'*' ||
3039 chr[0] == L'+' ||
3040 chr[0] == L',' ||
3041 chr[0] == L';' ||
3042 chr[0] == L'=') && chr[1] == 0) ||
3043 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3044 {
3045 this->interval.start = start;
3046 return true;
3047 }
3048 }
3049
3050 this->interval.invalidate();
3051 return false;
3052 }
3053 };
3054
3058 template <class T>
3060 {
3061 public:
// Constructs the parser; the locale is forwarded to the basic_parser<T> base.
3062 basic_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3063
3064 protected:
3065 virtual bool do_match(
3066 _In_reads_or_z_opt_(end) const T* text,
3067 _In_ size_t start = 0,
3068 _In_ size_t end = SIZE_MAX,
3069 _In_ int flags = match_default)
3070 {
3071 stdex_assert(text || start >= end);
3072 if (start < end && text[start]) {
3073 if (text[start] == '-' ||
3074 text[start] == '.' ||
3075 text[start] == '_' ||
3076 text[start] == '~' ||
3077 text[start] == '%' ||
3078 text[start] == '!' ||
3079 text[start] == '$' ||
3080 text[start] == '&' ||
3081 text[start] == '\'' ||
3082 text[start] == '(' ||
3083 text[start] == ')' ||
3084 text[start] == '*' ||
3085 text[start] == '+' ||
3086 text[start] == ',' ||
3087 text[start] == ';' ||
3088 text[start] == '=' ||
3089 text[start] == ':' ||
3090 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3091 {
3092 this->interval.end = (this->interval.start = start) + 1;
3093 return true;
3094 }
3095 }
3096 this->interval.invalidate();
3097 return false;
3098 }
3099 };
3100
3103#ifdef _UNICODE
3105#else
3107#endif
3108
3113 {
3114 public:
// Constructs the parser; the locale is forwarded to the basic_url_password_char<char> base.
3115 sgml_url_password_char(_In_ const std::locale& locale = std::locale()) : basic_url_password_char<char>(locale) {}
3116
3117 protected:
3118 virtual bool do_match(
3119 _In_reads_or_z_(end) const char* text,
3120 _In_ size_t start = 0,
3121 _In_ size_t end = SIZE_MAX,
3122 _In_ int flags = match_default)
3123 {
3124 stdex_assert(text || start >= end);
3125 if (start < end && text[start]) {
3126 wchar_t buf[5];
3127 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3128 const wchar_t* chr_end = chr + stdex::strlen(chr);
3129 if (((chr[0] == L'-' ||
3130 chr[0] == L'.' ||
3131 chr[0] == L'_' ||
3132 chr[0] == L'~' ||
3133 chr[0] == L'%' ||
3134 chr[0] == L'!' ||
3135 chr[0] == L'$' ||
3136 chr[0] == L'&' ||
3137 chr[0] == L'\'' ||
3138 chr[0] == L'(' ||
3139 chr[0] == L')' ||
3140 chr[0] == L'*' ||
3141 chr[0] == L'+' ||
3142 chr[0] == L',' ||
3143 chr[0] == L';' ||
3144 chr[0] == L'=' ||
3145 chr[0] == L':') && chr[1] == 0) ||
3146 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3147 {
3148 this->interval.start = start;
3149 return true;
3150 }
3151 }
3152 this->interval.invalidate();
3153 return false;
3154 }
3155 };
3156
3160 template <class T>
3162 {
3163 public:
// Constructs the parser; the locale is forwarded to the basic_parser<T> base.
3164 basic_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
3165
3166 protected:
3167 virtual bool do_match(
3168 _In_reads_or_z_opt_(end) const T* text,
3169 _In_ size_t start = 0,
3170 _In_ size_t end = SIZE_MAX,
3171 _In_ int flags = match_default)
3172 {
3173 stdex_assert(text || start >= end);
3174 if (start < end && text[start]) {
3175 if (text[start] == '/' ||
3176 text[start] == '-' ||
3177 text[start] == '.' ||
3178 text[start] == '_' ||
3179 text[start] == '~' ||
3180 text[start] == '%' ||
3181 text[start] == '!' ||
3182 text[start] == '$' ||
3183 text[start] == '&' ||
3184 text[start] == '\'' ||
3185 text[start] == '(' ||
3186 text[start] == ')' ||
3187 text[start] == '*' ||
3188 text[start] == '+' ||
3189 text[start] == ',' ||
3190 text[start] == ';' ||
3191 text[start] == '=' ||
3192 text[start] == ':' ||
3193 text[start] == '@' ||
3194 text[start] == '?' ||
3195 text[start] == '#' ||
3196 std::use_facet<std::ctype<T>>(this->m_locale).is(std::ctype_base::alnum, text[start]))
3197 {
3198 this->interval.end = (this->interval.start = start) + 1;
3199 return true;
3200 }
3201 }
3202 this->interval.invalidate();
3203 return false;
3204 }
3205 };
3206
3209#ifdef _UNICODE
3211#else
3213#endif
3214
3219 {
3220 public:
// Constructs the parser; the locale is forwarded to the basic_url_path_char<char> base.
3221 sgml_url_path_char(_In_ const std::locale& locale = std::locale()) : basic_url_path_char<char>(locale) {}
3222
3223 protected:
3224 virtual bool do_match(
3225 _In_reads_or_z_(end) const char* text,
3226 _In_ size_t start = 0,
3227 _In_ size_t end = SIZE_MAX,
3228 _In_ int flags = match_default)
3229 {
3230 stdex_assert(text || start >= end);
3231 if (start < end && text[start]) {
3232 wchar_t buf[5];
3233 const wchar_t* chr = next_sgml_cp(text, start, end, this->interval.end, buf);
3234 const wchar_t* chr_end = chr + stdex::strlen(chr);
3235 if (((chr[0] == L'/' ||
3236 chr[0] == L'-' ||
3237 chr[0] == L'.' ||
3238 chr[0] == L'_' ||
3239 chr[0] == L'~' ||
3240 chr[0] == L'%' ||
3241 chr[0] == L'!' ||
3242 chr[0] == L'$' ||
3243 chr[0] == L'&' ||
3244 chr[0] == L'\'' ||
3245 chr[0] == L'(' ||
3246 chr[0] == L')' ||
3247 chr[0] == L'*' ||
3248 chr[0] == L'+' ||
3249 chr[0] == L',' ||
3250 chr[0] == L';' ||
3251 chr[0] == L'=' ||
3252 chr[0] == L':' ||
3253 chr[0] == L'@' ||
3254 chr[0] == L'?' ||
3255 chr[0] == L'#') && chr[1] == 0) ||
3256 std::use_facet<std::ctype<wchar_t>>(m_locale).scan_not(std::ctype_base::alnum, chr, chr_end) == chr_end)
3257 {
3258 this->interval.start = start;
3259 return true;
3260 }
3261 }
3262 this->interval.invalidate();
3263 return false;
3264 }
3265 };
3266
3270 template <class T>
3272 {
3273 public:
3275 _In_ const std::shared_ptr<basic_parser<T>>& path_char,
3276 _In_ const std::shared_ptr<basic_parser<T>>& query_start,
3277 _In_ const std::shared_ptr<basic_parser<T>>& bookmark_start,
3278 _In_ const std::locale& locale = std::locale()) :
3280 m_path_char(path_char),
3281 m_query_start(query_start),
3282 m_bookmark_start(bookmark_start)
3283 {}
3284
// Marks the path, query and bookmark sub-intervals as unmatched by giving
// each one start = 1 and end = 0 (start past end).
// NOTE(review): listing line 3293 is absent from this excerpt - confirm against
// the upstream header which statement precedes the closing brace.
3285 virtual void invalidate()
3286 {
3287 path.start = 1;
3288 path.end = 0;
3289 query.start = 1;
3290 query.end = 0;
3291 bookmark.start = 1;
3292 bookmark.end = 0;
3294 }
3295
3298 stdex::interval<size_t> bookmark;
3299
3300 protected:
3301 virtual bool do_match(
3302 _In_reads_or_z_opt_(end) const T* text,
3303 _In_ size_t start = 0,
3304 _In_ size_t end = SIZE_MAX,
3305 _In_ int flags = match_default)
3306 {
3307 stdex_assert(text || start >= end);
3308
3309 this->interval.end = start;
3310 path.start = start;
3311 query.start = 1;
3312 query.end = 0;
3313 bookmark.start = 1;
3314 bookmark.end = 0;
3315
3316 for (;;) {
3317 if (this->interval.end >= end || !text[this->interval.end])
3318 break;
3319 if (m_query_start->match(text, this->interval.end, end, flags)) {
3320 path.end = this->interval.end;
3321 query.start = this->interval.end = m_query_start->interval.end;
3322 for (;;) {
3323 if (this->interval.end >= end || !text[this->interval.end]) {
3324 query.end = this->interval.end;
3325 break;
3326 }
3327 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3328 query.end = this->interval.end;
3329 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3330 for (;;) {
3331 if (this->interval.end >= end || !text[this->interval.end]) {
3332 bookmark.end = this->interval.end;
3333 break;
3334 }
3335 if (m_path_char->match(text, this->interval.end, end, flags))
3336 this->interval.end = m_path_char->interval.end;
3337 else {
3338 bookmark.end = this->interval.end;
3339 break;
3340 }
3341 }
3342 this->interval.start = start;
3343 return true;
3344 }
3345 if (m_path_char->match(text, this->interval.end, end, flags))
3346 this->interval.end = m_path_char->interval.end;
3347 else {
3348 query.end = this->interval.end;
3349 break;
3350 }
3351 }
3352 this->interval.start = start;
3353 return true;
3354 }
3355 if (m_bookmark_start->match(text, this->interval.end, end, flags)) {
3356 path.end = this->interval.end;
3357 bookmark.start = this->interval.end = m_bookmark_start->interval.end;
3358 for (;;) {
3359 if (this->interval.end >= end || !text[this->interval.end]) {
3360 bookmark.end = this->interval.end;
3361 break;
3362 }
3363 if (m_path_char->match(text, this->interval.end, end, flags))
3364 this->interval.end = m_path_char->interval.end;
3365 else {
3366 bookmark.end = this->interval.end;
3367 break;
3368 }
3369 }
3370 this->interval.start = start;
3371 return true;
3372 }
3373 if (m_path_char->match(text, this->interval.end, end, flags))
3374 this->interval.end = m_path_char->interval.end;
3375 else
3376 break;
3377 }
3378
3379 if (start < this->interval.end) {
3380 path.end = this->interval.end;
3381 this->interval.start = start;
3382 return true;
3383 }
3384
3385 path.start = 1;
3386 path.end = 0;
3387 bookmark.start = 1;
3388 bookmark.end = 0;
3389 this->interval.invalidate();
3390 return false;
3391 }
3392
3393 std::shared_ptr<basic_parser<T>> m_path_char;
3394 std::shared_ptr<basic_parser<T>> m_query_start;
3395 std::shared_ptr<basic_parser<T>> m_bookmark_start;
3396 };
3397
3400#ifdef _UNICODE
3401 using turl_path = wurl_path;
3402#else
3403 using turl_path = url_path;
3404#endif
3406
3410 template <class T>
3411 class basic_url : public basic_parser<T>
3412 {
3413 public:
3414 basic_url(
3415 _In_ const std::shared_ptr<basic_parser<T>>& _http_scheme,
3416 _In_ const std::shared_ptr<basic_parser<T>>& _ftp_scheme,
3417 _In_ const std::shared_ptr<basic_parser<T>>& _mailto_scheme,
3418 _In_ const std::shared_ptr<basic_parser<T>>& _file_scheme,
3419 _In_ const std::shared_ptr<basic_parser<T>>& colon,
3420 _In_ const std::shared_ptr<basic_parser<T>>& slash,
3421 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3422 _In_ const std::shared_ptr<basic_parser<T>>& _password,
3423 _In_ const std::shared_ptr<basic_parser<T>>& at,
3424 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3425 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3426 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3427 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3428 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3429 _In_ const std::shared_ptr<basic_parser<T>>& _port,
3430 _In_ const std::shared_ptr<basic_parser<T>>& _path,
3431 _In_ const std::locale& locale = std::locale()) :
3433 http_scheme(_http_scheme),
3434 ftp_scheme(_ftp_scheme),
3435 mailto_scheme(_mailto_scheme),
3436 file_scheme(_file_scheme),
3437 m_colon(colon),
3438 m_slash(slash),
3439 username(_username),
3440 password(_password),
3441 m_at(at),
3442 m_ip_lbracket(ip_lbracket),
3443 m_ip_rbracket(ip_rbracket),
3444 ipv4_host(_ipv4_host),
3445 ipv6_host(_ipv6_host),
3446 dns_host(_dns_host),
3447 port(_port),
3448 path(_path)
3449 {}
3450
3451 virtual void invalidate()
3452 {
3453 http_scheme->invalidate();
3454 ftp_scheme->invalidate();
3455 mailto_scheme->invalidate();
3456 file_scheme->invalidate();
3457 username->invalidate();
3458 password->invalidate();
3459 ipv4_host->invalidate();
3460 ipv6_host->invalidate();
3461 dns_host->invalidate();
3462 port->invalidate();
3463 path->invalidate();
3465 }
3466
3467 std::shared_ptr<basic_parser<T>> http_scheme;
3468 std::shared_ptr<basic_parser<T>> ftp_scheme;
3469 std::shared_ptr<basic_parser<T>> mailto_scheme;
3470 std::shared_ptr<basic_parser<T>> file_scheme;
3471 std::shared_ptr<basic_parser<T>> username;
3472 std::shared_ptr<basic_parser<T>> password;
3473 std::shared_ptr<basic_parser<T>> ipv4_host;
3474 std::shared_ptr<basic_parser<T>> ipv6_host;
3475 std::shared_ptr<basic_parser<T>> dns_host;
3476 std::shared_ptr<basic_parser<T>> port;
3477 std::shared_ptr<basic_parser<T>> path;
3478
3479 protected:
3480 virtual bool do_match(
3481 _In_reads_or_z_opt_(end) const T* text,
3482 _In_ size_t start = 0,
3483 _In_ size_t end = SIZE_MAX,
3484 _In_ int flags = match_default)
3485 {
3486 stdex_assert(text || start >= end);
3487
3488 this->interval.end = start;
3489
3490 if (http_scheme->match(text, this->interval.end, end, flags) &&
3491 m_colon->match(text, http_scheme->interval.end, end, flags) &&
3492 m_slash->match(text, m_colon->interval.end, end, flags) &&
3493 m_slash->match(text, m_slash->interval.end, end, flags))
3494 {
3495 // http://
3496 this->interval.end = m_slash->interval.end;
3497 ftp_scheme->invalidate();
3498 mailto_scheme->invalidate();
3499 file_scheme->invalidate();
3500 }
3501 else if (ftp_scheme->match(text, this->interval.end, end, flags) &&
3502 m_colon->match(text, ftp_scheme->interval.end, end, flags) &&
3503 m_slash->match(text, m_colon->interval.end, end, flags) &&
3504 m_slash->match(text, m_slash->interval.end, end, flags))
3505 {
3506 // ftp://
3507 this->interval.end = m_slash->interval.end;
3508 http_scheme->invalidate();
3509 mailto_scheme->invalidate();
3510 file_scheme->invalidate();
3511 }
3512 else if (mailto_scheme->match(text, this->interval.end, end, flags) &&
3513 m_colon->match(text, mailto_scheme->interval.end, end, flags))
3514 {
3515 // mailto:
3516 this->interval.end = m_colon->interval.end;
3517 http_scheme->invalidate();
3518 ftp_scheme->invalidate();
3519 file_scheme->invalidate();
3520 }
3521 else if (file_scheme->match(text, this->interval.end, end, flags) &&
3522 m_colon->match(text, file_scheme->interval.end, end, flags) &&
3523 m_slash->match(text, m_colon->interval.end, end, flags) &&
3524 m_slash->match(text, m_slash->interval.end, end, flags))
3525 {
3526 // file://
3527 this->interval.end = m_slash->interval.end;
3528 http_scheme->invalidate();
3529 ftp_scheme->invalidate();
3530 mailto_scheme->invalidate();
3531 }
3532 else {
3533 // Default to http:
3534 http_scheme->invalidate();
3535 ftp_scheme->invalidate();
3536 mailto_scheme->invalidate();
3537 file_scheme->invalidate();
3538 }
3539
3540 if (ftp_scheme->interval) {
3541 if (username->match(text, this->interval.end, end, flags)) {
3542 if (m_colon->match(text, username->interval.end, end, flags) &&
3543 password->match(text, m_colon->interval.end, end, flags) &&
3544 m_at->match(text, password->interval.end, end, flags))
3545 {
3546 // Username and password
3547 this->interval.end = m_at->interval.end;
3548 }
3549 else if (m_at->match(text, this->interval.end, end, flags)) {
3550 // Username only
3551 this->interval.end = m_at->interval.end;
3552 password->invalidate();
3553 }
3554 else {
3555 username->invalidate();
3556 password->invalidate();
3557 }
3558 }
3559 else {
3560 username->invalidate();
3561 password->invalidate();
3562 }
3563
3564 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3565 // Host is IPv4
3566 this->interval.end = ipv4_host->interval.end;
3567 ipv6_host->invalidate();
3568 dns_host->invalidate();
3569 }
3570 else if (
3571 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3572 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3573 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3574 {
3575 // Host is IPv6
3576 this->interval.end = m_ip_rbracket->interval.end;
3577 ipv4_host->invalidate();
3578 dns_host->invalidate();
3579 }
3580 else if (dns_host->match(text, this->interval.end, end, flags)) {
3581 // Host is hostname
3582 this->interval.end = dns_host->interval.end;
3583 ipv4_host->invalidate();
3584 ipv6_host->invalidate();
3585 }
3586 else {
3587 invalidate();
3588 return false;
3589 }
3590
3591 if (m_colon->match(text, this->interval.end, end, flags) &&
3592 port->match(text, m_colon->interval.end, end, flags))
3593 {
3594 // Port
3595 this->interval.end = port->interval.end;
3596 }
3597 else
3598 port->invalidate();
3599
3600 if (path->match(text, this->interval.end, end, flags)) {
3601 // Path
3602 this->interval.end = path->interval.end;
3603 }
3604
3605 this->interval.start = start;
3606 return true;
3607 }
3608
3609 if (mailto_scheme->interval) {
3610 if (username->match(text, this->interval.end, end, flags) &&
3611 m_at->match(text, username->interval.end, end, flags))
3612 {
3613 // Username
3614 this->interval.end = m_at->interval.end;
3615 }
3616 else {
3617 invalidate();
3618 return false;
3619 }
3620
3621 if (m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3622 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3623 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3624 {
3625 // Host is IPv4
3626 this->interval.end = m_ip_rbracket->interval.end;
3627 ipv6_host->invalidate();
3628 dns_host->invalidate();
3629 }
3630 else if (
3631 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3632 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3633 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3634 {
3635 // Host is IPv6
3636 this->interval.end = m_ip_rbracket->interval.end;
3637 ipv4_host->invalidate();
3638 dns_host->invalidate();
3639 }
3640 else if (dns_host->match(text, this->interval.end, end, flags)) {
3641 // Host is hostname
3642 this->interval.end = dns_host->interval.end;
3643 ipv4_host->invalidate();
3644 ipv6_host->invalidate();
3645 }
3646 else {
3647 invalidate();
3648 return false;
3649 }
3650
3651 password->invalidate();
3652 port->invalidate();
3653 path->invalidate();
3654 this->interval.start = start;
3655 return true;
3656 }
3657
3658 if (file_scheme->interval) {
3659 if (path->match(text, this->interval.end, end, flags)) {
3660 // Path
3661 this->interval.end = path->interval.end;
3662 }
3663
3664 username->invalidate();
3665 password->invalidate();
3666 ipv4_host->invalidate();
3667 ipv6_host->invalidate();
3668 dns_host->invalidate();
3669 port->invalidate();
3670 this->interval.start = start;
3671 return true;
3672 }
3673
3674 // "http://" found or defaulted to
3675
3676 // If "http://" explicit, test for username&password.
3677 if (http_scheme->interval &&
3678 username->match(text, this->interval.end, end, flags))
3679 {
3680 if (m_colon->match(text, username->interval.end, end, flags) &&
3681 password->match(text, m_colon->interval.end, end, flags) &&
3682 m_at->match(text, password->interval.end, end, flags))
3683 {
3684 // Username and password
3685 this->interval.end = m_at->interval.end;
3686 }
3687 else if (m_at->match(text, username->interval.end, end, flags)) {
3688 // Username only
3689 this->interval.end = m_at->interval.end;
3690 password->invalidate();
3691 }
3692 else {
3693 username->invalidate();
3694 password->invalidate();
3695 }
3696 }
3697 else {
3698 username->invalidate();
3699 password->invalidate();
3700 }
3701
3702 if (ipv4_host->match(text, this->interval.end, end, flags)) {
3703 // Host is IPv4
3704 this->interval.end = ipv4_host->interval.end;
3705 ipv6_host->invalidate();
3706 dns_host->invalidate();
3707 }
3708 else if (
3709 m_ip_lbracket->match(text, this->interval.end, end, flags) &&
3710 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3711 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3712 {
3713 // Host is IPv6
3714 this->interval.end = m_ip_rbracket->interval.end;
3715 ipv4_host->invalidate();
3716 dns_host->invalidate();
3717 }
3718 else if (dns_host->match(text, this->interval.end, end, flags)) {
3719 // Host is hostname
3720 this->interval.end = dns_host->interval.end;
3721 ipv4_host->invalidate();
3722 ipv6_host->invalidate();
3723 }
3724 else {
3725 invalidate();
3726 return false;
3727 }
3728
3729 if (m_colon->match(text, this->interval.end, end, flags) &&
3730 port->match(text, m_colon->interval.end, end, flags))
3731 {
3732 // Port
3733 this->interval.end = port->interval.end;
3734 }
3735 else
3736 port->invalidate();
3737
3738 if (path->match(text, this->interval.end, end, flags)) {
3739 // Path
3740 this->interval.end = path->interval.end;
3741 }
3742
3743 this->interval.start = start;
3744 return true;
3745 }
3746
3747 std::shared_ptr<basic_parser<T>> m_colon;
3748 std::shared_ptr<basic_parser<T>> m_slash;
3749 std::shared_ptr<basic_parser<T>> m_at;
3750 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3751 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3752 };
3753
3754 using url = basic_url<char>;
3755 using wurl = basic_url<wchar_t>;
3756#ifdef _UNICODE
3757 using turl = wurl;
3758#else
3759 using turl = url;
3760#endif
3761 using sgml_url = basic_url<char>;
3762
3766 template <class T>
3768 {
3769 public:
3771 _In_ const std::shared_ptr<basic_parser<T>>& _username,
3772 _In_ const std::shared_ptr<basic_parser<T>>& at,
3773 _In_ const std::shared_ptr<basic_parser<T>>& ip_lbracket,
3774 _In_ const std::shared_ptr<basic_parser<T>>& ip_rbracket,
3775 _In_ const std::shared_ptr<basic_parser<T>>& _ipv4_host,
3776 _In_ const std::shared_ptr<basic_parser<T>>& _ipv6_host,
3777 _In_ const std::shared_ptr<basic_parser<T>>& _dns_host,
3778 _In_ const std::locale& locale = std::locale()) :
3780 username(_username),
3781 m_at(at),
3782 m_ip_lbracket(ip_lbracket),
3783 m_ip_rbracket(ip_rbracket),
3784 ipv4_host(_ipv4_host),
3785 ipv6_host(_ipv6_host),
3786 dns_host(_dns_host)
3787 {}
3788
3789 virtual void invalidate()
3790 {
3791 username->invalidate();
3792 ipv4_host->invalidate();
3793 ipv6_host->invalidate();
3794 dns_host->invalidate();
3796 }
3797
3798 std::shared_ptr<basic_parser<T>> username;
3799 std::shared_ptr<basic_parser<T>> ipv4_host;
3800 std::shared_ptr<basic_parser<T>> ipv6_host;
3801 std::shared_ptr<basic_parser<T>> dns_host;
3802
3803 protected:
3804 virtual bool do_match(
3805 _In_reads_or_z_opt_(end) const T* text,
3806 _In_ size_t start = 0,
3807 _In_ size_t end = SIZE_MAX,
3808 _In_ int flags = match_default)
3809 {
3810 stdex_assert(text || start >= end);
3811
3812 if (username->match(text, start, end, flags) &&
3813 m_at->match(text, username->interval.end, end, flags))
3814 {
3815 // Username@
3816 if (m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3817 ipv4_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3818 m_ip_rbracket->match(text, ipv4_host->interval.end, end, flags))
3819 {
3820 // Host is IPv4
3821 this->interval.end = m_ip_rbracket->interval.end;
3822 ipv6_host->invalidate();
3823 dns_host->invalidate();
3824 }
3825 else if (
3826 m_ip_lbracket->match(text, m_at->interval.end, end, flags) &&
3827 ipv6_host->match(text, m_ip_lbracket->interval.end, end, flags) &&
3828 m_ip_rbracket->match(text, ipv6_host->interval.end, end, flags))
3829 {
3830 // Host is IPv6
3831 this->interval.end = m_ip_rbracket->interval.end;
3832 ipv4_host->invalidate();
3833 dns_host->invalidate();
3834 }
3835 else if (dns_host->match(text, m_at->interval.end, end, flags)) {
3836 // Host is hostname
3837 this->interval.end = dns_host->interval.end;
3838 ipv4_host->invalidate();
3839 ipv6_host->invalidate();
3840 }
3841 else
3842 goto error;
3843 this->interval.start = start;
3844 return true;
3845 }
3846
3847 error:
3848 invalidate();
3849 return false;
3850 }
3851
3852 std::shared_ptr<basic_parser<T>> m_at;
3853 std::shared_ptr<basic_parser<T>> m_ip_lbracket;
3854 std::shared_ptr<basic_parser<T>> m_ip_rbracket;
3855 };
3856
3859#ifdef _UNICODE
3861#else
3863#endif
3865
3869 template <class T>
3871 {
3872 public:
3874 _In_ const std::shared_ptr<basic_parser<T>>& _emoticon,
3875 _In_ const std::shared_ptr<basic_parser<T>>& _apex,
3876 _In_ const std::shared_ptr<basic_parser<T>>& _eyes,
3877 _In_ const std::shared_ptr<basic_parser<T>>& _nose,
3878 _In_ const std::shared_ptr<basic_set<T>>& _mouth,
3879 _In_ const std::locale& locale = std::locale()) :
3881 emoticon(_emoticon),
3882 apex(_apex),
3883 eyes(_eyes),
3884 nose(_nose),
3885 mouth(_mouth)
3886 {}
3887
3888 virtual void invalidate()
3889 {
3890 if (emoticon) emoticon->invalidate();
3891 if (apex) apex->invalidate();
3892 eyes->invalidate();
3893 if (nose) nose->invalidate();
3894 mouth->invalidate();
3896 }
3897
3898 std::shared_ptr<basic_parser<T>> emoticon;
3899 std::shared_ptr<basic_parser<T>> apex;
3900 std::shared_ptr<basic_parser<T>> eyes;
3901 std::shared_ptr<basic_parser<T>> nose;
3902 std::shared_ptr<basic_set<T>> mouth;
3903
3904 protected:
3905 virtual bool do_match(
3906 _In_reads_or_z_opt_(end) const T* text,
3907 _In_ size_t start = 0,
3908 _In_ size_t end = SIZE_MAX,
3909 _In_ int flags = match_default)
3910 {
3911 stdex_assert(text || start >= end);
3912
3913 if (emoticon && emoticon->match(text, start, end, flags)) {
3914 if (apex) apex->invalidate();
3915 eyes->invalidate();
3916 if (nose) nose->invalidate();
3917 mouth->invalidate();
3918 this->interval.start = start;
3919 this->interval.end = emoticon->interval.end;
3920 return true;
3921 }
3922
3923 this->interval.end = start;
3924
3925 if (apex && apex->match(text, this->interval.end, end, flags))
3926 this->interval.end = apex->interval.end;
3927
3928 if (eyes->match(text, this->interval.end, end, flags)) {
3929 if (nose && nose->match(text, eyes->interval.end, end, flags) &&
3930 mouth->match(text, nose->interval.end, end, flags))
3931 {
3932 size_t
3933 start_mouth = mouth->interval.start,
3934 hit_offset = mouth->hit_offset;
3935 // Mouth may repeat :-)))))))
3936 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3937 mouth->interval.start = start_mouth;
3938 mouth->interval.end = this->interval.end;
3939 this->interval.start = start;
3940 return true;
3941 }
3942 if (mouth->match(text, eyes->interval.end, end, flags)) {
3943 size_t
3944 start_mouth = mouth->interval.start,
3945 hit_offset = mouth->hit_offset;
3946 // Mouth may repeat :-)))))))
3947 for (this->interval.end = mouth->interval.end; mouth->match(text, this->interval.end, end, flags) && mouth->hit_offset == hit_offset; this->interval.end = mouth->interval.end);
3948 if (nose) nose->invalidate();
3949 mouth->interval.start = start_mouth;
3950 mouth->interval.end = this->interval.end;
3951 this->interval.start = start;
3952 return true;
3953 }
3954 }
3955
3956 if (emoticon) emoticon->invalidate();
3957 if (apex) apex->invalidate();
3958 eyes->invalidate();
3959 if (nose) nose->invalidate();
3960 mouth->invalidate();
3961 this->interval.invalidate();
3962 return false;
3963 }
3964 };
3965
3966 using emoticon = basic_emoticon<char>;
3967 using wemoticon = basic_emoticon<wchar_t>;
3968#ifdef _UNICODE
3969 using temoticon = wemoticon;
3970#else
3971 using temoticon = emoticon;
3972#endif
3973 using sgml_emoticon = basic_emoticon<char>;
3974
///
/// Date layouts. Every layout occupies its own bit, so the values can be combined
/// into a mask.
///
enum date_format_t {
    date_format_none = 0,      ///< No layout
    date_format_dmy = 1 << 0,  ///< Day, month, year
    date_format_mdy = 1 << 1,  ///< Month, day, year
    date_format_ymd = 1 << 2,  ///< Year, month, day
    date_format_ym = 1 << 3,   ///< Year, month
    date_format_my = 1 << 4,   ///< Month, year
    date_format_dm = 1 << 5,   ///< Day, month
    date_format_md = 1 << 6,   ///< Month, day
};
3988
3992 template <class T>
3993 class basic_date : public basic_parser<T>
3994 {
3995 public:
3996 basic_date(
3997 _In_ int format_mask,
3998 _In_ const std::shared_ptr<basic_integer<T>>& _day,
3999 _In_ const std::shared_ptr<basic_integer<T>>& _month,
4000 _In_ const std::shared_ptr<basic_integer<T>>& _year,
4001 _In_ const std::shared_ptr<basic_set<T>>& separator,
4002 _In_ const std::shared_ptr<basic_parser<T>>& space,
4003 _In_ const std::locale& locale = std::locale()) :
4005 format(date_format_none),
4006 m_format_mask(format_mask),
4007 day(_day),
4008 month(_month),
4009 year(_year),
4010 m_separator(separator),
4011 m_space(space)
4012 {}
4013
4014 virtual void invalidate()
4015 {
4016 if (day) day->invalidate();
4017 if (month) month->invalidate();
4018 if (year) year->invalidate();
4019 format = date_format_none;
4021 }
4022
4023 date_format_t format;
4024 std::shared_ptr<basic_integer<T>> day;
4025 std::shared_ptr<basic_integer<T>> month;
4026 std::shared_ptr<basic_integer<T>> year;
4027
4028 protected:
4029 virtual bool do_match(
4030 _In_reads_or_z_opt_(end) const T* text,
4031 _In_ size_t start = 0,
4032 _In_ size_t end = SIZE_MAX,
4033 _In_ int flags = match_default)
4034 {
4035 stdex_assert(text || start >= end);
4036
4037 const int space_match_flags = flags & ~match_multiline; // Spaces in dates must never be broken in new line.
4038 if ((m_format_mask & date_format_dmy) == date_format_dmy) {
4039 if (day->match(text, start, end, flags)) {
4040 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4041 if (m_separator->match(text, this->interval.end, end, flags)) {
4042 size_t hit_offset = m_separator->hit_offset;
4043 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4044 if (month->match(text, this->interval.end, end, flags)) {
4045 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4046 if (m_separator->match(text, this->interval.end, end, flags) &&
4047 m_separator->hit_offset == hit_offset) // Both separators must match.
4048 {
4049 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4050 if (year->match(text, this->interval.end, end, flags) &&
4051 is_valid(day->value, month->value))
4052 {
4053 this->interval.start = start;
4054 this->interval.end = year->interval.end;
4055 format = date_format_dmy;
4056 return true;
4057 }
4058 }
4059 }
4060 }
4061 }
4062 }
4063
4064 if ((m_format_mask & date_format_mdy) == date_format_mdy) {
4065 if (month->match(text, start, end, flags)) {
4066 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4067 if (m_separator->match(text, this->interval.end, end, flags)) {
4068 size_t hit_offset = m_separator->hit_offset;
4069 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4070 if (day->match(text, this->interval.end, end, flags)) {
4071 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4072 if (m_separator->match(text, this->interval.end, end, flags) &&
4073 m_separator->hit_offset == hit_offset) // Both separators must match.
4074 {
4075 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4076 if (year->match(text, this->interval.end, end, flags) &&
4077 is_valid(day->value, month->value))
4078 {
4079 this->interval.start = start;
4080 this->interval.end = year->interval.end;
4081 format = date_format_mdy;
4082 return true;
4083 }
4084 }
4085 }
4086 }
4087 }
4088 }
4089
4090 if ((m_format_mask & date_format_ymd) == date_format_ymd) {
4091 if (year->match(text, start, end, flags)) {
4092 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4093 if (m_separator->match(text, this->interval.end, end, flags)) {
4094 size_t hit_offset = m_separator->hit_offset;
4095 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4096 if (month->match(text, this->interval.end, end, flags)) {
4097 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4098 if (m_separator->match(text, this->interval.end, end, flags) &&
4099 m_separator->hit_offset == hit_offset) // Both separators must match.
4100 {
4101 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4102 if (day->match(text, this->interval.end, end, flags) &&
4103 is_valid(day->value, month->value))
4104 {
4105 this->interval.start = start;
4106 this->interval.end = day->interval.end;
4107 format = date_format_ymd;
4108 return true;
4109 }
4110 }
4111 }
4112 }
4113 }
4114 }
4115
4116 if ((m_format_mask & date_format_ym) == date_format_ym) {
4117 if (year->match(text, start, end, flags)) {
4118 for (this->interval.end = year->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4119 if (m_separator->match(text, this->interval.end, end, flags)) {
4120 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4121 if (month->match(text, this->interval.end, end, flags) &&
4122 is_valid(SIZE_MAX, month->value))
4123 {
4124 if (day) day->invalidate();
4125 this->interval.start = start;
4126 this->interval.end = month->interval.end;
4127 format = date_format_ym;
4128 return true;
4129 }
4130 }
4131 }
4132 }
4133
4134 if ((m_format_mask & date_format_my) == date_format_my) {
4135 if (month->match(text, start, end, flags)) {
4136 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4137 if (m_separator->match(text, this->interval.end, end, flags)) {
4138 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4139 if (year->match(text, this->interval.end, end, flags) &&
4140 is_valid(SIZE_MAX, month->value))
4141 {
4142 if (day) day->invalidate();
4143 this->interval.start = start;
4144 this->interval.end = year->interval.end;
4145 format = date_format_my;
4146 return true;
4147 }
4148 }
4149 }
4150 }
4151
4152 if ((m_format_mask & date_format_dm) == date_format_dm) {
4153 if (day->match(text, start, end, flags)) {
4154 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4155 if (m_separator->match(text, this->interval.end, end, flags)) {
4156 size_t hit_offset = m_separator->hit_offset;
4157 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4158 if (month->match(text, this->interval.end, end, flags) &&
4159 is_valid(day->value, month->value))
4160 {
4161 if (year) year->invalidate();
4162 this->interval.start = start;
4163 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4164 if (m_separator->match(text, this->interval.end, end, flags) &&
4165 m_separator->hit_offset == hit_offset) // Both separators must match.
4166 this->interval.end = m_separator->interval.end;
4167 else
4168 this->interval.end = month->interval.end;
4169 format = date_format_dm;
4170 return true;
4171 }
4172 }
4173 }
4174 }
4175
4176 if ((m_format_mask & date_format_md) == date_format_md) {
4177 if (month->match(text, start, end, flags)) {
4178 for (this->interval.end = month->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4179 if (m_separator->match(text, this->interval.end, end, flags)) {
4180 size_t hit_offset = m_separator->hit_offset;
4181 for (this->interval.end = m_separator->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4182 if (day->match(text, this->interval.end, end, flags) &&
4183 is_valid(day->value, month->value))
4184 {
4185 if (year) year->invalidate();
4186 this->interval.start = start;
4187 for (this->interval.end = day->interval.end; m_space->match(text, this->interval.end, end, space_match_flags); this->interval.end = m_space->interval.end);
4188 if (m_separator->match(text, this->interval.end, end, flags) &&
4189 m_separator->hit_offset == hit_offset) // Both separators must match.
4190 this->interval.end = m_separator->interval.end;
4191 else
4192 this->interval.end = day->interval.end;
4193 format = date_format_md;
4194 return true;
4195 }
4196 }
4197 }
4198 }
4199
4200 if (day) day->invalidate();
4201 if (month) month->invalidate();
4202 if (year) year->invalidate();
4203 format = date_format_none;
4204 this->interval.invalidate();
4205 return false;
4206 }
4207
4208 static bool is_valid(size_t day, size_t month)
4209 {
4210 if (month == SIZE_MAX) {
4211 // Default to January. This allows validating day only, as January has all 31 days.
4212 month = 1;
4213 }
4214 if (day == SIZE_MAX) {
4215 // Default to 1st day in month. This allows validating month only, as each month has 1st day.
4216 day = 1;
4217 }
4218
4219 switch (month) {
4220 case 1:
4221 case 3:
4222 case 5:
4223 case 7:
4224 case 8:
4225 case 10:
4226 case 12:
4227 return 1 <= day && day <= 31;
4228 case 2:
4229 return 1 <= day && day <= 29;
4230 case 4:
4231 case 6:
4232 case 9:
4233 case 11:
4234 return 1 <= day && day <= 30;
4235 default:
4236 return false;
4237 }
4238 }
4239
4240 int m_format_mask;
4241 std::shared_ptr<basic_set<T>> m_separator;
4242 std::shared_ptr<basic_parser<T>> m_space;
4243 };
4244
4245 using date = basic_date<char>;
4246 using wdate = basic_date<wchar_t>;
4247#ifdef _UNICODE
4248 using tdate = wdate;
4249#else
4250 using tdate = date;
4251#endif
4253
4257 template <class T>
4258 class basic_time : public basic_parser<T>
4259 {
4260 public:
4261 basic_time(
4262 _In_ const std::shared_ptr<basic_integer10<T>>& _hour,
4263 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4264 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4265 _In_ const std::shared_ptr<basic_integer10<T>>& _millisecond,
4266 _In_ const std::shared_ptr<basic_set<T>>& separator,
4267 _In_ const std::shared_ptr<basic_parser<T>>& millisecond_separator,
4268 _In_ const std::locale& locale = std::locale()) :
4270 hour(_hour),
4271 minute(_minute),
4272 second(_second),
4273 millisecond(_millisecond),
4274 m_separator(separator),
4275 m_millisecond_separator(millisecond_separator)
4276 {}
4277
4278 virtual void invalidate()
4279 {
4280 hour->invalidate();
4281 minute->invalidate();
4282 if (second) second->invalidate();
4283 if (millisecond) millisecond->invalidate();
4285 }
4286
4287 std::shared_ptr<basic_integer10<T>> hour;
4288 std::shared_ptr<basic_integer10<T>> minute;
4289 std::shared_ptr<basic_integer10<T>> second;
4290 std::shared_ptr<basic_integer10<T>> millisecond;
4291
4292 protected:
4293 virtual bool do_match(
4294 _In_reads_or_z_opt_(end) const T* text,
4295 _In_ size_t start = 0,
4296 _In_ size_t end = SIZE_MAX,
4297 _In_ int flags = match_default)
4298 {
4299 stdex_assert(text || start >= end);
4300
4301 if (hour->match(text, start, end, flags) &&
4302 m_separator->match(text, hour->interval.end, end, flags) &&
4303 minute->match(text, m_separator->interval.end, end, flags) &&
4304 minute->value < 60)
4305 {
4306 // hh::mm
4307 size_t hit_offset = m_separator->hit_offset;
4308 if (m_separator->match(text, minute->interval.end, end, flags) &&
4309 m_separator->hit_offset == hit_offset && // Both separators must match.
4310 second && second->match(text, m_separator->interval.end, end, flags) &&
4311 second->value < 60)
4312 {
4313 // hh::mm:ss
4314 if (m_millisecond_separator && m_millisecond_separator->match(text, second->interval.end, end, flags) &&
4315 millisecond && millisecond->match(text, m_millisecond_separator->interval.end, end, flags) &&
4316 millisecond->value < 1000)
4317 {
4318 // hh::mm:ss.mmmm
4319 this->interval.end = millisecond->interval.end;
4320 }
4321 else {
4322 if (millisecond) millisecond->invalidate();
4323 this->interval.end = second->interval.end;
4324 }
4325 }
4326 else {
4327 if (second) second->invalidate();
4328 if (millisecond) millisecond->invalidate();
4329 this->interval.end = minute->interval.end;
4330 }
4331 this->interval.start = start;
4332 return true;
4333 }
4334
4335 hour->invalidate();
4336 minute->invalidate();
4337 if (second) second->invalidate();
4338 if (millisecond) millisecond->invalidate();
4339 this->interval.invalidate();
4340 return false;
4341 }
4342
4343 std::shared_ptr<basic_set<T>> m_separator;
4344 std::shared_ptr<basic_parser<T>> m_millisecond_separator;
4345 };
4346
4347 using time = basic_time<char>;
4348 using wtime = basic_time<wchar_t>;
4349#ifdef _UNICODE
4350 using ttime = wtime;
4351#else
4352 using ttime = time;
4353#endif
4355
4359 template <class T>
4360 class basic_angle : public basic_parser<T>
4361 {
4362 public:
4364 _In_ const std::shared_ptr<basic_integer10<T>>& _degree,
4365 _In_ const std::shared_ptr<basic_parser<T>>& _degree_separator,
4366 _In_ const std::shared_ptr<basic_integer10<T>>& _minute,
4367 _In_ const std::shared_ptr<basic_parser<T>>& _minute_separator,
4368 _In_ const std::shared_ptr<basic_integer10<T>>& _second,
4369 _In_ const std::shared_ptr<basic_parser<T>>& _second_separator,
4370 _In_ const std::shared_ptr<basic_parser<T>>& _decimal,
4371 _In_ const std::locale& locale = std::locale()) :
4373 degree(_degree),
4374 degree_separator(_degree_separator),
4375 minute(_minute),
4376 minute_separator(_minute_separator),
4377 second(_second),
4378 second_separator(_second_separator),
4379 decimal(_decimal)
4380 {}
4381
4382 virtual void invalidate()
4383 {
4384 degree->invalidate();
4385 degree_separator->invalidate();
4386 minute->invalidate();
4387 minute_separator->invalidate();
4388 if (second) second->invalidate();
4389 if (second_separator) second_separator->invalidate();
4390 if (decimal) decimal->invalidate();
4392 }
4393
4394 std::shared_ptr<basic_integer10<T>> degree;
4395 std::shared_ptr<basic_parser<T>> degree_separator;
4396 std::shared_ptr<basic_integer10<T>> minute;
4397 std::shared_ptr<basic_parser<T>> minute_separator;
4398 std::shared_ptr<basic_integer10<T>> second;
4399 std::shared_ptr<basic_parser<T>> second_separator;
4400 std::shared_ptr<basic_parser<T>> decimal;
4401
4402 protected:
4403 virtual bool do_match(
4404 _In_reads_or_z_opt_(end) const T* text,
4405 _In_ size_t start = 0,
4406 _In_ size_t end = SIZE_MAX,
4407 _In_ int flags = match_default)
4408 {
4409 stdex_assert(text || start >= end);
4410
4411 this->interval.end = start;
4412
4413 if (degree->match(text, this->interval.end, end, flags) &&
4414 degree_separator->match(text, degree->interval.end, end, flags))
4415 {
4416 // Degrees
4417 this->interval.end = degree_separator->interval.end;
4418 }
4419 else {
4420 degree->invalidate();
4421 degree_separator->invalidate();
4422 }
4423
4424 if (minute->match(text, this->interval.end, end, flags) &&
4425 minute->value < 60 &&
4426 minute_separator->match(text, minute->interval.end, end, flags))
4427 {
4428 // Minutes
4429 this->interval.end = minute_separator->interval.end;
4430 }
4431 else {
4432 minute->invalidate();
4433 minute_separator->invalidate();
4434 }
4435
4436 if (second && second->match(text, this->interval.end, end, flags) &&
4437 second->value < 60)
4438 {
4439 // Seconds
4440 this->interval.end = second->interval.end;
4441 if (second_separator && second_separator->match(text, this->interval.end, end, flags))
4442 this->interval.end = second_separator->interval.end;
4443 else
4444 if (second_separator) second_separator->invalidate();
4445 }
4446 else {
4447 if (second) second->invalidate();
4448 if (second_separator) second_separator->invalidate();
4449 }
4450
4451 if (degree->interval.start < degree->interval.end ||
4452 minute->interval.start < minute->interval.end ||
4453 (second && second->interval.start < second->interval.end))
4454 {
4455 if (decimal && decimal->match(text, this->interval.end, end, flags)) {
4456 // Decimals
4457 this->interval.end = decimal->interval.end;
4458 }
4459 else if (decimal)
4460 decimal->invalidate();
4461 this->interval.start = start;
4462 return true;
4463 }
4464 if (decimal) decimal->invalidate();
4465 this->interval.invalidate();
4466 return false;
4467 }
4468 };
4469
4470 using angle = basic_angle<char>;
4472#ifdef _UNICODE
4473 using RRegElKot = wangle;
4474#else
4475 using RRegElKot = angle;
4476#endif
4478
4482 template <class T>
4484 {
4485 public:
4487 _In_ const std::shared_ptr<basic_parser<T>>& digit,
4488 _In_ const std::shared_ptr<basic_parser<T>>& plus_sign,
4489 _In_ const std::shared_ptr<basic_set<T>>& lparenthesis,
4490 _In_ const std::shared_ptr<basic_set<T>>& rparenthesis,
4491 _In_ const std::shared_ptr<basic_parser<T>>& separator,
4492 _In_ const std::shared_ptr<basic_parser<T>>& space,
4493 _In_ const std::locale& locale = std::locale()) :
4495 m_digit(digit),
4496 m_plus_sign(plus_sign),
4497 m_lparenthesis(lparenthesis),
4498 m_rparenthesis(rparenthesis),
4499 m_separator(separator),
4500 m_space(space)
4501 {}
4502
4503 virtual void invalidate()
4504 {
4505 value.clear();
4507 }
4508
4509 std::basic_string<T> value;
4510
4511 protected:
4512 virtual bool do_match(
4513 _In_reads_or_z_opt_(end) const T* text,
4514 _In_ size_t start = 0,
4515 _In_ size_t end = SIZE_MAX,
4516 _In_ int flags = match_default)
4517 {
4518 stdex_assert(text || start >= end);
4519
4520 size_t safe_digit_end = start, safe_value_size = 0;
4521 bool has_digits = false, after_digit = false, in_parentheses = false, after_parentheses = false;
4522 const int space_match_flags = flags & ~match_multiline; // Spaces in phone numbers must never be broken in new line.
4523
4524 this->interval.end = start;
4525 value.clear();
4526 m_lparenthesis->invalidate();
4527 m_rparenthesis->invalidate();
4528
4529 if (m_plus_sign && m_plus_sign->match(text, this->interval.end, end, flags)) {
4530 value.append(text + m_plus_sign->interval.start, text + m_plus_sign->interval.end);
4531 safe_value_size = value.size();
4532 this->interval.end = m_plus_sign->interval.end;
4533 }
4534
4535 for (;;) {
4536 stdex_assert(text || this->interval.end >= end);
4537 if (this->interval.end >= end || !text[this->interval.end])
4538 break;
4539 if (m_digit->match(text, this->interval.end, end, flags)) {
4540 // Digit
4541 value.append(text + m_digit->interval.start, text + m_digit->interval.end);
4542 this->interval.end = m_digit->interval.end;
4543 if (!in_parentheses) {
4544 safe_digit_end = this->interval.end;
4545 safe_value_size = value.size();
4546 has_digits = true;
4547 }
4548 after_digit = true;
4549 after_parentheses = false;
4550 }
4551 else if (
4552 m_lparenthesis && !m_lparenthesis->interval && // No left parenthesis yet
4553 m_rparenthesis && !m_rparenthesis->interval && // Right parenthesis after left
4554 m_lparenthesis->match(text, this->interval.end, end, flags))
4555 {
4556 // Left parenthesis
4557 value.append(text + m_lparenthesis->interval.start, m_lparenthesis->interval.size());
4558 this->interval.end = m_lparenthesis->interval.end;
4559 in_parentheses = true;
4560 after_digit = false;
4561 after_parentheses = false;
4562 }
4563 else if (
4564 in_parentheses && // After left parenthesis
4565 m_rparenthesis && !m_rparenthesis->interval && // No right parenthesis yet
4566 m_rparenthesis->match(text, this->interval.end, end, flags) &&
4567 m_lparenthesis->hit_offset == m_rparenthesis->hit_offset) // Left and right parentheses must match
4568 {
4569 // Right parenthesis
4570 value.append(text + m_rparenthesis->interval.start, text + m_rparenthesis->interval.end);
4571 this->interval.end = m_rparenthesis->interval.end;
4572 safe_digit_end = this->interval.end;
4573 safe_value_size = value.size();
4574 in_parentheses = false;
4575 after_digit = false;
4576 after_parentheses = true;
4577 }
4578 else if (
4579 after_digit &&
4580 !in_parentheses && // No separators inside parentheses
4581 !after_parentheses && // No separators following right parenthesis
4582 m_separator && m_separator->match(text, this->interval.end, end, flags))
4583 {
4584 // Separator
4585 this->interval.end = m_separator->interval.end;
4586 after_digit = false;
4587 after_parentheses = false;
4588 }
4589 else if (
4590 (after_digit || after_parentheses) &&
4591 m_space && m_space->match(text, this->interval.end, end, space_match_flags))
4592 {
4593 // Space
4594 this->interval.end = m_space->interval.end;
4595 after_digit = false;
4596 after_parentheses = false;
4597 }
4598 else
4599 break;
4600 }
4601 if (has_digits) {
4602 value.erase(safe_value_size);
4603 this->interval.start = start;
4604 this->interval.end = safe_digit_end;
4605 return true;
4606 }
4607 value.clear();
4608 this->interval.invalidate();
4609 return false;
4610 }
4611
4612 std::shared_ptr<basic_parser<T>> m_digit;
4613 std::shared_ptr<basic_parser<T>> m_plus_sign;
4614 std::shared_ptr<basic_set<T>> m_lparenthesis;
4615 std::shared_ptr<basic_set<T>> m_rparenthesis;
4616 std::shared_ptr<basic_parser<T>> m_separator;
4617 std::shared_ptr<basic_parser<T>> m_space;
4618 };
4619
4620 using phone_number = basic_phone_number<char>;
4621 using wphone_number = basic_phone_number<wchar_t>;
4622#ifdef _UNICODE
4623 using tphone_number = wphone_number;
4624#else
4625 using tphone_number = phone_number;
4626#endif
4627 using sgml_phone_number = basic_phone_number<char>;
4628
4634 template <class T>
4635 class basic_iban : public basic_parser<T>
4636 {
4637 public:
4638 basic_iban(
4639 _In_ const std::shared_ptr<basic_parser<T>>& space,
4640 _In_ const std::locale& locale = std::locale()) :
4642 m_space(space)
4643 {
4644 this->country[0] = 0;
4645 this->check_digits[0] = 0;
4646 this->bban[0] = 0;
4647 this->is_valid = false;
4648 }
4649
4650 virtual void invalidate()
4651 {
4652 this->country[0] = 0;
4653 this->check_digits[0] = 0;
4654 this->bban[0] = 0;
4655 this->is_valid = false;
4657 }
4658
4659 T country[3];
4661 T bban[31];
4663
4664 protected:
4665 virtual bool do_match(
4666 _In_reads_or_z_opt_(end) const T* text,
4667 _In_ size_t start = 0,
4668 _In_ size_t end = SIZE_MAX,
4669 _In_ int flags = match_default)
4670 {
4671 stdex_assert(text || start >= end);
4672 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4673 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4674 struct country_t {
4675 T country[2];
4676 T check_digits[2];
4677 size_t length;
4678 };
4679 static const country_t s_countries[] = {
4680 { { 'A', 'D' }, {}, 24 }, // Andorra
4681 { { 'A', 'E' }, {}, 23 }, // United Arab Emirates
4682 { { 'A', 'L' }, {}, 28 }, // Albania
4683 { { 'A', 'O' }, {}, 25 }, // Angola
4684 { { 'A', 'T' }, {}, 20 }, // Austria
4685 { { 'A', 'Z' }, {}, 28 }, // Azerbaijan
4686 { { 'B', 'A' }, { '3', '9' }, 20}, // Bosnia and Herzegovina
4687 { { 'B', 'E' }, {}, 16 }, // Belgium
4688 { { 'B', 'F' }, {}, 28 }, // Burkina Faso
4689 { { 'B', 'G' }, {}, 22 }, // Bulgaria
4690 { { 'B', 'H' }, {}, 22 }, // Bahrain
4691 { { 'B', 'I' }, {}, 27 }, // Burundi
4692 { { 'B', 'J' }, {}, 28 }, // Benin
4693 { { 'B', 'R' }, {}, 29 }, // Brazil
4694 { { 'B', 'Y' }, {}, 28 }, // Belarus
4695 { { 'C', 'F' }, {}, 27 }, // Central African Republic
4696 { { 'C', 'G' }, {}, 27 }, // Congo, Republic of the
4697 { { 'C', 'H' }, {}, 21 }, // Switzerland
4698 { { 'C', 'I' }, {}, 28 }, // Côte d'Ivoire
4699 { { 'C', 'M' }, {}, 27 }, // Cameroon
4700 { { 'C', 'R' }, {}, 22 }, // Costa Rica
4701 { { 'C', 'V' }, {}, 25 }, // Cabo Verde
4702 { { 'C', 'Y' }, {}, 28 }, // Cyprus
4703 { { 'C', 'Z' }, {}, 24 }, // Czech Republic
4704 { { 'D', 'E' }, {}, 22 }, // Germany
4705 { { 'D', 'J' }, {}, 27 }, // Djibouti
4706 { { 'D', 'K' }, {}, 18 }, // Denmark
4707 { { 'D', 'O' }, {}, 28 }, // Dominican Republic
4708 { { 'D', 'Z' }, {}, 26 }, // Algeria
4709 { { 'E', 'E' }, {}, 20 }, // Estonia
4710 { { 'E', 'G' }, {}, 29 }, // Egypt
4711 { { 'E', 'S' }, {}, 24 }, // Spain
4712 { { 'F', 'I' }, {}, 18 }, // Finland
4713 { { 'F', 'O' }, {}, 18 }, // Faroe Islands
4714 { { 'F', 'R' }, {}, 27 }, // France
4715 { { 'G', 'A' }, {}, 27 }, // Gabon
4716 { { 'G', 'B' }, {}, 22 }, // United Kingdom
4717 { { 'G', 'E' }, {}, 22 }, // Georgia
4718 { { 'G', 'I' }, {}, 23 }, // Gibraltar
4719 { { 'G', 'L' }, {}, 18 }, // Greenland
4720 { { 'G', 'Q' }, {}, 27 }, // Equatorial Guinea
4721 { { 'G', 'R' }, {}, 27 }, // Greece
4722 { { 'G', 'T' }, {}, 28 }, // Guatemala
4723 { { 'G', 'W' }, {}, 25 }, // Guinea-Bissau
4724 { { 'H', 'N' }, {}, 28 }, // Honduras
4725 { { 'H', 'R' }, {}, 21 }, // Croatia
4726 { { 'H', 'U' }, {}, 28 }, // Hungary
4727 { { 'I', 'E' }, {}, 22 }, // Ireland
4728 { { 'I', 'L' }, {}, 23 }, // Israel
4729 { { 'I', 'Q' }, {}, 23 }, // Iraq
4730 { { 'I', 'R' }, {}, 26 }, // Iran
4731 { { 'I', 'S' }, {}, 26 }, // Iceland
4732 { { 'I', 'T' }, {}, 27 }, // Italy
4733 { { 'J', 'O' }, {}, 30 }, // Jordan
4734 { { 'K', 'M' }, {}, 27 }, // Comoros
4735 { { 'K', 'W' }, {}, 30 }, // Kuwait
4736 { { 'K', 'Z' }, {}, 20 }, // Kazakhstan
4737 { { 'L', 'B' }, {}, 28 }, // Lebanon
4738 { { 'L', 'C' }, {}, 32 }, // Saint Lucia
4739 { { 'L', 'I' }, {}, 21 }, // Liechtenstein
4740 { { 'L', 'T' }, {}, 20 }, // Lithuania
4741 { { 'L', 'U' }, {}, 20 }, // Luxembourg
4742 { { 'L', 'V' }, {}, 21 }, // Latvia
4743 { { 'L', 'Y' }, {}, 25 }, // Libya
4744 { { 'M', 'A' }, {}, 28 }, // Morocco
4745 { { 'M', 'C' }, {}, 27 }, // Monaco
4746 { { 'M', 'D' }, {}, 24 }, // Moldova
4747 { { 'M', 'E' }, { '2', '5' }, 22 }, // Montenegro
4748 { { 'M', 'G' }, {}, 27 }, // Madagascar
4749 { { 'M', 'K' }, { '0', '7' }, 19 }, // North Macedonia
4750 { { 'M', 'L' }, {}, 28 }, // Mali
4751 { { 'M', 'R' }, { '1', '3' }, 27}, // Mauritania
4752 { { 'M', 'T' }, {}, 31 }, // Malta
4753 { { 'M', 'U' }, {}, 30 }, // Mauritius
4754 { { 'M', 'Z' }, {}, 25 }, // Mozambique
4755 { { 'N', 'E' }, {}, 28 }, // Niger
4756 { { 'N', 'I' }, {}, 32 }, // Nicaragua
4757 { { 'N', 'L' }, {}, 18 }, // Netherlands
4758 { { 'N', 'O' }, {}, 15 }, // Norway
4759 { { 'P', 'K' }, {}, 24 }, // Pakistan
4760 { { 'P', 'L' }, {}, 28 }, // Poland
4761 { { 'P', 'S' }, {}, 29 }, // Palestinian territories
4762 { { 'P', 'T' }, { '5', '0' }, 25 }, // Portugal
4763 { { 'Q', 'A' }, {}, 29 }, // Qatar
4764 { { 'R', 'O' }, {}, 24 }, // Romania
4765 { { 'R', 'S' }, { '3', '5' }, 22 }, // Serbia
4766 { { 'R', 'U' }, {}, 33 }, // Russia
4767 { { 'S', 'A' }, {}, 24 }, // Saudi Arabia
4768 { { 'S', 'C' }, {}, 31 }, // Seychelles
4769 { { 'S', 'D' }, {}, 18 }, // Sudan
4770 { { 'S', 'E' }, {}, 24 }, // Sweden
4771 { { 'S', 'I' }, { '5', '6' }, 19 }, // Slovenia
4772 { { 'S', 'K' }, {}, 24 }, // Slovakia
4773 { { 'S', 'M' }, {}, 27 }, // San Marino
4774 { { 'S', 'N' }, {}, 28 }, // Senegal
4775 { { 'S', 'T' }, {}, 25 }, // São Tomé and Príncipe
4776 { { 'S', 'V' }, {}, 28 }, // El Salvador
4777 { { 'T', 'D' }, {}, 27 }, // Chad
4778 { { 'T', 'G' }, {}, 28 }, // Togo
4779 { { 'T', 'L' }, { '3', '8' }, 23}, // East Timor
4780 { { 'T', 'N' }, { '5', '9' }, 24 }, // Tunisia
4781 { { 'T', 'R' }, {}, 26 }, // Turkey
4782 { { 'U', 'A' }, {}, 29 }, // Ukraine
4783 { { 'V', 'A' }, {}, 22 }, // Vatican City
4784 { { 'V', 'G' }, {}, 24 }, // Virgin Islands, British
4785 { { 'X', 'K' }, {}, 20 }, // Kosovo
4786 };
4787 const country_t* country_desc = nullptr;
4788 size_t n, available, next, bban_length;
4789 uint32_t nominator;
4790
4791 this->interval.end = start;
4792 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4793 if (this->interval.end >= end || !text[this->interval.end])
4794 goto error; // incomplete country code
4795 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4796 if (chr < 'A' || 'Z' < chr)
4797 goto error; // invalid country code
4798 this->country[i] = chr;
4799 }
4800 for (size_t l = 0, r = _countof(s_countries);;) {
4801 if (l >= r)
4802 goto error; // unknown country
4803 size_t m = (l + r) / 2;
4804 const country_t& c = s_countries[m];
4805 if (c.country[0] < this->country[0] || (c.country[0] == this->country[0] && c.country[1] < this->country[1]))
4806 l = m + 1;
4807 else if (this->country[0] < c.country[0] || (this->country[0] == c.country[0] && this->country[1] < c.country[1]))
4808 r = m;
4809 else {
4810 country_desc = &c;
4811 break;
4812 }
4813 }
4814 this->country[2] = 0;
4815
4816 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4817 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4818 goto error; // incomplete or invalid check digits
4819 this->check_digits[i] = text[this->interval.end];
4820 }
4821 this->check_digits[2] = 0;
4822
4823 if ((country_desc->check_digits[0] && this->check_digits[0] != country_desc->check_digits[0]) ||
4824 (country_desc->check_digits[1] && this->check_digits[1] != country_desc->check_digits[1]))
4825 goto error; // unexpected check digits
4826
4827 bban_length = country_desc->length - 4;
4828 for (n = 0; n < bban_length;) {
4829 if (this->interval.end >= end || !text[this->interval.end])
4830 goto error; // bban too short
4831 if (m_space && m_space->match(text, this->interval.end, end, flags)) {
4832 this->interval.end = m_space->interval.end;
4833 continue;
4834 }
4835 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4836 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4837 this->bban[n++] = chr;
4838 this->interval.end++;
4839 }
4840 else
4841 goto error; // invalid bban
4842 }
4843 this->bban[n] = 0;
4844
4845 // Normalize IBAN.
4846 T normalized[69];
4847 available = 0;
4848 for (size_t i = 0; ; ++i) {
4849 if (!this->bban[i]) {
4850 for (i = 0; i < 2; ++i) {
4851 if ('A' <= this->country[i] && this->country[i] <= 'J') {
4852 normalized[available++] = '1';
4853 normalized[available++] = '0' + this->country[i] - 'A';
4854 }
4855 else if ('K' <= this->country[i] && this->country[i] <= 'T') {
4856 normalized[available++] = '2';
4857 normalized[available++] = '0' + this->country[i] - 'K';
4858 }
4859 else if ('U' <= this->country[i] && this->country[i] <= 'Z') {
4860 normalized[available++] = '3';
4861 normalized[available++] = '0' + this->country[i] - 'U';
4862 }
4863 }
4864 normalized[available++] = this->check_digits[0];
4865 normalized[available++] = this->check_digits[1];
4866 normalized[available] = 0;
4867 break;
4868 }
4869 if ('0' <= this->bban[i] && this->bban[i] <= '9')
4870 normalized[available++] = this->bban[i];
4871 else if ('A' <= this->bban[i] && this->bban[i] <= 'J') {
4872 normalized[available++] = '1';
4873 normalized[available++] = '0' + this->bban[i] - 'A';
4874 }
4875 else if ('K' <= this->bban[i] && this->bban[i] <= 'T') {
4876 normalized[available++] = '2';
4877 normalized[available++] = '0' + this->bban[i] - 'K';
4878 }
4879 else if ('U' <= this->bban[i] && this->bban[i] <= 'Z') {
4880 normalized[available++] = '3';
4881 normalized[available++] = '0' + this->bban[i] - 'U';
4882 }
4883 }
4884
4885 // Calculate modulo 97.
4886 nominator = stdex::strtou32(normalized, 9, &next, 10);
4887 for (;;) {
4888 nominator %= 97;
4889 if (!normalized[next]) {
4890 this->is_valid = nominator == 1;
4891 break;
4892 }
4893 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
4894 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
4895 nominator = nominator * 10 + static_cast<uint32_t>(normalized[next] - '0');
4896 }
4897
4898 this->interval.start = start;
4899 return true;
4900
4901 error:
4902 invalidate();
4903 return false;
4904 }
4905
4906 std::shared_ptr<basic_parser<T>> m_space;
4907 };
4908
4909 using iban = basic_iban<char>;
4910 using wiban = basic_iban<wchar_t>;
4911#ifdef _UNICODE
4912 using tiban = wiban;
4913#else
4914 using tiban = iban;
4915#endif
4916 using sgml_iban = basic_iban<char>;
4917
4923 template <class T>
4925 {
4926 public:
4928 _In_ const std::shared_ptr<basic_parser<T>>& space,
4929 _In_ const std::locale& locale = std::locale()) :
4931 m_space(space)
4932 {
4933 this->check_digits[0] = 0;
4934 this->reference[0] = 0;
4935 this->is_valid = false;
4936 }
4937
4938 virtual void invalidate()
4939 {
4940 this->check_digits[0] = 0;
4941 this->reference[0] = 0;
4942 this->is_valid = false;
4944 }
4945
4949
4950 protected:
4951 virtual bool do_match(
4952 _In_reads_or_z_opt_(end) const T* text,
4953 _In_ size_t start = 0,
4954 _In_ size_t end = SIZE_MAX,
4955 _In_ int flags = match_default)
4956 {
4957 stdex_assert(text || start >= end);
4958 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
4959 const bool case_insensitive = flags & match_case_insensitive ? true : false;
4960 size_t n, available, next;
4961 uint32_t nominator;
4962
4963 this->interval.end = start;
4964 if (this->interval.end + 1 >= end ||
4965 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'R' ||
4966 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'F')
4967 goto error; // incomplete or wrong reference ID
4968 this->interval.end += 2;
4969
4970 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
4971 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
4972 goto error; // incomplete or invalid check digits
4973 this->check_digits[i] = text[this->interval.end];
4974 }
4975 this->check_digits[2] = 0;
4976
4977 for (n = 0;;) {
4978 if (m_space && m_space->match(text, this->interval.end, end, flags))
4979 this->interval.end = m_space->interval.end;
4980 for (size_t j = 0; j < 4; ++j) {
4981 if (this->interval.end >= end || !text[this->interval.end])
4982 goto out;
4983 T chr = case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end];
4984 if (('0' <= chr && chr <= '9') || ('A' <= chr && chr <= 'Z')) {
4985 if (n >= _countof(reference) - 1)
4986 goto error; // reference overflow
4987 this->reference[n++] = chr;
4988 this->interval.end++;
4989 }
4990 else
4991 goto out;
4992 }
4993 }
4994 out:
4995 if (!n)
4996 goto error; // reference too short
4997 this->reference[_countof(this->reference) - 1] = 0;
4998 for (size_t i = n, j = _countof(this->reference) - 1; i;)
4999 this->reference[--j] = this->reference[--i];
5000 for (size_t j = _countof(this->reference) - 1 - n; j;)
5001 this->reference[--j] = '0';
5002
5003 // Normalize creditor reference.
5004 T normalized[47];
5005 available = 0;
5006 for (size_t i = 0; ; ++i) {
5007 if (!this->reference[i]) {
5008 normalized[available++] = '2'; // R
5009 normalized[available++] = '7';
5010 normalized[available++] = '1'; // F
5011 normalized[available++] = '5';
5012 normalized[available++] = this->check_digits[0];
5013 normalized[available++] = this->check_digits[1];
5014 normalized[available] = 0;
5015 break;
5016 }
5017 if ('0' <= this->reference[i] && this->reference[i] <= '9')
5018 normalized[available++] = this->reference[i];
5019 else if ('A' <= this->reference[i] && this->reference[i] <= 'J') {
5020 normalized[available++] = '1';
5021 normalized[available++] = '0' + this->reference[i] - 'A';
5022 }
5023 else if ('K' <= this->reference[i] && this->reference[i] <= 'T') {
5024 normalized[available++] = '2';
5025 normalized[available++] = '0' + this->reference[i] - 'K';
5026 }
5027 else if ('U' <= this->reference[i] && this->reference[i] <= 'Z') {
5028 normalized[available++] = '3';
5029 normalized[available++] = '0' + this->reference[i] - 'U';
5030 }
5031 }
5032
5033 // Calculate modulo 97.
5034 nominator = stdex::strtou32(normalized, 9, &next, 10);
5035 for (;;) {
5036 nominator %= 97;
5037 if (!normalized[next]) {
5038 this->is_valid = nominator == 1;
5039 break;
5040 }
5041 size_t digit_count = nominator == 0 ? 0 : nominator < 10 ? 1 : 2;
5042 for (; digit_count < 9 && normalized[next]; ++next, ++digit_count)
5043 nominator = nominator * 10 + static_cast<uint32_t>(normalized[next] - '0');
5044 }
5045
5046 this->interval.start = start;
5047 return true;
5048
5049 error:
5050 invalidate();
5051 return false;
5052 }
5053
5054 std::shared_ptr<basic_parser<T>> m_space;
5055 };
5056
5057 using creditor_reference = basic_creditor_reference<char>;
5058 using wcreditor_reference = basic_creditor_reference<wchar_t>;
5059#ifdef _UNICODE
5060 using tcreditor_reference = wcreditor_reference;
5061#else
5062 using tcreditor_reference = creditor_reference;
5063#endif
5064 using sgml_creditor_reference = basic_creditor_reference<char>;
5065
5071 template <class T>
5073 {
5074 public:
5075 basic_si_reference_part(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5076
5077 protected:
5078 virtual bool do_match(
5079 _In_reads_or_z_opt_(end) const T* text,
5080 _In_ size_t start = 0,
5081 _In_ size_t end = SIZE_MAX,
5082 _In_ int flags = match_default)
5083 {
5084 stdex_assert(text || start >= end);
5085 this->interval.end = start;
5086 for (;;) {
5087 if (this->interval.end >= end || !text[this->interval.end])
5088 break;
5089 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9')
5090 this->interval.end++;
5091 else
5092 break;
5093 }
5094 if (start < this->interval.end) {
5095 this->interval.start = start;
5096 return true;
5097 }
5098 this->interval.invalidate();
5099 return false;
5100 }
5101 };
5102
5105#ifdef _UNICODE
5107#else
5109#endif
5111
5117 template <class T>
5119 {
5120 public:
5121 basic_si_reference_delimiter(_In_ const std::locale& locale = std::locale()) : basic_parser<T>(locale) {}
5122
5123 protected:
5124 virtual bool do_match(
5125 _In_reads_or_z_opt_(end) const T* text,
5126 _In_ size_t start = 0,
5127 _In_ size_t end = SIZE_MAX,
5128 _In_ int flags = match_default)
5129 {
5130 stdex_assert(text || start >= end);
5131 if (start < end && text[start] == '-') {
5132 this->interval.end = (this->interval.start = start) + 1;
5133 return true;
5134 }
5135 this->interval.invalidate();
5136 return false;
5137 }
5138 };
5139
5142#ifdef _UNICODE
5144#else
5146#endif
5148
5156 template <class T>
5158 {
5159 public:
5161 _In_ const std::shared_ptr<basic_parser<T>>& space,
5162 _In_ const std::locale& locale = std::locale()) :
5164 part1(locale),
5165 part2(locale),
5166 part3(locale),
5167 is_valid(false),
5168 m_space(space),
5169 m_delimiter(locale)
5170 {
5171 this->model[0] = 0;
5172 }
5173
5174 virtual void invalidate()
5175 {
5176 this->model[0] = 0;
5177 this->part1.invalidate();
5178 this->part2.invalidate();
5179 this->part3.invalidate();
5180 this->is_valid = false;
5182 }
5183
5184 T model[3];
5189
5190 protected:
5191 virtual bool do_match(
5192 _In_reads_or_z_opt_(end) const T* text,
5193 _In_ size_t start = 0,
5194 _In_ size_t end = SIZE_MAX,
5195 _In_ int flags = match_default)
5196 {
5197 stdex_assert(text || start >= end);
5198 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
5199 const bool case_insensitive = flags & match_case_insensitive ? true : false;
5200
5201 this->interval.end = start;
5202 if (this->interval.end + 1 >= end ||
5203 (case_insensitive ? ctype.toupper(text[this->interval.end]) : text[this->interval.end]) != 'S' ||
5204 (case_insensitive ? ctype.toupper(text[this->interval.end + 1]) : text[this->interval.end + 1]) != 'I')
5205 goto error; // incomplete or wrong reference ID
5206 this->interval.end += 2;
5207
5208 for (size_t i = 0; i < 2; ++i, ++this->interval.end) {
5209 if (this->interval.end >= end || text[this->interval.end] < '0' || '9' < text[this->interval.end])
5210 goto error; // incomplete or invalid model
5211 this->model[i] = text[this->interval.end];
5212 }
5213 this->model[2] = 0;
5214
5215 this->part1.invalidate();
5216 this->part2.invalidate();
5217 this->part3.invalidate();
5218 if (this->model[0] == '9' && this->model[1] == '9') {
5219 is_valid = true;
5220 this->interval.start = start;
5221 return true;
5222 }
5223
5224 if (m_space && m_space->match(text, this->interval.end, end, flags))
5225 this->interval.end = m_space->interval.end;
5226
5227 this->part1.match(text, this->interval.end, end, flags) &&
5228 this->m_delimiter.match(text, this->part1.interval.end, end, flags) &&
5229 this->part2.match(text, this->m_delimiter.interval.end, end, flags) &&
5230 this->m_delimiter.match(text, this->part2.interval.end, end, flags) &&
5231 this->part3.match(text, this->m_delimiter.interval.end, end, flags);
5232
5233 this->interval.start = start;
5234 if (this->part3.interval)
5235 this->interval.end = this->part3.interval.end;
5236 else if (this->part2.interval)
5237 this->interval.end = this->part2.interval.end;
5238 else if (this->part1.interval)
5239 this->interval.end = this->part1.interval.end;
5240 else
5241 this->interval.end = start + 4;
5242
5243 if (this->model[0] == '0' && this->model[1] == '0')
5244 is_valid =
5245 this->part3.interval ?
5246 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5247 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 :
5248 this->part2.interval ?
5249 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5250 this->part1.interval.size() + this->part2.interval.size() <= 20 :
5251 this->part1.interval ?
5252 this->part1.interval.size() <= 12 :
5253 false;
5254 else if (this->model[0] == '0' && this->model[1] == '1')
5255 is_valid =
5256 this->part3.interval ?
5257 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5258 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5259 check11(
5260 text + this->part1.interval.start, this->part1.interval.size(),
5261 text + this->part2.interval.start, this->part2.interval.size(),
5262 text + this->part3.interval.start, this->part3.interval.size()) :
5263 this->part2.interval ?
5264 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5265 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5266 check11(
5267 text + this->part1.interval.start, this->part1.interval.size(),
5268 text + this->part2.interval.start, this->part2.interval.size()) :
5269 this->part1.interval ?
5270 this->part1.interval.size() <= 12 &&
5271 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5272 false;
5273 else if (this->model[0] == '0' && this->model[1] == '2')
5274 is_valid =
5275 this->part3.interval ?
5276 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5277 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5278 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5279 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5280 false;
5281 else if (this->model[0] == '0' && this->model[1] == '3')
5282 is_valid =
5283 this->part3.interval ?
5284 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5285 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5286 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5287 check11(text + this->part2.interval.start, this->part2.interval.size()) &&
5288 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5289 false;
5290 else if (this->model[0] == '0' && this->model[1] == '4')
5291 is_valid =
5292 this->part3.interval ?
5293 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5294 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5295 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5296 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5297 false;
5298 else if ((this->model[0] == '0' || this->model[0] == '5') && this->model[1] == '5')
5299 is_valid =
5300 this->part3.interval ?
5301 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5302 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5303 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5304 this->part2.interval ?
5305 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5306 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5307 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5308 this->part1.interval ?
5309 this->part1.interval.size() <= 12 &&
5310 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5311 false;
5312 else if (this->model[0] == '0' && this->model[1] == '6')
5313 is_valid =
5314 this->part3.interval ?
5315 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5316 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5317 check11(
5318 text + this->part2.interval.start, this->part2.interval.size(),
5319 text + this->part3.interval.start, this->part3.interval.size()) :
5320 this->part2.interval ?
5321 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5322 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5323 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5324 false;
5325 else if (this->model[0] == '0' && this->model[1] == '7')
5326 is_valid =
5327 this->part3.interval ?
5328 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5329 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5330 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5331 this->part2.interval ?
5332 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5333 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5334 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5335 false;
5336 else if (this->model[0] == '0' && this->model[1] == '8')
5337 is_valid =
5338 this->part3.interval ?
5339 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5340 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5341 check11(
5342 text + this->part1.interval.start, this->part1.interval.size(),
5343 text + this->part2.interval.start, this->part2.interval.size()) &&
5344 check11(text + this->part3.interval.start, this->part3.interval.size()) :
5345 false;
5346 else if (this->model[0] == '0' && this->model[1] == '9')
5347 is_valid =
5348 this->part3.interval ?
5349 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5350 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5351 check11(
5352 text + this->part1.interval.start, this->part1.interval.size(),
5353 text + this->part2.interval.start, this->part2.interval.size()) :
5354 this->part2.interval ?
5355 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5356 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5357 check11(
5358 text + this->part1.interval.start, this->part1.interval.size(),
5359 text + this->part2.interval.start, this->part2.interval.size()) :
5360 this->part1.interval ?
5361 this->part1.interval.size() <= 12 &&
5362 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5363 false;
5364 else if (this->model[0] == '1' && this->model[1] == '0')
5365 is_valid =
5366 this->part3.interval ?
5367 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5368 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5369 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5370 check11(
5371 text + this->part2.interval.start, this->part2.interval.size(),
5372 text + this->part3.interval.start, this->part3.interval.size()) :
5373 this->part2.interval ?
5374 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5375 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5376 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5377 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5378 false;
5379 else if (
5380 (this->model[0] == '1' && (this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5381 ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '8') ||
5382 (this->model[0] == '4' && (this->model[1] == '0' || this->model[1] == '1' || this->model[1] == '8' || this->model[1] == '9')) ||
5383 (this->model[0] == '5' && (this->model[1] == '1' || this->model[1] == '8')))
5384 is_valid =
5385 this->part3.interval ?
5386 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 && this->part3.interval.size() <= 12 &&
5387 this->part1.interval.size() + this->part2.interval.size() + this->part3.interval.size() <= 20 &&
5388 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5389 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5390 this->part2.interval ?
5391 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5392 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5393 check11(text + this->part1.interval.start, this->part1.interval.size()) &&
5394 check11(text + this->part2.interval.start, this->part2.interval.size()) :
5395 false;
5396 else if (this->model[0] == '1' && this->model[1] == '2')
5397 is_valid =
5398 this->part3.interval ? false :
5399 this->part2.interval ? false :
5400 this->part1.interval ?
5401 this->part1.interval.size() <= 13 &&
5402 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5403 false;
5404 else if ((this->model[0] == '2' || this->model[0] == '3') && this->model[1] == '1')
5405 is_valid =
5406 this->part3.interval ? false :
5407 this->part2.interval ?
5408 this->part1.interval.size() <= 12 && this->part2.interval.size() <= 12 &&
5409 this->part1.interval.size() + this->part2.interval.size() <= 20 &&
5410 check11(text + this->part1.interval.start, this->part1.interval.size()) :
5411 false;
5412 else
5413 is_valid = true; // Assume models we don't handle as valid
5414 return true;
5415
5416 error:
5417 invalidate();
5418 return false;
5419 }
5420
5421 static bool check11(
5422 _In_count_(num_part1) const T* part1, _In_ size_t num_part1)
5423 {
5424 stdex_assert(part1 && num_part1 >= 1);
5425 uint32_t nominator = 0, ponder = 2;
5426 for (size_t i = num_part1 - 1; i--; ++ponder)
5427 nominator += static_cast<uint32_t>(part1[i] - '0') * ponder;
5428 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5429 if (control >= 10)
5430 control = 0;
5431 return control == part1[num_part1 - 1] - '0';
5432 }
5433
5434 static bool check11(
5435 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5436 _In_count_(num_part2) const T* part2, _In_ size_t num_part2)
5437 {
5438 stdex_assert(part1 || !num_part1);
5439 stdex_assert(part2 && num_part2 >= 1);
5440 uint32_t nominator = 0, ponder = 2;
5441 for (size_t i = num_part2 - 1; i--; ++ponder)
5442 nominator += static_cast<uint32_t>(part2[i] - '0') * ponder;
5443 for (size_t i = num_part1; i--; ++ponder)
5444 nominator += static_cast<uint32_t>(part1[i] - '0') * ponder;
5445 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5446 if (control == 10)
5447 control = 0;
5448 return control == part2[num_part2 - 1] - '0';
5449 }
5450
5451 static bool check11(
5452 _In_count_(num_part1) const T* part1, _In_ size_t num_part1,
5453 _In_count_(num_part2) const T* part2, _In_ size_t num_part2,
5454 _In_count_(num_part3) const T* part3, _In_ size_t num_part3)
5455 {
5456 stdex_assert(part1 || !num_part1);
5457 stdex_assert(part2 || !num_part2);
5458 stdex_assert(part3 && num_part3 >= 1);
5459 uint32_t nominator = 0, ponder = 2;
5460 for (size_t i = num_part3 - 1; i--; ++ponder)
5461 nominator += static_cast<uint32_t>(part3[i] - '0') * ponder;
5462 for (size_t i = num_part2; i--; ++ponder)
5463 nominator += static_cast<uint32_t>(part2[i] - '0') * ponder;
5464 for (size_t i = num_part1; i--; ++ponder)
5465 nominator += static_cast<uint32_t>(part1[i] - '0') * ponder;
5466 uint8_t control = 11 - static_cast<uint8_t>(nominator % 11);
5467 if (control == 10)
5468 control = 0;
5469 return control == part2[num_part3 - 1] - '0';
5470 }
5471
5472 std::shared_ptr<basic_parser<T>> m_space;
5473 basic_si_reference_delimiter<T> m_delimiter;
5474 };
5475
5476 using si_reference = basic_si_reference<char>;
5477 using wsi_reference = basic_si_reference<wchar_t>;
5478#ifdef _UNICODE
5479 using tsi_reference = wsi_reference;
5480#else
5481 using tsi_reference = si_reference;
5482#endif
5483 using sgml_si_reference = basic_si_reference<char>;
5484
5488 template <class T>
5490 {
5491 public:
5493 _In_ const std::shared_ptr<basic_parser<T>>& element,
5494 _In_ const std::shared_ptr<basic_parser<T>>& digit,
5495 _In_ const std::shared_ptr<basic_parser<T>>& sign,
5496 _In_ const std::locale& locale = std::locale()) :
5498 m_element(element),
5499 m_digit(digit),
5500 m_sign(sign),
5501 has_digits(false),
5502 has_charge(false)
5503 {}
5504
5505 virtual void invalidate()
5506 {
5507 has_digits = false;
5508 has_charge = false;
5510 }
5511
5512 bool has_digits;
5513 bool has_charge;
5514
5515 protected:
5516 virtual bool do_match(
5517 _In_reads_or_z_opt_(end) const T* text,
5518 _In_ size_t start = 0,
5519 _In_ size_t end = SIZE_MAX,
5520 _In_ int flags = match_default)
5521 {
5522 stdex_assert(text || start >= end);
5523
5524 has_digits = false;
5525 has_charge = false;
5526 this->interval.end = start;
5527
5528 const int element_match_flags = flags & ~match_case_insensitive; // Chemical elements are always case-sensitive.
5529 for (;;) {
5530 if (m_element->match(text, this->interval.end, end, element_match_flags)) {
5531 this->interval.end = m_element->interval.end;
5532 while (m_digit->match(text, this->interval.end, end, flags)) {
5533 this->interval.end = m_digit->interval.end;
5534 has_digits = true;
5535 }
5536 }
5537 else if (start < this->interval.end) {
5538 if (m_sign->match(text, this->interval.end, end, flags)) {
5539 this->interval.end = m_sign->interval.end;
5540 has_charge = true;
5541 }
5542 this->interval.start = start;
5543 return true;
5544 }
5545 else {
5546 this->interval.invalidate();
5547 return false;
5548 }
5549 }
5550 }
5551
5552 std::shared_ptr<basic_parser<T>> m_element;
5553 std::shared_ptr<basic_parser<T>> m_digit;
5554 std::shared_ptr<basic_parser<T>> m_sign;
5555 };
5556
5559#ifdef _UNICODE
5561#else
5563#endif
5565
5570 {
5571 protected:
5572 virtual bool do_match(
5573 _In_reads_or_z_(end) const char* text,
5574 _In_ size_t start = 0,
5575 _In_ size_t end = SIZE_MAX,
5576 _In_ int flags = match_default)
5577 {
5578 stdex_assert(text || start >= end);
5579 this->interval.end = start;
5580
5581 stdex_assert(text || this->interval.end >= end);
5582 if (this->interval.end < end && text[this->interval.end]) {
5583 if (text[this->interval.end] == '\r') {
5584 this->interval.end++;
5585 if (this->interval.end < end && text[this->interval.end] == '\n') {
5586 this->interval.start = start;
5587 this->interval.end++;
5588 return true;
5589 }
5590 }
5591 else if (text[this->interval.end] == '\n') {
5592 this->interval.start = start;
5593 this->interval.end++;
5594 return true;
5595 }
5596 }
5597 this->interval.invalidate();
5598 return false;
5599 }
5600 };
5601
5605 class http_space : public parser
5606 {
5607 protected:
5608 virtual bool do_match(
5609 _In_reads_or_z_(end) const char* text,
5610 _In_ size_t start = 0,
5611 _In_ size_t end = SIZE_MAX,
5612 _In_ int flags = match_default)
5613 {
5614 stdex_assert(text || start >= end);
5615 this->interval.end = start;
5616 if (m_line_break.match(text, this->interval.end, end, flags)) {
5617 this->interval.end = m_line_break.interval.end;
5618 if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5619 this->interval.start = start;
5620 this->interval.end++;
5621 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5622 return true;
5623 }
5624 }
5625 else if (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) {
5626 this->interval.start = start;
5627 this->interval.end++;
5628 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
5629 return true;
5630 }
5631 this->interval.invalidate();
5632 return false;
5633 }
5634
5635 http_line_break m_line_break;
5636 };
5637
5641 class http_text_char : public parser
5642 {
5643 protected:
5644 virtual bool do_match(
5645 _In_reads_or_z_(end) const char* text,
5646 _In_ size_t start = 0,
5647 _In_ size_t end = SIZE_MAX,
5648 _In_ int flags = match_default)
5649 {
5650 stdex_assert(text || start >= end);
5651 this->interval.end = start;
5652
5653 stdex_assert(text || this->interval.end >= end);
5654 if (m_space.match(text, this->interval.end, end, flags)) {
5655 this->interval.start = start;
5656 this->interval.end = m_space.interval.end;
5657 return true;
5658 }
5659 else if (this->interval.end < end && text[this->interval.end] && text[this->interval.end] >= 0x20) {
5660 this->interval.start = start;
5661 this->interval.end++;
5662 return true;
5663 }
5664 this->interval.invalidate();
5665 return false;
5666 }
5667
5668 http_space m_space;
5669 };
5670
5674 class http_token : public parser
5675 {
5676 protected:
5677 virtual bool do_match(
5678 _In_reads_or_z_(end) const char* text,
5679 _In_ size_t start = 0,
5680 _In_ size_t end = SIZE_MAX,
5681 _In_ int flags = match_default)
5682 {
5683 stdex_assert(text || start >= end);
5684 this->interval.end = start;
5685 for (;;) {
5686 if (this->interval.end < end && text[this->interval.end]) {
5687 if ((unsigned int)text[this->interval.end] < 0x20 ||
5688 (unsigned int)text[this->interval.end] == 0x7f ||
5689 text[this->interval.end] == '(' ||
5690 text[this->interval.end] == ')' ||
5691 text[this->interval.end] == '<' ||
5692 text[this->interval.end] == '>' ||
5693 text[this->interval.end] == '@' ||
5694 text[this->interval.end] == ',' ||
5695 text[this->interval.end] == ';' ||
5696 text[this->interval.end] == ':' ||
5697 text[this->interval.end] == '\\' ||
5698 text[this->interval.end] == '\"' ||
5699 text[this->interval.end] == '/' ||
5700 text[this->interval.end] == '[' ||
5701 text[this->interval.end] == ']' ||
5702 text[this->interval.end] == '?' ||
5703 text[this->interval.end] == '=' ||
5704 text[this->interval.end] == '{' ||
5705 text[this->interval.end] == '}' ||
5706 stdex::isspace(text[this->interval.end]))
5707 break;
5708 else
5709 this->interval.end++;
5710 }
5711 else
5712 break;
5713 }
5714 if (start < this->interval.end) {
5715 this->interval.start = start;
5716 return true;
5717 }
5718 else {
5719 this->interval.invalidate();
5720 return false;
5721 }
5722 }
5723 };
5724
5729 {
5730 public:
5731 virtual void invalidate()
5732 {
5733 content.start = 1;
5734 content.end = 0;
5735 parser::invalidate();
5736 }
5737
5739
5740 protected:
5741 virtual bool do_match(
5742 _In_reads_or_z_(end) const char* text,
5743 _In_ size_t start = 0,
5744 _In_ size_t end = SIZE_MAX,
5745 _In_ int flags = match_default)
5746 {
5747 stdex_assert(text || start >= end);
5748 this->interval.end = start;
5749 if (this->interval.end < end && text[this->interval.end] != '"')
5750 goto error;
5751 this->interval.end++;
5752 content.start = this->interval.end;
5753 for (;;) {
5754 stdex_assert(text || this->interval.end >= end);
5755 if (this->interval.end < end && text[this->interval.end]) {
5756 if (text[this->interval.end] == '"') {
5757 content.end = this->interval.end;
5758 this->interval.end++;
5759 break;
5760 }
5761 else if (text[this->interval.end] == '\\') {
5762 this->interval.end++;
5763 if (this->interval.end < end && text[this->interval.end]) {
5764 this->interval.end++;
5765 }
5766 else
5767 goto error;
5768 }
5769 else if (m_chr.match(text, this->interval.end, end, flags))
5770 this->interval.end++;
5771 else
5772 goto error;
5773 }
5774 else
5775 goto error;
5776 }
5777 this->interval.start = start;
5778 return true;
5779
5780 error:
5781 invalidate();
5782 return false;
5783 }
5784
5785 http_text_char m_chr;
5786 };
5787
5791 class http_value : public parser
5792 {
5793 public:
5794 virtual void invalidate()
5795 {
5796 string.invalidate();
5797 token.invalidate();
5798 parser::invalidate();
5799 }
5800
5803
5804 protected:
5805 virtual bool do_match(
5806 _In_reads_or_z_(end) const char* text,
5807 _In_ size_t start = 0,
5808 _In_ size_t end = SIZE_MAX,
5809 _In_ int flags = match_default)
5810 {
5811 stdex_assert(text || start >= end);
5812 this->interval.end = start;
5813 if (string.match(text, this->interval.end, end, flags)) {
5814 token.invalidate();
5815 this->interval.end = string.interval.end;
5816 this->interval.start = start;
5817 return true;
5818 }
5819 else if (token.match(text, this->interval.end, end, flags)) {
5820 string.invalidate();
5821 this->interval.end = token.interval.end;
5822 this->interval.start = start;
5823 return true;
5824 }
5825 else {
5826 this->interval.invalidate();
5827 return false;
5828 }
5829 }
5830 };
5831
5835 class http_parameter : public parser
5836 {
5837 public:
5838 virtual void invalidate()
5839 {
5840 name.invalidate();
5841 value.invalidate();
5842 parser::invalidate();
5843 }
5844
5847
5848 protected:
5849 virtual bool do_match(
5850 _In_reads_or_z_(end) const char* text,
5851 _In_ size_t start = 0,
5852 _In_ size_t end = SIZE_MAX,
5853 _In_ int flags = match_default)
5854 {
5855 stdex_assert(text || start >= end);
5856 this->interval.end = start;
5857 if (name.match(text, this->interval.end, end, flags))
5858 this->interval.end = name.interval.end;
5859 else
5860 goto error;
5861 while (m_space.match(text, this->interval.end, end, flags))
5862 this->interval.end = m_space.interval.end;
5863 stdex_assert(text || this->interval.end >= end);
5864 if (this->interval.end < end && text[this->interval.end] == '=')
5865 this->interval.end++;
5866 else
5867 while (m_space.match(text, this->interval.end, end, flags))
5868 this->interval.end = m_space.interval.end;
5869 if (value.match(text, this->interval.end, end, flags))
5870 this->interval.end = value.interval.end;
5871 else
5872 goto error;
5873 this->interval.start = start;
5874 return true;
5875
5876 error:
5877 invalidate();
5878 return false;
5879 }
5880
5881 http_space m_space;
5882 };
5883
5887 class http_any_type : public parser
5888 {
5889 protected:
5890 virtual bool do_match(
5891 _In_reads_or_z_(end) const char* text,
5892 _In_ size_t start = 0,
5893 _In_ size_t end = SIZE_MAX,
5894 _In_ int flags = match_default)
5895 {
5896 stdex_assert(text || start >= end);
5897 if (start + 2 < end &&
5898 text[start] == '*' &&
5899 text[start + 1] == '/' &&
5900 text[start + 2] == '*')
5901 {
5902 this->interval.end = (this->interval.start = start) + 3;
5903 return true;
5904 }
5905 else if (start < end && text[start] == '*') {
5906 this->interval.end = (this->interval.start = start) + 1;
5907 return true;
5908 }
5909 else {
5910 this->interval.invalidate();
5911 return false;
5912 }
5913 }
5914 };
5915
5920 {
5921 public:
5922 virtual void invalidate()
5923 {
5924 type.invalidate();
5925 subtype.invalidate();
5926 parser::invalidate();
5927 }
5928
5929 http_token type;
5930 http_token subtype;
5931
5932 protected:
5933 virtual bool do_match(
5934 _In_reads_or_z_(end) const char* text,
5935 _In_ size_t start = 0,
5936 _In_ size_t end = SIZE_MAX,
5937 _In_ int flags = match_default)
5938 {
5939 stdex_assert(text || start >= end);
5940 this->interval.end = start;
5941 if (type.match(text, this->interval.end, end, flags))
5942 this->interval.end = type.interval.end;
5943 else
5944 goto error;
5945 while (m_space.match(text, this->interval.end, end, flags))
5946 this->interval.end = m_space.interval.end;
5947 if (this->interval.end < end && text[this->interval.end] == '/')
5948 this->interval.end++;
5949 else
5950 goto error;
5951 while (m_space.match(text, this->interval.end, end, flags))
5952 this->interval.end = m_space.interval.end;
5953 if (subtype.match(text, this->interval.end, end, flags))
5954 this->interval.end = subtype.interval.end;
5955 else
5956 goto error;
5957 this->interval.start = start;
5958 return true;
5959
5960 error:
5961 invalidate();
5962 return false;
5963 }
5964
5965 http_space m_space;
5966 };
5967
5972 {
5973 public:
5974 virtual void invalidate()
5975 {
5976 params.clear();
5977 http_media_range::invalidate();
5978 }
5979
5980 std::list<http_parameter> params;
5981
5982 protected:
5983 virtual bool do_match(
5984 _In_reads_or_z_(end) const char* text,
5985 _In_ size_t start = 0,
5986 _In_ size_t end = SIZE_MAX,
5987 _In_ int flags = match_default)
5988 {
5989 stdex_assert(text || start >= end);
5990 if (!http_media_range::do_match(text, start, end, flags))
5991 goto error;
5992 params.clear();
5993 for (;;) {
5994 if (this->interval.end < end && text[this->interval.end]) {
5995 if (m_space.match(text, this->interval.end, end, flags))
5996 this->interval.end = m_space.interval.end;
5997 else if (text[this->interval.end] == ';') {
5998 this->interval.end++;
5999 while (m_space.match(text, this->interval.end, end, flags))
6000 this->interval.end = m_space.interval.end;
6001 http_parameter param;
6002 if (param.match(text, this->interval.end, end, flags)) {
6003 this->interval.end = param.interval.end;
6004 params.push_back(std::move(param));
6005 }
6006 else
6007 break;
6008 }
6009 else
6010 break;
6011 }
6012 else
6013 break;
6014 }
6015 this->interval.end = params.empty() ? subtype.interval.end : params.back().interval.end;
6016 return true;
6017
6018 error:
6019 invalidate();
6020 return false;
6021 }
6022 };
6023
6028 {
6029 protected:
6030 virtual bool do_match(
6031 _In_reads_or_z_(end) const char* text,
6032 _In_ size_t start = 0,
6033 _In_ size_t end = SIZE_MAX,
6034 _In_ int flags = match_default)
6035 {
6036 stdex_assert(text || start >= end);
6037 this->interval.end = start;
6038 for (;;) {
6039 if (this->interval.end < end && text[this->interval.end]) {
6040 if ((unsigned int)text[this->interval.end] < 0x20 ||
6041 (unsigned int)text[this->interval.end] == 0x7f ||
6042 text[this->interval.end] == ':' ||
6043 text[this->interval.end] == '/' ||
6044 stdex::isspace(text[this->interval.end]))
6045 break;
6046 else
6047 this->interval.end++;
6048 }
6049 else
6050 break;
6051 }
6052 if (start < this->interval.end) {
6053 this->interval.start = start;
6054 return true;
6055 }
6056 this->interval.invalidate();
6057 return false;
6058 }
6059 };
6060
6064 class http_url_port : public parser
6065 {
6066 public:
6067 http_url_port(_In_ const std::locale& locale = std::locale()) :
6068 parser(locale),
6069 value(0)
6070 {}
6071
6072 virtual void invalidate()
6073 {
6074 value = 0;
6075 parser::invalidate();
6076 }
6077
6078 uint16_t value;
6079
6080 protected:
6081 virtual bool do_match(
6082 _In_reads_or_z_(end) const char* text,
6083 _In_ size_t start = 0,
6084 _In_ size_t end = SIZE_MAX,
6085 _In_ int flags = match_default)
6086 {
6087 stdex_assert(text || start >= end);
6088 value = 0;
6089 this->interval.end = start;
6090 for (;;) {
6091 if (this->interval.end < end && text[this->interval.end]) {
6092 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6093 size_t _value = static_cast<size_t>(value) * 10 + static_cast<size_t>(text[this->interval.end] - '0');
6094 if (_value > UINT16_MAX) {
6095 value = 0;
6096 this->interval.invalidate();
6097 return false;
6098 }
6099 value = (uint16_t)_value;
6100 this->interval.end++;
6101 }
6102 else
6103 break;
6104 }
6105 else
6106 break;
6107 }
6108 if (start < this->interval.end) {
6109 this->interval.start = start;
6110 return true;
6111 }
6112 this->interval.invalidate();
6113 return false;
6114 }
6115 };
6116
6121 {
6122 protected:
6123 virtual bool do_match(
6124 _In_reads_or_z_(end) const char* text,
6125 _In_ size_t start = 0,
6126 _In_ size_t end = SIZE_MAX,
6127 _In_ int flags = match_default)
6128 {
6129 stdex_assert(text || start >= end);
6130 this->interval.end = start;
6131 for (;;) {
6132 if (this->interval.end < end && text[this->interval.end]) {
6133 if ((unsigned int)text[this->interval.end] < 0x20 ||
6134 (unsigned int)text[this->interval.end] == 0x7f ||
6135 text[this->interval.end] == '?' ||
6136 text[this->interval.end] == '/' ||
6137 stdex::isspace(text[this->interval.end]))
6138 break;
6139 else
6140 this->interval.end++;
6141 }
6142 else
6143 break;
6144 }
6145 this->interval.start = start;
6146 return true;
6147 }
6148 };
6149
6153 class http_url_path : public parser
6154 {
6155 public:
6156 virtual void invalidate()
6157 {
6158 segments.clear();
6159 parser::invalidate();
6160 }
6161
6162 std::vector<http_url_path_segment> segments;
6163
6164 protected:
6165 virtual bool do_match(
6166 _In_reads_or_z_(end) const char* text,
6167 _In_ size_t start = 0,
6168 _In_ size_t end = SIZE_MAX,
6169 _In_ int flags = match_default)
6170 {
6171 stdex_assert(text || start >= end);
6173 this->interval.end = start;
6174 segments.clear();
6175 stdex_assert(text || this->interval.end >= end);
6176 if (this->interval.end < end && text[this->interval.end] != '/')
6177 goto error;
6178 this->interval.end++;
6179 s.match(text, this->interval.end, end, flags);
6180 segments.push_back(s);
6181 this->interval.end = s.interval.end;
6182 for (;;) {
6183 if (this->interval.end < end && text[this->interval.end]) {
6184 if (text[this->interval.end] == '/') {
6185 this->interval.end++;
6186 s.match(text, this->interval.end, end, flags);
6187 segments.push_back(s);
6188 this->interval.end = s.interval.end;
6189 }
6190 else
6191 break;
6192 }
6193 else
6194 break;
6195 }
6196 this->interval.start = start;
6197 return true;
6198
6199 error:
6200 invalidate();
6201 return false;
6202 }
6203 };
6204
6209 {
6210 public:
6211 virtual void invalidate()
6212 {
6213 name.start = 1;
6214 name.end = 0;
6215 value.start = 1;
6216 value.end = 0;
6217 parser::invalidate();
6218 }
6219
6222
6223 protected:
6224 virtual bool do_match(
6225 _In_reads_or_z_(end) const char* text,
6226 _In_ size_t start = 0,
6227 _In_ size_t end = SIZE_MAX,
6228 _In_ int flags = match_default)
6229 {
6230 stdex_assert(text || start >= end);
6231 this->interval.end = start;
6232 name.start = this->interval.end;
6233 for (;;) {
6234 if (this->interval.end < end && text[this->interval.end]) {
6235 if ((unsigned int)text[this->interval.end] < 0x20 ||
6236 (unsigned int)text[this->interval.end] == 0x7f ||
6237 text[this->interval.end] == '&' ||
6238 text[this->interval.end] == '=' ||
6239 stdex::isspace(text[this->interval.end]))
6240 break;
6241 else
6242 this->interval.end++;
6243 }
6244 else
6245 break;
6246 }
6247 if (start < this->interval.end)
6248 name.end = this->interval.end;
6249 else
6250 goto error;
6251 if (text[this->interval.end] == '=') {
6252 this->interval.end++;
6253 value.start = this->interval.end;
6254 for (;;) {
6255 if (this->interval.end < end && text[this->interval.end]) {
6256 if ((unsigned int)text[this->interval.end] < 0x20 ||
6257 (unsigned int)text[this->interval.end] == 0x7f ||
6258 text[this->interval.end] == '&' ||
6259 stdex::isspace(text[this->interval.end]))
6260 break;
6261 else
6262 this->interval.end++;
6263 }
6264 else
6265 break;
6266 }
6267 value.end = this->interval.end;
6268 }
6269 else {
6270 value.start = 1;
6271 value.end = 0;
6272 }
6273 this->interval.start = start;
6274 return true;
6275
6276 error:
6277 invalidate();
6278 return false;
6279 }
6280 };
6281
6285 class http_url : public parser
6286 {
6287 public:
6288 http_url(_In_ const std::locale& locale = std::locale()) :
6289 parser(locale),
6290 port(locale)
6291 {}
6292
6293 virtual void invalidate()
6294 {
6295 server.invalidate();
6296 port.invalidate();
6297 path.invalidate();
6298 params.clear();
6299 parser::invalidate();
6300 }
6301
6302 http_url_server server;
6303 http_url_port port;
6304 http_url_path path;
6305 std::list<http_url_parameter> params;
6306
6307 protected:
6308 virtual bool do_match(
6309 _In_reads_or_z_(end) const char* text,
6310 _In_ size_t start = 0,
6311 _In_ size_t end = SIZE_MAX,
6312 _In_ int flags = match_default)
6313 {
6314 stdex_assert(text || start >= end);
6315 this->interval.end = start;
6316
6317 if (this->interval.end + 7 <= end && stdex::strnicmp(text + this->interval.end, 7, "http://", SIZE_MAX, m_locale) == 0) {
6318 this->interval.end += 7;
6319 if (server.match(text, this->interval.end, end, flags))
6320 this->interval.end = server.interval.end;
6321 else
6322 goto error;
6323 if (this->interval.end < end && text[this->interval.end] == ':') {
6324 this->interval.end++;
6325 if (port.match(text, this->interval.end, end, flags))
6326 this->interval.end = port.interval.end;
6327 }
6328 else {
6329 port.invalidate();
6330 port.value = 80;
6331 }
6332 }
6333 else {
6334 server.invalidate();
6335 port.invalidate();
6336 port.value = 80;
6337 }
6338
6339 if (path.match(text, this->interval.end, end, flags))
6340 this->interval.end = path.interval.end;
6341 else
6342 goto error;
6343
6344 params.clear();
6345
6346 if (this->interval.end < end && text[this->interval.end] == '?') {
6347 this->interval.end++;
6348 for (;;) {
6349 if (this->interval.end < end && text[this->interval.end]) {
6350 if ((unsigned int)text[this->interval.end] < 0x20 ||
6351 (unsigned int)text[this->interval.end] == 0x7f ||
6352 stdex::isspace(text[this->interval.end]))
6353 break;
6354 else if (text[this->interval.end] == '&')
6355 this->interval.end++;
6356 else {
6357 http_url_parameter param;
6358 if (param.match(text, this->interval.end, end, flags)) {
6359 this->interval.end = param.interval.end;
6360 params.push_back(std::move(param));
6361 }
6362 else
6363 break;
6364 }
6365 }
6366 else
6367 break;
6368 }
6369 }
6370
6371 this->interval.start = start;
6372 return true;
6373
6374 error:
6375 invalidate();
6376 return false;
6377 }
6378 };
6379
6383 class http_language : public parser
6384 {
6385 public:
6386 virtual void invalidate()
6387 {
6388 components.clear();
6389 parser::invalidate();
6390 }
6391
6392 std::vector<stdex::interval<size_t>> components;
6393
6394 protected:
6395 virtual bool do_match(
6396 _In_reads_or_z_(end) const char* text,
6397 _In_ size_t start = 0,
6398 _In_ size_t end = SIZE_MAX,
6399 _In_ int flags = match_default)
6400 {
6401 stdex_assert(text || start >= end);
6402 this->interval.end = start;
6403 components.clear();
6404 for (;;) {
6405 if (this->interval.end < end && text[this->interval.end]) {
6407 k.end = this->interval.end;
6408 for (;;) {
6409 if (k.end < end && text[k.end]) {
6410 if (stdex::isalpha(text[k.end]))
6411 k.end++;
6412 else
6413 break;
6414 }
6415 else
6416 break;
6417 }
6418 if (this->interval.end < k.end) {
6419 k.start = this->interval.end;
6420 this->interval.end = k.end;
6421 components.push_back(k);
6422 }
6423 else
6424 break;
6425 if (this->interval.end < end && text[this->interval.end] == '-')
6426 this->interval.end++;
6427 else
6428 break;
6429 }
6430 else
6431 break;
6432 }
6433 if (!components.empty()) {
6434 this->interval.start = start;
6435 this->interval.end = components.back().end;
6436 return true;
6437 }
6438 this->interval.invalidate();
6439 return false;
6440 }
6441 };
6442
6446 class http_weight : public parser
6447 {
6448 public:
6449 http_weight(_In_ const std::locale& locale = std::locale()) :
6450 parser(locale),
6451 value(1.0f)
6452 {}
6453
6454 virtual void invalidate()
6455 {
6456 value = 1.0f;
6457 parser::invalidate();
6458 }
6459
6460 float value;
6461
6462 protected:
6463 virtual bool do_match(
6464 _In_reads_or_z_(end) const char* text,
6465 _In_ size_t start = 0,
6466 _In_ size_t end = SIZE_MAX,
6467 _In_ int flags = match_default)
6468 {
6469 stdex_assert(text || start >= end);
6470 size_t celi_del = 0, decimalni_del = 0, decimalni_del_n = 1;
6471 this->interval.end = start;
6472 for (;;) {
6473 if (this->interval.end < end && text[this->interval.end]) {
6474 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6475 celi_del = celi_del * 10 + static_cast<size_t>(text[this->interval.end] - '0');
6476 this->interval.end++;
6477 }
6478 else if (text[this->interval.end] == '.') {
6479 this->interval.end++;
6480 for (;;) {
6481 if (this->interval.end < end && text[this->interval.end]) {
6482 if ('0' <= text[this->interval.end] && text[this->interval.end] <= '9') {
6483 decimalni_del = decimalni_del * 10 + static_cast<size_t>(text[this->interval.end] - '0');
6484 decimalni_del_n *= 10;
6485 this->interval.end++;
6486 }
6487 else
6488 break;
6489 }
6490 else
6491 break;
6492 }
6493 break;
6494 }
6495 else
6496 break;
6497 }
6498 else
6499 break;
6500 }
6501 if (start < this->interval.end) {
6502 value = (float)((double)celi_del + (double)decimalni_del / decimalni_del_n);
6503 this->interval.start = start;
6504 return true;
6505 }
6506 value = 1.0f;
6507 this->interval.invalidate();
6508 return false;
6509 }
6510 };
6511
6515 class http_asterisk : public parser
6516 {
6517 protected:
6518 virtual bool do_match(
6519 _In_reads_or_z_(end) const char* text,
6520 _In_ size_t start = 0,
6521 _In_ size_t end = SIZE_MAX,
6522 _In_ int flags = match_default)
6523 {
6524 stdex_assert(text || end <= start);
6525 if (start < end && text[start] == '*') {
6526 this->interval.end = (this->interval.start = start) + 1;
6527 return true;
6528 }
6529 this->interval.invalidate();
6530 return false;
6531 }
6532 };
6533
6537 template <class T, class T_asterisk = http_asterisk>
6539 {
6540 public:
6541 http_weighted_value(_In_ const std::locale& locale = std::locale()) :
6542 parser(locale),
6543 factor(locale)
6544 {}
6545
6546 virtual void invalidate()
6547 {
6548 asterisk.invalidate();
6549 value.invalidate();
6550 factor.invalidate();
6551 parser::invalidate();
6552 }
6553
6554 T_asterisk asterisk;
6555 T value;
6556 http_weight factor;
6557
6558 protected:
6559 virtual bool do_match(
6560 _In_reads_or_z_(end) const char* text,
6561 _In_ size_t start = 0,
6562 _In_ size_t end = SIZE_MAX,
6563 _In_ int flags = match_default)
6564 {
6565 stdex_assert(text || start >= end);
6566 size_t konec_vrednosti;
6567 this->interval.end = start;
6568 if (asterisk.match(text, this->interval.end, end, flags)) {
6569 this->interval.end = konec_vrednosti = asterisk.interval.end;
6570 value.invalidate();
6571 }
6572 else if (value.match(text, this->interval.end, end, flags)) {
6573 this->interval.end = konec_vrednosti = value.interval.end;
6574 asterisk.invalidate();
6575 }
6576 else {
6577 asterisk.invalidate();
6578 value.invalidate();
6579 this->interval.invalidate();
6580 return false;
6581 }
6582
6583 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6584 if (this->interval.end < end && text[this->interval.end] == ';') {
6585 this->interval.end++;
6586 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6587 if (this->interval.end < end && (text[this->interval.end] == 'q' || text[this->interval.end] == 'Q')) {
6588 this->interval.end++;
6589 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6590 if (this->interval.end < end && text[this->interval.end] == '=') {
6591 this->interval.end++;
6592 while (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])) this->interval.end++;
6593 if (factor.match(text, this->interval.end, end, flags))
6594 this->interval.end = factor.interval.end;
6595 }
6596 }
6597 }
6598 if (!factor.interval) {
6599 factor.invalidate();
6600 this->interval.end = konec_vrednosti;
6601 }
6602 this->interval.start = start;
6603 return true;
6604 }
6605 };
6606
6611 {
6612 public:
6613 virtual void invalidate()
6614 {
6615 name.invalidate();
6616 value.invalidate();
6617 parser::invalidate();
6618 }
6619
6620 http_token name;
6621 http_value value;
6622
6623 protected:
6624 virtual bool do_match(
6625 _In_reads_or_z_(end) const char* text,
6626 _In_ size_t start = 0,
6627 _In_ size_t end = SIZE_MAX,
6628 _In_ int flags = match_default)
6629 {
6630 stdex_assert(text || start >= end);
6631 this->interval.end = start;
6632 if (this->interval.end < end && text[this->interval.end] == '$')
6633 this->interval.end++;
6634 else
6635 goto error;
6636 if (name.match(text, this->interval.end, end, flags))
6637 this->interval.end = name.interval.end;
6638 else
6639 goto error;
6640 while (m_space.match(text, this->interval.end, end, flags))
6641 this->interval.end = m_space.interval.end;
6642 if (this->interval.end < end && text[this->interval.end] == '=')
6643 this->interval.end++;
6644 else
6645 goto error;
6646 while (m_space.match(text, this->interval.end, end, flags))
6647 this->interval.end = m_space.interval.end;
6648 if (value.match(text, this->interval.end, end, flags))
6649 this->interval.end = value.interval.end;
6650 else
6651 goto error;
6652 this->interval.start = start;
6653 return true;
6654
6655 error:
6656 invalidate();
6657 return false;
6658 }
6659
6660 http_space m_space;
6661 };
6662
6666 class http_cookie : public parser
6667 {
6668 public:
6669 virtual void invalidate()
6670 {
6671 name.invalidate();
6672 value.invalidate();
6673 params.clear();
6674 parser::invalidate();
6675 }
6676
6679 std::list<http_cookie_parameter> params;
6680
6681 protected:
6682 virtual bool do_match(
6683 _In_reads_or_z_(end) const char* text,
6684 _In_ size_t start = 0,
6685 _In_ size_t end = SIZE_MAX,
6686 _In_ int flags = match_default)
6687 {
6688 stdex_assert(text || start >= end);
6689 this->interval.end = start;
6690 if (name.match(text, this->interval.end, end, flags))
6691 this->interval.end = name.interval.end;
6692 else
6693 goto error;
6694 while (m_space.match(text, this->interval.end, end, flags))
6695 this->interval.end = m_space.interval.end;
6696 if (this->interval.end < end && text[this->interval.end] == '=')
6697 this->interval.end++;
6698 else
6699 goto error;
6700 while (m_space.match(text, this->interval.end, end, flags))
6701 this->interval.end = m_space.interval.end;
6702 if (value.match(text, this->interval.end, end, flags))
6703 this->interval.end = value.interval.end;
6704 else
6705 goto error;
6706 params.clear();
6707 for (;;) {
6708 if (this->interval.end < end && text[this->interval.end]) {
6709 if (m_space.match(text, this->interval.end, end, flags))
6710 this->interval.end = m_space.interval.end;
6711 else if (text[this->interval.end] == ';') {
6712 this->interval.end++;
6713 while (m_space.match(text, this->interval.end, end, flags))
6714 this->interval.end = m_space.interval.end;
6716 if (param.match(text, this->interval.end, end, flags)) {
6717 this->interval.end = param.interval.end;
6718 params.push_back(std::move(param));
6719 }
6720 else
6721 break;
6722 }
6723 else
6724 break;
6725 }
6726 else
6727 break;
6728 }
6729 this->interval.start = start;
6730 this->interval.end = params.empty() ? value.interval.end : params.back().interval.end;
6731 return true;
6732
6733 error:
6734 invalidate();
6735 return false;
6736 }
6737
6738 http_space m_space;
6739 };
6740
6744 class http_agent : public parser
6745 {
6746 public:
6747 virtual void invalidate()
6748 {
6749 type.start = 1;
6750 type.end = 0;
6751 version.start = 1;
6752 version.end = 0;
6753 parser::invalidate();
6754 }
6755
6758
6759 protected:
6760 virtual bool do_match(
6761 _In_reads_or_z_(end) const char* text,
6762 _In_ size_t start = 0,
6763 _In_ size_t end = SIZE_MAX,
6764 _In_ int flags = match_default)
6765 {
6766 stdex_assert(text || start >= end);
6767 this->interval.end = start;
6768 type.start = this->interval.end;
6769 for (;;) {
6770 if (this->interval.end < end && text[this->interval.end]) {
6771 if (text[this->interval.end] == '/') {
6772 type.end = this->interval.end;
6773 this->interval.end++;
6774 version.start = this->interval.end;
6775 for (;;) {
6776 if (this->interval.end < end && text[this->interval.end]) {
6777 if (stdex::isspace(text[this->interval.end])) {
6778 version.end = this->interval.end;
6779 break;
6780 }
6781 else
6782 this->interval.end++;
6783 }
6784 else {
6785 version.end = this->interval.end;
6786 break;
6787 }
6788 }
6789 break;
6790 }
6791 else if (stdex::isspace(text[this->interval.end])) {
6792 type.end = this->interval.end;
6793 break;
6794 }
6795 else
6796 this->interval.end++;
6797 }
6798 else {
6799 type.end = this->interval.end;
6800 break;
6801 }
6802 }
6803 if (start < this->interval.end) {
6804 this->interval.start = start;
6805 return true;
6806 }
6807 type.start = 1;
6808 type.end = 0;
6809 version.start = 1;
6810 version.end = 0;
6811 this->interval.invalidate();
6812 return false;
6813 }
6814 };
6815
6819 class http_protocol : public parser
6820 {
6821 public:
6822 http_protocol(_In_ const std::locale& locale = std::locale()) :
6823 parser(locale),
6824 version(0x009)
6825 {}
6826
6827 virtual void invalidate()
6828 {
6829 type.start = 1;
6830 type.end = 0;
6831 version_maj.start = 1;
6832 version_maj.end = 0;
6833 version_min.start = 1;
6834 version_min.end = 0;
6835 version = 0x009;
6836 parser::invalidate();
6837 }
6838
6840 stdex::interval<size_t> version_maj;
6841 stdex::interval<size_t> version_min;
6842 uint16_t version;
6843
6844 protected:
6845 virtual bool do_match(
6846 _In_reads_or_z_(end) const char* text,
6847 _In_ size_t start = 0,
6848 _In_ size_t end = SIZE_MAX,
6849 _In_ int flags = match_default)
6850 {
6851 stdex_assert(text || start >= end);
6852 this->interval.end = start;
6853 type.start = this->interval.end;
6854 for (;;) {
6855 if (this->interval.end < end && text[this->interval.end]) {
6856 if (text[this->interval.end] == '/') {
6857 type.end = this->interval.end;
6858 this->interval.end++;
6859 break;
6860 }
6861 else if (stdex::isspace(text[this->interval.end]))
6862 goto error;
6863 else
6864 this->interval.end++;
6865 }
6866 else {
6867 type.end = this->interval.end;
6868 goto error;
6869 }
6870 }
6871 version_maj.start = this->interval.end;
6872 for (;;) {
6873 if (this->interval.end < end && text[this->interval.end]) {
6874 if (text[this->interval.end] == '.') {
6875 version_maj.end = this->interval.end;
6876 this->interval.end++;
6877 version_min.start = this->interval.end;
6878 for (;;) {
6879 if (this->interval.end < end && text[this->interval.end]) {
6880 if (stdex::isspace(text[this->interval.end])) {
6881 version_min.end = this->interval.end;
6882 version =
6883 (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100 +
6884 (uint16_t)strtoui(text + version_min.start, version_min.size(), nullptr, 10);
6885 break;
6886 }
6887 else
6888 this->interval.end++;
6889 }
6890 else
6891 goto error;
6892 }
6893 break;
6894 }
6895 else if (stdex::isspace(text[this->interval.end])) {
6896 version_maj.end = this->interval.end;
6897 version_min.start = 1;
6898 version_min.end = 0;
6899 version = (uint16_t)strtoui(text + version_maj.start, version_maj.size(), nullptr, 10) * 0x100;
6900 break;
6901 }
6902 else
6903 this->interval.end++;
6904 }
6905 else
6906 goto error;
6907 }
6908 this->interval.start = start;
6909 return true;
6910
6911 error:
6912 invalidate();
6913 return false;
6914 }
6915 };
6916
6920 class http_request : public parser
6921 {
6922 public:
6923 http_request(_In_ const std::locale& locale = std::locale()) :
6924 parser(locale),
6925 url(locale),
6926 protocol(locale)
6927 {}
6928
6929 virtual void invalidate()
6930 {
6931 verb.start = 1;
6932 verb.end = 0;
6933 url.invalidate();
6934 protocol.invalidate();
6935 parser::invalidate();
6936 }
6937
6939 http_url url;
6940 http_protocol protocol;
6941
6942 protected:
6943 virtual bool do_match(
6944 _In_reads_or_z_(end) const char* text,
6945 _In_ size_t start = 0,
6946 _In_ size_t end = SIZE_MAX,
6947 _In_ int flags = match_default)
6948 {
6949 stdex_assert(text || start >= end);
6950 this->interval.end = start;
6951
6952 for (;;) {
6953 if (m_line_break.match(text, this->interval.end, end, flags))
6954 goto error;
6955 else if (this->interval.end < end && text[this->interval.end]) {
6956 if (stdex::isspace(text[this->interval.end]))
6957 this->interval.end++;
6958 else
6959 break;
6960 }
6961 else
6962 goto error;
6963 }
6964 verb.start = this->interval.end;
6965 for (;;) {
6966 if (m_line_break.match(text, this->interval.end, end, flags))
6967 goto error;
6968 else if (this->interval.end < end && text[this->interval.end]) {
6969 if (stdex::isspace(text[this->interval.end])) {
6970 verb.end = this->interval.end;
6971 this->interval.end++;
6972 break;
6973 }
6974 else
6975 this->interval.end++;
6976 }
6977 else
6978 goto error;
6979 }
6980
6981 for (;;) {
6982 if (m_line_break.match(text, this->interval.end, end, flags))
6983 goto error;
6984 else if (this->interval.end < end && text[this->interval.end]) {
6985 if (stdex::isspace(text[this->interval.end]))
6986 this->interval.end++;
6987 else
6988 break;
6989 }
6990 else
6991 goto error;
6992 }
6993 if (url.match(text, this->interval.end, end, flags))
6994 this->interval.end = url.interval.end;
6995 else
6996 goto error;
6997
6998 protocol.invalidate();
6999 for (;;) {
7000 if (m_line_break.match(text, this->interval.end, end, flags)) {
7001 this->interval.end = m_line_break.interval.end;
7002 goto end;
7003 }
7004 else if (this->interval.end < end && text[this->interval.end]) {
7005 if (stdex::isspace(text[this->interval.end]))
7006 this->interval.end++;
7007 else
7008 break;
7009 }
7010 else
7011 goto end;
7012 }
7013 for (;;) {
7014 if (m_line_break.match(text, this->interval.end, end, flags)) {
7015 this->interval.end = m_line_break.interval.end;
7016 goto end;
7017 }
7018 else if (protocol.match(text, this->interval.end, end, flags)) {
7019 this->interval.end = protocol.interval.end;
7020 break;
7021 }
7022 else
7023 goto end;
7024 }
7025
7026 for (;;) {
7027 if (m_line_break.match(text, this->interval.end, end, flags)) {
7028 this->interval.end = m_line_break.interval.end;
7029 break;
7030 }
7031 else if (this->interval.end < end && text[this->interval.end])
7032 this->interval.end++;
7033 else
7034 goto end;
7035 }
7036
7037 end:
7038 this->interval.start = start;
7039 return true;
7040
7041 error:
7042 invalidate();
7043 return false;
7044 }
7045
7046 http_line_break m_line_break;
7047 };
7048
7052 class http_header : public parser
7053 {
7054 public:
7055 virtual void invalidate()
7056 {
7057 name.start = 1;
7058 name.end = 0;
7059 value.start = 1;
7060 value.end = 0;
7061 parser::invalidate();
7062 }
7063
7066
7067 protected:
7068 virtual bool do_match(
7069 _In_reads_or_z_(end) const char* text,
7070 _In_ size_t start = 0,
7071 _In_ size_t end = SIZE_MAX,
7072 _In_ int flags = match_default)
7073 {
7074 stdex_assert(text || start >= end);
7075 this->interval.end = start;
7076
7077 if (m_line_break.match(text, this->interval.end, end, flags) ||
7078 (this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end])))
7079 goto error;
7080 name.start = this->interval.end;
7081 for (;;) {
7082 if (m_line_break.match(text, this->interval.end, end, flags))
7083 goto error;
7084 else if (this->interval.end < end && text[this->interval.end]) {
7085 if (stdex::isspace(text[this->interval.end])) {
7086 name.end = this->interval.end;
7087 this->interval.end++;
7088 for (;;) {
7089 if (m_line_break.match(text, this->interval.end, end, flags))
7090 goto error;
7091 else if (this->interval.end < end && text[this->interval.end]) {
7092 if (stdex::isspace(text[this->interval.end]))
7093 this->interval.end++;
7094 else
7095 break;
7096 }
7097 else
7098 goto error;
7099 }
7100 if (this->interval.end < end && text[this->interval.end] == ':') {
7101 this->interval.end++;
7102 break;
7103 }
7104 else
7105 goto error;
7106 break;
7107 }
7108 else if (text[this->interval.end] == ':') {
7109 name.end = this->interval.end;
7110 this->interval.end++;
7111 break;
7112 }
7113 else
7114 this->interval.end++;
7115 }
7116 else
7117 goto error;
7118 }
7119 value.start = SIZE_MAX;
7120 value.end = 0;
7121 for (;;) {
7122 if (m_line_break.match(text, this->interval.end, end, flags)) {
7123 this->interval.end = m_line_break.interval.end;
7124 if (!m_line_break.match(text, this->interval.end, end, flags) &&
7125 this->interval.end < end && text[this->interval.end] && stdex::isspace(text[this->interval.end]))
7126 this->interval.end++;
7127 else
7128 break;
7129 }
7130 else if (this->interval.end < end && text[this->interval.end]) {
7131 if (stdex::isspace(text[this->interval.end]))
7132 this->interval.end++;
7133 else {
7134 if (value.start == SIZE_MAX) value.start = this->interval.end;
7135 value.end = ++this->interval.end;
7136 }
7137 }
7138 else
7139 break;
7140 }
7141 this->interval.start = start;
7142 return true;
7143
7144 error:
7145 invalidate();
7146 return false;
7147 }
7148
7149 http_line_break m_line_break;
7150 };
7151
7155 template <class KEY, class T>
7156 class http_value_collection : public T
7157 {
7158 public:
7159 void insert(
7160 _In_reads_or_z_(end) const char* text,
7161 _In_ size_t start = 0,
7162 _In_ size_t end = SIZE_MAX,
7163 _In_ int flags = match_default)
7164 {
7165 while (start < end) {
7166 while (start < end && text[start] && stdex::isspace(text[start])) start++;
7167 if (start < end && text[start] == ',') {
7168 start++;
7169 while (start < end&& text[start] && stdex::isspace(text[start])) start++;
7170 }
7171 KEY el;
7172 if (el.match(text, start, end, flags)) {
7173 start = el.interval.end;
7174 T::insert(std::move(el));
7175 }
7176 else
7177 break;
7178 }
7179 }
7180 };
7181
7182 template <class T>
7184 constexpr bool operator()(const T& a, const T& b) const noexcept
7185 {
7186 return a.factor.value > b.factor.value;
7187 }
7188 };
7189
7193 template <class T, class AX = std::allocator<T>>
7195
7199 template <class T>
7201 {
7202 public:
7204 _In_ const std::shared_ptr<basic_parser<T>>& quote,
7205 _In_ const std::shared_ptr<basic_parser<T>>& chr,
7206 _In_ const std::shared_ptr<basic_parser<T>>& escape,
7207 _In_ const std::shared_ptr<basic_parser<T>>& sol,
7208 _In_ const std::shared_ptr<basic_parser<T>>& bs,
7209 _In_ const std::shared_ptr<basic_parser<T>>& ff,
7210 _In_ const std::shared_ptr<basic_parser<T>>& lf,
7211 _In_ const std::shared_ptr<basic_parser<T>>& cr,
7212 _In_ const std::shared_ptr<basic_parser<T>>& htab,
7213 _In_ const std::shared_ptr<basic_parser<T>>& uni,
7214 _In_ const std::shared_ptr<basic_integer16<T>>& hex,
7215 _In_ const std::locale& locale = std::locale()) :
7217 m_quote(quote),
7218 m_chr(chr),
7219 m_escape(escape),
7220 m_sol(sol),
7221 m_bs(bs),
7222 m_ff(ff),
7223 m_lf(lf),
7224 m_cr(cr),
7225 m_htab(htab),
7226 m_uni(uni),
7227 m_hex(hex)
7228 {}
7229
7230 virtual void invalidate()
7231 {
7232 value.clear();
7234 }
7235
7236 std::basic_string<T> value;
7237
7238 protected:
7239 virtual bool do_match(
7240 _In_reads_or_z_opt_(end) const T* text,
7241 _In_ size_t start = 0,
7242 _In_ size_t end = SIZE_MAX,
7243 _In_ int flags = match_default)
7244 {
7245 stdex_assert(text || start >= end);
7246 this->interval.end = start;
7247 if (m_quote->match(text, this->interval.end, end, flags)) {
7248 this->interval.end = m_quote->interval.end;
7249 value.clear();
7250 for (;;) {
7251 if (m_quote->match(text, this->interval.end, end, flags)) {
7252 this->interval.start = start;
7253 this->interval.end = m_quote->interval.end;
7254 return true;
7255 }
7256 if (m_escape->match(text, this->interval.end, end, flags)) {
7257 if (m_quote->match(text, m_escape->interval.end, end, flags)) {
7258 value += '"'; this->interval.end = m_quote->interval.end;
7259 continue;
7260 }
7261 if (m_sol->match(text, m_escape->interval.end, end, flags)) {
7262 value += '/'; this->interval.end = m_sol->interval.end;
7263 continue;
7264 }
7265 if (m_bs->match(text, m_escape->interval.end, end, flags)) {
7266 value += '\b'; this->interval.end = m_bs->interval.end;
7267 continue;
7268 }
7269 if (m_ff->match(text, m_escape->interval.end, end, flags)) {
7270 value += '\f'; this->interval.end = m_ff->interval.end;
7271 continue;
7272 }
7273 if (m_lf->match(text, m_escape->interval.end, end, flags)) {
7274 value += '\n'; this->interval.end = m_lf->interval.end;
7275 continue;
7276 }
7277 if (m_cr->match(text, m_escape->interval.end, end, flags)) {
7278 value += '\r'; this->interval.end = m_cr->interval.end;
7279 continue;
7280 }
7281 if (m_htab->match(text, m_escape->interval.end, end, flags)) {
7282 value += '\t'; this->interval.end = m_htab->interval.end;
7283 continue;
7284 }
7285 if (
7286 m_uni->match(text, m_escape->interval.end, end, flags) &&
7287 m_hex->match(text, m_uni->interval.end, std::min(m_uni->interval.end + 4, end), flags | match_case_insensitive) &&
7288 m_hex->interval.size() == 4 /* JSON requests 4-digit Unicode sequneces: \u.... */)
7289 {
7290 stdex_assert(m_hex->value <= 0xffff);
7291 if (sizeof(T) == 1) {
7292 if (m_hex->value > 0x7ff) {
7293 value += (T)(0xe0 | ((m_hex->value >> 12) & 0x0f));
7294 value += (T)(0x80 | ((m_hex->value >> 6) & 0x3f));
7295 value += (T)(0x80 | (m_hex->value & 0x3f));
7296 }
7297 else if (m_hex->value > 0x7f) {
7298 value += (T)(0xc0 | ((m_hex->value >> 6) & 0x1f));
7299 value += (T)(0x80 | (m_hex->value & 0x3f));
7300 }
7301 else
7302 value += (T)(m_hex->value & 0x7f);
7303 }
7304 else
7305 value += (T)m_hex->value;
7306 this->interval.end = m_hex->interval.end;
7307 continue;
7308 }
7309 if (m_escape->match(text, m_escape->interval.end, end, flags)) {
7310 value += '\\'; this->interval.end = m_escape->interval.end;
7311 continue;
7312 }
7313 }
7314 if (m_chr->match(text, this->interval.end, end, flags)) {
7315 value.append(text + m_chr->interval.start, m_chr->interval.size());
7316 this->interval.end = m_chr->interval.end;
7317 continue;
7318 }
7319 break;
7320 }
7321 }
7322 value.clear();
7323 this->interval.invalidate();
7324 return false;
7325 }
7326
7327 std::shared_ptr<basic_parser<T>> m_quote;
7328 std::shared_ptr<basic_parser<T>> m_chr;
7329 std::shared_ptr<basic_parser<T>> m_escape;
7330 std::shared_ptr<basic_parser<T>> m_sol;
7331 std::shared_ptr<basic_parser<T>> m_bs;
7332 std::shared_ptr<basic_parser<T>> m_ff;
7333 std::shared_ptr<basic_parser<T>> m_lf;
7334 std::shared_ptr<basic_parser<T>> m_cr;
7335 std::shared_ptr<basic_parser<T>> m_htab;
7336 std::shared_ptr<basic_parser<T>> m_uni;
7337 std::shared_ptr<basic_integer16<T>> m_hex;
7338 };
7339
7342#ifdef _UNICODE
7343 using tjson_string = wjson_string;
7344#else
7345 using tjson_string = json_string;
7346#endif
7347
7351 template <class T>
7353 {
7354 public:
7355 virtual void invalidate()
7356 {
7357 this->content.invalidate();
7359 }
7360
7362
7363 protected:
7364 virtual bool do_match(
7365 _In_reads_or_z_opt_(end) const T* text,
7366 _In_ size_t start = 0,
7367 _In_ size_t end = SIZE_MAX,
7368 _In_ int flags = match_multiline)
7369 {
7370 _Unreferenced_(flags);
7371 stdex_assert(text || start + 1 >= end);
7372 if (start + 1 < end &&
7373 text[start] == '/' &&
7374 text[start + 1] == '*')
7375 {
7376 // /*
7377 this->content.start = this->interval.end = start + 2;
7378 for (;;) {
7379 if (this->interval.end >= end || !text[this->interval.end])
7380 break;
7381 if (this->interval.end + 1 < end &&
7382 text[this->interval.end] == '*' &&
7383 text[this->interval.end + 1] == '/')
7384 {
7385 // /*...*/
7386 this->content.end = this->interval.end;
7387 this->interval.start = start;
7388 this->interval.end = this->interval.end + 2;
7389 return true;
7390 }
7391 this->interval.end++;
7392 }
7393 }
7394 this->content.invalidate();
7395 this->interval.invalidate();
7396 return false;
7397 }
7398 };
7399
7400 using css_comment = basic_css_comment<char>;
7401 using wcss_comment = basic_css_comment<wchar_t>;
7402#ifdef _UNICODE
7403 using tcss_comment = wcss_comment;
7404#else
7405 using tcss_comment = css_comment;
7406#endif
7407
7411 template <class T>
7412 class basic_css_cdo : public basic_parser<T>
7413 {
7414 protected:
7415 virtual bool do_match(
7416 _In_reads_or_z_opt_(end) const T* text,
7417 _In_ size_t start = 0,
7418 _In_ size_t end = SIZE_MAX,
7419 _In_ int flags = match_multiline)
7420 {
7421 _Unreferenced_(flags);
7422 stdex_assert(text || start + 3 >= end);
7423 if (start + 3 < end &&
7424 text[start] == '<' &&
7425 text[start + 1] == '!' &&
7426 text[start + 2] == '-' &&
7427 text[start + 3] == '-')
7428 {
7429 this->interval.start = start;
7430 this->interval.end = start + 4;
7431 return true;
7432 }
7433 this->interval.invalidate();
7434 return false;
7435 }
7436 };
7437
7440#ifdef _UNICODE
7441 using tcss_cdo = wcss_cdo;
7442#else
7443 using tcss_cdo = css_cdo;
7444#endif
7445
7449 template <class T>
7450 class basic_css_cdc : public basic_parser<T>
7451 {
7452 protected:
7453 virtual bool do_match(
7454 _In_reads_or_z_opt_(end) const T* text,
7455 _In_ size_t start = 0,
7456 _In_ size_t end = SIZE_MAX,
7457 _In_ int flags = match_multiline)
7458 {
7459 _Unreferenced_(flags);
7460 stdex_assert(text || start + 2 >= end);
7461 if (start + 2 < end &&
7462 text[start] == '-' &&
7463 text[start + 1] == '-' &&
7464 text[start + 2] == '>')
7465 {
7466 this->interval.start = start;
7467 this->interval.end = start + 3;
7468 return true;
7469 }
7470 this->interval.invalidate();
7471 return false;
7472 }
7473 };
7474
7477#ifdef _UNICODE
7478 using tcss_cdc = wcss_cdc;
7479#else
7480 using tcss_cdc = css_cdc;
7481#endif
7482
7486 template <class T>
7488 {
7489 public:
7490 virtual void invalidate()
7491 {
7492 this->content.invalidate();
7494 }
7495
7497
7498 protected:
7499 virtual bool do_match(
7500 _In_reads_or_z_opt_(end) const T* text,
7501 _In_ size_t start = 0,
7502 _In_ size_t end = SIZE_MAX,
7503 _In_ int flags = match_multiline)
7504 {
7505 _Unreferenced_(flags);
7506 this->interval.end = start;
7507 stdex_assert(text || this->interval.end >= end);
7508 if (this->interval.end < end &&
7509 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7510 {
7511 // "Quoted...
7512 T quote = text[this->interval.end];
7513 this->content.start = ++this->interval.end;
7514 for (;;) {
7515 if (this->interval.end >= end || !text[this->interval.end])
7516 break;
7517 if (text[this->interval.end] == quote) {
7518 // End quote"
7519 this->content.end = this->interval.end;
7520 this->interval.start = start;
7521 this->interval.end++;
7522 return true;
7523 }
7524 if (this->interval.end + 1 < end &&
7525 text[this->interval.end] == '\\' &&
7526 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7527 {
7528 // Escaped quote
7529 this->interval.end = this->interval.end + 2;
7530 }
7531 else
7532 this->interval.end++;
7533 }
7534 }
7535
7536 this->content.invalidate();
7537 this->interval.invalidate();
7538 return false;
7539 }
7540 };
7541
7542 using css_string = basic_css_string<char>;
7543 using wcss_string = basic_css_string<wchar_t>;
7544#ifdef _UNICODE
7545 using tcss_string = wcss_string;
7546#else
7547 using tcss_string = css_string;
7548#endif
7549
7553 template <class T>
7554 class basic_css_uri : public basic_parser<T>
7555 {
7556 public:
7557 virtual void invalidate()
7558 {
7559 this->content.invalidate();
7561 }
7562
7564
7565 protected:
7566 virtual bool do_match(
7567 _In_reads_or_z_opt_(end) const T* text,
7568 _In_ size_t start = 0,
7569 _In_ size_t end = SIZE_MAX,
7570 _In_ int flags = match_multiline)
7571 {
7572 _Unreferenced_(flags);
7573 this->interval.end = start;
7574 stdex_assert(text || this->interval.end + 3 >= end);
7575 if (this->interval.end + 3 < end &&
7576 (text[this->interval.end] == 'u' || text[this->interval.end] == 'U') &&
7577 (text[this->interval.end + 1] == 'r' || text[this->interval.end + 1] == 'R') &&
7578 (text[this->interval.end + 2] == 'l' || text[this->interval.end + 2] == 'L') &&
7579 text[this->interval.end + 3] == '(')
7580 {
7581 // url(
7582 this->interval.end = this->interval.end + 4;
7583
7584 // Skip whitespace.
7585 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7586 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7587
7588 if (this->interval.end < end &&
7589 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7590 {
7591 // url("Quoted...
7592 T quote = text[this->interval.end];
7593 this->content.start = ++this->interval.end;
7594 for (;;) {
7595 if (this->interval.end >= end || !text[this->interval.end])
7596 goto error;
7597 if (text[this->interval.end] == quote) {
7598 // End quote"
7599 this->content.end = this->interval.end;
7600 this->interval.end++;
7601 break;
7602 }
7603 if (this->interval.end + 1 < end &&
7604 text[this->interval.end] == '\\' &&
7605 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7606 {
7607 // Escaped quote
7608 this->interval.end = this->interval.end + 2;
7609 }
7610 else
7611 this->interval.end++;
7612 }
7613
7614 // Skip whitespace.
7615 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7616
7617 if (this->interval.end < end &&
7618 text[this->interval.end] == ')')
7619 {
7620 // url("...")
7621 this->interval.start = start;
7622 this->interval.end++;
7623 return true;
7624 }
7625 }
7626 else {
7627 // url(...
7628 this->content.start = content.end = this->interval.end;
7629 for (;;) {
7630 if (this->interval.end >= end || !text[this->interval.end])
7631 goto error;
7632 if (text[this->interval.end] == ')') {
7633 // url(...)
7634 this->interval.start = start;
7635 this->interval.end++;
7636 return true;
7637 }
7638 if (ctype.is(ctype.space, text[this->interval.end]))
7639 this->interval.end++;
7640 else
7641 this->content.end = ++this->interval.end;
7642 }
7643 }
7644 }
7645
7646 error:
7647 invalidate();
7648 return false;
7649 }
7650 };
7651
7652 using css_uri = basic_css_uri<char>;
7653 using wcss_uri = basic_css_uri<wchar_t>;
7654#ifdef _UNICODE
7655 using tcss_uri = wcss_uri;
7656#else
7657 using tcss_uri = css_uri;
7658#endif
7659
7663 template <class T>
7665 {
7666 public:
7667 virtual void invalidate()
7668 {
7669 this->content.invalidate();
7671 }
7672
7674
7675 protected:
7676 virtual bool do_match(
7677 _In_reads_or_z_opt_(end) const T* text,
7678 _In_ size_t start = 0,
7679 _In_ size_t end = SIZE_MAX,
7680 _In_ int flags = match_multiline)
7681 {
7682 _Unreferenced_(flags);
7683 this->interval.end = start;
7684 stdex_assert(text || this->interval.end + 6 >= end);
7685 if (this->interval.end + 6 < end &&
7686 text[this->interval.end] == '@' &&
7687 (text[this->interval.end + 1] == 'i' || text[this->interval.end + 1] == 'I') &&
7688 (text[this->interval.end + 2] == 'm' || text[this->interval.end + 2] == 'M') &&
7689 (text[this->interval.end + 3] == 'p' || text[this->interval.end + 3] == 'P') &&
7690 (text[this->interval.end + 4] == 'o' || text[this->interval.end + 4] == 'O') &&
7691 (text[this->interval.end + 5] == 'r' || text[this->interval.end + 5] == 'R') &&
7692 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T'))
7693 {
7694 // @import...
7695 this->interval.end = this->interval.end + 7;
7696
7697 // Skip whitespace.
7698 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7699 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7700
7701 if (this->interval.end < end &&
7702 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7703 {
7704 // @import "Quoted
7705 T quote = text[this->interval.end];
7706 this->content.start = ++this->interval.end;
7707 for (;;) {
7708 if (this->interval.end >= end || !text[this->interval.end])
7709 goto error;
7710 if (text[this->interval.end] == quote) {
7711 // End quote"
7712 this->content.end = this->interval.end;
7713 this->interval.start = start;
7714 this->interval.end++;
7715 return true;
7716 }
7717 if (this->interval.end + 1 < end &&
7718 text[this->interval.end] == '\\' &&
7719 (text[this->interval.end + 1] == '\"' || text[this->interval.end + 1] == '\''))
7720 {
7721 // Escaped quote
7722 this->interval.end = this->interval.end + 2;
7723 }
7724 else
7725 this->interval.end++;
7726 }
7727 }
7728 }
7729
7730 error:
7731 invalidate();
7732 return false;
7733 }
7734 };
7735
7736 using css_import = basic_css_import<char>;
7737 using wcss_import = basic_css_import<wchar_t>;
7738#ifdef _UNICODE
7739 using tcss_import = wcss_import;
7740#else
7741 using tcss_import = css_import;
7742#endif
7743
7747 template <class T>
7749 {
7750 public:
7751 virtual void invalidate()
7752 {
7753 this->base_type.invalidate();
7754 this->sub_type.invalidate();
7755 this->charset.invalidate();
7757 }
7758
7762
7763 protected:
7764 virtual bool do_match(
7765 _In_reads_or_z_opt_(end) const T* text,
7766 _In_ size_t start = 0,
7767 _In_ size_t end = SIZE_MAX,
7768 _In_ int flags = match_multiline)
7769 {
7770 _Unreferenced_(flags);
7771 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7772
7773 this->interval.end = start;
7774 this->base_type.start = this->interval.end;
7775 for (;;) {
7776 stdex_assert(text || this->interval.end >= end);
7777 if (this->interval.end >= end || !text[this->interval.end])
7778 break;
7779 if (text[this->interval.end] == '/' ||
7780 text[this->interval.end] == ';' ||
7781 ctype.is(ctype.space, text[this->interval.end]))
7782 break;
7783 this->interval.end++;
7784 }
7785 if (this->interval.end <= this->base_type.start)
7786 goto error;
7787 this->base_type.end = this->interval.end;
7788
7789 if (end <= this->interval.end || text[this->interval.end] != '/')
7790 goto error;
7791
7792 this->interval.end++;
7793 this->sub_type.start = this->interval.end;
7794 for (;;) {
7795 if (this->interval.end >= end || !text[this->interval.end])
7796 break;
7797 if (text[this->interval.end] == '/' ||
7798 text[this->interval.end] == ';' ||
7799 ctype.is(ctype.space, text[this->interval.end]))
7800 break;
7801 this->interval.end++;
7802 }
7803 if (this->interval.end <= this->sub_type.start)
7804 goto error;
7805
7806 this->sub_type.end = this->interval.end;
7807 this->charset.invalidate();
7808 if (this->interval.end < end && text[this->interval.end] == ';') {
7809 this->interval.end++;
7810
7811 // Skip whitespace.
7812 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
7813
7814 if (this->interval.end + 7 < end &&
7815 (text[this->interval.end] == 'c' || text[this->interval.end] == 'C') &&
7816 (text[this->interval.end + 1] == 'h' || text[this->interval.end + 1] == 'H') &&
7817 (text[this->interval.end + 2] == 'a' || text[this->interval.end + 2] == 'A') &&
7818 (text[this->interval.end + 3] == 'r' || text[this->interval.end + 3] == 'R') &&
7819 (text[this->interval.end + 4] == 's' || text[this->interval.end + 4] == 'S') &&
7820 (text[this->interval.end + 5] == 'e' || text[this->interval.end + 5] == 'E') &&
7821 (text[this->interval.end + 6] == 't' || text[this->interval.end + 6] == 'T') &&
7822 text[this->interval.end + 7] == '=')
7823 {
7824 this->interval.end = this->interval.end + 8;
7825 if (this->interval.end < end &&
7826 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7827 {
7828 // "Quoted...
7829 T quote = text[this->interval.end];
7830 this->charset.start = ++this->interval.end;
7831 for (;;) {
7832 if (this->interval.end >= end || !text[this->interval.end]) {
7833 // No end quote!
7834 this->charset.invalidate();
7835 break;
7836 }
7837 if (text[this->interval.end] == quote) {
7838 // End quote"
7839 this->charset.end = this->interval.end;
7840 this->interval.end++;
7841 break;
7842 }
7843 this->interval.end++;
7844 }
7845 }
7846 else {
7847 // Nonquoted
7848 this->charset.start = this->interval.end;
7849 for (;;) {
7850 if (this->interval.end >= end || !text[this->interval.end] ||
7851 ctype.is(ctype.space, text[this->interval.end])) {
7852 this->charset.end = this->interval.end;
7853 break;
7854 }
7855 this->interval.end++;
7856 }
7857 }
7858 }
7859 }
7860 this->interval.start = start;
7861 return true;
7862
7863 error:
7864 invalidate();
7865 return false;
7866 }
7867 };
7868
// Ready-to-use instantiations of basic_mime_type: mime_type for char and
// wmime_type for wchar_t. tmime_type resolves to the wide variant when
// _UNICODE is defined and to the narrow variant otherwise.
7869 using mime_type = basic_mime_type<char>;
7870 using wmime_type = basic_mime_type<wchar_t>;
7871#ifdef _UNICODE
7872 using tmime_type = wmime_type;
7873#else
7874 using tmime_type = mime_type;
7875#endif
7876
7880 template <class T>
7882 {
7883 protected:
7884 virtual bool do_match(
7885 _In_reads_or_z_opt_(end) const T* text,
7886 _In_ size_t start = 0,
7887 _In_ size_t end = SIZE_MAX,
7888 _In_ int flags = match_default)
7889 {
7890 _Unreferenced_(flags);
7891 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7892 this->interval.end = start;
7893 for (;;) {
7894 stdex_assert(text || this->interval.end >= end);
7895 if (this->interval.end >= end || !text[this->interval.end]) {
7896 if (start < this->interval.end) {
7897 this->interval.start = start;
7898 return true;
7899 }
7900 this->interval.invalidate();
7901 return false;
7902 }
7903 if (text[this->interval.end] == '>' ||
7904 text[this->interval.end] == '=' ||
7905 (text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>') ||
7906 ctype.is(ctype.space, text[this->interval.end]))
7907 {
7908 this->interval.start = start;
7909 return true;
7910 }
7911 this->interval.end++;
7912 }
7913 }
7914 };
7915
// thtml_ident resolves to whtml_ident when _UNICODE is defined and to html_ident otherwise.
7918#ifdef _UNICODE
7919 using thtml_ident = whtml_ident;
7920#else
7921 using thtml_ident = html_ident;
7922#endif
7923
7927 template <class T>
7929 {
7930 public:
// Resets the result of the previous match: the content position is invalidated.
// NOTE(review): the listing jumps from line 7933 to 7935 here; presumably the skipped
// line forwards to the base-class invalidate() - confirm against the original header.
7931 virtual void invalidate()
7932 {
7933 this->content.invalidate();
7935 }
7936
7938
7939 protected:
7940 virtual bool do_match(
7941 _In_reads_or_z_opt_(end) const T* text,
7942 _In_ size_t start = 0,
7943 _In_ size_t end = SIZE_MAX,
7944 _In_ int flags = match_default)
7945 {
7946 _Unreferenced_(flags);
7947 this->interval.end = start;
7948 stdex_assert(text || this->interval.end >= end);
7949 if (this->interval.end < end &&
7950 (text[this->interval.end] == '\"' || text[this->interval.end] == '\''))
7951 {
7952 // "Quoted...
7953 T quote = text[this->interval.end];
7954 this->content.start = ++this->interval.end;
7955 for (;;) {
7956 if (this->interval.end >= end || !text[this->interval.end]) {
7957 // No end quote!
7958 this->content.invalidate();
7959 this->interval.invalidate();
7960 return false;
7961 }
7962 if (text[this->interval.end] == quote) {
7963 // End quote"
7964 this->content.end = this->interval.end;
7965 this->interval.start = start;
7966 this->interval.end++;
7967 return true;
7968 }
7969 this->interval.end++;
7970 }
7971 }
7972
7973 // Nonquoted
7974 this->content.start = this->interval.end;
7975 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
7976 for (;;) {
7977 stdex_assert(text || this->interval.end >= end);
7978 if (this->interval.end >= end || !text[this->interval.end]) {
7979 this->content.end = this->interval.end;
7980 this->interval.start = start;
7981 return true;
7982 }
7983 if (text[this->interval.end] == '>' ||
7984 (text[this->interval.end] == '/' && this->interval.end + 1 < end && text[this->interval.end + 1] == '>') ||
7985 ctype.is(ctype.space, text[this->interval.end]))
7986 {
7987 this->content.end = this->interval.end;
7988 this->interval.start = start;
7989 return true;
7990 }
7991 this->interval.end++;
7992 }
7993 }
7994 };
7995
// Ready-to-use instantiations of basic_html_value: html_value for char and
// whtml_value for wchar_t. thtml_value resolves to the wide variant when
// _UNICODE is defined and to the narrow variant otherwise.
7996 using html_value = basic_html_value<char>;
7997 using whtml_value = basic_html_value<wchar_t>;
7998#ifdef _UNICODE
7999 using thtml_value = whtml_value;
8000#else
8001 using thtml_value = html_value;
8002#endif
8003
///
/// Kind of sequence recognized in an HTML/SGML document
///
enum class html_sequence_t {
	unknown = -1,      ///< Nothing recognized; value used by basic_html_tag before a match and after invalidate()

	text = 0,
	element = 1,       ///< `<tag .../>`
	element_start = 2, ///< `<tag ...>`
	element_end = 3,   ///< `</tag ...>`
	declaration = 4,   ///< `<!...>`
	comment = 5,       ///< `<!--...-->`
	instruction = 6,   ///< `<?...>` or `<?...?>`
	PCDATA = 7,
	CDATA = 8,
};
8020
8028
8032 template <class T>
8034 {
8035 public:
// Constructs the tag parser with `type` preset to html_sequence_t::unknown.
// NOTE(review): the listing jumps from line 8036 to 8038 here; presumably the skipped
// line is the base-class initializer receiving `locale` - confirm against the original header.
8036 basic_html_tag(_In_ const std::locale& locale = std::locale()) :
8038 type(html_sequence_t::unknown)
8039 {}
8040
// Resets the result of the previous match: the type becomes unknown, the name position
// is invalidated and the attribute list is emptied.
// NOTE(review): the listing jumps from line 8045 to 8047 here; presumably the skipped
// line forwards to the base-class invalidate() - confirm against the original header.
8041 virtual void invalidate()
8042 {
8043 this->type = html_sequence_t::unknown;
8044 this->name.invalidate();
8045 this->attributes.clear();
8047 }
8048
8049 html_sequence_t type; ///< tag type, as determined by the last match
8051 std::vector<html_attribute> attributes; ///< tag attributes collected by the last match
8052
8053 protected:
8054 virtual bool do_match(
8055 _In_reads_or_z_opt_(end) const T* text,
8056 _In_ size_t start = 0,
8057 _In_ size_t end = SIZE_MAX,
8058 _In_ int flags = match_multiline)
8059 {
8060 stdex_assert(text || start >= end);
8061 if (start >= end || text[start] != '<')
8062 goto error;
8063 this->interval.end = start + 1;
8064 if (this->interval.end >= end || !text[this->interval.end])
8065 goto error;
8066 if (text[this->interval.end] == '/' &&
8067 this->m_ident.match(text, this->interval.end + 1, end, flags))
8068 {
8069 // </...
8070 this->type = html_sequence_t::element_end;
8071 this->name = this->m_ident.interval;
8072 this->interval.end = this->m_ident.interval.end;
8073 }
8074 else if (text[this->interval.end] == '!') {
8075 // <!...
8076 this->interval.end++;
8077 if (this->interval.end + 1 < end &&
8078 text[this->interval.end] == '-' &&
8079 text[this->interval.end + 1] == '-')
8080 {
8081 // <!--...
8082 this->name.start = this->interval.end = this->interval.end + 2;
8083 for (;;) {
8084 if (this->interval.end >= end || !text[this->interval.end])
8085 goto error;
8086 if (this->interval.end + 2 < end &&
8087 text[this->interval.end] == '-' &&
8088 text[this->interval.end + 1] == '-' &&
8089 text[this->interval.end + 2] == '>')
8090 {
8091 // <!--...-->
8092 this->type = html_sequence_t::comment;
8093 this->name.end = this->interval.end;
8094 this->attributes.clear();
8095 this->interval.start = start;
8096 this->interval.end = this->interval.end + 3;
8097 return true;
8098 }
8099 this->interval.end++;
8100 }
8101 }
8102 this->type = html_sequence_t::declaration;
8103 this->name.start = this->name.end = this->interval.end;
8104 }
8105 else if (text[this->interval.end] == '?') {
8106 // <?...
8107 this->name.start = ++this->interval.end;
8108 for (;;) {
8109 if (this->interval.end >= end || !text[this->interval.end])
8110 goto error;
8111 if (text[this->interval.end] == '>') {
8112 // <?...>
8113 this->type = html_sequence_t::instruction;
8114 this->name.end = this->interval.end;
8115 this->attributes.clear();
8116 this->interval.start = start;
8117 this->interval.end++;
8118 return true;
8119 }
8120 if (this->interval.end + 1 < end &&
8121 text[this->interval.end] == '?' &&
8122 text[this->interval.end + 1] == '>')
8123 {
8124 // <?...?>
8125 this->type = html_sequence_t::instruction;
8126 this->name.end = this->interval.end;
8127 this->attributes.clear();
8128 this->interval.start = start;
8129 this->interval.end = this->interval.end + 2;
8130 return true;
8131 }
8132 this->interval.end++;
8133 }
8134 }
8135 else if (this->m_ident.match(text, this->interval.end, end, flags)) {
8136 // <tag...
8137 this->type = html_sequence_t::element_start;
8138 this->name = this->m_ident.interval;
8139 this->interval.end = this->m_ident.interval.end;
8140 }
8141 else
8142 goto error;
8143
8144 {
8145 // Skip whitespace.
8146 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
8147 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8148
8149 this->attributes.clear();
8150 for (;;) {
8151 if (this->type == html_sequence_t::element_start &&
8152 this->interval.end + 1 < end &&
8153 text[this->interval.end] == '/' &&
8154 text[this->interval.end + 1] == '>')
8155 {
8156 // <tag .../>
8157 this->type = html_sequence_t::element;
8158 this->interval.end = this->interval.end + 2;
8159 break;
8160 }
8161 if (this->interval.end < end &&
8162 text[this->interval.end] == '>')
8163 {
8164 // <tag ...>
8165 this->interval.end++;
8166 break;
8167 }
8168 if (this->type == html_sequence_t::declaration &&
8169 this->interval.end + 1 < end &&
8170 text[this->interval.end] == '!' &&
8171 text[this->interval.end + 1] == '>')
8172 {
8173 // "<!...!>".
8174 this->interval.end = this->interval.end + 2;
8175 break;
8176 }
8177 if (this->type == html_sequence_t::declaration &&
8178 this->interval.end + 1 < end &&
8179 text[this->interval.end] == '-' &&
8180 text[this->interval.end + 1] == '-')
8181 {
8182 // "<! ... --...".
8183 this->interval.end = this->interval.end + 2;
8184 for (;;) {
8185 if (this->interval.end >= end || !text[this->interval.end])
8186 goto error;
8187 if (this->interval.end + 1 < end &&
8188 text[this->interval.end] == '-' &&
8189 text[this->interval.end + 1] == '-')
8190 {
8191 // "<! ... --...--".
8192 this->interval.end = this->interval.end + 2;
8193 break;
8194 }
8195 this->interval.end++;
8196 }
8197
8198 // Skip whitespace.
8199 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8200 continue;
8201 }
8202
8203 if (this->interval.end >= end || !text[this->interval.end])
8204 goto error;
8205
8206 // Attributes follow...
8207 html_attribute* a = nullptr;
8208 if (this->m_ident.match(text, this->interval.end, end, flags)) {
8209 this->attributes.push_back(std::move(html_attribute{ this->m_ident.interval }));
8210 a = &this->attributes.back();
8211 stdex_assert(a);
8212 this->interval.end = this->m_ident.interval.end;
8213 }
8214 else {
8215 // What was that?! Skip.
8216 this->interval.end++;
8217 continue;
8218 }
8219
8220 // Skip whitespace.
8221 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8222
8223 if (this->interval.end < end && text[this->interval.end] == '=') {
8224 this->interval.end++;
8225
8226 // Skip whitespace.
8227 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8228
8229 if (this->m_value.match(text, this->interval.end, end, flags)) {
8230 // This attribute has value.
8231 a->value = this->m_value.content;
8232 this->interval.end = this->m_value.interval.end;
8233
8234 // Skip whitespace.
8235 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8236 }
8237 }
8238 else {
8239 // This attribute has no value.
8240 a->value.invalidate();
8241 }
8242 }
8243 }
8244
8245 this->interval.start = start;
8246 return true;
8247
8248 error:
8249 invalidate();
8250 return false;
8251 }
8252
8253 basic_html_ident<T> m_ident; ///< sub-parser used for tag and attribute names
8254 basic_html_value<T> m_value; ///< sub-parser used for attribute values
8255 };
8256
// Ready-to-use instantiations of basic_html_tag: html_tag for char and
// whtml_tag for wchar_t. thtml_tag resolves to the wide variant when
// _UNICODE is defined and to the narrow variant otherwise.
8257 using html_tag = basic_html_tag<char>;
8258 using whtml_tag = basic_html_tag<wchar_t>;
8259#ifdef _UNICODE
8260 using thtml_tag = whtml_tag;
8261#else
8262 using thtml_tag = html_tag;
8263#endif
8264
8268 template <class T>
8270 {
8271 public:
// Resets the result of the previous match: the condition position is invalidated.
// NOTE(review): the listing jumps from line 8274 to 8276 here; presumably the skipped
// line forwards to the base-class invalidate() - confirm against the original header.
8272 virtual void invalidate()
8273 {
8274 this->condition.invalidate();
8276 }
8277
8278 stdex::interval<size_t> condition; ///< condition position in source (surrounding whitespace excluded)
8279
8280 protected:
8281 virtual bool do_match(
8282 _In_reads_or_z_opt_(end) const T* text,
8283 _In_ size_t start = 0,
8284 _In_ size_t end = SIZE_MAX,
8285 _In_ int flags = match_multiline)
8286 {
8287 _Unreferenced_(flags);
8288 stdex_assert(text || start + 2 >= end);
8289 if (start + 2 < end &&
8290 text[start] == '<' &&
8291 text[start + 1] == '!' &&
8292 text[start + 2] == '[')
8293 {
8294 this->interval.end = start + 3;
8295
8296 // Skip whitespace.
8297 const auto& ctype = std::use_facet<std::ctype<T>>(this->m_locale);
8298 for (; this->interval.end < end && text[this->interval.end] && ctype.is(ctype.space, text[this->interval.end]); this->interval.end++);
8299
8300 this->condition.start = this->condition.end = this->interval.end;
8301
8302 for (;;) {
8303 if (this->interval.end >= end || !text[this->interval.end])
8304 break;
8305 if (text[this->interval.end] == '[') {
8306 this->interval.start = start;
8307 this->interval.end++;
8308 return true;
8309 }
8310 if (ctype.is(ctype.space, text[this->interval.end]))
8311 this->interval.end++;
8312 else
8313 this->condition.end = ++this->interval.end;
8314 }
8315 }
8316
8317 this->condition.invalidate();
8318 this->interval.invalidate();
8319 return false;
8320 }
8321 };
8322
// Ready-to-use instantiations of basic_html_declaration_condition_start for char and
// wchar_t. thtml_declaration_condition_start resolves to the wide variant when
// _UNICODE is defined and to the narrow variant otherwise.
8323 using html_declaration_condition_start = basic_html_declaration_condition_start<char>;
8324 using whtml_declaration_condition_start = basic_html_declaration_condition_start<wchar_t>;
8325#ifdef _UNICODE
8326 using thtml_declaration_condition_start = whtml_declaration_condition_start;
8327#else
8328 using thtml_declaration_condition_start = html_declaration_condition_start;
8329#endif
8330
8334 template <class T>
8336 {
8337 protected:
8338 virtual bool do_match(
8339 _In_reads_or_z_opt_(end) const T* text,
8340 _In_ size_t start = 0,
8341 _In_ size_t end = SIZE_MAX,
8342 _In_ int flags = match_multiline)
8343 {
8344 _Unreferenced_(flags);
8345 stdex_assert(text || start + 2 >= end);
8346 if (start + 2 < end &&
8347 text[start] == ']' &&
8348 text[start + 1] == ']' &&
8349 text[start + 2] == '>')
8350 {
8351 this->interval.start = start;
8352 this->interval.end = start + 3;
8353 return true;
8354 }
8355 this->interval.invalidate();
8356 return false;
8357 }
8358 };
8359
8362#ifdef _UNICODE
8364#else
8366#endif
8367 }
8368}
8369
8370#undef ENUM_FLAG_OPERATOR
8371#undef ENUM_FLAGS
8372
8373#if defined(_MSC_VER)
8374#pragma warning(pop)
8375#elif defined(__GNUC__)
8376#pragma GCC diagnostic pop
8377#endif
locale_t helper class to free_locale when going out of scope.
Definition locale.hpp:74
Test for angle in d°mm'ss.dddd form.
Definition parser.hpp:4361
Test for any code unit.
Definition parser.hpp:216
Test for beginning of line.
Definition parser.hpp:615
Test for any.
Definition parser.hpp:1058
Test for chemical formula.
Definition parser.hpp:5490
Test for Creditor Reference.
Definition parser.hpp:4925
T reference[22]
Normalized national reference number.
Definition parser.hpp:4947
T check_digits[3]
Two check digits.
Definition parser.hpp:4946
bool is_valid
Is reference valid per ISO 7064.
Definition parser.hpp:4948
Legacy CSS comment end -->
Definition parser.hpp:7451
Legacy CSS comment start <!--
Definition parser.hpp:7413
CSS comment.
Definition parser.hpp:7353
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7361
CSS import directive.
Definition parser.hpp:7665
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7673
CSS string.
Definition parser.hpp:7488
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7496
URI in CSS.
Definition parser.hpp:7555
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7563
Test for any code unit from a given string of code units.
Definition parser.hpp:720
Test for specific code unit.
Definition parser.hpp:288
Test for date.
Definition parser.hpp:3994
Test for valid DNS domain character.
Definition parser.hpp:2776
bool allow_on_edge
Is the character allowed at the beginning or the end of a DNS domain?
Definition parser.hpp:2786
Test for DNS domain/hostname.
Definition parser.hpp:2876
bool m_allow_absolute
May DNS names end with a dot (absolute name)?
Definition parser.hpp:2940
Test for e-mail address.
Definition parser.hpp:3768
Test for emoticon.
Definition parser.hpp:3871
std::shared_ptr< basic_parser< T > > apex
apex/eyebrows/halo (e.g. O, 0)
Definition parser.hpp:3899
std::shared_ptr< basic_parser< T > > eyes
eyes (e.g. :, ;, >, |, B)
Definition parser.hpp:3900
std::shared_ptr< basic_set< T > > mouth
mouth (e.g. ), ), (, (, |, P, D, p, d)
Definition parser.hpp:3902
std::shared_ptr< basic_parser< T > > nose
nose (e.g. -, o)
Definition parser.hpp:3901
std::shared_ptr< basic_parser< T > > emoticon
emoticon as a whole (e.g. 😀, 🤔, 😶)
Definition parser.hpp:3898
Test for end of line.
Definition parser.hpp:654
Test for fraction.
Definition parser.hpp:1686
End of condition ...]]>
Definition parser.hpp:8336
Start of condition <![condition[...
Definition parser.hpp:8270
virtual bool do_match(_In_reads_or_z_opt_(end) const T *text, size_t start=0, size_t end=SIZE_MAX, int flags=match_multiline)
Matches the start of a condition <![condition[; on success the condition position in source is stored in condition.
Definition parser.hpp:8281
Contiguous sequence of characters representing name of element, attribute etc.
Definition parser.hpp:7882
Tag.
Definition parser.hpp:8034
std::vector< html_attribute > attributes
tag attributes
Definition parser.hpp:8051
html_sequence_t type
tag type
Definition parser.hpp:8049
stdex::interval< size_t > name
tag name position in source
Definition parser.hpp:8050
Optionally-quoted string representing value of an attribute.
Definition parser.hpp:7929
stdex::interval< size_t > content
content position in source
Definition parser.hpp:7937
Test for International Bank Account Number.
Definition parser.hpp:4636
T bban[31]
Normalized Basic Bank Account Number.
Definition parser.hpp:4661
T country[3]
ISO 3166-1 alpha-2 country code.
Definition parser.hpp:4659
T check_digits[3]
Two check digits.
Definition parser.hpp:4660
bool is_valid
Is IBAN valid per ISO 7064.
Definition parser.hpp:4662
Test for decimal integer.
Definition parser.hpp:1296
Test for decimal integer possibly containing thousand separators.
Definition parser.hpp:1381
bool has_separators
Did integer have any separators?
Definition parser.hpp:1402
size_t digit_count
Total number of digits in integer.
Definition parser.hpp:1401
Test for hexadecimal integer.
Definition parser.hpp:1461
Base class for integer testing.
Definition parser.hpp:1274
size_t value
Calculated value of the numeral.
Definition parser.hpp:1288
Test for IPv4 address.
Definition parser.hpp:2344
stdex::interval< size_t > components[4]
Individual component intervals.
Definition parser.hpp:2389
struct in_addr value
IPv4 address value.
Definition parser.hpp:2390
Test for IPv6 address.
Definition parser.hpp:2556
std::shared_ptr< basic_parser< T > > scope_id
Scope ID (e.g. NIC index with link-local addresses)
Definition parser.hpp:2628
stdex::interval< size_t > components[8]
Individual component intervals.
Definition parser.hpp:2626
struct in6_addr value
IPv6 address value.
Definition parser.hpp:2627
Test for valid IPv6 address scope ID character.
Definition parser.hpp:2482
Test for repeating.
Definition parser.hpp:910
bool m_greedy
try to match as long a sequence as possible
Definition parser.hpp:949
std::shared_ptr< basic_parser< T > > m_el
repeating element
Definition parser.hpp:946
size_t m_min_iterations
minimum number of iterations
Definition parser.hpp:947
size_t m_max_iterations
maximum number of iterations
Definition parser.hpp:948
Test for JSON string.
Definition parser.hpp:7201
MIME content type.
Definition parser.hpp:7749
stdex::interval< size_t > base_type
basic type position in source
Definition parser.hpp:7759
stdex::interval< size_t > sub_type
sub-type position in source
Definition parser.hpp:7760
stdex::interval< size_t > charset
charset position in source
Definition parser.hpp:7761
Test for mixed numeral.
Definition parser.hpp:1921
std::shared_ptr< basic_parser< T > > fraction
fraction
Definition parser.hpp:1954
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1952
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1951
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1950
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:1953
Test for monetary numeral.
Definition parser.hpp:2215
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2248
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2253
std::shared_ptr< basic_parser< T > > currency
Currency part.
Definition parser.hpp:2251
std::shared_ptr< basic_parser< T > > decimal
Decimal part.
Definition parser.hpp:2254
std::shared_ptr< basic_parser< T > > integer
Integer part.
Definition parser.hpp:2252
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2249
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2250
"No-op" match
Definition parser.hpp:184
Base template for all parsers.
Definition parser.hpp:81
stdex::interval< size_t > interval
Region of the last match.
Definition parser.hpp:121
Test for permutation.
Definition parser.hpp:1198
Test for phone number.
Definition parser.hpp:4484
std::basic_string< T > value
Normalized phone number.
Definition parser.hpp:4509
Test for any punctuation code unit.
Definition parser.hpp:461
Test for Roman numeral.
Definition parser.hpp:1570
Test for scientific numeral.
Definition parser.hpp:2046
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:2092
std::shared_ptr< basic_parser< T > > exponent_symbol
Exponent symbol (e.g. 'e')
Definition parser.hpp:2096
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:2090
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:2091
double value
Calculated value of the numeral.
Definition parser.hpp:2100
std::shared_ptr< basic_parser< T > > negative_exp_sign
Negative exponent sign (e.g. '-')
Definition parser.hpp:2098
std::shared_ptr< basic_integer< T > > decimal
Decimal part.
Definition parser.hpp:2095
std::shared_ptr< basic_parser< T > > positive_exp_sign
Positive exponent sign (e.g. '+')
Definition parser.hpp:2097
std::shared_ptr< basic_integer< T > > exponent
Exponent part.
Definition parser.hpp:2099
std::shared_ptr< basic_parser< T > > decimal_separator
Decimal separator.
Definition parser.hpp:2094
std::shared_ptr< basic_integer< T > > integer
Integer part.
Definition parser.hpp:2093
Test for match score.
Definition parser.hpp:1749
Test for sequence.
Definition parser.hpp:1006
Definition parser.hpp:689
Test for SI Reference delimiter.
Definition parser.hpp:5119
Test for SI Reference part.
Definition parser.hpp:5073
Test for SI Reference.
Definition parser.hpp:5158
basic_si_reference_part< T > part3
Reference data part 3 (P3)
Definition parser.hpp:5187
basic_si_reference_part< T > part1
Reference data part 1 (P1)
Definition parser.hpp:5185
bool is_valid
Is reference valid.
Definition parser.hpp:5188
T model[3]
Reference model.
Definition parser.hpp:5184
basic_si_reference_part< T > part2
Reference data part 2 (P2)
Definition parser.hpp:5186
Test for signed numeral.
Definition parser.hpp:1835
std::shared_ptr< basic_parser< T > > special_sign
Special sign (e.g. plus-minus '±')
Definition parser.hpp:1861
std::shared_ptr< basic_parser< T > > negative_sign
Negative sign.
Definition parser.hpp:1860
std::shared_ptr< basic_parser< T > > positive_sign
Positive sign.
Definition parser.hpp:1859
std::shared_ptr< basic_parser< T > > number
Number.
Definition parser.hpp:1862
Test for any space code unit.
Definition parser.hpp:381
Test for any space or punctuation code unit.
Definition parser.hpp:536
Test for any string.
Definition parser.hpp:1126
Test for given string.
Definition parser.hpp:815
Test for time.
Definition parser.hpp:4259
Test for valid URL password character.
Definition parser.hpp:3060
Test for valid URL path character.
Definition parser.hpp:3162
Test for URL path.
Definition parser.hpp:3272
Test for valid URL username character.
Definition parser.hpp:2959
Test for URL.
Definition parser.hpp:3412
Test for HTTP agent.
Definition parser.hpp:6745
Test for HTTP any type.
Definition parser.hpp:5888
Test for HTTP asterisk.
Definition parser.hpp:6516
Test for HTTP header.
Definition parser.hpp:7053
Test for HTTP language (RFC1766)
Definition parser.hpp:6384
Test for HTTP line break (RFC2616: CRLF | LF)
Definition parser.hpp:5570
Test for HTTP media range (RFC2616: media-range)
Definition parser.hpp:5920
Test for HTTP media type (RFC2616: media-type)
Definition parser.hpp:5972
Test for HTTP parameter (RFC2616: parameter)
Definition parser.hpp:5836
http_token name
Parameter name.
Definition parser.hpp:5845
http_value value
Parameter value.
Definition parser.hpp:5846
Test for HTTP protocol.
Definition parser.hpp:6820
uint16_t version
HTTP protocol version: 0x100 = 1.0, 0x101 = 1.1...
Definition parser.hpp:6842
Test for HTTP quoted string (RFC2616: quoted-string)
Definition parser.hpp:5729
stdex::interval< size_t > content
String content (without quotes)
Definition parser.hpp:5738
Test for HTTP request.
Definition parser.hpp:6921
Test for HTTP space (RFC2616: LWS)
Definition parser.hpp:5606
Test for HTTP text character (RFC2616: TEXT)
Definition parser.hpp:5642
Test for HTTP token (RFC2616: token - tolerates non-ASCII)
Definition parser.hpp:5675
Test for HTTP URL parameter.
Definition parser.hpp:6209
Test for HTTP URL path segment.
Definition parser.hpp:6121
Test for HTTP URL path.
Definition parser.hpp:6154
std::vector< http_url_path_segment > segments
Path segments.
Definition parser.hpp:6162
Test for HTTP URL port.
Definition parser.hpp:6065
Test for HTTP URL server.
Definition parser.hpp:6028
Test for HTTP URL.
Definition parser.hpp:6286
Collection of HTTP values.
Definition parser.hpp:7157
Test for HTTP value (RFC2616: value)
Definition parser.hpp:5792
http_quoted_string string
Value when matched as quoted string.
Definition parser.hpp:5801
http_token token
Value when matched as token.
Definition parser.hpp:5802
Test for HTTP weight factor.
Definition parser.hpp:6447
float value
Calculated value of the weight factor.
Definition parser.hpp:6460
Test for HTTP weighted value.
Definition parser.hpp:6539
Base template for collection-holding parsers.
Definition parser.hpp:966
Test for any SGML code point.
Definition parser.hpp:249
Test for any SGML code point from a given string of SGML code points.
Definition parser.hpp:772
Test for specific SGML code point.
Definition parser.hpp:337
Test for valid DNS domain SGML character.
Definition parser.hpp:2831
Test for valid IPv6 address scope ID SGML character.
Definition parser.hpp:2521
Test for any SGML punctuation code point.
Definition parser.hpp:502
Test for any SGML space code point.
Definition parser.hpp:424
Test for any SGML space or punctuation code point.
Definition parser.hpp:579
Test for SGML given string.
Definition parser.hpp:862
Test for valid URL password SGML character.
Definition parser.hpp:3113
Test for valid URL path SGML character.
Definition parser.hpp:3219
Test for valid URL username SGML character.
Definition parser.hpp:3011
Numerical interval.
Definition interval.hpp:18
T size() const
Returns interval size.
Definition interval.hpp:47
T end
interval end
Definition interval.hpp:20
interval() noexcept
Constructs an invalid interval.
Definition interval.hpp:25
void invalidate()
Invalidates interval.
Definition interval.hpp:59
T start
interval start
Definition interval.hpp:19
Tag attribute.
Definition parser.hpp:8024
stdex::interval< size_t > name
attribute name position in source
Definition parser.hpp:8025
stdex::interval< size_t > value
attribute value position in source
Definition parser.hpp:8026
Definition parser.hpp:7183