stdex
Additional algorithms that are custom or not covered by Standard C++
|
HTML parser. More...
Public Member Functions | |
parser (const document< T, TR, AX > &document, _In_reads_or_z_opt_(num_chars) const stdex::schar_t *url=nullptr, size_t num_chars=0, bool parse_frames=false, stdex::progress< size_t > *progress=nullptr) | |
text_token< T, TR, AX > * | parse () |
Parses HTML document. | |
token_list::const_iterator | end_tokens (std::basic_string< T, TR, AX > &source, token_list &active_tokens, const token_list &new_tokens) |
Pops ending tokens from the active token list and appends their tags to the source code string. | |
void | append_inserted_tokens (std::basic_string< T, TR, AX > &source, inserted_token_list &inserted_tokens, size_t word_index, bool after_word, token_list &active_tokens) |
Adds matching inserted tokens before/after the given word in source code. | |
void | make_absolute_url (std::basic_string< T, TR, AX > &rel) |
Converts URL to absolute. | |
const token_vector & | tokens () const |
Returns collection of tokens. | |
Static Public Member Functions | |
static void | link (std::basic_string< T, TR, AX > &source, const text_token< T, TR, AX > *t) |
Rebuilds HTML source code from the token tree. | |
static void | start_tokens (std::basic_string< T, TR, AX > &source, token_list &active_tokens, const token_list &new_tokens, token_list::const_iterator from) |
Pushes tokens to the active token list and appends their tags to the source code string. | |
static void | merge (token_list &a, const token_list &b) |
Adds tokens from list b to list a, creating a union. | |
Protected Member Functions | |
template<class T_token > | |
T_token * | append_token (std::unique_ptr< T_token > &&token) |
Adds token to the collection. | |
template<class T_token > | |
size_t | append_token (std::unique_ptr< T_token > &&token, std::basic_string< T, TR, AX > &source) |
Adds token to the collection and appends its tag to the source code string. | |
text_token< T, TR, AX > * | parse (const sequence_store::const_iterator &end, uint32_t text_type=0) |
Recursively parses HTML document. | |
text_token< T, TR, AX > * | parse_css (size_t start, size_t end) |
Parses CSS. | |
Protected Attributes | |
const document< T, TR, AX > & | m_document |
Document being analyzed. | |
const stdex::sstring | m_url |
Absolute document URL. | |
const bool | m_parse_frames |
Parse frames. | |
stdex::progress< size_t > * | m_progress |
Progress indicator. | |
const T * | m_source |
HTML source code. | |
token_vector | m_tokens |
HTML token storage. | |
sequence_store::const_iterator | m_offset |
Index of active section. | |
stdex::parser::basic_css_cdo< T > | m_css_cdo |
stdex::parser::basic_css_cdc< T > | m_css_cdc |
stdex::parser::basic_css_comment< T > | m_css_comment |
stdex::parser::basic_css_string< T > | m_css_string |
stdex::parser::basic_css_uri< T > | m_css_uri |
stdex::parser::basic_css_import< T > | m_css_import |
stdex::parser::basic_any_cu< T > | m_any_char |
HTML parser.
|
inline |
Adds matching inserted tokens before/after the given word in source code.
[in,out] | source | Source code |
[in,out] | inserted_tokens | List of tokens to insert. The tokens are removed from the list once inserted. |
[in] | word_index | Word index |
[in] | after_word | false if source code is before the word; true if after the word |
[in,out] | active_tokens | Stack of active tokens |
|
inlineprotected |
Adds token to the collection.
[in] | token | Token |
|
inlineprotected |
Adds token to the collection and appends its tag to the source code string.
[in] | token | Token |
[in,out] | source | Source code |
|
inline |
Pops ending tokens from the active token list and appends their tags to the source code string.
[in,out] | source | Source code |
[in,out] | active_tokens | Stack of active tokens |
[in] | new_tokens | Desired stack of active tokens |
Returns an iterator into new_tokens specifying where the cut was made
|
inlinestatic |
Rebuilds HTML source code from the token tree.
[in,out] | source | String to append source code to |
[in] | t | Document root token |
|
inlinestatic |
Adds tokens from list b to list a, creating a union.
[in,out] | a | Token list to merge b into |
[in] | b | Token list to merge into a |
|
inlineprotected |
Recursively parses HTML document.
[in] | end | Parse sequences on the [m_offset, end) interval |
[in] | text_type | Text flags of the sequences being parsed |
|
inlinestatic |
Pushes tokens to the active token list and appends their tags to the source code string.
[in,out] | source | Source code |
[in,out] | active_tokens | Stack of active tokens |
[in] | new_tokens | New tokens to add |
[in] | from | Token from new_tokens to start adding at |