stdex
Additional custom algorithms and algorithms not covered by Standard C++
All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
stdex::html::parser< T, TR, AX > Class Template Reference

HTML parser. More...

Public Member Functions

 parser (const document< T, TR, AX > &document, _In_reads_or_z_opt_(num_chars) const stdex::schar_t *url=nullptr, size_t num_chars=0, bool parse_frames=false, stdex::progress< size_t > *progress=nullptr)
 
text_token< T, TR, AX > * parse ()
 Parses HTML document.
 
token_list::const_iterator end_tokens (std::basic_string< T, TR, AX > &source, token_list &active_tokens, const token_list &new_tokens)
 Pops ending tokens from the active token list and appends their tags to the source code string.
 
void append_inserted_tokens (std::basic_string< T, TR, AX > &source, inserted_token_list &inserted_tokens, size_t word_index, bool after_word, token_list &active_tokens)
 Adds matching inserted tokens before/after the given word in source code.
 
void make_absolute_url (std::basic_string< T, TR, AX > &rel)
 Converts URL to absolute.
 
const token_vector & tokens () const
 Returns collection of tokens.
 

Static Public Member Functions

static void link (std::basic_string< T, TR, AX > &source, const text_token< T, TR, AX > *t)
 Rebuilds HTML source code from the token tree.
 
static void start_tokens (std::basic_string< T, TR, AX > &source, token_list &active_tokens, const token_list &new_tokens, token_list::const_iterator from)
 Pushes tokens to the active token list and appends their tags to the source code string.
 
static void merge (token_list &a, const token_list &b)
 Adds tokens from list b to list a, creating a union.
 

Protected Member Functions

template<class T_token >
T_token * append_token (std::unique_ptr< T_token > &&token)
 Adds token to the collection.
 
template<class T_token >
size_t append_token (std::unique_ptr< T_token > &&token, std::basic_string< T, TR, AX > &source)
 Adds token to the collection and appends its tag to the source code string.
 
text_token< T, TR, AX > * parse (const sequence_store::const_iterator &end, uint32_t text_type=0)
 Recursively parses HTML document.
 
text_token< T, TR, AX > * parse_css (size_t start, size_t end)
 Parses CSS.
 

Protected Attributes

const document< T, TR, AX > & m_document
 Document being analyzed.
 
const stdex::sstring m_url
 Absolute document URL.
 
const bool m_parse_frames
 Parse frames.
 
stdex::progress< size_t > * m_progress
 Progress indicator.
 
const T * m_source
 HTML source code.
 
token_vector m_tokens
 HTML token storage.
 
sequence_store::const_iterator m_offset
 Index of active section.
 
stdex::parser::basic_css_cdo< T > m_css_cdo
 
stdex::parser::basic_css_cdc< T > m_css_cdc
 
stdex::parser::basic_css_comment< T > m_css_comment
 
stdex::parser::basic_css_string< T > m_css_string
 
stdex::parser::basic_css_uri< T > m_css_uri
 
stdex::parser::basic_css_import< T > m_css_import
 
stdex::parser::basic_any_cu< T > m_any_char
 

Detailed Description

template<class T, class TR, class AX>
class stdex::html::parser< T, TR, AX >

HTML parser.

Member Function Documentation

◆ append_inserted_tokens()

template<class T , class TR , class AX >
void stdex::html::parser< T, TR, AX >::append_inserted_tokens ( std::basic_string< T, TR, AX > & source,
inserted_token_list & inserted_tokens,
size_t word_index,
bool after_word,
token_list & active_tokens )
inline

Adds matching inserted tokens before/after the given word in source code.

Parameters
[in,out] source — Source code
[in,out] inserted_tokens — List of tokens to insert. The tokens are removed from the list once inserted.
[in] word_index — Word index
[in] after_word — false if source code is before the word; true if after the word
[in,out] active_tokens — Stack of active tokens

◆ append_token() [1/2]

template<class T , class TR , class AX >
template<class T_token >
T_token * stdex::html::parser< T, TR, AX >::append_token ( std::unique_ptr< T_token > && token)
inline protected

Adds token to the collection.

Parameters
[in] token — Token
Returns
Pointer to the token for non-owning references

◆ append_token() [2/2]

template<class T , class TR , class AX >
template<class T_token >
size_t stdex::html::parser< T, TR, AX >::append_token ( std::unique_ptr< T_token > && token,
std::basic_string< T, TR, AX > & source )
inline protected

Adds token to the collection and appends its tag to the source code string.

Parameters
[in] token — Token
[in,out] source — Source code
Returns
Number of code units appended to the source code

◆ end_tokens()

template<class T , class TR , class AX >
token_list::const_iterator stdex::html::parser< T, TR, AX >::end_tokens ( std::basic_string< T, TR, AX > & source,
token_list & active_tokens,
const token_list & new_tokens )
inline

Pops ending tokens from the active token list and appends their tags to the source code string.

Parameters
[in,out] source — Source code
[in,out] active_tokens — Stack of active tokens
[in] new_tokens — Desired stack of active tokens
Returns
Position in new_tokens specifying where the cut was made

◆ link()

template<class T , class TR , class AX >
static void stdex::html::parser< T, TR, AX >::link ( std::basic_string< T, TR, AX > & source,
const text_token< T, TR, AX > * t )
inline static

Rebuilds HTML source code from the token tree.

Parameters
[in,out] source — String to append source code to
[in] t — Document root token

◆ merge()

template<class T , class TR , class AX >
static void stdex::html::parser< T, TR, AX >::merge ( token_list & a,
const token_list & b )
inline static

Adds tokens from list b to list a, creating a union.

Parameters
[in,out] a — Token list to merge b into
[in] b — Token list to merge into a

◆ parse()

template<class T , class TR , class AX >
text_token< T, TR, AX > * stdex::html::parser< T, TR, AX >::parse ( const sequence_store::const_iterator & end,
uint32_t text_type = 0 )
inline protected

Recursively parses HTML document.

Parameters
[in] end — Parse sequences on the [m_offset, end) interval
[in] text_type — Text flags of the sequences being parsed
Returns
Token representing sequences parsed

◆ start_tokens()

template<class T , class TR , class AX >
static void stdex::html::parser< T, TR, AX >::start_tokens ( std::basic_string< T, TR, AX > & source,
token_list & active_tokens,
const token_list & new_tokens,
token_list::const_iterator from )
inline static

Pushes tokens to the active token list and appends their tags to the source code string.

Parameters
[in,out] source — Source code
[in,out] active_tokens — Stack of active tokens
[in] new_tokens — New tokens to add
[in] from — Token from new_tokens to start adding at

The documentation for this class was generated from the following file: