Topics:
Overview
Enumerations
Data Structures
Functions
HTML Tag Handlers
The gxsHTML class is a base class used to parse HTML documents. The gxsHTML class works through multiple inheritance. It includes functions to load and parse HTML files. HTML tags are handled through the use of virtual tag handlers. Derived classes are responsible for processing HTML tags and any associated attributes by overriding the appropriate tag handler.
// The following list of HTML tags is a combination of HTML
// 2.0, 3.0, 3.2 tags supported by Netscape's Navigator
// web browser, Microsoft's Internet Explorer web browser,
// and standards defined by the World Wide Web Consortium.
// This list was taken from Willcam's Comprehensive HTML
// Cross Reference at:
// http://www.willcam.com/cmat/html/crossref.html
// Tag ID enumeration: one integer constant per recognized HTML tag or
// special character sequence. Values are assigned consecutively,
// starting with gxs_invalid_tag = 0.
enum { // HTML tags and modifiers ID enumeration
gxsHTML::gxs_invalid_tag = 0, // Invalid tag specified
gxsHTML::gxs_unknown_tag, // Unknown tag specified
gxsHTML::gxs_special_tag, // Unknown special tags starting with
// an ampersand and ending in a semicolon:
// &xxxx;
// Tags and format specifiers with special meaning
gxsHTML::gxs_comment_tag, // comment
gxsHTML::gxs_less_then, // Less than sign "<"
gxsHTML::gxs_greater_then, // Greater than sign ">"
gxsHTML::gxs_ampersand, // Ampersand "&"
gxsHTML::gxs_nb_space, // Non-breaking space
gxsHTML::gxs_quote, // Quotation mark (")
gxsHTML::gxs_ex_acsii_set, // Extended ASCII character set "&#"
// HTML tag codes
gxsHTML::gxs_a_tag, // anchor
gxsHTML::gxs_abbrev_tag, // abbreviation
gxsHTML::gxs_acronym_tag, // acronym
gxsHTML::gxs_address_tag, // address
gxsHTML::gxs_applet_tag, // java applet
gxsHTML::gxs_area_tag, // area
gxsHTML::gxs_au_tag, // author (AU form)
gxsHTML::gxs_author_tag, // author (AUTHOR form)
gxsHTML::gxs_b_tag, // bold
gxsHTML::gxs_banner_tag, // banner
gxsHTML::gxs_base_tag, // base
gxsHTML::gxs_basefont_tag, // base font
gxsHTML::gxs_bgsound_tag, // background sound
gxsHTML::gxs_big_tag, // big text
gxsHTML::gxs_blink_tag, // blink
gxsHTML::gxs_blockquote_tag, // block quote (BLOCKQUOTE form)
gxsHTML::gxs_bq_tag, // block quote (BQ form)
gxsHTML::gxs_body_tag, // body
gxsHTML::gxs_br_tag, // line break
gxsHTML::gxs_caption_tag, // caption
gxsHTML::gxs_center_tag, // center
gxsHTML::gxs_cite_tag, // citation
gxsHTML::gxs_code_tag, // code
gxsHTML::gxs_col_tag, // table column
gxsHTML::gxs_colgroup_tag, // table column group
gxsHTML::gxs_credit_tag, // credit
gxsHTML::gxs_del_tag, // deleted text
gxsHTML::gxs_dfn_tag, // definition
gxsHTML::gxs_dir_tag, // directory list
gxsHTML::gxs_div_tag, // division
gxsHTML::gxs_dl_tag, // definition list
gxsHTML::gxs_dt_tag, // definition term
gxsHTML::gxs_dd_tag, // definition description
gxsHTML::gxs_em_tag, // emphasized
gxsHTML::gxs_embed_tag, // embed
gxsHTML::gxs_fig_tag, // figure
gxsHTML::gxs_fn_tag, // footnote
gxsHTML::gxs_font_tag, // font
gxsHTML::gxs_form_tag, // form
gxsHTML::gxs_frame_tag, // frame
gxsHTML::gxs_frameset_tag, // frame set
gxsHTML::gxs_h1_tag, // heading 1
gxsHTML::gxs_h2_tag, // heading 2
gxsHTML::gxs_h3_tag, // heading 3
gxsHTML::gxs_h4_tag, // heading 4
gxsHTML::gxs_h5_tag, // heading 5
gxsHTML::gxs_h6_tag, // heading 6
gxsHTML::gxs_head_tag, // head
gxsHTML::gxs_hr_tag, // horizontal rule
gxsHTML::gxs_html_tag, // html
gxsHTML::gxs_i_tag, // italic
gxsHTML::gxs_iframe_tag, // frame - floating
gxsHTML::gxs_img_tag, // inline image
gxsHTML::gxs_input_tag, // form input
gxsHTML::gxs_ins_tag, // inserted text
gxsHTML::gxs_isindex_tag, // is index
gxsHTML::gxs_kbd_tag, // keyboard
gxsHTML::gxs_lang_tag, // language
gxsHTML::gxs_lh_tag, // list heading
gxsHTML::gxs_li_tag, // list item
gxsHTML::gxs_link_tag, // link
gxsHTML::gxs_listing_tag, // listing
gxsHTML::gxs_map_tag, // map
gxsHTML::gxs_marquee_tag, // marquee
gxsHTML::gxs_math_tag, // math
gxsHTML::gxs_menu_tag, // menu list
gxsHTML::gxs_meta_tag, // meta
gxsHTML::gxs_multicol_tag, // multi column text
gxsHTML::gxs_nobr_tag, // no break
gxsHTML::gxs_noframes_tag, // no frames
gxsHTML::gxs_note_tag, // note
gxsHTML::gxs_ol_tag, // ordered list
gxsHTML::gxs_overlay_tag, // overlay
gxsHTML::gxs_p_tag, // paragraph
gxsHTML::gxs_param_tag, // parameters
gxsHTML::gxs_person_tag, // person
gxsHTML::gxs_plaintext_tag, // plain text
gxsHTML::gxs_pre_tag, // preformatted text
gxsHTML::gxs_q_tag, // quote
gxsHTML::gxs_range_tag, // range
gxsHTML::gxs_samp_tag, // sample
gxsHTML::gxs_script_tag, // script
gxsHTML::gxs_select_tag, // form select
gxsHTML::gxs_small_tag, // small text
gxsHTML::gxs_spacer_tag, // white space
gxsHTML::gxs_spot_tag, // spot
gxsHTML::gxs_strike_tag, // strikethrough
gxsHTML::gxs_strong_tag, // strong
gxsHTML::gxs_sub_tag, // subscript
gxsHTML::gxs_sup_tag, // superscript
gxsHTML::gxs_tab_tag, // horizontal tab
gxsHTML::gxs_table_tag, // table
gxsHTML::gxs_tbody_tag, // table body
gxsHTML::gxs_td_tag, // table data
gxsHTML::gxs_textarea_tag, // form text area
gxsHTML::gxs_textflow_tag, // java applet textflow
gxsHTML::gxs_tfoot_tag, // table footer
gxsHTML::gxs_th_tag, // table header
gxsHTML::gxs_thead_tag, // table head
gxsHTML::gxs_title_tag, // title
gxsHTML::gxs_tr_tag, // table row
gxsHTML::gxs_tt_tag, // teletype
gxsHTML::gxs_u_tag, // underlined
gxsHTML::gxs_ul_tag, // unordered list
gxsHTML::gxs_var_tag, // variable
gxsHTML::gxs_wbr_tag, // word break
gxsHTML::gxs_xmp_tag // example
};
Data structure used to store the file position of an HTML tag, the tag itself, its attributes and instructions.
// Record describing a single HTML tag: its position in the file, the
// raw tag text together with the tag name and attributes, and flags
// describing the tag instruction.
struct gxsHTMLTagInfo
{
// File information
df_StreamPos start_tag; // This tag's starting position in the file
df_StreamPos end_tag; // This tag's ending position in the file
unsigned tag_length; // The complete length of this tag "< ---- >"
// Tag information
int tag_id; // Numerical value used to identify supported tags
            // (presumably a value from the tag ID enumeration)
gxString tag_info; // Complete tag from opening to closing bracket
gxString tag; // HTML tag
gxString attr; // HTML tag attributes
// Tag instructions (int members used as true/false flags)
int start_instruction; // True if start of tag instruction "<"
int end_instruction; // True if end of a tag instruction "/x>"
int has_attributes; // True if this tag has associated attributes
};
gxsHTML::gxsHTML()
gxsHTML::~gxsHTML()
gxsHTML::ClearTagList()
gxsHTML::CloseFile()
gxsHTML::CollectHTMLTags()
gxsHTML::Copy()
gxsHTML::GetTag()
gxsHTML::GetTagID()
gxsHTML::GetTagList()
gxsHTML::HandleHTMLTag()
gxsHTML::LoadHTMLFile()
gxsHTML::LoadMemoryBuffer()
gxsHTML::NumProcessed()
gxsHTML::NumTags()
gxsHTML::ParseHTMLTagInfo()
gxsHTML::ProcessHTMLTags()
gxsHTML::gxsHTML()
- Default class constructor.
gxsHTML::gxsHTML(const gxsHTML &ob)
- Class copy constructor.
gxsHTML::~gxsHTML()
- Class destructor.
gxsHTML::ClearTagList()
- Public member function used to clear the tag list.
gxsHTML::CloseFile()
- Public member function used to close the open HTML file after a load operation.
int gxsHTML::CollectHTMLTags()
- Internal processing function used to collect all the HTML tags in a previously opened file. Returns zero if no file errors occur, or a non-zero value corresponding to a DiskFileB error code if an error occurs.
int gxsHTML::CollectHTMLTags(const MemoryBuffer &membuf)
- Internal processing function used to collect all the HTML tags from a previously loaded MemoryBuffer object. Returns zero if no errors occur, or a non-zero value to indicate a failure.
void gxsHTML::Copy(const gxsHTML &ob)
- Internal processing function used to copy gxsHTML objects.
char *gxsHTML::GetTag(int tag_id)
- Public member function that returns a null-terminated string based on the value of the tag ID number. The "tag_id" variable must equal one of the integer constants defined in the tag ID enumeration.
int gxsHTML::GetTagID(const gxString &tag)
- Public member function that returns a numerical value defined in the tag ID enumeration that represents the specified tag.
gxDLList ... gxsHTML::GetTagList()
- (The remainder of this signature and its description are missing from this copy of the document.)
void gxsHTML::HandleHTMLTag(int tag_id)
- Internal processing function used to execute the derived class version of a specific tag handler.
int gxsHTML::LoadHTMLFile(const char *fname)
- Public member function used to open the specified HTML file and process all the tags collected from the file. Returns zero if no disk file errors occur, or a non-zero value corresponding to a DiskFileB error code if an error occurs.
int gxsHTML::LoadMemoryBuffer(const MemoryBuffer &membuf)
- Public member function used to process all the tags stored in a MemoryBuffer object. Returns zero if no errors occur, or a non-zero value to indicate a failure.
unsigned gxsHTML::NumProcessed()
- Public member function used to retrieve the total number of tags processed.
gxsHTML::NumTags()
- Public member function used to retrieve the total number of tags collected.
void gxsHTML::ParseHTMLTagInfo(gxsHTMLTagInfo &t)
- Public member function used to parse the specific tag information based on the string contained in the gxsHTMLTagInfo::tag_info member.
int gxsHTML::ProcessHTMLTags()
- Internal processing function used to read and process all the tags in a previously opened file. Returns zero if no disk file errors occur, or a non-zero value corresponding to a DiskFileB error code if an error occurs.
int gxsHTML::ProcessHTMLTags(const MemoryBuffer &membuf)
- Internal processing function used to read and process all the tags in a MemoryBuffer object. Returns zero if no errors occur, or a non-zero value to indicate a failure.

Derived class interface used to process tags.
void gxsHTML::Handle_INVALID_Tag()
{
// Empty by default; override in a derived class to handle INVALID tags
}
void gxsHTML::Handle_UNKNOWN_Tag()
{
// Empty by default; override in a derived class to handle UNKNOWN tags
}
void gxsHTML::Handle_UNKNOWN_SPECIAL_Tag()
{
// Empty by default; override in a derived class to handle unknown
// special tags, i.e. tags starting with an ampersand and ending in
// a semicolon (&xxxx;)
}
void gxsHTML::Handle_COMMENT_Tag()
{
// Empty by default; override in a derived class to handle COMMENT tags
}
void gxsHTML::Handle_LESS_THEN_Tag()
{
// Empty by default; override in a derived class to handle a
// less than sign "<"
}
void gxsHTML::Handle_GREATER_THEN_Tag()
{
// Empty by default; override in a derived class to handle a
// greater than sign ">"
}
void gxsHTML::Handle_AMPERSAND_Tag()
{
// Empty by default; override in a derived class to handle an
// ampersand "&"
}
void gxsHTML::Handle_NB_SPACE_Tag()
{
// Empty by default; override in a derived class to handle a
// non-breaking space
}
void gxsHTML::Handle_QUOTE_Tag()
{
// Empty by default; override in a derived class to handle a
// quotation mark (")
}
void gxsHTML::Handle_EX_ASCII_Tag()
{
// Empty by default; override in a derived class to handle the
// extended ASCII character set "&#"
}
void gxsHTML::Handle_A_Tag()
{
// Empty by default; override in a derived class to handle ANCHOR (A) tags
}
void gxsHTML::Handle_ABBREV_Tag()
{
// Empty by default; override in a derived class to handle
// ABBREVIATION (ABBREV) tags
}
void gxsHTML::Handle_ACRONYM_Tag()
{
// Empty by default; override in a derived class to handle ACRONYM tags
}
void gxsHTML::Handle_ADDRESS_Tag()
{
// Empty by default; override in a derived class to handle ADDRESS tags
}
void gxsHTML::Handle_APPLET_Tag()
{
// Empty by default; override in a derived class to handle
// JAVA APPLET (APPLET) tags
}
void gxsHTML::Handle_AREA_Tag()
{
// Empty by default; override in a derived class to handle AREA tags
}
void gxsHTML::Handle_AU_Tag()
{
// Empty by default; override in a derived class to handle AUTHOR (AU) tags
}
void gxsHTML::Handle_AUTHOR_Tag()
{
// Empty by default; override in a derived class to handle AUTHOR tags
}
void gxsHTML::Handle_B_Tag()
{
// Empty by default; override in a derived class to handle BOLD (B) tags
}
void gxsHTML::Handle_BANNER_Tag()
{
// Empty by default; override in a derived class to handle BANNER tags
}
void gxsHTML::Handle_BASE_Tag()
{
// Empty by default; override in a derived class to handle BASE tags
}
void gxsHTML::Handle_BASEFONT_Tag()
{
// Empty by default; override in a derived class to handle
// BASE FONT (BASEFONT) tags
}
void gxsHTML::Handle_BGSOUND_Tag()
{
// Empty by default; override in a derived class to handle
// BACKGROUND SOUND (BGSOUND) tags
}
void gxsHTML::Handle_BIG_Tag()
{
// Empty by default; override in a derived class to handle BIG text tags
}
void gxsHTML::Handle_BLINK_Tag()
{
// Empty by default; override in a derived class to handle BLINK tags
}
void gxsHTML::Handle_BLOCKQUOTE_Tag()
{
// Empty by default; override in a derived class to handle
// BLOCK QUOTE (BLOCKQUOTE) tags
}
void gxsHTML::Handle_BQ_Tag()
{
// Empty by default; override in a derived class to handle
// BLOCK QUOTE (BQ) tags
}
void gxsHTML::Handle_BODY_Tag()
{
// Empty by default; override in a derived class to handle BODY tags
}
void gxsHTML::Handle_BR_Tag()
{
// Empty by default; override in a derived class to handle
// LINE BREAK (BR) tags
}
void gxsHTML::Handle_CAPTION_Tag()
{
// Empty by default; override in a derived class to handle CAPTION tags
}
void gxsHTML::Handle_CENTER_Tag()
{
// Empty by default; override in a derived class to handle CENTER tags
}
void gxsHTML::Handle_CITE_Tag()
{
// Empty by default; override in a derived class to handle
// CITATION (CITE) tags
}
void gxsHTML::Handle_CODE_Tag()
{
// Empty by default; override in a derived class to handle CODE tags
}
void gxsHTML::Handle_COL_Tag()
{
// Empty by default; override in a derived class to handle
// TABLE column (COL) tags
}
void gxsHTML::Handle_COLGROUP_Tag()
{
// Empty by default; override in a derived class to handle
// TABLE column group (COLGROUP) tags
}
void gxsHTML::Handle_CREDIT_Tag()
{
// Empty by default; override in a derived class to handle CREDIT tags
}
void gxsHTML::Handle_DEL_Tag()
{
// Empty by default; override in a derived class to handle
// DELETED text (DEL) tags
}
void gxsHTML::Handle_DFN_Tag()
{
// Empty by default; override in a derived class to handle
// DEFINITION (DFN) tags
}
void gxsHTML::Handle_DIR_Tag()
{
// Empty by default; override in a derived class to handle
// DIRECTORY list (DIR) tags
}
void gxsHTML::Handle_DIV_Tag()
{
// Empty by default; override in a derived class to handle
// DIVISION (DIV) tags
}
void gxsHTML::Handle_DL_Tag()
{
// Empty by default; override in a derived class to handle
// DEFINITION list (DL) tags
}
void gxsHTML::Handle_DT_Tag()
{
// Empty by default; override in a derived class to handle
// DEFINITION term (DT) tags
}
void gxsHTML::Handle_DD_Tag()
{
// Empty by default; override in a derived class to handle
// DEFINITION description (DD) tags
}
void gxsHTML::Handle_EM_Tag()
{
// Empty by default; override in a derived class to handle
// EMPHASIZED (EM) tags
}
void gxsHTML::Handle_EMBED_Tag()
{
// Empty by default; override in a derived class to handle EMBED tags
}
void gxsHTML::Handle_FIG_Tag()
{
// Empty by default; override in a derived class to handle FIGURE (FIG) tags
}
void gxsHTML::Handle_FN_Tag()
{
// Empty by default; override in a derived class to handle
// FOOTNOTE (FN) tags
}
void gxsHTML::Handle_FONT_Tag()
{
// Empty by default; override in a derived class to handle FONT tags
}
void gxsHTML::Handle_FORM_Tag()
{
// Empty by default; override in a derived class to handle FORM tags
}
void gxsHTML::Handle_FRAME_Tag()
{
// Empty by default; override in a derived class to handle FRAME tags
}
void gxsHTML::Handle_FRAMESET_Tag()
{
// Empty by default; override in a derived class to handle
// FRAME set (FRAMESET) tags
}
void gxsHTML::Handle_H1_Tag()
{
// Empty by default; override in a derived class to handle
// HEADING 1 (H1) tags
}
void gxsHTML::Handle_H2_Tag()
{
// Empty by default; override in a derived class to handle
// HEADING 2 (H2) tags
}
void gxsHTML::Handle_H3_Tag()
{
// Empty by default; override in a derived class to handle
// HEADING 3 (H3) tags
}
void gxsHTML::Handle_H4_Tag()
{
// Empty by default; override in a derived class to handle
// HEADING 4 (H4) tags
}
void gxsHTML::Handle_H5_Tag()
{
// Empty by default; override in a derived class to handle
// HEADING 5 (H5) tags
}
void gxsHTML::Handle_H6_Tag()
{
// Empty by default; override in a derived class to handle
// HEADING 6 (H6) tags
}
void gxsHTML::Handle_HEAD_Tag()
{
// Empty by default; override in a derived class to handle HEAD tags
}
void gxsHTML::Handle_HR_Tag()
{
// Empty by default; override in a derived class to handle
// HORIZONTAL rule (HR) tags
}
void gxsHTML::Handle_HTML_Tag()
{
// Empty by default; override in a derived class to handle HTML tags
}
void gxsHTML::Handle_I_Tag()
{
// Empty by default; override in a derived class to handle ITALIC (I) tags
}
void gxsHTML::Handle_IFRAME_Tag()
{
// Empty by default; override in a derived class to handle
// floating FRAME (IFRAME) tags
}
void gxsHTML::Handle_IMG_Tag()
{
// Empty by default; override in a derived class to handle
// INLINE image (IMG) tags
}
void gxsHTML::Handle_INPUT_Tag()
{
// Empty by default; override in a derived class to handle
// FORM input (INPUT) tags
}
void gxsHTML::Handle_INS_Tag()
{
// Empty by default; override in a derived class to handle
// INSERTED text (INS) tags
}
void gxsHTML::Handle_ISINDEX_Tag()
{
// Empty by default; override in a derived class to handle ISINDEX tags
}
void gxsHTML::Handle_KBD_Tag()
{
// Empty by default; override in a derived class to handle
// KEYBOARD (KBD) tags
}
void gxsHTML::Handle_LANG_Tag()
{
// Empty by default; override in a derived class to handle
// LANGUAGE (LANG) tags
}
void gxsHTML::Handle_LH_Tag()
{
// Empty by default; override in a derived class to handle
// LIST heading (LH) tags
}
void gxsHTML::Handle_LI_Tag()
{
// Empty by default; override in a derived class to handle
// LIST item (LI) tags
}
void gxsHTML::Handle_LINK_Tag()
{
// Empty by default; override in a derived class to handle LINK tags
}
void gxsHTML::Handle_LISTING_Tag()
{
// Empty by default; override in a derived class to handle LISTING tags
}
void gxsHTML::Handle_MAP_Tag()
{
// Empty by default; override in a derived class to handle MAP tags
}
void gxsHTML::Handle_MARQUEE_Tag()
{
// Empty by default; override in a derived class to handle MARQUEE tags
}
void gxsHTML::Handle_MATH_Tag()
{
// Empty by default; override in a derived class to handle MATH tags
}
void gxsHTML::Handle_MENU_Tag()
{
// Empty by default; override in a derived class to handle
// MENU list (MENU) tags
}
void gxsHTML::Handle_META_Tag()
{
// Empty by default; override in a derived class to handle META tags
}
void gxsHTML::Handle_MULTICOL_Tag()
{
// Empty by default; override in a derived class to handle
// MULTI COLUMN text (MULTICOL) tags
}
void gxsHTML::Handle_NOBR_Tag()
{
// Empty by default; override in a derived class to handle
// NO BREAK (NOBR) tags
}
void gxsHTML::Handle_NOFRAMES_Tag()
{
// Empty by default; override in a derived class to handle
// NO FRAMES (NOFRAMES) tags
}
void gxsHTML::Handle_NOTE_Tag()
{
// Empty by default; override in a derived class to handle NOTE tags
}
void gxsHTML::Handle_OL_Tag()
{
// Empty by default; override in a derived class to handle
// ORDERED list (OL) tags
}
void gxsHTML::Handle_OVERLAY_Tag()
{
// Empty by default; override in a derived class to handle OVERLAY tags
}
void gxsHTML::Handle_P_Tag()
{
// Empty by default; override in a derived class to handle
// PARAGRAPH (P) tags
}
void gxsHTML::Handle_PARAM_Tag()
{
// Empty by default; override in a derived class to handle
// PARAMETERS (PARAM) tags
}
void gxsHTML::Handle_PERSON_Tag()
{
// Empty by default; override in a derived class to handle PERSON tags
}
void gxsHTML::Handle_PLAINTEXT_Tag()
{
// Empty by default; override in a derived class to handle
// PLAIN text (PLAINTEXT) tags
}
void gxsHTML::Handle_PRE_Tag()
{
// Empty by default; override in a derived class to handle
// PREFORMATTED text (PRE) tags
}
void gxsHTML::Handle_Q_Tag()
{
// Empty by default; override in a derived class to handle QUOTE (Q) tags
}
void gxsHTML::Handle_RANGE_Tag()
{
// Empty by default; override in a derived class to handle RANGE tags
}
void gxsHTML::Handle_SAMP_Tag()
{
// Empty by default; override in a derived class to handle
// SAMPLE (SAMP) tags
}
void gxsHTML::Handle_SCRIPT_Tag()
{
// Empty by default; override in a derived class to handle SCRIPT tags
}
void gxsHTML::Handle_SELECT_Tag()
{
// Empty by default; override in a derived class to handle
// FORM SELECT (SELECT) tags
}
void gxsHTML::Handle_SMALL_Tag()
{
// Empty by default; override in a derived class to handle
// SMALL text (SMALL) tags
}
void gxsHTML::Handle_SPACER_Tag()
{
// Empty by default; override in a derived class to handle
// WHITE SPACE (SPACER) tags
}
void gxsHTML::Handle_SPOT_Tag()
{
// Empty by default; override in a derived class to handle SPOT tags
}
void gxsHTML::Handle_STRIKE_Tag()
{
// Empty by default; override in a derived class to handle
// STRIKETHROUGH (STRIKE) tags
}
void gxsHTML::Handle_STRONG_Tag()
{
// Empty by default; override in a derived class to handle STRONG tags
}
void gxsHTML::Handle_SUB_Tag()
{
// Empty by default; override in a derived class to handle
// SUBSCRIPT (SUB) tags
}
void gxsHTML::Handle_SUP_Tag()
{
// Empty by default; override in a derived class to handle
// SUPERSCRIPT (SUP) tags
}
void gxsHTML::Handle_TAB_Tag()
{
// Empty by default; override in a derived class to handle
// HORIZONTAL TAB (TAB) tags
}
void gxsHTML::Handle_TABLE_Tag()
{
// Empty by default; override in a derived class to handle TABLE tags
}
void gxsHTML::Handle_TBODY_Tag()
{
// Empty by default; override in a derived class to handle
// TABLE body (TBODY) tags
}
void gxsHTML::Handle_TD_Tag()
{
// Empty by default; override in a derived class to handle
// TABLE data (TD) tags
}
void gxsHTML::Handle_TEXTAREA_Tag()
{
// Empty by default; override in a derived class to handle
// FORM text area (TEXTAREA) tags
}
void gxsHTML::Handle_TEXTFLOW_Tag()
{
// Empty by default; override in a derived class to handle
// JAVA applet textflow (TEXTFLOW) tags
}
void gxsHTML::Handle_TFOOT_Tag()
{
// Empty by default; override in a derived class to handle
// TABLE footer (TFOOT) tags
}
void gxsHTML::Handle_TH_Tag()
{
// Empty by default; override in a derived class to handle
// TABLE header (TH) tags
}
void gxsHTML::Handle_THEAD_Tag()
{
// Empty by default; override in a derived class to handle
// TABLE head (THEAD) tags
}
void gxsHTML::Handle_TITLE_Tag()
{
// Empty by default; override in a derived class to handle TITLE tags
}
void gxsHTML::Handle_TR_Tag()
{
// Empty by default; override in a derived class to handle
// TABLE row (TR) tags
}
void gxsHTML::Handle_TT_Tag()
{
// Empty by default; override in a derived class to handle
// TELETYPE (TT) tags
}
void gxsHTML::Handle_U_Tag()
{
// Empty by default; override in a derived class to handle
// UNDERLINED (U) tags
}
void gxsHTML::Handle_UL_Tag()
{
// Empty by default; override in a derived class to handle
// UNORDERED list (UL) tags
}
void gxsHTML::Handle_VAR_Tag()
{
// Empty by default; override in a derived class to handle
// VARIABLE (VAR) tags
}
void gxsHTML::Handle_WBR_Tag()
{
// Empty by default; override in a derived class to handle
// WORD BREAK (WBR) tags
}
void gxsHTML::Handle_XMP_Tag()
{
// Empty by default; override in a derived class to handle
// EXAMPLE (XMP) tags
}
|
End Of Document |