Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
fpc-src / usr / share / fpcsrc / 3.2.0 / packages / fcl-xml / src / htmldefs.pp
Size: Mime:
{
    $Id: htmldefs.pp,v 1.2 2006/01/03 23:33:23 lukvdl Exp $
    This file is part of the Free Component Library

    HTML definitions and utility functions
    Copyright (c) 2000-2002 by
      Areca Systems GmbH / Sebastian Guenther, sg@freepascal.org

    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}


unit HTMLDefs;

{$MODE objfpc}
{$H+}

interface

type

  THTMLCData = string;
  THTMLID = string;
  THTMLName = string;
  THTMLIDRef = string;
  THTMLIDRefs = string;
  THTMLNumber = longint;

  THTMLText = THTMLCData;
  THTMLCharsets = THTMLCData;
  THTMLContentTypes = THTMLCData;
  THTMLURI = string;
  THTMLCharacter = char;

  { Enumerations for attributes that take one of a fixed list of values.
    Each type starts with an "Empty" member; the string tables declared
    further down in this unit (HTMLDir, HTMLAlign, HTMLvalign, HTMLframe,
    HTMLrules, HTMLvaluetype, HTMLshape, HTMLinputtype, HTMLbuttontype) map
    that member to '' and every other member to its textual value. }
  THTMLDir = (dirEmpty,dirLeftToRight,dirRightToLeft);
  THTMLalign = (alEmpty,alleft,alcenter,alright,aljustify,alchar);
  THTMLvalign = (vaEmpty,vatop,vamiddle,vabottom,vabaseline);
  // NOTE(review): 'frlefthandsise' looks like a misspelling of
  // 'frlefthandside' (compare 'frrighthandside'). It is a public identifier,
  // so renaming it would break existing callers.
  THTMLframe = (frEmpty,frvoid,frabove,frbelow,frhsides,frvsides,frlefthandsise,frrighthandside,frbox,frborder);
  THTMLrules = (ruEmpty,runone,rugroups,rurows,rucols,ruall);
  THTMLvaluetype = (vtEmpty,vtdata,vtref,vtobject);
  THTMLshape = (shEmpty,shdefault,shrect,shcircle,shpoly);
  THTMLinputtype = (itEmpty,ittext,itpassword,itcheckbox,itradio,itsubmit,itreset,itfile,ithidden,itimage,itbutton);
  THTMLbuttontype = (btEmpty,btsubmit,btreset,btbutton);

  THTMLColor = (
      clHTMLBlack, clHTMLSilver, clHTMLGray,   clHTMLWhite, clHTMLMaroon,
    //  #000000      #C0C0C0       #808080       #FFFFFF      #800000
      clHTMLRed,   clHTMLPurple, clHTMLFuchsia,clHTMLGreen, clHTMLLime, clHTMLOlive,
    //  #FF0000      #800080       #FF00FF       #008000      #00FF00     #808000
      clHTMLYellow,clHTMLNavy,   clHTMLBlue,   clHTMLTeal,  clHTMLAqua
    //  #FFFF00      #000080       #0000FF       #008080      #00FFFF
      );

  THTMLAttributeTag = (
      atabbr, atalink, atacceptcharset, ataccept, ataccesskey, ataction, atalign, atalt, atarchive,
      ataxis, atbackground, atbgcolor, atborder, atcellpadding, atcellspacing, atchar, atcharoff, atcharset,
      atchecked, atcite, atclass, atclassid, atclear, atcode, atcodebase, atcodetype, atcolor, atcols,
      atcolspan, atcompact, atcontent, atcoords, atdata, atdatetime, atdeclare,atdefer,
      atdir, atdisabled, atenctype, atface, atfor, atframe, atframeborder, atheaders,
      atheight, athref, athreflang, athspace, athttpequiv, atid, atismap, atlabel, atlang, atlink,
      atlongdesc, atmarginheight, atmarginwidth, atmaxlength, atmedia, atmethod,
      atmultiple, atname, atnohref, atnoresize, atnoshade, atnowrap, atobject, atonblur, atonchange, atonclick,
      atondblclick, atonfocus, atonkeydown, atonkeypress, atonkeyup, atonload,
      atonmousedown, atonmousemove, atonmouseout, atonmouseover, atonmouseup,
      atonreset, atonselect, atonsubmit, atonunload, atprofile, atprompt, atreadonly,
      atrel, atrev, atrows, atrowspan, atrules, atscheme, atscope, atscrolling,
      atselected, atshape, atsize, atspan, atsrc, atstandby, atstart, atstyle, atsummary,
      attabindex, attarget, attext, attitle, attype, atusemap, atvalign, atvalue,
      atvaluetype, atversion, atvlink, atvspace, atwidth
      );
  THTMLAttributeSet = set of THTMLAttributeTag;

  THTMLElementTag = (
      eta, etabbr, etacronym, etaddress, etapplet, etarea, etb, etbase,
      etbasefont, etbdo, etbig, etblockquote, etbody, etbr, etbutton,
      etcaption, etcenter, etcite, etcode, etcol, etcolgroup, etdd, etdel,
      etdfn, etdir, etdiv, etdl, etdt, etem, etfieldset, etfont, etform,
      etframe, etframeset, eth1, eth2, eth3, eth4, eth5, eth6, ethead, ethr,
      ethtml, eti, etiframe, etimg, etinput, etins, etisindex, etkbd, etlabel,
      etlegend, etli, etlink, etmap, etmenu, etmeta, etnoframes, etnoscript,
      etobject, etol, etoptgroup, etoption, etp, etparam, etpre, etq, ets,
      etsamp, etscript, etselect, etsmall, etspan, etstrike, etstrong,
      etstyle, etsub, etsup, ettable, ettbody, ettd, ettextarea, ettfoot,
      etth, etthead, ettitle, ettr, ettt, etu, etul, etvar,
      etText, etUnknown
      );
  THTMLElementTagSet = set of THTMLElementTag;

  { Per-element properties; combined into THTMLElementFlags and stored in the
    Flags field of each HTMLElementProps entry. }
  THTMLElementFlag = (
    efSubelementContent,                // may have subelements
    efPCDATAContent,                    // may have PCDATA content
    efPreserveWhitespace,               // preserve all whitespace
    efDeprecated,                       // can be dropped in future versions
    efNoChecks,                         // Checks (attributes,subtags,...) can only be implemented in descendants
    efEndTagOptional                    // end tag may be omitted (set for e.g. 'li', 'p', 'td' in HTMLElementProps)
  );
  THTMLElementFlags = set of THTMLElementFlag;

  { Static description of one HTML element; one record per THTMLElementTag
    value is stored in the HTMLElementProps table. }
  PHTMLElementProps = ^THTMLElementProps;
  THTMLElementProps = record
    Name: String;                   // element name as written in markup, e.g. 'a'
    Flags: THTMLElementFlags;       // content model and other properties
    Attributes: THTMLAttributeSet;  // attributes listed for this element
  end;


const

  // NOTE(review): presumably the attributes that are written without a
  // value (minimized form) - confirm against the code that emits attributes.
  BooleanAttributes = [atchecked,atdeclare,atdefer,atdisabled,atnohref,atnoresize,
                    atmultiple,atreadonly,atselected];

  // NOTE(review): presumably the attributes marked deprecated in HTML 4.01 -
  // this set is not referenced anywhere else in this unit.
  DeprecatedAttributes = [atalink, atbackground, atbgcolor, atclear, atcode, atcolor,
    atcompact, atface, athspace, atlink, atnoshade, atnowrap, atobject, atprompt,
    atstart, attext, atvlink, atversion, atvspace];

  // Shorthand flag set for elements that accept both subelements and PCDATA.
  efSubcontent = [efSubelementContent, efPCDATAContent];

  // Attribute groups that are combined to build the Attributes field of the
  // HTMLElementProps entries below.
  atsi18n = [atlang, atdir];
  atscoreattrs = [atid,atclass,atstyle,attitle];
  atsevents = [atonclick,atondblclick,atonmousedown,atonmouseup,atonmouseover,
               atonmousemove,atonmouseout,atonkeypress,atonkeydown,atonkeyup];
  // Union of the three groups above.
  atsattrs = atsevents + atscoreattrs + atsi18n;
  // Horizontal alignment attributes shared by the table-related elements.
  atscellhalign = [atalign, atchar, atcharoff];

{  etsStructured := [];
  etsDivisions := [];
  etsLists := [];
  etsLinks := [];
  etsObjects := [etImg, etObject, etApplet, etMap, etArea];
  etsForms := [etForm];

  etsText = etsStructured + etsDivisions + etsLists + etsLinks + etsObjects +
          etsForms +
          etTable + etText + etScript + ;      }

  HTMLElementProps: array[THTMLElementTag] of THTMLElementProps = (
    (Name: 'a';         Flags: efSubcontent;
     Attributes: atsattrs+[atcharset,attype,atname,athref,athreflang,atrel,atrev,
                 ataccesskey,atshape,atcoords,attabindex,atonfocus,atonblur]),

    (Name: 'abbr';      Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'acronym';   Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'address';   Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'applet';    Flags: efSubcontent+[efDeprecated];
     Attributes: atscoreattrs+[atcodebase,atarchive,atalt,atname,atwidth,atheight]),

    (Name: 'area';      Flags: [];
     Attributes: atsattrs+[atshape,atcoords,athref,atnohref,atalt,attabindex,
     ataccesskey,atonfocus,atonblur]),

    (Name: 'b';         Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'base';      Flags: []; Attributes: [athref]),

    (Name: 'basefont';  Flags: [efDeprecated]; Attributes: [atid]),

    (Name: 'bdo';       Flags: efSubcontent; Attributes: atscoreattrs+[atlang,atdir]),

    (Name: 'big';       Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'blockquote';Flags: [efSubelementContent]; Attributes: atsattrs+[atcite]),

    (Name: 'body';      Flags: [efSubelementContent];
     Attributes: atsAttrs+[atonload, atonunload]),

    (Name: 'br';        Flags: []; Attributes: atscoreattrs),

    (Name: 'button';    Flags: efSubcontent;
     Attributes: atsattrs+[atname,atvalue,attype,atdisabled,attabindex,
                 ataccesskey,atonfocus,atonblur]),

    (Name: 'caption';   Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'center';    Flags: [efSubelementContent,efDeprecated]; Attributes: []),

    (Name: 'cite';      Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'code';      Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'col';       Flags: [];
     Attributes: atsattrs+atscellhalign+[atvalign,atspan,atwidth]),

    (Name: 'colgroup';  Flags: [efSubelementContent, efEndTagOptional];
     Attributes: atsattrs+atscellhalign+[atvalign,atspan,atwidth]),

    (Name: 'dd';        Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),

    (Name: 'del';       Flags: [efSubelementContent]; Attributes: atsattrs+[atcite,atdatetime]),

    (Name: 'dfn';       Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'dir';       Flags: [efSubelementContent,efDeprecated]; Attributes: atsattrs),

    (Name: 'div';       Flags: efSubContent; Attributes: atsattrs),

    (Name: 'dl';        Flags: [efSubelementContent]; Attributes: atsattrs),

    (Name: 'dt';        Flags: [efPCDataContent, efEndTagOptional]; Attributes: atsattrs),

    (Name: 'em';        Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'fieldset';  Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'font';      Flags: efSubcontent+[efDeprecated]; Attributes: atscoreattrs+atsi18n),

    (Name: 'form';      Flags: [efSubelementContent];
     Attributes: atsattrs+[ataction,atmethod,atenctype,atonsubmit,atonreset,atacceptcharset]),

    (Name: 'frame';     Flags: [];
     Attributes: atscoreattrs+[atlongdesc,atname,atsrc,atframeborder,
                 atmarginwidth,atmarginheight,atnoresize,atscrolling]),

    (Name: 'frameset';  Flags: efSubcontent;
     Attributes: atsCoreattrs+[atrows,atcols,atonload,atonunload]),

    (Name: 'h1';        Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'h2';        Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'h3';        Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'h4';        Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'h5';        Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'h6';        Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'head';      Flags: [efSubelementContent]; Attributes: atsi18n+[atprofile]),

    (Name: 'hr';        Flags: []; Attributes: atscoreattrs+atsevents),

    (Name: 'html';      Flags: [efSubelementContent]; Attributes: atsi18n),

    (Name: 'i';         Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'iframe';    Flags: [efSubelementContent];
     Attributes: atscoreattrs+[atlongdesc,atname,atsrc,atframeborder,atmarginwidth,
                 atmarginheight,atscrolling,atalign,atheight,atwidth]),

    (Name: 'img';       Flags: [];
     Attributes: atsattrs+[atsrc,atalt,atlongdesc,atheight,atwidth,atusemap,atismap]),

    (Name: 'input';     Flags: [];
     Attributes: atsattrs+[attype,atname,atvalue,atchecked,atdisabled,
                 atreadonly,atsize,atmaxlength,atsrc,atalt,atusemap,attabindex,
                 ataccesskey,atonfocus,atonblur,atonselect,atonchange,ataccept]),

    (Name: 'ins';       Flags: [efSubelementContent]; Attributes: atsattrs+[atcite,atdatetime]),

    (Name: 'isindex';   Flags: [efDeprecated]; Attributes: atscoreattrs+atsi18n),

    (Name: 'kbd';       Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'label';     Flags: efSubcontent;
     Attributes: atsattrs+[atfor,ataccesskey,atonfocus,atonblur]),

    (Name: 'legend';    Flags: efSubcontent; Attributes: atsattrs+[ataccesskey]),

    (Name: 'li';        Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),

    (Name: 'link';      Flags: [];
     Attributes: atsattrs+[atcharset,athref,athreflang,attype,atrel,atrev,atmedia]),

    (Name: 'map';       Flags: [efSubelementContent]; Attributes: atsattrs+[atname]),

    (Name: 'menu';      Flags: [efSubelementContent,efDeprecated]; Attributes: atsattrs),

    (Name: 'meta';      Flags: []; Attributes: atsi18n+[athttpequiv,atname,atcontent,atscheme]),

    (Name: 'noframes';  Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'noscript';  Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'object';    Flags: efSubcontent;
     Attributes: atsattrs+[atdeclare,atclassid,atcodebase,atdata,attype,atcodetype,
                 atarchive,atstandby,atheight,atwidth,atusemap,atname,attabindex]),

    (Name: 'ol';        Flags: [efSubelementContent]; Attributes: atsattrs),

    (Name: 'optgroup';  Flags: efSubcontent; Attributes: atsattrs+[atdisabled,atlabel]),

    (Name: 'option';    Flags: efSubcontent+[efEndTagOptional];
     Attributes: atsattrs+[atselected,atdisabled,atlabel,atvalue]),

    (Name: 'p';         Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),

    (Name: 'param';     Flags: []; Attributes: [atid,atname,atvalue,atvaluetype,attype]),

    (Name: 'pre';       Flags: efSubcontent + [efPreserveWhitespace]; Attributes: atsattrs),

    (Name: 'q';         Flags: efSubcontent; Attributes: atsattrs+[atcite]),

    (Name: 's';         Flags: efSubcontent+[efDeprecated]; Attributes: atsattrs),

    (Name: 'samp';      Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'script';    Flags: [efPCDATAContent]; Attributes: [atcharset,attype,atsrc,atdefer]),

    (Name: 'select';    Flags: [efSubelementContent];
     Attributes: atsattrs+[atname,atsize,atmultiple,atdisabled,attabindex,atonfocus,
                 atonblur,atonchange]),

    (Name: 'small';     Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'span';      Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'strike';    Flags: efSubcontent+[efDeprecated]; Attributes: atsattrs),

    (Name: 'strong';    Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'style';     Flags: [efPCDATAContent];
     Attributes: atsi18n+[attype,atmedia,attitle]),

    (Name: 'sub';       Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'sup';       Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'table';     Flags: [efSubelementContent];
     Attributes: atsattrs+[atsummary,atwidth,atborder,atframe,atrules,atcellspacing,atcellpadding]),

    (Name: 'tbody';     Flags: [efSubelementContent]; Attributes: atsattrs+atscellhalign+[atvalign]),

    (Name: 'td';        Flags: efSubcontent+[efEndTagOptional];
     Attributes: atsattrs+atscellhalign+[atvalign,atabbr,ataxis,atheaders,atscope,atrowspan,atcolspan]),

    (Name: 'textarea';  Flags: [efPCDATAContent];
     Attributes: atsattrs+[atname,atrows,atcols,atdisabled,atreadonly,attabindex,
                 ataccesskey,atonfocus,atonblur,atonselect,atonchange]),

    (Name: 'tfoot';     Flags: [efSubelementContent,efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign]),

    (Name: 'th';        Flags: efSubcontent+[efEndTagOptional];
     Attributes: atsattrs+atscellhalign+[atvalign,atabbr,ataxis,atheaders,atscope,atrowspan,atcolspan]),

    (Name: 'thead';     Flags: [efSubelementContent, efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign]),

    (Name: 'title';     Flags: efSubcontent; Attributes: atsi18n),

    (Name: 'tr';        Flags: [efSubelementContent, efEndTagOptional];
     Attributes: atsattrs+atscellhalign+[atvalign]),

    (Name: 'tt';        Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'u';         Flags: efSubcontent+[efDeprecated]; Attributes: atsattrs),

    (Name: 'ul';        Flags: [efSubelementContent]; Attributes: atsattrs),

    (Name: 'var';       Flags: efSubcontent; Attributes: atsattrs),

    (Name: 'text';      Flags: efSubcontent; Attributes: []),

    (Name: 'unknown';   Flags: efSubcontent+[efNoChecks]; Attributes: [])

    );

  HTMLAttributeTag : array [THTMLAttributeTag] of String = (
      'abbr', 'alink', 'accept-charset', 'accept', 'accesskey', 'action', 'align', 'alt', 'archive',
      'axis', 'background', 'bgcolor', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff', 'charset',
      'checked', 'cite', 'class', 'classid', 'clear', 'code', 'codebase', 'codetype', 'color', 'cols',
      'colspan', 'compact', 'content', 'coords', 'data', 'datetime', 'declare', 'defer',
      'dir', 'disabled', 'enctype', 'face', 'for', 'frame', 'frameborder', 'headers',
      'height', 'href', 'hreflang', 'hspace', 'http-equiv', 'id', 'ismap', 'label', 'lang', 'link',
      'longdesc', 'marginheight', 'marginwidth', 'maxlength', 'media', 'method',
      'multiple', 'name', 'nohref', 'noresize', 'noshade', 'nowrap', 'object', 'onblur', 'onchange', 'onclick',
      'ondblclick', 'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload',
      'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup',
      'onreset', 'onselect', 'onsubmit', 'onunload', 'profile', 'prompt', 'readonly',
      'rel', 'rev', 'rows', 'rowspan', 'rules', 'scheme', 'scope', 'scrolling',
      'selected', 'shape', 'size', 'span', 'src', 'standby', 'start', 'style', 'summary',
      'tabindex', 'target', 'text', 'title', 'type', 'usemap', 'valign', 'value',
      'valuetype', 'version', 'vlink', 'vspace', 'width');

  HTMLColor : array [THTMLColor] of string =
    ('Black', 'Silver', 'Gray', 'White', 'Maroon', 'Red', 'Purple', 'Fuchsia',
     'Green', 'Lime', 'Olive', 'Yellow', 'Navy', 'Blue', 'Teal', 'Aqua');
  HTMLDir : array [THTMLDir] of string = ('','LTR','RTL');
  HTMLAlign : array [THTMLalign] of string = ('','left','center','right','justify','char');
  HTMLvalign : array [THTMLvalign] of string = ('','top','middle','bottom','baseline');
  HTMLframe : array [THTMLframe] of string =
      ('','void','above','below','hsides','vsides','lhs','rhs','box','border');
  HTMLrules : array [THTMLrules] of string = ('','none','groups','rows','cols','all');
  HTMLvaluetype : array [THTMLvaluetype] of string = ('','data','ref','object');
  HTMLshape : array [THTMLshape] of string = ('','default','rect','circle','poly');
  HTMLinputtype : array [THTMLinputtype] of string = ('','text','password','checkbox',
      'radio','submit','reset','file','hidden','image','button');
  HTMLbuttontype : array [THTMLbuttontype] of string = ('','submit','reset','button');

{ Resolves a character reference given without its '&' and ';' delimiters.
  Name is either an entity name (e.g. 'amp') or a numeric reference starting
  with '#' (decimal) or '#x' / '#X' (hexadecimal).
  Returns True and stores the character in Entity when Name was resolved;
  returns False otherwise. }
function ResolveHTMLEntityReference(const Name: WideString;
  var Entity: WideChar): Boolean;

{ Returns True if, according to the unit's auto-close table, opening an
  element NewTag closes a currently open element OldTag. }
function IsAutoClose(NewTag, OldTag: THTMLElementTag): Boolean;


implementation

uses SysUtils;

{ Define which elements auto-close other elements, modelled after libxml2.
  This is an array of variable-length lists, each terminated by etUnknown.
  Indices to first element of each list are provided by AutoCloseIndex array,
  which *must* be updated after any change. }
const
  AutoCloseTab: array[0..277] of THTMLElementTag = (

  etform,       etform, etp, ethr, eth1, eth2, eth3, eth4, eth5, eth6,
                etdl, etul, etol, etmenu, etdir, etaddress, etpre,
                ethead, etUnknown,
  ethead,       etp, etUnknown,
  ettitle,      etp, etUnknown,
  etbody,       ethead, etstyle, etlink, ettitle, etp, etUnknown,
  etframeset,   ethead, etstyle, etlink, ettitle, etp, etUnknown,
  etli,         etp, eth1, eth2, eth3, eth4, eth5, eth6, etdl, etaddress,
                etpre, ethead, etli, etUnknown,
  ethr,         etp, ethead, etUnknown,
  eth1,         etp, ethead, etUnknown,
  eth2,         etp, ethead, etUnknown,
  eth3,         etp, ethead, etUnknown,
  eth4,         etp, ethead, etUnknown,
  eth5,         etp, ethead, etUnknown,
  eth6,         etp, ethead, etUnknown,
  etdir,        etp, ethead, etUnknown,
  etaddress,    etp, ethead, etul, etUnknown,
  etpre,        etp, ethead, etul, etUnknown,
  etblockquote, etp, ethead, etUnknown,
  etdl,         etp, etdt, etmenu, etdir, etaddress, etpre,
                ethead, etUnknown,
  etdt,         etp, etmenu, etdir, etaddress, etpre,
                ethead, etdd, etUnknown,
  etdd,         etp, etmenu, etdir, etaddress, etpre,
                ethead, etdt, etUnknown,
  etul,         etp, ethead, etol, etmenu, etdir, etaddress, etpre, etUnknown,
  etol,         etp, ethead, etul, etUnknown,
  etmenu,       etp, ethead, etul, etUnknown,
  etp,          etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6, etUnknown,
  etdiv,        etp, ethead, etUnknown,
  etnoscript,   etp, ethead, etUnknown,
  etcenter,     etfont, etb, eti, etp, ethead, etUnknown,
  eta,          eta, etUnknown,
  etcaption,    etp, etUnknown,
  etcolgroup,   etcaption, etcolgroup, etcol, etp, etUnknown,
  etcol,        etcaption, etcol, etp, etUnknown,
  ettable,      etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6, etpre,
                eta, etUnknown,
  etth,         etth, ettd, etp, etspan, etfont, eta, etb, eti, etu, etUnknown,
  ettd,         etth, ettd, etp, etspan, etfont, eta, etb, eti, etu, etUnknown,
  ettr,         etth, ettd, ettr, etcaption, etcol, etcolgroup, etp, etUnknown,
  etthead,      etcaption, etcol, etcolgroup, etUnknown,
  ettfoot,      etth, ettd, ettr, etcaption, etcol, etcolgroup, etthead,
                ettbody, etp, etUnknown,
  ettbody,      etth, ettd, ettr, etcaption, etcol, etcolgroup, etthead,
                ettfoot, ettbody, etp, etUnknown,
  etoptgroup,   etoption, etUnknown,
  etoption,     etoption, etUnknown,
  etfieldset,   etlegend, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6,
                etpre, eta, etUnknown,
  etUnknown);

  { Start offsets into AutoCloseTab: entry i is the index of the first
    element (the newly opened tag) of list i. IsAutoClose scans this array to
    find the list for a tag, so every list in AutoCloseTab needs exactly one
    entry here. }
  AutoCloseIndex: array[0..40] of Integer = (
    0, 19, 22, 25, 32, 39, 53, 57, 61, 65, 69,
    73, 77, 81, 85, 90, 95, 99, 108, 117, 126,
    135, 140, 145, 155, 159, 163, 170, 173, 176,
    182, 187, 199, 210, 221, 230, 235, 246, 258,
    261, 264
  );

{ HTML entities, each preceded by its character code, which is stored as two
  bytes (high byte first). There is a separate list for each entity name
  length, and each list is sorted by entity name in ascending byte order;
  the binary search in BSearch relies on that ordering.
  The sole purpose of using AnsiString here is staying compatible with Delphi 7,
  which is totally broken with respect to handling wide literals.
}

  ent_2 =
    #3#$9C  + 'Mu'+
    #3#$9D  + 'Nu'+
    #3#$A0  + 'Pi'+
    #3#$9E  + 'Xi'+
    #$22#$65+ 'ge'+
    #0#62   + 'gt'+
    #$22#$64+ 'le'+
    #0#60   + 'lt'+
    #3#$BC  + 'mu'+
    #$22#$60+ 'ne'+
    #$22#$0B+ 'ni'+
    #3#$BD  + 'nu'+
    #$22#$28+ 'or'+
    #3#$C0  + 'pi'+
    #3#$BE  + 'xi';

  ent_3 =
    #3#$A7  + 'Chi'+
    #0#208  + 'ETH'+
    #3#$97  + 'Eta'+
    #3#$A6  + 'Phi'+
    #3#$A8  + 'Psi'+
    #3#$A1  + 'Rho'+
    #3#$A4  + 'Tau'+
    #0#38   + 'amp'+
    #$22#$27+ 'and'+
    #$22#$20+ 'ang'+
    #$22#$29+ 'cap'+
    #3#$C7  + 'chi'+
    #$22#$2A+ 'cup'+
    #0#176  + 'deg'+
    #3#$B7  + 'eta'+
    #0#240  + 'eth'+
    #$22#$2B+ 'int'+
    #$25#$CA+ 'loz'+
    #$20#$0E+ 'lrm'+
    #0#172  + 'not'+
    #3#$C6  + 'phi'+
    #3#$D6  + 'piv'+
    #3#$C8  + 'psi'+
    #0#174  + 'reg'+
    #3#$C1  + 'rho'+
    #$20#$0F+ 'rlm'+
    #0#173  + 'shy'+
    #$22#$3C+ 'sim'+
    #$22#$82+ 'sub'+
    #$22#$11+ 'sum'+
    #$22#$83+ 'sup'+
    #3#$C4  + 'tau'+
    #0#168  + 'uml'+
    #0#165  + 'yen'+
    #$20#$0D+ 'zwj';

  ent_4 =
    #0#196  + 'Auml'+
    #3#$92  + 'Beta'+
    #0#203  + 'Euml'+
    #3#$99  + 'Iota'+
    #0#207  + 'Iuml'+
    #0#214  + 'Ouml'+
    #0#220  + 'Uuml'+
    #1#$78  + 'Yuml'+
    #3#$96  + 'Zeta'+

    #0#228  + 'auml'+
    #3#$B2  + 'beta'+
    #$20#$22+ 'bull'+
    #0#162  + 'cent'+
    #2#$C6  + 'circ'+
    #$22#$45+ 'cong'+
    #0#169  + 'copy'+
    #$21#$D3+ 'dArr'+
    #$21#$93+ 'darr'+
    #$20#$03+ 'emsp'+
    #$20#$02+ 'ensp'+
    #0#235  + 'euml'+
    #$20#$AC+ 'euro'+
    #1#$92  + 'fnof'+
    #$21#$D4+ 'hArr'+
    #$21#$94+ 'harr'+
    #3#$B9  + 'iota'+
    #$22#$08+ 'isin'+
    #0#239  + 'iuml'+
    #$21#$D0+ 'lArr'+
    #$23#$29+ 'lang'+
    #$21#$90+ 'larr'+
    #0#175  + 'macr'+
    #0#160  + 'nbsp'+
    #$22#$84+ 'nsub'+
    #0#170  + 'ordf'+
    #0#186  + 'ordm'+
    #0#246  + 'ouml'+
    #0#182  + 'para'+
    #$22#$02+ 'part'+
    #$22#$A5+ 'perp'+
    #$22#$0F+ 'prod'+
    #$22#$1D+ 'prop'+
    #0#34   + 'quot'+
    #$21#$D2+ 'rArr'+
    #$23#$2A+ 'rang'+
    #$21#$92+ 'rarr'+
    #$21#$1C+ 'real'+
    #$22#$C5+ 'sdot'+
    #0#167  + 'sect'+
    #$22#$86+ 'sube'+
    #0#185  + 'sup1'+
    #0#178  + 'sup2'+
    #0#179  + 'sup3'+
    #$22#$87+ 'supe'+
    #$21#$D1+ 'uArr'+
    #$21#$91+ 'uarr'+
    #0#252  + 'uuml'+
    #0#255  + 'yuml'+
    #3#$B6  + 'zeta'+
    #$20#$0C+ 'zwnj';

  ent_5 =
    #0#198  + 'AElig'+
    #0#194  + 'Acirc'+
    #3#$91  + 'Alpha'+
    #0#197  + 'Aring'+
    #3#$94  + 'Delta'+
    #0#202  + 'Ecirc'+
    #3#$93  + 'Gamma'+
    #0#206  + 'Icirc'+
    #3#$9A  + 'Kappa'+
    #1#$52  + 'OElig'+
    #0#212  + 'Ocirc'+
    #3#$A9  + 'Omega'+
    #$20#$33+ 'Prime'+
    #3#$A3  + 'Sigma'+
    #0#222  + 'THORN'+
    #3#$98  + 'Theta'+
    #0#219  + 'Ucirc'+

    #0#226  + 'acirc'+
    #0#180  + 'acute'+
    #0#230  + 'aelig'+
    #3#$B1  + 'alpha'+
    #0#229  + 'aring'+
    #$22#$48+ 'asymp'+
    #$20#$1E+ 'bdquo'+
    #0#184  + 'cedil'+
    #$26#$63+ 'clubs'+
    #$21#$B5+ 'crarr'+
    #3#$B4  + 'delta'+
    #$26#$66+ 'diams'+
    #0#234  + 'ecirc'+
    #$22#$05+ 'empty'+
    #$22#$61+ 'equiv'+
    #$22#$03+ 'exist'+
    #$20#$44+ 'frasl'+
    #3#$B3  + 'gamma'+
    #0#238  + 'icirc'+
    #0#161  + 'iexcl'+
    #$21#$11+ 'image'+
    #$22#$1E+ 'infin'+
    #3#$BA  + 'kappa'+
    #0#171  + 'laquo'+
    #$23#$08+ 'lceil'+
    #$20#$1C+ 'ldquo'+
    #$20#$18+ 'lsquo'+
    #$20#$14+ 'mdash'+
    #0#181  + 'micro'+
    #$22#$12+ 'minus'+
    #$22#$07+ 'nabla'+
    #$20#$13+ 'ndash'+
    #$22#$09+ 'notin'+
    #0#244  + 'ocirc'+
    #1#$53  + 'oelig'+
    #$20#$3E+ 'oline'+
    #3#$C9  + 'omega'+
    #$22#$95+ 'oplus'+
    #0#163  + 'pound'+
    #$20#$32+ 'prime'+
    #$22#$1A+ 'radic'+
    #0#187  + 'raquo'+
    #$23#$09+ 'rceil'+
    #$20#$1D+ 'rdquo'+
    #$20#$19+ 'rsquo'+
    #$20#$1A+ 'sbquo'+
    #3#$C3  + 'sigma'+
    #0#223  + 'szlig'+
    #3#$B8  + 'theta'+
    #0#254  + 'thorn'+
    #2#$DC  + 'tilde'+
    #0#215  + 'times'+
    #$21#$22+ 'trade'+
    #0#251  + 'ucirc'+
    #3#$D2  + 'upsih';

  ent_6 =
    #0#193  + 'Aacute'+
    #0#192  + 'Agrave'+
    #0#195  + 'Atilde'+
    #0#199  + 'Ccedil'+
    #$20#$21+ 'Dagger'+
    #0#201  + 'Eacute'+
    #0#200  + 'Egrave'+
    #0#205  + 'Iacute'+
    #0#204  + 'Igrave'+
    #3#$9B  + 'Lambda'+
    #0#209  + 'Ntilde'+
    #0#211  + 'Oacute'+
    #0#210  + 'Ograve'+
    #0#216  + 'Oslash'+
    #0#213  + 'Otilde'+
    #1#$60  + 'Scaron'+
    #0#218  + 'Uacute'+
    #0#217  + 'Ugrave'+
    #0#221  + 'Yacute'+

    #0#225  + 'aacute'+
    #0#224  + 'agrave'+
    #0#227  + 'atilde'+
    #0#166  + 'brvbar'+
    #0#231  + 'ccedil'+
    #0#164  + 'curren'+
    #$20#$20+ 'dagger'+
    #0#247  + 'divide'+
    #0#233  + 'eacute'+
    #0#232  + 'egrave'+
    #$22#$00+ 'forall'+
    #0#189  + 'frac12'+
    #0#188  + 'frac14'+
    #0#190  + 'frac34'+
    #$26#$65+ 'hearts'+
    #$20#$26+ 'hellip'+
    #0#237  + 'iacute'+
    #0#236  + 'igrave'+
    #0#191  + 'iquest'+
    #3#$BB  + 'lambda'+
    #$23#$0A+ 'lfloor'+
    #$22#$17+ 'lowast'+
    #$20#$39+ 'lsaquo'+
    #0#183  + 'middot'+
    #0#241  + 'ntilde'+
    #0#243  + 'oacute'+
    #0#242  + 'ograve'+
    #0#248  + 'oslash'+
    #0#245  + 'otilde'+
    #$22#$97+ 'otimes'+
    #$20#$30+ 'permil'+
    #0#177  + 'plusmn'+
    #$23#$0B+ 'rfloor'+
    #$20#$3A+ 'rsaquo'+
    #1#$61  + 'scaron'+
    #3#$C2  + 'sigmaf'+
    #$26#$60+ 'spades'+
    #$22#$34+ 'there4'+
    #$20#$09+ 'thinsp'+
    #0#250  + 'uacute'+
    #0#249  + 'ugrave'+
    #$21#$18+ 'weierp'+
    #0#253  + 'yacute';

  ent_7 =
    #3#$95  + 'Epsilon'+
    #3#$9F  + 'Omicron'+
    #3#$A5  + 'Upsilon'+
    #$21#$35+ 'alefsym'+
    #3#$B5  + 'epsilon'+
    #3#$BF  + 'omicron'+
    #3#$C5  + 'upsilon';

  ent_8 =
    #3#$D1  + 'thetasym';

  { Entity tables indexed by entity name length: strs[n] holds all entities
    whose name is n characters long (ent_2 .. ent_8). }
  strs: array[2..8] of string = (
    ent_2, ent_3, ent_4, ent_5, ent_6, ent_7, ent_8
  );

{ Binary search for an entity name in a packed entity table.

  P     points to the name to look up; exactly Len characters are read.
  Len   length of the name. Every record in data must hold a name of
        exactly this length.
  data  sequence of records of Len+2 bytes each: the character code as two
        bytes (high byte first) followed by the Len-character name. Records
        must be sorted by name in ascending byte order.

  Returns the character code of the matching record, or #0 when the name is
  not present (or when P is nil / Len is not positive). }
function BSearch(P: PWideChar; Len: Integer; const data: string): WideChar;
var
  L, H, mid, J, C, base: Integer;
begin
  Result := #0;
  if (P = nil) or (Len <= 0) then
    Exit;
  L := 0;
  // Index of the LAST record. Using the record count here (as the code did
  // before) lets mid reach one record past the end of data whenever the
  // name sorts after every entry, which reads beyond the string.
  H := (Length(data) div (Len+2)) - 1;
  while L <= H do
  begin
    mid := L + ((H - L) shr 1);
    base := mid*(Len+2);            // 0-based offset of record 'mid'
    J := 0;
    // Compare the name character by character; C is the first difference.
    repeat
      C := ord(P[J]) - ord(data[base+3+J]);
      Inc(J);
    until (C <> 0) or (J >= Len);
    if C > 0 then
      L := mid + 1
    else if C < 0 then
      H := mid - 1
    else
    begin
      // Match: reassemble the 16-bit code stored in front of the name.
      Result := WideChar((ord(data[base+1]) shl 8) or ord(data[base+2]));
      Exit;
    end;
  end;
end;

{
  Remaining issues:
  1) UTF-16 surrogate pairs
  2) HTML accepts uppercase 'X' for hex notation, but XML does not.
  3) 'apos' is used in xml/xhtml, but not in HTML 4.01
}

{ Resolves the text between '&' and ';' of an HTML character reference.

  Name    the reference without its delimiters: a named entity ('amp'),
          a decimal reference ('#38') or a hexadecimal reference
          ('#x26' or '#X26').
  Entity  receives the resolved character when the function returns True.
          For an unknown named entity it is set to #0; for a rejected
          numeric reference it is left unchanged.

  Returns True when Name was resolved to a single 16-bit character.
  A numeric reference is rejected when it contains no digits, contains a
  character that is not a digit of its base, or denotes a value above $FFFF
  (such a value cannot be returned in one WideChar and was previously
  truncated silently, yielding a wrong character). }
function ResolveHTMLEntityReference(const Name: WideString;
  var Entity: WideChar): Boolean;
const
  MaxCode = $FFFF;   // largest value that fits into a single WideChar
var
  i, L: Integer;
  value, Base, Digit: Integer;
begin
  L := Length(Name);
  if (L > 1) and (Name[1] = '#') then
  begin
    if (Name[2] = 'x') or (Name[2] = 'X') then
    begin
      Base := 16;
      i := 3;
    end
    else
    begin
      Base := 10;
      i := 2;
    end;
    // At least one digit is required; '#x' alone is not a valid reference.
    Result := (i <= L);
    value := 0;
    while Result and (i <= L) do
    begin
      case Name[i] of
        '0'..'9': Digit := Ord(Name[i]) - Ord('0');
        'a'..'f': Digit := Ord(Name[i]) - Ord('a') + 10;
        'A'..'F': Digit := Ord(Name[i]) - Ord('A') + 10;
      else
        Digit := Base;              // not a digit at all
      end;
      if Digit >= Base then
        Result := False             // also rejects 'a'..'f' in decimal
      else
      begin
        value := value * Base + Digit;
        // Stopping here keeps value small enough that the multiplication
        // above can never overflow an Integer.
        Result := (value <= MaxCode);
      end;
      Inc(i);
    end;
    if Result then
      Entity := WideChar(value);
  end
  else
  begin
    // Named entity: the tables are split by name length.
    case L of
      2..8: Entity := BSearch(PWideChar(Name), L, strs[L]);
    else
      Entity := #0;
    end;
    Result := (Entity <> #0);
  end;
end;

{ Reports whether opening NewTag closes an open OldTag.
  The list whose head element equals NewTag is located through
  AutoCloseIndex; OldTag is then looked for among the members that follow
  the head, up to the etUnknown terminator. Returns False when NewTag has no
  list or OldTag is not in it. }
function IsAutoClose(NewTag, OldTag: THTMLElementTag): Boolean;
var
  ListNo, Cursor: Integer;
begin
  Result := False;

  // Find the first list headed by NewTag.
  Cursor := -1;
  for ListNo := Low(AutoCloseIndex) to High(AutoCloseIndex) do
    if AutoCloseTab[AutoCloseIndex[ListNo]] = NewTag then
    begin
      Cursor := AutoCloseIndex[ListNo] + 1;   // first member after the head
      Break;
    end;
  if Cursor < 0 then
    Exit;

  // Advance to OldTag or to the terminator, whichever comes first.
  while not (AutoCloseTab[Cursor] in [OldTag, etUnknown]) do
    Inc(Cursor);

  // Stopping on anything but the terminator means OldTag was found.
  Result := AutoCloseTab[Cursor] <> etUnknown;
end;

end.