--- HTMLparser.h.old	Tue Jan 21 13:37:37 2003
+++ HTMLparser.h	Tue Jan 21 20:37:40 2003
@@ -28,7 +28,8 @@
 typedef xmlNodePtr htmlNodePtr;
 
 /*
- * Internal description of an HTML element.
+ * Internal description of an HTML element, representing HTML 4.01
+ * and XHTML 1.0 (which share the same structure).
  */
 typedef struct _htmlElemDesc htmlElemDesc;
 typedef htmlElemDesc *htmlElemDescPtr;
@@ -42,6 +43,23 @@
     char dtd;           /* 1: only in Loose DTD, 2: only Frameset one */
     char isinline;      /* is this a block 0 or inline 1 element */
     const char *desc;   /* the description */
+
+/* NRK Jan.2003
+ * New fields encapsulating HTML structure
+ *
+ * Bugs:
+ *      This is a very limited representation.  It fails to tell us when
+ *      an element *requires* subelements (we only have whether they're
+ *      allowed or not), and it doesn't tell us where CDATA and PCDATA
+ *      are allowed.  Some element relationships are not fully represented:
+ *      these are flagged with the word MODIFIER
+ */
+    const xmlChar** subelts;            /* allowed sub-elements of this element */
+    const xmlChar* defaultsubelt;       /* subelement for suggested auto-repair
+                                           if necessary or NULL */
+    const xmlChar** attrs_opt;          /* Optional Attributes */
+    const xmlChar** attrs_depr;         /* Additional deprecated attributes */
+    const xmlChar** attrs_req;          /* Required attributes */
 };
 
 /*
@@ -110,6 +128,44 @@
                                               const char *chunk,
                                               int size,
                                               int terminate);
+
+/* NRK/Jan2003: further knowledge of HTML structure
+ *
+ * htmlStatus values are chosen as bit flags.  HTML_REQUIRED (0xc)
+ * includes the HTML_VALID bit (0x4), so ( status & HTML_VALID ) is
+ * TRUE for both HTML_VALID and HTML_REQUIRED.
+ */
+typedef enum {
+  HTML_NA = 0 ,         /* something we don't check at all */
+  HTML_INVALID = 0x1 ,
+  HTML_DEPRECATED = 0x2 ,
+  HTML_VALID = 0x4 ,
+  HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */
+} htmlStatus ;
+
+/* Using htmlElemDesc rather than name here, to emphasise the fact
+   that otherwise there's a lookup overhead
+*/
+htmlStatus htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
+int htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
+htmlStatus htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
+htmlStatus htmlNodeStatus(const htmlNodePtr, int) ;
+
+/* Convenience accessors.  Arguments and expansions are fully
+ * parenthesised so that any pointer-valued expression may be passed
+ * and the result may be used inside a larger expression.
+ *
+ * htmlDefaultSubelement(elt)              elt's defaultsubelt field
+ * htmlElementAllowedHereDesc(parent,elt)  htmlElementAllowedHere() with
+ *                                         elt given as a descriptor, not a name
+ * htmlRequiredAttrs(elt)                  elt's attrs_req field
+ */
+#define htmlDefaultSubelement(elt) ((elt)->defaultsubelt)
+#define htmlElementAllowedHereDesc(parent,elt) \
+        htmlElementAllowedHere((parent), (elt)->name)
+#define htmlRequiredAttrs(elt) ((elt)->attrs_req)
+
+
 #ifdef __cplusplus
 }
 #endif