Index: ChangeLog =================================================================== RCS file: /cvs/gnome/gnome-xml/ChangeLog,v retrieving revision 1.460 diff -u -r1.460 ChangeLog --- ChangeLog 2001/05/09 10:51:31 1.460 +++ ChangeLog 2001/05/09 17:26:59 @@ -1,3 +1,10 @@ +2001-05-09 Jonas Borgström + + * HTMLparser.c (htmlGetEndPriority): New function, returns + the priority of a certain element. + (htmlAutoCloseOnClose): Only close inline elements if they + all have lower or equal priority. + Wed May 9 12:50:15 CEST 2001 Daniel Veillard * tree.c: zb bisp com reported an error in xmlNodeGetLang() Index: HTMLparser.c =================================================================== RCS file: /cvs/gnome/gnome-xml/HTMLparser.c,v retrieving revision 1.83 diff -u -r1.83 HTMLparser.c --- HTMLparser.c 2001/05/04 17:52:34 1.83 +++ HTMLparser.c 2001/05/09 17:27:00 @@ -564,17 +564,48 @@ "onselect" }; +typedef struct { + const char *name; + int priority; +} elementPriority; /* - * end tags that imply the end of the inside elements - */ -const char *htmlEndClose[] = { -"head", -"body", -"html", -NULL + * This table is used by the htmlparser to know what to do with + * broken html pages. By assigning different priorities to different + * elements the parser can decide how to handle extra endtags. + * Endtags are only allowed to close elements with lower or equal + * priority. + */ +const elementPriority htmlEndPriority[] = { + {"div", 150}, + {"td", 160}, + {"th", 160}, + {"tr", 170}, + {"thead", 180}, + {"tbody", 180}, + {"tfoot", 180}, + {"table", 190}, + {"head", 200}, + {"body", 200}, + {"html", 220}, + {NULL, 100} /* Default priority */ }; +/** + * htmlGetEndPriority: + * @name: The name of the element to look up the priority for. + * + * Return value: The "endtag" priority. 
+ **/ +static int +htmlGetEndPriority (const char *name) { + int i = 0; + + while (htmlEndPriority[i].name != NULL && + !xmlStrEqual(htmlEndPriority[i].name, name)) i++; + return htmlEndPriority[i].priority; +} + static const char** htmlStartCloseIndex[100]; static int htmlStartCloseIndexinitialized = 0; @@ -674,7 +705,7 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { htmlElemDescPtr info; xmlChar *oldname; - int i, endCloses = 0; + int i, priority = htmlGetEndPriority (newtag); #ifdef DEBUG xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr); @@ -683,14 +714,16 @@ #endif for (i = (ctxt->nameNr - 1);i >= 0;i--) { + if (xmlStrEqual(newtag, ctxt->nameTab[i])) break; + /* + * A misplaced endtag can only close elements with lower or equal priority, so + * if we find an element with higher priority before we find an element with + * matching name, we just ignore this endtag + */ + if (htmlGetEndPriority (ctxt->nameTab[i]) > priority) return; } if (i < 0) return; - for (i = 0; (htmlEndClose[i] != NULL);i++) - if (xmlStrEqual(newtag, (const xmlChar *) htmlEndClose[i])) { - endCloses = 1; - break; - } while (!xmlStrEqual(newtag, ctxt->name)) { info = htmlTagLookup(ctxt->name); @@ -707,8 +740,6 @@ "Opening and ending tag mismatch: %s and %s\n", newtag, ctxt->name); ctxt->wellFormed = 0; - } else if (endCloses == 0) { - return; } if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, ctxt->name);