#include #include #include #include static void _getpage_startElementSAX (void * userData, const xmlChar * name, const xmlChar ** atts) { if (!strncmp("script", name, 6)) printf("<%s>", name); } static void _getpage_endElementSAX (void * userData, const xmlChar * name) { if (!strncmp("script", name, 6)) printf("", name); } static void _getpage_charDataSAX (void * userData, const xmlChar * buffer, int length) { struct _site_userdata *su = userData; printf("%.*s", length, buffer); } int main() { htmlSAXHandler hsh; htmlParserCtxtPtr ctxt; FILE *fp; char buf[100]; memset(&hsh, 0, sizeof(htmlSAXHandler)); hsh.startElement = _getpage_startElementSAX; hsh.endElement = _getpage_endElementSAX; hsh.characters = _getpage_charDataSAX; //ctxt = htmlCreatePushParserCtxt(&hsh, NULL, NULL, 0, NULL, XML_CHAR_ENCODING_UTF8); ctxt = htmlCreatePushParserCtxt(&hsh, NULL, NULL, 0, NULL, XML_CHAR_ENCODING_NONE); fp = fopen("bare.txt", "r"); while (!feof(fp)) { fgets(buf, 100, fp); htmlParseChunk(ctxt, buf, strlen(buf), 0); } fclose(fp); htmlParseChunk(ctxt, NULL, 0, 1); }