#include #include #include #include #include static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts) { printf("start_element(%s)\n", name); } static void end_element(void * ctx, const xmlChar *name) { printf("end_element(%s)\n", name); } static void start_document(void * ctx) { printf("start_document\n"); } static void end_document(void * ctx) { printf("end_document\n"); } static void start_element_ns ( void * ctx, const xmlChar * localname, const xmlChar * prefix, const xmlChar * uri, int nb_namespaces, const xmlChar ** namespaces, int nb_attributes, int nb_defaulted, const xmlChar ** attributes) { printf("start_element_ns(%s, %s, %s, %d)\n", localname, prefix, uri, nb_namespaces); } /** * end_element_ns was borrowed heavily from libxml-ruby. */ static void end_element_ns ( void * ctx, const xmlChar * localname, const xmlChar * prefix, const xmlChar * uri) { printf("end_element_ns(%s, %s, %s)\n", localname, prefix, uri); } static void characters_func(void * ctx, const xmlChar * ch, int len) { printf("characters(%s)\n", ch); } static void comment_func(void * ctx, const xmlChar * value) { printf("comment(%s)\n", value); } static void warning_func(void * ctx, const char *msg, ...) { printf("warning:"); va_list args; va_start(args, msg); vprintf(msg, args); va_end(args); } static void error_func(void * ctx, const char *msg, ...) { printf("error:"); va_list args; va_start(args, msg); vprintf(msg, args); va_end(args); } static void cdata_block(void * ctx, const xmlChar * value, int len) { printf("cdata(%s)\n", value); } htmlSAXHandlerPtr init_sax_handler() { xmlSAXHandlerPtr handler = calloc(1, sizeof(xmlSAXHandler)); xmlSetStructuredErrorFunc(NULL, NULL); handler->startDocument = start_document; handler->endDocument = end_document; handler->startElement = start_element; handler->endElement = end_element; handler->startElementNs = start_element_ns; handler->endElementNs = end_element_ns; handler->characters = characters_func; handler->comment = comment_func; handler->warning = warning_func; handler->error = error_func; handler->cdataBlock = cdata_block; handler->initialized = XML_SAX2_MAGIC; return handler; } int main() { char * html; htmlParserCtxtPtr ctxt; xmlCharEncodingHandlerPtr enc; html = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n"; ctxt = htmlCreateMemoryParserCtxt(html, strlen(html)); enc = xmlFindCharEncodingHandler("utf-8"); xmlSwitchToEncoding(ctxt, enc); ctxt->sax = init_sax_handler(); htmlParseDocument(ctxt); return 0; }