[xml] libxml2 and thread-safety



hi,

While developing a small multithreaded app using libxml2 and libxslt, I was bitten by some random segfaults. I traced them back to libxml2; here's the gdb stack trace:

#0  0x40090961 in startElement () from /usr/lib/libxml2.so.2
#1  0x400c30a1 in htmlParseCharRef () from /usr/lib/libxml2.so.2
#2  0x400c48df in htmlFreeParserCtxt () from /usr/lib/libxml2.so.2
#3  0x400c52e2 in htmlParseChunk () from /usr/lib/libxml2.so.2
#4  0x4001cd34 in _libwatson_http_search_html (http_context=0x8081b80)
    at watson_http.c:89
#5  0x4001cfb9 in libwatson_http_search (wpi=0x8080b90) at watson_http.c:171
#6  0x40019104 in _watson_search_main (data=...) at search.c:171
#7  0x40146efa in pthread_start_thread () from /lib/libpthread.so.0
#8  0x40146f41 in pthread_start_thread_event () from /lib/libpthread.so.0

The code that caused the segfault is attached. Just note that the libwatson_http_search function can be entered by more than one thread at a time and is itself thread-safe (as long as I didn't put any nasty bug in it).

libxml2 version is 2.4.5 on a 2.4.7 linux box running debian/sid. code was compiled with _REENTRANT and linked with libpthreads, etc.

Can someone tell me whether libxml2 is really thread-safe (and the culprit is my code), or whether special precautions should be taken when using it in a multithreaded environment?

many thanx,
federico
/*
 * watson_http.c -- watson backend for http searches
 * $Id: watson_http.c,v 1.9 2001/10/07 23:29:39 fog Exp $
 *
 * Copyright (C) 2001 Federico Di Gregorio <fog debian org>
 *
 * This file is part of the watson library.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2,
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <glib.h>
#include <libxml/HTMLparser.h>
#include <libxml/nanohttp.h>
#include <libxml/xmlmemory.h>
#include "libwatson/backend.h"
#include "libwatson/debug.h"
#include "libwatson/search.h"


#define BUFFER_LEN 8192


/**** functions forward declarations and backend information ****/
/* Prototypes for the two backend entry points defined later in this file;
   they are declared up front so the WatsonBackendInfo initializer can
   reference them. */
gboolean libwatson_http_init(void);
xmlDocPtr libwatson_http_search(WatsonPluginInstance *wpi);

/* Descriptor for the http backend.  The WatsonBackend type is declared
   outside this file, so the field meanings below are inferred from the
   initializer order and from how .name / .version are read in
   libwatson_http_init() -- NOTE(review): confirm against libwatson/backend.h. */
WatsonBackend WatsonBackendInfo = {
    "http",                     /* presumably the backend name (.name) */
    "1.0",                      /* presumably the backend version (.version) */
    libwatson_http_init,        /* initialisation entry point */
    libwatson_http_search,      /* search entry point */
    NULL                        /* purpose of this last slot is not visible here */
};


/**** backend functions ****/

/*
 * Backend initialisation hook.
 *
 * Emits one debug line (category WATSON_DBG_BACKEND) announcing the name
 * and version stored in WatsonBackendInfo, then reports success.  No other
 * setup is performed here.
 *
 * Returns: always TRUE.
 */
gboolean
libwatson_http_init(void)
{
    DBG(WATSON_DBG_BACKEND,
        "%s backend, version %s installed",
        WatsonBackendInfo.name,
        WatsonBackendInfo.version);

    return TRUE;
}


xmlDocPtr
_libwatson_http_search_html(void *http_context)
{
    xmlDocPtr res = NULL;
    htmlParserCtxtPtr parser;
    xmlSAXHandler silent, *old;
    gchar buffer[BUFFER_LEN];
    size_t len;

    /* first read from the network (if we are lucky all the stuff we need
       will be in the buffer after a single fetch...) */
    len = xmlNanoHTTPRead(http_context, buffer, BUFFER_LEN);

    if (len > 0) {
        parser = htmlCreatePushParserCtxt(NULL, NULL, buffer, len, NULL, 0);
        parser->pedantic = 0;

        /* setup custom warning and error handlers to suppress error messages */
        memcpy(&silent, parser->sax, sizeof(xmlSAXHandler));
        silent.warning = NULL;
        silent.error = NULL;
        
        old = parser->sax;
        //parser->sax = &silent;

        do {
            len = xmlNanoHTTPRead(http_context, buffer, BUFFER_LEN);
            htmlParseChunk(parser, buffer, len, 0);
        }
        while (len > 0 && parser->instate != 14 && parser->instate != -1);

        if (len <= 0) {
            htmlParseChunk(parser, buffer, 0, 1);
        }
        res = parser->myDoc;
        parser->sax = old;
        htmlFreeParserCtxt(parser);
    }
    
    return res;
}

xmlDocPtr
_libwatson_http_search_xml(void *http_context)
{
    xmlDocPtr res = NULL;
    xmlParserCtxtPtr parser;
    xmlSAXHandler silent, *old;
    gchar buffer[BUFFER_LEN];
    size_t len;

    /* first read from the network (if we are lucky all the stuff we need
       will be in the buffer after a single fetch...) */
    len = xmlNanoHTTPRead(http_context, buffer, BUFFER_LEN);

    if (len > 0) {
        parser = xmlCreatePushParserCtxt(NULL, NULL, buffer, len, NULL);
        parser->pedantic = 0;

        /* setup custom warning and error handlers to suppress error messages */
        memcpy(&silent, parser->sax, sizeof(xmlSAXHandler));
        silent.warning = NULL;
        silent.error = NULL;
        
        old = parser->sax;
        //parser->sax = &silent;

        do {
            len = xmlNanoHTTPRead(http_context, buffer, BUFFER_LEN);
            xmlParseChunk(parser, buffer, len, 0);
        }
        while (len > 0 && parser->instate != 14 && parser->instate != -1);

        if (len <= 0) {
            xmlParseChunk(parser, buffer, 0, 1);
        }
        res = parser->myDoc;
        parser->sax = old;
        xmlFreeParserCtxt(parser);
    }
    
    return res;
}

/*
 * libwatson_http_search:
 * @wpi: plugin instance carrying the "url" and "result-format" arguments
 *
 * Search entry point of the http backend.  Opens the URL with nanohttp,
 * picks the XML parser when the "result-format" argument is "xml" or the
 * reported content type starts with "text/xml" (the HTML parser otherwise),
 * parses the response and closes the connection.
 *
 * NOTE(review): ownership of the strings returned by
 * watson_plugin_extract_arg() is not visible from this file; they are not
 * freed here -- confirm that is correct.
 *
 * Returns: the parsed document, or NULL if no URL was supplied, the
 * connection could not be opened, or nothing could be parsed.
 */
xmlDocPtr
libwatson_http_search(WatsonPluginInstance *wpi)
{
    xmlDocPtr res;
    xmlChar *url, *fmt;
    void *http_context;
    gchar *content_type = NULL;

    url = watson_plugin_extract_arg(wpi, "url");
    fmt = watson_plugin_extract_arg(wpi, "result-format");
    /* never hand a NULL pointer to %s */
    DBG(WATSON_DBG_BACKEND, "trying out %s (result is %s)",
        url != NULL ? (const char *) url : "(null)",
        fmt != NULL ? (const char *) fmt : "(null)");
    if (url == NULL) {
        return NULL;
    }

    /* open the connection, content_type can be used to select the right
       parser (html or xml); it stays NULL when the server sent none */
    http_context = xmlNanoHTTPOpen((const char *) url, &content_type);
    if (http_context == NULL) {
        return NULL;
    }
    DBG(WATSON_DBG_BACKEND, "connection open: content_type = %s",
        content_type != NULL ? content_type : "(null)");

    /* analyze format and create correct parser (xmlStrcmp/xmlStrncmp
       accept NULL arguments, so fmt and content_type may be missing) */
    if (xmlStrcmp(fmt, (const xmlChar *) "xml") == 0
        || xmlStrncmp((const xmlChar *) content_type,
                      (const xmlChar *) "text/xml", 8) == 0) {
        res = _libwatson_http_search_xml(http_context);
    }
    else {
        res = _libwatson_http_search_html(http_context);
    }

    xmlNanoHTTPClose(http_context);

    DBG(WATSON_DBG_BACKEND, "file parsed, result is at %p", res);

    /* content_type was allocated by libxml2, so release it with libxml2's
       deallocator rather than plain free() */
    if (content_type != NULL) {
        xmlFree(content_type);
    }
    return res;
}
-- 
Federico Di Gregorio
Debian GNU/Linux Developer & Italian Press Contact        fog debian org
INIT.D Developer                                           fog initd org
  Qu'est ce que la folie? Juste un sentiment de liberté si
   fort qu'on en oublie ce qui nous rattache au monde... -- J. de Loctra


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]