Re: [xml] side effects of thread-enabling libxml2



Daniel Veillard wrote:

On Fri, May 09, 2003 at 06:06:04PM +0200, Stéphane Bidoul wrote:
I think I suggested adding functions
to register the global default values and the other things
needed would be
to take a lock when initializing a thread set of local variables.
Here is the thing.

I selected names like xmlSetLineNumbersDefaultValueForNewThreads(),
because I felt "ForNewThreads" conveyed precisely the purpose
of the functions. Such names are not particularly nice, but then the
feature is quite convoluted too :-/  (and now I'll stop whining).

The changes by file:
- build_glob.py, global.data:
 added a field to control the new feature per variable;
 some changes so line endings are processed correctly on Windows
- globals.c: declared a static "ForNewThreads" sibling for
 each global (except the default SAX handlers and locator);
 added the mutex to protect the variables;
 modified xmlInitializeGlobalState to use the new variables;
 generated new accessors
- python/libxml.c: call xmlInitGlobals to initialize the new mutex,
 initialize the Python error handler for new threads (note that
 the Python generic error handler is truly global -- i.e. not per thread,
 since I still hope to avoid thread-local storage in the Python
 bindings)

For windows, you'll need to redo apibuild.py and defgen.xsl.

You'll probably want to rename/reshuffle things quite a bit...

I attached the patch and a python test case.

-sbi
Index: build_glob.py
===================================================================
RCS file: /cvs/gnome/gnome-xml/build_glob.py,v
retrieving revision 1.3
diff -c -b -r1.3 build_glob.py
*** build_glob.py       10 Feb 2002 13:20:36 -0000      1.3
--- build_glob.py       10 May 2003 22:02:08 -0000
***************
*** 16,25 ****
          self.type=type
          self.name=name
  
  def writeline(file, line=None):
      if line:
          file.write(line)
!     file.write(os.linesep)
  
  if __name__ == "__main__":
      globals={}
--- 16,30 ----
          self.type=type
          self.name=name
  
+ def striplinesep(line):
+     while line and line[-1] in ('\r','\n'):
+         line = line[:-1]
+     return line
+ 
  def writeline(file, line=None):
      if line:
          file.write(line)
!     file.write("\n")
  
  if __name__ == "__main__":
      globals={}
***************
*** 34,41 ****
      # Automatically generated string
      # 
      for line in global_hdr:
!         if line[-len(os.linesep):] == os.linesep:
!             line = line[:-len(os.linesep)]
          if line == " * Automatically generated by build_glob.py.":
            break
        writeline(global_functions_hdr, line)
--- 39,45 ----
      # Automatically generated string
      # 
      for line in global_hdr:
!         line = striplinesep(line)
          if line == " * Automatically generated by build_glob.py.":
            break
        writeline(global_functions_hdr, line)
***************
*** 46,53 ****
      writeline(global_functions_hdr)
  
      for line in global_code:
!         if line[-len(os.linesep):] == os.linesep:
!             line = line[:-len(os.linesep)]
          if line == " * Automatically generated by build_glob.py.":
            break
        writeline(global_functions_impl, line)
--- 50,56 ----
      writeline(global_functions_hdr)
  
      for line in global_code:
!         line = striplinesep(line)
          if line == " * Automatically generated by build_glob.py.":
            break
        writeline(global_functions_impl, line)
***************
*** 61,96 ****
      for line in global_data:
          if line[0]=='#':
              continue
!         if line[-len(os.linesep):] == os.linesep:
!             line = line[:-len(os.linesep)]
          fields = string.split(line, ",")
          # Update the header file
          writeline(global_functions_hdr)
          global_functions_hdr.write("extern "+fields[0]+" *")
!         if len(fields) == 3:
              global_functions_hdr.write("(*")
          global_functions_hdr.write("__"+fields[1]+"(void)")
!         if len(fields) == 3:
              global_functions_hdr.write(")"+fields[2])
          writeline(global_functions_hdr,";")
          writeline(global_functions_hdr, "#ifdef LIBXML_THREAD_ENABLED")
          writeline(global_functions_hdr,"#define "+fields[1]+" \\")
          writeline(global_functions_hdr,"(*(__"+fields[1]+"()))")
          writeline(global_functions_hdr,"#else")
!         if len(fields) == 3:
!             writeline(global_functions_hdr,"LIBXML_DLL_IMPORT extern "+fields[0]+" "+fields[1]+fields[2]+";")
          else:
              writeline(global_functions_hdr,"LIBXML_DLL_IMPORT extern "+fields[0]+" "+fields[1]+";")
          writeline(global_functions_hdr,"#endif")
          # Update the implementation file
          writeline(global_functions_impl)
  #        writeline(global_functions_impl, "extern "+fields[0]+" "+fields[1]+";")
          writeline(global_functions_impl, "#undef\t"+fields[1])
          writeline(global_functions_impl, fields[0]+" *")
!         if len(fields) == 3:
              global_functions_impl.write("(*")
          global_functions_impl.write("__"+fields[1]+"(void)")
!         if len(fields) == 3:
              writeline(global_functions_impl, ")[]")
          writeline(global_functions_impl, " {")
          writeline(global_functions_impl, "    if (IS_MAIN_THREAD)")
--- 64,102 ----
      for line in global_data:
          if line[0]=='#':
              continue
!         line = striplinesep(line)
          fields = string.split(line, ",")
          # Update the header file
          writeline(global_functions_hdr)
          global_functions_hdr.write("extern "+fields[0]+" *")
!         if fields[2]:
              global_functions_hdr.write("(*")
          global_functions_hdr.write("__"+fields[1]+"(void)")
!         if fields[2]:
              global_functions_hdr.write(")"+fields[2])
          writeline(global_functions_hdr,";")
          writeline(global_functions_hdr, "#ifdef LIBXML_THREAD_ENABLED")
          writeline(global_functions_hdr,"#define "+fields[1]+" \\")
          writeline(global_functions_hdr,"(*(__"+fields[1]+"()))")
          writeline(global_functions_hdr,"#else")
!         if fields[2]:
!             writeline(global_functions_hdr,"LIBXML_DLL_IMPORT extern "+fields[0]+" "+fields[1]+fields[2]+";")
          else:
              writeline(global_functions_hdr,"LIBXML_DLL_IMPORT extern "+fields[0]+" "+fields[1]+";")
          writeline(global_functions_hdr,"#endif")
+         # set/get for per-thread global defaults
+         if fields[3]:
+             writeline(global_functions_hdr,"extern void "+fields[1][:3]+"Set"+fields[1][3:]+"ForNewThreads("+fields[0]+" v);")
+             writeline(global_functions_hdr,"extern "+fields[0]+" "+fields[1][:3]+"Get"+fields[1][3:]+"ForNewThreads();")
          # Update the implementation file
          writeline(global_functions_impl)
  #        writeline(global_functions_impl, "extern "+fields[0]+" "+fields[1]+";")
          writeline(global_functions_impl, "#undef\t"+fields[1])
          writeline(global_functions_impl, fields[0]+" *")
!         if fields[2]:
              global_functions_impl.write("(*")
          global_functions_impl.write("__"+fields[1]+"(void)")
!         if fields[2]:
              writeline(global_functions_impl, ")[]")
          writeline(global_functions_impl, " {")
          writeline(global_functions_impl, "    if (IS_MAIN_THREAD)")
***************
*** 98,103 ****
--- 104,123 ----
          writeline(global_functions_impl, "    else")
          writeline(global_functions_impl, "\treturn (&xmlGetGlobalState()->"+fields[1]+");")
          writeline(global_functions_impl, "}")
+         # set/get for per-thread global defaults
+         if fields[3]:
+             writeline(global_functions_impl,"void "+fields[1][:3]+"Set"+fields[1][3:]+"ForNewThreads("+fields[0]+" v) {")
+             writeline(global_functions_impl,"    xmlMutexLock(xmlForNewThreadsMutex);")
+             writeline(global_functions_impl,"    "+fields[1][:3]+fields[1][3:]+"ForNewThreads = v;")
+             writeline(global_functions_impl,"    xmlMutexUnlock(xmlForNewThreadsMutex);")
+             writeline(global_functions_impl,"}")
+             writeline(global_functions_impl,fields[0]+" "+fields[1][:3]+"Get"+fields[1][3:]+"ForNewThreads() {")
+             writeline(global_functions_impl,"    "+fields[0]+" ret;");
+             writeline(global_functions_impl,"    xmlMutexLock(xmlForNewThreadsMutex);")
+             writeline(global_functions_impl,"    ret = "+fields[1][:3]+fields[1][3:]+"ForNewThreads;")
+             writeline(global_functions_impl,"    xmlMutexUnlock(xmlForNewThreadsMutex);")
+             writeline(global_functions_impl,"    return ret;")
+             writeline(global_functions_impl,"}")
      # Terminate the header file with appropriate boilerplate
      writeline(global_functions_hdr)
      writeline(global_functions_hdr, "#ifdef __cplusplus")
Index: global.data
===================================================================
RCS file: /cvs/gnome/gnome-xml/global.data,v
retrieving revision 1.5
diff -c -b -r1.5 global.data
*** global.data 1 Jan 2003 20:59:37 -0000       1.5
--- global.data 10 May 2003 22:02:08 -0000
***************
*** 1,24 ****
! int,oldXMLWDcompatibility
! xmlBufferAllocationScheme,xmlBufferAllocScheme
! int,xmlDefaultBufferSize
! xmlSAXHandler,xmlDefaultSAXHandler
! xmlSAXLocator,xmlDefaultSAXLocator
! int,xmlDoValidityCheckingDefaultValue
! xmlGenericErrorFunc,xmlGenericError
! void *,xmlGenericErrorContext
! int,xmlGetWarningsDefaultValue
! int,xmlIndentTreeOutput
! const char *,xmlTreeIndentString
! int,xmlKeepBlanksDefaultValue
! int,xmlLineNumbersDefaultValue
! int,xmlLoadExtDtdDefaultValue
! int,xmlParserDebugEntities
! const char *,xmlParserVersion
! int,xmlPedanticParserDefaultValue
! int,xmlSaveNoEmptyTags
! #const xmlChar,xmlStringComment,[]
! #const xmlChar,xmlStringText,[]
! #const xmlChar,xmlStringTextNoenc,[]
! int,xmlSubstituteEntitiesDefaultValue
! xmlRegisterNodeFunc,xmlRegisterNodeDefaultValue
! xmlDeregisterNodeFunc,xmlDeregisterNodeDefaultValue
--- 1,25 ----
! #type,name,array?,threadGlobalDefault accessor?
! int,oldXMLWDcompatibility,,
! xmlBufferAllocationScheme,xmlBufferAllocScheme,,1
! int,xmlDefaultBufferSize,,1
! xmlSAXHandler,xmlDefaultSAXHandler,,
! xmlSAXLocator,xmlDefaultSAXLocator,,
! int,xmlDoValidityCheckingDefaultValue,,1
! xmlGenericErrorFunc,xmlGenericError,,
! void *,xmlGenericErrorContext,,
! int,xmlGetWarningsDefaultValue,,1
! int,xmlIndentTreeOutput,,1
! const char *,xmlTreeIndentString,,1
! int,xmlKeepBlanksDefaultValue,,1
! int,xmlLineNumbersDefaultValue,,1
! int,xmlLoadExtDtdDefaultValue,,1
! int,xmlParserDebugEntities,,1
! const char *,xmlParserVersion,,
! int,xmlPedanticParserDefaultValue,,1
! int,xmlSaveNoEmptyTags,,1
! #const xmlChar,xmlStringComment,[],1
! #const xmlChar,xmlStringText,[],1
! #const xmlChar,xmlStringTextNoenc,[],1
! int,xmlSubstituteEntitiesDefaultValue,,1
! xmlRegisterNodeFunc,xmlRegisterNodeDefaultValue,,
! xmlDeregisterNodeFunc,xmlDeregisterNodeDefaultValue,,
Index: parser.c
===================================================================
RCS file: /cvs/gnome/gnome-xml/parser.c,v
retrieving revision 1.267
diff -c -b -r1.267 parser.c
*** parser.c    9 May 2003 22:26:28 -0000       1.267
--- parser.c    10 May 2003 22:02:31 -0000
***************
*** 11140,11145 ****
--- 11140,11146 ----
      if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
        (xmlGenericError == NULL))
        initGenericErrorDefaultFunc(NULL);
+     xmlInitGlobals();
      xmlInitThreads();
      xmlInitMemory();
      xmlInitCharEncodingHandlers();
***************
*** 11176,11180 ****
--- 11177,11182 ----
      xmlCatalogCleanup();
  #endif
      xmlCleanupThreads();
+     xmlCleanupGlobals();
      xmlParserInitialized = 0;
  }
Index: include/libxml/globals.h
===================================================================
RCS file: /cvs/gnome/gnome-xml/include/libxml/globals.h,v
retrieving revision 1.11
diff -c -b -r1.11 globals.h
*** include/libxml/globals.h    19 Apr 2003 00:07:51 -0000      1.11
--- include/libxml/globals.h    10 May 2003 22:02:32 -0000
***************
*** 22,27 ****
--- 22,30 ----
  extern "C" {
  #endif
  
+ void xmlInitGlobals();
+ void xmlCleanupGlobals();
+ 
  /*
   * Externally global symbols which need to be protected for backwards
   * compatibility support.
***************
*** 112,119 ****
--- 115,126 ----
  
  void  xmlInitializeGlobalState(xmlGlobalStatePtr gs);
  
+ void xmlSetGenericErrorFuncForNewThreads(void *ctx, xmlGenericErrorFunc handler);
+ 
  xmlRegisterNodeFunc xmlRegisterNodeDefault(xmlRegisterNodeFunc func);
+ xmlRegisterNodeFunc xmlRegisterNodeDefaultForNewThreads(xmlRegisterNodeFunc func);
  xmlDeregisterNodeFunc xmlDeregisterNodeDefault(xmlDeregisterNodeFunc func);
+ xmlDeregisterNodeFunc xmlDeregisterNodeDefaultForNewThreads(xmlDeregisterNodeFunc func);
  
  /*
   * In general the memory allocation entry points are not kept
***************
*** 217,222 ****
--- 224,231 ----
  #else
  LIBXML_DLL_IMPORT extern xmlBufferAllocationScheme xmlBufferAllocScheme;
  #endif
+ extern void xmlSetBufferAllocSchemeForNewThreads(xmlBufferAllocationScheme v);
+ extern xmlBufferAllocationScheme xmlGetBufferAllocSchemeForNewThreads();
  
  extern int *__xmlDefaultBufferSize(void);
  #ifdef LIBXML_THREAD_ENABLED
***************
*** 225,230 ****
--- 234,241 ----
  #else
  LIBXML_DLL_IMPORT extern int xmlDefaultBufferSize;
  #endif
+ extern void xmlSetDefaultBufferSizeForNewThreads(int v);
+ extern int xmlGetDefaultBufferSizeForNewThreads();
  
  extern xmlSAXHandler *__xmlDefaultSAXHandler(void);
  #ifdef LIBXML_THREAD_ENABLED
***************
*** 249,254 ****
--- 260,267 ----
  #else
  LIBXML_DLL_IMPORT extern int xmlDoValidityCheckingDefaultValue;
  #endif
+ extern void xmlSetDoValidityCheckingDefaultValueForNewThreads(int v);
+ extern int xmlGetDoValidityCheckingDefaultValueForNewThreads();
  
  extern xmlGenericErrorFunc *__xmlGenericError(void);
  #ifdef LIBXML_THREAD_ENABLED
***************
*** 273,278 ****
--- 286,293 ----
  #else
  LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue;
  #endif
+ extern void xmlSetGetWarningsDefaultValueForNewThreads(int v);
+ extern int xmlGetGetWarningsDefaultValueForNewThreads();
  
  extern int *__xmlIndentTreeOutput(void);
  #ifdef LIBXML_THREAD_ENABLED
***************
*** 281,286 ****
--- 296,303 ----
  #else
  LIBXML_DLL_IMPORT extern int xmlIndentTreeOutput;
  #endif
+ extern void xmlSetIndentTreeOutputForNewThreads(int v);
+ extern int xmlGetIndentTreeOutputForNewThreads();
  
  extern const char * *__xmlTreeIndentString(void);
  #ifdef LIBXML_THREAD_ENABLED
***************
*** 289,294 ****
--- 306,313 ----
  #else
  LIBXML_DLL_IMPORT extern const char * xmlTreeIndentString;
  #endif
+ extern void xmlSetTreeIndentStringForNewThreads(const char * v);
+ extern const char * xmlGetTreeIndentStringForNewThreads();
  
  extern int *__xmlKeepBlanksDefaultValue(void);
  #ifdef LIBXML_THREAD_ENABLED
***************
*** 297,302 ****
--- 316,323 ----
  #else
  LIBXML_DLL_IMPORT extern int xmlKeepBlanksDefaultValue;
  #endif
+ extern void xmlSetKeepBlanksDefaultValueForNewThreads(int v);
+ extern int xmlGetKeepBlanksDefaultValueForNewThreads();
  
  extern int *__xmlLineNumbersDefaultValue(void);
  #ifdef LIBXML_THREAD_ENABLED
***************
*** 305,310 ****
--- 326,333 ----
  #else
  LIBXML_DLL_IMPORT extern int xmlLineNumbersDefaultValue;
  #endif
+ extern void xmlSetLineNumbersDefaultValueForNewThreads(int v);
+ extern int xmlGetLineNumbersDefaultValueForNewThreads();
  
  extern int *__xmlLoadExtDtdDefaultValue(void);
  #ifdef LIBXML_THREAD_ENABLED
***************
*** 313,318 ****
--- 336,343 ----
  #else
  LIBXML_DLL_IMPORT extern int xmlLoadExtDtdDefaultValue;
  #endif
+ extern void xmlSetLoadExtDtdDefaultValueForNewThreads(int v);
+ extern int xmlGetLoadExtDtdDefaultValueForNewThreads();
  
  extern int *__xmlParserDebugEntities(void);
  #ifdef LIBXML_THREAD_ENABLED
***************
*** 321,326 ****
--- 346,353 ----
  #else
  LIBXML_DLL_IMPORT extern int xmlParserDebugEntities;
  #endif
+ extern void xmlSetParserDebugEntitiesForNewThreads(int v);
+ extern int xmlGetParserDebugEntitiesForNewThreads();
  
  extern const char * *__xmlParserVersion(void);
  #ifdef LIBXML_THREAD_ENABLED
***************
*** 337,342 ****
--- 364,371 ----
  #else
  LIBXML_DLL_IMPORT extern int xmlPedanticParserDefaultValue;
  #endif
+ extern void xmlSetPedanticParserDefaultValueForNewThreads(int v);
+ extern int xmlGetPedanticParserDefaultValueForNewThreads();
  
  extern int *__xmlSaveNoEmptyTags(void);
  #ifdef LIBXML_THREAD_ENABLED
***************
*** 345,350 ****
--- 374,381 ----
  #else
  LIBXML_DLL_IMPORT extern int xmlSaveNoEmptyTags;
  #endif
+ extern void xmlSetSaveNoEmptyTagsForNewThreads(int v);
+ extern int xmlGetSaveNoEmptyTagsForNewThreads();
  
  extern int *__xmlSubstituteEntitiesDefaultValue(void);
  #ifdef LIBXML_THREAD_ENABLED
***************
*** 353,358 ****
--- 384,391 ----
  #else
  LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue;
  #endif
+ extern void xmlSetSubstituteEntitiesDefaultValueForNewThreads(int v);
+ extern int xmlGetSubstituteEntitiesDefaultValueForNewThreads();
  
  extern xmlRegisterNodeFunc *__xmlRegisterNodeDefaultValue(void);
  #ifdef LIBXML_THREAD_ENABLED
Index: python/libxml.c
===================================================================
RCS file: /cvs/gnome/gnome-xml/python/libxml.c,v
retrieving revision 1.42
diff -c -b -r1.42 libxml.c
*** python/libxml.c     23 Apr 2003 07:36:50 -0000      1.42
--- python/libxml.c     10 May 2003 22:02:37 -0000
***************
*** 1267,1272 ****
--- 1267,1273 ----
      printf("libxml_xmlErrorInitialize() called\n");
  #endif
      xmlSetGenericErrorFunc(NULL, libxml_xmlErrorFuncHandler);
+     xmlSetGenericErrorFuncForNewThreads(NULL, libxml_xmlErrorFuncHandler);
  }
  
  PyObject *
***************
*** 2620,2625 ****
--- 2621,2628 ----
  
      if (initialized != 0)
          return;
+     /* XXX xmlInitParser does much more than this */
+     xmlInitGlobals();
      xmlRegisterDefaultOutputCallbacks();
      xmlRegisterDefaultInputCallbacks();
      m = Py_InitModule((char *) "libxml2mod", libxmlMethods);
import string, sys
import libxml2

from threading import Thread

# Overall result flag: test() sets this to 1 when one of its checks fails,
# and the script exits with status 1 at the end if it is set.
failed = 0

class ErrorHandler:
    """Collect the messages passed to its ``handler`` callback."""

    def __init__(self):
        # Messages in the order they were reported.
        self.errors = list()

    def handler(self, ctx, str):
        """Record one reported message; ``ctx`` is accepted but not used."""
        # ``str`` shadows the builtin, but the parameter name is kept so the
        # callback signature stays exactly as it was.
        self.errors += [str]

def getLineNumbersDefault():
    """Return the current lineNumbersDefault setting, leaving it unchanged.

    libxml2.lineNumbersDefault() is used here as a setter whose return value
    is treated as the previous setting: the value is read by setting 0 and
    then immediately writing back whatever was returned.
    """
    previous = libxml2.lineNumbersDefault(0)
    libxml2.lineNumbersDefault(previous)  # put the returned value back
    return previous

def test(expectedLineNumbersDefault):
    """Parse bad.xml and check error reporting and lineNumbersDefault.

    Sets the module-level ``failed`` flag to 1 when parsing raises without
    any message having reached the registered error handler, or when the
    value returned by getLineNumbersDefault() differs from
    ``expectedLineNumbersDefault``.  Used both directly on the main thread
    and as a Thread target.
    """
    global failed
    eh = ErrorHandler()
    libxml2.registerErrorHandler(eh.handler, "")
    try:
        doc = libxml2.parseFile("bad.xml")
    except libxml2.parserError:
        # Only a parse failure is expected here; anything else (including
        # KeyboardInterrupt/SystemExit) must propagate rather than be
        # silently swallowed.
        if not eh.errors:
            failed = 1
            print("FAILED to get parse errors")
    else:
        # The parse unexpectedly succeeded: release the document so it is
        # not reported as a leak by the memory check at the end of the script.
        doc.freeDoc()
    if expectedLineNumbersDefault != getLineNumbersDefault():
        failed = 1
        print("FAILED to obtain correct value for lineNumbersDefault")

# Step 1: change the value on the main thread only and verify it there.
libxml2.lineNumbersDefault(1)
test(1)

# Step 2: threads created now are expected to see 0 for lineNumbersDefault,
# because the new value has been set on the main thread only.
workers = [Thread(target=test, args=(0,)) for _ in range(2)]
for worker in workers:
    worker.start()
for worker in workers:
    worker.join()

# Step 3: set lineNumbersDefault for future new threads, then check that a
# thread created afterwards picks the value up.
libxml2.setLineNumbersDefaultValueForNewThreads(1)
late_worker = Thread(target=test, args=(1,))
late_worker.start()
late_worker.join()

if failed:
    print("FAILED")
    sys.exit(1)

# Memory debug specific
libxml2.cleanupParser()
if libxml2.debugMemory(1) != 0:
    print("Memory leak %d bytes" % (libxml2.debugMemory(1)))
    libxml2.dumpMemory()
else:
    print("OK")


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]