Re: [xml] libxml2 performance
- From: Bjorn Reese <breese mail1 stofanet dk>
- To: xml gnome org
- Subject: Re: [xml] libxml2 performance
- Date: Wed, 29 May 2002 16:43:54 +0000
Daniel Veillard wrote:
Please note that I am listing the results of two different test cases
below, separated by a dashed line.
xmllint --noout --timing --repeat doc/libxml2-api.xml
Quantify on Solaris 2.5.1 gives (top-20)
14.57% _free_unlocked
10.69% xmlParseAttValue
10.28% realfree
6.18% t_splay
5.84% _malloc_unlocked
4.41% isolat1ToUTF8
3.49% cleanfree
3.24% t_delete
3.15% xmlCurrentChar
2.42% mutex_lock
2.42% mutex_unlock
2.26% xmlParseCharData
1.80% memset
1.66% malloc
1.47% xmlParseStartTag
1.47% xmlParseName
1.43% _smalloc
1.36% xmlNextChar
1.36% free
1.32% xmlStrndup
Most of these (including mutex locking) are indeed related to memory
management, so I also collected information related to malloc/free:
Function name: malloc
Filename: /usr/lib/libc.so.1
Called: 13481642 times
Function time: 148298062 cycles ( 1.66% of .root.)
Function+descendants time: 2200589659 cycles (24.67% of .root.)
Average function time: 11 cycles
Minimum function time: 11 cycles
Maximum function time: 11 cycles
Distribution to callers:
2058700 times (35.03%) xmlNewTextLen
7841422 times (30.16%) xmlStrndup
1167400 times (12.76%) xmlNewNsProp
1167400 times (11.92%) xmlParseAttValue
675000 times ( 6.90%) xmlNewNode
570000 times ( 3.17%) xmlParseStartTag
100 times ( 0.03%) xmlNewParserCtxt
400 times ( 0.00%) xmlBufferCreate
500 times ( 0.00%) xmlInitParserCtxt
100 times ( 0.00%) _findbuf
100 times ( 0.00%) xmlNewDoc
100 times ( 0.00%) xmlNewInputStream
100 times ( 0.00%) xmlURIUnescapeString
100 times ( 0.00%) xmlParseVersionNum
100 times ( 0.00%) xmlParseEncName
1 time ( 0.00%) xmlInitCharEncodingHandlers
100 times ( 0.00%) xmlAllocParserInputBuffer
7 times ( 0.00%) xmlNewCharEncodingHandler
5 times ( 0.00%) xmlHashAddEntry3
5 times ( 0.00%) xmlAddEntity
2 times ( 0.00%) xmlHashCreate
Contributions from descendants:
13481642 times (81.01%) _malloc_unlocked
13481642 times ( 6.13%) mutex_lock
13481642 times ( 6.13%) mutex_unlock
Function name: free
Filename: /usr/lib/libc.so.1
Called: 13481642 times
Function time: 121334778 cycles ( 1.36% of .root.)
Function+descendants time: 2619886858 cycles (29.37% of .root.)
Average function time: 9 cycles
Minimum function time: 9 cycles
Maximum function time: 9 cycles
Distribution to callers:
5467400 times (57.25%) xmlFreeNodeList
2334800 times (25.58%) xmlFreeProp
2904800 times ( 9.73%) xmlParseStartTag
1167400 times ( 2.72%) attribute
458600 times ( 1.83%) xmlParseElement
675000 times ( 1.72%) startElement
432800 times ( 1.07%) xmlParseEndTag
33000 times ( 0.08%) xmlParseEntityRef
5500 times ( 0.01%) xmlStringGetNodeList
400 times ( 0.00%) xmlFreeDoc
800 times ( 0.00%) xmlFreeParserCtxt
400 times ( 0.00%) xmlBufferFree
400 times ( 0.00%) xmlFreeInputStream
100 times ( 0.00%) xmlFreeParserInputBuffer
100 times ( 0.00%) fclose
100 times ( 0.00%) xmlParserInputBufferCreateFilename
15 times ( 0.00%) xmlFreeEntity
15 times ( 0.00%) xmlCleanupCharEncodingHandlers
12 times ( 0.00%) xmlHashFree
Contributions from descendants:
13481642 times (85.08%) _free_unlocked
13481642 times ( 5.15%) mutex_lock
13481642 times ( 5.15%) mutex_unlock
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
I also tried:
xmllint --repeat --noout test/svg3
44.06% xmlParseAttValue
13.09% xmlCurrentChar
6.73% _free_unlocked
4.62% realfree
3.56% xmlStringGetNodeList
2.86% _malloc_unlocked
2.44% t_splay
1.83% cleanfree
1.63% t_delete
1.17% mutex_unlock
1.17% mutex_lock
1.11% _read
0.91% _open
0.86% memset
0.78% malloc
0.78% xmlParseContent
0.69% _smalloc
0.64% free
0.62% xmlStrndup
0.60% xmlFreeNodeList
In this test memory management no longer dorminates the picture.
Below is a line-based profile of xmlParseAttValue and xmlCurrentChar.
It is kind of difficult to see how those two can be improved (maybe
ctxt->input->cur could be replaced by a pointer, although I would
expect the compiler to handle such optimizations.)
* *********************************************************
* Quantify Annotated Source of
/home/br/misc/libxml/libxml2-2.4.22/parser.c
* Data from Quantify'd ./xmllint.quant (pid 24288)
* Quantify version: 4.5.1
* - Annotation: Function+descendant time (% of f+d time)
*
* Legend:
*
* Lines are annotated with a distinguishing character and
collected data.
*
* * - A comment line inserted by Quantify.
* | - A line containing a single executed block.
* + - A line containing multiple basic blocks.
* . - The extent of any basic blocks over several lines.
* # - A line containing basic blocks that were not executed.
* *********************************************************
xmlChar *
* *********************************************************
* Function: xmlParseAttValue
* Called: 48000 times
* Function time: 461465300 cycles (44.06% of
.root.)
* Function+descendants time: 638554086 cycles (60.96% of
.root.)
* Distribution to Callers:
* 48000 times xmlParseAttribute
* *********************************************************
0.00%# xmlParseAttValue(xmlParserCtxtPtr ctxt) {
. xmlChar limit = 0;
. xmlChar *buf = NULL;
. int len = 0;
. int buf_size = 0;
. int c, l;
. xmlChar *current = NULL;
. xmlEntityPtr ent;
.
.
0.22%+ SHRINK;
0.03%+ if (NXT(0) == '"') {
0.00%# ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
. limit = '"';
. NEXT;
0.02%+ } else if (NXT(0) == '\'') {
0.00%# limit = '\'';
0.00%# ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
0.27%| NEXT;
0.02%| } else {
. ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
0.00%# if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
0.00%# ctxt->sax->error(ctxt->userData, "AttValue: \" or '
expected\n");
0.00%# ctxt->wellFormed = 0;
0.00%# ctxt->disableSAX = 1;
0.00%# return(NULL);
0.00%# }
.
. /*
. * allocate a translation buffer.
. */
0.00%# buf_size = XML_PARSER_BUFFER_SIZE;
1.66%| buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
0.02%| if (buf == NULL) {
0.00%# perror("xmlParseAttValue: malloc failed");
0.00%# return(NULL);
. }
.
. /*
. * OK loop until we reach one of the ending char or a size
limit.
. */
0.13%| c = CUR_CHAR(l);
0.00%# while (((NXT(0) != limit) && /* checked */
10.03%+ (c != '<')) || (ctxt->token != 0)) {
0.00%# if (c == 0) break;
4.27%| if (ctxt->token == '&') {
0.00%# if (ctxt->replaceEntities) {
. if (len > buf_size - 10) {
. growBuffer(buf);
. }
. buf[len++] = '&';
. } else {
. /*
. * The reparsing will be done in xmlStringGetNodeList()
. * called by the attribute() function in SAX.c
. */
0.00%# static xmlChar buffer[6] = "&";
.
. if (len > buf_size - 10) {
0.00%# growBuffer(buf);
. }
0.00%# current = &buffer[0];
0.00%# while (*current != 0) { /* non input consuming */
0.00%# buf[len++] = *current++;
0.00%# }
2.85%+ ctxt->token = 0;
. }
. } else if (c == '&') {
0.00%# if (NXT(1) == '#') {
0.00%# int val = xmlParseCharRef(ctxt);
0.00%# if (val == '&') {
0.00%# if (ctxt->replaceEntities) {
0.00%# if (len > buf_size - 10) {
0.00%# growBuffer(buf);
. }
0.00%# buf[len++] = '&';
0.00%# } else {
. /*
. * The reparsing will be done in xmlStringGetNodeList()
. * called by the attribute() function in SAX.c
. */
0.00%# static xmlChar buffer[6] = "&";
.
. if (len > buf_size - 10) {
0.00%# growBuffer(buf);
. }
0.00%# current = &buffer[0];
0.00%# while (*current != 0) { /* non input consuming */
0.00%# buf[len++] = *current++;
0.00%# }
0.00%# }
. } else {
0.00%# if (len > buf_size - 10) {
0.00%# growBuffer(buf);
. }
0.00%# len += xmlCopyChar(0, &buf[len], val);
. }
0.00%# } else {
0.00%# ent = xmlParseEntityRef(ctxt);
0.00%# if ((ent != NULL) &&
. (ctxt->replaceEntities != 0)) {
0.00%# xmlChar *rep;
.
. if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
0.00%# rep = xmlStringDecodeEntities(ctxt, ent->content,
. XML_SUBSTITUTE_REF, 0, 0, 0);
0.00%# if (rep != NULL) {
. current = rep;
0.00%# while (*current != 0) { /* non input consuming */
0.00%# buf[len++] = *current++;
0.00%# if (len > buf_size - 10) {
0.00%# growBuffer(buf);
. }
0.00%# }
0.00%# xmlFree(rep);
. }
0.00%# } else {
0.00%# if (len > buf_size - 10) {
0.00%# growBuffer(buf);
. }
0.00%# if (ent->content != NULL)
0.00%# buf[len++] = ent->content[0];
. }
. } else if (ent != NULL) {
0.00%# int i = xmlStrlen(ent->name);
. const xmlChar *cur = ent->name;
.
. /*
. * This may look absurd but is needed to detect
. * entities problems
. */
0.00%# if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
. (ent->content != NULL)) {
0.00%# xmlChar *rep;
. rep = xmlStringDecodeEntities(ctxt, ent->content,
. XML_SUBSTITUTE_REF, 0, 0, 0);
0.00%# if (rep != NULL)
0.00%# xmlFree(rep);
0.00%# }
.
. /*
. * Just output the reference
. */
. buf[len++] = '&';
0.00%# if (len > buf_size - i - 10) {
0.00%# growBuffer(buf);
. }
0.00%# for (;i > 0;i--)
0.00%# buf[len++] = *cur++;
0.00%# buf[len++] = ';';
. }
. }
0.00%# } else {
10.42%+ if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9))
{
0.80%+ COPY_BUF(l,buf,len,0x20);
0.48%| if (len > buf_size - 10) {
0.31%+ growBuffer(buf);
. }
. } else {
5.05%+ COPY_BUF(l,buf,len,c);
3.79%| if (len > buf_size - 10) {
1.54%| growBuffer(buf);
. }
. }
21.35%+ NEXTL(l);
. }
9.30%+ GROW;
24.20%+ c = CUR_CHAR(l);
2.85%| }
0.00%# buf[len++] = 0;
0.06%+ if (RAW == '<') {
0.00%# ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
0.00%# if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
0.00%# ctxt->sax->error(ctxt->userData,
. "Unescaped '<' not allowed in attributes values\n");
. ctxt->wellFormed = 0;
. ctxt->disableSAX = 1;
0.06%+ } else if (RAW != limit) {
. ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
0.00%# if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
0.00%# ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
0.00%# ctxt->wellFormed = 0;
0.00%# ctxt->disableSAX = 1;
0.00%# } else
0.27%| NEXT;
0.02%| return(buf);
}
int
* *********************************************************
* Function: xmlCurrentChar
* Called: 9138200 times
* Function time: 137073000 cycles (13.09% of
.root.)
* Function+descendants time: 137073000 cycles (13.09% of
.root.)
* Distribution to Callers:
* 9138200 times xmlParseAttValue
* *********************************************************
0.00%# xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
20.00%| if (ctxt->instate == XML_PARSER_EOF)
0.00%# return(0);
.
13.33%| if (ctxt->token != 0) {
0.00%# *len = 0;
0.00%# return(ctxt->token);
. }
40.00%| if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <=
0x7F)) {
. *len = 1;
. return((int) *ctxt->input->cur);
. }
0.00%# if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
. /*
. * We are supposed to handle UTF8, check it's valid
. * From rfc2044: encoding of the Unicode values on UTF-8:
. *
. * UCS-4 range (hex.) UTF-8 octet sequence (binary)
. * 0000 0000-0000 007F 0xxxxxxx
. * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
. * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
. *
. * Check for the 0x110000 limit too
. */
0.00%# const unsigned char *cur = ctxt->input->cur;
. unsigned char c;
. unsigned int val;
.
. c = *cur;
. if (c & 0x80) {
0.00%# if (cur[1] == 0)
0.00%# xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
0.00%# if ((cur[1] & 0xc0) != 0x80)
. goto encoding_error;
0.00%# if ((c & 0xe0) == 0xe0) {
.
0.00%# if (cur[2] == 0)
0.00%# xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
0.00%# if ((cur[2] & 0xc0) != 0x80)
. goto encoding_error;
0.00%# if ((c & 0xf0) == 0xf0) {
0.00%# if (cur[3] == 0)
0.00%# xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
0.00%# if (((c & 0xf8) != 0xf0) ||
. ((cur[3] & 0xc0) != 0x80))
. goto encoding_error;
. /* 4-byte code */
0.00%# *len = 4;
0.00%# val = (cur[0] & 0x7) << 18;
0.00%# val |= (cur[1] & 0x3f) << 12;
0.00%# val |= (cur[2] & 0x3f) << 6;
0.00%# val |= cur[3] & 0x3f;
0.00%# } else {
. /* 3-byte code */
0.00%# *len = 3;
0.00%# val = (cur[0] & 0xf) << 12;
0.00%# val |= (cur[1] & 0x3f) << 6;
0.00%# val |= cur[2] & 0x3f;
. }
0.00%# } else {
. /* 2-byte code */
0.00%# *len = 2;
0.00%# val = (cur[0] & 0x1f) << 6;
0.00%# val |= cur[1] & 0x3f;
. }
0.00%# if (!IS_CHAR(val)) {
0.00%# if ((ctxt->sax != NULL) &&
. (ctxt->sax->error != NULL))
0.00%# ctxt->sax->error(ctxt->userData,
. "Char 0x%X out of allowed range\n", val);
0.00%# ctxt->errNo = XML_ERR_INVALID_ENCODING;
0.00%# ctxt->wellFormed = 0;
0.00%# ctxt->disableSAX = 1;
. }
0.00%# return(val);
. } else {
. /* 1-byte code */
. *len = 1;
. if (*ctxt->input->cur == 0xD) {
. if (ctxt->input->cur[1] == 0xA) {
. ctxt->nbChars++;
. ctxt->input->cur++;
. }
. return(0xA);
. }
. return((int) *ctxt->input->cur);
. }
0.00%# }
. /*
. * Assume it's a fixed length encoding (1) with
. * a compatible encoding for the ASCII set, since
. * XML constructs only use < 128 chars
. */
. *len = 1;
0.00%# if (*ctxt->input->cur == 0xD) {
0.00%# if (ctxt->input->cur[1] == 0xA) {
0.00%# ctxt->nbChars++;
0.00%# ctxt->input->cur++;
. }
0.00%# return(0xA);
. }
. return((int) *ctxt->input->cur);
. encoding_error:
. /*
. * If we detect an UTF8 error that probably mean that the
. * input encoding didn't get properly advertised in the
. * declaration header. Report the error and switch the
encoding
. * to ISO-Latin-1 (if you don't like this policy, just
declare the
. * encoding !)
. */
0.00%# if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
0.00%# ctxt->sax->error(ctxt->userData,
. "Input is not proper UTF-8, indicate encoding !\n");
0.00%# ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X
0x%02X\n",
. ctxt->input->cur[0], ctxt->input->cur[1],
. ctxt->input->cur[2], ctxt->input->cur[3]);
. }
0.00%# ctxt->wellFormed = 0;
0.00%# ctxt->errNo = XML_ERR_INVALID_ENCODING;
.
0.00%# ctxt->charset = XML_CHAR_ENCODING_8859_1;
0.00%# *len = 1;
13.33%| return((int) *ctxt->input->cur);
13.33%| }
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]