Re: [xml] libxml2 performance



Hi Daniel, All,

This may be a futile exercise in micro-optimization,
but given that over 7% of the run time is attributed to
isolat1ToUTF8, I've tried to improve that routine.

xmllint --noout --timing --repeat doc/libxml2-api.xml
[...]
  7.71      1.58     0.32    12400    25.81    25.81  isolat1ToUTF8
[...]


The attached patch introduces a fast path for plain ASCII
characters in the isolat1 input (up to 40% faster), with no
penalty (it is even slightly faster) when all input chars are
in the upper half, and reasonably linear scaling between the
two extremes. It's the fastest I can get under the constraint
of not being slower on any input.

Of course, for such small routines you are at the mercy
of your compiler anyway, so it's not that clear what gcc
and others will make of it.

Regards,
Peter Jacobi


*** ..\2-4-22\encoding.c        Fri Mar 22 03:35:22 2002
--- encoding.c  Thu May 30 11:19:20 2002
***************
*** 560,597 ****
                const unsigned char* in, int *inlen) {
      unsigned char* outstart = out;
      const unsigned char* base = in;
-     const unsigned char* processed = in;
      unsigned char* outend = out + *outlen;
      const unsigned char* inend;
!     unsigned int c;
  
      inend = in + (*inlen);
!     while (in < inend) {
!       c = *in++;
! 
!         if (out >= outend)
!           break;
! 
!         if (c < 0x80) {
!           *out++ =  c;
!           processed++;
!           continue;
!       } else {
!           /*
!            * make sure there is 2 chars left in advance
!            */
!             if (out + 1 >= outend) {
!               break;
!           }
            *out++= ((c >>  6) & 0x1F) | 0xC0;
              *out++= (c & 0x3F) | 0x80;
!           processed++;
!         }
      }
      *outlen = out - outstart;
!     *inlen = processed - base;
      return(0);
  }
  
  /**
   * UTF8Toisolat1:
--- 560,596 ----
                const unsigned char* in, int *inlen) {
      unsigned char* outstart = out;
      const unsigned char* base = in;
      unsigned char* outend = out + *outlen;
      const unsigned char* inend;
!     const unsigned char* instop;
!     xmlChar c = *in;
  
      inend = in + (*inlen);
!     instop = inend;
!     
!     while (in < inend && out < outend - 1) {
!       if (c >= 0x80) {
            *out++= ((c >>  6) & 0x1F) | 0xC0;
              *out++= (c & 0x3F) | 0x80;
!           ++in;
!           c = *in;
!       }
!       if (instop - in > outend - out) instop = in + (outend - out); 
!       while (c < 0x80 && in < instop) {
!           *out++ =  c;
!           ++in;
!           c = *in;
!       }
!     } 
!     if (in < inend && out < outend && c < 0x80) {
!         *out++ =  c;
!       ++in;
      }
      *outlen = out - outstart;
!     *inlen = in - base;
      return(0);
  }
+ 
  
  /**
   * UTF8Toisolat1:


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]