[xml] PATCH: gzip decoding support for remote HTTP resources



Hi there,
attached is a sample patch which might be useful for retrieving
compressed resources over HTTP. It sends the "Accept-Encoding: gzip"
request header, and then passes the response body through zlib if the
"Content-Encoding" response header is set to "gzip".

However, maybe it needs a flag somewhere so that this behaviour stays
disabled unless it is explicitly requested?

Comments welcome,
Gary.
Index: nanohttp.c
===================================================================
RCS file: /cvs/gnome/libxml2/nanohttp.c,v
retrieving revision 1.93
diff -c -r1.93 nanohttp.c
*** nanohttp.c  13 Oct 2005 23:12:42 -0000      1.93
--- nanohttp.c  15 Dec 2005 03:39:52 -0000
***************
*** 11,19 ****
   * daniel veillard com
   */
   
- /* TODO add compression support, Send the Accept- , and decompress on the
-         fly with ZLIB if found at compile-time */
- 
  #define NEED_SOCKETS
  #define IN_LIBXML
  #include "libxml.h"
--- 11,16 ----
***************
*** 66,71 ****
--- 63,72 ----
  #ifdef SUPPORT_IP6
  #include <resolv.h>
  #endif
+ #ifdef HAVE_ZLIB_H
+ #include <zlib.h>
+ #endif
+ 
  
  #ifdef VMS
  #include <stropts>
***************
*** 152,157 ****
--- 153,162 ----
      char *authHeader; /* contents of {WWW,Proxy}-Authenticate header */
      char *encoding;   /* encoding extracted from the contentType */
      char *mimeType;   /* Mime-Type extracted from the contentType */
+ #ifdef HAVE_ZLIB_H
+     z_stream *strm;   /* Zlib stream object */
+     int usesGzip;     /* "Content-Encoding: gzip" was detected */
+ #endif
  } xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
  
  static int initialized = 0;
***************
*** 413,418 ****
--- 418,430 ----
      if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
      if (ctxt->location != NULL) xmlFree(ctxt->location);
      if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
+ #ifdef HAVE_ZLIB_H
+     if (ctxt->strm != NULL) {
+       inflateEnd(ctxt->strm);
+       xmlFree(ctxt->strm);
+     }
+ #endif
+ 
      ctxt->state = XML_NANO_HTTP_NONE;
      if (ctxt->fd >= 0) closesocket(ctxt->fd);
      ctxt->fd = -1;
***************
*** 752,757 ****
--- 764,789 ----
        if (ctxt->authHeader != NULL)
            xmlFree(ctxt->authHeader);
        ctxt->authHeader = xmlMemStrdup(cur);
+ #ifdef HAVE_ZLIB_H
+     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) {
+       cur += 17;
+       while ((*cur == ' ') || (*cur == '\t')) cur++;
+       if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) {
+           ctxt->usesGzip = 1;
+ 
+           ctxt->strm = xmlMalloc(sizeof(z_stream));
+ 
+           if (ctxt->strm != NULL) {
+               ctxt->strm->zalloc = Z_NULL;
+               ctxt->strm->zfree = Z_NULL;
+               ctxt->strm->opaque = Z_NULL;
+               ctxt->strm->avail_in = 0;
+               ctxt->strm->next_in = Z_NULL;
+ 
+               inflateInit2( ctxt->strm, 31 );
+           }
+       }
+ #endif
      } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) {
        cur += 15;
        ctxt->ContentLength = strtol( cur, NULL, 10 );
***************
*** 1156,1166 ****
--- 1188,1222 ----
  int
  xmlNanoHTTPRead(void *ctx, void *dest, int len) {
      xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
+ #ifdef HAVE_ZLIB_H
+     int bytes_read = 0;
+     int orig_avail_in;
+     int z_ret;
+ #endif
  
      if (ctx == NULL) return(-1);
      if (dest == NULL) return(-1);
      if (len <= 0) return(0);
  
+ #ifdef HAVE_ZLIB_H
+     if (ctxt->usesGzip == 1 && ctxt->strm == NULL) return(0);
+  
+     ctxt->strm->next_out = dest;
+     ctxt->strm->avail_out = len;
+ 
+     do {
+       orig_avail_in = ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr - bytes_read;
+       ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read);
+ 
+       z_ret = inflate(ctxt->strm, Z_NO_FLUSH);
+       bytes_read += orig_avail_in - ctxt->strm->avail_in;
+ 
+       if (z_ret != Z_OK) break;
+     } while (ctxt->strm->avail_out > 0 && xmlNanoHTTPRecv(ctxt) > 0);
+ 
+     ctxt->inrptr += bytes_read;
+     return(len - ctxt->strm->avail_out);
+ #else
      while (ctxt->inptr - ctxt->inrptr < len) {
          if (xmlNanoHTTPRecv(ctxt) <= 0) break;
      }
***************
*** 1169,1174 ****
--- 1225,1231 ----
      memcpy(dest, ctxt->inrptr, len);
      ctxt->inrptr += len;
      return(len);
+ #endif
  }
  
  /**
***************
*** 1275,1280 ****
--- 1332,1340 ----
      if (ctxt->query != NULL)
        blen += strlen(ctxt->query) + 1;
      blen += strlen(method) + strlen(ctxt->path) + 24;
+ #ifdef HAVE_ZLIB_H
+     blen += 23;
+ #endif
      bp = (char*)xmlMallocAtomic(blen);
      if ( bp == NULL ) {
          xmlNanoHTTPFreeCtxt( ctxt );
***************
*** 1302,1307 ****
--- 1362,1371 ----
  
      p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n", 
                    ctxt->hostname);
+ 
+ #ifdef HAVE_ZLIB_H
+     p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n");
+ #endif
  
      if (contentType != NULL && *contentType) 
        p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]