Re: [xml] PATCH: gzip decoding support for remote HTTP resources



Daniel Veillard wrote:
On Thu, Dec 15, 2005 at 03:51:38AM +0000, Gary Coady wrote:

Hi there,
attached is a sample patch which might be useful for retrieving
compressed resources over HTTP. It sends the "Accept-Encoding" header,
and then passes the response through zlib if the "Content-Encoding"
response header is appropriately set.


  Sounds cool, but I'm a bit worried about the case where the response is not
compressed: in xmlNanoHTTPRead() you will get past the check
  if (ctxt->usesGzip == 1 && ctxt->strm == NULL)
because ctxt->usesGzip should be 0, but ctxt->strm will be NULL and
will be dereferenced on the next line. It seems to me that this whole
chunk of code needs to be guarded by
   if (ctxt->usesGzip) {
    ....
   }
and the normal existing code should be run in the default case. Otherwise the
patch looks fine. Can you confirm my analysis?

I realized this morning that I hadn't handled the non-gzip case, but
didn't quite get into work before you looked at the patch :-)

An updated patch is attached.

Gary.
Index: nanohttp.c
===================================================================
RCS file: /cvs/gnome/libxml2/nanohttp.c,v
retrieving revision 1.93
diff -c -r1.93 nanohttp.c
*** nanohttp.c  13 Oct 2005 23:12:42 -0000      1.93
--- nanohttp.c  15 Dec 2005 10:10:01 -0000
***************
*** 11,19 ****
   * daniel veillard com
   */
   
- /* TODO add compression support, Send the Accept- , and decompress on the
-         fly with ZLIB if found at compile-time */
- 
  #define NEED_SOCKETS
  #define IN_LIBXML
  #include "libxml.h"
--- 11,16 ----
***************
*** 66,71 ****
--- 63,72 ----
  #ifdef SUPPORT_IP6
  #include <resolv.h>
  #endif
+ #ifdef HAVE_ZLIB_H
+ #include <zlib.h>
+ #endif
+ 
  
  #ifdef VMS
  #include <stropts>
***************
*** 152,157 ****
--- 153,162 ----
      char *authHeader; /* contents of {WWW,Proxy}-Authenticate header */
      char *encoding;   /* encoding extracted from the contentType */
      char *mimeType;   /* Mime-Type extracted from the contentType */
+ #ifdef HAVE_ZLIB_H
+     z_stream *strm;   /* Zlib stream object */
+     int usesGzip;     /* "Content-Encoding: gzip" was detected */
+ #endif
  } xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
  
  static int initialized = 0;
***************
*** 413,418 ****
--- 418,430 ----
      if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
      if (ctxt->location != NULL) xmlFree(ctxt->location);
      if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
+ #ifdef HAVE_ZLIB_H
+     if (ctxt->strm != NULL) {
+       inflateEnd(ctxt->strm);
+       xmlFree(ctxt->strm);
+     }
+ #endif
+ 
      ctxt->state = XML_NANO_HTTP_NONE;
      if (ctxt->fd >= 0) closesocket(ctxt->fd);
      ctxt->fd = -1;
***************
*** 752,757 ****
--- 764,789 ----
        if (ctxt->authHeader != NULL)
            xmlFree(ctxt->authHeader);
        ctxt->authHeader = xmlMemStrdup(cur);
+ #ifdef HAVE_ZLIB_H
+     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) {
+       cur += 17;
+       while ((*cur == ' ') || (*cur == '\t')) cur++;
+       if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) {
+           ctxt->usesGzip = 1;
+ 
+           ctxt->strm = xmlMalloc(sizeof(z_stream));
+ 
+           if (ctxt->strm != NULL) {
+               ctxt->strm->zalloc = Z_NULL;
+               ctxt->strm->zfree = Z_NULL;
+               ctxt->strm->opaque = Z_NULL;
+               ctxt->strm->avail_in = 0;
+               ctxt->strm->next_in = Z_NULL;
+ 
+               inflateInit2( ctxt->strm, 31 );
+           }
+       }
+ #endif
      } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) {
        cur += 15;
        ctxt->ContentLength = strtol( cur, NULL, 10 );
***************
*** 1156,1166 ****
--- 1188,1225 ----
  int
  xmlNanoHTTPRead(void *ctx, void *dest, int len) {
      xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
+ #ifdef HAVE_ZLIB_H
+     int bytes_read = 0;
+     int orig_avail_in;
+     int z_ret;
+ #endif
  
      if (ctx == NULL) return(-1);
      if (dest == NULL) return(-1);
      if (len <= 0) return(0);
  
+ #ifdef HAVE_ZLIB_H
+     if (ctxt->usesGzip == 1) {
+         if (ctxt->strm == NULL) return(0);
+  
+         ctxt->strm->next_out = dest;
+         ctxt->strm->avail_out = len;
+ 
+         do {
+             orig_avail_in = ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr - bytes_read;
+             ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read);
+ 
+             z_ret = inflate(ctxt->strm, Z_NO_FLUSH);
+             bytes_read += orig_avail_in - ctxt->strm->avail_in;
+ 
+             if (z_ret != Z_OK) break;
+         } while (ctxt->strm->avail_out > 0 && xmlNanoHTTPRecv(ctxt) > 0);
+ 
+         ctxt->inrptr += bytes_read;
+         return(len - ctxt->strm->avail_out);
+     }
+ #endif
+ 
      while (ctxt->inptr - ctxt->inrptr < len) {
          if (xmlNanoHTTPRecv(ctxt) <= 0) break;
      }
***************
*** 1275,1280 ****
--- 1334,1342 ----
      if (ctxt->query != NULL)
        blen += strlen(ctxt->query) + 1;
      blen += strlen(method) + strlen(ctxt->path) + 24;
+ #ifdef HAVE_ZLIB_H
+     blen += 23;
+ #endif
      bp = (char*)xmlMallocAtomic(blen);
      if ( bp == NULL ) {
          xmlNanoHTTPFreeCtxt( ctxt );
***************
*** 1302,1307 ****
--- 1364,1373 ----
  
      p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n", 
                    ctxt->hostname);
+ 
+ #ifdef HAVE_ZLIB_H
+     p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n");
+ #endif
  
      if (contentType != NULL && *contentType) 
        p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]