Re: [xml] Newbie question about an example from the web site
- From: Menu Jacques <imj-219 bluewin ch>
 
- To: xml gnome org
 
- Subject: Re: [xml] Newbie question about an example from the web site
 
- Date: Tue, 1 Dec 2015 07:18:44 +0100
 
Hello folks,
Found out that the code of convert() in EncodingConversionExample.c wasn’t so clear after all...
Fixing that with the code below gives:
07:09:48 (722) menu@ - ~/examples_libxml2 > ./DoEncodingConversionExample.bash 
--> Compiling EncodingConversionExample.c...
16 -rwxr-xr-x. 1 menu menu 13243 Dec  1 07:10 EncodingConversionExample
--> Running EncodingConversionExample:
--> encoding = ISO-8859-1
--> content = élàö
--> size = 8
--> out_size = 15
--> temp = 7
--> ret = 13
--> temp = 7
--> out_size = 13
conversion was successful. converted: 7 octets.
--> out = élà ö
<?xml version="1.0" encoding="ISO-8859-1"?>
<root>élàö</root>
With ISO-8859-7, though:
07:10:05 (722) menu@ - ~/examples_libxml2 > ./DoEncodingConversionExample.bash 
--> Compiling EncodingConversionExample.c...
16 -rwxr-xr-x. 1 menu menu 13243 Dec  1 07:10 EncodingConversionExample
--> Running EncodingConversionExample:
--> encoding = ISO-8859-7
--> content = élàö
--> size = 8
--> out_size = 15
--> temp = 7
./DoEncodingConversionExample.bash: line 28: 14446 Segmentation fault      (core dumped) ./${EXECUTABLE} 
"élàö"
//-----------------------------
#include <string.h>
#include <libxml/parser.h>
unsigned char*
convert (unsigned char *in, char *encoding)
{
  unsigned char *out;
  int ret,size,out_size,temp;
  xmlCharEncodingHandlerPtr handler;
  size = (int)strlen(in)+1;
  out_size = size*2-1;
  out = malloc((size_t)out_size);
  printf("--> size = %d\n", size);
  printf("--> out_size = %d\n", out_size);
  if (out) {
    handler = xmlFindCharEncodingHandler(encoding);
    if (!handler) {
      printf("--> %s encoding handler not found\n", encoding);
      free(out);
      out = NULL;
    }
  }
  if (out) {
    temp=size-1;
    printf("--> temp = %d\n", temp);
    /**
     * xmlCharEncodingInputFunc:
     * @out:  a pointer to an array of bytes to store the UTF-8 result
     * @outlen:  the length of @out
     * @in:  a pointer to an array of chars in the original encoding
     * @inlen:  the length of @in
     *
     * Take a block of chars in the original encoding and try to convert
     * it to an UTF-8 block of chars out.
     *
     * Returns the number of bytes written, -1 if lack of space, or -2
     *     if the transcoding failed.
     * The value of @inlen after return is the number of octets consumed
     *     if the return value is positive, else unpredictiable.
     * The value of @outlen after return is the number of octets consumed.
     */
    ret = handler->input(out, &out_size, in, &temp);
    if (ret == -1) {
      printf("Conversion couldn't be done, lack of space.\n");
    } else {
      if (ret == -2) {
        printf("Conversion couldn't be done, transcoding failed.\n");
      } else {
        if (ret == 0) {
          printf("--> temp = %d\n", temp);
          printf("--> temp = %d\n", temp);
          printf("--> out_size = %d\n", out_size);
          printf("conversion wasn't successful. converted: %i octets.\n",temp);
          free(out);
          out = NULL;
        } else {
          // ret > 0
          printf("--> ret = %d\n", ret);
          printf("--> temp = %d\n", temp);
          printf("--> out_size = %d\n", out_size);
          printf("conversion was successful. converted: %i octets.\n",temp);
          out = realloc(out,out_size+1);
          out[out_size]=0; /*null terminating out*/
        }}}
    } else {
      printf("no mem\n");
    }
    return (out);
  }
  int
  main(int argc, char **argv) {
    unsigned char *content, *out;
    xmlDocPtr doc;
    xmlNodePtr rootnode;
    char *encoding = "ISO-8859-7"; // JMI, was -1
    if (argc <= 1) {
      printf("Usage: %s content\n", argv[0]);
      return(0);
    }
    printf("--> encoding = %s\n", encoding);
    content = argv[1];
    printf("--> content = %s\n", content);
    out = convert(content, encoding);
    printf("--> out = %s\n", out);
    doc = xmlNewDoc ("1.0");
          rootnode = xmlNewDocNode(doc, NULL, (const xmlChar*)"root", out);
          xmlDocSetRootElement(doc, rootnode);
          xmlSaveFormatFileEnc("-", doc, encoding, 1);
          return (1);
  }
//-----------------------------
Le 30 nov. 2015 à 23:31, Menu Jacques <imj-219 bluewin ch> a écrit :
Hello,
Same problem on CentOS 7.1.
My environment contains:
23:25:30 (690) menu@ - ~ > echo $LC_TYPE 
iso_8859_1
23:25:48 (691) menu@ - ~ > echo $LANG
C
23:26:00 (692) menu@ - ~ > echo GDM_LANG
GDM_LANG
and I get:
23:27:02 (695) menu@ - ~/examples_libxml2 > ./EncodingConversionExample foo
--> content = foo
--> size = 4
--> out_size = 7
--> temp = 3
--> temp-size+1 = 0
conversion wasn't successful.
--> out = (null)
<?xml version="1.0" encoding="ISO-8859-1"?>
<root/>
23:27:20 (696) menu@ - ~/examples_libxml2 > ./EncodingConversionExample éöîà
--> content = éöîà
--> size = 9
--> out_size = 17
--> temp = 8
--> temp-size+1 = 0
conversion wasn't successful.
--> out = (null)
<?xml version="1.0" encoding="ISO-8859-1"?>
<root/>
I’m clearly doing something wrong, but what?
JM
Le 25 nov. 2015 à 13:08, Menu Jacques <imj-219 bluewin ch> a écrit :
I use:
13:07:34 (254) menu@ - ~/libxml2-git > apt list libxml2 libxml2-dev
Listing... Done
libxml2/stable,now 2.9.1+dfsg1-5 i386 [installed,automatic]
libxml2-dev/stable,now 2.9.1+dfsg1-5 i386 [installed]
on Debian 8 32bit (jessie)
JM
Le 25 nov. 2015 à 11:17, Menu Jacques <imj-219 bluewin ch> a écrit :
Hello folks,
I’ve successfully built the examples from http://xmlsoft.org/tutorial/index.html, except the last one 
about encoding conversion.
I added printouts to the original code to help (see below) and get:
10:51:35 (250) menu@ - ~/libxml2-git > ./EncodingConversionExample foo
--> content = foo
--> size = 4
--> out_size = 7
--> temp = 3
--> temp-size+1 = 0
conversion wasn't successful.
--> out = (null)
<?xml version="1.0" encoding="ISO-8859-1"?>
<root/>
Thanks for your help!
JM
—
11:15:24 (251) menu@ - ~/libxml2-git > cat EncodingConversionExample.c
#include <string.h>
#include <libxml/parser.h>
unsigned char*
convert (unsigned char *in, char *encoding)
{
    unsigned char *out;
     int ret,size,out_size,temp;
     xmlCharEncodingHandlerPtr handler;
     size = (int)strlen(in)+1; 
     out_size = size*2-1; 
     out = malloc((size_t)out_size); 
     printf("--> size = %d\n", size);
     printf("--> out_size = %d\n", out_size);
     if (out) {
             handler = xmlFindCharEncodingHandler(encoding);
             if (!handler) {
                     printf("--> %s encoding handler not found\n", encoding);
                     free(out);
                     out = NULL;
             }
     }
     if (out) {
             temp=size-1;
             printf("--> temp = %d\n", temp);
             ret = handler->input(out, &out_size, in, &temp);
             if (ret || temp-size+1) {
                     printf("--> temp-size+1 = %d\n", temp-size+1);
                     if (ret) {
                             printf("conversion wasn't successful.\n");
                     } else {
                             printf("conversion wasn't successful. converted: %i octets.\n",temp);
                     }
                     free(out);
                     out = NULL;
             } else {
                     out = realloc(out,out_size+1); 
                     out[out_size]=0; /*null terminating out*/
             }
     } else {
             printf("no mem\n");
     }
     return (out);
}   
int
main(int argc, char **argv) {
    unsigned char *content, *out;
    xmlDocPtr doc;
    xmlNodePtr rootnode;
    char *encoding = "ISO-8859-1";
    
            
    if (argc <= 1) {
            printf("Usage: %s content\n", argv[0]);
            return(0);
    }
    content = argv[1];
     printf("--> content = %s\n", content);
    out = convert(content, encoding);
     printf("--> out = %s\n", out);
    doc = xmlNewDoc ("1.0");
    rootnode = xmlNewDocNode(doc, NULL, (const xmlChar*)"root", out);
    xmlDocSetRootElement(doc, rootnode);
    xmlSaveFormatFileEnc("-", doc, encoding, 1);
    return (1);
}
[Date Prev][
Date Next]   [Thread Prev][
Thread Next]   
[
Thread Index]
[
Date Index]
[
Author Index]