Re: [xml] Newbie question about an example from the web site



Hello folks,

I found out that the code of convert() in EncodingConversionExample.c wasn’t as clear as I thought after all...
After fixing how it checks the return value of the conversion (see the code below), I get:



07:09:48 (722) menu@ - ~/examples_libxml2 > ./DoEncodingConversionExample.bash 

--> Compiling EncodingConversionExample.c...

16 -rwxr-xr-x. 1 menu menu 13243 Dec  1 07:10 EncodingConversionExample

--> Running EncodingConversionExample:

--> encoding = ISO-8859-1
--> content = élàö
--> size = 8
--> out_size = 15
--> temp = 7
--> ret = 13
--> temp = 7
--> out_size = 13
conversion was successful. converted: 7 octets.
--> out = élà ö
<?xml version="1.0" encoding="ISO-8859-1"?>
<root>élàö</root>



With ISO-8859-7, though, the same program crashes with a segmentation fault:

07:10:05 (722) menu@ - ~/examples_libxml2 > ./DoEncodingConversionExample.bash 

--> Compiling EncodingConversionExample.c...

16 -rwxr-xr-x. 1 menu menu 13243 Dec  1 07:10 EncodingConversionExample

--> Running EncodingConversionExample:

--> encoding = ISO-8859-7
--> content = élàö
--> size = 8
--> out_size = 15
--> temp = 7
./DoEncodingConversionExample.bash: line 28: 14446 Segmentation fault      (core dumped) ./${EXECUTABLE} 
"élàö"



//-----------------------------

#include <string.h>
#include <libxml/parser.h>


unsigned char*
convert (unsigned char *in, char *encoding)
{
  unsigned char *out;
  int ret,size,out_size,temp;
  xmlCharEncodingHandlerPtr handler;

  size = (int)strlen(in)+1;
  out_size = size*2-1;
  out = malloc((size_t)out_size);

  printf("--> size = %d\n", size);
  printf("--> out_size = %d\n", out_size);

  if (out) {
    handler = xmlFindCharEncodingHandler(encoding);

    if (!handler) {
      printf("--> %s encoding handler not found\n", encoding);
      free(out);
      out = NULL;
    }
  }

  if (out) {
    temp=size-1;
    printf("--> temp = %d\n", temp);

    /**
     * xmlCharEncodingInputFunc:
     * @out:  a pointer to an array of bytes to store the UTF-8 result
     * @outlen:  the length of @out
     * @in:  a pointer to an array of chars in the original encoding
     * @inlen:  the length of @in
     *
     * Take a block of chars in the original encoding and try to convert
     * it to an UTF-8 block of chars out.
     *
     * Returns the number of bytes written, -1 if lack of space, or -2
     *     if the transcoding failed.
     * The value of @inlen after return is the number of octets consumed
     *     if the return value is positive, else unpredictiable.
     * The value of @outlen after return is the number of octets consumed.
     */
    ret = handler->input(out, &out_size, in, &temp);

    if (ret == -1) {
      printf("Conversion couldn't be done, lack of space.\n");
    } else {
      if (ret == -2) {
        printf("Conversion couldn't be done, transcoding failed.\n");
      } else {
        if (ret == 0) {
          printf("--> temp = %d\n", temp);
          printf("--> temp = %d\n", temp);
          printf("--> out_size = %d\n", out_size);
          printf("conversion wasn't successful. converted: %i octets.\n",temp);
          free(out);
          out = NULL;
        } else {
          // ret > 0
          printf("--> ret = %d\n", ret);
          printf("--> temp = %d\n", temp);
          printf("--> out_size = %d\n", out_size);
          printf("conversion was successful. converted: %i octets.\n",temp);
          out = realloc(out,out_size+1);
          out[out_size]=0; /*null terminating out*/

        }}}
    } else {
      printf("no mem\n");
    }
    return (out);
  }


  /**
   * main:
   * @argc:  number of command line arguments
   * @argv:  argv[1] is the text to convert, expected in the encoding
   *         hard-coded below
   *
   * Converts argv[1] to UTF-8 with convert(), wraps it in a <root> element
   * and writes the document to stdout in the original encoding.
   *
   * Returns 0 on success and 1 on a usage error, a failed conversion or a
   *     failure to build/save the document.  (The exit codes used to be
   *     inverted: 1 on success, 0 on a usage error.)
   */
  int
  main(int argc, char **argv) {

    unsigned char *content, *out;
    xmlDocPtr doc;
    xmlNodePtr rootnode;
    char *encoding = "ISO-8859-7"; // JMI, was -1
    int status = 1;

    if (argc <= 1) {
      /* argv[0] may be missing when argc is 0 */
      printf("Usage: %s content\n",
             (argc > 0 && argv[0]) ? argv[0] : "EncodingConversionExample");
      return (1);
    }

    printf("--> encoding = %s\n", encoding);

    content = (unsigned char *)argv[1];
    printf("--> content = %s\n", (const char *)content);

    out = convert(content, encoding);
    if (out) {
      printf("--> out = %s\n", (const char *)out);
    } else {
      /* Passing NULL to %s is undefined behaviour; print the marker
       * ourselves.  The (empty) document is still written below, but the
       * exit status reports the failure. */
      printf("--> out = (null)\n");
    }

    doc = xmlNewDoc((const xmlChar *)"1.0");
    if (doc != NULL) {
      rootnode = xmlNewDocNode(doc, NULL, (const xmlChar *)"root", out);
      if (rootnode != NULL) {
        xmlDocSetRootElement(doc, rootnode);

        /* xmlSaveFormatFileEnc() returns -1 on failure */
        if (xmlSaveFormatFileEnc("-", doc, encoding, 1) >= 0 && out != NULL) {
          status = 0;
        }
      }
      xmlFreeDoc(doc);
    }

    /* The node holds its own copy of the text, so the buffer returned by
     * convert() can be released here. */
    free(out);
    xmlCleanupParser();
    return (status);
  }


//-----------------------------



Le 30 nov. 2015 à 23:31, Menu Jacques <imj-219 bluewin ch> a écrit :

Hello,

Same problem on CentOS 7.1.

My environment contains:

23:25:30 (690) menu@ - ~ > echo $LC_TYPE 
iso_8859_1
23:25:48 (691) menu@ - ~ > echo $LANG
C
23:26:00 (692) menu@ - ~ > echo GDM_LANG
GDM_LANG

and I get:

23:27:02 (695) menu@ - ~/examples_libxml2 > ./EncodingConversionExample foo
--> content = foo
--> size = 4
--> out_size = 7
--> temp = 3
--> temp-size+1 = 0
conversion wasn't successful.
--> out = (null)
<?xml version="1.0" encoding="ISO-8859-1"?>
<root/>
23:27:20 (696) menu@ - ~/examples_libxml2 > ./EncodingConversionExample éöîà
--> content = éöîà
--> size = 9
--> out_size = 17
--> temp = 8
--> temp-size+1 = 0
conversion wasn't successful.
--> out = (null)
<?xml version="1.0" encoding="ISO-8859-1"?>
<root/>

I’m clearly doing something wrong, but what?

JM

Le 25 nov. 2015 à 13:08, Menu Jacques <imj-219 bluewin ch> a écrit :

I use:

13:07:34 (254) menu@ - ~/libxml2-git > apt list libxml2 libxml2-dev
Listing... Done
libxml2/stable,now 2.9.1+dfsg1-5 i386 [installed,automatic]
libxml2-dev/stable,now 2.9.1+dfsg1-5 i386 [installed]

on Debian 8 32bit (jessie)

JM

Le 25 nov. 2015 à 11:17, Menu Jacques <imj-219 bluewin ch> a écrit :

Hello folks,

I’ve successfully built the examples from http://xmlsoft.org/tutorial/index.html, except the last one 
about encoding conversion.

I added printouts to the original code to help (see below) and get:

10:51:35 (250) menu@ - ~/libxml2-git > ./EncodingConversionExample foo
--> content = foo
--> size = 4
--> out_size = 7
--> temp = 3
--> temp-size+1 = 0
conversion wasn't successful.
--> out = (null)
<?xml version="1.0" encoding="ISO-8859-1"?>
<root/>

Thanks for your help!

JM

—

11:15:24 (251) menu@ - ~/libxml2-git > cat EncodingConversionExample.c
#include <string.h>
#include <libxml/parser.h>


unsigned char*
convert (unsigned char *in, char *encoding)
{
    unsigned char *out;
     int ret,size,out_size,temp;
     xmlCharEncodingHandlerPtr handler;

     size = (int)strlen(in)+1; 
     out_size = size*2-1; 
     out = malloc((size_t)out_size); 

     printf("--> size = %d\n", size);
     printf("--> out_size = %d\n", out_size);

     if (out) {
             handler = xmlFindCharEncodingHandler(encoding);

             if (!handler) {
                     printf("--> %s encoding handler not found\n", encoding);
                     free(out);
                     out = NULL;
             }
     }

     if (out) {
             temp=size-1;
             printf("--> temp = %d\n", temp);

             ret = handler->input(out, &out_size, in, &temp);

             if (ret || temp-size+1) {
                     printf("--> temp-size+1 = %d\n", temp-size+1);

                     if (ret) {
                             printf("conversion wasn't successful.\n");
                     } else {
                             printf("conversion wasn't successful. converted: %i octets.\n",temp);
                     }

                     free(out);
                     out = NULL;
             } else {
                     out = realloc(out,out_size+1); 
                     out[out_size]=0; /*null terminating out*/

             }
     } else {
             printf("no mem\n");
     }
     return (out);
}   


/**
 * main:
 * @argc:  number of command line arguments
 * @argv:  argv[1] is the text to convert, expected in ISO-8859-1
 *
 * Converts argv[1] to UTF-8 with convert(), wraps it in a <root> element
 * and writes the document to stdout in the original encoding.
 *
 * Returns 0 on success and 1 on a usage error, a failed conversion or a
 *     failure to build/save the document.  (The exit codes used to be
 *     inverted: 1 on success, 0 on a usage error.)
 */
int
main(int argc, char **argv) {

    unsigned char *content, *out;
    xmlDocPtr doc;
    xmlNodePtr rootnode;
    char *encoding = "ISO-8859-1";
    int status = 1;

    if (argc <= 1) {
        /* argv[0] may be missing when argc is 0 */
        printf("Usage: %s content\n",
               (argc > 0 && argv[0]) ? argv[0] : "EncodingConversionExample");
        return (1);
    }

    content = (unsigned char *)argv[1];
    printf("--> content = %s\n", (const char *)content);

    out = convert(content, encoding);
    if (out) {
        printf("--> out = %s\n", (const char *)out);
    } else {
        /* Passing NULL to %s is undefined behaviour; print the marker
         * ourselves.  The (empty) document is still written below, but the
         * exit status reports the failure. */
        printf("--> out = (null)\n");
    }

    doc = xmlNewDoc((const xmlChar *)"1.0");
    if (doc != NULL) {
        rootnode = xmlNewDocNode(doc, NULL, (const xmlChar *)"root", out);
        if (rootnode != NULL) {
            xmlDocSetRootElement(doc, rootnode);

            /* xmlSaveFormatFileEnc() returns -1 on failure */
            if (xmlSaveFormatFileEnc("-", doc, encoding, 1) >= 0
                && out != NULL) {
                status = 0;
            }
        }
        xmlFreeDoc(doc);
    }

    /* The node holds its own copy of the text, so the buffer returned by
     * convert() can be released here. */
    free(out);
    xmlCleanupParser();
    return (status);
}






[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]