Hmm, actually I didn't use the encoding functionality of libxml, because I didn't use encodings different from UTF-8 yet. Being glad to contribute a little I put together this little program: #include <string.h> #include <stdio.h> #include <libxml/encoding.h> int main(int argc, char **argv) { unsigned char *ptr,*in,*out; int ret,size,out_size,temp; xmlCharEncodingHandlerPtr handler; in = argv[1]; size = (int)strlen(in)+1; /*works if "in" is null terminated and contains no 0 octets (not the case for UTF-16 for example)*/ /*you need to know how to determine the in and out buffer size the rest is dealt with by libxml (transparently using iconv if present)*/ out_size = size*2-1; /*works for iso latin-1 or subsets (-1 because the terminating 0 always is a single octet)*/ out = malloc((size_t)out_size); if (out) { handler = xmlFindCharEncodingHandler("ISO-8859-1"); /*alternatively:*/ /*handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_8859_1);*/ temp=size-1; ret = handler->input(out, &out_size, in, &temp); if (ret || temp-size+1) { if (ret) { printf("conversion wasn't successful.\n"); } else { printf("conversion wasn't successful. converted: %i octets.\n",temp); } free(out); out = NULL; } else { out = realloc(out,out_size+1); /*to save a couple of bytes*/ out[out_size]=0; /*null terminating out*/ for (ptr=out;*ptr;ptr++) { printf("%x\n",*((unsigned char *)ptr)); } } } else { printf("no mem\n",out); } return -(!out); } less comprehensive for the special case of latin-1: unsigned char *ptr,*in,*out; int ret,size,out_size,temp; xmlCharEncodingHandlerPtr handler; in = "some null terminated iso latin-1 string"; temp = size = (int)strlen(in)+1; /*terminating null included*/ out_size = size*2-1; /*terminating null is just one byte*/ out = malloc((size_t)out_size); if (!out || (ret=isolat1ToUTF8(out, &out_size, in, &temp)) || temp-size) { free(out); out=NULL; } I included the latter (without the definitions) in the faq patch. 
The former could be used for the tutorial or whatever. Maybe one should mention that there are functions to define aliases for encodings and even to add encoding handlers of one's own (all described in encoding.html). Cheers, Marcus
Attachment:
faq.diff
Description: Binary data