[libgsf] msole: first attempt at writing non-ascii properties.
- From: Morten Welinder <mortenw src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libgsf] msole: first attempt at writing non-ascii properties.
- Date: Sat, 26 Feb 2022 01:22:58 +0000 (UTC)
commit 6ac8154c729e149b998fcef48ceafc7834898076
Author: Morten Welinder <terra gnome org>
Date: Fri Feb 25 20:21:24 2022 -0500
msole: first attempt at writing non-ascii properties.
We guess a codepage for strings instead of just using 1252.
If anything doesn't convert to 1252, we switch to -535 which,
apparently, means UTF-8.
gsf/gsf-msole-utils.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 97 insertions(+), 1 deletion(-)
---
diff --git a/gsf/gsf-msole-utils.c b/gsf/gsf-msole-utils.c
index 6978b685..1c224ce7 100644
--- a/gsf/gsf-msole-utils.c
+++ b/gsf/gsf-msole-utils.c
@@ -1694,6 +1694,91 @@ cb_count_props (char const *name, GsfDocProp *prop, WritePropState *state)
}
}
+/*
+ * guess_codepage_string:
+ * @state: property-writing state; state->iconv_handle is read and
+ *         state->codepage may be set.
+ * @str: NUL-terminated string to examine, or NULL.
+ *
+ * Trial-converts @str with state->iconv_handle and, if that fails, sets
+ * state->codepage to the UTF-8 codepage.  Nothing happens when a codepage
+ * has already been chosen (state->codepage is non-zero), when @str is
+ * NULL, or when @str contains no byte with the high bit set.
+ */
+static void
+guess_codepage_string (WritePropState *state, const char *str)
+{
+	// 65001, the Windows codepage number for UTF-8, as a signed 16-bit value.
+	const int utf8_codepage = -535;
+	const char *p;
+	char *cstr;
+
+	if (state->codepage || !str)
+		return;
+
+	// Don't bother with ascii strings: stop at the first high-bit byte.
+	for (p = str; *p; p++)
+		if (*p & 0x80)
+			break;
+	if (!*p)
+		return;
+
+	// g_convert_with_iconv() answers an unopened converter with a
+	// g_return_val_if_fail() critical and NULL.  Reach the same outcome
+	// without the warning.
+	if (state->iconv_handle == (GIConv)-1) {
+		state->codepage = utf8_codepage;
+		return;
+	}
+
+	// Only success or failure matters here, so the optional byte counts
+	// and the GError are not requested.
+	cstr = g_convert_with_iconv (str, strlen (str), state->iconv_handle,
+				     NULL, NULL, NULL);
+	if (cstr) {
+		g_free (cstr);
+		return;
+	}
+
+	// Conversion failed.  Switch to UTF-8
+	state->codepage = utf8_codepage;
+}
+
+/*
+ * guess_codepage_prop:
+ * @state: property-writing state, passed through to guess_codepage_string().
+ * @name: property name used to look up the property map, or NULL.
+ * @value: value to examine.
+ *
+ * Determines the variant type of @value.  Vector values are walked
+ * element by element (each element is examined with a NULL name);
+ * VT_LPSTR values are handed to guess_codepage_string().  Every other
+ * type is ignored.
+ */
+static void
+guess_codepage_prop (WritePropState *state, const char *name, GValue const *value)
+{
+	GsfMSOleMetaDataPropMap const *map = NULL;
+	GsfMSOleVariantType type;
+
+	if (name != NULL)
+		map = msole_gsf_name_to_prop (name);
+
+	type = gvalue_to_msole_vt (value, map);
+
+	if (type & VT_VECTOR) {
+		GArray *elems = gsf_value_get_docprop_array (value);
+		// Length is sampled once, before any element is visited.
+		const unsigned len = elems->len;
+		unsigned idx = 0;
+
+		while (idx < len) {
+			guess_codepage_prop (state, NULL,
+					     &g_array_index (elems, GValue, idx));
+			idx++;
+		}
+		return;
+	}
+
+	// Only plain strings can influence the codepage choice.
+	if (type == VT_LPSTR)
+		guess_codepage_string (state, g_value_get_string (value));
+}
+
+/*
+ * guess_codepage:
+ * @state: property-writing state holding the builtin and user property
+ *         lists and their counts.
+ * @user: TRUE to scan the user-defined properties, FALSE for the
+ *        builtin ones.
+ *
+ * Feeds the name and the value of each property in the selected list to
+ * the codepage-guessing helpers.  The selected count is first reduced by
+ * one for the codepage entry and, for user properties, by one more for
+ * the dictionary entry; at most that many list nodes are visited.
+ */
+static void
+guess_codepage (WritePropState *state, gboolean user)
+{
+	GSList *node;
+	unsigned total;
+	unsigned skipped = 0;
+	unsigned remaining;
+
+	if (user) {
+		node = state->user.props;
+		total = state->user.count;
+	} else {
+		node = state->builtin.props;
+		total = state->builtin.count;
+	}
+
+	// Codepage
+	if (total > 0)
+		skipped = 1;
+
+	// Dictionary
+	if (user && skipped < total)
+		skipped++;
+
+	// skipped never exceeds total, so this cannot wrap.
+	remaining = total - skipped;
+
+	while (node != NULL && remaining > 0) {
+		GsfDocProp const *prop = node->data;
+		const char *name = gsf_doc_prop_get_name (prop);
+
+		guess_codepage_string (state, name);
+		guess_codepage_prop (state, name, gsf_doc_prop_get_val (prop));
+
+		node = node->next;
+		remaining--;
+	}
+}
+
+
/**
* gsf_doc_meta_data_write_to_msole:
* @out: #GsfOutput
@@ -1720,8 +1805,9 @@ gsf_doc_meta_data_write_to_msole (GsfDocMetaData const *meta_data,
gboolean success = FALSE;
guint8 buf [4];
WritePropState state;
+ const int default_codepage = 1252;
- state.codepage = 1252;
+ state.codepage = 0;
state.iconv_handle = (GIConv)-1;
state.char_size = 1;
state.out = out;
@@ -1736,6 +1822,16 @@ gsf_doc_meta_data_write_to_msole (GsfDocMetaData const *meta_data,
d (g_print ("Done\n"
"================================\n"););
+ state.iconv_handle = gsf_msole_iconv_open_codepage_for_export (default_codepage);
+ if (state.codepage == 0) {
+ guess_codepage (&state, FALSE);
+ if (state.dict)
+ guess_codepage (&state, TRUE);
+ if (state.codepage == 0)
+ state.codepage = default_codepage;
+ }
+ gsf_iconv_close (state.iconv_handle);
+
state.iconv_handle = gsf_msole_iconv_open_codepage_for_export (state.codepage);
state.char_size = msole_codepage_char_size (state.codepage);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]