[pan2: 6/68] Fix handling of multibyte spaces in text-massager.



commit ce356bd5df6ce5291305c177ae150b10c30acb97
Author: K. Haley <haleykd users sf net>
Date:   Thu Dec 3 23:38:40 2009 -0700

    Fix handling of multibyte spaces in text-massager.
    
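    The original code assumed that a whitespace character is always one
    byte long, which is not true for multibyte UTF-8 spaces.  Also stop
    treating non-breaking spaces as places where a line may be wrapped.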

 pan/usenet-utils/text-massager.cc |   21 +++++++++++++++++----
 1 files changed, 17 insertions(+), 4 deletions(-)
---
diff --git a/pan/usenet-utils/text-massager.cc b/pan/usenet-utils/text-massager.cc
index 7139331..f932d5b 100644
--- a/pan/usenet-utils/text-massager.cc
+++ b/pan/usenet-utils/text-massager.cc
@@ -19,6 +19,7 @@
 
 #include <config.h>
 #include <vector>
+#include <cstring>
 extern "C" {
 #include <glib/gi18n.h>
 }
@@ -201,21 +202,33 @@ namespace
                         int       column)
    {
       int pos = 0;
+      int space_len;
       char * linefeed_here = NULL;
 
       // walk through the entire string
       for (char *pch=str, *end=pch+len; pch!=end; )
       {
          // a linefeed could go here; remember this space
-         if (g_unichar_isspace (g_utf8_get_char (pch)) || *pch=='\n')
-            linefeed_here = pch;
+         gunichar ch = g_utf8_get_char (pch);
+         if (g_unichar_isspace ( ch ) || *pch=='\n')
+           if (g_unichar_break_type(ch) != G_UNICODE_BREAK_NON_BREAKING_GLUE)
+           {
+             linefeed_here = pch;
+             // not all spaces are single char
+             space_len = g_utf8_next_char (pch) - pch;
+           }
 
          // line's too long; add a linefeed if we can
          if (pos>=column && linefeed_here!=NULL)
          {
-            *linefeed_here = '\n';
-            pch = linefeed_here + 1;
+            const char nl[5]="   \n";
+            if( space_len == 1)
+              *linefeed_here = '\n';
+             else
+               memcpy( linefeed_here, 4 - space_len + nl, space_len);
+            pch = linefeed_here + space_len;
             linefeed_here = NULL;
+            space_len = 0;
             pos = 0;
          }
          else



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]