[pango/segmentation-util] Add pango-segmentation




commit 4555b12e78f700a7d52b9ed438b8100bdd352ec8
Author: Matthias Clasen <mclasen redhat com>
Date:   Sun May 9 23:25:37 2021 -0400

    Add pango-segmentation
    
    A little utility to show text segmentation.

 utils/meson.build          |   7 ++
 utils/pango-segmentation.c | 182 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 189 insertions(+)
---
diff --git a/utils/meson.build b/utils/meson.build
index b71cfe2a..12968ef7 100644
--- a/utils/meson.build
+++ b/utils/meson.build
@@ -80,3 +80,10 @@ if cairo_dep.found()
                           ])
 
 endif
+
+pango_segmentation = executable('pango-segmentation',  # command-line helper built from pango-segmentation.c
+                                'pango-segmentation.c',
+                                dependencies: [ libpango_dep, libpangocairo_dep ],  # NOTE(review): declared after the endif of the cairo_dep.found() block but uses libpangocairo_dep - confirm that is defined when cairo is absent
+                                include_directories: [ root_inc ],
+                                install: false,  # built only; not installed
+                                )
diff --git a/utils/pango-segmentation.c b/utils/pango-segmentation.c
new file mode 100644
index 00000000..3e774167
--- /dev/null
+++ b/utils/pango-segmentation.c
@@ -0,0 +1,182 @@
+/* Pango
+ * pango-segmentation.c: Show text segmentation as determined by Pango
+ *
+ * Copyright (C) 2021 Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.         See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <glib.h>
+#include <pango/pangocairo.h>
+#include <string.h>
+#include <locale.h>
+
+#ifndef G_OS_WIN32
+#include <unistd.h>
+#endif
+
+typedef enum {  /* Kind of text boundary to display; selected by name via kind_from_string() */
+  GRAPHEME,     /* "grapheme": shown from PangoLogAttr.is_cursor_position; first value, so it equals 0 */
+  WORD,         /* "word": shown from PangoLogAttr.is_word_boundary */
+  LINE,         /* "line": shown from PangoLogAttr.is_line_break */
+  SENTENCE      /* "sentence": shown from PangoLogAttr.is_sentence_boundary */
+} BreakKind;
+
+/* Map the value of the --kind option to a BreakKind.
+ *
+ * Recognized values are "grapheme", "word", "line" and "sentence",
+ * compared case-sensitively. @str must not be NULL.
+ *
+ * For any other string a diagnostic is written to stderr and
+ * GRAPHEME is returned as the fallback.
+ */
+static BreakKind
+kind_from_string (const char *str)
+{
+  if (strcmp (str, "grapheme") == 0)
+    return GRAPHEME;
+  else if (strcmp (str, "word") == 0)
+    return WORD;
+  else if (strcmp (str, "line") == 0)
+    return LINE;
+  else if (strcmp (str, "sentence") == 0)
+    return SENTENCE;
+  else
+    {
+      /* End the diagnostic with a newline so it does not run into
+       * whatever is printed next.
+       */
+      g_printerr ("Not a segmentation: %s\n", str);
+      return GRAPHEME;
+    }
+}
+
+/* Print @input to stdout with a "|" inserted at every boundary of
+ * the requested @kind.
+ *
+ * @input is parsed as Pango markup; the resulting plain text and
+ * attributes are put into a PangoLayout, and the layout's log attrs
+ * decide where boundaries are. A space is printed as-is, other
+ * graphic characters are printed verbatim, and everything else
+ * (including line and paragraph separators) is printed as a
+ * bracketed hexadecimal code point.
+ *
+ * Returns TRUE if the segmentation was printed, FALSE if the markup
+ * could not be parsed or the layout reports unknown glyphs. In both
+ * failure cases a message is written to stderr.
+ */
+static gboolean
+show_segmentation (const char *input,
+                   BreakKind   kind)
+{
+  GString *string;
+  PangoContext *context;
+  GError *error = NULL;
+  PangoLogAttr *attrs;
+  int len;
+  char *p;
+  int i;
+  char *text;
+  PangoAttrList *attributes;
+  PangoLayout *layout;
+
+  /* The input comes from the command line, so report bad markup
+   * instead of aborting on it.
+   */
+  if (!pango_parse_markup (input, -1, 0, &attributes, &text, NULL, &error))
+    {
+      g_printerr ("Failed to parse markup: %s\n", error->message);
+      g_error_free (error);
+      return FALSE;
+    }
+
+  context = pango_font_map_create_context (pango_cairo_font_map_get_default ());
+
+  layout = pango_layout_new (context);
+  /* text is the NUL-terminated result of markup parsing and can be
+   * shorter than input, so let Pango measure it rather than passing
+   * the byte length of input.
+   */
+  pango_layout_set_text (layout, text, -1);
+  pango_layout_set_attributes (layout, attributes);
+
+  if (pango_layout_get_unknown_glyphs_count (layout) > 0)
+    {
+      g_printerr ("Missing glyphs - skipping. Maybe fonts are missing?\n");
+      g_object_unref (layout);
+      g_object_unref (context);
+      pango_attr_list_unref (attributes);
+      g_free (text);
+      return FALSE;
+    }
+
+  /* len receives the number of entries in attrs; the last entry
+   * describes the position after the final character.
+   */
+  pango_layout_get_log_attrs (layout, &attrs, &len);
+
+  string = g_string_new ("");
+
+  for (i = 0, p = text; i < len; i++, p = g_utf8_next_char (p))
+    {
+      PangoLogAttr log = attrs[i];
+      gboolean is_break = FALSE;
+
+      switch (kind)
+        {
+        case GRAPHEME:
+          is_break = log.is_cursor_position;
+          break;
+        case WORD:
+          is_break = log.is_word_boundary;
+          break;
+        case LINE:
+          is_break = log.is_line_break;
+          break;
+        case SENTENCE:
+          is_break = log.is_sentence_boundary;
+          break;
+        default:
+          g_assert_not_reached ();
+        }
+
+      if (is_break)
+        g_string_append (string, "|");
+
+      /* The final attr has no character behind it, only a boundary. */
+      if (i < len - 1)
+        {
+          gunichar ch = g_utf8_get_char (p);
+          if (ch == 0x20)
+            g_string_append (string, " ");
+          else if (g_unichar_isgraph (ch) &&
+                   !(g_unichar_type (ch) == G_UNICODE_LINE_SEPARATOR ||
+                     g_unichar_type (ch) == G_UNICODE_PARAGRAPH_SEPARATOR))
+            g_string_append_unichar (string, ch);
+          else
+            g_string_append_printf (string, "[%#04x]", ch);
+        }
+    }
+
+  g_object_unref (layout);
+  g_object_unref (context);
+  g_free (attrs);
+  g_free (text);
+  pango_attr_list_unref (attributes);
+
+  g_print ("%s\n", string->str);
+
+  g_string_free (string, TRUE);
+
+  return TRUE;
+}
+
+/* Entry point: pango-segmentation [--kind=KIND] TEXT
+ *
+ * Parses the command line, then prints TEXT (the first remaining
+ * argument) with boundaries of the selected kind marked. KIND
+ * defaults to "grapheme".
+ *
+ * Returns 0 on success and 1 if option parsing fails, TEXT is
+ * missing, or the segmentation could not be shown.
+ */
+int
+main (int argc, char *argv[])
+{
+  char *opt_kind = "grapheme";
+  GOptionEntry entries[] = {
+    { "kind", 0, 0, G_OPTION_ARG_STRING, &opt_kind, "Kind of boundary (grapheme/word/line/sentence)", "KIND" },
+    { NULL, },
+  };
+  GOptionContext *context;
+  GError *error = NULL;
+  gboolean ok;
+
+  setlocale (LC_ALL, "");
+
+  context = g_option_context_new ("TEXT");
+  g_option_context_add_main_entries (context, entries, NULL);
+  g_option_context_set_description (context,
+      "Show text segmentation as determined by Pango.");
+  if (!g_option_context_parse (context, &argc, &argv, &error))
+    {
+      g_printerr ("%s\n", error->message);
+      g_error_free (error);
+      g_option_context_free (context);
+      return 1;
+    }
+
+  /* Parsing is done; argv now holds only the non-option arguments. */
+  g_option_context_free (context);
+
+  if (argc < 2)
+    {
+      g_printerr ("Usage: pango-segmentation [OPTIONS…] TEXT\n");
+      return 1;
+    }
+
+  ok = show_segmentation (argv[1], kind_from_string (opt_kind));
+
+  /* Let callers detect failure through the exit status. */
+  return ok ? 0 : 1;
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]