[gimp] script-fu: make regex matching return character indexes



commit bdae0de1aaa4657454787c2a33bc009bebf8531d
Author: Simon Budig <simon budig de>
Date:   Wed May 25 20:17:02 2016 +0200

    script-fu: make regex matching return character indexes
    
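    Since tinyscheme uses UTF-8 for strings, make the script-fu regex
    utilities match that behaviour: when the subject string is valid
    UTF-8, report match positions as character offsets rather than byte
    offsets. Fixes bug #764811.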

 plug-ins/script-fu/script-fu-regex.c |    9 +++++++++
 1 files changed, 9 insertions(+), 0 deletions(-)
---
diff --git a/plug-ins/script-fu/script-fu-regex.c b/plug-ins/script-fu/script-fu-regex.c
index ef54b72..3bbff56 100644
--- a/plug-ins/script-fu/script-fu-regex.c
+++ b/plug-ins/script-fu/script-fu-regex.c
@@ -79,6 +79,7 @@ foreign_re_match (scheme  *sc,
 {
   pointer   retval = sc->F;
   gboolean  success;
+  gboolean  is_valid_utf8;
   GRegex   *regex;
   pointer   first_arg, second_arg;
   pointer   third_arg = sc->NIL;
@@ -98,6 +99,8 @@ foreign_re_match (scheme  *sc,
   pattern = sc->vptr->string_value (first_arg);
   string  = sc->vptr->string_value (second_arg);
 
+  is_valid_utf8 = g_utf8_validate (string, -1, NULL);
+
   args = sc->vptr->pair_cdr (args);
 
   if (args != sc->NIL)
@@ -134,6 +137,12 @@ foreign_re_match (scheme  *sc,
 
           g_match_info_fetch_pos (match_info, i, &start, &end);
 
+          if (is_valid_utf8)
+            {
+              start = g_utf8_pointer_to_offset (string, string + start);
+              end   = g_utf8_pointer_to_offset (string, string + end);
+            }
+
 #undef cons
           set_vector_elem (third_arg, i,
                            sc->vptr->cons(sc,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]