[glibmm] gmmproc: GtkDefs::split_tokens() uses split() instead of substr().



commit 8335682f484c407a67efbe790f690f9ea0639a78
Author: Kjell Ahlstedt <kjell ahlstedt bredband net>
Date:   Tue Apr 26 15:35:23 2011 +0200

    gmmproc: GtkDefs::split_tokens() uses split() instead of substr().
    
    * tools/pm/GtkDefs.pm: GtkDefs::split_tokens() splits the string with
    split() instead of substr(). substr() can be very inefficient when splitting
    a utf8 string. Bug #644037

 ChangeLog           |    8 ++++++++
 tools/pm/GtkDefs.pm |   31 +++++++++++++++++++------------
 2 files changed, 27 insertions(+), 12 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index c003cc3..75d5558 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2011-04-26  Kjell Ahlstedt  <kjell ahlstedt bredband net>
+
+	gmmproc: GtkDefs::split_tokens() uses split() instead of substr().
+
+	* tools/pm/GtkDefs.pm: GtkDefs::split_tokens() splits the string with
+	split() instead of substr(). substr() can be very inefficient when splitting
+	a utf8 string. Bug #644037
+
 2011-04-06  Murray Cumming  <murrayc murrayc com>
 
 	Minor documentation improvements.
diff --git a/tools/pm/GtkDefs.pm b/tools/pm/GtkDefs.pm
index f665099..4bab8ef 100644
--- a/tools/pm/GtkDefs.pm
+++ b/tools/pm/GtkDefs.pm
@@ -179,16 +179,19 @@ sub split_tokens($)
   my $inside_squotes = 0;
   # number of yet unpaired opening parens.
   my $parens = 0;
-  my $len = length($token_string);
   # whether previous char was a backslash - important only when being between
   # double quotes.
   my $backslash = 0;
   # index of first opening paren - beginning of a new token.
   my $begin_token = 0;
 
-  for (my $index = 0; $index < $len; $index++)
+  # Isolate characters with special significance for the token split.
+  my @substrings = split(/([\\"'()])/, $token_string);
+
+  my $index = -1;
+  for my $substring (@substrings)
   {
-    my $char = substr($token_string, $index, 1);
+    $index++;
     # if we are inside double quotes.
     if ($inside_dquotes)
     {
@@ -199,13 +202,13 @@ sub split_tokens($)
         $backslash = 0;
       }
       # if current char is backslash.
-      elsif ($char eq '\\')
+      elsif ($substring eq '\\')
       {
         $backslash = 1;
       }
       # if current char is unescaped double quotes and we are not inside single
       # ones - means, we are going outside string.
-      elsif ($char eq '"' and not $inside_squotes)
+      elsif ($substring eq '"' and not $inside_squotes)
       {
         $inside_dquotes = 0;
       }
@@ -217,7 +220,7 @@ sub split_tokens($)
       # if there is near (2 or 3 indexes away) second quote, then it is 2a,
       # if 2a occured earlier, then it is 2b.
       # otherwise is 1.
-      elsif ($char eq '\'')
+      elsif ($substring eq '\'')
       {
         # if we are already inside single quotes, it is 2b.
         if ($inside_squotes)
@@ -227,7 +230,7 @@ sub split_tokens($)
         else
         {
           # if there is closing quotes near, it is 2a.
-          if (substr($token_string, $index, 4) =~ /^'\\?.'/)
+          if (join('', @substrings[$index .. min($#substrings, $index+3)]) =~ /^'\\?.'/)
           {
             $inside_squotes = 1;
           }
@@ -236,12 +239,12 @@ sub split_tokens($)
       }
     }
     # double quotes - beginning of a string.
-    elsif ($char eq '"')
+    elsif ($substring eq '"')
     {
       $inside_dquotes = 1;
     }
     # opening paren - if paren count is 0 then this is a beginning of a token.
-    elsif ($char eq '(')
+    elsif ($substring eq '(')
     {
       unless ($parens)
       {
@@ -251,13 +254,12 @@ sub split_tokens($)
     }
     # closing paren - if paren count is 1 then this is an end of a token, so we
     # extract it from token string and push into token list.
-    elsif ($char eq ')')
+    elsif ($substring eq ')')
     {
       $parens--;
       unless ($parens)
       {
-        my $token_len = $index + 1 - $begin_token;
-        my $token = substr($token_string, $begin_token, $token_len);
+        my $token = join('', @substrings[$begin_token .. $index]);
         push(@tokens, $token);
       }
     }
@@ -266,6 +268,11 @@ sub split_tokens($)
   return @tokens;
 }
 
+sub min($$)
+{
+  return ($_[0] < $_[1]) ? $_[0] : $_[1];
+}
+
 sub read_file($$)
 {
   my ($path, $filename)= @_;



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]