[gnote] WikiWord regexp now uses PCRE. This also supports Unicode (Closes #581495)



commit 1f1ecce7cdb829d1cd6219250c773649d6a6b84c
Author: Hubert Figuiere <hub figuiere net>
Date:   Fri Jun 5 23:01:02 2009 -0400

    WikiWord regexp now uses PCRE. This also supports Unicode (Closes #581495)
---
 NEWS             |    2 ++
 src/watchers.cpp |   26 +++++++++++++-------------
 src/watchers.hpp |    6 ++----
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/NEWS b/NEWS
index 471e75f..4f0360b 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,5 @@
+  * WikiWord now support non ASCII (Closes #581495)
+
 0.5.0 -
 
 New features:
diff --git a/src/watchers.cpp b/src/watchers.cpp
index ceec756..b76148b 100644
--- a/src/watchers.cpp
+++ b/src/watchers.cpp
@@ -25,7 +25,6 @@
 #include <string.h>
 
 #include <boost/format.hpp>
-#include <boost/regex.hpp>
 
 #include <glibmm/i18n.h>
 
@@ -942,8 +941,8 @@ namespace gnote {
 
   ////////////////////////////////////////////////////////////////////////
 
-  // NOTE \\u is upper. \\l is lower. make sure it works with non roman scripts.
-  const char * NoteWikiWatcher::WIKIWORD_REGEX = "\\b((\\u+[\\l0-9]+){2}([\\u\\l0-9])*)\\b";
+  // This is a PCRE regex.
+  const char * NoteWikiWatcher::WIKIWORD_REGEX = "\\b((\\p{Lu}+[\\p{Ll}0-9]+){2}([\\p{Lu}\\p{Ll}0-9])*)\\b";
 
 
   NoteAddin * NoteWikiWatcher::create()
@@ -1030,29 +1029,30 @@ namespace gnote {
     get_buffer()->remove_tag (m_broken_link_tag, start, end);
 
     std::string s(start.get_slice(end));
-    boost::sregex_iterator m1(s.begin(), s.end(), m_regex);
-    boost::sregex_iterator m2;
-    while(m1 != m2) {
-      const boost::sub_match<std::string::const_iterator> & match = (*m1)[1];
+    std::string match;
+    const char * p = s.c_str();
+    pcrecpp::StringPiece input(p);
+
 
-      if (match.matched && !is_patronymic_name (match.str())) {
+    while(m_regex.FindAndConsume(&input, &match)) {
 
+      if (!is_patronymic_name (match)) {
       
         Gtk::TextIter start_cpy = start;
-        Glib::ustring segment(std::string(s.c_str(), match.first - s.begin()));
+        Glib::ustring segment(std::string(p, input.data() - p - match.size()));
         start_cpy.forward_chars (segment.length());
+
         DBG_OUT("Highlighting wikiword: '%s' at offset %d",
-                match.str().c_str(), segment.length());
+                match.c_str(), segment.length());
 
         end = start_cpy;
-        segment = match.str();
+        segment = match;
         end.forward_chars (segment.length());
 
-        if (!manager().find (match.str())) {
+        if (!manager().find(match)) {
           get_buffer()->apply_tag (m_broken_link_tag, start_cpy, end);
         }
       }
-      ++m1;
     }
   }
 
diff --git a/src/watchers.hpp b/src/watchers.hpp
index 6e0eb7e..f2028d5 100644
--- a/src/watchers.hpp
+++ b/src/watchers.hpp
@@ -27,8 +27,6 @@
 #include <config.h>
 #endif
 
-#include <boost/regex.hpp>
-
 #include <pcrecpp.h>
 
 #if FIXED_GTKSPELL
@@ -210,7 +208,7 @@ namespace gnote {
 
   protected:
     NoteWikiWatcher()
-      : m_regex(WIKIWORD_REGEX)
+      : m_regex(WIKIWORD_REGEX, pcrecpp::RE_Options(PCRE_UTF8))
       {
       }
   private:
@@ -223,7 +221,7 @@ namespace gnote {
 
     static const char * WIKIWORD_REGEX;
     Glib::RefPtr<Gtk::TextTag>   m_broken_link_tag;
-    boost::regex        m_regex;
+    pcrecpp::RE         m_regex;
     sigc::connection    m_on_insert_text_cid;
     sigc::connection    m_on_delete_range_cid;
   };



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]