[smuxi: 172/179] Common, Engine(-Tests): use precise regex for parsing emojis



commit d99f363711726703b39baf0d3b0591a8222aa190
Author: Mirco Bauer <meebey meebey net>
Date:   Thu Oct 5 20:49:36 2017 +0800

    Common, Engine(-Tests): use precise regex for parsing emojis
    
    The emoji regex was simply :\w+: which did not match every emoji, e.g. :+1: or
    :e-mail:. Instead of guessing which characters are valid we are now building a
    regex that is based on the emoji list we have from EmojiOne anyhow.
    
    As this is a very large regex with 27k characters and 2373 emojis, this could
    have been a serious performance regression, but a benchmark disagrees:
    
    Performance with the old but simple regex:
    
        /benchmark_message_builder -c 10000 --append-message
        MessageBuilder().AppendMessage(). count: 10000 took: 3370 ms avg: 0.34 ms
        MessageBuilder().AppendMessage(). count: 10000 took: 3331 ms avg: 0.33 ms
        MessageBuilder().AppendMessage(). count: 10000 took: 3300 ms avg: 0.33 ms
    
    Performance with the complex but precisely generated regex:
    
        /benchmark_message_builder -c 10000 --append-message
        MessageBuilder().AppendMessage(). count: 10000 took: 3353 ms avg: 0.34 ms
        MessageBuilder().AppendMessage(). count: 10000 took: 3294 ms avg: 0.33 ms
        MessageBuilder().AppendMessage(). count: 10000 took: 3316 ms avg: 0.33 ms
    
    For the /benchmark_message_builder command to actually use emojis I
    temporarily made the following code change in the CommandManager class:
    
        MessageBuilder CreateMessageBuilder()
        {
                var builder = new MessageBuilder();
                builder.Settings.Emojis = true;
                return builder;
        }

 src/Common/Emojione.cs                      |    6 ++++
 src/Engine-Tests/MessageBuilderTests.cs     |   39 +++++++++++++++++++++++++++
 src/Engine/Config/MessageBuilderSettings.cs |   13 ++++++++-
 3 files changed, 57 insertions(+), 1 deletions(-)
---
diff --git a/src/Common/Emojione.cs b/src/Common/Emojione.cs
index fa5f273..3699204 100644
--- a/src/Common/Emojione.cs
+++ b/src/Common/Emojione.cs
@@ -29,6 +29,12 @@ namespace Smuxi.Common
     {
         readonly static string BaseUri = "http://cdnjs.cloudflare.com/ajax/libs/emojione/2.2.7/assets/png/";;
 
+        public static Dictionary<string, string> ShortnameToUnicodeMap {
+            get {
+                return map;
+            }
+        }
+
         public static string ShortnameToUnicode(string shortName)
         {
             string val;
diff --git a/src/Engine-Tests/MessageBuilderTests.cs b/src/Engine-Tests/MessageBuilderTests.cs
index 8a877f4..6a951a5 100644
--- a/src/Engine-Tests/MessageBuilderTests.cs
+++ b/src/Engine-Tests/MessageBuilderTests.cs
@@ -19,6 +19,7 @@
 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 using System;
 using NUnit.Framework;
+using Smuxi.Common;
 
 namespace Smuxi.Engine
 {
@@ -785,6 +786,7 @@ namespace Smuxi.Engine
         [Test]
         public void AppendMessageWithEmojis()
         {
+            // simple emoji
             var msg = "foo :smiley: bar";
             var builder = new MessageBuilder();
             builder.Settings.Emojis = true;
@@ -795,6 +797,43 @@ namespace Smuxi.Engine
             );
             builder.Append(new TextMessagePartModel(" bar"));
             TestMessage(msg, builder.ToMessage(), builder.Settings);
+
+            // emoji with underscore
+            msg = ":slightly_smiling_face:";
+            builder = new MessageBuilder();
+            builder.Settings.Emojis = true;
+            builder.TimeStamp = DateTime.MinValue;
+            builder.Append(
+                new ImageMessagePartModel("smuxi-emoji://slightly_smiling_face", ":slightly_smiling_face:")
+            );
+            TestMessage(msg, builder.ToMessage(), builder.Settings);
+
+            // emoji with plus
+            msg = ":+1:";
+            builder = new MessageBuilder();
+            builder.Settings.Emojis = true;
+            builder.TimeStamp = DateTime.MinValue;
+            builder.Append(
+                new ImageMessagePartModel("smuxi-emoji://+1", ":+1:")
+            );
+            TestMessage(msg, builder.ToMessage(), builder.Settings);
+
+            // test all supported emojis of the Emojione provider
+            foreach (var emojiShortname in Emojione.ShortnameToUnicodeMap.Keys) {
+                var msgWithEmoji = ":" + emojiShortname + ":";
+                builder = new MessageBuilder();
+                builder.Settings.Emojis = true;
+                builder.TimeStamp = DateTime.MinValue;
+                builder.Append(
+                    new ImageMessagePartModel(
+                        String.Format("smuxi-emoji://{0}", emojiShortname),
+                        msgWithEmoji
+                    )
+                );
+                TestMessage(msgWithEmoji, builder.ToMessage(), builder.Settings,
+                            String.Format("failed testing emoji '{0}'",
+                                          emojiShortname));
+            }
         }
     }
 }
diff --git a/src/Engine/Config/MessageBuilderSettings.cs b/src/Engine/Config/MessageBuilderSettings.cs
index b5955f8..9a6b7aa 100644
--- a/src/Engine/Config/MessageBuilderSettings.cs
+++ b/src/Engine/Config/MessageBuilderSettings.cs
@@ -18,6 +18,7 @@
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 using System;
+using System.Text;
 using System.Text.RegularExpressions;
 using System.Collections.Generic;
 using Smuxi.Common;
@@ -52,7 +53,17 @@ namespace Smuxi.Engine
 
         static MessageBuilderSettings()
         {
-            var emojiRegex = new Regex(@":(\w+):", RegexOptions.Compiled);
+            // OPT: this emoji regex is really long, around 27k characters
+            var emojiRegexBuilder = new StringBuilder(32 * 1024);
+            emojiRegexBuilder.Append(":(");
+            foreach (var emojiShortname in Emojione.ShortnameToUnicodeMap.Keys) {
+                emojiRegexBuilder.AppendFormat("{0}|", Regex.Escape(emojiShortname));
+            }
+            // remove trailing |
+            emojiRegexBuilder.Length--;
+            emojiRegexBuilder.Append("):");
+
+            var emojiRegex = new Regex(emojiRegexBuilder.ToString(), RegexOptions.Compiled);
             EmojiMessagePattern = new MessagePatternModel(emojiRegex) {
                 MessagePartType = typeof(ImageMessagePartModel),
                 LinkFormat = "smuxi-emoji://{1}"


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]