[gtksourceview: 3/9] css.lang (and family): Invert identifier character classes



commit 96d4b2f7a1cb995a9e39fe300c18582f77b317bd
Author: Jeffery To <jeffery to gmail com>
Date:   Tue Jun 2 02:31:32 2020 +0800

    css.lang (and family): Invert identifier character classes
    
    The resulting character classes are harder to read but much smaller (and
    more performant).

 data/language-specs/css.lang  | 79 +++++++++++++++++++++++++++++++++++--------
 data/language-specs/less.lang | 23 +++++++------
 data/language-specs/scss.lang | 23 +++++++------
 3 files changed, 88 insertions(+), 37 deletions(-)
---
diff --git a/data/language-specs/css.lang b/data/language-specs/css.lang
index 15e59e18..1dd21fc5 100644
--- a/data/language-specs/css.lang
+++ b/data/language-specs/css.lang
@@ -96,7 +96,8 @@
 
   <default-regex-options case-sensitive="false"/>
 
-  <keyword-char-class>[a-z0-9_\x{80}-\x{10ffff}\\-]</keyword-char-class>
+  <!-- from keyword-code-point -->
+  <keyword-char-class>[^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}\x{5D}\x{5E}\x{60}\x{7B}-\x{7F}]</keyword-char-class>
 
   <definitions>
 
@@ -125,30 +126,78 @@
       )
     </define-regex>
 
-    <!-- https://drafts.csswg.org/css-syntax/#non-ascii-code-point -->
-    <define-regex id="non-ascii">[\x{80}-\x{10ffff}]</define-regex>
+    <!--
+    https://drafts.csswg.org/css-syntax/#name-start-code-point
+
+    includes:
+    U+0041-U+005A Latin capital letter A to Z (A-Z)
+    U+005F        Low line (_)
+    U+0061-U+007A Latin small letter A to Z (a-z)
+    U+0080-       Non-ASCII
+    -->
+    <define-regex id="identifier-start-code-point" extended="true">
+      [^\x{0}-\x{40}\x{5B}-\x{5E}\x{60}\x{7B}-\x{7F}]
+    </define-regex>
+
+    <!--
+    https://drafts.csswg.org/css-syntax/#name-code-point
+
+    includes:
+    U+002D        Hyphen-minus (-)
+    U+0030-U+0039 Digit zero to nine (0-9)
+    U+0041-U+005A Latin capital letter A to Z (A-Z)
+    U+005F        Low line (_)
+    U+0061-U+007A Latin small letter A to Z (a-z)
+    U+0080-       Non-ASCII
+    -->
+    <define-regex id="identifier-code-point" extended="true">
+      [^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}-\x{5E}\x{60}\x{7B}-\x{7F}]
+    </define-regex>
+
+    <!--
+    identifier-code-point, excluding hyphen
+
+    includes:
+    U+0030-U+0039 Digit zero to nine (0-9)
+    U+0041-U+005A Latin capital letter A to Z (A-Z)
+    U+005F        Low line (_)
+    U+0061-U+007A Latin small letter A to Z (a-z)
+    U+0080-       Non-ASCII
+    -->
+    <define-regex id="identifier-prefix-code-point" extended="true">
+      [^\x{0}-\x{2F}\x{3A}-\x{40}\x{5B}-\x{5E}\x{60}\x{7B}-\x{7F}]
+    </define-regex>
+
+    <!--
+    identifier-code-point, including backslash
+
+    includes:
+    U+002D        Hyphen-minus (-)
+    U+0030-U+0039 Digit zero to nine (0-9)
+    U+0041-U+005A Latin capital letter A to Z (A-Z)
+    U+005C        Reverse solidus (\)
+    U+005F        Low line (_)
+    U+0061-U+007A Latin small letter A to Z (a-z)
+    U+0080-       Non-ASCII
+    -->
+    <define-regex id="keyword-code-point" extended="true">
+      [^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}\x{5D}\x{5E}\x{60}\x{7B}-\x{7F}]
+    </define-regex>
 
     <!--
     a combination of name-start code point and escape
-    https://drafts.csswg.org/css-syntax/#name-start-code-point
     https://www.w3.org/TR/selectors-3/#lex
     -->
     <define-regex id="identifier-start-char" extended="true">
-      [a-z_] | \%{non-ascii} | \%{escape}
+      \%{identifier-start-code-point} | \%{escape}
     </define-regex>
 
     <!--
     a combination of name code point and escape
-    https://drafts.csswg.org/css-syntax/#name-code-point
     https://www.w3.org/TR/selectors-3/#lex
     -->
     <define-regex id="identifier-chars" extended="true">
-      (?&gt; (?: [a-z0-9_-]+ | \%{non-ascii}+ | \%{escape}+ )+ )
-    </define-regex>
-
-    <!-- for lookbehinds (and lookaheads) -->
-    <define-regex id="single-identifier-char" extended="true">
-      [a-z0-9_-] | \%{non-ascii}
+      (?&gt; (?: \%{identifier-code-point}+ | \%{escape}+ )+ )
     </define-regex>
 
     <!-- https://drafts.csswg.org/css-syntax/#ident-token-diagram -->
@@ -158,7 +207,7 @@
 
     <!-- identifier-chars, excluding hyphen -->
     <define-regex id="identifier-prefix-chars" extended="true">
-      (?&gt; (?: [a-z0-9_]+ | \%{non-ascii}+ | \%{escape}+ )+ )
+      (?&gt; (?: \%{identifier-prefix-code-point}+ | \%{escape}+ )+ )
     </define-regex>
 
     <!--
@@ -462,7 +511,7 @@
     </define-regex>
 
     <define-regex id="non-number-char" extended="true">
-      \%{single-identifier-char} | \.
+      \%{keyword-code-point} | \.
     </define-regex>
 
     <context id="dimension" style-ref="dimension">
@@ -545,7 +594,7 @@
     <context id="unicode-range" style-ref="unicode-range">
       <match extended="true">
         \%[ u \+ (?&gt; [a-f0-9?]{1,6} ) (?&gt; - [a-f0-9]{1,6} )?
-        (?! \%{single-identifier-char} )
+        (?! \%{keyword-code-point} )
       </match>
     </context>
 
diff --git a/data/language-specs/less.lang b/data/language-specs/less.lang
index 5d11a679..d0c7a177 100644
--- a/data/language-specs/less.lang
+++ b/data/language-specs/less.lang
@@ -55,7 +55,8 @@
 
   <default-regex-options case-sensitive="false"/>
 
-  <keyword-char-class>[a-z0-9_\x{80}-\x{10ffff}\\-]</keyword-char-class>
+  <!-- from css:keyword-code-point -->
+  <keyword-char-class>[^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}\x{5D}\x{5E}\x{60}\x{7B}-\x{7F}]</keyword-char-class>
 
   <definitions>
 
@@ -153,9 +154,9 @@
     <context id="arithmetic-operator" style-ref="operator-symbol">
       <match extended="true">
         [+*/] |
-        (?&lt;! \%{css:single-identifier-char} )
+        (?&lt;! \%{css:keyword-code-point} )
         -
-        (?! \%{css:single-identifier-char} )
+        (?! \%{css:keyword-code-point} )
       </match>
     </context>
 
@@ -497,13 +498,13 @@
           \+_?: |  # property merge
           :
           (?:
-            (?!                                 # not the start of a
-              \%{css:single-identifier-char} |  #   pseudo-class
-              [:\\] |                           #   pseudo-element, escape
-              @{ |                              #   variable interpolation
-              /\*                               #   comment
-            ) |                                 # or
-            (?=                                 # ends like a normal declaration
+            (?!                              # not the start of a
+              \%{css:keyword-code-point} |   #   pseudo-class
+              [:\\] |                        #   pseudo-element, escape
+              @{ |                           #   variable interpolation
+              /\*                            #   comment
+            ) |                              # or
+            (?=                              # ends like a normal declaration
               (?:
                 (?&gt;
                   (?:
@@ -519,7 +520,7 @@
                 (?&amp;double_quote_string) |
                 (?&amp;single_quote_string)
               )*
-              \%{css:declaration-value-end}     #   with a semicolon or at the end of a block
+              \%{css:declaration-value-end}  #   with a semicolon or at the end of a block
             )
           )
         )
diff --git a/data/language-specs/scss.lang b/data/language-specs/scss.lang
index 9a3024c3..1266edf7 100644
--- a/data/language-specs/scss.lang
+++ b/data/language-specs/scss.lang
@@ -58,7 +58,8 @@
 
   <default-regex-options case-sensitive="false"/>
 
-  <keyword-char-class>[a-z0-9_\x{80}-\x{10ffff}\\-]</keyword-char-class>
+  <!-- from css:keyword-code-point -->
+  <keyword-char-class>[^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}\x{5D}\x{5E}\x{60}\x{7B}-\x{7F}]</keyword-char-class>
 
   <definitions>
 
@@ -134,9 +135,9 @@
     <context id="arithmetic-operator" style-ref="operator-symbol">
       <match extended="true">
         [+*%] |
-        (?&lt;! \%{css:single-identifier-char} )
+        (?&lt;! \%{css:keyword-code-point} )
         -
-        (?! \%{css:single-identifier-char} )
+        (?! \%{css:keyword-code-point} )
       </match>
     </context>
 
@@ -520,13 +521,13 @@
 
         :
         (?:
-          (?!                                 # not the start of a
-            \%{css:single-identifier-char} |  #   pseudo-class
-            [:\\] |                           #   pseudo-element, escape
-            \#{ |                             #   interpolation
-            /\*                               #   comment
-          ) |                                 # or
-          (?=                                 # ends like a normal declaration
+          (?!                              # not the start of a
+            \%{css:keyword-code-point} |   #   pseudo-class
+            [:\\] |                        #   pseudo-element, escape
+            \#{ |                          #   interpolation
+            /\*                            #   comment
+          ) |                              # or
+          (?=                              # ends like a normal declaration
             (?&gt;
               (?:
                 (?&gt;
@@ -547,7 +548,7 @@
                 (?&amp;single_quote_string)
               )*
             )
-            \%{css:declaration-value-end}     #   with a semicolon or at the end of a block
+            \%{css:declaration-value-end}  #   with a semicolon or at the end of a block
           )
         )
       </start>


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]