[gtksourceview: 8/13] css.lang (and family): Update/cleanup regular expressions



commit 7e750e2ef4f62f0a660ae4ae26b3a9e7e17d1ef4
Author: Jeffery To <jeffery to gmail com>
Date:   Tue Dec 3 06:20:25 2019 +0800

    css.lang (and family): Update/cleanup regular expressions
    
    * Add valid non-ASCII (Unicode) identifier characters and the
      backslash (used by escapes) to <keyword-char-class>.
    
    * Add custom word boundaries (\%[) to the left side (start) of
      identifier regular expressions. Since the right side (end) will
      consume all valid identifier characters, no word boundary is
      necessary there.
    
    * Remove non-capturing groups around defined regexes, since they are
      automatically added when the defined regex is referenced/included.

 data/language-specs/css.lang  | 93 +++++++++++++------------------------------
 data/language-specs/less.lang |  8 ++--
 data/language-specs/scss.lang |  2 +-
 3 files changed, 32 insertions(+), 71 deletions(-)
---
diff --git a/data/language-specs/css.lang b/data/language-specs/css.lang
index ca33daa1..1d3e6829 100644
--- a/data/language-specs/css.lang
+++ b/data/language-specs/css.lang
@@ -96,7 +96,7 @@
 
   <default-regex-options case-sensitive="false"/>
 
-  <keyword-char-class>[a-z0-9_-]</keyword-char-class>
+  <keyword-char-class>[a-z0-9_\x{80}-\x{10ffff}\\-]</keyword-char-class>
 
   <definitions>
 
@@ -118,12 +118,10 @@
 
     <!-- https://drafts.csswg.org/css-syntax/#escaping -->
     <define-regex id="escape" extended="true">
+      \\                   # backslash
       (?:
-        \\                   # backslash
-        (?:
-          [^\n\r\f0-9a-f] |  # not newline or hex digit; or
-          [0-9a-f]{1,6} \s?  # 1-6 hex digits, trailing whitespace (not necessary in some cases)
-        )
+        [^\n\r\f0-9a-f] |  # not newline or hex digit; or
+        [0-9a-f]{1,6} \s?  # 1-6 hex digits, trailing whitespace (not necessary in some cases)
       )
     </define-regex>
 
@@ -136,11 +134,7 @@
     https://www.w3.org/TR/selectors-3/#lex
     -->
     <define-regex id="identifier-start-char" extended="true">
-      (?:
-        [a-z_] |
-        \%{non-ascii} |
-        \%{escape}
-      )
+      [a-z_] | \%{non-ascii} | \%{escape}
     </define-regex>
 
     <!--
@@ -149,32 +143,17 @@
     https://www.w3.org/TR/selectors-3/#lex
     -->
     <define-regex id="identifier-chars" extended="true">
-      (?&gt;
-        (?:
-          [a-z0-9_-]+ |
-          \%{non-ascii}+ |
-          \%{escape}+
-        )+
-      )
+      (?&gt; (?: [a-z0-9_-]+ | \%{non-ascii}+ | \%{escape}+ )+ )
     </define-regex>
 
     <!-- for lookbehinds (and lookaheads) -->
     <define-regex id="single-identifier-char" extended="true">
-      (?:
-        [a-z0-9_-] |
-        \%{non-ascii}
-      )
+      [a-z0-9_-] | \%{non-ascii}
     </define-regex>
 
     <!-- https://drafts.csswg.org/css-syntax/#ident-token-diagram -->
     <define-regex id="identifier" extended="true">
-      (?:
-        (?:
-          -- |
-          -? \%{identifier-start-char}
-        )
-        \%{identifier-chars}?
-      )
+      (?: -- | -? \%{identifier-start-char} ) \%{identifier-chars}?
     </define-regex>
 
     <!--
@@ -182,26 +161,16 @@
     https://en.wikipedia.org/wiki/CSS_hack#List_of_prefixes
     -->
     <define-regex id="vendor-specific-prefix" extended="true">
-      (?:
-        (?:
-          [_-] \%{identifier-start-char}+ |
-          mso |
-          prince
-        )
-        -
-      )
+      (?: [_-] \%{identifier-start-char}+ | mso | prince ) -
     </define-regex>
 
     <define-regex id="vendor-specific-identifier" extended="true">
-      (?:
-        (?&lt;! \%{single-identifier-char} )
-        \%{vendor-specific-prefix} \%{identifier-chars}
-      )
+      \%{vendor-specific-prefix} \%{identifier-chars}
     </define-regex>
 
     <!-- https://drafts.csswg.org/css-variables/#defining-variables -->
     <define-regex id="custom-property-identifier" extended="true">
-      (?: -- \%{identifier-chars}+ )
+      -- \%{identifier-chars}+
     </define-regex>
 
     <define-regex id="declaration-value-end">(?=[!;}])</define-regex>
@@ -251,15 +220,15 @@
 
     <!-- custom identifier -->
     <context id="name" style-ref="name">
-      <match>\%{identifier}</match>
+      <match>\%[\%{identifier}</match>
     </context>
 
     <context id="vendor-specific-name" style-ref="vendor-specific">
-      <match>\%{vendor-specific-identifier}</match>
+      <match>\%[\%{vendor-specific-identifier}</match>
     </context>
 
     <context id="custom-property-name" style-ref="custom-property-name">
-      <match>\%{custom-property-identifier}</match>
+      <match>\%[\%{custom-property-identifier}</match>
     </context>
 
     <context id="escape" style-ref="escape">
@@ -459,15 +428,15 @@
     </context>
 
     <define-regex id="number-magnitude" extended="true">
-      (?: (?&gt; \d* \. \d+ | \d+ ) (?: e [+-]? \d+ )? )
+      (?&gt; \d* \. \d+ | \d+ ) (?: e [+-]? \d+ )?
     </define-regex>
 
     <define-regex id="number" extended="true">
-      (?: [+-]? \%{number-magnitude} )
+      [+-]? \%{number-magnitude}
     </define-regex>
 
     <define-regex id="positive-number" extended="true">
-      (?: \+? \%{number-magnitude} )
+      \+? \%{number-magnitude}
     </define-regex>
 
     <define-regex id="integer-magnitude" extended="true">
@@ -475,18 +444,15 @@
     </define-regex>
 
     <define-regex id="integer" extended="true">
-      (?: [+-]? \%{integer-magnitude} )
+      [+-]? \%{integer-magnitude}
     </define-regex>
 
     <define-regex id="positive-integer" extended="true">
-      (?: \+? \%{integer-magnitude} )
+      \+? \%{integer-magnitude}
     </define-regex>
 
     <define-regex id="non-number-char" extended="true">
-      (?:
-        \%{single-identifier-char} |
-        \.
-      )
+      \%{single-identifier-char} | \.
     </define-regex>
 
     <context id="dimension" style-ref="dimension">
@@ -634,7 +600,7 @@
     </context>
 
     <context id="vendor-specific-function">
-      <start>\%{vendor-specific-identifier}\(</start>
+      <start>\%[\%{vendor-specific-identifier}\(</start>
       <end>\)</end>
       <include>
         <context sub-pattern="0" where="start" style-ref="vendor-specific"/>
@@ -646,7 +612,7 @@
     </context>
 
     <context id="function">
-      <start>\%{identifier}\(</start>
+      <start>\%[\%{identifier}\(</start>
       <end>\)</end>
       <include>
         <context sub-pattern="0" where="start" style-ref="function"/>
@@ -2194,10 +2160,7 @@
     </context>
 
     <define-regex id="at-supports-declaration-value-end" extended="true">
-      (?:
-        \%{test-value-end} |
-        (?= ! )
-      )
+      \%{test-value-end} | (?= ! )
     </define-regex>
 
     <context id="at-supports-declaration-value">
@@ -2274,7 +2237,7 @@
     <!-- namespace qualifier -->
 
     <context id="namespace-qualifier">
-      <match>(\%{identifier}|\*)?(\|)</match>
+      <match>(\%[\%{identifier}|\*)?(\|)</match>
       <include>
         <context sub-pattern="1" style-ref="namespace"/>
         <context sub-pattern="2" style-ref="namespace-qualifier"/>
@@ -2292,7 +2255,7 @@
     </context>
 
     <context id="type-selector" style-ref="type-selector">
-      <match>\%{identifier}</match>
+      <match>\%[\%{identifier}</match>
     </context>
 
     <context id="universal-selector" style-ref="universal-selector">
@@ -2300,7 +2263,7 @@
     </context>
 
     <context id="attribute-selector-attribute-name" once-only="true" style-ref="attribute-selector-name">
-      <match>\%{identifier}</match>
+      <match>\%[\%{identifier}</match>
     </context>
 
     <context id="attribute-selector-operator" once-only="true" style-ref="attribute-selector-operator">
@@ -2348,7 +2311,7 @@
     <!-- pseudo-elements -->
 
     <context id="vendor-specific-pseudo-element-name" end-parent="true" style-ref="vendor-specific">
-      <start>\%{vendor-specific-identifier}</start>
+      <start>\%[\%{vendor-specific-identifier}</start>
       <end>\%{def:always-match}</end>
     </context>
 
@@ -2388,7 +2351,7 @@
     <!-- pseudo-classes -->
 
     <context id="vendor-specific-pseudo-class-name" end-parent="true" style-ref="vendor-specific">
-      <start>\%{vendor-specific-identifier}</start>
+      <start>\%[\%{vendor-specific-identifier}</start>
       <end>\%{def:always-match}</end>
     </context>
 
diff --git a/data/language-specs/less.lang b/data/language-specs/less.lang
index f465c058..26aa9afa 100644
--- a/data/language-specs/less.lang
+++ b/data/language-specs/less.lang
@@ -55,15 +55,13 @@
 
   <default-regex-options case-sensitive="false"/>
 
-  <keyword-char-class>[a-z0-9_-]</keyword-char-class>
+  <keyword-char-class>[a-z0-9_\x{80}-\x{10ffff}\\-]</keyword-char-class>
 
   <definitions>
 
     <!-- global -->
 
-    <define-regex id="statement-end" extended="true">
-      (?: ; | (?= } ) )
-    </define-regex>
+    <define-regex id="statement-end" extended="true">;|(?=})</define-regex>
 
     <context id="embedded-lang-hook"/>
 
@@ -97,7 +95,7 @@
     <!-- variables -->
 
     <define-regex id="variable" extended="true">
-      (?: @ \%{css:identifier} )
+      @ \%{css:identifier}
     </define-regex>
 
     <context id="variable" style-ref="variable">
diff --git a/data/language-specs/scss.lang b/data/language-specs/scss.lang
index ba24fdc7..6d100336 100644
--- a/data/language-specs/scss.lang
+++ b/data/language-specs/scss.lang
@@ -58,7 +58,7 @@
 
   <default-regex-options case-sensitive="false"/>
 
-  <keyword-char-class>[a-z0-9_-]</keyword-char-class>
+  <keyword-char-class>[a-z0-9_\x{80}-\x{10ffff}\\-]</keyword-char-class>
 
   <definitions>
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]