[gtksourceview: 2/3] javascript.lang (and family): Use definition of whitespace from spec



commit c6f226d860f49754467085cca10cde56cc218cf8
Author: Jeffery To <jeffery to gmail com>
Date:   Fri Nov 15 07:39:56 2019 +0800

    javascript.lang (and family): Use definition of whitespace from spec
    
    These include characters like line tabulation (vertical tab), no-break
    space, and Unicode code points in the "Space_Separator" (Zs) category.

 data/language-specs/javascript-expressions.lang |  4 ++-
 data/language-specs/javascript.lang             | 36 +++++++++++++++++++++++--
 data/language-specs/typescript-jsx.lang         |  4 +--
 data/language-specs/typescript.lang             |  8 +++---
 4 files changed, 43 insertions(+), 9 deletions(-)
---
diff --git a/data/language-specs/javascript-expressions.lang b/data/language-specs/javascript-expressions.lang
index c07f177d..c34fea33 100644
--- a/data/language-specs/javascript-expressions.lang
+++ b/data/language-specs/javascript-expressions.lang
@@ -397,7 +397,9 @@
     -->
 
     <define-regex id="_expression-start" extended="true">
-      (?= [^\s:;\])}] ) (?! /[/*] )
+      (?= \%{js:not-whitespace} )
+      (?= [^:;\])}] )
+      (?! /[/*] )
     </define-regex> <!-- /_expression-start -->
 
     <!-- <LeftHandSideExpression> -->
diff --git a/data/language-specs/javascript.lang b/data/language-specs/javascript.lang
index 7fee198f..2806283a 100644
--- a/data/language-specs/javascript.lang
+++ b/data/language-specs/javascript.lang
@@ -222,7 +222,39 @@
       (?: \%{identifier-start} \%{identifier-part}* )
     </define-regex> <!-- /identifier-->
 
-    <define-regex id="before-next-token">(?=\S)</define-regex>
+    <!-- <WhiteSpace> -->
+    <!-- does not include characters from <LineTerminator>,
+         to only allow space separators when we want to match a
+         "whitespace" character
+
+         U+0009 Character tabulation <TAB>
+         U+000B Line tabulation <VT>
+         U+000C Form feed <FF>
+         U+0020 Space <SP>
+         U+00A0 No-break space <NBSP>
+         U+FEFF Zero width no-break space <ZWNBSP>
+         Zs Space_Separator category <USP>
+    -->
+    <define-regex id="whitespace" extended="true">
+      [\t\x{000B}\f \x{00A0}\x{FEFF}\p{Zs}]
+    </define-regex> <!-- /whitespace -->
+
+    <!-- includes characters from <LineTerminator>,
+         to ignore line terminators when we want to match a "not
+         whitespace" character
+
+         U+000A Line feed <LF>
+         U+000D Carriage return <CR>
+         U+2028 Line separator <LS>
+         U+2029 Paragraph separator <PS>
+    -->
+    <define-regex id="not-whitespace" extended="true">
+      [^\t\x{000B}\f \x{00A0}\x{FEFF}\p{Zs}\n\r\x{2028}\x{2029}]
+    </define-regex> <!-- /not-whitespace -->
+
+    <define-regex id="before-next-token" extended="true">
+      (?= \%{not-whitespace} )
+    </define-regex> <!-- /before-next-token -->
 
     <define-regex id="statement-end" extended="true">
       (?: ; | \%{before-next-token} )
@@ -235,7 +267,7 @@
     <!-- wherever this is used, the highlighting will be more brittle,
          because comments can span multiple lines -->
     <define-regex id="optional-whitespace-or-comments" extended="true">
-      (?&gt; (?: \s+ | /\*.*?\*/ )* )
+      (?&gt; (?: \%{whitespace}+ | /\*.*?\*/ )* )
     </define-regex> <!-- /optional-whitespace-or-comments -->
 
     <define-regex id="generator-modifier">\*</define-regex>
diff --git a/data/language-specs/typescript-jsx.lang b/data/language-specs/typescript-jsx.lang
index 9d638120..bc9dda78 100644
--- a/data/language-specs/typescript-jsx.lang
+++ b/data/language-specs/typescript-jsx.lang
@@ -61,7 +61,7 @@
          any definitive documentation -->
     <context id="_jsx-comment-pragmas" style-ref="js:directive" class="comment">
       <start case-sensitive="false" extended="true">
-        /\*\*? \s* @jsx \%]
+        /\*\*? \%{js:whitespace}* @jsx \%]
       </start>
       <end>\*/</end>
       <include>
@@ -71,7 +71,7 @@
 
     <context id="_jsx-comment-pragmas-no-extend-parent" style-ref="js:directive" class="comment" extend-parent="false">
       <start extended="true">
-        /\*\*? \s* @jsx \%]
+        /\*\*? \%{js:whitespace}* @jsx \%]
       </start>
       <end>\*/</end>
       <include>
diff --git a/data/language-specs/typescript.lang b/data/language-specs/typescript.lang
index 6f079af1..54858210 100644
--- a/data/language-specs/typescript.lang
+++ b/data/language-specs/typescript.lang
@@ -154,7 +154,7 @@
     -->
     <context id="_ts-ignore-comment-pragmas" style-ref="js:directive" end-at-line-end="true" class="comment">
       <start extended="true">
-        ///? \s* @ts-ignore \%]
+        ///? \%{js:whitespace}* @ts-ignore \%]
       </start>
       <include>
         <context ref="def:in-comment"/>
@@ -163,7 +163,7 @@
 
     <context id="_ts-ignore-comment-pragmas-no-extend-parent" style-ref="js:directive" end-at-line-end="true" class="comment" extend-parent="false">
       <start extended="true">
-        ///? \s* @ts-ignore \%]
+        ///? \%{js:whitespace}* @ts-ignore \%]
       </start>
       <include>
         <context ref="def:in-comment"/>
@@ -200,13 +200,13 @@
 
     <context id="_triple-slash-directives" style-ref="js:directive" end-at-line-end="true" class="comment">
       <start case-sensitive="false" extended="true">
-        /// \s* &lt;
+        /// \%{js:whitespace}* &lt;
         (
           amd-module |
           amd-dependency |
           reference
         )
-        \s
+        \%{js:whitespace}
       </start>
     </context> <!-- /_triple-slash-directives -->
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]