[gtksourceview] python3.lang: Allow non-ASCII characters in identifiers



commit 67ea911c22f8f8a82a62fbd130bc98d126a44fac
Author: Jeffery To <jeffery to gmail com>
Date:   Mon Oct 28 04:22:39 2019 +0800

    python3.lang: Allow non-ASCII characters in identifiers
    
    Python 3 allows (some) Unicode characters in identifiers[1]. This
    changes highlighting for identifiers, e.g. function/class names, to
    account for these characters.
    
    Fixes #31.
    
    [1]: https://docs.python.org/3/reference/lexical_analysis.html#identifiers

 data/language-specs/python3.lang | 109 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 96 insertions(+), 13 deletions(-)
---
diff --git a/data/language-specs/python3.lang b/data/language-specs/python3.lang
index e36f4422..3d3d0f44 100644
--- a/data/language-specs/python3.lang
+++ b/data/language-specs/python3.lang
@@ -34,6 +34,7 @@
     <style id="string"            name="String"                map-to="python:string"/>
     <style id="escaped-char"      name="Escaped Character"     map-to="python:escaped-char"/>
     <style id="format"            name="Format"                map-to="python:format"/>
+    <style id="string-conversion" name="String Conversion"     map-to="python:string-conversion"/>
     <style id="special-variable"  name="Special Variable"      map-to="python:special-variable"/>
     <style id="boolean"           name="Boolean"               map-to="python:boolean"/>
     <style id="floating-point"    name="Floating point number" map-to="python:floating-point"/>
@@ -43,12 +44,50 @@
     <style id="builtin-constant"  name="Builtin Constant"      map-to="python:builtin-constant"/>
     <style id="builtin-object"    name="Builtin Object"        map-to="python:builtin-object"/>
     <style id="builtin-function"  name="Builtin Function"      map-to="python:builtin-function"/>
+    <style id="function-name"     name="Function Name"         map-to="python:function-name"/>
+    <style id="class-name"        name="Class Name"            map-to="python:class-name"/>
+    <style id="decorator"         name="Decorator"             map-to="python:decorator"/>
   </styles>
 
   <definitions>
-    <define-regex id="identifier">[_a-zA-Z][_a-zA-Z0-9]*</define-regex>
+    <!-- https://docs.python.org/3/reference/lexical_analysis.html#identifiers -->
+    <define-regex id="id-start" extended="true">
+      (?:
+        [\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_] |
+        [\x{1885}-\x{1886}\x{2118}\x{212E}\x{309B}-\x{309C}]  # Other_ID_Start (Unicode 12.1.0)
+      )
+    </define-regex>
+    <define-regex id="id-continue" extended="true">
+      (?:
+        \%{id-start} |
+        [\p{Mn}\p{Mc}\p{Nd}\p{Pc}] |
+        [\x{00B7}\x{0387}\x{1369}-\x{1371}\x{19DA}]  # Other_ID_Continue (Unicode 12.1.0)
+      )
+    </define-regex>
+    <define-regex id="identifier" extended="true">
+      (?&gt; \%{id-start} \%{id-continue}* )
+    </define-regex>
     <define-regex id="number">[1-9][0-9]*</define-regex>
 
+    <define-regex id="identifier-path" extended="true">
+      (\%{identifier}\.)*\%{identifier}
+    </define-regex>
+    <define-regex id="relative-path" extended="true">
+      (\.*\%{identifier-path}|\.+)
+    </define-regex>
+
+    <!-- https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting -->
+    <context id="format" style-ref="format" extend-parent="false">
+      <match extended="true">
+        %                       # leading % sign
+        (\(\%{identifier}\))?   # mapping key, optional as a whole
+        [#0\-\ \+]*             # conversion flags
+        (\-?\%{number}|\*)?     # minimum field width
+        (\.(\-?\%{number}|\*))? # precision
+        [hlL]?                  # length modifier: one of h, l or L
+        [diouxXeEfFgGcrs%]      # conversion type
+      </match>
+    </context>
 
     <define-regex id="string-prefix">(b|B)?</define-regex>
     <define-regex id="raw-string-prefix">(r|R|rb|RB|rB|Rb|br|BR|bR|Br)</define-regex>
@@ -57,7 +96,7 @@
       <start>\%{string-prefix}"""</start>
       <end>"""</end>
       <include>
-        <context ref="python:format"/>
+        <context ref="format"/>
         <context ref="python:escaped-char"/>
       </include>
     </context>
@@ -66,7 +105,7 @@
       <start>\%{string-prefix}'''</start>
       <end>'''</end>
       <include>
-        <context ref="python:format"/>
+        <context ref="format"/>
         <context ref="python:escaped-char"/>
       </include>
     </context>
@@ -75,7 +114,7 @@
       <start>\%{string-prefix}"</start>
       <end>"</end>
       <include>
-        <context ref="python:format"/>
+        <context ref="format"/>
         <context ref="python:escaped-char"/>
         <context ref="def:line-continue"/>
       </include>
@@ -85,7 +124,7 @@
       <start>\%{string-prefix}'</start>
       <end>'</end>
       <include>
-        <context ref="python:format"/>
+        <context ref="format"/>
         <context ref="python:escaped-char"/>
         <context ref="def:line-continue"/>
       </include>
@@ -95,7 +134,7 @@
       <start>\%{raw-string-prefix}"""</start>
       <end>"""</end>
       <include>
-        <context ref="python:format"/>
+        <context ref="format"/>
       </include>
     </context>
 
@@ -103,7 +142,7 @@
       <start>\%{raw-string-prefix}'''</start>
       <end>'''</end>
       <include>
-        <context ref="python:format"/>
+        <context ref="format"/>
       </include>
     </context>
 
@@ -111,7 +150,7 @@
       <start>\%{raw-string-prefix}"</start>
       <end>"</end>
       <include>
-        <context ref="python:format"/>
+        <context ref="format"/>
         <context ref="def:line-continue"/>
       </include>
     </context>
@@ -120,11 +159,55 @@
       <start>\%{raw-string-prefix}'</start>
       <end>'</end>
       <include>
-        <context ref="python:format"/>
+        <context ref="format"/>
         <context ref="def:line-continue"/>
       </include>
     </context>
 
+    <context id="module-handler-from">
+      <match extended="true">
+        (from)
+        \s+
+        (\%{relative-path})
+      </match>
+      <include>
+        <context sub-pattern="1" style-ref="module-handler"/>
+        <context sub-pattern="2" style-ref="class-name"/>
+      </include>
+    </context>
+
+    <!-- 'def' is also listed in the 'keywords' context, which has a lower priority
+         than this one, so 'def' is still highlighted when no valid function name follows. -->
+    <context id="function-definition">
+      <match extended="true">
+        (def)
+        \s+
+        (\%{identifier})
+      </match>
+      <include>
+        <context sub-pattern="1" style-ref="keyword"/>
+        <context sub-pattern="2" style-ref="function-name"/>
+      </include>
+    </context>
+
+    <!-- 'class' is also listed in the 'keywords' context, which has a lower priority
+         than this one, so 'class' is still highlighted when no valid class name follows. -->
+    <context id="class-definition">
+      <match extended="true">
+        (class)
+        \s+
+        (\%{identifier})
+      </match>
+      <include>
+        <context sub-pattern="1" style-ref="keyword"/>
+        <context sub-pattern="2" style-ref="class-name"/>
+      </include>
+    </context>
+
+    <context id="decorator" style-ref="decorator">
+      <match>@\%{identifier-path}</match>
+    </context>
+
     <context id="python3" class="no-spell-check">
       <include>
         <context ref="def:shebang"/>
@@ -159,11 +242,11 @@
 
         <context ref="python:hex"/>
 
-        <context ref="python:module-handler-from"/>
+        <context ref="module-handler-from"/>
         <context ref="python:module-handler"/>
-        <context ref="python:function-definition"/>
-        <context ref="python:class-definition"/>
-        <context ref="python:decorator"/>
+        <context ref="function-definition"/>
+        <context ref="class-definition"/>
+        <context ref="decorator"/>
 
         <context ref="python:keywords"/>
         <context id="3x-only-keywords" style-ref="keyword">


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]