[gtksourceview: 3/9] css.lang (and family): Invert identifier character classes
- From: Christian Hergert <chergert src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtksourceview: 3/9] css.lang (and family): Invert identifier character classes
- Date: Tue, 2 Jun 2020 18:31:24 +0000 (UTC)
commit 96d4b2f7a1cb995a9e39fe300c18582f77b317bd
Author: Jeffery To <jeffery to gmail com>
Date: Tue Jun 2 02:31:32 2020 +0800
css.lang (and family): Invert identifier character classes
The resulting character classes are harder to read but much smaller (and
more performant).
data/language-specs/css.lang | 79 +++++++++++++++++++++++++++++++++++--------
data/language-specs/less.lang | 23 +++++++------
data/language-specs/scss.lang | 23 +++++++------
3 files changed, 88 insertions(+), 37 deletions(-)
---
diff --git a/data/language-specs/css.lang b/data/language-specs/css.lang
index 15e59e18..1dd21fc5 100644
--- a/data/language-specs/css.lang
+++ b/data/language-specs/css.lang
@@ -96,7 +96,8 @@
<default-regex-options case-sensitive="false"/>
- <keyword-char-class>[a-z0-9_\x{80}-\x{10ffff}\\-]</keyword-char-class>
+ <!-- from keyword-code-point -->
+ <keyword-char-class>[^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}\x{5D}\x{5E}\x{60}\x{7B}-\x{7F}]</keyword-char-class>
<definitions>
@@ -125,30 +126,78 @@
)
</define-regex>
- <!-- https://drafts.csswg.org/css-syntax/#non-ascii-code-point -->
- <define-regex id="non-ascii">[\x{80}-\x{10ffff}]</define-regex>
+ <!--
+ https://drafts.csswg.org/css-syntax/#name-start-code-point
+
+ includes:
+ U+0041-U+005A Latin capital letter A to Z (A-Z)
+ U+005F Low line (_)
+ U+0061-U+007A Latin small letter A to Z (a-z)
+ U+0080- Non-ASCII
+ -->
+ <define-regex id="identifier-start-code-point" extended="true">
+ [^\x{0}-\x{40}\x{5B}-\x{5E}\x{60}\x{7B}-\x{7F}]
+ </define-regex>
+
+ <!--
+ https://drafts.csswg.org/css-syntax/#name-code-point
+
+ includes:
+ U+002D Hyphen-minus (-)
+ U+0030-U+0039 Digit zero to nine (0-9)
+ U+0041-U+005A Latin capital letter A to Z (A-Z)
+ U+005F Low line (_)
+ U+0061-U+007A Latin small letter A to Z (a-z)
+ U+0080- Non-ASCII
+ -->
+ <define-regex id="identifier-code-point" extended="true">
+ [^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}-\x{5E}\x{60}\x{7B}-\x{7F}]
+ </define-regex>
+
+ <!--
+ identifier-code-point, excluding hyphen
+
+ includes:
+ U+0030-U+0039 Digit zero to nine (0-9)
+ U+0041-U+005A Latin capital letter A to Z (A-Z)
+ U+005F Low line (_)
+ U+0061-U+007A Latin small letter A to Z (a-z)
+ U+0080- Non-ASCII
+ -->
+ <define-regex id="identifier-prefix-code-point" extended="true">
+ [^\x{0}-\x{2F}\x{3A}-\x{40}\x{5B}-\x{5E}\x{60}\x{7B}-\x{7F}]
+ </define-regex>
+
+ <!--
+ identifier-code-point, including backslash
+
+ includes:
+ U+002D Hyphen-minus (-)
+ U+0030-U+0039 Digit zero to nine (0-9)
+ U+0041-U+005A Latin capital letter A to Z (A-Z)
+ U+005C Reverse solidus (\)
+ U+005F Low line (_)
+ U+0061-U+007A Latin small letter A to Z (a-z)
+ U+0080- Non-ASCII
+ -->
+ <define-regex id="keyword-code-point" extended="true">
+ [^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}\x{5D}\x{5E}\x{60}\x{7B}-\x{7F}]
+ </define-regex>
<!--
a combination of name-start code point and escape
- https://drafts.csswg.org/css-syntax/#name-start-code-point
https://www.w3.org/TR/selectors-3/#lex
-->
<define-regex id="identifier-start-char" extended="true">
- [a-z_] | \%{non-ascii} | \%{escape}
+ \%{identifier-start-code-point} | \%{escape}
</define-regex>
<!--
a combination of name code point and escape
- https://drafts.csswg.org/css-syntax/#name-code-point
https://www.w3.org/TR/selectors-3/#lex
-->
<define-regex id="identifier-chars" extended="true">
- (?> (?: [a-z0-9_-]+ | \%{non-ascii}+ | \%{escape}+ )+ )
- </define-regex>
-
- <!-- for lookbehinds (and lookaheads) -->
- <define-regex id="single-identifier-char" extended="true">
- [a-z0-9_-] | \%{non-ascii}
+ (?> (?: \%{identifier-code-point}+ | \%{escape}+ )+ )
</define-regex>
<!-- https://drafts.csswg.org/css-syntax/#ident-token-diagram -->
@@ -158,7 +207,7 @@
<!-- identifier-chars, excluding hyphen -->
<define-regex id="identifier-prefix-chars" extended="true">
- (?> (?: [a-z0-9_]+ | \%{non-ascii}+ | \%{escape}+ )+ )
+ (?> (?: \%{identifier-prefix-code-point}+ | \%{escape}+ )+ )
</define-regex>
<!--
@@ -462,7 +511,7 @@
</define-regex>
<define-regex id="non-number-char" extended="true">
- \%{single-identifier-char} | \.
+ \%{keyword-code-point} | \.
</define-regex>
<context id="dimension" style-ref="dimension">
@@ -545,7 +594,7 @@
<context id="unicode-range" style-ref="unicode-range">
<match extended="true">
\%[ u \+ (?> [a-f0-9?]{1,6} ) (?> - [a-f0-9]{1,6} )?
- (?! \%{single-identifier-char} )
+ (?! \%{keyword-code-point} )
</match>
</context>
diff --git a/data/language-specs/less.lang b/data/language-specs/less.lang
index 5d11a679..d0c7a177 100644
--- a/data/language-specs/less.lang
+++ b/data/language-specs/less.lang
@@ -55,7 +55,8 @@
<default-regex-options case-sensitive="false"/>
- <keyword-char-class>[a-z0-9_\x{80}-\x{10ffff}\\-]</keyword-char-class>
+ <!-- from css:keyword-code-point -->
+ <keyword-char-class>[^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}\x{5D}\x{5E}\x{60}\x{7B}-\x{7F}]</keyword-char-class>
<definitions>
@@ -153,9 +154,9 @@
<context id="arithmetic-operator" style-ref="operator-symbol">
<match extended="true">
[+*/] |
- (?&lt;! \%{css:single-identifier-char} )
+ (?&lt;! \%{css:keyword-code-point} )
-
- (?! \%{css:single-identifier-char} )
+ (?! \%{css:keyword-code-point} )
</match>
</context>
@@ -497,13 +498,13 @@
\+_?: | # property merge
:
(?:
- (?! # not the start of a
- \%{css:single-identifier-char} | # pseudo-class
- [:\\] | # pseudo-element, escape
- @{ | # variable interpolation
- /\* # comment
- ) | # or
- (?= # ends like a normal declaration
+ (?! # not the start of a
+ \%{css:keyword-code-point} | # pseudo-class
+ [:\\] | # pseudo-element, escape
+ @{ | # variable interpolation
+ /\* # comment
+ ) | # or
+ (?= # ends like a normal declaration
(?:
(?>
(?:
@@ -519,7 +520,7 @@
(?&double_quote_string) |
(?&single_quote_string)
)*
- \%{css:declaration-value-end} # with a semicolon or at the end of a block
+ \%{css:declaration-value-end} # with a semicolon or at the end of a block
)
)
)
diff --git a/data/language-specs/scss.lang b/data/language-specs/scss.lang
index 9a3024c3..1266edf7 100644
--- a/data/language-specs/scss.lang
+++ b/data/language-specs/scss.lang
@@ -58,7 +58,8 @@
<default-regex-options case-sensitive="false"/>
- <keyword-char-class>[a-z0-9_\x{80}-\x{10ffff}\\-]</keyword-char-class>
+ <!-- from css:keyword-code-point -->
+ <keyword-char-class>[^\x{0}-\x{2C}\x{2E}\x{2F}\x{3A}-\x{40}\x{5B}\x{5D}\x{5E}\x{60}\x{7B}-\x{7F}]</keyword-char-class>
<definitions>
@@ -134,9 +135,9 @@
<context id="arithmetic-operator" style-ref="operator-symbol">
<match extended="true">
[+*%] |
- (?&lt;! \%{css:single-identifier-char} )
+ (?&lt;! \%{css:keyword-code-point} )
-
- (?! \%{css:single-identifier-char} )
+ (?! \%{css:keyword-code-point} )
</match>
</context>
@@ -520,13 +521,13 @@
:
(?:
- (?! # not the start of a
- \%{css:single-identifier-char} | # pseudo-class
- [:\\] | # pseudo-element, escape
- \#{ | # interpolation
- /\* # comment
- ) | # or
- (?= # ends like a normal declaration
+ (?! # not the start of a
+ \%{css:keyword-code-point} | # pseudo-class
+ [:\\] | # pseudo-element, escape
+ \#{ | # interpolation
+ /\* # comment
+ ) | # or
+ (?= # ends like a normal declaration
(?>
(?:
(?>
@@ -547,7 +548,7 @@
(?&single_quote_string)
)*
)
- \%{css:declaration-value-end} # with a semicolon or at the end of a block
+ \%{css:declaration-value-end} # with a semicolon or at the end of a block
)
)
</start>
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]