[dasher: 13/38] Mandarin: Add lots of missing pronunciations that are used in the training text
- From: Patrick Welche <pwelche src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [dasher: 13/38] Mandarin: Add lots of missing pronunciations that are used in the training text
- Date: Tue, 3 Jan 2012 15:33:45 +0000 (UTC)
commit 35fcbe6ea32254ae42ab8c011537d51dcf3614a1
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date: Thu Dec 8 22:27:35 2011 +0000
Mandarin: Add lots of missing pronunciations that are used in the training text
Of course, this doesn't answer the question of whether the pronunciations
that we _do_ have in chineseRuby are correct... or whether the training text is, either :-!
Data/alphabets/alphabet.bopoDict.xml | 2 +-
Data/alphabets/alphabet.chineseRuby.xml | 40 +++++++++++++++++++++++++++---
Data/alphabets/alphabet.spyDict.xml | 6 ++--
Data/alphabets/alphabet.spyToneMarks.xml | 2 +-
Data/alphabets/alphabet.spyTones2.xml | 2 +-
5 files changed, 42 insertions(+), 10 deletions(-)
---
diff --git a/Data/alphabets/alphabet.bopoDict.xml b/Data/alphabets/alphabet.bopoDict.xml
index b4cfde3..d415ee3 100644
--- a/Data/alphabets/alphabet.bopoDict.xml
+++ b/Data/alphabets/alphabet.bopoDict.xml
@@ -4861,7 +4861,7 @@
<s d="(" t="(" b="20" visible="yes" note="Left bracket"/>
<s d=")" t=")" b="17" visible="yes" note="Right bracket" />
<s d="…" t="…" b="20" visible="yes" note="Elipses" />
-<!--<s d="……" t="……" b="20" visible="yes" note="Double elipses, commented out by ACL as symbols must be single characters. Anything wrong with just writing two ellipses?" />-->
+<!--<s d="……" t="……" b="20" visible="yes" note="Double elipses, commented out as symbols must be single characters. Anything wrong with just writing two ellipses?" />-->
<s d="—" t="—" b="17" visible="yes" note="Long dash" />
<s d="《" t="《" b="20" visible="yes" note="Double title (reference) brackets (left)" />
<s d="》" t="》" b="17" visible="yes" note="Double title (reference) brackets (right)" />
diff --git a/Data/alphabets/alphabet.chineseRuby.xml b/Data/alphabets/alphabet.chineseRuby.xml
index 4c03c6b..1b7748a 100644
--- a/Data/alphabets/alphabet.chineseRuby.xml
+++ b/Data/alphabets/alphabet.chineseRuby.xml
@@ -3,7 +3,7 @@
<?xml-stylesheet type="text/xsl" href="alphabet.xsl"?>
<alphabets>
<alphabet name="Chinese 简体中文 (simplified chinese, in pin yin groups)" hidden="yes"> <!-- Alphabet created by David MacKay using write.p. Thanks to Juan K Lin for help -->
-<!--ACL 10Feb2011: Pinyin characters removed (but pinyin groups retained), and "punctuation" inc. roman letters + numerals added, as per rewrite of MandarinDasher to include punctuation in context-->
+<!--10Feb2011: Pinyin characters removed (but pinyin groups retained), and "punctuation" inc. roman letters + numerals added, as per rewrite of MandarinDasher to include punctuation in context-->
<orientation type="LR"/>
<encoding type="Western"/>
<control d="Control" t=""/>
@@ -29,7 +29,7 @@
<group name="à (a4)" label="à" f="113" b="117">
<s d="啊" t="啊" />
</group>
-<!--ACL using unicode "latin small letter a with ring above" to avoid conflict
+<!--Note use of unicode "latin small letter a with ring above" to avoid conflict
with roman letter a-->
<group name="a (a5)" label="å" f="113" b="118">
<s d="啊" t="啊" />
@@ -295,6 +295,7 @@
<s d="北" t="北" />
</group>
<group name="bèi (bei4)" label="bèi" f="113" b="121">
+<s d="臂" t="臂" /><!--ACL adding as present in training text-->
<s d="辈" t="辈" />
<s d="背" t="背" />
<s d="贝" t="贝" />
@@ -539,6 +540,7 @@
<s d="饽" t="饽" />
</group>
<group name="bó (bo2)" label="bó" f="113" b="117">
+<s d="卜" t="卜" /><!--ACL adding as present in training text-->
<s d="柏" t="柏" />
<s d="薄" t="薄" />
<s d="博" t="博" />
@@ -951,6 +953,7 @@
<s d="魑" t="魑" />
</group>
<group name="chí (chi2)" label="chí" f="113" b="122">
+<s d="茬" t="茬" /><!--ACL adding as present in training text-->
<s d="持" t="持" />
<s d="匙" t="匙" />
<s d="池" t="池" />
@@ -1445,6 +1448,7 @@
<s d="锝" t="锝" />
</group>
<group name="de (de5)" label="de" f="113" b="119">
+<s d="大" t="大" /><!--ACL adding as present in training text-->
<s d="得" t="得" />
<s d="的" t="的" />
<s d="底" t="底" />
@@ -1657,6 +1661,7 @@
<s d="硐" t="硐" />
</group>
<group name="dōu (dou1)" label="dōu" f="113" b="122">
+<s d="要" t="要" /><!--ACL adding as present in training text-->
<s d="兜" t="兜" />
<s d="都" t="都" />
<s d="蔸" t="蔸" />
@@ -1889,6 +1894,7 @@
<s d="垡" t="垡" />
</group>
<group name="fǎ (fa3)" label="fǎ" f="113" b="122">
+<s d="发" t="发" /><!--ACL adding as present in training text-->
<s d="法" t="法" />
<s d="砝" t="砝" />
</group>
@@ -2295,6 +2301,7 @@
<s d="袼" t="袼" />
</group>
<group name="gé (ge2)" label="gé" f="113" b="123">
+<s d="合" t="合" /><!--ACL adding as present in training text-->
<s d="搁" t="搁" />
<s d="革" t="革" />
<s d="葛" t="葛" />
@@ -2321,6 +2328,7 @@
<s d="舸" t="舸" />
</group>
<group name="gè (ge4)" label="gè" f="113" b="114">
+<s d="量" t="量" /><!--ACL adding as present in training text-->
<s d="铬" t="铬" />
<s d="个" t="个" />
<s d="各" t="各" />
@@ -2618,6 +2626,7 @@
<s d="蜾" t="蜾" />
</group>
<group name="gùo (guo4)" label="gùo" f="113" b="119">
+<s d="的" t="的" /><!--ACL adding as present in training text-->
<s d="过" t="过" />
</group>
<group name="hā (ha1)" label="hā" f="113" b="120">
@@ -2680,6 +2689,7 @@
<s d="阚" t="阚" />
</group>
<group name="hàn (han4)" label="hàn" f="113" b="120">
+<s d="颌" t="颌" /><!--ACL adding as present in training text-->
<s d="翰" t="翰" />
<s d="撼" t="撼" />
<s d="捍" t="捍" />
@@ -3351,6 +3361,7 @@
<s d="趼" t="趼" />
</group>
<group name="jìan (jian4)" label="jìan" f="113" b="120">
+<s d="化" t="化" /><!--ACL adding as present in training text-->
<s d="间" t="间" />
<s d="荐" t="荐" />
<s d="槛" t="槛" />
@@ -4020,6 +4031,7 @@
<s d="蒯" t="蒯" />
</group>
<group name="kùai (kuai4)" label="kùai" f="113" b="114">
+<s d="桧" t="桧" /><!--ACL adding as present in training text-->
<s d="会" t="会" />
<s d="块" t="块" />
<s d="筷" t="筷" />
@@ -4209,6 +4221,7 @@
<s d="螂" t="螂" />
</group>
<group name="lǎng (lang3)" label="lǎng" f="113" b="119">
+<s d="阆" t="阆" /><!--ACL adding as present in training text-->
<s d="朗" t="朗" />
</group>
<group name="làng (lang4)" label="làng" f="113" b="120">
@@ -4837,7 +4850,7 @@
<s d="跞" t="跞" />
<s d="雒" t="雒" />
</group>
-<!--ACL using unicode "latin small letter m with acute" to avoid conflict with
+<!--Note use of unicode "latin small letter m with acute" to avoid conflict with
roman letter m-->
<group name="m (m2)" label="ḿ" f="113" b="121">
<s d="呒" t="呒" />
@@ -5288,6 +5301,7 @@
<s d="镎" t="镎" />
</group>
<group name="nǎ (na3)" label="nǎ" f="113" b="118">
+<s d="那" t="那" /><!--ACL adding as present in training text-->
<s d="哪" t="哪" />
</group>
<group name="nà (na4)" label="nà" f="113" b="119">
@@ -5961,6 +5975,7 @@
<s d="噗" t="噗" />
</group>
<group name="pú (pu2)" label="pú" f="113" b="117">
+<s d="朴" t="朴" /><!--ACL adding as present in training text-->
<s d="脯" t="脯" />
<s d="仆" t="仆" />
<s d="莆" t="莆" />
@@ -6056,6 +6071,7 @@
<s d="綮" t="綮" />
</group>
<group name="qì (qi4)" label="qì" f="113" b="123">
+<s d="缉" t="缉" /><!--ACL adding as present in training text-->
<s d="妻" t="妻" />
<s d="契" t="契" />
<s d="砌" t="砌" />
@@ -6090,6 +6106,7 @@
<s d="髂" t="髂" />
</group>
<group name="qīan (qian1)" label="qīan" f="113" b="117">
+<s d="嵌" t="嵌" /><!--ACL adding as present in training text-->
<s d="牵" t="牵" />
<s d="扦" t="扦" />
<s d="钎" t="钎" />
@@ -6685,6 +6702,7 @@
<s d="瘙" t="瘙" />
</group>
<group name="sè (se4)" label="sè" f="113" b="115">
+<s d="过" t="过" /><!--ACL adding as present in training text-->
<s d="塞" t="塞" />
<s d="瑟" t="瑟" />
<s d="色" t="色" />
@@ -6740,6 +6758,7 @@
<s d="晒" t="晒" />
</group>
<group name="shān (shan1)" label="shān" f="113" b="114">
+<s d="掺" t="掺" /><!--ACL adding as present in training text-->
<s d="珊" t="珊" />
<s d="苫" t="苫" />
<s d="杉" t="杉" />
@@ -6855,6 +6874,8 @@
<s d="舍" t="舍" />
</group>
<group name="shè (she4)" label="shè" f="113" b="117">
+<s d="的" t="的" /><!--ACL adding as present in training text-->
+<s d="中" t="中" /><!--ACL adding as present in training text-->
<s d="舍" t="舍" />
<s d="赦" t="赦" />
<s d="摄" t="摄" />
@@ -6950,6 +6971,7 @@
<s d="鲺" t="鲺" />
</group>
<group name="shí (shi2)" label="shí" f="113" b="117">
+<s d="解" t="解" /><!--ACL adding as present in training text-->
<s d="十" t="十" />
<s d="石" t="石" />
<s d="拾" t="拾" />
@@ -7163,6 +7185,7 @@
<s d="铄" t="铄" />
</group>
<group name="sī (si1)" label="sī" f="113" b="113">
+<s d="思" t="思" /><!--ACL adding as present in training text-->
<s d="斯" t="斯" />
<s d="撕" t="撕" />
<s d="嘶" t="嘶" />
@@ -7477,6 +7500,7 @@
<s d="饕" t="饕" />
</group>
<group name="táo (tao2)" label="táo" f="113" b="117">
+<s d="焘" t="焘" /><!--ACL adding as present in training text-->
<s d="萄" t="萄" />
<s d="桃" t="桃" />
<s d="逃" t="逃" />
@@ -7629,6 +7653,7 @@
<s d="梃" t="梃" />
</group>
<group name="tōng (tong1)" label="tōng" f="113" b="119">
+<s d="革" t="革" /><!--ACL adding as present in training text-->
<s d="通" t="通" />
<s d="嗵" t="嗵" />
</group>
@@ -8164,6 +8189,7 @@
<s d="呷" t="呷" />
</group>
<group name="xía (xia2)" label="xía" f="113" b="120">
+<s d="挟" t="挟" /><!--ACL adding as present in training text-->
<s d="匣" t="匣" />
<s d="霞" t="霞" />
<s d="辖" t="辖" />
@@ -8799,6 +8825,7 @@
<s d="窈" t="窈" />
</group>
<group name="yào (yao4)" label="yào" f="113" b="116">
+<s d="都" t="都" /><!--ACL adding as present in training text-->
<s d="疟" t="疟" />
<s d="药" t="药" />
<s d="要" t="要" />
@@ -8901,6 +8928,7 @@
<s d="酏" t="酏" />
</group>
<group name="yì (yi4)" label="yì" f="113" b="113">
+<s d="的" t="的" /><!--ACL adding as present in training text-->
<s d="艾" t="艾" />
<s d="衣" t="衣" />
<s d="艺" t="艺" />
@@ -9154,6 +9182,7 @@
<s d="瘀" t="瘀" />
</group>
<group name="yú (yu2)" label="yú" f="113" b="122">
+<s d="圩" t="圩" /><!--ACL adding as present in training text-->
<s d="于" t="于" />
<s d="盂" t="盂" />
<s d="榆" t="榆" />
@@ -9606,6 +9635,7 @@
<s d="嶂" t="嶂" />
</group>
<group name="zhāo (zhao1)" label="zhāo" f="113" b="120">
+<s d="召" t="召" /><!--ACL adding as present in training text-->
<s d="朝" t="朝" />
<s d="嘲" t="嘲" />
<s d="招" t="招" />
@@ -9615,6 +9645,7 @@
<s d="钊" t="钊" />
</group>
<group name="zháo (zhao2)" label="zháo" f="113" b="121">
+<s d="著" t="著" /><!--ACL adding as present in training text-->
<s d="着" t="着" />
</group>
<group name="zhǎo (zhao3)" label="zhǎo" f="113" b="122">
@@ -10026,6 +10057,7 @@
<s d="焯" t="焯" />
</group>
<group name="zhúo (zhuo2)" label="zhúo" f="113" b="123">
+<s d="焯" t="焯" /><!--ACL adding as present in training text-->
<s d="缴" t="缴" />
<s d="著" t="著" />
<s d="卓" t="卓" />
@@ -10202,7 +10234,7 @@
<s d="酢" t="酢" />
</group>
-<!-- ACL idea of all further groups, is to match up with individual punctuation/roman characters from
+<!-- Idea of all further groups, is to match up with individual punctuation/roman characters from
the Pinyin alphabet, again by equality of display text. This should mean that when the user writes a
pinyin punctuation symbol, the equivalent/identical chinese character/symbol will be written instead,
and entered into the (chinese-character) language model context just as for other _chinese_ symbols. -->
diff --git a/Data/alphabets/alphabet.spyDict.xml b/Data/alphabets/alphabet.spyDict.xml
index b235c3a..afce265 100644
--- a/Data/alphabets/alphabet.spyDict.xml
+++ b/Data/alphabets/alphabet.spyDict.xml
@@ -27,7 +27,7 @@
<s d="à" t="㠁" b="65" visible="no"/>
</group>
<group name="a5" label="・" b="66" visible="yes">
-<!--ACL using unicode "latin small letter a with ring above" to avoid confusion with roman letter a-->
+<!--Note use of unicode "latin small letter a with ring above" to avoid confusion with roman letter a-->
<s d="å" t="呵" b="67" visible="no"/>
</group>
<group name="ai" label="i" b="68" visible="yes">
@@ -2280,7 +2280,7 @@
</group>
<group name="m" label="m" b="90" visible="yes">
<group name="m2" label="ˊ" b="57" visible="yes">
-<!-- ACL using unicode "latin small letter m with acute" to avoid conflict with roman letter m -->
+<!-- Note use of unicode "latin small letter m with acute" to avoid conflict with roman letter m -->
<s d="ḿ" t="呒" b="58" visible="no"/>
</group>
<group name="ma" label="a" b="59" visible="yes">
@@ -4887,7 +4887,7 @@
<s d="(" t="(" b="20" visible="yes" note="Left bracket"/>
<s d=")" t=")" b="17" visible="yes" note="Right bracket" />
<s d="…" t="…" b="20" visible="yes" note="Elipses" />
-<!--<s d="……" t="……" b="20" visible="yes" note="Double elipses, commented out by ACL as symbols must be single characters. Anything wrong with just writing two ellipses?" />-->
+<!--<s d="……" t="……" b="20" visible="yes" note="Double elipses, commented out as symbols must be single characters. Anything wrong with just writing two ellipses?" />-->
<s d="—" t="—" b="17" visible="yes" note="Long dash" />
<s d="《" t="《" b="20" visible="yes" note="Double title (reference) brackets (left)" />
<s d="》" t="》" b="17" visible="yes" note="Double title (reference) brackets (right)" />
diff --git a/Data/alphabets/alphabet.spyToneMarks.xml b/Data/alphabets/alphabet.spyToneMarks.xml
index 1746fbe..c7448f7 100644
--- a/Data/alphabets/alphabet.spyToneMarks.xml
+++ b/Data/alphabets/alphabet.spyToneMarks.xml
@@ -4502,7 +4502,7 @@
<s d="(" t="(" b="20" visible="yes" note="Left bracket"/>
<s d=")" t=")" b="17" visible="yes" note="Right bracket" />
<s d="…" t="…" b="20" visible="yes" note="Elipses" />
-<!--<s d="……" t="……" b="20" visible="yes" note="Double elipses, commented out by ACL as symbols must be single characters. Anything wrong with just writing two ellipses?" />-->
+<!--<s d="……" t="……" b="20" visible="yes" note="Double elipses, commented out as symbols must be single characters. Anything wrong with just writing two ellipses?" />-->
<s d="—" t="—" b="17" visible="yes" note="Long dash" />
<s d="《" t="《" b="20" visible="yes" note="Double title (reference) brackets (left)" />
<s d="》" t="》" b="17" visible="yes" note="Double title (reference) brackets (right)" />
diff --git a/Data/alphabets/alphabet.spyTones2.xml b/Data/alphabets/alphabet.spyTones2.xml
index 2ac1101..b6d6e00 100644
--- a/Data/alphabets/alphabet.spyTones2.xml
+++ b/Data/alphabets/alphabet.spyTones2.xml
@@ -4875,7 +4875,7 @@
<s d="(" t="(" b="20" visible="yes" note="Left bracket"/>
<s d=")" t=")" b="17" visible="yes" note="Right bracket" />
<s d="…" t="…" b="20" visible="yes" note="Elipses" />
-<!--<s d="……" t="……" b="20" visible="yes" note="Double elipses, commented out by ACL as symbols must be single characters. Anything wrong with just writing two ellipses?" />-->
+<!--<s d="……" t="……" b="20" visible="yes" note="Double elipses, commented out as symbols must be single characters. Anything wrong with just writing two ellipses?" />-->
<s d="—" t="—" b="17" visible="yes" note="Long dash" />
<s d="《" t="《" b="20" visible="yes" note="Double title (reference) brackets (left)" />
<s d="》" t="》" b="17" visible="yes" note="Double title (reference) brackets (right)" />
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]