fantasdic r407 - in trunk: . lib/fantasdic/sources test test/data



Author: mblondel
Date: Wed Apr  1 11:34:35 2009
New Revision: 407
URL: http://svn.gnome.org/viewvc/fantasdic?rev=407&view=rev

Log:
    * lib/fantasdic/sources/edict_file.rb: 
    A line in a standard EDICT file should be:
   
    word [reading] /meanings/

    but in UTF-8 CEDICT files, it is:

    word-traditional word-simplified [reading] /meanings/

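    Fixed to support CEDICT as well: the word and reading regexps now
    allow spaces, the define/prefix/suffix lookup patterns were adjusted
    accordingly, and comment lines are skipped when checking validity.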

    * test/test_edict_file.rb: Added test-case for CEDICT.
    * test/data/cedict_ts.u8: Test data.


Added:
   trunk/test/data/cedict_ts.u8
Modified:
   trunk/ChangeLog
   trunk/lib/fantasdic/sources/edict_file.rb
   trunk/test/test_edict_file.rb

Modified: trunk/lib/fantasdic/sources/edict_file.rb
==============================================================================
--- trunk/lib/fantasdic/sources/edict_file.rb	(original)
+++ trunk/lib/fantasdic/sources/edict_file.rb	Wed Apr  1 11:34:35 2009
@@ -30,8 +30,8 @@
         "suffix" => "Results match with the end of the word."
     }
 
-    REGEXP_WORD = '([^\[\/ ]+)'
-    REGEXP_READING = '( \[([^\]\/ ]+)\])?'
+    REGEXP_WORD = '([^\[\/]+)'
+    REGEXP_READING = '( \[([^\]\/]+)\])?'
     REGEXP_TRANSLATIONS = ' /(.+)/'
     REGEXP = Regexp.new('^' + REGEXP_WORD + REGEXP_READING +
                          REGEXP_TRANSLATIONS)
@@ -68,6 +68,7 @@
                     if @config[:encoding] and @config[:encoding] != "UTF-8"
                         line = convert_to_utf8(@config[:encoding], line)
                     end
+                    next if is_comment_line?(line)
                     n_errors += 1 if REGEXP.match(line).nil?
                     n_lines += 1
                     break if n_lines >= 20
@@ -90,21 +91,12 @@
 
     def define(db, word)
         wesc = escape_string(word)
-
-        if word.latin?
-            regexp = "/#{wesc}/"
-        elsif word.kana?
-            regexp = "^#{wesc} |\\[#{wesc}\\]"
-        elsif word.japanese?
-            regexp = "^#{wesc} "
-        else
-            regexp = "^#{wesc}|\\[#{wesc}\\]|/#{wesc}/"
-        end
+        regexp = "^#{wesc} | #{wesc} \\[|\\[#{wesc}\\]|/#{wesc}/"
         
         db = File.basename(@config[:filename])
         db_capitalize = db.capitalize
 
-        match_with_regexp(regexp).map do |line|
+        match_with_regexp(wesc).grep(Regexp.new(regexp)) do |line|
             defi = Definition.new
             defi.word = word
             defi.body = line.strip
@@ -124,10 +116,10 @@
 
         arr = arr_lines.map do |line|
             found_word, found_reading, found_trans = get_fields(line)
-            if word.kana? or word.japanese?
-                found_word
-            else
+            if word.latin?
                 found_trans
+            else
+                found_word
             end
         end
 
@@ -139,6 +131,10 @@
 
     private
 
+    def is_comment_line?(line)
+        line.strip =~ /^#/
+    end
+
     def match_word(db, word)
         arr = []
         match_substring(db, word).each do |line|
@@ -157,32 +153,14 @@
 
     def match_prefix(db, word)
         wesc = escape_string(word)
-        if word.latin?
-            regexp = "/#{wesc}"
-        elsif word.kana?
-            regexp = "^#{wesc}| \\[#{wesc}"
-        elsif word.japanese?
-            regexp = "^#{wesc}"
-        else
-            regexp = "^#{wesc}|\\[#{wesc}|/#{wesc}"
-        end
-
-        match_with_regexp(regexp)
+        regexp = "^#{wesc}| #{wesc}[^\\[]+\\[|\\[#{wesc}|/#{wesc}"
+        match_with_regexp(wesc).grep(Regexp.new(regexp))
     end
 
     def match_suffix(db, word)
         wesc = escape_string(word)
-        if word.latin?
-            regexp = "#{wesc}/"
-        elsif word.kana?
-            regexp = "#{wesc} \\[|#{wesc}\\]"
-        elsif word.japanese?
-            regexp = "#{wesc} \\["
-        else
-            regexp = "#{wesc} \\[|#{wesc}\\]|#{wesc}/"
-        end
-
-        match_with_regexp(regexp)
+        regexp = "#{wesc} [^\\[]+\\[|#{wesc} \\[|#{wesc}\\]|#{wesc}/"
+        match_with_regexp(wesc).grep(Regexp.new(regexp))
     end
 
     def match_substring(db, word)

Added: trunk/test/data/cedict_ts.u8
==============================================================================
--- (empty file)
+++ trunk/test/data/cedict_ts.u8	Wed Apr  1 11:34:35 2009
@@ -0,0 +1,200 @@
+# CEDICT 25 August 2007; Copyright 2007; -*- coding: utf-8-dos -*-; Entries: 44782
+å å [a1] /(phonetic particle)/
+å å [a1] /(interj.)/ah/
+é é [a1] /actinium/
+é é [a1] /an initial particle/prefix to names of people/
+éå éå [A1 ba1] /Aba/
+éååçåå éååçåå [A1 ba1 ai1 te4 tu2 ba1] /Abaetetuba/
+éååæ éååæ [A1 ba1 ga1 qi2] /(N) Abaga qi (place in Inner Mongolia)/
+éåæåä éåæåä [A1 ba1 la1 qi4 ya4] /the Appalachians/
+éå éå [A1 ba4] /(N) Ngawa, ch. Aba (town in Kham prov. of Tibet, pres. Sichuan)/
+éç éç [A1 ba4] /Abba/
+éèç éèå [A1 bei4 er3] /N.H. Abel, Norwegian mathematician/(math.) abelian/
+éæè éæè [A1 bi3 rang4] /Abidjan (city in the Ivory Coast)/
+éæèåä éæèåä [A1 bi3 xi1 ni2 ya4] /Abyssinia/
+éæèåää éæèåää [A1 bi3 xi1 ni2 ya4 ren2] /Abyssinian (person)/
+éæç éæç [A1 bo1 luo2] /Apollo/
+éåææ éåææ [A1 bu3 du4 la1] /Abdullah (name)/
+éååè éååå [A1 bu4 ha1 zi1] /Abkhazia/
+éåè éåè [A1 bu4 jia3] /Abuja (capital of Nigeria)/
+ééå ééå [A1 bu4 kuan1] /Hiroshi Abe/
+éääæ.éäéçèæ éäææ.éäéçèæ [A1 bu4 lai2 ti2 A1 bu4 du1 re4 xi1 ti2] /Abdulaidi Amudurexiti (chairman of Xinjiang autonomous region)/
+éåæèå éåæèå [A1 bu4 Sha1 ye1 fu1] /Abu Sayyaf/
+éåææ éåææ [A1 bu4 Za1 bi3] /Abu Dhabi/
+éåææå éåææå [A1 bu4 zha1 bi3 shi4] /Abu Dhabi (capital of United Arab Emirates)/
+éææ éææ [A1 chang1 zu2] /the Achang nationality living in Yunnan/
+éåç éåå [A1 che4 xian4] /Archer County, Texas/
+éå éå [A1 cheng2] /(N) Acheng (city in Heilongjiang)/
+éåä éåä [A1 chu1 fo2] /Akshobhya Buddha/
+ééååæèää éèååæéää [A1 da2 de2 ni2 la1 li3 yi1 shi4] /Adad-nirari I/
+ééç éèç [A1 da2 na4] /Adana/
+éåèå éåèå [A1 de2 lai2 de2] /Adelaide (an Australian city)/
+éåèå éåèå [A1 de2 lai2 de2] /Adelaide (city in Australia)/
+éè éè [A1 di2] /Ardy (name)/
+éååæ éååæ [A1 duo1 ni2 si1] /Adonis/
+éåè éåè [A1 duo1 nuo4] /Adorno (philosopher)/
+éçååä éåååä [A1 er3 ba1 ni2 ya4] /Albania/
+éçååää éåååää [A1 er3 ba1 ni2 ya4 ren2] /Albanian (person)/
+éçåæ éååæ [A1 er3 bei1 si1] /Alps (mountain range bordering Switzerland)/
+éçåæå éååæå [A1 er3 bei1 si1 shan1] /Alps/
+éçäç éåäç [A1 er3 bo2 te4] /Albert (name)/
+éçéçåä éåèçåä [A1 er3 da2 ban1 si4 shi4] /Artabanus III of Parthia/
+éçéçää éåèçää [A1 er3 da2 ban1 Yi1 shi4] /Arsaces II of Parthia/
+éçéå éåéå [A1 er3 dou1 sai1] /Althusser (philosopher)/
+éçæçææè éåæçææè [A1 er3 fa3 ji2 he2 qian2 ting3] /alfa class submarine/
+éçæ éåæ [a1 er3 fa4] /alpha/
+éçåç éååå [A1 er3 ji2 er3] /Algiers (capital of Algeria)/
+éçååä éåååä [A1 er3 ji2 li4 ya4] /Algeria/
+éçååäåæ éåååäåæ [A1 er3 ji2 li4 ya4 guo2 qi2] /flag of Algeria/
+éçåç éåæå [A1 er3 jie2 xian4] /Alger County, Michigan/
+éçåç éååç [A1 er3 ka3 te4] /Alcatel (old company)/
+éçååæ éåååæ [A1 er3 kan3 ta3 la1] /Alcantara, Brazil space launch site/
+éçé éåé [A1 er3 long2] /Arlon/
+éçæåää éåæåää [A1 er3 sha1 ke4 Yi1 shi4] /Arsaces I of Parthia/
+éçæéåå éåæéåå [A1 er3 si1 tong1 gong1 si1] /Alstom/
+éçæ éåæ [A1 er3 tai4] /Altai (region in Russia)/
+éççéæ éåçéæ [A1 er3 wa3 lei2 si1] /Alvarez/
+éçææé éåææè [A1 er3 wo4 la1 da2] /Alvorada/
+éé éé [a1 fei1] /hoodlum/
+éäååå éäååå [A1 fo2 lie4 da4 di4] /Alfred the Great/
+éåç éåå [A1 fu1 lu2] /Aflou (town in Algeria)/
+éääåçåå éääåçåå [A1 fu2 jia1 de2 luo2 ding4 lu:4] /Avogadro's law/
+éåæ éåæ [A1 fu4 han4] /Afghanistan/Afghan/
+éå éå [a1 ge1] /(regional expr.) elder brother/
+éæææ éæææ [A1 ge2 lang3 en1] /Aglon/
+éæççäæå éæççäæå [A1 ge2 lie4 rui4 bo2 zhan4 yi4] /Dagor Aglareb/
+éæå éæå [A1 gen1 ting2] /Argentina/
+éåå éåå [A1 he2 qi2] /(N) Aheqi (place in Xinjiang)/
+éè éè [a1 hong1] /imam/
+éåçåæ éåçåæ [A1 ji1 mi3 de2 qiao2] /Archimedes bridge/
+éåèç éåèå [A1 jia1 di2 er3] /Agadir (city in southwest Morocco)/
+ééæçè ééæçè [A1 jin1 si1 ke1 ye1] /Aginskoye, Agin-Buryat Autonomous Okrug/
+éå éå [A1 ka3] /Acre, Israel/
+éåçå éåçå [A1 ka3 luo2 jia1] /AzcÃrraga/
+éåæçç éåæåç [A1 ka3 pu3 er3 ke1] /Acapulco/
+éåæ éåæ [A1 ke4 la1] /Accra (capital of Ghana)/
+éåå éåä [A1 ke4 lun2] /Acheron/
+éåäèæ éåäèæ [A1 ke4 shi2 hu2 si1] /Akershus/
+éåè éåè [A1 ke4 su1] /(N) Aksu (place in Xinjiang)/
+éåèåå éåèåå [A1 ke4 su1 di4 qu1] /(N) Aksu district (district in Xinjiang)/
+éåé éåé [A1 ke4 tao2] /(N) Aketao (place in Xinjiang)/
+éèè éèè [A1 ken3 se4] /Arkansas/
+éèèé éèèé [A1 ken3 se4 long2] /Arkansaurus/
+éèèå éèèå [A1 ken3 se4 zhou1] /Arkansas/
+éçåç éçåç [A1 kong1 jia1 gua1] /Aconcagua/
+éåç éåå [A1 kui2 er3] /Aguirre/
+éåç éåç [A1 kui2 na4] /Aquinas (philosopher)/
+éæåé éæåé [A1 la1 ba1 ma3] /Alabama (U.S. state)/
+éæä éæä [A1 la1 bo2] /Arabian/Arabic/Arab/
+éæäåå éæäåå [A1 la1 bo2 ban4 dao3] /Arabian Peninsula/
+éæäååèç éæäååèç [A1 la1 bo2 Guo2 jia1 Lian2 meng2] /Arab League (League of Arab States)/
+éæäæ éæäæ [A1 la1 bo2 Hai3] /Arabian Sea/
+éæäèåééå éæäèåééå [A1 la1 bo2 Lian2 he2 Qiu2 chang2 guo2] /United Arab Emirates/
+éæää éæää [A1 la1 bo2 ren2] /Arabs/
+éæääç éæääç [A1 la1 bo2 shi4 jie4] /Arab world/
+éæäæå éæäæå [A1 la1 bo2 shu4 zi4] /Arabic numerals/
+éæäæ éæäæ [A1 la1 bo2 wen2] /Arabic (language)/
+éæäè éæäè [A1 la1 bo2 yu3] /Arabic (language)/
+éæä éæä [A1 la1 ding1] /Aladdin/
+éæç éæå [A1 la1 er3] /(N) Alaer (place in Xinjiang)/
+éææç éææç [A1 la1 fa3 te4] /Arafat (Palestinian leader)/
+éææäåäæ éææäåäæ [A1 la1 ge1 Yi1 ni2 ya4 si1] /Alagoinhas/
+éæåè éæåè [A1 la1 ka3 ru2] /Aracaju/
+éææåæ éææåæ [A1 la1 la1 kua1 la1] /Araraquara/
+éæææ éæææ [A1 la1 la1 si1] /Araras/
+éææ éææ [A1 la1 mo2] /Alamo/
+éææå éææå [A1 la1 mu4 tu2] /Almaty (capital of Kazakhstan)/
+éæçæå éæçæå [A1 la1 pi2 la1 ka3] /Arapiraca/
+éæèåå éæèåå [A1 la1 sa4 tu2 ba1] /AraÃatuba/
+éæåç éæåç [A1 la1 shan4 meng2] /Alxa League/
+éæååæ éæååæ [A1 la1 shan4 you4 qi2] /(N) Alashan youqi (place in Gansu)/
+éæååæ éæååæ [A1 la1 shan4 zuo3 qi2] /(N) Alashan zuoqi (place in Ningxia)/
+éææ éææ [A1 la1 si1] /Arras (town in northern France)/
+éææå éææå [A1 la1 si1 jia1] /Alaska/
+éææåå éææåå [A1 la1 si1 jia1 zhou1] /Alaska/
+éæåæ éæåæ [A1 la1 ta3 si1] /(Ali) Alatas (Indonesian Foreign Minister)/
+éèæ éèæ [A1 lai2 man4] /El Alamein (town in Egypt)/
+éåæ éåæ [A1 le4 tai4] /(N) Altay (city in Xinjiang)/
+éåæåå éåæåå [A1 le4 tai4 di4 qu1] /(N) Altay district (district in Xinjiang)/
+éçåçææç éçåäææç [A1 lei4 ni2 wu1 si1 fang1 cheng2] /Arrhenius equation/
+éé éé [A1 li3] /Ali (proper name)/
+éèå ééå [A1 li3 shan1] /Alishan/
+ééåé ééåä [A1 li3 shan1 xiang1] /(N) Alishan (village in Taiwan)/
+ééèçèçæ ééèçèçä [A1 li3 xin1 te4 zu2 qiu2 hui4] /Aalesunds F.K./
+ééäå éääå [A1 li4 ya4 na4] /Ariane (French space rocket)/
+éèé éèä [A1 lian2 xiang1] /(N) Alien (village in Taiwan)/
+éééåååå éçéåååå [A1 ling2 dun4 guo2 jia1 gong1 mu4] /Arlington National Cemetery/
+éçå éåå [A1 lu2 ba1] /Aruba/
+ééå ééå [A1 lu3 ba1] /(N) Aruba/
+ééççææ ééçåææ [A1 lu3 ke1 er3 qin4 qi2] /(N) Aluke'erqin qi (place in Inner Mongolia)/
+éå éä [A1 lun2] /Aalen (town in Germany)/
+éçç éçç [A1 luo2 yue1] /Arroyo (person name)/
+ééçèå ééåéå [A1 Ma3 er3 li3 ke4] /Amalric/
+éæ éæ [A1 man4] /Oman/
+éæç éææ [A1 man4 wan1] /Gulf of Oman/
+éå éä [a1 men2] /amen/
+éè éè [a1 meng2] /Amun (deity in Egyptian mythology)/
+éåéä éåéä [A1 mi2 tuo2 fo2] /Amitabha Buddha/
+éçå éçå [a1 mi3 ba1] /amoeba/
+éçåçç éçåçç [a1 mi3 ba1 li4 ji5] /amoebic dysentery/
+éæ éæ [A1 ming2] /Al-Amin/
+éæåä éæåä [a1 mo2 ni2 ya4] /ammonia/
+éæåæ éæåä [A1 mo2 si1 shu1] /Book of Amos/
+ééæçç ééæçç [A1 mo4 si1 fu2 te4] /Amersfoort/
+éåæçä éåæçä [A1 mu3 si1 te4 dan1] /Amsterdam (capital of Netherlands)/
+éåæçæ éåæçæ [A1 mu3 si1 te4 lang3] /Armstrong/
+éçæåæ éçæåæ [A1 na4 bo1 li4 si1] /AnÃpolis/
+éåéåçå éåèååå [A1 nen4 da2 bu4 er3 qu1] /Anantapur district/
+éåçåå éåçåå [A1 ni2 Ma3 qing1 shan1] /Amne Machin/
+éå éå [A1 nu2] /A Nu/
+éååé éååé [A1 nu3 lu:4 tuo2] /Anawrahta/
+éçä éçä [A1 pi2 ya4] /Apia (capital of Samoa)/
+éç éç [a1 pian4] /opium/
+éæåæç éæåæç [A1 pu3 Ka3 la1 na4] /Apucarana/
+éåéæ éåéæ [A1 qi2 li3 si1] /Aeschylus/
+éåææ éååæ [A1 qi2 li4 si1] /Achilles/
+éææ éèæ [A1 rong2 qi2] /(N) Arongqi (place in Heilongjiang)/
+éåæç éåæç [A1 sai4 bai4 jiang1] /Azerbaijan/
+éåçå éåçå [A1 sai4 wei2 duo1] /Acevedo/
+éçç éçå [A1 se4 xian4] /Arthur County, Nebraska/
+éæ éæ [A1 sen1] /Assen (city in the Netherlands)/
+éææ éææ [A1 sen1 si1] /Athens, Ohio/
+éæéå éææå [A1 sen1 song1 dao3] /Ascension Island/
+éäååå éäååå [A1 shen2 ha1 ba1 de2] /Ashgabat (capital of Turkmenistan)/
+éäæç éäæç [A1 shen2 la1 wei2] /Hanan Ashrawi/
+éäæå éäæå [A1 shi2 du4 de2] /Ashdod/
+éæåæå éæåæå [A1 si1 ke4 Xin1 cheng2] /Villeneuve d'Ascq/
+éæéæ éæéæ [A1 si1 ma3 la1] /Asmara/
+éæçç éæçå [A1 si1 na4 er3] /Aznar/
+éææå éææå [A1 si1 pai4 de2] /Selenia Aspide/
+éååæ éååæ [a1 si1 pi3 lin2] /aspirin/
+éæåæ éæåæ [a1 si1 pi3 lin2] /aspirin/
+éæåç éæåç [A1 si1 ta3 na4] /Astana (capital of Kazakhstan)/
+éæåé éæåé [A1 si1 tan3 long2] /Arstanosaurus/
+éåæääç éåæääç [A1 ta3 na2 xiu1 xin4 jing1] /Athanasian Creed/
+éçéæç éçéæå [A1 te4 jin1 sen1 xian4] /Atkinson County, Georgia/
+éçææçææé éçææçææé [A1 te4 la1 si1 ke1 pu3 ke1 long2] /Atlascopcosaurus/
+éåä éåä [A1 tu2 shi2] /(N) Atushi (place in Xinjiang)/
+éæå éæå [a1 tuo1 pin3] /atropine/
+éæåå éæåå [a1 tuo1 pin3 hua4] /atropinization/
+éçæ éçæ [A1 wa3 ti2] /(N) Awati (place in Xinjiang)/
+éæææ éæææ [A1 wang4 qu3 pei4] /Ngawang Choepel (Tibetan, Fulbright scholar)/
+éåçç éåçç [A1 wei1 luo2 sheng3] /Aveiro/
+ééäåé ééäåè [A1 wei2 ya4 nei4 da2] /Avellaneda/
+éçåç éçåç [a1 xi4 yuan2 su4] /actinides/
+åå åå [a1 ya1] /oh/
+éäæ éäæ [A1 Ya4 la1] /Ayala/
+éäè éäè [A1 yi1 sha1] /a wife of prophet Muhammad (peace be on him)/
+éå éå [a1 yi2] /auntie/
+å å [a2] /an interjection/to express doubt or to question/to show realization/to stress/
+å å [a2] /ah (exclamatory part.)/
+å å [a3] /(interj. for surprise)/
+å å [a4] /oh (interjection)/
+é é [a4] /(phonetic character)/
+å å [a5] /(a modal particle showing affirmation, approval, or consent)/
+é é [a5] /(final part.)/(interj.)/
+å å [ai1] /sorrow/grief/pity/to grieve for/to pity/to lament/
+å å [ai1] /an interjection/hey/lookout/why etc/
+å å [ai1] /an interjection/to express realization or agreement (yes, oh, right, etc)/
+å å [ai1] /dirt/dust/angstrom/
+æ æ [ai1] /lean to/in order/in sequence/

Modified: trunk/test/test_edict_file.rb
==============================================================================
--- trunk/test/test_edict_file.rb	(original)
+++ trunk/test/test_edict_file.rb	Wed Apr  1 11:34:35 2009
@@ -31,6 +31,12 @@
     include Fantasdic::Source
 
     private
+    
+    def test_check_validity(source)
+        assert_nothing_raised do
+            source.check_validity
+        end
+    end
 
     def test_define(source)
         defs = source.define("*", "éç")
@@ -140,9 +146,11 @@
             klass_short = klass.to_s.split("::").last.downcase
             gz = hash[:filename] =~ /gz$/ ? "gz" : "nogz"
 
-            method = "test_#{klass_short}_#{encoding}_#{gz}_define"
-            define_method(method) do
-                send("test_define", klass.new(hash))
+            ["define", "check_validity"].each do |m|
+                method = "test_#{klass_short}_#{encoding}_#{gz}_#{m}"
+                define_method(method) do
+                    send("test_#{m}", klass.new(hash))
+                end
             end
 
             ["prefix", "suffix", "word", "substring"].each do |match|
@@ -155,3 +163,143 @@
     end
 
 end
+
+class TestEdictFileSourceWithCedict < Test::Unit::TestCase
+    include Fantasdic::Source
+
+    def test_check_validity(source)
+        assert_nothing_raised do
+            source.check_validity
+        end
+    end
+
+    def test_define(source)
+        ["éæç", "éæç", "A1 bo1 luo2", "Apollo"].each do |w|
+            defs = source.define("*", w)
+            assert_equal(defs.length, 1)
+            assert_equal(defs[0].word, w)
+            assert_equal(defs[0].body, "éæç éæç [A1 bo1 luo2] /Apollo/")
+        end
+
+        defs = source.define("*", "tototititutu")
+        assert_equal(defs.length, 0)
+    end
+
+    def test_match_prefix(source)
+        matches = source.match("*", "prefix", "éæ")
+        key = matches.keys.first
+        assert_equal(matches,
+                     {key=>["éæåæå éæåæå",
+                            "éæéæ éæéæ",
+                            "éæçç éæçå",
+                            "éææå éææå",
+                            "éæåæ éæåæ",
+                            "éæåç éæåç",
+                            "éæåé éæåé"]})
+
+        ["éæ", "éè"].each do |pre|
+            matches = source.match("*", "prefix", pre)
+            key = matches.keys.first
+            assert_equal(matches,
+                        {key=>["éææ éèæ"]})
+        end
+
+        matches = source.match("*", "prefix", "A1 si1")
+        assert_equal(matches,
+                     {key=>["Villeneuve d'Ascq",
+                            "Asmara",
+                            "Aznar",
+                            "Selenia Aspide",
+                            "Astana (capital of Kazakhstan)",
+                            "Arstanosaurus"]})
+
+        matches = source.match("*", "prefix", "Ara")
+        assert_equal(matches,
+                     {key=>["Arabian/Arabic/Arab",
+                            "Arabian Peninsula",
+                            "Arab League (League of Arab States)",
+                            "Arabian Sea",
+                            "Arabs",
+                            "Arab world",
+                            "Arabic numerals",
+                            "Arabic (language)",
+                            "Arabic (language)",
+                            "Arafat (Palestinian leader)",
+                            "Aracaju",
+                            "Araraquara",
+                            "Araras",
+                            "Arapiraca",
+                            "AraÃatuba"]})
+
+    end
+
+    def test_match_suffix(source)
+        matches = source.match("*", "suffix", "åæ")
+        key = matches.keys.first
+        assert_equal(matches,
+                     {key=>["éååæ éååæ", "éæåæ éæåæ"]})
+
+        ["çç","çç"].each do |suf|
+            matches = source.match("*", "suffix", suf)
+            key = matches.keys.first
+            assert_equal(matches,
+                        {key=>["éåçç éåçç"]})
+        end
+
+        matches = source.match("*", "suffix", "qi2")
+        key = matches.keys.first
+        assert_equal(matches,
+                     {key=>["(N) Abaga qi (place in Inner Mongolia)",
+                            "flag of Algeria",
+                            "(N) Aheqi (place in Xinjiang)",
+                            "(N) Alashan youqi (place in Gansu)",
+                            "(N) Alashan zuoqi (place in Ningxia)",
+                            "(N) Aluke'erqin qi (place in Inner Mongolia)",
+                            "(N) Arongqi (place in Heilongjiang)"]})
+    
+        matches = source.match("*", "suffix", "ada")
+        key = matches.keys.first
+        assert_equal(matches,
+                     {key=>["Alvorada"]})
+
+    end
+
+    def test_match_word(source)
+        matches = source.match("*", "word", "doubt")
+        key = matches.keys.first
+        assert_equal(matches,
+                     {key=> ["an interjection/to express doubt or to " + \
+                             "question/to show realization/to stress"]})
+
+    end
+
+    def test_match_substring(source)
+    end
+
+    utf8 = {:filename => File.join($test_data_dir, "cedict_ts.u8"),
+            :encoding => "UTF-8"}
+
+    [EdictFileRuby, EdictFileEgrep].each do |klass|
+        [utf8].each do |hash|
+            encoding = hash[:encoding].gsub("-", "").downcase
+
+            klass_short = klass.to_s.split("::").last.downcase
+            gz = hash[:filename] =~ /gz$/ ? "gz" : "nogz"
+
+            ["define", "check_validity"].each do |m|
+                method = "test_#{klass_short}_#{encoding}_#{gz}_#{m}"
+                define_method(method) do
+                    send("test_#{m}", klass.new(hash))
+                end
+            end
+
+            ["prefix", "suffix", "word", "substring"].each do |match|
+                method = "test_#{klass_short}_#{encoding}_#{gz}_#{match}"
+                define_method(method) do
+                    send("test_match_#{match}", klass.new(hash))
+                end
+            end
+        end
+    end
+
+end
\ No newline at end of file



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]