[pybliographer/wip/gtk3] format: recode source files from latin-1 to utf-8
- From: Germán Poo-Caamaño <gpoo src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [pybliographer/wip/gtk3] format: recode source files from latin-1 to utf-8
- Date: Wed, 11 Oct 2017 20:13:31 +0000 (UTC)
commit 56b8e4ff913b64b6df49ce86a5a66e27317acf5f
Author: Germán Poo-Caamaño <gpoo gnome org>
Date: Wed Oct 11 09:23:52 2017 -0300
format: recode source files from latin-1 to utf-8
Pyblio/Format/Medline.py | 100 ++++++++++++++++++++--------------------
Pyblio/Format/test_Ovidlike.py | 57 +++++++++++------------
Pyblio/Format/test_medline.py | 19 ++------
3 files changed, 83 insertions(+), 93 deletions(-)
---
diff --git a/Pyblio/Format/Medline.py b/Pyblio/Format/Medline.py
index e47093b..aeaf3b0 100644
--- a/Pyblio/Format/Medline.py
+++ b/Pyblio/Format/Medline.py
@@ -1,24 +1,24 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
# This file is part of pybliographer
-#
+#
# Copyright (C) 1998-2004 Frederic GOBRY
# Email : gobry pybliographer org
-#
+#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
+# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
+# GNU General Public License for more details.
+#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-#
-#
+#
+#
# Extension module for Medline files
@@ -40,8 +40,8 @@ class MedlineIterator (Iterator.Iterator):
def __init__ (self, file):
self.file = file
return
-
-
+
+
def first (self):
# rewind the file
self.file.seek (0)
@@ -51,14 +51,14 @@ class MedlineIterator (Iterator.Iterator):
def next (self):
current = None
data = ''
-
+
table = {}
# Skip whitespace
while 1:
line = self.file.readline ()
if line == '': return table
-
+
line = string.rstrip (line)
if line != '': break
@@ -70,14 +70,14 @@ class MedlineIterator (Iterator.Iterator):
table [current].append (data)
else:
table [current] = [data]
-
+
current = string.strip (head.group (1))
data = head.group (2)
else:
cont = contin.match (line)
if cont:
data = data + ' ' + cont.group (1)
-
+
line = self.file.readline ()
if line == '': break
@@ -99,18 +99,18 @@ class MedlineIterator (Iterator.Iterator):
norm ['url'] = Fields.URL (medurl + table ['PMID'] [0])
norm ['medline-pmid'] = Fields.Text (table ['PMID'] [0])
del table ['PMID']
-
+
if table.has_key ('UI'):
norm [one_to_one ['UI']] = Fields.Text (table ['UI'] [0])
del table ['UI']
if table.has_key ('AU'):
group = Fields.AuthorGroup ()
-
+
for au in table ['AU']:
# analyze the author by ourself.
first, last, lineage = [], [], []
-
+
for part in string.split (au, ' '):
if part.isupper ():
# in upper-case, this is a first name
@@ -144,9 +144,9 @@ class MedlineIterator (Iterator.Iterator):
lineage = string.join (lineage, ' ')
else:
lineage = None
-
+
group.append (Fields.Author ((None, first, last, lineage)))
-
+
norm [one_to_one ['AU']] = group
del table ['AU']
@@ -154,50 +154,50 @@ class MedlineIterator (Iterator.Iterator):
fields = string.split (table ['DP'][0], ' ')
norm [one_to_one ['DP']] = Fields.Date (fields [0])
del table ['DP']
-
+
# The simple fields...
for f in table.keys ():
f_mapped = one_to_one.get(f, 'medline-%s' %(f.lower()))
text_type = Types.get_field(f_mapped).type
norm [f_mapped] = text_type (string.join (table [f], " ; "))
-
+
return Base.Entry (None, type, norm)
# UI identifiant unique
# AU auteurs *
-# TI titre
+# TI titre
# LA langue *
-# MH mots clés *
+# MH mots clés *
# PT * type : JOURNAL ARTICLE, REVIEW, REVIEW, TUTORIAL,CLINICAL TRIAL,
# RANDOMIZED CONTROLLED TRIAL, LETTER, EDITORIAL, MULTICENTER STUDY,
# NEWS, HISTORICAL ARTICLE
-# DA date de ?? en yyyymmdd
+# DA date de ?? en yyyymmdd
# DP date de ?? en yyyy mois +/-j
-# IS
+# IS
# TA titre de la revue
-# PG
-# SB
-# CY pays d'édition ?
-# IP
-# VI
-# JC
-# AA semble être toujours Author ou AUTHOR
+# PG
+# SB
+# CY pays d'édition ?
+# IP
+# VI
+# JC
+# AA semble être toujours Author ou AUTHOR
# EM date de?? en yyyymm
-# AB
-# AD
+# AB
+# AD
# PMID
-# SO référence complète
+# SO référence complète
# RN semble indexer des substances chimiques
# TT titre dans la langue d'origine
# 4099 URL vers l'article
# 4100 URL vers abstract de l'article ??
-
+
class Medline (Base.DataBase):
-
+
id = 'Medline'
-
+
properties = {
'change_id' : 0,
'change_type' : 0
@@ -223,7 +223,7 @@ def writer (iter, output, **argh):
ekeys = {}
for k in entry.keys (): ekeys [k] = 1
-
+
med = one_to_one ['UI']
if entry.has_key (med):
@@ -232,11 +232,11 @@ def writer (iter, output, **argh):
else:
print "warning: entry has no medline reference"
ui = 0
-
+
output.write ('%-4.4s- %s\n' % ('UI', ui))
med = one_to_one ['AU']
-
+
if entry.has_key (med):
del ekeys [med]
for auth in entry [med]:
@@ -247,7 +247,7 @@ def writer (iter, output, **argh):
first = string.join (compact, '')
text = string.join ((auth.last or '', first, auth.lineage or ''), ' ')
-
+
output.write ('%-4.4s- %s\n' % ('AU', text))
med = one_to_one ['DP']
@@ -261,7 +261,7 @@ def writer (iter, output, **argh):
if not ekeys.has_key (field): continue
del ekeys [field]
-
+
output.write ('%-4.4s- %s\n' % (key, Utils.format (str (entry [field]),
75, 0, 6)))
# write the unknown fields
@@ -272,19 +272,19 @@ def writer (iter, output, **argh):
key = string.upper (field [8:])
output.write ('%-4.4s- %s\n' % (key, Utils.format (str (entry [field]),
75, 0, 6)))
-
-
+
+
entry = iter.next ()
if entry: output.write ('\n')
-
+
def opener (url, check):
-
+
base = None
if (not check) or (url.url [2] [-4:] == '.med'):
base = Medline (url)
-
+
return base
@@ -294,7 +294,7 @@ def iterator (url, check):
databases '''
if check and url.url [2] [-4:] != '.med': return
-
+
return MedlineIterator (open (Open.url_to_local (url), 'r'))
diff --git a/Pyblio/Format/test_Ovidlike.py b/Pyblio/Format/test_Ovidlike.py
index 95fff2a..9ea27db 100644
--- a/Pyblio/Format/test_Ovidlike.py
+++ b/Pyblio/Format/test_Ovidlike.py
@@ -1,4 +1,5 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
+
data = ["""Authors
Muller S. Garda P. Muller JD. Cansi Y.
Title
@@ -120,7 +121,7 @@ from Pyblio.Base import DataBase, Entry
from Pyblio import Config
from Pyblio.Key import Key
from Pyblio.Types import get_entry
-from Pyblio.Format.OvidLike import OvidLike, writer, write_source_field
+from Pyblio.Format.OvidLike import OvidLike, writer, write_source_field
from Pyblio.Fields import Date
class WriterCase (unittest.TestCase):
@@ -134,20 +135,20 @@ class WriterCase (unittest.TestCase):
self.output = cStringIO.StringIO()
self.mapping = Config.get('ovid/mapping').data
-
+
def test01(self):
self.entry = Entry ( Key('TEST', 'KEY1'), get_entry('article'),
{'journal': 'CACM',
- 'number': 22,
+ 'number': 22,
'volume': 123,
'pages': '234-543'})
self.db.add(self.entry)
self.itera = self.db.iterator()
-
+
writer (self.itera, self.output, self.mapping)
print self.output.getvalue()
-
+
def test02source (self):
@@ -158,18 +159,18 @@ class WriterCase (unittest.TestCase):
'volume': 69, 'number': 2, 'pages': '81-88',
'date': Date ((1999, 2, None))},
{'result':
- 'Journal of Trauma-Injury Infection & Critical Care. 44(6):1047-1054; discussion 1054-5, 1998 Jun.',
+ 'Journal of Trauma-Injury Infection & Critical Care. 44(6):1047-1054; discussion 1054-5, 1998 Jun.',
'journal': 'Journal of Trauma-Injury Infection & Critical Care',
'volume': 44, 'number': 6, 'pages': '1047-1054',
'date': Date ((1998, 6, None)),
'other-note': 'discussion 1054-5'},
{'result': 'Chemotherapy. 42(3):215-219, 1996 May.',
- ## date in »cites.ovid«: '1996 May-Jun' ##
- ## pages in »cites.ovid«: '215-19' ##
+ ## date in »cites.ovid«: '1996 May-Jun' ##
+ ## pages in »cites.ovid«: '215-19' ##
'journal': 'Chemotherapy',
'volume': 42, 'number': 3, 'pages': '215-219',
'date': Date ((1996, 5, None))},
- {'result': 'Circulatory Shock. 18(3):193-203, 1986.',
+ {'result': 'Circulatory Shock. 18(3):193-203, 1986.',
'journal': 'Circulatory Shock',
'volume': 18, 'number': 3, 'pages': '193-203',
'date': Date ('1986')},
@@ -180,7 +181,7 @@ class WriterCase (unittest.TestCase):
'date': Date ((2002, 9, 23))},]
-
+
for i in data :
e = Entry ( Key('TEST', 'KEY1'), get_entry('article'),
@@ -190,7 +191,7 @@ class WriterCase (unittest.TestCase):
write_source_field (self.output, e, self.mapping)
r = self.output.getvalue()
self.assertEqual (e['result'], r[9:-1])
-
+
@@ -260,8 +261,8 @@ class RexpCase (unittest.TestCase):
'Biochemistry', '38', '49', None, '16333-16339',
'1999', 'Dec 7', None),
]
-
-
+
+
def test01 (self):
@@ -273,14 +274,14 @@ class RexpCase (unittest.TestCase):
print m.group(
'journal', 'volume', 'number', 'inseries',
'pages', 'year', 'month', 'other')
- self.assertEqual (journal, m.group('journal'))
- self.assertEqual (volume, m.group('volume'))
- self.assertEqual (number, m.group('number'))
- self.assertEqual (inseries, m.group('inseries'))
- self.assertEqual (pages, m.group('pages'))
- self.assertEqual (year, m.group('year'))
- self.assertEqual (month, m.group('month'))
- self.assertEqual (other, m.group('other'))
+ self.assertEqual (journal, m.group('journal'))
+ self.assertEqual (volume, m.group('volume'))
+ self.assertEqual (number, m.group('number'))
+ self.assertEqual (inseries, m.group('inseries'))
+ self.assertEqual (pages, m.group('pages'))
+ self.assertEqual (year, m.group('year'))
+ self.assertEqual (month, m.group('month'))
+ self.assertEqual (other, m.group('other'))
else: print 'Fehler'
class Rexp2Case (unittest.TestCase):
@@ -299,7 +300,7 @@ class Rexp2Case (unittest.TestCase):
\.\Z
"""
, flags= re.VERBOSE)
-
+
data = ['Biophysical Journal. 71(6):3320-3329, 1996 Dec.',
'Biochemistry. 38(49):16333-16339, 1999 Dec 7.',
'VERY HIGH FREQUENCY (VHF) ESR/EPR. 22 PG. 431-464. 2004 [Figures].'
@@ -327,7 +328,7 @@ class Rexp2Case (unittest.TestCase):
(?P<month>.*)
\.\s*\Z"""
, flags= re.VERBOSE)
-
+
data = ['Biophysical Journal. 71(6):3320-3329, 1996 Dec.',
'Biochemistry. 38(49):16333-16339, 1999 Dec 7.',
'VERY HIGH FREQUENCY (VHF) ESR/EPR. 22 PG. 431-464. 2004 [Figures].'
@@ -373,20 +374,18 @@ def suite():
def main ():
unittest.main (defaultTest='suite' )
-
+
if __name__ == '__main__':
-
main()
-
### Local Variables:
### Mode: python
-### encoding: iso-8859-1
+### encoding: utf-8
### End:
-
+
diff --git a/Pyblio/Format/test_medline.py b/Pyblio/Format/test_medline.py
index 98f4e00..743c282 100644
--- a/Pyblio/Format/test_medline.py
+++ b/Pyblio/Format/test_medline.py
@@ -1,5 +1,4 @@
-# -*- coding: iso8859-1 -*-
-
+# -*- coding: utf-8 -*-
import cStringIO, os, sys, unittest
@@ -19,8 +18,6 @@ Config.load_user ()
from Pyblio.Format import Medline
-
-
example_1 = """PMID- 15985842
OWN - NLM
STAT- MEDLINE
@@ -155,15 +152,12 @@ comparison = {'Holmes': 'W. C.',
'Pardini': 'D.'}
-
class ReaderCase (unittest.TestCase):
-
def setUp (self):
-
self.db = Base.DataBase ('//localhost/Internal')
self.output = cStringIO.StringIO()
-
+
def test01(self):
"""Test that all fields are Instances, as
opposed to strings"""
@@ -186,7 +180,7 @@ class ReaderCase (unittest.TestCase):
"""Test that Initials are formatted correctly.
According to Bibtex specs, they must be separated
by period, space ('. ')."""
-
+
inpt = cStringIO.StringIO (example_2)
rdr = Medline.MedlineIterator (inpt)
e = rdr.first ()
@@ -212,7 +206,6 @@ class ReaderCase (unittest.TestCase):
auth.first, comparison [auth.last])
e = rdr.next ()
-
def suite():
theSuite = unittest.TestSuite()
@@ -222,15 +215,13 @@ def suite():
def main ():
unittest.main (defaultTest='suite' )
-
+
if __name__ == '__main__':
-
main()
-
### Local Variables:
### Mode: python
-### encoding: iso-8859-1
+### encoding: utf-8
### End:
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]