[pybliographer/wip/gtk3] format: recode source files from latin-1 to utf-8



commit 56b8e4ff913b64b6df49ce86a5a66e27317acf5f
Author: Germán Poo-Caamaño <gpoo gnome org>
Date:   Wed Oct 11 09:23:52 2017 -0300

    format: recode source files from latin-1 to utf-8

 Pyblio/Format/Medline.py       |  100 ++++++++++++++++++++--------------------
 Pyblio/Format/test_Ovidlike.py |   57 +++++++++++------------
 Pyblio/Format/test_medline.py  |   19 ++------
 3 files changed, 83 insertions(+), 93 deletions(-)
---
diff --git a/Pyblio/Format/Medline.py b/Pyblio/Format/Medline.py
index e47093b..aeaf3b0 100644
--- a/Pyblio/Format/Medline.py
+++ b/Pyblio/Format/Medline.py
@@ -1,24 +1,24 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
 # This file is part of pybliographer
-# 
+#
 # Copyright (C) 1998-2004 Frederic GOBRY
 # Email : gobry pybliographer org
-#         
+#
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2 
+# as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
-#   
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details. 
-# 
+# GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-# 
-# 
+#
+#
 
 # Extension module for Medline files
 
@@ -40,8 +40,8 @@ class MedlineIterator (Iterator.Iterator):
     def __init__ (self, file):
         self.file = file
         return
-    
-    
+
+
     def first (self):
         # rewind the file
         self.file.seek (0)
@@ -51,14 +51,14 @@ class MedlineIterator (Iterator.Iterator):
     def next (self):
         current = None
         data    = ''
-        
+
         table = {}
 
         # Skip whitespace
         while 1:
             line = self.file.readline ()
             if line == '': return table
-            
+
             line = string.rstrip (line)
             if line != '': break
 
@@ -70,14 +70,14 @@ class MedlineIterator (Iterator.Iterator):
                         table [current].append (data)
                     else:
                         table [current] = [data]
-                        
+
                 current = string.strip (head.group (1))
                 data    = head.group (2)
             else:
                 cont = contin.match (line)
                 if cont:
                     data = data + ' ' + cont.group (1)
-        
+
             line = self.file.readline ()
             if line == '': break
 
@@ -99,18 +99,18 @@ class MedlineIterator (Iterator.Iterator):
             norm ['url'] = Fields.URL (medurl + table ['PMID'] [0])
             norm ['medline-pmid'] = Fields.Text (table ['PMID'] [0])
             del table ['PMID']
-    
+
         if table.has_key ('UI'):
             norm [one_to_one ['UI']] = Fields.Text (table ['UI'] [0])
             del table ['UI']
 
         if table.has_key ('AU'):
             group = Fields.AuthorGroup ()
-            
+
             for au in table ['AU']:
                 # analyze the author by ourself.
                 first, last, lineage = [], [], []
-                
+
                 for part in string.split (au, ' '):
                    if part.isupper ():
                         # in upper-case, this is a first name
@@ -144,9 +144,9 @@ class MedlineIterator (Iterator.Iterator):
                     lineage = string.join (lineage, ' ')
                 else:
                     lineage = None
-                    
+
                 group.append (Fields.Author ((None, first, last, lineage)))
-                
+
             norm [one_to_one ['AU']] = group
             del table ['AU']
 
@@ -154,50 +154,50 @@ class MedlineIterator (Iterator.Iterator):
             fields = string.split (table ['DP'][0], ' ')
             norm [one_to_one ['DP']] = Fields.Date (fields [0])
             del table ['DP']
-            
+
         # The simple fields...
         for f in table.keys ():
             f_mapped = one_to_one.get(f, 'medline-%s' %(f.lower()))
             text_type = Types.get_field(f_mapped).type
             norm [f_mapped] = text_type (string.join (table [f], " ; "))
-        
+
         return Base.Entry (None, type, norm)
 
 
 # UI identifiant unique
 # AU auteurs *
-# TI titre 
+# TI titre
 # LA langue *
-# MH mots cl�s *
+# MH mots clés *
 # PT *  type : JOURNAL ARTICLE, REVIEW, REVIEW, TUTORIAL,CLINICAL TRIAL,
 #              RANDOMIZED CONTROLLED TRIAL, LETTER, EDITORIAL, MULTICENTER STUDY,
 #              NEWS, HISTORICAL ARTICLE
-# DA date de ?? en yyyymmdd 
+# DA date de ?? en yyyymmdd
 # DP date de ?? en yyyy mois +/-j
-# IS 
+# IS
 # TA titre de la revue
-# PG  
-# SB 
-# CY pays d'�dition ?
-# IP  
-# VI 
-# JC  
-# AA semble �tre toujours Author ou AUTHOR
+# PG
+# SB
+# CY pays d'édition ?
+# IP
+# VI
+# JC
+# AA semble être toujours Author ou AUTHOR
 # EM date de?? en yyyymm
-# AB  
-# AD 
+# AB
+# AD
 # PMID
-# SO  r�f�rence compl�te
+# SO  référence complète
 # RN semble indexer des substances chimiques
 # TT titre dans la langue d'origine
 # 4099 URL vers l'article
 # 4100 URL vers abstract de l'article ??
-    
+
 
 class Medline (Base.DataBase):
-    
+
     id = 'Medline'
-    
+
     properties = {
         'change_id'   : 0,
         'change_type' : 0
@@ -223,7 +223,7 @@ def writer (iter, output, **argh):
 
         ekeys = {}
         for k in entry.keys (): ekeys [k] = 1
-        
+
         med = one_to_one ['UI']
 
         if entry.has_key (med):
@@ -232,11 +232,11 @@ def writer (iter, output, **argh):
         else:
             print "warning: entry has no medline reference"
             ui = 0
-            
+
         output.write ('%-4.4s- %s\n' % ('UI', ui))
 
         med = one_to_one ['AU']
-        
+
         if entry.has_key (med):
             del ekeys [med]
             for auth in entry [med]:
@@ -247,7 +247,7 @@ def writer (iter, output, **argh):
 
                 first = string.join (compact, '')
                 text = string.join ((auth.last or '', first, auth.lineage or ''), ' ')
-                
+
                 output.write ('%-4.4s- %s\n' % ('AU', text))
 
         med = one_to_one ['DP']
@@ -261,7 +261,7 @@ def writer (iter, output, **argh):
 
             if not ekeys.has_key (field): continue
             del ekeys [field]
-            
+
             output.write ('%-4.4s- %s\n' % (key, Utils.format (str (entry [field]),
                                                               75, 0, 6)))
         # write the unknown fields
@@ -272,19 +272,19 @@ def writer (iter, output, **argh):
                 key = string.upper (field [8:])
                 output.write ('%-4.4s- %s\n' % (key, Utils.format (str (entry [field]),
                                                                    75, 0, 6)))
-            
-        
+
+
         entry = iter.next ()
         if entry: output.write ('\n')
 
-        
+
 def opener (url, check):
-       
+
        base = None
 
        if (not check) or (url.url [2] [-4:] == '.med'):
                base = Medline (url)
-               
+
        return base
 
 
@@ -294,7 +294,7 @@ def iterator (url, check):
        databases '''
 
         if check and url.url [2] [-4:] != '.med': return
-        
+
         return MedlineIterator (open (Open.url_to_local (url), 'r'))
 
 
diff --git a/Pyblio/Format/test_Ovidlike.py b/Pyblio/Format/test_Ovidlike.py
index 95fff2a..9ea27db 100644
--- a/Pyblio/Format/test_Ovidlike.py
+++ b/Pyblio/Format/test_Ovidlike.py
@@ -1,4 +1,5 @@
-#    -*- coding: iso-8859-1 -*-
+#    -*- coding: utf-8 -*-
+
 data = ["""Authors
   Muller S.  Garda P.  Muller JD.  Cansi Y.
 Title
@@ -120,7 +121,7 @@ from Pyblio.Base import DataBase, Entry
 from Pyblio import Config
 from Pyblio.Key import Key
 from Pyblio.Types import get_entry
-from Pyblio.Format.OvidLike import OvidLike, writer, write_source_field 
+from Pyblio.Format.OvidLike import OvidLike, writer, write_source_field
 from Pyblio.Fields import Date
 
 class WriterCase (unittest.TestCase):
@@ -134,20 +135,20 @@ class WriterCase (unittest.TestCase):
         self.output = cStringIO.StringIO()
         self.mapping = Config.get('ovid/mapping').data
 
-        
+
     def test01(self):
 
         self.entry = Entry ( Key('TEST', 'KEY1'), get_entry('article'),
             {'journal': 'CACM',
-             'number': 22, 
+             'number': 22,
              'volume': 123,
              'pages': '234-543'})
         self.db.add(self.entry)
         self.itera = self.db.iterator()
-        
+
         writer (self.itera, self.output, self.mapping)
         print self.output.getvalue()
-        
+
 
     def test02source (self):
 
@@ -158,18 +159,18 @@ class WriterCase (unittest.TestCase):
              'volume': 69, 'number': 2, 'pages': '81-88',
              'date': Date ((1999, 2, None))},
             {'result':
-             'Journal of Trauma-Injury Infection & Critical Care. 44(6):1047-1054; discussion 1054-5, 1998 
Jun.',              
+             'Journal of Trauma-Injury Infection & Critical Care. 44(6):1047-1054; discussion 1054-5, 1998 
Jun.',
              'journal': 'Journal of Trauma-Injury Infection & Critical Care',
              'volume': 44, 'number': 6, 'pages': '1047-1054',
              'date': Date ((1998, 6, None)),
              'other-note': 'discussion 1054-5'},
             {'result': 'Chemotherapy. 42(3):215-219, 1996 May.',
-             ## date in �cites.ovid�: '1996 May-Jun' ##
-             ## pages in �cites.ovid�: '215-19' ##
+             ## date in »cites.ovid«: '1996 May-Jun' ##
+             ## pages in »cites.ovid«: '215-19' ##
              'journal': 'Chemotherapy',
              'volume': 42, 'number': 3, 'pages': '215-219',
              'date': Date ((1996, 5, None))},
-            {'result': 'Circulatory Shock. 18(3):193-203, 1986.', 
+            {'result': 'Circulatory Shock. 18(3):193-203, 1986.',
              'journal': 'Circulatory Shock',
              'volume': 18, 'number': 3, 'pages': '193-203',
              'date': Date ('1986')},
@@ -180,7 +181,7 @@ class WriterCase (unittest.TestCase):
              'date': Date ((2002, 9, 23))},]
 
 
-        
+
         for i in data :
 
             e =  Entry ( Key('TEST', 'KEY1'), get_entry('article'),
@@ -190,7 +191,7 @@ class WriterCase (unittest.TestCase):
             write_source_field (self.output, e, self.mapping)
             r = self.output.getvalue()
             self.assertEqual (e['result'], r[9:-1])
-            
+
 
 
 
@@ -260,8 +261,8 @@ class RexpCase  (unittest.TestCase):
               'Biochemistry', '38', '49', None, '16333-16339',
               '1999', 'Dec 7', None),
              ]
-             
-             
+
+
 
 
     def test01 (self):
@@ -273,14 +274,14 @@ class RexpCase  (unittest.TestCase):
                 print m.group(
                     'journal', 'volume', 'number', 'inseries',
                     'pages', 'year', 'month', 'other')
-                self.assertEqual (journal, m.group('journal'))             
-                self.assertEqual (volume, m.group('volume'))             
-                self.assertEqual (number, m.group('number'))             
-                self.assertEqual (inseries, m.group('inseries'))             
-                self.assertEqual (pages, m.group('pages'))             
-                self.assertEqual (year, m.group('year'))             
-                self.assertEqual (month, m.group('month'))             
-                self.assertEqual (other, m.group('other'))             
+                self.assertEqual (journal, m.group('journal'))
+                self.assertEqual (volume, m.group('volume'))
+                self.assertEqual (number, m.group('number'))
+                self.assertEqual (inseries, m.group('inseries'))
+                self.assertEqual (pages, m.group('pages'))
+                self.assertEqual (year, m.group('year'))
+                self.assertEqual (month, m.group('month'))
+                self.assertEqual (other, m.group('other'))
             else: print 'Fehler'
 
 class Rexp2Case (unittest.TestCase):
@@ -299,7 +300,7 @@ class Rexp2Case (unittest.TestCase):
         \.\Z
          """
             , flags= re.VERBOSE)
-       
+
         data = ['Biophysical Journal. 71(6):3320-3329, 1996 Dec.',
                 'Biochemistry. 38(49):16333-16339, 1999 Dec 7.',
                 'VERY HIGH FREQUENCY (VHF) ESR/EPR. 22 PG. 431-464. 2004 [Figures].'
@@ -327,7 +328,7 @@ class Rexp2Case (unittest.TestCase):
             (?P<month>.*)
             \.\s*\Z"""
             , flags= re.VERBOSE)
-       
+
         data = ['Biophysical Journal. 71(6):3320-3329, 1996 Dec.',
                 'Biochemistry. 38(49):16333-16339, 1999 Dec 7.',
                 'VERY HIGH FREQUENCY (VHF) ESR/EPR. 22 PG. 431-464. 2004 [Figures].'
@@ -373,20 +374,18 @@ def suite():
 
 def main ():
     unittest.main (defaultTest='suite' )
-    
+
 
 if __name__ == '__main__':
-    
     main()
 
 
-
 ### Local Variables:
 ### Mode: python
-### encoding: iso-8859-1    
+### encoding: utf-8
 ### End:
 
 
-    
+
 
 
diff --git a/Pyblio/Format/test_medline.py b/Pyblio/Format/test_medline.py
index 98f4e00..743c282 100644
--- a/Pyblio/Format/test_medline.py
+++ b/Pyblio/Format/test_medline.py
@@ -1,5 +1,4 @@
-#    -*- coding: iso8859-1 -*-
-
+# -*- coding: utf-8 -*-
 
 import cStringIO, os, sys, unittest
 
@@ -19,8 +18,6 @@ Config.load_user ()
 from Pyblio.Format import Medline
 
 
-
-
 example_1 = """PMID- 15985842
 OWN - NLM
 STAT- MEDLINE
@@ -155,15 +152,12 @@ comparison = {'Holmes': 'W. C.',
              'Pardini': 'D.'}
 
 
-
 class ReaderCase (unittest.TestCase):
-
     def setUp (self):
-
         self.db = Base.DataBase ('//localhost/Internal')
         self.output = cStringIO.StringIO()
 
-        
+
     def test01(self):
        """Test that all fields are Instances, as
        opposed to strings"""
@@ -186,7 +180,7 @@ class ReaderCase (unittest.TestCase):
        """Test that Initials are formatted correctly.
        According to Bibtex specs, they must be separated
        by period, space ('. ')."""
-           
+
        inpt = cStringIO.StringIO (example_2)
        rdr = Medline.MedlineIterator (inpt)
        e = rdr.first ()
@@ -212,7 +206,6 @@ class ReaderCase (unittest.TestCase):
                    auth.first, comparison [auth.last])
            e = rdr.next ()
 
-
 def suite():
     theSuite = unittest.TestSuite()
 
@@ -222,15 +215,13 @@ def suite():
 
 def main ():
     unittest.main (defaultTest='suite' )
-    
+
 
 if __name__ == '__main__':
-    
     main()
 
 
-
 ### Local Variables:
 ### Mode: python
-### encoding: iso-8859-1    
+### encoding: utf-8
 ### End:


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]