[damned-lies] Separated checking po file conformity from getting translation stats



commit 8a4a4519e74bc07c850e436587f3cc207c7c436d
Author: Claude Paroz <claude 2xlibre net>
Date:   Mon Mar 26 16:02:39 2018 +0200

    Separated checking po file conformity from getting translation stats

 stats/models.py      |   16 ++++--
 stats/tests/tests.py |    1 -
 stats/utils.py       |  134 ++++++++++++++++++++++++-------------------------
 vertimus/forms.py    |    5 +-
 4 files changed, 78 insertions(+), 78 deletions(-)
---
diff --git a/stats/models.py b/stats/models.py
index 9dde167..a0ae524 100644
--- a/stats/models.py
+++ b/stats/models.py
@@ -485,7 +485,7 @@ class Branch(models.Model):
 
                 # 6. Generate pot stats and update DB
                 # ***********************************
-                pot_stat.update_stats(potfile, pot_method, msgfmt_checks=False)
+                pot_stat.update_stats(potfile, pot_method)
 
                 if potfile != previous_pot:
                     try:
@@ -532,7 +532,11 @@ class Branch(models.Model):
                                 continue
                         stat = Statistics.objects.create(language=language, branch=self, domain=dom)
 
-                    stat.update_stats(outpo, pot_method, msgfmt_checks=True)
+                    errs = utils.check_po_conformity(outpo)
+                    for err in errs:
+                        stat.set_error(*err)
+                    if not errs:
+                        stat.update_stats(outpo, pot_method)
 
                     if linguas['langs'] is not None and lang not in linguas['langs']:
                         stat.set_error('warn-ext', linguas['error'])
@@ -1330,7 +1334,7 @@ class PoFile(models.Model):
             return int(100*self.untranslated_words/pot_size)
 
     def update_stats(self):
-        stats = utils.po_file_stats(Path(self.path), msgfmt_checks=False)
+        stats = utils.po_file_stats(Path(self.path))
         self.translated   = stats['translated']
         self.fuzzy        = stats['fuzzy']
         self.untranslated = stats['untranslated']
@@ -1547,10 +1551,10 @@ class Statistics(models.Model):
     def pot_url(self):
         return self.po_url(potfile=True)
 
-    def update_stats(self, file_path=None, pot_method=None, msgfmt_checks=False):
+    def update_stats(self, file_path=None, pot_method=None):
         if file_path is None and self.full_po:
             file_path = self.full_po.path
-        stats = utils.po_file_stats(file_path, msgfmt_checks=msgfmt_checks)
+        stats = utils.po_file_stats(file_path)
         for err in stats['errors']:
             self.set_error(*err)
         fig_stats = utils.get_fig_stats(file_path, pot_method)
@@ -1587,7 +1591,7 @@ class Statistics(models.Model):
                 else:
                     part_po_path = Path(self.full_po.path[:-3] + ".reduced.po")
                 utils.po_grep(self.full_po.path, str(part_po_path), self.domain.red_filter)
-                part_stats = utils.po_file_stats(part_po_path, msgfmt_checks=False)
+                part_stats = utils.po_file_stats(part_po_path)
                 if (part_stats['translated'] + part_stats['fuzzy'] + part_stats['untranslated'] ==
                         stats['translated'] + stats['fuzzy'] + stats['untranslated']):
                     # No possible gain, set part_po = full_po so it is possible to compute complete stats at database level
diff --git a/stats/tests/tests.py b/stats/tests/tests.py
index 463ea6a..57bc56c 100644
--- a/stats/tests/tests.py
+++ b/stats/tests/tests.py
@@ -302,7 +302,6 @@ class ModuleTestCase(TestCase):
             # Quoting is done at the Popen level
             'git commit -m Update French translation --author Author <someone example org>',
             'git push origin master', 'git log -n1 --format=oneline',
-            'msgfmt --statistics -o /dev/null',
         )
         with patch_shell_command() as cmds:
             branch.commit_po(po_file, domain, fr_lang, 'Author <someone example org>')
diff --git a/stats/utils.py b/stats/utils.py
index 0115de4..29e4172 100644
--- a/stats/utils.py
+++ b/stats/utils.py
@@ -23,6 +23,7 @@ from common.utils import send_mail
 from . import potdiff
 
 STATUS_OK = 0
+C_ENV = {"LC_ALL": "C", "LANG": "C", "LANGUAGE": "C"}
 
 NOT_CHANGED = 0
 CHANGED_ONLY_FORMATTING = 1
@@ -469,8 +470,50 @@ def pot_diff_status(pota, potb):
     else:
         return CHANGED_NO_ADDITIONS, result_all
 
-def po_file_stats(pofile, msgfmt_checks=True):
-    """ Compute pofile translation statistics, and proceed to some validity checks if msgfmt_checks is True """
+
+def check_po_conformity(pofile):
+    """Return errors/warnings about pofile conformity."""
+    errors = []
+    # Allow pofile provided as open file (e.g. to validate a temp uploaded file)
+    if isinstance(pofile, File):
+        input_data = pofile.read()
+        input_file = "-"
+    else:
+        input_data = None
+        input_file = str(pofile)
+
+    command = ['msgfmt', '-cv', '-o', '/dev/null', input_file]
+    status, _, _ = run_shell_command(command, env=C_ENV, input_data=input_data)
+    if status != STATUS_OK:
+        errors.append((
+            "error",
+            ugettext_noop("PO file “%s” doesn’t pass msgfmt check.") % pofile.name
+        ))
+
+    if input_file != '-' and os.access(str(pofile), os.X_OK):
+        errors.append(("warn", ugettext_noop("This PO file has an executable bit set.")))
+
+    # Check if PO file is in UTF-8
+    if input_file == "-":
+        try:
+            input_data.decode('UTF-8')
+            status = STATUS_OK
+        except UnicodeDecodeError:
+            status = STATUS_OK + 1
+    else:
+        command = "msgconv -t UTF-8 \"%s\" | diff -i -I '^#~' -u \"%s\" - >/dev/null" % (
+            pofile, pofile)
+        status, _, _ = run_shell_command(command, env=C_ENV)
+    if status != STATUS_OK:
+        errors.append((
+            "warn",
+             ugettext_noop("PO file “%s” is not UTF-8 encoded.") % pofile.name
+        ))
+    return errors
+
+
+def po_file_stats(pofile):
+    """Compute pofile translation statistics."""
     res = {
         'translated' : 0,
         'fuzzy' : 0,
@@ -479,77 +522,32 @@ def po_file_stats(pofile, msgfmt_checks=True):
         'fuzzy_words': 0,
         'untranslated_words': 0,
         'errors' : [],
-        }
-    c_env = {"LC_ALL": "C", "LANG": "C", "LANGUAGE": "C"}
-
-    if isinstance(pofile, Path):
-        # pofile is a filesystem path
-        if not pofile.exists():
-            res['errors'].append(
-                ("error", ugettext_noop("PO file “%s” does not exist or cannot be read.") % pofile.name)
-            )
-            return res
-        input_data = None
-        input_file = str(pofile)
-
-        status = pocount.calcstats_old(input_file)
-        if status:
-            res['fuzzy_words'] = status['fuzzysourcewords']
-            res['translated_words'] = status['translatedsourcewords']
-            res['untranslated_words'] = status['untranslatedsourcewords']
-
-    elif isinstance(pofile, File):
-        input_data = pofile.read()
-        input_file = "-"
-    else:
-        raise ValueError("pofile type not recognized")
+    }
 
-    if msgfmt_checks:
-        command = ['msgfmt', '-cv', '-o', '/dev/null', input_file]
+    if not pofile.exists():
+        res['errors'].append((
+            "error",
+            ugettext_noop("PO file “%s” does not exist or cannot be read.") % pofile.name
+        ))
+        return res
+
+    status = pocount.calcstats_old(str(pofile))
+    if not status:
+        res['errors'].append((
+            "error",
+            ugettext_noop("Can’t get statistics for POT file “%s”.") % pofile.name
+        ))
     else:
-        command = ['msgfmt', '--statistics', '-o', '/dev/null', input_file]
+        res['fuzzy'] = status['fuzzy']
+        res['translated'] = status['translated']
+        res['untranslated'] = status['untranslated']
+        res['fuzzy_words'] = status['fuzzysourcewords']
+        res['translated_words'] = status['translatedsourcewords']
+        res['untranslated_words'] = status['untranslatedsourcewords']
 
-    (status, output, errs) = run_shell_command(command, env=c_env, input_data=input_data)
-
-    if status != STATUS_OK:
-        if msgfmt_checks:
-            res['errors'].append(("error", ugettext_noop("PO file “%s” doesn’t pass msgfmt check: not updating.") % (pofile.name)))
-        else:
-            res['errors'].append(("error", ugettext_noop("Can’t get statistics for POT file “%s”.") % (pofile.name)))
-
-    if msgfmt_checks and input_file != "-" and os.access(str(pofile), os.X_OK):
-        res['errors'].append(("warn", ugettext_noop("This PO file has an executable bit set.")))
-
-    # msgfmt output stats on stderr
-    errs = force_text(errs)
-    r_tr = re.search(r"([0-9]+) translated", errs)
-    r_un = re.search(r"([0-9]+) untranslated", errs)
-    r_fz = re.search(r"([0-9]+) fuzzy", errs)
-
-    if r_tr:
-        res['translated'] = int(r_tr.group(1))
-    if r_un:
-        res['untranslated'] = int(r_un.group(1))
-    if r_fz:
-        res['fuzzy'] = int(r_fz.group(1))
-
-    if msgfmt_checks:
-        # Check if PO file is in UTF-8
-        if input_file == "-":
-            try:
-                input_data.decode('UTF-8')
-                status = STATUS_OK
-            except:
-                status = STATUS_OK+1
-        else:
-            command = ("msgconv -t UTF-8 \"%s\" | diff -i -I '^#~' -u \"%s\" - >/dev/null") % (
-                        pofile, pofile)
-            (status, output, errs) = run_shell_command(command, env=c_env)
-        if status != STATUS_OK:
-            res['errors'].append(("warn",
-                              ugettext_noop("PO file “%s” is not UTF-8 encoded.") % (pofile.name)))
     return res
 
+
 def read_linguas_file(full_path):
     """ Read a LINGUAS file (each language code on a line by itself) """
     langs = []
diff --git a/vertimus/forms.py b/vertimus/forms.py
index 7f25984..c9f00f6 100644
--- a/vertimus/forms.py
+++ b/vertimus/forms.py
@@ -8,7 +8,7 @@ from django.utils.translation import ugettext, ugettext_lazy as _
 
 from vertimus.models import Action, ActionCI, ActionSeparator
 from stats.models import Person
-from stats.utils import po_file_stats
+from stats.utils import check_po_conformity
 
 
 class DisabledLabel(str):
@@ -82,8 +82,7 @@ class ActionForm(forms.Form):
                 raise ValidationError(_("Only files with extension .po, .gz, .bz2 or .png are admitted."))
             # If this is a .po file, check validity (msgfmt)
             if ext == '.po':
-                res = po_file_stats(data)
-                if res['errors']:
+                if check_po_conformity(data):
                     raise ValidationError(_(".po file does not pass “msgfmt -vc”. Please correct the file and try again."))
         return data
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]