[gimp-help/wip/wormnest/validate-po: 1/2] tools: add validate_po.py that can test for errors in po files.
- From: Jacob Boerema <jboerema src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gimp-help/wip/wormnest/validate-po: 1/2] tools: add validate_po.py that can test for errors in po files.
- Date: Sun, 13 Jun 2021 16:47:51 +0000 (UTC)
commit fd26e25e1e2a536b83e8030cec2f0c40b59f2cb5
Author: Jacob Boerema <jgboerema gmail com>
Date: Sun Jun 13 12:30:57 2021 -0400
tools: add validate_po.py that can test for errors in po files.
tools/validate_po.py | 307 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 307 insertions(+)
---
diff --git a/tools/validate_po.py b/tools/validate_po.py
new file mode 100644
index 000000000..eb7406954
--- /dev/null
+++ b/tools/validate_po.py
@@ -0,0 +1,307 @@
+#!/usr/bin/env python3
+#
+# validate_po.py - Validates translations in po files.
+# Copyright (c) 2021 Jacob Boerema.
+#
+# This file is part of gimp-help.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+
+import sys
+import getopt
+import polib
+
# Version shown by printVersion().  Kept as a string rather than a float so
# that a future version such as "0.10" is not silently rendered as "0.1".
VERSION = "0.1"
+
class Validate(object):
    """Check the translated text (msgstr) of the entries of po files.

    Two checks are run on every non-obsolete entry: a scan for control
    characters that are not allowed in a translation, and a simple
    tag-balance check of the XML-like markup in the translation.

    All diagnostics are written to ``self.log``; error totals are
    accumulated over all files in ``self.errors`` and
    ``self.files_with_errors``.
    """

    def __init__(self, verbose, warnings):
        """Create a validator.

        verbose  -- when true, the error header also shows msgid and msgstr.
        warnings -- when true, warnings are reported in addition to errors.
        """
        self.pofile = None
        self.po = None
        self.errors = 0
        self.filecnt = 0
        self.files_with_errors = 0
        self.log = sys.stdout
        self.verbose = verbose
        self.warnings = warnings
        # Header bookkeeping: the header is printed at most once per entry,
        # no matter which of the checks reports the first problem.
        self.headerPrinted = False
        self._current_entry = None

    def setFile(self, pofile):
        """Load *pofile* with polib and make it the file checked by run()."""
        self.pofile = pofile
        self.po = polib.pofile(pofile)
        self.filecnt += 1

    def _start_entry(self, entry):
        """Reset the header flag when a different entry is being checked."""
        if entry is not self._current_entry:
            self._current_entry = entry
            self.headerPrinted = False

    def printErrorHeader(self, entry, log):
        """Print the location header for *entry* to *log*, once per entry.

        Passing None for *log* falls back to ``self.log``.
        """
        if self.headerPrinted:
            return
        self.headerPrinted = True
        if log is None:
            log = self.log
        print(f"\nPo line number: {entry.linenum}", file=log)
        print(f"Source text: {entry.occurrences}", file=log)
        if self.verbose:
            print(f"\nOriginal msgid:\n{entry.msgid}", file=log)
            print(f"\nTranslated msgstr:\n{entry.msgstr}\n", file=log)

    def _error(self, entry, message):
        """Report one error for *entry* and return 1 (the error count)."""
        self.printErrorHeader(entry, self.log)
        print(f"ERROR: {message}", file=self.log)
        return 1

    def _check_closing_tag(self, entry, tag, stack):
        """Validate closing tag text *tag* (starting with '/') against *stack*.

        Returns the number of errors found; *stack* is updated in place.
        """
        err = 0
        # A closing tag never ends in '/' here (that form is handled as a
        # self-closing tag by the caller), so it has at least two characters.
        if tag[1] == ' ':
            # space not allowed when closing tag between / and tag
            tag = tag[2:]
            err += self._error(
                entry,
                f"No space allowed between '/' and [{tag}] when closing a tag.")
        else:
            tag = tag[1:]

        # space not allowed anywhere when closing tag
        space = tag.find(' ')
        if space > -1:
            tag = tag[:space]
            err += self._error(
                entry, f"No space allowed when closing a tag, see [{tag}].")

        # no uppercase allowed in tag
        lo_tag = tag.lower()
        if tag != lo_tag:
            err += self._error(
                entry, f"Closing tag [{tag}] should be all lowercase.")
            tag = lo_tag

        if not stack:
            err += self._error(
                entry, f"Closing tag [{tag}] before opening tag.")
        elif stack[-1] == tag:
            # Correct closing tag found, remove from stack
            stack.pop()
        else:
            # The tag we expected to be closed is the most recently opened
            # one, i.e. the top of the stack.
            err += self._error(
                entry,
                f"Found closing tag [{tag}], however we expected [{stack[-1]}].")
            print(f"\tRemaining tags: {str(stack)}", file=self.log)
            if tag in stack:
                stack.remove(tag)
                print("\t Assuming incorrect tag order, found and removed tag from the stack",
                      file=self.log)
            elif len(stack) == 1:
                stack.pop()
                print("\t Assuming typo, removed remaining tag from the stack",
                      file=self.log)
        return err

    def _check_opening_tag(self, entry, tag, stack):
        """Validate opening tag text *tag* and push its name on *stack*.

        Returns the number of errors found.
        """
        err = 0
        # Tag can have multiple elements inside; the name ends at the first
        # space.  Leading spaces before the name are stripped and remembered.
        stripped = tag.lstrip(' ')
        err_space = stripped != tag
        name = stripped.split(' ', 1)[0]

        if err_space:
            if tag.endswith(' '):
                # Assume these are random < and > characters not a tag
                if self.warnings:
                    self.printErrorHeader(entry, self.log)
                    print("WARNING: Assuming random < and > encountered, but could be a faulty tag too.",
                          file=self.log)
                return err
            # Suspicious, erroneous space(s) before tag name?
            err += self._error(
                entry, f"No space allowed when opening a tag, see [{name}].")

        if not name:
            return err + self._error(entry, "Empty opening tag <> not allowed.")

        # no uppercase allowed in tag
        lo_name = name.lower()
        if name != lo_name:
            err += self._error(
                entry, f"Opening tag [{name}] should be all lowercase.")
            name = lo_name
        # Add opening tag to stack
        stack.append(name)
        return err

    def parse_text(self, entry):
        """Check that the tags in ``entry.msgstr`` are well formed and balanced.

        Returns the number of errors found in this entry.
        """
        self._start_entry(entry)
        stack = []
        err = 0
        textblock = entry.msgstr

        # Note that XML tags do not get escaped. This means we can't really detect
        # the difference between the use of < and > signs and tag begin/end.
        # For now let's just assume that all occurrences of < and > are for tags.

        start_tag = textblock.find('<')
        while start_tag > -1:
            textblock = textblock[start_tag + 1:]
            end_tag = textblock.find('>')
            if end_tag == -1:
                # No closing bracket left: nothing more to parse.
                break

            # Found left and right brackets: grab tag
            tag = textblock[:end_tag]
            # Tags that close themselves and comment tags starting with <!
            # do not take part in the balance check.  Testing the tag text
            # itself keeps an empty tag "<>" from looking at unrelated text.
            if tag.endswith('/') or tag.startswith('!'):
                if len(tag) == 1:
                    # empty closing tag not allowed
                    err += self._error(entry, "Empty closing tag not allowed.")
            elif tag.startswith('/'):
                err += self._check_closing_tag(entry, tag, stack)
            else:
                err += self._check_opening_tag(entry, tag, stack)

            textblock = textblock[end_tag + 1:]
            start_tag = textblock.find('<')

        if stack:
            err += self._error(
                entry, f"Found tags that were not closed: {str(stack)}.")

        return err

    def check_illegal_chars(self, entry):
        """Report characters in ``entry.msgstr`` that are not allowed.

        Returns the number of errors found in this entry.
        """
        self._start_entry(entry)
        text = entry.msgstr
        result = 0

        # Apparently polib ignores (un)escaping \v but msgfmt doesn't like it so detect it here...
        # See: https://gitlab.gnome.org/GNOME/gimp-help/-/commit/6b661af55bc6dc90bb198c59702d9b6cfc42f94f
        if "\\v" in text:
            result += self._error(entry, "Vertical tab (\\v) not allowed.")

        # Every control character except newline and tab is an error.
        for char in text:
            if char < ' ' and char not in "\n\t":
                result += self._error(
                    entry, f"Found illegal character in text: {ord(char)}.")

        return result

    def run(self):
        """Check all non-obsolete entries of the current po file.

        Adds the errors found to ``self.errors`` and prints a per-file
        summary when there is at least one error.
        """
        errcnt = 0
        for entry in self.po:
            if entry.obsolete:
                continue
            if entry.msgid.startswith("@@image:"):
                # TODO: check and warn for img not found msg?
                # But that probably shouldn't be done in the
                # po validator but in a source xml validator.
                continue
            if entry.msgid.startswith("translator-credits"):
                # TODO: Should we check for translators not in AUTHORS?
                continue

            errcnt += self.check_illegal_chars(entry)
            errcnt += self.parse_text(entry)

            # TODO Possible enhancements:
            # - Compare tags found in msgid and msgstr
            #   The same tags and the same number of tags should be found
            # - compare msgid and msgstr also for differences in:
            #   + number of brackets () [] {}
            #   + number of sentences
            #   + use of quotes (which can differ per language)

        if errcnt > 0:
            self.errors += errcnt
            self.files_with_errors += 1
            print(f"\n{self.pofile}: {errcnt} errors detected.\n")
+
def printVersion():
    """Print the program name, version and copyright line."""
    banner = f"\nPo Validator v {VERSION}, copyright 2021 Jacob Boerema"
    print(banner)
+
def main(argv):
    """Command-line entry point: validate every po file named in *argv*.

    argv -- the argument list without the program name.

    Exits with status 2 when the options cannot be parsed and with status 1
    when at least one error was found in the checked files.  Asking for help,
    or giving no files at all, prints the usage text and exits with status 0.
    """
    # Make sure stdout and stderr output utf-8 even on Windows where it's not the default
    sys.stdout = open(sys.stdout.fileno(), 'w', encoding='utf-8', closefd=False)
    sys.stderr = open(sys.stderr.fileno(), 'w', encoding='utf-8', closefd=False)

    verbose = False
    warnings = False

    try:
        opts, remaining_args = getopt.getopt(
            argv, "hvw", ["help", "verbose", "warnings"])
    except getopt.GetoptError as err:
        # Tell the user what was wrong and signal failure to the caller;
        # exiting with 0 here would make a mistyped option look like success.
        print(f"Error: {err}", file=sys.stderr)
        usage()
        sys.exit(2)

    for opt, _arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif opt in ("-v", "--verbose"):
            verbose = True
        elif opt in ("-w", "--warnings"):
            warnings = True

    # Treat remaining arguments as po files, in the order they were given.
    filenames = list(remaining_args)

    if not filenames:
        usage()
        sys.exit(0)

    if verbose:
        printVersion()

    validator = Validate(verbose, warnings)
    for filename in filenames:
        print(f"Checking {filename}")
        validator.setFile(filename)
        validator.run()

    if validator.errors > 0:
        print(f"{validator.files_with_errors} of {validator.filecnt} files contain errors")
        print(f"Total number of errors: {validator.errors}")
        sys.exit(1)
+
+
def usage():
    """Print the version banner followed by the command-line help text."""
    help_text = """Validates translations in po files.

usage: validate_po.py [options] POFILES

 options:
 -v --verbose be more verbose
 -w --warnings show warnings
 -h --help this help"""
    printVersion()
    print(help_text)
+
+
# Run the validator only when executed as a script, not when imported.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
\ No newline at end of file
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]