[gtkmm-documentation] Python scripts: Specify file encoding

From: Kjell Ahlstedt <kjellahl src gnome org>
To: commits-list gnome org
Cc:
Subject: [gtkmm-documentation] Python scripts: Specify file encoding
Date: Sun, 18 Jul 2021 14:55:19 +0000 (UTC)
commit e75f6c824b843350030ae95151c7114c3ed08cf0
Author: Kjell Ahlstedt <kjellahlstedt gmail com>
Date:   Sun Jul 18 16:46:36 2021 +0200

    Python scripts: Specify file encoding
    
    The default file encoding in Python is platform-dependent.
    It is better to specify the encoding explicitly when text files are read or written.

 docs/tutorial/insert_example_code.py | 14 +++++++++-----
 tools/meson_aux/extra-dist-cmd.py    |  7 ++++---
 2 files changed, 13 insertions(+), 8 deletions(-)
---
diff --git a/docs/tutorial/insert_example_code.py b/docs/tutorial/insert_example_code.py
index 12350d8..cfe3c15 100755
--- a/docs/tutorial/insert_example_code.py
+++ b/docs/tutorial/insert_example_code.py
@@ -20,7 +20,7 @@ start_of_source_pattern = re.compile(r'[#\w]')
 def process_source_file(source_directory, source_basename, outfile, skip_leading_comments):
   found_start = not skip_leading_comments
   source_filename = os.path.join(source_directory, source_basename)
-  with open(source_filename, mode='r') as srcfile:
+  with open(source_filename, mode='r', encoding='utf-8', errors='surrogateescape') as srcfile:
     outfile.write('<para>File: <filename>' + source_basename + '</filename> (For use with gtkmm 4)</para>\n')
     outfile.write('<programlisting>\n<![CDATA[')
 
@@ -35,12 +35,16 @@ def process_source_file(source_directory, source_basename, outfile, skip_leading
     outfile.write(']]></programlisting>\n')
 
 def insert_example_code(examples_base_dir, input_xml_files, output_xml_file):
-  if not isinstance(input_xml_files, list):
-    input_xml_files = [input_xml_files]
+  if not (isinstance(input_xml_files, list) or isinstance(input_xml_files, tuple)):
+    input_xml_files = [] if input_xml_files == None else [input_xml_files]
 
-  with open(output_xml_file, mode='w') as outfile:
+  # Assume that all files are UTF-8 encoded.
+  # If bytes in the range 0x80..0xff that are not valid UTF-8 are encountered,
+  # they are decoded to lone surrogate code points in the range U+DC80..U+DCFF
+  # and restored to their original byte values when the file is rewritten.
+  with open(output_xml_file, mode='w', encoding='utf-8', errors='surrogateescape') as outfile:
     for input_xml_file in input_xml_files:
-      with open(input_xml_file, mode='r') as infile:
+      with open(input_xml_file, mode='r', encoding='utf-8', errors='surrogateescape') as infile:
         for line in infile:
           # Look for
           # <para><ulink url="&url_examples_base;helloworld">Source Code</ulink></para> [<!-- Insert filenames... -->]
diff --git a/tools/meson_aux/extra-dist-cmd.py b/tools/meson_aux/extra-dist-cmd.py
index 78812f6..051c4e1 100755
--- a/tools/meson_aux/extra-dist-cmd.py
+++ b/tools/meson_aux/extra-dist-cmd.py
@@ -30,7 +30,8 @@ cmd = [
   '--max-count=200',
   '--pretty=tformat:%cd  %an  <%ae>%n%n  %s%n%w(0,0,2)%+b',
 ]
-with open(os.path.join(project_dist_root, 'ChangeLog'), mode='w') as logfile:
+logfilename = os.path.join(project_dist_root, 'ChangeLog')
+with open(logfilename, mode='w', encoding='utf-8') as logfile:
   result = subprocess.run(cmd, stdout=logfile)
   if result.returncode:
     sys.exit(result.returncode)
@@ -52,8 +53,8 @@ shutil.copytree(os.path.join('docs', 'tutorial', 'html'),
 linguas = os.path.join(dist_docs_tutorial, 'LINGUAS')
 langs = []
 if os.path.isfile(linguas):
-  with open(linguas) as linguas_file_obj:
-    buffer = linguas_file_obj.read().splitlines()
+  with open(linguas, mode='r', encoding='utf-8') as linguas_file_obj:
+    buffer = linguas_file_obj.readlines()
   comment_pattern = re.compile(r'\s*(?:#|$)') # comment or blank line
   for line in buffer:
     if not comment_pattern.match(line):
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]