[ocrfeeder: 1/3] Do not invoke commands through shell. Fixes #82




commit 9209bce8afaf6fde19cdac7f5eaea1b744c3e79e
Author: RenWal <65450-RenWal users noreply gitlab gnome org>
Date:   Mon Feb 21 12:34:30 2022 +0100

    Do not invoke commands through shell. Fixes #82
    
    Executing shell commands through mechanisms such as os.system() or
    subprocess.run(shell=True) with user-controllable input is prone to
    arbitrary shell command injection. In this particular case, a malicious
    actor controlling any input name, either in PDF or image form, can
    force ocrfeeder to execute shell commands embedded in the file name.
    While a workaround for #20, mentioning problems opening files with
    special characters, was introduced in 5286120c, this was not applied to
    every subprocess invocation. Furthermore, it is good practice to make
    use of the parameterization of arguments available in the subprocess
    package instead of relying on character escaping alone, avoiding shell
    invocation completely. This minimizes the attack surface.

 src/ocrfeeder/studio/widgetPresenter.py | 28 +++++-----
 src/ocrfeeder/util/lib.py               | 93 +++++++++++++++------------------
 2 files changed, 56 insertions(+), 65 deletions(-)
---
diff --git a/src/ocrfeeder/studio/widgetPresenter.py b/src/ocrfeeder/studio/widgetPresenter.py
index 088fa06..47a8ca9 100644
--- a/src/ocrfeeder/studio/widgetPresenter.py
+++ b/src/ocrfeeder/studio/widgetPresenter.py
@@ -17,6 +17,7 @@
 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ###########################################################################
 
+import shlex
 from .dataHolder import DataBox, TEXT_TYPE, IMAGE_TYPE
 from ocrfeeder.util import lib, PAPER_SIZES
 from ocrfeeder.util.configuration import ConfigurationManager
@@ -972,7 +973,7 @@ class UnpaperDialog(Gtk.Dialog):
         unpapered_image = os.path.splitext(name)[0] + '_unpapered.ppm'
         if os.path.exists(unpapered_image):
             unpapered_image = lib.getNonExistingFileName(unpapered_image)
-        command += ' %s %s' % (name, unpapered_image)
+        command += [name, unpapered_image]
         progress_bar = CommandProgressBarDialog(self, command, _('Performing Unpaper'), _('Performing 
unpaper. Please wait…'))
         progress_bar.run()
         self.unpapered_image = unpapered_image
@@ -1107,22 +1108,23 @@ class UnpaperPreferences(Gtk.VBox):
         self.gray_filter_size.set_sensitive(has_gray_filter)
 
     def getUnpaperCommand(self):
-        command = '%s --layout single' % self.configuration_manager.unpaper
+        command = [
+            self.configuration_manager.unpaper,
+            '--layout', 'single',
+        ]
         if not self.black_filter_usage.get_active():
-            command += ' --no-blackfilter'
+            command.append('--no-blackfilter')
         if self.noise_filter_none.get_active():
-            command += ' --no-noisefilter'
+            command.append('--no-noisefilter')
         elif self.noise_filter_custom.get_active():
-            command += ' --noisefilter-intensity %s' % \
-                       self.noise_filter_intensity.get_value()
+            command += ['--noisefilter-intensity', self.noise_filter_intensity.get_value()]
         if self.gray_filter_none.get_active():
-            command += ' --no-grayfilter'
+            command.append('--no-grayfilter')
         elif self.gray_filter_custom.get_active():
-            command += ' --grayfilter-size %s' % \
-                       self.gray_filter_size.get_value()
-        extra_options_text = self.extra_options.get_text()
-        if extra_options_text.strip():
-            command += ' %s ' % extra_options_text
+            command += ['--grayfilter-size', self.gray_filter_size.get_value()]
+        extra_options_text = self.extra_options.get_text().strip()
+        if extra_options_text:
+            command += shlex.split(extra_options_text)
         return command
 
     def save(self):
@@ -1206,7 +1208,7 @@ class CommandProgressBarDialog(Gtk.Dialog):
 
     def __startPulse(self):
         try:
-            self.process = subprocess.Popen(self.command.split(), stdout = subprocess.PIPE, stderr = 
subprocess.STDOUT, bufsize=1)
+            self.process = subprocess.Popen(self.command, stdout = subprocess.PIPE, stderr = 
subprocess.STDOUT, bufsize=1)
         except:
             warning = SimpleDialog(self, _('An error occurred!'), _('Error'), 'warning')
             warning.run()
diff --git a/src/ocrfeeder/util/lib.py b/src/ocrfeeder/util/lib.py
index b1c7767..ce61de6 100644
--- a/src/ocrfeeder/util/lib.py
+++ b/src/ocrfeeder/util/lib.py
@@ -17,21 +17,23 @@
 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ###########################################################################
 
+import math
+import mimetypes
 import os
+import re
+import shlex
 import subprocess
-import mimetypes
-from PIL import Image
 import tempfile
+import xml.etree.ElementTree as etree
+
+import sane
 from gi.repository import Gtk
-import math
+from PIL import Image
+
 from .constants import *
-import sane
-import tempfile
-import locale
-import re
-import xml.etree.ElementTree as etree
 from .log import debug
 
+
 def getIconOrLabel(icon_name, label_text, icon_size = Gtk.IconSize.SMALL_TOOLBAR):
     icon = Gtk.Image()
     theme = Gtk.IconTheme.get_default()
@@ -47,11 +49,8 @@ def getIconOrLabel(icon_name, label_text, icon_size = Gtk.IconSize.SMALL_TOOLBAR
 def getSafeGhostscriptPath(file_path):
     return re.sub(r'[^\w !#$%&()*+,./:;<=>?@\[\\\]^_`{|}~-]', '_', file_path)
 
-def getSafeGhostscriptInputFilename(file_name):
-    return re.sub(r'[/]', '_', getSafeGhostscriptPath(file_name))
-
 def getSafeGhostscriptOutputBasename(file_name):
-    return re.sub(r'[%]', '_', getSafeGhostscriptInputFilename(file_name))
+    return re.sub(r'[%]', '_', getSafeGhostscriptPath(file_name))
 
 def convertPdfToImages(pdf_file, temp_dir = '/tmp'):
     if not os.path.isfile(pdf_file):
@@ -67,41 +66,27 @@ def convertPdfToImages(pdf_file, temp_dir = '/tmp'):
 
     file_name = os.path.basename(pdf_file)
     base_name = os.path.splitext(file_name)[0]
-    file_name_safe = getSafeGhostscriptInputFilename(file_name)
+    # GhostScript will interpret the '%' character, which is allowed
+    # in Unix file names, so we need to escape it
     base_name_safe = getSafeGhostscriptOutputBasename(base_name)
-    pdf_file_safe = getSafeGhostscriptPath(pdf_file)
 
-    if pdf_file != pdf_file_safe:
-        try:
-            # The prefix added here is for extra safety so there are less chances
-            # for this path to match the original one in future changes.
-            pdf_path_safe = os.path.join(dir_name, 'OCRFEEDER_' + file_name_safe)
-            os.symlink(pdf_file, pdf_path_safe)
-        except:
-            debug('Unable to convert PDF: Cannot create temp symlink in: %s', dir_name)
-            return None
-
-        runGhostscript(dir_name, base_name_safe, pdf_path_safe)
-        try:
-            os.unlink(pdf_path_safe)
-        except:
-            debug('PDF conversion warning: Cannot remove temp symlink: %s', pdf_path_safe)
-    else:
-        runGhostscript(dir_name, base_name_safe, pdf_file)
+    runGhostscript(dir_name, base_name_safe, pdf_file)
 
     return dir_name
 
 def runGhostscript(dir_name, base_name, pdf_path, format = 'jpeg', resolution = 300, size = 'letter'):
-    command = 'gs -SDEVICE=%(format)s -r%(resolution)sx%(resolution)s -sPAPERSIZE=%(size)s ' \
-              '-sOutputFile=\'%(temp_name)s/%(file_name)s_%%04d.jpg\' ' \
-              '-dNOPAUSE -dBATCH -- \'%(pdf_file)s\'' % \
-              {'format': format,
-               'temp_name': dir_name,
-               'file_name': base_name,
-               'pdf_file': pdf_path,
-               'resolution': resolution,
-               'size': size}
-    process = subprocess.run(command, shell=True)
+    command = [
+        'gs',
+        f'-SDEVICE={format}',
+        f'-r{resolution}x{resolution}s',
+        f'-sPAPERSIZE={size}',
+        f'-sOutputFile={dir_name}/{base_name}_%04d.jpg',
+        '-dNOPAUSE',
+        '-dBATCH',
+        '--',
+        pdf_path
+    ]
+    subprocess.run(command)
 
 def getImagesFromFolder(folder):
     if folder is None:
@@ -159,20 +144,24 @@ def getExecPath(exec_name):
     return real_exec_name
 
 def getUnpaperCommand(configuration_manager):
-    command = '%s --layout single --overwrite ' % configuration_manager.unpaper
+    command = [
+        configuration_manager.unpaper,
+        '--layout', 'single',
+        '--overwrite',
+    ]
     if not configuration_manager.unpaper_use_black_filter:
-        command += ' --no-blackfilter'
+        command.append('--no-blackfilter')
     if configuration_manager.unpaper_noise_filter_intensity == 'none':
-        command += ' --no-noisefilter'
+        command.append('--no-noisefilter')
     elif configuration_manager.unpaper_noise_filter_intensity != 'auto':
-        command += ' --noisefilter-intensity %s' % \
-            configuration_manager.unpaper_noise_filter_intensity
+        command += ['--noisefilter-intensity', configuration_manager.unpaper_noise_filter_intensity]
     if configuration_manager.unpaper_gray_filter_size == 'none':
-        command += ' --no-grayfilter'
+        command.append('--no-grayfilter')
     elif configuration_manager.unpaper_gray_filter_size != 'auto':
-        command += ' --grayfilter-size %s' % \
-            configuration_manager.unpaper_gray_filter_size
-    command += ' %s ' % configuration_manager.unpaper_extra_options
+        command += ['--grayfilter-size', configuration_manager.unpaper_gray_filter_size]
+    extra_options = configuration_manager.unpaper_extra_options.strip()
+    if extra_options:
+        command += shlex.split(extra_options)
     return command
 
 def unpaperImage(configuration_manager, image_path):
@@ -186,10 +175,10 @@ def unpaperImage(configuration_manager, image_path):
     image_path = Image.open(image_path)
     image_path.save(unpapered_in, format = 'PPM')
     command = getUnpaperCommand(configuration_manager)
-    command += ' %s %s' % (unpapered_in, unpapered_name)
+    command += [unpapered_in, unpapered_name]
     debug(command)
     try:
-        os.system(command)
+        subprocess.run(command)
     except Exception as exception:
         debug(exception)
         return None


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]