[gimp/wip/wormnest/windows-tiff-delete: 60/61] plug-ins: when loading TIFF images ignore thumbnail pages




commit 2a38ec9e4b9730d93d2ebad95ae5fda5447ba477
Author: Jacob Boerema <jgboerema gmail com>
Date:   Fri Aug 27 15:15:12 2021 -0400

    plug-ins: when loading TIFF images ignore thumbnail pages
    
    TIFF image pages can specify what type of image that page represents.
    
    If the page is marked as FILETYPE_REDUCEDIMAGE we will consider it to be
    a thumbnail and filter it out of the list of pages that can be selected
    to be loaded.
    
    In addition, we will try to recognize as thumbnails certain pages that
    don't have the subfiletype tag set.
    We will consider it a thumbnail if:
    - It's the second page
    - PhotometricInterpretation is YCbCr
    - Compression is old style jpeg
    - First page uses a different compression or PhotometricInterpretation
    If these conditions are true the page will also be filtered out.
    
    We could consider adding an option to choose whether thumbnail pages are
    filtered out. Since thumbnails were not loaded up until now either, I
    think this should be considered a separate feature.

 plug-ins/file-tiff/file-tiff-load.c | 223 ++++++++++++++++++++++++------------
 plug-ins/file-tiff/file-tiff-load.h |   2 +
 2 files changed, 150 insertions(+), 75 deletions(-)
---
diff --git a/plug-ins/file-tiff/file-tiff-load.c b/plug-ins/file-tiff/file-tiff-load.c
index f0abf7711c..368d918e8a 100644
--- a/plug-ins/file-tiff/file-tiff-load.c
+++ b/plug-ins/file-tiff/file-tiff-load.c
@@ -240,6 +240,7 @@ load_image (GFile        *file,
   gint               max_col            = 0;
   gboolean           save_transp_pixels = FALSE;
   GimpColorProfile  *first_profile      = NULL;
+  const gchar       *extra_message      = NULL;
   gint               li;
 
   *image = NULL;
@@ -300,97 +301,165 @@ load_image (GFile        *file,
     }
 
   pages.pages = NULL;
-  if (run_mode != GIMP_RUN_INTERACTIVE)
-    {
-      pages.pages = g_new (gint, pages.n_pages);
+  pages.n_filtered_pages = pages.n_pages;
+
+  pages.filtered_pages  = g_new0 (gint, pages.n_pages);
+  for (li = 0; li < pages.n_pages; li++)
+    pages.filtered_pages[li] = li;
 
-      for (li = 0; li < pages.n_pages; li++)
-        pages.pages[li] = li;
+  if (pages.n_pages == 1)
+    {
+      pages.pages  = g_new0 (gint, pages.n_pages);
+      pages.target = GIMP_PAGE_SELECTOR_TARGET_LAYERS;
     }
-  else
+
+  /* Check all pages if any has an unspecified or unset channel. */
+  for (li = 0; li < pages.n_pages; li++)
     {
-      const gchar *extra_message = NULL;
+      gushort  spp;
+      gushort  photomet;
+      gushort  extra;
+      gushort *extra_types;
+      gushort  file_type = 0;
+      gboolean first_page_old_jpeg = FALSE;
 
-      if (pages.n_pages == 1)
+      if (TIFFSetDirectory (tif, li) == 0)
+        continue;
+
+      TIFFGetFieldDefaulted (tif, TIFFTAG_SAMPLESPERPIXEL, &spp);
+      if (! TIFFGetField (tif, TIFFTAG_PHOTOMETRIC, &photomet))
         {
-          pages.pages  = g_new0 (gint, pages.n_pages);
-          pages.target = GIMP_PAGE_SELECTOR_TARGET_LAYERS;
+          guint16 compression;
+
+          if (TIFFGetField (tif, TIFFTAG_COMPRESSION, &compression) &&
+              (compression == COMPRESSION_CCITTFAX3 ||
+               compression == COMPRESSION_CCITTFAX4 ||
+               compression == COMPRESSION_CCITTRLE  ||
+               compression == COMPRESSION_CCITTRLEW))
+            {
+              photomet = PHOTOMETRIC_MINISWHITE;
+            }
+          else
+            {
+              /* old AppleScan software misses out the photometric tag
+               * (and incidentally assumes min-is-white, but xv
+               * assumes min-is-black, so we follow xv's lead.  It's
+               * not much hardship to invert the image later).
+               */
+              photomet = PHOTOMETRIC_MINISBLACK;
+            }
         }
+      if (! TIFFGetField (tif, TIFFTAG_EXTRASAMPLES, &extra, &extra_types))
+        extra = 0;
 
-      /* Check all pages if any has an unspecified or unset channel. */
-      for (li = 0; li < pages.n_pages; li++)
+      /* Try to detect if a TIFF page is a thumbnail.
+       * Easy case: if subfiletype is set to FILETYPE_REDUCEDIMAGE.
+       * If no subfiletype is defined we try to detect it ourselves.
+       * We will consider it a thumbnail if:
+       * - It's the second page
+       * - PhotometricInterpretation is YCbCr
+       * - Compression is old style jpeg
+       * - First page uses a different compression or PhotometricInterpretation
+       *
+       * We could also add a check for the presence of TIFFTAG_EXIFIFD since
+       * this should usually be a thumbnail part of EXIF metadata. Since that
+       * probably won't make a difference, I will leave that out for now.
+       */
+      if (li == 0)
         {
-          gushort  spp;
-          gushort  photomet;
-          gushort  extra;
-          gushort *extra_types;
+          guint16 compression;
 
-          if (TIFFSetDirectory (tif, li) == 0)
-            continue;
+          if (TIFFGetField (tif, TIFFTAG_COMPRESSION, &compression) &&
+              compression == COMPRESSION_OJPEG &&
+              photomet    == PHOTOMETRIC_YCBCR)
+            first_page_old_jpeg = TRUE;
+        }
 
-          TIFFGetFieldDefaulted (tif, TIFFTAG_SAMPLESPERPIXEL, &spp);
-          if (! TIFFGetField (tif, TIFFTAG_PHOTOMETRIC, &photomet))
+      if (TIFFGetField (tif, TIFFTAG_SUBFILETYPE, &file_type))
+        {
+          if (file_type == FILETYPE_REDUCEDIMAGE)
+            {
+              /* file_type is a mask but we will only filter out pages
+               * that only have FILETYPE_REDUCEDIMAGE set */
+              pages.filtered_pages[li] = -1;
+              pages.n_filtered_pages--;
+              g_debug ("Page %d is a FILETYPE_REDUCEDIMAGE thumbnail.\n", li);
+            }
+        }
+      else
+        {
+          if (li == 1 && photomet == PHOTOMETRIC_YCBCR &&
+              ! first_page_old_jpeg)
             {
               guint16 compression;
 
               if (TIFFGetField (tif, TIFFTAG_COMPRESSION, &compression) &&
-                  (compression == COMPRESSION_CCITTFAX3 ||
-                   compression == COMPRESSION_CCITTFAX4 ||
-                   compression == COMPRESSION_CCITTRLE  ||
-                   compression == COMPRESSION_CCITTRLEW))
-                {
-                  photomet = PHOTOMETRIC_MINISWHITE;
-                }
-              else
+                  compression == COMPRESSION_OJPEG)
                 {
-                  /* old AppleScan software misses out the photometric tag
-                   * (and incidentally assumes min-is-white, but xv
-                   * assumes min-is-black, so we follow xv's lead.  It's
-                   * not much hardship to invert the image later).
-                   */
-                  photomet = PHOTOMETRIC_MINISBLACK;
+                  pages.filtered_pages[li] = -1;
+                  pages.n_filtered_pages--;
+                  g_debug ("Page %d is most likely a thumbnail.\n", li);
                 }
             }
-          if (! TIFFGetField (tif, TIFFTAG_EXTRASAMPLES, &extra, &extra_types))
-            extra = 0;
-
-          /* TODO: current code always assumes that the alpha channel
-           * will be the first extra channel, though the TIFF spec does
-           * not mandate such assumption. A future improvement should be
-           * to actually loop through the extra channels and save the
-           * alpha channel index.
-           * Of course, this is an edge case, as most image would likely
-           * have only a single extra channel anyway. But still we could
-           * be more accurate.
+        }
+
+      /* TODO: current code always assumes that the alpha channel
+       * will be the first extra channel, though the TIFF spec does
+       * not mandate such assumption. A future improvement should be
+       * to actually loop through the extra channels and save the
+       * alpha channel index.
+       * Of course, this is an edge case, as most image would likely
+       * have only a single extra channel anyway. But still we could
+       * be more accurate.
+       */
+      if (extra > 0 && (extra_types[0] == EXTRASAMPLE_UNSPECIFIED))
+        {
+          extra_message = _("Extra channels with unspecified data.");
+          break;
+        }
+      else if (extra == 0 && is_non_conformant_tiff (photomet, spp))
+        {
+          /* ExtraSamples field not set, yet we have more channels than
+           * the PhotometricInterpretation field suggests.
+           * This should not happen as the spec clearly says "This field
+           * must be present if there are extra samples". So the files
+           * can be considered non-conformant.
+           * Let's ask what to do with the channel.
            */
-          if (extra > 0 && (extra_types[0] == EXTRASAMPLE_UNSPECIFIED))
+          extra_message = _("Non-conformant TIFF: extra channels without 'ExtraSamples' field.");
+        }
+    }
+  TIFFSetDirectory (tif, 0);
+
+  if (run_mode == GIMP_RUN_INTERACTIVE &&
+      (pages.n_pages > 1 || extra_message) &&
+      ! load_dialog (tif, LOAD_PROC, &pages,
+                     extra_message, &default_extra))
+    {
+      TIFFClose (tif);
+      g_clear_pointer (&pages.pages, g_free);
+
+      return GIMP_PDB_CANCEL;
+    }
+  /* Adjust pages to take filtered out pages into account. */
+  if (pages.o_pages > pages.n_filtered_pages)
+    {
+      gint fi;
+      gint sel_index = 0;
+      gint sel_add   = 0;
+
+      for (fi = 0; fi < pages.o_pages && sel_index < pages.n_pages; fi++)
+        {
+          if (pages.filtered_pages[fi] == -1)
             {
-              extra_message = _("Extra channels with unspecified data.");
-              break;
+              sel_add++;
             }
-          else if (extra == 0 && is_non_conformant_tiff (photomet, spp))
+          if (pages.pages[sel_index] + sel_add == fi)
             {
-              /* ExtraSamples field not set, yet we have more channels than
-               * the PhotometricInterpretation field suggests.
-               * This should not happen as the spec clearly says "This field
-               * must be present if there are extra samples". So the files
-               * can be considered non-conformant.
-               * Let's ask what to do with the channel.
-               */
-              extra_message = _("Non-conformant TIFF: extra channels without 'ExtraSamples' field.");
+              pages.pages[sel_index] = fi;
+              sel_index++;
             }
         }
-      TIFFSetDirectory (tif, 0);
-
-      if ((pages.n_pages > 1 || extra_message) &&
-          ! load_dialog (tif, LOAD_PROC, &pages,
-                         extra_message, &default_extra))
-        {
-          TIFFClose (tif);
-          g_clear_pointer (&pages.pages, g_free);
-
-          return GIMP_PDB_CANCEL;
-        }
     }
 
   gimp_set_data (LOAD_PROC "-target",
@@ -2491,7 +2560,7 @@ load_dialog (TIFF              *tif,
 
   if (pages->n_pages > 1)
     {
-      gint i;
+      gint i, j;
 
       /* Page Selector */
       selector = gimp_page_selector_new ();
@@ -2499,16 +2568,20 @@ load_dialog (TIFF              *tif,
       gtk_box_pack_start (GTK_BOX (vbox), selector, TRUE, TRUE, 0);
 
       gimp_page_selector_set_n_pages (GIMP_PAGE_SELECTOR (selector),
-                                      pages->n_pages);
+                                      pages->n_filtered_pages);
       gimp_page_selector_set_target (GIMP_PAGE_SELECTOR (selector), pages->target);
 
-      for (i = 0; i < pages->n_pages; i++)
+      for (i = 0, j = 0; i < pages->n_pages && j < pages->n_filtered_pages; i++)
         {
-          const gchar *name = tiff_get_page_name (tif);
+          if (pages->filtered_pages[i] != -1)
+            {
+              const gchar *name = tiff_get_page_name (tif);
 
-          if (name)
-            gimp_page_selector_set_page_label (GIMP_PAGE_SELECTOR (selector),
-                                               i, name);
+              if (name)
+                gimp_page_selector_set_page_label (GIMP_PAGE_SELECTOR (selector),
+                                                   j, name);
+              j++;
+            }
 
           TIFFReadDirectory (tif);
         }
diff --git a/plug-ins/file-tiff/file-tiff-load.h b/plug-ins/file-tiff/file-tiff-load.h
index c94f881d79..394f0931db 100644
--- a/plug-ins/file-tiff/file-tiff-load.h
+++ b/plug-ins/file-tiff/file-tiff-load.h
@@ -29,6 +29,8 @@ typedef struct
   gint                    o_pages;
   gint                    n_pages;
   gint                   *pages;
+  gint                   *filtered_pages;   /* thumbnail is marked as -1 */
+  gint                    n_filtered_pages;
   GimpPageSelectorTarget  target;
   gboolean                keep_empty_space;
 } TiffSelectedPages;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]