[gimp] app: improve file magic matching



commit 6af83a5a08435db238c87d7046a254bacfbb6879
Author: Michael Natterer <mitch gimp org>
Date:   Sun Apr 24 23:56:57 2016 +0200

    app: improve file magic matching
    
    Change file magic matching from using a simple boolean "magic matches"
    logic to using a matching quality. The quality is measured by the
    number of bytes that matched.
    
    Matching a single file procedure's magics now tries all magics and
    returns the best match quality.
    
    Searching a file procedure for a given file now tries all file
    procedures and returns the one with the best match quality.
    
    This fixes raw camera files being opened as TIFF, provided that a
    better magic than just the generic TIFF magic is available.

 app/plug-in/gimppluginmanager-file-procedure.c |  205 +++++++++++++++++-------
 1 files changed, 145 insertions(+), 60 deletions(-)
---
diff --git a/app/plug-in/gimppluginmanager-file-procedure.c b/app/plug-in/gimppluginmanager-file-procedure.c
index 24e9956..27a0eb9 100644
--- a/app/plug-in/gimppluginmanager-file-procedure.c
+++ b/app/plug-in/gimppluginmanager-file-procedure.c
@@ -40,9 +40,10 @@
 
 typedef enum
 {
-  FILE_MATCH_NONE,
-  FILE_MATCH_MAGIC,
-  FILE_MATCH_SIZE
+  /*  positive values indicate the length of a matching magic  */
+
+  FILE_MATCH_NONE = 0,
+  FILE_MATCH_SIZE = -1
 } FileMatchType;
 
 
@@ -116,11 +117,13 @@ file_procedure_find (GSList  *procs,
   /* Then look for magics, but not on remote files */
   if (g_file_is_native (file))
     {
-      GSList       *list;
-      GInputStream *input     = NULL;
-      gboolean      opened    = FALSE;
-      gsize         head_size = 0;
-      guchar        head[256];
+      GSList              *list;
+      GInputStream        *input     = NULL;
+      gboolean             opened    = FALSE;
+      gsize                head_size = 0;
+      guchar               head[256];
+      FileMatchType        best_match_val = FILE_MATCH_NONE;
+      GimpPlugInProcedure *best_file_proc = NULL;
 
       for (list = procs; list; list = g_slist_next (list))
         {
@@ -172,24 +175,28 @@ file_procedure_find (GSList  *procs,
                     }
                   else if (match_val != FILE_MATCH_NONE)
                     {
-                      g_object_unref (input);
+                      g_printerr ("magic match %d on %s\n",
+                                  match_val,
+                                  gimp_object_get_name (file_proc));
 
-                      return file_proc;
+                      if (match_val > best_match_val)
+                        {
+                          best_match_val = match_val;
+                          best_file_proc = file_proc;
+                        }
                     }
                 }
             }
         }
 
       if (input)
+        g_object_unref (input);
+
+      if (best_file_proc)
         {
-#if 0
-          if (ferror (ifp))
-            g_set_error_literal (error, G_FILE_ERROR,
-                                 g_file_error_from_errno (errno),
-                                 g_strerror (errno));
-#endif
-
-          g_object_unref (input);
+          g_printerr ("best magic match on %s\n",
+                      gimp_object_get_name (best_file_proc));
+          return best_file_proc;
         }
     }
 
@@ -414,7 +421,7 @@ file_check_single_magic (const gchar  *offset,
   FileMatchType found = FILE_MATCH_NONE;
   glong         offs;
   gulong        num_testval;
-  gulong        num_operatorval;
+  gulong        num_operator_val;
   gint          numbytes, k;
   const gchar  *num_operator_ptr;
   gchar         num_operator;
@@ -461,18 +468,20 @@ file_check_single_magic (const gchar  *offset,
       if (g_ascii_isdigit (num_operator_ptr[1]))
         {
           if (num_operator_ptr[1] != '0')      /* decimal */
-            sscanf (num_operator_ptr+1, "%lu", &num_operatorval);
+            sscanf (num_operator_ptr+1, "%lu", &num_operator_val);
           else if (num_operator_ptr[2] == 'x') /* hexadecimal */
-            sscanf (num_operator_ptr+3, "%lx", &num_operatorval);
+            sscanf (num_operator_ptr+3, "%lx", &num_operator_val);
           else                                 /* octal */
-            sscanf (num_operator_ptr+2, "%lo", &num_operatorval);
+            sscanf (num_operator_ptr+2, "%lo", &num_operator_val);
 
           num_operator = *num_operator_ptr;
         }
     }
 
-  if (numbytes > 0)   /* Numerical test ? */
+  if (numbytes > 0)
     {
+      /* Numerical test */
+
       gchar   num_test = '=';
       gulong  fileval  = 0;
 
@@ -489,8 +498,10 @@ file_check_single_magic (const gchar  *offset,
       if (errno != 0)
         return FILE_MATCH_NONE;
 
-      if (numbytes == 5)    /* Check for file size ? */
+      if (numbytes == 5)
         {
+          /* Check for file size */
+
           GFileInfo *info = g_file_query_info (file,
                                                G_FILE_ATTRIBUTE_STANDARD_SIZE,
                                                G_FILE_QUERY_INFO_NONE,
@@ -502,13 +513,17 @@ file_check_single_magic (const gchar  *offset,
           g_object_unref (info);
         }
       else if (offs >= 0 &&
-               (offs + numbytes <= headsize)) /* We have it in memory ? */
+               (offs + numbytes <= headsize))
         {
+           /* We have it in memory */
+
           for (k = 0; k < numbytes; k++)
             fileval = (fileval << 8) | (glong) file_head[offs + k];
         }
-      else   /* Read it from file */
+      else
         {
+          /* Read it from file */
+
           if (! g_seekable_seek (G_SEEKABLE (input), offs,
                                  (offs >= 0) ? G_SEEK_SET : G_SEEK_END,
                                  NULL, NULL))
@@ -532,20 +547,31 @@ file_check_single_magic (const gchar  *offset,
         }
 
       if (num_operator == '&')
-        fileval &= num_operatorval;
+        fileval &= num_operator_val;
 
       if (num_test == '<')
-        found = (fileval < num_testval);
+        {
+          if (fileval < num_testval)
+            found = numbytes;
+        }
       else if (num_test == '>')
-        found = (fileval > num_testval);
+        {
+          if (fileval > num_testval)
+            found = numbytes;
+        }
       else
-        found = (fileval == num_testval);
+        {
+          if (fileval == num_testval)
+            found = numbytes;
+        }
 
       if (found && (numbytes == 5))
         found = FILE_MATCH_SIZE;
     }
-  else if (numbytes == 0) /* String test */
+  else if (numbytes == 0)
     {
+       /* String test */
+
       gchar mem_testval[256];
 
       file_convert_string (value,
@@ -556,20 +582,23 @@ file_check_single_magic (const gchar  *offset,
         return FILE_MATCH_NONE;
 
       if (offs >= 0 &&
-          (offs + numbytes <= headsize)) /* We have it in memory ? */
+          (offs + numbytes <= headsize))
         {
-          found = (memcmp (mem_testval, file_head + offs, numbytes) == 0);
+          /* We have it in memory */
+
+          if (memcmp (mem_testval, file_head + offs, numbytes) == 0)
+            found = numbytes;
         }
-      else   /* Read it from file */
+      else
         {
+          /* Read it from file */
+
           if (! g_seekable_seek (G_SEEKABLE (input), offs,
                                  (offs >= 0) ? G_SEEK_SET : G_SEEK_END,
                                  NULL, NULL))
             return FILE_MATCH_NONE;
 
-          found = FILE_MATCH_MAGIC;
-
-          for (k = 0; found && (k < numbytes); k++)
+          for (k = 0; k < numbytes; k++)
             {
               guchar  byte;
               GError *error = NULL;
@@ -579,12 +608,15 @@ file_check_single_magic (const gchar  *offset,
               if (error)
                 {
                   g_clear_error (&error);
+
                   return FILE_MATCH_NONE;
                 }
 
               if (byte != mem_testval[k])
-                found = FILE_MATCH_NONE;
+                return FILE_MATCH_NONE;
             }
+
+          found = numbytes;
         }
     }
 
@@ -599,36 +631,89 @@ file_check_magic_list (GSList       *magics_list,
                        GInputStream *input)
 
 {
-  const gchar   *offset;
-  const gchar   *type;
-  const gchar   *value;
-  gboolean       and   = FALSE;
-  gboolean       found = FALSE;
-  FileMatchType  match_val;
-
-  while (magics_list)
+  gboolean      and            = FALSE;
+  gboolean      found          = FALSE;
+  FileMatchType best_match_val = FILE_MATCH_NONE;
+  FileMatchType match_val      = FILE_MATCH_NONE;
+
+  for (; magics_list; magics_list = magics_list->next)
     {
-      if ((offset      = magics_list->data) == NULL) break;
-      if ((magics_list = magics_list->next) == NULL) break;
-      if ((type        = magics_list->data) == NULL) break;
-      if ((magics_list = magics_list->next) == NULL) break;
-      if ((value       = magics_list->data) == NULL) break;
+      const gchar   *offset;
+      const gchar   *type;
+      const gchar   *value;
+      FileMatchType  single_match_val = FILE_MATCH_NONE;
 
-      magics_list = magics_list->next;
+      if ((offset      = magics_list->data) == NULL) return FILE_MATCH_NONE;
+      if ((magics_list = magics_list->next) == NULL) return FILE_MATCH_NONE;
+      if ((type        = magics_list->data) == NULL) return FILE_MATCH_NONE;
+      if ((magics_list = magics_list->next) == NULL) return FILE_MATCH_NONE;
+      if ((value       = magics_list->data) == NULL) return FILE_MATCH_NONE;
+
+      single_match_val = file_check_single_magic (offset, type, value,
+                                                  head, headsize,
+                                                  file, input);
 
-      match_val = file_check_single_magic (offset, type, value,
-                                           head, headsize,
-                                           file, input);
       if (and)
-        found = found && (match_val != FILE_MATCH_NONE);
+        found = found && (single_match_val != FILE_MATCH_NONE);
       else
-        found = (match_val != FILE_MATCH_NONE);
+        found = (single_match_val != FILE_MATCH_NONE);
+
+      if (match_val == FILE_MATCH_NONE)
+        {
+          /* if we have no match yet, this is it in any case */
+
+          match_val = single_match_val;
+        }
+      else if (single_match_val != FILE_MATCH_NONE)
+        {
+          /* else if we have a match on this one, combine it with the
+           * existing return value
+           */
+
+          if (single_match_val == FILE_MATCH_SIZE)
+            {
+              /* if we already have a magic match, simply increase
+               * that by one to indicate "better match", not perfect
+               * but better than losing the additional size match
+               * entirely
+               */
+              if (match_val != FILE_MATCH_SIZE)
+                match_val += 1;
+            }
+          else
+            {
+              /* if we already have a magic match, simply add to its
+               * length; otherwise if we already have a size match,
+               * combine it with this match, see comment above
+               */
+              if (match_val != FILE_MATCH_SIZE)
+                match_val += single_match_val;
+              else
+                match_val = single_match_val + 1;
+           }
+        }
+
+      if (best_match_val == FILE_MATCH_NONE)
+        {
+          /* if we have no best match yet, this is it */
+
+          best_match_val = match_val;
+        }
+      else if (match_val != FILE_MATCH_NONE)
+        {
+          /* otherwise if this was a match, update the best match, note
+           * that by using MAX we will not overwrite a magic match
+           * with a size match
+           */
+
+          best_match_val = MAX (best_match_val, match_val);
+        }
 
       and = (strchr (offset, '&') != NULL);
 
-      if (! and && found)
-        return match_val;
+      if (! and)
+        match_val = FILE_MATCH_NONE;
     }
 
-  return FILE_MATCH_NONE;
+  return best_match_val;
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]