[gtksourceview] Improve the guess language algorithm. (Fixes bug #598579)



commit 5090e395fd5e9423b06410e2d90e220a945e513d
Author: Ignacio Casal Quinteiro <icq gnome org>
Date:   Sun Oct 18 15:37:32 2009 +0200

    Improve the guess language algorithm. (Fixes bug #598579)
    
    With a lot of help from Paolo Borelli, the algorithm now works this way:
    glob matches take precedence over mime matches. A mime match is used in
    the following cases:
     - to pick among the list of glob matches
     - to refine a glob match (e.g. glob is xml and mime is an xml dialect)
     - no glob matches

 gtksourceview/gtksourcelanguagemanager.c |  135 +++++++++++++++++++++--------
 gtksourceview/language-specs/xml.lang    |    2 +-
 gtksourceview/language-specs/xslt.lang   |    2 +-
 tests/test-languagemanager.c             |    5 +-
 4 files changed, 102 insertions(+), 42 deletions(-)
---
diff --git a/gtksourceview/gtksourcelanguagemanager.c b/gtksourceview/gtksourcelanguagemanager.c
index dc525e8..37689b2 100644
--- a/gtksourceview/gtksourcelanguagemanager.c
+++ b/gtksourceview/gtksourcelanguagemanager.c
@@ -409,12 +409,13 @@ gtk_source_language_manager_get_language (GtkSourceLanguageManager *lm,
 	return g_hash_table_lookup (lm->priv->language_ids, id);
 }
 
-static GtkSourceLanguage *
-pick_lang_for_filename (GtkSourceLanguageManager *lm,
-			const gchar              *filename)
+static GSList *
+pick_langs_for_filename (GtkSourceLanguageManager *lm,
+			 const gchar              *filename)
 {
 	char *filename_utf8;
 	const gchar* const * p;
+	GSList *langs = NULL;
 
 	/* Use g_filename_display_name() instead of g_filename_to_utf8() because
 	 * g_filename_display_name() doesn't fail and replaces non-convertible
@@ -438,9 +439,7 @@ pick_lang_for_filename (GtkSourceLanguageManager *lm,
 			 * to include them literally in a pattern.  */
 			if (g_pattern_match_simple (*gptr, filename_utf8))
 			{
-				g_strfreev (globs);
-				g_free (filename_utf8);
-				return lang;
+				langs = g_slist_prepend (langs, lang);
 			}
 		}
 
@@ -448,7 +447,7 @@ pick_lang_for_filename (GtkSourceLanguageManager *lm,
 	}
 
 	g_free (filename_utf8);
-	return NULL;
+	return langs;
 }
 
 static GtkSourceLanguage *
@@ -491,8 +490,8 @@ pick_lang_for_mime_type_pass (GtkSourceLanguageManager *lm,
 }
 
 static GtkSourceLanguage *
-pick_lang_for_mime_type (GtkSourceLanguageManager *lm,
-			 const char               *mime_type)
+pick_lang_for_mime_type_real (GtkSourceLanguageManager *lm,
+			      const char               *mime_type)
 {
 	GtkSourceLanguage *lang;
 	lang = pick_lang_for_mime_type_pass (lm, mime_type, TRUE);
@@ -519,6 +518,39 @@ grok_win32_content_type (const gchar  *content_type,
 }
 #endif
 
+/* Map a platform "content type" to a language, or NULL if nothing matches;
+ * on Windows the extension is tried against the globs before the mime type.  */
+static GtkSourceLanguage *
+pick_lang_for_mime_type (GtkSourceLanguageManager *lm,
+			 const gchar              *content_type)
+{
+	GtkSourceLanguage *lang = NULL;
+
+#ifndef G_OS_WIN32
+	/* On Unix "content type" is mime type */
+	lang = pick_lang_for_mime_type_real (lm, content_type);
+#else
+	/* On Windows "content type" is an extension, but user may pass a mime type too */
+	gchar *mime_type;
+	gchar *alt_filename;
+
+	grok_win32_content_type (content_type, &alt_filename, &mime_type);
+	if (alt_filename != NULL)
+	{
+		GSList *langs = pick_langs_for_filename (lm, alt_filename);
+		/* NULL list means no glob matched; free only the list, not the languages */
+		if (langs != NULL)
+			lang = GTK_SOURCE_LANGUAGE (langs->data);
+		g_slist_free (langs);
+	}
+	if (lang == NULL && mime_type != NULL)
+		lang = pick_lang_for_mime_type_real (lm, mime_type);
+	g_free (mime_type);
+	g_free (alt_filename);
+#endif
+	return lang;
+}
+
 /**
  * gtk_source_language_manager_guess_language:
  * @lm: a #GtkSourceLanguageManager.
@@ -570,6 +602,7 @@ gtk_source_language_manager_guess_language (GtkSourceLanguageManager *lm,
 					    const gchar		     *content_type)
 {
 	GtkSourceLanguage *lang = NULL;
+	GSList *langs = NULL;
 
 	g_return_val_if_fail (GTK_IS_SOURCE_LANGUAGE_MANAGER (lm), NULL);
 	g_return_val_if_fail (filename != NULL || content_type != NULL, NULL);
@@ -578,40 +611,68 @@ gtk_source_language_manager_guess_language (GtkSourceLanguageManager *lm,
 
 	ensure_languages (lm);
 
-	/* TODO
-	> Maybe the logic should be:
-	>  - match mime
-	>  - match glob
-	>  - if just one matches use it
-	>  - if they both match and the corresponding mime inside the lang files are one
-	> the anchestor of the other pick the more strict lang
-	>  - if they both match and the corresponding mime inside the lang files are
-	> unrelated, pick the glob one
+	/* Glob take precedence over mime match. Mime match is used in the
+	   following cases:
+	  - to pick among the list of glob matches
+	  - to refine a glob match (e.g. glob is xml and mime is an xml dialect)
+	  - no glob matches
 	*/
 
 	if (filename != NULL)
-		lang = pick_lang_for_filename (lm, filename);
+		langs = pick_langs_for_filename (lm, filename);
 
-	if (lang == NULL && content_type != NULL)
+	if (langs != NULL)
+	{
+		/* Use mime to pick among glob matches */
+		if (content_type != NULL)
+		{
+			GSList *l;
+			
+			for (l = langs; l != NULL; l = g_slist_next (l))
+			{
+				gchar **mime_types, **gptr;
+				
+				lang = GTK_SOURCE_LANGUAGE (l->data);
+				mime_types = gtk_source_language_get_mime_types (lang);
+				
+				for (gptr = mime_types; gptr != NULL && *gptr != NULL; gptr++)
+				{
+					gchar *content;
+					
+					content = g_content_type_from_mime_type (*gptr);
+					
+					if (content != NULL && g_content_type_is_a (content_type, content))
+					{
+						if (!g_content_type_equals (content_type, content))
+						{
+							GtkSourceLanguage *mimelang;
+							
+							mimelang = pick_lang_for_mime_type (lm, content_type);
+							
+							if (mimelang != NULL)
+								lang = mimelang;
+						}
+						
+						g_strfreev (mime_types);
+						g_slist_free (langs);
+						g_free (content);
+						
+						return lang;
+					}
+					g_free (content);
+				}
+				
+				g_strfreev (mime_types);
+			}
+		}
+		lang = GTK_SOURCE_LANGUAGE (langs->data);
+		
+		g_slist_free (langs);
+	}
+	/* No glob match */
+	else if (langs == NULL && content_type != NULL)
 	{
-#ifndef G_OS_WIN32
-		/* On Unix "content type" is mime type */
 		lang = pick_lang_for_mime_type (lm, content_type);
-#else
-		/* On Windows "content type" is an extension, but user may pass a mime type too */
-		gchar *mime_type;
-		gchar *alt_filename;
-
-		grok_win32_content_type (content_type, &alt_filename, &mime_type);
-
-		if (alt_filename != NULL)
-			lang = pick_lang_for_filename (lm, alt_filename);
-		if (lang == NULL && mime_type != NULL)
-			lang = pick_lang_for_mime_type (lm, mime_type);
-
-		g_free (mime_type);
-		g_free (alt_filename);
-#endif
 	}
 
 	return lang;
diff --git a/gtksourceview/language-specs/xml.lang b/gtksourceview/language-specs/xml.lang
index 5c0c7c1..bdc1784 100644
--- a/gtksourceview/language-specs/xml.lang
+++ b/gtksourceview/language-specs/xml.lang
@@ -25,7 +25,7 @@
 <language id="xml" _name="XML" version="2.0" _section="Markup">
     <metadata>
         <property name="mimetypes">application/xml;text/xml</property>
-        <property name="globs">*.xml;*.xspf;*.siv;*.smil;*.smi;*.sml;*.kino;*.xul;*.xbel;*.abw;*.zabw;*.glade;*.jnlp;*.xhtml;*.svg;*.mml;*.rdf;*.rss;*.wml;*.xmi;*.fo;*.xslfo;*.xslt;*.xsl</property>
+        <property name="globs">*.xml;*.xspf;*.siv;*.smil;*.smi;*.sml;*.kino;*.xul;*.xbel;*.abw;*.zabw;*.glade;*.jnlp;*.xhtml;*.svg;*.mml;*.rdf;*.rss;*.wml;*.xmi;*.fo;*.xslfo</property>
         <property name="block-comment-start">&lt;!--</property>
         <property name="block-comment-end">--&gt;</property>
     </metadata>
diff --git a/gtksourceview/language-specs/xslt.lang b/gtksourceview/language-specs/xslt.lang
index 02e2494..f8dd823 100644
--- a/gtksourceview/language-specs/xslt.lang
+++ b/gtksourceview/language-specs/xslt.lang
@@ -23,7 +23,7 @@
 <language id="xslt" _name="XSLT" version="2.0" _section="Markup">
   <metadata>
     <property name="mimetypes">application/xslt+xml</property>
-    <property name="globs">*.xsl</property>
+    <property name="globs">*.xslt;*.xsl</property>
     <property name="block-comment-start">&lt;!--</property>
     <property name="block-comment-end">--&gt;</property>
   </metadata>
diff --git a/tests/test-languagemanager.c b/tests/test-languagemanager.c
index ba2d64a..1d3c04d 100644
--- a/tests/test-languagemanager.c
+++ b/tests/test-languagemanager.c
@@ -75,9 +75,8 @@ test_guess_language (void)
 	g_assert_cmpstr (gtk_source_language_get_id (l), ==, "c");
 
 	/* when content type is a descendent of the mime matched by the glob, mime wins */
-//	FIXME: this fails with the current logic
-//	l = gtk_source_language_manager_guess_language (lm, "foo.xml", "application/xslt+xml");
-//	g_assert_cmpstr (gtk_source_language_get_id (l), ==, "xslt");
+	l = gtk_source_language_manager_guess_language (lm, "foo.xml", "application/xslt+xml");
+	g_assert_cmpstr (gtk_source_language_get_id (l), ==, "xslt");
 }
 
 int



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]