[easytag] Move remaining upper-casing functions to scan.c



commit b00b5ec6913be484159294c51e866f58b42516bb
Author: David King <amigadave amigadave com>
Date:   Tue Sep 16 17:19:51 2014 +0100

    Move remaining upper-casing functions to scan.c
    
    Split out Scan_Process_Fields_First_Letters_Uppercase() and
    Scan_Word_Is_Roman_Numeral() and add some parameters, so that they do
    not depend on the state of GSettings keys internally.

 src/scan.c        |  244 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/scan.h        |    1 +
 src/scan_dialog.c |  259 ++---------------------------------------------------
 src/scan_dialog.h |    2 -
 src/tag_area.c    |   11 ++-
 5 files changed, 262 insertions(+), 255 deletions(-)
---
diff --git a/src/scan.c b/src/scan.c
index cc74e04..f9b2f38 100644
--- a/src/scan.c
+++ b/src/scan.c
@@ -229,3 +229,247 @@ Scan_Process_Fields_Letter_Uppercase (const gchar *string)
 
     return g_string_free (string1, FALSE);
 }
+
+static gint
+Scan_Word_Is_Roman_Numeral (const gchar *text)
+{
+    /* No need for caseless strchr. */
+    static const gchar romans[] = "MmDdCcLlXxVvIi";
+
+    gsize next_allowed = 0;
+    gsize prev = 0;
+    gsize count = 0;
+    const gchar *i;
+
+    for (i = text; *i; i++)
+    {
+        const char *s = strchr (romans, *i);
+
+        if (s)
+        {
+            gsize c = (s - romans) / 2;
+
+            if (c < next_allowed)
+            {
+                return 0;
+            }
+
+            if (c < prev)
+            {
+                /* After subtraction, no more subtracted chars allowed. */
+                next_allowed = prev + 1;
+            }
+            else if (c == prev)
+            {
+                /* Allow indefinite repetition for m; three for c, x and i; and
+                 * none for d, l and v. */
+                if ((c && ++count > 3) || (c & 1))
+                {
+                    return 0;
+                }
+
+                /* No more subtraction. */
+                next_allowed = c;
+            }
+            else if (c && !(c & 1))
+            {
+                /* For first occurrence of c, x and i, allow "subtraction" from
+                 * 10 and 5 times self, reset counting. */
+                next_allowed = c - 2;
+                count = 1;
+            }
+
+            prev = c;
+        }
+        else
+        {
+            if (g_unichar_isalnum (g_utf8_get_char (i)))
+            {
+                return 0;
+            }
+
+            break;
+        }
+    }
+
+    /* Return length of found Roman numeral. */
+    return i - text;
+}
+
+/*
+ * Function to set the first letter of each word to uppercase, according the "Chicago Manual of Style" (http://www.docstyles.com/cmscrib.htm#Note2)
+ * No needed to reallocate
+ */
+void
+Scan_Process_Fields_First_Letters_Uppercase (gchar **str,
+                                             gboolean uppercase_preps,
+                                             gboolean handle_roman)
+{
+/**** DANIEL TEST *****
+    gchar *iter;
+    gchar utf8_character[6];
+    gboolean set_to_upper_case = TRUE;
+    gunichar c;
+
+    for (iter = text; *iter; iter = g_utf8_next_char(iter))
+    {
+        c = g_utf8_get_char(iter);
+        if (set_to_upper_case && g_unichar_islower(c))
+            strncpy(iter, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
+        else if (!set_to_upper_case && g_unichar_isupper(c))
+            strncpy(iter, utf8_character, g_unichar_to_utf8(g_unichar_tolower(c), utf8_character));
+
+        set_to_upper_case = (g_unichar_isalpha(c)
+                            || c == (gunichar)'.'
+                            || c == (gunichar)'\''
+                            || c == (gunichar)'`') ? FALSE : TRUE;
+    }
+****/
+/**** Barış Çiçek version ****/
+    gchar *string = *str;
+    gchar *word, *word1, *word2, *temp;
+    gint i, len;
+    gchar utf8_character[6];
+    gunichar c;
+    gboolean set_to_upper_case, set_to_upper_case_tmp;
+    // There have to be space at the end of words to seperate them from prefix
+    // Chicago Manual of Style "Heading caps" Capitalization Rules (CMS 1993, 282) (http://www.docstyles.com/cmscrib.htm#Note2)
+    const gchar * exempt[] =
+    {
+        "a ",       "a_",
+        "against ", "against_",
+        "an ",      "an_",
+        "and ",     "and_",
+        "at ",      "at_",
+        "between ", "between_",
+        "but ",     "but_",
+        "feat. ",   "feat._",
+        "for ",     "for_",
+        "in ",      "in_",
+        "nor ",     "nor_",
+        "of ",      "of_",
+        //"off ",     "off_",   // Removed by Slash Bunny
+        "on ",      "on_",
+        "or ",      "or_",
+        //"over ",    "over_",  // Removed by Slash Bunny
+        "so ",      "so_",
+        "the ",     "the_",
+        "to ",      "to_",
+        "with ",    "with_",
+        "yet ",     "yet_",
+        NULL
+    };
+
+    temp = Scan_Process_Fields_All_Downcase (string);
+    g_free (*str);
+    *str = string = temp;
+
+    if (!g_utf8_validate(string,-1,NULL))
+    {
+        /* FIXME: Translatable string. */
+        g_warning ("%s",
+                   "Scan_Process_Fields_First_Letters_Uppercase: Not valid UTF-8!");
+        return;
+    }
+    /* Removes trailing whitespace. */
+    string = g_strchomp(string);
+
+    temp = string;
+
+    /* If the word is a roman numeral, capitalize all of it. */
+    if (handle_roman && (len = Scan_Word_Is_Roman_Numeral (temp)))
+    {
+        gchar *tmp = g_utf8_strup (temp, len);
+        strncpy (string, tmp, len);
+        g_free (tmp);
+    }
+    else
+    {
+        // Set first character to uppercase
+        c = g_utf8_get_char(temp);
+        strncpy(string, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
+    }
+
+    // Uppercase first character of each word, except for 'exempt[]' words lists
+    while ( temp )
+    {
+        word = temp; // Needed if there is only one word
+        word1 = g_utf8_strchr(temp,-1,' ');
+        word2 = g_utf8_strchr(temp,-1,'_');
+
+        // Take the first string found (near beginning of string)
+        if (word1 && word2)
+            word = MIN(word1,word2);
+        else if (word1)
+            word = word1;
+        else if (word2)
+            word = word2;
+        else
+        {
+            // Last word of the string: the first letter is always uppercase,
+            // even if it's in the exempt list. This is a Chicago Manual of Style rule.
+            // Last Word In String - Should Capitalize Regardless of Word (Chicago Manual of Style)
+            c = g_utf8_get_char(word);
+            strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
+            break;
+        }
+
+        // Go to first character of the word (char. after ' ' or '_')
+        word = word+1;
+
+        // If the word is a roman numeral, capitalize all of it
+        if (handle_roman && (len = Scan_Word_Is_Roman_Numeral (word)))
+        {
+            gchar *tmp = g_utf8_strup (word, len);
+            strncpy (word, tmp, len);
+            g_free (tmp);
+        }
+        else
+        {
+            // Set uppercase the first character of this word
+            c = g_utf8_get_char(word);
+            strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
+
+            if (uppercase_preps)
+            {
+                goto increment;
+            }
+
+            /* Lowercase the first character of this word if found in the
+             * exempt words list. */
+            for (i=0; exempt[i]!=NULL; i++)
+            {
+                if (g_ascii_strncasecmp(exempt[i], word, strlen(exempt[i])) == 0)
+                {
+                    c = g_utf8_get_char(word);
+                    strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_tolower(c), utf8_character));
+                    break;
+                }
+            }
+        }
+
+increment:
+        temp = word;
+    }
+
+    // Uppercase letter placed after some characters like '(', '[', '{'
+    set_to_upper_case = FALSE;
+    for (temp = string; *temp; temp = g_utf8_next_char(temp))
+    {
+        c = g_utf8_get_char(temp);
+        set_to_upper_case_tmp = (  c == (gunichar)'('
+                                || c == (gunichar)'['
+                                || c == (gunichar)'{'
+                                || c == (gunichar)'"'
+                                || c == (gunichar)':'
+                                || c == (gunichar)'.'
+                                || c == (gunichar)'`'
+                                || c == (gunichar)'-'
+                                ) ? TRUE : FALSE;
+
+        if (set_to_upper_case && g_unichar_islower(c))
+            strncpy(temp, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
+
+        set_to_upper_case = set_to_upper_case_tmp;
+    }
+}
diff --git a/src/scan.h b/src/scan.h
index e8a8e05..4e6a479 100644
--- a/src/scan.h
+++ b/src/scan.h
@@ -33,6 +33,7 @@ void Scan_Remove_Spaces (gchar *string);
 gchar* Scan_Process_Fields_All_Uppercase (const gchar *string);
 gchar* Scan_Process_Fields_All_Downcase (const gchar *string);
 gchar* Scan_Process_Fields_Letter_Uppercase (const gchar *string);
+void Scan_Process_Fields_First_Letters_Uppercase (gchar **str, gboolean uppercase_preps, gboolean handle_roman);
 
 G_END_DECLS
 
diff --git a/src/scan_dialog.c b/src/scan_dialog.c
index d086014..9a097b6 100644
--- a/src/scan_dialog.c
+++ b/src/scan_dialog.c
@@ -1171,7 +1171,15 @@ Scan_Process_Fields_Functions (EtScanDialog *self, gchar **string)
 
     if (gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(priv->process_first_style_uppercase_toggle)))
     {
-        Scan_Process_Fields_First_Letters_Uppercase (self, string);
+        gboolean uppercase_preps;
+        gboolean handle_roman;
+
+        uppercase_preps = g_settings_get_boolean (MainSettings,
+                                                  "process-uppercase-prepositions");
+        handle_roman = g_settings_get_boolean (MainSettings,
+                                               "process-detect-roman-numerals");
+        Scan_Process_Fields_First_Letters_Uppercase (string, uppercase_preps,
+                                                     handle_roman);
     }
 
     if (gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(priv->process_remove_space_toggle)))
@@ -1451,255 +1459,6 @@ Scan_Process_Fields (EtScanDialog *self, ET_File *ETFile)
 
 }
 
-static gint
-Scan_Word_Is_Roman_Numeral (EtScanDialog *self, const gchar *text)
-{
-    EtScanDialogPrivate *priv;
-
-    /* No need for caseless strchr. */
-    static const gchar romans[] = "MmDdCcLlXxVvIi";
-
-    gsize next_allowed = 0;
-    gsize prev = 0;
-    gsize count = 0;
-    const gchar *i;
-
-    priv = et_scan_dialog_get_instance_private (self);
-
-    if (!gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (priv->process_roman_numerals_check)))
-    {
-        return 0;
-    }
-    
-    for (i = text; *i; i++)
-    {
-        const char *s = strchr (romans, *i);
-
-        if (s)
-        {
-            gsize c = (s - romans) / 2;
-
-            if (c < next_allowed)
-            {
-                return 0;
-            }
-
-            if (c < prev)
-            {
-                /* After subtraction, no more subtracted chars allowed. */
-                next_allowed = prev + 1;
-            }
-            else if (c == prev)
-            {
-                /* Allow indefinite repetition for m; three for c, x and i; and
-                 * none for d, l and v. */
-                if ((c && ++count > 3) || (c & 1))
-                {
-                    return 0;
-                }
-
-                /* No more subtraction. */
-                next_allowed = c;
-            }
-            else if (c && !(c & 1))
-            {
-                /* For first occurrence of c, x and i, allow "subtraction" from
-                 * 10 and 5 times self, reset counting. */
-                next_allowed = c - 2;
-                count = 1;
-            }
-
-            prev = c;
-        }
-        else
-        {
-            if (g_unichar_isalnum (g_utf8_get_char (i)))
-            {
-                return 0;
-            }
-
-            break;
-        }
-    }
-
-    /* Return length of found Roman numeral. */
-    return i - text;
-}
-
-/*
- * Function to set the first letter of each word to uppercase, according the "Chicago Manual of Style" (http://www.docstyles.com/cmscrib.htm#Note2)
- * No needed to reallocate
- */
-void
-Scan_Process_Fields_First_Letters_Uppercase (EtScanDialog *self, gchar **str)
-{
-/**** DANIEL TEST *****
-    gchar *iter;
-    gchar utf8_character[6];
-    gboolean set_to_upper_case = TRUE;
-    gunichar c;
-
-    for (iter = text; *iter; iter = g_utf8_next_char(iter))
-    {
-        c = g_utf8_get_char(iter);
-        if (set_to_upper_case && g_unichar_islower(c))
-            strncpy(iter, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
-        else if (!set_to_upper_case && g_unichar_isupper(c))
-            strncpy(iter, utf8_character, g_unichar_to_utf8(g_unichar_tolower(c), utf8_character));
-
-        set_to_upper_case = (g_unichar_isalpha(c)
-                            || c == (gunichar)'.'
-                            || c == (gunichar)'\''
-                            || c == (gunichar)'`') ? FALSE : TRUE;
-    }
-****/
-/**** Barış Çiçek version ****/
-    gchar *string = *str;
-    gchar *word, *word1, *word2, *temp;
-    gint i, len;
-    gchar utf8_character[6];
-    gunichar c;
-    gboolean set_to_upper_case, set_to_upper_case_tmp;
-    // There have to be space at the end of words to seperate them from prefix
-    // Chicago Manual of Style "Heading caps" Capitalization Rules (CMS 1993, 282) (http://www.docstyles.com/cmscrib.htm#Note2)
-    const gchar * exempt[] =
-    {
-        "a ",       "a_",
-        "against ", "against_",
-        "an ",      "an_",
-        "and ",     "and_",
-        "at ",      "at_",
-        "between ", "between_",
-        "but ",     "but_",
-        //"feat. ",   "feat._", // Removed by Slash Bunny
-        "for ",     "for_",
-        "in ",      "in_",
-        "nor ",     "nor_",
-        "of ",      "of_",
-        //"off ",     "off_",   // Removed by Slash Bunny
-        "on ",      "on_",
-        "or ",      "or_",
-        //"over ",    "over_",  // Removed by Slash Bunny
-        "so ",      "so_",
-        "the ",     "the_",
-        "to ",      "to_",
-        "with ",    "with_",
-        "yet ",     "yet_",
-        NULL
-    };
-
-    temp = Scan_Process_Fields_All_Downcase (string);
-    g_free (*str);
-    *str = string = temp;
-
-    if (!g_utf8_validate(string,-1,NULL))
-    {
-        Log_Print(LOG_ERROR,"Scan_Process_Fields_First_Letters_Uppercase: Not a valid utf8! quiting");
-        return;
-    }
-    /* Removes trailing whitespace. */
-    string = g_strchomp(string);
-
-    temp = string;
-
-    /* If the word is a roman numeral, capitalize all of it. */
-    if ((len = Scan_Word_Is_Roman_Numeral (self, temp)))
-    {
-        gchar *tmp = g_utf8_strup (temp, len);
-        strncpy (string, tmp, len);
-        g_free (tmp);
-    } else
-    {
-        // Set first character to uppercase
-        c = g_utf8_get_char(temp);
-        strncpy(string, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
-    }
-
-    // Uppercase first character of each word, except for 'exempt[]' words lists
-    while ( temp )
-    {
-        word = temp; // Needed if there is only one word
-        word1 = g_utf8_strchr(temp,-1,' ');
-        word2 = g_utf8_strchr(temp,-1,'_');
-
-        // Take the first string found (near beginning of string)
-        if (word1 && word2)
-            word = MIN(word1,word2);
-        else if (word1)
-            word = word1;
-        else if (word2)
-            word = word2;
-        else
-        {
-            // Last word of the string: the first letter is always uppercase,
-            // even if it's in the exempt list. This is a Chicago Manual of Style rule.
-            // Last Word In String - Should Capitalize Regardless of Word (Chicago Manual of Style)
-            c = g_utf8_get_char(word);
-            strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
-            break;
-        }
-
-        // Go to first character of the word (char. after ' ' or '_')
-        word = word+1;
-
-        // If the word is a roman numeral, capitalize all of it
-        if ((len = Scan_Word_Is_Roman_Numeral (self, word)))
-        {
-            gchar *tmp = g_utf8_strup (word, len);
-            strncpy (word, tmp, len);
-            g_free (tmp);
-        } else
-        {
-            // Set uppercase the first character of this word
-            c = g_utf8_get_char(word);
-            strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
-
-            if (g_settings_get_boolean (MainSettings,
-                                        "process-uppercase-prepositions"))
-            {
-                goto increment;
-            }
-
-            /* Lowercase the first character of this word if found in the
-             * exempt words list. */
-            for (i=0; exempt[i]!=NULL; i++)
-            {
-                if (g_ascii_strncasecmp(exempt[i], word, strlen(exempt[i])) == 0)
-                {
-                    c = g_utf8_get_char(word);
-                    strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_tolower(c), utf8_character));
-                    break;
-                }
-            }
-        }
-
-increment:
-        temp = word;
-    }
-
-    // Uppercase letter placed after some characters like '(', '[', '{'
-    set_to_upper_case = FALSE;
-    for (temp = string; *temp; temp = g_utf8_next_char(temp))
-    {
-        c = g_utf8_get_char(temp);
-        set_to_upper_case_tmp = (  c == (gunichar)'('
-                                || c == (gunichar)'['
-                                || c == (gunichar)'{'
-                                || c == (gunichar)'"'
-                                || c == (gunichar)':'
-                                || c == (gunichar)'.'
-                                || c == (gunichar)'`'
-                                || c == (gunichar)'-'
-                                ) ? TRUE : FALSE;
-
-        if (set_to_upper_case && g_unichar_islower(c))
-            strncpy(temp, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
-
-        set_to_upper_case = set_to_upper_case_tmp;
-    }
-
-}
-
 /*
  * Return the field of a 'File_Tag' structure corresponding to the mask code
  */
diff --git a/src/scan_dialog.h b/src/scan_dialog.h
index ce9dabe..f54dfb1 100644
--- a/src/scan_dialog.h
+++ b/src/scan_dialog.h
@@ -68,6 +68,4 @@ gchar *Scan_Generate_New_Directory_Name_From_Mask (ET_File *ETFile, gchar *mask,
 
 void entry_check_rename_file_mask (GtkEntry *entry, gpointer user_data);
 
-void Scan_Process_Fields_First_Letters_Uppercase (EtScanDialog *self, gchar **str);
-
 #endif /* ET_SCAN_DIALOG_H_ */
diff --git a/src/tag_area.c b/src/tag_area.c
index 1050fb2..f8862f2 100644
--- a/src/tag_area.c
+++ b/src/tag_area.c
@@ -741,13 +741,18 @@ Convert_Letter_Uppercase (GtkWidget *entry)
 static void
 Convert_First_Letters_Uppercase (GtkWidget *entry)
 {
-    EtScanDialog *dialog;
     gchar *string;
+    gboolean uppercase_preps;
+    gboolean handle_roman;
 
     string = g_strdup (gtk_entry_get_text (GTK_ENTRY (entry)));
-    dialog = ET_SCAN_DIALOG (et_application_window_get_scan_dialog (ET_APPLICATION_WINDOW (MainWindow)));
+    uppercase_preps = g_settings_get_boolean (MainSettings,
+                                              "process-uppercase-prepositions");
+    handle_roman = g_settings_get_boolean (MainSettings,
+                                           "process-detect-roman-numerals");
 
-    Scan_Process_Fields_First_Letters_Uppercase (dialog, &string);
+    Scan_Process_Fields_First_Letters_Uppercase (&string, uppercase_preps,
+                                                 handle_roman);
     gtk_entry_set_text (GTK_ENTRY (entry), string);
     g_free (string);
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]