[easytag] Move remaining upper-casing functions to scan.c
- From: David King <davidk src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [easytag] Move remaining upper-casing functions to scan.c
- Date: Tue, 16 Sep 2014 21:37:10 +0000 (UTC)
commit b00b5ec6913be484159294c51e866f58b42516bb
Author: David King <amigadave amigadave com>
Date: Tue Sep 16 17:19:51 2014 +0100
Move remaining upper-casing functions to scan.c
Split out Scan_Process_Fields_First_Letters_Uppercase() and
Scan_Word_Is_Roman_Numeral() and add some parameters, so that they do
not depend on the state of GSettings keys internally.
src/scan.c | 244 +++++++++++++++++++++++++++++++++++++++++++++++++
src/scan.h | 1 +
src/scan_dialog.c | 259 ++---------------------------------------------------
src/scan_dialog.h | 2 -
src/tag_area.c | 11 ++-
5 files changed, 262 insertions(+), 255 deletions(-)
---
diff --git a/src/scan.c b/src/scan.c
index cc74e04..f9b2f38 100644
--- a/src/scan.c
+++ b/src/scan.c
@@ -229,3 +229,247 @@ Scan_Process_Fields_Letter_Uppercase (const gchar *string)
return g_string_free (string1, FALSE);
}
+/*
+ * Check whether @text starts with a well-formed Roman numeral.
+ *
+ * Returns 0 if the leading run of numeral letters breaks the rules checked
+ * below, or if that run is directly followed by an alphanumeric character
+ * (so the numeral is only part of a longer word). Otherwise returns the
+ * number of bytes scanned before the first non-numeral character or the end
+ * of the string; this is also 0 when @text does not start with a numeral
+ * letter.
+ *
+ * NOTE(review): a leading d, l or v never sets next_allowed, so a sequence
+ * such as "vx" appears to pass all of the checks -- confirm whether that is
+ * intended.
+ */
+static gint
+Scan_Word_Is_Roman_Numeral (const gchar *text)
+{
+ /* Both cases of every numeral are listed side by side, so a plain strchr
+ * is enough and (index / 2) is the numeral's rank: 0 for M (largest) up to
+ * 6 for I (smallest). Odd ranks are the "five" numerals D, L and V. */
+ static const gchar romans[] = "MmDdCcLlXxVvIi";
+
+ gsize next_allowed = 0; /* smallest rank (largest numeral) still accepted */
+ gsize prev = 0; /* rank of the previous numeral; starts as M's rank */
+ gsize count = 0; /* length of the current run of one repeated numeral */
+ const gchar *i;
+
+ for (i = text; *i; i++)
+ {
+ const char *s = strchr (romans, *i);
+
+ if (s)
+ {
+ gsize c = (s - romans) / 2;
+
+ if (c < next_allowed)
+ {
+ return 0;
+ }
+
+ if (c < prev)
+ {
+ /* A larger numeral follows a smaller one (subtractive pair such
+ * as IX): from now on only numerals strictly smaller than the
+ * subtracted one are accepted. */
+ next_allowed = prev + 1;
+ }
+ else if (c == prev)
+ {
+ /* Allow indefinite repetition for m; three for c, x and i; and
+ * none for d, l and v. */
+ if ((c && ++count > 3) || (c & 1))
+ {
+ return 0;
+ }
+
+ /* A repeated numeral can no longer be subtracted from a larger one. */
+ next_allowed = c;
+ }
+ else if (c && !(c & 1))
+ {
+ /* For first occurrence of c, x and i, allow "subtraction" from
+ * 10 and 5 times self (ranks c - 2 and c - 1), reset counting. */
+ next_allowed = c - 2;
+ count = 1;
+ }
+
+ prev = c;
+ }
+ else
+ {
+ /* Not a numeral letter: if it is alphanumeric, the numeral letters
+ * were only the start of an ordinary word. */
+ if (g_unichar_isalnum (g_utf8_get_char (i)))
+ {
+ return 0;
+ }
+
+ break;
+ }
+ }
+
+ /* Return the length in bytes of the Roman numeral that was found. */
+ return i - text;
+}
+
+/*
+ * Set the first letter of each word of *str to uppercase, according to the
+ * "Chicago Manual of Style" (http://www.docstyles.com/cmscrib.htm#Note2).
+ *
+ * @str: in/out string. The original *str is released with g_free() and
+ *       replaced by the buffer returned from
+ *       Scan_Process_Fields_All_Downcase(), which is then edited in place.
+ * @uppercase_preps: if TRUE, words from the exempt[] list are capitalized
+ *       like any other word; if FALSE, their first letter is lowercased
+ *       again.
+ * @handle_roman: if TRUE, a word accepted by Scan_Word_Is_Roman_Numeral()
+ *       is uppercased entirely.
+ *
+ * Words are separated by ' ' or '_'. If the downcased text is not valid
+ * UTF-8, a warning is emitted and the function returns, leaving *str
+ * downcased.
+ *
+ * NOTE(review): each in-place case change copies the re-encoded character
+ * over the original bytes with strncpy(), which presumably assumes that the
+ * converted character has the same UTF-8 length as the original -- confirm.
+ */
+void
+Scan_Process_Fields_First_Letters_Uppercase (gchar **str,
+ gboolean uppercase_preps,
+ gboolean handle_roman)
+{
+/**** DANIEL TEST *****
+ gchar *iter;
+ gchar utf8_character[6];
+ gboolean set_to_upper_case = TRUE;
+ gunichar c;
+
+ for (iter = text; *iter; iter = g_utf8_next_char(iter))
+ {
+ c = g_utf8_get_char(iter);
+ if (set_to_upper_case && g_unichar_islower(c))
+ strncpy(iter, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
+ else if (!set_to_upper_case && g_unichar_isupper(c))
+ strncpy(iter, utf8_character, g_unichar_to_utf8(g_unichar_tolower(c), utf8_character));
+
+ set_to_upper_case = (g_unichar_isalpha(c)
+ || c == (gunichar)'.'
+ || c == (gunichar)'\''
+ || c == (gunichar)'`') ? FALSE : TRUE;
+ }
+****/
+/**** Barış Çiçek version ****/
+ gchar *string = *str;
+ gchar *word, *word1, *word2, *temp;
+ gint i, len;
+ gchar utf8_character[6]; /* scratch buffer for one UTF-8 encoded character */
+ gunichar c;
+ gboolean set_to_upper_case, set_to_upper_case_tmp;
+ // Every entry ends with a separator (' ' or '_') so that it only matches a whole word and not the prefix of a longer word
+ // Chicago Manual of Style "Heading caps" Capitalization Rules (CMS 1993, 282)
(http://www.docstyles.com/cmscrib.htm#Note2)
+ const gchar * exempt[] =
+ {
+ "a ", "a_",
+ "against ", "against_",
+ "an ", "an_",
+ "and ", "and_",
+ "at ", "at_",
+ "between ", "between_",
+ "but ", "but_",
+ "feat. ", "feat._",
+ "for ", "for_",
+ "in ", "in_",
+ "nor ", "nor_",
+ "of ", "of_",
+ //"off ", "off_", // Removed by Slash Bunny
+ "on ", "on_",
+ "or ", "or_",
+ //"over ", "over_", // Removed by Slash Bunny
+ "so ", "so_",
+ "the ", "the_",
+ "to ", "to_",
+ "with ", "with_",
+ "yet ", "yet_",
+ NULL
+ };
+
+ temp = Scan_Process_Fields_All_Downcase (string); /* start from an all-lowercase copy */
+ g_free (*str);
+ *str = string = temp; /* the caller now owns the new buffer */
+
+ if (!g_utf8_validate(string,-1,NULL))
+ {
+ /* FIXME: Translatable string. */
+ g_warning ("%s",
+ "Scan_Process_Fields_First_Letters_Uppercase: Not valid UTF-8!");
+ return;
+ }
+ /* Removes trailing whitespace. */
+ string = g_strchomp(string);
+
+ temp = string;
+
+ /* If the first word is a roman numeral, capitalize all of it. */
+ if (handle_roman && (len = Scan_Word_Is_Roman_Numeral (temp)))
+ {
+ gchar *tmp = g_utf8_strup (temp, len);
+ strncpy (string, tmp, len);
+ g_free (tmp);
+ }
+ else
+ {
+ // Set first character to uppercase
+ c = g_utf8_get_char(temp);
+ strncpy(string, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
+ }
+
+ // Uppercase first character of each word, except for the words in the 'exempt[]' list
+ while ( temp ) // temp is never set to NULL in the body: the loop ends through the 'break' below
+ {
+ word = temp; // Needed if there is only one word
+ word1 = g_utf8_strchr(temp,-1,' ');
+ word2 = g_utf8_strchr(temp,-1,'_');
+
+ // Take the separator found first (nearest to the beginning of the string)
+ if (word1 && word2)
+ word = MIN(word1,word2);
+ else if (word1)
+ word = word1;
+ else if (word2)
+ word = word2;
+ else
+ {
+ // Last word of the string: the first letter is always uppercase,
+ // even if it's in the exempt list. This is a Chicago Manual of Style rule.
+ // Last Word In String - Should Capitalize Regardless of Word (Chicago Manual of Style)
+ c = g_utf8_get_char(word);
+ strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
+ break;
+ }
+
+ // Go to first character of the word (char. after ' ' or '_')
+ word = word+1;
+
+ // If the word is a roman numeral, capitalize all of it (the exempt list is not consulted in that case)
+ if (handle_roman && (len = Scan_Word_Is_Roman_Numeral (word)))
+ {
+ gchar *tmp = g_utf8_strup (word, len);
+ strncpy (word, tmp, len);
+ g_free (tmp);
+ }
+ else
+ {
+ // Set the first character of this word to uppercase
+ c = g_utf8_get_char(word);
+ strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
+
+ if (uppercase_preps)
+ {
+ goto increment;
+ }
+
+ /* Lowercase the first character of this word if found in the
+ * exempt words list. */
+ for (i=0; exempt[i]!=NULL; i++)
+ {
+ if (g_ascii_strncasecmp(exempt[i], word, strlen(exempt[i])) == 0)
+ {
+ c = g_utf8_get_char(word);
+ strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_tolower(c), utf8_character));
+ break;
+ }
+ }
+ }
+
+increment:
+ temp = word;
+ }
+
+ // Uppercase a lowercase letter placed directly after one of the characters ( [ { " : . ` -
+ set_to_upper_case = FALSE;
+ for (temp = string; *temp; temp = g_utf8_next_char(temp))
+ {
+ c = g_utf8_get_char(temp);
+ set_to_upper_case_tmp = ( c == (gunichar)'('
+ || c == (gunichar)'['
+ || c == (gunichar)'{'
+ || c == (gunichar)'"'
+ || c == (gunichar)':'
+ || c == (gunichar)'.'
+ || c == (gunichar)'`'
+ || c == (gunichar)'-'
+ ) ? TRUE : FALSE;
+
+ if (set_to_upper_case && g_unichar_islower(c))
+ strncpy(temp, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
+
+ set_to_upper_case = set_to_upper_case_tmp;
+ }
+}
diff --git a/src/scan.h b/src/scan.h
index e8a8e05..4e6a479 100644
--- a/src/scan.h
+++ b/src/scan.h
@@ -33,6 +33,7 @@ void Scan_Remove_Spaces (gchar *string);
gchar* Scan_Process_Fields_All_Uppercase (const gchar *string);
gchar* Scan_Process_Fields_All_Downcase (const gchar *string);
gchar* Scan_Process_Fields_Letter_Uppercase (const gchar *string);
+void Scan_Process_Fields_First_Letters_Uppercase (gchar **str, gboolean uppercase_preps, gboolean handle_roman);
G_END_DECLS
diff --git a/src/scan_dialog.c b/src/scan_dialog.c
index d086014..9a097b6 100644
--- a/src/scan_dialog.c
+++ b/src/scan_dialog.c
@@ -1171,7 +1171,15 @@ Scan_Process_Fields_Functions (EtScanDialog *self, gchar **string)
if (gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(priv->process_first_style_uppercase_toggle)))
{
- Scan_Process_Fields_First_Letters_Uppercase (self, string);
+ gboolean uppercase_preps;
+ gboolean handle_roman;
+
+ uppercase_preps = g_settings_get_boolean (MainSettings,
+ "process-uppercase-prepositions");
+ handle_roman = g_settings_get_boolean (MainSettings,
+ "process-detect-roman-numerals");
+ Scan_Process_Fields_First_Letters_Uppercase (string, uppercase_preps,
+ handle_roman);
}
if (gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(priv->process_remove_space_toggle)))
@@ -1451,255 +1459,6 @@ Scan_Process_Fields (EtScanDialog *self, ET_File *ETFile)
}
-static gint
-Scan_Word_Is_Roman_Numeral (EtScanDialog *self, const gchar *text)
-{
- EtScanDialogPrivate *priv;
-
- /* No need for caseless strchr. */
- static const gchar romans[] = "MmDdCcLlXxVvIi";
-
- gsize next_allowed = 0;
- gsize prev = 0;
- gsize count = 0;
- const gchar *i;
-
- priv = et_scan_dialog_get_instance_private (self);
-
- if (!gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (priv->process_roman_numerals_check)))
- {
- return 0;
- }
-
- for (i = text; *i; i++)
- {
- const char *s = strchr (romans, *i);
-
- if (s)
- {
- gsize c = (s - romans) / 2;
-
- if (c < next_allowed)
- {
- return 0;
- }
-
- if (c < prev)
- {
- /* After subtraction, no more subtracted chars allowed. */
- next_allowed = prev + 1;
- }
- else if (c == prev)
- {
- /* Allow indefinite repetition for m; three for c, x and i; and
- * none for d, l and v. */
- if ((c && ++count > 3) || (c & 1))
- {
- return 0;
- }
-
- /* No more subtraction. */
- next_allowed = c;
- }
- else if (c && !(c & 1))
- {
- /* For first occurrence of c, x and i, allow "subtraction" from
- * 10 and 5 times self, reset counting. */
- next_allowed = c - 2;
- count = 1;
- }
-
- prev = c;
- }
- else
- {
- if (g_unichar_isalnum (g_utf8_get_char (i)))
- {
- return 0;
- }
-
- break;
- }
- }
-
- /* Return length of found Roman numeral. */
- return i - text;
-}
-
-/*
- * Function to set the first letter of each word to uppercase, according the "Chicago Manual of Style" (http://www.docstyles.com/cmscrib.htm#Note2)
- * No needed to reallocate
- */
-void
-Scan_Process_Fields_First_Letters_Uppercase (EtScanDialog *self, gchar **str)
-{
-/**** DANIEL TEST *****
- gchar *iter;
- gchar utf8_character[6];
- gboolean set_to_upper_case = TRUE;
- gunichar c;
-
- for (iter = text; *iter; iter = g_utf8_next_char(iter))
- {
- c = g_utf8_get_char(iter);
- if (set_to_upper_case && g_unichar_islower(c))
- strncpy(iter, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
- else if (!set_to_upper_case && g_unichar_isupper(c))
- strncpy(iter, utf8_character, g_unichar_to_utf8(g_unichar_tolower(c), utf8_character));
-
- set_to_upper_case = (g_unichar_isalpha(c)
- || c == (gunichar)'.'
- || c == (gunichar)'\''
- || c == (gunichar)'`') ? FALSE : TRUE;
- }
-****/
-/**** Barış Çiçek version ****/
- gchar *string = *str;
- gchar *word, *word1, *word2, *temp;
- gint i, len;
- gchar utf8_character[6];
- gunichar c;
- gboolean set_to_upper_case, set_to_upper_case_tmp;
- // There have to be space at the end of words to seperate them from prefix
- // Chicago Manual of Style "Heading caps" Capitalization Rules (CMS 1993, 282) (http://www.docstyles.com/cmscrib.htm#Note2)
- const gchar * exempt[] =
- {
- "a ", "a_",
- "against ", "against_",
- "an ", "an_",
- "and ", "and_",
- "at ", "at_",
- "between ", "between_",
- "but ", "but_",
- //"feat. ", "feat._", // Removed by Slash Bunny
- "for ", "for_",
- "in ", "in_",
- "nor ", "nor_",
- "of ", "of_",
- //"off ", "off_", // Removed by Slash Bunny
- "on ", "on_",
- "or ", "or_",
- //"over ", "over_", // Removed by Slash Bunny
- "so ", "so_",
- "the ", "the_",
- "to ", "to_",
- "with ", "with_",
- "yet ", "yet_",
- NULL
- };
-
- temp = Scan_Process_Fields_All_Downcase (string);
- g_free (*str);
- *str = string = temp;
-
- if (!g_utf8_validate(string,-1,NULL))
- {
- Log_Print(LOG_ERROR,"Scan_Process_Fields_First_Letters_Uppercase: Not a valid utf8! quiting");
- return;
- }
- /* Removes trailing whitespace. */
- string = g_strchomp(string);
-
- temp = string;
-
- /* If the word is a roman numeral, capitalize all of it. */
- if ((len = Scan_Word_Is_Roman_Numeral (self, temp)))
- {
- gchar *tmp = g_utf8_strup (temp, len);
- strncpy (string, tmp, len);
- g_free (tmp);
- } else
- {
- // Set first character to uppercase
- c = g_utf8_get_char(temp);
- strncpy(string, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
- }
-
- // Uppercase first character of each word, except for 'exempt[]' words lists
- while ( temp )
- {
- word = temp; // Needed if there is only one word
- word1 = g_utf8_strchr(temp,-1,' ');
- word2 = g_utf8_strchr(temp,-1,'_');
-
- // Take the first string found (near beginning of string)
- if (word1 && word2)
- word = MIN(word1,word2);
- else if (word1)
- word = word1;
- else if (word2)
- word = word2;
- else
- {
- // Last word of the string: the first letter is always uppercase,
- // even if it's in the exempt list. This is a Chicago Manual of Style rule.
- // Last Word In String - Should Capitalize Regardless of Word (Chicago Manual of Style)
- c = g_utf8_get_char(word);
- strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
- break;
- }
-
- // Go to first character of the word (char. after ' ' or '_')
- word = word+1;
-
- // If the word is a roman numeral, capitalize all of it
- if ((len = Scan_Word_Is_Roman_Numeral (self, word)))
- {
- gchar *tmp = g_utf8_strup (word, len);
- strncpy (word, tmp, len);
- g_free (tmp);
- } else
- {
- // Set uppercase the first character of this word
- c = g_utf8_get_char(word);
- strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
-
- if (g_settings_get_boolean (MainSettings,
- "process-uppercase-prepositions"))
- {
- goto increment;
- }
-
- /* Lowercase the first character of this word if found in the
- * exempt words list. */
- for (i=0; exempt[i]!=NULL; i++)
- {
- if (g_ascii_strncasecmp(exempt[i], word, strlen(exempt[i])) == 0)
- {
- c = g_utf8_get_char(word);
- strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_tolower(c), utf8_character));
- break;
- }
- }
- }
-
-increment:
- temp = word;
- }
-
- // Uppercase letter placed after some characters like '(', '[', '{'
- set_to_upper_case = FALSE;
- for (temp = string; *temp; temp = g_utf8_next_char(temp))
- {
- c = g_utf8_get_char(temp);
- set_to_upper_case_tmp = ( c == (gunichar)'('
- || c == (gunichar)'['
- || c == (gunichar)'{'
- || c == (gunichar)'"'
- || c == (gunichar)':'
- || c == (gunichar)'.'
- || c == (gunichar)'`'
- || c == (gunichar)'-'
- ) ? TRUE : FALSE;
-
- if (set_to_upper_case && g_unichar_islower(c))
- strncpy(temp, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
-
- set_to_upper_case = set_to_upper_case_tmp;
- }
-
-}
-
/*
* Return the field of a 'File_Tag' structure corresponding to the mask code
*/
diff --git a/src/scan_dialog.h b/src/scan_dialog.h
index ce9dabe..f54dfb1 100644
--- a/src/scan_dialog.h
+++ b/src/scan_dialog.h
@@ -68,6 +68,4 @@ gchar *Scan_Generate_New_Directory_Name_From_Mask (ET_File *ETFile, gchar *mask,
void entry_check_rename_file_mask (GtkEntry *entry, gpointer user_data);
-void Scan_Process_Fields_First_Letters_Uppercase (EtScanDialog *self, gchar **str);
-
#endif /* ET_SCAN_DIALOG_H_ */
diff --git a/src/tag_area.c b/src/tag_area.c
index 1050fb2..f8862f2 100644
--- a/src/tag_area.c
+++ b/src/tag_area.c
@@ -741,13 +741,18 @@ Convert_Letter_Uppercase (GtkWidget *entry)
static void
Convert_First_Letters_Uppercase (GtkWidget *entry)
{
- EtScanDialog *dialog;
gchar *string;
+ gboolean uppercase_preps;
+ gboolean handle_roman;
string = g_strdup (gtk_entry_get_text (GTK_ENTRY (entry)));
- dialog = ET_SCAN_DIALOG (et_application_window_get_scan_dialog (ET_APPLICATION_WINDOW (MainWindow)));
+ uppercase_preps = g_settings_get_boolean (MainSettings,
+ "process-uppercase-prepositions");
+ handle_roman = g_settings_get_boolean (MainSettings,
+ "process-detect-roman-numerals");
- Scan_Process_Fields_First_Letters_Uppercase (dialog, &string);
+ Scan_Process_Fields_First_Letters_Uppercase (&string, uppercase_preps,
+ handle_roman);
gtk_entry_set_text (GTK_ENTRY (entry), string);
g_free (string);
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]