add date parser to TextChainerBackend.cs
- From: dave <davidr sucs org>
- To: dashboard <dashboard-hackers gnome org>
- Subject: add date parser to TextChainerBackend.cs
- Date: Sat, 06 Mar 2004 00:03:25 +0000
Attached is a small patch to parse dates from text. It tries to deal
sensibly with ambiguous dates, and copes reasonably well with a variety
of formats.
cheers,
dave
--- dashboard/backends/TextChainerBackend.cs 2004-03-06 00:00:32.572809844 +0000
+++ TextChainerBackend-date.cs 2004-03-05 23:59:59.875394530 +0000
@@ -38,6 +38,18 @@
private const string REGEX_BUGZILLA
= "#([1-9][0-9]*)";
+
+ // some regexes to match dates. these probably won't match all dates,
+ // or non-english dates, but it's a start.
+ private const string MONTHS
+ = "(?<month>(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\w*)";
+ private const string REGEX_DATE1
+ = "((?<day>\\d{1,4})(\\-|\\/)(?<month>\\d{1,2})(\\-|\\/)(?<year>\\d{1,4}))";
+ private const string REGEX_DATE2
+ = "((?<day>\\d{1,2})\\w*\\s"+MONTHS+"\\s((?<year>\\d{2}){1,2}))";
+ private const string REGEX_DATE3
+ = "("+MONTHS+"\\s(?<day>\\d{1,2})[\\s,]+(\\d\\d:{0,1}){0,3}\\D*(?<year>\\d{4}))";
+
public override bool Startup ()
{
@@ -131,6 +143,9 @@
newclues.AddRange (GetPatternMatches (clue, "email", REGEX_EMAIL));
newclues.AddRange (GetPatternMatches (clue, "phone", REGEX_PHONE));
newclues.AddRange (GetPatternMatches (clue, "bugzilla", REGEX_BUGZILLA));
+ newclues.AddRange (GetPatternMatches (clue, "date", REGEX_DATE1));
+ newclues.AddRange (GetPatternMatches (clue, "date", REGEX_DATE2));
+ newclues.AddRange (GetPatternMatches (clue, "date", REGEX_DATE3));
// convert htmlblocks to textblocks
ConvertHtmlToText (cp, newclues);
@@ -142,6 +157,62 @@
return result;
}
+ // Convert a date match to YYYY-MM-DD. Returns a 2-element array: [0] is the normalized date, [1] is the day/month-swapped alternative for an ambiguous REGEX_DATE1 match, otherwise null.
+ private String [] NormalizeDateString (Regex rx, System.Text.RegularExpressions.Match m) {
+ String day = m.Groups["day"].Value;
+ String month = m.Groups["month"].Value;
+ String year = m.Groups["year"].Value;
+ String [] results = new String[2];
+
+ // swap day & year if they're (unambiguously) the wrong way round
+ // useful for dealing with e.g. 02/05/78 and 78/05/02
+ if (((day.Length == 4) && (year.Length <= 2)) ||
+ ((System.Convert.ToInt32 (day) > 31) && (System.Convert.ToInt32 (year) <= 31)) ||
+ ((year.Length == 1) && (day.Length == 2)) ) {
+ day = year;
+ year = m.Groups["day"].Value; // the text originally captured as "day" becomes the year
+ }
+
+ // convert month to a number: month names are cut to their first three letters and lowercased, then mapped to "01".."12"
+ if (month.Length > 3) // NOTE(review): a month of exactly 3 letters (e.g. "Jan") skips this branch, is not lowercased, and so would not equal "jan" below - confirm
+ month = month.Substring (0, 3).ToLower ();
+ if (month == "jan") month = "01";
+ if (month == "feb") month = "02";
+ if (month == "mar") month = "03";
+ if (month == "apr") month = "04";
+ if (month == "may") month = "05";
+ if (month == "jun") month = "06";
+ if (month == "jul") month = "07";
+ if (month == "aug") month = "08";
+ if (month == "sep") month = "09";
+ if (month == "oct") month = "10";
+ if (month == "nov") month = "11";
+ if (month == "dec") month = "12";
+
+ if (year.Length == 2 && System.Convert.ToInt32 (year) < 50) year = "20" + year; // two-digit years below 50 are read as 20xx; the cutoff is arbitrary
+ if (year.Length == 2 && System.Convert.ToInt32 (year) >= 50) year = "19" + year; // two-digit years of 50 and above are read as 19xx
+ if (month.Length == 1) month = "0" + month; // zero-pad single-digit month
+ if (day.Length == 1) day = "0" + day; // zero-pad single-digit day
+
+ results[0] = year + "-" + month + "-" + day; // primary interpretation
+ results[1] = null; // stays null unless the ambiguity check below fires
+
+ // handle dates like 05/02/1994 - could be 5th Feb or 2nd May 1994
+ // if ambiguous generate a second date clue
+ if (rx.ToString () == REGEX_DATE1) // only applies when the regex pattern is the all-numeric REGEX_DATE1
+ if ((System.Convert.ToInt32 (day) <= 12) && (System.Convert.ToInt32 (month) <= 12)) // both values are <= 12, so either could be the month
+ results[1] = year + "-" + day + "-" + month; // same date with day and month exchanged
+
+ Console.WriteLine("--------------------------"); // debug trace of the conversion on stdout
+ Console.WriteLine("{0} -> {1}", m.Groups[1].ToString (), results[0]);
+ if (results[1] != null)
+ Console.WriteLine("{0} -> {1}", m.Groups[1].ToString (), results[1]);
+ Console.WriteLine("--------------------------");
+
+ return results;
+ }
+
+
private ArrayList GetPatternMatches (Clue clue, string type, string regex)
{
Regex r;
@@ -152,7 +223,13 @@
ArrayList newclues = new ArrayList ();
for (m = r.Match (clue.Text); m.Success; m = m.NextMatch ()) {
Console.WriteLine ("TextChainer chaining to type " + type + ": " + m.Groups[1].ToString ());
- newclues.Add (new Clue (type, m.Groups[1].ToString (), 10, clue));
+ if (type == "date")
+ foreach (String d in NormalizeDateString (r, m)) {
+ if (d != null)
+ newclues.Add (new Clue (type, d, 10, clue));
+ }
+ else
+ newclues.Add (new Clue (type, m.Groups[1].ToString (), 10, clue));
}
return newclues;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]