textchain backend
- From: Edd Dumbill <edd usefulinc com>
- To: "dashboard-hackers gnome org" <dashboard-hackers gnome org>
- Subject: textchain backend
- Date: 21 Jul 2003 12:21:29 +0100
Here's a patch to try for backend-textchain.cs:
* New phone regexp: captures most international numbers as well as US
numbers. The downside is that it sometimes lets other things through by
accident (e.g. the date 2003-07-21 would match). Backends really need
to normalise phone numbers before comparing them as well (my concept of
this: strip out everything but digits and the "+" international introducer).
* URL normalisation: puts 'http://' or 'ftp://' back on the front of URLs
inferred from a string beginning with 'www.' or 'ftp.', respectively.
-- Edd
(BTW, I'm happy to check in these patches myself; I just figured it might
be wise to send them to the list for review before doing so.)
Index: backends/backend-textchain.cs
===================================================================
RCS file: /cvs/gnome/dashboard/backends/backend-textchain.cs,v
retrieving revision 1.2
diff -u -r1.2 backend-textchain.cs
--- backends/backend-textchain.cs 19 Jul 2003 13:33:45 -0000 1.2
+++ backends/backend-textchain.cs 21 Jul 2003 11:16:17 -0000
@@ -19,10 +19,17 @@
private const string REGEX_URL
= "((((http|ftp|https)://)|(www|ftp)[-A-Za-z0-9]*\\.)[-A-Za-z0-9\\.]+(:[0-9]*)?(/)?)";
+
+ // this regex found at http://www.dotnetforums.net/t49640.html
private const string REGEX_EMAIL
- = "(\\S+ \\S+)";
+ = "([\\w\\ \\-]+ [a-zA-Z0-9\\-]+(\\.[a-zA-Z0-9\\-]{1,})*(\\.[a-zA-Z]{2,3}){1,2})";
+
private const string REGEX_PHONE_US
= "(\\(?[2-9][0-9]{2}\\)?[-. ][1-9][0-9]{2}[-. ][0-9]{4})";
+
+ private const string REGEX_PHONE
+ = "([\\+\\(]?\\d(\\d|\\)|[\\-\\(\\. ]\\d){6,})[\\s$]";
+
private const string REGEX_BUGZILLA
= "#([1-9][0-9]*)";
@@ -38,15 +45,43 @@
public override BackendResult ProcessCluePacket (CluePacket cp)
{
ArrayList newclues = new ArrayList ();
+ ArrayList urlclues = new ArrayList ();
foreach (Clue clue in cp.Clues) {
if (clue.Type != "textblock")
continue;
- newclues.AddRange (this.GetPatternMatches (clue, "url", REGEX_URL));
+
+ // we normalize url clues to have their
+ // URI schemes on the front
+
+ urlclues.AddRange
+ (this.GetPatternMatches (clue, "url", REGEX_URL));
+
+ foreach (Clue c in urlclues) {
+ Clue r=null;
+
+ if (String.Compare
+ (c.Text, 0, "www.", 0, 4, true) == 0) {
+ r=new Clue (c.Type,
+ "http://" + c.Text,
+ c.Relevance);
+ } else if (String.Compare
+ (c.Text, 0, "ftp.", 0, 4, true) == 0) {
+ r=new Clue (c.Type,
+ "ftp://" + c.Text,
+ c.Relevance);
+ }
+ if (r == null) {
+ newclues.Add (c);
+ } else {
+ newclues.Add (r);
+ }
+ }
+
newclues.AddRange (this.GetPatternMatches (clue, "email", REGEX_EMAIL));
- newclues.AddRange (this.GetPatternMatches (clue, "phone", REGEX_PHONE_US));
+ newclues.AddRange (this.GetPatternMatches (clue, "phone", REGEX_PHONE));
newclues.AddRange (this.GetPatternMatches (clue, "bugzilla", REGEX_BUGZILLA));
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]