Re: PhoneChainer backend (update)



Hi again,

I've made some changes to this patch. Firstly, it's now integrated into
the GeoSites backend. This makes much more sense, although I think that
some of it (i.e. the areacode database stuff) ought to move into
utils/geo/gazetteer.

Secondly, it doesn't chain clues any more; it generates matches in the
same way as lat/long clues, but from phone numbers. For big area codes
which cover multiple cities it matches the biggest city. Small cities
which aren't in the latlong database don't get as detailed matches.

Patches attached (note that the areacodes format has been changed);
feedback welcomed.

By the way, I can't get it displaying local info (as in
http://usefulinc.com/edd/blog/contents/2003/07/geosites.jpg). Am I
missing something?

dave

On Wed, 2004-02-25 at 15:41, dave wrote:
> Hi all,
> 
> I've written a backend which will take phone clues and generate
> placenames from the areacode. Areacode data (from free sources on the
> web) for the UK and US is provided.
> 
> Problems:
> I'm not sure how to set up the path to the areacode data. How should I do
> this?
> What kind of clue should this generate? Address? Keyword? Textblock? Or
> should we consider a new keyword such as "place"? My own feeling is that
> a "place" keyword for places which aren't full addresses is a good idea.
> 
> Longer term ideas:
> I'd like to integrate this with Edd's lat/long work (generate lat/long
> clues). Maybe do similar things with postcode / zip code data as well. 
> More detailed data for the US might be useful, the area codes seem
> pretty big. I like the idea of dashboard doing useful things (e.g. maps)
> with geographic data, I think that has a lot of potential.
> 
> Please let me know what you think.
> 
> Cheers,
> 
> dave
--- dashboard/backends/GeoSitesBackend.cs	2004-02-17 03:06:06.000000000 +0000
+++ dashboard-ac/backends/GeoSitesBackend.cs	2004-02-26 11:40:27.258706826 +0000
@@ -1,6 +1,7 @@
 // Geosites backend: generates web links to related geographical
 // resources
 // by Edd Dumbill <edd usefulinc com>
+// Areacode stuff by Dave Rodgman <davidr sucs org>
 
 using System;
 using System.IO;
@@ -13,6 +14,7 @@
 using System.Runtime.Remoting.Messaging;
 using System.Reflection;
 using System.Runtime.InteropServices;
+using ICSharpCode.SharpZipLib;
 
 using Dashboard.Geo;
 
@@ -27,80 +29,211 @@
 			Console.WriteLine ("GeoSites backend starting");
 			Name = "GeoSites";
 
+			gazetteer = new Gazetteer ();
+
 			this.SubscribeToClues ("latlong");
-			this.Initialized = true;
+			this.SubscribeToClues ("phone");
+				
+			StreamReader sr;
+			SortedList table;
+
+			Assembly assembly = System.Reflection.Assembly.GetExecutingAssembly ();
+			foreach (String areaFile in assembly.GetManifestResourceNames ()) {
+				if (!(areaFile.StartsWith ("areacodes/areacodes.") && areaFile.EndsWith (".csv.gz")))
+					continue;
 
-			gazetteer = new Gazetteer ();
+				System.IO.Stream s = assembly.GetManifestResourceStream (areaFile);
+				String country;
+				try {
+					sr = new StreamReader (new GZip.GZipInputStream (s));
+
+					country = sr.ReadLine ().Substring (1).Trim (null);
+					if (!codeToAreaTable.ContainsKey (country))
+						codeToAreaTable.Add (country, new SortedList ());
+					table = (SortedList) codeToAreaTable [country];
+
+					RegularExpressions.Match intlCodes = Regex.Match (sr.ReadLine (), @"#\+(\d+)\s*(\d*)");
+					table.Add ("CountryCode",   intlCodes.Groups [1].Captures [0].ToString ());
+					table.Add ("CountryPrefix", intlCodes.Groups [2].Captures [0].ToString ());
+				}
+				catch (Exception e) {
+					Console.WriteLine ("GeoSites: Could not load area codes database: " + areaFile);
+					continue;
+				}
+
+				String line, code, city, adminregion;
+				Regex extractData = new Regex (@"\x22(?<code>\d+)\x22,\x22(?<city>[^\x22]*)\x22,\x22(?<adminregion>[^\x22]*)\x22", RegexOptions.Compiled);
+
+				while ((line = sr.ReadLine ()) != null) {
+					if (line [0] == '#')
+						continue;
+
+					try {
+						RegularExpressions.Match m = extractData.Match (line);
+
+						code = m.Groups ["code"].Captures [0].ToString ();
+						city = m.Groups ["city"].Captures [0].ToString ();
+						adminregion = m.Groups ["adminregion"].Captures [0].ToString ();
+
+						if ((city.Length == 0 && adminregion.Length == 0) || (code.Length == 0))
+							throw new Exception ();
+					} catch (Exception e) {
+						Console.WriteLine ("GeoSites: error in {0} areacode data, line: {1}", country, line);
+						continue;
+					}
+
+					if (code.Length < minAreacodeLength)
+						minAreacodeLength = code.Length;
+
+					Point p = gazetteer.GetLatLong (city, adminregion, country);
+
+					// We permit multiple towns per area code
+					if (!table.ContainsKey (code))
+						table.Add (code, new ArrayList ());
+					((ArrayList) (table [code])).Add (p);
+				}
+				Console.WriteLine ("GeoSites: Loaded {0} area codes", country);
+			}
+
+			this.Initialized = true;
 
 			return true;
 		}
 
-		public Match GetMatch (Clue clue)
-		{
-			string txt = clue.Text;
-			string [] coords = Regex.Split(txt, "\\s*,\\s*");
+		// generate list of points (usually only one) from an area code
+		private ArrayList lookupArea (String originalNumber) {
+			ArrayList result = new ArrayList ();
+
+			String number = Regex.Replace (originalNumber, @"[^\d\+]", "");
+
+			if (number.Length == 0)
+				return result;
+
+			IDictionaryEnumerator ie = codeToAreaTable.GetEnumerator ();
+			while (ie.MoveNext ()) {
+				SortedList table = (SortedList) (ie.Value);
+				String code;
+
+				// deal with international prefixes
+				if (number [0] == '+') {
+					code = number.Substring (1);
+					if (!code.StartsWith ((String) (table ["CountryCode"])))
+						continue;
+					code = table ["CountryPrefix"] + code.Substring (((String) (table ["CountryCode"])).Length);
+				} else
+					code = number;
+
+				String key = null;
+				for (int len = minAreacodeLength; len <= code.Length; len++) {
+					int i = table.IndexOfKey (code.Substring (0, len));
+					if (i == -1)
+						continue;
+					key = code.Substring (0, len);
+				}
 
-			if (coords.Length != 2)
-				return null;
+				if (key == null) continue;
 
-			string lat = coords[0];
-			string lng = coords[1];
+				result.AddRange ((ArrayList) table [key]);
+			}
+			return result;
+		}
 
-			double dlat = Double.Parse (lat);
-			double dlng = Double.Parse (lng);
+		private Hashtable codeToAreaTable = new Hashtable ();
+		private int minAreacodeLength = 999999;
 
-			int intlat = (int) dlat;
-			int intlng = (int) dlng;
+		public ArrayList GetMatch (Clue clue)
+		{
+			ArrayList points = new ArrayList (), matches = new ArrayList ();
 
-			Point p = gazetteer.NearestCity (dlat, dlng);
-			if (p == null)
-				return null;
+			double dlat = 0, dlng = 0;
+			String lat = "", lng = "";
 
-			Match match = new Match ("Location", clue);
-			match ["City"] = p.City;
-			match ["Admin region"] = p.Adminregion;
-			match ["Country"] = p.Country;
-
-			String vicinityurl = String.Format("http://www.vicinity.com/myblast/map.mb?CMD=LFILL&CT={0}:{1}:90000", lat, lng);
-			match ["MapBlast"] = vicinityurl;
-
-			// yawn, special case for mapquest
-			if (p.Country == "United States of America")
-			{
-				p.Country = "United States";
+			switch (clue.Type) {
+			case "latlong":
+				string txt = clue.Text;
+				string [] coords = Regex.Split(txt, "\\s*,\\s*");
+
+				if (coords.Length != 2)
+					return null;
+
+				lat = coords[0];
+				lng = coords[1];
+
+				dlat = Double.Parse (lat);
+				dlng = Double.Parse (lng);
+
+				Point p = gazetteer.NearestCity (dlat, dlng);
+				if (p == null)
+					return null;
+				else
+					points.Add(p);
+				break;
+			case "phone":
+				points = lookupArea(clue.Text);
+				break;
 			}
 
-			// FIXME: MapQuest requires the state name as a two
-			// letter code where the US is concerned.
-			String mapquesturl = String.Format (
-					"http://www.mapquest.com/maps/map.adp?city={0}&state={1}&country={2}&zoom=5",
-					HttpUtility.UrlEncode(p.City),
-					HttpUtility.UrlEncode(p.Adminregion),
-					HttpUtility.UrlEncode(p.Country));
-			match ["MapQuest"] = mapquesturl;
+			foreach (Point p in points) {
+				if (clue.Type == "phone") {
+					dlat = p.Latitude;
+					dlng = p.Longitude;
+					lat = Convert.ToString(p.Latitude);
+					lng = Convert.ToString(p.Longitude);
+					Console.WriteLine(p.City);
+					Console.WriteLine(p.Adminregion);
+					Console.WriteLine(p.Latitude);
+					Console.WriteLine(p.Longitude);
+				}
+				int intlat = (int) dlat;
+				int intlng = (int) dlng;
+
+				Match match = new Match ("Location", clue);
+				match ["City"] = p.City;
+				match ["Admin region"] = p.Adminregion;
+				match ["Country"] = p.Country;
+
+				String vicinityurl = String.Format("http://www.vicinity.com/myblast/map.mb?CMD=LFILL&CT={0}:{1}:90000", lat, lng);
+				match ["MapBlast"] = vicinityurl;
+
+				// yawn, special case for mapquest
+				if (p.Country == "United States of America")
+				{
+					p.Country = "United States";
+				}
+
+				// FIXME: MapQuest requires the state name as a two
+				// letter code where the US is concerned.
+				String mapquesturl = String.Format (
+						"http://www.mapquest.com/maps/map.adp?city={0}&state={1}&country={2}&zoom=5",
+						HttpUtility.UrlEncode(p.City),
+						HttpUtility.UrlEncode(p.Adminregion),
+						HttpUtility.UrlEncode(p.Country));
+				match ["MapQuest"] = mapquesturl;
 			
-			// yawn, special case for wunderground
-			// what is it with US-centricity? ;-p
-			String searchstring;
-			if (p.Country == "United States")
-			{
-				searchstring = String.Format("{0}, {1}", p.City, p.Adminregion);
-			} else {
-				searchstring = String.Format("{0}, {1}", p.City, p.Country);
-			}
-			String wundergroundurl = String.Format(
-			    "http://www.wunderground.com/cgi-bin/findweather/getForecast?query={0}",
-			    HttpUtility.UrlEncode(searchstring));
-			match ["Weather"] = wundergroundurl;
+				// yawn, special case for wunderground
+				// what is it with US-centricity? ;-p
+				String searchstring;
+				if (p.Country == "United States")
+				{
+					searchstring = String.Format("{0}, {1}", p.City, p.Adminregion);
+				} else {
+					searchstring = String.Format("{0}, {1}", p.City, p.Country);
+				}
+				String wundergroundurl = String.Format(
+				    "http://www.wunderground.com/cgi-bin/findweather/getForecast?query={0}",
+				    HttpUtility.UrlEncode(searchstring));
+				match ["Weather"] = wundergroundurl;
 			
-			String degreeconfluenceurl = String.Format(
-			    "http://www.confluence.org/confluence.php?lat={0}&lon={1}&visit=1", intlat, intlng);
-			match ["Degree Confluence"] = degreeconfluenceurl;
+				String degreeconfluenceurl = String.Format(
+				    "http://www.confluence.org/confluence.php?lat={0}&lon={1}&visit=1", intlat, intlng);
+				match ["Degree Confluence"] = degreeconfluenceurl;
 
-			String terraserverurl = String.Format("http://terraserver.com/coordinates2.asp?y={0}&x={1}", lat, lng);
-			match ["Terraserver"] = terraserverurl;
+				String terraserverurl = String.Format("http://terraserver.com/coordinates2.asp?y={0}&x={1}", lat, lng);
+				match ["Terraserver"] = terraserverurl;
 
-			return match;
+				matches.Add (match);
+			}
+			return matches;
 		}
 
 		public override BackendResult ProcessCluePacket (CluePacket cp)
@@ -113,8 +246,9 @@
 			foreach (Clue c in cp.Clues) {
 				if (c.Text.Length == 0)
 					continue;
-
-				result.AddMatch (GetMatch (c));
+				// areacodes can (in theory) generate multiple results
+				foreach (Match m in GetMatch (c))
+					result.AddMatch (m);
 			}
 
 			return result;
--- dashboard/backends/Makefile.am	2004-02-25 07:43:10.000000000 +0000
+++ dashboard-ac/backends/Makefile.am	2004-02-26 11:51:34.558737977 +0000
@@ -3,6 +3,7 @@
 DLLDEPS=-r:glib-sharp.dll -r:gdk-sharp.dll -r:gtkhtml-sharp -r:gtk-sharp.dll -r:gnome-sharp.dll -r:System.Web -r:$(dashboard_exe) -r:../util/drive/drive.dll -r:../engine/rdf.dll
 LIBFLAGS=-target:library -L ../util/webservices  $(DLLDEPS)
 EXEFLAGS=$(DLLDEPS)
+AREACODES=-resource:areacodes/areacodes.uk.csv.gz -resource:areacodes/areacodes.us.csv.gz
 
 # The backends to build by default.
 backends =                              \
@@ -78,7 +79,7 @@
 	$(CSC) $(LIBFLAGS) -out:HTMLIndexBackend.dll $(srcdir)/HTMLIndexBackend.cs -r:../index/index-manager.dll
 
 GeoSitesBackend.dll: GeoSitesBackend.cs $(dashboard_exe)
-	$(CSC) $(LIBFLAGS) -out:GeoSitesBackend.dll $(srcdir)/GeoSitesBackend.cs -r:../util/geo/geo.dll
+	$(CSC) $(LIBFLAGS) -out:GeoSitesBackend.dll $(srcdir)/GeoSitesBackend.cs -r:../util/geo/geo.dll -r:ICSharpCode.SharpZipLib $(AREACODES)
 
 ManPagesBackend.dll: ManPagesBackend.cs $(dashboard_exe)
 	$(CSC) $(LIBFLAGS) -out:ManPagesBackend.dll $(srcdir)/ManPagesBackend.cs -r:ICSharpCode.SharpZipLib
--- dashboard/util/geo/gazetteer.cs	2003-07-27 21:35:44.000000000 +0100
+++ dashboard-ac/util/geo/gazetteer.cs	2004-02-26 11:17:22.069237175 +0000
@@ -35,6 +35,17 @@
 		{
 		}
 
+		public Point (string city, string adminregion, string country) { // builds a Point from place names only
+			this.City = city;
+			this.Adminregion = adminregion;
+			this.Country = country;
+			// No id, coordinates or ISO code are passed in, so the remaining fields get fixed values.
+			this.Id = 0;
+			this.Latitude  = -1.0; // NOTE(review): presumably a "no coordinates" marker, but -1.0 is also a valid latitude - confirm callers can tell them apart
+			this.Longitude = -1.0; // NOTE(review): same concern as Latitude
+			this.Isocode = null;
+		}
+
 		public Point (int id, double latitude, double longitude,
 				string city,
 				string adminregion,
@@ -119,6 +130,8 @@
 	public class Gazetteer {
 		
 		private ArrayList points = new ArrayList ();
+		private Hashtable pointsByCity = new Hashtable (); // copy of above, indexed by city
+	
 		private ArrayList latindex = new ArrayList ();
 		private ArrayList lngindex = new ArrayList ();
 		
@@ -171,6 +184,11 @@
 						vals[1], vals[3], vals[4], vals[5]);
 
 				points.Add (p);
+
+				if (!pointsByCity.ContainsKey (p.City))
+					pointsByCity.Add (p.City, new ArrayList ());
+				((ArrayList) pointsByCity [p.City]).Add (p);
+
 				latindex.Add ( p);
 				lngindex.Add ( p);
 				l = f.ReadLine();
@@ -182,6 +200,29 @@
 			// Console.WriteLine ("Read {0} cities.", i);
 		}
 
+		// Look up a Point for a place name: first an exact city match, then the first point in the
+		// region; if neither is found, returns a new Point that carries only the three names given.
+		public Point GetLatLong (string city, string adminregion, string country) {
+			Point q = null; // NOTE(review): never read in this method
+
+			// Pass 1: exact city-name match in the same country; an empty adminregion matches any region.
+			if (city != null && pointsByCity.ContainsKey (city))
+				foreach (Point r in ((ArrayList) pointsByCity [city]))
+					if ((r.Adminregion == adminregion || adminregion == "") &&
+						r.Country == country)
+							return r;
+
+			// Pass 2 (skipped when adminregion is empty): first point with the same region and country.
+			// Per the original author, points are sorted by population (== Point.Id), so this is the biggest city - TODO confirm.
+			if (adminregion != "")
+				foreach (Point r in points)
+					if (r.Adminregion == adminregion && r.Country == country)
+						return r;
+
+			// No match: build a Point from the names alone via the three-string constructor.
+			return new Point(city, adminregion, country);
+		}
+
 		public ArrayList NearestCities (double latitude, double longitude) {
 			Point p = new Point ();
 			double minlat, maxlat, minlng, maxlng;

Attachment: areacodes.tar.bz2
Description: application/bzip



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]