[tracker] tracker-extract: Filter all file based ontology from LSA extractor
- From: Martyn James Russell <mr src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [tracker] tracker-extract: Filter all file based ontology from LSA extractor
- Date: Wed, 28 Oct 2009 13:02:46 +0000 (UTC)
commit 30f6184b257c1082bc4d683318c2abc280e76072
Author: Martyn Russell <martyn lanedo com>
Date: Wed Oct 28 13:00:13 2009 +0000
tracker-extract: Filter all file based ontology from LSA extractor
src/tracker-extract/tracker-topanalyzer.cpp | 163 ++++++++++++++++++++++-----
1 files changed, 134 insertions(+), 29 deletions(-)
---
diff --git a/src/tracker-extract/tracker-topanalyzer.cpp b/src/tracker-extract/tracker-topanalyzer.cpp
index af11db4..048dd06 100644
--- a/src/tracker-extract/tracker-topanalyzer.cpp
+++ b/src/tracker-extract/tracker-topanalyzer.cpp
@@ -100,8 +100,9 @@ namespace Tracker {
gchar *content_type;
private:
- const gchar* PredicateMapping (const RegisteredField *field);
- const gchar* PredicateMapping (const std::string &key);
+ const gchar *predicateMapping (const RegisteredField *field);
+ const gchar *predicateMapping (const std::string &key);
+ gboolean predicateNeeded (const gchar *predicate);
const gchar *uri;
TrackerSparqlBuilder *metadata;
@@ -116,15 +117,13 @@ namespace Tracker {
{
uri = uri_;
metadata = metadata_;
- if (content_type)
- g_free (content_type);
+ g_free (content_type);
content_type = NULL;
}
Tracker::TripleCollector::~TripleCollector ()
{
- if (content_type)
- g_free (content_type);
+ g_free (content_type);
}
void Tracker::TripleCollector::commit () { }
@@ -143,14 +142,73 @@ namespace Tracker {
text);
}
- const gchar* Tracker::TripleCollector::PredicateMapping (const std::string &key)
+ const gchar* Tracker::TripleCollector::predicateMapping (const std::string &key)
{
- return (const gchar *) key.c_str();
+ /* const gchar *original; */
+ /* gchar *str, *p; */
+
+ /* original = key.c_str(); */
+
+ /* p = strrchr (original, '/'); */
+ /* if (G_UNLIKELY (!p)) { */
+ /* return g_strdup (original); */
+ /* } */
+
+ /* if (G_UNLIKELY (!strchr (p, '#'))) { */
+ /* return g_strdup (original); */
+ /* } */
+
+ /* str = g_strdup (p + 1); */
+ /* p = strchr (str, '#'); */
+ /* *p = ':'; */
+
+ return key.c_str();
+ }
+
+ const gchar* Tracker::TripleCollector::predicateMapping (const RegisteredField *field)
+ {
+ /* const gchar *original; */
+ /* gchar *str, *p; */
+
+ /* original = field->key().c_str(); */
+
+ /* p = strrchr (original, '/'); */
+ /* if (G_UNLIKELY (!p)) { */
+ /* return g_strdup (original); */
+ /* } */
+
+ /* if (G_UNLIKELY (!strchr (p, '#'))) { */
+ /* return g_strdup (original); */
+ /* } */
+
+ /* str = g_strdup (p + 1); */
+ /* p = strchr (str, '#'); */
+ /* *p = ':'; */
+
+ return field->key().c_str();
}
- const gchar* Tracker::TripleCollector::PredicateMapping (const RegisteredField *field)
+ gboolean Tracker::TripleCollector::predicateNeeded (const gchar *predicate)
{
- return (const gchar *) field->key().c_str();
+ if (!predicate) {
+ return FALSE;
+ }
+
+ /* We already cover these in the miner-fs */
+ if (strstr (predicate, "nfo#FileDataObject") ||
+ strstr (predicate, "nfo#belongsToContainer") ||
+ strstr (predicate, "nfo#fileName") ||
+ strstr (predicate, "nfo#fileSize") ||
+ strstr (predicate, "nfo#fileLastModified") ||
+ strstr (predicate, "nfo#fileLastAccessed") ||
+ strstr (predicate, "nie#InformationElement") ||
+ strstr (predicate, "nie#isStoredAs") ||
+ strstr (predicate, "nie#mimeType") ||
+ strstr (predicate, "nie#dataSource")) {
+ return FALSE;
+ }
+
+ return TRUE;
}
/* The methods below basically just convert the C++ world to the C world
@@ -160,15 +218,21 @@ namespace Tracker {
const RegisteredField* field,
const std::string& value)
{
+ const gchar *predicate = predicateMapping (field);
+
if (field->key() == FieldRegister::mimetypeFieldName && idx->depth() == 0) {
- if (content_type)
- g_free (content_type);
+ g_free (content_type);
content_type = g_strdup (value.c_str());
}
- tracker_statement_list_insert (metadata, idx->path().c_str(),
- PredicateMapping (field),
- value.c_str());
+ if (!predicateNeeded (predicate)) {
+ return;
+ }
+
+ tracker_statement_list_insert (metadata,
+ idx->path().c_str(),
+ predicate,
+ value.c_str());
}
void Tracker::TripleCollector::addValue (const AnalysisResult* idx,
@@ -176,8 +240,15 @@ namespace Tracker {
const unsigned char* data,
uint32_t size )
{
- tracker_statement_list_insert (metadata, idx->path().c_str(),
- PredicateMapping (field),
+ const gchar *predicate = predicateMapping (field);
+
+ if (!predicateNeeded (predicate)) {
+ return;
+ }
+
+ tracker_statement_list_insert (metadata,
+ idx->path().c_str(),
+ predicate,
(const gchar*) data);
}
@@ -185,8 +256,15 @@ namespace Tracker {
const RegisteredField* field,
int32_t value)
{
- tracker_statement_list_insert_with_int (metadata, idx->path().c_str(),
- PredicateMapping (field),
+ const gchar *predicate = predicateMapping (field);
+
+ if (!predicateNeeded (predicate)) {
+ return;
+ }
+
+ tracker_statement_list_insert_with_int (metadata,
+ idx->path().c_str(),
+ predicate,
(gint) value);
}
@@ -194,8 +272,15 @@ namespace Tracker {
const RegisteredField* field,
uint32_t value )
{
- tracker_statement_list_insert_with_int (metadata, idx->path().c_str(),
- PredicateMapping (field),
+ const gchar *predicate = predicateMapping (field);
+
+ if (!predicateNeeded (predicate)) {
+ return;
+ }
+
+ tracker_statement_list_insert_with_int (metadata,
+ idx->path().c_str(),
+ predicate,
(gint) value);
}
@@ -203,8 +288,15 @@ namespace Tracker {
const RegisteredField* field,
double value )
{
- tracker_statement_list_insert_with_double (metadata, idx->path().c_str(),
- PredicateMapping (field),
+ const gchar *predicate = predicateMapping (field);
+
+ if (!predicateNeeded (predicate)) {
+ return;
+ }
+
+ tracker_statement_list_insert_with_double (metadata,
+ idx->path().c_str(),
+ predicate,
(gdouble) value);
}
@@ -212,8 +304,15 @@ namespace Tracker {
const std::string& predicate,
const std::string& object )
{
- tracker_statement_list_insert (metadata, subject.c_str(),
- PredicateMapping (predicate),
+ const gchar *predicate_str = predicateMapping (predicate);
+
+ if (!predicateNeeded (predicate_str)) {
+ return;
+ }
+
+ tracker_statement_list_insert (metadata,
+ subject.c_str(),
+ predicate_str,
object.c_str());
}
@@ -222,14 +321,20 @@ namespace Tracker {
const std::string& name,
const std::string& value )
{
+ const gchar *predicate = predicateMapping (field);
+
if (field->key() == FieldRegister::mimetypeFieldName && idx->depth() == 0) {
- if (content_type)
- g_free (content_type);
+ g_free (content_type);
content_type = g_strdup (value.c_str());
}
- tracker_statement_list_insert (metadata, idx->path().c_str(),
- PredicateMapping (name),
+ if (!predicateNeeded (predicate)) {
+ return;
+ }
+
+ tracker_statement_list_insert (metadata,
+ idx->path().c_str(),
+ predicate,
value.c_str());
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]