[tracker/wip/carlosg/sparql1.1: 61/80] libtracker-data: Handle rdf:langString type



commit 7e5a677c2f9487d4f5cf36b97b13a8e0d99a9f34
Author: Carlos Garnacho <carlosg gnome org>
Date:   Mon Jul 8 01:35:19 2019 +0200

    libtracker-data: Handle rdf:langString type
    
    This is generally stored as a BLOB type, with the "$string\0$locale"
    format. Conversions between string and langString types are allowed.

 src/libtracker-data/tracker-data-manager.c |   7 +-
 src/libtracker-data/tracker-data-update.c  |  19 +++++
 src/libtracker-data/tracker-property.c     |   8 ++
 src/libtracker-data/tracker-property.h     |   1 +
 src/libtracker-data/tracker-sparql.c       | 113 +++++++++++++++++++++++++----
 src/libtracker-data/tracker-vtab-triples.c |   1 +
 src/ontologies/11-rdf.ontology             |  10 +++
 7 files changed, 144 insertions(+), 15 deletions(-)
---
diff --git a/src/libtracker-data/tracker-data-manager.c b/src/libtracker-data/tracker-data-manager.c
index 08cf84df5..d03d64122 100644
--- a/src/libtracker-data/tracker-data-manager.c
+++ b/src/libtracker-data/tracker-data-manager.c
@@ -2586,6 +2586,7 @@ create_decomposed_metadata_property_table (TrackerDBInterface *iface,
 
        switch (tracker_property_get_data_type (property)) {
        case TRACKER_PROPERTY_TYPE_STRING:
+       case TRACKER_PROPERTY_TYPE_LANGSTRING:
                sql_type = "TEXT";
                break;
        case TRACKER_PROPERTY_TYPE_INTEGER:
@@ -3138,7 +3139,8 @@ create_decomposed_metadata_tables (TrackerDataManager  *manager,
                                                schedule_copy (copy_schedule, property, field_name, NULL);
                                        }
 
-                                       if (g_ascii_strcasecmp (sql_type_for_single_value, "TEXT") == 0) {
+                                       if (g_ascii_strcasecmp (sql_type_for_single_value, "TEXT") == 0 ||
+                                           g_ascii_strcasecmp (sql_type_for_single_value, "BLOB") == 0) {
                                                g_string_append (create_sql, " COLLATE " TRACKER_COLLATION_NAME);
                                        }
 
@@ -3165,7 +3167,8 @@ create_decomposed_metadata_tables (TrackerDataManager  *manager,
                                                                field_name,
                                                                sql_type_for_single_value);
 
-                                       if (g_ascii_strcasecmp (sql_type_for_single_value, "TEXT") == 0) {
+                                       if (g_ascii_strcasecmp (sql_type_for_single_value, "TEXT") == 0 ||
+                                           g_ascii_strcasecmp (sql_type_for_single_value, "BLOB") == 0) {
                                                g_string_append (alter_sql, " COLLATE " TRACKER_COLLATION_NAME);
                                        }
 
diff --git a/src/libtracker-data/tracker-data-update.c b/src/libtracker-data/tracker-data-update.c
index bff026c38..caebff462 100644
--- a/src/libtracker-data/tracker-data-update.c
+++ b/src/libtracker-data/tracker-data-update.c
@@ -741,6 +741,21 @@ statement_bind_gvalue (TrackerDBStatement *stmt,
                        } else {
                                tracker_db_statement_bind_int (stmt, (*idx)++, round (time));
                        }
+               } else if (type == G_TYPE_BYTES) {
+                       GBytes *bytes;
+                       gconstpointer data;
+                       gsize len;
+
+                       bytes = g_value_get_boxed (value);
+                       data = g_bytes_get_data (bytes, &len);
+
+                       if (len == strlen (data) + 1) {
+                               /* No ancillary data */
+                               tracker_db_statement_bind_text (stmt, (*idx)++, data);
+                       } else {
+                               /* String with langtag */
+                               tracker_db_statement_bind_bytes (stmt, (*idx)++, bytes);
+                       }
                } else {
                        g_warning ("Unknown type for binding: %s\n", G_VALUE_TYPE_NAME (value));
                }
@@ -1450,6 +1465,10 @@ bytes_to_gvalue (GBytes              *bytes,
                g_value_init (gvalue, G_TYPE_STRING);
                g_value_set_string (gvalue, value);
                break;
+       case TRACKER_PROPERTY_TYPE_LANGSTRING:
+               g_value_init (gvalue, G_TYPE_BYTES);
+               g_value_set_boxed (gvalue, bytes);
+               break;
        case TRACKER_PROPERTY_TYPE_INTEGER:
                g_value_init (gvalue, G_TYPE_INT64);
                g_value_set_int64 (gvalue, atoll (value));
diff --git a/src/libtracker-data/tracker-property.c b/src/libtracker-data/tracker-property.c
index 83efcc1da..8fc4dfaf3 100644
--- a/src/libtracker-data/tracker-property.c
+++ b/src/libtracker-data/tracker-property.c
@@ -36,6 +36,7 @@
 #define XSD_DOUBLE   TRACKER_PREFIX_XSD "double"
 #define XSD_INTEGER  TRACKER_PREFIX_XSD "integer"
 #define XSD_STRING   TRACKER_PREFIX_XSD "string"
+#define RDF_LANGSTRING TRACKER_PREFIX_RDF "langString"
 
 typedef struct _TrackerPropertyPrivate TrackerPropertyPrivate;
 
@@ -108,6 +109,9 @@ tracker_property_type_get_type (void)
                        { TRACKER_PROPERTY_TYPE_RESOURCE,
                          "TRACKER_PROPERTY_TYPE_RESOURCE",
                          "resource" },
+                       { TRACKER_PROPERTY_TYPE_LANGSTRING,
+                         "TRACKER_PROPERTY_TYPE_LANGSTRING",
+                         "langString" },
                        { 0, NULL, NULL }
                };
 
@@ -267,6 +271,8 @@ tracker_property_get_data_type (TrackerProperty *property)
                range_uri = tracker_ontologies_get_property_string_gvdb (priv->ontologies, priv->uri, "range");
                if (strcmp (range_uri, XSD_STRING) == 0) {
                        priv->data_type = TRACKER_PROPERTY_TYPE_STRING;
+               } else if (strcmp (range_uri, RDF_LANGSTRING) == 0) {
+                       priv->data_type = TRACKER_PROPERTY_TYPE_LANGSTRING;
                } else if (strcmp (range_uri, XSD_BOOLEAN) == 0) {
                        priv->data_type = TRACKER_PROPERTY_TYPE_BOOLEAN;
                } else if (strcmp (range_uri, XSD_INTEGER) == 0) {
@@ -827,6 +833,8 @@ tracker_property_set_range (TrackerProperty *property,
        range_uri = tracker_class_get_uri (priv->range);
        if (strcmp (range_uri, XSD_STRING) == 0) {
                priv->data_type = TRACKER_PROPERTY_TYPE_STRING;
+       } else if (strcmp (range_uri, RDF_LANGSTRING) == 0) {
+               priv->data_type = TRACKER_PROPERTY_TYPE_LANGSTRING;
        } else if (strcmp (range_uri, XSD_BOOLEAN) == 0) {
                priv->data_type = TRACKER_PROPERTY_TYPE_BOOLEAN;
        } else if (strcmp (range_uri, XSD_INTEGER) == 0) {
diff --git a/src/libtracker-data/tracker-property.h b/src/libtracker-data/tracker-property.h
index a84072268..5101a791a 100644
--- a/src/libtracker-data/tracker-property.h
+++ b/src/libtracker-data/tracker-property.h
@@ -44,6 +44,7 @@ typedef enum {
        TRACKER_PROPERTY_TYPE_DATE,
        TRACKER_PROPERTY_TYPE_DATETIME,
        TRACKER_PROPERTY_TYPE_RESOURCE,
+       TRACKER_PROPERTY_TYPE_LANGSTRING,
 } TrackerPropertyType;
 
 GType        tracker_property_type_get_type  (void) G_GNUC_CONST;
diff --git a/src/libtracker-data/tracker-sparql.c b/src/libtracker-data/tracker-sparql.c
index b0b0a1c2d..653b4b2a6 100644
--- a/src/libtracker-data/tracker-sparql.c
+++ b/src/libtracker-data/tracker-sparql.c
@@ -508,6 +508,7 @@ _append_literal_sql (TrackerSparql         *sparql,
                        break;
                case TRACKER_PROPERTY_TYPE_DATETIME:
                case TRACKER_PROPERTY_TYPE_STRING:
+               case TRACKER_PROPERTY_TYPE_LANGSTRING:
                case TRACKER_PROPERTY_TYPE_RESOURCE:
                        escaped = _escape_sql_string (binding->literal);
                        _append_string (sparql, escaped);
@@ -535,7 +536,8 @@ _append_literal_sql (TrackerSparql         *sparql,
 
        if (TRACKER_BINDING (binding)->data_type == TRACKER_PROPERTY_TYPE_RESOURCE)
                _append_string_printf (sparql, "), 0) ");
-       if (TRACKER_BINDING (binding)->data_type == TRACKER_PROPERTY_TYPE_STRING)
+       if (TRACKER_BINDING (binding)->data_type == TRACKER_PROPERTY_TYPE_STRING ||
+           TRACKER_BINDING (binding)->data_type == TRACKER_PROPERTY_TYPE_LANGSTRING)
                _append_string (sparql, "COLLATE " TRACKER_COLLATION_NAME " ");
 }
 
@@ -1480,6 +1482,8 @@ rdf_type_to_property_type (const gchar *type)
                return TRACKER_PROPERTY_TYPE_DATETIME;
        } else if (g_str_equal (type, XSD_NS "string")) {
                return TRACKER_PROPERTY_TYPE_STRING;
+       } else if (g_str_equal (type, RDF_NS "langString")) {
+               return TRACKER_PROPERTY_TYPE_LANGSTRING;
        } else {
                return TRACKER_PROPERTY_TYPE_UNKNOWN;
        }
@@ -1516,6 +1520,8 @@ convert_expression_to_string (TrackerSparql       *sparql,
                /* ISO 8601 format */
                _prepend_string (sparql, "SparqlFormatTime (");
                _append_string (sparql, ") ");
+               break;
+       case TRACKER_PROPERTY_TYPE_LANGSTRING:
        default:
                /* Let sqlite convert the expression to string */
                _prepend_string (sparql, "CAST (");
@@ -2533,7 +2539,8 @@ translate_OrderCondition (TrackerSparql  *sparql,
                g_assert_not_reached ();
        }
 
-       if (sparql->current_state.expression_type == TRACKER_PROPERTY_TYPE_STRING)
+       if (sparql->current_state.expression_type == TRACKER_PROPERTY_TYPE_STRING ||
+           sparql->current_state.expression_type == TRACKER_PROPERTY_TYPE_LANGSTRING)
                _append_string (sparql, "COLLATE " TRACKER_COLLATION_NAME " ");
        else if (sparql->current_state.expression_type == TRACKER_PROPERTY_TYPE_RESOURCE)
                convert_expression_to_string (sparql, sparql->current_state.expression_type);
@@ -4134,6 +4141,7 @@ static gboolean
 translate_DataBlockValue (TrackerSparql  *sparql,
                           GError        **error)
 {
+       TrackerSelectContext *select_context;
        TrackerGrammarNamedRule rule;
        TrackerBinding *binding;
 
@@ -4144,16 +4152,22 @@ translate_DataBlockValue (TrackerSparql  *sparql,
                return TRUE;
        }
 
+       select_context = TRACKER_SELECT_CONTEXT (sparql->current_state.select_context);
        rule = _current_rule (sparql);
 
        switch (rule) {
-       case NAMED_RULE_iri:
        case NAMED_RULE_RDFLiteral:
+               _call_rule (sparql, rule, error);
+               binding = g_ptr_array_index (select_context->literal_bindings,
+                                            select_context->literal_bindings->len - 1);
+               _append_literal_sql (sparql, TRACKER_LITERAL_BINDING (binding));
+               break;
+       case NAMED_RULE_iri:
        case NAMED_RULE_NumericLiteral:
        case NAMED_RULE_BooleanLiteral:
                _call_rule (sparql, rule, error);
                binding = _convert_terminal (sparql);
-               tracker_select_context_add_literal_binding (TRACKER_SELECT_CONTEXT (sparql->current_state.select_context),
+               tracker_select_context_add_literal_binding (select_context,
                                                            TRACKER_LITERAL_BINDING (binding));
                _append_literal_sql (sparql, TRACKER_LITERAL_BINDING (binding));
                g_object_unref (binding);
@@ -5543,8 +5557,10 @@ translate_GraphTerm (TrackerSparql  *sparql,
        rule = _current_rule (sparql);
 
        switch (rule) {
-       case NAMED_RULE_iri:
        case NAMED_RULE_RDFLiteral:
+               _call_rule (sparql, rule, error);
+               break;
+       case NAMED_RULE_iri:
        case NAMED_RULE_NumericLiteral:
        case NAMED_RULE_BooleanLiteral:
                _call_rule (sparql, rule, error);
@@ -5927,6 +5943,11 @@ handle_type_cast (TrackerSparql  *sparql,
                _call_rule (sparql, NAMED_RULE_ArgList, error);
                _append_string (sparql, "AS TEXT) ");
                sparql->current_state.expression_type = TRACKER_PROPERTY_TYPE_STRING;
+       } else if (g_str_equal (function, RDF_NS "langString")) {
+               _append_string (sparql, "CAST (");
+               _call_rule (sparql, NAMED_RULE_ArgList, error);
+               _append_string (sparql, "AS BLOB) ");
+               sparql->current_state.expression_type = TRACKER_PROPERTY_TYPE_LANGSTRING;
        } else if (g_str_equal (function, XSD_NS "integer")) {
                _append_string (sparql, "CAST (");
                _call_rule (sparql, NAMED_RULE_ArgList, error);
@@ -6246,7 +6267,8 @@ handle_function_call (TrackerSparql  *sparql,
        convert_to_string = sparql->current_state.convert_to_string;
        sparql->current_state.convert_to_string = FALSE;
 
-       if (g_str_has_prefix (function, XSD_NS)) {
+       if (g_str_has_prefix (function, XSD_NS) ||
+           strcmp (function, RDF_NS "langString") == 0) {
                handled = handle_type_cast (sparql, function, error);
        } else if (g_str_has_prefix (function, FN_NS)) {
                handled = handle_xpath_function (sparql, function, error);
@@ -6674,6 +6696,7 @@ translate_BuiltInCall (TrackerSparql  *sparql,
                switch (type) {
                case TRACKER_PROPERTY_TYPE_UNKNOWN:
                case TRACKER_PROPERTY_TYPE_STRING:
+               case TRACKER_PROPERTY_TYPE_LANGSTRING:
                case TRACKER_PROPERTY_TYPE_RESOURCE:
                        retval = _postprocess_rule (sparql, expr, NULL, error);
                        break;
@@ -7020,34 +7043,96 @@ static gboolean
 translate_RDFLiteral (TrackerSparql  *sparql,
                       GError        **error)
 {
+       TrackerParserNode *node;
        TrackerBinding *binding;
+       gchar *str, *langtag = NULL, *cast = NULL;
+       gboolean is_parameter;
+       const TrackerGrammarRule *rule;
+       TrackerPropertyType type;
 
        /* RDFLiteral ::= String ( LANGTAG | ( '^^' iri ) )?
         */
        _call_rule (sparql, NAMED_RULE_String, error);
+       node = sparql->current_state.prev_node;
+       str = _extract_node_string (node, sparql);
+       rule = tracker_parser_node_get_rule (node);
+       is_parameter = tracker_grammar_rule_is_a (rule, RULE_TYPE_TERMINAL,
+                                                 TERMINAL_TYPE_PARAMETERIZED_VAR);
+
        binding = _convert_terminal (sparql);
 
        if (_accept (sparql, RULE_TYPE_TERMINAL, TERMINAL_TYPE_LANGTAG)) {
-               g_object_unref (binding);
-               _unimplemented ("LANGTAG");
+               langtag = _dup_last_string (sparql);
+               sparql->current_state.expression_type = TRACKER_PROPERTY_TYPE_LANGSTRING;
        } else if (_accept (sparql, RULE_TYPE_LITERAL, LITERAL_DOUBLE_CIRCUMFLEX)) {
-               TrackerPropertyType type;
-               gchar *cast;
-
                _call_rule (sparql, NAMED_RULE_iri, error);
                cast = _dup_last_string (sparql);
-               sparql->current_state.expression_type = rdf_type_to_property_type (cast);
+       }
+
+       if (is_parameter && (langtag || cast)) {
+               g_free (str);
+               g_free (langtag);
                g_free (cast);
+               _raise (PARSE, "Parameter cannot have LANGTAG/^^ modifiers", "RDFLiteral");
        }
 
-       tracker_binding_set_data_type (binding, sparql->current_state.expression_type);
+       if (is_parameter) {
+               binding = tracker_parameter_binding_new (str, NULL);
+       } else {
+               GString *langstr;
+               GBytes *bytes;
+
+               langstr = g_string_new (str);
+
+               if (langtag) {
+                       g_string_append_c (langstr, '\0');
+                       g_string_append_printf (langstr, "%s", &langtag[1]);
+               }
+
+               bytes = g_bytes_new_with_free_func (langstr->str,
+                                                   langstr->len + 1,
+                                                   g_free, NULL);
+               g_string_free (langstr, FALSE);
+
+               binding = tracker_literal_binding_new (bytes, NULL);
+               g_bytes_unref (bytes);
+       }
+
+       if (cast) {
+               type = rdf_type_to_property_type (cast);
+       } else if (langtag) {
+               type = TRACKER_PROPERTY_TYPE_LANGSTRING;
+       } else {
+               type = TRACKER_PROPERTY_TYPE_STRING;
+       }
+
+       sparql->current_state.expression_type = type;
+       tracker_binding_set_data_type (binding, type);
 
        if (sparql->current_state.type == TRACKER_SPARQL_TYPE_SELECT) {
+               // FIXME binding may be be parameter?
                tracker_select_context_add_literal_binding (TRACKER_SELECT_CONTEXT (sparql->context),
                                                            TRACKER_LITERAL_BINDING (binding));
        }
 
+       if (sparql->current_state.token) {
+               if (is_parameter) {
+                       tracker_token_parameter_init (sparql->current_state.token,
+                                                     TRACKER_PARAMETER_BINDING (binding)->name);
+               } else {
+                       gconstpointer data;
+                       gsize len;
+
+                       data = g_bytes_get_data (TRACKER_LITERAL_BINDING (binding)->bytes, &len);
+                       tracker_token_literal_init (sparql->current_state.token,
+                                                   data, len);
+               }
+       }
+
        g_object_unref (binding);
+       g_free (langtag);
+       g_free (cast);
+       g_free (str);
 
        return TRUE;
 }
@@ -7605,6 +7690,8 @@ prepare_query (TrackerDBInterface    *iface,
                        tracker_db_statement_bind_double (stmt, i, datetime);
                } else if (prop_type == TRACKER_PROPERTY_TYPE_INTEGER) {
                        tracker_db_statement_bind_int (stmt, i, atoi (binding->literal));
+               } else if (prop_type == TRACKER_PROPERTY_TYPE_LANGSTRING) {
+                       tracker_db_statement_bind_bytes (stmt, i, binding->bytes);
                } else {
                        tracker_db_statement_bind_text (stmt, i, binding->literal);
                }
diff --git a/src/libtracker-data/tracker-vtab-triples.c b/src/libtracker-data/tracker-vtab-triples.c
index ee13a805e..ddb5fee1c 100644
--- a/src/libtracker-data/tracker-vtab-triples.c
+++ b/src/libtracker-data/tracker-vtab-triples.c
@@ -284,6 +284,7 @@ convert_to_string (const gchar         *table_name,
 {
        switch (type) {
        case TRACKER_PROPERTY_TYPE_STRING:
+       case TRACKER_PROPERTY_TYPE_LANGSTRING:
        case TRACKER_PROPERTY_TYPE_INTEGER:
                return g_strdup_printf ("t.\"%s\"", table_name);
        case TRACKER_PROPERTY_TYPE_RESOURCE:
diff --git a/src/ontologies/11-rdf.ontology b/src/ontologies/11-rdf.ontology
index 0277115e9..efbf68f0e 100644
--- a/src/ontologies/11-rdf.ontology
+++ b/src/ontologies/11-rdf.ontology
@@ -28,6 +28,16 @@ rdfs:Literal a rdfs:Class ;
        rdfs:label "Literal" ;
        rdfs:subClassOf rdfs:Resource .
 
+rdfs:Datatype a rdfs:Class ;
+       rdfs:label "Datatype" ;
+       rdfs:comment "The class of RDF datatypes." ;
+       rdfs:subClassOf rdfs:Class .
+
+rdf:langString a rdfs:Class, rdfs:Datatype ;
+       rdfs:subClassOf rdfs:Literal ;
+       rdfs:label "langString" ;
+       rdfs:comment "The datatype of language-tagged string values" .
+
 rdf:type a rdf:Property ;
        rdfs:domain rdfs:Resource ;
        rdfs:range rdfs:Class .


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]