[libgda] Added REGEXP functions to SQLite
- From: Vivien Malerba <vivien src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libgda] Added REGEXP functions to SQLite
- Date: Wed, 16 Mar 2011 20:24:17 +0000 (UTC)
commit cdcacaa7f02fd89a315c422c9289ee8749feb473
Author: Vivien Malerba <malerba gnome-db org>
Date: Wed Mar 16 21:19:13 2011 +0100
Added REGEXP functions to SQLite
doc/C/prov-notes.xml | 47 ++++++++-
libgda/sqlite/gda-sqlite-provider.c | 182 +++++++++++++++++++++++++++++-
providers/sqlite/sqlite_specs_dsn.xml.in | 3 +
3 files changed, 225 insertions(+), 7 deletions(-)
---
diff --git a/doc/C/prov-notes.xml b/doc/C/prov-notes.xml
index 9fda183..6888d76 100644
--- a/doc/C/prov-notes.xml
+++ b/doc/C/prov-notes.xml
@@ -1,6 +1,8 @@
<?xml version="1.0"?>
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
- "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd">
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd"[
+<!ENTITY LIBGDA "<application>Libgda</application>">
+]>
<chapter id="provider-notes">
<title>Provider's notes</title>
@@ -71,10 +73,53 @@
directory is used.</entry>
<entry>No</entry>
</row>
+ <row>
+ <entry>LOAD_GDA_FUNCTIONS</entry>
+ <entry>If set to TRUE, then some extra functions defined by &LIBGDA; are added. The functions are
+ for &LIBGDA;'s own internal use and of little interrest otherwise.
+ </entry>
+ <entry>No</entry>
+ </row>
+ <row>
+ <entry>REGEXP</entry>
+ <entry>If set to TRUE, then the <function>regexp()</function> and <function>regexp_match()</function>
+ functions are defined. The <function>regexp()</function> function is used by SQL statement with a
+ construct as "x REGEXP y", and the <function>regexp_match()</function> is more general. The default for
+ this option is TRUE. See below for more information about this function.
+ </entry>
+ <entry>No</entry>
+ </row>
</tbody>
</tgroup>
</table>
</para>
+ <sect2>
+ <title>The <function>regexp_match()</function> function</title>
+ <para>
+ This function accepts at most 3 arguments, in that order:
+ <itemizedlist>
+ <listitem><para>the string on which the regular expression will be applied</para></listitem>
+ <listitem><para>the regular expression to apply</para></listitem>
+ <listitem><para>the options (not mandatory)</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The options are specified as a string where each character corresponds to a boolean flag (the presence of the
+ character meaning the flag is set). They are:
+ <itemizedlist>
+ <listitem><para>'i': specifies a case insensitive matching</para></listitem>
+ <listitem><para>'m': specifies that the the "start of line" and "end of line" constructs match immediately
+ following or immediately before any newline in the string, respectively, as well as at the very start
+ and end of the string</para></listitem>
+ <listitem><para>'v': specifies that instead of returning 0 or 1 as a result for matching or non
+ matching, the function returns the string which actually matches, or NULL if not match was found</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ This function is implemented using GLib's Perl-compatible regular expressions implementation:
+ <link linkend="GRegex">GRegex</link>, itself based on the <ulink url="http://www.pcre.org/">PCRE</ulink> library.
+ </para>
+ </sect2>
<para>
Also refer to the <link linkend="limitations_sqlite">SQLite's provider's limitations</link>.
</para>
diff --git a/libgda/sqlite/gda-sqlite-provider.c b/libgda/sqlite/gda-sqlite-provider.c
index a99a297..1457254 100644
--- a/libgda/sqlite/gda-sqlite-provider.c
+++ b/libgda/sqlite/gda-sqlite-provider.c
@@ -1,5 +1,5 @@
-/* GDA SQLite provider
- * Copyright (C) 1998 - 2010 The GNOME Foundation.
+/*
+ * Copyright (C) 1998 - 2011 The GNOME Foundation.
*
* AUTHORS:
* Rodrigo Moya <rodrigo gnome-db org>
@@ -339,9 +339,19 @@ static ScalarFunction scalars[] = {
{"gda_hex_print", 1, NULL, scalar_gda_hex_print_func},
{"gda_hex_print", 2, NULL, scalar_gda_hex_print_func2},
{"gda_hex", 1, NULL, scalar_gda_hex_func},
- {"gda_hex", 2, NULL, scalar_gda_hex_func2},
+ {"gda_hex", 2, NULL, scalar_gda_hex_func2}
};
+/* Regexp handling */
+static void scalar_regexp_func (sqlite3_context *context, int argc, sqlite3_value **argv);
+static void scalar_regexp_match_func (sqlite3_context *context, int argc, sqlite3_value **argv);
+
+static ScalarFunction regexp_functions[] = {
+ {"regexp", 2, NULL, scalar_regexp_func},
+ {"regexp", 3, NULL, scalar_regexp_func},
+ {"regexp_match", 2, NULL, scalar_regexp_match_func},
+ {"regexp_match", 3, NULL, scalar_regexp_match_func}
+};
/*
* Prepared internal statements
@@ -621,7 +631,7 @@ gda_sqlite_provider_open_connection (GdaServerProvider *provider, GdaConnection
gchar *filename = NULL;
const gchar *dirname = NULL, *dbname = NULL;
const gchar *is_virtual = NULL;
- const gchar *use_extra_functions = NULL, *with_fk = NULL;
+ const gchar *use_extra_functions = NULL, *with_fk = NULL, *regexp;
gint errmsg;
SqliteConnectionData *cdata;
gchar *dup = NULL;
@@ -645,6 +655,7 @@ gda_sqlite_provider_open_connection (GdaServerProvider *provider, GdaConnection
is_virtual = gda_quark_list_find (params, "_IS_VIRTUAL");
with_fk = gda_quark_list_find (params, "FK");
use_extra_functions = gda_quark_list_find (params, "LOAD_GDA_FUNCTIONS");
+ regexp = gda_quark_list_find (params, "REGEXP");
if (auth)
passphrase = gda_quark_list_find (auth, "PASSWORD");
@@ -873,6 +884,24 @@ gda_sqlite_provider_open_connection (GdaServerProvider *provider, GdaConnection
}
}
}
+
+ if (!regexp || ((*regexp == 't') || (*regexp == 'T'))) {
+ gsize i;
+
+ for (i = 0; i < sizeof (regexp_functions) / sizeof (ScalarFunction); i++) {
+ ScalarFunction *func = (ScalarFunction *) &(regexp_functions [i]);
+ gint res = SQLITE3_CALL (sqlite3_create_function) (cdata->connection,
+ func->name, func->nargs,
+ SQLITE_UTF8, func->user_data,
+ func->xFunc, NULL, NULL);
+ if (res != SQLITE_OK) {
+ gda_sqlite_free_cnc_data (cdata);
+ gda_connection_internal_set_provider_data (cnc, NULL, (GDestroyNotify) gda_sqlite_free_cnc_data);
+ g_static_rec_mutex_unlock (&cnc_mutex);
+ return FALSE;
+ }
+ }
+ }
if (SQLITE3_CALL (sqlite3_threadsafe) ())
g_object_set (G_OBJECT (cnc), "thread-owner", NULL, NULL);
@@ -1619,15 +1648,21 @@ sqlite_render_operation (GdaSqlOperation *op, GdaSqlRenderingContext *context, G
str = g_strdup_printf ("%s != %s", SQL_OPERAND (sql_list->data)->sql, SQL_OPERAND (sql_list->next->data)->sql);
break;
case GDA_SQL_OPERATOR_TYPE_REGEXP:
- str = g_strdup_printf ("%s REGEXP %s", SQL_OPERAND (sql_list->data)->sql, SQL_OPERAND (sql_list->next->data)->sql);
+ str = g_strdup_printf ("regexp (%s, %s)", SQL_OPERAND (sql_list->next->data)->sql, SQL_OPERAND (sql_list->data)->sql);
break;
case GDA_SQL_OPERATOR_TYPE_REGEXP_CI:
+ str = g_strdup_printf ("regexp (%s, %s, 'i')", SQL_OPERAND (sql_list->next->data)->sql,
+ SQL_OPERAND (sql_list->data)->sql);
+ break;
case GDA_SQL_OPERATOR_TYPE_NOT_REGEXP_CI:
+ str = g_strdup_printf ("NOT regexp (%s, %s, 'i')", SQL_OPERAND (sql_list->next->data)->sql,
+ SQL_OPERAND (sql_list->data)->sql);
case GDA_SQL_OPERATOR_TYPE_SIMILAR:
/* does not exist in SQLite => error */
break;
case GDA_SQL_OPERATOR_TYPE_NOT_REGEXP:
- str = g_strdup_printf ("NOT %s REGEXP %s", SQL_OPERAND (sql_list->data)->sql, SQL_OPERAND (sql_list->next->data)->sql);
+ str = g_strdup_printf ("NOT regexp (%s, %s)", SQL_OPERAND (sql_list->next->data)->sql,
+ SQL_OPERAND (sql_list->data)->sql);
break;
case GDA_SQL_OPERATOR_TYPE_REM:
str = g_strdup_printf ("%s %% %s", SQL_OPERAND (sql_list->data)->sql, SQL_OPERAND (sql_list->next->data)->sql);
@@ -3137,6 +3172,141 @@ scalar_gda_hex_func2 (sqlite3_context *context, int argc, sqlite3_value **argv)
}
static void
+scalar_regexp_func (sqlite3_context *context, int argc, sqlite3_value **argv)
+{
+ GRegex *regex = NULL;
+ GError *error = NULL;
+ const gchar *str, *pattern, *options = NULL;
+ GRegexCompileFlags flags = G_REGEX_OPTIMIZE;
+ gboolean as_boolean = TRUE;
+
+#define MAX_DEFINED_REGEX 10
+ static GArray *re_array = NULL; /* array of signatures (pattern+option) */
+ static GHashTable *re_hash = NULL; /* hash of GRegex */
+
+ if ((argc != 2) && (argc != 3)) {
+ SQLITE3_CALL (sqlite3_result_error) (context, _("Function requires two or three arguments"), -1);
+ return;
+ }
+
+ str = (gchar*) SQLITE3_CALL (sqlite3_value_text) (argv [1]);
+ if (!str) {
+ SQLITE3_CALL (sqlite3_result_null) (context);
+ return;
+ }
+
+ pattern = (gchar*) SQLITE3_CALL (sqlite3_value_text) (argv [0]);
+ if (!pattern) {
+ SQLITE3_CALL (sqlite3_result_null) (context);
+ return;
+ }
+
+ if (argc == 3)
+ options = (gchar*) SQLITE3_CALL (sqlite3_value_text) (argv [2]);
+
+ if (options) {
+ const gchar *ptr;
+ for (ptr = options; *ptr; ptr++) {
+ switch (*ptr) {
+ case 'i':
+ case 'I':
+ flags |= G_REGEX_CASELESS;
+ break;
+ case 'm':
+ case 'M':
+ flags |= G_REGEX_MULTILINE;
+ break;
+ case 'v':
+ case 'V':
+ as_boolean = FALSE;
+ break;
+ }
+ }
+ }
+
+ GString *sig;
+ sig = g_string_new (pattern);
+ g_string_append_c (sig, 0x01);
+ if (options && *options)
+ g_string_append (sig, options);
+
+ if (re_hash)
+ regex = g_hash_table_lookup (re_hash, sig->str);
+ if (regex) {
+ /*g_print ("FOUND GRegex %p as [%s]\n", regex, sig->str);*/
+ g_string_free (sig, TRUE);
+ }
+ else {
+ regex = g_regex_new ((const gchar*) pattern, flags, 0, &error);
+ if (! regex) {
+ gda_log_error (_("SQLite regexp '%s' error:"), pattern,
+ error && error->message ? error->message : _("Invalid regular expression"));
+ g_clear_error (&error);
+ if (as_boolean)
+ SQLITE3_CALL (sqlite3_result_int) (context, 0);
+ else
+ SQLITE3_CALL (sqlite3_result_null) (context);
+
+ g_string_free (sig, TRUE);
+ return;
+ }
+
+ if (!re_array) {
+ re_array = g_array_new (FALSE, FALSE, sizeof (gchar*));
+ re_hash = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, (GDestroyNotify) g_regex_unref);
+ }
+ /*g_print ("ADDED new GRegex %p as [%s]\n", regex, sig->str);*/
+ g_hash_table_insert (re_hash, sig->str, regex);
+ g_array_prepend_val (re_array, sig->str);
+ g_string_free (sig, FALSE);
+ if (re_array->len > MAX_DEFINED_REGEX) {
+ /* get rid of the 'oldest' GRexex */
+ gchar *osig;
+ osig = g_array_index (re_array, gchar*, re_array->len - 1);
+ /*g_print ("REMOVED GRegex [%s]\n", osig);*/
+ g_hash_table_remove (re_hash, osig);
+ g_array_remove_index (re_array, re_array->len - 1);
+ }
+ }
+
+ if (as_boolean) {
+ if (g_regex_match (regex, str, 0, NULL))
+ SQLITE3_CALL (sqlite3_result_int) (context, 1);
+ else
+ SQLITE3_CALL (sqlite3_result_int) (context, 0);
+ }
+ else {
+ GMatchInfo *match_info;
+ g_regex_match (regex, str, 0, &match_info);
+ if (g_match_info_matches (match_info)) {
+ gchar *word = g_match_info_fetch (match_info, 0);
+ SQLITE3_CALL (sqlite3_result_text) (context, word, -1, g_free);
+ }
+ else
+ SQLITE3_CALL (sqlite3_result_null) (context);
+ g_match_info_free (match_info);
+ }
+}
+
+static void
+scalar_regexp_match_func (sqlite3_context *context, int argc, sqlite3_value **argv)
+{
+ if ((argc != 2) && (argc != 3)) {
+ SQLITE3_CALL (sqlite3_result_error) (context, _("Function requires two or three arguments"), -1);
+ return;
+ }
+
+ sqlite3_value **nargv;
+ nargv = g_new (sqlite3_value*, argc);
+ nargv[0] = argv[1];
+ nargv[1] = argv[0];
+ if (argc == 3)
+ nargv[2] = argv[2];
+ scalar_regexp_func (context, argc, nargv);
+ g_free (nargv);
+}
+
+static void
gda_sqlite_free_cnc_data (SqliteConnectionData *cdata)
{
if (!cdata)
diff --git a/providers/sqlite/sqlite_specs_dsn.xml.in b/providers/sqlite/sqlite_specs_dsn.xml.in
index 8eb2308..201a379 100644
--- a/providers/sqlite/sqlite_specs_dsn.xml.in
+++ b/providers/sqlite/sqlite_specs_dsn.xml.in
@@ -4,6 +4,9 @@
<parameter id="DB_NAME" _name="Database name" _descr="The name of a database to use (without the .db)" gdatype="gchararray" nullok="FALSE"/>
<parameter id="DB_DIR" _name="Directory" _descr="Directory where the database file is stored" gdatype="gchararray" nullok="FALSE" plugin="filesel:MODE=PICKFOLDER"/>
<parameter id="LOAD_GDA_FUNCTIONS" _name="Extra functions" _descr="Enable usage of some extra functions in SQL" gdatype="gboolean" nullok="TRUE"/>
+ <parameter id="REGEXP" _name="Define REGEXP" _descr="Define the REGEXP function" gdatype="gboolean" nullok="TRUE">
+ <gda_value>TRUE</gda_value>
+ </parameter>
<parameter id="FK" _name="With foreign keys" _descr="Enforce foreign keys" gdatype="gboolean" nullok="TRUE"/>
</parameters>
</data-set-spec>
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]