[libgda] Added REGEXP functions to SQLite



commit cdcacaa7f02fd89a315c422c9289ee8749feb473
Author: Vivien Malerba <malerba gnome-db org>
Date:   Wed Mar 16 21:19:13 2011 +0100

    Added REGEXP functions to SQLite

 doc/C/prov-notes.xml                     |   47 ++++++++-
 libgda/sqlite/gda-sqlite-provider.c      |  182 +++++++++++++++++++++++++++++-
 providers/sqlite/sqlite_specs_dsn.xml.in |    3 +
 3 files changed, 225 insertions(+), 7 deletions(-)
---
diff --git a/doc/C/prov-notes.xml b/doc/C/prov-notes.xml
index 9fda183..6888d76 100644
--- a/doc/C/prov-notes.xml
+++ b/doc/C/prov-notes.xml
@@ -1,6 +1,8 @@
 <?xml version="1.0"?>
 <!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
-          "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd";>
+          "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd";[
+<!ENTITY LIBGDA          "<application>Libgda</application>">
+]>
 <chapter id="provider-notes">
   <title>Provider's notes</title>
 
@@ -71,10 +73,53 @@
 	      directory is used.</entry>
               <entry>No</entry>
 	    </row>
+	    <row>
+              <entry>LOAD_GDA_FUNCTIONS</entry>
+              <entry>If set to TRUE, then some extra functions defined by &LIBGDA; are added. The functions are
+	      for &LIBGDA;'s own internal use and of little interrest otherwise.
+	      </entry>
+              <entry>No</entry>
+	    </row>
+	    <row>
+              <entry>REGEXP</entry>
+              <entry>If set to TRUE, then the <function>regexp()</function> and <function>regexp_match()</function>
+	      functions are defined. The <function>regexp()</function> function is used by SQL statement with a
+	      construct as "x REGEXP y", and the <function>regexp_match()</function> is more general. The default for
+	      this option is TRUE. See below for more information about this function.
+	      </entry>
+              <entry>No</entry>
+	    </row>
 	  </tbody>
 	</tgroup>
       </table>
     </para>
+    <sect2>
+      <title>The <function>regexp_match()</function> function</title>
+      <para>
+	This function accepts at most 3 arguments, in that order:
+	<itemizedlist>
+	  <listitem><para>the string on which the regular expression will be applied</para></listitem>
+	  <listitem><para>the regular expression to apply</para></listitem>
+	  <listitem><para>the options (not mandatory)</para></listitem>
+	</itemizedlist>
+      </para>
+      <para>
+	The options are specified as a string where each character corresponds to a boolean flag (the presence of the
+	character meaning the flag is set). They are:
+	<itemizedlist>
+	  <listitem><para>'i': specifies a case insensitive matching</para></listitem>
+	  <listitem><para>'m': specifies that the the "start of line" and "end of line" constructs match immediately
+	  following or immediately before any newline in the string, respectively, as well as at the very start
+	  and end of the string</para></listitem>
+	  <listitem><para>'v': specifies that instead of returning 0 or 1 as a result for matching or non
+	  matching, the function returns the string which actually matches, or NULL if not match was found</para></listitem>
+	</itemizedlist>	
+      </para>
+      <para>
+	This function is implemented using GLib's Perl-compatible regular expressions implementation:
+	<link linkend="GRegex">GRegex</link>, itself based on the <ulink url="http://www.pcre.org/";>PCRE</ulink> library.
+      </para>
+    </sect2>
     <para>
       Also refer to the <link linkend="limitations_sqlite">SQLite's provider's limitations</link>.
     </para>
diff --git a/libgda/sqlite/gda-sqlite-provider.c b/libgda/sqlite/gda-sqlite-provider.c
index a99a297..1457254 100644
--- a/libgda/sqlite/gda-sqlite-provider.c
+++ b/libgda/sqlite/gda-sqlite-provider.c
@@ -1,5 +1,5 @@
-/* GDA SQLite provider
- * Copyright (C) 1998 - 2010 The GNOME Foundation.
+/*
+ * Copyright (C) 1998 - 2011 The GNOME Foundation.
  *
  * AUTHORS:
  *	Rodrigo Moya <rodrigo gnome-db org>
@@ -339,9 +339,19 @@ static ScalarFunction scalars[] = {
 	{"gda_hex_print", 1, NULL, scalar_gda_hex_print_func},
 	{"gda_hex_print", 2, NULL, scalar_gda_hex_print_func2},
 	{"gda_hex", 1, NULL, scalar_gda_hex_func},
-	{"gda_hex", 2, NULL, scalar_gda_hex_func2},
+	{"gda_hex", 2, NULL, scalar_gda_hex_func2}
 };
 
+/* Regexp handling */
+static void scalar_regexp_func (sqlite3_context *context, int argc, sqlite3_value **argv);
+static void scalar_regexp_match_func (sqlite3_context *context, int argc, sqlite3_value **argv);
+
+static ScalarFunction regexp_functions[] = {
+	{"regexp", 2, NULL, scalar_regexp_func},
+	{"regexp", 3, NULL, scalar_regexp_func},
+	{"regexp_match", 2, NULL, scalar_regexp_match_func},
+	{"regexp_match", 3, NULL, scalar_regexp_match_func}
+};
 
 /*
  * Prepared internal statements
@@ -621,7 +631,7 @@ gda_sqlite_provider_open_connection (GdaServerProvider *provider, GdaConnection
 	gchar *filename = NULL;
 	const gchar *dirname = NULL, *dbname = NULL;
 	const gchar *is_virtual = NULL;
-	const gchar *use_extra_functions = NULL, *with_fk = NULL;
+	const gchar *use_extra_functions = NULL, *with_fk = NULL, *regexp;
 	gint errmsg;
 	SqliteConnectionData *cdata;
 	gchar *dup = NULL;
@@ -645,6 +655,7 @@ gda_sqlite_provider_open_connection (GdaServerProvider *provider, GdaConnection
 	is_virtual = gda_quark_list_find (params, "_IS_VIRTUAL");
 	with_fk = gda_quark_list_find (params, "FK");
 	use_extra_functions = gda_quark_list_find (params, "LOAD_GDA_FUNCTIONS");
+	regexp = gda_quark_list_find (params, "REGEXP");
 	if (auth)
 		passphrase = gda_quark_list_find (auth, "PASSWORD");
 
@@ -873,6 +884,24 @@ gda_sqlite_provider_open_connection (GdaServerProvider *provider, GdaConnection
 			}
 		}
 	}
+
+	if (!regexp || ((*regexp == 't') || (*regexp == 'T'))) {
+		gsize i;
+
+		for (i = 0; i < sizeof (regexp_functions) / sizeof (ScalarFunction); i++) {
+			ScalarFunction *func = (ScalarFunction *) &(regexp_functions [i]);
+			gint res = SQLITE3_CALL (sqlite3_create_function) (cdata->connection, 
+									   func->name, func->nargs,
+									   SQLITE_UTF8, func->user_data, 
+									   func->xFunc, NULL, NULL);
+			if (res != SQLITE_OK) {
+				gda_sqlite_free_cnc_data (cdata);
+				gda_connection_internal_set_provider_data (cnc, NULL, (GDestroyNotify) gda_sqlite_free_cnc_data);
+				g_static_rec_mutex_unlock (&cnc_mutex);
+				return FALSE;
+			}
+		}
+	}
 	
 	if (SQLITE3_CALL (sqlite3_threadsafe) ())
 		g_object_set (G_OBJECT (cnc), "thread-owner", NULL, NULL);
@@ -1619,15 +1648,21 @@ sqlite_render_operation (GdaSqlOperation *op, GdaSqlRenderingContext *context, G
 		str = g_strdup_printf ("%s != %s", SQL_OPERAND (sql_list->data)->sql, SQL_OPERAND (sql_list->next->data)->sql);
 		break;
 	case GDA_SQL_OPERATOR_TYPE_REGEXP:
-		str = g_strdup_printf ("%s REGEXP %s", SQL_OPERAND (sql_list->data)->sql, SQL_OPERAND (sql_list->next->data)->sql);
+		str = g_strdup_printf ("regexp (%s, %s)", SQL_OPERAND (sql_list->next->data)->sql, SQL_OPERAND (sql_list->data)->sql);
 		break;
 	case GDA_SQL_OPERATOR_TYPE_REGEXP_CI:
+		str = g_strdup_printf ("regexp (%s, %s, 'i')", SQL_OPERAND (sql_list->next->data)->sql,
+				       SQL_OPERAND (sql_list->data)->sql);
+		break;
 	case GDA_SQL_OPERATOR_TYPE_NOT_REGEXP_CI:
+		str = g_strdup_printf ("NOT regexp (%s, %s, 'i')", SQL_OPERAND (sql_list->next->data)->sql,
+				       SQL_OPERAND (sql_list->data)->sql);
 	case GDA_SQL_OPERATOR_TYPE_SIMILAR:
 		/* does not exist in SQLite => error */
 		break;
 	case GDA_SQL_OPERATOR_TYPE_NOT_REGEXP:
-		str = g_strdup_printf ("NOT %s REGEXP %s", SQL_OPERAND (sql_list->data)->sql, SQL_OPERAND (sql_list->next->data)->sql);
+		str = g_strdup_printf ("NOT regexp (%s, %s)", SQL_OPERAND (sql_list->next->data)->sql,
+				       SQL_OPERAND (sql_list->data)->sql);
 		break;
 	case GDA_SQL_OPERATOR_TYPE_REM:
 		str = g_strdup_printf ("%s %% %s", SQL_OPERAND (sql_list->data)->sql, SQL_OPERAND (sql_list->next->data)->sql);
@@ -3137,6 +3172,141 @@ scalar_gda_hex_func2 (sqlite3_context *context, int argc, sqlite3_value **argv)
 }
 
 static void
+scalar_regexp_func (sqlite3_context *context, int argc, sqlite3_value **argv)
+{
+	GRegex *regex = NULL;
+	GError *error = NULL;
+	const gchar *str, *pattern, *options = NULL;
+	GRegexCompileFlags flags = G_REGEX_OPTIMIZE;
+	gboolean as_boolean = TRUE;
+
+#define MAX_DEFINED_REGEX 10
+	static GArray *re_array = NULL; /* array of signatures (pattern+option) */
+	static GHashTable *re_hash = NULL; /* hash of GRegex */
+
+	if ((argc != 2) && (argc != 3)) {
+		SQLITE3_CALL (sqlite3_result_error) (context, _("Function requires two or three arguments"), -1);
+		return;
+	}
+
+	str = (gchar*) SQLITE3_CALL (sqlite3_value_text) (argv [1]);
+	if (!str) {
+		SQLITE3_CALL (sqlite3_result_null) (context);
+		return;
+	}
+
+	pattern = (gchar*) SQLITE3_CALL (sqlite3_value_text) (argv [0]);
+	if (!pattern) {
+		SQLITE3_CALL (sqlite3_result_null) (context);
+		return;
+	}
+
+	if (argc == 3)
+		options = (gchar*) SQLITE3_CALL (sqlite3_value_text) (argv [2]);
+	
+	if (options) {
+		const gchar *ptr;
+		for (ptr = options; *ptr; ptr++) {
+			switch (*ptr) {
+			case 'i':
+			case 'I':
+				flags |= G_REGEX_CASELESS;
+				break;
+			case 'm':
+			case 'M':
+				flags |= G_REGEX_MULTILINE;
+				break;
+			case 'v':
+			case 'V':
+				as_boolean = FALSE;
+				break;
+			}
+		}
+	}
+
+	GString *sig;
+	sig = g_string_new (pattern);
+	g_string_append_c (sig, 0x01);
+	if (options && *options)
+		g_string_append (sig, options);
+	
+	if (re_hash)
+		regex = g_hash_table_lookup (re_hash, sig->str);
+	if (regex) {
+		/*g_print ("FOUND GRegex %p as [%s]\n", regex, sig->str);*/
+		g_string_free (sig, TRUE);
+	}
+	else {
+		regex = g_regex_new ((const gchar*) pattern, flags, 0, &error);
+		if (! regex) {
+			gda_log_error (_("SQLite regexp '%s' error:"), pattern,
+				       error && error->message ? error->message : _("Invalid regular expression"));
+			g_clear_error (&error);
+			if (as_boolean)
+				SQLITE3_CALL (sqlite3_result_int) (context, 0);
+			else
+				SQLITE3_CALL (sqlite3_result_null) (context);
+
+			g_string_free (sig, TRUE);
+			return;
+		}
+
+		if (!re_array) {
+			re_array = g_array_new (FALSE, FALSE, sizeof (gchar*));
+			re_hash = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, (GDestroyNotify) g_regex_unref);
+		}
+		/*g_print ("ADDED new GRegex %p as [%s]\n", regex, sig->str);*/
+		g_hash_table_insert (re_hash, sig->str, regex);
+		g_array_prepend_val (re_array, sig->str);
+		g_string_free (sig, FALSE);
+		if (re_array->len > MAX_DEFINED_REGEX) {
+			/* get rid of the 'oldest' GRexex */
+			gchar *osig;
+			osig = g_array_index (re_array, gchar*, re_array->len - 1);
+			/*g_print ("REMOVED GRegex [%s]\n", osig);*/
+			g_hash_table_remove (re_hash, osig);
+			g_array_remove_index (re_array, re_array->len - 1);
+		}
+	}
+
+	if (as_boolean) {
+		if (g_regex_match (regex, str, 0, NULL))
+			SQLITE3_CALL (sqlite3_result_int) (context, 1);
+		else
+			SQLITE3_CALL (sqlite3_result_int) (context, 0);
+	}
+	else {
+		GMatchInfo *match_info;
+		g_regex_match (regex, str, 0, &match_info);
+		if (g_match_info_matches (match_info)) {
+			gchar *word = g_match_info_fetch (match_info, 0);
+			SQLITE3_CALL (sqlite3_result_text) (context, word, -1, g_free);
+		}
+		else
+			SQLITE3_CALL (sqlite3_result_null) (context);
+		g_match_info_free (match_info);
+	}
+}
+
+static void
+scalar_regexp_match_func (sqlite3_context *context, int argc, sqlite3_value **argv)
+{
+	if ((argc != 2) && (argc != 3)) {
+		SQLITE3_CALL (sqlite3_result_error) (context, _("Function requires two or three arguments"), -1);
+		return;
+	}
+
+	sqlite3_value **nargv;
+	nargv = g_new (sqlite3_value*, argc);
+	nargv[0] = argv[1];
+	nargv[1] = argv[0];
+	if (argc == 3)
+		nargv[2] = argv[2];
+	scalar_regexp_func (context, argc, nargv);
+	g_free (nargv);
+}
+
+static void
 gda_sqlite_free_cnc_data (SqliteConnectionData *cdata)
 {
 	if (!cdata)
diff --git a/providers/sqlite/sqlite_specs_dsn.xml.in b/providers/sqlite/sqlite_specs_dsn.xml.in
index 8eb2308..201a379 100644
--- a/providers/sqlite/sqlite_specs_dsn.xml.in
+++ b/providers/sqlite/sqlite_specs_dsn.xml.in
@@ -4,6 +4,9 @@
     <parameter id="DB_NAME" _name="Database name" _descr="The name of a database to use (without the .db)" gdatype="gchararray" nullok="FALSE"/>
     <parameter id="DB_DIR" _name="Directory" _descr="Directory where the database file is stored" gdatype="gchararray" nullok="FALSE" plugin="filesel:MODE=PICKFOLDER"/>
     <parameter id="LOAD_GDA_FUNCTIONS" _name="Extra functions" _descr="Enable usage of some extra functions in SQL" gdatype="gboolean" nullok="TRUE"/>
+    <parameter id="REGEXP" _name="Define REGEXP" _descr="Define the REGEXP function" gdatype="gboolean" nullok="TRUE">
+      <gda_value>TRUE</gda_value>
+    </parameter>
     <parameter id="FK" _name="With foreign keys" _descr="Enforce foreign keys" gdatype="gboolean" nullok="TRUE"/>
   </parameters>
 </data-set-spec>



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]