[Rhythmbox-devel] [PATCH] query parser



Hello,
I've just written the lexer mentioned a few days ago, which understands
queries like «by pink floyd and not (in obscured or in final cut)».
I have hooked it to the search box - it will fall back on normal search
if it doesn't recognise the syntax, but I'm planning to do a pretty
printer that does the reverse so that I can replace the smart playlist
editor. Then we'll have a playlist editor that is easy to use and allows
nested queries.

Patch is attached, it's a work in progress but it works.
For now I need some help with the Makefiles, because apparently editing
Makefile.am and re-running automake breaks things (I think it's not
related to the patch).
The makefiles should handle going from the .l to a .c to a .o .

Also I'd like someone to review this patch on bugzilla -
http://bugzilla.gnome.org/show_bug.cgi?id=165583 .
I'd like to build on it to also drop queries to playlists.
--- rhythmbox-0.8.8.myorig/rhythmdb/rhythmdb-query-parser.l	1970-01-01 01:00:00.000000000 +0100
+++ rhythmbox-0.8.8.perso/rhythmdb/rhythmdb-query-parser.l	2005-03-28 04:10:35.000000000 +0200
@@ -0,0 +1,355 @@
+/*
+ *  arch-tag: Lex code for the query parser.
+ *
+ *  Copyright (C) 2005 Gabriel de Perthuis <Gabriel de-Perthuis laPoste net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/*
+	Syntax:
+	by the cranberries OR in big calm OR (by Rinôçerôse AND NOT in installation sonore)
+	par the cranberries OU album big calm OU (par Rinôçerôse ET PAS album installation sonore)
+	
+	AND will have priority over OR: ie A OR B AND C means A OR (B AND C)
+	(because rhythmdb queries are like that)
+
+	question: two word keywords?
+	do it by using s instead of yytext in is_kwd.
+	depends if the syntax benefits.
+
+	
+
+	note: could do implicit AND
+
+	Note: the syntax could be extended for both 'property' and 'criterion',
+	e.g. «rated over 4». That would make translation harder, though.
+	
+	Translating: is the prefix grammar common to all languages? Well, we
+	can fallback on petit-nègre, not too unusual in the field.
+*/
+
+%option noyywrap reentrant nodefault
+%option warn perf-report
+/*%option debug*/
+
+%{
+	#include <stdio.h> //string to stream
+	#include <glib.h> //GPtrArray
+	
+	#include <glib/gi18n.h> //_(), Q_()
+	
+	#include "rhythmdb.h" //Queries
+
+	#include "rhythmdb-query-parser.h"
+
+	typedef enum { /* binary connective reported by binary_keyword() */
+		AND, /* implicit in the built query: the rules append nothing for it */
+		OR /* makes the rules append RHYTHMDB_QUERY_DISJUNCTION */
+	} AndOr;
+
+	typedef struct { /* per-scanner state, reached from the rules through yyextra */
+		RhythmDB* db; /* passed to every rhythmdb_query_append() call */
+		GPtrArray* query; /* query being built; scan_query() returns it on success */
+
+		char* s; /* the text being scanned (the PAREN rule writes a NUL into it) */
+		int s_position; /* offset in s just past the current token (see YY_USER_ACTION) */
+
+		gboolean not; /* toggled by the "not" keyword */
+		gboolean fuzzy; /* marks both fuzzy (ie match part of it) and folded */
+		RhythmDBQueryType query_type; /* RHYTHMDB_QUERY_END until a property keyword is seen */
+		RhythmDBPropType prop_type; /* property selected by the keyword ("in" or "by") */
+
+		int mark; /* offset in s where the current string argument or subquery starts */
+		int mark_end; /* offset in s just past the last word of the string argument */
+	} Status;
+	#define YY_EXTRA_TYPE Status*
+	//See the start states below to find where fields are used
+
+	#define YY_USER_ACTION yyextra->s_position += yyleng; /* keep s_position in step with the matched text */
+#if 1
+	#define yyterminate return /* yyterminate(1) thus expands to return (1), the value yylex() yields */
+#else //for breaking in gdb
+	int myyyterminate (int i){ /* prints the code; a convenient breakpoint target */
+		printf("myyterminate %d\n", i);
+		return i;
+	}
+	#define yyterminate(i) {myyyterminate(i); return i;}
+#endif
+
+/*todo: propagate messages with errors*/
+	static GPtrArray* scan_query(RhythmDB*, char*);	
+	static gboolean prop_or_modif(char* word, gboolean* not, gboolean* fuzzy, RhythmDBQueryType*, RhythmDBPropType*);
+	static gboolean binary_keyword(char*, AndOr*);
+	static int query_append_string_chunk(Status*);
+%}
+
+
+DIGIT		[[:digit:]]
+NUMBER		{DIGIT}+("."{DIGIT}*)?
+SPACING		[ ]+
+
+WORD		[[:alnum:]]+
+
+/*INITIAL used for errors on «[^ ]». Non exclusive rules inherit.
+	As a consequence, rules that pass on . are %x exclusive*/
+/*
+	before writing a BEGIN(), be sure to initialise these:
+	strarg uses mark and mark_end
+	paren uses mark
+	args use prop_type and query_type
+	kwdormodif uses not, fuzzy, and whether query_type is none
+*/
+%s		KWDORMODIF
+
+%s		STRARG
+%s		NUMARG
+
+%s		PAREN
+%s		AFTERPAREN
+
+
+%%
+	if(YY_START == INITIAL){ /* code before the first rule: executed when yylex() is entered */
+		yyextra->not = FALSE;
+		yyextra->fuzzy = TRUE;
+		yyextra->query_type = RHYTHMDB_QUERY_END; /* no property keyword seen yet */
+		BEGIN(KWDORMODIF); //could be locale-dependent?
+	}
+
+<KWDORMODIF>{WORD} { /* a word where a property keyword, a modifier or an argument may start */
+	if(prop_or_modif(yytext, &yyextra->not, &yyextra->fuzzy, &(yyextra->query_type), &(yyextra->prop_type))){
+		fprintf(stderr, "prop keyword or modifier: %s\n", yytext);
+	} else {
+		if(yyextra->query_type == RHYTHMDB_QUERY_END){
+			yyterminate(1);
+			//expecting keyword!
+		}
+		yyextra->mark = yyextra->s_position - yyleng; /* this word opens the string argument */
+		yyextra->mark_end = yyextra->s_position;
+		BEGIN(STRARG);
+	}
+}
+<KWDORMODIF>"(" { /* start of a parenthesised subquery */
+	if(yyextra->query_type != RHYTHMDB_QUERY_END){
+		yyterminate(1);
+		//keyword then (
+		//note: we aren't catching errors like «exactly (a or b)»
+	}
+
+	fprintf(stderr, "subquery\n"); /* NOTE(review): yyextra->not is not consulted here, so a negated subquery looks un-negated - confirm */
+	yyextra->mark = yyextra->s_position; /* subquery text starts just after the opening parenthesis */
+	BEGIN(PAREN);
+}
+<NUMARG>{NUMBER} { /* braces expand the NUMBER definition; a bare NUMBER only matched those six letters */
+	float f=0; /* parsed but not used yet; no rule in this file BEGINs NUMARG so far */
+	sscanf(yytext, "%f", &f);
+}
+
+<STRARG>{WORD} { /* inside a string argument: either one more word of it, or a connective ending it */
+	AndOr andor;
+	if(binary_keyword(yytext, &andor)){
+		if(query_append_string_chunk(yyextra)) /* flush the argument collected so far */
+			yyterminate(1);
+
+		fprintf(stderr, "binary keyword: %s\n", yytext);
+//AND is implicit
+		if(andor == OR){
+			rhythmdb_query_append(yyextra->db, yyextra->query, RHYTHMDB_QUERY_DISJUNCTION, RHYTHMDB_QUERY_END);
+		}
+
+		yyextra->mark = yyextra->s_position;
+		yyextra->not = FALSE; /* reset the per-criterion state before the next criterion */
+		yyextra->fuzzy = TRUE;
+		yyextra->query_type = RHYTHMDB_QUERY_END;
+		BEGIN(KWDORMODIF);
+	} else {
+		yyextra->mark_end = yyextra->s_position; /* extend the argument to include this word */
+	}
+}
+
+<STRARG><<EOF>> { /* input ends inside a string argument: flush it, then report success */
+	if(query_append_string_chunk(yyextra))
+		yyterminate(1);
+	yyterminate(0);
+}
+
+<PAREN>[^)] { /* skip to the first closing parenthesis; NOTE(review): nested parentheses look unsupported */
+}
+<PAREN>")" {
+	(yyextra->s)[(yyextra->s_position - 1)] = '\0'; /* overwrite the closing parenthesis so s + mark is a terminated substring */
+	GPtrArray* subquery = scan_query(yyextra->db, yyextra->s + yyextra->mark); /* recursive scan of the enclosed text */
+	if(subquery == NULL)
+		yyterminate(1);
+			rhythmdb_query_append(yyextra->db, yyextra->query, RHYTHMDB_QUERY_SUBQUERY, subquery, RHYTHMDB_QUERY_END);
+	BEGIN(AFTERPAREN);
+}
+<PAREN><<EOF>> {
+	//major fatal syntax error
+	//missing )
+	yyterminate(1);
+}
+
+<AFTERPAREN>{WORD} { /* after a subquery only a connective may follow */
+	AndOr andor;
+	if(binary_keyword(yytext, &andor)){
+		fprintf(stderr, "binary keyword: %s\n", yytext);
+
+//AND is implicit
+		if(andor == OR){
+			rhythmdb_query_append(yyextra->db, yyextra->query, RHYTHMDB_QUERY_DISJUNCTION, RHYTHMDB_QUERY_END);
+		}
+
+		yyextra->mark = yyextra->s_position;
+		yyextra->not = FALSE; /* fresh per-criterion state, same values as at the start of the scan */
+		yyextra->fuzzy = TRUE;
+		yyextra->query_type = RHYTHMDB_QUERY_END;
+		BEGIN(KWDORMODIF);
+	} else {
+		yyterminate (1); /* any other word here is a syntax error */
+	}
+}
+
+<AFTERPAREN><<EOF>> { /* input may end right after a subquery: report success */
+	yyterminate(0);
+}
+
+<*>{SPACING} { /* blanks are skipped in every state; s_position still advances */
+}
+
+[)] {
+	//ERROR
+	//special message when it is a )
+	yyterminate(1);
+}
+
+<*>.|\n {
+	//ERROR - last resort, since shortest and at the end.
+	yyterminate(1);
+}
+
+<<EOF>> {
+	//EOF without a condition must be at the end
+	//ERROR - missing argument - a missing ) is handled separately
+	yyterminate(1);
+}
+%%
+
+/* Classify one word read in the KWDORMODIF state. Returns TRUE when it was special:
+ * a property keyword («in», «by»), which sets *qtype and *ptype, or a modifier
+ * («not», «exactly»), which updates *not / *fuzzy. Returns FALSE for any other word,
+ * and for a property keyword once *qtype is already set (it is then argument text). */
+static gboolean prop_or_modif(char* s, gboolean* not, gboolean* fuzzy, RhythmDBQueryType* qtype, RhythmDBPropType* ptype){
+	gboolean is_album = !strcasecmp(s, Q_("keyword|in"));
+	if(is_album || !strcasecmp(s, Q_("keyword|by"))){
+		if(*qtype != RHYTHMDB_QUERY_END)
+			return FALSE;
+		*qtype = *not? RHYTHMDB_QUERY_PROP_NOT_LIKE : RHYTHMDB_QUERY_PROP_LIKE;
+		*ptype = is_album? RHYTHMDB_PROP_ALBUM_FOLDED : RHYTHMDB_PROP_ARTIST_FOLDED;
+		return TRUE;
+	}
+	if(!strcasecmp(s, Q_("keyword|not"))){
+		*not = !*not;
+		/* «by not x»: the property keyword already derived *qtype from the old
+		 * value of *not, so re-derive it here or the negation would be lost. */
+		if(*qtype != RHYTHMDB_QUERY_END)
+			*qtype = *not? RHYTHMDB_QUERY_PROP_NOT_LIKE : RHYTHMDB_QUERY_PROP_LIKE;
+		return TRUE;
+	}
+	if(!strcasecmp(s, Q_("keyword|exactly"))){
+		*fuzzy = FALSE; /* NOTE(review): nothing in this file reads fuzzy yet */
+		return TRUE;
+	}
+	return FALSE;
+}
+
+/* Recognise a binary connective: on a match store it in *andor and return TRUE. */
+static gboolean binary_keyword(char* s, AndOr* andor){
+	/* NOTE(review): the context prefix is "Keyword|" here but "keyword|" in prop_or_modif */
+	gboolean is_or = (strcasecmp(s, Q_("Keyword|or")) == 0);
+
+	if(!is_or && strcasecmp(s, Q_("Keyword|and")) != 0)
+		return FALSE; /* *andor is left untouched */
+
+	*andor = is_or? OR : AND;
+	return TRUE;
+}
+
+/* Append s->s[mark, mark_end) to s->query; returns 0, or 1 if the range is empty or
+ * allocation fails. TODO: handle not and fuzzy here (cf. g_utf8_casefold for folding). */
+static int query_append_string_chunk(Status* s){
+	int length = s->mark_end - s->mark;
+	if(length <= 0)
+		return 1;
+	char* s2 = malloc((size_t)length + 1);
+	if(s2 == NULL) /* malloc used to be unchecked: memcpy into NULL would crash */
+		return 1;
+	memcpy(s2, s->s + s->mark, length);
+	s2[length]='\0';
+	fprintf(stderr, "string arg: «%s»\n", s2);
+	/* NOTE(review): s2 is not freed here; confirm rhythmdb_query_append takes ownership */
+	rhythmdb_query_append(s->db, s->query, s->query_type, s->prop_type, s2, RHYTHMDB_QUERY_END);
+	return 0;
+}
+
+/* Lex s into a freshly allocated query for db; returns it, or NULL if s is empty or
+ * not valid query syntax. Warning: this will punch little holes in your string. */
+static GPtrArray* scan_query(RhythmDB* db, char* s){
+	yyscan_t scanner;
+	Status status;
+	/* fmemopen() can fail (some libcs reject a zero-length buffer) and flex reads
+	 * stdin when yyin is NULL, so never hand the scanner a NULL stream. */
+	FILE* in = (s != NULL && *s != '\0') ? fmemopen (s, strlen (s), "r") : NULL;
+	if(in == NULL)
+		return NULL;
+
+	status.db = db;
+	status.query = g_ptr_array_new ();
+	status.s = s;
+	status.s_position = 0;
+	status.mark_end = 0;
+
+	yylex_init(&scanner);
+	yyset_extra(&status, scanner);
+	yyset_in(in, scanner);
+	int err = yylex(scanner);
+	fclose(in);
+	yylex_destroy(scanner);
+	/* NOTE(review): on error the partially built status.query is leaked - TODO free it */
+	return err?NULL:status.query;
+}
+
+/*for public use, a long long name*/
+GPtrArray* rhythmdb_query_parse_from_string(RhythmDB* db, char* s) {
+	char* s2 = g_strdup(s);
+	GPtrArray* r = scan_query(db, strdup(s));
+	g_free(s2);
+	return r;
+}
+
+#if 0 //for testing
+/* Test driver: lex argv[1], or a built-in sample query unless exactly one argument is given. */
+int main(int argc, char** argv){
+	/* the sample is duplicated because scan_query() writes into its input */
+	char* input = (argc == 2) ? argv[1] : strdup("by any chance or (by someone else and by the way)");
+	GPtrArray* parsed = scan_query(NULL, input);
+	int failed = (parsed == NULL);
+
+	if(failed)
+		printf("We got an error\n");
+	exit(failed);
+}
+#endif
--- rhythmbox-0.8.8.myorig/sources/rb-library-source.c	2004-05-08 00:26:30.000000000 +0200
+++ rhythmbox-0.8.8.perso/sources/rb-library-source.c	2005-03-28 00:19:51.000000000 +0200
@@ -57,6 +57,7 @@ typedef enum
 	RB_LIBRARY_QUERY_TYPE_ARTIST,
 	RB_LIBRARY_QUERY_TYPE_ALBUM,
 	RB_LIBRARY_QUERY_TYPE_SEARCH,
+	RB_LIBRARY_QUERY_TYPE_COMPLEX_SEARCH,
 } RBLibraryQueryType;
 
 static void rb_library_source_class_init (RBLibrarySourceClass *klass);
@@ -939,7 +940,7 @@ impl_search (RBSource *asource, const ch
 
 	g_free (source->priv->search_text);
 	source->priv->search_text = search_text != NULL ? g_utf8_casefold (search_text, -1) : NULL;
-	rb_library_source_do_query (source, RB_LIBRARY_QUERY_TYPE_SEARCH);
+	rb_library_source_do_query (source, RB_LIBRARY_QUERY_TYPE_COMPLEX_SEARCH);
 
 	rb_source_notify_filter_changed (RB_SOURCE (source));
 }
@@ -1341,7 +1342,12 @@ construct_query_from_selection (RBLibrar
 	 */
 
 	if (source->priv->search_text) {
-		GPtrArray *subquery = rhythmdb_query_parse (source->priv->db,
+fprintf(stderr, "search text: «%s»\n", source->priv->search_text);
+	
+		GPtrArray* subquery;
+		subquery = rhythmdb_query_parse_from_string(source->priv->db, source->priv->search_text);
+		if(!subquery)
+			subquery = rhythmdb_query_parse (source->priv->db,
 							    RHYTHMDB_QUERY_PROP_LIKE,
 							    RHYTHMDB_PROP_GENRE_FOLDED,
 							    source->priv->search_text,
@@ -1358,14 +1364,16 @@ construct_query_from_selection (RBLibrar
 							    RHYTHMDB_PROP_TITLE_FOLDED,
 							    source->priv->search_text,
 							    RHYTHMDB_QUERY_END);
+			/* select where type="song" and
+			 *  (genre like "foo" or artist like "foo" or album like "foo")
+			 */
+
 		rhythmdb_query_append (source->priv->db,
 				       query,
 				       RHYTHMDB_QUERY_SUBQUERY,
 				       subquery,
 				       RHYTHMDB_QUERY_END);
-		/* select where type="song" and
-		 *  (genre like "foo" or artist like "foo" or album like "foo")
-		 */
+			
 	}
 
 	if (source->priv->selected_genres) {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]