[Rhythmbox-devel] [PATCH] query parser
- From: Gabriel de Perthuis <Gabriel de-Perthuis laPoste net>
- To: rhythmbox-devel gnome org
- Subject: [Rhythmbox-devel] [PATCH] query parser
- Date: Mon, 28 Mar 2005 18:55:11 +0200
Hello,
I've just written the lexer mentioned a few days ago, which understands
queries like «by pink floyd and not (in obscured or in final cut)».
I have hooked it to the search box - it will fall back on normal search
if it doesn't recognise the syntax, but I'm planning to do a pretty
printer that does the reverse so that I can replace the smart playlist
editor. Then we'll have a playlist editor that is easy to use and allows
nested queries.
Patch is attached, it's a work in progress but it works.
For now I need some help with the Makefiles, because editing Makefile.am
and re-running automake apparently breaks things (I don't think this is
related to the patch).
The Makefiles should handle generating the .c file from the .l file and then compiling it to a .o file.
Also I'd like someone to review this patch on bugzilla -
http://bugzilla.gnome.org/show_bug.cgi?id=165583 .
I'd like to build on it to also drop queries to playlists.
--- rhythmbox-0.8.8.myorig/rhythmdb/rhythmdb-query-parser.l 1970-01-01 01:00:00.000000000 +0100
+++ rhythmbox-0.8.8.perso/rhythmdb/rhythmdb-query-parser.l 2005-03-28 04:10:35.000000000 +0200
@@ -0,0 +1,355 @@
+/*
+ * arch-tag: Lex code for the query parser.
+ *
+ * Copyright (C) 2005 Gabriel de Perthuis <Gabriel de-Perthuis laPoste net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/*
+ Syntax:
+ by the cranberries OR in big calm OR (by Rinôçerôse AND NOT in installation sonore)
+ par the cranberries OU album big calm OU (par Rinôçerôse ET PAS album installation sonore)
+
+ AND will have priority over OR: ie A OR B AND C means A OR (B AND C)
+ (because rhythmdb queries are like that)
+
+ question: two word keywords?
+ do it by using s instead of yytext in is_kwd.
+ depends if the syntax benefits.
+
+
+
+ note: could do implicit AND
+
+ Note: the syntax could be extended for both 'property' and 'criterion',
+ e.g. «rated over 4». Translating such forms could be a problem, though.
+
+ Translating: is the prefix grammar common to all languages? Well, we
+ can fallback on petit-nègre, not too unusual in the field.
+*/
+
+%option noyywrap reentrant nodefault
+%option warn perf-report
+/*%option debug*/
+
+%{
+ #include <stdio.h> //string to stream
+ #include <glib.h> //GPtrArray
+
+ #include <glib/gi18n.h> //_(), Q_()
+
+ #include "rhythmdb.h" //Queries
+
+ #include "rhythmdb-query-parser.h"
+
+ /* Binary connective found between two query terms; filled in by binary_keyword(). */
+ typedef enum {
+ AND,
+ OR
+ } AndOr;
+
+ /* Per-scan state; attached to the scanner with yyset_extra() and read back through yyextra. */
+ typedef struct {
+ RhythmDB* db; /* database handle passed to rhythmdb_query_append() */
+ GPtrArray* query; /* query being built by this scan */
+
+ char* s; /* the string being scanned */
+ int s_position; /* offset in s just past the last matched token; advanced by YY_USER_ACTION */
+
+ gboolean not; /* toggled by the "not" keyword */
+ gboolean fuzzy; /* marks both fuzzy (ie match part of it) and folded */
+ RhythmDBQueryType query_type; /* RHYTHMDB_QUERY_END until a property keyword has been seen */
+ RhythmDBPropType prop_type; /* property selected by the last property keyword */
+
+ int mark; /* start offset in s of the current string argument or subquery */
+ int mark_end; /* end offset in s of the current string argument */
+ } Status;
+ #define YY_EXTRA_TYPE Status*
+ //See the start states below to find where fields are used
+
+ #define YY_USER_ACTION yyextra->s_position += yyleng;
+#if 1
+ #define yyterminate return
+#else //for breaking in gdb
+ int myyyterminate (int i){
+ printf("myyterminate %d\n", i);
+ return i;
+ }
+ #define yyterminate(i) {myyyterminate(i); return i;}
+#endif
+
+/*todo: propagate messages with errors*/
+ static GPtrArray* scan_query(RhythmDB*, char*);
+ static gboolean prop_or_modif(char* word, gboolean* not, gboolean* fuzzy, RhythmDBQueryType*, RhythmDBPropType*);
+ static gboolean binary_keyword(char*, AndOr*);
+ static int query_append_string_chunk(Status*);
+%}
+
+
+DIGIT [[:digit:]]
+NUMBER {DIGIT}+("."{DIGIT}*)?
+SPACING [ ]+
+
+WORD [[:alnum:]]+
+
+/*INITIAL used for errors on «[^ ]». Non exclusive rules inherit.
+ As a consequence, rules that pass on . are %x exclusive*/
+/*
+ Before writing a BEGIN(), be sure to initialise the fields the target state reads:
+ STRARG uses mark and mark_end
+ PAREN uses mark
+ the argument states (STRARG, NUMARG) use prop_type and query_type
+ KWDORMODIF uses not, fuzzy, and whether query_type is still RHYTHMDB_QUERY_END
+*/
+%s KWDORMODIF
+
+%s STRARG
+%s NUMARG
+
+%s PAREN
+%s AFTERPAREN
+
+
+%%
+ /* Runs at the top of yylex(): a scanner still in INITIAL has not read
+ anything yet, so reset the per-term flags and expect a keyword first. */
+ if(YY_START == INITIAL){
+ yyextra->not = FALSE;
+ yyextra->fuzzy = TRUE;
+ yyextra->query_type = RHYTHMDB_QUERY_END;
+ BEGIN(KWDORMODIF); //could be locale-dependent?
+ }
+
+<KWDORMODIF>{WORD} {
+ /* A word at the start of a term: either a property keyword / modifier
+ (consumed by prop_or_modif), or the first word of the string argument. */
+ if(prop_or_modif(yytext, &yyextra->not, &yyextra->fuzzy, &(yyextra->query_type), &(yyextra->prop_type))){
+ fprintf(stderr, "prop keyword or modifier: %s\n", yytext);
+ } else {
+ /* an argument is only valid once a property keyword has set query_type */
+ if(yyextra->query_type == RHYTHMDB_QUERY_END){
+ yyterminate(1);
+ //expecting keyword!
+ }
+ /* s_position already includes this word, so step back by yyleng to mark its start */
+ yyextra->mark = yyextra->s_position - yyleng;
+ yyextra->mark_end = yyextra->s_position;
+ BEGIN(STRARG);
+ }
+}
+<KWDORMODIF>"(" {
+ /* Opening parenthesis: start collecting a subquery. It is rejected when a
+ property keyword has already been read for this term. */
+ if(yyextra->query_type != RHYTHMDB_QUERY_END){
+ yyterminate(1);
+ //keyword then (
+ //note: we aren't catching errors like «exactly (a or b)»
+ }
+
+ /* NOTE(review): a preceding "not" only sets yyextra->not, which the PAREN
+ rules never read, so «not (...)» is currently scanned like «(...)». */
+ fprintf(stderr, "subquery\n");
+ /* mark is the offset of the first character after the "(" */
+ yyextra->mark = yyextra->s_position;
+ BEGIN(PAREN);
+}
+<NUMARG>{NUMBER} {
+ /* Numeric argument. The pattern has to reference the NUMBER definition
+ with braces; a bare NUMBER only matches the literal text "NUMBER". */
+ float f=0;
+ sscanf(yytext, "%f", &f);
+ /* TODO: the parsed value is not appended to the query yet, and no rule
+ currently does BEGIN(NUMARG). */
+}
+
+<STRARG>{WORD} {
+ /* A word inside a string argument: either a binary keyword that ends the
+ argument, or one more word of the argument itself. */
+ AndOr andor;
+ if(binary_keyword(yytext, &andor)){
+ /* mark_end still points at the end of the previous word, so the keyword
+ itself is not part of the appended text */
+ if(query_append_string_chunk(yyextra))
+ yyterminate(1);
+
+ fprintf(stderr, "binary keyword: %s\n", yytext);
+//AND is implicit
+ if(andor == OR){
+ rhythmdb_query_append(yyextra->db, yyextra->query, RHYTHMDB_QUERY_DISJUNCTION, RHYTHMDB_QUERY_END);
+ }
+
+ /* reset the per-term state before scanning the next term */
+ yyextra->mark = yyextra->s_position;
+ yyextra->not = FALSE;
+ yyextra->fuzzy = TRUE;
+ yyextra->query_type = RHYTHMDB_QUERY_END;
+ BEGIN(KWDORMODIF);
+ } else {
+ /* extend the argument to include this word */
+ yyextra->mark_end = yyextra->s_position;
+ }
+}
+
+<STRARG><<EOF>> {
+ /* End of input while reading a string argument: flush the pending argument;
+ the scan succeeds (0) unless the argument turned out to be empty (1). */
+ if(query_append_string_chunk(yyextra))
+ yyterminate(1);
+ yyterminate(0);
+}
+
+<PAREN>[^)] {
+ /* Skip everything up to the first ")".
+ NOTE(review): nested parentheses are therefore cut at the first inner ")". */
+}
+<PAREN>")" {
+ /* Overwrite the ")" with a terminator so the text since mark becomes a
+ standalone string, then scan that string recursively as a subquery. */
+ (yyextra->s)[(yyextra->s_position - 1)] = '\0';
+ GPtrArray* subquery = scan_query(yyextra->db, yyextra->s + yyextra->mark);
+ if(subquery == NULL)
+ yyterminate(1);
+ rhythmdb_query_append(yyextra->db, yyextra->query, RHYTHMDB_QUERY_SUBQUERY, subquery, RHYTHMDB_QUERY_END);
+ BEGIN(AFTERPAREN);
+}
+<PAREN><<EOF>> {
+ //fatal syntax error
+ //missing )
+ yyterminate(1);
+}
+
+<AFTERPAREN>{WORD} {
+ /* After a closing parenthesis only a binary keyword may follow; any other
+ word is a syntax error. */
+ AndOr andor;
+ if(binary_keyword(yytext, &andor)){
+ fprintf(stderr, "binary keyword: %s\n", yytext);
+
+//AND is implicit
+ if(andor == OR){
+ rhythmdb_query_append(yyextra->db, yyextra->query, RHYTHMDB_QUERY_DISJUNCTION, RHYTHMDB_QUERY_END);
+ }
+
+ /* reset the per-term state before scanning the next term */
+ yyextra->mark = yyextra->s_position;
+ yyextra->not = FALSE;
+ yyextra->fuzzy = TRUE;
+ yyextra->query_type = RHYTHMDB_QUERY_END;
+ BEGIN(KWDORMODIF);
+ } else {
+ yyterminate (1);
+ }
+}
+
+<AFTERPAREN><<EOF>> {
+ /* input ending right after a subquery is a complete query */
+ yyterminate(0);
+}
+
+<*>{SPACING} {
+ /* spaces are skipped in every start condition */
+}
+
+[)] {
+ //ERROR
+ //special message for a stray )
+ yyterminate(1);
+}
+
+<*>.|\n {
+ //ERROR - last resort, since shortest and at the end.
+ yyterminate(1);
+}
+
+<<EOF>> {
+ //EOF without a condition must be at the end
+ //ERROR - missing argument - a missing ) is handled separately
+ yyterminate(1);
+}
+%%
+
+/*
+ * Tells whether the word `s` is a property keyword ("in", "by") or a
+ * modifier ("not", "exactly"), compared case-insensitively against the
+ * translated keywords, and updates the caller's state when it is.
+ *
+ * A property keyword sets *qtype and *ptype, but only while *qtype is still
+ * RHYTHMDB_QUERY_END; a second property keyword is reported as not special.
+ * "not" flips *not; "exactly" clears *fuzzy.
+ *
+ * Returns TRUE when the word was special, FALSE otherwise.
+ *
+ * TODO: choose LIKE / NOT_LIKE in query_append instead, because with
+ * «by not» the negation arrives after *qtype has been chosen.
+ */
+static gboolean prop_or_modif(char* s, gboolean* not, gboolean* fuzzy, RhythmDBQueryType* qtype, RhythmDBPropType* ptype){
+ gboolean is_album = (strcasecmp(s, Q_("keyword|in")) == 0);
+ gboolean is_artist = !is_album && (strcasecmp(s, Q_("keyword|by")) == 0);
+
+ if(is_album || is_artist){
+ /* a property has already been chosen for this term */
+ if(*qtype != RHYTHMDB_QUERY_END)
+ return FALSE;
+
+ if(*not)
+ *qtype = RHYTHMDB_QUERY_PROP_NOT_LIKE;
+ else
+ *qtype = RHYTHMDB_QUERY_PROP_LIKE;
+ *ptype = is_album ? RHYTHMDB_PROP_ALBUM_FOLDED : RHYTHMDB_PROP_ARTIST_FOLDED;
+ return TRUE;
+ }
+
+ if(strcasecmp(s, Q_("keyword|not")) == 0){
+ *not = !*not;
+ return TRUE;
+ }
+
+ if(strcasecmp(s, Q_("keyword|exactly")) == 0){
+ *fuzzy = FALSE;
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+/*
+ * Tells whether the word `s` is one of the translated binary keywords,
+ * compared case-insensitively. On a match *andor receives OR or AND and
+ * TRUE is returned; otherwise *andor is left untouched and FALSE is returned.
+ */
+static gboolean binary_keyword(char* s, AndOr* andor){
+ gboolean matched = TRUE;
+
+ if(strcasecmp(s, Q_("Keyword|or")) == 0)
+ *andor = OR;
+ else if(strcasecmp(s, Q_("Keyword|and")) == 0)
+ *andor = AND;
+ else
+ matched = FALSE;
+
+ return matched;
+}
+
+/*
+ * Appends the string argument found at offsets [mark, mark_end) of s->s to
+ * s->query, as a criterion of type s->query_type on property s->prop_type.
+ *
+ * Returns 0 on success, 1 when the argument is empty.
+ *
+ * TODO: handle not and fuzzy here (g_utf8_casefold() is a candidate for the
+ * folded case).
+ */
+static int query_append_string_chunk(Status* s){
+ int length = s->mark_end - s->mark;
+ if(length <= 0)
+ return 1;
+
+ /* g_strndup() NUL-terminates the copy and never returns NULL for a
+ non-NULL source, so no separate allocation check is needed. */
+ char* chunk = g_strndup(s->s + s->mark, length);
+ fprintf(stderr, "string arg: «%s»\n", chunk);
+ rhythmdb_query_append(s->db, s->query, s->query_type, s->prop_type, chunk, RHYTHMDB_QUERY_END);
+ /* NOTE(review): freeing here assumes the query keeps its own copy of the
+ string, as callers that pass a string they later g_free() suggest -
+ confirm against rhythmdb_query_append(). */
+ g_free(chunk);
+ return 0;
+}
+
+/*
+ * Scans the query string `s` and returns the query built from it, or NULL
+ * when the string is not a valid query or the scanner could not be set up.
+ *
+ * warning: this will punch little holes in your string (the ")" closing a
+ * subquery is overwritten with '\0'), so `s` must be writable.
+ *
+ * A flex in-memory buffer (yy_scan_buffer) could replace the FILE* round
+ * trip, at the price of a doubly NUL-terminated input.
+ */
+static GPtrArray* scan_query(RhythmDB* db, char* s){
+ yyscan_t scanner;
+ Status status = {0};
+ FILE* in;
+
+ /* If fmemopen() fails (some C libraries reject a zero-length buffer, which
+ an empty "()" subquery produces), a NULL yyin would make the scanner
+ read stdin, and stdin would then be closed below. */
+ in = fmemopen (s, strlen (s), "r");
+ if(in == NULL)
+ return NULL;
+
+ if(yylex_init(&scanner) != 0){
+ fclose(in);
+ return NULL;
+ }
+
+ status.db = db;
+ status.query = g_ptr_array_new ();
+ status.s = s;
+ status.s_position = 0;
+ status.mark_end = 0;
+
+ yyset_extra(&status, scanner);
+// yyset_debug(TRUE, scanner);
+ yyset_in(in, scanner);
+ int err = yylex(scanner);
+ fclose(in);
+ yylex_destroy(scanner);
+ /* NOTE(review): on error the partially built status.query is not released. */
+ return err?NULL:status.query;
+}
+
+/*
+ * Public entry point (hence the long name): parses the query string `s`
+ * and returns the resulting query, or NULL when `s` is NULL or is not
+ * recognised as a query. `s` itself is left unmodified.
+ */
+GPtrArray* rhythmdb_query_parse_from_string(RhythmDB* db, char* s) {
+ if(s == NULL)
+ return NULL;
+
+ /* scan_query() writes into its input, so it gets a private copy; the same
+ copy is the one released afterwards, so nothing is leaked. */
+ char* copy = g_strdup(s);
+ GPtrArray* r = scan_query(db, copy);
+ g_free(copy);
+ return r;
+}
+
+#if 0 //for testing
+/* Stand-alone test driver: scans argv[1], or a built-in sample query when
+ * the argument count is not exactly one, and exits with 1 on a scan error. */
+int main(int argc, char** argv){
+ /* scan_query() writes into its input, so the sample is duplicated instead
+ of being passed as a string literal */
+ char* input = (argc == 2) ? argv[1] : strdup("by any chance or (by someone else and by the way)");
+ GPtrArray* parsed = scan_query(NULL, input);
+ int failed = (parsed == NULL);
+
+ if(failed)
+ printf("We got an error\n");
+ exit(failed);
+}
+#endif
--- rhythmbox-0.8.8.myorig/sources/rb-library-source.c 2004-05-08 00:26:30.000000000 +0200
+++ rhythmbox-0.8.8.perso/sources/rb-library-source.c 2005-03-28 00:19:51.000000000 +0200
@@ -57,6 +57,7 @@ typedef enum
RB_LIBRARY_QUERY_TYPE_ARTIST,
RB_LIBRARY_QUERY_TYPE_ALBUM,
RB_LIBRARY_QUERY_TYPE_SEARCH,
+ RB_LIBRARY_QUERY_TYPE_COMPLEX_SEARCH,
} RBLibraryQueryType;
static void rb_library_source_class_init (RBLibrarySourceClass *klass);
@@ -939,7 +940,7 @@ impl_search (RBSource *asource, const ch
g_free (source->priv->search_text);
source->priv->search_text = search_text != NULL ? g_utf8_casefold (search_text, -1) : NULL;
- rb_library_source_do_query (source, RB_LIBRARY_QUERY_TYPE_SEARCH);
+ rb_library_source_do_query (source, RB_LIBRARY_QUERY_TYPE_COMPLEX_SEARCH);
rb_source_notify_filter_changed (RB_SOURCE (source));
}
@@ -1341,7 +1342,12 @@ construct_query_from_selection (RBLibrar
*/
if (source->priv->search_text) {
- GPtrArray *subquery = rhythmdb_query_parse (source->priv->db,
+fprintf(stderr, "search text: «%s»\n", source->priv->search_text);
+
+ GPtrArray* subquery;
+ subquery = rhythmdb_query_parse_from_string(source->priv->db, source->priv->search_text);
+ if(!subquery)
+ subquery = rhythmdb_query_parse (source->priv->db,
RHYTHMDB_QUERY_PROP_LIKE,
RHYTHMDB_PROP_GENRE_FOLDED,
source->priv->search_text,
@@ -1358,14 +1364,16 @@ construct_query_from_selection (RBLibrar
RHYTHMDB_PROP_TITLE_FOLDED,
source->priv->search_text,
RHYTHMDB_QUERY_END);
+ /* select where type="song" and
+ * (genre like "foo" or artist like "foo" or album like "foo")
+ */
+
rhythmdb_query_append (source->priv->db,
query,
RHYTHMDB_QUERY_SUBQUERY,
subquery,
RHYTHMDB_QUERY_END);
- /* select where type="song" and
- * (genre like "foo" or artist like "foo" or album like "foo")
- */
+
}
if (source->priv->selected_genres) {
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]