[monet/new-parser] Add a new CSS parser



commit 58e25e9cd1f0723b80919464c9d897f42051b5d3
Author: Thomas Wood <thomas wood intel com>
Date:   Tue Oct 6 15:35:13 2009 +0100

    Add a new CSS parser
    
    Add a new GScanner based CSS parser and selector matching engine.

 monet/Makefile.am |    2 +
 monet/mn-css.c    |  613 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 monet/mn-css.h    |   47 ++++
 tests/Makefile.am |    4 +-
 tests/test-css.c  |   58 +++++
 tests/test.css    |   24 ++
 6 files changed, 747 insertions(+), 1 deletions(-)
---
diff --git a/monet/Makefile.am b/monet/Makefile.am
index 3ba69b6..7eebc8f 100644
--- a/monet/Makefile.am
+++ b/monet/Makefile.am
@@ -13,6 +13,7 @@ STAMP_FILES = \
 
 source_h = \
 	mn-color.h \
+	mn-css.h \
 	mn-item.h \
 	mn-parts.h \
 	mn-stylable.h \
@@ -72,6 +73,7 @@ libmonet_la_SOURCES = \
 	$(source_h)	 \
 	$(source_h_priv) \
 	mn-color.c	 \
+	mn-css.c	 \
 	mn-item.c	 \
 	mn-stylable.c	 \
 	mn-style.c	 \
diff --git a/monet/mn-css.c b/monet/mn-css.c
new file mode 100644
index 0000000..69404d0
--- /dev/null
+++ b/monet/mn-css.c
@@ -0,0 +1,613 @@
+/*
+ * Copyright 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU Lesser General Public License,
+ * version 2.1, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Author: Thomas Wood <thos gnome org>
+ *
+ */
+#include "mn-css.h"
+#include <string.h>
+
+#include <unistd.h>
+#include <fcntl.h>
+
+struct _MnStyleSheet /* rules gathered from all the files added so far */
+{
+  GList *selectors; /* MnSelector*; css_parse_block() appends each rule's */
+  GList *styles;    /* GHashTable* of properties, one per parsed rule */
+  GList *filenames; /* gchar* copies made by mn_style_sheet_add_from_file() */
+};
+
+typedef struct _MnSelector MnSelector;
+struct _MnSelector /* one simple selector, optionally chained to a parent */
+{
+  gchar *type;          /* element type name or "*"; NULL if not given */
+  gchar *id;            /* identifier following '#', or NULL */
+  gchar *class;         /* identifier following '.', or NULL */
+  gchar *pseudo_class;  /* identifier following ':', or NULL */
+  MnSelector *parent;   /* selector left of a '>'; freed with this one */
+  GHashTable *style;    /* properties of the rule; not freed with the selector */
+  const gchar *filename; /* origin of this selector */
+};
+
+
+/* Return str1 followed by str2 as a string owned by the caller.  str1 is
+ * consumed (freed, or returned as-is if str2 is NULL); NULL str1 copies str2. */
+static gchar*
+append (gchar *str1, const gchar *str2)
+{
+  gchar *joined;
+
+  if (str1 == NULL)
+    return g_strdup (str2);
+  if (str2 == NULL)
+    return str1;
+  joined = g_strconcat (str1, str2, NULL);
+  g_free (str1);
+  return joined;
+}
+
+/* Append the single character c to str and return the resulting string.
+ * str may be NULL, in which case a fresh one-character string is created;
+ * otherwise str is resized with g_realloc(), so the caller must use the
+ * returned pointer instead of str from then on. */
+static gchar*
+appendc (gchar *str, gchar c)
+{
+  gint used = 0;
+  gchar *grown;
+
+  if (str != NULL)
+    {
+      used = strlen (str);
+      grown = g_realloc (str, used + 2);
+    }
+  else
+    grown = g_malloc (2);
+
+  grown[used] = c;
+  grown[used + 1] = '\0';
+  return grown;
+}
+
+/* Parse one "key: value;" declaration.  On success G_TOKEN_NONE is returned
+ * and *key / *value are set to newly allocated strings owned by the caller
+ * (the value is stripped of outer whitespace and is "" when absent).  On
+ * failure the expected token is returned and *key / *value are freed and
+ * reset to NULL.  *value must be NULL on entry.  The scanner configuration
+ * is put back as it was found on every exit path. */
+static GTokenType
+css_parse_key_value (GScanner *scanner, gchar **key, gchar **value)
+{
+  GScannerConfig *config = scanner->config;
+  GScannerConfig saved = *config;
+  GTokenType expected = ':';
+
+  if (g_scanner_get_next_token (scanner) != G_TOKEN_IDENTIFIER)
+    return G_TOKEN_IDENTIFIER;
+  *key = g_strdup (scanner->value.v_identifier);
+  if (g_scanner_get_next_token (scanner) != ':')
+    goto out;
+
+  /* be more forgiving inside the value: identifiers may also contain
+   * digits, '#', '-' and '_' anywhere, and spaces and tabs are not skipped */
+  config->cset_identifier_first = G_CSET_a_2_z "#_-0123456789"
+    G_CSET_A_2_Z G_CSET_LATINS G_CSET_LATINC;
+  config->cset_identifier_nth = config->cset_identifier_first;
+  config->scan_identifier_1char = 1;
+  config->char_2_token = FALSE;
+  config->cset_skip_characters = "\n";
+
+  /* collect tokens up to the ';'; next_value only describes a token that
+   * has been peeked, so its type is checked along with the character */
+  expected = ';';
+  for (g_scanner_peek_next_token (scanner);
+       !(scanner->next_token == G_TOKEN_CHAR
+         && scanner->next_value.v_char == ';');
+       g_scanner_peek_next_token (scanner))
+    {
+      switch (g_scanner_get_next_token (scanner))
+        {
+        case G_TOKEN_IDENTIFIER:
+          *value = append (*value, scanner->value.v_identifier);
+          break;
+        case G_TOKEN_CHAR:
+          *value = appendc (*value, scanner->value.v_char);
+          break;
+        default: /* end of file, or a token a value cannot contain */
+          goto out;
+        }
+    }
+  g_scanner_get_next_token (scanner); /* consume the ';' */
+  if (*value == NULL)
+    *value = g_strdup ("");
+  g_strstrip (*value);
+  expected = G_TOKEN_NONE;
+out:
+  *config = saved;
+  if (expected != G_TOKEN_NONE)
+    {
+      g_free (*key);
+      g_free (*value);
+      *key = *value = NULL;
+    }
+  return expected;
+}
+
+/* Parse a "{ key: value; ... }" block, storing every declaration in table.
+ * table takes ownership of the key and value strings, so it should have
+ * been created with g_free as both destroy functions.  Returns G_TOKEN_NONE
+ * on success, otherwise the token that was expected; declarations parsed
+ * before the failure stay in table. */
+static GTokenType
+css_parse_style (GScanner *scanner, GHashTable *table)
+{
+  GTokenType token;
+
+  if (g_scanner_get_next_token (scanner) != G_TOKEN_LEFT_CURLY)
+    return G_TOKEN_LEFT_CURLY;
+
+  /* keep going until we find '}' */
+  while (g_scanner_peek_next_token (scanner) != G_TOKEN_RIGHT_CURLY)
+    {
+      gchar *key = NULL, *value = NULL;
+
+      token = css_parse_key_value (scanner, &key, &value);
+      if (token != G_TOKEN_NONE)
+        {
+          /* nothing was stored, so release whatever was parsed so far */
+          g_free (key);
+          g_free (value);
+          return token;
+        }
+      g_hash_table_insert (table, key, value);
+    }
+
+  g_scanner_get_next_token (scanner); /* consume the '}' */
+  return G_TOKEN_NONE;
+}
+
+
+/* Parse one simple selector and store its parts in selector.
+ *
+ * The selector is an optional type ('*' or an identifier) followed by any
+ * number of "#id", ".class" and ":pseudo-class" parts.  Parsing stops at,
+ * and does not consume, the first token that cannot continue the selector.
+ * A part that is already set in selector, for instance because the same
+ * kind of part occurs twice, is replaced and its old string is freed.
+ *
+ * Returns G_TOKEN_NONE on success, or G_TOKEN_IDENTIFIER when '#', '.' or
+ * ':' is not followed by a name; parts parsed until then stay in
+ * selector. */
+static GTokenType
+css_parse_simple_selector (GScanner      *scanner,
+                           MnSelector    *selector)
+{
+  gchar **part;
+
+  /* parse optional type (either '*' or an identifier) */
+  switch (g_scanner_peek_next_token (scanner))
+    {
+    case '*':
+      g_scanner_get_next_token (scanner);
+      g_free (selector->type);
+      selector->type = g_strdup ("*");
+      break;
+    case G_TOKEN_IDENTIFIER:
+      g_scanner_get_next_token (scanner);
+      g_free (selector->type);
+      selector->type = g_strdup (scanner->value.v_identifier);
+      break;
+    default:
+      break;
+    }
+
+  /* Here we look for '#', '.' or ':' and return if we find anything else */
+  for (;;)
+    {
+      switch (g_scanner_peek_next_token (scanner))
+        {
+        case '#': /* id */
+          part = &selector->id;
+          break;
+        case '.': /* class */
+          part = &selector->class;
+          break;
+        case ':': /* pseudo-class */
+          part = &selector->pseudo_class;
+          break;
+        default: /* unhandled: leave the token for the caller */
+          return G_TOKEN_NONE;
+        }
+
+      /* skip the '#', '.' or ':' itself; a name has to follow */
+      g_scanner_get_next_token (scanner);
+      if (g_scanner_get_next_token (scanner) != G_TOKEN_IDENTIFIER)
+        return G_TOKEN_IDENTIFIER;
+
+      /* drop the string of an earlier occurrence instead of leaking it */
+      g_free (*part);
+      *part = g_strdup (scanner->value.v_identifier);
+    }
+}
+
+
+static MnSelector *
+mn_selector_new (void) /* zeroed selector; free it with mn_selector_free() */
+{
+  return g_slice_new0 (MnSelector);
+}
+
+/* Free a selector and all its ancestors; style and filename are not freed. */
+static void
+mn_selector_free (MnSelector *selector)
+{
+  while (selector != NULL)
+    {
+      MnSelector *up = selector->parent;
+      g_free (selector->type);
+      g_free (selector->id);
+      g_free (selector->class);
+      g_free (selector->pseudo_class);
+      g_slice_free (MnSelector, selector);
+      selector = up;
+    }
+}
+
+/* Parse the selector list in front of a '{' and collect it in *selectors.
+ *
+ * One MnSelector is prepended to *selectors for every selector found:
+ *  - a bare selector, or one following ',', becomes a new entry;
+ *  - '>' makes the previous entry the parent of the selector that follows
+ *    and takes that parent out of the list again.
+ * Whitespace is not a combinator here: "A B" yields two separate entries.
+ * Every selector created here records scanner->input_name as its origin.
+ * The '{' itself is left in the scanner.
+ *
+ * Returns G_TOKEN_NONE on success, otherwise the token that was expected.
+ * Selectors created before a failure stay in *selectors. */
+static GTokenType
+css_parse_ruleset (GScanner *scanner, GList **selectors)
+{
+  GTokenType token;
+  MnSelector *selector = NULL;
+
+  /* parse the first selector, then keep going until we find left curly */
+  while ((token = g_scanner_peek_next_token (scanner)) != G_TOKEN_LEFT_CURLY)
+    {
+      MnSelector *parent = NULL;
+
+      /* work out how the next simple selector relates to the previous one */
+      switch (token)
+        {
+        case G_TOKEN_IDENTIFIER:
+        case '*':
+        case '#':
+        case '.':
+        case ':':
+          /* start of a simple selector; nothing to consume first */
+          break;
+
+        case '>':
+          g_scanner_get_next_token (scanner);
+          if (!selector)
+            {
+              g_warning ("NULL parent when parsing '>'");
+            }
+
+          /* remove parent from list of selectors and link it to the new
+           * selector */
+          parent = selector;
+          *selectors = g_list_remove (*selectors, parent);
+          break;
+
+        case ',':
+          /* the selector after the comma is an entry of its own */
+          g_scanner_get_next_token (scanner);
+          break;
+
+        default:
+          /* includes reaching the end of the file before any '{' */
+          g_scanner_get_next_token (scanner);
+          g_scanner_unexp_token (scanner, G_TOKEN_ERROR, NULL, NULL, NULL,
+                                 "Unhandled selector", 1);
+          return '{';
+        }
+
+      /* every selector gets its origin, whichever way it was introduced */
+      selector = mn_selector_new ();
+      selector->filename = scanner->input_name;
+      selector->parent = parent;
+      *selectors = g_list_prepend (*selectors, selector);
+
+      token = css_parse_simple_selector (scanner, selector);
+      if (token != G_TOKEN_NONE)
+        return token;
+    }
+
+  return G_TOKEN_NONE;
+}
+
+/* Parse one rule: a selector list followed by a "{ ... }" block.
+ * The new selectors are appended to *selectors and share one property
+ * table, which is appended to *styles (the owner of the table).  Returns
+ * G_TOKEN_NONE on success, otherwise the expected token.  If the selector
+ * list is malformed nothing is added; if only the block is malformed, the
+ * selectors and the declarations parsed so far are still added. */
+static GTokenType
+css_parse_block (GScanner *scanner, GList **selectors, GList **styles)
+{
+  GTokenType token;
+  GHashTable *table;
+  GList *l, *list = NULL;
+
+  token = css_parse_ruleset (scanner, &list);
+  if (token != G_TOKEN_NONE)
+    {
+      /* the selectors parsed so far would otherwise be leaked */
+      g_list_foreach (list, (GFunc) mn_selector_free, NULL);
+      g_list_free (list);
+      return token;
+    }
+
+  /* create a hash table for the properties */
+  table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
+  token = css_parse_style (scanner, table);
+
+  /* assign all the selectors to this style */
+  for (l = list; l; l = l->next)
+    ((MnSelector *) l->data)->style = table;
+
+  *styles = g_list_append (*styles, table);
+  *selectors = g_list_concat (*selectors, list);
+  return token;
+}
+
+
+/* Parse the CSS file filename and add its rules to sheet.
+ * filename also becomes the scanner's input name and selectors created
+ * while parsing may keep pointing at it, so the string must outlive sheet's
+ * selectors.  Returns TRUE when the whole file was parsed; FALSE when it
+ * could not be opened or a parse error was met, in which case the error is
+ * reported with g_scanner_unexp_token() and earlier rules stay in sheet. */
+static gboolean
+css_parse_file (MnStyleSheet *sheet,
+                gchar         *filename)
+{
+  GScannerConfig *config;
+  GScanner *scanner;
+  GTokenType token;
+  int fd = open (filename, O_RDONLY);
+
+  if (fd == -1)
+    return FALSE;
+
+  scanner = g_scanner_new (NULL);
+  scanner->input_name = filename;
+  config = scanner->config;
+
+  /* turn off single line comments, we need to parse '#' */
+  config->cpair_comment_single = "\1\n";
+  config->cset_identifier_nth = G_CSET_a_2_z "-_0123456789"
+    G_CSET_A_2_Z G_CSET_LATINS G_CSET_LATINC;
+  config->scan_float = FALSE; /* allows scanning '.' */
+  config->scan_hex = FALSE;
+  config->scan_string_sq = FALSE;
+  config->scan_string_dq = FALSE;
+
+  g_scanner_input_file (scanner, fd);
+
+  /* parse rule after rule until the end of the file or the first error */
+  for (;;)
+    {
+      token = g_scanner_peek_next_token (scanner);
+      if (token == G_TOKEN_EOF)
+        break;
+      token = css_parse_block (scanner, &sheet->selectors, &sheet->styles);
+      if (token != G_TOKEN_NONE)
+        break;
+    }
+
+  if (token != G_TOKEN_EOF)
+    g_scanner_unexp_token (scanner, token, NULL, NULL, NULL, "Error", TRUE);
+  close (fd);
+  g_scanner_destroy (scanner);
+  return token == G_TOKEN_EOF;
+}
+
+
+/* Check whether node is matched by selector and rate the match.
+ *
+ * Returns 0 when node does not match.  Otherwise the result is the sum of
+ *   1  for the type (a selector without a type, or with "*", fits any node),
+ *   2  if the selector names a class,
+ *   4  if the selector names an id,
+ *   8  if the selector names a pseudo-class,
+ * where each part the selector names has to be equal to the node's.
+ * Matching is by exact, case-sensitive string comparison (strcmp).  A part
+ * that node has but selector does not name is ignored.
+ *
+ * If selector has a parent, node->parent has to match it in the same way;
+ * the score of the parent match is not added to the result. */
+static gint
+css_node_matches_selector (MnNode       *node,
+                           MnSelector   *selector)
+{
+  gint score;
+
+  /* "parent > child": the node needs a parent, and that parent has to match
+   * the parent selector */
+  if (selector->parent != NULL)
+    {
+      if (node->parent == NULL)
+        return 0;
+      if (css_node_matches_selector (node->parent, selector->parent) == 0)
+        return 0;
+    }
+
+  /* type: only compared when the selector asks for a particular one; the
+   * type always contributes to the score */
+  if (selector->type != NULL && selector->type[0] != '*')
+    {
+      if (node->type == NULL)
+        return 0;
+      if (strcmp (selector->type, node->type) != 0)
+        return 0;
+    }
+  score = 1;
+
+  /* id */
+  if (selector->id != NULL)
+    {
+      if (node->id == NULL)
+        return 0;
+      if (strcmp (selector->id, node->id) != 0)
+        return 0;
+      score += 4;
+    }
+
+  /* class */
+  if (selector->class != NULL)
+    {
+      if (node->class == NULL)
+        return 0;
+      if (strcmp (selector->class, node->class) != 0)
+        return 0;
+      score += 2;
+    }
+
+  /* pseudo-class */
+  if (selector->pseudo_class != NULL)
+    {
+      if (node->pseudo_class == NULL)
+        return 0;
+      if (strcmp (selector->pseudo_class, node->pseudo_class) != 0)
+        return 0;
+      score += 8;
+    }
+
+  /* every part named by the selector matched */
+  return score;
+}
+
+typedef struct _SelectorMatch /* a selector that matched a node, and how well */
+{
+  MnSelector *selector; /* points into the style sheet's selector list */
+  gint score;           /* result of css_node_matches_selector(), > 0 */
+} SelectorMatch;
+
+static gint /* g_list_sort() callback: orders matches by ascending score */
+compare_selector_matches (SelectorMatch *a,
+                          SelectorMatch *b)
+{
+  return a->score - b->score; /* < 0, 0 or > 0 as a scores lower, same, higher */
+}
+
+/* g_hash_table_foreach() callback: store key and value in table.  Only the
+ * pointers are copied, so the strings stay owned by the source table. */
+static void
+hash_table_copy (gpointer key, gpointer value, GHashTable *table)
+{
+  g_hash_table_insert (table, key, value);
+}
+
+static void /* g_list_foreach() callback: release one SelectorMatch */
+free_selector_match (SelectorMatch *data)
+{
+  g_slice_free (SelectorMatch, data); /* the selector it points to is kept */
+}
+
+/* Collect the properties that apply to node.  Every selector of sheet that
+ * matches contributes the declarations of its rule; rules are applied from
+ * the lowest to the highest score and, for equal scores, in style sheet
+ * order, so the more specific and then the later rule wins.  Returns a new
+ * table (name -> value) to be released with g_hash_table_destroy(); the
+ * strings in it belong to sheet.  Returns NULL if sheet or node is NULL. */
+GHashTable *
+mn_style_sheet_get_properties (MnStyleSheet *sheet,
+                                MnNode       *node)
+{
+  GList *l, *matches = NULL;
+  GHashTable *result;
+
+  g_return_val_if_fail (sheet != NULL && node != NULL, NULL);
+
+  /* find matching selectors */
+  for (l = sheet->selectors; l; l = l->next)
+    {
+      gint score = css_node_matches_selector (node, l->data);
+      if (score > 0)
+        {
+          SelectorMatch *match = g_slice_new (SelectorMatch);
+          match->selector = l->data;
+          match->score = score;
+          matches = g_list_prepend (matches, match);
+        }
+    }
+
+  /* prepending reversed the order; undo that so the stable sort by score
+   * keeps later rules after earlier ones of the same score */
+  matches = g_list_reverse (matches);
+  matches = g_list_sort (matches, (GCompareFunc) compare_selector_matches);
+
+  /* get properties from selector's styles */
+  result = g_hash_table_new (g_str_hash, g_str_equal);
+  for (l = matches; l; l = l->next)
+    g_hash_table_foreach (((SelectorMatch *) l->data)->selector->style,
+                          (GHFunc) hash_table_copy, result);
+  g_list_foreach (matches, (GFunc) free_selector_match, NULL);
+  g_list_free (matches);
+  return result;
+}
+
+MnStyleSheet *
+mn_style_sheet_new (void) /* empty sheet; free with mn_style_sheet_destroy() */
+{
+  return g_new0 (MnStyleSheet, 1);
+}
+
+/* Free sheet with its selectors, property tables and file names; tables from
+ * mn_style_sheet_get_properties() hold dangling strings afterwards. */
+void
+mn_style_sheet_destroy (MnStyleSheet *sheet)
+{
+  g_return_if_fail (sheet != NULL);
+  g_list_foreach (sheet->selectors, (GFunc) mn_selector_free, NULL);
+  g_list_free (sheet->selectors);
+  g_list_foreach (sheet->styles, (GFunc) g_hash_table_destroy, NULL);
+  g_list_free (sheet->styles);
+  g_list_foreach (sheet->filenames, (GFunc) g_free, NULL);
+  g_list_free (sheet->filenames);
+  g_free (sheet);
+}
+
+/* Parse the CSS file filename into sheet.  Returns TRUE on success; FALSE
+ * otherwise, with *error set if error is not NULL (earlier rules are kept). */
+gboolean
+mn_style_sheet_add_from_file (MnStyleSheet *sheet, const gchar *filename,
+                              GError **error)
+{
+  g_return_val_if_fail (sheet != NULL, FALSE);
+  g_return_val_if_fail (filename != NULL, FALSE);
+  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+
+  /* the selectors refer to this copy of the name, so the sheet keeps it */
+  sheet->filenames = g_list_prepend (sheet->filenames, g_strdup (filename));
+  if (css_parse_file (sheet, sheet->filenames->data))
+    return TRUE;
+  g_set_error (error, G_FILE_ERROR, G_FILE_ERROR_FAILED,
+               "Failed to load style sheet '%s'", filename);
+  return FALSE;
+}
diff --git a/monet/mn-css.h b/monet/mn-css.h
new file mode 100644
index 0000000..39280ca
--- /dev/null
+++ b/monet/mn-css.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU Lesser General Public License,
+ * version 2.1, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Author: Thomas Wood <thos gnome org>
+ *
+ */
+#include <glib.h>
+
+typedef struct _MnNode MnNode;
+typedef struct _MnStyleSheetValue MnStyleSheetValue;
+typedef struct _MnStyleSheet MnStyleSheet;
+
+struct _MnNode /* description of an element to be matched against selectors */
+{
+  gchar *type;          /* compared with a selector's type; may be NULL */
+  gchar *id;            /* compared with "#id"; may be NULL */
+  gchar *class;         /* compared with ".class"; may be NULL */
+  gchar *pseudo_class;  /* compared with ":pseudo-class"; may be NULL */
+  MnNode *parent;       /* matched against the left side of '>'; may be NULL */
+};
+
+struct _MnStyleSheetValue /* NOTE(review): not used by mn-css.c in this patch */
+{
+  const gchar *value;    /* presumably a property value - TODO confirm */
+  const gchar *filename; /* presumably the file it came from - TODO confirm */
+};
+
+MnStyleSheet*  mn_style_sheet_new            (void); /* new, empty sheet */
+void           mn_style_sheet_destroy        (MnStyleSheet *sheet);
+gboolean       mn_style_sheet_add_from_file  (MnStyleSheet *sheet,
+                                              const gchar  *filename,
+                                              GError       **error);
+GHashTable*    mn_style_sheet_get_properties (MnStyleSheet *sheet,
+                                              MnNode       *node);
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 64a0890..1a98ad3 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -2,8 +2,10 @@ AM_CFLAGS = $(MONET_CFLAGS)
 INCLUDES = -I$(top_srcdir)
 LDADD = $(MONET_LIBS) $(top_builddir)/monet/libmonet.la
 
-noinst_PROGRAMS = test-stylable
+noinst_PROGRAMS = test-stylable test-css
 
 test_stylable_SOURCES = test-stylable.c
 
+test_css_SOURCES = test-css.c
+
 -include $(top_srcdir)/git.mk
diff --git a/tests/test-css.c b/tests/test-css.c
new file mode 100644
index 0000000..4b9f384
--- /dev/null
+++ b/tests/test-css.c
@@ -0,0 +1,58 @@
+
+#include <stdio.h>
+#include <monet/mn-css.h>
+
+static void /* g_hash_table_foreach() callback: print one "key = value; " */
+print_key_value (gchar *key, gchar *value)
+{
+  printf ("%s = %s; ", key, value);
+}
+
+static void /* print every property of style on one line of stdout */
+dump_style (GHashTable *style)
+{
+  g_hash_table_foreach (style, (GHFunc) print_key_value, NULL);
+  printf ("\n"); /* end the line even when the table is empty */
+}
+
+/* Load test.css from the current directory and print the properties that
+ * the style sheet assigns to a few hand-built nodes.  Returns 1, after a
+ * message on stderr, if the style sheet cannot be loaded. */
+int
+main (int argc, char **argv)
+{
+  MnStyleSheet *sheet;
+  GHashTable *properties;
+  GError *err = NULL;
+  guint i;
+  MnNode test1 = { "Foo", NULL, NULL, NULL, NULL };
+  MnNode test2 = { "Foo", "bar", NULL, "hover", NULL };
+  MnNode test3 = { "Foo", "baz", NULL, NULL, NULL };
+  MnNode test4 = { "Goo", "bar", NULL, NULL, NULL };
+  MnNode nested = { "Foo", "baz", NULL, NULL, &test4 }; /* test3 inside test4 */
+  MnNode *nodes[] = { &test1, &test2, &test3, &nested };
+  const gchar *labels[] = { "test1: Foo", "test2: Foo#bar:hover",
+                            "test3: Foo#baz", "test4: Goo > Foo" };
+
+  sheet = mn_style_sheet_new ();
+  if (!mn_style_sheet_add_from_file (sheet, "test.css", &err))
+    {
+      fprintf (stderr, "test-css: failed to load test.css%s%s\n",
+               err ? ": " : "", err ? err->message : "");
+      g_clear_error (&err);
+      mn_style_sheet_destroy (sheet);
+      return 1;
+    }
+
+  /* print the resulting style of every node */
+  for (i = 0; i < G_N_ELEMENTS (nodes); i++)
+    {
+      printf ("%s\n", labels[i]);
+      properties = mn_style_sheet_get_properties (sheet, nodes[i]);
+      dump_style (properties);
+      g_hash_table_destroy (properties);
+    }
+
+  mn_style_sheet_destroy (sheet);
+  return 0;
+}
diff --git a/tests/test.css b/tests/test.css
new file mode 100644
index 0000000..1a9f4fc
--- /dev/null
+++ b/tests/test.css
@@ -0,0 +1,24 @@
+
+Goo > Foo
+{
+  prop: 100;
+}
+
+Foo:hover
+{
+ prop:2;
+ hover:1;
+}
+
+Foo
+{
+ number:1;
+}
+
+
+
+Foo#bar
+{
+  prop:3;
+}
+



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]