Re: parsing bibtex using gscanner



Below is a minimal example.
One can reproduce the problem with key={some value} by changing, say,
"author = \"Chowdhury, D.\",\n"
to 
"author = {Chowdhury, D.},\n"

This can be compiled as
gcc -Wall `pkg-config --cflags --libs gtk+-3.0` glex.c

/*glex.c*/
#include <glib.h>
#include <string.h>

/* Test data: one BibTeX @phdthesis entry whose field values are all quoted
 * strings.  Every line below is a complete string literal (a literal may not
 * span source lines); the compiler concatenates adjacent literals. */
static const gchar *ttest = "@phdthesis{chow1983thesis,\n"
                            "author = \"Chowdhury, D.\",\n"
                            "institution = \"Department of Physics, IIT, Kanpur\",\n"
                            "location = \"Kanpur\",\n"
                            "publisher = \"Department of Physics, IIT, Kanpur\",\n"
                            "school = \"Department of Physics, IIT, Kanpur\",\n"
                            "title = \"{The Spin Glass Transition}\",\n"
                            "year = \"1983\"\n"
                            "}";


/*
 * Print the contents of one parsed citation entry with g_print().
 *
 * @table: hash table whose keys and values are both NUL-terminated strings.
 *
 * Emits a "Citation entry:" header, then one "key: value" line per pair in
 * whatever order the hash table iterator yields them, then a blank line.
 */
static void
output_entry (GHashTable *table)
{
  GHashTableIter iter;
  /* g_hash_table_iter_next() writes through gpointer *; receive into real
   * gpointer objects instead of casting the address of a char *. */
  gpointer key, val;

  g_print ("Citation entry:\n");
  g_hash_table_iter_init (&iter, table);
  while (g_hash_table_iter_next (&iter, &key, &val))
    g_print ("  %16s: %s\n", (const char *) key, (const char *) val);
  g_print ("\n");
}

/*
 * Parse one BibTeX-style entry of the form
 *
 *   @type{id, key = "value", key = "value" [,] }
 *
 * from @scanner and store it in @table.
 *
 * @scanner: scanner positioned at the start of an entry.
 * @table:   destination table.  The entry type is inserted under the key
 *           "type", the citation id under "id", and each field under its own
 *           name.  Every inserted key and value is a freshly g_strdup()ed
 *           string, so the table is expected to free them with g_free().
 *
 * A comma directly followed by the closing brace (a trailing comma after the
 * last field, or an entry with no fields at all) is accepted.
 *
 * Field values must be scanned as G_TOKEN_STRING; brace-delimited values such
 * as key = {value} are not handled and make the parse fail.
 *
 * Returns G_TOKEN_NONE on success, or G_TOKEN_ERROR as soon as an unexpected
 * token is seen.  On error, pairs inserted before the failure stay in @table.
 */
static guint
parse_entry (GScanner   *scanner,
             GHashTable *table)
{
  /* Entry starts with @ */
  g_scanner_get_next_token (scanner);
  if (scanner->token != '@')
    return G_TOKEN_ERROR;

  /* Entry type, e.g. "phdthesis" */
  g_scanner_get_next_token (scanner);
  if (scanner->token != G_TOKEN_IDENTIFIER)
    return G_TOKEN_ERROR;

  g_hash_table_insert (table, g_strdup ("type"),
                       g_strdup (scanner->value.v_identifier));

  /* Opening brace */
  g_scanner_get_next_token (scanner);
  if (scanner->token != G_TOKEN_LEFT_CURLY)
    return G_TOKEN_ERROR;

  /* Citation id */
  g_scanner_get_next_token (scanner);
  if (scanner->token != G_TOKEN_IDENTIFIER)
    return G_TOKEN_ERROR;

  g_hash_table_insert (table, g_strdup ("id"),
                       g_strdup (scanner->value.v_identifier));

  while (TRUE)
    {
      char *key;

      /* Every field is introduced by a comma */
      g_scanner_get_next_token (scanner);
      if (scanner->token != G_TOKEN_COMMA)
        return G_TOKEN_ERROR;

      /* Tolerate a trailing comma right before the closing brace */
      if (g_scanner_peek_next_token (scanner) == G_TOKEN_RIGHT_CURLY)
        break;

      /* Field name */
      g_scanner_get_next_token (scanner);
      if (scanner->token != G_TOKEN_IDENTIFIER)
        return G_TOKEN_ERROR;

      /* Copy now: the scanner's value is replaced by the next token */
      key = g_strdup (scanner->value.v_identifier);

      g_scanner_get_next_token (scanner);
      if (scanner->token != '=')
        {
          g_free (key);
          return G_TOKEN_ERROR;
        }

      /* Field value: only string tokens are supported */
      g_scanner_get_next_token (scanner);
      if (scanner->token != G_TOKEN_STRING)
        {
          g_free (key);
          return G_TOKEN_ERROR;
        }

      /* The table takes ownership of both key and the duplicated value */
      g_hash_table_insert (table, key, g_strdup (scanner->value.v_string));

      /* Closing brace next means this was the last field */
      if (g_scanner_peek_next_token (scanner) == G_TOKEN_RIGHT_CURLY)
        break;
    }

  /* Eat last curly brace and return */
  g_scanner_get_next_token (scanner);
  return G_TOKEN_NONE;
}


int
main (int    argc,
      char **argv)
{
  GScanner *scanner;
  GHashTable *table;
  guint ret;

  scanner = g_scanner_new (NULL);
  g_scanner_input_text (scanner, ttest, strlen (ttest));

  table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
g_free);
  do
    {
      g_hash_table_remove_all (table);
      ret = parse_entry (scanner, table);

      if (ret == G_TOKEN_ERROR)
        break;
      else
        output_entry (table);

      g_scanner_peek_next_token (scanner);
    }
  while (scanner->next_token != G_TOKEN_EOF &&
         scanner->next_token != G_TOKEN_ERROR);

  /* finsish parsing */
  g_scanner_destroy (scanner);
  g_hash_table_destroy (table);

  return 0;
}






[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]