[Tracker] patch for extract html information
- From: jerry tan <Jerry Tan Sun COM>
- To: tracker-list gnome org
- Subject: [Tracker] patch for extract html information
- Date: Wed, 25 Jul 2007 17:10:40 +0800
Two problems:
1. HTML tag and attribute names are case-insensitive, so the extractor should compare them with strcasecmp instead of strcmp.
2. It dumps core when it encounters an HTML page that contains a tag with no attributes, such as
"<a>abc</a>" or "<meta>", because the attribute array is not checked for NULL.
Patch attached.
Index: src/tracker-extract/tracker-extract-html.c
===================================================================
--- src/tracker-extract/tracker-extract-html.c (revision 724)
+++ src/tracker-extract/tracker-extract-html.c (working copy)
@@ -37,11 +37,14 @@
gboolean
has_attribute( const xmlChar ** atts, const char *attr, const char*val )
{
+ if (atts == NULL || attr == NULL || val == NULL)
+ return FALSE;
+
int i;
for ( i = 0; atts[i]; i+=2 )
{
- if ( strcmp((char*)atts[i],attr) == 0 ) {
- if ( !val || strcmp((char*)atts[i+1],val) == 0 ) {
+ if ( strcasecmp((char*)atts[i],attr) == 0 ) {
+ if ( !val || strcasecmp((char*)atts[i+1],val) == 0 ) {
return TRUE;
}
}
@@ -55,7 +58,7 @@
int i;
for ( i = 0; atts[i]; i+=2 )
{
- if ( strcmp((char*)atts[i],attr) == 0 ) {
+ if ( strcasecmp((char*)atts[i],attr) == 0 ) {
return atts[i+1];
}
}
@@ -67,7 +70,7 @@
startElement (void * info, const xmlChar * name, const xmlChar ** atts)
{
/* Look for RDFa triple describing the license */
- if ( strcmp((char*)name,"a") == 0 ) {
+ if ( strcasecmp((char*)name,"a") == 0 ) {
/* This tag is a license. Ignore, however, if it is referring to another document */
if ( has_attribute(atts,"rel","license") && !has_attribute(atts,"about",NULL) ) {
const xmlChar *href = lookup_attribute(atts,"href");
@@ -76,9 +79,9 @@
g_strdup( (char*)href ));
}
}
- } else if ( strcmp((char*)name,"title") == 0 ) {
+ } else if ( strcasecmp((char*)name,"title") == 0 ) {
((HTMLParseInfo *)info)->current = READ_TITLE;
- } else if ( strcmp((char*)name,"meta") == 0 ) {
+ } else if ( strcasecmp((char*)name,"meta") == 0 ) {
if ( has_attribute(atts,"name","Author") ) {
const xmlChar *author = lookup_attribute(atts,"content");
if ( author ) {
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]