[pybliographer] Fix medline query
- From: Zoltan Kota <zkota src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [pybliographer] Fix medline query
- Date: Thu, 22 Sep 2011 13:25:43 +0000 (UTC)
commit 99dc57eb50db61b59f2bf8225fd9b16fbef75ef1
Author: Zoltan Kota <zoltank gmail com>
Date: Thu Sep 22 15:23:38 2011 +0200
Fix medline query
Pyblio/GnomeUI/Document.py | 6 ++-
Pyblio/Query.py | 93 ++++++++++++++++++++++----------------------
2 files changed, 51 insertions(+), 48 deletions(-)
---
diff --git a/Pyblio/GnomeUI/Document.py b/Pyblio/GnomeUI/Document.py
index 1871641..6ab63b1 100644
--- a/Pyblio/GnomeUI/Document.py
+++ b/Pyblio/GnomeUI/Document.py
@@ -523,7 +523,11 @@ class Document (Connector.Publisher):
# no result.
self.w.error (_("Your query returned no result"))
return
-
+ elif url is -1:
+ # error
+ self.w.error (_("An error occured during Medline Query"))
+ return
+
self.open_in_new(url, 'medline', no_name=True)
return
diff --git a/Pyblio/Query.py b/Pyblio/Query.py
index ef20e46..964f68c 100644
--- a/Pyblio/Query.py
+++ b/Pyblio/Query.py
@@ -23,13 +23,20 @@
Search a keyword in a medline database
This code has been contributed by: John Vu <jvu001 umaryland edu>
+
+Updated by Z. Kota in 2011 for eutils.
"""
# The time module is added for querying date ranges of publications
import urllib, urllib2, sys, re, string, time, tempfile, os
+from xml.dom.minidom import parse
+
+
+query_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
+fetch_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
-query_url = 'http://www.ncbi.nlm.nih.gov/entrez/utils/pmqty.fcgi'
-fetch_url = 'http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch.fcgi'
+toolName = 'pybliographer'
+adminEmail = 'webmaster pybliographer org'
def query_info (searchterm, field, displaynum, displaystart, edate):
@@ -38,50 +45,30 @@ def query_info (searchterm, field, displaynum, displaystart, edate):
'db': 'pubmed',
'term' : searchterm, # searchterm is user inputted text, modified by limits if applied
'field' : field,
- 'dopt' : 'd',
- 'dispmax' : displaynum,
- 'dispstart' : displaystart - 1, # minus 1 because the count starts at 0 and not at 1
- 'relentrezdate' : edate # two different search options given, depending on whether the user provides a relative entrez date
+ 'reldate' : edate, # two different search options given, depending on whether the user provides a relative entrez date
+ 'usehistory' : 'y',
+ 'tool' : toolName,
+ 'email' : adminEmail
})
else:
params = urllib.urlencode ({
'db': 'pubmed',
'term' : searchterm, # searchterm is user inputted text, modified by limits
'field' : field,
- 'dopt' : 'd',
- 'dispmax' : displaynum,
- 'dispstart' : displaystart - 1
+ 'usehistory' : 'y',
+ 'tool' : toolName,
+ 'email' : adminEmail
})
f = urllib2.urlopen("%s?%s" % (query_url, params))
- uids = []
- in_body = 0
- uid_re = re.compile (r'^([\d]+)<br>')
-
- while 1:
- line = f.readline ()
- if line == '': break
-
- if in_body:
- line = string.strip (string.lower (line))
-
- if line == '</body>': break
-
- ret = uid_re.match (line)
- if not ret:
- print "unknown line: %s" % line
- continue
-
- uids.append (int (ret.group (1)))
- else:
- line = string.strip (string.lower (line))
- if line == '<body>':
- in_body = 1
- continue
+ myquery = parse (f)
+ count = myquery.getElementsByTagName('Count')[0].firstChild.data
+ querykey = myquery.getElementsByTagName('QueryKey')[0].firstChild.data
+ webenv = myquery.getElementsByTagName('WebEnv')[0].firstChild.data
f.close ()
- return uids
+ return count, querykey, webenv
def medline_query (keyword,maxcount,displaystart,field,abstract,epubahead,pubtype,language,subset,agerange,humananimal,gender,entrezdate,pubdate,fromdate,todate):
@@ -251,23 +238,35 @@ def medline_query (keyword,maxcount,displaystart,field,abstract,epubahead,pubtyp
if pubdate == 'Publication Date': keyword = keyword + ' ' + fromdate + ':' + todate + '[dp]'
elif pubdate == 'Entrez Date': keyword = keyword + ' ' + fromdate + ':' + todate + '[edat]'
- # Below is the actual call to the URL (PubMed's cgi): first to gain the pubmed UIDs
- # and then to get the entries that is passed to pyblio to open
- uids = query_info (keyword, field, maxcount, displaystart, entrezdate) # get the pubmed UIDs and dump into uids variable
-
- uids = string.replace (str(uids),'[','') # get rid of open bracket in string
- uids = string.replace (str(uids),']','') # get rid of close bracket in the string
- uids = string.replace (str(uids),' ','') # get rid of all the spaces in the string
- if uids.strip () == '': return None
-
+ # Below is the actual call to the URL (PubMed's cgi): first to gain the count, querykey, and webenv
+ # parameters, and then to get the entries that is passed to pyblio to open
+
+ try:
+ count, querykey, webenv = query_info (keyword, field, maxcount, displaystart, entrezdate) # pubmed query using the history option
+
+ except:
+ # print sys.exc_info()[:2]
+ return -1
+
+
+ if count == '0' : return None
+
+
params = urllib.urlencode ({
- 'db' : 'pubmed',
- 'report' : 'medline',
- 'mode' : 'text'
+ 'db' : 'pubmed',
+ 'rettype' : 'medline',
+ 'retmode' : 'text',
+ 'retmax' : maxcount,
+ 'retstart' : displaystart - 1,
+ 'query_key': querykey,
+ 'WebEnv' : webenv,
+ 'tool' : toolName,
+ 'email' : adminEmail
})
- url = "%s?%s&id=%s" % (fetch_url, params, str(uids))
+
+ url = "%s?%s" % (fetch_url, params)
content = urllib2.urlopen(url)
fd, fn = tempfile.mkstemp('.medline', 'pyblio-')
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]