Plugin for fetching data from Isi-WebOfScience

Discussion:

Mario Castro

2008-03-08 09:45:30 UTC

Mario Castro

2008-03-08 10:26:28 UTC

Hi all!

After a few days of work I've created a Python plugin for getting information from
ISI Web of Science.

First of all, PYTHON IS AMAZING! Simple, powerful...I'm in love with python
:-)

I attach my plugin here (to be placed in $HOME/.referencer/plugins) for
Referencer version 1.1.1.

It can be improved in many ways. For instance, if the function getNumberOfRecords
returns a value equal to zero, a warning window could be opened with
information about that. Similarly, if it returns a number greater than 1, it
would be very useful to show a window listing all the possibilities
so that one can be picked with the mouse, but I don't know how to create a new window.

Until next version, enjoy it!

#!/usr/bin/env python

# Get info from isi-web of science from title/author/year fields (any or all
# of them)
# Mario Castro, 2008

import os
import referencer
from referencer import _
import sys, urllib2, urllib

from xml.dom import minidom

# Plugin metadata as [key, translated label] pairs.
# NOTE(review): presumably read by Referencer when it loads the plugin --
# confirm against the Referencer plugin documentation.
referencer_plugin_info = [
    ["longname", _("Get info from ISI Web of Science")],
    ["action", _("Get info from ISI Web")],
    ["tooltip", _("Get info from ISI Web of Science")],
]

# Capabilities advertised by this plugin; do_action() below is the matching
# entry point (assumed to be what "document_action" refers to -- TODO confirm).
referencer_plugin_capabilities = ["document_action"]

def get_fields(doc, field, separator):
    """Join the text of every element named ``field`` found under ``doc``.

    ``doc`` must provide ``getElementsByTagName`` (the callers in this file
    pass a minidom document).  Each matching element contributes the data of
    its first child node, encoded as UTF-8, and the contributions are joined
    with ``separator``.

    Elements without child nodes are skipped.  The previous implementation
    only checked the first element and raised IndexError when a later one
    was empty.

    Returns "" when no element contributes any text.
    """
    pieces = [
        element.childNodes[0].data.encode("utf-8")
        for element in doc.getElementsByTagName(field)
        if element.childNodes
    ]
    if not pieces:
        return ""
    return separator.join(pieces)

def get_last_field(doc, field):
    """Return the text of the last element named ``field`` that has content.

    ``doc`` must provide ``getElementsByTagName``.  The matching elements are
    scanned from the end and the first one that has child nodes supplies the
    result: the data of its first child node, encoded as UTF-8.

    The previous implementation only checked the first element for content
    and then indexed ``childNodes[0]`` on every element, so any later empty
    element raised IndexError.

    Returns "" when there is no matching element with child nodes.
    """
    for element in reversed(doc.getElementsByTagName(field)):
        if element.childNodes:
            return element.childNodes[0].data.encode("utf-8")
    return ""

def get_field(doc, field):
    """Return the text of the first element named ``field`` under ``doc``.

    The result is the data of that element's first child node, encoded as
    UTF-8.  Returns "" when no element matches or when the first match has
    no child nodes.
    """
    matches = doc.getElementsByTagName(field)
    if matches and matches[0].childNodes:
        return matches[0].childNodes[0].data.encode("utf-8")
    return ""

def get_attribute_from_field(doc, field, attr):
    """Return attribute ``attr`` of the first element named ``field``.

    ``doc`` must provide ``getElementsByTagName``.  Returns "" when no
    element matches, so callers can test the result with ``len()`` the same
    way they do for the other lookup helpers.  The previous implementation
    raised IndexError in that case.
    """
    matches = doc.getElementsByTagName(field)
    if not matches:
        return ""
    return matches[0].getAttribute(attr)

def getNumberOfRecords(document):
    """Ask ISI Web of Science how many records match ``document``.

    The query is built from the document's "title", "year" and "author"
    fields.  The XML response is parsed and the text of its "recordsFound"
    element is returned as produced by get_field() (a string, "" when the
    element is absent), not as an integer.
    """
    title = document.get_field("title")
    year = document.get_field("year")
    author = document.get_field("author")

    # urlencode() with an empty key yields "=<quoted value>", which is
    # appended directly after the TI / PY / AU search tags below.
    ti = urllib.urlencode([('', '(' + title + ')')])
    ye = urllib.urlencode([('', '(' + year + ')')])
    au = urllib.urlencode([('', '(' + author + ')')])

    url0 = ('http://estipub.isiknowledge.com/esti/cgi'
            '?databaseID=WOS&rspType=xml&method=search&firstRec=1&numRecs=1'
            '&query=TI' + ti + '&PY' + ye + '&AU' + au)
    data0 = referencer.download(
        _("Obtaining data from ISI-WebOfScience"),
        _("Fetching number of ocurrences for %s/%s/%s") % (author, title, year),
        url0)
    xmldoc0 = minidom.parseString(data0)
    return get_field(xmldoc0, "recordsFound")

def getAndSetFields(document):
    """Fetch the matching ISI Web of Science record and update ``document``.

    The search uses the document's "title", "year" and "author" fields and
    retrieves a single record.  Fields are then written back as follows:

    * year, volume, journal: only when the document has none and ISI does.
    * title, author, doi, abstract, keywords: whenever ISI supplies a value.
    * pages: when the ISI value is longer than the current one and is not
      just "-".
    """
    title = document.get_field("title")
    year = document.get_field("year")
    author = document.get_field("author")

    page_orig = document.get_field("pages")
    journal_orig = document.get_field("journal")
    volume = document.get_field("volume")

    # urlencode() with an empty key yields "=<quoted value>", which is
    # appended directly after the TI / PY / AU search tags below.
    ti = urllib.urlencode([('', '(' + title + ')')])
    ye = urllib.urlencode([('', '(' + year + ')')])
    au = urllib.urlencode([('', '(' + author + ')')])

    # NOTE(review): the SID value is hard-coded; it looks like a session
    # identifier and may expire -- confirm with the service documentation.
    url = ('http://estipub.isiknowledge.com/esti/cgi'
           '?databaseID=WOS&SID=Q1mNFhCECOk6c8aELLh&rspType=xml'
           '&method=searchRetrieve&firstRec=1&numRecs=1'
           '&query=TI' + ti + '&PY' + ye + '&AU' + au)
    data = referencer.download(
        _("Obtaining data from ISI-WebOfScience"),
        _("Fetching data for %s/%s/%s") % (author, title, year),
        url)
    xmldoc = minidom.parseString(data)

    primary_author = get_field(xmldoc, "primaryauthor")
    more_authors = get_fields(xmldoc, "author", ' and ')
    # Join only the non-empty parts so a missing primary author does not
    # leave a dangling leading " and ".
    authors = ' and '.join(
        [names for names in (primary_author, more_authors) if names])

    abstract = get_field(xmldoc, "p")
    keywords = get_fields(xmldoc, "keyword", ', ')
    journal = get_field(xmldoc, "source_title")
    # NOTE(review): the "doi" field is filled from the last "article_no"
    # element -- confirm that this element really carries the DOI.
    doi = get_last_field(xmldoc, "article_no")
    pages = get_field(xmldoc, "bib_pages")
    title_isi = get_field(xmldoc, "item_title")
    year_isi = get_attribute_from_field(xmldoc, "bib_issue", "year")
    volume_isi = get_attribute_from_field(xmldoc, "bib_issue", "vol")

    if not year and year_isi:
        document.set_field("year", year_isi)
    if not volume and volume_isi:
        document.set_field("volume", volume_isi)
    # Guard on the ISI title, not the existing one: the old test wiped the
    # title when ISI returned none and never filled in a missing title.
    if title_isi:
        document.set_field("title", title_isi)
    if authors:
        document.set_field("author", authors)
    if doi:
        document.set_field("doi", doi)
    if not journal_orig and journal:
        document.set_field("journal", journal)
    if len(page_orig) < len(pages) and pages != '-':
        document.set_field("pages", pages)
    if abstract:
        document.set_field("abstract", abstract)
    if keywords:
        document.set_field("keywords", keywords)

def do_action(documents):
    """Update every document that has exactly one ISI Web of Science match.

    For each document the number of matching records is queried first; the
    document's fields are only fetched and set when the service reports
    exactly '1' record.  Documents with zero or several matches are left
    untouched.

    Always returns True.
    """
    for document in documents:
        if getNumberOfRecords(document) == '1':
            getAndSetFields(document)

    return True
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://icculus.org/pipermail/referencer/attachments/20080308/6d88f323/attachment.htm>

Michael Banck

2008-03-08 11:05:16 UTC