PubmedSpreadsheet Generation Code

From GersteinInfo

Revision as of 11:02, 16 September 2011 by Public (Talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search

parse_pmids.py:


#!/usr/bin/python
import os, sys
from GoogleSpreadsheet import GoogleSpreadsheet
from datetime import datetime
# Identifiers handed to GoogleSpreadsheet to select the sheet to read.
# NOTE(review): "od6" is presumably the ID of the first worksheet -- confirm.
master_spreadsheet_id = "thsIyYg12E8Px0zGJQsAopg"
worksheet_id = "od6"
master_spreadsheet = GoogleSpreadsheet(master_spreadsheet_id, worksheet_id)
# Output file that receives the efetch query URL written by buildQuery().
# Opened in 'w' mode, so any previous contents are truncated.
ncbiquery = "/home/mpw6/new_papers/ncbiquery.txt"
ncbiFile = open(ncbiquery,'w')
def buildQuery(master_spreadsheet, ncbiFile):
    """Write a single NCBI efetch URL covering every PMID in the spreadsheet.

    Args:
        master_spreadsheet: Iterable of rows; each row is indexed with the
            key 'pmid' and is expected to yield a string (possibly empty).
        ncbiFile: Writable file-like object; receives one line containing
            the query URL followed by a newline.

    Rows whose 'pmid' value is falsy are skipped. If no row has a PMID, the
    URL is still written with an empty ``id=`` parameter.
    """
    start = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="
    end = "&rettype=xml&retmode=file"
    # NOTE(review): the published listing had the lstrip() argument and the
    # initial empty string garbled by wiki markup. A leading apostrophe
    # (presumably the spreadsheet's text-cell prefix) is assumed here -- confirm.
    pmids = ",".join(
        row['pmid'].lstrip("'") for row in master_spreadsheet if row['pmid']
    )
    out = start + pmids + end + '\n'
    ncbiFile.write(out)
# Build and write the query URL, then always close the output file so the
# written line is flushed even if buildQuery() raises.
try:
    buildQuery(master_spreadsheet, ncbiFile)
finally:
    ncbiFile.close()

Personal tools