Build Papers Page Code
From GersteinInfo
update.py
Basic Setup
#!/usr/bin/python
import os, sys, re
from GoogleSpreadsheet import GoogleSpreadsheet
from datetime import datetime
# --- Module-level configuration and shared state -----------------------------
# Everything below runs at import time; the functions further down read these
# names as globals.

# Spreadsheet keys for the three data sources (values are masked in this
# listing).
pubmed_spreadsheet_id = "*******************"
master_spreadsheet_id = "********************"
subject_spreadsheet_id = "*************************"
# Worksheet identifier handed to every GoogleSpreadsheet constructed below.
# NOTE(review): "od6" is presumably the id of the first worksheet - confirm.
worksheet_id = "od6"
# One GoogleSpreadsheet object per source; the rest of the file iterates over
# these and indexes rows by column name (e.g. row['pmid']).
pubmed_spreadsheet = GoogleSpreadsheet(pubmed_spreadsheet_id, worksheet_id)
master_spreadsheet = GoogleSpreadsheet(master_spreadsheet_id, worksheet_id)
subject_spreadsheet = GoogleSpreadsheet(subject_spreadsheet_id, worksheet_id)
# Captured once at start-up so every generated page shows the same
# "Last updated" time (local time, ctime() format).
timestamp = str(datetime.now().ctime())
# Output locations (masked in this listing). Each open(..., 'w') creates or
# truncates its file immediately, before any content has been generated.
# summaryPath is the directory prefix under which per-paper pages are written.
summaryPath = "************"
summaryIndex = "*********************"
summaryFile = open(summaryIndex,'w')
simpleIndex = "*************"
simpleFile = open(simpleIndex,'w')
# subjectPath is used as a plain string prefix (subjectPath + name).
# NOTE(review): it presumably needs to end with a path separator - confirm.
subjectPath = "********************"
subjectIndex = subjectPath + "index.html"
subjectFile = open(subjectIndex,'w')
subjectSummaryIndex = "******************"
subjectSummaryFile = open(subjectSummaryIndex,'w')
# Shared HTML fragment appended right after the <TITLE> element of every
# generated page (content masked in this listing).
header = '''
***********
'''
# printPapers(master_spreadsheet, summaryFile, header)
#
# Generate the main publications index.
#
# Builds the page head (title "Gerstein Lab Publications" followed by
# `header`), a "Total papers" count taken from len(master_spreadsheet) and the
# module-level `timestamp`. A first pass over the rows tallies how many papers
# fall in each row['year']. A second pass emits a "-- <year> (<count>) --"
# heading whenever row['year'] differs from the previous row, flushes the
# pending text to summaryFile, simpleFile and simpleFile2, and calls
# printPaperEntry, printSimpleEntry (once for simpleFile, once for
# simpleFile2), printEntrySummary and printEntryExtended. Finally
# "</BODY></HTML>" is written to summaryFile and simpleFile.
#
# NOTE(review): simpleFile2 is written to here but is not defined anywhere in
# the visible part of the file - confirm it is created elsewhere.
# NOTE(review): years.has_key() exists only on Python 2 dictionaries.
# NOTE(review): this listing appears to have been extracted from a rendered
# wiki page: indentation is collapsed and several string literals have lost
# their inline HTML tags and quote characters. Restore the block from the
# original update.py before running it.
def printPapers(master_spreadsheet,summaryFile, header): out = "<HTML>\n" out += "<HEAD>\n" out += "<TITLE>Gerstein Lab Publications</TITLE>\n" out += header out += "\n"
out += "out += "Main Scientific Publications\n" out += "\n"
out += "Total papers: " + str(len(master_spreadsheet)) + "
\n" out += "(Last updated " + timestamp + ")</CENTER>\n" years = {} currentYear = 0 for row in master_spreadsheet: rowYear = row['year'] if years.has_key(rowYear): years[rowYear] += 1 else: years[rowYear] = 1 for row in master_spreadsheet: pubmed = pubmed_spreadsheet if currentYear != row['year']: currentYear = row['year'] out += "
-- " + currentYear + " (" + str(years[currentYear]) + ") --
"summaryFile.write(out) simpleFile.write(out) simpleFile2.write(out) out = "" printPaperEntry(row, summaryFile, pubmed) printSimpleEntry(row, simpleFile, pubmed) printSimpleEntry(row,simpleFile2,pubmed) printEntrySummary(row, header, pubmed) printEntryExtended(row) out = "</BODY></HTML>" summaryFile.write(out) simpleFile.write(out)
# printEntrySummary(row, header, pubmed)
#
# Write the compact per-paper page "<summaryPath><labid>/index.html".
#
# Sets pubmed.count to 0, then scans `pubmed` for the first row whose 'pmid'
# equals row['pmid'] and copies that row's title, citation, authors and year
# into `row` - the caller's row is modified in place. The target directory is
# created when missing. The page contains the title (linked to the paper's
# index.html), the authors/citation line, and four image links: website,
# preprint, PubMed abstract and "more" (index-all.html). For website, preprint
# and pmid a null.gif placeholder is emitted when the field is empty.
#
# NOTE(review): the reason for resetting pubmed.count is not visible here;
# presumably it rewinds the spreadsheet iterator - confirm against
# GoogleSpreadsheet.
# NOTE(review): entrySummaryFile is not closed in the visible code.
# NOTE(review): this listing appears to have lost indentation, quote
# characters (e.g. lstrip('\) was presumably lstrip('\'')) and inline HTML
# tags during extraction; restore from the original update.py before running.
def printEntrySummary(row,header,pubmed): #if row['pmid']: #print 'pES first: ' + row['pmid'] #else: #print 'pES first: no pmid' pubmed.count = 0 for pubmed_row in pubmed: #print 'pES pubmed: ' + pubmed_row['pmid'] if row['pmid'] == pubmed_row['pmid']: row['title'] = pubmed_row['title'] row['citation'] = pubmed_row['citation'] row['authors'] = pubmed_row['authors'] row['year'] = pubmed_row['year'] break # create summary directory entrySummary = summaryPath + row['labid'].lstrip('\) if not os.path.exists(entrySummary): os.makedirs(entrySummary) entrySummaryIndex = entrySummary + "/index.html" entrySummaryFile = open(entrySummaryIndex,'w')
out = "<HTML>\n" out += "<HEAD>\n" out += "<TITLE>" + row['title'].lstrip('\) + "</TITLE>" out += header out += "\n"
out += "\n" out += "" + row['labid'] + "<p />" out += "\n" # print title and citation out += "</BODY></HTML>"<A HREF=\"/papers/" + row['labid'].lstrip('\) + "/index.html\">" + row['title'].lstrip('\) + "</A>\n"if row.has_key('authors'):
out += "" +row['authors'].lstrip('\)+ " " +row['citation'].lstrip('\) + "\n"else:
out += "" + " " + row['citation'].lstrip('\) + "\n"# print links
out += ""\n<P />\n" out += "if not row['website']: out += "<IMG SRC=\"/papers/null.gif\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\">" else: out += "<A HREF=\"" + row['website'].lstrip('\) + "\"><IMG SRC=\"/papers/website.jpg\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\"></A>" if not row['preprint']: out += "<IMG SRC=\"/papers/null.gif\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\">" else: out += "<A HREF=\"" + row['preprint'].lstrip('\) + "\"><IMG BORDER=\"0\" HEIGHT=\"23\" WIDTH=\"56\" SRC=\"/papers/preprint.jpg\"></A>" if not row['pmid']: out += "<IMG SRC=\"/papers/null.gif\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\">" else: out += "<A HREF=\"http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=" + row['pmid'].lstrip('\) + "&dopt=Abstract\"><IMG BORDER=\"0\" HEIGHT=\"23\" WIDTH=\"56\" SRC=\"/papers/medline.jpg\"></A>" #if not labid: # out += "<A HREF=\"#\">" + "<IMG BORDER=\"0\" WIDTH=\"23\" HEIGHT=\"23\" SRC=\"/papers/more.gif\"></A>" #else: out += "<A HREF=\"/papers/" + row['labid'].lstrip('\) + "/index-all.html\">" + "<IMG BORDER=\"0\" WIDTH=\"23\" HEIGHT=\"23\" SRC=\"/papers/more.gif\"></A>"
out += "
<a href=index-all.html>View all citation information</a>
<a href='/'>Return to papers index</a>
entrySummaryFile.write(out)
# printEntryExtended(row)
#
# Write the extended per-paper page "<summaryPath><labid>/index-all.html".
#
# Scans the module-level pubmed_spreadsheet for rows whose 'pmid' equals
# row['pmid'] and takes title, citation, authors, year, journal, pages, volume
# and pmcid from them (there is no break, so the last matching row wins). For
# the page title and the linked heading the row's own title is used when no
# PubMed title was found. The page repeats the website / preprint / PubMed /
# "more" image links of the compact page, adds a "Switch to compact view"
# link, and then lists the individual fields: Authors, Journal, PMID, Pages,
# Volume, Year, labcite, labid, labtitle, subject and website. Each author
# (split on ',') is linked to a PubMed search for that name; each subject
# (split on ',') is linked to "/subject/<name>".
#
# Reads the module-level `header` and `summaryPath`.
#
# NOTE(review): pmcid is assigned but not used in the visible code.
# NOTE(review): entryExtendedFile is not closed in the visible code.
# NOTE(review): the field-listing section below is badly garbled in this
# listing (lost indentation, lost '' literals, lost table markup, and
# conditions that appear shifted relative to their labels); restore it from
# the original update.py rather than from this text.
def printEntryExtended(row): title = citation = authors = journal = pages = volume = year = pmcid =
for pubmed_row in pubmed_spreadsheet: if row['pmid'] == pubmed_row['pmid']: title = pubmed_row['title'] citation = pubmed_row['citation'] authors = pubmed_row['authors'] year = pubmed_row['year'] journal = pubmed_row['journal'] pages = pubmed_row['pages'] volume = pubmed_row['volume'] pmcid = pubmed_row['pmcid']
# create summary directory entryExtended = summaryPath + row['labid'].lstrip('\) if not os.path.exists(entryExtended): os.makedirs(entryExtended) entryExtendedIndex = entryExtended + "/index-all.html" entryExtendedFile = open(entryExtendedIndex,'w')
out = "<HTML>\n" out += "<HEAD>\n" if not title: out += "<TITLE>" + row['title'].lstrip('\) + "</TITLE>" else: out += "<TITLE>" + title.lstrip('\) + "</TITLE>" out += header out += "\n"
out += "\n" if not row['labid']: out += "" + row['pmid'].lstrip('\) + "<p />" else: out += "" + row['labid'] + "<p />" out += "\n" # print title and citation if not title: out += "</BODY></HTML>"<A HREF=\"/papers/" + row['labid'].lstrip('\) + "/index.html\">" + row['title'].lstrip('\) + "</A>\n"else:
out += "<A HREF=\"/papers/" + row['labid'].lstrip('\) + "/index.html\">" + title.lstrip('\) + "</A>\n"if not citation: if not row.has_key('authors'):
out += "" + " " + row['citation'].lstrip('\) + "\n" # out += "" + " " + row['citation'].lstrip('\) + "\n"else:
out += "" + row['authors'].lstrip('\) + " " +row['citation'].lstrip('\) + "\n"else:
out += "" + authors.lstrip('\) + ' ' + citation.lstrip('\) + "\n"# print links
out += ""\n<P />\n" out += '<A HREF="index.html">Switch to compact view</A>if not row['website']: out += "<IMG SRC=\"/papers/null.gif\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\">" else: out += "<A HREF=\"" + row['website'].lstrip('\) + "\"><IMG SRC=\"/papers/website.jpg\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\"></A>" if not row['preprint']: out += "<IMG SRC=\"/papers/null.gif\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\">" else: out += "<A HREF=\"" + row['preprint'].lstrip('\) + "\"><IMG BORDER=\"0\" HEIGHT=\"23\" WIDTH=\"56\" SRC=\"/papers/preprint.jpg\"></A>" if not row['pmid']: out += "<IMG SRC=\"/papers/null.gif\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\">" else: out += "<A HREF=\"http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=" + row['pmid'].lstrip('\) + "&dopt=Abstract\"><IMG BORDER=\"0\" HEIGHT=\"23\" WIDTH=\"56\" SRC=\"/papers/medline.jpg\"></A>" #if not labid: # out += "<A HREF=\"#\">" + "<IMG BORDER=\"0\" WIDTH=\"23\" HEIGHT=\"23\" SRC=\"/papers/more.gif\"></A>" #else: out += "<A HREF=\"/papers/" + row['labid'].lstrip('\) + "/index-all.html\">" + "<IMG BORDER=\"0\" WIDTH=\"23\" HEIGHT=\"23\" SRC=\"/papers/more.gif\"></A>"
out += "
\n\n'
if not authors:
out += '\n' Authors else:
out += '\n' if not journal: out += ' Authors ' link = '<A HREF="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=PureSearch&db=PubMed&details_term=%s">%s</A>' authorlinks = [link % (a.lstrip('\).strip().replace(' ','%20'),a.lstrip('\).strip()) for a in authors.split(',')] # print authorlinks out += ', '.join(authorlinks)
out += '\n' Journal else:
out += '' if not row['pmid']: out += " Journal ' out += '<A HREF="http://locatorplus.gov/cgi-bin/Pwebrecon.cgi?DB=local&v2=1&ti=1,1&Search_Arg=9808944&Search_Code=0359&CNT=20&SID=1">'
out += journal + '</A>\n" PMID else:
out += '\n' if not pages: out += " PMID ' out += '<A HREF="http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=' + row['pmid'].lstrip('\) + '&dopt=Abstract">'
out += row['pmid'].lstrip('\) + '</A>\n" Pages else:
out += "\n" Pages " + pages.lstrip('\) + " if not volume:
out += "\n" Volume else:
out += "\n" Volume " + volume.lstrip('\) + " if not year:
out += "\n" Year else:
out += "\n" out += " Year " + year.lstrip('\) + " \n" labcite " + citation.lstrip('\) + " if not row['labid']:
out += "\n" labid else:
out += "\n" labid " + row['labid'].lstrip('\) + " if not title:
out += "\n" labtitle else:
out += "\n" labtitle " + title.lstrip('\) + " if not row['subject']:
out += "\n" subject else: subjects = row['subject'].split(',') num_subjects = len(subjects)
out += '\n' if not row['website']: out += " subject ' subject_count = 0 for subject in subjects: out += '<A HREF="/subject/' + subject.lstrip('\' ') + '">' + subject.lstrip('\' ') + '</A>' subject_count += 1 if subject_count < num_subjects: out += ', '
out += '\n" website else:
out += '\n' out += " website <A HREF="'+row['website'].lstrip('\)+'">'+row['website'].lstrip('\)+'</A>
Unused tags: e-print footnote grant ignore preprint sortval target website2
<a href='/'>Return to papers index</a>
entryExtendedFile.write(out)
# printPaperEntry(row, summaryFile, pubmed)
#
# Append one paper's entry to the main index file.
#
# Scans `pubmed` for the first row whose 'pmid' equals row['pmid'] and takes
# title, citation, authors and year from it. The entry consists of a
# title/citation section followed by the website / preprint / PubMed image
# links (null.gif placeholder when the field is empty) and a "more" link to
# the paper's index-all.html. The finished text is written to summaryFile.
#
# NOTE(review): the string literals of the title/citation section are missing
# from this listing (only `out = "` / `out += "` fragments remain), as are the
# '' initial values and the indentation; restore from the original update.py
# before running.
def printPaperEntry(row, summaryFile, pubmed): title = citation = authors = year = for pubmed_row in pubmed: if row['pmid'] == pubmed_row['pmid']: title = pubmed_row['title'] citation = pubmed_row['citation'] authors = pubmed_row['authors'] year = pubmed_row['year'] break # print title and citation if not title:
out = "else:
out = "if not citation: if not row.has_key('authors'):
out += "else:
out += "else:
out += "# print links
out += "if not row['website']: out += "<IMG SRC=\"/papers/null.gif\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\">" else: out += "<A HREF=\"" + row['website'].lstrip('\) + "\"><IMG SRC=\"/papers/website.jpg\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\"></A>" if not row['preprint']: out += "<IMG SRC=\"/papers/null.gif\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\">" else: out += "<A HREF=\"" + row['preprint'].lstrip('\) + "\"><IMG BORDER=\"0\" HEIGHT=\"23\" WIDTH=\"56\" SRC=\"/papers/preprint.jpg\"></A>" if not row['pmid']: out += "<IMG SRC=\"/papers/null.gif\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\">" else: out += "<A HREF=\"http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=" + row['pmid'].lstrip('\) + "&dopt=Abstract\"><IMG BORDER=\"0\" HEIGHT=\"23\" WIDTH=\"56\" SRC=\"/papers/medline.jpg\"></A>" #if not labid: # out += "<A HREF=\"#\">" + "<IMG BORDER=\"0\" WIDTH=\"23\" HEIGHT=\"23\" SRC=\"/papers/more.gif\"></A>" #else: out += "<A HREF=\"/papers/" + row['labid'].lstrip('\) + "/index-all.html\">" + "<IMG BORDER=\"0\" WIDTH=\"23\" HEIGHT=\"23\" SRC=\"/papers/more.gif\"></A>"
out += "summaryFile.write(out)
# printSimpleEntry(row, simpleFile, pubmed)
#
# Format one paper as a single citation line for the "simple" index.
#
# Sets pubmed.count to 0, then scans `pubmed` for the first row whose 'pmid'
# equals row['pmid'] and takes pmid, title, citation, authors, year and pmcid
# from it. When row['pmid'] is set, the line is built as
#   <authors> (<year>). "<title>" <citation> [PMID: <pmid>].
# where the citation first has every "(<digits>)." sequence removed and every
# ":" replaced by ": ". Otherwise the row's own title/year/citation fields are
# used.
#
# NOTE(review): the write to simpleFile is not visible in this listing;
# presumably the function ends with simpleFile.write(out) - confirm.
# NOTE(review): pmcid is assigned but not used in the visible code.
# NOTE(review): the lines prefixed with "- " in the middle of this block
# (subjectInclude ... includeFile.close()) look like commented-out code from a
# different function that the extraction rendered as list items; together
# with the lost quotes and indentation this means the block must be restored
# from the original update.py before running.
def printSimpleEntry(row, simpleFile, pubmed): pmid = title = citation = authors = year = #if row['pmid']: #print 'pSE first: ' + row['pmid'] #else: #print 'pSE first: no pmid' pubmed.count = 0 for pubmed_row in pubmed: #print 'pSE pubmed: ' + pubmed_row['pmid'] if row['pmid'] == pubmed_row['pmid']: pmid = pubmed_row['pmid'] title = pubmed_row['title'] citation = pubmed_row['citation'] authors = pubmed_row['authors'] year = pubmed_row['year'] pmcid = pubmed_row['pmcid'] break #else: #print 'no match' if row['pmid']: out = '
- ' + authors.lstrip('\) + ' (' + year.lstrip('\) + '). "' + title.lstrip('\) +'" '
p = re.compile("\(\d+\)\.")
p2 = re.compile(":")
citation = p.sub("",citation)
citation = p2.sub(": ",citation)
out += citation.lstrip('\) + ' [PMID: ' + pmid + '].
- ' +'"' + row['title'].lstrip('\) + '." '
#out = '
- subjectInclude = path + '/include.html'
- if os.path.exists(subjectInclude):
- includeFile = open(subjectInclude,'r')
- for line in includeFile:
- subjectFile.write(line + '\n')
- includeFile.close()
- subjectFile.write('\n\n')
- ' + ' (' + row['year'].lstrip('\) + '). "' + row['title'].lstrip('\) +'" '
#else:
#out = '
- ' + row['authors'].lstrip('\) + ' (' + row['year'].lstrip('\) + '). "' + row['title'].lstrip('\) +'" '
out += row['citation'].lstrip('\) + '
def printSubject(master_spreadsheet,header):
    """Generate the per-subject pages.

    Groups the rows of ``master_spreadsheet`` by the comma-separated names
    in their ``'subject'`` column, writes a bare page (title, ``header``
    and the subject's own HTML blurb) for every entry of the module-level
    ``subject_spreadsheet`` that has no papers, and hands every subject
    that does have papers to ``printSubjectFile``.

    Args:
        master_spreadsheet: iterable of rows indexable by column name; only
            the ``'subject'`` column is read here.
        header: HTML fragment inserted after the ``<TITLE>`` element of the
            paper-less subject pages.

    Reads the module-level ``subject_spreadsheet`` and ``subjectPath``.
    """
    # Subject names in first-seen order, and for each one the list of row
    # positions (indices into master_spreadsheet) of its papers. slot_of maps
    # a name to its position in both lists so lookups stay O(1).
    subject_names = []
    papers_by_subject = []
    slot_of = {}
    for i, row in enumerate(master_spreadsheet):
        # 'in' replaces the Python-2-only has_key().
        if 'subject' not in row or not row['subject']:
            continue
        for subject in row['subject'].split(','):
            subject = subject.strip()
            if not subject:
                # A trailing or doubled comma yields an empty name, which
                # would resolve to subjectPath itself and overwrite the
                # subject index page.
                continue
            if subject in slot_of:
                papers_by_subject[slot_of[subject]].append(i)
            else:
                slot_of[subject] = len(subject_names)
                subject_names.append(subject)
                papers_by_subject.append([i])

    # Subjects that are listed in the subject spreadsheet but have no papers
    # still get a page of their own.
    for row in subject_spreadsheet:
        subject = (row['labid'] or '').strip()
        if not subject or subject in slot_of:
            continue
        path = subjectPath + subject
        if not os.path.exists(path):
            os.makedirs(path)
        # NOTE(review): the exact whitespace of this literal (and any markup
        # that followed `header`, e.g. the opening tag matching </FONT>) could
        # not be recovered from the listing this was restored from - confirm
        # against the deployed pages.
        out = '<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n' % subject.capitalize()
        out += header
        if row['html']:
            # The cell may carry a leading apostrophe; drop it.
            out += row['html'].lstrip('\'')
        out += '</FONT><A HREF = "/">Return to front page</A>\n</BODY></HTML>'
        # 'with' closes the page even if the write fails.
        with open(path + '/index.html', 'w') as subjectFile:
            subjectFile.write(out)

    for subject, papers in zip(subject_names, papers_by_subject):
        printSubjectFile(subject, papers)
def printSubjectFile(subject,papers):
    """Write the page ``<subjectPath><subject>/index.html`` for one subject.

    The page consists of a title (the capitalized subject name), the
    module-level ``header``, the subject's HTML blurb from the module-level
    ``subject_spreadsheet`` (when one exists), one entry per paper written
    by ``printSubjectEntry``, and a link back to the front page.

    Args:
        subject: subject name; also used as the directory name.
        papers: iterable of row positions, passed one at a time to
            ``printSubjectEntry``.
    """
    path = subjectPath + subject
    if not os.path.exists(path):
        os.makedirs(path)

    # NOTE(review): the exact whitespace of this literal (and any markup that
    # followed `header`, e.g. the opening tag matching </FONT>) could not be
    # recovered from the listing this was restored from - confirm against the
    # deployed pages.
    out = '<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n' % subject.capitalize()
    out += header

    # Pull in the subject's blurb, if the subject spreadsheet has one. There
    # is deliberately no break: when several rows match, the last non-empty
    # blurb wins. labid is stripped before comparing because subject names
    # are stripped where they are collected, so surrounding whitespace in the
    # cell must not prevent a match.
    include = ''
    for row in subject_spreadsheet:
        if (row['labid'] or '').strip() == subject and row['html']:
            # The cell may carry a leading apostrophe; drop it.
            include = row['html'].lstrip('\'')
    out += include

    # 'with' closes the page even if writing one of the entries fails.
    with open(path + '/index.html', 'w') as subjectFile:
        subjectFile.write(out)
        for r in papers:
            printSubjectEntry(subjectFile, r)
        subjectFile.write(' </FONT><A HREF= "/">Return to front page</A>\n</BODY></HTML>')
# printSubjectEntry(subjectFile, r)
#
# Write one paper's entry to an already-open subject page.
#
# `r` is a position in master_spreadsheet.rows. The row's title, authors, year
# and citation are overwritten with the values of every pubmed_spreadsheet row
# whose 'pmid' matches (the break is commented out, so the last match wins and
# the master row is modified in place); `found` records whether any match
# occurred. The entry consists of a title/citation section followed by the
# website / preprint / PubMed image links (null.gif placeholder when the field
# is empty) and a "more" link that points to "#" when the row has no labid and
# to the paper's index-all.html otherwise. The text is written to subjectFile.
#
# NOTE(review): the string literals of the title/citation section are missing
# from this listing (only `out = "` fragments remain), and with the
# indentation lost it is not visible what the `if found:` guard covers;
# restore from the original update.py before running.
def printSubjectEntry(subjectFile,r): row = master_spreadsheet.rows[r] found = False for sr in pubmed_spreadsheet: if row['pmid'] == sr['pmid']: #row['labid'] = sr['labid'] row['title'] = sr['title'] row['authors'] = sr['authors'] row['year'] = sr['year'] row['citation'] = sr['citation'] #row['website'] = sr['website'] #row['preprint'] = sr['preprint'] found = True # break if found:
# print title and citation
if not row['labid']:
out = "else:
out = "# print linksout += "
if not row['website']: out += "<IMG SRC=\"/papers/null.gif\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\">" else: out += "<A HREF=\"" + row['website'].lstrip('\) + "\"><IMG SRC=\"/papers/website.jpg\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\"></A>" if not row['preprint']: out += "<IMG SRC=\"/papers/null.gif\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\">" else: out += "<A HREF=\"" + row['preprint'].lstrip('\) + "\"><IMG BORDER=\"0\" HEIGHT=\"23\" WIDTH=\"56\" SRC=\"/papers/preprint.jpg\"></A>" if not row['pmid']: out += "<IMG SRC=\"/papers/null.gif\" HEIGHT=\"23\" WIDTH=\"56\" BORDER=\"0\">" else: out += "<A HREF=\"http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=" + row['pmid'].lstrip('\) + "&dopt=Abstract\"><IMG BORDER=\"0\" HEIGHT=\"23\" WIDTH=\"56\" SRC=\"/papers/medline.jpg\"></A>" if not row['labid']: out += "<A HREF=\"#\">" + "<IMG BORDER=\"0\" WIDTH=\"23\" HEIGHT=\"23\" SRC=\"/papers/more.gif\"></A>" else: out += "<A HREF=\"/papers/" + row['labid'].lstrip('\) + "/index-all.html\">" + "<IMG BORDER=\"0\" WIDTH=\"23\" HEIGHT=\"23\" SRC=\"/papers/more.gif\"></A>"out += "
subjectFile.write(out)
def printSubjectSummary(subject_spreadsheet, subjectSummaryFile, header):
styleSubject =
<STYLE type="text/css">
.unsel{background-color:#ffffff; background-position:top; background-repeat:repeat-x; color:#FFFFFF; font-weight:normal;}
.sel {background-color:#3b5998; color:#FFFFFF; font-weight:normal;}
.content{background-color:#ffffff; height:25px; padding-left:20px; padding-right:20px; padding-top:20px;color:#ffffff;}
a{text-decoration:none;color:#ffffff;}
.unsel a:link {color:#888888;text-decoration:none;} .sel a:visited {color:#ffffff;text-decoration:none;} .sel a:active {color:#ffffff;text-decoration:none;} .sel a:hover {color:#ffffff;text-decoration:underline;} </STYLE>
category = SubjectNumber = 0 outPart = out = "<HTML>\n" out += "<HEAD>\n" out += "<TITLE>Gerstein Lab Publications</TITLE>\n" out += header out += "\n" out += styleSubject out += '<BODY onload="javascript:showdh(3);">\n'
out += '\n' |