PubmedSpreadsheet Generation Code
From GersteinInfo
(Difference between revisions)
(→reload.py) |
|||
(19 intermediate revisions not shown) | |||
Line 1: | Line 1: | ||
- | parse_pmids.py | + | ==parse_pmids.py== |
- | + | ||
<code> | <code> | ||
#!/usr/bin/python | #!/usr/bin/python | ||
Line 6: | Line 5: | ||
from GoogleSpreadsheet import GoogleSpreadsheet | from GoogleSpreadsheet import GoogleSpreadsheet | ||
from datetime import datetime<br \> | from datetime import datetime<br \> | ||
- | master_spreadsheet_id = " | + | master_spreadsheet_id = "******************" |
worksheet_id = "od6" | worksheet_id = "od6" | ||
master_spreadsheet = GoogleSpreadsheet(master_spreadsheet_id, worksheet_id)<br \> | master_spreadsheet = GoogleSpreadsheet(master_spreadsheet_id, worksheet_id)<br \> | ||
Line 23: | Line 22: | ||
buildQuery(master_spreadsheet, ncbiFile) | buildQuery(master_spreadsheet, ncbiFile) | ||
</code> | </code> | ||
+ | |||
+ | ==PubmedHandler.py== | ||
+ | #!/usr/bin/python | ||
+ | # -*- coding: utf-8 -*- | ||
+ | from datetime import date | ||
+ | from xml.sax import ContentHandler | ||
+ | import xml.sax.saxutils<br \> | ||
+ | class PubmedHandler(ContentHandler): | ||
+ | def __init__(self,output_file = 'export.tab'): | ||
+ | self.file = file(output_file,'w') | ||
+ | self.sep = '\t' | ||
+ | self.data = {} | ||
+ | self.key = '' | ||
+ | self.pmid = False | ||
+ | self.authors = [] | ||
+ | self.handling = False | ||
+ | self.elements = ['Initials', #multiple, name unique | ||
+ | 'LastName', #multiple, name unique | ||
+ | 'MedlineTA', #unique | ||
+ | 'PMID', #unique | ||
+ | 'OtherID', #unique | ||
+ | 'MedlinePgn', #unique | ||
+ | 'Volume', #unique | ||
+ | 'Issue', #unique | ||
+ | 'Year', #inside PubDate | ||
+ | 'Month', #inside PubDate | ||
+ | 'Day', #inside PubDate | ||
+ | 'ArticleTitle', #unique | ||
+ | 'CollectiveName',#unique | ||
+ | 'ArticleId', | ||
+ | 'PubDate', | ||
+ | ] | ||
+ | self.file.write(self.get_header())<br \> | ||
+ | def __del__(self): | ||
+ | self.file.close()<br \> | ||
+ | def startElement(self,name,attrs): | ||
+ | if name == 'PubmedArticle': | ||
+ | self.data = {'PMID':''} | ||
+ | self.authors = [] | ||
+ | self.is_articledate = False | ||
+ | self.is_pubdate = False | ||
+ | self.pmid = 0 | ||
+ | self.handling = False | ||
+ | elif name == 'PubDate': | ||
+ | self.is_pubdate = True | ||
+ | self.data['Year'] = '' | ||
+ | self.data['Month']= '' | ||
+ | self.data['Day'] = '' | ||
+ | elif name in self.elements: | ||
+ | if name == 'ArticleId': | ||
+ | if attrs.getValue('IdType') == 'pmc': | ||
+ | self.key = 'PMCID' | ||
+ | self.data['PMCID'] = '' | ||
+ | self.handling = True | ||
+ | else: | ||
+ | self.key = name | ||
+ | self.handling = True | ||
+ | if name == 'PMID': | ||
+ | self.pmid += 1 | ||
+ | if name not in ['Year','Month','Day','PMID']: | ||
+ | self.data[name] = ''<br \> | ||
+ | def endElement(self,name): | ||
+ | if name == 'PubmedArticle': | ||
+ | self.write_to_file() | ||
+ | elif name == 'PubDate': | ||
+ | self.is_pubdate = False | ||
+ | elif name == 'Author': #merge author | ||
+ | if self.get_data('Initials'): | ||
+ | self.authors.append(self.get_data('Initials') + ' ' + self.get_data('LastName')) | ||
+ | if self.get_data('CollectiveName'): | ||
+ | self.authors.append(self.get_data('CollectiveName')) | ||
+ | self.data['CollectiveName'] = '' | ||
+ | self.handling = False<br \> | ||
+ | def characters(self,content): | ||
+ | if self.handling: | ||
+ | if self.key in ['Year','Month','Day']: | ||
+ | if self.is_pubdate: | ||
+ | self.data[self.key] += content | ||
+ | elif self.key == 'PMID': | ||
+ | if self.pmid == 1: | ||
+ | self.data[self.key] += content | ||
+ | else: | ||
+ | self.data[self.key] += xml.sax.saxutils.escape(content)<br \> | ||
+ | def write_to_file(self): | ||
+ | try: | ||
+ | self.file.write(self.get_row().encode('utf-8')) | ||
+ | except UnicodeEncodeError as anomerr: | ||
+ | print anomerr | ||
+ | print self.get_row() | ||
+ | return[]<br \> | ||
+ | def get_row(self): | ||
+ | return self.sep.join([', '.join(self.authors), | ||
+ | self.get_data('MedlineTA'), | ||
+ | self.get_data('PMID'), | ||
+ | self.get_data('MedlinePgn'), | ||
+ | self.get_data('Volume'), | ||
+ | self.get_data('Year'), | ||
+ | self.get_citation(), | ||
+ | self.get_data('ArticleTitle'), | ||
+ | self.get_data('PMCID')]) + '\n'<br \> | ||
+ | def get_citation(self): | ||
+ | citation = '' | ||
+ | if self.get_data('Year'): | ||
+ | citation += '(' + self.get_data('Year') + ').' + ' ' | ||
+ | citation += '<i>' + self.get_data('MedlineTA') + '</i>' + ' ' | ||
+ | date_str = '' | ||
+ | ref_str = '' | ||
+ | #build date string | ||
+ | if self.get_data('Year'): | ||
+ | date_str += self.get_data('Year') | ||
+ | if self.get_data('Month'): | ||
+ | date_str += ' ' + self.get_data('Month') | ||
+ | if self.get_int_data('Day'): | ||
+ | date_str += ' %d' % self.get_int_data('Day') | ||
+ | date_str = date_str.strip() | ||
+ | |||
+ | #build ref string | ||
+ | if self.get_data('Volume'): | ||
+ | ref_str += self.get_data('Volume') | ||
+ | #if self.get_data('Issue'): | ||
+ | # ref_str += '('+self.get_data('Issue')+')' | ||
+ | if self.get_data('MedlinePgn'): | ||
+ | ref_str += ':' + self.get_data('MedlinePgn') | ||
+ | |||
+ | ref_str = ref_str.strip() | ||
+ | |||
+ | #if date_str: | ||
+ | #citation += date_str | ||
+ | if ref_str: | ||
+ | #if date_str: | ||
+ | #citation += ';' | ||
+ | citation += ref_str | ||
+ | if citation[-1] != '.': | ||
+ | citation += '.' | ||
+ | return citation | ||
+ | |||
+ | def get_data(self,key): | ||
+ | if self.data.has_key(key): | ||
+ | return self.data[key] | ||
+ | return '' | ||
+ | |||
+ | def get_int_data(self,key): | ||
+ | val = self.get_data(key) | ||
+ | try: | ||
+ | val = int(val) | ||
+ | return val | ||
+ | except ValueError: | ||
+ | pass | ||
+ | return 0 | ||
+ | |||
+ | def get_header(self): | ||
+ | return self.sep.join(['Authors','Journal','PMID','Pages','Volume','Year','Citation','Title','PMCID']) + '\n' | ||
+ | |||
+ | ==import.py== | ||
+ | #!/usr/bin/python | ||
+ | # -*- coding: utf-8 -*- | ||
+ | from PubmedHandler import PubmedHandler | ||
+ | from xml import sax | ||
+ | import sys | ||
+ | import os | ||
+ | def main(input_file,output_file) : | ||
+ | parser = sax.make_parser() | ||
+ | handler = PubmedHandler(output_file) | ||
+ | parser.setContentHandler(handler) | ||
+ | parser.parse(input_file) | ||
+ | |||
+ | if __name__ == '__main__': | ||
+ | input_file = 'NCBIData.xml' | ||
+ | output_file = 'export.tab' | ||
+ | if len(sys.argv) > 1: | ||
+ | input_file = sys.argv[1] | ||
+ | if len(sys.argv) > 2: | ||
+ | output_file = sys.argv[2] | ||
+ | main(input_file,output_file) | ||
+ | cmd = "sed -e 's/ä/\ä/g' -e 's/ç/\ç/g' -e 's/ü/\ü/g' -e 's/ó/\ó/g' -e 's/ö/\ö/g' -e 's/ş/\ş/g' -e 's/í/\í/g' -e 's/é/\é/g' -e 's/ô/\ô/g' -e 's/è/\è/g' export.tab > export_out.tab" | ||
+ | os.system(cmd) | ||
+ | |||
+ | ==NewGoogleSpreadSheet.py== | ||
+ | <code><pre> | ||
+ | #!/usr/bin/python | ||
+ | import gdata.spreadsheet.service | ||
+ | class GoogleSpreadsheetException(Exception): | ||
+ | pass | ||
+ | class SpreadsheetRow: | ||
+ | """Row in a GoogleSpreadsheet""" | ||
+ | def __init__(self, index, row_dict): | ||
+ | """Inits SpreadsheetRow. | ||
+ | |||
+ | Args: | ||
+ | index: Row position in spreadsheet. | ||
+ | row_dict: Dict with the data for the row in form | ||
+ | {'header1' : value1, 'header2' : value2, 'header3' : value3, ...} | ||
+ | """ | ||
+ | self.index = index | ||
+ | self.row_dict = row_dict | ||
+ | |||
+ | def __str__(self): | ||
+ | return str(self.row_dict) | ||
+ | |||
+ | class GoogleSpreadsheet: | ||
+ | """An iterable Google Spreadsheet.""" | ||
+ | |||
+ | def __init__(self, spreadsheet_id, worksheet_id, user='*************', password='**************'): | ||
+ | """Inits GoogleSpreadsheet. | ||
+ | |||
+ | Args: | ||
+ | spreadsheet_id: Google-assigned unique id for spreadsheet | ||
+ | worksheet_id: Google-assigned unique id for worksheet within spreadsheet | ||
+ | user: Google account id with access to spreadsheet | ||
+ | password: Password for Google account | ||
+ | """ | ||
+ | self.gd_client = gdata.spreadsheet.service.SpreadsheetsService() | ||
+ | self.gd_client.email = user | ||
+ | self.gd_client.password = password | ||
+ | self.gd_client.ProgrammaticLogin() | ||
+ | |||
+ | self.count = 0 | ||
+ | self.spreadsheet_id = spreadsheet_id | ||
+ | self.worksheet_id = worksheet_id | ||
+ | self.feed = self.gd_client.GetListFeed(self.spreadsheet_id, self.worksheet_id) | ||
+ | self.rows = self.form_rows(self.feed) | ||
+ | |||
+ | def form_rows(self, ListFeed): | ||
+ | """Builds row data from the Google Spreadsheet. | ||
+ | |||
+ | Args: | ||
+ | listFeed: SpreadsheetsListFeed object for the spreadsheet | ||
+ | """ | ||
+ | rows = [] | ||
+ | for i, entry in enumerate(ListFeed.entry): | ||
+ | d = {} | ||
+ | for key in entry.custom: | ||
+ | d[key] = entry.custom[key].text | ||
+ | rows.append(SpreadsheetRow(i, d)) | ||
+ | return rows | ||
+ | |||
+ | def reload(self): | ||
+ | """Refreshes object with current state of the remote spreadsheet""" | ||
+ | self.feed = self.gd_client.GetListFeed(self.spreadsheet_id, self.worksheet_id) | ||
+ | self.rows = self.form_rows(self.feed) | ||
+ | |||
+ | def update_row(self, row): | ||
+ | """Replaces a row on the remote spreadsheet and updates current object. | ||
+ | |||
+ | Args: | ||
+ | row: SpreadsheetRow object to be updated | ||
+ | """ | ||
+ | entry = self.gd_client.UpdateRow(self.feed.entry[row.index], row.row_dict) | ||
+ | if not isinstance(entry, gdata.spreadsheet.SpreadsheetsList): | ||
+ | raise GoogleSpreadsheetException("Error updating row") | ||
+ | self.reload() | ||
+ | |||
+ | def add_row(self, row): | ||
+ | """Adds an additional row to the remote spreadsheet and updates current | ||
+ | object. | ||
+ | |||
+ | Args: | ||
+ | row: SpreadsheetRow object to be added | ||
+ | """ | ||
+ | entry = self.gd_client.InsertRow(row.row_dict, self.spreadsheet_id, self.worksheet_id) | ||
+ | if not isinstance(entry, gdata.spreadsheet.SpreadsheetsList): | ||
+ | raise GoogleSpreadsheetException("Error updating row") | ||
+ | self.reload() | ||
+ | |||
+ | def delete_row(self, row): | ||
+ | """Deletes a row from the remote spreadsheet and updates current object. | ||
+ | |||
+ | Args: | ||
+ | row: SpreadsheetRow object to be deleted | ||
+ | """ | ||
+ | entry = self.gd_client.DeleteRow(self.feed.entry[row.index]) | ||
+ | self.reload() | ||
+ | |||
+ | def __iter__(self): | ||
+ | return self | ||
+ | |||
+ | def next(self): | ||
+ | if self.count >= len(self.rows): | ||
+ | self.count = 0 | ||
+ | raise StopIteration | ||
+ | else: | ||
+ | self.count += 1 | ||
+ | return self.rows[self.count - 1] | ||
+ | |||
+ | def __getitem__(self, index): | ||
+ | return self.rows[index] | ||
+ | |||
+ | def __len__(self): | ||
+ | return len(self.rows) | ||
+ | </pre></code> | ||
+ | ==reload.py== | ||
+ | <code><pre> | ||
+ | #!/usr/bin/python | ||
+ | from NEWGoogleSpreadsheet import GoogleSpreadsheet, SpreadsheetRow | ||
+ | |||
+ | spreadsheet_id = '0AiiHlTECOi8edHFxQWRwN0kxeTQ3ZzdCOXhQX2Z1ZGc' | ||
+ | worksheet_id = 'od6' | ||
+ | |||
+ | gs = GoogleSpreadsheet(spreadsheet_id, worksheet_id) | ||
+ | f = open('export_out.tab', 'r') | ||
+ | |||
+ | headers = [str(x).lower() for x in f.readline().rstrip('\n').split('\t')] | ||
+ | total_lines = 0 | ||
+ | for line_num, line in enumerate(f): | ||
+ | total_lines += 1 | ||
+ | fields = line.rstrip('\n').split('\t') | ||
+ | row_dict = {} | ||
+ | for i, val in enumerate(fields): | ||
+ | row_dict[headers[i]] = val | ||
+ | r = SpreadsheetRow(line_num, row_dict) | ||
+ | print r.index, r.row_dict | ||
+ | if line_num < len(gs): | ||
+ | gs.update_row(r) | ||
+ | else: | ||
+ | gs.add_row(r) | ||
+ | # Remove excess | ||
+ | if total_lines < len(gs): | ||
+ | for r in gs[:total_lines-1:-1]: | ||
+ | gs.delete_row(r) | ||
+ | </pre></code> |
Latest revision as of 10:53, 19 September 2011
Contents |
parse_pmids.py
#!/usr/bin/python
# parse_pmids.py -- build an NCBI efetch query URL from the PMIDs listed in
# the master Google Spreadsheet and write it to ncbiquery.txt for download.
import os, sys
from GoogleSpreadsheet import GoogleSpreadsheet
from datetime import datetime

master_spreadsheet_id = "******************"  # redacted spreadsheet key
worksheet_id = "od6"  # "od6" is Google's id for the first worksheet
master_spreadsheet = GoogleSpreadsheet(master_spreadsheet_id, worksheet_id)

ncbiquery = "/home/mpw6/new_papers/ncbiquery.txt"
ncbiFile = open(ncbiquery, 'w')

def buildQuery(master_spreadsheet, ncbiFile):
    """Write a single efetch URL covering every PMID in the spreadsheet.

    Args:
        master_spreadsheet: iterable of rows supporting row['pmid'] lookup.
        ncbiFile: writable file object that receives the query URL.
    """
    start = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="
    pmids = ""
    end = "&rettype=xml&retmode=file"
    for row in master_spreadsheet:
        if row['pmid']:
            # NOTE(review): the wiki rendering garbled this line
            # ("lstrip('\) + ','"); stripping stray backslashes/whitespace
            # before joining with ',' looks like the intent -- confirm.
            pmids += row['pmid'].strip('\\ ') + ','
    pmids = pmids[:-1]  # drop the trailing comma
    out = start + pmids + end + '\n'
    ncbiFile.write(out)

buildQuery(master_spreadsheet, ncbiFile)
PubmedHandler.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
# PubmedHandler.py -- SAX handler that flattens PubMed efetch XML into a
# tab-separated file, one row per PubmedArticle element.
from datetime import date
from xml.sax import ContentHandler
import xml.sax.saxutils


class PubmedHandler(ContentHandler):
    """SAX ContentHandler writing one tab-separated row per PubmedArticle.

    Fixes applied to the wiki-mangled original: restored the empty-string
    literals the rendering stripped, replaced the removed-in-py3 ``file()``
    builtin and ``dict.has_key`` with ``open()`` / ``in``, converted print
    statements to the print() function, and dropped the ``.encode('utf-8')``
    on write (the file is opened in text mode with an explicit encoding).
    """

    def __init__(self, output_file='export.tab'):
        self.file = open(output_file, 'w', encoding='utf-8')
        self.sep = '\t'
        self.data = {}         # per-article field accumulator
        self.key = ''          # name of the element currently being captured
        self.pmid = False      # count of PMID elements seen (only the first is kept)
        self.authors = []      # accumulated "Initials LastName" strings
        self.handling = False  # True while inside an element we capture text for
        # Elements whose character data we care about.
        self.elements = ['Initials',       # multiple, name unique
                         'LastName',       # multiple, name unique
                         'MedlineTA',      # unique
                         'PMID',           # unique
                         'OtherID',        # unique
                         'MedlinePgn',     # unique
                         'Volume',         # unique
                         'Issue',          # unique
                         'Year',           # inside PubDate
                         'Month',          # inside PubDate
                         'Day',            # inside PubDate
                         'ArticleTitle',   # unique
                         'CollectiveName', # unique
                         'ArticleId',
                         'PubDate',
                         ]
        self.file.write(self.get_header())

    def __del__(self):
        self.file.close()

    def startElement(self, name, attrs):
        if name == 'PubmedArticle':
            # Reset all per-article state.
            self.data = {'PMID': ''}
            self.authors = []
            self.is_articledate = False
            self.is_pubdate = False
            self.pmid = 0
            self.handling = False
        elif name == 'PubDate':
            self.is_pubdate = True
            self.data['Year'] = ''
            self.data['Month'] = ''
            self.data['Day'] = ''
        elif name in self.elements:
            if name == 'ArticleId':
                # Of the many ArticleId kinds, only the PubMed Central id is kept.
                if attrs.getValue('IdType') == 'pmc':
                    self.key = 'PMCID'
                    self.data['PMCID'] = ''
                    self.handling = True
            else:
                self.key = name
                self.handling = True
                if name == 'PMID':
                    self.pmid += 1
                if name not in ['Year', 'Month', 'Day', 'PMID']:
                    self.data[name] = ''

    def endElement(self, name):
        if name == 'PubmedArticle':
            self.write_to_file()
        elif name == 'PubDate':
            self.is_pubdate = False
        elif name == 'Author':  # merge one author into the author list
            if self.get_data('Initials'):
                self.authors.append(self.get_data('Initials') + ' ' + self.get_data('LastName'))
            if self.get_data('CollectiveName'):
                self.authors.append(self.get_data('CollectiveName'))
                self.data['CollectiveName'] = ''
        self.handling = False

    def characters(self, content):
        if self.handling:
            if self.key in ['Year', 'Month', 'Day']:
                # Date parts are only meaningful inside a PubDate element.
                if self.is_pubdate:
                    self.data[self.key] += content
            elif self.key == 'PMID':
                # Keep only the first PMID element of the article.
                if self.pmid == 1:
                    self.data[self.key] += content
            else:
                self.data[self.key] += xml.sax.saxutils.escape(content)

    def write_to_file(self):
        """Emit the accumulated article as one row; report rows that fail to encode."""
        try:
            self.file.write(self.get_row())
        except UnicodeEncodeError as anomerr:
            print(anomerr)
            print(self.get_row())

    def get_row(self):
        """Return one tab-separated output line for the current article."""
        return self.sep.join([', '.join(self.authors),
                              self.get_data('MedlineTA'),
                              self.get_data('PMID'),
                              self.get_data('MedlinePgn'),
                              self.get_data('Volume'),
                              self.get_data('Year'),
                              self.get_citation(),
                              self.get_data('ArticleTitle'),
                              self.get_data('PMCID')]) + '\n'

    def get_citation(self):
        """Build an HTML citation string: "(Year). <i>Journal</i> Vol:Pages." """
        citation = ''
        if self.get_data('Year'):
            citation += '(' + self.get_data('Year') + ').' + ' '
        citation += '<i>' + self.get_data('MedlineTA') + '</i>' + ' '
        date_str = ''
        ref_str = ''
        # build date string (currently not appended -- kept as in the original)
        if self.get_data('Year'):
            date_str += self.get_data('Year')
        if self.get_data('Month'):
            date_str += ' ' + self.get_data('Month')
        if self.get_int_data('Day'):
            date_str += ' %d' % self.get_int_data('Day')
        date_str = date_str.strip()

        # build ref string: Volume:Pages
        if self.get_data('Volume'):
            ref_str += self.get_data('Volume')
        #if self.get_data('Issue'):
        #    ref_str += '('+self.get_data('Issue')+')'
        if self.get_data('MedlinePgn'):
            ref_str += ':' + self.get_data('MedlinePgn')

        ref_str = ref_str.strip()

        if ref_str:
            citation += ref_str
        if citation[-1] != '.':
            citation += '.'
        return citation

    def get_data(self, key):
        """Return the accumulated text for key, or '' if never seen."""
        if key in self.data:
            return self.data[key]
        return ''

    def get_int_data(self, key):
        """Return the field as an int, or 0 if missing or non-numeric."""
        val = self.get_data(key)
        try:
            return int(val)
        except ValueError:
            pass
        return 0

    def get_header(self):
        """Return the tab-separated header line for the export file."""
        return self.sep.join(['Authors', 'Journal', 'PMID', 'Pages', 'Volume',
                              'Year', 'Citation', 'Title', 'PMCID']) + '\n'
import.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
# import.py -- driver: parse an NCBI efetch XML dump into export.tab, then
# convert accented characters to HTML entities for spreadsheet upload.
from PubmedHandler import PubmedHandler
from xml import sax
import sys
import os


def main(input_file, output_file):
    """Stream input_file (PubMed XML) through PubmedHandler into output_file."""
    parser = sax.make_parser()
    handler = PubmedHandler(output_file)
    parser.setContentHandler(handler)
    parser.parse(input_file)


if __name__ == '__main__':
    input_file = 'NCBIData.xml'
    output_file = 'export.tab'
    if len(sys.argv) > 1:
        input_file = sys.argv[1]
    if len(sys.argv) > 2:
        output_file = sys.argv[2]
    main(input_file, output_file)
    # Replace accented characters with HTML entities.  NOTE(review): the wiki
    # rendering collapsed the original entity text (e.g. "\&auml;" was shown
    # as a literal "ä", making the substitutions no-ops); the replacements
    # below are reconstructed from that rendering -- confirm against a
    # working copy.  In a sed replacement, "\&" is a literal ampersand.
    cmd = ("sed -e 's/ä/\\&auml;/g' -e 's/ç/\\&ccedil;/g' -e 's/ü/\\&uuml;/g' "
           "-e 's/ó/\\&oacute;/g' -e 's/ö/\\&ouml;/g' -e 's/ş/\\&#351;/g' "
           "-e 's/í/\\&iacute;/g' -e 's/é/\\&eacute;/g' -e 's/ô/\\&ocirc;/g' "
           "-e 's/è/\\&egrave;/g' export.tab > export_out.tab")
    os.system(cmd)
NewGoogleSpreadSheet.py
#!/usr/bin/python
import gdata.spreadsheet.service
class GoogleSpreadsheetException(Exception):
    """Raised when a remote Google Spreadsheet operation fails."""
    pass
class SpreadsheetRow:
    """Row in a GoogleSpreadsheet: a position plus a header->value mapping.

    (Indentation restored -- the wiki rendering flattened the class body.)
    """

    def __init__(self, index, row_dict):
        """Inits SpreadsheetRow.

        Args:
            index: Row position in spreadsheet.
            row_dict: Dict with the data for the row in form
                {'header1': value1, 'header2': value2, 'header3': value3, ...}
        """
        self.index = index
        self.row_dict = row_dict

    def __str__(self):
        # Delegate to the dict's own string form.
        return str(self.row_dict)
class GoogleSpreadsheet:
    """An iterable Google Spreadsheet backed by the gdata list feed.

    Indentation restored (the wiki rendering flattened the class body).
    Also fixed: add_row raised "Error updating row" (copy-paste) instead of
    an adding-specific message, and delete_row bound an unused variable.
    A ``__next__`` alias makes the iterator protocol work on Python 3 too.
    """

    def __init__(self, spreadsheet_id, worksheet_id, user='*************', password='**************'):
        """Inits GoogleSpreadsheet.

        Args:
            spreadsheet_id: Google-assigned unique id for spreadsheet
            worksheet_id: Google-assigned unique id for worksheet within spreadsheet
            user: Google account id with access to spreadsheet
            password: Password for Google account
        """
        self.gd_client = gdata.spreadsheet.service.SpreadsheetsService()
        self.gd_client.email = user
        self.gd_client.password = password
        self.gd_client.ProgrammaticLogin()

        self.count = 0  # cursor for the iterator protocol
        self.spreadsheet_id = spreadsheet_id
        self.worksheet_id = worksheet_id
        self.feed = self.gd_client.GetListFeed(self.spreadsheet_id, self.worksheet_id)
        self.rows = self.form_rows(self.feed)

    def form_rows(self, ListFeed):
        """Builds row data from the Google Spreadsheet.

        Args:
            ListFeed: SpreadsheetsListFeed object for the spreadsheet

        Returns:
            List of SpreadsheetRow objects, one per feed entry.
        """
        rows = []
        for i, entry in enumerate(ListFeed.entry):
            d = {}
            for key in entry.custom:
                d[key] = entry.custom[key].text
            rows.append(SpreadsheetRow(i, d))
        return rows

    def reload(self):
        """Refreshes object with current state of the remote spreadsheet."""
        self.feed = self.gd_client.GetListFeed(self.spreadsheet_id, self.worksheet_id)
        self.rows = self.form_rows(self.feed)

    def update_row(self, row):
        """Replaces a row on the remote spreadsheet and updates current object.

        Args:
            row: SpreadsheetRow object to be updated

        Raises:
            GoogleSpreadsheetException: if the remote update fails.
        """
        entry = self.gd_client.UpdateRow(self.feed.entry[row.index], row.row_dict)
        if not isinstance(entry, gdata.spreadsheet.SpreadsheetsList):
            raise GoogleSpreadsheetException("Error updating row")
        self.reload()

    def add_row(self, row):
        """Adds an additional row to the remote spreadsheet and updates current
        object.

        Args:
            row: SpreadsheetRow object to be added

        Raises:
            GoogleSpreadsheetException: if the remote insert fails.
        """
        entry = self.gd_client.InsertRow(row.row_dict, self.spreadsheet_id, self.worksheet_id)
        if not isinstance(entry, gdata.spreadsheet.SpreadsheetsList):
            raise GoogleSpreadsheetException("Error adding row")
        self.reload()

    def delete_row(self, row):
        """Deletes a row from the remote spreadsheet and updates current object.

        Args:
            row: SpreadsheetRow object to be deleted
        """
        self.gd_client.DeleteRow(self.feed.entry[row.index])
        self.reload()

    def __iter__(self):
        # The object is its own (single-pass, self-resetting) iterator.
        return self

    def next(self):
        if self.count >= len(self.rows):
            self.count = 0  # reset so the object can be iterated again
            raise StopIteration
        else:
            self.count += 1
            return self.rows[self.count - 1]

    __next__ = next  # Python 3 iterator-protocol alias (backward compatible)

    def __getitem__(self, index):
        return self.rows[index]

    def __len__(self):
        return len(self.rows)
reload.py
#!/usr/bin/python
from NEWGoogleSpreadsheet import GoogleSpreadsheet, SpreadsheetRow
spreadsheet_id = '0AiiHlTECOi8edHFxQWRwN0kxeTQ3ZzdCOXhQX2Z1ZGc'
worksheet_id = 'od6'
gs = GoogleSpreadsheet(spreadsheet_id, worksheet_id)
f = open('export_out.tab', 'r')
headers = [str(x).lower() for x in f.readline().rstrip('\n').split('\t')]
total_lines = 0
for line_num, line in enumerate(f):
total_lines += 1
fields = line.rstrip('\n').split('\t')
row_dict = {}
for i, val in enumerate(fields):
row_dict[headers[i]] = val
r = SpreadsheetRow(line_num, row_dict)
print r.index, r.row_dict
if line_num < len(gs):
gs.update_row(r)
else:
gs.add_row(r)
# Remove excess
if total_lines < len(gs):
for r in gs[:total_lines-1:-1]:
gs.delete_row(r)