Skip to content

Instantly share code, notes, and snippets.

@meren
Last active October 23, 2015 16:27
Show Gist options
  • Select an option

  • Save meren/f778eb437cee211ee732 to your computer and use it in GitHub Desktop.

Select an option

Save meren/f778eb437cee211ee732 to your computer and use it in GitHub Desktop.
Takes an "EndNote library" exported as TXT, and generates an HTML output that can be copy-pasted to WordPress. Embarrassingly ad-hoc. I regret nothing. Here is the output: http://www.mbl.edu/jbpc/publicationsparentpage/
# People who have links: maps an author name, spelled exactly as it should
# appear in a citation ('Surname, I. I.'), to the page the name links to.
author_links = {
    'Eren, A. M.': 'http://meren.org/research/',
    'Algar, C. K.': 'http://www.mbl.edu/jbpc/faculty/huber/',
    'Alliegro, M. A.': 'http://www.mbl.edu/jbpc/faculty/alliegro/',
    'Alliegro, M. C.': 'http://www.mbl.edu/jbpc/faculty/alliegro/',
    'Amaral-Zettler, L. A.': 'http://www.mbl.edu/jbpc/faculty/amaralzettler/',
    'Arkhipova, I. R.': 'http://www.mbl.edu/jbpc/faculty/arkhipova/',
    'Belfort, M.': 'http://www.albany.edu/biology/faculty/mbelfort/',
    'Bordenstein, S. R.': 'https://medschool.mc.vanderbilt.edu/biosci/bio_fac.php?id3=17392',
    'Borisy, G. G.': 'http://forsyth.org/person/scientist/gary-borisy',
    'Fischer, A. H.': 'http://antjefischer.org/Antje_Fischer/Main_page.html',
    'Fortunato, C. S.': 'http://www.mbl.edu/jbpc/faculty/huber/',
    'Gribble, K. E.': 'http://www.mbl.edu/jbpc/faculty/gribble/',
    'Grim, S. L.': 'http://www.mbl.edu/jbpc/faculty/morrison/',
    'Hecox-Lea, B.': 'http://www.mbl.edu/jbpc/faculty/markwelchd/',
    'Huber, J. A.': 'http://www.mbl.edu/jbpc/staff/huber/',
    'Huse, S. M.': 'https://vivo.brown.edu/display/sh129',
    'Maignien, L.': 'http://pagesperso.univ-brest.fr/~maignien/',
    'Mark Welch, D. B.': 'http://www.mbl.edu/jbpc/faculty/markwelchd/',
    'Mark Welch, J. L.': 'http://www.mbl.edu/jbpc/staff/markwelchj/',
    'Meselson, M.': 'https://www.mcb.harvard.edu/mcb/faculty/profile/matthew-s-meselson/',
    'Morrison, H. G.': 'http://www.mbl.edu/jbpc/faculty/morrison/',
    'Murphy, L. G.': 'http://www.mbl.edu/jbpc/faculty/amaralzettler/',
    'Post, A. F.': 'http://www.mbl.edu/jbpc/faculty/post/',
    'Reddington, E.': 'http://www.mbl.edu/jbpc/faculty/huber/',
    'Reveillaud, J.': 'http://www.mbl.edu/jbpc/faculty/huber/',
    'Reznikoff, W. S.': 'http://www.mbl.edu/jbpc/faculty/reznikoff/',
    'Rodriguez, F.': 'http://www.mbl.edu/jbpc/faculty/arkhipova/',
    'Schmidt, V. T.': 'http://www.mbl.edu/jbpc/faculty/amaralzettler/',
    'Serres, M. H.': 'http://www.mbl.edu/jbpc/staff/serres/',
    'Shipunova, A.': 'http://www.mbl.edu/jbpc/staff/bioinformaticians/',
    'Smith, J.': 'http://www.mbl.edu/jbpc/faculty/smith/',
    'Sogin, M. L.': 'http://www.mbl.edu/jbpc/faculty/sogin/',
    'Sul, W. J.': 'https://www.researchgate.net/profile/Woo_Jun_Sul',
    'Tulin, S.': 'http://joelsmithlab.org/people/',
    'Vineis, J. H.': 'http://www.mbl.edu/jbpc/faculty/morrison/',
    'Voorhis, A.': 'http://www.mbl.edu/jbpc/staff/bioinformaticians/',
    'Wernegreen, J. J.': 'http://www.genome.duke.edu/directory/faculty/wernegreen/',
    'Yushenova, I. A.': 'http://www.mbl.edu/jbpc/faculty/arkhipova/',
    'Gilbert, J. A.': 'http://www.anl.gov/experts?field_contributor_language_tid=283',
}

# Every BPC person whose name gets highlighted in the output: everybody in the
# dictionary above plus the remaining people who have no link of their own.
# Names already present in `author_links` are not repeated here (the union
# adds them), and names must not carry stray leading/trailing whitespace,
# otherwise they become a second, slightly different name in the set.
bpc_people = set(author_links).union([
    'DeForce, E. A.',
    'Chafee, M. E.',
    'Simmons, S. L.',
    'Akerman, N. H.',
    'Meyer, J. L.',
    'Campbell, R. K.',
    'Valm, A. M.',
    'Hasegawa, Y.',
    'Gladyshev, E. A.',
    'Lasek-Nesselquist, E.',
])
import sys
# Publications dated earlier than this year are left out of the output.
keep_pubs_after = 1996

# year -> list of publication records collected for that year
pubs_dict = dict()

# One journal name per kept publication (repeats are intentional: the list is
# counted later), and the individual author names seen overall / recently.
# Three separate list objects -- they must never alias each other.
journals_list, authors_list, recent_authors_list = list(), list(), list()
# Takes an EndNote library exported as a TXT file (path given as the first
# command-line argument). Here is a sample line from such a file:
#
# Winterberg, K. M., and Reznikoff, W. S. (2007). "Screening transposon mutant libraries using full-genome oligonucleotide microarrays." Methods Enzymol, 421, 110-25.
#
if len(sys.argv) < 2:
    # Exit with a readable message instead of an IndexError traceback.
    sys.exit('Usage: %s <endnote-library-export.txt>' % sys.argv[0])
f = open(sys.argv[1])
def get_author_links(authors_str):
    """Return ``authors_str`` with every BPC author wrapped in HTML markup.

    A name listed in ``author_links`` becomes a bold hyperlink to the URL
    stored there; any other name in ``bpc_people`` is only made bold. Text
    that matches no known name is returned unchanged.

    All names are substituted in a single pass over the input, so the result
    does not depend on the iteration order of the ``bpc_people`` set and
    markup produced for one name is never re-scanned for another.

    A name is only recognised as a whole author entry:

    * it must not be preceded by a letter, digit, underscore or hyphen, so
      'Smith, J.' is not found inside 'Goldsmith, J.';
    * it must not be followed by a further initial, so 'Smith, J.' is not
      found inside 'Smith, J. A.' or 'Smith, J.-P.'.
    """
    import re

    # Tolerate stray whitespace around entries of the name list.
    names = set(name.strip() for name in bpc_people)
    names.discard('')
    if not names:
        # An empty alternation would match the empty string everywhere.
        return authors_str

    # Longest names first (ties broken alphabetically, for determinism) so the
    # alternation always prefers the most specific name.
    ordered = sorted(names, key=lambda name: (-len(name), name))
    alternatives = '|'.join(re.escape(name) for name in ordered)
    pattern = re.compile(r'(?<![\w-])(?:%s)(?![\w-]| [A-Z]\.)' % alternatives)

    def markup(match):
        name = match.group(0)
        if name in author_links:
            return '<a href="%s" target="_blank"><b>%s</b></a>' % (author_links[name], name)
        return '<b>%s</b>' % (name)

    return pattern.sub(markup, authors_str)
# ad hoc fixes for journal names: (printed form, preferred form), applied in
# the order listed
_JOURNAL_FIXES = (
    ('The ISME journal', 'ISME J'),
    ('Proceedings of the National Academy of Sciences of the United States of America', 'Proc Natl Acad Sci U S A'),
    ('Proceedings of the National Academy of Sciences', 'Proc Natl Acad Sci U S A'),
)

# ad hoc fixes for authors who didn't pay attention how their names are printed
# when it mattered ;) (except the 'Jr.' fix); applied in the order listed
_AUTHOR_FIXES = (
    (', Jr.', ' Jr.'),
    ('Amaral Zettler', 'Amaral-Zettler'),
    ('Mark Welch, D.,', 'Mark Welch, D. B.,'),
    ('Serres, M.,', 'Serres, M. H.,'),
    ('Amaral-Zettler, L.,', 'Amaral-Zettler, L. A.,'),
    ('Huse, S.,', 'Huse, S. M.,'),
    ('Wernegreen, J.,', 'Wernegreen, J. J.,'),
    ('Yushenova, I.,', 'Yushenova, I. A.,'),
    ('Arkhipova, I.,', 'Arkhipova, I. R.,'),
)


def _parse_citation(line):
    """Split one citation line into ``(year, authors, title, journal, issue)``.

    The expected layout is::

        Authors (YYYY). "Title." Journal, issue details.

    Returns ``None`` for lines that mention editors ('(ed.)' / '(eds.)') or
    that do not follow this layout, including lines that are too short or
    whose parenthetical is not a four-digit year.
    """
    if '(ed.)' in line or '(eds.)' in line:
        return None

    # ' (YYYY)': the closing parenthesis sits exactly 6 characters after the
    # space that precedes the opening one.
    paren_start = line.find(' (')
    if paren_start <= 0:
        return None
    paren_end = paren_start + 6
    year_text = line[paren_start + 2:paren_end]
    # Slicing (rather than indexing) keeps short lines from raising IndexError.
    if line[paren_end:paren_end + 1] != ')' or not year_text.isdigit():
        return None

    # The title is quoted and ends with '."' or, failing that, '?"'.
    quote_start = line.find(' "', paren_end)
    if quote_start == -1:
        return None
    quote_end = line.find('."', quote_start)
    if quote_end == -1:
        quote_end = line.find('?"', quote_start)
    if quote_end == -1:
        return None

    # The first ', ' after the title separates the journal from the rest.
    comma = line.find(', ', quote_end + 2)
    if comma == -1:
        return None

    issue = line[comma + 2:]
    if issue.endswith('.'):
        # Drop only a real trailing period; the report adds its own.
        issue = issue[:-1]

    return (int(year_text),
            line[:paren_start],
            line[quote_start + 2:quote_end + 1],  # keeps the final '.' or '?'
            line[quote_end + 3:comma],
            issue)


# Read everything up front so the input file can be closed right away.
try:
    citation_lines = [raw_line.strip() for raw_line in f]
finally:
    f.close()

for line in citation_lines:
    parsed = _parse_citation(line)
    if parsed is None:
        continue

    year, authors, title, journal, issue = parsed
    if year < keep_pubs_after:
        continue

    for printed, preferred in _JOURNAL_FIXES:
        journal = journal.replace(printed, preferred)
    journals_list.append(journal)

    for printed, preferred in _AUTHOR_FIXES:
        authors = authors.replace(printed, preferred)

    pubs_dict.setdefault(year, []).append(
        {'authors': authors, 'title': title, 'journal': journal, 'issue': issue})

    if authors.count(',') == 1:
        # A single 'Surname, I. I.' entry.
        names = [authors]
    else:
        # 'A, X., B, Y., and C, Z.' -> one entry per author; splitting on '., '
        # eats the final period of every name but the last, so it is restored.
        names = [name if name.endswith('.') else name + '.'
                 for name in authors.replace('and ', '').split('., ')]

    authors_list.extend(names)
    if year > 2004:
        # 2005 onwards counts as "recent".
        recent_authors_list.extend(names)
# ---------------------------------------------------------------------------
# HTML report, written to standard output. Every print() call below takes a
# single argument so the code behaves the same on Python 2 and Python 3.
# ---------------------------------------------------------------------------
from collections import Counter

try:  # Python 3
    from urllib.parse import quote_plus
except ImportError:  # Python 2
    from urllib import quote_plus

years_newest_first = sorted(pubs_dict, reverse=True)

# One in-page link per year, followed by the number of publications that year.
years = ' | '.join(['<a href="#%s">%s</a> (<i>%d</i>)' % (y, y, len(pubs_dict[y])) for y in years_newest_first])

# The 25 most frequent journals. Sorting (count, name) pairs in reverse keeps
# ties ordered by journal name, descending.
journal_counts = Counter(journals_list)
ranked_journals = sorted([(count, journal) for journal, count in journal_counts.items()], reverse=True)[0:25]
top_journals = ", ".join(['<b>%s</b> (<i>%d</i>)' % (journal, count) for count, journal in ranked_journals])

print("<h2>Top 25 Journals</h2>")
print(top_journals)
print('')
#top_authors = ", ".join(['<b>%s</b> (<i>%d</i>)' % (x[1], x[0]) for x in sorted([(authors_list.count(author), author) for author in set(authors_list)], reverse = True) if x[0] >= 10][0:20])
#print "Authors (all time): ", top_authors
#print
#recent_authors = ", ".join(['%s (<i>%d</i>)' % (x[1], x[0]) for x in sorted([(recent_authors_list.count(author), author) for author in set(recent_authors_list)], reverse = True) if x[0] > 10])
#print "<h2>Authors (last 10 years)</h2>"
#print get_author_links(recent_authors)
#print
print("<h2>Years</h2>")
print(years)
print('')
for year in years_newest_first:
    # Named anchor targeted by the per-year links printed above.
    print('<a name="%s">&nbsp;</a><h2>%s</h2>' % (year, year))
    print('')
    for pub in pubs_dict[year]:
        # Percent-encode the title so characters such as '&', '#', '"' or '%'
        # cannot break the query string or the surrounding href attribute.
        scholar_url = 'http://scholar.google.com/scholar?hl=en&q=%s' % (quote_plus(pub['title']))
        print('%s (%d). "<a href="%s" target="_new">%s</a>" <i>%s</i>, %s.' %
              (get_author_links(pub['authors']),
               year,
               scholar_url,
               pub['title'],
               pub['journal'],
               pub['issue']))
        print('')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment