meren · October 23, 2015 16:27
diff --git a/bpc_pubs.py b/bpc_pubs.py
 # people who have links: maps an author name, spelled exactly as it appears in
 # the citation list, to the page that name should link to
 author_links = {
    'Eren, A. M.': 'http://meren.org/research/',
    'Algar, C. K.': 'http://www.mbl.edu/jbpc/faculty/huber/',
    'Alliegro, M. A.': 'http://www.mbl.edu/jbpc/faculty/alliegro/',
    'Alliegro, M. C.': 'http://www.mbl.edu/jbpc/faculty/alliegro/',
    'Amaral-Zettler, L. A.': 'http://www.mbl.edu/jbpc/faculty/amaralzettler/',
    'Arkhipova, I. R.': 'http://www.mbl.edu/jbpc/faculty/arkhipova/',
    'Belfort, M.': 'http://www.albany.edu/biology/faculty/mbelfort/',
    'Bordenstein, S. R.': 'https://medschool.mc.vanderbilt.edu/biosci/bio_fac.php?id3=17392',
    'Borisy, G. G.': 'http://forsyth.org/person/scientist/gary-borisy',
    'Fischer, A. H.': 'http://antjefischer.org/Antje_Fischer/Main_page.html',
    'Fortunato, C. S.': 'http://www.mbl.edu/jbpc/faculty/huber/',
    'Gribble, K. E.': 'http://www.mbl.edu/jbpc/faculty/gribble/',
    'Grim, S. L.': 'http://www.mbl.edu/jbpc/faculty/morrison/',
    'Hecox-Lea, B.': 'http://www.mbl.edu/jbpc/faculty/markwelchd/',
    'Huber, J. A.': 'http://www.mbl.edu/jbpc/staff/huber/',
    'Huse, S. M.': 'https://vivo.brown.edu/display/sh129',
    'Maignien, L.': 'http://pagesperso.univ-brest.fr/~maignien/',
    'Mark Welch, D. B.': 'http://www.mbl.edu/jbpc/faculty/markwelchd/',
    'Mark Welch, J. L.': 'http://www.mbl.edu/jbpc/staff/markwelchj/',
    'Meselson, M.': 'https://www.mcb.harvard.edu/mcb/faculty/profile/matthew-s-meselson/',
    'Morrison, H. G.': 'http://www.mbl.edu/jbpc/faculty/morrison/',
    'Murphy, L. G.': 'http://www.mbl.edu/jbpc/faculty/amaralzettler/',
    'Post, A. F.': 'http://www.mbl.edu/jbpc/faculty/post/',
    'Reddington, E.': 'http://www.mbl.edu/jbpc/faculty/huber/',
    'Reveillaud, J.': 'http://www.mbl.edu/jbpc/faculty/huber/',
    'Reznikoff, W. S.': 'http://www.mbl.edu/jbpc/faculty/reznikoff/',
    'Rodriguez, F.': 'http://www.mbl.edu/jbpc/faculty/arkhipova/',
    'Schmidt, V. T.': 'http://www.mbl.edu/jbpc/faculty/amaralzettler/',
    'Serres, M. H.': 'http://www.mbl.edu/jbpc/staff/serres/',
    'Shipunova, A.': 'http://www.mbl.edu/jbpc/staff/bioinformaticians/',
    'Smith, J.': 'http://www.mbl.edu/jbpc/faculty/smith/',
    'Sogin, M. L.': 'http://www.mbl.edu/jbpc/faculty/sogin/',
    'Sul, W. J.': 'https://www.researchgate.net/profile/Woo_Jun_Sul',
    'Tulin, S.': 'http://joelsmithlab.org/people/',
    'Vineis, J. H.': 'http://www.mbl.edu/jbpc/faculty/morrison/',
    'Voorhis, A.': 'http://www.mbl.edu/jbpc/staff/bioinformaticians/',
    'Wernegreen, J. J.': 'http://www.genome.duke.edu/directory/faculty/wernegreen/',
    'Yushenova, I. A.': 'http://www.mbl.edu/jbpc/faculty/arkhipova/',
    'Gilbert, J. A.': 'http://www.anl.gov/experts?field_contributor_language_tid=283',
 }


 # every bpc person whose name is emphasised in the output: everybody with a
 # link above plus the names listed here. some of the listed names also have a
 # link; repeating them is harmless because the result is a set.
 # set(...).union(...) is used because dict.keys() + list only works on python 2.
 bpc_people = set(author_links).union([
    'Reveillaud, J.', 'Maignien, L.', 'Algar, C. K.', 'DeForce, E. A.',
    'Chafee, M. E.', 'Simmons, S. L.', 'Akerman, N. H.', 'Yushenova, I. A.',
    'Rodriguez, F.', 'Sul, W. J.', 'Murphy, L. G.', 'Grim, S. L.',
    'Vineis, J. H.', 'Fortunato, C. S.', 'Hecox-Lea, B.', 'Meyer, J. L.',
    'Campbell, R. K.', 'Valm, A. M.', 'Hasegawa, Y.',
    'Gladyshev, E. A.', 'Lasek-Nesselquist, E.',
 ])



 import sys

 # publications dated before this year are dropped by the parsing loop
 keep_pubs_after = 1996

 # accumulators filled while the input is parsed:
 #   pubs_dict            year -> list of publication records
 #   journals_list        one journal name per kept publication
 #   authors_list         one entry per author occurrence
 #   recent_authors_list  same, restricted to publications after 2004
 pubs_dict = dict()
 journals_list, authors_list, recent_authors_list = [], [], []

 # the input is an EndNote library exported as a TXT file, one citation per
 # line. a sample line from such a file:
 #
 # Winterberg, K. M., and Reznikoff, W. S. (2007). "Screening transposon mutant libraries using full-genome oligonucleotide microarrays." Methods Enzymol, 421, 110-25.
 #
 f = open(sys.argv[1], 'r')


 def get_author_links(authors_str, people=None, links=None):
    """Return `authors_str` with every known person's name emphasised.

    A name found in `links` is rendered as
    '<a href="URL" target="_blank"><b>NAME</b></a>'; any other name found in
    `people` is rendered as '<b>NAME</b>'.

    authors_str -- author list of one citation, e.g. 'Eren, A. M., and Sogin, M. L.'
    people      -- iterable of names to emphasise (defaults to the module-level bpc_people)
    links       -- dict mapping name -> URL (defaults to the module-level author_links)

    All names are substituted in a single regular-expression pass, so markup
    inserted for one name is never rewritten while another name is processed
    and the result does not depend on the iteration order of `people`. A name
    is matched only as a whole author: 'Smith, J.' is not found inside
    'McSmith, J.', 'Smith, J. A.' or 'Smith, J.-P.'.
    """
    import re

    if people is None:
        people = bpc_people
    if links is None:
        links = author_links

    # surrounding whitespace in a configured name is never part of the name;
    # longest names go first so the alternation prefers the most specific one
    names = sorted(set(name.strip() for name in people if name.strip()),
                   key=lambda name: (-len(name), name))
    if not names:
        return authors_str

    # lookbehind: the name must not continue a longer surname
    # lookahead: the name must not be followed by further initials
    pattern = re.compile(r'(?<![\w-])(?:%s)(?![\w-]| [A-Z]\.)'
                         % '|'.join(re.escape(name) for name in names))

    def decorate(match):
        name = match.group(0)
        bold = '<b>%s</b>' % name
        if name in links:
            return '<a href="%s" target="_blank">%s</a>' % (links[name], bold)
        return bold

    return pattern.sub(decorate, authors_str)



 # walk the EndNote export line by line. a line is kept only when it has the
 # shape 'Authors (YYYY). "Title." Journal, issue.'; anything else is skipped.
 for line in [l.strip() for l in f.readlines()]:
    # entries carrying an editor marker are not listed
    if line.find('(ed.)') > 0 or line.find('(eds.)') > 0:
        continue

    # the year is expected as ' (YYYY)' directly after the author list
    p_s = line.find(' (')
    if not p_s > 0:
        continue
    p_e = p_s + 6

    # a slice (not an index) so a line that ends before p_e is skipped instead
    # of raising IndexError
    if line[p_e:p_e + 1] != ')':
        continue

    # four characters between the parentheses are not necessarily a year,
    # e.g. '(n.d.)'; skip those instead of letting int() raise ValueError
    year_str = line[p_s + 2:p_e]
    if not year_str.isdigit():
        continue
    year = int(year_str)

    if year < keep_pubs_after:
        continue

    authors = line[0: p_s]

    # the title sits between ' "' and the closing '."' (or '?"')
    q_s = line.find(' "', p_e)
    if not q_s > 0:
        continue
    q_e = line.find('."', q_s)

    if not q_e > 0:
        q_e = line.find('?"', q_s)
        if not q_e > 0:
            continue

    # keeps the final '.' or '?' of the title, drops both quotes
    title = line[q_s + 2:q_e + 1]

    # the journal name runs up to the first ', ' after the title
    c = line.find(', ', q_e + 2)
    if not c > 0:
        continue

    journal = line[q_e + 3:c]

    # everything after the journal, minus the last character of the line
    issue = line[c + 2:-1]


    # ad hoc fixes for journal names
    journal = journal.replace('The ISME journal', 'ISME J')
    journal = journal.replace('Proceedings of the National Academy of Sciences of the United States of America', 'Proc Natl Acad Sci U S A')
    journal = journal.replace('Proceedings of the National Academy of Sciences', 'Proc Natl Acad Sci U S A')
    journals_list.append(journal)


    # ad hoc fixes for authors who didn't pay attention how their names are printed
    # when it mattered ;) (except the 'Jr.' fix)
    authors = authors.replace(', Jr.', ' Jr.')
    authors = authors.replace('Amaral Zettler', 'Amaral-Zettler')
    authors = authors.replace('Mark Welch, D.,', 'Mark Welch, D. B.,')
    authors = authors.replace('Serres, M.,', 'Serres, M. H.,')
    authors = authors.replace('Amaral-Zettler, L.,', 'Amaral-Zettler, L. A.,')
    authors = authors.replace('Huse, S.,', 'Huse, S. M.,')
    authors = authors.replace('Wernegreen, J.,', 'Wernegreen, J. J.,')
    authors = authors.replace('Yushenova, I.,', 'Yushenova, I. A.,')
    authors = authors.replace('Arkhipova, I.,', 'Arkhipova, I. R.,')


    # dict.has_key() does not exist on python 3; setdefault works on both
    pubs_dict.setdefault(year, []).append({'authors': authors, 'title': title, 'journal': journal, 'issue': issue})


    # a single comma means a single 'Last, F. M.' author; otherwise split the
    # list on '., ' and give back the period the split removed
    if authors.count(',') == 1:
        names = [authors]
    else:
        names = [a if a.endswith('.') else a + '.' for a in authors.replace('and ', '').split('., ')]

    authors_list.extend(names)
    if year > 2004:
        recent_authors_list.extend(names)

 # the whole input has been read; release the file handle
 f.close()


 # navigation bar: one anchor link per year, newest first, with the number of
 # publications kept for that year
 years = ' | '.join(['<a href="#%s">%s</a> (<i>%d</i>)' % (y, y, len(pubs_dict[y])) for y in sorted(pubs_dict.keys(), reverse=True)])

 # tally journals in one pass instead of calling list.count() once per
 # distinct journal
 journal_counts = {}
 for journal in journals_list:
    journal_counts[journal] = journal_counts.get(journal, 0) + 1

 # highest count first, ties broken by journal name (descending); keep 25
 ranked_journals = sorted([(count, journal) for journal, count in journal_counts.items()], reverse=True)[0:25]
 top_journals = ", ".join(['<b>%s</b> (<i>%d</i>)' % (journal, count) for count, journal in ranked_journals])

 # single-argument print(...) produces the same output on python 2 and python 3
 print("<h2>Top 25 Journals</h2>")
 print(top_journals)
 print('')

 #top_authors = ", ".join(['<b>%s</b> (<i>%d</i>)' % (x[1], x[0]) for x in sorted([(authors_list.count(author), author) for author in set(authors_list)], reverse = True) if x[0] >= 10][0:20])
 #print "Authors (all time): ", top_authors
 #print

 #recent_authors = ", ".join(['%s (<i>%d</i>)' % (x[1], x[0]) for x in sorted([(recent_authors_list.count(author), author) for author in set(recent_authors_list)], reverse = True) if x[0] > 10])
 #print "<h2>Authors (last 10 years)</h2>"
 #print get_author_links(recent_authors)
 #print


 print("<h2>Years</h2>")

 print(years)
 print('')

 # one section per year, newest first; each title links to a google scholar
 # search for that title
 for year in sorted(pubs_dict.keys(), reverse=True):
    print('<a name="%s">&nbsp;</a><h2>%s</h2>' % (year, year))
    print('')
    for pub in pubs_dict[year]:
        scholar_url = 'http://scholar.google.com/scholar?hl=en&q=%s' % (pub['title'].replace(' ', '+'))

        print('%s (%d). "<a href="%s" target="_new">%s</a>" <i>%s</i>, %s.' % (
            get_author_links(pub['authors']),
            year,
            scholar_url,
            pub['title'],
            pub['journal'],
            pub['issue']))
        print('')
	# people who have links: maps an author name, spelled exactly as it appears in
	# the citation list, to the page that name should link to
	author_links = {
	    'Eren, A. M.': 'http://meren.org/research/',
	    'Algar, C. K.': 'http://www.mbl.edu/jbpc/faculty/huber/',
	    'Alliegro, M. A.': 'http://www.mbl.edu/jbpc/faculty/alliegro/',
	    'Alliegro, M. C.': 'http://www.mbl.edu/jbpc/faculty/alliegro/',
	    'Amaral-Zettler, L. A.': 'http://www.mbl.edu/jbpc/faculty/amaralzettler/',
	    'Arkhipova, I. R.': 'http://www.mbl.edu/jbpc/faculty/arkhipova/',
	    'Belfort, M.': 'http://www.albany.edu/biology/faculty/mbelfort/',
	    'Bordenstein, S. R.': 'https://medschool.mc.vanderbilt.edu/biosci/bio_fac.php?id3=17392',
	    'Borisy, G. G.': 'http://forsyth.org/person/scientist/gary-borisy',
	    'Fischer, A. H.': 'http://antjefischer.org/Antje_Fischer/Main_page.html',
	    'Fortunato, C. S.': 'http://www.mbl.edu/jbpc/faculty/huber/',
	    'Gribble, K. E.': 'http://www.mbl.edu/jbpc/faculty/gribble/',
	    'Grim, S. L.': 'http://www.mbl.edu/jbpc/faculty/morrison/',
	    'Hecox-Lea, B.': 'http://www.mbl.edu/jbpc/faculty/markwelchd/',
	    'Huber, J. A.': 'http://www.mbl.edu/jbpc/staff/huber/',
	    'Huse, S. M.': 'https://vivo.brown.edu/display/sh129',
	    'Maignien, L.': 'http://pagesperso.univ-brest.fr/~maignien/',
	    'Mark Welch, D. B.': 'http://www.mbl.edu/jbpc/faculty/markwelchd/',
	    'Mark Welch, J. L.': 'http://www.mbl.edu/jbpc/staff/markwelchj/',
	    'Meselson, M.': 'https://www.mcb.harvard.edu/mcb/faculty/profile/matthew-s-meselson/',
	    'Morrison, H. G.': 'http://www.mbl.edu/jbpc/faculty/morrison/',
	    'Murphy, L. G.': 'http://www.mbl.edu/jbpc/faculty/amaralzettler/',
	    'Post, A. F.': 'http://www.mbl.edu/jbpc/faculty/post/',
	    'Reddington, E.': 'http://www.mbl.edu/jbpc/faculty/huber/',
	    'Reveillaud, J.': 'http://www.mbl.edu/jbpc/faculty/huber/',
	    'Reznikoff, W. S.': 'http://www.mbl.edu/jbpc/faculty/reznikoff/',
	    'Rodriguez, F.': 'http://www.mbl.edu/jbpc/faculty/arkhipova/',
	    'Schmidt, V. T.': 'http://www.mbl.edu/jbpc/faculty/amaralzettler/',
	    'Serres, M. H.': 'http://www.mbl.edu/jbpc/staff/serres/',
	    'Shipunova, A.': 'http://www.mbl.edu/jbpc/staff/bioinformaticians/',
	    'Smith, J.': 'http://www.mbl.edu/jbpc/faculty/smith/',
	    'Sogin, M. L.': 'http://www.mbl.edu/jbpc/faculty/sogin/',
	    'Sul, W. J.': 'https://www.researchgate.net/profile/Woo_Jun_Sul',
	    'Tulin, S.': 'http://joelsmithlab.org/people/',
	    'Vineis, J. H.': 'http://www.mbl.edu/jbpc/faculty/morrison/',
	    'Voorhis, A.': 'http://www.mbl.edu/jbpc/staff/bioinformaticians/',
	    'Wernegreen, J. J.': 'http://www.genome.duke.edu/directory/faculty/wernegreen/',
	    'Yushenova, I. A.': 'http://www.mbl.edu/jbpc/faculty/arkhipova/',
	    'Gilbert, J. A.': 'http://www.anl.gov/experts?field_contributor_language_tid=283',
	}


	# every bpc person whose name is emphasised in the output: everybody with a
	# link above plus the names listed here. some of the listed names also have a
	# link; repeating them is harmless because the result is a set.
	# set(...).union(...) is used because dict.keys() + list only works on python 2.
	bpc_people = set(author_links).union([
	    'Reveillaud, J.', 'Maignien, L.', 'Algar, C. K.', 'DeForce, E. A.',
	    'Chafee, M. E.', 'Simmons, S. L.', 'Akerman, N. H.', 'Yushenova, I. A.',
	    'Rodriguez, F.', 'Sul, W. J.', 'Murphy, L. G.', 'Grim, S. L.',
	    'Vineis, J. H.', 'Fortunato, C. S.', 'Hecox-Lea, B.', 'Meyer, J. L.',
	    'Campbell, R. K.', 'Valm, A. M.', 'Hasegawa, Y.',
	    'Gladyshev, E. A.', 'Lasek-Nesselquist, E.',
	])



	import sys

	# publications dated before this year are dropped by the parsing loop
	keep_pubs_after = 1996

	# accumulators filled while the input is parsed:
	#   pubs_dict            year -> list of publication records
	#   journals_list        one journal name per kept publication
	#   authors_list         one entry per author occurrence
	#   recent_authors_list  same, restricted to publications after 2004
	pubs_dict = dict()
	journals_list, authors_list, recent_authors_list = [], [], []

	# the input is an EndNote library exported as a TXT file, one citation per
	# line. a sample line from such a file:
	#
	# Winterberg, K. M., and Reznikoff, W. S. (2007). "Screening transposon mutant libraries using full-genome oligonucleotide microarrays." Methods Enzymol, 421, 110-25.
	#
	f = open(sys.argv[1], 'r')


	def get_author_links(authors_str, people=None, links=None):
	    """Return `authors_str` with every known person's name emphasised.

	    A name found in `links` is rendered as
	    '<a href="URL" target="_blank"><b>NAME</b></a>'; any other name found in
	    `people` is rendered as '<b>NAME</b>'.

	    authors_str -- author list of one citation, e.g. 'Eren, A. M., and Sogin, M. L.'
	    people      -- iterable of names to emphasise (defaults to the module-level bpc_people)
	    links       -- dict mapping name -> URL (defaults to the module-level author_links)

	    All names are substituted in a single regular-expression pass, so markup
	    inserted for one name is never rewritten while another name is processed
	    and the result does not depend on the iteration order of `people`. A name
	    is matched only as a whole author: 'Smith, J.' is not found inside
	    'McSmith, J.', 'Smith, J. A.' or 'Smith, J.-P.'.
	    """
	    import re

	    if people is None:
	        people = bpc_people
	    if links is None:
	        links = author_links

	    # surrounding whitespace in a configured name is never part of the name;
	    # longest names go first so the alternation prefers the most specific one
	    names = sorted(set(name.strip() for name in people if name.strip()),
	                   key=lambda name: (-len(name), name))
	    if not names:
	        return authors_str

	    # lookbehind: the name must not continue a longer surname
	    # lookahead: the name must not be followed by further initials
	    pattern = re.compile(r'(?<![\w-])(?:%s)(?![\w-]| [A-Z]\.)'
	                         % '|'.join(re.escape(name) for name in names))

	    def decorate(match):
	        name = match.group(0)
	        bold = '<b>%s</b>' % name
	        if name in links:
	            return '<a href="%s" target="_blank">%s</a>' % (links[name], bold)
	        return bold

	    return pattern.sub(decorate, authors_str)



	# walk the EndNote export line by line. a line is kept only when it has the
	# shape 'Authors (YYYY). "Title." Journal, issue.'; anything else is skipped.
	for line in [l.strip() for l in f.readlines()]:
	    # entries carrying an editor marker are not listed
	    if line.find('(ed.)') > 0 or line.find('(eds.)') > 0:
	        continue

	    # the year is expected as ' (YYYY)' directly after the author list
	    p_s = line.find(' (')
	    if not p_s > 0:
	        continue
	    p_e = p_s + 6

	    # a slice (not an index) so a line that ends before p_e is skipped instead
	    # of raising IndexError
	    if line[p_e:p_e + 1] != ')':
	        continue

	    # four characters between the parentheses are not necessarily a year,
	    # e.g. '(n.d.)'; skip those instead of letting int() raise ValueError
	    year_str = line[p_s + 2:p_e]
	    if not year_str.isdigit():
	        continue
	    year = int(year_str)

	    if year < keep_pubs_after:
	        continue

	    authors = line[0: p_s]

	    # the title sits between ' "' and the closing '."' (or '?"')
	    q_s = line.find(' "', p_e)
	    if not q_s > 0:
	        continue
	    q_e = line.find('."', q_s)

	    if not q_e > 0:
	        q_e = line.find('?"', q_s)
	        if not q_e > 0:
	            continue

	    # keeps the final '.' or '?' of the title, drops both quotes
	    title = line[q_s + 2:q_e + 1]

	    # the journal name runs up to the first ', ' after the title
	    c = line.find(', ', q_e + 2)
	    if not c > 0:
	        continue

	    journal = line[q_e + 3:c]

	    # everything after the journal, minus the last character of the line
	    issue = line[c + 2:-1]


	    # ad hoc fixes for journal names
	    journal = journal.replace('The ISME journal', 'ISME J')
	    journal = journal.replace('Proceedings of the National Academy of Sciences of the United States of America', 'Proc Natl Acad Sci U S A')
	    journal = journal.replace('Proceedings of the National Academy of Sciences', 'Proc Natl Acad Sci U S A')
	    journals_list.append(journal)


	    # ad hoc fixes for authors who didn't pay attention how their names are printed
	    # when it mattered ;) (except the 'Jr.' fix)
	    authors = authors.replace(', Jr.', ' Jr.')
	    authors = authors.replace('Amaral Zettler', 'Amaral-Zettler')
	    authors = authors.replace('Mark Welch, D.,', 'Mark Welch, D. B.,')
	    authors = authors.replace('Serres, M.,', 'Serres, M. H.,')
	    authors = authors.replace('Amaral-Zettler, L.,', 'Amaral-Zettler, L. A.,')
	    authors = authors.replace('Huse, S.,', 'Huse, S. M.,')
	    authors = authors.replace('Wernegreen, J.,', 'Wernegreen, J. J.,')
	    authors = authors.replace('Yushenova, I.,', 'Yushenova, I. A.,')
	    authors = authors.replace('Arkhipova, I.,', 'Arkhipova, I. R.,')


	    # dict.has_key() does not exist on python 3; setdefault works on both
	    pubs_dict.setdefault(year, []).append({'authors': authors, 'title': title, 'journal': journal, 'issue': issue})


	    # a single comma means a single 'Last, F. M.' author; otherwise split the
	    # list on '., ' and give back the period the split removed
	    if authors.count(',') == 1:
	        names = [authors]
	    else:
	        names = [a if a.endswith('.') else a + '.' for a in authors.replace('and ', '').split('., ')]

	    authors_list.extend(names)
	    if year > 2004:
	        recent_authors_list.extend(names)

	# the whole input has been read; release the file handle
	f.close()


	# navigation bar: one anchor link per year, newest first, with the number of
	# publications kept for that year
	years = ' | '.join(['<a href="#%s">%s</a> (<i>%d</i>)' % (y, y, len(pubs_dict[y])) for y in sorted(pubs_dict.keys(), reverse=True)])

	# tally journals in one pass instead of calling list.count() once per
	# distinct journal
	journal_counts = {}
	for journal in journals_list:
	    journal_counts[journal] = journal_counts.get(journal, 0) + 1

	# highest count first, ties broken by journal name (descending); keep 25
	ranked_journals = sorted([(count, journal) for journal, count in journal_counts.items()], reverse=True)[0:25]
	top_journals = ", ".join(['<b>%s</b> (<i>%d</i>)' % (journal, count) for count, journal in ranked_journals])

	# single-argument print(...) produces the same output on python 2 and python 3
	print("<h2>Top 25 Journals</h2>")
	print(top_journals)
	print('')

	#top_authors = ", ".join(['<b>%s</b> (<i>%d</i>)' % (x[1], x[0]) for x in sorted([(authors_list.count(author), author) for author in set(authors_list)], reverse = True) if x[0] >= 10][0:20])
	#print "Authors (all time): ", top_authors
	#print

	#recent_authors = ", ".join(['%s (<i>%d</i>)' % (x[1], x[0]) for x in sorted([(recent_authors_list.count(author), author) for author in set(recent_authors_list)], reverse = True) if x[0] > 10])
	#print "<h2>Authors (last 10 years)</h2>"
	#print get_author_links(recent_authors)
	#print


	print("<h2>Years</h2>")

	print(years)
	print('')

	# one section per year, newest first; each title links to a google scholar
	# search for that title
	for year in sorted(pubs_dict.keys(), reverse=True):
	    print('<a name="%s">&nbsp;</a><h2>%s</h2>' % (year, year))
	    print('')
	    for pub in pubs_dict[year]:
	        scholar_url = 'http://scholar.google.com/scholar?hl=en&q=%s' % (pub['title'].replace(' ', '+'))

	        print('%s (%d). "<a href="%s" target="_new">%s</a>" <i>%s</i>, %s.' % (
	            get_author_links(pub['authors']),
	            year,
	            scholar_url,
	            pub['title'],
	            pub['journal'],
	            pub['issue']))
	        print('')
No results found