Skip to content

Instantly share code, notes, and snippets.

@jakeoung
Last active May 10, 2017 02:26
Show Gist options
  • Select an option

  • Save jakeoung/18bf4e0f022fe67d706ce896ab190550 to your computer and use it in GitHub Desktop.

Select an option

Save jakeoung/18bf4e0f022fe67d706ce896ab190550 to your computer and use it in GitHub Desktop.
convert bibliography for ai
import re
import requests
import titlecase
# Ordinal words and the numeral strings that replace them.
# Going above ten is probably never needed.
words_to_numerals = {
    'first': '1',
    'second': '2',
    'third': '3',
    'fourth': '4',
    'fifth': '5',
    'sixth': '6',
    'seventh': '7',
    'eighth': '8',
    'ninth': '9',
    'tenth': '10',
}

# Journal titles that get a leading definite article ('The').
journals_needing_article = {
    'Journal of Philosophy',
    'Philosophical Quarterly',
    'Philosophical Review',
}
## JK
# Acronyms that are forced to upper case by `abbreviations`.
list_abbr = ['UCLA']


def abbreviations(word, **kwargs):
    """
    Return the upper-cased word when it is a known acronym.

    Passed as the ``callback`` argument of ``titlecase.titlecase``.

    :param word: the word under consideration.
    :type word: str
    :param kwargs: extra keyword arguments; accepted and ignored.
    :returns: str or None -- the upper-cased acronym, or None when the
        word is not listed in ``list_abbr``.
    """
    shouted = word.upper()
    return shouted if shouted in list_abbr else None
def remove_outer_braces(s):
    """
    str -> str
    Strip a single enclosing pair of braces, but only when the string
    contains no other braces.

    (First pass at removing unnecessary protection from biblatex fields;
    strings with internal braces such as '{This {is} a test}' are left
    untouched.)

    >>> remove_outer_braces('{This is a test}')
    'This is a test'
    >>> remove_outer_braces('This is a test')
    'This is a test'
    >>> remove_outer_braces('{This} is a test')
    '{This} is a test'
    """
    singly_wrapped = re.match('^{[^{}]*}$', s) is not None
    return s[1:-1] if singly_wrapped else s
def full_range(s):
    """
    str -> str
    Expand a truncated page range, e.g. '100--45' becomes '100-145'.

    The start and end may be separated by any number of hyphens. When the
    end has fewer digits than the start, the missing leading digits are
    copied from the start. The result is joined with a SINGLE hyphen;
    callers that want '--' must normalise afterwards. A string that
    contains no hyphen is returned unchanged.

    >>> full_range('100--115')
    '100-115'
    >>> full_range('100-1000')
    '100-1000'
    >>> full_range('100-15')
    '100-115'
    >>> full_range('100-5')
    '100-105'
    >>> full_range('100')
    '100'
    """
    parts = re.split('-+', s)
    if len(parts) < 2:
        # Not a range at all: nothing to expand (previously an IndexError).
        return s
    start, end = parts[0], parts[1]
    missing = len(start) - len(end)
    if missing > 0:
        parts[1] = start[:missing] + end
    return '-'.join(parts)
def remove_resolver(doi):
    """
    str -> str
    Remove a DOI resolver prefix from the start of a DOI.

    Handles 'http://dx.doi.org/' as well as the 'https://' and plain
    'doi.org' variants, case-insensitively. A bare DOI is returned
    unchanged.

    >>> remove_resolver('http://dx.doi.org/10.1080/00455091.2013.871111')
    '10.1080/00455091.2013.871111'
    >>> remove_resolver('https://doi.org/10.1080/00455091.2013.871111')
    '10.1080/00455091.2013.871111'
    >>> remove_resolver('10.1080/00455091.2013.871111')
    '10.1080/00455091.2013.871111'
    """
    # Dots are escaped and the pattern is anchored so that only a genuine
    # resolver prefix is stripped.
    return re.sub(
        r'^https?://(?:dx\.)?doi\.org/',
        '',
        doi,
        flags=re.IGNORECASE
    )
def title_name(name):
    """
    str -> str
    Take a name and return it in title case, leaving the separator word
    'and' alone.

    Only the exact word 'and' is left untouched; names that merely start
    with those letters (e.g. 'anderson') are title-cased like any other.
    Runs of whitespace are collapsed to single spaces.

    >>> title_name('hodgson, thomas')
    'Hodgson, Thomas'
    >>> title_name('hodgson, thomas and CHOMSKY, NOAM')
    'Hodgson, Thomas and Chomsky, Noam'
    >>> title_name('anderson, andy')
    'Anderson, Andy'
    """
    return ' '.join(
        word if word == 'and' else word.title()
        for word in name.split()
    )
def braces(s):
    """
    str -> str
    Wrap a string in braces, adding only the brace(s) that are missing
    at either end.

    >>> braces('foo')
    '{foo}'
    >>> braces('{foo}')
    '{foo}'
    """
    opener = '' if s.startswith('{') else '{'
    closer = '' if s.endswith('}') else '}'
    return opener + s + closer
def remove_eprint(record):
    """
    Drop the 'eprint' field, if there is one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("eprint", None)
    return record
def issue_to_number(record):
    """
    Rename a purely numeric 'issue' field to 'number'.

    Nothing happens when there is no 'issue' field, when a 'number' field
    already exists, or when the issue is not made of digits only.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "issue" not in record or "number" in record:
        return record
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    if re.fullmatch(r'\d+', record["issue"]):
        record["number"] = record.pop("issue")
    return record
def remove_leading_zeros(record):
    """
    Remove leading zeroes from Volume and Number fields.

    A value made up only of zeroes is reduced to a single '0' instead of
    being emptied.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("volume", "number"):
        if field in record:
            value = record[field]
            # lstrip alone would turn '0' or '000' into an empty string;
            # fall back to the first character in that case.
            record[field] = value.lstrip('0') or value[:1]
    return record
def remove_numpages(record):
    """
    Drop the 'numpages' field, if there is one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    try:
        del record["numpages"]
    except KeyError:
        # Field absent: nothing to remove.
        pass
    return record
def remove_month(record):
    """
    Drop the 'month' field, if there is one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("month", None)
    return record
def remove_series(record):
    """
    Drop the 'series' field, if there is one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    try:
        del record["series"]
    except KeyError:
        # Field absent: nothing to remove.
        pass
    return record
def philpapers(record):
    """
    Put the PhilPapers ID in a 'philpapers' field.

    This function assumes that the ID of the record is a PhilPapers ID.
    Only IDs containing a hyphen are handled: the PhilPapers ID is the
    last hyphen-separated segment, or the last two segments when the
    final one is a number (IDs of the form 'FOOBAR-1').

    :param record: the record; must contain an 'ID' key.
    :type record: dict
    :returns: dict -- the modified record.
    """
    segments = record["ID"].split('-')
    if len(segments) > 1:
        # Check whether we have an ID of the form 'FOOBAR-1'
        if re.fullmatch(r'\d+', segments[-1]):
            ppid = '-'.join(segments[-2:])
        else:
            ppid = segments[-1]
        record["philpapers"] = ppid
    return record
def subtitles(record):
    """
    Split 'journaltitle' and 'title' at their first colon.

    The text before the colon stays in the original field; the text after
    it goes to 'journalsubtitle' / 'subtitle'. Both halves are stripped of
    surrounding whitespace. Fields without a colon are left alone.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    def split_at_first_colon(text):
        # Everything before / after the first ':' with whitespace trimmed.
        head, _, tail = text.partition(':')
        return head.strip(), tail.strip()

    for main_field, sub_field in (
        ("journaltitle", "journalsubtitle"),
        ("title", "subtitle"),
    ):
        if main_field in record and ':' in record[main_field]:
            main, sub = split_at_first_colon(record[main_field])
            record[main_field] = main
            record[sub_field] = sub
    return record
def add_definite_to_journaltitles(record):
    """
    Prefix 'The ' to journal titles listed in ``journals_needing_article``.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    needs_article = (
        "journaltitle" in record
        and record["journaltitle"] in journals_needing_article
    )
    if needs_article:
        record["journaltitle"] = 'The ' + record["journaltitle"]
    return record
def remove_pages_from_entry(record, entry):
    """
    Remove the 'pages' field from records whose ENTRYTYPE equals `entry`.

    :param record: the record; must contain an 'ENTRYTYPE' key.
    :type record: dict
    :param entry: the entry type to strip pages from
        (e.g. 'incollection' or 'inbook').
    :type entry: str
    :returns: dict -- the modified record.
    """
    if record["ENTRYTYPE"] != entry:
        return record
    record.pop("pages", None)
    return record
def active_quotes(record):
    """
    Replace LaTeX quotes with unicode quotes,
    defined as active characters by csquotes.

    Every field of the record is processed, so all values must be strings.
    Opening marks become '‘' and closing marks become '’', whether they
    were single or double.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # The regexes must be done like this to avoid balance problems.
    # Raw strings are used so that '\s' and '\w' are not invalid escapes.
    # Opening: one or two '`', one or two "'", one '"', or one '“',
    # preceded by space or the start of the string and followed by a
    # word character.
    opening = r"""(?:(?<=\s)|(?<=^))((`|'){1,2}|"|“)(?=\w)"""
    # Closing: one or two "'", one '"', or one '”', preceded by a word
    # character and followed by space or the end of the string.
    closing = r"""(?<=\w)('{1,2}|"|”)(?:(?=\s)|(?=$))"""
    for field in record:
        opened = re.sub(opening, '‘', record[field])
        record[field] = re.sub(closing, '’', opened)
    return record
def remove_protection(record):
    """
    Strip unnecessary outer braces from 'title' and 'subtitle'.

    Each field that is present is passed through ``remove_outer_braces``.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("title", "subtitle"):
        if field in record:
            record[field] = remove_outer_braces(record[field])
    return record
def citeulike(record):
    """
    Remove CiteULike's special fields
    ('citeulike-article-id', 'priority' and 'posted-at').

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("citeulike-article-id", "priority", "posted-at"):
        record.pop(field, None)
    return record
def empty_fields(record):
    """
    Remove every field whose value is the empty string.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # Collect first: a dict must not change size while being iterated.
    blanks = [name for name, value in record.items() if value == '']
    for name in blanks:
        del record[name]
    return record
def biblatex_page_ranges(record):
    """
    Normalise the 'pages' field to a full biblatex range ('100--115').

    Any 'p', 'pp', 'p.' or 'pp.' marker is removed. If what remains is a
    numeric range, a truncated end is filled in via ``full_range`` and
    the separator is normalised to two hyphens. Otherwise a complaint is
    printed and the (marker-free) value is kept.

    :param record: the record; 'ID' is read when a complaint is printed.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "pages" not in record:
        return record
    # Get rid of p., pp. etc.
    pages = re.sub(r'[Pp]{1,2}\.?', '', record["pages"]).strip()
    record["pages"] = pages
    # If this is a range remove truncation and normalise it to two hyphens,
    # if not, complain
    if re.search(r'^\d+-+\d+$', pages):
        # full_range returns a single hyphen range,
        # so do the normalisation afterwards
        record["pages"] = re.sub('-+', '--', full_range(pages))
    else:
        print(
            "The 'Pages' field for record {} isn't a valid biblatex range.".format(
                record["ID"]
            )
        )
    return record
def non_page_hyphens(record):
    """
    Normalise every run of hyphens in 'volume' and 'number' to exactly two.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("volume", "number"):
        if field in record:
            record[field] = re.sub('-+', '--', record[field])
    return record
def dashes(record):
    """
    Replace en dashes with '--' and em dashes with '---' in every field.

    :param record: the record; all values must be strings.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in record:
        text = record[field]
        text = re.sub('–', '--', text)
        record[field] = re.sub('—', '---', text)
    return record
def remove_keyword(record):
    """
    Remove the 'keywords' and 'keyword' fields.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("keywords", "keyword"):
        record.pop(field, None)
    return record
def strip_doi(record):
    """
    Pass the 'doi' field, when present, through ``remove_resolver``.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "doi" not in record:
        return record
    record["doi"] = remove_resolver(record["doi"])
    return record
def get_doi(record):
    """
    Get DOIs for articles from the CrossRef API.

    Only records with ENTRYTYPE 'article' and no 'doi' field are looked
    up. The query is built from the 'title' and 'author' fields; the DOI
    of the best-scoring match is stored in 'doi'. Progress and problems
    are reported with ``print``.

    :param record: the record; must contain 'ENTRYTYPE', and 'ID' when a
        lookup is made.
    :type record: dict
    :returns: dict -- the modified record.
    :raises requests.exceptions.ConnectionError: when the API cannot be
        reached; left to the caller so it can stop further lookups.
    """
    if record["ENTRYTYPE"] != "article" or "doi" in record:
        return record
    # Build a search term for the API.
    # The API doesn't like spaces or exotic characters
    query = ''
    if "title" in record:
        query += re.sub(r'\W+', '+', record["title"])
    if "author" in record:
        query += '+' + re.sub(r'\W+', '+', record["author"])
    # Make sure a query has been built
    if not query:
        return record
    payload = {
        'query': query,
        'rows': '1',
        'sort': 'score',
        'order': 'desc'
    }
    try:
        # The timeout keeps one unresponsive request from hanging the run.
        r = requests.get(
            'http://api.crossref.org/works',
            params=payload,
            timeout=30
        )
    # This deals with errors caused by encoding problems,
    # which are fixed anyway by having the conversion
    # to unicode done before authors are dealt with
    except UnicodeEncodeError:
        print(
            "I couldn't get a DOI. A character in record {} wasn't encoded in a way the CrossRef API understands.".format(
                record["ID"]
            )
        )
        return record
    # Only a read timeout is handled here; a connect timeout is also a
    # ConnectionError and must still reach the caller.
    except requests.exceptions.ReadTimeout:
        print(
            "The CrossRef API took too long to answer for record {}.".format(
                record["ID"]
            )
        )
        return record
    print(
        'I got status code {} from the CrossRef API for record {}.'.format(
            r.status_code,
            record["ID"]
        )
    )
    # Proceed if the status code was a good one
    if r.status_code == requests.codes.ok:
        # The result is JSON text.
        # Items is a list in order of match score, it will have a DOI in it.
        # Catch any sort of problem with the response, including a body
        # that is not valid JSON (ValueError).
        try:
            record["doi"] = r.json()['message']['items'][0]['DOI']
        except (IndexError, KeyError, TypeError, ValueError):
            print(
                "I couldn't find a DOI in the JSON for record {}.".format(
                    record["ID"]
                )
            )
    return record
def titlecase_name(record):
    """
    Put the 'author' and 'editor' fields into title case via ``title_name``.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for role in ("author", "editor"):
        if role in record:
            record[role] = title_name(record[role])
    return record
def publisher(record):
    """
    Protect a publisher containing 'and' by wrapping the field in braces
    (via ``braces``).

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "publisher" not in record:
        return record
    if 'and' in record["publisher"]:
        record["publisher"] = braces(record["publisher"])
    return record
def edition(record):
    """
    Put "Edition" in a nice format.

    The value is lower-cased and stripped. An ordinal word listed in
    ``words_to_numerals`` is replaced by its numeral; an ordinal numeral
    such as '2nd' loses its suffix. Anything else is left untouched.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "edition" not in record:
        return record
    value = record["edition"].lower().strip()
    if value in words_to_numerals:
        record["edition"] = words_to_numerals[value]
    elif re.search(r'\d+(st|nd|rd|th)', value):
        # Only drop a suffix that directly follows a digit; an unanchored
        # pattern would also eat the 'st' of e.g. 'standard'.
        record["edition"] = re.sub(r'(?<=\d)(st|nd|rd|th)', '', value)
    return record
def journaltitle(record):
    """
    Rename the 'journal' field to 'journaltitle'.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "journal" not in record:
        return record
    record["journaltitle"] = record.pop("journal")
    return record
def case_title(record):
    """
    Title-case 'title', 'subtitle', 'booktitle' and 'journal' for records
    that are in English or that declare no language.

    Depends on the 'titlecase' module
    https://pypi.python.org/pypi/titlecase/
    and uses ``abbreviations`` as its callback.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if record.get("language", 'English') != 'English':
        return record
    for field in ("title", "subtitle", "booktitle", "journal"):
        if field in record:
            record[field] = titlecase.titlecase(
                record[field],
                callback=abbreviations
            )
    return record
def join_author_editor(record):
    """
    Collapse list-valued 'author' and 'editor' fields into single strings
    joined by " and ".

    'author' is read as a list of strings; 'editor' is read as a list of
    dicts, each with a 'name' key.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    separator = " and "
    if "author" in record:
        record["author"] = separator.join(record["author"])
    if "editor" in record:
        editor_names = [entry['name'] for entry in record["editor"]]
        record["editor"] = separator.join(editor_names)
    return record
def booktitle(record):
    """
    Copy 'title' into 'booktitle' for records whose ENTRYTYPE is 'book'.

    :param record: the record; must contain an 'ENTRYTYPE' key.
    :type record: dict
    :returns: dict -- the modified record.
    """
    is_book = record["ENTRYTYPE"] == "book"
    if is_book and "title" in record:
        record["booktitle"] = record["title"]
    return record
def remove_abstract(record):
    """
    Drop the 'abstract' field, if there is one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("abstract", None)
    return record
def remove_epub(record):
    """
    Remove the 'epub' field.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # Delete the field that was tested for. The earlier code deleted
    # 'issn' here, which removed the wrong field and raised KeyError when
    # a record had 'epub' but no 'issn'.
    if "epub" in record:
        del record["epub"]
    return record
def remove_ISSN(record):
    """
    Drop the 'issn' field, if there is one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    try:
        del record["issn"]
    except KeyError:
        # Field absent: nothing to remove.
        pass
    return record
def remove_ISBN(record):
    """
    Drop the 'isbn' field, if there is one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("isbn", None)
    return record
def remove_copyright(record):
    """
    Drop the 'copyright' field, if there is one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    try:
        del record["copyright"]
    except KeyError:
        # Field absent: nothing to remove.
        pass
    return record
def language(record):
    """
    Tidy the 'language' and 'langid' fields.

    * 'language' equal to 'English': both 'language' and 'langid' are
      removed.
    * any other 'language': 'langid' is set to its lower-cased value.
    * 'langid' without 'language': a message is printed and nothing is
      changed.

    :param record: the record; 'ID' is read when the message is printed.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "language" in record:
        if record["language"] == 'English':
            del record["language"]
            record.pop("langid", None)
        else:
            record["langid"] = record["language"].lower()
    elif "langid" in record:
        # The message previously had a stray apostrophe after the value.
        print(
            "There is a 'Langid' of '{}' but no 'Language' field for record {}.".format(
                record["langid"],
                record["ID"]
            )
        )
    return record
def remove_publisher(record):
    """
    Drop the 'publisher' field, if there is one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("publisher", None)
    return record
def remove_link(record):
    """
    Drop the 'link' field, if there is one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    try:
        del record["link"]
    except KeyError:
        # Field absent: nothing to remove.
        pass
    return record
def remove_ampersand(record):
    r"""
    Convert escaped ampersands ('\&') to 'and' in the 'booktitle',
    'journal', 'subtitle' and 'title' fields.

    An ampersand preceded by one or more backslashes is replaced; a bare
    '&' is left alone.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # One pattern for every field. 'title' and 'subtitle' used to demand
    # two backslashes, so a normally escaped '\&' was never converted there.
    escaped_ampersand = re.compile(r'\\+&')
    for field in ("booktitle", "journal", "subtitle", "title"):
        if field in record:
            record[field] = escaped_ampersand.sub('and', record[field])
    return record
def escape_characters(record):
    """
    Make sure that characters reserved by LaTeX are escaped.

    A backslash is put in front of every '&', '%' and '_' that is not
    already preceded by one. All fields except 'ID' are processed, so
    their values must be strings.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # Raw strings avoid the invalid escape sequences of the earlier
    # patterns; one character class replaces three separate passes.
    unescaped = re.compile(r'(?<!\\)([&%_])')
    for field in record:
        # Underscores are ok in IDs, which shouldn't have other special
        # characters anyway
        if field != "ID":
            record[field] = unescaped.sub(r'\\\1', record[field])
    return record
def jstor(record):
    """
    Get rid of JSTOR's special fields ('jstor_articletype',
    'jstor_formatteddate' and 'jstor_issuetitle').

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in (
        "jstor_articletype",
        "jstor_formatteddate",
        "jstor_issuetitle",
    ):
        record.pop(field, None)
    return record
def protect(s):
    """
    Str -> Str
    Helper function for `protect_capitalisation`.
    Take a string and return a string where words containing capital letters
    (after the first word) are protected with braces. The word that follows
    a colon and a space is protected as well, whatever its case.

    >>> protect('the History of France')
    'the {History} of {France}'
    """
    # Substitute each match where it occurs. Re-using the matched words as
    # regex patterns broke on words such as 'C++', braced repeated words
    # twice, and replaced short words inside longer ones.
    return re.sub(
        r'(?<=\s)\S*[A-Z]+\S*|(?<=:\s)\S+',
        lambda match: '{' + match.group(0) + '}',
        s
    )
def protect_capitalisation(record):
    """
    Apply ``protect`` to the 'title', 'subtitle' and 'booktitle' fields.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("title", "subtitle", "booktitle"):
        if field in record:
            record[field] = protect(record[field])
    return record
def multivolume(record):
    """
    Promote a 'book' or 'collection' that has a 'volume' field to
    'mvbook' / 'mvcollection'.

    :param record: the record; must contain an 'ENTRYTYPE' key.
    :type record: dict
    :returns: dict -- the modified record.
    """
    promoted = {"book": "mvbook", "collection": "mvcollection"}
    entry_type = record["ENTRYTYPE"]
    if entry_type in promoted and "volume" in record:
        record["ENTRYTYPE"] = promoted[entry_type]
    return record
def remove_booktitle(record):
    """
    Drop the 'booktitle' field, if there is one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("booktitle", None)
    return record
def year_to_date(record):
    """
    Rename the 'year' field to 'date'.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "year" not in record:
        return record
    record["date"] = record.pop("year")
    return record
"""
Added by koo
"""
# (regex, abbreviation) pairs applied in order by
# `convert_abbreviations_ieee`. The word boundaries keep a pattern from
# firing inside a longer word (e.g. 'Journals', 'Psychoanalysis').
list_abbrevations = [
    [r"\b[Jj]ournal\b", "J."],
    # 'Int.' is the form used by the venue tables in this file;
    # the previous 'Inc.' abbreviates 'Incorporated'.
    [r"\bInternational\b", "Int."],
    [r"\bTransactions\b", "Trans."],
    [r"\b[Aa]nalysis\b", "Anal."],
    [r"\b[Rr]ecognition\b", "Recog."],
]


def convert_abbreviations_ieee(record, key):
    """
    Abbreviate common words in one field of the record.

    Every pattern in ``list_abbrevations`` is replaced by its abbreviation
    in ``record[key]``. A record without that field is left unchanged.

    :param record: the record.
    :type record: dict
    :param key: name of the field to abbreviate (e.g. 'journal').
    :type key: str
    :returns: dict -- the modified record.
    """
    if key in record:
        for pattern, abbreviation in list_abbrevations:
            record[key] = re.sub(pattern, abbreviation, record[key])
    return record
def remove_year_in_key(record, key):
    """
    Strip year markers from one field of the record.

    Two kinds of marker are removed from ``record[key]``: any four-digit
    number together with the dots and spaces that follow it, and an
    apostrophe followed by two digits (e.g. "'17").

    :param record: the record.
    :type record: dict
    :param key: name of the field to clean (e.g. 'booktitle').
    :type key: str
    :returns: dict -- the modified record.
    """
    if key not in record:
        return record
    without_long_year = re.sub(r"[0-9]{4}\.* *", '', record[key])
    record[key] = re.sub(r"\'[0-9]{2}", '', without_long_year)
    return record
# Known conference proceedings. Each entry is
# [acronym, full name, abbreviated name, (optional further aliases)];
# `unify_ai_titles` recognises a venue by any of these strings and indexes
# the entry with `abbrev_degree`.
list_proc = [
    ["CVPR", "Computer Vision and Pattern Recognition",
     "Proc. IEEE Conf. Comput. Vis. Pattern Recog."],
    ["ECCV", "European Conference on Computer Vision",
     "Proc. Eur. Conf. Comput. Vis."],
    ["ICCV", "international conference on computer vision",
     "Proc. Int. Conf. Comput. Vis.", "Int'l Conf. Computer Vision"],
    ["BMVC", "British Machine Vision Conference",
     "Proc. British Mach. Vis. Conf."],
    ["SSVM", "Scale Space and Variational Methods in Computer Vision",
     "Proc. Int. Conf. on Scale Space and Variational Methods in Comput. Vis."],
    # 'Int.' as in the other entries (was the typo 'Inc.').
    ["ICIP", "International Conference on Image Processing",
     "Proc. Int. Conf. on Image Processing"],
]

# Known journals, same layout as `list_proc`.
list_jour = [
    ["PAMI", "Pattern Analysis and Machine Intelligence",
     "IEEE Trans. Pattern Anal. Mach. Intell."],
    ["IJCV", "International Journal of Computer Vision",
     "Int. J. of Comput. Vis."],
    ["TIP", "Transactions on Image Processing",
     "IEEE Trans. on Image Processing"],
    ["JMIV", "Journal of Mathematical Imaging and Vision",
     "J. of Math. Imaging and Vis."],
]


def _find_venue(name, venues):
    """Return the first entry of `venues` recognised in `name`, else None."""
    for entry in venues:
        # The acronym must be a whole word, otherwise 'TIP' would match
        # inside 'Multiple'.
        acronym = r'\b' + re.escape(entry[0]) + r'\b'
        if re.search(acronym, name, flags=re.IGNORECASE):
            return entry
        # The longer names are matched literally: they contain '.' and
        # other characters that must not act as regex syntax.
        for alias in entry[1:]:
            if re.search(re.escape(alias), name, flags=re.IGNORECASE):
                return entry
    return None


def unify_ai_titles(record, abbrev_degree=2, verbose=1):
    """
    Unify AI proceedings and journal names based on abbrev_degree.

    If the record has a 'booktitle' it is looked up in ``list_proc``;
    otherwise a 'journal' is looked up in ``list_jour``. A recognised
    venue is replaced by the form selected with `abbrev_degree`;
    unrecognised names are kept. Matching is case-insensitive.

    :param record: the record.
    :type record: dict
    :param abbrev_degree: index into the venue entry:
        0: acronym
        1: full name
        2: abbreviated name (IEEE Computer Society Style Guide)
    :type abbrev_degree: int
    :param verbose: when true, print the resulting field value.
    :returns: dict -- the modified record.
    """
    if "booktitle" in record:
        field, venues = "booktitle", list_proc
    elif "journal" in record:
        field, venues = "journal", list_jour
    else:
        return record
    entry = _find_venue(record[field], venues)
    if entry is not None:
        record[field] = entry[abbrev_degree]
    if verbose:
        print(record[field])
    return record
def remove_page_if_doi(record):
    """
    Remove the page information when a DOI is available.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "doi" in record:
        # The page field is called 'pages' everywhere else in this file.
        # 'page' is dropped too for compatibility; pop() avoids the
        # KeyError the earlier unconditional `del record["page"]` raised.
        record.pop("pages", None)
        record.pop("page", None)
    return record
def remove_organization(record):
    """
    Drop the 'organization' field, if there is one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("organization", None)
    return record
#!/usr/bin/env python3
import argparse
import re
import shutil
import sys
import cb_customs
from bibtexparser.bparser import BibTexParser
from bibtexparser.bwriter import BibTexWriter
from bibtexparser.customization import *
def fix_keys(l):
    """ list -> list
    Take a list that represents lines.
    Find lines which are the start of a bibtex entry without a key and
    give them dummy keys ('Foo1', 'Foo2', ...).
    Other entry-start lines are kept as they are; a trailing comma is
    added only when the line has no comma after its opening brace.
    The list is modified in place and also returned.

    >>> fix_keys(
    ['@book{foobar,', '@article{', ' Author = {Thomas Hodgson}', '}']
    )
    ['@book{foobar,', '@article{Foo1,', ' Author = {Thomas Hodgson}', '}']
    """
    dummy_number = 1
    for index, line in enumerate(l):
        stripped = line.strip()
        if re.fullmatch(r'@\w+\s*\{,?', stripped):
            # Entry start with no key: insert a dummy one.
            l[index] = line[:line.find('{') + 1] + 'Foo' + str(dummy_number) + ','
            dummy_number += 1
        elif stripped.startswith('@'):
            brace = line.find('{')
            if brace == -1:
                # An '@' line without an opening brace is not an entry
                # start; leave it alone instead of crashing.
                continue
            ## JK: keys are no longer stripped of non-word characters.
            # Appending a comma unconditionally therefore produced
            # '@book{key,,'; only add one when it is actually missing.
            if ',' not in line[brace + 1:]:
                l[index] = line + ','
    return l
def customizations(record):
    """Run a record through the clean-up steps from `cb_customs`.

    Reads the module-level `args` (`nodoi`, `verbose`) and sets
    `args.nodoi` once the CrossRef API turns out to be unreachable.

    :param record: a record
    :returns: -- customized record
    """
    # Currently disabled steps, kept for reference:
    #   before titlecase_name: convert_to_unicode (this needs to come
    #     before authors are dealt with, otherwise there are encoding
    #     problems), page_double_hyphen, author, editor,
    #     cb_customs.join_author_editor (needed after `author` is called
    #     to allow writing)
    #   before case_title: cb_customs.language
    #   after add_definite_to_journaltitles:
    #     cb_customs.remove_pages_from_books_and_collections
    #   after dashes: cb_customs.biblatex_page_ranges
    #   after remove_eprint: cb_customs.year_to_date,
    #     cb_customs.issue_to_number
    #   before the DOI lookup: cb_customs.remove_page_if_doi
    # The order of the steps matters.
    single_argument_steps = (
        cb_customs.titlecase_name,
        cb_customs.case_title,
        # This should come after `journaltitle` is called
        cb_customs.add_definite_to_journaltitles,
        cb_customs.non_page_hyphens,
        cb_customs.dashes,
        cb_customs.remove_abstract,
        cb_customs.remove_ISBN,
        cb_customs.remove_ISSN,
        cb_customs.remove_epub,
        cb_customs.remove_copyright,
        cb_customs.remove_publisher,
        cb_customs.remove_organization,
        cb_customs.remove_link,
        cb_customs.escape_characters,
        cb_customs.remove_ampersand,
        cb_customs.jstor,
        cb_customs.citeulike,
        cb_customs.edition,
        cb_customs.multivolume,
        cb_customs.strip_doi,
        cb_customs.remove_keyword,
        cb_customs.empty_fields,
        cb_customs.remove_protection,
        cb_customs.subtitles,
        cb_customs.remove_series,
        cb_customs.remove_month,
        cb_customs.remove_numpages,
        cb_customs.remove_eprint,
        cb_customs.remove_leading_zeros,
    )
    for step in single_argument_steps:
        record = step(record)

    # Venue clean-up: drop years, unify known venues, abbreviate the rest.
    record = cb_customs.remove_year_in_key(record, "booktitle")
    record = cb_customs.unify_ai_titles(record)
    for field in ("booktitle", "journal"):
        record = cb_customs.convert_abbreviations_ieee(record, field)
    record = cb_customs.remove_pages_from_entry(record, "inproceedings")

    if args.nodoi:
        return record
    try:
        record = cb_customs.get_doi(record)
    # If there is a connection error stop trying to get DOIs
    except cb_customs.requests.exceptions.ConnectionError:
        if args.verbose:
            print(
                "I couldn't connect to the CrossRef API. "
                "Perhaps you are not connected to the internet?"
            )
        args.nodoi = True
    return record
if __name__ == "__main__":
    # Local import: only the script entry point needs it.
    import os.path

    parser = argparse.ArgumentParser()
    # Optional, so that the standard-input branch below can be reached.
    parser.add_argument(
        'input',
        nargs='?',
        help='input bib file (standard input is read when omitted)'
    )
    parser.add_argument(
        '--no-doi',
        dest='nodoi',
        action='store_true',
        help="Don't look for DOIs from CrossRef"
    )
    parser.add_argument(
        '--verbose',
        dest='verbose',
        action='store_true',
        help="Print messages"
    )
    # `args` stays a module-level name: `customizations` reads it.
    args = parser.parse_args()
    if args.input:
        print(args.input)
        bib = args.input
        try:
            # shutil.copy(bib, bib + '.backup')
            # if args.verbose:
            #     print(
            #         "I have made a backup of the orignal file at {}.backup"
            #         .format(bib)
            #     )
            with open(bib, 'r', encoding='utf-8') as biblatex:
                content = biblatex.read()
        except FileNotFoundError:
            if args.verbose:
                print("I couldn't find the file {}.".format(bib))
            # Non-zero status so that callers can detect the failure.
            sys.exit(1)
    else:
        content = sys.stdin.read()
    # Find the start of the first record
    first_record = re.search('@', content)
    if first_record is None:
        if args.verbose:
            print("The file I was given didn't contain any records.")
        sys.exit(1)
    content = content[first_record.start():].split('\n')
    # Provide dummy citekeys
    content = fix_keys(content)
    fixed_content = '\n'.join(content)
    bibliography = BibTexParser(
        fixed_content,
        customization=customizations,
        ignore_nonstandard_types=False
        # Otherwise bibtexparser will complain if I give it a collection
    )
    output = BibTexWriter().write(bibliography)
    if args.input:
        # splitext copes with any extension (or none); slicing off four
        # characters was only right for names ending in '.bib'.
        output_name = os.path.splitext(args.input)[0] + '_convert.bib'
        with open(output_name, 'w', encoding='utf-8') as biblatex:
            biblatex.write(output)
    else:
        sys.stdout.write(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment