jakeoung · May 10, 2017 02:26
diff --git a/cb_customs.py b/cb_customs.py
 import re
 import requests
 import titlecase

 # Ordinal words that may appear in an 'edition' field, mapped to numerals.
 # Editions beyond the tenth are not expected to turn up.
 words_to_numerals = {
    ordinal: str(position)
    for position, ordinal in enumerate(
        (
            'first', 'second', 'third', 'fourth', 'fifth',
            'sixth', 'seventh', 'eighth', 'ninth', 'tenth',
        ),
        start=1,
    )
 }

 # Journal titles that get a leading 'The' added to them.
 journals_needing_article = set((
    'Journal of Philosophy',
    'Philosophical Quarterly',
    'Philosophical Review',
 ))

 ## JK
 # Acronyms that must stay fully upper-case when titles are title-cased.
 list_abbr = ['UCLA']
 def abbreviations(word, **kwargs):
    """
    Callback for ``titlecase.titlecase`` that keeps known acronyms upper-case.

    :param word: a single word taken from a title.
    :type word: str
    :param kwargs: extra keyword arguments supplied by the caller; ignored.
    :returns: str or None -- the upper-cased word when it is listed in
        ``list_abbr``, otherwise None.
    """
    shouted = word.upper()
    return shouted if shouted in list_abbr else None


 def remove_outer_braces(s):
    """
    str -> str
    Strip one pair of enclosing braces, but only when the string contains
    no other braces. Strings with internal braces, such as
    '{This {is} a test}', are returned unchanged.
    >>> remove_outer_braces('{This is a test}')
    'This is a test'
    >>> remove_outer_braces('This is a test')
    'This is a test'
    >>> remove_outer_braces('{This} is a test')
    '{This} is a test'
    """
    wrapped_once = re.search('^{[^{}]*}$', s) is not None
    return s[1:-1] if wrapped_once else s


 def full_range(s):
    """ str -> str
    Take a string representing a page range (e.g. '100--45') and return it
    with the truncated digits of the end page filled in.

    The parts are split on runs of hyphens and re-joined with a SINGLE
    hyphen; callers that need biblatex's double hyphen must convert the
    result themselves. A string without any hyphen (a single page) is
    returned unchanged instead of raising IndexError.

    >>> full_range('100--115')
    '100-115'
    >>> full_range('100-1000')
    '100-1000'
    >>> full_range('100-15')
    '100-115'
    >>> full_range('100-5')
    '100-105'
    >>> full_range('100')
    '100'
    """
    parts = re.split('-+', s)
    if len(parts) < 2:
        # Not a range at all: nothing to expand
        return s
    first, last = parts[0], parts[1]
    if len(last) < len(first):
        # Borrow the leading digits the end page is missing from the start page
        parts[1] = first[:len(first) - len(last)] + last
    return '-'.join(parts)


 def remove_resolver(doi):
    """
    str -> str
    Remove a DOI resolver prefix from the start of a DOI.

    Both 'http' and 'https' and both the 'dx.doi.org' and 'doi.org' hosts
    are recognised (case-insensitively). Strings without such a prefix are
    returned unchanged.
    >>> remove_resolver('http://dx.doi.org/10.1080/00455091.2013.871111')
    '10.1080/00455091.2013.871111'
    >>> remove_resolver('https://doi.org/10.1080/00455091.2013.871111')
    '10.1080/00455091.2013.871111'
    >>> remove_resolver('10.1080/00455091.2013.871111')
    '10.1080/00455091.2013.871111'
    """
    # The dots are escaped so that they only match literal dots
    return re.sub(
        r'^\s*https?://(?:dx\.)?doi\.org/',
        '',
        doi,
        flags=re.IGNORECASE
    )


 def title_name(name):
    """
    str -> str
    Take a name list and return it in title case, keeping the separator
    'and' in lower case.

    Only the whole word 'and' (in any capitalisation) is treated as the
    separator, so names that merely start with 'and' are title-cased too.
    >>> title_name('hodgson, thomas')
    'Hodgson, Thomas'
    >>> title_name('hodgson, thomas and CHOMSKY, NOAM')
    'Hodgson, Thomas and Chomsky, Noam'
    >>> title_name('anderson, john')
    'Anderson, John'
    """
    words = []
    for word in name.split():
        if word.lower() == 'and':
            words.append('and')
        else:
            words.append(word.title())
    return ' '.join(words)


 def braces(s):
    """
    str -> str
    Make sure a string starts with '{' and ends with '}', adding whichever
    of the two is missing.
    >>> braces('foo')
    '{foo}'
    >>> braces('{foo}')
    '{foo}'
    """
    opener = '' if s.startswith('{') else '{'
    closer = '' if s.endswith('}') else '}'
    return opener + s + closer


 def remove_eprint(record):
    """
    Drop the 'eprint' field, if the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("eprint", None)
    return record


 def issue_to_number(record):
    """
    If a record has an 'issue' field consisting only of digits and has no
    'number' field, move the value from 'issue' to 'number'.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "issue" not in record or "number" in record:
        return record
    # Raw string: '\d' in a plain literal is an invalid escape sequence
    if re.fullmatch(r'\d+', record["issue"]):
        record["number"] = record.pop("issue")
    return record


 def remove_leading_zeros(record):
    """
    Remove leading zeroes from the 'volume' and 'number' fields.

    A non-empty value made up only of zeroes is reduced to a single '0'
    rather than to an empty string.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("volume", "number"):
        if field in record:
            value = record[field]
            stripped = value.lstrip('0')
            if value and not stripped:
                # e.g. '000' -> '0', not ''
                stripped = '0'
            record[field] = stripped
    return record


 def remove_numpages(record):
    """
    Drop the 'numpages' field, if the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("numpages", None)
    return record


 def remove_month(record):
    """
    Drop the 'month' field, if the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("month", None)
    return record


 def remove_series(record):
    """
    Drop the 'series' field, if the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("series", None)
    return record


 def philpapers(record):
    """
    Put the PhilPapers ID in a 'philpapers' field.

    This function assumes that the ID of the record ends with a PhilPapers
    ID. The ID is split at hyphens: when the last segment is a number
    (as in 'FOOBAR-1') the last two segments are used, otherwise only the
    last one. Records whose ID contains no hyphen are left alone.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    segments = record["ID"].split('-')
    if len(segments) < 2:
        return record
    # Check whether we have an ID of the form 'FOOBAR-1'
    if re.fullmatch(r'\d+', segments[-1]):
        ppid = '-'.join(segments[-2:])
    else:
        ppid = segments[-1]
    record["philpapers"] = ppid
    return record


 def subtitles(record):
    """
    Split 'journaltitle' and 'title' at their first colon.

    The text before the colon stays in the field; the text after it goes
    into 'journalsubtitle' / 'subtitle'. Both halves are stripped of
    surrounding whitespace.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    pairs = (("journaltitle", "journalsubtitle"), ("title", "subtitle"))
    for main_field, sub_field in pairs:
        if main_field in record and ':' in record[main_field]:
            head, _, tail = record[main_field].partition(':')
            record[main_field] = head.strip()
            record[sub_field] = tail.strip()
    return record


 def add_definite_to_journaltitles(record):
    """
    Prefix 'The ' to the 'journaltitle' field when the title is listed in
    ``journals_needing_article``.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "journaltitle" not in record:
        return record
    current = record["journaltitle"]
    if current in journals_needing_article:
        record["journaltitle"] = 'The ' + current
    return record

 def remove_pages_from_entry(record, entry):
    """
    Remove the 'pages' field from records whose ENTRYTYPE equals ``entry``.

    :param record: the record.
    :type record: dict
    :param entry: the entry type (e.g. 'inproceedings') to strip pages from.
    :type entry: str
    :returns: dict -- the modified record.
    """
    if record["ENTRYTYPE"] == entry:
        record.pop("pages", None)
    return record


 def active_quotes(record):
    """
    Replace LaTeX quotes with unicode quotes,
    defined as active characters by csquotes.

    Every field of the record is processed, in two passes: opening quotes
    first, then closing quotes. Both single and double quote forms are
    replaced by the corresponding SINGLE curly quote ('‘' or '’').

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # The regexes must be done like this to avoid balance problems
    # Opening quotes: match one or two '`', one or two ''', one '"', or
    # one '“', preceded by whitespace or the start of the string and
    # followed by a word character
    for field in record:
        record[field] = re.sub(
            '(?:(?<=\s)|(?<=^))((`|\'){1,2}|\"|“)(?=\w)',
            '‘',
            record[field]
        )
    # Closing quotes: match one or two ''', one '"', or one '”',
    # preceded by a word character and followed by whitespace or the end
    # of the string
    for field in record:
        record[field] = re.sub(
            '(?<=\w)(\'{1,2}|\"|”)(?:(?=\s)|(?=$))',
            '’',
            record[field]
        )
    return record


 def remove_protection(record):
    """
    Strip unnecessary enclosing braces from 'title' and 'subtitle'
    by passing them through ``remove_outer_braces``.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("title", "subtitle"):
        if field in record:
            record[field] = remove_outer_braces(record[field])
    return record


 def citeulike(record):
    """
    Drop CiteULike's special fields
    ('citeulike-article-id', 'priority' and 'posted-at').

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("citeulike-article-id", "priority", "posted-at"):
        record.pop(field, None)
    return record


 def empty_fields(record):
    """
    Delete every field whose value is the empty string.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # Collect the names first: a dict must not shrink while it is iterated
    blanks = [name for name, value in record.items() if value == '']
    for name in blanks:
        del record[name]
    return record


 def biblatex_page_ranges(record):
    """
    Normalise the 'pages' field to a biblatex range such as '100--115'.

    Markers such as 'p.' and 'pp.' are removed first. If what is left is a
    range of two numbers, a truncated end page is expanded with
    ``full_range`` and the separator becomes two hyphens. Otherwise a
    complaint is printed and the cleaned value is kept.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "pages" not in record:
        return record
    # Get rid of p., pp. etc.
    pages = re.sub(r'[Pp]{1,2}\.?', '', record["pages"]).strip()
    record["pages"] = pages
    if re.search(r'^\d+-+\d+$', pages):
        # full_range returns a single hyphen range,
        # so do the normalisation afterwards
        record["pages"] = re.sub('-+', '--', full_range(pages))
    else:
        print(
            "The 'Pages' field for record {} isn't a valid biblatex range.".format(
                record["ID"]
            )
        )
    return record


 def non_page_hyphens(record):
    """
    Turn every run of hyphens in 'volume' and 'number' into exactly two.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("volume", "number"):
        if field in record:
            record[field] = re.sub('-+', '--', record[field])
    return record


 def dashes(record):
    """
    Replace en dashes with '--' and em dashes with '---' in every field.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in record:
        with_en_fixed = re.sub('–', '--', record[field])
        record[field] = re.sub('—', '---', with_en_fixed)
    return record


 def remove_keyword(record):
    """
    Drop the 'keywords' and 'keyword' fields, if present.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("keywords", "keyword"):
        record.pop(field, None)
    return record


 def strip_doi(record):
    """
    Pass the 'doi' field, if present, through ``remove_resolver``.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "doi" not in record:
        return record
    record["doi"] = remove_resolver(record["doi"])
    return record


 def get_doi(record):
    """
    Get DOIs for articles from the CrossRef API.

    Only records with ENTRYTYPE 'article' and no 'doi' field are looked
    up. The query is built from the title and, when there is a title,
    the author. The DOI of the first hit is stored in the 'doi' field.
    The status code and any failure to find a DOI are printed.

    Connection errors raised by ``requests`` are NOT caught here; they
    propagate to the caller.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if record["ENTRYTYPE"] != "article" or "doi" in record:
        return record
    # Build a search term for the API
    # The API doesn't like spaces or exotic characters
    query = ''
    if "title" in record:
        query += re.sub(r'\W+', '+', record["title"])
        if "author" in record:
            query += '+' + re.sub(r'\W+', '+', record["author"])
    # I need to make sure a query has been built
    if not query:
        return record
    payload = {
        'query': query,
        'rows': '1',
        'sort': 'score',
        'order': 'desc'
    }
    # The request is inside the try so that encoding problems raised
    # while sending it are reported instead of aborting the run
    try:
        r = requests.get(
            'http://api.crossref.org/works',
            params=payload
        )
        print(
            'I got status code {} from the CrossRef API for record {}.'.format(
                r.status_code,
                record["ID"]
            )
        )
        # Proceed if the status code was a good one
        if r.status_code == requests.codes.ok:
            # The result is JSON text
            # Items is a list in order of match score, it will have a DOI in it
            # Catch exceptions raised by any sort of problem with the response,
            # including a body that is not valid JSON (ValueError) or that
            # has an unexpected shape (TypeError)
            try:
                record["doi"] = r.json()['message']['items'][0]['DOI']
            except (IndexError, KeyError, TypeError, ValueError):
                print("I couldn't find a DOI in the JSON for record {}.".format(
                    record["ID"]
                    )
                )
    # This deals with errors caused by encoding problems,
    # which are fixed anyway by having the conversion
    # to unicode done before authors are dealt with
    except UnicodeEncodeError:
        print(
            "I couldn't get a DOI. A character in record {} wasn't encoded in a way the CrossRef API understands.".format(
                record["ID"]
            )
        )
    return record


 def titlecase_name(record):
    """
    Pass the 'author' and 'editor' fields through ``title_name``.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("author", "editor"):
        if field in record:
            record[field] = title_name(record[field])
    return record


 def publisher(record):
    """
    Protect 'and' in the publisher field with braces around the field.

    Only the whole word 'and' triggers the protection; publishers whose
    name merely contains those letters (e.g. 'Random House') are left
    alone.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "publisher" in record and re.search(r'\band\b', record["publisher"]):
        record["publisher"] = braces(record["publisher"])
    return record


 def edition(record):
    """
    Put "Edition" in a nice format.

    An ordinal with digits ('2nd', '3rd') loses its suffix; an ordinal
    word listed in ``words_to_numerals`` ('second') becomes the numeral.
    In both cases the value is lower-cased and stripped first. Anything
    else is left untouched.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "edition" not in record:
        return record
    value = record["edition"].lower().strip()
    # Only drop a suffix that directly follows the digits, so that other
    # words containing 'st', 'nd', 'rd' or 'th' are not damaged
    ordinal = r'(\d+)(?:st|nd|rd|th)\b'
    if re.search(ordinal, value):
        record["edition"] = re.sub(ordinal, r'\1', value)
    elif value in words_to_numerals:
        record["edition"] = words_to_numerals[value]
    return record


 def journaltitle(record):
    """
    Rename the 'journal' field to 'journaltitle'.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "journal" in record:
        record["journaltitle"] = record.pop("journal")
    return record


 def case_title(record):
    """
    Put 'title', 'subtitle', 'booktitle' and 'journal' in title case
    for English records (no 'language' field, or a 'language' of
    'English'). Acronyms are preserved through the ``abbreviations``
    callback.
    Depends on the 'titlecase' module
    https://pypi.python.org/pypi/titlecase/

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "language" in record and record["language"] != 'English':
        return record
    for field in ("title", "subtitle", "booktitle", "journal"):
        if field in record:
            record[field] = titlecase.titlecase(record[field], callback=abbreviations)
    return record


 def join_author_editor(record):
    """
    Turn list-valued 'author' and 'editor' fields back into single
    strings joined by " and ".

    'author' is expected to be a list of strings; 'editor' a list of
    dicts whose 'name' entry is used.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    separator = " and "
    if "author" in record:
        record["author"] = separator.join(record["author"])
    if "editor" in record:
        editor_names = [entry['name'] for entry in record["editor"]]
        record["editor"] = separator.join(editor_names)
    return record


 def booktitle(record):
    """
    For entries of type 'book', copy the 'title' field into 'booktitle'.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    is_book = record["ENTRYTYPE"] == "book"
    if is_book and "title" in record:
        record["booktitle"] = record["title"]
    return record


 def remove_abstract(record):
    """
    Drop the 'abstract' field, if the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("abstract", None)
    return record


 def remove_epub(record):
    """
    Remove the 'epub' field, if the record has one.

    Only 'epub' is touched; the 'issn' field is handled by
    ``remove_ISSN``.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("epub", None)
    return record


 def remove_ISSN(record):
    """
    Drop the 'issn' field, if the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("issn", None)
    return record


 def remove_ISBN(record):
    """
    Drop the 'isbn' field, if the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("isbn", None)
    return record


 def remove_copyright(record):
    """
    Drop the 'copyright' field, if the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("copyright", None)
    return record


 def language(record):
    """
    Remove listings as English.
    For other languages, set 'langid' to the lower-cased 'language'.
    A record with a 'langid' but no 'language' is reported and left alone.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "language" in record:
        if record["language"] == 'English':
            del record["language"]
            record.pop("langid", None)
        else:
            record["langid"] = record["language"].lower()
    elif "langid" in record:
        print(
            "There is a 'Langid' of '{}' but no 'Language' field for record {}.".format(
                record["langid"],
                record["ID"]
            )
        )
    return record

 def remove_publisher(record):
    """
    Drop the 'publisher' field, if the record has one.
    The entry type of the record is not checked.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("publisher", None)
    return record


 def remove_link(record):
    """
    Drop the 'link' field, if the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("link", None)
    return record


 def remove_ampersand(record):
    """
    Convert escaped ampersands (a backslash-escaped '&') to 'and' in the
    'booktitle', 'journal', 'subtitle' and 'title' fields.

    The same pattern is used for all four fields. It accepts one or more
    backslashes before the '&'. Unescaped ampersands are not touched.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("booktitle", "journal", "subtitle", "title"):
        if field in record:
            record[field] = re.sub(r'\\+&', 'and', record[field])
    return record


 def escape_characters(record):
    """
    Make sure that characters reserved by LaTeX ('&', '%' and '_')
    are escaped with a backslash in every field except 'ID'.

    Characters that already have a backslash in front of them are left
    as they are.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # A reserved character that is not already preceded by a backslash
    unescaped = re.compile(r'(?<!\\)([&%_])')
    for field in record:
        # Underscores are ok in IDs, which shouldn't have other special
        # characters anyway
        if field != "ID":
            record[field] = unescaped.sub(r'\\\1', record[field])
    return record


 def jstor(record):
    """
    Drop JSTOR's special fields ('jstor_articletype',
    'jstor_formatteddate' and 'jstor_issuetitle').

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("jstor_articletype", "jstor_formatteddate", "jstor_issuetitle"):
        record.pop(field, None)
    return record


 def protect(s):
    """
    Str -> Str

    Helper function for `protect_capitalisation`.
    Take a string and return a string where words containing capital letters
    (after the first word), and any word that directly follows a colon and
    a space, are protected with braces.

    Each match is wrapped exactly once and in place, so words that occur
    more than once or that contain regex metacharacters (e.g. 'C++') are
    handled correctly.
    >>> protect('the C++ Language')
    'the {C++} {Language}'
    """
    return re.sub(
        r'(?<=\s)\S*[A-Z]+\S*|(?<=:\s)\S+',
        lambda match: '{' + match.group(0) + '}',
        s
    )


 def protect_capitalisation(record):
    """
    Pass the 'title', 'subtitle' and 'booktitle' fields through
    ``protect`` so that capitalised words get braces.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    for field in ("title", "subtitle", "booktitle"):
        if field in record:
            record[field] = protect(record[field])
    return record


 def multivolume(record):
    """
    Promote a 'book' or 'collection' that has a 'volume' field to
    ENTRYTYPE 'mvbook' or 'mvcollection' respectively.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    promoted = {"book": "mvbook", "collection": "mvcollection"}
    kind = record["ENTRYTYPE"]
    if kind in promoted and "volume" in record:
        record["ENTRYTYPE"] = promoted[kind]
    return record


 def remove_booktitle(record):
    """
    Drop the 'booktitle' field, if the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("booktitle", None)
    return record


 def year_to_date(record):
    """
    Rename the 'year' field to 'date'.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "year" in record:
        record["date"] = record.pop("year")
    return record


 """
 Added by koo
 """
 # Pairs of [regular expression, abbreviation] applied to venue names.
 # The patterns are anchored at word boundaries so that longer words
 # (e.g. 'Journals') are not cut into pieces.
 list_abbrevations = [
    [r"\b[Jj]ournal\b", "J."],
    [r"\bInternational\b", "Int."],
    [r"\bTransactions\b", "Trans."],
    [r"\b[Aa]nalysis\b", "Anal."],
    [r"\b[Rr]ecognition\b", "Recog."],
 ]
 def convert_abbreviations_ieee(record, key):
    """
    Abbreviate the words listed in ``list_abbrevations`` in one field.

    :param record: the record.
    :type record: dict
    :param key: name of the field to abbreviate (e.g. 'journal').
    :type key: str
    :returns: dict -- the modified record.
    """
    if key in record:
        for ori, abbrev in list_abbrevations:
            record[key] = re.sub(ori, abbrev, record[key])

    return record


 def remove_year_in_key(record, key):
    """
    Strip year markers from one field of the record.

    Two forms are removed: a run of four digits together with any dots
    and spaces that follow it, and an apostrophe followed by two digits.

    :param record: the record.
    :type record: dict
    :param key: name of the field to clean (e.g. 'booktitle').
    :type key: str
    :returns: dict -- the modified record.
    """
    if key not in record:
        return record
    without_long_year = re.sub(r"[0-9]{4}\.* *", '', record[key])
    record[key] = re.sub(r"\'[0-9]{2}", '', without_long_year)
    return record

 # Known proceedings. Each entry lists the forms of one venue:
 # index 0 is the acronym, index 1 the full name, index 2 the IEEE-style
 # abbreviation. Further elements are extra spellings used for matching.
 list_proc = [
    ["CVPR", "Computer Vision and Pattern Recognition",
     "Proc. IEEE Conf. Comput. Vis. Pattern Recog.", ],
    ["ECCV", "European Conference on Computer Vision",
     "Proc. Eur. Conf. Comput. Vis.", ],
    ["ICCV", "international conference on computer vision",
     "Proc. Int. Conf. Comput. Vis.", "Int'l Conf. Computer Vision"],
    ["BMVC", "British Machine Vision Conference",
     "Proc. British Mach. Vis. Conf."],
    ["SSVM", "Scale Space and Variational Methods in Computer Vision",
     "Proc. Int. Conf. on Scale Space and Variational Methods in Comput. Vis.", ],
    ["ICIP", "International Conference on Image Processing",
     "Proc. Int. Conf. on Image Processing"]
 ]


 # Known journals, laid out like the entries of list_proc.
 list_jour = [
    ["PAMI", "Pattern Analysis and Machine Intelligence",
     "IEEE Trans. Pattern Anal. Mach. Intell."],
    ["IJCV", "International Journal of Computer Vision",
     "Int. J. of Comput. Vis."],
    ["TIP", "Transactions on Image Processing",
     "IEEE Trans. on Image Processing"],
    ["JMIV", "Journal of Mathematical Imaging and Vision",
     "J. of Math. Imaging and Vis.",]
 ]

 def _match_known_venue(name, venues):
    """Return the first entry of ``venues`` one of whose forms occurs in ``name``, or None."""
    for forms in venues:
        alternatives = '|'.join(re.escape(form) for form in forms)
        # The forms are matched literally and as whole words, so that an
        # acronym such as 'TIP' is not found inside 'Multiple'.
        # Lookarounds are used instead of \b because some forms end in '.'
        pattern = r'(?<!\w)(?:' + alternatives + r')(?!\w)'
        if re.search(pattern, name, flags=re.IGNORECASE):
            return forms
    return None

 def unify_ai_titles(record, abbrev_degree=2, verbose=1):
    """
    Unify AI proceedings and journals based on abbrev_degree

    The 'booktitle' field is checked against ``list_proc``; if there is no
    'booktitle', the 'journal' field is checked against ``list_jour``.
    When a known venue is recognised the field is replaced by the form
    selected by ``abbrev_degree``.

    :param record: the record.
    :param abbrev_degree: index of the form to use:
        0: acronym
        1: full name
        2: IEEE Computer Society Style Guide
    :param verbose: if true, print the resulting field value.

    :returns: dict -- the modified record.
    """
    if "booktitle" in record:
        field, venues = "booktitle", list_proc
    elif "journal" in record:
        field, venues = "journal", list_jour
    else:
        return record

    forms = _match_known_venue(record[field], venues)
    if forms is not None:
        record[field] = forms[abbrev_degree]

    if verbose:
        print(record[field])

    return record

 def remove_page_if_doi(record):
    """
    Remove the page information from records that have a DOI.

    Both the 'pages' field and a 'page' field are removed when present;
    a record with a DOI but no page field is left unchanged.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "doi" in record:
        for field in ("pages", "page"):
            record.pop(field, None)

    return record


 def remove_organization(record):
    """
    Drop the 'organization' field, if the record has one.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    record.pop("organization", None)
    return record
diff --git a/convertbib.py b/convertbib.py
 #!/usr/bin/env python3

 import argparse
 import re
 import shutil
 import sys

 import cb_customs

 from bibtexparser.bparser import BibTexParser
 from bibtexparser.bwriter import BibTexWriter
 from bibtexparser.customization import *


 def fix_keys(l):
    """ list -> list
    Take a list that represents lines.
    Find lines which are the start of a bibtex entry without a key.
    Add dummy keys ('Foo1', 'Foo2', ...) to those lines.
    Remove whitespace from existing keys (other characters are kept) and
    make sure the key is followed by exactly one comma.
    Lines starting '@string', '@preamble' or '@comment', and '@' lines
    without an opening brace, are left untouched.
    The list is modified in place and returned.
    >>> fix_keys(['@book{foo bar,', '@article{', '    Author = {Thomas Hodgson}', '}'])
    ['@book{foobar,', '@article{Foo1,', '    Author = {Thomas Hodgson}', '}']
    """
    dummy_count = 1
    for index, line in enumerate(l):
        stripped = line.strip()
        if not stripped.startswith('@'):
            continue
        # These are not entries and have no citation key
        if re.match(r'@(?:string|preamble|comment)\b', stripped, re.IGNORECASE):
            continue
        if re.fullmatch(r'@\w+\s*\{,?', stripped):
            l[index] = line[:line.find('{') + 1] + 'Foo' + str(dummy_count) + ','
            dummy_count += 1
            continue
        brace = line.find('{')
        if brace == -1:
            continue
        # Find where the key starts
        start = brace + 1
        # Everything up to the first comma is the key
        key, _, rest = line[start:].partition(',')
        key = re.sub(r'\s+', '', key)
        # Put it back together with a single comma after the key
        l[index] = line[:start] + key + ',' + rest
    return l


 def customizations(record):
    """Apply the clean-up steps to one record.

    This function is passed to ``BibTexParser`` as its ``customization``
    callback. It runs the record through a fixed sequence of functions
    from ``cb_customs`` (several steps are currently commented out) and,
    unless DOI lookup is switched off, asks ``cb_customs.get_doi`` for a
    DOI.

    It reads the module-level ``args`` namespace created in the
    ``__main__`` section, and sets ``args.nodoi`` to True after the first
    connection error so that later records skip the lookup.

    :param record: a record
    :returns: -- customized record
    """
    # This needs to come before authors are dealt with
    # otherwise there are encoding problems
    # record = convert_to_unicode(record)
    # record = page_double_hyphen(record)
    # record = author(record)
    # record = editor(record)
    # # This is needed after `author` is called to allow writing
    # record = cb_customs.join_author_editor(record)
    record = cb_customs.titlecase_name(record)
    # record = cb_customs.language(record)
    record = cb_customs.case_title(record)
    # This should come after `journaltitle`is called
    # NOTE(review): `journaltitle` is not called anywhere in this
    # sequence, so records still use the 'journal' field here -- confirm
    # that the next step is expected to have any effect
    record = cb_customs.add_definite_to_journaltitles(record)
    # record = cb_customs.remove_pages_from_books_and_collections(record)
    record = cb_customs.non_page_hyphens(record)
    record = cb_customs.dashes(record)
    # record = cb_customs.biblatex_page_ranges(record)
    record = cb_customs.remove_abstract(record)
    record = cb_customs.remove_ISBN(record)
    record = cb_customs.remove_ISSN(record)
    record = cb_customs.remove_epub(record)
    record = cb_customs.remove_copyright(record)
    record = cb_customs.remove_publisher(record)
    record = cb_customs.remove_organization(record)
    record = cb_customs.remove_link(record)
    # Escaping runs before `remove_ampersand`, which looks for the
    # backslash-escaped form of '&'
    record = cb_customs.escape_characters(record)
    record = cb_customs.remove_ampersand(record)
    record = cb_customs.jstor(record)
    record = cb_customs.citeulike(record)
    record = cb_customs.edition(record)
    record = cb_customs.multivolume(record)
    record = cb_customs.strip_doi(record)
    record = cb_customs.remove_keyword(record)
    record = cb_customs.empty_fields(record)
    record = cb_customs.remove_protection(record)
    record = cb_customs.subtitles(record)
    record = cb_customs.remove_series(record)
    record = cb_customs.remove_month(record)
    record = cb_customs.remove_numpages(record)
    record = cb_customs.remove_eprint(record)
 #    record = cb_customs.year_to_date(record)
    # The order of the following matters
 #    record = cb_customs.issue_to_number(record)
    record = cb_customs.remove_leading_zeros(record)

    # Venue names: strip years from 'booktitle', map known venues to
    # their unified form, then abbreviate individual words
    record = cb_customs.remove_year_in_key(record, "booktitle")
    record = cb_customs.unify_ai_titles(record)

    record = cb_customs.convert_abbreviations_ieee(record, "booktitle")
    record = cb_customs.convert_abbreviations_ieee(record, "journal")
    record = cb_customs.remove_pages_from_entry(record, "inproceedings")

    #record = cb_customs.remove_page_if_doi(record)

    # `args` is the module-level argparse namespace
    if not args.nodoi:
        try:
            record = cb_customs.get_doi(record)
        # If there is a connection error stop trying to get DOIs
        except cb_customs.requests.exceptions.ConnectionError:
            if args.verbose:
                print(
                    "I couldn't connect to the CrossRef API. "
                    "Perhaps you are not connected to the internet?"
                )
            args.nodoi = True

    return record

 if __name__ == "__main__":
    # Command-line entry point: read a bib file (or standard input when no
    # file is given), clean every record with `customizations`, and write
    # the result to '<input>_convert.bib' (or standard output).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input',
        nargs='?',
        help='input bib file (standard input is read when omitted)'
    )
    parser.add_argument(
        '--no-doi',
        dest='nodoi',
        action='store_true',
        help="Don't look for DOIs from CrossRef"
    )
    parser.add_argument(
        '--verbose',
        dest='verbose',
        action='store_true',
        help="Print messages"
    )
    # `args` must stay a module-level name: `customizations` reads it
    args = parser.parse_args()
    if args.input:
        print(args.input)
        bib = args.input
        # NOTE: the input file is never modified; no backup is made
        try:
            with open(bib, 'r', encoding='utf-8') as biblatex:
                content = biblatex.read()
        except FileNotFoundError:
            if args.verbose:
                print("I couldn't find the file {}.".format(bib))
            # Non-zero status so that callers can detect the failure
            sys.exit(1)
    else:
        content = sys.stdin.read()
    # Find the start of the first record
    first_record = re.search('@', content)
    if first_record is None:
        if args.verbose:
            print("The file I was given didn't contain any records.")
        sys.exit(1)

    content = content[first_record.start():].split('\n')
    # Provide dummy citekeys
    content = fix_keys(content)
    fixed_content = '\n'.join(content)


    bibliography = BibTexParser(
        fixed_content,
        customization=customizations,
        ignore_nonstandard_types=False
        # Otherwise bibtexparser will complain if I give it a collection
    )
    output = BibTexWriter().write(bibliography)

    if args.input:
        # Only cut off the extension when it really is '.bib'
        if args.input.lower().endswith('.bib'):
            stem = args.input[:-4]
        else:
            stem = args.input
        output_name = stem + '_convert.bib'
        with open(output_name, 'w', encoding='utf-8') as biblatex:
            biblatex.write(output)
    else:
        sys.stdout.write(output)
	#!/usr/bin/env python3

	import argparse
	import re
	import shutil
	import sys

	import cb_customs

	from bibtexparser.bparser import BibTexParser
	from bibtexparser.bwriter import BibTexWriter
	from bibtexparser.customization import *


	def fix_keys(l):
	    """ list -> list
	    Take a list of strings that represents the lines of a bibtex file.
	    Find lines which are the start of a bibtex entry without a key.
	    Add dummy keys ('Foo1', 'Foo2', ...) to those lines.
	    Make every other entry-start line end in exactly one comma; the key
	    itself is kept as it is (non-word characters are not removed).
	    Lines that start with '@' but contain no '{' are left alone.
	    The list is changed in place and also returned.
	    >>> fix_keys(
	    ... ['@book{foo bar,', '@article{', ' Author = {Thomas Hodgson}', '}']
	    ... )
	    ['@book{foo bar,', '@article{Foo1,', ' Author = {Thomas Hodgson}', '}']
	    """
	    dummy_count = 0
	    for index, line in enumerate(l):
	        stripped = line.strip()
	        if not stripped.startswith('@'):
	            continue
	        brace = line.find('{')
	        if brace == -1:
	            # An '@' line without an opening brace has no key to fix;
	            # looking for the brace used to raise AttributeError here
	            continue
	        head = line[:brace + 1]
	        if re.fullmatch(r'@\w+\s*\{,?', stripped):
	            # Entry start with no key at all: invent one
	            dummy_count += 1
	            l[index] = head + 'Foo' + str(dummy_count) + ','
	        else:
	            # The key is no longer stripped of non-word characters, so a
	            # comma closing the line is still there: drop it (and any
	            # trailing whitespace) before adding the single comma back
	            key = line[brace + 1:].rstrip().rstrip(',').rstrip()
	            l[index] = head + key + ','
	    return l


	def customizations(record):
	    """Run one record through the chain of clean-up steps.

	    Every step receives the record and hands back the record that the
	    next step works on. Unless the module-level `args.nodoi` is set, a
	    DOI lookup through `cb_customs.get_doi` is attempted at the end.

	    :param record: a record
	    :returns: -- customized record
	    """
	    # Steps that only need the record, in the order they are applied.
	    # Steps that are switched off are noted where they used to run.
	    pipeline = (
	        # disabled: convert_to_unicode (needs to come before authors are
	        # dealt with, otherwise there are encoding problems),
	        # page_double_hyphen, author, editor and
	        # cb_customs.join_author_editor (needed after `author` is
	        # called to allow writing)
	        cb_customs.titlecase_name,
	        # disabled: cb_customs.language
	        cb_customs.case_title,
	        # This should come after `journaltitle` is called
	        cb_customs.add_definite_to_journaltitles,
	        # disabled: cb_customs.remove_pages_from_books_and_collections
	        cb_customs.non_page_hyphens,
	        cb_customs.dashes,
	        # disabled: cb_customs.biblatex_page_ranges
	        cb_customs.remove_abstract,
	        cb_customs.remove_ISBN,
	        cb_customs.remove_ISSN,
	        cb_customs.remove_epub,
	        cb_customs.remove_copyright,
	        cb_customs.remove_publisher,
	        cb_customs.remove_organization,
	        cb_customs.remove_link,
	        cb_customs.escape_characters,
	        cb_customs.remove_ampersand,
	        cb_customs.jstor,
	        cb_customs.citeulike,
	        cb_customs.edition,
	        cb_customs.multivolume,
	        cb_customs.strip_doi,
	        cb_customs.remove_keyword,
	        cb_customs.empty_fields,
	        cb_customs.remove_protection,
	        cb_customs.subtitles,
	        cb_customs.remove_series,
	        cb_customs.remove_month,
	        cb_customs.remove_numpages,
	        cb_customs.remove_eprint,
	        # disabled: cb_customs.year_to_date
	        # The order of the following matters
	        # disabled: cb_customs.issue_to_number
	        cb_customs.remove_leading_zeros,
	    )
	    for step in pipeline:
	        record = step(record)

	    # Steps that are told which field or entry type to work on
	    record = cb_customs.remove_year_in_key(record, "booktitle")
	    record = cb_customs.unify_ai_titles(record)
	    for field in ("booktitle", "journal"):
	        record = cb_customs.convert_abbreviations_ieee(record, field)
	    record = cb_customs.remove_pages_from_entry(record, "inproceedings")
	    # disabled: cb_customs.remove_page_if_doi

	    if args.nodoi:
	        return record

	    try:
	        record = cb_customs.get_doi(record)
	    except cb_customs.requests.exceptions.ConnectionError:
	        # If there is a connection error stop trying to get DOIs:
	        # setting the flag makes later records skip the lookup
	        if args.verbose:
	            print(
	                "I couldn't connect to the CrossRef API. "
	                "Perhaps you are not connected to the internet?"
	            )
	        args.nodoi = True
	    return record

	if __name__ == "__main__":
	    # Command-line entry point: read a bib file (or stdin), give every
	    # entry a usable key, run each record through `customizations`, and
	    # write the result to '<input stem>_convert.bib' (or stdout).

	    # Local import: only this entry point needs os.path
	    import os

	    parser = argparse.ArgumentParser()
	    # The file is optional so that the stdin/stdout branches below can
	    # actually be reached; with a required positional they never ran
	    parser.add_argument('input', nargs='?', help='input bib file')
	    parser.add_argument(
	        '--no-doi',
	        dest='nodoi',
	        action='store_true',
	        help="Don't look for DOIs from CrossRef"
	    )
	    parser.add_argument(
	        '--verbose',
	        dest='verbose',
	        action='store_true',
	        help="Print messages"
	    )
	    # `args` has to stay a module-level name: `customizations` reads
	    # args.nodoi and args.verbose
	    args = parser.parse_args()
	    if args.input:
	        # Only echo the file name when there is one, so that nothing but
	        # the bibliography reaches stdout in stdin/stdout mode
	        print(args.input)
	        bib = args.input
	        try:
	            # No backup copy is made: the input is never overwritten,
	            # the result goes to a separate '_convert.bib' file
	            with open(bib, 'r', encoding='utf-8') as biblatex:
	                content = biblatex.read()
	        except FileNotFoundError:
	            if args.verbose:
	                print(
	                    "I couldn't find the file {}.".format(bib),
	                    file=sys.stderr
	                )
	            # Non-zero status so that callers can tell nothing was done
	            sys.exit(1)
	    else:
	        content = sys.stdin.read()
	    # Find the start of the first record
	    start = content.find('@')
	    if start == -1:
	        if args.verbose:
	            print(
	                "The file I was given didn't contain any records.",
	                file=sys.stderr
	            )
	        sys.exit(1)

	    content = content[start:].split('\n')
	    # Provide dummy citekeys
	    content = fix_keys(content)
	    fixed_content = '\n'.join(content)

	    bibliography = BibTexParser(
	        fixed_content,
	        customization=customizations,
	        ignore_nonstandard_types=False
	        # Otherwise bibtexparser will complain if I give it a collection
	    )
	    output = BibTexWriter().write(bibliography)

	    if args.input:
	        # Replace whatever extension the input has (possibly none) rather
	        # than blindly cutting off the last four characters
	        output_name = os.path.splitext(args.input)[0] + '_convert.bib'
	        with open(output_name, 'w', encoding='utf-8') as biblatex:
	            biblatex.write(output)
	    else:
	        sys.stdout.write(output)
No results found