Last active
June 10, 2021 17:54
-
-
Save kmcelwee/8b698be7b2e581b3ce2cbb3ba72917c8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import shutil\n", | |
| "import os\n", | |
| "import xml.etree.ElementTree as ET\n", | |
| "from os.path import join as pjoin" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Batch directory to scan; each entry is expected to hold a dublin_core.xml file.\n", | |
| "DIR = \"dspace_imports/20210609/SUCCESS\"\n", | |
| "\n", | |
| "def get_title(file):\n", | |
| "    \"\"\"Return the title stored in an item's dublin_core.xml.\n", | |
| "\n", | |
| "    Args:\n", | |
| "        file: Name of an entry inside DIR (a directory name, not a full path).\n", | |
| "\n", | |
| "    Returns:\n", | |
| "        Text of the first <dcvalue element=\"title\"> child of the XML root\n", | |
| "        (None if that element has no text).\n", | |
| "\n", | |
| "    Raises:\n", | |
| "        ValueError: If the XML contains no title element.\n", | |
| "    \"\"\"\n", | |
| "    xml_path = pjoin(DIR, file, 'dublin_core.xml')\n", | |
| "    title = ET.parse(xml_path).find('dcvalue[@element=\"title\"]')\n", | |
| "    if title is None:\n", | |
| "        # find() returns None when nothing matches; report the offending file\n", | |
| "        # instead of failing with an opaque AttributeError on `.text`.\n", | |
| "        raise ValueError(f'No title element found in {xml_path}')\n", | |
| "    return title.text\n", | |
| "\n", | |
| "# Map each item directory name to its title. Only sub-directories of DIR are\n", | |
| "# read, so stray files (e.g. a .DS_Store) cannot break the XML lookup; sorting\n", | |
| "# makes the order independent of the arbitrary os.listdir() ordering.\n", | |
| "all_titles = {\n", | |
| "    file: get_title(file)\n", | |
| "    for file in sorted(os.listdir(DIR))\n", | |
| "    if os.path.isdir(pjoin(DIR, file))\n", | |
| "}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Are there any duplicates?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "True" | |
| ] | |
| }, | |
| "execution_count": 27, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# No: True means the number of distinct titles equals the number of items.\n", | |
| "len(set(all_titles.values())) == len(all_titles)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Exclude April dissertations" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Titles to leave out of the filtered set. They are compared with the XML titles\n", | |
| "# by exact string equality, so case, punctuation and whitespace must match.\n", | |
| "exclude = [\n", | |
| " \"Architectural Support for Scalable High-Bandwidth I/O\",\n", | |
| " \"High-bandwidth processing of heterodyne spectroscopic signals for remote and extractive chemical sensing\",\n", | |
| " \"QUANTIFYING, CODIFYING, AND CONTROLLING CORTICAL NEURAL POPULATION DYNAMICS\",\n", | |
| " \"Essays in Firm Taxation and Job Loss\",\n", | |
| " \"NANOPARTICLES FOR MEDICAL IMAGING AND ORAL THERAPEUTICS: FORMULATION DESIGN AND PROCESS IMPROVEMENTS\",\n", | |
| " \"Local Color: Race, Gender and Spanishness in French and Spanish Painting, 1855-1927\",\n", | |
| " \"REPRODUCTION AS A SITE OF POWER AND CONTESTATION: INFORMED CONSENT AND BLACK WOMEN’S NEGOTIATION OF MEDICAL DECISION-MAKING\",\n", | |
| " \"Confronting the past: transitional justice policies after violence\",\n", | |
| " \"The Nature of the Beast: The Animal Apocalypse(s) of Enoch\",\n", | |
| " \"ELECTRONIC PROPERTIES OF METAL HALIDE PEROVSKITES SURFACES: COMPLEXITIES AND REWARDS OF HIGH SENSITIVITY MEASUREMENTS\",\n", | |
| " \"REVOLUTIONARY RELIGION: YOUTH AND ISLAM IN POST-2011 EGYPT\",\n", | |
| " \"Who's Laughing Now?: Black Affective Play and Formalist Innovation in Twenty-First Century Black Literary Satire\",\n", | |
| " \"The Politics of Fitra: Approaching Moral Concepts in the Contemporary Study of Islamic Ethics\",\n", | |
| " \"BILATERAL FINANCING OF THE POWER SECTOR AND IMPLICATIONS FOR THE GLOBAL LOW CARBON ENERGY TRANSITION\",\n", | |
| " \"Harnessing the Novel Acidimicrobiaceae sp. Strain A6: Optimal Environmental and Operational Conditions in Constructed Wetland Mesocosms and Microbial Electrochemical System\",\n", | |
| " \"Long-Range Coupling of Electron Spins\",\n", | |
| " \"Protected qubits, Floquet engineering and crosstalk suppression with superconducting circuits\",\n", | |
| " \"Cobalt-Catalyzed Asymmetric Hydrogenation of Alkenes: Catalyst Developments and Mechanistic Investigations\",\n", | |
| " \"ELUCIDATING THE MECHANISMS UNDERLYING MICROBIAL INTERACTIONS IN THE HUMAN MICROBIOME AT THE MOLECULAR SCALE\",\n", | |
| " \"The Extracellular Matrix as a Regulator of Branching Morphogenesis\",\n", | |
| " \"MECHANISTIC STUDIES INTO HETEROCHROMATIN PROPAGATION BY THE HISTONE LYSINE METHYLTRANSFERASE, G9A\",\n", | |
| " \"Cupiditate Ducti: Corruption in the Carolingian World\",\n", | |
| " \"QUANTITATIVE ANALYSIS OF NADPH AND DE NOVO LIPOGENESIS\",\n", | |
| " \"Protein Acetylation as a Regulatory Toggle during Viral Infection and DNA Repair\",\n", | |
| " \"Drug Metabolism by the Human Gut Microbiome: An Investigation of Metabolic Capacity, Interpersonal Variability, and Genetic Mechanisms\",\n", | |
| " \"Illuminating developmental gene regulation with optimized photoswitchable MEK\",\n", | |
| " \"Language, Body, World: The Art of Hans Bellmer\",\n", | |
| "]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "81" | |
| ] | |
| }, | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Keep every item whose title is not on the exclusion list, then show how many remain.\n", | |
| "exclude_april_titles = {\n", | |
| "    item_dir: title\n", | |
| "    for item_dir, title in all_titles.items()\n", | |
| "    if title not in exclude\n", | |
| "}\n", | |
| "len(exclude_april_titles)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Copy each remaining item directory from DIR into DEST_DIR.\n", | |
| "filtered_files = list(exclude_april_titles)\n", | |
| "DEST_DIR = 'dspace_imports/20210609/FILTERED'\n", | |
| "for directory in filtered_files:\n", | |
| "    src = pjoin(DIR, directory)\n", | |
| "    dest = pjoin(DEST_DIR, directory)\n", | |
| "    # copytree raises FileExistsError when dest already exists, which made this\n", | |
| "    # cell fail on a second run. Skip items copied earlier; delete DEST_DIR\n", | |
| "    # beforehand to force a completely fresh copy.\n", | |
| "    if os.path.exists(dest):\n", | |
| "        continue\n", | |
| "    shutil.copytree(src, dest)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 37, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Run the shell command `open` on the current working directory.\n", | |
| "# NOTE(review): `open` looks macOS-specific; confirm before running elsewhere.\n", | |
| "!open ." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment