Last active
August 1, 2024 12:06
-
-
Save psychemedia/cd2425975ffc1d87bf8cb3c4df264fca to your computer and use it in GitHub Desktop.
First attempt at a yaml validator for ou-container-builder v3 yaml config scripts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "id": "d388bfdc-c605-42e0-8d09-e3a371c3d6b8", | |
| "metadata": {}, | |
| "source": [ | |
| "## OU Container Builder YAML Configuration File Validator\n", | |
| "\n", | |
| "Simple attempt at a validator for `ou-container-builder` YAML configuration files (v3).\n", | |
| "\n", | |
| "Handy docs: https://www.andrewvillazon.com/validate-yaml-python-schema/\n", | |
| "\n", | |
| "Repo: https://github.com/keleshev/schema" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "20cde7b6-8f11-43a0-be60-2699274bc49b", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#%pip install schema" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 134, | |
| "id": "40efa6bc-8968-4a54-9401-c1ca5bb6f8cf", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from schema import Schema, SchemaError, Optional, Regex, Or\n", | |
| "import yaml" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 184, | |
| "id": "53075a86-70b5-4e3f-9725-0199292194ee", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "full_test_yaml=\"\"\"version: 3\n", | |
| "module:\n", | |
| " code: TM129\n", | |
| " presentation: 23B\n", | |
| "image:\n", | |
| " base: as\n", | |
| " user: as\n", | |
| "packs:\n", | |
| " jupyterlab: {}\n", | |
| " notebook: {}\n", | |
| " ipykernel: {}\n", | |
| "sources:\n", | |
| " apt:\n", | |
| " - name: mongodb\n", | |
| " key_url: https://www.mongodb.org/static/pgp/server-7.0.asc\n", | |
| " dearmor: true\n", | |
| " deb:\n", | |
| " url: https://repo.mongodb.org/apt/ubuntu\n", | |
| " distribution: jammy/mongodb-org/7.0\n", | |
| " component: multiverse\n", | |
| "server:\n", | |
| " access_token: as\n", | |
| " default_path: as\n", | |
| "packages:\n", | |
| " apt:\n", | |
| " build:\n", | |
| " - gdal-bin\n", | |
| " - libgdal-dev\n", | |
| " deploy:\n", | |
| " - mongodb-org\n", | |
| " - postgresql\n", | |
| " - openjdk-17-jre\n", | |
| " - openjdk-17-jre-headless\n", | |
| " - gdal-bin\n", | |
| " - libgdal-dev\n", | |
| " pip:\n", | |
| " system:\n", | |
| " - ou-tm351-jl-extensions>=0.2.8\n", | |
| " user:\n", | |
| " - pymongo\n", | |
| " - jupysql\n", | |
| " - psycopg2-binary\n", | |
| " - pgspecial\n", | |
| " - SQLAlchemy\n", | |
| " - schemadisplay-magic>=0.0.7\n", | |
| " - geopandas\n", | |
| " - fiona\n", | |
| " - Shapely\n", | |
| " - geopy\n", | |
| " - folium\n", | |
| " - descartes\n", | |
| " - pandas\n", | |
| " - scipy\n", | |
| " - seaborn\n", | |
| " - xlrd\n", | |
| " - openpyxl\n", | |
| "content:\n", | |
| " - source: as\n", | |
| " target: as\n", | |
| " overwrite: always\n", | |
| " - source: ./db_setup/mongodb-org/mongod\n", | |
| " target: /etc/init.d/mongod\n", | |
| " overwrite: always\n", | |
| " - source: ./db_setup/mongodb-org/mongod.conf\n", | |
| " target: /etc/mongod.conf\n", | |
| " overwrite: always\n", | |
| " - source: asa\n", | |
| " target: as\n", | |
| " overwrite: always\n", | |
| " - source: ./icons/openrefine.svg\n", | |
| " target: /var/ou/icons/openrefine.svg\n", | |
| " overwrite: always\n", | |
| "environment:\n", | |
| " - name: MONGO_DB_PATH\n", | |
| " value: /var/db/data/mongo\n", | |
| " - name: PG_VERSION\n", | |
| " value: '15'\n", | |
| " - name: PGDATA\n", | |
| " value: /var/lib/postgresql/$PG_VERSION/main\n", | |
| " - name: POSTGRES_USER\n", | |
| " value: postgres\n", | |
| " - name: POSTGRES_PASSWORD\n", | |
| " value: postgres\n", | |
| " - name: POSTGRES_DB\n", | |
| " value: oudb\n", | |
| " - name: PLOOMBER_STATS_ENABLED\n", | |
| " value: 'false'\n", | |
| " - name: PLOOMBER_VERSION_CHECK_DISABLED\n", | |
| " value: 'false'\n", | |
| " - name: OPENREFINE_VERSION\n", | |
| " value: 3.8.0\n", | |
| " - name: OPENREFINE_PATH\n", | |
| " value: /var/openrefine\n", | |
| "scripts:\n", | |
| " - stage: deploy\n", | |
| " commands: >-\n", | |
| " chmod ugo+rx /etc/init.d/mongod,cp -p /etc/mongod.conf\n", | |
| " /etc/ouseful/mongod.conf,chmod u-w /etc/ouseful/mongod.conf\n", | |
| " - stage: deploy\n", | |
| " commands: >-\n", | |
| " sed -e \"s/[#]?listen_addresses = .*/listen_addresses = '*'/g\" -i\n", | |
| " \"/etc/postgresql/$PG_VERSION/main/postgresql.conf\",usermod -aG users\n", | |
| " postgres\n", | |
| " - stage: build\n", | |
| " commands: >-\n", | |
| " wget -q -O openrefine-${OPENREFINE_VERSION}.tar.gz\n", | |
| " https://github.com/OpenRefine/OpenRefine/releases/download/${OPENREFINE_VERSION}/openrefine-linux-${OPENREFINE_VERSION}.tar.gz,tar\n", | |
| " xzf openrefine-${OPENREFINE_VERSION}.tar.gz,mv\n", | |
| " openrefine-${OPENREFINE_VERSION} $OPENREFINE_PATH\n", | |
| "output_blocks:\n", | |
| " deploy:\n", | |
| " - block: COPY --from=base /var/openrefine /var/openrefine\n", | |
| " weight: 2333\n", | |
| "web_apps:\n", | |
| " - path: openrefine\n", | |
| " options:\n", | |
| " command:\n", | |
| " - /var/openrefine/refine\n", | |
| " - '-i'\n", | |
| " - 127.0.0.1\n", | |
| " - '-p'\n", | |
| " - '{port}'\n", | |
| " - '-d'\n", | |
| " - /home/ou/TM351-24J/openrefine\n", | |
| " - '-H'\n", | |
| " - '*'\n", | |
| " - '-x'\n", | |
| " - refine.display.new.version.notice=false\n", | |
| " timeout: '120'\n", | |
| " launcher:\n", | |
| " enabled: false\n", | |
| "services:\n", | |
| " - mongod\n", | |
| " - postgresql\n", | |
| " \"\"\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 185, | |
| "id": "ec87b344-c3ee-463f-a242-f4dd8c53b122", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Need to schemas - one for the full schema, another for partial schema \n", | |
| "# In the partial schema validation, all elements optional?\n", | |
| "oucb_schema_ = {\"version\": int,\n", | |
| " \"module\": {\"code\":Regex(r'[A-M]{1,5}\\d{3}$'),\n", | |
| " \"presentation\": Regex(r'^\\d{2}[A-M]$')},\n", | |
| " \"image\":{\n", | |
| " \"base\": str,\n", | |
| " \"user\": str\n", | |
| " },\n", | |
| " \"packs\": {Optional(\"jupyterlab\"): dict,\n", | |
| " Optional(\"notebook\"): dict,\n", | |
| " Optional(\"ipykernel\"): dict,\n", | |
| " Optional(\"irkernel\"): dict,\n", | |
| " Optional(\"code_server\"): dict,\n", | |
| " Optional(\"xfce4\"): dict,\n", | |
| " },\n", | |
| " Optional(\"sources\"): {Optional(\"apt\"): [{\"name\":str, \"key_url\": str, \"dearmor\": bool,\n", | |
| " \"deb\": {\"url\": str, \"distribution\": str, \"component\": str}}]},\n", | |
| " Optional(\"server\"): {\"access_token\": str, \"default_path\": str},\n", | |
| " Optional(\"packages\"): { Optional(\"apt\"):\n", | |
| " {\n", | |
| " Optional(\"build\"): list,\n", | |
| " Optional(\"deploy\"): list\n", | |
| " },\n", | |
| " Optional(\"pip\"): {Optional(\"system\"): list, Optional(\"user\"): list}\n", | |
| " },\n", | |
| " Optional(\"content\"): [{\"source\": str, \"target\":str, \"overwrite\":Or(\"always\", \"never\")}],\n", | |
| " Optional(\"environment\"): [{\"name\":str, \"value\":str}],\n", | |
| " Optional(\"scripts\"): [{\"stage\": Or(\"build\", \"deploy\"), \"commands\": str}],\n", | |
| " Optional(\"output_blocks\"): {\n", | |
| " Optional(\"build\"): [{\"block\":str, \"weight\":int}],\n", | |
| " Optional(\"deploy\"): [{\"block\":str, \"weight\":int}]\n", | |
| " },\n", | |
| " Optional(\"web_apps\"): [{\n", | |
| " \"path\": str, \"options\":{\"command\": list, \"timeout\": Or(str, int)},\n", | |
| " Optional(\"launcher\"): {Optional(\"title\"): str,\n", | |
| " Optional(\"icon_path\"): str,\n", | |
| " Optional(\"enabled\"): bool},\n", | |
| " }],\n", | |
| " Optional(\"services\"): list\n", | |
| " }\n", | |
| "\n", | |
| "oucb_schema = Schema(oucb_schema_)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 186, | |
| "id": "5cf4c8f1-1522-410d-bc92-b20ff48cd88d", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Configuration is valid.\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "configuration = yaml.safe_load(full_test_yaml)\n", | |
| "\n", | |
| "try:\n", | |
| " oucb_schema.validate(configuration)\n", | |
| " print(\"Configuration is valid.\")\n", | |
| "except SchemaError as se:\n", | |
| " #raise se\n", | |
| " print(se)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "420f4dd5-272c-4d53-81c0-b847ad31e42b", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "5ed6a253-6000-486e-ac69-50fe16cd0dd1", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.11.0" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment