The Dockerfile should include the JProfiler installation:

RUN wget <JProfiler file location> -P /tmp/ && \
    tar -xzf /tmp/<JProfiler file> -C /usr/local && \
    rm /tmp/<JProfiler file>
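Once the archive is unpacked under /usr/local, the JVM inside the container can load the profiling agent through its -agentpath option; the exact library path depends on the JProfiler version and platform layout, so check the extracted directory.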

A small bash helper to batch-convert PDFs with pdfalto:

#!/bin/bash

export PDFALTO_PATH=./grobid-home/pdfalto/mac_arm-64

# Function to process PDF files
process_pdf_files() {
    local input_dir="$1"
    local output_dir="$2"
    # Create the output directory if it doesn't exist
    mkdir -p "$output_dir"
    # Convert each PDF to ALTO XML, keeping the original base name
    for pdf in "$input_dir"/*.pdf; do
        "$PDFALTO_PATH/pdfalto" "$pdf" "$output_dir/$(basename "${pdf%.pdf}").xml"
    done
}

process_pdf_files "$1" "$2"

Header of a Python client for a biblio-glutton lookup service:

import argparse
import os
from pathlib import Path

import requests

# Constants
GLUTTON_URL = "ADD BIBLIO GLUTTON LOOKUP SERVICE"
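Only the header is shown; a minimal lookup helper might look like the following, assuming GLUTTON_URL points at the base URL of a running biblio-glutton instance (the /service/lookup endpoint and its doi parameter follow the biblio-glutton REST API; the helper itself is a sketch):

def lookup_by_doi(doi):
    # Query the lookup service and return the matched bibliographical record as JSON
    response = requests.get(GLUTTON_URL + "/service/lookup", params={"doi": doi})
    response.raise_for_status()
    return response.json()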

# Credits to https://marmelab.com/blog/2018/03/21/using-nvidia-gpu-within-docker-container.html
# Run with
# [CPU] docker run --runtime=nvidia --rm -ti -v "${PWD}:/app" tensorflow/tensorflow:1.15.5-gpu python /app/nvidia-benchmark.py cpu 10000
# [GPU] docker run --runtime=nvidia --rm -ti -v "${PWD}:/app" tensorflow/tensorflow:1.15.5-gpu python /app/nvidia-benchmark.py gpu 10000

import sys

import numpy as np
import tensorflow as tf
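Only the imports of the benchmark survive here; a body in the spirit of the marmelab script (device and matrix size taken from the command line, TF 1.x session API; the exact original body is not shown) would be:

from datetime import datetime

device = "/gpu:0" if sys.argv[1] == "gpu" else "/cpu:0"
shape = (int(sys.argv[2]), int(sys.argv[2]))

with tf.device(device):
    # Multiply a random matrix by its transpose and reduce it to a scalar
    m = tf.random.uniform(shape=shape, minval=0, maxval=1)
    op = tf.reduce_sum(tf.matmul(m, tf.transpose(m)))

start = datetime.now()
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as session:
    print(session.run(op))
print("Shape:", shape, "Device:", device, "Time:", datetime.now() - start)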

Soft grouping of near-identical strings with difflib:

from difflib import SequenceMatcher

def group_by_with_soft_matching(input_list, threshold):
    # Each key collects the values whose SequenceMatcher ratio against it
    # is at least `threshold`; every value ends up in exactly one group.
    matching = {}
    input_list_sorted = sorted(set(input_list), reverse=True)
    for x in input_list_sorted:
        grouped = [v for values in matching.values() for v in values]
        if x in grouped:
            continue
        matching[x] = [y for y in input_list_sorted
                       if y not in grouped
                       and SequenceMatcher(None, x, y).ratio() >= threshold]
    return matching
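For example (threshold chosen arbitrarily):

group_by_with_soft_matching(["LaFeAsO", "LaFeAs0", "MgB2"], 0.8)
# -> {'MgB2': ['MgB2'], 'LaFeAsO': ['LaFeAsO', 'LaFeAs0']}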

Entry point of a json_migration script for DeLFT models:

import json
import os
import pathlib
import sys

from delft.sequenceLabelling.preprocess import WordPreprocessor

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Invalid parameters. Usage: python json_migration.py <model directory>")
        sys.exit(-1)
    model_directory = sys.argv[1]
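The migration body is not shown; a minimal sketch of the idea, assuming the legacy model directory holds a pickled preprocessor and the new format is plain JSON (the preprocessor.pkl / preprocessor.json file names and the direct pickle load are assumptions, not DeLFT's actual API):

import pickle

def migrate(model_directory):
    # Load the legacy pickled preprocessor (file name is an assumption)
    with open(os.path.join(model_directory, 'preprocessor.pkl'), 'rb') as f:
        preprocessor = pickle.load(f)
    # Write its attributes out as JSON; non-serializable values are
    # stringified, so review the result before removing the pickle
    with open(os.path.join(model_directory, 'preprocessor.json'), 'w') as f:
        json.dump(vars(preprocessor), f, default=str, indent=4)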

Shell one-liners to spot duplicated files (gsort and gawk are the GNU tools installed, e.g., via Homebrew on macOS; on Linux use sort and awk):

# Print the sha and occurrence count of each duplicated file
sha1sum * | gsort | gawk '{a[$1]++}END{for(i in a){if(a[i]-1)print i, a[i]}}'

# Print the last file name seen for each duplicated sha
sha1sum * | gsort | gawk '{a[$1]++; b[$1]=$2}END{for(i in a){if(a[i]-1)print i, b[i]}}'
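The same duplicate check in portable Python, for environments without the GNU tools (a hypothetical helper, not part of the original notes):

import hashlib
from collections import defaultdict
from pathlib import Path

def duplicated_files(directory="."):
    # Map each SHA-1 digest to the names of the files that produce it
    by_sha = defaultdict(list)
    for path in Path(directory).iterdir():
        if path.is_file():
            by_sha[hashlib.sha1(path.read_bytes()).hexdigest()].append(path.name)
    return {sha: names for sha, names in by_sha.items() if len(names) > 1}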

Prodigy recipe for annotating superconductor materials:

import prodigy
from prodigy.components.loaders import JSONL
from prodigy.util import split_string

@prodigy.recipe('superconductor-material-recipe',
                dataset=prodigy.recipe_args['dataset'],
                source=("The source data as a JSONL file", "positional", None, str),
                label=("One or more comma-separated labels", "option", "l", split_string))
def superconductors_detection(dataset, source=None, label=None):
    # Minimal body (the original is not shown): stream the JSONL examples
    # for manual span annotation; view_id ner_manual is an assumption
    stream = JSONL(source)
    return {'dataset': dataset, 'stream': stream,
            'view_id': 'ner_manual', 'config': {'labels': label}}
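Assuming the recipe is saved in a file such as recipe.py (dataset, file, and label names below are placeholders), it can be started with Prodigy's -F flag:

prodigy superconductor-material-recipe my_materials ./materials.jsonl --label MATERIAL -F recipe.py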