The Dockerfile should include the JProfiler installation:

RUN wget <JProfiler file location> -P /tmp/ && \
    tar -xzf /tmp/<JProfiler file> -C /usr/local && \
    rm /tmp/<JProfiler file>
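Once the archive is unpacked under /usr/local, the JVM inside the container can load the profiling agent through its -agentpath option; the exact library path depends on the JProfiler version and platform layout, so check the extracted directory.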

A small bash helper to batch-convert PDFs with pdfalto:

#!/bin/bash

export PDFALTO_PATH=./grobid-home/pdfalto/mac_arm-64

# Function to process PDF files
process_pdf_files() {
    local input_dir="$1"
    local output_dir="$2"
    # Create the output directory if it doesn't exist
    mkdir -p "$output_dir"
    # Convert each PDF to ALTO XML, keeping the original base name
    for pdf in "$input_dir"/*.pdf; do
        "$PDFALTO_PATH/pdfalto" "$pdf" "$output_dir/$(basename "${pdf%.pdf}").xml"
    done
}

process_pdf_files "$1" "$2"

Header of a Python client for a biblio-glutton lookup service:

import argparse
import os
from pathlib import Path

import requests

# Constants
GLUTTON_URL = "ADD BIBLIO GLUTTON LOOKUP SERVICE"
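Only the header is shown; a minimal lookup helper might look like the following, assuming GLUTTON_URL points at the base URL of a running biblio-glutton instance (the /service/lookup endpoint and its doi parameter follow the biblio-glutton REST API; the helper itself is a sketch):

def lookup_by_doi(doi):
    # Query the lookup service and return the matched bibliographical record as JSON
    response = requests.get(GLUTTON_URL + "/service/lookup", params={"doi": doi})
    response.raise_for_status()
    return response.json()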

# Credits to https://marmelab.com/blog/2018/03/21/using-nvidia-gpu-within-docker-container.html
# Run with
# [CPU] docker run --runtime=nvidia --rm -ti -v "${PWD}:/app" tensorflow/tensorflow:1.15.5-gpu python /app/nvidia-benchmark.py cpu 10000
# [GPU] docker run --runtime=nvidia --rm -ti -v "${PWD}:/app" tensorflow/tensorflow:1.15.5-gpu python /app/nvidia-benchmark.py gpu 10000

import sys

import numpy as np
import tensorflow as tf
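Only the imports of the benchmark survive here; a body in the spirit of the marmelab script (device and matrix size taken from the command line, TF 1.x session API; the exact original body is not shown) would be:

from datetime import datetime

device = "/gpu:0" if sys.argv[1] == "gpu" else "/cpu:0"
shape = (int(sys.argv[2]), int(sys.argv[2]))

with tf.device(device):
    # Multiply a random matrix by its transpose and reduce it to a scalar
    m = tf.random.uniform(shape=shape, minval=0, maxval=1)
    op = tf.reduce_sum(tf.matmul(m, tf.transpose(m)))

start = datetime.now()
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as session:
    print(session.run(op))
print("Shape:", shape, "Device:", device, "Time:", datetime.now() - start)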

Soft grouping of near-identical strings with difflib:

from difflib import SequenceMatcher

def group_by_with_soft_matching(input_list, threshold):
    # Each key collects the values whose SequenceMatcher ratio against it
    # is at least `threshold`; every value ends up in exactly one group.
    matching = {}
    input_list_sorted = sorted(set(input_list), reverse=True)
    for x in input_list_sorted:
        grouped = [v for values in matching.values() for v in values]
        if x in grouped:
            continue
        matching[x] = [y for y in input_list_sorted
                       if y not in grouped
                       and SequenceMatcher(None, x, y).ratio() >= threshold]
    return matching
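For example (threshold chosen arbitrarily):

group_by_with_soft_matching(["LaFeAsO", "LaFeAs0", "MgB2"], 0.8)
# -> {'MgB2': ['MgB2'], 'LaFeAsO': ['LaFeAsO', 'LaFeAs0']}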

Entry point of a json_migration script for DeLFT models:

import json
import os
import pathlib
import sys

from delft.sequenceLabelling.preprocess import WordPreprocessor

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Invalid parameters. Usage: python json_migration.py <model directory>")
        sys.exit(-1)
    model_directory = sys.argv[1]
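The migration body is not shown; a minimal sketch of the idea, assuming the legacy model directory holds a pickled preprocessor and the new format is plain JSON (the preprocessor.pkl / preprocessor.json file names and the direct pickle load are assumptions, not DeLFT's actual API):

import pickle

def migrate(model_directory):
    # Load the legacy pickled preprocessor (file name is an assumption)
    with open(os.path.join(model_directory, 'preprocessor.pkl'), 'rb') as f:
        preprocessor = pickle.load(f)
    # Write its attributes out as JSON; non-serializable values are
    # stringified, so review the result before removing the pickle
    with open(os.path.join(model_directory, 'preprocessor.json'), 'w') as f:
        json.dump(vars(preprocessor), f, default=str, indent=4)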

Shell one-liners to spot duplicated files (gsort and gawk are the GNU tools installed, e.g., via Homebrew on macOS; on Linux use sort and awk):

# Print the sha and occurrence count of each duplicated file
sha1sum * | gsort | gawk '{a[$1]++}END{for(i in a){if(a[i]-1)print i, a[i]}}'

# Print the last file name seen for each duplicated sha
sha1sum * | gsort | gawk '{a[$1]++; b[$1]=$2}END{for(i in a){if(a[i]-1)print i, b[i]}}'
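The same duplicate check in portable Python, for environments without the GNU tools (a hypothetical helper, not part of the original notes):

import hashlib
from collections import defaultdict
from pathlib import Path

def duplicated_files(directory="."):
    # Map each SHA-1 digest to the names of the files that produce it
    by_sha = defaultdict(list)
    for path in Path(directory).iterdir():
        if path.is_file():
            by_sha[hashlib.sha1(path.read_bytes()).hexdigest()].append(path.name)
    return {sha: names for sha, names in by_sha.items() if len(names) > 1}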

Prodigy recipe for annotating superconductor materials:

import prodigy
from prodigy.components.loaders import JSONL
from prodigy.util import split_string

@prodigy.recipe('superconductor-material-recipe',
                dataset=prodigy.recipe_args['dataset'],
                source=("The source data as a JSONL file", "positional", None, str),
                label=("One or more comma-separated labels", "option", "l", split_string))
def superconductors_detection(dataset, source=None, label=None):
    # Minimal body (the original is not shown): stream the JSONL examples
    # for manual span annotation; view_id ner_manual is an assumption
    stream = JSONL(source)
    return {'dataset': dataset, 'stream': stream,
            'view_id': 'ner_manual', 'config': {'labels': label}}
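Assuming the recipe is saved in a file such as recipe.py (dataset, file, and label names below are placeholders), it can be started with Prodigy's -F flag:

prodigy superconductor-material-recipe my_materials ./materials.jsonl --label MATERIAL -F recipe.py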