Created
May 24, 2025 14:17
-
-
Save zuazo/640654ae8b53d7bc231d02f802fb6eef to your computer and use it in GitHub Desktop.
hf_fork.sh: Clone a list of Hub repos into an organisation, preserving history & LFS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # | |
| # hf_fork.sh - Clone a list of Hub repos into an organisation, preserving history & LFS | |
| # | |
| # Usage: | |
| # HF_TOKEN=hf_... ./hf_fork.sh OurTeam myuser/whisper-tiny-eu myuser/other-repo ... | |
| # | |
| # Requirements: | |
| # • git ≥ 2.34 and git-lfs | |
| # • huggingface_hub ≥ 0.32 (gives you the `huggingface-cli` command) | |
| # • Your token (env var HF_TOKEN) must have "write" scope on the destination org. | |
| # • Enough disk space and bandwidth – the script downloads and re-uploads all LFS blobs. | |
# Strict mode: abort on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
#######################################
# Print the command-line syntax and terminate the script.
# Arguments: none
# Outputs:   two-line syntax summary on stderr
# Returns:   never returns; exits with status 255
#######################################
usage() {
  printf 'Syntax:\n %s DEST_ORG REPO1 [REPO2] [...]\n' "$0" >&2
  exit 255
}
# User parameters with some safety checking.
#
# The script runs with `set -u`, so positional parameters and TMP_DIR must not
# be expanded before we know they are set: the argument count is checked
# first and TMP_DIR is read through a defaulting expansion.
if (( $# < 2 )); then
  usage # need DEST_ORG plus at least one source repo
fi
DEST_ORG="${1}"
[[ -n "${DEST_ORG}" ]] || usage
shift
[[ -n "${1}" ]] || usage
REPOS=("$@") # positional args → list of source repos

# Scratch directory for the temporary clones; can be overridden from the
# environment. NOTE: clean_up removes this directory on exit.
TMP_DIR="${TMP_DIR:-${HOME}/tmp/hf-fork}"
| # helpers | |
#######################################
# Report a fatal error and terminate the script.
# Arguments: $@ - words of the error message (joined with spaces)
# Outputs:   the prefixed message on stderr
# Returns:   never returns; exits with status 255
#######################################
die() {
  local message="${*}"
  printf '%s\n' "[❌] Error: ${message}" >&2
  exit 255
}
#######################################
# Probe the Hugging Face CLI by running `huggingface-cli whoami`.
# Arguments: none
# Outputs:   nothing (stdout and stderr of the CLI are discarded)
# Returns:   the exit status of `huggingface-cli whoami`
#######################################
hf_check() {
  huggingface-cli whoami > /dev/null 2>&1
}
#######################################
# Create a repository on the Hub through `huggingface-cli repo create`.
# A failure of the CLI is treated as "repository probably exists": a warning
# is printed, the function waits 5 seconds and then returns successfully.
# Arguments: $1 - destination repo, either "ORG/NAME" or just "NAME"
# Outputs:   CLI output; a warning on stderr when creation fails
# Returns:   0
#######################################
hf_create_repo() {
  local DEST_REPO="${1}"
  local TEAM="" NAME
  local -a HF_ARGS=("--type=model" "-y")

  NAME="${DEST_REPO##*/}"
  # `${DEST_REPO%/*}` yields the whole string when there is no slash, so the
  # organisation is only extracted when a slash is actually present.
  if [[ "${DEST_REPO}" == */* ]]; then
    TEAM="${DEST_REPO%/*}"
  fi
  if [[ -n "${TEAM}" ]]; then
    HF_ARGS+=("--organization" "${TEAM}")
  fi

  # --type=model is fine for Spaces/Datasets too if you change it.
  if ! huggingface-cli repo create "${NAME}" "${HF_ARGS[@]}"; then
    echo "[⚠️ ] ${DEST_REPO} already exists? continuing... (waiting 5 seconds)" >&2
    sleep 5
  fi
}
#######################################
# Delete the scratch directory and everything below it.
# Globals:   TMP_DIR (read)
# Arguments: none
# Returns:   status of rm; aborts if TMP_DIR is unset or empty
#######################################
clean_up() {
  # `--` keeps a path starting with '-' from being parsed as rm options;
  # `:?` refuses to run with an empty path.
  rm -rf -- "${TMP_DIR:?}"
}
# Remove the scratch directory on every exit path, successful or not.
trap clean_up EXIT

# Fail early when the Hugging Face CLI cannot identify the current user.
hf_check || die 'Set the HF_TOKEN or run "huggingface-cli login".'

# All temporary clones are created below the scratch directory.
mkdir -p -- "${TMP_DIR}"
cd -- "${TMP_DIR}" || die "Cannot enter ${TMP_DIR}"

# main loop
for SRC in "${REPOS[@]}"; do
  NAME="${SRC#*/}" # repo slug (everything after first slash)
  [[ -n "${NAME}" ]] || die "Cannot derive a repository name from '${SRC}'"
  DEST="${DEST_ORG}/${NAME}"
  printf '\n=== Forking %s → %s ===\n' "${SRC}" "${DEST}"
  hf_create_repo "${DEST}"

  # Remotes use the Hub's SSH endpoint (git@hf.co:OWNER/REPO).
  git clone "git@hf.co:${DEST}" "${NAME}"
  pushd "${NAME}"

  # 1. pull non-LFS objects only
  git lfs install --skip-smudge --local
  git remote add upstream "git@hf.co:${SRC}"
  git fetch upstream

  # 2. fetch LFS blobs
  git lfs fetch --all upstream

  # 3. fast-forward fork to upstream's default branch (detect automatically)
  # LC_ALL=C: the "HEAD branch" label printed by git is translated in other
  # locales, which would make the awk match fail.
  DEFAULT_BRANCH="$(LC_ALL=C git remote show upstream \
    | awk '/HEAD branch/ {print $NF}')"
  [[ -n "${DEFAULT_BRANCH}" ]] \
    || die "Cannot detect the default branch of ${SRC}"
  git rev-parse --verify --quiet \
    "refs/remotes/upstream/${DEFAULT_BRANCH}^{commit}" > /dev/null \
    || die "${SRC} has no usable branch '${DEFAULT_BRANCH}'"
  git reset --hard "upstream/${DEFAULT_BRANCH}"

  # 4. re-enable push hooks & (optionally) > 5 GB support
  git lfs install --force --local
  # Best effort on purpose: the command is optional, so errors are ignored.
  huggingface-cli lfs-enable-largefiles . 2>/dev/null || true

  # 5. push everything to the organisation fork
  # Push HEAD explicitly: the local branch created by the clone is not
  # guaranteed to carry the same name as upstream's default branch.
  git push --force origin "HEAD:refs/heads/${DEFAULT_BRANCH}"

  popd
  rm -rf -- "${NAME:?}"
  echo "[✅] $SRC → $DEST done."
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment