Last active
March 5, 2026 04:50
-
-
Save ilyar/b61bab36b68338ceef6c049868164ae7 to your computer and use it in GitHub Desktop.
Install llama.cpp prebuilt binaries from GitHub releases https://github.com/ggml-org/llama.cpp/releases
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # Install llama.cpp prebuilt binaries from GitHub releases https://github.com/ggml-org/llama.cpp/releases | |
| # | |
| # curl -fsSL <URL_RAW>/install-llama-cpp.sh | bash | |
| # | |
| # Supports: | |
| # - macOS (arm64/x64) | |
| # - Linux Ubuntu (x64/s390x, cpu/vulkan/rocm7.2) | |
| # - Windows (cpu/cuda12.4/cuda13.1/vulkan/sycl/hip/opencl-adreno) | |
| # - openEuler (310p/910b + optional aclgraph) | |
| # | |
| # Env vars: | |
| # LLAMA_TAG e.g. b8201 (default: latest) | |
| # LLAMA_BACKEND auto|cpu|vulkan|rocm|cuda12|cuda13|sycl|hip|opencl-adreno|xcframework (default: auto) | |
| # LLAMA_PREFIX install prefix (default: /usr/local if writable or sudo available; else ~/.local) | |
| # LLAMA_NO_SUDO set to 1 to never use sudo | |
| # LLAMA_ENABLE_SYSTEMD set to 1 to install + start systemd service on Linux (default: 0) | |
| # LLAMA_SERVER_HOST default 127.0.0.1 | |
| # LLAMA_SERVER_PORT default 8080 | |
| # LLAMA_SERVER_ARGS extra args for llama-server | |
| # LLAMA_OE_FAMILY openEuler only: 310p|910b (default: 310p) | |
| # LLAMA_OE_ACLGRAPH openEuler only: 0|1 (default: 0) | |
| # | |
| # Examples: | |
| # curl -fsSL <URL_RAW>/install-llama-cpp.sh | bash | |
| # LLAMA_BACKEND=vulkan curl -fsSL <URL_RAW>/install-llama-cpp.sh | bash | |
| # LLAMA_BACKEND=auto LLAMA_ENABLE_SYSTEMD=1 curl -fsSL <URL_RAW>/install-llama-cpp.sh | bash | |
| # LLAMA_PREFIX=~/.local curl -fsSL <URL_RAW>/install-llama-cpp.sh | bash | |
| # LLAMA_BACKEND=cuda12 curl -fsSL <URL_RAW>/install-llama-cpp.sh | bash # Windows (Git Bash) | |
| main() { | |
# Strict mode: abort on command failure, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail

# Terminal styling sequences. Every tput failure is ignored and its stderr
# discarded, so these are simply empty when a capability is unavailable.
red="$( { tput bold || :; tput setaf 1 || :; } 2>/dev/null )"
plain="$( { tput sgr0 || :; } 2>/dev/null )"

# status MSG... - print a progress line on stderr.
status() {
  printf '%s\n' ">>> $*" >&2
}

# warning MSG... - print a highlighted warning on stderr and keep going.
warning() {
  printf '%s\n' "${red}WARNING:${plain} $*" >&2
}

# error MSG... - print a highlighted error on stderr and exit with status 1.
error() {
  printf '%s\n' "${red}ERROR:${plain} $*" >&2
  exit 1
}

# available NAME - succeed when NAME resolves to something runnable.
available() {
  command -v "$1" >/dev/null 2>&1
}
# require TOOL...
#
# Prints the tools that are not available, each preceded by a single space,
# followed by a newline. When nothing is missing the output is an empty
# line, so callers can test the captured value with `[ -n ... ]`.
# Relies on the `available` helper.
require() {
  local missing=""
  # Keep the loop variable local so it cannot clobber a caller's `tool`.
  local tool
  for tool in "$@"; do
    if ! available "$tool"; then
      missing="$missing $tool"
    fi
  done
  printf '%s\n' "$missing"
}
# Scratch directory for downloads and extraction; removed on every exit path.
TMP_DIR="$(mktemp -d)" || {
  printf '%s\n' "ERROR: could not create a temporary directory" >&2
  exit 1
}

# cleanup - remove the scratch directory. The :? guard makes the expansion
# fail instead of handing rm an empty path; `--` ends option parsing.
cleanup() {
  rm -rf -- "${TMP_DIR:?}"
}
trap cleanup EXIT
# ---------- config ----------
REPO="ggml-org/llama.cpp"
API_BASE="https://api.github.com/repos/${REPO}/releases"
UA="llama.cpp-prebuilt-installer/2.0"

# Leave TAG empty when LLAMA_TAG is not set. The release-metadata step later
# in this script fetches ${API_BASE}/latest in that case and parses tag_name
# from it, reporting failures through error(). Resolving the tag here as well
# would cost a second API request, made before the dependency check and
# without the User-Agent/Accept headers.
TAG="${LLAMA_TAG:-}"

BACKEND="${LLAMA_BACKEND:-auto}"
ENABLE_SYSTEMD="${LLAMA_ENABLE_SYSTEMD:-0}"
SERVER_HOST="${LLAMA_SERVER_HOST:-127.0.0.1}"
SERVER_PORT="${LLAMA_SERVER_PORT:-8080}"
SERVER_ARGS="${LLAMA_SERVER_ARGS:-}"
OE_FAMILY="${LLAMA_OE_FAMILY:-310p}"  # openEuler: 310p or 910b
OE_ACLGRAPH="${LLAMA_OE_ACLGRAPH:-0}" # openEuler: 1 => -aclgraph asset
# ---------- deps ----------
# Abort early when a required tool is missing. The whole report goes to
# stderr, matching status/warning/error, so it stays visible when stdout is
# redirected.
NEEDS="$(require curl uname grep cut sed awk)"
if [ -n "$NEEDS" ]; then
  status "Missing required tools:"
  # NEEDS is a space-separated list of tool names; splitting is intentional.
  for n in $NEEDS; do
    printf ' - %s\n' "$n" >&2
  done
  exit 1
fi
# ---------- sudo policy ----------
# SUDO is "sudo" only when all three hold: LLAMA_NO_SUDO is not 1, the
# current user is not root, and a sudo command exists. Otherwise it is empty.
SUDO=""
if [ "${LLAMA_NO_SUDO:-0}" != "1" ] && [ "$(id -u)" -ne 0 ] && available sudo; then
  SUDO="sudo"
fi
# ---------- detect OS / arch ----------
OS="$(uname -s)"
ARCH_RAW="$(uname -m)"

# Map the machine name onto the arch token used in release asset names.
if [ "$ARCH_RAW" = "x86_64" ] || [ "$ARCH_RAW" = "amd64" ]; then
  ARCH="x64"
elif [ "$ARCH_RAW" = "aarch64" ] || [ "$ARCH_RAW" = "arm64" ]; then
  ARCH="arm64"
elif [ "$ARCH_RAW" = "s390x" ]; then
  ARCH="s390x"
else
  error "Unsupported architecture: $ARCH_RAW"
fi

# Exactly one platform flag becomes 1; anything else is rejected.
IS_WINDOWS=0
IS_MAC=0
IS_LINUX=0
case "$OS" in
  Linux)
    IS_LINUX=1
    ;;
  Darwin)
    IS_MAC=1
    ;;
  MINGW*|MSYS*|CYGWIN*)
    IS_WINDOWS=1
    ;;
  *)
    error "Unsupported OS: $OS"
    ;;
esac
# openEuler reports itself as Linux; recognise it by a case-insensitive
# match for "openeuler" on the ID= line of a readable /etc/os-release.
IS_OPENEULER=0
if [ "$IS_LINUX" = "1" ] \
  && [ -r /etc/os-release ] \
  && grep -qi '^ID=.*openeuler' /etc/os-release; then
  IS_OPENEULER=1
fi
# ---------- choose prefix ----------
# Precedence: an explicit LLAMA_PREFIX wins; otherwise /usr/local when it is
# writable or sudo was selected; otherwise the per-user ~/.local.
if [ -n "${LLAMA_PREFIX:-}" ]; then
  PREFIX="$LLAMA_PREFIX"
elif [ -w /usr/local ] || [ -n "$SUDO" ]; then
  PREFIX="/usr/local"
else
  PREFIX="${HOME}/.local"
fi

BIN_DIR="${PREFIX}/bin"
ROOT_DIR="${PREFIX}/lib/llama.cpp" # versioned installs live under here

mkdir -p "$TMP_DIR"
# ---------- fetch release tag ----------
# fetch_release_json URL OUT
#
# Downloads URL into the file OUT, sending the GitHub JSON Accept header and
# this installer's User-Agent (UA). Any curl failure ends the script through
# error().
fetch_release_json() {
  local url="$1" out="$2"
  if ! curl -fsSL -H "Accept: application/vnd.github+json" -H "User-Agent: ${UA}" "$url" > "$out"; then
    error "Failed to fetch release metadata (GitHub API rate limit? try later)"
  fi
}
# Fetch release metadata into the scratch directory. With no tag chosen, the
# latest release is fetched and its tag_name becomes TAG; with a tag, that
# specific release is fetched.
RELEASE_JSON="$TMP_DIR/release.json"
if [ -z "$TAG" ]; then
  status "Fetching latest release info..."
  fetch_release_json "${API_BASE}/latest" "$RELEASE_JSON"
  # First line mentioning "tag_name"; the fourth quote-delimited field is
  # taken as the value. `|| true` keeps a failed match from tripping set -e
  # so the explicit check below can report it.
  TAG="$(grep -m1 '"tag_name"' "$RELEASE_JSON" | cut -d '"' -f4 || true)"
  if [ -z "$TAG" ]; then
    error "Could not parse tag_name from GitHub API response"
  fi
else
  status "Fetching release info for tag: $TAG"
  fetch_release_json "${API_BASE}/tags/${TAG}" "$RELEASE_JSON"
fi
status "Selected release: $TAG"
# ---------- auto backend detection ----------
# auto_backend
#
# Prints the backend to use when LLAMA_BACKEND is "auto":
#   macOS   -> cpu (the macOS asset name built below has no backend part)
#   Windows -> cuda12 if nvidia-smi exists, else vulkan if vulkaninfo exists,
#              else cpu
#   Linux   -> rocm if rocminfo exists, else vulkan if vulkaninfo exists,
#              else cpu (this path also covers openEuler)
# Reads IS_MAC and IS_WINDOWS; always returns 0.
auto_backend() {
  local gpu_tool gpu_backend

  if [ "$IS_MAC" = "1" ]; then
    echo "cpu"
    return 0
  fi

  # Windows and Linux differ only in which vendor tool is probed first.
  if [ "$IS_WINDOWS" = "1" ]; then
    gpu_tool="nvidia-smi"
    gpu_backend="cuda12"
  else
    gpu_tool="rocminfo"
    gpu_backend="rocm"
  fi

  if available "$gpu_tool"; then
    echo "$gpu_backend"
  elif available vulkaninfo; then
    echo "vulkan"
  else
    echo "cpu"
  fi
  return 0
}
# Resolve "auto" into a concrete backend; any other value is used as given.
case "$BACKEND" in
  auto)
    BACKEND="$(auto_backend)"
    status "Auto-selected backend: $BACKEND"
    ;;
  *)
    status "Requested backend: $BACKEND"
    ;;
esac
# ---------- build asset filename ----------
# Outputs of this section:
#   ASSET  - release asset to download (always set, or the script exits)
#   ASSET2 - optional companion archive (only the Windows cuda12 cudart zip)
#   EXT    - archive type of ASSET: "tar.gz" or "zip"
ASSET=""
ASSET2=""
EXT="tar.gz"

if [ "$IS_MAC" = "1" ]; then
  if [ "$BACKEND" = "xcframework" ]; then
    # XCFramework bundle instead of the regular macOS binaries.
    EXT="zip"
    ASSET="llama-${TAG}-xcframework.zip"
  else
    # Only one macOS binary tarball per arch exists in the names used here;
    # cpu/vulkan/rocm are accepted silently, anything else gets a warning.
    case "$BACKEND" in
      cpu|vulkan|rocm) ;;
      *) warning "macOS: backend '$BACKEND' ignored; using macos prebuilt" ;;
    esac
    ASSET="llama-${TAG}-bin-macos-${ARCH}.tar.gz"
    EXT="tar.gz"
  fi

elif [ "$IS_WINDOWS" = "1" ]; then
  EXT="zip"
  case "$BACKEND" in
    cpu)
      ASSET="llama-${TAG}-bin-win-cpu-${ARCH}.zip"
      ;;
    cuda12)
      if [ "$ARCH" != "x64" ]; then
        error "Windows CUDA builds are x64 only in your list"
      fi
      ASSET="llama-${TAG}-bin-win-cuda-12.4-x64.zip"
      ASSET2="cudart-llama-bin-win-cuda-12.4-x64.zip"
      ;;
    cuda13)
      if [ "$ARCH" != "x64" ]; then
        error "Windows CUDA builds are x64 only in your list"
      fi
      # No cudart companion is downloaded for the 13.1 build.
      ASSET="llama-${TAG}-bin-win-cuda-13.1-x64.zip"
      ;;
    vulkan)
      if [ "$ARCH" != "x64" ]; then
        error "Windows Vulkan build in your list is x64 only"
      fi
      ASSET="llama-${TAG}-bin-win-vulkan-x64.zip"
      ;;
    sycl)
      if [ "$ARCH" != "x64" ]; then
        error "Windows SYCL build in your list is x64 only"
      fi
      ASSET="llama-${TAG}-bin-win-sycl-x64.zip"
      ;;
    hip)
      if [ "$ARCH" != "x64" ]; then
        error "Windows HIP Radeon build in your list is x64 only"
      fi
      ASSET="llama-${TAG}-bin-win-hip-radeon-x64.zip"
      ;;
    opencl-adreno)
      if [ "$ARCH" != "arm64" ]; then
        error "Windows OpenCL Adreno build in your list is arm64 only"
      fi
      ASSET="llama-${TAG}-bin-win-opencl-adreno-arm64.zip"
      ;;
    *)
      error "Unsupported Windows backend: $BACKEND"
      ;;
  esac

elif [ "$IS_OPENEULER" = "1" ]; then
  # openEuler asset names are keyed by family (310p/910b) and use
  # x86/aarch64 rather than x64/arm64.
  OE_ARCH=""
  case "$ARCH" in
    x64) OE_ARCH="x86" ;;
    arm64) OE_ARCH="aarch64" ;;
    *) error "openEuler assets in your list support x86 and aarch64 only; got: $ARCH" ;;
  esac
  case "$OE_FAMILY" in
    310p|910b) ;;
    *) error "openEuler: LLAMA_OE_FAMILY must be 310p or 910b" ;;
  esac

  ASSET="llama-${TAG}-bin-${OE_FAMILY}-openEuler-${OE_ARCH}"
  if [ "$OE_ACLGRAPH" = "1" ]; then
    ASSET="${ASSET}-aclgraph"
  fi
  ASSET="${ASSET}.tar.gz"
  EXT="tar.gz"

  # The backend plays no part in openEuler asset names; say so when the
  # user (or auto-detection) picked something other than cpu.
  if [ "$BACKEND" != "cpu" ]; then
    status "openEuler: ignoring backend='$BACKEND' (assets are 310p/910b based)"
  fi

else
  # Remaining Linux systems use the Ubuntu assets.
  case "$BACKEND" in
    cpu)
      case "$ARCH" in
        s390x) ASSET="llama-${TAG}-bin-ubuntu-s390x.tar.gz" ;;
        x64) ASSET="llama-${TAG}-bin-ubuntu-x64.tar.gz" ;;
        *) error "Ubuntu CPU assets in your list are x64 and s390x only" ;;
      esac
      ;;
    vulkan)
      if [ "$ARCH" != "x64" ]; then
        error "Ubuntu Vulkan asset in your list is x64 only"
      fi
      ASSET="llama-${TAG}-bin-ubuntu-vulkan-x64.tar.gz"
      ;;
    rocm)
      if [ "$ARCH" != "x64" ]; then
        error "Ubuntu ROCm asset in your list is x64 only"
      fi
      ASSET="llama-${TAG}-bin-ubuntu-rocm-7.2-x64.tar.gz"
      ;;
    *)
      error "Unsupported Linux backend: $BACKEND (use cpu|vulkan|rocm|auto)"
      ;;
  esac
  EXT="tar.gz"
fi

if [ -z "$ASSET" ]; then
  error "Internal: failed to choose asset"
fi
# ---------- download ----------
# dl FILENAME DEST
#
# Downloads release asset FILENAME of ${REPO} at ${TAG} to the path DEST,
# following redirects and showing a progress bar. Ends the script through
# error() when curl fails.
dl() {
  local filename="$1" dest="$2"
  local url="https://github.com/${REPO}/releases/download/${TAG}/${filename}"
  local -a curl_args=(
    --fail --show-error --location --progress-bar
    -H "User-Agent: ${UA}"
    -o "$dest"
    "$url"
  )

  status "Downloading: $filename"
  if ! curl "${curl_args[@]}"; then
    error "Download failed: $filename"
  fi
}
# Fetch the main archive, then the companion archive when one was selected.
# ARCHIVE2 stays empty when there is no companion.
ARCHIVE1="$TMP_DIR/asset1.${EXT}"
dl "$ASSET" "$ARCHIVE1"

ARCHIVE2=""
[ -z "$ASSET2" ] || {
  ARCHIVE2="$TMP_DIR/asset2.zip"
  dl "$ASSET2" "$ARCHIVE2"
}
# ---------- extract ----------
# Unpack everything into one staging directory. The extraction tool is
# checked only when it is actually needed.
EXTRACT_DIR="$TMP_DIR/extract"
mkdir -p "$EXTRACT_DIR"

case "$EXT" in
  zip)
    NEEDS="$(require unzip)"
    if [ -n "$NEEDS" ]; then
      error "Missing required tool: unzip"
    fi
    unzip -q "$ARCHIVE1" -d "$EXTRACT_DIR"
    ;;
  *)
    NEEDS="$(require tar)"
    if [ -n "$NEEDS" ]; then
      error "Missing required tool: tar"
    fi
    tar -xzf "$ARCHIVE1" -C "$EXTRACT_DIR"
    ;;
esac

# The companion archive is always a zip and lands next to the main files.
if [ -n "$ARCHIVE2" ]; then
  NEEDS="$(require unzip)"
  if [ -n "$NEEDS" ]; then
    error "Missing required tool: unzip"
  fi
  unzip -q "$ARCHIVE2" -d "$EXTRACT_DIR"
fi
# ---------- install layout ----------
# Each install gets its own directory, ${ROOT_DIR}/<tag>-<os>-<arch>-<backend>,
# and executables are linked into ${BIN_DIR} afterwards.
#
# The :? guards make the expansion fail instead of letting an empty value
# turn the rm -rf / cp -R below (possibly run under sudo) into an operation
# on "/" or on the filesystem root's contents.
INSTALL_ID="${TAG:?}-$(uname -s | tr '[:upper:]' '[:lower:]')-${ARCH:?}-${BACKEND:?}"
INSTALL_DIR="${ROOT_DIR:?}/${INSTALL_ID}"
status "Installing into: $INSTALL_DIR"

# sudo is used only for the default /usr/local prefix; any other prefix is
# written as the current user.
elevate=""
if [ -n "$SUDO" ] && [ "$PREFIX" = "/usr/local" ]; then
  elevate="$SUDO"
fi

# $elevate is intentionally unquoted: when empty it must expand to nothing.
$elevate mkdir -p "$BIN_DIR" "$ROOT_DIR"
# Start from a clean directory so files from an earlier install of the same
# id do not linger.
$elevate rm -rf -- "${INSTALL_DIR:?}"
$elevate mkdir -p "$INSTALL_DIR"
$elevate cp -R "${EXTRACT_DIR:?}"/. "$INSTALL_DIR"/
# ---------- link executables ----------
# link_one SRC NAME
#
# Makes ${BIN_DIR}/NAME a symlink to SRC, replacing an existing entry. If the
# symlink cannot be created (its error output is discarded), SRC is copied
# there instead. sudo is used only when it was selected and the prefix is
# /usr/local.
link_one() {
  local src="$1" name="$2"
  local dest="${BIN_DIR}/${name}"
  local elevate=""

  if [ -n "$SUDO" ] && [ "$PREFIX" = "/usr/local" ]; then
    elevate="$SUDO"
  fi

  # $elevate is intentionally unquoted: when empty it must expand to nothing.
  $elevate ln -sf "$src" "$dest" 2>/dev/null || $elevate cp -f "$src" "$dest"
}
status "Linking binaries into: $BIN_DIR"

# What counts as a linkable file differs per platform: *.exe/*.bat on
# Windows, any regular file with all execute bits set elsewhere.
if [ "$IS_WINDOWS" = "1" ]; then
  link_match=( \( -iname '*.exe' -o -iname '*.bat' \) )
  link_none_msg="No .exe/.bat found. Asset layout may have changed"
else
  link_match=( -perm -111 )
  link_none_msg="No executable files found. Asset layout may have changed"
fi

# Best effort: find errors are discarded and never abort the install.
link_targets=()
while IFS= read -r line; do
  link_targets+=("$line")
done < <(find "$INSTALL_DIR" -type f "${link_match[@]}" 2>/dev/null || true)

# Only expand the array when it has elements: under `set -u`, bash older
# than 4.4 (including the 3.2 shipped with macOS) treats "${arr[@]}" of an
# empty array as an unbound variable and would abort right after the warning.
if [ "${#link_targets[@]}" -gt 0 ]; then
  for f in "${link_targets[@]}"; do
    link_one "$f" "$(basename "$f")"
  done
else
  warning "$link_none_msg"
fi

# Friendly alias: `llama` -> llama-cli (non-Windows only).
if [ "$IS_WINDOWS" != "1" ] && [ -f "${BIN_DIR}/llama-cli" ]; then
  link_one "${BIN_DIR}/llama-cli" "llama"
fi
# ---------- optional systemd (Linux only) ----------
# configure_systemd
#
# Writes /etc/systemd/system/llama-cpp.service for ${BIN_DIR}/llama-server,
# then reloads systemd, enables the unit and (re)starts it.
#
# Does nothing (returns 0) when: not on Linux; LLAMA_ENABLE_SYSTEMD is not 1;
# systemctl is missing; llama-server is not executable in BIN_DIR; or the
# current user is neither root nor able to use sudo. The last three cases
# print a warning.
configure_systemd() {
  [ "$IS_LINUX" = "1" ] || return 0
  [ "$ENABLE_SYSTEMD" = "1" ] || return 0

  if ! available systemctl; then
    warning "systemctl not found; skipping systemd service"
    return 0
  fi

  local server_path="${BIN_DIR}/llama-server"
  if [ ! -x "$server_path" ]; then
    warning "llama-server not found in ${BIN_DIR}; skipping systemd"
    return 0
  fi

  # SUDO is also empty when the script already runs as root, and root can
  # write the unit directly. Refuse only when unprivileged AND without sudo.
  if [ -z "$SUDO" ] && [ "$(id -u)" -ne 0 ]; then
    warning "No sudo available; cannot install system-wide systemd service. (Set LLAMA_PREFIX=~/.local and use user systemd manually.)"
    return 0
  fi

  status "Creating llama.cpp systemd service (llama-cpp.service)..."
  # Unquoted heredoc delimiter: the ${...} values are expanded now and baked
  # into the unit file. $SUDO is unquoted so an empty value expands to nothing.
  $SUDO tee /etc/systemd/system/llama-cpp.service >/dev/null <<EOF
[Unit]
Description=llama.cpp server
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
ExecStart=${server_path} --host ${SERVER_HOST} --port ${SERVER_PORT} ${SERVER_ARGS}
Restart=always
RestartSec=2
Environment="PATH=/usr/local/bin:/usr/bin:/bin:${BIN_DIR}"
[Install]
WantedBy=multi-user.target
EOF

  status "Enabling and starting llama-cpp.service..."
  $SUDO systemctl daemon-reload
  $SUDO systemctl enable llama-cpp.service
  $SUDO systemctl restart llama-cpp.service
  status "llama.cpp server should be listening on ${SERVER_HOST}:${SERVER_PORT}"
}
# Optional service setup, then a short summary on stdout.
configure_systemd
status "Install complete"

printf '\n%s\n%s\n\n' "Installed: $INSTALL_DIR" "Binaries: $BIN_DIR"

if [ "$IS_WINDOWS" = "1" ]; then
  printf '%s\n' \
    "Try (Git Bash):" \
    " ${BIN_DIR}/llama-cli.exe --help" \
    " ${BIN_DIR}/llama-server.exe --help"
else
  printf '%s\n' \
    "Try:" \
    " ${BIN_DIR}/llama-cli --help" \
    " ${BIN_DIR}/llama-server --help" \
    ""
  # Mention the service only when it was requested on a Linux system that
  # has systemctl.
  if [ "$ENABLE_SYSTEMD" = "1" ] && [ "$IS_LINUX" = "1" ] && available systemctl; then
    printf '%s\n' \
      "Service:" \
      " systemctl status llama-cpp.service"
  fi
fi
| } | |
| main "$@" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment