Skip to content

Instantly share code, notes, and snippets.

@ilyar
Last active March 5, 2026 04:50
Show Gist options
  • Select an option

  • Save ilyar/b61bab36b68338ceef6c049868164ae7 to your computer and use it in GitHub Desktop.

Select an option

Save ilyar/b61bab36b68338ceef6c049868164ae7 to your computer and use it in GitHub Desktop.
Install llama.cpp prebuilt binaries from GitHub releases https://github.com/ggml-org/llama.cpp/releases
#!/usr/bin/env bash
# Install llama.cpp prebuilt binaries from GitHub releases https://github.com/ggml-org/llama.cpp/releases
#
# curl -fsSL <URL_RAW> | bash
#
# Supports:
# - macOS (arm64/x64)
# - Linux Ubuntu (x64/s390x, cpu/vulkan/rocm7.2)
# - Windows (cpu/cuda12.4/cuda13.1/vulkan/sycl/hip/opencl-adreno)
# - openEuler (310p/910b + optional aclgraph)
#
# Env vars:
# LLAMA_TAG e.g. b8201 (default: latest)
# LLAMA_BACKEND auto|cpu|vulkan|rocm|cuda12|cuda13|sycl|hip|opencl-adreno|xcframework (default: auto)
# LLAMA_PREFIX install prefix (default: /usr/local if writable or sudo available; else ~/.local)
# LLAMA_NO_SUDO set to 1 to never use sudo
# LLAMA_ENABLE_SYSTEMD set to 1 to install + start systemd service on Linux (default: 0)
# LLAMA_SERVER_HOST default 127.0.0.1
# LLAMA_SERVER_PORT default 8080
# LLAMA_SERVER_ARGS extra args for llama-server
# LLAMA_OE_FAMILY openEuler only: 310p|910b (default: 310p)
# LLAMA_OE_ACLGRAPH openEuler only: 0|1 (default: 0)
#
# Examples (put the variables on the `bash` side of the pipe; an assignment
# placed before `curl` is only passed to curl and never reaches the installer):
# curl -fsSL <URL_RAW>/install-llama-cpp.sh | bash
# curl -fsSL <URL_RAW>/install-llama-cpp.sh | LLAMA_BACKEND=vulkan bash
# curl -fsSL <URL_RAW>/install-llama-cpp.sh | LLAMA_BACKEND=auto LLAMA_ENABLE_SYSTEMD=1 bash
# curl -fsSL <URL_RAW>/install-llama-cpp.sh | LLAMA_PREFIX=~/.local bash
# curl -fsSL <URL_RAW>/install-llama-cpp.sh | LLAMA_BACKEND=cuda12 bash # Windows (Git Bash)
main() {
set -euo pipefail
# Terminal styling for diagnostics. Every tput call is allowed to fail and its
# stderr is discarded, so on a dumb terminal these simply stay (partly) empty.
red="$( { tput bold || :; tput setaf 1 || :; } 2>/dev/null )"
plain="$( { tput sgr0 || :; } 2>/dev/null )"

# status MSG... - write a progress line to stderr.
status() {
  printf '%s\n' ">>> $*" >&2
}

# warning MSG... - write a highlighted warning to stderr and carry on.
warning() {
  printf '%s\n' "${red}WARNING:${plain} $*" >&2
}

# error MSG... - write a highlighted error to stderr and exit with status 1.
error() {
  printf '%s\n' "${red}ERROR:${plain} $*" >&2
  exit 1
}

# available TOOL - succeed when TOOL resolves to a command in this shell.
available() {
  command -v "$1" > /dev/null 2>&1
}
# require TOOL...
# Print the TOOLs that are not available, each preceded by a single space, on
# one line (an empty line when nothing is missing). Always returns 0; callers
# test the output with [ -n ... ] and word-split it.
require() {
  # Both variables are local so the loop cannot overwrite a caller's "tool".
  local tool missing=""
  for tool in "$@"; do
    available "$tool" || missing="$missing $tool"
  done
  echo "$missing"
}
# Scratch directory for downloads and extraction; removed on every exit path.
TMP_DIR="$(mktemp -d)" || error "Failed to create a temporary directory"

# cleanup - delete the scratch directory (installed as the EXIT trap below).
cleanup() {
  # ${TMP_DIR:?} makes the shell refuse to run rm -rf with an empty path.
  rm -rf -- "${TMP_DIR:?}"
}
trap cleanup EXIT
# ---------- config ----------
REPO="ggml-org/llama.cpp"
API_BASE="https://api.github.com/repos/${REPO}/releases"
UA="llama.cpp-prebuilt-installer/2.0"
# Leave TAG empty when LLAMA_TAG is not given. The release-fetch step further
# down treats an empty TAG as "latest", resolves it with a single API request
# (sent with the User-Agent above) and reports a clear error if that fails, so
# no separate lookup is done here.
TAG="${LLAMA_TAG:-}"
BACKEND="${LLAMA_BACKEND:-auto}"
ENABLE_SYSTEMD="${LLAMA_ENABLE_SYSTEMD:-0}"
SERVER_HOST="${LLAMA_SERVER_HOST:-127.0.0.1}"
SERVER_PORT="${LLAMA_SERVER_PORT:-8080}"
SERVER_ARGS="${LLAMA_SERVER_ARGS:-}"
OE_FAMILY="${LLAMA_OE_FAMILY:-310p}" # openEuler: 310p or 910b
OE_ACLGRAPH="${LLAMA_OE_ACLGRAPH:-0}" # openEuler: 1 => -aclgraph
# ---------- deps ----------
# Check up front the external tools the script calls unconditionally.
# tar/unzip are checked at extraction time, once the archive type is known.
# awk is not in the list because nothing in this script calls it.
NEEDS="$(require curl uname grep cut sed id find tr)"
if [ -n "$NEEDS" ]; then
  status "Missing required tools:"
  # Unquoted on purpose: NEEDS is a space-separated list of tool names.
  # The list goes to stderr, next to its heading.
  for n in $NEEDS; do echo " - $n" >&2; done
  exit 1
fi
# ---------- sudo policy ----------
# SUDO becomes "sudo" only when all three hold: the user did not opt out with
# LLAMA_NO_SUDO=1, we are not already uid 0, and a sudo command is available.
# In every other case it stays empty.
SUDO=""
if [ "${LLAMA_NO_SUDO:-0}" != "1" ] && [ "$(id -u)" -ne 0 ] && available sudo; then
  SUDO="sudo"
fi
# ---------- detect OS / arch ----------
OS="$(uname -s)"
ARCH_RAW="$(uname -m)"

# Map the machine name onto the architecture tag used in asset file names.
if [ "$ARCH_RAW" = "x86_64" ] || [ "$ARCH_RAW" = "amd64" ]; then
  ARCH="x64"
elif [ "$ARCH_RAW" = "aarch64" ] || [ "$ARCH_RAW" = "arm64" ]; then
  ARCH="arm64"
elif [ "$ARCH_RAW" = "s390x" ]; then
  ARCH="s390x"
else
  error "Unsupported architecture: $ARCH_RAW"
fi

# Exactly one of these flags is set to 1 for a supported kernel name.
IS_WINDOWS=0
IS_MAC=0
IS_LINUX=0
if [[ "$OS" == "Darwin" ]]; then
  IS_MAC=1
elif [[ "$OS" == "Linux" ]]; then
  IS_LINUX=1
elif [[ "$OS" == MINGW* || "$OS" == MSYS* || "$OS" == CYGWIN* ]]; then
  # Git Bash / MSYS2 / Cygwin all count as Windows.
  IS_WINDOWS=1
else
  error "Unsupported OS: $OS"
fi
# openEuler reports itself as Linux; recognise it by an ID= line in
# /etc/os-release that mentions "openeuler" (case-insensitive).
IS_OPENEULER=0
if [ "$IS_LINUX" = "1" ] && [ -r /etc/os-release ] \
  && grep -qi '^ID=.*openeuler' /etc/os-release; then
  IS_OPENEULER=1
fi
# ---------- choose prefix ----------
# An explicit LLAMA_PREFIX always wins. Otherwise install under /usr/local when
# it is writable or sudo was selected, and fall back to ~/.local when neither
# is the case.
PREFIX="${LLAMA_PREFIX:-}"
if [ -z "$PREFIX" ]; then
  if [ ! -w /usr/local ] && [ -z "$SUDO" ]; then
    PREFIX="${HOME}/.local"
  else
    PREFIX="/usr/local"
  fi
fi
# Executables are linked into BIN_DIR; versioned installs live under ROOT_DIR.
BIN_DIR="${PREFIX}/bin"
ROOT_DIR="${PREFIX}/lib/llama.cpp"
mkdir -p "$TMP_DIR"
# ---------- fetch release tag ----------
# fetch_release_json URL OUT
# Download the GitHub API document at URL into the file OUT, sending the
# GitHub JSON Accept header and this installer's User-Agent. Any failure of
# curl (or of opening OUT) aborts the script through error.
fetch_release_json() {
  local endpoint="$1" target="$2"
  if ! curl -fsSL \
    -H "Accept: application/vnd.github+json" \
    -H "User-Agent: ${UA}" \
    "$endpoint" > "$target"; then
    error "Failed to fetch release metadata (GitHub API rate limit? try later)"
  fi
}
# Fetch the release metadata. With an empty TAG the latest release is looked
# up and TAG is taken from the first "tag_name" field of the response;
# otherwise the metadata of the requested tag is fetched.
RELEASE_JSON="$TMP_DIR/release.json"
if [ -z "$TAG" ]; then
  status "Fetching latest release info..."
  fetch_release_json "${API_BASE}/latest" "$RELEASE_JSON"
  # Fourth double-quote-delimited field of: "tag_name": "<tag>",
  TAG="$(grep -m1 '"tag_name"' "$RELEASE_JSON" | cut -d '"' -f4 || true)"
  if [ -z "$TAG" ]; then
    error "Could not parse tag_name from GitHub API response"
  fi
else
  status "Fetching release info for tag: $TAG"
  fetch_release_json "${API_BASE}/tags/${TAG}" "$RELEASE_JSON"
fi
status "Selected release: $TAG"
# ---------- auto backend detection ----------
# auto_backend
# Print the backend to use for LLAMA_BACKEND=auto, based on the detected
# platform (IS_MAC, IS_WINDOWS, ARCH) and on which vendor tools are available:
#   macOS          -> cpu (the macOS asset is chosen per architecture only)
#   non-x64 hosts  -> cpu
#   Windows x64    -> cuda12 if nvidia-smi, else vulkan if vulkaninfo, else cpu
#   Linux x64      -> rocm if rocminfo, else vulkan if vulkaninfo, else cpu
# Always returns 0.
auto_backend() {
  if [ "$IS_MAC" = "1" ]; then
    echo "cpu"
    return 0
  fi
  # The asset-selection step accepts cuda12, vulkan and rocm on x64 only. On
  # any other architecture a GPU pick would abort the install even though a
  # cpu build may exist, so probing is skipped there.
  if [ "${ARCH:-}" != "x64" ]; then
    echo "cpu"
    return 0
  fi
  if [ "$IS_WINDOWS" = "1" ]; then
    if available nvidia-smi; then
      # cuda12 is the default CUDA flavour; cuda13 must be requested explicitly.
      echo "cuda12"
    elif available vulkaninfo; then
      echo "vulkan"
    else
      echo "cpu"
    fi
    return 0
  fi
  # Linux (including openEuler): ROCm first, then Vulkan, else CPU.
  if available rocminfo; then
    echo "rocm"
  elif available vulkaninfo; then
    echo "vulkan"
  else
    echo "cpu"
  fi
}
# Resolve "auto" to a concrete backend; any other value is used as given.
case "$BACKEND" in
  auto)
    BACKEND="$(auto_backend)"
    status "Auto-selected backend: $BACKEND"
    ;;
  *)
    status "Requested backend: $BACKEND"
    ;;
esac
# ---------- build asset filename ----------
# Results of this section:
#   ASSET  - name of the release archive to download
#   ASSET2 - optional companion archive (cudart, set for Windows cuda12 only)
#   EXT    - archive type of ASSET: "tar.gz" or "zip"
ASSET=""
ASSET2=""
EXT="tar.gz"
# Prefix shared by every binary archive of the selected release.
STEM="llama-${TAG}-bin"

# need_arch WANTED MESSAGE - abort with MESSAGE unless ARCH equals WANTED.
need_arch() {
  [ "$ARCH" = "$1" ] || error "$2"
}

if [ "$IS_MAC" = "1" ]; then
  if [ "$BACKEND" = "xcframework" ]; then
    # iOS XCFramework bundle
    EXT="zip"
    ASSET="llama-${TAG}-xcframework.zip"
  else
    case "$BACKEND" in
      cpu|vulkan|rocm) : ;; # tolerated silently
      *) warning "macOS: backend '$BACKEND' ignored; using macos prebuilt" ;;
    esac
    # Every other backend maps onto the per-architecture macOS tarball.
    ASSET="${STEM}-macos-${ARCH}.tar.gz"
  fi
elif [ "$IS_WINDOWS" = "1" ]; then
  EXT="zip"
  case "$BACKEND" in
    cpu)
      ASSET="${STEM}-win-cpu-${ARCH}.zip"
      ;;
    cuda12)
      need_arch x64 "Windows CUDA builds are x64 only in your list"
      ASSET="${STEM}-win-cuda-12.4-x64.zip"
      # The CUDA runtime is downloaded as a second archive.
      ASSET2="cudart-llama-bin-win-cuda-12.4-x64.zip"
      ;;
    cuda13)
      need_arch x64 "Windows CUDA builds are x64 only in your list"
      # No companion cudart archive is requested for 13.1.
      ASSET="${STEM}-win-cuda-13.1-x64.zip"
      ;;
    vulkan)
      need_arch x64 "Windows Vulkan build in your list is x64 only"
      ASSET="${STEM}-win-vulkan-x64.zip"
      ;;
    sycl)
      need_arch x64 "Windows SYCL build in your list is x64 only"
      ASSET="${STEM}-win-sycl-x64.zip"
      ;;
    hip)
      need_arch x64 "Windows HIP Radeon build in your list is x64 only"
      ASSET="${STEM}-win-hip-radeon-x64.zip"
      ;;
    opencl-adreno)
      need_arch arm64 "Windows OpenCL Adreno build in your list is arm64 only"
      ASSET="${STEM}-win-opencl-adreno-arm64.zip"
      ;;
    *)
      error "Unsupported Windows backend: $BACKEND"
      ;;
  esac
elif [ "$IS_OPENEULER" = "1" ]; then
  # openEuler assets are keyed by device family and use x86/aarch64 in their
  # names instead of x64/arm64.
  OE_ARCH=""
  if [ "$ARCH" = "x64" ]; then
    OE_ARCH="x86"
  elif [ "$ARCH" = "arm64" ]; then
    OE_ARCH="aarch64"
  else
    error "openEuler assets in your list support x86 and aarch64 only; got: $ARCH"
  fi
  if [ "$OE_FAMILY" != "310p" ] && [ "$OE_FAMILY" != "910b" ]; then
    error "openEuler: LLAMA_OE_FAMILY must be 310p or 910b"
  fi
  ASSET="${STEM}-${OE_FAMILY}-openEuler-${OE_ARCH}"
  if [ "$OE_ACLGRAPH" = "1" ]; then
    ASSET="${ASSET}-aclgraph"
  fi
  ASSET="${ASSET}.tar.gz"
  # The backend plays no part in the openEuler asset name; say so when one
  # other than cpu was selected.
  if [ "$BACKEND" != "cpu" ]; then
    status "openEuler: ignoring backend='$BACKEND' (assets are 310p/910b based)"
  fi
else
  # Ubuntu assets (every other Linux)
  case "$BACKEND" in
    cpu)
      if [ "$ARCH" = "s390x" ]; then
        ASSET="${STEM}-ubuntu-s390x.tar.gz"
      else
        need_arch x64 "Ubuntu CPU assets in your list are x64 and s390x only"
        ASSET="${STEM}-ubuntu-x64.tar.gz"
      fi
      ;;
    vulkan)
      need_arch x64 "Ubuntu Vulkan asset in your list is x64 only"
      ASSET="${STEM}-ubuntu-vulkan-x64.tar.gz"
      ;;
    rocm)
      need_arch x64 "Ubuntu ROCm asset in your list is x64 only"
      ASSET="${STEM}-ubuntu-rocm-7.2-x64.tar.gz"
      ;;
    *)
      error "Unsupported Linux backend: $BACKEND (use cpu|vulkan|rocm|auto)"
      ;;
  esac
fi

if [ -z "$ASSET" ]; then
  error "Internal: failed to choose asset"
fi
# ---------- download ----------
# dl FILENAME DEST
# Download the release asset FILENAME of the selected TAG from the GitHub
# releases download URL into the file DEST, following redirects and showing a
# progress bar. A failed transfer aborts the script through error.
dl() {
  local asset_name="$1" out_path="$2"
  local src="https://github.com/${REPO}/releases/download/${TAG}/${asset_name}"
  status "Downloading: $asset_name"
  if ! curl --fail --show-error --location --progress-bar \
    -H "User-Agent: ${UA}" \
    -o "$out_path" "$src"; then
    error "Download failed: $asset_name"
  fi
}
# Download the main archive, then the companion archive when one was selected.
# ARCHIVE2 stays empty when there is no companion.
ARCHIVE1="$TMP_DIR/asset1.${EXT}"
ARCHIVE2=""
dl "$ASSET" "$ARCHIVE1"
if [ -n "$ASSET2" ]; then
  ARCHIVE2="$TMP_DIR/asset2.zip"
  dl "$ASSET2" "$ARCHIVE2"
fi
# ---------- extract ----------
# Unpack the main archive (and the companion zip, if any) into one directory.
# The unpack tool is only required once the archive type is known.
EXTRACT_DIR="$TMP_DIR/extract"
mkdir -p "$EXTRACT_DIR"
case "$EXT" in
  zip)
    NEEDS="$(require unzip)"
    if [ -n "$NEEDS" ]; then
      error "Missing required tool: unzip"
    fi
    unzip -q "$ARCHIVE1" -d "$EXTRACT_DIR"
    ;;
  *)
    NEEDS="$(require tar)"
    if [ -n "$NEEDS" ]; then
      error "Missing required tool: tar"
    fi
    tar -xzf "$ARCHIVE1" -C "$EXTRACT_DIR"
    ;;
esac
if [ -n "$ARCHIVE2" ]; then
  # The companion archive is always a zip.
  NEEDS="$(require unzip)"
  if [ -n "$NEEDS" ]; then
    error "Missing required tool: unzip"
  fi
  unzip -q "$ARCHIVE2" -d "$EXTRACT_DIR"
fi
# ---------- install layout ----------
# Each install gets its own versioned directory under PREFIX/lib/llama.cpp;
# executables are linked into PREFIX/bin afterwards.
# The ${VAR:?} expansions abort instead of letting rm -rf / cp -R run against a
# path built from an empty variable.
INSTALL_ID="${TAG}-$(uname -s | tr '[:upper:]' '[:lower:]')-${ARCH}-${BACKEND}"
INSTALL_DIR="${ROOT_DIR:?}/${INSTALL_ID}"
status "Installing into: $INSTALL_DIR"
# Elevate only for the default /usr/local prefix and only when sudo was
# selected; AS_ROOT is intentionally expanded unquoted so that an empty value
# disappears from the command line.
AS_ROOT=""
if [ -n "$SUDO" ] && [ "$PREFIX" = "/usr/local" ]; then
  AS_ROOT="$SUDO"
fi
$AS_ROOT mkdir -p "${BIN_DIR:?}" "$ROOT_DIR"
# Replace any previous install of the same tag/OS/arch/backend.
$AS_ROOT rm -rf -- "$INSTALL_DIR"
$AS_ROOT mkdir -p "$INSTALL_DIR"
# "dir/." copies the directory's contents, dotfiles included.
$AS_ROOT cp -R "${EXTRACT_DIR:?}"/. "$INSTALL_DIR"/
# ---------- link executables ----------
# link_one SRC NAME
# Make BIN_DIR/NAME refer to SRC: try a forced symlink first (its error output
# is discarded) and fall back to copying SRC over the destination if ln fails.
# Runs through sudo under the same condition as the install step: sudo was
# selected and PREFIX is /usr/local. Returns the status of the last command run.
link_one() {
  local target="$1" link_path="${BIN_DIR}/$2"
  local elevate=""
  if [ -n "$SUDO" ] && [ "$PREFIX" = "/usr/local" ]; then
    elevate="$SUDO"
  fi
  # $elevate is unquoted on purpose: when empty it vanishes from the command.
  $elevate ln -sf "$target" "$link_path" 2>/dev/null \
    || $elevate cp -f "$target" "$link_path"
}
status "Linking binaries into: $BIN_DIR"

# link_matching FIND_PREDICATE...
# Link every path under INSTALL_DIR selected by the given find(1) predicates
# into BIN_DIR under its basename (via link_one). Returns 0 when at least one
# file was linked and 1 when nothing matched; a link that cannot be created
# aborts the script through error.
link_matching() {
  local file linked=0
  # Results are streamed instead of collected in an array: expanding an empty
  # "${array[@]}" under `set -u` is an "unbound variable" error on bash
  # releases before 4.4 (e.g. the bash 3.2 shipped with macOS), which used to
  # abort the script right after the "nothing found" warning.
  # The list arrives on fd 3 so nothing in the loop body can consume it by
  # reading stdin.
  while IFS= read -r file <&3; do
    link_one "$file" "${file##*/}" || error "Failed to link: $file"
    linked=$((linked + 1))
  done 3< <(find "$INSTALL_DIR" "$@" 2>/dev/null || true)
  [ "$linked" -gt 0 ]
}

if [ "$IS_WINDOWS" = "1" ]; then
  # link *.exe and *.bat
  link_matching -type f \( -iname '*.exe' -o -iname '*.bat' \) \
    || warning "No .exe/.bat found. Asset layout may have changed"
else
  # link all executable files (best effort)
  link_matching -type f -perm -111 \
    || warning "No executable files found. Asset layout may have changed"
  # friendly alias
  if [ -f "${BIN_DIR}/llama-cli" ]; then
    link_one "${BIN_DIR}/llama-cli" "llama"
  fi
fi
# ---------- optional systemd (Linux only) ----------
# configure_systemd
# When running on Linux with LLAMA_ENABLE_SYSTEMD=1, write
# /etc/systemd/system/llama-cpp.service for BIN_DIR/llama-server (started with
# SERVER_HOST, SERVER_PORT and SERVER_ARGS), then reload systemd and
# enable + restart the unit. Every unmet precondition (not Linux, not enabled,
# no systemctl, no executable llama-server, no way to act as root) makes the
# function return 0 without touching the system, with a warning where useful.
configure_systemd() {
  if [ "$IS_LINUX" != "1" ] || [ "$ENABLE_SYSTEMD" != "1" ]; then
    return 0
  fi
  if ! available systemctl; then
    warning "systemctl not found; skipping systemd service"
    return 0
  fi
  local server_path="${BIN_DIR}/llama-server"
  if [ ! -x "$server_path" ]; then
    warning "llama-server not found in ${BIN_DIR}; skipping systemd"
    return 0
  fi
  # SUDO is left empty on purpose when the script already runs as uid 0, so an
  # empty SUDO only blocks the install for unprivileged users. As root the
  # commands below run directly ($SUDO expands to nothing).
  if [ -z "$SUDO" ] && [ "$(id -u)" -ne 0 ]; then
    warning "No sudo available; cannot install system-wide systemd service. (Set LLAMA_PREFIX=~/.local and use user systemd manually.)"
    return 0
  fi
  status "Creating llama.cpp systemd service (llama-cpp.service)..."
  # Unquoted heredoc delimiter: the ${...} references are expanded now, so the
  # unit file contains the resolved path, host, port and extra arguments.
  $SUDO tee /etc/systemd/system/llama-cpp.service >/dev/null <<EOF
[Unit]
Description=llama.cpp server
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
ExecStart=${server_path} --host ${SERVER_HOST} --port ${SERVER_PORT} ${SERVER_ARGS}
Restart=always
RestartSec=2
Environment="PATH=/usr/local/bin:/usr/bin:/bin:${BIN_DIR}"
[Install]
WantedBy=multi-user.target
EOF
  status "Enabling and starting llama-cpp.service..."
  $SUDO systemctl daemon-reload
  $SUDO systemctl enable llama-cpp.service
  $SUDO systemctl restart llama-cpp.service
  status "llama.cpp server should be listening on ${SERVER_HOST}:${SERVER_PORT}"
}
configure_systemd
# Final summary: the completion notice goes to stderr via status, the install
# locations and suggested commands go to stdout.
status "Install complete"
printf '\n'
printf '%s\n' "Installed: $INSTALL_DIR" "Binaries: $BIN_DIR" ""
if [ "$IS_WINDOWS" = "1" ]; then
  printf '%s\n' \
    "Try (Git Bash):" \
    " ${BIN_DIR}/llama-cli.exe --help" \
    " ${BIN_DIR}/llama-server.exe --help"
else
  printf '%s\n' \
    "Try:" \
    " ${BIN_DIR}/llama-cli --help" \
    " ${BIN_DIR}/llama-server --help" \
    ""
  # Mention the service only when systemd setup was requested on a Linux host
  # that has systemctl.
  if [ "$ENABLE_SYSTEMD" = "1" ] && [ "$IS_LINUX" = "1" ] && available systemctl; then
    printf '%s\n' "Service:" " systemctl status llama-cpp.service"
  fi
fi
}
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment