Skip to content

Instantly share code, notes, and snippets.

@ms-2k
Last active October 20, 2025 19:06
Show Gist options
  • Select an option

  • Save ms-2k/cc6ad8fdeae6407ecd825fdd46f9816a to your computer and use it in GitHub Desktop.

Select an option

Save ms-2k/cc6ad8fdeae6407ecd825fdd46f9816a to your computer and use it in GitHub Desktop.
Dockerfile for llama.cpp built with CUDA (12.8.1), ROCm (7.0.1), and Vulkan backends for RTX 3090 + AMD Instinct MI50 (or any RTX 30-series + gfx906).
# syntax=docker/dockerfile:1

# The stages below use `RUN --mount=type=cache`, which is a BuildKit feature;
# the syntax directive above selects a Dockerfile frontend that supports it.
#
# Global build arguments. ARGs declared before the first FROM are only visible
# in FROM lines, which is exactly where these are consumed.

# Ubuntu release used to pick the NVIDIA CUDA base image tags
ARG UBUNTU_VERSION=24.04
# CUDA toolkit version used to pick the NVIDIA CUDA base image tags
ARG CUDA_VERSION=12.8.1
# Target CUDA base images (devel for compiling, runtime for the final images)
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
# Build-time ROCm base (CUDA devel + ROCm)
FROM ${BASE_CUDA_DEV_CONTAINER} AS build-rocm-base

# Install build tools and dependencies.
# The apt lists are intentionally kept: the ROCm step below still needs them to
# resolve dependencies, and this stage never ends up in a shipped image.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        cmake \
        dialog \
        git \
        kmod \
        libcurl4-openssl-dev \
        libgomp1 \
        libnuma1 \
        libxcb-cursor-dev \
        libxcb-xinerama0 \
        libxcb-xinput0 \
        python3 \
        python3-pip \
        rsync \
        wget \
        xz-utils \
        zstd

# Install ROCm.
# The installer .deb is kept in a BuildKit cache mount so rebuilds do not
# re-download it. It is fetched to a ".part" file and renamed only on success,
# so an interrupted download can never be mistaken for a valid cached file.
RUN --mount=type=cache,target=/cache/amdgpu-build \
    deb=amdgpu-install_7.0.1.70001-1_all.deb && \
    if [ ! -s "/cache/amdgpu-build/${deb}" ]; then \
        wget "https://repo.radeon.com/amdgpu-install/7.0.1/ubuntu/noble/${deb}" -O "/cache/amdgpu-build/${deb}.part" && \
        mv "/cache/amdgpu-build/${deb}.part" "/cache/amdgpu-build/${deb}"; \
    fi && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y "/cache/amdgpu-build/${deb}" && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y python3-setuptools python3-wheel rocm

# ROCm 7 gfx906 workaround: copy the gfx906 rocBLAS kernel files from the
# Arch Linux rocblas 6.4.3 package (https://github.com/ROCm/ROCm/issues/4625).
# The package is fetched from the Arch Linux Archive so the version is pinned;
# the "latest package" download endpoint would start serving a newer rocblas
# while still being cached under the 6.4.3-3 file name.
# Extraction happens in a dedicated directory so everything the package
# unpacks (including its metadata dotfiles) is removed again afterwards.
RUN --mount=type=cache,target=/cache/rocblas-build \
    pkg=rocblas-6.4.3-3-x86_64.pkg.tar.zst && \
    if [ ! -s "/cache/rocblas-build/${pkg}" ]; then \
        wget "https://archive.archlinux.org/packages/r/rocblas/${pkg}" -O "/cache/rocblas-build/${pkg}.part" && \
        mv "/cache/rocblas-build/${pkg}.part" "/cache/rocblas-build/${pkg}"; \
    fi && \
    mkdir -p /tmp/rocblas && \
    tar -xf "/cache/rocblas-build/${pkg}" -C /tmp/rocblas && \
    find /tmp/rocblas/opt/rocm/lib/rocblas/library -name "*gfx906*" -exec cp {} /opt/rocm/lib/rocblas/library/ \; && \
    rm -rf /tmp/rocblas

# Set ROCm environment variables
ENV ROCM_PATH=/opt/rocm
ENV HIP_PATH=/opt/rocm
# ROCm tools and the bundled LLVM toolchain are put ahead of the system PATH
ENV PATH=$ROCM_PATH/bin:$ROCM_PATH/llvm/bin:$PATH
ENV LD_LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/lib64:$LD_LIBRARY_PATH
# Build stage with Vulkan SDK
FROM build-rocm-base AS build

# Each ARG below is declared immediately before its first use: a changed ARG
# value can invalidate the cache of every RUN that follows its declaration, so
# declaring them late keeps e.g. the Vulkan SDK layer cached when only the
# llama.cpp commit changes.

# Install Vulkan SDK
ARG VULKAN_VERSION=1.4.321.1
# The SDK tarball is kept in a BuildKit cache mount. It is downloaded to a
# ".part" file and renamed only on success, so a failed download is retried on
# the next build instead of being reused as a broken cache entry.
RUN --mount=type=cache,target=/cache/vulkan \
    ARCH=$(uname -m) && \
    sdk="vulkan-sdk-linux-${ARCH}-${VULKAN_VERSION}.tar.xz" && \
    if [ ! -s "/cache/vulkan/${sdk}" ]; then \
        wget -qO "/cache/vulkan/${sdk}.part" "https://sdk.lunarg.com/sdk/download/${VULKAN_VERSION}/linux/${sdk}" && \
        mv "/cache/vulkan/${sdk}.part" "/cache/vulkan/${sdk}"; \
    fi && \
    mkdir -p /opt/vulkan /tmp/vulkan-sdk && \
    tar -xf "/cache/vulkan/${sdk}" -C /tmp/vulkan-sdk --strip-components=1 && \
    mv "/tmp/vulkan-sdk/${ARCH}"/* /opt/vulkan/ && \
    rm -rf /tmp/vulkan-sdk

# Set Vulkan environment variables
ENV VULKAN_SDK=/opt/vulkan
ENV PATH=$VULKAN_SDK/bin:$PATH
ENV LD_LIBRARY_PATH=$VULKAN_SDK/lib:$LD_LIBRARY_PATH

# Target llama.cpp commit hash
ARG LLAMA_COMMIT=1d660d2fae42ea2e1d3569638e722bf7a37b6b19
# Clone llama.cpp (master branch) into /llama.cpp and check out the pinned commit
WORKDIR /
RUN git clone --branch master --single-branch --recurse-submodules \
        --config advice.detachedHead=false \
        https://github.com/ggml-org/llama.cpp.git && \
    git -C llama.cpp checkout "${LLAMA_COMMIT}"
WORKDIR /llama.cpp

# GPU architecture configuration
# CUDA architecture to build for (RTX 3090 = compute capability 8.6, See: https://developer.nvidia.com/cuda-gpus)
# Set to "default" to leave CMAKE_CUDA_ARCHITECTURES unset.
ARG CUDA_DOCKER_ARCH=86
# AMD GPU target architecture (MI50 = gfx906, See: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)
ARG AMDGPU_TARGETS=gfx906

# Build with CUDA, ROCm, and Vulkan backends.
# CMAKE_ARGS is expanded unquoted on purpose so that it contributes zero or
# more separate arguments; HIPCXX / HIP_PATH are taken from hipconfig.
RUN CMAKE_ARGS="" && \
    if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
    cmake -B build \
        -DGGML_NATIVE=OFF \
        -DGGML_CUDA=ON \
        -DGGML_HIP=ON \
        -DGGML_VULKAN=ON \
        -DGGML_BACKEND_DL=ON \
        -DGGML_CPU_ALL_VARIANTS=ON \
        -DLLAMA_BUILD_TESTS=OFF \
        "-DAMDGPU_TARGETS=${AMDGPU_TARGETS}" \
        -DCMAKE_BUILD_TYPE=Release \
        ${CMAKE_ARGS} \
        -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j"$(nproc)"

# Collect every built shared library into /app/lib (consumed by the "base" stage)
RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

# Collect binaries, Python scripts and requirements into /app/full (consumed by the "full" stage)
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
# Runtime ROCm base (CUDA runtime + ROCm)
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime-rocm-base

# Install runtime dependencies and clean up apt state in the same layer
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        curl \
        dialog \
        kmod \
        libgomp1 \
        libnuma1 \
        libvulkan-dev \
        rsync \
        wget \
        zstd && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

# Install ROCm.
# The installer .deb is kept in a BuildKit cache mount; it is downloaded to a
# ".part" file and renamed only on success so a failed download is never
# reused. The apt lists and archives fetched here are removed in the same
# layer so they do not add to the runtime image size.
RUN --mount=type=cache,target=/cache/amdgpu-runtime \
    deb=amdgpu-install_7.0.1.70001-1_all.deb && \
    if [ ! -s "/cache/amdgpu-runtime/${deb}" ]; then \
        wget "https://repo.radeon.com/amdgpu-install/7.0.1/ubuntu/noble/${deb}" -O "/cache/amdgpu-runtime/${deb}.part" && \
        mv "/cache/amdgpu-runtime/${deb}.part" "/cache/amdgpu-runtime/${deb}"; \
    fi && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y "/cache/amdgpu-runtime/${deb}" && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y python3-setuptools python3-wheel rocm && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# ROCm 7 gfx906 workaround: copy the gfx906 rocBLAS kernel files from the
# Arch Linux rocblas 6.4.3 package (https://github.com/ROCm/ROCm/issues/4625).
# The package is fetched from the Arch Linux Archive so the version is pinned;
# the "latest package" download endpoint would start serving a newer rocblas
# while still being cached under the 6.4.3-3 file name.
# Extraction happens in a dedicated directory so nothing the package unpacks
# (including its metadata dotfiles) is left behind in the image.
RUN --mount=type=cache,target=/cache/rocblas-build \
    pkg=rocblas-6.4.3-3-x86_64.pkg.tar.zst && \
    if [ ! -s "/cache/rocblas-build/${pkg}" ]; then \
        wget "https://archive.archlinux.org/packages/r/rocblas/${pkg}" -O "/cache/rocblas-build/${pkg}.part" && \
        mv "/cache/rocblas-build/${pkg}.part" "/cache/rocblas-build/${pkg}"; \
    fi && \
    mkdir -p /tmp/rocblas && \
    tar -xf "/cache/rocblas-build/${pkg}" -C /tmp/rocblas && \
    find /tmp/rocblas/opt/rocm/lib/rocblas/library -name "*gfx906*" -exec cp {} /opt/rocm/lib/rocblas/library/ \; && \
    rm -rf /tmp/rocblas

# Set ROCm environment variables
ENV ROCM_PATH=/opt/rocm
ENV HIP_PATH=/opt/rocm
ENV PATH=$ROCM_PATH/bin:$PATH
ENV LD_LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/lib64:$LD_LIBRARY_PATH
# Base runtime image
# Starts from the CUDA-runtime + ROCm stage and adds the shared libraries that
# the build stage collected in /app/lib, placing them directly under /app.
FROM runtime-rocm-base AS base
COPY --from=build /app/lib/ /app
# Full runtime image
# Adds the llama.cpp binaries, Python conversion scripts and their Python
# dependencies on top of the "base" stage, and runs tools.sh as the entrypoint.
FROM base AS full
# Settings read by the NVIDIA container runtime: expose all GPUs with all driver capabilities
ENV NVIDIA_DRIVER_CAPABILITIES=all
ENV NVIDIA_VISIBLE_DEVICES=all
# NOTE(review): presumably enables ROCm support for pre-Vega AMD GPUs — confirm it is still needed
ENV ROC_ENABLE_PRE_VEGA=1
COPY --from=build /app/full /app
WORKDIR /app
# Install Python and diagnostic tools, then the Python requirements.
# --no-cache-dir keeps pip's download cache out of the image layer; the apt
# cleanup runs in the same layer as the install so it actually reduces size.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        git \
        mesa-utils \
        pciutils \
        python3 \
        python3-pip \
        python3-wheel \
        rocm-smi \
        vulkan-tools \
    && pip install --no-cache-dir --break-system-packages -r requirements.txt \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete
ENTRYPOINT ["/app/tools.sh"]
# Server and light omitted
@ms-2k
Copy link
Author

ms-2k commented Sep 22, 2025

Example docker-compose.yaml. It requires the NVIDIA Container Toolkit to be installed on the host.

# Example Compose service that builds the "full" target of the Dockerfile and
# runs a user-supplied /app/run.sh inside it.
services:
  llama-cpp:
    container_name: llama-cpp
    build:
      context: .
      dockerfile: cuda-rocm-vulkan.Dockerfile
      target: full
    # NVIDIA GPU access (needs the NVIDIA Container Toolkit on the host)
    runtime: nvidia
    # Device nodes passed through from the host for the AMD GPU
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    security_opt:
      - seccomp=unconfined
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [ gpu ]
    # Mapping form is used so YAML quoting applies to the value only. In list
    # form (`- KEY='value'`) the quote characters become part of the value.
    environment:
      API_KEY: "<some secret goes here>"
      HSA_OVERRIDE_GFX_VERSION: "9.0.6"
    ports:
      - "8080:8080"
    volumes:
      - /path/to/huggingface:/root/.cache/huggingface
      - /path/to/run.sh:/app/run.sh:ro
      - /path/to/models:/app/models:ro
    # Override the image's tools.sh entrypoint and run the mounted script instead
    entrypoint: /bin/bash
    command: /app/run.sh
    restart: unless-stopped

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment