Compile PyTorch 2.10.0 in Docker (Ubuntu 24.04 + CUDA 12.8 with sm_70 support for the V100)
ci-build.sh
#!/usr/bin/env bash
set -euo pipefail

# CI-friendly script to build the Docker builder stage and extract the generated .deb to the host
# Usage: ci-build.sh [image_tag] [output_dir]
# Example: ci-build.sh pytorch-builder:latest ./artifacts_host

IMAGE_TAG=${1:-pytorch-builder:latest}
OUTPUT_DIR=${2:-./artifacts_host}
DOCKER_CMD=${DOCKER_CMD:-docker}
BUILD_ARGS=${BUILD_ARGS:-}

echo "Building image '${IMAGE_TAG}' (target=builder)"
${DOCKER_CMD} build --target builder -t "${IMAGE_TAG}" ${BUILD_ARGS} .

echo "Creating temporary container to extract artifacts"
CID=$(${DOCKER_CMD} create "${IMAGE_TAG}" /bin/true)
mkdir -p "${OUTPUT_DIR}"

echo "Copying /artifacts from container ${CID} to ${OUTPUT_DIR}"
${DOCKER_CMD} cp "${CID}:/artifacts/." "${OUTPUT_DIR}/"

echo "Removing temporary container ${CID}"
${DOCKER_CMD} rm "${CID}"

echo "Artifacts copied to: ${OUTPUT_DIR}"
ls -lah "${OUTPUT_DIR}"
echo "Done."
Dockerfile
# Dockerfile: Ubuntu 24.04 + CUDA 12.8 + cuDNN9 + NCCL
# Targets: PyTorch v2.10.0 build from source, CUDA 12.8, cuDNN9, NCCL
# GPU target: Tesla V100 (compute capability 7.0 / sm_70)
FROM ubuntu:24.04 AS builder
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC

# Ensure basics and build deps
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential git ca-certificates cmake ninja-build pkg-config ccache \
    python3 python3-venv python3-dev python-is-python3 \
    libopenblas-dev libblas-dev liblapack-dev libeigen3-dev \
    zlib1g-dev libbz2-dev liblzma-dev libffi-dev libssl-dev \
    libjpeg-dev libpng-dev libgflags-dev libgoogle-glog-dev libnuma-dev \
    protobuf-compiler libprotobuf-dev libsnappy-dev \
    openmpi-bin libopenmpi-dev wget curl gnupg2 \
    gcc g++ \
    && rm -rf /var/lib/apt/lists/*

# Build everything under the system python
RUN wget https://bootstrap.pypa.io/get-pip.py \
    && python3 get-pip.py --break-system-packages

# Install Python build dependencies (pip packages)
RUN python3 -m pip install --upgrade pip setuptools wheel --break-system-packages
RUN python3 -m pip install numpy pyyaml typing_extensions future pybind11 protobuf ninja --break-system-packages

# Add NVIDIA CUDA apt repository and install CUDA 12.8, cuDNN9, and NCCL
RUN wget -qO /usr/share/keyrings/cuda-archive-keyring.gpg \
    https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-archive-keyring.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/cuda-archive-keyring.gpg] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/ /" > /etc/apt/sources.list.d/cuda.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
    cuda-toolkit-12-8 libcudnn9-cuda-12 libcudnn9-dev-cuda-12 libnccl2 libnccl-dev \
    && rm -rf /var/lib/apt/lists/* \
    && ln -s /usr/local/cuda-12.8 /usr/local/cuda || true

# Set environment variables for CUDA and PyTorch build
ENV CUDA_HOME=/usr/local/cuda-12.8 \
    PATH=/usr/local/cuda-12.8/bin:${PATH} \
    LD_LIBRARY_PATH=/usr/local/cuda-12.8/lib64 \
    TORCH_CUDA_ARCH_LIST="7.0" \
    CMAKE_BUILD_TYPE=Release
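
# Note: TORCH_CUDA_ARCH_LIST="7.0" above emits SASS for sm_70 only. Appending "+PTX"
# ("7.0+PTX") would also embed PTX so newer GPUs could JIT-compile the kernels, at the
# cost of a larger binary; sm_70-only is fine when the V100 is the sole target.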

# Clone PyTorch v2.10.0 and submodules
ARG PYTORCH_VERSION=v2.10.0
WORKDIR /opt
RUN git clone --recursive -b ${PYTORCH_VERSION} https://github.com/pytorch/pytorch.git /opt/pytorch

# Install PyTorch Python dev requirements (some are optional; this covers common ones)
WORKDIR /opt/pytorch
RUN python3 -m pip install -r requirements.txt --break-system-packages || true
| # Build & package PyTorch (from source) | |
| # Environment settings for a CUDA build with cuDNN and NCCL | |
| ENV USE_CUDA=1 \ | |
| USE_CUDNN=1 \ | |
| USE_NCCL=1 \ | |
| BUILD_TEST=0 | |
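
# Note: MAX_JOBS=$(nproc) below uses every core; PyTorch compiles are memory-hungry
# (a rough rule of thumb is ~2 GB of RAM per compile job), so pin MAX_JOBS lower if
# the build gets OOM-killed.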
# Build wheel
RUN MAX_JOBS=$(nproc) python3 setup.py bdist_wheel

# Install fpm and package the wheel into a .deb in /artifacts, then install it for verification.
# fpm's "python" source type expects a setup.py or a PyPI package name rather than a prebuilt
# wheel, so the wheel is first staged into a directory tree with pip and packaged with "fpm -s dir".
RUN apt-get update && apt-get install -y --no-install-recommends ruby-dev rubygems unzip build-essential && \
    gem install --no-document fpm && \
    mkdir -p /artifacts && \
    PKGVER=$(python3 -c "import importlib.metadata as m; print(m.version('torch'))" 2>/dev/null || echo ${PYTORCH_VERSION#v}) && \
    for whl in dist/*.whl; do \
        echo "Packaging $whl -> /artifacts/pytorch_${PKGVER}_amd64.deb"; \
        STAGING=$(mktemp -d) && \
        python3 -m pip install --no-deps --break-system-packages --target "${STAGING}/usr/lib/python3/dist-packages" "$whl" && \
        fpm -s dir -t deb --name pytorch --version "$PKGVER" --package /artifacts/pytorch_${PKGVER}_amd64.deb --vendor pytorch --description "PyTorch ${PKGVER} built with CUDA 12.8, cuDNN9, NCCL" -C "${STAGING}" usr || exit 1; \
        rm -rf "${STAGING}"; \
    done && \
    dpkg -i /artifacts/*.deb || true

# Clean up source / caches to reduce image size
RUN rm -rf /opt/pytorch/build /root/.cache/pip/*

# Final minimal runtime image containing only the .deb artifact
FROM ubuntu:24.04 AS runtime
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates && rm -rf /var/lib/apt/lists/*
COPY --from=builder /artifacts /artifacts
WORKDIR /workspace
CMD ["bash"]
Short summary: The Dockerfile is a multi-stage build whose builder stage starts from Ubuntu 24.04, adds CUDA 12.8, cuDNN 9, and NCCL from NVIDIA's apt repository, and compiles PyTorch v2.10.0 from source with CUDA/cuDNN/NCCL support targeting sm_70 (Tesla V100). It builds a Python wheel, converts it into a .deb with fpm, and places the package in /artifacts (installing it briefly for verification); a minimal runtime stage then copies only /artifacts into a clean Ubuntu image. Use the helper script ci-build.sh to build the builder stage and extract the .deb to the host.
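
To sanity-check the extracted package without a GPU, inspect its metadata and payload directly (assuming the default ./artifacts_host output directory):

dpkg-deb --info ./artifacts_host/pytorch_*.deb            # control metadata: name, version, description
dpkg-deb --contents ./artifacts_host/pytorch_*.deb | head # file listing; torch/ should appear under the install path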