Last active
December 12, 2025 22:54
-
-
Save badnetmask/ee552d596224de9258793c416a775d15 to your computer and use it in GitHub Desktop.
Supporting material for my blog post: https://mteixeira.wordpress.com/2025/12/12/running-ollama-and-llama-cpp-on-talos-linux-on-an-amd-strix-halo-cpu/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# Helm values for the ollama chart (otwld/ollama-helm), running Ollama on an
# AMD Strix Halo integrated GPU via the Vulkan backend instead of ROCm.
ollama:
  mountPath: "/root/.ollama"
  gpu:
    # REQUIRED FOR AMD/VULKAN: Set to false to prevent automatic ROCm image selection.
    # The chart auto-appends "-rocm" suffix when type="amd", which we don't want for Vulkan.
    enabled: false
  models:
    # Automatically pull models at container startup
    pull:
      - llama3.2:3b
      - gemma3:4b
      - llava:7b
      - qwen3:8b

resources:
  requests:
    cpu: 2000m
    memory: 4Gi
    # REQUIRED FOR AMD: GPU resource request for AMD integrated GPU
    amd.com/gpu: 1
  limits:
    cpu: 16000m
    memory: 32Gi
    # REQUIRED FOR AMD: GPU resource limit (must match request for extended resources)
    amd.com/gpu: 1

# This is optional, but speeds up the container start by never deleting the models
# persistentVolume:
#   enabled: true
#   storageClass: <your-storage-class>
#   size: 50Gi
#   accessModes:
#     - ReadWriteOnce

extraEnv:
  # Allow cross-origin HTTP requests
  - name: OLLAMA_ORIGINS
    value: "*"
  # Host binding
  - name: OLLAMA_HOST
    value: "0.0.0.0:11434"
  # Set home directory to match the persistent volume mount
  - name: OLLAMA_HOME
    value: "/root/.ollama"
  - name: OLLAMA_DEBUG
    value: "0"
  # REQUIRED FOR VULKAN: Enable Vulkan backend (Ollama 0.12.11+)
  # This allows AMD GPUs to work without ROCm image
  - name: OLLAMA_VULKAN
    value: "1"

# REQUIRED FOR AMD/VULKAN: Mount GPU devices for direct hardware access.
# This allows multiple pods to share the GPU without device plugin exclusive allocation.
volumes:
  # REQUIRED: DRI (Direct Rendering Infrastructure) for GPU access
  - name: dri-devices
    hostPath:
      path: /dev/dri
  # REQUIRED FOR AMD: KFD (Kernel Fusion Driver) for AMD GPU compute
  - name: kfd-device
    hostPath:
      path: /dev/kfd

volumeMounts:
  # REQUIRED: Mount DRI devices into container
  - name: dri-devices
    mountPath: /dev/dri
  # REQUIRED FOR AMD: Mount KFD device into container
  - name: kfd-device
    mountPath: /dev/kfd

# This is optional, for debugging: installs GPU monitoring tools
# lifecycle:
#   postStart:
#     exec:
#       command:
#         - /bin/sh
#         - -c
#         - |
#           apt-get update && apt-get install -y libdrm-amdgpu1 nvtop && rm -rf /var/lib/apt/lists/*
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment