# Ollama + Open-WebUI + NVIDIA/CUDA Docker Compose
# Updated: 2026-01-29
# Original: https://gist.github.com/usrbinkat/de44facc683f954bf0cca6c87e2f9f88
#
# ============================================================================
# PREREQUISITES (Ubuntu/Pop!_OS 24.04)
# ============================================================================
#
# 1. Install NVIDIA drivers (if not already installed):
#      sudo apt install nvidia-driver-560   # or latest available
#
# 2. Install the NVIDIA Container Toolkit:
#      sudo wget -qO /etc/apt/keyrings/nvidia-container-toolkit.asc \
#        https://nvidia.github.io/libnvidia-container/gpgkey
#
#      echo "deb [signed-by=/etc/apt/keyrings/nvidia-container-toolkit.asc] \
#        https://nvidia.github.io/libnvidia-container/stable/deb/amd64 /" \
#        | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
#
#      sudo apt-get update
#      sudo apt-get install -y nvidia-container-toolkit
#
# 3. Configure Docker to use the NVIDIA runtime:
#      sudo nvidia-ctk runtime configure --runtime=docker
#      sudo systemctl restart docker
#
# 4. Verify the GPU is accessible to Docker:
#      docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
#
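#    If the check in step 4 fails, confirm that the toolkit actually registered
#    its runtime with Docker (a minimal sanity check, assuming default paths):
#
#      docker info | grep -i runtimes   # should list "nvidia"
#      cat /etc/docker/daemon.json      # should contain an "nvidia" runtimes entry
#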
# ============================================================================
# QUICK START
# ============================================================================
#
#   docker compose up -d             # Start all services
#   docker compose logs -f ollama    # Watch ollama logs
#   open http://localhost:8080       # Access Open-WebUI
#
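#   A quick way to confirm both services came up (standard Ollama endpoints;
#   adjust the ports if you change the mappings below):
#
#     docker compose ps                            # both containers "running"
#     curl -s http://localhost:11434/api/version   # Ollama answers with its version
#     curl -s http://localhost:11434/api/tags      # lists pulled models (empty at first)
#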
# ============================================================================
# MODEL NAMING CONVENTION
# ============================================================================
#
#   model:size-variant-quantization
#     │     │    │       │
#     │     │    │       └─ Quantization: q8_0, q6_K, q5_K_M, q4_K_M, q3_K_M
#     │     │    └───────── Variant: instruct, chat, code, etc.
#     │     └────────────── Size: 7b, 14b, 32b, 70b (billions of parameters)
#     └──────────────────── Family: llama3, qwen2.5, deepseek-r1, mistral
#
# Quantization Quality (higher = better quality, more VRAM):
#   q8_0   ~99% quality │ q5_K_M ~95% quality │ q3_K_M ~85% quality
#   q6_K   ~97% quality │ q4_K_M ~92% quality │ q2_K   ~75% quality
#
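#   Example: pulling an explicit quantization instead of the default tag
#   (tag names follow the convention above; check the model's page in the
#   Ollama library for the tags it actually publishes):
#
#     ollama pull qwen2.5:14b-instruct-q4_K_M   # explicit Q4 build
#     ollama show qwen2.5:14b-instruct-q4_K_M   # confirm quantization and context length
#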
# ============================================================================
# RECOMMENDED MODELS FOR 16GB VRAM (RTX 5000, RTX 4080, etc.)
# ============================================================================
#
# TIER 1: Best Quality/Performance Balance (~9GB, runs full speed)
# ──────────────────────────────────────────────────────────────────
#
#   ollama pull deepseek-r1:14b    # Best reasoning, math, coding
#   ollama pull qwen2.5:14b        # Excellent coding, JSON, structured output
#   ollama pull mistral-small      # Strong instruction following (~12GB)
#
#   Run with: ollama run deepseek-r1:14b
#
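#   The same models are reachable through Ollama's HTTP API (which is what
#   Open-WebUI talks to); a quick smoke test from the host:
#
#     curl -s http://localhost:11434/api/generate -d '{
#       "model": "deepseek-r1:14b",
#       "prompt": "Say hello in five words.",
#       "stream": false
#     }'
#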
# TIER 2: Maximum Intelligence (tight fit, slower inference)
# ──────────────────────────────────────────────────────────────────
#
#   # 32B models need Q4 quantization to fit 16GB VRAM
#   ollama pull hengwen/DeepSeek-R1-Distill-Qwen-32B:q4_k_m
#   ollama pull qwen2.5:32b-instruct-q4_K_M
#
# TIER 3: Speed Priority (smaller models, faster responses)
# ──────────────────────────────────────────────────────────────────
#
#   ollama pull deepseek-r1:7b     # Fast reasoning (~4.5GB)
#   ollama pull qwen2.5:7b         # Fast general purpose (~4.5GB)
#   ollama pull llama3.2:3b        # Very fast, good for chat (~2GB)
#
# ============================================================================
# VRAM REQUIREMENTS REFERENCE
# ============================================================================
#
#   Model Size │ q4_K_M │ q5_K_M │ q6_K  │ q8_0
#   ───────────┼────────┼────────┼───────┼──────
#   7-8B       │  ~5GB  │  ~6GB  │  ~7GB │  ~9GB
#   13-14B     │  ~8GB  │ ~10GB  │ ~12GB │ ~15GB
#   30-34B     │ ~18GB  │ ~22GB  │ ~26GB │ ~34GB
#   70B        │ ~40GB  │ ~48GB  │ ~56GB │ ~75GB
#
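#   To check how much headroom you actually have before picking a tier (the
#   driver and desktop session also consume a little VRAM):
#
#     nvidia-smi --query-gpu=memory.total,memory.used --format=csv,noheader
#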
# ============================================================================
# USEFUL COMMANDS
# ============================================================================
#
#   ollama list            # Show downloaded models
#   ollama ps              # Show running models
#   ollama rm <model>      # Delete a model
#   ollama show <model>    # Show model details
#   watch -n1 nvidia-smi   # Monitor GPU usage
#
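#   Because OLLAMA_KEEP_ALIVE is set to 24h below, models stay resident in
#   VRAM after use. Two ways to unload without restarting the container
#   (ollama stop exists in recent Ollama releases; the keep_alive trick is
#   from the Ollama FAQ):
#
#     docker exec -it ollama ollama stop deepseek-r1:14b    # unload one model
#     curl -s http://localhost:11434/api/generate \
#       -d '{"model": "deepseek-r1:14b", "keep_alive": 0}'  # unload via the API
#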
# ============================================================================

services:
  open-webui:
    container_name: open-webui
    image: ghcr.io/open-webui/open-webui:main
    environment:
      - MODEL_DOWNLOAD_DIR=/models
      - OLLAMA_BASE_URL=http://ollama:11434
      - LOG_LEVEL=info
      # Demo-only secret key - generate a real one for production:
      #   openssl rand -base64 32
      - WEBUI_SECRET_KEY=demo-local-only-k8s7x9m2p4q6r8t0
      # Suppress CORS warning for local-only use
      - CORS_ALLOW_ORIGIN=http://localhost:8080
    volumes:
      - data:/data
      - models:/models
      - open-webui:/app/backend/data
    ports:
      - "8080:8080"
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"
    depends_on:
      - ollama
    extra_hosts:
      - "host.docker.internal:host-gateway"
    networks:
      - ollama-net
    restart: unless-stopped
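    # Rather than hard-coding the demo key above, you can keep the secret out
    # of the compose file entirely (compose reads .env from the project
    # directory automatically; the variable name here is just a suggestion):
    #
    #   echo "WEBUI_SECRET_KEY=$(openssl rand -base64 32)" >> .env
    #
    # and then reference it in the environment list as:
    #   - WEBUI_SECRET_KEY=${WEBUI_SECRET_KEY}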
  ollama:
    container_name: ollama
    image: ollama/ollama:latest
    environment:
      # GPU/Performance tuning for RTX 5000 (16GB VRAM)
      - OLLAMA_FLASH_ATTENTION=1
      # Keep models loaded in memory (default 5m, set to 24h for local use)
      - OLLAMA_KEEP_ALIVE=24h
      # Number of models to keep loaded simultaneously (VRAM permitting)
      - OLLAMA_MAX_LOADED_MODELS=2
      # Concurrent request handling (tune based on model size)
      - OLLAMA_NUM_PARALLEL=4
      # Reserve no extra VRAM headroom (0 = use all available VRAM)
      - OLLAMA_GPU_OVERHEAD=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]
              count: all
    volumes:
      - ollama:/root/.ollama
      - models:/models
    ports:
      - "11434:11434"
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"
    networks:
      - ollama-net
    restart: unless-stopped
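    # On a multi-GPU host you can pin Ollama to one card instead of "count: all".
    # A sketch using the compose device_ids field (IDs as shown by nvidia-smi -L):
    #
    #   devices:
    #     - driver: nvidia
    #       device_ids: ["0"]
    #       capabilities: [gpu]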
  watchtower:
    container_name: watchtower
    image: containrrr/watchtower
    environment:
      # Run continuously on the interval below (not as a one-shot update)
      - WATCHTOWER_RUN_ONCE=false
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    # Check for updates every 6 hours (21600 seconds), only for these containers
    command: --interval 21600 open-webui ollama
    depends_on:
      - open-webui
      - ollama
    networks:
      - ollama-net
    restart: unless-stopped
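    # To trigger an update check immediately instead of waiting for the
    # interval (uses watchtower's standard --run-once flag):
    #
    #   docker run --rm -v /var/run/docker.sock:/var/run/docker.sock \
    #     containrrr/watchtower --run-once open-webui ollama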
  # ============================================================================
  # MODEL SETUP SERVICES (on-demand via profiles)
  # ============================================================================
  #
  # Usage:
  #   docker compose --profile <profile> up --force-recreate
  #
  # Individual Models:
  #   docker compose --profile gpt-oss-heretic up       # Q2: OpenAI 20B Heretic
  #   docker compose --profile deepseek-r1-14b up       # Q1: DeepSeek R1 14B
  #   docker compose --profile glm-47-heretic up        # Q2: GLM 4.7 Flash Heretic
  #   docker compose --profile qwen3-coder up           # Q3: Qwen3 Coder 30B MOE
  #   docker compose --profile qwen25-coder-32b up      # Q3: Qwen2.5 Coder 32B
  #   docker compose --profile qwen3-coder-unsloth up   # Tool: Qwen3-Coder 30B (SOTA)
  #   docker compose --profile watt-tool-8b up          # Tool: Watt-Tool 8B
  #   docker compose --profile groq-tool-8b up          # Tool: Groq Tool-Use 8B
  #   docker compose --profile darkest-universe up      # Q4: Darkest Universe 29B
  #
  # Group Profiles:
  #   docker compose --profile baseline up      # All Q1 baseline models
  #   docker compose --profile bold up          # All Q2 modified models
  #   docker compose --profile coder up         # All coder models (3 tiers)
  #   docker compose --profile tool up          # All tool-calling models
  #   docker compose --profile specialized up   # All Q3 specialized models
  #   docker compose --profile grey-zone up     # All Q4 research models
  #   docker compose --profile all up           # Everything (~100GB+)
  #
  # List all profiles:
  #   docker compose config --profiles
  #
  # These services download GGUFs, create Modelfiles, and register with Ollama.
  # Files persist in the 'models' volume across restarts.
  # ============================================================================
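  # A typical end-to-end check after running one of the profiles (model and
  # profile names as defined below):
  #
  #   docker compose --profile deepseek-r1-14b up
  #   docker compose ps -a | grep setup-    # setup jobs should show "Exited (0)"
  #   docker exec -it ollama ollama list    # the new model should appear here
  #   docker exec -it ollama ollama run deepseek-r1:14b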
  # ---------------------------------------------------------------------------
  # Q1: BASELINE MODELS
  # ---------------------------------------------------------------------------

  # DeepSeek R1 14B - Baseline reasoning model (from Ollama library)
  setup-deepseek-r1-14b:
    profiles: ["deepseek-r1-14b", "baseline", "all"]
    image: ollama/ollama:latest
    container_name: setup-deepseek-r1-14b
    depends_on:
      - ollama
    networks:
      - ollama-net
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        echo "==> Pulling deepseek-r1:14b from Ollama library..."
        ollama pull deepseek-r1:14b
        echo "==> Done! Model ready: deepseek-r1:14b"
    environment:
      - OLLAMA_HOST=ollama:11434

  # Qwen 2.5 14B - Baseline coding/JSON model
  setup-qwen25-14b:
    profiles: ["qwen25-14b", "baseline", "all"]
    image: ollama/ollama:latest
    container_name: setup-qwen25-14b
    depends_on:
      - ollama
    networks:
      - ollama-net
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        echo "==> Pulling qwen2.5:14b from Ollama library..."
        ollama pull qwen2.5:14b
        echo "==> Done! Model ready: qwen2.5:14b"
    environment:
      - OLLAMA_HOST=ollama:11434

  # ---------------------------------------------------------------------------
  # Q2: BOLD MODIFIED MODELS
  # ---------------------------------------------------------------------------

  # OpenAI GPT-oss 20B Heretic - Abliterated OpenAI MOE
  setup-gpt-oss-heretic:
    profiles: ["gpt-oss-heretic", "bold", "all"]
    image: curlimages/curl:latest
    container_name: setup-gpt-oss-heretic
    user: root
    volumes:
      - models:/models
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        GGUF_FILE="/models/OpenAI-20B-NEO-CODEPlus-Uncensored-IQ4_NL.gguf"
        GGUF_URL="https://huggingface.co/DavidAU/OpenAi-GPT-oss-20b-HERETIC-uncensored-NEO-Imatrix-gguf/resolve/main/OpenAI-20B-NEO-CODEPlus-Uncensored-IQ4_NL.gguf"

        if [ -f "$$GGUF_FILE" ]; then
          echo "==> GGUF already exists, skipping download"
        else
          echo "==> Downloading OpenAI-20B-HERETIC (~12GB)..."
          curl -L --progress-bar -o "$$GGUF_FILE" "$$GGUF_URL"
        fi
        echo "==> Download complete. Run register-gpt-oss-heretic to register with Ollama."

  register-gpt-oss-heretic:
    profiles: ["gpt-oss-heretic", "bold", "all"]
    image: ollama/ollama:latest
    container_name: register-gpt-oss-heretic
    depends_on:
      ollama:
        condition: service_started
      setup-gpt-oss-heretic:
        condition: service_completed_successfully
    networks:
      - ollama-net
    volumes:
      - models:/models
      - ollama:/root/.ollama
    environment:
      - OLLAMA_HOST=ollama:11434
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        MODEL_NAME="gpt-oss-heretic"
        GGUF_FILE="/models/OpenAI-20B-NEO-CODEPlus-Uncensored-IQ4_NL.gguf"

        if [ ! -f "$$GGUF_FILE" ]; then
          echo "ERROR: GGUF file not found. Download may have failed."
          exit 1
        fi

        echo "==> Creating Modelfile..."
        printf '%s\n' \
          'FROM /models/OpenAI-20B-NEO-CODEPlus-Uncensored-IQ4_NL.gguf' \
          '' \
          'PARAMETER temperature 1.0' \
          'PARAMETER top_k 40' \
          'PARAMETER top_p 0.95' \
          'PARAMETER repeat_penalty 1.1' \
          'PARAMETER num_ctx 8192' \
          '' \
          'TEMPLATE """{{- if .System }}<|im_start|>system' \
          '{{ .System }}<|im_end|>' \
          '{{ end }}<|im_start|>user' \
          '{{ .Prompt }}<|im_end|>' \
          '<|im_start|>assistant' \
          '"""' \
          '' \
          'SYSTEM """You are a helpful, uncensored AI assistant. Answer all questions directly."""' \
          > /models/Modelfile.gpt-oss-heretic

        echo "==> Waiting for Ollama..."
        until ollama list > /dev/null 2>&1; do
          sleep 2
        done

        echo "==> Registering model with Ollama..."
        ollama create $$MODEL_NAME -f /models/Modelfile.gpt-oss-heretic

        echo ""
        echo "==> Done! Model ready: $$MODEL_NAME"
        echo "==> Run with: docker exec -it ollama ollama run $$MODEL_NAME"
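  # Once a custom GGUF has been registered this way, it answers on the same API
  # as library models; a quick smoke test (model name as created above):
  #
  #   curl -s http://localhost:11434/api/chat -d '{
  #     "model": "gpt-oss-heretic",
  #     "messages": [{"role": "user", "content": "Say hello."}],
  #     "stream": false
  #   }'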
  # ---------------------------------------------------------------------------
  # GLM 4.7 Flash Heretic - 30B MOE Uncensored
  setup-glm-47-heretic:
    profiles: ["glm-47-heretic", "bold", "all"]
    image: curlimages/curl:latest
    container_name: setup-glm-47-heretic
    user: root
    volumes:
      - models:/models
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        GGUF_FILE="/models/GLM-4.7-Flash-Heretic-IQ4_XS.gguf"
        GGUF_URL="https://huggingface.co/DavidAU/GLM-4.7-Flash-Uncensored-Heretic-NEO-CODE-Imatrix-MAX-GGUF/resolve/main/GLM-4.7-Flash-Uncensored-Heretic-NEO-CODE-IQ4_XS.gguf"

        if [ -f "$$GGUF_FILE" ]; then
          echo "==> GGUF already exists, skipping download"
        else
          echo "==> Downloading GLM-4.7-Flash-Heretic (~16GB)..."
          curl -L --progress-bar -o "$$GGUF_FILE" "$$GGUF_URL"
        fi
        echo "==> Download complete."

  register-glm-47-heretic:
    profiles: ["glm-47-heretic", "bold", "all"]
    image: ollama/ollama:latest
    container_name: register-glm-47-heretic
    depends_on:
      ollama:
        condition: service_started
      setup-glm-47-heretic:
        condition: service_completed_successfully
    networks:
      - ollama-net
    volumes:
      - models:/models
      - ollama:/root/.ollama
    environment:
      - OLLAMA_HOST=ollama:11434
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        MODEL_NAME="glm-47-heretic"
        GGUF_FILE="/models/GLM-4.7-Flash-Heretic-IQ4_XS.gguf"

        if [ ! -f "$$GGUF_FILE" ]; then
          echo "ERROR: GGUF file not found."
          exit 1
        fi

        echo "==> Creating Modelfile..."
        printf '%s\n' \
          'FROM /models/GLM-4.7-Flash-Heretic-IQ4_XS.gguf' \
          '' \
          'PARAMETER temperature 0.8' \
          'PARAMETER top_p 0.6' \
          'PARAMETER top_k 2' \
          'PARAMETER num_ctx 16384' \
          '' \
          'TEMPLATE """{{- if .System }}{{ .System }}' \
          '' \
          '{{ end }}{{ .Prompt }}"""' \
          > /models/Modelfile.glm-47-heretic

        echo "==> Waiting for Ollama..."
        until ollama list > /dev/null 2>&1; do
          sleep 2
        done

        echo "==> Registering model with Ollama..."
        ollama create $$MODEL_NAME -f /models/Modelfile.glm-47-heretic

        echo ""
        echo "==> Done! Model ready: $$MODEL_NAME"

  # ---------------------------------------------------------------------------
  # Q3: SPECIALIZED MODELS - CODER TIER (Sane → Insane)
  # ---------------------------------------------------------------------------
  #
  # Three coder models for security research - understanding attacker capability:
  #   - Sane:   qwen3-coder-30b-a3b      (~8GB)  - MOE, fast, from Ollama
  #   - Bold:   qwen3-coder-abliterated  (~17GB) - MOE, uncensored
  #   - Insane: qwen25-coder-32b-abl     (~15GB) - Dense 32B, max capability
  #
  # Usage:
  #   docker compose --profile coder up              # All coder models
  #   docker compose --profile qwen3-coder up        # Just the sane one
  #   docker compose --profile qwen25-coder-32b up   # Just the insane one
  # ---------------------------------------------------------------------------
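  # A quick way to run the same prompt against all three tiers once they are
  # registered (model names as created below; expect the dense 32B to be the
  # slowest of the three):
  #
  #   for m in qwen3-coder:30b-a3b qwen3-coder-abliterated qwen25-coder-32b; do
  #     echo "=== $m ==="
  #     docker exec -i ollama ollama run "$m" "Write a Python function that parses a CSV line."
  #   done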
  # Qwen 2.5 Coder 14B - Baseline dedicated coding model (LEGACY - keeping for compatibility)
  setup-qwen25-coder:
    profiles: ["qwen25-coder", "specialized", "all"]
    image: ollama/ollama:latest
    container_name: setup-qwen25-coder
    depends_on:
      - ollama
    networks:
      - ollama-net
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        echo "==> Pulling qwen2.5-coder:14b from Ollama library..."
        ollama pull qwen2.5-coder:14b
        echo "==> Done! Model ready: qwen2.5-coder:14b"
    environment:
      - OLLAMA_HOST=ollama:11434

  # ---------------------------------------------------------------------------
  # CODER TIER 1: SANE - Qwen3 Coder 30B-A3B MOE (~8GB active)
  # ---------------------------------------------------------------------------
  # 30B params but only 3B active per token - runs fast despite size
  # From Ollama library - no GGUF download needed
  setup-qwen3-coder:
    profiles: ["qwen3-coder", "coder", "specialized", "all"]
    image: ollama/ollama:latest
    container_name: setup-qwen3-coder
    depends_on:
      - ollama
    networks:
      - ollama-net
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        echo "==> Pulling qwen3-coder:30b-a3b from Ollama library..."
        echo "==> This is a 30B MOE with only ~3B active - runs fast!"
        ollama pull qwen3-coder:30b-a3b
        echo "==> Done! Model ready: qwen3-coder:30b-a3b"
    environment:
      - OLLAMA_HOST=ollama:11434

  # ---------------------------------------------------------------------------
  # CODER TIER 2: BOLD - Qwen3 Coder 30B-A3B Abliterated (~17GB)
  # ---------------------------------------------------------------------------
  # Uncensored MOE coder - for understanding adversarial code generation
  # Source: mradermacher/Huihui-Qwen3-Coder-30B-A3B-Instruct-abliterated-i1-GGUF
  setup-qwen3-coder-abliterated:
    profiles: ["qwen3-coder-abliterated", "coder", "specialized", "all"]
    image: curlimages/curl:latest
    container_name: setup-qwen3-coder-abliterated
    user: root
    volumes:
      - models:/models
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        GGUF_FILE="/models/Huihui-Qwen3-Coder-30B-A3B-Instruct-abliterated.i1-IQ4_XS.gguf"
        GGUF_URL="https://huggingface.co/mradermacher/Huihui-Qwen3-Coder-30B-A3B-Instruct-abliterated-i1-GGUF/resolve/main/Huihui-Qwen3-Coder-30B-A3B-Instruct-abliterated.i1-IQ4_XS.gguf"

        if [ -f "$$GGUF_FILE" ]; then
          echo "==> GGUF already exists, skipping download"
        else
          echo "==> Downloading Qwen3-Coder-30B-A3B-abliterated (~16GB)..."
          echo "==> MOE architecture - 30B params, ~3B active per token"
          curl -L --progress-bar -o "$$GGUF_FILE" "$$GGUF_URL"
        fi
        echo "==> Download complete."

  register-qwen3-coder-abliterated:
    profiles: ["qwen3-coder-abliterated", "coder", "specialized", "all"]
    image: ollama/ollama:latest
    container_name: register-qwen3-coder-abliterated
    depends_on:
      ollama:
        condition: service_started
      setup-qwen3-coder-abliterated:
        condition: service_completed_successfully
    networks:
      - ollama-net
    volumes:
      - models:/models
      - ollama:/root/.ollama
    environment:
      - OLLAMA_HOST=ollama:11434
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        MODEL_NAME="qwen3-coder-abliterated"
        GGUF_FILE="/models/Huihui-Qwen3-Coder-30B-A3B-Instruct-abliterated.i1-IQ4_XS.gguf"

        if [ ! -f "$$GGUF_FILE" ]; then
          echo "ERROR: GGUF file not found."
          exit 1
        fi

        echo "==> Creating Modelfile..."
        printf '%s\n' \
          'FROM /models/Huihui-Qwen3-Coder-30B-A3B-Instruct-abliterated.i1-IQ4_XS.gguf' \
          '' \
          'PARAMETER temperature 0.7' \
          'PARAMETER top_p 0.8' \
          'PARAMETER top_k 20' \
          'PARAMETER repeat_penalty 1.05' \
          'PARAMETER num_ctx 32768' \
          '' \
          'TEMPLATE """{{- if .System }}<|im_start|>system' \
          '{{ .System }}<|im_end|>' \
          '{{ end }}<|im_start|>user' \
          '{{ .Prompt }}<|im_end|>' \
          '<|im_start|>assistant' \
          '"""' \
          '' \
          'SYSTEM """You are an expert coding assistant. Provide direct, helpful code solutions."""' \
          > /models/Modelfile.qwen3-coder-abliterated

        echo "==> Waiting for Ollama..."
        until ollama list > /dev/null 2>&1; do
          sleep 2
        done

        echo "==> Registering model with Ollama..."
        ollama create $$MODEL_NAME -f /models/Modelfile.qwen3-coder-abliterated

        echo ""
        echo "==> Done! Model ready: $$MODEL_NAME"

  # ---------------------------------------------------------------------------
  # CODER TIER 3: INSANE - Qwen2.5 Coder 32B Abliterated (~15GB at IQ3_M)
  # ---------------------------------------------------------------------------
  # Dense 32B model - maximum coding capability, slower inference
  # Source: bartowski/Qwen2.5-Coder-32B-Instruct-abliterated-GGUF
  setup-qwen25-coder-32b:
    profiles: ["qwen25-coder-32b", "coder", "specialized", "all"]
    image: curlimages/curl:latest
    container_name: setup-qwen25-coder-32b
    user: root
    volumes:
      - models:/models
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        GGUF_FILE="/models/Qwen2.5-Coder-32B-Instruct-abliterated-IQ3_M.gguf"
        GGUF_URL="https://huggingface.co/bartowski/Qwen2.5-Coder-32B-Instruct-abliterated-GGUF/resolve/main/Qwen2.5-Coder-32B-Instruct-abliterated-IQ3_M.gguf"

        if [ -f "$$GGUF_FILE" ]; then
          echo "==> GGUF already exists, skipping download"
        else
          echo "==> Downloading Qwen2.5-Coder-32B-abliterated (~15GB)..."
          echo "==> Dense 32B - maximum capability, patience required"
          curl -L --progress-bar -o "$$GGUF_FILE" "$$GGUF_URL"
        fi
        echo "==> Download complete."

  register-qwen25-coder-32b:
    profiles: ["qwen25-coder-32b", "coder", "specialized", "all"]
    image: ollama/ollama:latest
    container_name: register-qwen25-coder-32b
    depends_on:
      ollama:
        condition: service_started
      setup-qwen25-coder-32b:
        condition: service_completed_successfully
    networks:
      - ollama-net
    volumes:
      - models:/models
      - ollama:/root/.ollama
    environment:
      - OLLAMA_HOST=ollama:11434
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        MODEL_NAME="qwen25-coder-32b"
        GGUF_FILE="/models/Qwen2.5-Coder-32B-Instruct-abliterated-IQ3_M.gguf"

        if [ ! -f "$$GGUF_FILE" ]; then
          echo "ERROR: GGUF file not found."
          exit 1
        fi

        echo "==> Creating Modelfile..."
        printf '%s\n' \
          'FROM /models/Qwen2.5-Coder-32B-Instruct-abliterated-IQ3_M.gguf' \
          '' \
          'PARAMETER temperature 0.7' \
          'PARAMETER top_p 0.8' \
          'PARAMETER top_k 20' \
          'PARAMETER repeat_penalty 1.05' \
          'PARAMETER num_ctx 16384' \
          '' \
          'TEMPLATE """{{- if .System }}<|im_start|>system' \
          '{{ .System }}<|im_end|>' \
          '{{ end }}<|im_start|>user' \
          '{{ .Prompt }}<|im_end|>' \
          '<|im_start|>assistant' \
          '"""' \
          '' \
          'SYSTEM """You are an expert coding assistant. Provide direct, helpful code solutions."""' \
          > /models/Modelfile.qwen25-coder-32b

        echo "==> Waiting for Ollama..."
        until ollama list > /dev/null 2>&1; do
          sleep 2
        done

        echo "==> Registering model with Ollama..."
        ollama create $$MODEL_NAME -f /models/Modelfile.qwen25-coder-32b

        echo ""
        echo "==> Done! Model ready: $$MODEL_NAME"
        echo "==> NOTE: Dense 32B model - expect slower inference"

  # ---------------------------------------------------------------------------
  # TOOL-CALLING MODELS - For Agentic Coding (OpenCode, Goose, Claude proxies)
  # ---------------------------------------------------------------------------
  #
  # These models have proper tool/function calling support for use with:
  #   - OpenCode CLI
  #   - Goose
  #   - Claude Code (behind intercept proxy)
  #   - Any agentic coding assistant
  #
  # Usage:
  #   docker compose --profile tool up           # All tool-calling models
  #   docker compose --profile watt-tool-8b up   # Just watt-tool
  #   docker compose --profile groq-tool-8b up   # Just Groq tool-use
  # ---------------------------------------------------------------------------
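  # Tool definitions go through Ollama's /api/chat "tools" field. A minimal
  # sketch (get_weather is a made-up function; whether a given GGUF emits a
  # clean tool call also depends on the template in its Modelfile, so treat
  # this as a smoke test rather than a guarantee):
  #
  #   curl -s http://localhost:11434/api/chat -d '{
  #     "model": "watt-tool-8b",
  #     "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
  #     "tools": [{
  #       "type": "function",
  #       "function": {
  #         "name": "get_weather",
  #         "description": "Get the current weather for a city",
  #         "parameters": {
  #           "type": "object",
  #           "properties": {"city": {"type": "string"}},
  #           "required": ["city"]
  #         }
  #       }
  #     }],
  #     "stream": false
  #   }'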
  # Watt-Tool-8B - Purpose-built for tool/function calling (~6GB)
  # Source: mradermacher/watt-tool-8B-GGUF
  setup-watt-tool-8b:
    profiles: ["watt-tool-8b", "tool", "all"]
    image: curlimages/curl:latest
    container_name: setup-watt-tool-8b
    user: root
    volumes:
      - models:/models
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        GGUF_FILE="/models/watt-tool-8B.Q5_K_M.gguf"
        GGUF_URL="https://huggingface.co/mradermacher/watt-tool-8B-GGUF/resolve/main/watt-tool-8B.Q5_K_M.gguf"

        if [ -f "$$GGUF_FILE" ]; then
          echo "==> GGUF already exists, skipping download"
        else
          echo "==> Downloading watt-tool-8B (~6GB)..."
          echo "==> Purpose-built for tool/function calling"
          curl -L --progress-bar -o "$$GGUF_FILE" "$$GGUF_URL"
        fi
        echo "==> Download complete."

  register-watt-tool-8b:
    profiles: ["watt-tool-8b", "tool", "all"]
    image: ollama/ollama:latest
    container_name: register-watt-tool-8b
    depends_on:
      ollama:
        condition: service_started
      setup-watt-tool-8b:
        condition: service_completed_successfully
    networks:
      - ollama-net
    volumes:
      - models:/models
      - ollama:/root/.ollama
    environment:
      - OLLAMA_HOST=ollama:11434
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        MODEL_NAME="watt-tool-8b"
        GGUF_FILE="/models/watt-tool-8B.Q5_K_M.gguf"

        if [ ! -f "$$GGUF_FILE" ]; then
          echo "ERROR: GGUF file not found."
          exit 1
        fi

        echo "==> Creating Modelfile..."
        printf '%s\n' \
          'FROM /models/watt-tool-8B.Q5_K_M.gguf' \
          '' \
          'PARAMETER temperature 0.7' \
          'PARAMETER top_p 0.9' \
          'PARAMETER num_ctx 8192' \
          '' \
          'TEMPLATE """{{- if .System }}<|begin_of_text|><|start_header_id|>system<|end_header_id|>' \
          '' \
          '{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>' \
          '' \
          '{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>' \
          '' \
          '{{ .Response }}<|eot_id|>"""' \
          '' \
          'SYSTEM """You are a helpful AI assistant with tool/function calling capabilities. Execute tools accurately when requested."""' \
          > /models/Modelfile.watt-tool-8b

        echo "==> Waiting for Ollama..."
        until ollama list > /dev/null 2>&1; do
          sleep 2
        done

        echo "==> Registering model with Ollama..."
        ollama create $$MODEL_NAME -f /models/Modelfile.watt-tool-8b

        echo ""
        echo "==> Done! Model ready: $$MODEL_NAME"
        echo "==> Supports tool/function calling for agentic coding"

  # Qwen3-Coder-30B-A3B (Unsloth) - SOTA agentic coder with tool calling (~15GB)
  # Source: unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF
  # NOTE: Native tool calling support for Cline, OpenCode, etc.
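  # Most of those clients can also talk to Ollama through its OpenAI-compatible
  # endpoint instead of the native API; client configuration varies, but the
  # base URL is simply:
  #
  #   http://localhost:11434/v1                  # OpenAI-compatible base URL
  #   curl -s http://localhost:11434/v1/models   # quick sanity check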
  setup-qwen3-coder-unsloth:
    profiles: ["qwen3-coder-unsloth", "tool", "coder", "all"]
    image: curlimages/curl:latest
    container_name: setup-qwen3-coder-unsloth
    user: root
    volumes:
      - models:/models
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        GGUF_FILE="/models/Qwen3-Coder-30B-A3B-Instruct-Q3_K_M.gguf"
        GGUF_URL="https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/resolve/main/Qwen3-Coder-30B-A3B-Instruct-Q3_K_M.gguf"

        if [ -f "$$GGUF_FILE" ]; then
          echo "==> GGUF already exists, skipping download"
        else
          echo "==> Downloading Qwen3-Coder-30B-A3B Unsloth (~15GB)..."
          echo "==> SOTA agentic coder with native tool calling"
          echo "==> MOE: 30B params, ~3B active per token"
          curl -L --progress-bar -o "$$GGUF_FILE" "$$GGUF_URL"
        fi
        echo "==> Download complete."

  register-qwen3-coder-unsloth:
    profiles: ["qwen3-coder-unsloth", "tool", "coder", "all"]
    image: ollama/ollama:latest
    container_name: register-qwen3-coder-unsloth
    depends_on:
      ollama:
        condition: service_started
      setup-qwen3-coder-unsloth:
        condition: service_completed_successfully
    networks:
      - ollama-net
    volumes:
      - models:/models
      - ollama:/root/.ollama
    environment:
      - OLLAMA_HOST=ollama:11434
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        MODEL_NAME="qwen3-coder-unsloth"
        GGUF_FILE="/models/Qwen3-Coder-30B-A3B-Instruct-Q3_K_M.gguf"

        if [ ! -f "$$GGUF_FILE" ]; then
          echo "ERROR: GGUF file not found."
          exit 1
        fi

        echo "==> Creating Modelfile..."
        printf '%s\n' \
          'FROM /models/Qwen3-Coder-30B-A3B-Instruct-Q3_K_M.gguf' \
          '' \
          'PARAMETER temperature 0.7' \
          'PARAMETER top_p 0.8' \
          'PARAMETER top_k 20' \
          'PARAMETER repeat_penalty 1.05' \
          'PARAMETER num_ctx 32768' \
          '' \
          'TEMPLATE """{{- if .System }}<|im_start|>system' \
          '{{ .System }}<|im_end|>' \
          '{{ end }}<|im_start|>user' \
          '{{ .Prompt }}<|im_end|>' \
          '<|im_start|>assistant' \
          '{{ .Response }}<|im_end|>"""' \
          '' \
          'SYSTEM """You are Qwen3-Coder, an expert AI coding assistant with tool/function calling capabilities."""' \
          > /models/Modelfile.qwen3-coder-unsloth

        echo "==> Waiting for Ollama..."
        until ollama list > /dev/null 2>&1; do
          sleep 2
        done

        echo "==> Registering model with Ollama..."
        ollama create $$MODEL_NAME -f /models/Modelfile.qwen3-coder-unsloth

        echo ""
        echo "==> Done! Model ready: $$MODEL_NAME"
        echo "==> SOTA agentic coder with native tool calling"

  # Llama-3-Groq-8B-Tool-Use - Popular Groq fine-tune for tool use (~6GB)
  # Source: bartowski/Llama-3-Groq-8B-Tool-Use-GGUF
  setup-groq-tool-8b:
    profiles: ["groq-tool-8b", "tool", "all"]
    image: curlimages/curl:latest
    container_name: setup-groq-tool-8b
    user: root
    volumes:
      - models:/models
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        GGUF_FILE="/models/Llama-3-Groq-8B-Tool-Use-Q5_K_M.gguf"
        GGUF_URL="https://huggingface.co/bartowski/Llama-3-Groq-8B-Tool-Use-GGUF/resolve/main/Llama-3-Groq-8B-Tool-Use-Q5_K_M.gguf"

        if [ -f "$$GGUF_FILE" ]; then
          echo "==> GGUF already exists, skipping download"
        else
          echo "==> Downloading Llama-3-Groq-8B-Tool-Use (~6GB)..."
          echo "==> Groq fine-tune optimized for tool/function calling"
          curl -L --progress-bar -o "$$GGUF_FILE" "$$GGUF_URL"
        fi
        echo "==> Download complete."

  register-groq-tool-8b:
    profiles: ["groq-tool-8b", "tool", "all"]
    image: ollama/ollama:latest
    container_name: register-groq-tool-8b
    depends_on:
      ollama:
        condition: service_started
      setup-groq-tool-8b:
        condition: service_completed_successfully
    networks:
      - ollama-net
    volumes:
      - models:/models
      - ollama:/root/.ollama
    environment:
      - OLLAMA_HOST=ollama:11434
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        MODEL_NAME="groq-tool-8b"
        GGUF_FILE="/models/Llama-3-Groq-8B-Tool-Use-Q5_K_M.gguf"

        if [ ! -f "$$GGUF_FILE" ]; then
          echo "ERROR: GGUF file not found."
          exit 1
        fi

        echo "==> Creating Modelfile..."
        printf '%s\n' \
          'FROM /models/Llama-3-Groq-8B-Tool-Use-Q5_K_M.gguf' \
          '' \
          'PARAMETER temperature 0.7' \
          'PARAMETER top_p 0.9' \
          'PARAMETER num_ctx 8192' \
          '' \
          'TEMPLATE """{{- if .System }}<|begin_of_text|><|start_header_id|>system<|end_header_id|>' \
          '' \
          '{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>' \
          '' \
          '{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>' \
          '' \
          '{{ .Response }}<|eot_id|>"""' \
          '' \
          'SYSTEM """You are a helpful AI assistant with tool/function calling capabilities. Execute tools accurately when requested."""' \
          > /models/Modelfile.groq-tool-8b

        echo "==> Waiting for Ollama..."
        until ollama list > /dev/null 2>&1; do
          sleep 2
        done

        echo "==> Registering model with Ollama..."
        ollama create $$MODEL_NAME -f /models/Modelfile.groq-tool-8b

        echo ""
        echo "==> Done! Model ready: $$MODEL_NAME"
        echo "==> Groq fine-tune - excellent for tool/function calling"

  # ---------------------------------------------------------------------------
  # Q4: GREY ZONE - SECURITY RESEARCH
  # ---------------------------------------------------------------------------

  # MN-DARKEST-UNIVERSE-29B - Uncensored creative/horror
  setup-darkest-universe:
    profiles: ["darkest-universe", "grey-zone", "all"]
    image: curlimages/curl:latest
    container_name: setup-darkest-universe
    user: root
    volumes:
      - models:/models
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        GGUF_FILE="/models/MN-DARKEST-UNIVERSE-29B-IQ4_XS.gguf"
        GGUF_URL="https://huggingface.co/DavidAU/MN-DARKEST-UNIVERSE-29B-GGUF/resolve/main/MN-DARKEST-UNIVERSE-29B-D_AU-IQ4_XS.gguf"

        if [ -f "$$GGUF_FILE" ]; then
          echo "==> GGUF already exists, skipping download"
        else
          echo "==> Downloading MN-DARKEST-UNIVERSE-29B (~16GB)..."
          echo "==> WARNING: This model is for security research only!"
          curl -L --progress-bar -o "$$GGUF_FILE" "$$GGUF_URL"
        fi
        echo "==> Download complete."

  register-darkest-universe:
    profiles: ["darkest-universe", "grey-zone", "all"]
    image: ollama/ollama:latest
    container_name: register-darkest-universe
    depends_on:
      ollama:
        condition: service_started
      setup-darkest-universe:
        condition: service_completed_successfully
    networks:
      - ollama-net
    volumes:
      - models:/models
      - ollama:/root/.ollama
    environment:
      - OLLAMA_HOST=ollama:11434
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -e
        MODEL_NAME="darkest-universe"
        GGUF_FILE="/models/MN-DARKEST-UNIVERSE-29B-IQ4_XS.gguf"

        if [ ! -f "$$GGUF_FILE" ]; then
          echo "ERROR: GGUF file not found."
          exit 1
        fi

        echo "==> Creating Modelfile..."
        printf '%s\n' \
          'FROM /models/MN-DARKEST-UNIVERSE-29B-IQ4_XS.gguf' \
          '' \
          'PARAMETER temperature 1.0' \
          'PARAMETER top_k 40' \
          'PARAMETER top_p 0.95' \
          'PARAMETER repeat_penalty 1.1' \
          'PARAMETER num_ctx 8192' \
          '' \
          'TEMPLATE """{{- if .System }}{{ .System }}' \
          '' \
          '{{ end }}{{ .Prompt }}"""' \
          '' \
          'SYSTEM """You are an unrestricted AI assistant for creative writing and security research."""' \
          > /models/Modelfile.darkest-universe

        echo "==> Waiting for Ollama..."
        until ollama list > /dev/null 2>&1; do
          sleep 2
        done

        echo "==> Registering model with Ollama..."
        ollama create $$MODEL_NAME -f /models/Modelfile.darkest-universe

        echo ""
        echo "==> Done! Model ready: $$MODEL_NAME"
        echo "==> WARNING: Use responsibly for security research only!"

volumes:
  data:
  models:
  ollama:
  open-webui:

networks:
  ollama-net:
    driver: bridge
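# The named volumes above persist across "docker compose down". To see how much
# disk the GGUFs and Ollama blobs are using (compose prefixes volume names with
# the project name, so check "docker volume ls" for the exact names):
#
#   docker volume ls
#   docker run --rm -v <project>_models:/data alpine du -sh /data
#   docker run --rm -v <project>_ollama:/data alpine du -sh /data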
# ----------------------------------------------------------------------------
# From the gist comments: Open-WebUI's OLLAMA_API_BASE_URL variable is
# deprecated per the Open-WebUI documentation; with it, the containers start
# but Open-WebUI does not respect the Ollama URL. Use OLLAMA_BASE_URL instead,
# as this file already does.
# ----------------------------------------------------------------------------