Docker Compose and NGINX configuration for running a vLLM model with Open WebUI on a small server. Based on https://github.com/marib00/vllm-openwebui-nginx-compose; see also my blog post: https://log.alets.ch/110/
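
To bring the stack up, a minimal sketch: it assumes the two files below are saved side by side as docker-compose.yml and nginx.conf (the nginx filename is fixed by the volume mount in the compose file), and that a .env file in the same directory supplies the HF_TOKEN and HF_MODEL variables the compose file references. The token and model name shown here are placeholders only.

# .env
HF_TOKEN=hf_xxxxxxxxxxxxxxxx
HF_MODEL=meta-llama/Llama-3.2-3B-Instruct

# pull images and start everything in the background
docker compose up -d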
docker-compose.yml

networks: # Define a custom network
  internal_network:
    driver: bridge

services:
  nginx-proxy:
    image: nginx:latest
    container_name: nginx-proxy
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/conf.d/default.conf
    depends_on:
      vllm-server:
        condition: service_healthy # Wait for vLLM to be healthy
      open-webui:
        condition: service_started # Wait for Open WebUI to start
    networks: # Connect Nginx to the internal network
      - internal_network
    restart: always

  vllm-server:
    image: vllm/vllm-openai:nightly
    container_name: vllm-server
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: ["gpu"]
    volumes:
      - /srv/huggingface:/root/.cache/huggingface
    ipc: host
    environment:
      - HUGGING_FACE_HUB_TOKEN=${HF_TOKEN}
      - CUDA_VISIBLE_DEVICES=0
      - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
    command: --model ${HF_MODEL} --max-model-len 4096
    healthcheck:
      test: ["CMD-SHELL", "curl -f --connect-timeout 4 --max-time 9 http://127.0.0.1:8000/health || exit 1"]
      interval: 15s
      timeout: 10s
      retries: 5
      start_period: 1200s
    networks: # Connect vllm-server to the internal network
      - internal_network
    restart: always

  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: open-webui
    depends_on:
      # For faster startup, don't wait for vllm-server to be fully healthy (service_healthy)
      vllm-server:
        condition: service_started
    volumes:
      - open-webui_data:/app/backend/data # Use a named volume for persistence
    environment:
      OPENAI_API_BASE_URL: http://vllm-server:8000/v1
    networks: # Connect open-webui to the internal network
      - internal_network
    restart: always

volumes:
  open-webui_data: # Define the named volume
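
The healthcheck's start_period of 1200s gives the vLLM container up to 20 minutes to download and load the model on first start. A rough way to follow that and confirm the server came up, assuming you run these on the Docker host itself (the /vllm-api/ path is defined in the nginx.conf below):

# watch the model download and server startup
docker compose logs -f vllm-server

# once vllm-server is healthy, nginx starts and the health endpoint answers through the proxy
curl http://localhost/vllm-api/health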
nginx.conf

server {
    listen 80;
    server_name _; # Replace _ with your domain if you have one

    # Reverse proxy for Open WebUI
    location / {
        proxy_pass http://open-webui:8080;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Required for WebSockets
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";

        proxy_read_timeout 900s;
        proxy_send_timeout 900s;
    }

    # Reverse proxy for vLLM server API
    # External access will be https://your.domain/vllm-api/
    # For example, https://your.domain/vllm-api/v1/chat/completions
    location /vllm-api/ {
        proxy_pass http://vllm-server:8000/; # Note the trailing slash
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        proxy_buffering off;
        proxy_read_timeout 900s;
        proxy_send_timeout 900s;
    }
}
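
Once everything is running, Open WebUI is served at the root path (http://localhost/ or your domain), and the vLLM OpenAI-compatible API is reachable under /vllm-api/ as the comments above describe. A minimal request sketch: the model value must match the HF_MODEL the server was started with (a placeholder here), and note that this nginx config only listens on plain HTTP port 80, so TLS for the https:// URLs in the comments has to be terminated elsewhere.

curl http://localhost/vllm-api/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "meta-llama/Llama-3.2-3B-Instruct",
        "messages": [{"role": "user", "content": "Hello!"}]
      }'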