Docker Compose and NGINX configuration for serving a model with vLLM and Open WebUI on a small server - based on https://github.com/marib00/vllm-openwebui-nginx-compose - see also my blog post https://log.alets.ch/110/
docker-compose.yml
networks: # Define a custom network
  internal_network:
    driver: bridge

services:
  nginx-proxy:
    image: nginx:latest
    container_name: nginx-proxy
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/conf.d/default.conf
    depends_on:
      vllm-server:
        condition: service_healthy # Wait for vLLM to be healthy
      open-webui:
        condition: service_started # Wait for Open WebUI to start
    networks: # Connect Nginx to the internal network
      - internal_network
    restart: always

  vllm-server:
    image: vllm/vllm-openai:nightly
    container_name: vllm-server
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: ["gpu"]
    volumes:
      - /srv/huggingface:/root/.cache/huggingface
    ipc: host
    environment:
      - HUGGING_FACE_HUB_TOKEN=${HF_TOKEN}
      - CUDA_VISIBLE_DEVICES=0
      - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
    command: --model ${HF_MODEL} --max-model-len 4096
    healthcheck:
      test: ["CMD-SHELL", "curl -f --connect-timeout 4 --max-time 9 http://127.0.0.1:8000/health || exit 1"]
      interval: 15s
      timeout: 10s
      retries: 5
      start_period: 1200s
    networks: # Connect vllm-server to the internal network
      - internal_network
    restart: always

  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: open-webui
    depends_on:
      # For faster startup, don't wait for vllm-server to be fully healthy (service_healthy)
      vllm-server:
        condition: service_started
    volumes:
      - open-webui_data:/app/backend/data # Use a named volume for persistence
    environment:
      OPENAI_API_BASE_URL: http://vllm-server:8000/v1
    networks: # Connect open-webui to the internal network
      - internal_network
    restart: always

volumes:
  open-webui_data: # Define the named volume
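The compose file reads HF_TOKEN and HF_MODEL from the environment. A minimal sketch of bringing the stack up, assuming a .env file next to docker-compose.yml; the token and model name below are placeholders, not values from this gist:

# Sketch only: the token and model name are placeholders.
cat > .env <<'EOF'
HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
HF_MODEL=meta-llama/Llama-3.2-3B-Instruct
EOF

# Start all three services in the background; nginx-proxy only starts once
# the vLLM health check passes (start_period allows up to 1200s for model loading).
docker compose up -d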
nginx.conf
server {
    listen 80;
    server_name _; # Replace _ with your domain if you have one

    # Reverse proxy for Open WebUI
    location / {
        proxy_pass http://open-webui:8080;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Required for WebSockets
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_read_timeout 900s;
        proxy_send_timeout 900s;
    }

    # Reverse proxy for vLLM server API
    # External access will be https://your.domain/vllm-api/
    # For example, https://your.domain/vllm-api/v1/chat/completions
    location /vllm-api/ {
        proxy_pass http://vllm-server:8000/; # Note the trailing slash
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_buffering off;
        proxy_read_timeout 900s;
        proxy_send_timeout 900s;
    }
}
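With the proxy in place, the vLLM OpenAI-compatible API is reachable under the /vllm-api/ prefix, as noted in the comments above. A hedged example, assuming the stack is reachable on localhost and that the model name matches the HF_MODEL you configured:

# Call the chat completions endpoint through the nginx proxy (localhost assumed).
# Replace "your-model-name" with the value of HF_MODEL.
curl http://localhost/vllm-api/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "your-model-name",
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 64
      }'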