Docker Compose and NGINX configuration for running a vLLM model with Open WebUI on a small server. Based on https://github.com/marib00/vllm-openwebui-nginx-compose; see also my blog post: https://log.alets.ch/110/
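
To bring the stack up, a minimal sketch: it assumes the two files below are saved side by side as docker-compose.yml and nginx.conf (the nginx filename is fixed by the volume mount in the compose file), and that a .env file in the same directory supplies the HF_TOKEN and HF_MODEL variables the compose file references. The token and model name shown here are placeholders only.

# .env
HF_TOKEN=hf_xxxxxxxxxxxxxxxx
HF_MODEL=meta-llama/Llama-3.2-3B-Instruct

# pull images and start everything in the background
docker compose up -d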
docker-compose.yml

networks: # Define a custom network
  internal_network:
    driver: bridge

services:
  nginx-proxy:
    image: nginx:latest
    container_name: nginx-proxy
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/conf.d/default.conf
    depends_on:
      vllm-server:
        condition: service_healthy # Wait for vLLM to be healthy
      open-webui:
        condition: service_started # Wait for Open WebUI to start
    networks: # Connect Nginx to the internal network
      - internal_network
    restart: always

  vllm-server:
    image: vllm/vllm-openai:nightly
    container_name: vllm-server
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: ["gpu"]
    volumes:
      - /srv/huggingface:/root/.cache/huggingface
    ipc: host
    environment:
      - HUGGING_FACE_HUB_TOKEN=${HF_TOKEN}
      - CUDA_VISIBLE_DEVICES=0
      - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
    command: --model ${HF_MODEL} --max-model-len 4096
    healthcheck:
      test: ["CMD-SHELL", "curl -f --connect-timeout 4 --max-time 9 http://127.0.0.1:8000/health || exit 1"]
      interval: 15s
      timeout: 10s
      retries: 5
      start_period: 1200s
    networks: # Connect vllm-server to the internal network
      - internal_network
    restart: always

  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: open-webui
    depends_on:
      # For faster startup, don't wait for vllm-server to be fully healthy (service_healthy)
      vllm-server:
        condition: service_started
    volumes:
      - open-webui_data:/app/backend/data # Use a named volume for persistence
    environment:
      OPENAI_API_BASE_URL: http://vllm-server:8000/v1
    networks: # Connect open-webui to the internal network
      - internal_network
    restart: always

volumes:
  open-webui_data: # Define the named volume
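
The healthcheck's start_period of 1200s gives the vLLM container up to 20 minutes to download and load the model on first start. A rough way to follow that and confirm the server came up, assuming you run these on the Docker host itself (the /vllm-api/ path is defined in the nginx.conf below):

# watch the model download and server startup
docker compose logs -f vllm-server

# once vllm-server is healthy, nginx starts and the health endpoint answers through the proxy
curl http://localhost/vllm-api/health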
nginx.conf

server {
    listen 80;
    server_name _; # Replace _ with your domain if you have one

    # Reverse proxy for Open WebUI
    location / {
        proxy_pass http://open-webui:8080;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Required for WebSockets
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";

        proxy_read_timeout 900s;
        proxy_send_timeout 900s;
    }

    # Reverse proxy for vLLM server API
    # External access will be https://your.domain/vllm-api/
    # For example, https://your.domain/vllm-api/v1/chat/completions
    location /vllm-api/ {
        proxy_pass http://vllm-server:8000/; # Note the trailing slash
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        proxy_buffering off;
        proxy_read_timeout 900s;
        proxy_send_timeout 900s;
    }
}
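
Once everything is running, Open WebUI is served at the root path (http://localhost/ or your domain), and the vLLM OpenAI-compatible API is reachable under /vllm-api/ as the comments above describe. A minimal request sketch: the model value must match the HF_MODEL the server was started with (a placeholder here), and note that this nginx config only listens on plain HTTP port 80, so TLS for the https:// URLs in the comments has to be terminated elsewhere.

curl http://localhost/vllm-api/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "meta-llama/Llama-3.2-3B-Instruct",
        "messages": [{"role": "user", "content": "Hello!"}]
      }'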