Last active
March 8, 2026 00:50
-
-
Save EvilFreelancer/16d92ec2f5605c975501c405ab05eaba to your computer and use it in GitHub Desktop.
Qwen 3.5 27b on vLLM 0.17.0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# docker-compose service: serves Qwen/Qwen3.5-27B through vLLM's
# OpenAI-compatible API server (image vllm/vllm-openai:v0.17.0).
services:
  qwen35-27b:
    image: vllm/vllm-openai:v0.17.0
    # Alternative to the pinned image: build vLLM locally from ./vllm.
    #build:
    # context: ./vllm
    restart: always
    volumes:
      # Persist the Hugging Face cache so model weights survive container restarts.
      - ./vllm_data:/root/.cache/huggingface
    entrypoint: vllm
    # Folded scalar (>): the flags below are joined into one command line.
    command: >
      serve Qwen/Qwen3.5-27B
      --served-model-name Qwen/Qwen3.5-27B
      --dtype auto
      --gpu-memory-utilization 0.7
      --mm-encoder-tp-mode data
      --mm-processor-cache-type shm
      --max-model-len 128000
      --max-num-seqs 2
      --max-num-batched-tokens 32768
      --tensor-parallel-size 1
      --kv-cache-dtype fp8
      --enable-chunked-prefill
      --async-scheduling
      --enable-auto-tool-choice
      --tool-call-parser qwen3_coder
      --reasoning-parser qwen3
    environment:
      LD_LIBRARY_PATH: /usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/lib/x86_64-linux-gnu
      NCCL_IGNORE_DISABLED_P2P: "1"
      HF_HUB_ENABLE_HF_TRANSFER: "0"
      # Opt out of vLLM usage statistics / telemetry.
      VLLM_NO_USAGE_STATS: "1"
      DO_NOT_TRACK: "1"
    ports:
      # Host port 8082 -> container port 8000 (the API server's listen port).
      - 8082:8000
    deploy:
      resources:
        reservations:
          devices:
            # Reserve only the GPU with index "1" for this service.
            - driver: nvidia
              device_ids: [ "1" ]
              capabilities: [ gpu ]
    logging:
      driver: "json-file"
      options:
        # NOTE(review): only max-size is set; no max-file option is given,
        # so rotation behavior follows the driver's default — confirm the
        # intended log retention.
        max-size: "100k"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment