-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
100 lines (83 loc) · 2.65 KB
/
docker-compose.yml
File metadata and controls
100 lines (83 loc) · 2.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
services:
vllm-node:
  # vLLM serving container. The image is built locally from ./Dockerfile and
  # started through the bind-mounted run_vllm_agent.sh script.
  container_name: vllm-container
  build:
    dockerfile: Dockerfile
    context: .
  # Automatic restarts are currently switched off:
  # restart: unless-stopped
  gpus: all
  ports:
    - "8000:8000"
  env_file: ./vars.env
  environment:
    # runMe.sh chooses MODEL by launching `env MODEL=<name> docker compose up`;
    # the fallback covers a plain `docker compose up`.
    MODEL: "${MODEL:-glm47-flash}"
    # VARIANT picks a config variant inside a family model file (e.g. nvfp4);
    # it stays empty for legacy single-model files.
    VARIANT: "${VARIANT:-}"
  command: bash /app/run_vllm_agent.sh
  volumes:
    # Hugging Face cache shared with the host user's cache directory.
    - "${HOME}/.cache/huggingface:/root/.cache/huggingface"
    # Read-only project files exposed under /app.
    - "./scripts/run_vllm_agent.sh:/app/run_vllm_agent.sh:ro"
    - "./models:/app/models:ro"
    - "./secrets:/app/secrets:ro"
    - "./scripts:/app/scripts:ro"
    - "./vars.env:/app/vars.env:ro"
    - "./scripts/generate_litellm_config.py:/app/generate_litellm_config.py:ro"
    - "./scripts/litellm_config.template.yaml:/app/litellm_config.template.yaml:ro"
    # Named volume that the litellm service also mounts at the same path.
    - "litellm_config:/app/generated_configs"
    # Per-model directory mounted over vLLM's fused_moe configs directory.
    - "./moe_configs/${MODEL:-glm47-flash}:/usr/local/lib/python3.12/dist-packages/vllm/model_executor/layers/fused_moe/configs:ro"
  healthcheck:
    # Healthy once /app/generated_configs/config.yaml exists AND the models
    # endpoint on port 8000 lists "vllm_agent". The folded scalar below is
    # joined with single spaces into one shell command line.
    test:
      - CMD-SHELL
      - >-
        test -f /app/generated_configs/config.yaml
        && curl -s -H 'Authorization: Bearer sk-FAKE' http://localhost:8000/v1/models
        | grep -q vllm_agent
        || exit 1
    start_period: 150s
    interval: 15s
    timeout: 60s
    retries: 50
litellm:
  # LiteLLM proxy. It reads its configuration from the shared litellm_config
  # volume and is published on host port 4000.
  image: ghcr.io/berriai/litellm:main-stable
  command: ["--config=/app/generated_configs/config.yaml"]
  container_name: litellm
  # restart: unless-stopped
  volumes:
    # Same named volume that vllm-node mounts at /app/generated_configs.
    # NOTE(review): config.yaml is presumably written there by vllm-node
    # (its healthcheck waits for the file) - confirm.
    - litellm_config:/app/generated_configs
  ports:
    - "4000:4000"
  env_file:
    - ./vars.env
  depends_on:
    # Start only after both dependencies report healthy.
    db:
      condition: service_healthy
    vllm-node:
      condition: service_healthy
  healthcheck:
    # -O /dev/null discards the response body. Without an explicit output
    # target, wget saves a "liveliness" file into the container's working
    # directory on every probe (every 30s), which accumulates files or fails
    # outright when that directory is not writable.
    test: ["CMD-SHELL", "wget --no-verbose --tries=1 -O /dev/null http://localhost:4000/health/liveliness || exit 1"]
    interval: 30s
    timeout: 30s
    retries: 10
    start_period: 30s
db:
  # PostgreSQL 16 instance. The litellm service waits for this container's
  # healthcheck to pass before it starts.
  image: postgres:16
  restart: always
  container_name: litellm_db
  environment:
    POSTGRES_DB: litellm
    POSTGRES_USER: llmproxy
    # Read from the shell environment (or a project-level .env file) so a real
    # password does not have to be committed. The fallback is the previous
    # hard-coded value, so existing setups keep working unchanged.
    # NOTE(review): whatever connects to this database must use the same
    # password - presumably a connection string in vars.env; verify.
    POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-dbpassword9090}"
  volumes:
    # Data directory kept on the postgres_data named volume.
    - postgres_data:/var/lib/postgresql/data
  healthcheck:
    # Probes the litellm database as user llmproxy once per second.
    test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
    interval: 1s
    timeout: 5s
    retries: 10
volumes:
  # Backs the db service's /var/lib/postgresql/data mount.
  postgres_data:
    name: "litellm_postgres_data"
  # Mounted by both vllm-node and litellm at /app/generated_configs.
  # Created through the local driver as a tmpfs mount.
  litellm_config:
    driver: local
    driver_opts: {type: tmpfs, device: tmpfs}
    name: "litellm_config"