services:
  vllm-bge:
    image: vllm/vllm-openai:latest
    ipc: host
    volumes:
      - ./models/bge-m3:/models
    command: [
      "--model", "/models",
      "--served-model-name", "bge-m3",
      "--dtype", "float16",
      "--gpu-memory-utilization", "0.9"
    ]
    ports:
      - "8000:8000"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]
    networks:
      - ragflow

  vllm-deepseek:
    image: vllm/vllm-openai:latest
    ipc: host
    volumes:
      - ./models/DeepSeek-R1-1.5B:/models
    command: [
      "--model", "/models",
      "--served-model-name", "deepseek-r1",
      "--dtype", "float16",
      "--tensor-parallel-size", "1",
      "--max-model-len", "4096"
    ]
    ports:
      - "8001:8000"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]
    networks:
      - ragflow

networks:
  ragflow:
    name: docker_ragflow
    driver: bridge
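
# Quick sanity check once both containers are up: a sketch, assuming the host
# port mappings above and vLLM's standard OpenAI-compatible routes
# (/v1/embeddings for the embedding model, /v1/chat/completions for the LLM).
# The "model" fields match the --served-model-name values set above.
#
#   curl http://localhost:8000/v1/embeddings \
#     -H "Content-Type: application/json" \
#     -d '{"model": "bge-m3", "input": "RAGFlow smoke test"}'
#
#   curl http://localhost:8001/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "deepseek-r1", "messages": [{"role": "user", "content": "ping"}]}'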