---
# Model-server configuration: a host memory budget followed by a list of
# model services. Each service declares its ports, its expected memory
# footprint, the environment it is launched with, and its command-line
# arguments.

# Memory available on the host.
# NOTE(review): presumably the budget that the per-model `vram_usage` /
# `ram_usage` figures are accounted against - confirm with the consumer.
hardware:
  ram: 64G
  vram: 30G

# NOTE(review): the declared `vram_usage` values below sum to 35.75G, which
# exceeds `hardware.vram` (30G). Presumably not every model is expected to be
# resident at the same time - confirm this is intentional.
#
# NOTE(review): `port` looks like the externally exposed port and
# `internal_port` the port of the backing server process - confirm.
#
# NOTE(review): the `args` keys look like llama.cpp server flags and are
# assumed to be passed through verbatim, so their kebab-case spelling is kept.
#
# Environment values are quoted so they load as strings rather than integers.
models:
  # Code model (Qwen2.5-Coder 7B, Q6_K_L quantization), 32k context.
  - name: "tabby-code"
    port: 18080
    internal_port: 28080
    vram_usage: 8.25G
    ram_usage: 2.6G
    env:
      CUDA_VISIBLE_DEVICES: '0'
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-7B-Instruct-Q6_K_L.gguf
      gpu-layers: 9999
      flash-attn: true
      ctx-size: 32768
      host: 0.0.0.0

  # Embedding model (nomic-embed-text v1, f32); runs with `embeddings: true`.
  - name: "tabby-embeddings"
    port: 18081
    internal_port: 28081
    vram_usage: 1G
    ram_usage: 2G
    env:
      CUDA_VISIBLE_DEVICES: '0'
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      model: /media/SanDisk/ai/models_live/nomic-embed-text-v1-f32.gguf
      gpu-layers: 9999
      flash-attn: true
      host: 0.0.0.0
      embeddings: true

  # Chat model (Qwen2.5 32B, Q5_K_L quantization), 16k context.
  - name: "big-chat"
    port: 18082
    internal_port: 28082
    vram_usage: 26.5G
    ram_usage: 2.5G
    env:
      CUDA_VISIBLE_DEVICES: '0'
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      model: /media/SanDisk/ai/models_live/Qwen2.5-32B-Instruct-Q5_K_L.gguf
      gpu-layers: 9999
      ctx-size: 16384
      flash-attn: true
      host: 0.0.0.0