---
# llama_proxy_man configuration.
#
# `hardware` declares the total RAM/VRAM budget available to the proxy;
# each entry under `models` describes one llama.cpp server instance:
#   port          – externally exposed proxy port
#   internal_port – port the backing llama.cpp server listens on
#   vram_usage / ram_usage – estimated footprint, counted against `hardware`
#   env           – environment variables passed to the server process
#   args          – llama.cpp server command-line arguments (key = flag name)
#
# NOTE(review): sizes like 64G / 8.25G are plain strings parsed by the
# consumer — verify the expected unit-suffix format against llama_proxy_man.
hardware:
  ram: 64G
  vram: 30G

models:
  # Code-completion model (Qwen2.5-Coder 7B).
  - name: "tabby-code"
    port: 18080
    internal_port: 28080
    vram_usage: 8.25G
    ram_usage: 2.6G
    env:
      # Env var values are strings at the OS level — quote so the YAML
      # parser does not type them as integers/floats.
      CUDA_VISIBLE_DEVICES: '0'
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-7B-Instruct-Q6_K_L.gguf
      gpu-layers: 9999  # 9999 = offload all layers to GPU
      flash-attn: true
      ctx-size: 32768
      host: 0.0.0.0

  # Embedding model for tabby (nomic-embed-text).
  - name: "tabby-embeddings"
    port: 18081
    internal_port: 28081
    vram_usage: 1G
    ram_usage: 2G
    env:
      CUDA_VISIBLE_DEVICES: '0'
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      model: /media/SanDisk/ai/models_live/nomic-embed-text-v1-f32.gguf
      gpu-layers: 9999
      flash-attn: true
      host: 0.0.0.0
      embeddings: true  # run llama.cpp server in embedding mode

  # Large general chat model (Qwen2.5 32B).
  - name: "big-chat"
    port: 18082
    internal_port: 28082
    vram_usage: 26.5G
    ram_usage: 2.5G
    env:
      CUDA_VISIBLE_DEVICES: '0'
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      model: /media/SanDisk/ai/models_live/Qwen2.5-32B-Instruct-Q5_K_L.gguf
      gpu-layers: 9999
      ctx-size: 16384  # smaller context than tabby-code to fit VRAM budget
      flash-attn: true
      host: 0.0.0.0