conf: Update model configurations

Tristan D. 2025-01-31 13:20:40 +01:00
parent 56ce7c5831
commit aad9472372
Signed by: tristan
SSH key fingerprint: SHA256:3RU4RLOoM8oAjFU19f1W6t8uouZbA7GWkaSW6rjp1k8


@@ -53,7 +53,7 @@ models:
# draft-max: 16
# draft-min: 5
- name: "bigger-chat"
- port: 18083
+ port: 18085
vram_usage: 29G
ram_usage: 5G
env:
@@ -70,3 +70,39 @@ models:
gpu-layers-draft: 0
# draft-max: 16
# draft-min: 5
- name: "bigger-chat-2"
port: 18083
vram_usage: 29G
ram_usage: 5G
env:
CUDA_VISIBLE_DEVICES: 0
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
args:
ctx-size: 8192
flash-attn: true
cache-type-k: q8_0
cache-type-v: q8_0
model: /media/SanDisk/ai/models_live/Llama-3.3-70B-Instruct-IQ3_XXS.gguf
gpu-layers: 9999
# model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
# gpu-layers-draft: 0
# draft-max: 16
# draft-min: 5
- name: "deep-think"
port: 18084
vram_usage: 29G
ram_usage: 5G
env:
CUDA_VISIBLE_DEVICES: 0
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
args:
ctx-size: 32768
flash-attn: true
# cache-type-k: q8_0
# cache-type-v: q8_0
model: /media/SanDisk/ai/models_live/QwQ-32B-Preview-IQ4_XS.gguf
gpu-layers: 9999
# model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
# gpu-layers-draft: 0
# draft-max: 16
# draft-min: 5
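
A quick way to sanity-check this change is to load the updated config and list which entry owns each port, since the edit moves "bigger-chat" from 18083 to 18085 while the new "bigger-chat-2" entry takes over 18083. The sketch below is illustrative only: it assumes the file is saved as config.yaml with the top-level models: list shown in the hunk headers, and that PyYAML is installed; the filename and the check itself are not part of this repository.

import sys
from collections import defaultdict

import yaml  # PyYAML

# Load the model configuration; the filename is an assumption for this sketch.
with open("config.yaml") as fh:
    config = yaml.safe_load(fh)

# Group model names by the port they claim, then report any clashes.
ports = defaultdict(list)
for model in config.get("models", []):
    ports[model["port"]].append(model["name"])

clash = False
for port, names in sorted(ports.items()):
    if len(names) > 1:
        clash = True
        print(f"port {port} is claimed by: {', '.join(names)}")
    else:
        print(f"port {port}: {names[0]}")

sys.exit(1 if clash else 0)

With the diff above applied, the entries visible here end up on distinct ports (bigger-chat on 18085, bigger-chat-2 on 18083, deep-think on 18084), so the check should exit cleanly unless an entry elsewhere in the file reuses one of them.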