conf: Update model configurations
This commit is contained in:
parent
56ce7c5831
commit
aad9472372
1 changed file with 37 additions and 1 deletion
|
@ -53,7 +53,7 @@ models:
|
||||||
# draft-max: 16
|
# draft-max: 16
|
||||||
# draft-min: 5
|
# draft-min: 5
|
||||||
- name: "bigger-chat"
|
- name: "bigger-chat"
|
||||||
port: 18083
|
port: 18085
|
||||||
vram_usage: 29G
|
vram_usage: 29G
|
||||||
ram_usage: 5G
|
ram_usage: 5G
|
||||||
env:
|
env:
|
||||||
|
@ -70,3 +70,39 @@ models:
|
||||||
gpu-layers-draft: 0
|
gpu-layers-draft: 0
|
||||||
# draft-max: 16
|
# draft-max: 16
|
||||||
# draft-min: 5
|
# draft-min: 5
|
||||||
|
- name: "bigger-chat-2"
|
||||||
|
port: 18083
|
||||||
|
vram_usage: 29G
|
||||||
|
ram_usage: 5G
|
||||||
|
env:
|
||||||
|
CUDA_VISIBLE_DEVICES: 0
|
||||||
|
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||||
|
args:
|
||||||
|
ctx-size: 8192
|
||||||
|
flash-attn: true
|
||||||
|
cache-type-k: q8_0
|
||||||
|
cache-type-v: q8_0
|
||||||
|
model: /media/SanDisk/ai/models_live/Llama-3.3-70B-Instruct-IQ3_XXS.gguf
|
||||||
|
gpu-layers: 9999
|
||||||
|
# model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
|
||||||
|
# gpu-layers-draft: 0
|
||||||
|
# draft-max: 16
|
||||||
|
# draft-min: 5
|
||||||
|
- name: "deep-think"
|
||||||
|
port: 18084
|
||||||
|
vram_usage: 29G
|
||||||
|
ram_usage: 5G
|
||||||
|
env:
|
||||||
|
CUDA_VISIBLE_DEVICES: 0
|
||||||
|
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||||
|
args:
|
||||||
|
ctx-size: 32768
|
||||||
|
flash-attn: true
|
||||||
|
# cache-type-k: q8_0
|
||||||
|
# cache-type-v: q8_0
|
||||||
|
model: /media/SanDisk/ai/models_live/QwQ-32B-Preview-IQ4_XS.gguf
|
||||||
|
gpu-layers: 9999
|
||||||
|
# model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
|
||||||
|
# gpu-layers-draft: 0
|
||||||
|
# draft-max: 16
|
||||||
|
# draft-min: 5
|
||||||
|
|
Loading…
Add table
Reference in a new issue