Compare commits
2 commits: bd572a0ccd...f799554173

Author | SHA1 | Date
---|---|---
 | f799554173 |
 | cdd0a84d67 |
3 changed files with 21 additions and 11 deletions
flake.lock (generated): 8 lines changed
@@ -64,16 +64,16 @@
         ]
       },
       "locked": {
-        "lastModified": 1745868199,
-        "narHash": "sha256-kTy1oNcN4bNHDV7yqIOSTo/JUN4enHeqyUeVE/q4iHg=",
+        "lastModified": 1745909149,
+        "narHash": "sha256-TDGa+/P14TAS1DC9MsTilNQ3sw/mL08s4KYIlTwW850=",
         "owner": "ggerganov",
         "repo": "llama.cpp",
-        "rev": "eaea3253244dc4bbe07f6cd81325847ccc6cf93e",
+        "rev": "b6ce7430b7eb51f032152316880204e0a9c0470e",
         "type": "github"
       },
       "original": {
         "owner": "ggerganov",
-        "ref": "b5214",
+        "ref": "b5216",
         "repo": "llama.cpp",
         "type": "github"
       }
flake.nix

@@ -19,7 +19,7 @@
     flake-parts.url = "github:hercules-ci/flake-parts";
     devshell.url = "github:numtide/devshell";
     llama-cpp = {
-      url = "github:ggerganov/llama.cpp/b5214";
+      url = "github:ggerganov/llama.cpp/b5216";
       inputs.nixpkgs.follows = "nixpkgs";
       inputs.flake-parts.follows = "flake-parts";
     };
@@ -89,6 +89,7 @@
           wayland
         ];
         rocmTargets = [
+          "gfx1030"
          "gfx1100"
          "gfx1102"
          "gfx1103"
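The two Nix hunks above are a routine pin bump: the llama.cpp input tag in flake.nix moves from b5214 to b5216, and flake.lock is regenerated to match. A minimal sketch of how a bump like this is usually produced, assuming standard flakes tooling (nothing in the diff shows the actual commands that were run):

    # Edit the pinned tag in flake.nix (b5214 -> b5216), then refresh the lock entry.
    sed -i 's|llama.cpp/b5214|llama.cpp/b5216|' flake.nix
    nix flake update llama-cpp   # Nix >= 2.19; on older Nix: nix flake lock --update-input llama-cpp
    nix flake metadata           # sanity-check the newly locked rev
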
@@ -3,9 +3,9 @@ system_resources:
   vram: 30G
 model_specs:
   - name: "tabby-code"
-    port: 18080
+    port: 28080 # to make sure its not sued for now
     # internal_port: 28080 # Optional
-    autostart: true
+    autostart: false
     vram_usage: 27G # Coder-32B + draft 0.5B
     ram_usage: 3G # Coder-32B + draft 0.5B
     # vram_usage: 8.25G # Coder-7B
@@ -21,12 +21,12 @@ model_specs:
       gpu-layers: 9999
       model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Instruct-Q8_0.gguf
       gpu-layers-draft: 9999
-      cache-type-k: q8_0
+      # cache-type-k: q8_0
       cache-type-v: q8_0
       draft-max: 16
       draft-min: 5
   - name: "tabby-embeddings"
-    port: 18081
+    port: 28081
     vram_usage: 0.4G
     ram_usage: 2.5G
     env:
@@ -46,10 +46,19 @@ model_specs:
       CUDA_VISIBLE_DEVICES: 0
       HSA_OVERRIDE_GFX_VERSION: '11.0.0'
     args:
-      ctx-size: 16384
+      ctx-size: 65536
       flash-attn: true
-      model: /media/SanDisk/ai/models_live/Qwen_Qwen3-30B-A3B-Q5_K_S.gguf
+      model: /media/SanDisk/ai/models_live/Qwen_Qwen3-30B-A3B-Q4_K_L.gguf
+      cache-type-k: f16
+      cache-type-v: f16
       gpu-layers: 9999
+      rope-scaling: yarn
+      rope-scale: 4
+      yarn-orig-ctx: 32768
+      temp: 0.65
+      top-p: 0.9
+      top-k: 20
+      min-p: 0.0
       # model-draft: /media/SanDisk/ai/models_live/Qwen_Qwen3-0.6B-Q6_K.gguf
       # gpu-layers-draft: 9999
       # draft-max: 16
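The final hunk moves the Qwen3-30B-A3B spec from the Q5_K_S quant to Q4_K_L, grows ctx-size from 16384 to 65536, and turns on YaRN rope scaling (rope-scale 4 over the model's native yarn-orig-ctx of 32768 gives a 131072-token budget, comfortably covering the new 65536 window). The keys read like llama.cpp's llama-server options spelled as YAML; purely as an illustration, and assuming the wrapper that consumes this file maps them one-to-one onto flags, the updated spec would correspond to roughly:

    # Hypothetical standalone equivalent of the updated spec; values are copied
    # from the diff, the one-to-one key-to-flag mapping is an assumption.
    HSA_OVERRIDE_GFX_VERSION=11.0.0 CUDA_VISIBLE_DEVICES=0 llama-server \
      --model /media/SanDisk/ai/models_live/Qwen_Qwen3-30B-A3B-Q4_K_L.gguf \
      --ctx-size 65536 --flash-attn \
      --cache-type-k f16 --cache-type-v f16 \
      --gpu-layers 9999 \
      --rope-scaling yarn --rope-scale 4 --yarn-orig-ctx 32768 \
      --temp 0.65 --top-p 0.9 --top-k 20 --min-p 0.0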