Compare commits

..

No commits in common. "f7995541736082043bc446aadc770255cc6cb2ae" and "bd572a0ccd9e4ea7e98a07aad16b7df79d37a569" have entirely different histories.

3 changed files with 11 additions and 21 deletions

8
flake.lock generated
View file

@@ -64,16 +64,16 @@
]
},
"locked": {
"lastModified": 1745909149,
"narHash": "sha256-TDGa+/P14TAS1DC9MsTilNQ3sw/mL08s4KYIlTwW850=",
"lastModified": 1745868199,
"narHash": "sha256-kTy1oNcN4bNHDV7yqIOSTo/JUN4enHeqyUeVE/q4iHg=",
"owner": "ggerganov",
"repo": "llama.cpp",
"rev": "b6ce7430b7eb51f032152316880204e0a9c0470e",
"rev": "eaea3253244dc4bbe07f6cd81325847ccc6cf93e",
"type": "github"
},
"original": {
"owner": "ggerganov",
"ref": "b5216",
"ref": "b5214",
"repo": "llama.cpp",
"type": "github"
}

View file

@@ -19,7 +19,7 @@
flake-parts.url = "github:hercules-ci/flake-parts";
devshell.url = "github:numtide/devshell";
llama-cpp = {
url = "github:ggerganov/llama.cpp/b5216";
url = "github:ggerganov/llama.cpp/b5214";
inputs.nixpkgs.follows = "nixpkgs";
inputs.flake-parts.follows = "flake-parts";
};
@@ -89,7 +89,6 @@
wayland
];
rocmTargets = [
"gfx1030"
"gfx1100"
"gfx1102"
"gfx1103"

View file

@@ -3,9 +3,9 @@ system_resources:
vram: 30G
model_specs:
- name: "tabby-code"
port: 28080 # to make sure it's not used for now
port: 18080
# internal_port: 28080 # Optional
autostart: false
autostart: true
vram_usage: 27G # Coder-32B + draft 0.5B
ram_usage: 3G # Coder-32B + draft 0.5B
# vram_usage: 8.25G # Coder-7B
@@ -21,12 +21,12 @@ model_specs:
gpu-layers: 9999
model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Instruct-Q8_0.gguf
gpu-layers-draft: 9999
# cache-type-k: q8_0
cache-type-k: q8_0
cache-type-v: q8_0
draft-max: 16
draft-min: 5
- name: "tabby-embeddings"
port: 28081
port: 18081
vram_usage: 0.4G
ram_usage: 2.5G
env:
@@ -46,19 +46,10 @@ model_specs:
CUDA_VISIBLE_DEVICES: 0
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
args:
ctx-size: 65536
ctx-size: 16384
flash-attn: true
model: /media/SanDisk/ai/models_live/Qwen_Qwen3-30B-A3B-Q4_K_L.gguf
cache-type-k: f16
cache-type-v: f16
model: /media/SanDisk/ai/models_live/Qwen_Qwen3-30B-A3B-Q5_K_S.gguf
gpu-layers: 9999
rope-scaling: yarn
rope-scale: 4
yarn-orig-ctx: 32768
temp: 0.65
top-p: 0.9
top-k: 20
min-p: 0.0
# model-draft: /media/SanDisk/ai/models_live/Qwen_Qwen3-0.6B-Q6_K.gguf
# gpu-layers-draft: 9999
# draft-max: 16