Compare commits

...

2 commits

Author SHA1 Message Date
f799554173
Bump 2025-05-02 02:58:07 +02:00
cdd0a84d67
Add gfx1030 2025-04-29 09:36:22 +02:00
3 changed files with 21 additions and 11 deletions

8
flake.lock generated
View file

@@ -64,16 +64,16 @@
] ]
}, },
"locked": { "locked": {
"lastModified": 1745868199, "lastModified": 1745909149,
"narHash": "sha256-kTy1oNcN4bNHDV7yqIOSTo/JUN4enHeqyUeVE/q4iHg=", "narHash": "sha256-TDGa+/P14TAS1DC9MsTilNQ3sw/mL08s4KYIlTwW850=",
"owner": "ggerganov", "owner": "ggerganov",
"repo": "llama.cpp", "repo": "llama.cpp",
"rev": "eaea3253244dc4bbe07f6cd81325847ccc6cf93e", "rev": "b6ce7430b7eb51f032152316880204e0a9c0470e",
"type": "github" "type": "github"
}, },
"original": { "original": {
"owner": "ggerganov", "owner": "ggerganov",
"ref": "b5214", "ref": "b5216",
"repo": "llama.cpp", "repo": "llama.cpp",
"type": "github" "type": "github"
} }

View file

@@ -19,7 +19,7 @@
flake-parts.url = "github:hercules-ci/flake-parts"; flake-parts.url = "github:hercules-ci/flake-parts";
devshell.url = "github:numtide/devshell"; devshell.url = "github:numtide/devshell";
llama-cpp = { llama-cpp = {
url = "github:ggerganov/llama.cpp/b5214"; url = "github:ggerganov/llama.cpp/b5216";
inputs.nixpkgs.follows = "nixpkgs"; inputs.nixpkgs.follows = "nixpkgs";
inputs.flake-parts.follows = "flake-parts"; inputs.flake-parts.follows = "flake-parts";
}; };
@@ -89,6 +89,7 @@
wayland wayland
]; ];
rocmTargets = [ rocmTargets = [
"gfx1030"
"gfx1100" "gfx1100"
"gfx1102" "gfx1102"
"gfx1103" "gfx1103"

View file

@@ -3,9 +3,9 @@ system_resources:
vram: 30G vram: 30G
model_specs: model_specs:
- name: "tabby-code" - name: "tabby-code"
port: 18080 port: 28080 # to make sure it's not used for now
# internal_port: 28080 # Optional # internal_port: 28080 # Optional
autostart: true autostart: false
vram_usage: 27G # Coder-32B + draft 0.5B vram_usage: 27G # Coder-32B + draft 0.5B
ram_usage: 3G # Coder-32B + draft 0.5B ram_usage: 3G # Coder-32B + draft 0.5B
# vram_usage: 8.25G # Coder-7B # vram_usage: 8.25G # Coder-7B
@@ -21,12 +21,12 @@ model_specs:
gpu-layers: 9999 gpu-layers: 9999
model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Instruct-Q8_0.gguf model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Instruct-Q8_0.gguf
gpu-layers-draft: 9999 gpu-layers-draft: 9999
cache-type-k: q8_0 # cache-type-k: q8_0
cache-type-v: q8_0 cache-type-v: q8_0
draft-max: 16 draft-max: 16
draft-min: 5 draft-min: 5
- name: "tabby-embeddings" - name: "tabby-embeddings"
port: 18081 port: 28081
vram_usage: 0.4G vram_usage: 0.4G
ram_usage: 2.5G ram_usage: 2.5G
env: env:
@@ -46,10 +46,19 @@ model_specs:
CUDA_VISIBLE_DEVICES: 0 CUDA_VISIBLE_DEVICES: 0
HSA_OVERRIDE_GFX_VERSION: '11.0.0' HSA_OVERRIDE_GFX_VERSION: '11.0.0'
args: args:
ctx-size: 16384 ctx-size: 65536
flash-attn: true flash-attn: true
model: /media/SanDisk/ai/models_live/Qwen_Qwen3-30B-A3B-Q5_K_S.gguf model: /media/SanDisk/ai/models_live/Qwen_Qwen3-30B-A3B-Q4_K_L.gguf
cache-type-k: f16
cache-type-v: f16
gpu-layers: 9999 gpu-layers: 9999
rope-scaling: yarn
rope-scale: 4
yarn-orig-ctx: 32768
temp: 0.65
top-p: 0.9
top-k: 20
min-p: 0.0
# model-draft: /media/SanDisk/ai/models_live/Qwen_Qwen3-0.6B-Q6_K.gguf # model-draft: /media/SanDisk/ai/models_live/Qwen_Qwen3-0.6B-Q6_K.gguf
# gpu-layers-draft: 9999 # gpu-layers-draft: 9999
# draft-max: 16 # draft-max: 16