Compare commits
2 commits
bd572a0ccd
...
f799554173
Author | SHA1 | Date | |
---|---|---|---|
f799554173 | |||
cdd0a84d67 |
3 changed files with 21 additions and 11 deletions
8
flake.lock
generated
8
flake.lock
generated
|
@ -64,16 +64,16 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1745868199,
|
"lastModified": 1745909149,
|
||||||
"narHash": "sha256-kTy1oNcN4bNHDV7yqIOSTo/JUN4enHeqyUeVE/q4iHg=",
|
"narHash": "sha256-TDGa+/P14TAS1DC9MsTilNQ3sw/mL08s4KYIlTwW850=",
|
||||||
"owner": "ggerganov",
|
"owner": "ggerganov",
|
||||||
"repo": "llama.cpp",
|
"repo": "llama.cpp",
|
||||||
"rev": "eaea3253244dc4bbe07f6cd81325847ccc6cf93e",
|
"rev": "b6ce7430b7eb51f032152316880204e0a9c0470e",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"owner": "ggerganov",
|
"owner": "ggerganov",
|
||||||
"ref": "b5214",
|
"ref": "b5216",
|
||||||
"repo": "llama.cpp",
|
"repo": "llama.cpp",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
flake-parts.url = "github:hercules-ci/flake-parts";
|
flake-parts.url = "github:hercules-ci/flake-parts";
|
||||||
devshell.url = "github:numtide/devshell";
|
devshell.url = "github:numtide/devshell";
|
||||||
llama-cpp = {
|
llama-cpp = {
|
||||||
url = "github:ggerganov/llama.cpp/b5214";
|
url = "github:ggerganov/llama.cpp/b5216";
|
||||||
inputs.nixpkgs.follows = "nixpkgs";
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
inputs.flake-parts.follows = "flake-parts";
|
inputs.flake-parts.follows = "flake-parts";
|
||||||
};
|
};
|
||||||
|
@ -89,6 +89,7 @@
|
||||||
wayland
|
wayland
|
||||||
];
|
];
|
||||||
rocmTargets = [
|
rocmTargets = [
|
||||||
|
"gfx1030"
|
||||||
"gfx1100"
|
"gfx1100"
|
||||||
"gfx1102"
|
"gfx1102"
|
||||||
"gfx1103"
|
"gfx1103"
|
||||||
|
|
|
@ -3,9 +3,9 @@ system_resources:
|
||||||
vram: 30G
|
vram: 30G
|
||||||
model_specs:
|
model_specs:
|
||||||
- name: "tabby-code"
|
- name: "tabby-code"
|
||||||
port: 18080
|
port: 28080 # to make sure it's not used for now
|
||||||
# internal_port: 28080 # Optional
|
# internal_port: 28080 # Optional
|
||||||
autostart: true
|
autostart: false
|
||||||
vram_usage: 27G # Coder-32B + draft 0.5B
|
vram_usage: 27G # Coder-32B + draft 0.5B
|
||||||
ram_usage: 3G # Coder-32B + draft 0.5B
|
ram_usage: 3G # Coder-32B + draft 0.5B
|
||||||
# vram_usage: 8.25G # Coder-7B
|
# vram_usage: 8.25G # Coder-7B
|
||||||
|
@ -21,12 +21,12 @@ model_specs:
|
||||||
gpu-layers: 9999
|
gpu-layers: 9999
|
||||||
model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Instruct-Q8_0.gguf
|
model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Instruct-Q8_0.gguf
|
||||||
gpu-layers-draft: 9999
|
gpu-layers-draft: 9999
|
||||||
cache-type-k: q8_0
|
# cache-type-k: q8_0
|
||||||
cache-type-v: q8_0
|
cache-type-v: q8_0
|
||||||
draft-max: 16
|
draft-max: 16
|
||||||
draft-min: 5
|
draft-min: 5
|
||||||
- name: "tabby-embeddings"
|
- name: "tabby-embeddings"
|
||||||
port: 18081
|
port: 28081
|
||||||
vram_usage: 0.4G
|
vram_usage: 0.4G
|
||||||
ram_usage: 2.5G
|
ram_usage: 2.5G
|
||||||
env:
|
env:
|
||||||
|
@ -46,10 +46,19 @@ model_specs:
|
||||||
CUDA_VISIBLE_DEVICES: 0
|
CUDA_VISIBLE_DEVICES: 0
|
||||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||||
args:
|
args:
|
||||||
ctx-size: 16384
|
ctx-size: 65536
|
||||||
flash-attn: true
|
flash-attn: true
|
||||||
model: /media/SanDisk/ai/models_live/Qwen_Qwen3-30B-A3B-Q5_K_S.gguf
|
model: /media/SanDisk/ai/models_live/Qwen_Qwen3-30B-A3B-Q4_K_L.gguf
|
||||||
|
cache-type-k: f16
|
||||||
|
cache-type-v: f16
|
||||||
gpu-layers: 9999
|
gpu-layers: 9999
|
||||||
|
rope-scaling: yarn
|
||||||
|
rope-scale: 4
|
||||||
|
yarn-orig-ctx: 32768
|
||||||
|
temp: 0.65
|
||||||
|
top-p: 0.9
|
||||||
|
top-k: 20
|
||||||
|
min-p: 0.0
|
||||||
# model-draft: /media/SanDisk/ai/models_live/Qwen_Qwen3-0.6B-Q6_K.gguf
|
# model-draft: /media/SanDisk/ai/models_live/Qwen_Qwen3-0.6B-Q6_K.gguf
|
||||||
# gpu-layers-draft: 9999
|
# gpu-layers-draft: 9999
|
||||||
# draft-max: 16
|
# draft-max: 16
|
||||||
|
|
Loading…
Add table
Reference in a new issue