Compare commits

..

3 commits

Author SHA1 Message Date
aad9472372
conf: Update model configurations 2025-01-31 13:20:40 +01:00
56ce7c5831
Add mvp plans for redvault-el 2025-01-31 13:20:26 +01:00
ed369c9ed7
Update deps
- update rust toolchain
- update llamaCpp
2025-01-31 13:19:28 +01:00
5 changed files with 78 additions and 31 deletions

54
flake.lock generated
View file

@ -5,11 +5,11 @@
"nixpkgs": "nixpkgs"
},
"locked": {
"lastModified": 1728330715,
"narHash": "sha256-xRJ2nPOXb//u1jaBnDP56M7v5ldavjbtR6lfGqSvcKg=",
"lastModified": 1735644329,
"narHash": "sha256-tO3HrHriyLvipc4xr+Ewtdlo7wM1OjXNjlWRgmM7peY=",
"owner": "numtide",
"repo": "devshell",
"rev": "dd6b80932022cea34a019e2bb32f6fa9e494dfef",
"rev": "f7795ede5b02664b57035b3b757876703e2c3eac",
"type": "github"
},
"original": {
@ -23,11 +23,11 @@
"nixpkgs-lib": "nixpkgs-lib"
},
"locked": {
"lastModified": 1730504689,
"narHash": "sha256-hgmguH29K2fvs9szpq2r3pz2/8cJd2LPS+b4tfNFCwE=",
"lastModified": 1735774679,
"narHash": "sha256-soePLBazJk0qQdDVhdbM98vYdssfs3WFedcq+raipRI=",
"owner": "hercules-ci",
"repo": "flake-parts",
"rev": "506278e768c2a08bec68eb62932193e341f55c90",
"rev": "f2f7418ce0ab4a5309a4596161d154cfc877af66",
"type": "github"
},
"original": {
@ -41,11 +41,11 @@
"systems": "systems"
},
"locked": {
"lastModified": 1726560853,
"narHash": "sha256-X6rJYSESBVr3hBoH0WbKE5KvhPU5bloyZ2L4K60/fPQ=",
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "c1dfcf08411b08f6b8615f7d8971a2bfa81d5e8a",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
@ -64,16 +64,16 @@
]
},
"locked": {
"lastModified": 1732585640,
"narHash": "sha256-sZxUPkGSTpcGgaRoB6X0xqodZMcqayCtOSceZxc0FjU=",
"lastModified": 1736021399,
"narHash": "sha256-wlc9kVJ6zqJRx02hdFhZtJBdHVzYHDFzlPTgdaftpcY=",
"owner": "ggerganov",
"repo": "llama.cpp",
"rev": "0eb4e12beebabae46d37b78742f4c5d4dbe52dc1",
"rev": "b56f079e28fda692f11a8b59200ceb815b05d419",
"type": "github"
},
"original": {
"owner": "ggerganov",
"ref": "b4174",
"ref": "b4418",
"repo": "llama.cpp",
"type": "github"
}
@ -96,39 +96,39 @@
},
"nixpkgs-lib": {
"locked": {
"lastModified": 1730504152,
"narHash": "sha256-lXvH/vOfb4aGYyvFmZK/HlsNsr/0CVWlwYvo2rxJk3s=",
"lastModified": 1735774519,
"narHash": "sha256-CewEm1o2eVAnoqb6Ml+Qi9Gg/EfNAxbRx1lANGVyoLI=",
"type": "tarball",
"url": "https://github.com/NixOS/nixpkgs/archive/cc2f28000298e1269cea6612cd06ec9979dd5d7f.tar.gz"
"url": "https://github.com/NixOS/nixpkgs/archive/e9b51731911566bbf7e4895475a87fe06961de0b.tar.gz"
},
"original": {
"type": "tarball",
"url": "https://github.com/NixOS/nixpkgs/archive/cc2f28000298e1269cea6612cd06ec9979dd5d7f.tar.gz"
"url": "https://github.com/NixOS/nixpkgs/archive/e9b51731911566bbf7e4895475a87fe06961de0b.tar.gz"
}
},
"nixpkgs_2": {
"locked": {
"lastModified": 1730958623,
"narHash": "sha256-JwQZIGSYnRNOgDDoIgqKITrPVil+RMWHsZH1eE1VGN0=",
"lastModified": 1736077418,
"narHash": "sha256-2LwAcQXlLkqWyibkYGiS1SfXsewxRuhpYtzrMQSYElc=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "85f7e662eda4fa3a995556527c87b2524b691933",
"rev": "e554bf17658bd1bfe393dcaca8b8eee6014ddfa1",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixpkgs-unstable",
"ref": "nixos-unstable-small",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs_3": {
"locked": {
"lastModified": 1728538411,
"narHash": "sha256-f0SBJz1eZ2yOuKUr5CA9BHULGXVSn6miBuUWdTyhUhU=",
"lastModified": 1736320768,
"narHash": "sha256-nIYdTAiKIGnFNugbomgBJR+Xv5F1ZQU+HfaBqJKroC0=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "b69de56fac8c2b6f8fd27f2eca01dcda8e0a4221",
"rev": "4bc9c909d9ac828a039f288cf872d16d38185db8",
"type": "github"
},
"original": {
@ -170,11 +170,11 @@
"nixpkgs": "nixpkgs_3"
},
"locked": {
"lastModified": 1731119076,
"narHash": "sha256-2eVhmocCZHJlFAz6Mt3EwPdFFVAtGgIySJc1EHQVxcc=",
"lastModified": 1738290352,
"narHash": "sha256-YKOHUmc0Clm4tMV8grnxYL4IIwtjTayoq/3nqk0QM7k=",
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "23c4b3ba5f806fcf25d5a3b6b54fa0d07854c032",
"rev": "b031b584125d33d23a0182f91ddbaf3ab4880236",
"type": "github"
},
"original": {

View file

@ -13,7 +13,7 @@
];
};
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable-small";
rust-overlay.url = "github:oxalica/rust-overlay";
flake-utils.url = "github:numtide/flake-utils";
flake-parts.url = "github:hercules-ci/flake-parts";
@ -23,7 +23,7 @@
flake = false;
};
llama-cpp = {
url = "github:ggerganov/llama.cpp/b4174";
url = "github:ggerganov/llama.cpp/b4418";
inputs.nixpkgs.follows = "nixpkgs";
inputs.flake-parts.follows = "flake-parts";
};
@ -179,6 +179,7 @@
sccache
sqlx-cli
unzip
rocmPackages.rocminfo
]
++ buildInputs;

View file

@ -53,7 +53,7 @@ models:
# draft-max: 16
# draft-min: 5
- name: "bigger-chat"
port: 18083
port: 18085
vram_usage: 29G
ram_usage: 5G
env:
@ -70,3 +70,39 @@ models:
gpu-layers-draft: 0
# draft-max: 16
# draft-min: 5
- name: "bigger-chat-2"
port: 18083
vram_usage: 29G
ram_usage: 5G
env:
CUDA_VISIBLE_DEVICES: 0
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
args:
ctx-size: 8192
flash-attn: true
cache-type-k: q8_0
cache-type-v: q8_0
model: /media/SanDisk/ai/models_live/Llama-3.3-70B-Instruct-IQ3_XXS.gguf
gpu-layers: 9999
# model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
# gpu-layers-draft: 0
# draft-max: 16
# draft-min: 5
- name: "deep-think"
port: 18084
vram_usage: 29G
ram_usage: 5G
env:
CUDA_VISIBLE_DEVICES: 0
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
args:
ctx-size: 32768
flash-attn: true
# cache-type-k: q8_0
# cache-type-v: q8_0
model: /media/SanDisk/ai/models_live/QwQ-32B-Preview-IQ4_XS.gguf
gpu-layers: 9999
# model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
# gpu-layers-draft: 0
# draft-max: 16
# draft-min: 5

View file

@ -24,6 +24,16 @@
## Planning
- MVP:
- single file overlay
- get completion from server with single file context
- put completion into overlay
- trigger completion while typing, update cursor position etc., with debounce
- multi file overlay (with context based on currently open file)
- rank by imports/keywords/embeddings, test different approaches against each other
- multi file overlay (with context based on the function under the cursor)
- better caching
- combine file-level/function-level context somehow, cache for recently/commonly used files
### Interface

View file

@ -1,5 +1,5 @@
[toolchain]
channel = "nightly-2024-08-20"
channel = "nightly-2025-01-30"
targets = [
"x86_64-unknown-linux-gnu",
"wasm32-unknown-unknown",