Compare commits
No commits in common. "3b4655728dfada0bb1b1ae01165c84b38881409d" and "58b04b09d1da3f1d68f55e2d1565b169ceb6c279" have entirely different histories.
3b4655728d
...
58b04b09d1
4 changed files with 52 additions and 85 deletions
52
flake.lock
generated
52
flake.lock
generated
|
@ -5,11 +5,11 @@
|
|||
"nixpkgs": "nixpkgs"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1728330715,
|
||||
"narHash": "sha256-xRJ2nPOXb//u1jaBnDP56M7v5ldavjbtR6lfGqSvcKg=",
|
||||
"lastModified": 1722113426,
|
||||
"narHash": "sha256-Yo/3loq572A8Su6aY5GP56knpuKYRvM2a1meP9oJZCw=",
|
||||
"owner": "numtide",
|
||||
"repo": "devshell",
|
||||
"rev": "dd6b80932022cea34a019e2bb32f6fa9e494dfef",
|
||||
"rev": "67cce7359e4cd3c45296fb4aaf6a19e2a9c757ae",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
@ -23,11 +23,11 @@
|
|||
"nixpkgs-lib": "nixpkgs-lib"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1730504689,
|
||||
"narHash": "sha256-hgmguH29K2fvs9szpq2r3pz2/8cJd2LPS+b4tfNFCwE=",
|
||||
"lastModified": 1725234343,
|
||||
"narHash": "sha256-+ebgonl3NbiKD2UD0x4BszCZQ6sTfL4xioaM49o5B3Y=",
|
||||
"owner": "hercules-ci",
|
||||
"repo": "flake-parts",
|
||||
"rev": "506278e768c2a08bec68eb62932193e341f55c90",
|
||||
"rev": "567b938d64d4b4112ee253b9274472dc3a346eb6",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
@ -41,11 +41,11 @@
|
|||
"systems": "systems"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1726560853,
|
||||
"narHash": "sha256-X6rJYSESBVr3hBoH0WbKE5KvhPU5bloyZ2L4K60/fPQ=",
|
||||
"lastModified": 1710146030,
|
||||
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"rev": "c1dfcf08411b08f6b8615f7d8971a2bfa81d5e8a",
|
||||
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
@ -64,16 +64,16 @@
|
|||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1732585640,
|
||||
"narHash": "sha256-sZxUPkGSTpcGgaRoB6X0xqodZMcqayCtOSceZxc0FjU=",
|
||||
"lastModified": 1728330908,
|
||||
"narHash": "sha256-2N7yfI0N4Up+aYzq7++BqMXZhuPcQGskSuq0TUcK5V0=",
|
||||
"owner": "ggerganov",
|
||||
"repo": "llama.cpp",
|
||||
"rev": "0eb4e12beebabae46d37b78742f4c5d4dbe52dc1",
|
||||
"rev": "6374743747b14db4eb73ce82ae449a2978bc3b47",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "ggerganov",
|
||||
"ref": "b4174",
|
||||
"ref": "b3896",
|
||||
"repo": "llama.cpp",
|
||||
"type": "github"
|
||||
}
|
||||
|
@ -96,23 +96,23 @@
|
|||
},
|
||||
"nixpkgs-lib": {
|
||||
"locked": {
|
||||
"lastModified": 1730504152,
|
||||
"narHash": "sha256-lXvH/vOfb4aGYyvFmZK/HlsNsr/0CVWlwYvo2rxJk3s=",
|
||||
"lastModified": 1725233747,
|
||||
"narHash": "sha256-Ss8QWLXdr2JCBPcYChJhz4xJm+h/xjl4G0c0XlP6a74=",
|
||||
"type": "tarball",
|
||||
"url": "https://github.com/NixOS/nixpkgs/archive/cc2f28000298e1269cea6612cd06ec9979dd5d7f.tar.gz"
|
||||
"url": "https://github.com/NixOS/nixpkgs/archive/356624c12086a18f2ea2825fed34523d60ccc4e3.tar.gz"
|
||||
},
|
||||
"original": {
|
||||
"type": "tarball",
|
||||
"url": "https://github.com/NixOS/nixpkgs/archive/cc2f28000298e1269cea6612cd06ec9979dd5d7f.tar.gz"
|
||||
"url": "https://github.com/NixOS/nixpkgs/archive/356624c12086a18f2ea2825fed34523d60ccc4e3.tar.gz"
|
||||
}
|
||||
},
|
||||
"nixpkgs_2": {
|
||||
"locked": {
|
||||
"lastModified": 1730958623,
|
||||
"narHash": "sha256-JwQZIGSYnRNOgDDoIgqKITrPVil+RMWHsZH1eE1VGN0=",
|
||||
"lastModified": 1724208548,
|
||||
"narHash": "sha256-8Aiur5lv2L8o9ErxHqS2F293MHiHCoRG8C4vCwhkeXo=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "85f7e662eda4fa3a995556527c87b2524b691933",
|
||||
"rev": "4c30668e1edb7348169407f218fa7c71a94b17f3",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
@ -124,11 +124,11 @@
|
|||
},
|
||||
"nixpkgs_3": {
|
||||
"locked": {
|
||||
"lastModified": 1728538411,
|
||||
"narHash": "sha256-f0SBJz1eZ2yOuKUr5CA9BHULGXVSn6miBuUWdTyhUhU=",
|
||||
"lastModified": 1718428119,
|
||||
"narHash": "sha256-WdWDpNaq6u1IPtxtYHHWpl5BmabtpmLnMAx0RdJ/vo8=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "b69de56fac8c2b6f8fd27f2eca01dcda8e0a4221",
|
||||
"rev": "e6cea36f83499eb4e9cd184c8a8e823296b50ad5",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
@ -170,11 +170,11 @@
|
|||
"nixpkgs": "nixpkgs_3"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1731119076,
|
||||
"narHash": "sha256-2eVhmocCZHJlFAz6Mt3EwPdFFVAtGgIySJc1EHQVxcc=",
|
||||
"lastModified": 1724206841,
|
||||
"narHash": "sha256-L8dKaX4T3k+TR2fEHCfGbH4UXdspovz/pj87iai9qmc=",
|
||||
"owner": "oxalica",
|
||||
"repo": "rust-overlay",
|
||||
"rev": "23c4b3ba5f806fcf25d5a3b6b54fa0d07854c032",
|
||||
"rev": "45e98fbd62c32e5927e952d2833fa1ba4fb35a61",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
34
flake.nix
34
flake.nix
|
@ -2,12 +2,12 @@
|
|||
description = "A Nix-flake-based Rust development environment";
|
||||
nixConfig = {
|
||||
extra-substituters = [
|
||||
# "https://nixcache.vlt81.de"
|
||||
"https://nixcache.vlt81.de"
|
||||
"https://llama-cpp.cachix.org"
|
||||
"https://cuda-maintainers.cachix.org"
|
||||
];
|
||||
extra-trusted-public-keys = [
|
||||
# "nixcache.vlt81.de:nw0FfUpePtL6P3IMNT9X6oln0Wg9REZINtkkI9SisqQ="
|
||||
"nixcache.vlt81.de:nw0FfUpePtL6P3IMNT9X6oln0Wg9REZINtkkI9SisqQ="
|
||||
"llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc="
|
||||
"cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E="
|
||||
];
|
||||
|
@ -23,7 +23,7 @@
|
|||
flake = false;
|
||||
};
|
||||
llama-cpp = {
|
||||
url = "github:ggerganov/llama.cpp/b4174";
|
||||
url = "github:ggerganov/llama.cpp/b3896";
|
||||
inputs.nixpkgs.follows = "nixpkgs";
|
||||
inputs.flake-parts.follows = "flake-parts";
|
||||
};
|
||||
|
@ -49,9 +49,6 @@
|
|||
(final: prev: {
|
||||
customRustToolchain = prev.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml;
|
||||
})
|
||||
(final: prev: {
|
||||
nodejs-16_x = prev.nodePackages.nodejs; # needed for npmlock2nix
|
||||
})
|
||||
(final: prev: {
|
||||
npmlock2nix = import npmlock2nix { pkgs = prev; };
|
||||
})
|
||||
|
@ -76,7 +73,7 @@
|
|||
};
|
||||
customNodeModules = pkgs.npmlock2nix.v2.node_modules {
|
||||
src = ./.;
|
||||
nodejs = pkgs.nodejs_22;
|
||||
nodejs = pkgs.nodejs_20;
|
||||
};
|
||||
buildInputs = with pkgs; [
|
||||
harfbuzz
|
||||
|
@ -124,8 +121,7 @@
|
|||
(lib.cmakeBool "GGML_NATIVE" true)
|
||||
(lib.cmakeBool "GGML_BLAS" false)
|
||||
(lib.cmakeBool "GGML_CUDA" false)
|
||||
(lib.cmakeBool "GGML_HIP" true) # newer option? appears to be undocumented
|
||||
(lib.cmakeBool "GGML_HIPBLAS" true) # seems to be deprecated
|
||||
(lib.cmakeBool "GGML_HIPBLAS" true)
|
||||
(lib.cmakeBool "GGML_METAL" false)
|
||||
(lib.cmakeBool "GGML_VULKAN" false)
|
||||
(lib.cmakeBool "GGML_STATIC" false)
|
||||
|
@ -135,7 +131,6 @@
|
|||
(lib.cmakeBool "GGML_AVX512" false)
|
||||
(lib.cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
|
||||
(lib.cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmTargets))
|
||||
(lib.cmakeFeature "AMDGPU_TARGETS" (builtins.concatStringsSep ";" rocmTargets))
|
||||
];
|
||||
});
|
||||
};
|
||||
|
@ -167,13 +162,13 @@
|
|||
cargo-outdated
|
||||
cargo-release
|
||||
calc
|
||||
# jre8 # needed for xmlls
|
||||
jre8 # needed for xmlls
|
||||
dart-sass
|
||||
fish
|
||||
inotify-tools
|
||||
leptosfmt
|
||||
mold
|
||||
# nodejs_20
|
||||
nodejs_20
|
||||
pkg-config
|
||||
rustywind
|
||||
sccache
|
||||
|
@ -184,15 +179,16 @@
|
|||
|
||||
buildInputs = buildInputs;
|
||||
shellHook = ''
|
||||
# setup node-modules
|
||||
export NPM_LOCAL_PREFIX=${customNodeModules}/node_modules
|
||||
(ln -s $NPM_LOCAL_PREFIX ./node_modules 2>/dev/null || unlink ./node_modules) && ln -s $NPM_LOCAL_PREFIX ./node_modules 2>/dev/null
|
||||
# setup node-modules
|
||||
export NPM_LOCAL_PREFIX=${customNodeModules}/node_modules
|
||||
(ln -s $NPM_LOCAL_PREFIX ./node_modules 2>/dev/null || unlink ./node_modules) && ln -s $NPM_LOCAL_PREFIX ./node_modules 2>/dev/null
|
||||
|
||||
# export NIX_LD_LIBRARY_PATH=${pkgs.lib.makeLibraryPath buildInputs}:$NIX_LD_LIBRARY_PATH
|
||||
# export NIX_LD_LIBRARY_PATH=${pkgs.lib.makeLibraryPath buildInputs}:$NIX_LD_LIBRARY_PATH
|
||||
export LD_LIBRARY_PATH="${pkgs.lib.makeLibraryPath buildInputs}"
|
||||
export LEPTOS_SASS_VERSION=1.71.0
|
||||
export LEPTOS_TAILWIND_VERSION=3.4.1
|
||||
export MALLOC_CONF=thp:always,metadata_thp:always
|
||||
export LEPTOS_SASS_VERSION=1.71.0
|
||||
export LEPTOS_TAILWIND_VERSION=3.4.1
|
||||
export MALLOC_CONF=thp:always,metadata_thp:always
|
||||
|
||||
'';
|
||||
};
|
||||
});
|
||||
|
|
|
@ -5,37 +5,30 @@ models:
|
|||
- name: "tabby-code"
|
||||
port: 18080
|
||||
internal_port: 28080
|
||||
autostart: true
|
||||
vram_usage: 26.7G # Coder-32B + draft 0.5B
|
||||
ram_usage: 3G # Coder-32B + draft 0.5B
|
||||
# vram_usage: 8.25G # Coder-7B
|
||||
# ram_usage: 2.6G # Coder-7B
|
||||
vram_usage: 8.25G
|
||||
ram_usage: 2.6G
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: 0
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
# host: 0.0.0.0
|
||||
model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-7B-Instruct-Q6_K_L.gguf
|
||||
gpu-layers: 9999
|
||||
flash-attn: true
|
||||
ctx-size: 32768
|
||||
model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-32B-Instruct-IQ4_XS.gguf
|
||||
gpu-layers: 9999
|
||||
model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Q8_0.gguf
|
||||
gpu-layers-draft: 9999
|
||||
draft-max: 16
|
||||
draft-min: 5
|
||||
host: 0.0.0.0
|
||||
- name: "tabby-embeddings"
|
||||
port: 18081
|
||||
internal_port: 28081
|
||||
vram_usage: 0.4G
|
||||
ram_usage: 2.5G
|
||||
vram_usage: 1G
|
||||
ram_usage: 2G
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: 0
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
model: /media/SanDisk/ai/models_live/nomic-embed-text-v1-f32.gguf
|
||||
gpu-layers: -1
|
||||
gpu-layers: 9999
|
||||
flash-attn: true
|
||||
# host: 0.0.0.0
|
||||
host: 0.0.0.0
|
||||
embeddings: true
|
||||
- name: "big-chat"
|
||||
port: 18082
|
||||
|
@ -50,21 +43,4 @@ models:
|
|||
gpu-layers: 9999
|
||||
ctx-size: 16384
|
||||
flash-attn: true
|
||||
# host: 0.0.0.0
|
||||
- name: "bigger-chat"
|
||||
port: 18083
|
||||
internal_port: 28083
|
||||
vram_usage: 29G
|
||||
ram_usage: 4G
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: 0
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
model: /media/SanDisk/ai/models_live/Llama-3.1-Nemotron-70B-Instruct-HF-IQ3_XXS.gguf
|
||||
gpu-layers: 9999
|
||||
flash-attn: true
|
||||
# ctx-size: 8192
|
||||
ctx-size: 16384
|
||||
# host: 0.0.0.0
|
||||
cache-type-k: q8_0
|
||||
cache-type-v: q8_0
|
||||
host: 0.0.0.0
|
||||
|
|
|
@ -84,10 +84,6 @@ struct SharedState {
|
|||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
// TODO add autostart of models based on config
|
||||
// abstract starting logic out of handler for this to allow separate calls to start
|
||||
// maybe add to SharedState & LLamaInstance ?
|
||||
|
||||
initialize_logger();
|
||||
// Read and parse the YAML configuration
|
||||
let config_str = std::fs::read_to_string("config.yaml").expect("Failed to read config.yaml");
|
||||
|
@ -268,10 +264,9 @@ async fn handle_request(
|
|||
cmd.kill_on_drop(true);
|
||||
cmd.envs(model_config.env.clone());
|
||||
cmd.args(&args);
|
||||
// TODO use openport crate via pick_random_unused_port for determining these
|
||||
cmd.arg("--port");
|
||||
cmd.arg(format!("{}", model_config.internal_port));
|
||||
cmd.stdout(Stdio::null()).stderr(Stdio::null()); // TODO save output and allow retrieval via api
|
||||
cmd.stdout(Stdio::null()).stderr(Stdio::null());
|
||||
|
||||
tracing::info!("Starting llama-server with {:?}", cmd);
|
||||
let process = Arc::new(Mutex::new(
|
||||
|
|
Loading…
Add table
Reference in a new issue