feat: Embedded proxy_man for forge
- Add `figment` for loading YAML configs - Small `Makefile.toml` fixes (docset still seems broken) - Copy `config.yaml` to workspace & forge - Embed proxy_man in forge - Remove `backend_process.rs` and `process.rs` - Update `llama_proxy_man/Cargo.toml` and `config.rs` for new dependencies - Format
This commit is contained in:
parent
5b12762511
commit
8d8923294d
18 changed files with 347 additions and 299 deletions
65
Cargo.lock
generated
65
Cargo.lock
generated
|
@ -238,6 +238,15 @@ dependencies = [
|
|||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atomic"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atomic-waker"
|
||||
version = "1.1.2"
|
||||
|
@ -1422,6 +1431,22 @@ dependencies = [
|
|||
"rustc_version 0.4.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "figment"
|
||||
version = "0.10.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3"
|
||||
dependencies = [
|
||||
"atomic",
|
||||
"pear",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
"toml",
|
||||
"uncased",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.35"
|
||||
|
@ -2492,6 +2517,12 @@ dependencies = [
|
|||
"hashbrown 0.15.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inlinable_string"
|
||||
version = "0.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
|
||||
|
||||
[[package]]
|
||||
name = "instant"
|
||||
version = "0.1.13"
|
||||
|
@ -2981,6 +3012,7 @@ dependencies = [
|
|||
"leptos_axum",
|
||||
"leptos_meta",
|
||||
"leptos_router",
|
||||
"llama_proxy_man",
|
||||
"mime_guess",
|
||||
"once_cell",
|
||||
"pin-project-lite",
|
||||
|
@ -3023,6 +3055,7 @@ dependencies = [
|
|||
"anyhow",
|
||||
"axum",
|
||||
"derive_more 2.0.1",
|
||||
"figment",
|
||||
"futures",
|
||||
"hyper",
|
||||
"itertools 0.13.0",
|
||||
|
@ -3688,6 +3721,29 @@ version = "0.2.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
|
||||
|
||||
[[package]]
|
||||
name = "pear"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467"
|
||||
dependencies = [
|
||||
"inlinable_string",
|
||||
"pear_codegen",
|
||||
"yansi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pear_codegen"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"proc-macro2-diagnostics",
|
||||
"quote",
|
||||
"syn 2.0.98",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pem-rfc7468"
|
||||
version = "0.7.0"
|
||||
|
@ -6178,6 +6234,15 @@ version = "1.17.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
|
||||
|
||||
[[package]]
|
||||
name = "uncased"
|
||||
version = "0.9.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697"
|
||||
dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicase"
|
||||
version = "2.8.1"
|
||||
|
|
|
@ -16,7 +16,7 @@ script = "echo test123-${CARGO_MAKE_CRATE_CURRENT_WORKSPACE_MEMBER}"
|
|||
|
||||
[tasks.refresh-all]
|
||||
description = "clean the repo and rebuild everything"
|
||||
dependencies = ["clean", "all", "lall"]
|
||||
dependencies = ["clean", "all"]
|
||||
|
||||
[tasks.mksitedir]
|
||||
workspace = false
|
||||
|
@ -24,10 +24,21 @@ script = "mkdir -p ./target/site"
|
|||
|
||||
[tasks.all]
|
||||
description = "rebuild everything"
|
||||
dependencies = ["mksitedir", "docset", "check", "clippy", "build", "build-release", "format", "lformat", "lbuild", "lbuild-release"]
|
||||
dependencies = [
|
||||
"mksitedir",
|
||||
"docset",
|
||||
"check",
|
||||
"clippy",
|
||||
"build",
|
||||
"build-release",
|
||||
"format",
|
||||
"lformat",
|
||||
"lbuild",
|
||||
"lbuild-release",
|
||||
]
|
||||
|
||||
[tasks.lformat]
|
||||
scripts = "leptosfmt . && rustywind . --write"
|
||||
script = "leptosfmt . && rustywind . --write"
|
||||
|
||||
[tasks.lbuild]
|
||||
category = "Build"
|
||||
|
@ -37,7 +48,7 @@ set current "$CARGO_MAKE_CRATE_CURRENT_WORKSPACE_MEMBER"
|
|||
if contains "$LEPTOS_REPOS" $current
|
||||
cargo leptos build
|
||||
else
|
||||
$current is not a leptos repo!
|
||||
echo $current is not a leptos repo!
|
||||
end
|
||||
'''
|
||||
|
||||
|
@ -60,7 +71,7 @@ dependencies = ["make-docset", "cp-docset"]
|
|||
|
||||
[tasks.make-docset]
|
||||
workspace = false
|
||||
script = "cargo docset --workspace --platform-family redvault-ai "
|
||||
script = "cargo docset --workspace --no-clean --platform-family redvault-ai && sleep 1 && sync"
|
||||
|
||||
[tasks.cp-docset]
|
||||
workspace = false
|
||||
|
|
108
config.yaml
Normal file
108
config.yaml
Normal file
|
@ -0,0 +1,108 @@
|
|||
system_resources:
|
||||
ram: 48G
|
||||
vram: 30G
|
||||
model_specs:
|
||||
- name: "tabby-code"
|
||||
port: 18080
|
||||
# internal_port: 28080 # Optional
|
||||
autostart: "true"
|
||||
vram_usage: "26.7G" # Coder-32B + draft 0.5B
|
||||
ram_usage: "3G" # Coder-32B + draft 0.5B
|
||||
# vram_usage: 8.25G # Coder-7B
|
||||
# ram_usage: 2.6G # Coder-7B
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: "0"
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
# host: 0.0.0.0
|
||||
flash-attn: "true"
|
||||
ctx-size: "32768"
|
||||
model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-32B-Instruct-IQ4_XS.gguf
|
||||
gpu-layers: "9999"
|
||||
model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Q8_0.gguf
|
||||
gpu-layers-draft: "9999"
|
||||
draft-max: "16"
|
||||
draft-min: "5"
|
||||
- name: "tabby-embeddings"
|
||||
port: 18081
|
||||
vram_usage: "0.4G"
|
||||
ram_usage: "2.5G"
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: "0"
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
model: "/media/SanDisk/ai/models_live/nomic-embed-text-v1-f32.gguf"
|
||||
gpu-layers: "-1"
|
||||
flash-attn: "true"
|
||||
# host: 0.0.0.0
|
||||
embeddings: "true"
|
||||
- name: "big-chat"
|
||||
port: 18082
|
||||
vram_usage: 26.5G
|
||||
ram_usage: 2.5G
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: "0"
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
ctx-size: "16384"
|
||||
flash-attn: "true"
|
||||
model: /media/SanDisk/ai/models_live/Qwen2.5-32B-Instruct-Q5_K_L.gguf
|
||||
gpu-layers: "9999"
|
||||
model-draft: /media/SanDisk/ai/models_live/Qwen2.5-0.5B-Instruct-Q8_0.gguf
|
||||
gpu-layers-draft: "9999"
|
||||
# draft-max: "16"
|
||||
# draft-min: "5"
|
||||
- name: "bigger-chat"
|
||||
port: 18085
|
||||
vram_usage: 29G
|
||||
ram_usage: 5G
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: "0"
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
ctx-size: "8192"
|
||||
flash-attn: "true"
|
||||
cache-type-k: q8_0
|
||||
cache-type-v: q8_0
|
||||
model: /media/SanDisk/ai/models_live/Llama-3.1-Nemotron-70B-Instruct-HF-IQ3_XXS.gguf
|
||||
gpu-layers: "9999"
|
||||
model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
|
||||
gpu-layers-draft: "0"
|
||||
# draft-max: "16"
|
||||
# draft-min: "5"
|
||||
- name: "bigger-chat-2"
|
||||
port: 18083
|
||||
vram_usage: 29G
|
||||
ram_usage: 5G
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: "0"
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
ctx-size: "8192"
|
||||
flash-attn: "true"
|
||||
cache-type-k: q8_0
|
||||
cache-type-v: q8_0
|
||||
model: /media/SanDisk/ai/models_live/Llama-3.3-70B-Instruct-IQ3_XXS.gguf
|
||||
gpu-layers: "9999"
|
||||
# model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
|
||||
# gpu-layers-draft: 0
|
||||
# draft-max: "16"
|
||||
# draft-min: "5"
|
||||
- name: "deep-think"
|
||||
port: 18084
|
||||
vram_usage: 29G
|
||||
ram_usage: 5G
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: "0"
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
ctx-size: "32768"
|
||||
flash-attn: "true"
|
||||
# cache-type-k: q8_0
|
||||
# cache-type-v: q8_0
|
||||
model: /media/SanDisk/ai/models_live/QwQ-32B-Preview-IQ4_XS.gguf
|
||||
gpu-layers: "9999"
|
||||
# model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
|
||||
# gpu-layers-draft: 0
|
||||
# draft-max: "16"
|
||||
# draft-min: "5"
|
|
@ -13,6 +13,7 @@ version.workspace=true
|
|||
crate-type = ["cdylib", "rlib"]
|
||||
|
||||
[dependencies]
|
||||
llama_proxy_man = {path="../llama_proxy_man", optional = true}
|
||||
wasm-bindgen = "=0.2.100"
|
||||
# TODO Update to 0.7
|
||||
leptos = { version = "0.6", features = [
|
||||
|
@ -93,6 +94,7 @@ pulldown-cmark = { version = "0.12.2", features = ["serde"] }
|
|||
default = ["ssr"]
|
||||
hydrate = ["leptos/hydrate", "leptos_meta/hydrate", "leptos_router/hydrate"]
|
||||
ssr = [
|
||||
"dep:llama_proxy_man",
|
||||
"dep:async-broadcast",
|
||||
"dep:axum",
|
||||
"dep:dashmap",
|
||||
|
|
108
llama_forge_rs/config.yaml
Normal file
108
llama_forge_rs/config.yaml
Normal file
|
@ -0,0 +1,108 @@
|
|||
system_resources:
|
||||
ram: 48G
|
||||
vram: 30G
|
||||
model_specs:
|
||||
- name: "tabby-code"
|
||||
port: 18080
|
||||
# internal_port: 28080 # Optional
|
||||
autostart: true
|
||||
vram_usage: 26.7G # Coder-32B + draft 0.5B
|
||||
ram_usage: 3G # Coder-32B + draft 0.5B
|
||||
# vram_usage: 8.25G # Coder-7B
|
||||
# ram_usage: 2.6G # Coder-7B
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: 0
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
# host: 0.0.0.0
|
||||
flash-attn: true
|
||||
ctx-size: 32768
|
||||
model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-32B-Instruct-IQ4_XS.gguf
|
||||
gpu-layers: 9999
|
||||
model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Q8_0.gguf
|
||||
gpu-layers-draft: 9999
|
||||
draft-max: 16
|
||||
draft-min: 5
|
||||
- name: "tabby-embeddings"
|
||||
port: 18081
|
||||
vram_usage: 0.4G
|
||||
ram_usage: 2.5G
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: 0
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
model: /media/SanDisk/ai/models_live/nomic-embed-text-v1-f32.gguf
|
||||
gpu-layers: -1
|
||||
flash-attn: true
|
||||
# host: 0.0.0.0
|
||||
embeddings: true
|
||||
- name: "big-chat"
|
||||
port: 18082
|
||||
vram_usage: 26.5G
|
||||
ram_usage: 2.5G
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: 0
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
ctx-size: 16384
|
||||
flash-attn: true
|
||||
model: /media/SanDisk/ai/models_live/Qwen2.5-32B-Instruct-Q5_K_L.gguf
|
||||
gpu-layers: 9999
|
||||
model-draft: /media/SanDisk/ai/models_live/Qwen2.5-0.5B-Instruct-Q8_0.gguf
|
||||
gpu-layers-draft: 9999
|
||||
# draft-max: 16
|
||||
# draft-min: 5
|
||||
- name: "bigger-chat"
|
||||
port: 18085
|
||||
vram_usage: 29G
|
||||
ram_usage: 5G
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: 0
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
ctx-size: 8192
|
||||
flash-attn: true
|
||||
cache-type-k: q8_0
|
||||
cache-type-v: q8_0
|
||||
model: /media/SanDisk/ai/models_live/Llama-3.1-Nemotron-70B-Instruct-HF-IQ3_XXS.gguf
|
||||
gpu-layers: 9999
|
||||
model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
|
||||
gpu-layers-draft: 0
|
||||
# draft-max: 16
|
||||
# draft-min: 5
|
||||
- name: "bigger-chat-2"
|
||||
port: 18083
|
||||
vram_usage: 29G
|
||||
ram_usage: 5G
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: 0
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
ctx-size: 8192
|
||||
flash-attn: true
|
||||
cache-type-k: q8_0
|
||||
cache-type-v: q8_0
|
||||
model: /media/SanDisk/ai/models_live/Llama-3.3-70B-Instruct-IQ3_XXS.gguf
|
||||
gpu-layers: 9999
|
||||
# model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
|
||||
# gpu-layers-draft: 0
|
||||
# draft-max: 16
|
||||
# draft-min: 5
|
||||
- name: "deep-think"
|
||||
port: 18084
|
||||
vram_usage: 29G
|
||||
ram_usage: 5G
|
||||
env:
|
||||
CUDA_VISIBLE_DEVICES: 0
|
||||
HSA_OVERRIDE_GFX_VERSION: '11.0.0'
|
||||
args:
|
||||
ctx-size: 32768
|
||||
flash-attn: true
|
||||
# cache-type-k: q8_0
|
||||
# cache-type-v: q8_0
|
||||
model: /media/SanDisk/ai/models_live/QwQ-32B-Preview-IQ4_XS.gguf
|
||||
gpu-layers: 9999
|
||||
# model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
|
||||
# gpu-layers-draft: 0
|
||||
# draft-max: 16
|
||||
# draft-min: 5
|
|
@ -1,195 +0,0 @@
|
|||
use std::time::Duration;
|
||||
|
||||
use leptos::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::backend_settings::BackendSettings;
|
||||
use crate::app::components::FormControl;
|
||||
|
||||
#[derive(PartialEq, Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
|
||||
pub struct BackendProcess {
|
||||
id: i64,
|
||||
#[sqlx(flatten)]
|
||||
settings: BackendSettings,
|
||||
status: ProcessStatus,
|
||||
}
|
||||
|
||||
/// The `ProcessStatus` enum represents the various states a process can be in. It includes the following states:
|
||||
/// - `WaitingForStart`: The process is waiting to be started.
|
||||
/// - `Running`: The process is currently running.
|
||||
/// - `WaitingForStop`: The process is waiting to be stopped.
|
||||
/// - `Finished`: The process has completed its execution successfully.
|
||||
/// - `Failed`: The process has failed or encountered an error during execution.
|
||||
///
|
||||
/// This enum is used to keep track of the state of a process in a backend application, allowing for proper management and control over the process lifecycle.
|
||||
#[derive(
|
||||
Default, PartialEq, Debug, Clone, Serialize, Deserialize, sqlx::Type, strum::EnumString,
|
||||
)]
|
||||
#[sqlx(rename_all = "snake_case")]
|
||||
#[strum(serialize_all = "snake_case")]
|
||||
pub enum ProcessStatus {
|
||||
#[default]
|
||||
WaitingForStart,
|
||||
Running,
|
||||
WaitingForStop,
|
||||
Finished,
|
||||
Failed,
|
||||
}
|
||||
|
||||
/// Checks the db every 1000ms to see if stuff has to be started or stopped
|
||||
#[cfg(feature = "ssr")]
|
||||
pub async fn run_starter_task(pool: sqlx::SqlitePool) {
|
||||
use tokio::{
|
||||
time::{self, Instant},
|
||||
try_join,
|
||||
};
|
||||
use tokio_stream::{wrappers::IntervalStream, StreamExt};
|
||||
|
||||
use crate::server::backends::BackendService;
|
||||
let _ = tracing::debug_span!("starter_task");
|
||||
|
||||
tracing::debug!("AAAAAAAAAAAAAAA");
|
||||
return; // TODO ????
|
||||
tracing::debug!("Starter task started");
|
||||
|
||||
let service_handle = BackendService::new();
|
||||
let mut stream = IntervalStream::new(time::interval(Duration::from_millis(1000)));
|
||||
while let Some(instant) = stream.next().await {
|
||||
break; // TODO integrate proxy man ?
|
||||
|
||||
tracing::debug!("fire; instant={:?}", instant);
|
||||
|
||||
let waiting_to_start: Vec<BackendProcess> = sqlx::query_as(
|
||||
"SELECT id, status,
|
||||
se.version, se.model_dir, se.default_model, se.created_at
|
||||
FROM backend_process INNER JOIN settings AS se
|
||||
ON backend_process.settings_version = se.version
|
||||
WHERE status == ?",
|
||||
)
|
||||
.bind(ProcessStatus::WaitingForStart)
|
||||
.fetch_all(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let waiting_to_stop: Vec<BackendProcess> = sqlx::query_as(
|
||||
"SELECT id, status,
|
||||
se.version, se.model_dir, se.default_model, se.created_at
|
||||
FROM backend_process INNER JOIN settings AS se
|
||||
ON backend_process.settings_version = se.version
|
||||
WHERE status == ?",
|
||||
)
|
||||
.bind(ProcessStatus::WaitingForStop)
|
||||
.fetch_all(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
tracing::debug!(?waiting_to_start, ?waiting_to_stop);
|
||||
use futures::future::FutureExt;
|
||||
|
||||
for to_stop in waiting_to_stop.into_iter() {
|
||||
tracing::debug!(?to_stop);
|
||||
let update_query_fut = sqlx::query!(
|
||||
"UPDATE backend_process
|
||||
SET status == ?
|
||||
WHERE id == ?",
|
||||
ProcessStatus::Finished,
|
||||
to_stop.id
|
||||
)
|
||||
.execute(&pool)
|
||||
.map(|f| f.map_err(|_e| anyhow::Error::msg("update fail")));
|
||||
let stop_fut = service_handle.stop();
|
||||
|
||||
tracing::debug!(msg = "Stopping", ?to_stop);
|
||||
try_join!(update_query_fut, stop_fut).expect("stop fail");
|
||||
tracing::debug!(msg = "Stopped!", ?to_stop);
|
||||
}
|
||||
|
||||
for to_start in waiting_to_start.into_iter() {
|
||||
// TODO Pass args to backend
|
||||
// TODO Save services, therefore allowing multiples
|
||||
tracing::debug!(?to_start);
|
||||
|
||||
let update_query_fut = sqlx::query!(
|
||||
"UPDATE backend_process
|
||||
SET status == ?
|
||||
WHERE id == ?",
|
||||
ProcessStatus::Running,
|
||||
to_start.id
|
||||
)
|
||||
.execute(&pool)
|
||||
.map(|f| f.map_err(|_e| anyhow::Error::msg("update fail")));
|
||||
|
||||
tracing::debug!(msg = "Starting", ?to_start);
|
||||
let start_fut = service_handle.start();
|
||||
try_join!(update_query_fut, start_fut).expect("start fail");
|
||||
tracing::debug!(msg = "Started!", ?to_start);
|
||||
}
|
||||
tracing::debug!(msg="Starter task iteration finished", iteration_dur=?Instant::now().duration_since(instant))
|
||||
}
|
||||
}
|
||||
|
||||
/// Starts a new process and updates the process status.
|
||||
#[server]
|
||||
pub async fn start_process() -> Result<(), ServerFnError> {
|
||||
use crate::server::pool;
|
||||
|
||||
let pool = pool()?;
|
||||
|
||||
// Mark every process that has not finished yet as waiting_for_stop
|
||||
sqlx::query!(
|
||||
"UPDATE backend_process SET status = 'waiting_for_stop' WHERE status != 'finished'"
|
||||
)
|
||||
.execute(&pool)
|
||||
.await?;
|
||||
|
||||
// Get the current newest settings version
|
||||
let settings_version =
|
||||
sqlx::query_scalar!("SELECT version FROM settings ORDER BY version DESC LIMIT 1")
|
||||
.fetch_one(&pool)
|
||||
.await?;
|
||||
|
||||
// Create a new ProcessStatus with the current newest settings version and initial status of waiting
|
||||
sqlx::query!(
|
||||
"INSERT INTO backend_process (settings_version, status) VALUES (?, 'waiting_for_start')",
|
||||
settings_version
|
||||
)
|
||||
.execute(&pool)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Stops all processes and updates the process status.
|
||||
#[server]
|
||||
pub async fn stop_process() -> Result<(), ServerFnError> {
|
||||
use crate::server::pool;
|
||||
|
||||
let pool = pool()?;
|
||||
|
||||
// Mark all processes as waiting_for_stop
|
||||
sqlx::query!("UPDATE backend_process SET status = 'waiting_for_stop'")
|
||||
.execute(&pool)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Fetches the status of the newest process.
|
||||
#[server(ProcessStatusFn)]
|
||||
pub async fn process_status() -> Result<ProcessStatus, ServerFnError> {
|
||||
use crate::server::pool;
|
||||
|
||||
let pool = pool()?;
|
||||
|
||||
let status: Option<ProcessStatus> =
|
||||
sqlx::query_scalar!("SELECT status FROM backend_process ORDER BY id DESC LIMIT 1")
|
||||
.fetch_optional(&pool)
|
||||
.await?
|
||||
.map(|str| str.parse())
|
||||
.transpose()?;
|
||||
|
||||
match status {
|
||||
Some(status) => Ok(status),
|
||||
None => Ok(ProcessStatus::Finished),
|
||||
}
|
||||
}
|
|
@ -1,14 +1,12 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
mod backend_process;
|
||||
mod backend_settings;
|
||||
mod chat;
|
||||
// pub mod backend_process;
|
||||
// pub mod backend_settings;
|
||||
// pub mod chat;
|
||||
|
||||
pub use backend_process::*;
|
||||
pub use backend_settings::*;
|
||||
pub use chat::*;
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ fn ChatMessageBubble(
|
|||
if edit_mode.get() {
|
||||
view! { <p inner_html=move || { msg_str() }></p> }
|
||||
} else {
|
||||
view! { <p inner_html=move || { md_str() }></p> }
|
||||
view! { <p class="prose" inner_html=move || { md_str() }></p> }
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -149,13 +149,23 @@ pub fn ChatHistory(
|
|||
response: RwSignal<String>,
|
||||
receiving_response: RwSignal<bool>,
|
||||
) -> impl IntoView {
|
||||
use pulldown_cmark;
|
||||
let md_str = move || {
|
||||
let owned_str = response();
|
||||
let parser = pulldown_cmark::Parser::new(&owned_str);
|
||||
let mut md_output = String::new();
|
||||
pulldown_cmark::html::push_html(&mut md_output, parser);
|
||||
|
||||
md_output
|
||||
};
|
||||
|
||||
let streaming_response_elem = move || {
|
||||
if receiving_response() {
|
||||
view! {
|
||||
<div class="chat chat-start">
|
||||
<div class="chat-header">"Assistant"</div>
|
||||
<div class="chat-bubble">
|
||||
<span inner_html=response></span>
|
||||
<span class="prose" inner_html=move || { md_str() }></span>
|
||||
<span class="loading loading-ball loading-xs"></span>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -4,10 +4,8 @@ use leptos_router::*;
|
|||
use crate::app::{components::svgs::*, RefreshGen};
|
||||
|
||||
mod backend;
|
||||
pub mod process;
|
||||
|
||||
use backend::SettingsForm;
|
||||
use process::ProcessForm;
|
||||
|
||||
#[component]
|
||||
fn TabButton(path: &'static str, label: &'static str) -> impl IntoView {
|
||||
|
@ -33,7 +31,8 @@ fn BackendSettingsPage() -> impl IntoView {
|
|||
view! {
|
||||
<div class="flex justify-around">
|
||||
<div class="flex flex-col p-4 w-1/3">
|
||||
<ProcessForm />
|
||||
<p>"WIP"</p>
|
||||
// <ProcessForm />
|
||||
</div>
|
||||
<div class="flex flex-col p-4 w-2/3">
|
||||
<SettingsForm />
|
||||
|
|
|
@ -1,81 +0,0 @@
|
|||
use std::time::Duration;
|
||||
|
||||
use leptos::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
api::{process_status, ProcessStatus, StartProcess, StopProcess},
|
||||
app::components::FormControl,
|
||||
};
|
||||
|
||||
/// A form component to start and stop the process.
|
||||
#[component]
|
||||
pub fn ProcessForm() -> impl IntoView {
|
||||
let gen =
|
||||
use_context::<RwSignal<super::RefreshGen>>().expect("to have found the setter provided");
|
||||
let _ = gloo_timers::callback::Interval::new(1000, move || {
|
||||
gen.update(|gen| *gen = crate::app::RefreshGen(gen.0 + 1));
|
||||
})
|
||||
.forget();
|
||||
|
||||
let status = create_local_resource(gen, |_| async move {
|
||||
process_status().await.unwrap_or(ProcessStatus::Finished)
|
||||
});
|
||||
|
||||
let is_running = move || match status.get().unwrap_or(ProcessStatus::Finished) {
|
||||
ProcessStatus::WaitingForStart => true,
|
||||
ProcessStatus::Running => true,
|
||||
ProcessStatus::WaitingForStop => false,
|
||||
ProcessStatus::Finished => false,
|
||||
ProcessStatus::Failed => false,
|
||||
};
|
||||
|
||||
let toggle_color = move || match status.get().unwrap_or(ProcessStatus::Finished) {
|
||||
ProcessStatus::WaitingForStart => "toggle-info",
|
||||
ProcessStatus::Running => "toggle-success",
|
||||
ProcessStatus::WaitingForStop => "toggle-warning",
|
||||
ProcessStatus::Finished => "toggle-info",
|
||||
ProcessStatus::Failed => "toggle-error",
|
||||
};
|
||||
|
||||
let stop_process_action = create_server_action::<StopProcess>();
|
||||
let start_process_action = create_server_action::<StartProcess>();
|
||||
|
||||
let on_toggle = move |_| {
|
||||
if is_running() {
|
||||
stop_process_action.dispatch(StopProcess {});
|
||||
} else {
|
||||
start_process_action.dispatch(StartProcess {});
|
||||
}
|
||||
gen.update(|gen| *gen = crate::app::RefreshGen(gen.0 + 1));
|
||||
};
|
||||
|
||||
let status_text = move || {
|
||||
if let Some(status) = status.get() {
|
||||
match status {
|
||||
ProcessStatus::WaitingForStart => "waiting to start",
|
||||
ProcessStatus::Running => "running",
|
||||
ProcessStatus::WaitingForStop => "waiting to stop",
|
||||
ProcessStatus::Finished => "finished",
|
||||
ProcessStatus::Failed => "failed",
|
||||
}
|
||||
} else {
|
||||
"Loading"
|
||||
}
|
||||
};
|
||||
|
||||
view! {
|
||||
<FormControl label="Running">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked=is_running
|
||||
on:click=on_toggle
|
||||
class=move || format!("toggle {}", toggle_color())
|
||||
/>
|
||||
</FormControl>
|
||||
<FormControl label="Status">
|
||||
|
||||
<span class="label-text">{status_text}</span>
|
||||
</FormControl>
|
||||
}
|
||||
}
|
|
@ -95,12 +95,18 @@ async fn main() -> anyhow::Result<()> {
|
|||
|
||||
let serve = axum::serve(listener, app(leptos_options).await.into_make_service()).into_future();
|
||||
|
||||
let backend_fut = async move { Ok::<(), anyhow::Error>(()) };
|
||||
let proxy_man_fut = async move {
|
||||
use llama_proxy_man::{config::AppConfig, start_server};
|
||||
let config = AppConfig::default_figment();
|
||||
start_server(config).await;
|
||||
|
||||
Ok::<(), anyhow::Error>(())
|
||||
};
|
||||
|
||||
let (serve_res, backend_res, wry_res) = tokio::try_join!(
|
||||
tokio::spawn(serve),
|
||||
tokio::task::spawn_blocking(wry_main),
|
||||
tokio::spawn(backend_fut),
|
||||
tokio::spawn(proxy_man_fut),
|
||||
)?;
|
||||
|
||||
serve_res?;
|
||||
|
|
|
@ -99,7 +99,6 @@ async fn do_chat_request(chat: Chat, sender: mpsc::Sender<ChannelMessage>) -> an
|
|||
.expect("channel fail");
|
||||
|
||||
es.close();
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,6 @@ use tracing::Level;
|
|||
|
||||
use self::fileserv::file_and_error_handler;
|
||||
use crate::{
|
||||
api::run_starter_task,
|
||||
app::*,
|
||||
server::middleware::{LoggingLayer, LoggingService},
|
||||
};
|
||||
|
@ -117,8 +116,9 @@ pub async fn app(leptos_options: LeptosOptions) -> Router {
|
|||
let pool = new_pool().await.expect("pool err");
|
||||
|
||||
// // TODO move this out of server(pool has to be moved out too)
|
||||
let task = run_starter_task(pool.clone());
|
||||
tokio::task::spawn(task);
|
||||
// FIXME: Should proxy_man move here ?
|
||||
// let task = run_starter_task(pool.clone());
|
||||
// tokio::task::spawn(task);
|
||||
|
||||
let app_state = AppState {
|
||||
leptos_options,
|
||||
|
|
|
@ -30,3 +30,4 @@ reqwest-middleware = { version = "0.3.3", features = ["charset", "http2", "json"
|
|||
itertools = "0.13.0"
|
||||
openport = { version = "0.1.1", features = ["rand"] }
|
||||
derive_more = { version = "2.0.1", features = ["deref"] }
|
||||
figment = { version = "0.10.19", features = ["env", "json", "toml", "yaml"] }
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
use serde::Deserialize;
|
||||
use std::{collections::HashMap, fs};
|
||||
|
||||
use figment::{
|
||||
providers::{Env, Format, Json, Toml, Yaml},
|
||||
Figment,
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct AppConfig {
|
||||
pub system_resources: SystemResources,
|
||||
|
@ -8,8 +13,21 @@ pub struct AppConfig {
|
|||
}
|
||||
|
||||
impl AppConfig {
|
||||
pub fn default_figment() -> Self {
|
||||
let config: Result<Self, _> = Figment::new()
|
||||
.merge(Toml::file("config.toml"))
|
||||
.merge(Yaml::file("config.yaml"))
|
||||
.merge(Env::prefixed("LLAMA_FORGE_"))
|
||||
.join(Json::file("Cargo.json"))
|
||||
.extract();
|
||||
|
||||
tracing::info!(?config);
|
||||
|
||||
config.unwrap().assign_internal_ports()
|
||||
}
|
||||
|
||||
pub fn default_from_pwd_yml() -> Self {
|
||||
let config_str = fs::read_to_string("config.yaml").expect("Failed to read config.yaml");
|
||||
let config_str = fs::read_to_string("./config.yaml").expect("Failed to read config.yaml");
|
||||
serde_yaml::from_str::<Self>(&config_str)
|
||||
.expect("Failed to parse config.yaml")
|
||||
.assign_internal_ports()
|
||||
|
|
|
@ -17,7 +17,7 @@ use tower_http::trace::{
|
|||
use tracing::Level;
|
||||
|
||||
/// Creates an Axum application to handle inference requests for a specific model.
|
||||
pub fn create_app(spec: &ModelSpec, state: AppState) -> Router {
|
||||
pub fn axum_router(spec: &ModelSpec, state: AppState) -> Router {
|
||||
Router::new()
|
||||
.route(
|
||||
"/",
|
||||
|
@ -55,7 +55,7 @@ pub async fn start_server(config: AppConfig) {
|
|||
let spec = spec.clone();
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
let app = create_app(&spec, state);
|
||||
let app = axum_router(&spec, state);
|
||||
let addr = SocketAddr::from(([0, 0, 0, 0], spec.port));
|
||||
tracing::info!(msg = "Listening", ?spec);
|
||||
let listener = tokio::net::TcpListener::bind(&addr).await.unwrap();
|
||||
|
|
|
@ -93,7 +93,7 @@ pub fn initialize_logger() {
|
|||
.from_env_lossy();
|
||||
|
||||
tracing_subscriber::fmt()
|
||||
.compact()
|
||||
.pretty()
|
||||
.with_env_filter(env_filter)
|
||||
.init();
|
||||
});
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
use emacs::{defun, Env, IntoLisp, Result, Value};
|
||||
use std::sync::Once;
|
||||
use std::sync::OnceLock;
|
||||
use tokio::runtime::{Builder, Runtime};
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue