From 8d8923294d1e88e4a79267f1fdbe9fe111624eb7 Mon Sep 17 00:00:00 2001
From: Tristan Druyen
Date: Tue, 11 Feb 2025 04:22:14 +0100
Subject: [PATCH] feat: Embedded proxy_man for forge

- Add `figment` for config YAMLs
- Small `Makefile.toml` fixes (docset still seems broken)
- Copy `config.yaml` to the workspace root & forge
- Embed proxy_man in forge
- Remove `backend_process.rs` and `process.rs`
- Update `llama_proxy_man/Cargo.toml` and `config.rs` for new dependencies
- Format

---
 Cargo.lock                                    |  65 ++++++
 Makefile.toml                                 |  21 +-
 config.yaml                                   | 108 ++++++++++
 llama_forge_rs/Cargo.toml                     |   2 +
 llama_forge_rs/config.yaml                    | 108 ++++++++++
 llama_forge_rs/src/api/backend_process.rs     | 195 ------------------
 llama_forge_rs/src/api/mod.rs                 |   2 -
 llama_forge_rs/src/app/pages/chat.rs          |  14 +-
 llama_forge_rs/src/app/pages/settings/mod.rs  |   5 +-
 .../src/app/pages/settings/process.rs         |  81 --------
 llama_forge_rs/src/main.rs                    |  10 +-
 .../src/server/backends/llama_chat.rs         |   1 -
 llama_forge_rs/src/server/mod.rs              |   6 +-
 llama_proxy_man/Cargo.toml                    |   1 +
 llama_proxy_man/src/config.rs                 |  20 +-
 llama_proxy_man/src/lib.rs                    |   4 +-
 llama_proxy_man/src/logging.rs                |   2 +-
 redvault_el_rs/src/lib.rs                     |   1 -
 18 files changed, 347 insertions(+), 299 deletions(-)
 create mode 100644 config.yaml
 create mode 100644 llama_forge_rs/config.yaml
 delete mode 100644 llama_forge_rs/src/api/backend_process.rs
 delete mode 100644 llama_forge_rs/src/app/pages/settings/process.rs

diff --git a/Cargo.lock b/Cargo.lock
index ac5cac1..75ab1af 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -238,6 +238,15 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "atomic"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994"
+dependencies = [
+ "bytemuck",
+]
+
 [[package]]
 name = "atomic-waker"
 version = "1.1.2"
@@ -1422,6 +1431,22 @@ dependencies = [
  "rustc_version 0.4.1",
 ]
 
+[[package]]
+name = "figment"
+version = "0.10.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3"
+dependencies = [
+ "atomic",
+ "pear",
+ "serde",
+ "serde_json",
+ "serde_yaml",
+ "toml",
+ "uncased",
+ "version_check",
+]
+
 [[package]]
 name = "flate2"
 version = "1.0.35"
@@ -2492,6 +2517,12 @@ dependencies = [
  "hashbrown 0.15.2",
 ]
 
+[[package]]
+name = "inlinable_string"
+version = "0.1.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
+
 [[package]]
 name = "instant"
 version = "0.1.13"
@@ -2981,6 +3012,7 @@ dependencies = [
  "leptos_axum",
  "leptos_meta",
  "leptos_router",
+ "llama_proxy_man",
  "mime_guess",
  "once_cell",
  "pin-project-lite",
@@ -3023,6 +3055,7 @@ dependencies = [
  "anyhow",
  "axum",
  "derive_more 2.0.1",
+ "figment",
  "futures",
  "hyper",
  "itertools 0.13.0",
@@ -3688,6 +3721,29 @@ version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
 
+[[package]]
+name = "pear"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467"
+dependencies = [
+ "inlinable_string",
+ "pear_codegen",
+ "yansi",
+]
+
+[[package]]
+name = "pear_codegen"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147"
+dependencies = [
+ "proc-macro2",
+ "proc-macro2-diagnostics",
+ "quote",
+ "syn 2.0.98",
+]
+
 [[package]]
 name = "pem-rfc7468"
 version = "0.7.0"
@@ -6178,6 +6234,15 @@ version = "1.17.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
 
+[[package]]
+name = "uncased"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697"
+dependencies = [
+ "version_check",
+]
+
 [[package]]
 name = "unicase"
 version = "2.8.1"
diff --git a/Makefile.toml b/Makefile.toml
index d94ab39..81e2c91 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -16,7 +16,7 @@ script = "echo test123-${CARGO_MAKE_CRATE_CURRENT_WORKSPACE_MEMBER}"
 
 [tasks.refresh-all]
 description = "clean the repo and rebuild everything"
-dependencies = ["clean", "all", "lall"]
+dependencies = ["clean", "all"]
 
 [tasks.mksitedir]
 workspace = false
@@ -24,10 +24,21 @@ script = "mkdir -p ./target/site"
 
 [tasks.all]
 description = "rebuild everything"
-dependencies = ["mksitedir", "docset", "check", "clippy", "build", "build-release", "format", "lformat", "lbuild", "lbuild-release"]
+dependencies = [
+    "mksitedir",
+    "docset",
+    "check",
+    "clippy",
+    "build",
+    "build-release",
+    "format",
+    "lformat",
+    "lbuild",
+    "lbuild-release",
+]
 
 [tasks.lformat]
-scripts = "leptosfmt . && rustywind . --write"
+script = "leptosfmt . && rustywind . --write"
 
 [tasks.lbuild]
 category = "Build"
@@ -37,7 +48,7 @@
 set current "$CARGO_MAKE_CRATE_CURRENT_WORKSPACE_MEMBER"
 if contains "$LEPTOS_REPOS" $current
     cargo leptos build
 else
-    $current is not a leptos repo!
+    echo $current is not a leptos repo!
 end
 '''
@@ -60,7 +71,7 @@ dependencies = ["make-docset", "cp-docset"]
 
 [tasks.make-docset]
 workspace = false
-script = "cargo docset --workspace --platform-family redvault-ai "
+script = "cargo docset --workspace --no-clean --platform-family redvault-ai && sleep 1 && sync"
 
 [tasks.cp-docset]
 workspace = false
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..900dc98
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,108 @@
+system_resources:
+  ram: 48G
+  vram: 30G
+model_specs:
+  - name: "tabby-code"
+    port: 18080
+    # internal_port: 28080 # Optional
+    autostart: "true"
+    vram_usage: "26.7G" # Coder-32B + draft 0.5B
+    ram_usage: "3G" # Coder-32B + draft 0.5B
+    # vram_usage: 8.25G # Coder-7B
+    # ram_usage: 2.6G # Coder-7B
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      # host: 0.0.0.0
+      flash-attn: "true"
+      ctx-size: "32768"
+      model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-32B-Instruct-IQ4_XS.gguf
+      gpu-layers: "9999"
+      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Q8_0.gguf
+      gpu-layers-draft: "9999"
+      draft-max: "16"
+      draft-min: "5"
+  - name: "tabby-embeddings"
+    port: 18081
+    vram_usage: "0.4G"
+    ram_usage: "2.5G"
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      model: "/media/SanDisk/ai/models_live/nomic-embed-text-v1-f32.gguf"
+      gpu-layers: "-1"
+      flash-attn: "true"
+      # host: 0.0.0.0
+      embeddings: "true"
+  - name: "big-chat"
+    port: 18082
+    vram_usage: 26.5G
+    ram_usage: 2.5G
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: "16384"
+      flash-attn: "true"
+      model: /media/SanDisk/ai/models_live/Qwen2.5-32B-Instruct-Q5_K_L.gguf
+      gpu-layers: "9999"
+      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-0.5B-Instruct-Q8_0.gguf
+      gpu-layers-draft: "9999"
+      # draft-max: "16"
+      # draft-min: "5"
+  - name: "bigger-chat"
+    port: 18085
+    vram_usage: 29G
+    ram_usage: 5G
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: "8192"
+      flash-attn: "true"
+      cache-type-k: q8_0
+      cache-type-v: q8_0
+      model: /media/SanDisk/ai/models_live/Llama-3.1-Nemotron-70B-Instruct-HF-IQ3_XXS.gguf
+      gpu-layers: "9999"
+      model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
+      gpu-layers-draft: "0"
+      # draft-max: "16"
+      # draft-min: "5"
+  - name: "bigger-chat-2"
+    port: 18083
+    vram_usage: 29G
+    ram_usage: 5G
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: "8192"
+      flash-attn: "true"
+      cache-type-k: q8_0
+      cache-type-v: q8_0
+      model: /media/SanDisk/ai/models_live/Llama-3.3-70B-Instruct-IQ3_XXS.gguf
+      gpu-layers: "9999"
+      # model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
+      # gpu-layers-draft: 0
+      # draft-max: "16"
+      # draft-min: "5"
+  - name: "deep-think"
+    port: 18084
+    vram_usage: 29G
+    ram_usage: 5G
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: "32768"
+      flash-attn: "true"
+      # cache-type-k: q8_0
+      # cache-type-v: q8_0
+      model: /media/SanDisk/ai/models_live/QwQ-32B-Preview-IQ4_XS.gguf
+      gpu-layers: "9999"
+      # model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
+      # gpu-layers-draft: 0
+      # draft-max: "16"
+      # draft-min: "5"
diff --git a/llama_forge_rs/Cargo.toml b/llama_forge_rs/Cargo.toml
index bb0afe4..986a55a 100644
--- a/llama_forge_rs/Cargo.toml
+++ b/llama_forge_rs/Cargo.toml
@@ -13,6 +13,7 @@ version.workspace=true
 crate-type = ["cdylib", "rlib"]
 
 [dependencies]
+llama_proxy_man = { path = "../llama_proxy_man", optional = true }
 wasm-bindgen = "=0.2.100"
 # TODO Update to 0.7
 leptos = { version = "0.6", features = [
@@ -93,6 +94,7 @@ pulldown-cmark = { version = "0.12.2", features = ["serde"] }
 default = ["ssr"]
 hydrate = ["leptos/hydrate", "leptos_meta/hydrate", "leptos_router/hydrate"]
 ssr = [
+    "dep:llama_proxy_man",
     "dep:async-broadcast",
     "dep:axum",
     "dep:dashmap",
diff --git a/llama_forge_rs/config.yaml b/llama_forge_rs/config.yaml
new file mode 100644
index 0000000..a19d518
--- /dev/null
+++ b/llama_forge_rs/config.yaml
@@ -0,0 +1,108 @@
+system_resources:
+  ram: 48G
+  vram: 30G
+model_specs:
+  - name: "tabby-code"
+    port: 18080
+    # internal_port: 28080 # Optional
+    autostart: true
+    vram_usage: 26.7G # Coder-32B + draft 0.5B
+    ram_usage: 3G # Coder-32B + draft 0.5B
+    # vram_usage: 8.25G # Coder-7B
+    # ram_usage: 2.6G # Coder-7B
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      # host: 0.0.0.0
+      flash-attn: true
+      ctx-size: 32768
+      model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-32B-Instruct-IQ4_XS.gguf
+      gpu-layers: 9999
+      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Q8_0.gguf
+      gpu-layers-draft: 9999
+      draft-max: 16
+      draft-min: 5
+  - name: "tabby-embeddings"
+    port: 18081
+    vram_usage: 0.4G
+    ram_usage: 2.5G
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      model: /media/SanDisk/ai/models_live/nomic-embed-text-v1-f32.gguf
+      gpu-layers: -1
+      flash-attn: true
+      # host: 0.0.0.0
+      embeddings: true
+  - name: "big-chat"
+    port: 18082
+    vram_usage: 26.5G
+    ram_usage: 2.5G
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: 16384
+      flash-attn: true
+      model: /media/SanDisk/ai/models_live/Qwen2.5-32B-Instruct-Q5_K_L.gguf
+      gpu-layers: 9999
+      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-0.5B-Instruct-Q8_0.gguf
+      gpu-layers-draft: 9999
+      # draft-max: 16
+      # draft-min: 5
+  - name: "bigger-chat"
+    port: 18085
+    vram_usage: 29G
+    ram_usage: 5G
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: 8192
+      flash-attn: true
+      cache-type-k: q8_0
+      cache-type-v: q8_0
+      model: /media/SanDisk/ai/models_live/Llama-3.1-Nemotron-70B-Instruct-HF-IQ3_XXS.gguf
+      gpu-layers: 9999
+      model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
+      gpu-layers-draft: 0
+      # draft-max: 16
+      # draft-min: 5
+  - name: "bigger-chat-2"
+    port: 18083
+    vram_usage: 29G
+    ram_usage: 5G
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: 8192
+      flash-attn: true
+      cache-type-k: q8_0
+      cache-type-v: q8_0
+      model: /media/SanDisk/ai/models_live/Llama-3.3-70B-Instruct-IQ3_XXS.gguf
+      gpu-layers: 9999
+      # model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
+      # gpu-layers-draft: 0
+      # draft-max: 16
+      # draft-min: 5
+  - name: "deep-think"
+    port: 18084
+    vram_usage: 29G
+    ram_usage: 5G
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: 32768
+      flash-attn: true
+      # cache-type-k: q8_0
+      # cache-type-v: q8_0
+      model: /media/SanDisk/ai/models_live/QwQ-32B-Preview-IQ4_XS.gguf
+      gpu-layers: 9999
+      # model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
+      # gpu-layers-draft: 0
+      # draft-max: 16
+      # draft-min: 5
diff --git a/llama_forge_rs/src/api/backend_process.rs b/llama_forge_rs/src/api/backend_process.rs
deleted file mode 100644
index 7179155..0000000
--- a/llama_forge_rs/src/api/backend_process.rs
+++ /dev/null
@@ -1,195 +0,0 @@
-use std::time::Duration;
-
-use leptos::*;
-use serde::{Deserialize, Serialize};
-
-use super::backend_settings::BackendSettings;
-use crate::app::components::FormControl;
-
-#[derive(PartialEq, Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
-pub struct BackendProcess {
-    id: i64,
-    #[sqlx(flatten)]
-    settings: BackendSettings,
-    status: ProcessStatus,
-}
-
-/// The `ProcessStatus` enum represents the various states a process can be in. It includes the following states:
-/// - `WaitingForStart`: The process is waiting to be started.
-/// - `Running`: The process is currently running.
-/// - `WaitingForStop`: The process is waiting to be stopped.
-/// - `Finished`: The process has completed its execution successfully.
-/// - `Failed`: The process has failed or encountered an error during execution.
-///
-/// This enum is used to keep track of the state of a process in a backend application, allowing for proper management and control over the process lifecycle.
-#[derive(
-    Default, PartialEq, Debug, Clone, Serialize, Deserialize, sqlx::Type, strum::EnumString,
-)]
-#[sqlx(rename_all = "snake_case")]
-#[strum(serialize_all = "snake_case")]
-pub enum ProcessStatus {
-    #[default]
-    WaitingForStart,
-    Running,
-    WaitingForStop,
-    Finished,
-    Failed,
-}
-
-/// CHecks the db every 100ms to see if stuff has to be started
-#[cfg(feature = "ssr")]
-pub async fn run_starter_task(pool: sqlx::SqlitePool) {
-    use tokio::{
-        time::{self, Instant},
-        try_join,
-    };
-    use tokio_stream::{wrappers::IntervalStream, StreamExt};
-
-    use crate::server::backends::BackendService;
-    let _ = tracing::debug_span!("starter_task");
-
-    tracing::debug!("AAAAAAAAAAAAAAA");
-    return; // TODO ????
-    tracing::debug!("Starter task started");
-
-    let service_handle = BackendService::new();
-    let mut stream = IntervalStream::new(time::interval(Duration::from_millis(1000)));
-    while let Some(instant) = stream.next().await {
-        break; // TODO integrate proxy man ?
-
-        tracing::debug!("fire; instant={:?}", instant);
-
-        let waiting_to_start: Vec<BackendProcess> = sqlx::query_as(
-            "SELECT id, status,
-                se.version, se.model_dir, se.default_model, se.created_at
-            FROM backend_process INNER JOIN settings AS se
-                ON backend_process.settings_version = se.version
-            WHERE status == ?",
-        )
-        .bind(ProcessStatus::WaitingForStart)
-        .fetch_all(&pool)
-        .await
-        .unwrap();
-
-        let waiting_to_stop: Vec<BackendProcess> = sqlx::query_as(
-            "SELECT id, status,
-                se.version, se.model_dir, se.default_model, se.created_at
-            FROM backend_process INNER JOIN settings AS se
-                ON backend_process.settings_version = se.version
-            WHERE status == ?",
-        )
-        .bind(ProcessStatus::WaitingForStop)
-        .fetch_all(&pool)
-        .await
-        .unwrap();
-
-        tracing::debug!(?waiting_to_start, ?waiting_to_stop);
-        use futures::future::FutureExt;
-
-        for to_stop in waiting_to_stop.into_iter() {
-            tracing::debug!(?to_stop);
-            let update_query_fut = sqlx::query!(
-                "UPDATE backend_process
-                SET status == ?
-                WHERE id == ?",
-                ProcessStatus::Finished,
-                to_stop.id
-            )
-            .execute(&pool)
-            .map(|f| f.map_err(|_e| anyhow::Error::msg("update fail")));
-            let stop_fut = service_handle.stop();
-
-            tracing::debug!(msg = "Stopping", ?to_stop);
-            try_join!(update_query_fut, stop_fut).expect("stop fail");
-            tracing::debug!(msg = "Stopped!", ?to_stop);
-        }
-
-        for to_start in waiting_to_start.into_iter() {
-            // TODO Pass args to backend
-            // TODO Save Services tehrefore allow multiples
-            tracing::debug!(?to_start);
-
-            let update_query_fut = sqlx::query!(
-                "UPDATE backend_process
-                SET status == ?
-                WHERE id == ?",
-                ProcessStatus::Running,
-                to_start.id
-            )
-            .execute(&pool)
-            .map(|f| f.map_err(|_e| anyhow::Error::msg("update fail")));
-
-            tracing::debug!(msg = "Starting", ?to_start);
-            let start_fut = service_handle.start();
-            try_join!(update_query_fut, start_fut).expect("start fail");
-            tracing::debug!(msg = "Started!", ?to_start);
-        }
-        tracing::debug!(msg="Starter task iteration finished", iteration_dur=?Instant::now().duration_since(instant))
-    }
-}
-
-/// Starts a new process and updates the process status.
-#[server]
-pub async fn start_process() -> Result<(), ServerFnError> {
-    use crate::server::pool;
-
-    let pool = pool()?;
-
-    // Set all existing processes to Finished
-    sqlx::query!(
-        "UPDATE backend_process SET status = 'waiting_for_stop' WHERE status != 'finished'"
-    )
-    .execute(&pool)
-    .await?;
-
-    // Get the current newest settings version
-    let settings_version =
-        sqlx::query_scalar!("SELECT version FROM settings ORDER BY version DESC LIMIT 1")
-            .fetch_one(&pool)
-            .await?;
-
-    // Create a new ProcessStatus with the current newest settings version and initial status of waiting
-    sqlx::query!(
-        "INSERT INTO backend_process (settings_version, status) VALUES (?, 'waiting_for_start')",
-        settings_version
-    )
-    .execute(&pool)
-    .await?;
-
-    Ok(())
-}
-
-/// Stops all processes and updates the process status.
-#[server]
-pub async fn stop_process() -> Result<(), ServerFnError> {
-    use crate::server::pool;
-
-    let pool = pool()?;
-
-    // Set all processes to finished
-    sqlx::query!("UPDATE backend_process SET status = 'waiting_for_stop'")
-        .execute(&pool)
-        .await?;
-
-    Ok(())
-}
-
-/// Fetches the status of the newest process.
-#[server(ProcessStatusFn)]
-pub async fn process_status() -> Result<ProcessStatus, ServerFnError> {
-    use crate::server::pool;
-
-    let pool = pool()?;
-
-    let status: Option<ProcessStatus> =
-        sqlx::query_scalar!("SELECT status FROM backend_process ORDER BY id DESC LIMIT 1")
-            .fetch_optional(&pool)
-            .await?
-            .map(|str| str.parse())
-            .transpose()?;
-
-    match status {
-        Some(status) => Ok(status),
-        None => Ok(ProcessStatus::Finished),
-    }
-}
diff --git a/llama_forge_rs/src/api/mod.rs b/llama_forge_rs/src/api/mod.rs
index fb61682..9ccf76c 100644
--- a/llama_forge_rs/src/api/mod.rs
+++ b/llama_forge_rs/src/api/mod.rs
@@ -1,14 +1,12 @@
 use serde::{Deserialize, Serialize};
 use uuid::Uuid;
 
-mod backend_process;
 mod backend_settings;
 mod chat;
 
 // pub mod backend_process;
 // pub mod backend_settings;
 // pub mod chat;
 
-pub use backend_process::*;
 pub use backend_settings::*;
 pub use chat::*;
diff --git a/llama_forge_rs/src/app/pages/chat.rs b/llama_forge_rs/src/app/pages/chat.rs
index 4bfe255..b6b44dc 100644
--- a/llama_forge_rs/src/app/pages/chat.rs
+++ b/llama_forge_rs/src/app/pages/chat.rs
@@ -51,7 +51,7 @@ fn ChatMessageBubble(
     if edit_mode.get() {
         view! {

} } else { - view! {

} + view! {

         }
     }
 };
 
@@ -149,13 +149,23 @@ pub fn ChatHistory(
     response: RwSignal<String>,
     receiving_response: RwSignal<bool>,
 ) -> impl IntoView {
+    use pulldown_cmark;
+    let md_str = move || {
+        let owned_str = response();
+        let parser = pulldown_cmark::Parser::new(&owned_str);
+        let mut md_output = String::new();
+        pulldown_cmark::html::push_html(&mut md_output, parser);
+
+        md_output
+    };
+
     let streaming_response_elem = move || {
         if receiving_response() {
             view! {
"Assistant"
- +
diff --git a/llama_forge_rs/src/app/pages/settings/mod.rs b/llama_forge_rs/src/app/pages/settings/mod.rs
index a023620..7d5bc47 100644
--- a/llama_forge_rs/src/app/pages/settings/mod.rs
+++ b/llama_forge_rs/src/app/pages/settings/mod.rs
@@ -4,10 +4,8 @@ use leptos_router::*;
 use crate::app::{components::svgs::*, RefreshGen};
 
 mod backend;
-pub mod process;
 
 use backend::SettingsForm;
-use process::ProcessForm;
 
 #[component]
 fn TabButton(path: &'static str, label: &'static str) -> impl IntoView {
@@ -33,7 +31,8 @@ fn BackendSettingsPage() -> impl IntoView {
     view! {
- +

"WIP"

+ //
diff --git a/llama_forge_rs/src/app/pages/settings/process.rs b/llama_forge_rs/src/app/pages/settings/process.rs
deleted file mode 100644
index a770dd6..0000000
--- a/llama_forge_rs/src/app/pages/settings/process.rs
+++ /dev/null
@@ -1,81 +0,0 @@
-use std::time::Duration;
-
-use leptos::*;
-use serde::{Deserialize, Serialize};
-
-use crate::{
-    api::{process_status, ProcessStatus, StartProcess, StopProcess},
-    app::components::FormControl,
-};
-
-/// A form component to start and stop the process.
-#[component]
-pub fn ProcessForm() -> impl IntoView {
-    let gen =
-        use_context::<RwSignal<crate::app::RefreshGen>>().expect("to have found the setter provided");
-    let _ = gloo_timers::callback::Interval::new(1000, move || {
-        gen.update(|gen| *gen = crate::app::RefreshGen(gen.0 + 1));
-    })
-    .forget();
-
-    let status = create_local_resource(gen, |_| async move {
-        process_status().await.unwrap_or(ProcessStatus::Finished)
-    });
-
-    let is_running = move || match status.get().unwrap_or(ProcessStatus::Finished) {
-        ProcessStatus::WaitingForStart => true,
-        ProcessStatus::Running => true,
-        ProcessStatus::WaitingForStop => false,
-        ProcessStatus::Finished => false,
-        ProcessStatus::Failed => false,
-    };
-
-    let toggle_color = move || match status.get().unwrap_or(ProcessStatus::Finished) {
-        ProcessStatus::WaitingForStart => "toggle-info",
-        ProcessStatus::Running => "toggle-success",
-        ProcessStatus::WaitingForStop => "toggle-warning",
-        ProcessStatus::Finished => "toggle-info",
-        ProcessStatus::Failed => "toggle-error",
-    };
-
-    let stop_process_action = create_server_action::<StopProcess>();
-    let start_process_action = create_server_action::<StartProcess>();
-
-    let on_toggle = move |_| {
-        if is_running() {
-            stop_process_action.dispatch(StopProcess {});
-        } else {
-            start_process_action.dispatch(StartProcess {});
-        }
-        gen.update(|gen| *gen = crate::app::RefreshGen(gen.0 + 1));
-    };
-
-    let status_text = move || {
-        if let Some(status) = status.get() {
-            match status {
-                ProcessStatus::WaitingForStart => "waiting to start",
-                ProcessStatus::Running => "running",
-                ProcessStatus::WaitingForStop => "waiting to stop",
-                ProcessStatus::Finished => "finished",
-                ProcessStatus::Failed => "failed",
-            }
-        } else {
-            "Loading"
-        }
-    };
-
-    view! {
-        {status_text}
-    }
-}
diff --git a/llama_forge_rs/src/main.rs b/llama_forge_rs/src/main.rs
index 8d29de6..f34e639 100644
--- a/llama_forge_rs/src/main.rs
+++ b/llama_forge_rs/src/main.rs
@@ -95,12 +95,18 @@ async fn main() -> anyhow::Result<()> {
     let serve =
         axum::serve(listener, app(leptos_options).await.into_make_service()).into_future();
 
-    let backend_fut = async move { Ok::<(), anyhow::Error>(()) };
+    let proxy_man_fut = async move {
+        use llama_proxy_man::{config::AppConfig, start_server};
+        let config = AppConfig::default_figment();
+        start_server(config).await;
+
+        Ok::<(), anyhow::Error>(())
+    };
 
     let (serve_res, backend_res, wry_res) = tokio::try_join!(
         tokio::spawn(serve),
         tokio::task::spawn_blocking(wry_main),
-        tokio::spawn(backend_fut),
+        tokio::spawn(proxy_man_fut),
     )?;
 
     serve_res?;
diff --git a/llama_forge_rs/src/server/backends/llama_chat.rs b/llama_forge_rs/src/server/backends/llama_chat.rs
index 4600c36..0b25d21 100644
--- a/llama_forge_rs/src/server/backends/llama_chat.rs
+++ b/llama_forge_rs/src/server/backends/llama_chat.rs
@@ -99,7 +99,6 @@ async fn do_chat_request(chat: Chat, sender: mpsc::Sender) -> an
                 .expect("channel fail");
 
             es.close();
-            break;
         }
     }
 
diff --git a/llama_forge_rs/src/server/mod.rs b/llama_forge_rs/src/server/mod.rs
index 336e29d..7f7227a 100644
--- a/llama_forge_rs/src/server/mod.rs
+++ b/llama_forge_rs/src/server/mod.rs
@@ -28,7 +28,6 @@ use tracing::Level;
 
 use self::fileserv::file_and_error_handler;
 use crate::{
-    api::run_starter_task,
     app::*,
     server::middleware::{LoggingLayer, LoggingService},
 };
@@ -117,8 +116,9 @@ pub async fn app(leptos_options: LeptosOptions) -> Router {
     let pool = new_pool().await.expect("pool err");
 
     // // TODO move this out of server(pool has to be moved out too)
+    // FIXME: Should proxy_man move here ?
+    // let task = run_starter_task(pool.clone());
+    // tokio::task::spawn(task);
 
     let app_state = AppState {
         leptos_options,
diff --git a/llama_proxy_man/Cargo.toml b/llama_proxy_man/Cargo.toml
index 5d9df28..d75e1ab 100644
--- a/llama_proxy_man/Cargo.toml
+++ b/llama_proxy_man/Cargo.toml
@@ -30,3 +30,4 @@ reqwest-middleware = { version = "0.3.3", features = ["charset", "http2", "json"
 itertools = "0.13.0"
 openport = { version = "0.1.1", features = ["rand"] }
 derive_more = { version = "2.0.1", features = ["deref"] }
+figment = { version = "0.10.19", features = ["env", "json", "toml", "yaml"] }
diff --git a/llama_proxy_man/src/config.rs b/llama_proxy_man/src/config.rs
index 0d90974..397ebd5 100644
--- a/llama_proxy_man/src/config.rs
+++ b/llama_proxy_man/src/config.rs
@@ -1,6 +1,11 @@
 use serde::Deserialize;
 use std::{collections::HashMap, fs};
 
+use figment::{
+    providers::{Env, Format, Json, Toml, Yaml},
+    Figment,
+};
+
 #[derive(Clone, Debug, Deserialize)]
 pub struct AppConfig {
     pub system_resources: SystemResources,
@@ -8,8 +13,21 @@ pub struct AppConfig {
 }
 
 impl AppConfig {
+    pub fn default_figment() -> Self {
+        let config: Result<Self, figment::Error> = Figment::new()
+            .merge(Toml::file("config.toml"))
+            .merge(Yaml::file("config.yaml"))
+            .merge(Env::prefixed("LLAMA_FORGE_"))
+            .join(Json::file("Cargo.json"))
+            .extract();
+
+        tracing::info!(?config);
+
+        config.unwrap().assign_internal_ports()
+    }
+
     pub fn default_from_pwd_yml() -> Self {
-        let config_str = fs::read_to_string("config.yaml").expect("Failed to read config.yaml");
+        let config_str = fs::read_to_string("./config.yaml").expect("Failed to read config.yaml");
+
         serde_yaml::from_str::<AppConfig>(&config_str)
             .expect("Failed to parse config.yaml")
             .assign_internal_ports()
diff --git a/llama_proxy_man/src/lib.rs b/llama_proxy_man/src/lib.rs
index 4a1b7e2..1869f81 100644
--- a/llama_proxy_man/src/lib.rs
+++ b/llama_proxy_man/src/lib.rs
@@ -17,7 +17,7 @@ use tower_http::trace::{
 use tracing::Level;
 
 /// Creates an Axum application to handle inference requests for a specific model.
-pub fn create_app(spec: &ModelSpec, state: AppState) -> Router {
+pub fn axum_router(spec: &ModelSpec, state: AppState) -> Router {
     Router::new()
         .route(
             "/",
@@ -55,7 +55,7 @@ pub async fn start_server(config: AppConfig) {
         let spec = spec.clone();
 
         let handle = tokio::spawn(async move {
-            let app = create_app(&spec, state);
+            let app = axum_router(&spec, state);
             let addr = SocketAddr::from(([0, 0, 0, 0], spec.port));
             tracing::info!(msg = "Listening", ?spec);
             let listener = tokio::net::TcpListener::bind(&addr).await.unwrap();
diff --git a/llama_proxy_man/src/logging.rs b/llama_proxy_man/src/logging.rs
index 9c93185..9d55564 100644
--- a/llama_proxy_man/src/logging.rs
+++ b/llama_proxy_man/src/logging.rs
@@ -93,7 +93,7 @@ pub fn initialize_logger() {
         .from_env_lossy();
 
     tracing_subscriber::fmt()
-        .compact()
+        .pretty()
         .with_env_filter(env_filter)
         .init();
     });
diff --git a/redvault_el_rs/src/lib.rs b/redvault_el_rs/src/lib.rs
index 310a51b..95b3a08 100644
--- a/redvault_el_rs/src/lib.rs
+++ b/redvault_el_rs/src/lib.rs
@@ -1,5 +1,4 @@
 use emacs::{defun, Env, IntoLisp, Result, Value};
-use std::sync::Once;
 use std::sync::OnceLock;
 use tokio::runtime::{Builder, Runtime};
 
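
A note on the figment-based loading that default_figment() introduces in
llama_proxy_man/src/config.rs: figment resolves providers in order, where each
merge() lets the newer provider override keys from everything before it, while
join() only contributes keys that are still unset. So the precedence above is
LLAMA_FORGE_* env vars over config.yaml over config.toml, with the trailing
join() provider only filling leftover gaps. A minimal, self-contained sketch of
that layering (the Demo struct and demo.toml are illustrative stand-ins, not
part of this patch):

    use figment::{
        providers::{Env, Format, Serialized, Toml},
        Figment,
    };
    use serde::{Deserialize, Serialize};

    #[derive(Debug, Serialize, Deserialize)]
    struct Demo {
        port: u16,
        name: String,
    }

    fn main() -> Result<(), figment::Error> {
        let demo: Demo = Figment::new()
            // Lowest precedence: hard-coded defaults.
            .merge(Serialized::defaults(Demo { port: 8080, name: "default".into() }))
            // A config file, when present, overrides the defaults.
            .merge(Toml::file("demo.toml"))
            // Environment wins over both, e.g. LLAMA_FORGE_PORT=9999.
            .merge(Env::prefixed("LLAMA_FORGE_"))
            .extract()?;
        println!("{demo:?}");
        Ok(())
    }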
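
Stripped of the leptos and wry plumbing, the embedding that main.rs now does
reduces to roughly the following. This is a sketch against the APIs the patch
adds (AppConfig::default_figment and start_server), with error handling kept
minimal:

    use llama_proxy_man::{config::AppConfig, start_server};

    #[tokio::main]
    async fn main() -> anyhow::Result<()> {
        // Layered config: config.toml, config.yaml, then LLAMA_FORGE_* env vars.
        let config = AppConfig::default_figment();

        // start_server binds one axum listener per model spec and awaits them.
        let proxy = tokio::spawn(async move {
            start_server(config).await;
            Ok::<(), anyhow::Error>(())
        });

        proxy.await??;
        Ok(())
    }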