feat: Embedded proxy_man for forge

- Add `figment` for loading the YAML config files
- Small `Makefile.toml` fixes (the docset task still seems to be broken)
- Copy `config.yaml` into the workspace root and into forge
- Embed proxy_man in forge
- Remove `backend_process.rs` and `process.rs`
- Update `llama_proxy_man/Cargo.toml` and `config.rs` for the new dependencies
- Format the code
2025-02-11 04:22:14 +01:00 · 2025-02-11 04:22:14 +01:00 · 8d8923294d
commit 8d8923294d
parent 5b12762511
18 changed files with 347 additions and 299 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -238,6 +238,15 @@ dependencies = [
 "num-traits",
 ]

+[[package]]
+name = "atomic"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994"
+dependencies = [
+ "bytemuck",
+]
+
 [[package]]
 name = "atomic-waker"
 version = "1.1.2"
@ -1422,6 +1431,22 @@ dependencies = [
 "rustc_version 0.4.1",
 ]

+[[package]]
+name = "figment"
+version = "0.10.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3"
+dependencies = [
+ "atomic",
+ "pear",
+ "serde",
+ "serde_json",
+ "serde_yaml",
+ "toml",
+ "uncased",
+ "version_check",
+]
+
 [[package]]
 name = "flate2"
 version = "1.0.35"
@ -2492,6 +2517,12 @@ dependencies = [
 "hashbrown 0.15.2",
 ]

+[[package]]
+name = "inlinable_string"
+version = "0.1.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
+
 [[package]]
 name = "instant"
 version = "0.1.13"
@ -2981,6 +3012,7 @@ dependencies = [
 "leptos_axum",
 "leptos_meta",
 "leptos_router",
+ "llama_proxy_man",
 "mime_guess",
 "once_cell",
 "pin-project-lite",
@ -3023,6 +3055,7 @@ dependencies = [
 "anyhow",
 "axum",
 "derive_more 2.0.1",
+ "figment",
 "futures",
 "hyper",
 "itertools 0.13.0",
@ -3688,6 +3721,29 @@ version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"

+[[package]]
+name = "pear"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467"
+dependencies = [
+ "inlinable_string",
+ "pear_codegen",
+ "yansi",
+]
+
+[[package]]
+name = "pear_codegen"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147"
+dependencies = [
+ "proc-macro2",
+ "proc-macro2-diagnostics",
+ "quote",
+ "syn 2.0.98",
+]
+
 [[package]]
 name = "pem-rfc7468"
 version = "0.7.0"
@ -6178,6 +6234,15 @@ version = "1.17.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"

+[[package]]
+name = "uncased"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697"
+dependencies = [
+ "version_check",
+]
+
 [[package]]
 name = "unicase"
 version = "2.8.1"
--- a/Makefile.toml
+++ b/Makefile.toml
@ -16,7 +16,7 @@ script = "echo test123-${CARGO_MAKE_CRATE_CURRENT_WORKSPACE_MEMBER}"

 [tasks.refresh-all]
 description = "clean the repo and rebuild everything"
-dependencies = ["clean", "all", "lall"]
+dependencies = ["clean", "all"]

 [tasks.mksitedir]
 workspace = false
@ -24,10 +24,21 @@ script = "mkdir -p ./target/site"

 [tasks.all]
 description = "rebuild everything"
-dependencies = ["mksitedir", "docset", "check", "clippy", "build", "build-release", "format",  "lformat", "lbuild", "lbuild-release"]
+dependencies = [
+  "mksitedir",
+  "docset",
+  "check",
+  "clippy",
+  "build",
+  "build-release",
+  "format",
+  "lformat",
+  "lbuild",
+  "lbuild-release",
+]

 [tasks.lformat]
-scripts = "leptosfmt . && rustywind . --write"
+script = "leptosfmt . && rustywind . --write"

 [tasks.lbuild]
 category = "Build"
@ -37,7 +48,7 @@ set current "$CARGO_MAKE_CRATE_CURRENT_WORKSPACE_MEMBER"
 if contains "$LEPTOS_REPOS" $current
  cargo leptos build
 else
-  $current is not a leptos repo!
+  echo $current is not a leptos repo!
 end
 '''

@ -60,7 +71,7 @@ dependencies = ["make-docset", "cp-docset"]

 [tasks.make-docset]
 workspace = false
-script = "cargo docset --workspace --platform-family redvault-ai "
+script = "cargo docset --workspace --no-clean --platform-family redvault-ai && sleep 1 && sync"

 [tasks.cp-docset]
 workspace = false
--- a/config.yaml
+++ b/config.yaml
@ -0,0 +1,108 @@
+system_resources:
+  ram: 48G
+  vram: 30G
+model_specs:
+  - name: "tabby-code"
+    port: 18080
+    # internal_port: 28080 # Optional
+    autostart: "true"
+    vram_usage: "26.7G"   # Coder-32B + draft 0.5B
+    ram_usage: "3G"       # Coder-32B + draft 0.5B
+    # vram_usage: 8.25G # Coder-7B
+    # ram_usage: 2.6G   # Coder-7B
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      # host: 0.0.0.0
+      flash-attn: "true"
+      ctx-size: "32768"
+      model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-32B-Instruct-IQ4_XS.gguf
+      gpu-layers: "9999"
+      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Q8_0.gguf
+      gpu-layers-draft: "9999"
+      draft-max: "16"
+      draft-min: "5"
+  - name: "tabby-embeddings"
+    port: 18081
+    vram_usage: "0.4G"
+    ram_usage: "2.5G"
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      model: "/media/SanDisk/ai/models_live/nomic-embed-text-v1-f32.gguf"
+      gpu-layers: "-1"
+      flash-attn: "true"
+      # host: 0.0.0.0
+      embeddings: "true"
+  - name: "big-chat"
+    port: 18082
+    vram_usage: 26.5G
+    ram_usage: 2.5G
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: "16384"
+      flash-attn: "true"
+      model: /media/SanDisk/ai/models_live/Qwen2.5-32B-Instruct-Q5_K_L.gguf
+      gpu-layers: "9999"
+      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-0.5B-Instruct-Q8_0.gguf
+      gpu-layers-draft: "9999"
+      # draft-max: "16"
+      # draft-min: "5"
+  - name: "bigger-chat"
+    port: 18085
+    vram_usage: 29G
+    ram_usage: 5G
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: "8192"
+      flash-attn: "true"
+      cache-type-k: q8_0
+      cache-type-v: q8_0
+      model: /media/SanDisk/ai/models_live/Llama-3.1-Nemotron-70B-Instruct-HF-IQ3_XXS.gguf
+      gpu-layers: "9999"
+      model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
+      gpu-layers-draft: "0"
+      # draft-max: "16"
+      # draft-min: "5"
+  - name: "bigger-chat-2"
+    port: 18083
+    vram_usage: 29G
+    ram_usage: 5G
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: "8192"
+      flash-attn: "true"
+      cache-type-k: q8_0
+      cache-type-v: q8_0
+      model: /media/SanDisk/ai/models_live/Llama-3.3-70B-Instruct-IQ3_XXS.gguf
+      gpu-layers: "9999"
+      # model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
+      # gpu-layers-draft: 0
+      # draft-max: "16"
+      # draft-min: "5"
+  - name: "deep-think"
+    port: 18084
+    vram_usage: 29G
+    ram_usage: 5G
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: "32768"
+      flash-attn: "true"
+      # cache-type-k: q8_0
+      # cache-type-v: q8_0
+      model: /media/SanDisk/ai/models_live/QwQ-32B-Preview-IQ4_XS.gguf
+      gpu-layers: "9999"
+      # model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
+      # gpu-layers-draft: 0
+      # draft-max: "16"
+      # draft-min: "5"
--- a/llama_forge_rs/Cargo.toml
+++ b/llama_forge_rs/Cargo.toml
@ -13,6 +13,7 @@ version.workspace=true
 crate-type = ["cdylib", "rlib"]

 [dependencies]
+llama_proxy_man = {path="../llama_proxy_man", optional = true}
 wasm-bindgen = "=0.2.100"
 # TODO Update to 0.7
 leptos = { version = "0.6", features = [
@ -93,6 +94,7 @@ pulldown-cmark = { version = "0.12.2", features = ["serde"] }
 default = ["ssr"]
 hydrate = ["leptos/hydrate", "leptos_meta/hydrate", "leptos_router/hydrate"]
 ssr = [
+  "dep:llama_proxy_man",
  "dep:async-broadcast",
  "dep:axum",
  "dep:dashmap",
--- a/llama_forge_rs/config.yaml
+++ b/llama_forge_rs/config.yaml
@ -0,0 +1,108 @@
+system_resources:
+  ram: 48G
+  vram: 30G
+model_specs:
+  - name: "tabby-code"
+    port: 18080
+    # internal_port: 28080 # Optional
+    autostart: true
+    vram_usage: 26.7G   # Coder-32B + draft 0.5B
+    ram_usage: 3G       # Coder-32B + draft 0.5B
+    # vram_usage: 8.25G # Coder-7B
+    # ram_usage: 2.6G   # Coder-7B
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      # host: 0.0.0.0
+      flash-attn: true
+      ctx-size: 32768
+      model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-32B-Instruct-IQ4_XS.gguf
+      gpu-layers: 9999
+      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Q8_0.gguf
+      gpu-layers-draft: 9999
+      draft-max: 16
+      draft-min: 5
+  - name: "tabby-embeddings"
+    port: 18081
+    vram_usage: 0.4G
+    ram_usage: 2.5G
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      model: /media/SanDisk/ai/models_live/nomic-embed-text-v1-f32.gguf
+      gpu-layers: -1
+      flash-attn: true
+      # host: 0.0.0.0
+      embeddings: true
+  - name: "big-chat"
+    port: 18082
+    vram_usage: 26.5G
+    ram_usage: 2.5G
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: 16384
+      flash-attn: true
+      model: /media/SanDisk/ai/models_live/Qwen2.5-32B-Instruct-Q5_K_L.gguf
+      gpu-layers: 9999
+      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-0.5B-Instruct-Q8_0.gguf
+      gpu-layers-draft: 9999
+      # draft-max: 16
+      # draft-min: 5
+  - name: "bigger-chat"
+    port: 18085
+    vram_usage: 29G
+    ram_usage: 5G
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: 8192
+      flash-attn: true
+      cache-type-k: q8_0
+      cache-type-v: q8_0
+      model: /media/SanDisk/ai/models_live/Llama-3.1-Nemotron-70B-Instruct-HF-IQ3_XXS.gguf
+      gpu-layers: 9999
+      model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
+      gpu-layers-draft: 0
+      # draft-max: 16
+      # draft-min: 5
+  - name: "bigger-chat-2"
+    port: 18083
+    vram_usage: 29G
+    ram_usage: 5G
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: 8192
+      flash-attn: true
+      cache-type-k: q8_0
+      cache-type-v: q8_0
+      model: /media/SanDisk/ai/models_live/Llama-3.3-70B-Instruct-IQ3_XXS.gguf
+      gpu-layers: 9999
+      # model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
+      # gpu-layers-draft: 0
+      # draft-max: 16
+      # draft-min: 5
+  - name: "deep-think"
+    port: 18084
+    vram_usage: 29G
+    ram_usage: 5G
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      ctx-size: 32768
+      flash-attn: true
+      # cache-type-k: q8_0
+      # cache-type-v: q8_0
+      model: /media/SanDisk/ai/models_live/QwQ-32B-Preview-IQ4_XS.gguf
+      gpu-layers: 9999
+      # model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
+      # gpu-layers-draft: 0
+      # draft-max: 16
+      # draft-min: 5
--- a/llama_forge_rs/src/api/backend_process.rs
+++ b/llama_forge_rs/src/api/backend_process.rs
@ -1,195 +0,0 @@
-use std::time::Duration;
-
-use leptos::*;
-use serde::{Deserialize, Serialize};
-
-use super::backend_settings::BackendSettings;
-use crate::app::components::FormControl;
-
-#[derive(PartialEq, Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
-pub struct BackendProcess {
-    id: i64,
-    #[sqlx(flatten)]
-    settings: BackendSettings,
-    status: ProcessStatus,
-}
-
-/// The `ProcessStatus` enum represents the various states a process can be in. It includes the following states:
-/// - `WaitingForStart`: The process is waiting to be started.
-/// - `Running`: The process is currently running.
-/// - `WaitingForStop`: The process is waiting to be stopped.
-/// - `Finished`: The process has completed its execution successfully.
-/// - `Failed`: The process has failed or encountered an error during execution.
-///
-/// This enum is used to keep track of the state of a process in a backend application, allowing for proper management and control over the process lifecycle.
-#[derive(
-    Default, PartialEq, Debug, Clone, Serialize, Deserialize, sqlx::Type, strum::EnumString,
-)]
-#[sqlx(rename_all = "snake_case")]
-#[strum(serialize_all = "snake_case")]
-pub enum ProcessStatus {
-    #[default]
-    WaitingForStart,
-    Running,
-    WaitingForStop,
-    Finished,
-    Failed,
-}
-
-/// CHecks the db every 100ms to see if stuff has to be started
-#[cfg(feature = "ssr")]
-pub async fn run_starter_task(pool: sqlx::SqlitePool) {
-    use tokio::{
-        time::{self, Instant},
-        try_join,
-    };
-    use tokio_stream::{wrappers::IntervalStream, StreamExt};
-
-    use crate::server::backends::BackendService;
-    let _ = tracing::debug_span!("starter_task");
-
-    tracing::debug!("AAAAAAAAAAAAAAA");
-    return; // TODO ????
-    tracing::debug!("Starter task started");
-
-    let service_handle = BackendService::new();
-    let mut stream = IntervalStream::new(time::interval(Duration::from_millis(1000)));
-    while let Some(instant) = stream.next().await {
-        break; // TODO integrate proxy man ?
-
-        tracing::debug!("fire; instant={:?}", instant);
-
-        let waiting_to_start: Vec<BackendProcess> = sqlx::query_as(
-            "SELECT id, status,
-                se.version, se.model_dir, se.default_model, se.created_at
-            FROM backend_process INNER JOIN settings AS se
-            ON backend_process.settings_version = se.version
-            WHERE status == ?",
-        )
-        .bind(ProcessStatus::WaitingForStart)
-        .fetch_all(&pool)
-        .await
-        .unwrap();
-
-        let waiting_to_stop: Vec<BackendProcess> = sqlx::query_as(
-            "SELECT id, status,
-                se.version, se.model_dir, se.default_model, se.created_at
-            FROM backend_process INNER JOIN settings AS se
-            ON backend_process.settings_version = se.version
-            WHERE status == ?",
-        )
-        .bind(ProcessStatus::WaitingForStop)
-        .fetch_all(&pool)
-        .await
-        .unwrap();
-
-        tracing::debug!(?waiting_to_start, ?waiting_to_stop);
-        use futures::future::FutureExt;
-
-        for to_stop in waiting_to_stop.into_iter() {
-            tracing::debug!(?to_stop);
-            let update_query_fut = sqlx::query!(
-                "UPDATE backend_process
-                SET status == ?
-                WHERE id == ?",
-                ProcessStatus::Finished,
-                to_stop.id
-            )
-            .execute(&pool)
-            .map(|f| f.map_err(|_e| anyhow::Error::msg("update fail")));
-            let stop_fut = service_handle.stop();
-
-            tracing::debug!(msg = "Stopping", ?to_stop);
-            try_join!(update_query_fut, stop_fut).expect("stop fail");
-            tracing::debug!(msg = "Stopped!", ?to_stop);
-        }
-
-        for to_start in waiting_to_start.into_iter() {
-            // TODO Pass args to backend
-            // TODO Save Services tehrefore allow multiples
-            tracing::debug!(?to_start);
-
-            let update_query_fut = sqlx::query!(
-                "UPDATE backend_process
-                SET status == ?
-                WHERE id == ?",
-                ProcessStatus::Running,
-                to_start.id
-            )
-            .execute(&pool)
-            .map(|f| f.map_err(|_e| anyhow::Error::msg("update fail")));
-
-            tracing::debug!(msg = "Starting", ?to_start);
-            let start_fut = service_handle.start();
-            try_join!(update_query_fut, start_fut).expect("start fail");
-            tracing::debug!(msg = "Started!", ?to_start);
-        }
-        tracing::debug!(msg="Starter task iteration finished", iteration_dur=?Instant::now().duration_since(instant))
-    }
-}
-
-/// Starts a new process and updates the process status.
-#[server]
-pub async fn start_process() -> Result<(), ServerFnError> {
-    use crate::server::pool;
-
-    let pool = pool()?;
-
-    // Set all existing processes to Finished
-    sqlx::query!(
-        "UPDATE backend_process SET status = 'waiting_for_stop' WHERE status != 'finished'"
-    )
-    .execute(&pool)
-    .await?;
-
-    // Get the current newest settings version
-    let settings_version =
-        sqlx::query_scalar!("SELECT version FROM settings ORDER BY version DESC LIMIT 1")
-            .fetch_one(&pool)
-            .await?;
-
-    // Create a new ProcessStatus with the current newest settings version and initial status of waiting
-    sqlx::query!(
-        "INSERT INTO backend_process (settings_version, status) VALUES (?, 'waiting_for_start')",
-        settings_version
-    )
-    .execute(&pool)
-    .await?;
-
-    Ok(())
-}
-
-/// Stops all processes and updates the process status.
-#[server]
-pub async fn stop_process() -> Result<(), ServerFnError> {
-    use crate::server::pool;
-
-    let pool = pool()?;
-
-    // Set all processes to finished
-    sqlx::query!("UPDATE backend_process SET status = 'waiting_for_stop'")
-        .execute(&pool)
-        .await?;
-
-    Ok(())
-}
-
-/// Fetches the status of the newest process.
-#[server(ProcessStatusFn)]
-pub async fn process_status() -> Result<ProcessStatus, ServerFnError> {
-    use crate::server::pool;
-
-    let pool = pool()?;
-
-    let status: Option<ProcessStatus> =
-        sqlx::query_scalar!("SELECT status FROM backend_process ORDER BY id DESC LIMIT 1")
-            .fetch_optional(&pool)
-            .await?
-            .map(|str| str.parse())
-            .transpose()?;
-
-    match status {
-        Some(status) => Ok(status),
-        None => Ok(ProcessStatus::Finished),
-    }
-}
--- a/llama_forge_rs/src/api/mod.rs
+++ b/llama_forge_rs/src/api/mod.rs
@ -1,14 +1,12 @@
 use serde::{Deserialize, Serialize};
 use uuid::Uuid;

-mod backend_process;
 mod backend_settings;
 mod chat;
 // pub mod backend_process;
 // pub mod backend_settings;
 // pub mod chat;

-pub use backend_process::*;
 pub use backend_settings::*;
 pub use chat::*;

--- a/llama_forge_rs/src/app/pages/chat.rs
+++ b/llama_forge_rs/src/app/pages/chat.rs
@ -51,7 +51,7 @@ fn ChatMessageBubble(
            if edit_mode.get() {
                view! { <p inner_html=move || { msg_str() }></p> }
            } else {
-                view! { <p inner_html=move || { md_str() }></p> }
+                view! { <p class="prose" inner_html=move || { md_str() }></p> }
            }
        };

@ -149,13 +149,23 @@ pub fn ChatHistory(
    response: RwSignal<String>,
    receiving_response: RwSignal<bool>,
 ) -> impl IntoView {
+    use pulldown_cmark;
+    let md_str = move || {
+        let owned_str = response();
+        let parser = pulldown_cmark::Parser::new(&owned_str);
+        let mut md_output = String::new();
+        pulldown_cmark::html::push_html(&mut md_output, parser);
+
+        md_output
+    };
+
    let streaming_response_elem = move || {
        if receiving_response() {
            view! {
                <div class="chat chat-start">
                    <div class="chat-header">"Assistant"</div>
                    <div class="chat-bubble">
-                        <span inner_html=response></span>
+                        <span class="prose" inner_html=move || { md_str() }></span>
                        <span class="loading loading-ball loading-xs"></span>
                    </div>
                </div>
--- a/llama_forge_rs/src/app/pages/settings/mod.rs
+++ b/llama_forge_rs/src/app/pages/settings/mod.rs
@ -4,10 +4,8 @@ use leptos_router::*;
 use crate::app::{components::svgs::*, RefreshGen};

 mod backend;
-pub mod process;

 use backend::SettingsForm;
-use process::ProcessForm;

 #[component]
 fn TabButton(path: &'static str, label: &'static str) -> impl IntoView {
@ -33,7 +31,8 @@ fn BackendSettingsPage() -> impl IntoView {
    view! {
        <div class="flex justify-around">
            <div class="flex flex-col p-4 w-1/3">
-                <ProcessForm />
+                <p>"WIP"</p>
+            // <ProcessForm />
            </div>
            <div class="flex flex-col p-4 w-2/3">
                <SettingsForm />
--- a/llama_forge_rs/src/app/pages/settings/process.rs
+++ b/llama_forge_rs/src/app/pages/settings/process.rs
@ -1,81 +0,0 @@
-use std::time::Duration;
-
-use leptos::*;
-use serde::{Deserialize, Serialize};
-
-use crate::{
-    api::{process_status, ProcessStatus, StartProcess, StopProcess},
-    app::components::FormControl,
-};
-
-/// A form component to start and stop the process.
-#[component]
-pub fn ProcessForm() -> impl IntoView {
-    let gen =
-        use_context::<RwSignal<super::RefreshGen>>().expect("to have found the setter provided");
-    let _ = gloo_timers::callback::Interval::new(1000, move || {
-        gen.update(|gen| *gen = crate::app::RefreshGen(gen.0 + 1));
-    })
-    .forget();
-
-    let status = create_local_resource(gen, |_| async move {
-        process_status().await.unwrap_or(ProcessStatus::Finished)
-    });
-
-    let is_running = move || match status.get().unwrap_or(ProcessStatus::Finished) {
-        ProcessStatus::WaitingForStart => true,
-        ProcessStatus::Running => true,
-        ProcessStatus::WaitingForStop => false,
-        ProcessStatus::Finished => false,
-        ProcessStatus::Failed => false,
-    };
-
-    let toggle_color = move || match status.get().unwrap_or(ProcessStatus::Finished) {
-        ProcessStatus::WaitingForStart => "toggle-info",
-        ProcessStatus::Running => "toggle-success",
-        ProcessStatus::WaitingForStop => "toggle-warning",
-        ProcessStatus::Finished => "toggle-info",
-        ProcessStatus::Failed => "toggle-error",
-    };
-
-    let stop_process_action = create_server_action::<StopProcess>();
-    let start_process_action = create_server_action::<StartProcess>();
-
-    let on_toggle = move |_| {
-        if is_running() {
-            stop_process_action.dispatch(StopProcess {});
-        } else {
-            start_process_action.dispatch(StartProcess {});
-        }
-        gen.update(|gen| *gen = crate::app::RefreshGen(gen.0 + 1));
-    };
-
-    let status_text = move || {
-        if let Some(status) = status.get() {
-            match status {
-                ProcessStatus::WaitingForStart => "waiting to start",
-                ProcessStatus::Running => "running",
-                ProcessStatus::WaitingForStop => "waiting to stop",
-                ProcessStatus::Finished => "finished",
-                ProcessStatus::Failed => "failed",
-            }
-        } else {
-            "Loading"
-        }
-    };
-
-    view! {
-        <FormControl label="Running">
-            <input
-                type="checkbox"
-                checked=is_running
-                on:click=on_toggle
-                class=move || format!("toggle {}", toggle_color())
-            />
-        </FormControl>
-        <FormControl label="Status">
-
-            <span class="label-text">{status_text}</span>
-        </FormControl>
-    }
-}
--- a/llama_forge_rs/src/main.rs
+++ b/llama_forge_rs/src/main.rs
@ -95,12 +95,18 @@ async fn main() -> anyhow::Result<()> {

    let serve = axum::serve(listener, app(leptos_options).await.into_make_service()).into_future();

-    let backend_fut = async move { Ok::<(), anyhow::Error>(()) };
+    let proxy_man_fut = async move {
+        use llama_proxy_man::{config::AppConfig, start_server};
+        let config = AppConfig::default_figment();
+        start_server(config).await;
+
+        Ok::<(), anyhow::Error>(())
+    };

    let (serve_res, backend_res, wry_res) = tokio::try_join!(
        tokio::spawn(serve),
        tokio::task::spawn_blocking(wry_main),
-        tokio::spawn(backend_fut),
+        tokio::spawn(proxy_man_fut),
    )?;

    serve_res?;
--- a/llama_forge_rs/src/server/backends/llama_chat.rs
+++ b/llama_forge_rs/src/server/backends/llama_chat.rs
@ -99,7 +99,6 @@ async fn do_chat_request(chat: Chat, sender: mpsc::Sender<ChannelMessage>) -> an
                            .expect("channel fail");

                        es.close();
-
                        break;
                    }

--- a/llama_forge_rs/src/server/mod.rs
+++ b/llama_forge_rs/src/server/mod.rs
@ -28,7 +28,6 @@ use tracing::Level;

 use self::fileserv::file_and_error_handler;
 use crate::{
-    api::run_starter_task,
    app::*,
    server::middleware::{LoggingLayer, LoggingService},
 };
@ -117,8 +116,9 @@ pub async fn app(leptos_options: LeptosOptions) -> Router {
    let pool = new_pool().await.expect("pool err");

    // // TODO move this out of server(pool has to be moved out too)
-    let task = run_starter_task(pool.clone());
-    tokio::task::spawn(task);
+    // FIXME: Should proxy_man move here ?
+    // let task = run_starter_task(pool.clone());
+    // tokio::task::spawn(task);

    let app_state = AppState {
        leptos_options,
--- a/llama_proxy_man/Cargo.toml
+++ b/llama_proxy_man/Cargo.toml
@ -30,3 +30,4 @@ reqwest-middleware = { version = "0.3.3", features = ["charset", "http2", "json"
 itertools = "0.13.0"
 openport = { version = "0.1.1", features = ["rand"] }
 derive_more = { version = "2.0.1", features = ["deref"] }
+figment = { version = "0.10.19", features = ["env", "json", "toml", "yaml"] }
--- a/llama_proxy_man/src/config.rs
+++ b/llama_proxy_man/src/config.rs
@ -1,6 +1,11 @@
 use serde::Deserialize;
 use std::{collections::HashMap, fs};

+use figment::{
+    providers::{Env, Format, Json, Toml, Yaml},
+    Figment,
+};
+
 #[derive(Clone, Debug, Deserialize)]
 pub struct AppConfig {
    pub system_resources: SystemResources,
@ -8,8 +13,21 @@ pub struct AppConfig {
 }

 impl AppConfig {
+    pub fn default_figment() -> Self {
+        let config: Result<Self, _> = Figment::new()
+            .merge(Toml::file("config.toml"))
+            .merge(Yaml::file("config.yaml"))
+            .merge(Env::prefixed("LLAMA_FORGE_"))
+            .join(Json::file("Cargo.json"))
+            .extract();
+
+        tracing::info!(?config);
+
+        config.unwrap().assign_internal_ports()
+    }
+
    pub fn default_from_pwd_yml() -> Self {
-        let config_str = fs::read_to_string("config.yaml").expect("Failed to read config.yaml");
+        let config_str = fs::read_to_string("./config.yaml").expect("Failed to read config.yaml");
        serde_yaml::from_str::<Self>(&config_str)
            .expect("Failed to parse config.yaml")
            .assign_internal_ports()
--- a/llama_proxy_man/src/lib.rs
+++ b/llama_proxy_man/src/lib.rs
@ -17,7 +17,7 @@ use tower_http::trace::{
 use tracing::Level;

 /// Creates an Axum application to handle inference requests for a specific model.
-pub fn create_app(spec: &ModelSpec, state: AppState) -> Router {
+pub fn axum_router(spec: &ModelSpec, state: AppState) -> Router {
    Router::new()
        .route(
            "/",
@ -55,7 +55,7 @@ pub async fn start_server(config: AppConfig) {
        let spec = spec.clone();

        let handle = tokio::spawn(async move {
-            let app = create_app(&spec, state);
+            let app = axum_router(&spec, state);
            let addr = SocketAddr::from(([0, 0, 0, 0], spec.port));
            tracing::info!(msg = "Listening", ?spec);
            let listener = tokio::net::TcpListener::bind(&addr).await.unwrap();
--- a/llama_proxy_man/src/logging.rs
+++ b/llama_proxy_man/src/logging.rs
@ -93,7 +93,7 @@ pub fn initialize_logger() {
            .from_env_lossy();

        tracing_subscriber::fmt()
-            .compact()
+            .pretty()
            .with_env_filter(env_filter)
            .init();
    });
--- a/redvault_el_rs/src/lib.rs
+++ b/redvault_el_rs/src/lib.rs
@ -1,5 +1,4 @@
 use emacs::{defun, Env, IntoLisp, Result, Value};
-use std::sync::Once;
 use std::sync::OnceLock;
 use tokio::runtime::{Builder, Runtime};