feat: Embed proxy_man in forge

- Add `figment` for layered config YAML loading
- Small `Makefile.toml` fixes (docset still seems broken)
- Copy `config.yaml` to the workspace root & forge
- Embed proxy_man in forge (see the sketch below)
- Remove `backend_process.rs` and `process.rs`
- Update `llama_proxy_man/Cargo.toml` and `config.rs` for the new dependencies
- Format
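
The embedding itself is small: forge loads the proxy's config and drives `start_server` from its own runtime. A minimal sketch, distilled from the `main.rs` hunk below (`run_embedded_proxy` is an illustrative name, not part of the commit):

```rust
use llama_proxy_man::{config::AppConfig, start_server};

// Illustrative wrapper; mirrors the `proxy_man_fut` added in main.rs.
async fn run_embedded_proxy() -> anyhow::Result<()> {
    // Layered load: config.toml, config.yaml, then LLAMA_FORGE_* env vars.
    let config = AppConfig::default_figment();
    start_server(config).await;
    Ok(())
}
```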
Tristan D. 2025-02-11 04:22:14 +01:00
parent 5b12762511
commit bf6caabfe8
Signed by: tristan
SSH key fingerprint: SHA256:3RU4RLOoM8oAjFU19f1W6t8uouZbA7GWkaSW6rjp1k8
18 changed files with 347 additions and 299 deletions

Cargo.lock (generated)

@@ -238,6 +238,15 @@ dependencies = [
"num-traits",
]
[[package]]
name = "atomic"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994"
dependencies = [
"bytemuck",
]
[[package]]
name = "atomic-waker"
version = "1.1.2"
@@ -1422,6 +1431,22 @@ dependencies = [
"rustc_version 0.4.1",
]
[[package]]
name = "figment"
version = "0.10.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3"
dependencies = [
"atomic",
"pear",
"serde",
"serde_json",
"serde_yaml",
"toml",
"uncased",
"version_check",
]
[[package]]
name = "flate2"
version = "1.0.35"
@@ -2492,6 +2517,12 @@ dependencies = [
"hashbrown 0.15.2",
]
[[package]]
name = "inlinable_string"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
[[package]]
name = "instant"
version = "0.1.13"
@@ -2981,6 +3012,7 @@ dependencies = [
"leptos_axum",
"leptos_meta",
"leptos_router",
"llama_proxy_man",
"mime_guess",
"once_cell",
"pin-project-lite",
@@ -3023,6 +3055,7 @@ dependencies = [
"anyhow",
"axum",
"derive_more 2.0.1",
"figment",
"futures",
"hyper",
"itertools 0.13.0",
@@ -3688,6 +3721,29 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
[[package]]
name = "pear"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467"
dependencies = [
"inlinable_string",
"pear_codegen",
"yansi",
]
[[package]]
name = "pear_codegen"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147"
dependencies = [
"proc-macro2",
"proc-macro2-diagnostics",
"quote",
"syn 2.0.98",
]
[[package]]
name = "pem-rfc7468"
version = "0.7.0"
@@ -6178,6 +6234,15 @@ version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "uncased"
version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697"
dependencies = [
"version_check",
]
[[package]]
name = "unicase"
version = "2.8.1"

Makefile.toml

@@ -16,7 +16,7 @@ script = "echo test123-${CARGO_MAKE_CRATE_CURRENT_WORKSPACE_MEMBER}"
[tasks.refresh-all]
description = "clean the repo and rebuild everything"
dependencies = ["clean", "all", "lall"]
dependencies = ["clean", "all"]
[tasks.mksitedir]
workspace = false
@@ -24,10 +24,21 @@ script = "mkdir -p ./target/site"
[tasks.all]
description = "rebuild everything"
dependencies = ["mksitedir", "docset", "check", "clippy", "build", "build-release", "format", "lformat", "lbuild", "lbuild-release"]
dependencies = [
    "mksitedir",
    "docset",
    "check",
    "clippy",
    "build",
    "build-release",
    "format",
    "lformat",
    "lbuild",
    "lbuild-release",
]

[tasks.lformat]
scripts = "leptosfmt . && rustywind . --write"
script = "leptosfmt . && rustywind . --write"
[tasks.lbuild]
category = "Build"
@@ -37,7 +48,7 @@ set current "$CARGO_MAKE_CRATE_CURRENT_WORKSPACE_MEMBER"
if contains "$LEPTOS_REPOS" $current
    cargo leptos build
else
    $current is not a leptos repo!
    echo $current is not a leptos repo!
end
'''
@@ -60,7 +71,7 @@ dependencies = ["make-docset", "cp-docset"]
[tasks.make-docset]
workspace = false
script = "cargo docset --workspace --platform-family redvault-ai "
script = "cargo docset --workspace --no-clean --platform-family redvault-ai && sleep 1 && sync"
[tasks.cp-docset]
workspace = false

config.yaml (new file)

@@ -0,0 +1,108 @@
system_resources:
  ram: 48G
  vram: 30G
model_specs:
  - name: "tabby-code"
    port: 18080
    # internal_port: 28080 # Optional
    autostart: "true"
    vram_usage: "26.7G" # Coder-32B + draft 0.5B
    ram_usage: "3G" # Coder-32B + draft 0.5B
    # vram_usage: 8.25G # Coder-7B
    # ram_usage: 2.6G # Coder-7B
    env:
      CUDA_VISIBLE_DEVICES: "0"
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      # host: 0.0.0.0
      flash-attn: "true"
      ctx-size: "32768"
      model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-32B-Instruct-IQ4_XS.gguf
      gpu-layers: "9999"
      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Q8_0.gguf
      gpu-layers-draft: "9999"
      draft-max: "16"
      draft-min: "5"
  - name: "tabby-embeddings"
    port: 18081
    vram_usage: "0.4G"
    ram_usage: "2.5G"
    env:
      CUDA_VISIBLE_DEVICES: "0"
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      model: "/media/SanDisk/ai/models_live/nomic-embed-text-v1-f32.gguf"
      gpu-layers: "-1"
      flash-attn: "true"
      # host: 0.0.0.0
      embeddings: "true"
  - name: "big-chat"
    port: 18082
    vram_usage: 26.5G
    ram_usage: 2.5G
    env:
      CUDA_VISIBLE_DEVICES: "0"
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      ctx-size: "16384"
      flash-attn: "true"
      model: /media/SanDisk/ai/models_live/Qwen2.5-32B-Instruct-Q5_K_L.gguf
      gpu-layers: "9999"
      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-0.5B-Instruct-Q8_0.gguf
      gpu-layers-draft: "9999"
      # draft-max: "16"
      # draft-min: "5"
  - name: "bigger-chat"
    port: 18085
    vram_usage: 29G
    ram_usage: 5G
    env:
      CUDA_VISIBLE_DEVICES: "0"
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      ctx-size: "8192"
      flash-attn: "true"
      cache-type-k: q8_0
      cache-type-v: q8_0
      model: /media/SanDisk/ai/models_live/Llama-3.1-Nemotron-70B-Instruct-HF-IQ3_XXS.gguf
      gpu-layers: "9999"
      model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
      gpu-layers-draft: "0"
      # draft-max: "16"
      # draft-min: "5"
  - name: "bigger-chat-2"
    port: 18083
    vram_usage: 29G
    ram_usage: 5G
    env:
      CUDA_VISIBLE_DEVICES: "0"
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      ctx-size: "8192"
      flash-attn: "true"
      cache-type-k: q8_0
      cache-type-v: q8_0
      model: /media/SanDisk/ai/models_live/Llama-3.3-70B-Instruct-IQ3_XXS.gguf
      gpu-layers: "9999"
      # model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
      # gpu-layers-draft: 0
      # draft-max: "16"
      # draft-min: "5"
  - name: "deep-think"
    port: 18084
    vram_usage: 29G
    ram_usage: 5G
    env:
      CUDA_VISIBLE_DEVICES: "0"
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      ctx-size: "32768"
      flash-attn: "true"
      # cache-type-k: q8_0
      # cache-type-v: q8_0
      model: /media/SanDisk/ai/models_live/QwQ-32B-Preview-IQ4_XS.gguf
      gpu-layers: "9999"
      # model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
      # gpu-layers-draft: 0
      # draft-max: "16"
      # draft-min: "5"
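
For orientation, a rough sketch of the shapes this YAML deserializes into. `AppConfig.system_resources` is confirmed by the `config.rs` hunk near the end of this diff; the field names and types of `SystemResources`/`ModelSpec` below are inferred from the keys above and should be read as assumptions:

```rust
use std::collections::HashMap;

use serde::Deserialize;

// Assumed counterparts of the YAML above; only `system_resources` on
// AppConfig is confirmed by the config.rs hunk, the rest is inferred.
#[derive(Clone, Debug, Deserialize)]
pub struct SystemResources {
    pub ram: String,  // e.g. "48G"
    pub vram: String, // e.g. "30G"
}

#[derive(Clone, Debug, Deserialize)]
pub struct ModelSpec {
    pub name: String,
    pub port: u16,
    pub internal_port: Option<u16>,
    // Note: the two config.yaml copies disagree on quoting ("true" vs true,
    // "0" vs 0); the unquoted variant would need bool/int fields or a more
    // permissive type such as serde_yaml::Value.
    pub autostart: Option<String>,
    pub vram_usage: String,
    pub ram_usage: String,
    pub env: HashMap<String, String>,
    pub args: HashMap<String, String>,
}
```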

llama_forge_rs/Cargo.toml

@@ -13,6 +13,7 @@ version.workspace=true
crate-type = ["cdylib", "rlib"]
[dependencies]
llama_proxy_man = {path="../llama_proxy_man", optional = true}
wasm-bindgen = "=0.2.100"
# TODO Update to 0.7
leptos = { version = "0.6", features = [
@@ -93,6 +94,7 @@ pulldown-cmark = { version = "0.12.2", features = ["serde"] }
default = ["ssr"]
hydrate = ["leptos/hydrate", "leptos_meta/hydrate", "leptos_router/hydrate"]
ssr = [
    "dep:llama_proxy_man",
    "dep:async-broadcast",
    "dep:axum",
    "dep:dashmap",

llama_forge_rs/config.yaml (new file)

@@ -0,0 +1,108 @@
system_resources:
  ram: 48G
  vram: 30G
model_specs:
  - name: "tabby-code"
    port: 18080
    # internal_port: 28080 # Optional
    autostart: true
    vram_usage: 26.7G # Coder-32B + draft 0.5B
    ram_usage: 3G # Coder-32B + draft 0.5B
    # vram_usage: 8.25G # Coder-7B
    # ram_usage: 2.6G # Coder-7B
    env:
      CUDA_VISIBLE_DEVICES: 0
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      # host: 0.0.0.0
      flash-attn: true
      ctx-size: 32768
      model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-32B-Instruct-IQ4_XS.gguf
      gpu-layers: 9999
      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Q8_0.gguf
      gpu-layers-draft: 9999
      draft-max: 16
      draft-min: 5
  - name: "tabby-embeddings"
    port: 18081
    vram_usage: 0.4G
    ram_usage: 2.5G
    env:
      CUDA_VISIBLE_DEVICES: 0
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      model: /media/SanDisk/ai/models_live/nomic-embed-text-v1-f32.gguf
      gpu-layers: -1
      flash-attn: true
      # host: 0.0.0.0
      embeddings: true
  - name: "big-chat"
    port: 18082
    vram_usage: 26.5G
    ram_usage: 2.5G
    env:
      CUDA_VISIBLE_DEVICES: 0
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      ctx-size: 16384
      flash-attn: true
      model: /media/SanDisk/ai/models_live/Qwen2.5-32B-Instruct-Q5_K_L.gguf
      gpu-layers: 9999
      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-0.5B-Instruct-Q8_0.gguf
      gpu-layers-draft: 9999
      # draft-max: 16
      # draft-min: 5
  - name: "bigger-chat"
    port: 18085
    vram_usage: 29G
    ram_usage: 5G
    env:
      CUDA_VISIBLE_DEVICES: 0
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      ctx-size: 8192
      flash-attn: true
      cache-type-k: q8_0
      cache-type-v: q8_0
      model: /media/SanDisk/ai/models_live/Llama-3.1-Nemotron-70B-Instruct-HF-IQ3_XXS.gguf
      gpu-layers: 9999
      model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
      gpu-layers-draft: 0
      # draft-max: 16
      # draft-min: 5
  - name: "bigger-chat-2"
    port: 18083
    vram_usage: 29G
    ram_usage: 5G
    env:
      CUDA_VISIBLE_DEVICES: 0
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      ctx-size: 8192
      flash-attn: true
      cache-type-k: q8_0
      cache-type-v: q8_0
      model: /media/SanDisk/ai/models_live/Llama-3.3-70B-Instruct-IQ3_XXS.gguf
      gpu-layers: 9999
      # model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
      # gpu-layers-draft: 0
      # draft-max: 16
      # draft-min: 5
  - name: "deep-think"
    port: 18084
    vram_usage: 29G
    ram_usage: 5G
    env:
      CUDA_VISIBLE_DEVICES: 0
      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
    args:
      ctx-size: 32768
      flash-attn: true
      # cache-type-k: q8_0
      # cache-type-v: q8_0
      model: /media/SanDisk/ai/models_live/QwQ-32B-Preview-IQ4_XS.gguf
      gpu-layers: 9999
      # model-draft: /media/SanDisk/ai/models_live/Llama-3.2-1B-Instruct-Q8_0.gguf
      # gpu-layers-draft: 0
      # draft-max: 16
      # draft-min: 5

backend_process.rs (deleted)

@@ -1,195 +0,0 @@
use std::time::Duration;

use leptos::*;
use serde::{Deserialize, Serialize};

use super::backend_settings::BackendSettings;
use crate::app::components::FormControl;

#[derive(PartialEq, Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct BackendProcess {
    id: i64,
    #[sqlx(flatten)]
    settings: BackendSettings,
    status: ProcessStatus,
}

/// The `ProcessStatus` enum represents the various states a process can be in. It includes the following states:
/// - `WaitingForStart`: The process is waiting to be started.
/// - `Running`: The process is currently running.
/// - `WaitingForStop`: The process is waiting to be stopped.
/// - `Finished`: The process has completed its execution successfully.
/// - `Failed`: The process has failed or encountered an error during execution.
///
/// This enum is used to keep track of the state of a process in a backend application, allowing for proper management and control over the process lifecycle.
#[derive(
    Default, PartialEq, Debug, Clone, Serialize, Deserialize, sqlx::Type, strum::EnumString,
)]
#[sqlx(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum ProcessStatus {
    #[default]
    WaitingForStart,
    Running,
    WaitingForStop,
    Finished,
    Failed,
}

/// Checks the db every 1000ms to see if any process has to be started or stopped
#[cfg(feature = "ssr")]
pub async fn run_starter_task(pool: sqlx::SqlitePool) {
    use tokio::{
        time::{self, Instant},
        try_join,
    };
    use tokio_stream::{wrappers::IntervalStream, StreamExt};

    use crate::server::backends::BackendService;

    let _ = tracing::debug_span!("starter_task");
    tracing::debug!("AAAAAAAAAAAAAAA");
    return; // TODO ????
    tracing::debug!("Starter task started");
    let service_handle = BackendService::new();
    let mut stream = IntervalStream::new(time::interval(Duration::from_millis(1000)));
    while let Some(instant) = stream.next().await {
        break; // TODO integrate proxy man ?
        tracing::debug!("fire; instant={:?}", instant);
        let waiting_to_start: Vec<BackendProcess> = sqlx::query_as(
            "SELECT id, status,
                se.version, se.model_dir, se.default_model, se.created_at
             FROM backend_process INNER JOIN settings AS se
                ON backend_process.settings_version = se.version
             WHERE status == ?",
        )
        .bind(ProcessStatus::WaitingForStart)
        .fetch_all(&pool)
        .await
        .unwrap();

        let waiting_to_stop: Vec<BackendProcess> = sqlx::query_as(
            "SELECT id, status,
                se.version, se.model_dir, se.default_model, se.created_at
             FROM backend_process INNER JOIN settings AS se
                ON backend_process.settings_version = se.version
             WHERE status == ?",
        )
        .bind(ProcessStatus::WaitingForStop)
        .fetch_all(&pool)
        .await
        .unwrap();

        tracing::debug!(?waiting_to_start, ?waiting_to_stop);

        use futures::future::FutureExt;

        for to_stop in waiting_to_stop.into_iter() {
            tracing::debug!(?to_stop);
            let update_query_fut = sqlx::query!(
                "UPDATE backend_process
                 SET status == ?
                 WHERE id == ?",
                ProcessStatus::Finished,
                to_stop.id
            )
            .execute(&pool)
            .map(|f| f.map_err(|_e| anyhow::Error::msg("update fail")));
            let stop_fut = service_handle.stop();
            tracing::debug!(msg = "Stopping", ?to_stop);
            try_join!(update_query_fut, stop_fut).expect("stop fail");
            tracing::debug!(msg = "Stopped!", ?to_stop);
        }

        for to_start in waiting_to_start.into_iter() {
            // TODO Pass args to backend
            // TODO Save Services therefore allow multiples
            tracing::debug!(?to_start);
            let update_query_fut = sqlx::query!(
                "UPDATE backend_process
                 SET status == ?
                 WHERE id == ?",
                ProcessStatus::Running,
                to_start.id
            )
            .execute(&pool)
            .map(|f| f.map_err(|_e| anyhow::Error::msg("update fail")));
            tracing::debug!(msg = "Starting", ?to_start);
            let start_fut = service_handle.start();
            try_join!(update_query_fut, start_fut).expect("start fail");
            tracing::debug!(msg = "Started!", ?to_start);
        }

        tracing::debug!(msg="Starter task iteration finished", iteration_dur=?Instant::now().duration_since(instant))
    }
}

/// Starts a new process and updates the process status.
#[server]
pub async fn start_process() -> Result<(), ServerFnError> {
    use crate::server::pool;
    let pool = pool()?;

    // Set all existing processes to Finished
    sqlx::query!(
        "UPDATE backend_process SET status = 'waiting_for_stop' WHERE status != 'finished'"
    )
    .execute(&pool)
    .await?;

    // Get the current newest settings version
    let settings_version =
        sqlx::query_scalar!("SELECT version FROM settings ORDER BY version DESC LIMIT 1")
            .fetch_one(&pool)
            .await?;

    // Create a new ProcessStatus with the current newest settings version and initial status of waiting
    sqlx::query!(
        "INSERT INTO backend_process (settings_version, status) VALUES (?, 'waiting_for_start')",
        settings_version
    )
    .execute(&pool)
    .await?;

    Ok(())
}

/// Stops all processes and updates the process status.
#[server]
pub async fn stop_process() -> Result<(), ServerFnError> {
    use crate::server::pool;
    let pool = pool()?;

    // Set all processes to finished
    sqlx::query!("UPDATE backend_process SET status = 'waiting_for_stop'")
        .execute(&pool)
        .await?;

    Ok(())
}

/// Fetches the status of the newest process.
#[server(ProcessStatusFn)]
pub async fn process_status() -> Result<ProcessStatus, ServerFnError> {
    use crate::server::pool;
    let pool = pool()?;

    let status: Option<ProcessStatus> =
        sqlx::query_scalar!("SELECT status FROM backend_process ORDER BY id DESC LIMIT 1")
            .fetch_optional(&pool)
            .await?
            .map(|str| str.parse())
            .transpose()?;

    match status {
        Some(status) => Ok(status),
        None => Ok(ProcessStatus::Finished),
    }
}


@@ -1,14 +1,12 @@
use serde::{Deserialize, Serialize};
use uuid::Uuid;
mod backend_process;
mod backend_settings;
mod chat;
// pub mod backend_process;
// pub mod backend_settings;
// pub mod chat;
pub use backend_process::*;
pub use backend_settings::*;
pub use chat::*;


@@ -51,7 +51,7 @@ fn ChatMessageBubble(
        if edit_mode.get() {
            view! { <p inner_html=move || { msg_str() }></p> }
        } else {
            view! { <p inner_html=move || { md_str() }></p> }
            view! { <p class="prose" inner_html=move || { md_str() }></p> }
        }
    };
@@ -149,13 +149,23 @@ pub fn ChatHistory(
    response: RwSignal<String>,
    receiving_response: RwSignal<bool>,
) -> impl IntoView {
    use pulldown_cmark;

    let md_str = move || {
        let owned_str = response();
        let parser = pulldown_cmark::Parser::new(&owned_str);
        let mut md_output = String::new();
        pulldown_cmark::html::push_html(&mut md_output, parser);
        md_output
    };

    let streaming_response_elem = move || {
        if receiving_response() {
            view! {
                <div class="chat chat-start">
                    <div class="chat-header">"Assistant"</div>
                    <div class="chat-bubble">
                        <span inner_html=response></span>
                        <span class="prose" inner_html=move || { md_str() }></span>
                        <span class="loading loading-ball loading-xs"></span>
                    </div>
                </div>


@@ -4,10 +4,8 @@ use leptos_router::*;
use crate::app::{components::svgs::*, RefreshGen};
mod backend;
pub mod process;
use backend::SettingsForm;
use process::ProcessForm;
#[component]
fn TabButton(path: &'static str, label: &'static str) -> impl IntoView {
@@ -33,7 +31,8 @@ fn BackendSettingsPage() -> impl IntoView {
    view! {
        <div class="flex justify-around">
            <div class="flex flex-col p-4 w-1/3">
                <ProcessForm />
                <p>"WIP"</p>
                // <ProcessForm />
            </div>
            <div class="flex flex-col p-4 w-2/3">
                <SettingsForm />

process.rs (deleted)

@@ -1,81 +0,0 @@
use std::time::Duration;

use leptos::*;
use serde::{Deserialize, Serialize};

use crate::{
    api::{process_status, ProcessStatus, StartProcess, StopProcess},
    app::components::FormControl,
};

/// A form component to start and stop the process.
#[component]
pub fn ProcessForm() -> impl IntoView {
    let gen =
        use_context::<RwSignal<super::RefreshGen>>().expect("to have found the setter provided");

    let _ = gloo_timers::callback::Interval::new(1000, move || {
        gen.update(|gen| *gen = crate::app::RefreshGen(gen.0 + 1));
    })
    .forget();

    let status = create_local_resource(gen, |_| async move {
        process_status().await.unwrap_or(ProcessStatus::Finished)
    });

    let is_running = move || match status.get().unwrap_or(ProcessStatus::Finished) {
        ProcessStatus::WaitingForStart => true,
        ProcessStatus::Running => true,
        ProcessStatus::WaitingForStop => false,
        ProcessStatus::Finished => false,
        ProcessStatus::Failed => false,
    };

    let toggle_color = move || match status.get().unwrap_or(ProcessStatus::Finished) {
        ProcessStatus::WaitingForStart => "toggle-info",
        ProcessStatus::Running => "toggle-success",
        ProcessStatus::WaitingForStop => "toggle-warning",
        ProcessStatus::Finished => "toggle-info",
        ProcessStatus::Failed => "toggle-error",
    };

    let stop_process_action = create_server_action::<StopProcess>();
    let start_process_action = create_server_action::<StartProcess>();

    let on_toggle = move |_| {
        if is_running() {
            stop_process_action.dispatch(StopProcess {});
        } else {
            start_process_action.dispatch(StartProcess {});
        }
        gen.update(|gen| *gen = crate::app::RefreshGen(gen.0 + 1));
    };

    let status_text = move || {
        if let Some(status) = status.get() {
            match status {
                ProcessStatus::WaitingForStart => "waiting to start",
                ProcessStatus::Running => "running",
                ProcessStatus::WaitingForStop => "waiting to stop",
                ProcessStatus::Finished => "finished",
                ProcessStatus::Failed => "failed",
            }
        } else {
            "Loading"
        }
    };

    view! {
        <FormControl label="Running">
            <input
                type="checkbox"
                checked=is_running
                on:click=on_toggle
                class=move || format!("toggle {}", toggle_color())
            />
        </FormControl>
        <FormControl label="Status">
            <span class="label-text">{status_text}</span>
        </FormControl>
    }
}

main.rs

@@ -95,12 +95,18 @@ async fn main() -> anyhow::Result<()> {
    let serve = axum::serve(listener, app(leptos_options).await.into_make_service()).into_future();

    let backend_fut = async move { Ok::<(), anyhow::Error>(()) };

    let proxy_man_fut = async move {
        use llama_proxy_man::{config::AppConfig, start_server};
        let config = AppConfig::default_figment();
        start_server(config).await;
        Ok::<(), anyhow::Error>(())
    };

    let (serve_res, backend_res, wry_res) = tokio::try_join!(
        tokio::spawn(serve),
        tokio::task::spawn_blocking(wry_main),
        tokio::spawn(backend_fut),
        tokio::spawn(proxy_man_fut),
    )?;
    serve_res?;
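
One note on the error plumbing above: `tokio::spawn` wraps each task's output in a `JoinError` layer, so the join is unwrapped twice — once by `try_join!` for panics/cancellation, once per task for its own `anyhow` error. A standalone sketch of the same pattern (the helper name and arguments are illustrative, not part of the commit):

```rust
use std::future::Future;

// Minimal sketch of the two-level unwrap, assuming each task
// itself returns anyhow::Result<()>.
async fn join_tasks(
    a: impl Future<Output = anyhow::Result<()>> + Send + 'static,
    b: impl Future<Output = anyhow::Result<()>> + Send + 'static,
) -> anyhow::Result<()> {
    // try_join! fails fast if either task panics or is cancelled (JoinError)...
    let (res_a, res_b) = tokio::try_join!(tokio::spawn(a), tokio::spawn(b))?;
    // ...and the inner results surface the tasks' own errors.
    res_a?;
    res_b?;
    Ok(())
}
```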


@@ -99,7 +99,6 @@ async fn do_chat_request(chat: Chat, sender: mpsc::Sender<ChannelMessage>) -> an
.expect("channel fail");
es.close();
break;
}


@@ -28,7 +28,6 @@ use tracing::Level;
use self::fileserv::file_and_error_handler;
use crate::{
    api::run_starter_task,
    app::*,
    server::middleware::{LoggingLayer, LoggingService},
};
@@ -117,8 +116,9 @@ pub async fn app(leptos_options: LeptosOptions) -> Router {
    let pool = new_pool().await.expect("pool err");

    // // TODO move this out of server(pool has to be moved out too)
    let task = run_starter_task(pool.clone());
    tokio::task::spawn(task);
    // FIXME: Should proxy_man move here ?
    // let task = run_starter_task(pool.clone());
    // tokio::task::spawn(task);

    let app_state = AppState {
        leptos_options,
llama_proxy_man/Cargo.toml

@@ -30,3 +30,4 @@ reqwest-middleware = { version = "0.3.3", features = ["charset", "http2", "json"
itertools = "0.13.0"
openport = { version = "0.1.1", features = ["rand"] }
derive_more = { version = "2.0.1", features = ["deref"] }
figment = { version = "0.10.19", features = ["env", "json", "toml", "yaml"] }

llama_proxy_man/src/config.rs

@@ -1,6 +1,11 @@
use serde::Deserialize;
use std::{collections::HashMap, fs};

use figment::{
    providers::{Env, Format, Json, Toml, Yaml},
    Figment,
};

#[derive(Clone, Debug, Deserialize)]
pub struct AppConfig {
    pub system_resources: SystemResources,
@@ -8,8 +13,21 @@ }
}

impl AppConfig {
    pub fn default_figment() -> Self {
        let config: Result<Self, _> = Figment::new()
            .merge(Toml::file("config.toml"))
            .merge(Yaml::file("config.yaml"))
            .merge(Env::prefixed("LLAMA_FORGE_"))
            .join(Json::file("Cargo.json"))
            .extract();
        tracing::info!(?config);
        config.unwrap().assign_internal_ports()
    }

    pub fn default_from_pwd_yml() -> Self {
        let config_str = fs::read_to_string("config.yaml").expect("Failed to read config.yaml");
        let config_str = fs::read_to_string("./config.yaml").expect("Failed to read config.yaml");
        serde_yaml::from_str::<Self>(&config_str)
            .expect("Failed to parse config.yaml")
            .assign_internal_ports()
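
On the figment layering above: later `merge` sources win, so `config.yaml` overrides `config.toml`, and `LLAMA_FORGE_*` environment variables override both (top-level keys only, since no key splitting is configured), while the `join`ed JSON file only fills in missing values. A hedged test-style sketch of extraction, assuming `AppConfig` as defined above (`Yaml::string` is figment's inline-provider constructor from the `Format` trait):

```rust
use figment::{
    providers::{Format, Yaml},
    Figment,
};

// Extract an AppConfig from an inline YAML document instead of disk;
// handy for exercising the schema without touching config.yaml.
fn load_from_str(yaml: &str) -> Result<AppConfig, figment::Error> {
    Figment::new().merge(Yaml::string(yaml)).extract()
}
```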


@@ -17,7 +17,7 @@ use tower_http::trace::{
use tracing::Level;

/// Creates an Axum application to handle inference requests for a specific model.
pub fn create_app(spec: &ModelSpec, state: AppState) -> Router {
pub fn axum_router(spec: &ModelSpec, state: AppState) -> Router {
    Router::new()
        .route(
            "/",
@@ -55,7 +55,7 @@ pub async fn start_server(config: AppConfig) {
        let spec = spec.clone();
        let handle = tokio::spawn(async move {
            let app = create_app(&spec, state);
            let app = axum_router(&spec, state);
            let addr = SocketAddr::from(([0, 0, 0, 0], spec.port));
            tracing::info!(msg = "Listening", ?spec);
            let listener = tokio::net::TcpListener::bind(&addr).await.unwrap();


@@ -93,7 +93,7 @@ pub fn initialize_logger() {
            .from_env_lossy();

        tracing_subscriber::fmt()
            .compact()
            .pretty()
            .with_env_filter(env_filter)
            .init();
    });


@@ -1,5 +1,4 @@
use emacs::{defun, Env, IntoLisp, Result, Value};
use std::sync::Once;
use std::sync::OnceLock;
use tokio::runtime::{Builder, Runtime};