diff --git a/llama_forge_rs/Cargo.toml b/llama_forge_rs/Cargo.toml
index 986a55a..9f39844 100644
--- a/llama_forge_rs/Cargo.toml
+++ b/llama_forge_rs/Cargo.toml
@@ -1,19 +1,19 @@
 [package]
 name = "llama_forge_rs"
-edition.workspace=true
-authors.workspace=true
+edition.workspace = true
+authors.workspace = true
 description = "The LLama Forge RS"
-license.workspace=true
-publish.workspace=true
+license.workspace = true
+publish.workspace = true
 readme = "README.md"
-repository.workspace=true
-version.workspace=true
+repository.workspace = true
+version.workspace = true
 
 [lib]
 crate-type = ["cdylib", "rlib"]
 
 [dependencies]
-llama_proxy_man = {path="../llama_proxy_man", optional = true}
+llama_proxy_man = { path = "../llama_proxy_man", optional = true }
 wasm-bindgen = "=0.2.100"
 # TODO Update to 0.7
 leptos = { version = "0.6", features = [
@@ -84,7 +84,13 @@ mime_guess = { version = "2.0.4", optional = true }
 tracing-test = "0.2.4"
 sysinfo = { version = "0.30.11", optional = true }
 derive_more = { version = "0.99.17", features = ["nightly"] }
-sqlx-macros = { version = "0.7.4", optional = true, features = ["chrono", "json", "migrate", "sqlite", "uuid"] }
+sqlx-macros = { version = "0.7.4", optional = true, features = [
+    "chrono",
+    "json",
+    "migrate",
+    "sqlite",
+    "uuid",
+] }
 pulldown-cmark = { version = "0.12.2", features = ["serde"] }
 # qdrant-client = "1.11.2"
 # swiftide = "0.9.1"
diff --git a/llama_forge_rs/src/server/backends/llama_chat.rs b/llama_forge_rs/src/server/backends/llama_chat.rs
index 0b25d21..7a859f3 100644
--- a/llama_forge_rs/src/server/backends/llama_chat.rs
+++ b/llama_forge_rs/src/server/backends/llama_chat.rs
@@ -11,16 +11,16 @@ use crate::api::{ChannelMessage, Chat, ChatMessage};
 
 #[derive(Serialize, Debug)]
 struct LlamaChatCompletionRequest {
-    stream: bool,
-    model: String,
+    stream: bool,
+    model: String,
     messages: Vec<LlamaChatMessage>,
 }
 
 impl From<Chat> for LlamaChatCompletionRequest {
     fn from(value: Chat) -> Self {
         Self {
-            stream: true,
-            model: "default".to_string(),
+            stream: true,
+            model: "default".to_string(),
             messages: value.history.into_iter().map(|e| e.into()).collect(),
         }
     }
@@ -28,14 +28,14 @@ impl From<Chat> for LlamaChatCompletionRequest {
 
 #[derive(Serialize, Debug)]
 struct LlamaChatMessage {
-    role: String,
+    role: String,
     content: String,
 }
 
 impl From<ChatMessage> for LlamaChatMessage {
     fn from(chat_message: ChatMessage) -> Self {
         Self {
-            role: chat_message.role.into(),
+            role: chat_message.role.into(),
             content: chat_message.content,
         }
     }
@@ -68,7 +68,9 @@ pub struct LlamaService {
 
 impl LlamaService {
     pub fn new(id: Uuid) -> Self {
-        Self { id }
+        Self {
+            id,
+        }
     }
 }
 
diff --git a/llama_forge_rs/src/server/backends/mod.rs b/llama_forge_rs/src/server/backends/mod.rs
index 2cfd72f..1702465 100644
--- a/llama_forge_rs/src/server/backends/mod.rs
+++ b/llama_forge_rs/src/server/backends/mod.rs
@@ -177,7 +177,10 @@ mod tests {
     use crate::{
         api::{ChannelMessage, ChatMessage, ChatRole},
         server::backends::{
-            llama_chat::LlamaService, BackendService, BackendServiceStatus, ChatService,
+            llama_chat::LlamaService,
+            BackendService,
+            BackendServiceStatus,
+            ChatService,
         },
     };
 
@@ -216,7 +219,7 @@ mod tests {
         tracing::debug!("response: {}", response);
 
         assert!(response.contains('4'));
-        service_handle.stop().await;
+        service_handle.stop().await.expect("Stop failed");
 
         assert_eq!(service_handle.status().await, BackendServiceStatus::Stopped);
     }
diff --git a/llama_forge_rs/src/server/backends/runner.rs b/llama_forge_rs/src/server/backends/runner.rs
index 6247701..fde575e 100644
--- a/llama_forge_rs/src/server/backends/runner.rs
+++ b/llama_forge_rs/src/server/backends/runner.rs
@@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize};
 use tokio::process::Command;
 
 pub struct RunnerArgs {
-    ctx_size: i64,
+    ctx_size: i64,
     gpu_layers: i64,
     model_path: String,
 }
@@ -36,8 +36,8 @@ impl From<RunnerArgs> for Vec<String> {
 
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct Runner {
-    pwd: Option,
-    cmd: String,
+    pwd: Option,
+    cmd: String,
     args: Vec<String>,
 }
 
@@ -45,8 +45,8 @@ impl Runner {
     // FIXME does not exit properly when it is killed
     pub fn new_llamafile_bin(runner_args: RunnerArgs) -> Self {
         Self {
-            pwd: None,
-            cmd: "bash".to_string(),
+            pwd: None,
+            cmd: "bash".to_string(),
             args: vec![
                 format!(
                     "{}/llamafile",
@@ -64,8 +64,8 @@ impl Runner {
 
     pub fn new_llama_server_bin(runner_args: RunnerArgs) -> Self {
         Self {
-            pwd: None,
-            cmd: "llama-server".to_string(),
+            pwd: None,
+            cmd: "llama-server".to_string(),
             args: runner_args.into(),
         }
     }
diff --git a/llama_forge_rs/src/server/middleware.rs b/llama_forge_rs/src/server/middleware.rs
index 72af58f..9d913e1 100644
--- a/llama_forge_rs/src/server/middleware.rs
+++ b/llama_forge_rs/src/server/middleware.rs
@@ -18,7 +18,9 @@ impl<S> Layer<S> for LoggingLayer {
     type Service = LoggingService<S>;
 
     fn layer(&self, inner: S) -> Self::Service {
-        LoggingService { inner }
+        LoggingService {
+            inner,
+        }
     }
 }
 
@@ -48,8 +50,8 @@ where
 
         LoggingServiceFuture {
             inner: self.inner.call(req),
-            uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
-            span: Arc::new(span),
+            uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
+            span: Arc::new(span),
         }
     }
 }
diff --git a/llama_forge_rs/src/server/mod.rs b/llama_forge_rs/src/server/mod.rs
index 7f7227a..42485b3 100644
--- a/llama_forge_rs/src/server/mod.rs
+++ b/llama_forge_rs/src/server/mod.rs
@@ -6,20 +6,26 @@ use axum::{
     http::Request,
     response::IntoResponse,
     routing::get,
-    Extension, Router,
+    Extension,
+    Router,
 };
 use leptos::*;
 use leptos_axum::{generate_route_list, handle_server_fns_with_context, LeptosRoutes};
 use leptos_router::RouteListing;
 use sqlx::{
     sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteSynchronous},
-    ConnectOptions, SqlitePool,
+    ConnectOptions,
+    SqlitePool,
 };
 use tower::Layer;
 use tower_http::{
     compression::CompressionLayer,
     trace::{
-        DefaultMakeSpan, DefaultOnEos, DefaultOnFailure, DefaultOnRequest, DefaultOnResponse,
+        DefaultMakeSpan,
+        DefaultOnEos,
+        DefaultOnFailure,
+        DefaultOnRequest,
+        DefaultOnResponse,
         TraceLayer,
     },
     CompressionLevel,
diff --git a/llama_proxy_man/Cargo.toml b/llama_proxy_man/Cargo.toml
index d75e1ab..5d21b08 100644
--- a/llama_proxy_man/Cargo.toml
+++ b/llama_proxy_man/Cargo.toml
@@ -15,7 +15,13 @@ serde = { version = "1.0", features = ["derive"] }
 serde_yaml = "0.9"
 axum = { version = "0.7", features = ["macros"] }
 hyper = { version = "1.4", features = ["full"] }
-reqwest = { version = "0.12", features = ["cookies", "multipart", "json", "stream", "native-tls"] }
+reqwest = { version = "0.12", features = [
+    "cookies",
+    "multipart",
+    "json",
+    "stream",
+    "native-tls",
+] }
 futures = "0.3.30"
 anyhow = { version = "1.0.89", features = ["backtrace"] }
 thiserror = "1.0.63"
@@ -26,7 +32,13 @@ pin-project-lite = "0.2.14"
 tower = { version = "0.4", features = ["tokio", "tracing"] }
 tower-http = { version = "0.5.2", features = ["trace"] }
 reqwest-retry = "0.6.1"
-reqwest-middleware = { version = "0.3.3", features = ["charset", "http2", "json", "multipart", "rustls-tls"] }
+reqwest-middleware = { version = "0.3.3", features = [
+    "charset",
+    "http2",
+    "json",
+    "multipart",
+    "rustls-tls",
+] }
 itertools = "0.13.0"
 openport = { version = "0.1.1", features = ["rand"] }
 derive_more = { version = "2.0.1", features = ["deref"] }
diff --git a/llama_proxy_man/src/config.rs b/llama_proxy_man/src/config.rs
index 397ebd5..e1cfa8c 100644
--- a/llama_proxy_man/src/config.rs
+++ b/llama_proxy_man/src/config.rs
@@ -1,10 +1,10 @@
-use serde::Deserialize;
 use std::{collections::HashMap, fs};
 
 use figment::{
     providers::{Env, Format, Json, Toml, Yaml},
     Figment,
 };
+use serde::Deserialize;
 
 #[derive(Clone, Debug, Deserialize)]
 pub struct AppConfig {
@@ -50,7 +50,7 @@ impl AppConfig {
 
 #[derive(Clone, Debug, Deserialize)]
 pub struct SystemResources {
-    pub ram: String,
+    pub ram: String,
     pub vram: String,
 }
 
diff --git a/llama_proxy_man/src/error.rs b/llama_proxy_man/src/error.rs
index 59d5973..8c824a7 100644
--- a/llama_proxy_man/src/error.rs
+++ b/llama_proxy_man/src/error.rs
@@ -1,9 +1,10 @@
+use std::io;
+
 use anyhow::Error as AnyError;
 use axum::{http, response::IntoResponse};
 use hyper;
 use reqwest;
 use reqwest_middleware;
-use std::io;
 use thiserror::Error;
 
 #[derive(Error, Debug)]
diff --git a/llama_proxy_man/src/inference_process.rs b/llama_proxy_man/src/inference_process.rs
index 4671d51..5bb0aa1 100644
--- a/llama_proxy_man/src/inference_process.rs
+++ b/llama_proxy_man/src/inference_process.rs
@@ -1,7 +1,7 @@
-use crate::{config::ModelSpec, error::AppError, state::AppState, util::parse_size};
+use std::{process::Stdio, sync::Arc};
+
 use anyhow::anyhow;
 use itertools::Itertools;
-use std::{process::Stdio, sync::Arc};
 use tokio::{
     net::TcpStream,
     process::{Child, Command},
@@ -9,9 +9,11 @@ use tokio::{
     time::{sleep, Duration},
 };
 
+use crate::{config::ModelSpec, error::AppError, state::AppState, util::parse_size};
+
 #[derive(Clone, Debug)]
 pub struct InferenceProcess {
-    pub spec: ModelSpec,
+    pub spec: ModelSpec,
     pub process: Arc<Mutex<Child>>,
 }
 
@@ -115,7 +117,7 @@ impl InferenceProcess {
         let child = cmd.spawn().expect("Failed to start llama-server");
 
         Ok(InferenceProcess {
-            spec: spec.clone(),
+            spec: spec.clone(),
             process: Arc::new(Mutex::new(child)),
         })
     }
diff --git a/llama_proxy_man/src/lib.rs b/llama_proxy_man/src/lib.rs
index 1869f81..9d1a58c 100644
--- a/llama_proxy_man/src/lib.rs
+++ b/llama_proxy_man/src/lib.rs
@@ -6,12 +6,17 @@ pub mod proxy;
 pub mod state;
 pub mod util;
 
+use std::net::SocketAddr;
+
 use axum::{routing::any, Router};
 use config::{AppConfig, ModelSpec};
 use state::AppState;
-use std::net::SocketAddr;
 use tower_http::trace::{
-    DefaultMakeSpan, DefaultOnEos, DefaultOnFailure, DefaultOnRequest, DefaultOnResponse,
+    DefaultMakeSpan,
+    DefaultOnEos,
+    DefaultOnFailure,
+    DefaultOnRequest,
+    DefaultOnResponse,
     TraceLayer,
 };
 use tracing::Level;
diff --git a/llama_proxy_man/src/logging.rs b/llama_proxy_man/src/logging.rs
index 9d55564..4bb0a4f 100644
--- a/llama_proxy_man/src/logging.rs
+++ b/llama_proxy_man/src/logging.rs
@@ -1,12 +1,10 @@
 use std::{
     future::Future,
     pin::Pin,
-    sync::Arc,
+    sync::{Arc, Once},
     task::{Context, Poll},
 };
 
-use std::sync::Once;
-
 use axum::{body::Body, http::Request};
 use pin_project_lite::pin_project;
 use tower::{Layer, Service};
@@ -18,7 +16,9 @@ impl<S> Layer<S> for LoggingLayer {
     type Service = LoggingService<S>;
 
     fn layer(&self, inner: S) -> Self::Service {
-        LoggingService { inner }
+        LoggingService {
+            inner,
+        }
     }
 }
 
@@ -53,7 +53,7 @@ where
 
         LoggingServiceFuture {
             inner: self.inner.call(req),
-            uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
+            uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
         }
     }
 }
diff --git a/llama_proxy_man/src/proxy.rs b/llama_proxy_man/src/proxy.rs
index 18217d2..d34c00a 100644
--- a/llama_proxy_man/src/proxy.rs
+++ b/llama_proxy_man/src/proxy.rs
@@ -1,7 +1,3 @@
-use crate::{
-    config::ModelSpec, error::AppError, inference_process::InferenceProcess, state::AppState,
-    util::parse_size,
-};
 use axum::{
     body::Body,
     http::{Request, Response},
@@ -11,6 +7,14 @@ use reqwest::Client;
 use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
 use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
 
+use crate::{
+    config::ModelSpec,
+    error::AppError,
+    inference_process::InferenceProcess,
+    state::AppState,
+    util::parse_size,
+};
+
 pub async fn proxy_request(
     req: Request<Body>,
     spec: &ModelSpec,
diff --git a/llama_proxy_man/src/state.rs b/llama_proxy_man/src/state.rs
index f0dd664..770187c 100644
--- a/llama_proxy_man/src/state.rs
+++ b/llama_proxy_man/src/state.rs
@@ -1,14 +1,16 @@
-use crate::{config::AppConfig, inference_process::InferenceProcess, util::parse_size};
 use std::{collections::HashMap, sync::Arc};
+
 use tokio::sync::Mutex;
 
+use crate::{config::AppConfig, inference_process::InferenceProcess, util::parse_size};
+
 #[derive(Clone, Debug)]
 pub struct ResourceManager {
-    pub total_ram: u64,
+    pub total_ram: u64,
     pub total_vram: u64,
-    pub used_ram: u64,
-    pub used_vram: u64,
-    pub processes: HashMap,
+    pub used_ram: u64,
+    pub used_vram: u64,
+    pub processes: HashMap,
 }
 
 pub type ResourceManagerHandle = Arc<Mutex<ResourceManager>>;
diff --git a/redvault_el_rs/src/lib.rs b/redvault_el_rs/src/lib.rs
index 95b3a08..dfcc6d0 100644
--- a/redvault_el_rs/src/lib.rs
+++ b/redvault_el_rs/src/lib.rs
@@ -1,5 +1,6 @@
-use emacs::{defun, Env, IntoLisp, Result, Value};
 use std::sync::OnceLock;
+
+use emacs::{defun, Env, IntoLisp, Result, Value};
 use tokio::runtime::{Builder, Runtime};
 
 // Emacs won't load the module without this.
@@ -48,12 +49,9 @@ fn init(env: &Env) -> Result<Value<'_>> {
 #[defun]
 fn say_hello(env: &Env, name: String) -> Result<Value<'_>> {
     // env.message(&format!("Helloo Broooooooo, {}!", name))
-    env.call(
-        "message",
-        [format!("Henlo whatsup, {}!!!!", name)
-            .as_str()
-            .into_lisp(env)?],
-    )?;
+    env.call("message", [format!("Henlo whatsup, {}!!!!", name)
+        .as_str()
+        .into_lisp(env)?])?;
     RUNTIME
         .get()
         .ok_or_else(|| anyhow::anyhow!("No runtime"))?