This commit is contained in:
Tristan D. 2025-02-20 02:13:21 +01:00
parent c545161878
commit 6eaad79f9a
Signed by: tristan
SSH key fingerprint: SHA256:9oFM1J63hYWJjCnLG6C0fxBS15rwNcWwdQNMOHYKJ/4
15 changed files with 105 additions and 62 deletions

View file

@ -1,19 +1,19 @@
[package] [package]
name = "llama_forge_rs" name = "llama_forge_rs"
edition.workspace=true edition.workspace = true
authors.workspace=true authors.workspace = true
description = "The LLama Forge RS" description = "The LLama Forge RS"
license.workspace=true license.workspace = true
publish.workspace=true publish.workspace = true
readme = "README.md" readme = "README.md"
repository.workspace=true repository.workspace = true
version.workspace=true version.workspace = true
[lib] [lib]
crate-type = ["cdylib", "rlib"] crate-type = ["cdylib", "rlib"]
[dependencies] [dependencies]
llama_proxy_man = {path="../llama_proxy_man", optional = true} llama_proxy_man = { path = "../llama_proxy_man", optional = true }
wasm-bindgen = "=0.2.100" wasm-bindgen = "=0.2.100"
# TODO Update to 0.7 # TODO Update to 0.7
leptos = { version = "0.6", features = [ leptos = { version = "0.6", features = [
@ -84,7 +84,13 @@ mime_guess = { version = "2.0.4", optional = true }
tracing-test = "0.2.4" tracing-test = "0.2.4"
sysinfo = { version = "0.30.11", optional = true } sysinfo = { version = "0.30.11", optional = true }
derive_more = { version = "0.99.17", features = ["nightly"] } derive_more = { version = "0.99.17", features = ["nightly"] }
sqlx-macros = { version = "0.7.4", optional = true, features = ["chrono", "json", "migrate", "sqlite", "uuid"] } sqlx-macros = { version = "0.7.4", optional = true, features = [
"chrono",
"json",
"migrate",
"sqlite",
"uuid",
] }
pulldown-cmark = { version = "0.12.2", features = ["serde"] } pulldown-cmark = { version = "0.12.2", features = ["serde"] }
# qdrant-client = "1.11.2" # qdrant-client = "1.11.2"
# swiftide = "0.9.1" # swiftide = "0.9.1"

View file

@ -11,16 +11,16 @@ use crate::api::{ChannelMessage, Chat, ChatMessage};
#[derive(Serialize, Debug)] #[derive(Serialize, Debug)]
struct LlamaChatCompletionRequest { struct LlamaChatCompletionRequest {
stream: bool, stream: bool,
model: String, model: String,
messages: Vec<LlamaChatMessage>, messages: Vec<LlamaChatMessage>,
} }
impl From<Chat> for LlamaChatCompletionRequest { impl From<Chat> for LlamaChatCompletionRequest {
fn from(value: Chat) -> Self { fn from(value: Chat) -> Self {
Self { Self {
stream: true, stream: true,
model: "default".to_string(), model: "default".to_string(),
messages: value.history.into_iter().map(|e| e.into()).collect(), messages: value.history.into_iter().map(|e| e.into()).collect(),
} }
} }
@ -28,14 +28,14 @@ impl From<Chat> for LlamaChatCompletionRequest {
#[derive(Serialize, Debug)] #[derive(Serialize, Debug)]
struct LlamaChatMessage { struct LlamaChatMessage {
role: String, role: String,
content: String, content: String,
} }
impl From<ChatMessage> for LlamaChatMessage { impl From<ChatMessage> for LlamaChatMessage {
fn from(chat_message: ChatMessage) -> Self { fn from(chat_message: ChatMessage) -> Self {
Self { Self {
role: chat_message.role.into(), role: chat_message.role.into(),
content: chat_message.content, content: chat_message.content,
} }
} }
@ -68,7 +68,9 @@ pub struct LlamaService {
impl LlamaService { impl LlamaService {
pub fn new(id: Uuid) -> Self { pub fn new(id: Uuid) -> Self {
Self { id } Self {
id,
}
} }
} }

View file

@ -177,7 +177,10 @@ mod tests {
use crate::{ use crate::{
api::{ChannelMessage, ChatMessage, ChatRole}, api::{ChannelMessage, ChatMessage, ChatRole},
server::backends::{ server::backends::{
llama_chat::LlamaService, BackendService, BackendServiceStatus, ChatService, llama_chat::LlamaService,
BackendService,
BackendServiceStatus,
ChatService,
}, },
}; };
@ -216,7 +219,7 @@ mod tests {
tracing::debug!("response: {}", response); tracing::debug!("response: {}", response);
assert!(response.contains('4')); assert!(response.contains('4'));
service_handle.stop().await; service_handle.stop().await.expect("Stop failed");
assert_eq!(service_handle.status().await, BackendServiceStatus::Stopped); assert_eq!(service_handle.status().await, BackendServiceStatus::Stopped);
} }

View file

@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize};
use tokio::process::Command; use tokio::process::Command;
pub struct RunnerArgs { pub struct RunnerArgs {
ctx_size: i64, ctx_size: i64,
gpu_layers: i64, gpu_layers: i64,
model_path: String, model_path: String,
} }
@ -36,8 +36,8 @@ impl From<RunnerArgs> for Vec<String> {
#[derive(Debug, Serialize, Deserialize, Clone)] #[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Runner { pub struct Runner {
pwd: Option<String>, pwd: Option<String>,
cmd: String, cmd: String,
args: Vec<String>, args: Vec<String>,
} }
@ -45,8 +45,8 @@ impl Runner {
// FIXME does not exit properly when it is killed // FIXME does not exit properly when it is killed
pub fn new_llamafile_bin(runner_args: RunnerArgs) -> Self { pub fn new_llamafile_bin(runner_args: RunnerArgs) -> Self {
Self { Self {
pwd: None, pwd: None,
cmd: "bash".to_string(), cmd: "bash".to_string(),
args: vec![ args: vec![
format!( format!(
"{}/llamafile", "{}/llamafile",
@ -64,8 +64,8 @@ impl Runner {
pub fn new_llama_server_bin(runner_args: RunnerArgs) -> Self { pub fn new_llama_server_bin(runner_args: RunnerArgs) -> Self {
Self { Self {
pwd: None, pwd: None,
cmd: "llama-server".to_string(), cmd: "llama-server".to_string(),
args: runner_args.into(), args: runner_args.into(),
} }
} }

View file

@ -18,7 +18,9 @@ impl<S> Layer<S> for LoggingLayer {
type Service = LoggingService<S>; type Service = LoggingService<S>;
fn layer(&self, inner: S) -> Self::Service { fn layer(&self, inner: S) -> Self::Service {
LoggingService { inner } LoggingService {
inner,
}
} }
} }
@ -48,8 +50,8 @@ where
LoggingServiceFuture { LoggingServiceFuture {
inner: self.inner.call(req), inner: self.inner.call(req),
uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
span: Arc::new(span), span: Arc::new(span),
} }
} }
} }

View file

@ -6,20 +6,26 @@ use axum::{
http::Request, http::Request,
response::IntoResponse, response::IntoResponse,
routing::get, routing::get,
Extension, Router, Extension,
Router,
}; };
use leptos::*; use leptos::*;
use leptos_axum::{generate_route_list, handle_server_fns_with_context, LeptosRoutes}; use leptos_axum::{generate_route_list, handle_server_fns_with_context, LeptosRoutes};
use leptos_router::RouteListing; use leptos_router::RouteListing;
use sqlx::{ use sqlx::{
sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteSynchronous}, sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteSynchronous},
ConnectOptions, SqlitePool, ConnectOptions,
SqlitePool,
}; };
use tower::Layer; use tower::Layer;
use tower_http::{ use tower_http::{
compression::CompressionLayer, compression::CompressionLayer,
trace::{ trace::{
DefaultMakeSpan, DefaultOnEos, DefaultOnFailure, DefaultOnRequest, DefaultOnResponse, DefaultMakeSpan,
DefaultOnEos,
DefaultOnFailure,
DefaultOnRequest,
DefaultOnResponse,
TraceLayer, TraceLayer,
}, },
CompressionLevel, CompressionLevel,

View file

@ -15,7 +15,13 @@ serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9" serde_yaml = "0.9"
axum = { version = "0.7", features = ["macros"] } axum = { version = "0.7", features = ["macros"] }
hyper = { version = "1.4", features = ["full"] } hyper = { version = "1.4", features = ["full"] }
reqwest = { version = "0.12", features = ["cookies", "multipart", "json", "stream", "native-tls"] } reqwest = { version = "0.12", features = [
"cookies",
"multipart",
"json",
"stream",
"native-tls",
] }
futures = "0.3.30" futures = "0.3.30"
anyhow = { version = "1.0.89", features = ["backtrace"] } anyhow = { version = "1.0.89", features = ["backtrace"] }
thiserror = "1.0.63" thiserror = "1.0.63"
@ -26,7 +32,13 @@ pin-project-lite = "0.2.14"
tower = { version = "0.4", features = ["tokio", "tracing"] } tower = { version = "0.4", features = ["tokio", "tracing"] }
tower-http = { version = "0.5.2", features = ["trace"] } tower-http = { version = "0.5.2", features = ["trace"] }
reqwest-retry = "0.6.1" reqwest-retry = "0.6.1"
reqwest-middleware = { version = "0.3.3", features = ["charset", "http2", "json", "multipart", "rustls-tls"] } reqwest-middleware = { version = "0.3.3", features = [
"charset",
"http2",
"json",
"multipart",
"rustls-tls",
] }
itertools = "0.13.0" itertools = "0.13.0"
openport = { version = "0.1.1", features = ["rand"] } openport = { version = "0.1.1", features = ["rand"] }
derive_more = { version = "2.0.1", features = ["deref"] } derive_more = { version = "2.0.1", features = ["deref"] }

View file

@ -1,10 +1,10 @@
use serde::Deserialize;
use std::{collections::HashMap, fs}; use std::{collections::HashMap, fs};
use figment::{ use figment::{
providers::{Env, Format, Json, Toml, Yaml}, providers::{Env, Format, Json, Toml, Yaml},
Figment, Figment,
}; };
use serde::Deserialize;
#[derive(Clone, Debug, Deserialize)] #[derive(Clone, Debug, Deserialize)]
pub struct AppConfig { pub struct AppConfig {
@ -50,7 +50,7 @@ impl AppConfig {
#[derive(Clone, Debug, Deserialize)] #[derive(Clone, Debug, Deserialize)]
pub struct SystemResources { pub struct SystemResources {
pub ram: String, pub ram: String,
pub vram: String, pub vram: String,
} }

View file

@ -1,9 +1,10 @@
use std::io;
use anyhow::Error as AnyError; use anyhow::Error as AnyError;
use axum::{http, response::IntoResponse}; use axum::{http, response::IntoResponse};
use hyper; use hyper;
use reqwest; use reqwest;
use reqwest_middleware; use reqwest_middleware;
use std::io;
use thiserror::Error; use thiserror::Error;
#[derive(Error, Debug)] #[derive(Error, Debug)]

View file

@ -1,7 +1,7 @@
use crate::{config::ModelSpec, error::AppError, state::AppState, util::parse_size}; use std::{process::Stdio, sync::Arc};
use anyhow::anyhow; use anyhow::anyhow;
use itertools::Itertools; use itertools::Itertools;
use std::{process::Stdio, sync::Arc};
use tokio::{ use tokio::{
net::TcpStream, net::TcpStream,
process::{Child, Command}, process::{Child, Command},
@ -9,9 +9,11 @@ use tokio::{
time::{sleep, Duration}, time::{sleep, Duration},
}; };
use crate::{config::ModelSpec, error::AppError, state::AppState, util::parse_size};
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct InferenceProcess { pub struct InferenceProcess {
pub spec: ModelSpec, pub spec: ModelSpec,
pub process: Arc<Mutex<Child>>, pub process: Arc<Mutex<Child>>,
} }
@ -115,7 +117,7 @@ impl InferenceProcess {
let child = cmd.spawn().expect("Failed to start llama-server"); let child = cmd.spawn().expect("Failed to start llama-server");
Ok(InferenceProcess { Ok(InferenceProcess {
spec: spec.clone(), spec: spec.clone(),
process: Arc::new(Mutex::new(child)), process: Arc::new(Mutex::new(child)),
}) })
} }

View file

@ -6,12 +6,17 @@ pub mod proxy;
pub mod state; pub mod state;
pub mod util; pub mod util;
use std::net::SocketAddr;
use axum::{routing::any, Router}; use axum::{routing::any, Router};
use config::{AppConfig, ModelSpec}; use config::{AppConfig, ModelSpec};
use state::AppState; use state::AppState;
use std::net::SocketAddr;
use tower_http::trace::{ use tower_http::trace::{
DefaultMakeSpan, DefaultOnEos, DefaultOnFailure, DefaultOnRequest, DefaultOnResponse, DefaultMakeSpan,
DefaultOnEos,
DefaultOnFailure,
DefaultOnRequest,
DefaultOnResponse,
TraceLayer, TraceLayer,
}; };
use tracing::Level; use tracing::Level;

View file

@ -1,12 +1,10 @@
use std::{ use std::{
future::Future, future::Future,
pin::Pin, pin::Pin,
sync::Arc, sync::{Arc, Once},
task::{Context, Poll}, task::{Context, Poll},
}; };
use std::sync::Once;
use axum::{body::Body, http::Request}; use axum::{body::Body, http::Request};
use pin_project_lite::pin_project; use pin_project_lite::pin_project;
use tower::{Layer, Service}; use tower::{Layer, Service};
@ -18,7 +16,9 @@ impl<S> Layer<S> for LoggingLayer {
type Service = LoggingService<S>; type Service = LoggingService<S>;
fn layer(&self, inner: S) -> Self::Service { fn layer(&self, inner: S) -> Self::Service {
LoggingService { inner } LoggingService {
inner,
}
} }
} }
@ -53,7 +53,7 @@ where
LoggingServiceFuture { LoggingServiceFuture {
inner: self.inner.call(req), inner: self.inner.call(req),
uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
} }
} }
} }

View file

@ -1,7 +1,3 @@
use crate::{
config::ModelSpec, error::AppError, inference_process::InferenceProcess, state::AppState,
util::parse_size,
};
use axum::{ use axum::{
body::Body, body::Body,
http::{Request, Response}, http::{Request, Response},
@ -11,6 +7,14 @@ use reqwest::Client;
use reqwest_middleware::{ClientBuilder, ClientWithMiddleware}; use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware}; use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
use crate::{
config::ModelSpec,
error::AppError,
inference_process::InferenceProcess,
state::AppState,
util::parse_size,
};
pub async fn proxy_request( pub async fn proxy_request(
req: Request<Body>, req: Request<Body>,
spec: &ModelSpec, spec: &ModelSpec,

View file

@ -1,14 +1,16 @@
use crate::{config::AppConfig, inference_process::InferenceProcess, util::parse_size};
use std::{collections::HashMap, sync::Arc}; use std::{collections::HashMap, sync::Arc};
use tokio::sync::Mutex; use tokio::sync::Mutex;
use crate::{config::AppConfig, inference_process::InferenceProcess, util::parse_size};
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct ResourceManager { pub struct ResourceManager {
pub total_ram: u64, pub total_ram: u64,
pub total_vram: u64, pub total_vram: u64,
pub used_ram: u64, pub used_ram: u64,
pub used_vram: u64, pub used_vram: u64,
pub processes: HashMap<u16, InferenceProcess>, pub processes: HashMap<u16, InferenceProcess>,
} }
pub type ResourceManagerHandle = Arc<Mutex<ResourceManager>>; pub type ResourceManagerHandle = Arc<Mutex<ResourceManager>>;

View file

@ -1,5 +1,6 @@
use emacs::{defun, Env, IntoLisp, Result, Value};
use std::sync::OnceLock; use std::sync::OnceLock;
use emacs::{defun, Env, IntoLisp, Result, Value};
use tokio::runtime::{Builder, Runtime}; use tokio::runtime::{Builder, Runtime};
// Emacs won't load the module without this. // Emacs won't load the module without this.
@ -48,12 +49,9 @@ fn init(env: &Env) -> Result<Value<'_>> {
#[defun] #[defun]
fn say_hello(env: &Env, name: String) -> Result<Value<'_>> { fn say_hello(env: &Env, name: String) -> Result<Value<'_>> {
// env.message(&format!("Helloo Broooooooo, {}!", name)) // env.message(&format!("Helloo Broooooooo, {}!", name))
env.call( env.call("message", [format!("Henlo whatsup, {}!!!!", name)
"message", .as_str()
[format!("Henlo whatsup, {}!!!!", name) .into_lisp(env)?])?;
.as_str()
.into_lisp(env)?],
)?;
RUNTIME RUNTIME
.get() .get()
.ok_or_else(|| anyhow::anyhow!("No runtime"))? .ok_or_else(|| anyhow::anyhow!("No runtime"))?