Tristan D. 2025-02-20 02:13:21 +01:00
parent c545161878
commit 6eaad79f9a
Signed by: tristan
SSH key fingerprint: SHA256:9oFM1J63hYWJjCnLG6C0fxBS15rwNcWwdQNMOHYKJ/4
15 changed files with 105 additions and 62 deletions

View file

@@ -1,19 +1,19 @@
 [package]
 name = "llama_forge_rs"
-edition.workspace=true
-authors.workspace=true
+edition.workspace = true
+authors.workspace = true
 description = "The LLama Forge RS"
-license.workspace=true
-publish.workspace=true
+license.workspace = true
+publish.workspace = true
 readme = "README.md"
-repository.workspace=true
-version.workspace=true
+repository.workspace = true
+version.workspace = true
 
 [lib]
 crate-type = ["cdylib", "rlib"]
 
 [dependencies]
-llama_proxy_man = {path="../llama_proxy_man", optional = true}
+llama_proxy_man = { path = "../llama_proxy_man", optional = true }
 wasm-bindgen = "=0.2.100"
 # TODO Update to 0.7
 leptos = { version = "0.6", features = [
@@ -84,7 +84,13 @@ mime_guess = { version = "2.0.4", optional = true }
 tracing-test = "0.2.4"
 sysinfo = { version = "0.30.11", optional = true }
 derive_more = { version = "0.99.17", features = ["nightly"] }
-sqlx-macros = { version = "0.7.4", optional = true, features = ["chrono", "json", "migrate", "sqlite", "uuid"] }
+sqlx-macros = { version = "0.7.4", optional = true, features = [
+    "chrono",
+    "json",
+    "migrate",
+    "sqlite",
+    "uuid",
+] }
 pulldown-cmark = { version = "0.12.2", features = ["serde"] }
 # qdrant-client = "1.11.2"
 # swiftide = "0.9.1"

View file

@@ -11,16 +11,16 @@ use crate::api::{ChannelMessage, Chat, ChatMessage};
 #[derive(Serialize, Debug)]
 struct LlamaChatCompletionRequest {
-    stream:   bool,
-    model:    String,
+    stream: bool,
+    model: String,
     messages: Vec<LlamaChatMessage>,
 }
 
 impl From<Chat> for LlamaChatCompletionRequest {
     fn from(value: Chat) -> Self {
         Self {
-            stream:   true,
-            model:    "default".to_string(),
+            stream: true,
+            model: "default".to_string(),
             messages: value.history.into_iter().map(|e| e.into()).collect(),
         }
     }
@@ -28,14 +28,14 @@ impl From<Chat> for LlamaChatCompletionRequest {
 #[derive(Serialize, Debug)]
 struct LlamaChatMessage {
-    role:    String,
+    role: String,
     content: String,
 }
 
 impl From<ChatMessage> for LlamaChatMessage {
     fn from(chat_message: ChatMessage) -> Self {
         Self {
-            role:    chat_message.role.into(),
+            role: chat_message.role.into(),
             content: chat_message.content,
         }
     }
@@ -68,7 +68,9 @@ pub struct LlamaService {
 impl LlamaService {
     pub fn new(id: Uuid) -> Self {
-        Self { id }
+        Self {
+            id,
+        }
     }
 }
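
For context, the two Serialize structs in this file define an OpenAI-style chat-completion payload. A minimal sketch of what they serialize to (field names and order follow the hunk above; the HTTP endpoint itself is not part of this diff):

let request = LlamaChatCompletionRequest {
    stream: true,
    model: "default".to_string(),
    messages: vec![LlamaChatMessage {
        role: "user".to_string(),
        content: "What is 2+2?".to_string(),
    }],
};
// serde_json emits fields in declaration order:
// {"stream":true,"model":"default","messages":[{"role":"user","content":"What is 2+2?"}]}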

View file

@@ -177,7 +177,10 @@ mod tests {
     use crate::{
         api::{ChannelMessage, ChatMessage, ChatRole},
         server::backends::{
-            llama_chat::LlamaService, BackendService, BackendServiceStatus, ChatService,
+            llama_chat::LlamaService,
+            BackendService,
+            BackendServiceStatus,
+            ChatService,
         },
     };
@@ -216,7 +219,7 @@ mod tests {
         tracing::debug!("response: {}", response);
         assert!(response.contains('4'));
-        service_handle.stop().await;
+        service_handle.stop().await.expect("Stop failed");
         assert_eq!(service_handle.status().await, BackendServiceStatus::Stopped);
     }

View file

@@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize};
 use tokio::process::Command;
 
 pub struct RunnerArgs {
-    ctx_size:   i64,
+    ctx_size: i64,
     gpu_layers: i64,
     model_path: String,
 }
@@ -36,8 +36,8 @@ impl From<RunnerArgs> for Vec<String> {
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct Runner {
-    pwd:  Option<String>,
-    cmd:  String,
+    pwd: Option<String>,
+    cmd: String,
     args: Vec<String>,
 }
@@ -45,8 +45,8 @@ impl Runner {
     // FIXME does not exit properly when it is killed
     pub fn new_llamafile_bin(runner_args: RunnerArgs) -> Self {
         Self {
-            pwd:  None,
-            cmd:  "bash".to_string(),
+            pwd: None,
+            cmd: "bash".to_string(),
             args: vec![
                 format!(
                     "{}/llamafile",
@@ -64,8 +64,8 @@ impl Runner {
     pub fn new_llama_server_bin(runner_args: RunnerArgs) -> Self {
         Self {
-            pwd:  None,
-            cmd:  "llama-server".to_string(),
+            pwd: None,
+            cmd: "llama-server".to_string(),
             args: runner_args.into(),
         }
     }
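
The hunk headers above reference impl From<RunnerArgs> for Vec<String>, whose body is not shown. A plausible sketch of that flag mapping, assuming llama.cpp's standard server flags (--ctx-size, --n-gpu-layers, --model); the crate's actual flags may differ:

impl From<RunnerArgs> for Vec<String> {
    fn from(args: RunnerArgs) -> Self {
        // Assumed flag names; llama-server also accepts -c, -ngl, -m.
        vec![
            "--ctx-size".to_string(),
            args.ctx_size.to_string(),
            "--n-gpu-layers".to_string(),
            args.gpu_layers.to_string(),
            "--model".to_string(),
            args.model_path,
        ]
    }
}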

View file

@@ -18,7 +18,9 @@ impl<S> Layer<S> for LoggingLayer {
     type Service = LoggingService<S>;
     fn layer(&self, inner: S) -> Self::Service {
-        LoggingService { inner }
+        LoggingService {
+            inner,
+        }
     }
 }
@@ -48,8 +50,8 @@
         LoggingServiceFuture {
             inner: self.inner.call(req),
-            uuid:  Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
-            span:  Arc::new(span),
+            uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
+            span: Arc::new(span),
         }
     }
 }
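
Both changes in this file are formatting-only; the surrounding code is the standard tower middleware shape. A self-contained sketch of that pattern with illustrative names (LogLayer and LogService are not the crate's types):

use std::task::{Context, Poll};

use tower::{Layer, Service};

#[derive(Clone)]
pub struct LogLayer;

impl<S> Layer<S> for LogLayer {
    type Service = LogService<S>;

    // A Layer wraps an inner Service and returns a new Service.
    fn layer(&self, inner: S) -> Self::Service {
        LogService { inner }
    }
}

#[derive(Clone)]
pub struct LogService<S> {
    inner: S,
}

impl<S, Req> Service<Req> for LogService<S>
where
    S: Service<Req>,
    Req: std::fmt::Debug,
{
    type Response = S::Response;
    type Error = S::Error;
    type Future = S::Future;

    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        self.inner.poll_ready(cx)
    }

    fn call(&mut self, req: Req) -> Self::Future {
        // Log before delegating; the real layer also tags the request
        // with a UUID and a tracing span, as the diff above shows.
        tracing::debug!(?req, "handling request");
        self.inner.call(req)
    }
}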

View file

@@ -6,20 +6,26 @@ use axum::{
     http::Request,
     response::IntoResponse,
     routing::get,
-    Extension, Router,
+    Extension,
+    Router,
 };
 use leptos::*;
 use leptos_axum::{generate_route_list, handle_server_fns_with_context, LeptosRoutes};
 use leptos_router::RouteListing;
 use sqlx::{
     sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteSynchronous},
-    ConnectOptions, SqlitePool,
+    ConnectOptions,
+    SqlitePool,
 };
 use tower::Layer;
 use tower_http::{
     compression::CompressionLayer,
     trace::{
-        DefaultMakeSpan, DefaultOnEos, DefaultOnFailure, DefaultOnRequest, DefaultOnResponse,
+        DefaultMakeSpan,
+        DefaultOnEos,
+        DefaultOnFailure,
+        DefaultOnRequest,
+        DefaultOnResponse,
         TraceLayer,
     },
     CompressionLevel,

View file

@@ -15,7 +15,13 @@ serde = { version = "1.0", features = ["derive"] }
 serde_yaml = "0.9"
 axum = { version = "0.7", features = ["macros"] }
 hyper = { version = "1.4", features = ["full"] }
-reqwest = { version = "0.12", features = ["cookies", "multipart", "json", "stream", "native-tls"] }
+reqwest = { version = "0.12", features = [
+    "cookies",
+    "multipart",
+    "json",
+    "stream",
+    "native-tls",
+] }
 futures = "0.3.30"
 anyhow = { version = "1.0.89", features = ["backtrace"] }
 thiserror = "1.0.63"
@@ -26,7 +32,13 @@ pin-project-lite = "0.2.14"
 tower = { version = "0.4", features = ["tokio", "tracing"] }
 tower-http = { version = "0.5.2", features = ["trace"] }
 reqwest-retry = "0.6.1"
-reqwest-middleware = { version = "0.3.3", features = ["charset", "http2", "json", "multipart", "rustls-tls"] }
+reqwest-middleware = { version = "0.3.3", features = [
+    "charset",
+    "http2",
+    "json",
+    "multipart",
+    "rustls-tls",
+] }
 itertools = "0.13.0"
 openport = { version = "0.1.1", features = ["rand"] }
 derive_more = { version = "2.0.1", features = ["deref"] }
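
reqwest-retry and reqwest-middleware compose into a single client; a minimal sketch of the wiring these dependencies enable (the crate's actual setup is in the proxy module further down, and the retry count here is an arbitrary choice):

use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};

fn retrying_client() -> ClientWithMiddleware {
    // Retry transient failures (timeouts, 5xx) with exponential backoff.
    let policy = ExponentialBackoff::builder().build_with_max_retries(3);
    ClientBuilder::new(reqwest::Client::new())
        .with(RetryTransientMiddleware::new_with_policy(policy))
        .build()
}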

View file

@@ -1,10 +1,10 @@
-use serde::Deserialize;
 use std::{collections::HashMap, fs};
 use figment::{
     providers::{Env, Format, Json, Toml, Yaml},
     Figment,
 };
+use serde::Deserialize;
 
 #[derive(Clone, Debug, Deserialize)]
 pub struct AppConfig {
@@ -50,7 +50,7 @@ impl AppConfig {
 #[derive(Clone, Debug, Deserialize)]
 pub struct SystemResources {
-    pub ram:  String,
+    pub ram: String,
     pub vram: String,
 }
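
The figment imports being reordered here support layered configuration. A hedged sketch of the usual loading pattern (the file name and env prefix are assumptions, not taken from this crate):

use figment::{
    providers::{Env, Format, Toml},
    Figment,
};

fn load_config() -> Result<AppConfig, figment::Error> {
    // Later merges win: environment variables override the TOML file.
    Figment::new()
        .merge(Toml::file("config.toml"))
        .merge(Env::prefixed("APP_"))
        .extract()
}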

View file

@@ -1,9 +1,10 @@
+use std::io;
+
 use anyhow::Error as AnyError;
 use axum::{http, response::IntoResponse};
 use hyper;
 use reqwest;
 use reqwest_middleware;
-use std::io;
 use thiserror::Error;
 
 #[derive(Error, Debug)]

View file

@@ -1,7 +1,7 @@
-use crate::{config::ModelSpec, error::AppError, state::AppState, util::parse_size};
+use std::{process::Stdio, sync::Arc};
+
 use anyhow::anyhow;
 use itertools::Itertools;
-use std::{process::Stdio, sync::Arc};
 use tokio::{
     net::TcpStream,
     process::{Child, Command},
@@ -9,9 +9,11 @@ use tokio::{
     time::{sleep, Duration},
 };
+
+use crate::{config::ModelSpec, error::AppError, state::AppState, util::parse_size};
 
 #[derive(Clone, Debug)]
 pub struct InferenceProcess {
-    pub spec:    ModelSpec,
+    pub spec: ModelSpec,
     pub process: Arc<Mutex<Child>>,
 }
@@ -115,7 +117,7 @@
         let child = cmd.spawn().expect("Failed to start llama-server");
         Ok(InferenceProcess {
-            spec:    spec.clone(),
+            spec: spec.clone(),
             process: Arc::new(Mutex::new(child)),
         })
     }
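
The Arc<Mutex<Child>> field is the load-bearing detail here: the spawned llama-server handle must be shareable so one task can wait on the process while another can later lock the handle and kill it. A minimal sketch of that ownership pattern (spawn_shared is an assumed helper, not the crate's API):

use std::sync::Arc;

use tokio::{
    process::{Child, Command},
    sync::Mutex,
};

fn spawn_shared(cmd: &mut Command) -> std::io::Result<Arc<Mutex<Child>>> {
    // Every clone of the Arc can lock().await the child independently.
    let child = cmd.spawn()?;
    Ok(Arc::new(Mutex::new(child)))
}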

View file

@@ -6,12 +6,17 @@ pub mod proxy;
 pub mod state;
 pub mod util;
 
+use std::net::SocketAddr;
+
 use axum::{routing::any, Router};
 use config::{AppConfig, ModelSpec};
 use state::AppState;
-use std::net::SocketAddr;
 use tower_http::trace::{
-    DefaultMakeSpan, DefaultOnEos, DefaultOnFailure, DefaultOnRequest, DefaultOnResponse,
+    DefaultMakeSpan,
+    DefaultOnEos,
+    DefaultOnFailure,
+    DefaultOnRequest,
+    DefaultOnResponse,
     TraceLayer,
 };
 use tracing::Level;

View file

@@ -1,12 +1,10 @@
 use std::{
     future::Future,
     pin::Pin,
-    sync::Arc,
+    sync::{Arc, Once},
     task::{Context, Poll},
 };
-use std::sync::Once;
+
 use axum::{body::Body, http::Request};
 use pin_project_lite::pin_project;
 use tower::{Layer, Service};
@@ -18,7 +16,9 @@ impl<S> Layer<S> for LoggingLayer {
     type Service = LoggingService<S>;
     fn layer(&self, inner: S) -> Self::Service {
-        LoggingService { inner }
+        LoggingService {
+            inner,
+        }
     }
 }
@@ -53,7 +53,7 @@ where
         LoggingServiceFuture {
             inner: self.inner.call(req),
-            uuid:  Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
+            uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
         }
     }
 }

View file

@@ -1,7 +1,3 @@
-use crate::{
-    config::ModelSpec, error::AppError, inference_process::InferenceProcess, state::AppState,
-    util::parse_size,
-};
 use axum::{
     body::Body,
     http::{Request, Response},
@@ -11,6 +7,14 @@ use reqwest::Client;
 use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
 use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
+
+use crate::{
+    config::ModelSpec,
+    error::AppError,
+    inference_process::InferenceProcess,
+    state::AppState,
+    util::parse_size,
+};
 
 pub async fn proxy_request(
     req: Request<Body>,
     spec: &ModelSpec,

View file

@@ -1,14 +1,16 @@
-use crate::{config::AppConfig, inference_process::InferenceProcess, util::parse_size};
 use std::{collections::HashMap, sync::Arc};
+
 use tokio::sync::Mutex;
+
+use crate::{config::AppConfig, inference_process::InferenceProcess, util::parse_size};
 
 #[derive(Clone, Debug)]
 pub struct ResourceManager {
-    pub total_ram:  u64,
+    pub total_ram: u64,
     pub total_vram: u64,
-    pub used_ram:   u64,
-    pub used_vram:  u64,
-    pub processes:  HashMap<u16, InferenceProcess>,
+    pub used_ram: u64,
+    pub used_vram: u64,
+    pub processes: HashMap<u16, InferenceProcess>,
 }
 
 pub type ResourceManagerHandle = Arc<Mutex<ResourceManager>>;
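
The fields above imply the accounting scheme: each process is keyed by its port, and its RAM/VRAM cost is charged against the configured totals. An illustrative admission check under that reading (try_register is assumed, not the crate's actual API):

async fn try_register(
    handle: &ResourceManagerHandle,
    port: u16,
    process: InferenceProcess,
    ram: u64,
    vram: u64,
) -> bool {
    let mut mgr = handle.lock().await;
    // Refuse the process if it would exceed either budget.
    if mgr.used_ram + ram > mgr.total_ram || mgr.used_vram + vram > mgr.total_vram {
        return false;
    }
    mgr.used_ram += ram;
    mgr.used_vram += vram;
    mgr.processes.insert(port, process);
    true
}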

View file

@@ -1,5 +1,6 @@
-use emacs::{defun, Env, IntoLisp, Result, Value};
 use std::sync::OnceLock;
+
+use emacs::{defun, Env, IntoLisp, Result, Value};
 use tokio::runtime::{Builder, Runtime};
 
 // Emacs won't load the module without this.
@@ -48,12 +49,9 @@ fn init(env: &Env) -> Result<Value<'_>> {
 #[defun]
 fn say_hello(env: &Env, name: String) -> Result<Value<'_>> {
     // env.message(&format!("Helloo Broooooooo, {}!", name))
-    env.call(
-        "message",
-        [format!("Henlo whatsup, {}!!!!", name)
-            .as_str()
-            .into_lisp(env)?],
-    )?;
+    env.call("message", [format!("Henlo whatsup, {}!!!!", name)
+        .as_str()
+        .into_lisp(env)?])?;
     RUNTIME
         .get()
         .ok_or_else(|| anyhow::anyhow!("No runtime"))?