Run fmt
This commit is contained in:
parent
c545161878
commit
6eaad79f9a
15 changed files with 105 additions and 62 deletions
|
@ -1,19 +1,19 @@
|
|||
[package]
|
||||
name = "llama_forge_rs"
|
||||
edition.workspace=true
|
||||
authors.workspace=true
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
description = "The LLama Forge RS"
|
||||
license.workspace=true
|
||||
publish.workspace=true
|
||||
license.workspace = true
|
||||
publish.workspace = true
|
||||
readme = "README.md"
|
||||
repository.workspace=true
|
||||
version.workspace=true
|
||||
repository.workspace = true
|
||||
version.workspace = true
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
|
||||
[dependencies]
|
||||
llama_proxy_man = {path="../llama_proxy_man", optional = true}
|
||||
llama_proxy_man = { path = "../llama_proxy_man", optional = true }
|
||||
wasm-bindgen = "=0.2.100"
|
||||
# TODO Update to 0.7
|
||||
leptos = { version = "0.6", features = [
|
||||
|
@ -84,7 +84,13 @@ mime_guess = { version = "2.0.4", optional = true }
|
|||
tracing-test = "0.2.4"
|
||||
sysinfo = { version = "0.30.11", optional = true }
|
||||
derive_more = { version = "0.99.17", features = ["nightly"] }
|
||||
sqlx-macros = { version = "0.7.4", optional = true, features = ["chrono", "json", "migrate", "sqlite", "uuid"] }
|
||||
sqlx-macros = { version = "0.7.4", optional = true, features = [
|
||||
"chrono",
|
||||
"json",
|
||||
"migrate",
|
||||
"sqlite",
|
||||
"uuid",
|
||||
] }
|
||||
pulldown-cmark = { version = "0.12.2", features = ["serde"] }
|
||||
# qdrant-client = "1.11.2"
|
||||
# swiftide = "0.9.1"
|
||||
|
|
|
@ -11,16 +11,16 @@ use crate::api::{ChannelMessage, Chat, ChatMessage};
|
|||
|
||||
#[derive(Serialize, Debug)]
|
||||
struct LlamaChatCompletionRequest {
|
||||
stream: bool,
|
||||
model: String,
|
||||
stream: bool,
|
||||
model: String,
|
||||
messages: Vec<LlamaChatMessage>,
|
||||
}
|
||||
|
||||
impl From<Chat> for LlamaChatCompletionRequest {
|
||||
fn from(value: Chat) -> Self {
|
||||
Self {
|
||||
stream: true,
|
||||
model: "default".to_string(),
|
||||
stream: true,
|
||||
model: "default".to_string(),
|
||||
messages: value.history.into_iter().map(|e| e.into()).collect(),
|
||||
}
|
||||
}
|
||||
|
@ -28,14 +28,14 @@ impl From<Chat> for LlamaChatCompletionRequest {
|
|||
|
||||
#[derive(Serialize, Debug)]
|
||||
struct LlamaChatMessage {
|
||||
role: String,
|
||||
role: String,
|
||||
content: String,
|
||||
}
|
||||
|
||||
impl From<ChatMessage> for LlamaChatMessage {
|
||||
fn from(chat_message: ChatMessage) -> Self {
|
||||
Self {
|
||||
role: chat_message.role.into(),
|
||||
role: chat_message.role.into(),
|
||||
content: chat_message.content,
|
||||
}
|
||||
}
|
||||
|
@ -68,7 +68,9 @@ pub struct LlamaService {
|
|||
|
||||
impl LlamaService {
|
||||
pub fn new(id: Uuid) -> Self {
|
||||
Self { id }
|
||||
Self {
|
||||
id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -177,7 +177,10 @@ mod tests {
|
|||
use crate::{
|
||||
api::{ChannelMessage, ChatMessage, ChatRole},
|
||||
server::backends::{
|
||||
llama_chat::LlamaService, BackendService, BackendServiceStatus, ChatService,
|
||||
llama_chat::LlamaService,
|
||||
BackendService,
|
||||
BackendServiceStatus,
|
||||
ChatService,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -216,7 +219,7 @@ mod tests {
|
|||
tracing::debug!("response: {}", response);
|
||||
assert!(response.contains('4'));
|
||||
|
||||
service_handle.stop().await;
|
||||
service_handle.stop().await.expect("Stop failed");
|
||||
|
||||
assert_eq!(service_handle.status().await, BackendServiceStatus::Stopped);
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize};
|
|||
use tokio::process::Command;
|
||||
|
||||
pub struct RunnerArgs {
|
||||
ctx_size: i64,
|
||||
ctx_size: i64,
|
||||
gpu_layers: i64,
|
||||
model_path: String,
|
||||
}
|
||||
|
@ -36,8 +36,8 @@ impl From<RunnerArgs> for Vec<String> {
|
|||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct Runner {
|
||||
pwd: Option<String>,
|
||||
cmd: String,
|
||||
pwd: Option<String>,
|
||||
cmd: String,
|
||||
args: Vec<String>,
|
||||
}
|
||||
|
||||
|
@ -45,8 +45,8 @@ impl Runner {
|
|||
// FIXME does not exit properly when it is killed
|
||||
pub fn new_llamafile_bin(runner_args: RunnerArgs) -> Self {
|
||||
Self {
|
||||
pwd: None,
|
||||
cmd: "bash".to_string(),
|
||||
pwd: None,
|
||||
cmd: "bash".to_string(),
|
||||
args: vec![
|
||||
format!(
|
||||
"{}/llamafile",
|
||||
|
@ -64,8 +64,8 @@ impl Runner {
|
|||
|
||||
pub fn new_llama_server_bin(runner_args: RunnerArgs) -> Self {
|
||||
Self {
|
||||
pwd: None,
|
||||
cmd: "llama-server".to_string(),
|
||||
pwd: None,
|
||||
cmd: "llama-server".to_string(),
|
||||
args: runner_args.into(),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,9 @@ impl<S> Layer<S> for LoggingLayer {
|
|||
type Service = LoggingService<S>;
|
||||
|
||||
fn layer(&self, inner: S) -> Self::Service {
|
||||
LoggingService { inner }
|
||||
LoggingService {
|
||||
inner,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -48,8 +50,8 @@ where
|
|||
|
||||
LoggingServiceFuture {
|
||||
inner: self.inner.call(req),
|
||||
uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
|
||||
span: Arc::new(span),
|
||||
uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
|
||||
span: Arc::new(span),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,20 +6,26 @@ use axum::{
|
|||
http::Request,
|
||||
response::IntoResponse,
|
||||
routing::get,
|
||||
Extension, Router,
|
||||
Extension,
|
||||
Router,
|
||||
};
|
||||
use leptos::*;
|
||||
use leptos_axum::{generate_route_list, handle_server_fns_with_context, LeptosRoutes};
|
||||
use leptos_router::RouteListing;
|
||||
use sqlx::{
|
||||
sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteSynchronous},
|
||||
ConnectOptions, SqlitePool,
|
||||
ConnectOptions,
|
||||
SqlitePool,
|
||||
};
|
||||
use tower::Layer;
|
||||
use tower_http::{
|
||||
compression::CompressionLayer,
|
||||
trace::{
|
||||
DefaultMakeSpan, DefaultOnEos, DefaultOnFailure, DefaultOnRequest, DefaultOnResponse,
|
||||
DefaultMakeSpan,
|
||||
DefaultOnEos,
|
||||
DefaultOnFailure,
|
||||
DefaultOnRequest,
|
||||
DefaultOnResponse,
|
||||
TraceLayer,
|
||||
},
|
||||
CompressionLevel,
|
||||
|
|
|
@ -15,7 +15,13 @@ serde = { version = "1.0", features = ["derive"] }
|
|||
serde_yaml = "0.9"
|
||||
axum = { version = "0.7", features = ["macros"] }
|
||||
hyper = { version = "1.4", features = ["full"] }
|
||||
reqwest = { version = "0.12", features = ["cookies", "multipart", "json", "stream", "native-tls"] }
|
||||
reqwest = { version = "0.12", features = [
|
||||
"cookies",
|
||||
"multipart",
|
||||
"json",
|
||||
"stream",
|
||||
"native-tls",
|
||||
] }
|
||||
futures = "0.3.30"
|
||||
anyhow = { version = "1.0.89", features = ["backtrace"] }
|
||||
thiserror = "1.0.63"
|
||||
|
@ -26,7 +32,13 @@ pin-project-lite = "0.2.14"
|
|||
tower = { version = "0.4", features = ["tokio", "tracing"] }
|
||||
tower-http = { version = "0.5.2", features = ["trace"] }
|
||||
reqwest-retry = "0.6.1"
|
||||
reqwest-middleware = { version = "0.3.3", features = ["charset", "http2", "json", "multipart", "rustls-tls"] }
|
||||
reqwest-middleware = { version = "0.3.3", features = [
|
||||
"charset",
|
||||
"http2",
|
||||
"json",
|
||||
"multipart",
|
||||
"rustls-tls",
|
||||
] }
|
||||
itertools = "0.13.0"
|
||||
openport = { version = "0.1.1", features = ["rand"] }
|
||||
derive_more = { version = "2.0.1", features = ["deref"] }
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
use serde::Deserialize;
|
||||
use std::{collections::HashMap, fs};
|
||||
|
||||
use figment::{
|
||||
providers::{Env, Format, Json, Toml, Yaml},
|
||||
Figment,
|
||||
};
|
||||
use serde::Deserialize;
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct AppConfig {
|
||||
|
@ -50,7 +50,7 @@ impl AppConfig {
|
|||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
pub struct SystemResources {
|
||||
pub ram: String,
|
||||
pub ram: String,
|
||||
pub vram: String,
|
||||
}
|
||||
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
use std::io;
|
||||
|
||||
use anyhow::Error as AnyError;
|
||||
use axum::{http, response::IntoResponse};
|
||||
use hyper;
|
||||
use reqwest;
|
||||
use reqwest_middleware;
|
||||
use std::io;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use crate::{config::ModelSpec, error::AppError, state::AppState, util::parse_size};
|
||||
use std::{process::Stdio, sync::Arc};
|
||||
|
||||
use anyhow::anyhow;
|
||||
use itertools::Itertools;
|
||||
use std::{process::Stdio, sync::Arc};
|
||||
use tokio::{
|
||||
net::TcpStream,
|
||||
process::{Child, Command},
|
||||
|
@ -9,9 +9,11 @@ use tokio::{
|
|||
time::{sleep, Duration},
|
||||
};
|
||||
|
||||
use crate::{config::ModelSpec, error::AppError, state::AppState, util::parse_size};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct InferenceProcess {
|
||||
pub spec: ModelSpec,
|
||||
pub spec: ModelSpec,
|
||||
pub process: Arc<Mutex<Child>>,
|
||||
}
|
||||
|
||||
|
@ -115,7 +117,7 @@ impl InferenceProcess {
|
|||
let child = cmd.spawn().expect("Failed to start llama-server");
|
||||
|
||||
Ok(InferenceProcess {
|
||||
spec: spec.clone(),
|
||||
spec: spec.clone(),
|
||||
process: Arc::new(Mutex::new(child)),
|
||||
})
|
||||
}
|
||||
|
|
|
@ -6,12 +6,17 @@ pub mod proxy;
|
|||
pub mod state;
|
||||
pub mod util;
|
||||
|
||||
use std::net::SocketAddr;
|
||||
|
||||
use axum::{routing::any, Router};
|
||||
use config::{AppConfig, ModelSpec};
|
||||
use state::AppState;
|
||||
use std::net::SocketAddr;
|
||||
use tower_http::trace::{
|
||||
DefaultMakeSpan, DefaultOnEos, DefaultOnFailure, DefaultOnRequest, DefaultOnResponse,
|
||||
DefaultMakeSpan,
|
||||
DefaultOnEos,
|
||||
DefaultOnFailure,
|
||||
DefaultOnRequest,
|
||||
DefaultOnResponse,
|
||||
TraceLayer,
|
||||
};
|
||||
use tracing::Level;
|
||||
|
|
|
@ -1,12 +1,10 @@
|
|||
use std::{
|
||||
future::Future,
|
||||
pin::Pin,
|
||||
sync::Arc,
|
||||
sync::{Arc, Once},
|
||||
task::{Context, Poll},
|
||||
};
|
||||
|
||||
use std::sync::Once;
|
||||
|
||||
use axum::{body::Body, http::Request};
|
||||
use pin_project_lite::pin_project;
|
||||
use tower::{Layer, Service};
|
||||
|
@ -18,7 +16,9 @@ impl<S> Layer<S> for LoggingLayer {
|
|||
type Service = LoggingService<S>;
|
||||
|
||||
fn layer(&self, inner: S) -> Self::Service {
|
||||
LoggingService { inner }
|
||||
LoggingService {
|
||||
inner,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -53,7 +53,7 @@ where
|
|||
|
||||
LoggingServiceFuture {
|
||||
inner: self.inner.call(req),
|
||||
uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
|
||||
uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,3 @@
|
|||
use crate::{
|
||||
config::ModelSpec, error::AppError, inference_process::InferenceProcess, state::AppState,
|
||||
util::parse_size,
|
||||
};
|
||||
use axum::{
|
||||
body::Body,
|
||||
http::{Request, Response},
|
||||
|
@ -11,6 +7,14 @@ use reqwest::Client;
|
|||
use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
|
||||
use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
|
||||
|
||||
use crate::{
|
||||
config::ModelSpec,
|
||||
error::AppError,
|
||||
inference_process::InferenceProcess,
|
||||
state::AppState,
|
||||
util::parse_size,
|
||||
};
|
||||
|
||||
pub async fn proxy_request(
|
||||
req: Request<Body>,
|
||||
spec: &ModelSpec,
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
use crate::{config::AppConfig, inference_process::InferenceProcess, util::parse_size};
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use crate::{config::AppConfig, inference_process::InferenceProcess, util::parse_size};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ResourceManager {
|
||||
pub total_ram: u64,
|
||||
pub total_ram: u64,
|
||||
pub total_vram: u64,
|
||||
pub used_ram: u64,
|
||||
pub used_vram: u64,
|
||||
pub processes: HashMap<u16, InferenceProcess>,
|
||||
pub used_ram: u64,
|
||||
pub used_vram: u64,
|
||||
pub processes: HashMap<u16, InferenceProcess>,
|
||||
}
|
||||
|
||||
pub type ResourceManagerHandle = Arc<Mutex<ResourceManager>>;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use emacs::{defun, Env, IntoLisp, Result, Value};
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use emacs::{defun, Env, IntoLisp, Result, Value};
|
||||
use tokio::runtime::{Builder, Runtime};
|
||||
|
||||
// Emacs won't load the module without this.
|
||||
|
@ -48,12 +49,9 @@ fn init(env: &Env) -> Result<Value<'_>> {
|
|||
#[defun]
|
||||
fn say_hello(env: &Env, name: String) -> Result<Value<'_>> {
|
||||
// env.message(&format!("Helloo Broooooooo, {}!", name))
|
||||
env.call(
|
||||
"message",
|
||||
[format!("Henlo whatsup, {}!!!!", name)
|
||||
.as_str()
|
||||
.into_lisp(env)?],
|
||||
)?;
|
||||
env.call("message", [format!("Henlo whatsup, {}!!!!", name)
|
||||
.as_str()
|
||||
.into_lisp(env)?])?;
|
||||
RUNTIME
|
||||
.get()
|
||||
.ok_or_else(|| anyhow::anyhow!("No runtime"))?
|
||||
|
|
Loading…
Add table
Reference in a new issue