diff --git a/Cargo.lock b/Cargo.lock index fa531f2..2af41fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1730,6 +1730,15 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -2869,7 +2878,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -2955,6 +2964,7 @@ dependencies = [ "mime_guess", "once_cell", "pin-project-lite", + "pulldown-cmark", "rand 0.8.5", "regex", "reqwest", @@ -4039,6 +4049,26 @@ dependencies = [ "psl-types", ] +[[package]] +name = "pulldown-cmark" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f86ba2052aebccc42cbbb3ed234b8b13ce76f75c3551a303cb2bcffcff12bb14" +dependencies = [ + "bitflags 2.8.0", + "getopts", + "memchr", + "pulldown-cmark-escape", + "serde", + "unicase", +] + +[[package]] +name = "pulldown-cmark-escape" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" + [[package]] name = "quick-xml" version = "0.37.2" @@ -6166,6 +6196,12 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "unicode-xid" version = "0.2.6" diff --git a/llama_forge_rs/Cargo.toml b/llama_forge_rs/Cargo.toml index aadce4e..a07ade3 100644 --- a/llama_forge_rs/Cargo.toml +++ b/llama_forge_rs/Cargo.toml @@ -83,6 +83,7 @@ tracing-test = "0.2.4" sysinfo = { version = "0.30.11", optional = true } derive_more = { version = "0.99.17", features = ["nightly"] } sqlx-macros = { version = "0.7.4", optional = true, features = ["chrono", "json", "migrate", "sqlite", "uuid"] } +pulldown-cmark = { version = "0.12.2", features = ["serde"] } # qdrant-client = "1.11.2" # swiftide = "0.9.1" diff --git a/llama_forge_rs/src/api/backend_process.rs b/llama_forge_rs/src/api/backend_process.rs index fcbdbd3..7179155 100644 --- a/llama_forge_rs/src/api/backend_process.rs +++ b/llama_forge_rs/src/api/backend_process.rs @@ -48,11 +48,15 @@ pub async fn run_starter_task(pool: sqlx::SqlitePool) { use crate::server::backends::BackendService; let _ = tracing::debug_span!("starter_task"); + tracing::debug!("AAAAAAAAAAAAAAA"); + return; // TODO ???? tracing::debug!("Starter task started"); let service_handle = BackendService::new(); let mut stream = IntervalStream::new(time::interval(Duration::from_millis(1000))); while let Some(instant) = stream.next().await { + break; // TODO integrate proxy man ? + tracing::debug!("fire; instant={:?}", instant); let waiting_to_start: Vec = sqlx::query_as( diff --git a/llama_forge_rs/src/app/mod.rs b/llama_forge_rs/src/app/mod.rs index 171291a..8ed0adb 100644 --- a/llama_forge_rs/src/app/mod.rs +++ b/llama_forge_rs/src/app/mod.rs @@ -41,7 +41,8 @@ pub fn App() -> impl IntoView { - + // TODO make settings page for proxy-man + // diff --git a/llama_forge_rs/src/app/pages/chat.rs b/llama_forge_rs/src/app/pages/chat.rs index 4e1382a..1ebd5b4 100644 --- a/llama_forge_rs/src/app/pages/chat.rs +++ b/llama_forge_rs/src/app/pages/chat.rs @@ -12,7 +12,7 @@ use crate::{ app::components::{svgs::*, Card}, }; -#[component] +#[island] fn ChatMessageBubble( msg: RwSignal, history: RwSignal>>, @@ -33,12 +33,37 @@ fn ChatMessageBubble( let textarea_ref = NodeRef::::new(); + use pulldown_cmark; let editable_p = move || { + // TODO Convert back to raw str when editable let mode = if edit_mode.get() { "true" } else { "false" }; + let msg_str = move || msg.get().content.clone(); + let md_str = move || { + let owned_str = msg_str(); + let parser = pulldown_cmark::Parser::new(&owned_str); + let mut md_output = String::new(); + pulldown_cmark::html::push_html(&mut md_output, parser); + + md_output + }; + + let inner_p = move || { + if edit_mode.get() { + view! { + +

+ } + } else { + view! { +

+ } + } + }; + view! {

- + {inner_p}

} }; diff --git a/llama_forge_rs/src/server/backends/llama_chat.rs b/llama_forge_rs/src/server/backends/llama_chat.rs index eeb583d..4600c36 100644 --- a/llama_forge_rs/src/server/backends/llama_chat.rs +++ b/llama_forge_rs/src/server/backends/llama_chat.rs @@ -11,16 +11,16 @@ use crate::api::{ChannelMessage, Chat, ChatMessage}; #[derive(Serialize, Debug)] struct LlamaChatCompletionRequest { - stream: bool, - model: String, + stream: bool, + model: String, messages: Vec, } impl From for LlamaChatCompletionRequest { fn from(value: Chat) -> Self { Self { - stream: true, - model: "default".to_string(), + stream: true, + model: "default".to_string(), messages: value.history.into_iter().map(|e| e.into()).collect(), } } @@ -28,14 +28,14 @@ impl From for LlamaChatCompletionRequest { #[derive(Serialize, Debug)] struct LlamaChatMessage { - role: String, + role: String, content: String, } impl From for LlamaChatMessage { fn from(chat_message: ChatMessage) -> Self { Self { - role: chat_message.role.into(), + role: chat_message.role.into(), content: chat_message.content, } } @@ -68,9 +68,7 @@ pub struct LlamaService { impl LlamaService { pub fn new(id: Uuid) -> Self { - Self { - id, - } + Self { id } } } @@ -80,7 +78,8 @@ async fn do_chat_request(chat: Chat, sender: mpsc::Sender) -> an let request_body: LlamaChatCompletionRequest = chat.into(); let request_builder = client - .post("http://localhost:8080/v1/chat/completions") + // # .post("http://localhost:8080/v1/chat/completions") + .post("http://100.64.0.3:18080/v1/chat/completions") .header("Content-Type", "application/json") .json(&request_body); @@ -92,7 +91,19 @@ async fn do_chat_request(chat: Chat, sender: mpsc::Sender) -> an Ok(Event::Message(event)) => match event.event.as_str() { "message" => { let data = event.data; - let response: LlamaChatResponse = serde_json::from_str(&data).unwrap(); + tracing::debug!(?data); + if data == "[DONE]" { + sender + .send(ChannelMessage::Stop) + .await + .expect("channel fail"); + + es.close(); + + break; + } + + let response: LlamaChatResponse = serde_json::from_str(&data).expect("no json"); for choice in response.choices.into_iter() { if let Some(delta) = choice.delta { diff --git a/llama_forge_rs/src/server/backends/llama_completion.rs b/llama_forge_rs/src/server/backends/llama_completion.rs index 3c52ce1..473bd1e 100644 --- a/llama_forge_rs/src/server/backends/llama_completion.rs +++ b/llama_forge_rs/src/server/backends/llama_completion.rs @@ -112,7 +112,7 @@ pub async fn do_completion_request() -> Result<(), Box> { let request_body = CompletionRequest::default(); let request_builder = client - .post("http://localhost:8080/completion") + .post("http://100.64.0.3:18080/completion") .header("Accept", "text/event-stream") .header("Content-Type", "application/json") .header("User-Agent", "llama_forge_rs") diff --git a/llama_forge_rs/src/server/backends/mod.rs b/llama_forge_rs/src/server/backends/mod.rs index f670383..2cfd72f 100644 --- a/llama_forge_rs/src/server/backends/mod.rs +++ b/llama_forge_rs/src/server/backends/mod.rs @@ -177,10 +177,7 @@ mod tests { use crate::{ api::{ChannelMessage, ChatMessage, ChatRole}, server::backends::{ - llama_chat::LlamaService, - BackendService, - BackendServiceStatus, - ChatService, + llama_chat::LlamaService, BackendService, BackendServiceStatus, ChatService, }, }; diff --git a/llama_forge_rs/src/server/backends/runner.rs b/llama_forge_rs/src/server/backends/runner.rs index fde575e..6247701 100644 --- a/llama_forge_rs/src/server/backends/runner.rs +++ b/llama_forge_rs/src/server/backends/runner.rs @@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize}; use tokio::process::Command; pub struct RunnerArgs { - ctx_size: i64, + ctx_size: i64, gpu_layers: i64, model_path: String, } @@ -36,8 +36,8 @@ impl From for Vec { #[derive(Debug, Serialize, Deserialize, Clone)] pub struct Runner { - pwd: Option, - cmd: String, + pwd: Option, + cmd: String, args: Vec, } @@ -45,8 +45,8 @@ impl Runner { // FIXME does not exit properly when it is killed pub fn new_llamafile_bin(runner_args: RunnerArgs) -> Self { Self { - pwd: None, - cmd: "bash".to_string(), + pwd: None, + cmd: "bash".to_string(), args: vec![ format!( "{}/llamafile", @@ -64,8 +64,8 @@ impl Runner { pub fn new_llama_server_bin(runner_args: RunnerArgs) -> Self { Self { - pwd: None, - cmd: "llama-server".to_string(), + pwd: None, + cmd: "llama-server".to_string(), args: runner_args.into(), } } diff --git a/llama_forge_rs/src/server/middleware.rs b/llama_forge_rs/src/server/middleware.rs index 9d913e1..72af58f 100644 --- a/llama_forge_rs/src/server/middleware.rs +++ b/llama_forge_rs/src/server/middleware.rs @@ -18,9 +18,7 @@ impl Layer for LoggingLayer { type Service = LoggingService; fn layer(&self, inner: S) -> Self::Service { - LoggingService { - inner, - } + LoggingService { inner } } } @@ -50,8 +48,8 @@ where LoggingServiceFuture { inner: self.inner.call(req), - uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership - span: Arc::new(span), + uuid: Arc::new(request_uuid), // Store UUID in an Arc for shared ownership + span: Arc::new(span), } } } diff --git a/llama_forge_rs/src/server/mod.rs b/llama_forge_rs/src/server/mod.rs index 081c5f2..336e29d 100644 --- a/llama_forge_rs/src/server/mod.rs +++ b/llama_forge_rs/src/server/mod.rs @@ -6,26 +6,20 @@ use axum::{ http::Request, response::IntoResponse, routing::get, - Extension, - Router, + Extension, Router, }; use leptos::*; use leptos_axum::{generate_route_list, handle_server_fns_with_context, LeptosRoutes}; use leptos_router::RouteListing; use sqlx::{ sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteSynchronous}, - ConnectOptions, - SqlitePool, + ConnectOptions, SqlitePool, }; use tower::Layer; use tower_http::{ compression::CompressionLayer, trace::{ - DefaultMakeSpan, - DefaultOnEos, - DefaultOnFailure, - DefaultOnRequest, - DefaultOnResponse, + DefaultMakeSpan, DefaultOnEos, DefaultOnFailure, DefaultOnRequest, DefaultOnResponse, TraceLayer, }, CompressionLevel, @@ -122,7 +116,7 @@ pub async fn app(leptos_options: LeptosOptions) -> Router { let pool = new_pool().await.expect("pool err"); - // TODO move this out of server(pool has to be moved out too) + // // TODO move this out of server(pool has to be moved out too) let task = run_starter_task(pool.clone()); tokio::task::spawn(task);