// use crate::db;
use axum::{
extract::{Query, State},
response::{
sse::{Event, Sse},
IntoResponse,
},
routing::{get, post},
Json, Router,
};
use futures::stream::Stream;
use serde::{Deserialize, Serialize};
use chrono::Utc;
use sqlx::Row;
use std::collections::HashMap;
use std::convert::Infallible;
use std::sync::Arc;
use tokio::sync::{broadcast, mpsc, RwLock};
use tower_http::cors::{Any, CorsLayer};
use crate::utils::tokenizer::JapaneseTokenizer;
use crate::utils::lsa::LsaModel;
/// Shared application state handed to every axum handler.
///
/// `Clone` is cheap: every field is either an `Arc`, a pooled handle, or a
/// small owned value, so handlers each get their own copy.
#[derive(Clone)]
pub struct AppState {
/// SQLite connection pool backing the `items`, `vec_items` and `items_lsa` tables.
pub db_pool: sqlx::SqlitePool,
/// Broadcast channel for server-wide notifications (e.g. "data_changed"),
/// fanned out to every connected SSE client.
pub tx: broadcast::Sender<String>,
/// Latest llama-server health string ("running" / "error" / "stopped"),
/// refreshed by the monitor task spawned in `run_server`.
pub llama_status: Arc<RwLock<String>>,
/// Model name reported verbatim by the /model_name endpoint.
pub model_name: String,
// MCP sessions map
/// Per-session senders used to push JSON-RPC responses over each session's SSE stream.
pub sessions: Arc<RwLock<HashMap<String, mpsc::UnboundedSender<String>>>>,
// Japanese NLP & LSA
/// Tokenizer shared by LSA training and query vectorization.
pub tokenizer: Arc<JapaneseTokenizer>,
/// Lazily trained LSA model; `None` until initial training finishes
/// (or indefinitely if the database holds no documents).
pub lsa_model: Arc<RwLock<Option<LsaModel>>>,
}
/// Start the HTTP/MCP server on 127.0.0.1:{port} and serve forever.
///
/// Before serving, two background tasks are spawned:
/// * a llama-server health monitor polling `http://127.0.0.1:8080/health`
///   every 2 seconds and recording the status in `llama_status`;
/// * an initial LSA training pass over every existing `items` row.
///
/// # Panics
/// Panics if the Japanese tokenizer fails to initialize or the port cannot
/// be bound — both are unrecoverable at startup.
pub async fn run_server(
port: u16,
db_pool: sqlx::SqlitePool,
llama_status: Arc<RwLock<String>>,
model_name: String,
) {
// Global notification channel; the initial receiver is unused because
// SSE handlers subscribe on demand via `tx.subscribe()`.
let (tx, _rx) = broadcast::channel(100);
let sessions: Arc<RwLock<HashMap<String, mpsc::UnboundedSender<String>>>> = Arc::new(RwLock::new(HashMap::new()));
// llama-server status monitor
let llama_status_clone = llama_status.clone();
tokio::spawn(async move {
let client = reqwest::Client::new();
loop {
// Map the health probe onto a coarse tri-state status string.
let status = match client.get("http://127.0.0.1:8080/health").send().await {
Ok(resp) if resp.status().is_success() => "running".to_string(),
Ok(_) => "error".to_string(),
Err(_) => "stopped".to_string(),
};
{
// Scope the write guard so it is released before sleeping.
let mut s = llama_status_clone.write().await;
if *s != status {
log::info!("llama-server status changed: {} -> {}", *s, status);
*s = status;
}
}
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
}
});
let app_state = AppState {
db_pool: db_pool.clone(),
tx,
llama_status: llama_status.clone(),
model_name,
sessions,
tokenizer: Arc::new(JapaneseTokenizer::new().expect("Failed to init tokenizer")),
lsa_model: Arc::new(RwLock::new(None)),
};
// Build the LSA model from existing data at startup (heavy work, so it runs
// asynchronously in a background task; lsa_model stays None until it finishes).
let app_state_for_lsa = app_state.clone();
tokio::spawn(async move {
log::info!("Starting initial LSA model training...");
if let Ok(rows) = sqlx::query("SELECT content FROM items").fetch_all(&app_state_for_lsa.db_pool).await {
if !rows.is_empty() {
let mut builder = crate::utils::lsa::TermDocumentMatrixBuilder::new();
for row in rows {
let content: String = row.get(0);
let tokens = app_state_for_lsa.tokenizer.tokenize_to_vec(&content).unwrap_or_default();
builder.add_document(tokens);
}
let matrix = builder.build_matrix();
// Reduce to 50 latent dimensions.
match LsaModel::train(&matrix, builder.vocabulary, 50) {
Ok(model) => {
let mut lsa = app_state_for_lsa.lsa_model.write().await;
*lsa = Some(model);
log::info!("LSA model trained successfully with {} documents.", builder.counts.len());
}
Err(e) => log::error!("LSA training failed: {}", e),
}
}
}
});
// Wide-open CORS: this server only binds to loopback, so any local UI may call it.
let cors = CorsLayer::new()
.allow_origin(Any)
.allow_methods(Any)
.allow_headers(Any);
let app = Router::new()
.route("/sse", get(sse_handler))
.route("/messages", post(mcp_messages_handler))
.route("/llama_status", get(llama_status_handler))
.route("/doc_count", get(doc_count_handler))
.route("/model_name", get(model_name_handler))
.layer(cors)
.with_state(app_state);
let listener = tokio::net::TcpListener::bind(format!("127.0.0.1:{}", port))
.await
.unwrap();
log::info!("MCP Server listening on {}", listener.local_addr().unwrap());
axum::serve(listener, app).await.unwrap();
}
/// GET /llama_status — report the llama-server health string maintained
/// by the background monitor task ("running" / "error" / "stopped").
async fn llama_status_handler(State(state): State<AppState>) -> impl IntoResponse {
    // Take a snapshot so the read lock is released before building the response.
    let snapshot = state.llama_status.read().await.to_string();
    Json(serde_json::json!({ "status": snapshot }))
}
async fn doc_count_handler(State(state): State<AppState>) -> impl IntoResponse {
let row = sqlx::query("SELECT COUNT(*) FROM items")
.fetch_one(&state.db_pool)
.await
.unwrap();
let count: i64 = row.get(0);
Json(serde_json::json!({ "count": count }))
}
/// GET /model_name — expose the configured model name to the UI.
async fn model_name_handler(State(state): State<AppState>) -> impl IntoResponse {
    let name = state.model_name;
    Json(serde_json::json!({ "model_name": name }))
}
/// Query parameters accepted by the /sse endpoint.
///
/// Currently ignored: `sse_handler` always mints a fresh session id instead
/// of honoring a client-supplied one (hence the `dead_code` allowance).
#[allow(dead_code)]
#[derive(Deserialize)]
struct SseQuery {
session_id: Option<String>,
}
/// GET /sse — establish an MCP session over Server-Sent Events.
///
/// Mints a fresh session id, registers an unbounded channel for it in
/// `state.sessions`, and immediately emits an `endpoint` event telling the
/// client where to POST its JSON-RPC messages. After that the stream
/// multiplexes two sources:
/// * per-session responses from `/messages`, emitted as `message` events;
/// * global broadcasts from `state.tx`, emitted as `update` events.
///
/// The client-supplied `session_id` query parameter is deliberately ignored.
async fn sse_handler(
State(state): State<AppState>,
Query(_query): Query<SseQuery>,
) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
// Generate a simple session ID
let session_id = uuid::Uuid::new_v4().to_string();
let (tx, rx) = tokio::sync::mpsc::unbounded_channel::<String>();
log::info!("New MCP SSE Session: {}", session_id);
// Register session
state.sessions.write().await.insert(session_id.clone(), tx);
// Initial endpoint event
let endpoint_url = format!("/messages?session_id={}", session_id);
let endpoint_event = Event::default().event("endpoint").data(endpoint_url);
let session_id_for_close = session_id.clone();
let sessions_for_close = state.sessions.clone();
let global_rx = state.tx.subscribe();
// The unfold state tuple carries: (per-session receiver, one-shot initial
// endpoint event, session id for cleanup, sessions map for cleanup, global
// broadcast receiver). Each closure call yields at most one SSE event and
// threads the state through to the next poll.
let stream = futures::stream::unfold(
(
rx,
Some(endpoint_event),
session_id_for_close,
sessions_for_close,
global_rx,
),
|(mut rx, mut initial, sid, smap, mut grx): (
tokio::sync::mpsc::UnboundedReceiver<String>,
Option<Event>,
String,
Arc<RwLock<HashMap<String, tokio::sync::mpsc::UnboundedSender<String>>>>,
tokio::sync::broadcast::Receiver<String>,
)| async move {
// First poll: emit the endpoint event exactly once, then fall through
// to the select loop on subsequent polls (`initial` becomes None).
if let Some(event) = initial.take() {
return Some((Ok(event), (rx, None, sid, smap, grx)));
}
tokio::select! {
Some(msg) = rx.recv() => {
Some((Ok(Event::default().event("message").data(msg)), (rx, None, sid, smap, grx)))
}
Ok(msg) = grx.recv() => {
// Global notification (e.g. data update)
Some((Ok(Event::default().event("update").data(msg)), (rx, None, sid, smap, grx)))
}
// Runs only when every select branch is disabled, i.e. both the
// session channel and the broadcast channel are closed.
else => {
log::info!("MCP SSE Session Closed: {}", sid);
smap.write().await.remove(&sid);
None
}
}
},
);
// NOTE(review): if the client disconnects, the stream future is dropped
// without ever reaching the `else` cleanup branch, so the session entry in
// `state.sessions` appears to linger — confirm whether stale entries need
// explicit reaping.
Sse::new(stream).keep_alive(axum::response::sse::KeepAlive::default())
}
/// Incoming JSON-RPC 2.0 request envelope.
#[derive(Serialize, Deserialize)]
struct JsonRpcRequest {
/// Protocol version string, expected "2.0" (not validated here).
jsonrpc: String,
/// Method name, e.g. "initialize", "tools/list", "tools/call".
method: String,
/// Method parameters; for "tools/call" this holds `name` and `arguments`.
params: Option<serde_json::Value>,
/// Request id. Absent or JSON null marks the request as a notification,
/// which must not receive a response.
id: Option<serde_json::Value>,
}
/// Outgoing JSON-RPC 2.0 response envelope.
///
/// Exactly one of `result` / `error` is expected to be populated; the other
/// is omitted from the serialized output.
#[derive(Serialize)]
struct JsonRpcResponse {
/// Always "2.0".
jsonrpc: &'static str,
#[serde(skip_serializing_if = "Option::is_none")]
result: Option<serde_json::Value>,
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<serde_json::Value>,
/// Echoes the request id this response answers.
id: Option<serde_json::Value>,
}
/// Query parameters for POST /messages.
#[derive(Deserialize)]
struct MessageQuery {
/// When present, responses are pushed over the matching SSE session
/// instead of being returned in the HTTP body.
session_id: Option<String>,
}
impl IntoResponse for JsonRpcResponse {
    /// Serialize the JSON-RPC envelope as a JSON HTTP response body.
    fn into_response(self) -> axum::response::Response {
        let body = Json(self);
        body.into_response()
    }
}
/// Request an embedding for `content` from the local llama-server
/// (OpenAI-compatible `POST /v1/embeddings`) and return it as `Vec<f32>`.
///
/// All failures (transport, body read, JSON parse, missing embedding) are
/// stringified into the `Err` variant. Non-numeric entries inside the
/// embedding array are coerced to 0.0 rather than failing the whole call.
async fn get_embedding(content: &str) -> Result<Vec<f32>, String> {
    let payload = serde_json::json!({
        "input": [content],
        "model": "default"
    });
    log::info!("Sending embedding request: {}", payload);
    let response = reqwest::Client::new()
        .post("http://127.0.0.1:8080/v1/embeddings")
        .json(&payload)
        .send()
        .await
        .map_err(|e| e.to_string())?;
    // Read the raw body first so it can be logged even when parsing fails.
    let body_text = response.text().await.map_err(|e| e.to_string())?;
    log::info!("Received embedding response: {}", body_text);
    let json: serde_json::Value = serde_json::from_str(&body_text).map_err(|e| e.to_string())?;
    // Parse OpenAI-compatible response: {"data": [{"embedding": [...]}]}
    match json["data"][0]["embedding"].as_array() {
        Some(values) => Ok(values
            .iter()
            .map(|v| v.as_f64().unwrap_or(0.0) as f32)
            .collect()),
        None => Err(format!("No embedding found in llama-server response: {}", json)),
    }
}
async fn mcp_messages_handler(
State(state): State<AppState>,
Query(query): Query<MessageQuery>,
Json(req): Json<JsonRpcRequest>,
) -> impl IntoResponse {
let method = req.method.as_str();
log::info!("MCP Request: {} (Session: {:?})", method, query.session_id);
// 受信データを構造化JSONで出力(timestamp と source を含む)
let structured = serde_json::json!({
"timestamp": Utc::now().to_rfc3339(),
"source": "mcp",
"session": query.session_id,
"method": method,
"id": req.id,
"params": req.params,
});
log::info!("{}", serde_json::to_string(&structured).unwrap_or_else(|_| "{\"error\":\"serialize_failed\"}".to_string()));
let result: Option<serde_json::Value> = match method {
"initialize" => {
let client_version = req.params.as_ref()
.and_then(|p| p.get("protocolVersion"))
.and_then(|v| v.as_str())
.unwrap_or("2024-11-05");
log::info!("MCP Handshake: Client requested protocol version {}", client_version);
Some(serde_json::json!({
"protocolVersion": client_version,
"capabilities": {
"tools": { "listChanged": false },
"resources": { "listChanged": false, "subscribe": false },
"prompts": { "listChanged": false },
"logging": {}
},
"serverInfo": { "name": "TelosDB", "version": "0.1.0" }
}))
},
"notifications/initialized" => None,
"tools/list" => Some(serde_json::json!({
"tools": [
{
"name": "add_item_text",
"description": "Store text with auto-generated embeddings.",
"inputSchema": {
"type": "object",
"properties": {
"content": { "type": "string" },
"path": { "type": "string" }
},
"required": ["content"]
}
},
{
"name": "search_text",
"description": "Semantic search using vector embeddings.",
"inputSchema": {
"type": "object",
"properties": {
"content": { "type": "string" },
"limit": { "type": "number" }
},
"required": ["content"]
}
},
{
"name": "lsa_search",
"description": "Lightweight Japanese semantic search using LSA (Latent Semantic Analysis). No LLM required.",
"inputSchema": {
"type": "object",
"properties": {
"query": { "type": "string" },
"limit": { "type": "number" }
},
"required": ["query"]
}
},
{
"name": "lsa_retrain",
"description": "Rebuild the LSA semantic model from all current documents. Use this when you've added many new items.",
"inputSchema": { "type": "object", "properties": {} }
},
{
"name": "update_item",
"description": "Update existing text and its embedding.",
"inputSchema": {
"type": "object",
"properties": {
"id": { "type": "integer" },
"content": { "type": "string" },
"path": { "type": "string" }
},
"required": ["id", "content"]
}
},
{
"name": "delete_item",
"description": "Delete item by ID.",
"inputSchema": {
"type": "object",
"properties": {
"id": { "type": "integer" }
},
"required": ["id"]
}
},
{
"name": "get_item_by_id",
"description": "Get text content by item ID.",
"inputSchema": {
"type": "object",
"properties": {
"id": { "type": "integer" }
},
"required": ["id"]
}
}
]
})),
"search_text" | "tools/call" | "add_item_text" | "update_item" | "delete_item" | "get_item_by_id" => {
let p = req.params.clone().unwrap_or_default();
let (actual_method, args) = if method == "tools/call" {
(
p.get("name").and_then(|v| v.as_str()).unwrap_or(""),
p.get("arguments").cloned().unwrap_or_default(),
)
} else {
(method, p)
};
// UIへの通知(ツール呼び出し開始)
let _ = state.tx.send(format!("mcp:call:{}", actual_method));
match actual_method {
"get_item_by_id" => {
let id = args.get("id").and_then(|v| v.as_i64()).unwrap_or(0);
let row = sqlx::query("SELECT id, content, path FROM items WHERE id = ?")
.bind(id)
.fetch_optional(&state.db_pool)
.await
.unwrap_or(None);
if let Some(row) = row {
let content: String = row.get("content");
let path: Option<String> = row.try_get("path").ok();
Some(serde_json::json!({
"id": id,
"content": content,
"path": path
}))
} else {
Some(serde_json::json!({ "error": format!("Item not found: {}", id) }))
}
}
"add_item_text" => {
let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
let path = args.get("path").and_then(|v| v.as_str());
log::info!(
"Executing add_item_text with chunking: content length={}, path='{:?}'",
content.chars().count(),
path
);
// 800文字ずつに分割
let chars: Vec<char> = content.chars().collect();
let chunks: Vec<String> = chars
.chunks(800)
.map(|chunk| chunk.iter().collect::<String>())
.collect();
let mut results = Vec::new();
for (_i, chunk_content) in chunks.iter().enumerate() {
// 各チャンクに対して埋め込みを取得して保存
match get_embedding(chunk_content).await {
Ok(emb) => {
async fn add_item_inner(
state: &AppState,
content: &str,
path: Option<&str>,
emb: Vec<f32>,
) -> Result<i64, String> {
let mut tx =
state.db_pool.begin().await.map_err(|e| {
format!("Failed to begin transaction: {}", e)
})?;
let res =
sqlx::query("INSERT INTO items (content, path) VALUES (?, ?)")
.bind(content)
.bind(path)
.execute(&mut *tx)
.await
.map_err(|e| format!("Failed to insert item: {}", e))?;
let id = res.last_insert_rowid();
sqlx::query("INSERT INTO vec_items (id, embedding) VALUES (?, ?)")
.bind(id)
.bind(serde_json::to_string(&emb).unwrap_or("[]".to_string()))
.execute(&mut *tx)
.await
.map_err(|e| format!("Failed to insert vector: {}", e))?;
// LSA ベクトルの計算と保存
let lsa_guard = state.lsa_model.read().await;
if let Some(model) = lsa_guard.as_ref() {
let mut query_counts = HashMap::new();
let tokens = state.tokenizer.tokenize_to_vec(content).unwrap_or_default();
for token in tokens {
if let Some(&tid) = model.vocabulary.get(&token) {
*query_counts.entry(tid).or_insert(0.0) += 1.0;
}
}
let mut query_vec = ndarray::Array1::zeros(model.vocabulary.len());
for (tid, count) in query_counts {
query_vec[tid] = count;
}
if let Ok(projected) = model.project_query(&query_vec) {
let proj_vec: Vec<f64> = projected.to_vec();
let vector_blob = bincode::serialize(&proj_vec).unwrap_or_default();
sqlx::query("INSERT INTO items_lsa (id, vector) VALUES (?, ?)")
.bind(id)
.bind(vector_blob)
.execute(&mut *tx)
.await
.map_err(|e| format!("Failed to insert LSA vector: {}", e))?;
}
}
tx.commit()
.await
.map_err(|e| format!("Failed to commit transaction: {}", e))?;
Ok(id)
}
match add_item_inner(&state, chunk_content, path, emb).await {
Ok(id) => {
results.push(id);
}
Err(e) => {
log::error!("Failed to add chunk: {}", e);
}
}
}
Err(e) => {
log::error!("Embedding failed for chunk: {}", e);
}
}
}
if !results.is_empty() {
let _ = state.tx.send("data_changed".to_string());
log::info!("Successfully added {} chunks.", results.len());
Some(
serde_json::json!({ "content": [{ "type": "text", "text": format!("Successfully added {} chunks.", results.len()) }] }),
)
} else {
Some(serde_json::json!({ "error": "Failed to add any chunks." }))
}
}
"search_text" => {
let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
let limit = args.get("limit").and_then(|v| v.as_u64()).unwrap_or(10) as u32;
match get_embedding(content).await {
Ok(emb) => {
let rows = sqlx::query(
"SELECT items.id, items.content, v.distance
FROM items
JOIN vec_items v ON items.id = v.id
WHERE v.embedding MATCH ? AND k = ?
ORDER BY distance LIMIT ?",
)
.bind(serde_json::to_string(&emb).unwrap_or("[]".to_string()))
.bind(limit)
.bind(limit)
.fetch_all(&state.db_pool)
.await
.unwrap_or_default();
log::info!("Search query: '{}'", content);
log::info!("Embedding (first 5): {:?}", &emb[..5.min(emb.len())]);
// Log results for debugging regardless of output format
for r in &rows {
let id = r.get::<i64, _>(0);
let d = r.get::<f64, _>(2);
log::info!("Result ID: {}, Distance: {}", id, d);
}
let is_mcp_output = method == "tools/call";
if is_mcp_output {
let txt = if rows.is_empty() {
"No results.".to_string()
} else {
rows.iter()
.map(|r| {
format!(
"[ID: {}, Distance: {:.4}]\n{}",
r.get::<i64, _>(0),
r.get::<f64, _>(2),
r.get::<String, _>(1)
)
})
.collect::<Vec<_>>()
.join("\n\n---\n\n")
};
Some(
serde_json::json!({ "content": [{ "type": "text", "text": txt }] }),
)
} else {
let res: Vec<_> = rows
.iter()
.map(|r| {
serde_json::json!({
"id": r.get::<i64,_>(0),
"content": r.get::<String,_>(1),
"distance": r.get::<f64, _>(2)
})
})
.collect();
Some(serde_json::json!({ "content": res }))
}
}
Err(e) => {
log::warn!(
"Embedding failed in search_text, falling back to LIKE: {}",
e
);
// Fallback to LIKE if llama-server is not running
let rows = sqlx::query(
"SELECT id, content FROM items WHERE content LIKE ? LIMIT ?",
)
.bind(format!("%{}%", content))
.bind(limit)
.fetch_all(&state.db_pool)
.await
.unwrap_or_default();
let txt =
format!("(Fallback SEARCH due to embedding error: {})\n\n", e);
let results = rows
.iter()
.map(|r| {
format!(
"ID: {}, Content: {}",
r.get::<i64, _>(0),
r.get::<String, _>(1)
)
})
.collect::<Vec<_>>()
.join("\n\n");
Some(
serde_json::json!({ "content": [{ "type": "text", "text": txt + &results }] }),
)
}
}
}
"lsa_search" => {
let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(10);
let lsa_guard = state.lsa_model.read().await;
if let Some(model) = lsa_guard.as_ref() {
// クエリのベクトル化 (TF)
let mut query_counts = HashMap::new();
let tokens = state.tokenizer.tokenize_to_vec(query).unwrap_or_default();
for token in tokens {
if let Some(&id) = model.vocabulary.get(&token) {
*query_counts.entry(id).or_insert(0.0) += 1.0;
}
}
let mut query_vec = ndarray::Array1::zeros(model.vocabulary.len());
for (id, count) in query_counts {
query_vec[id] = count;
}
// LSA 空間への射影
if let Ok(query_lsa) = model.project_query(&query_vec) {
// DB から全ベクトルを取得して比較 (件数が少ない想定)
// 本来はアイテム数が多い場合は BLOB を全件回すと遅いため、インメモリキャッシュ等を検討
let rows = sqlx::query("SELECT id, vector FROM items_lsa")
.fetch_all(&state.db_pool)
.await
.unwrap_or_default();
let mut results = Vec::new();
for row in rows {
let id: i64 = row.get(0);
let vector_blob: Vec<u8> = row.get(1);
if let Ok(vector_f64) = bincode::deserialize::<Vec<f64>>(&vector_blob) {
let doc_vec = ndarray::Array1::from_vec(vector_f64);
let sim = crate::utils::lsa::LsaModel::cosine_similarity(&query_lsa, &doc_vec);
results.push((id, sim));
}
}
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
results.truncate(limit as usize);
let mut filtered_results = Vec::new();
for (id, sim) in results {
if let Ok(doc_row) = sqlx::query("SELECT content FROM items WHERE id = ?").bind(id).fetch_one(&state.db_pool).await {
let content: String = doc_row.get(0);
filtered_results.push(serde_json::json!({
"id": id,
"content": content,
"similarity": sim
}));
}
}
Some(serde_json::json!({ "content": filtered_results }))
} else {
Some(serde_json::json!({ "error": "Query projection failed" }))
}
} else {
Some(serde_json::json!({ "error": "LSA model not initialized or no data available" }))
}
}
"update_item" => {
let id = args.get("id").and_then(|v| v.as_i64()).unwrap_or(0);
let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
let path = args.get("path").and_then(|v| v.as_str());
match get_embedding(content).await {
Ok(emb) => {
async fn update_item_inner(
state: &AppState,
id: i64,
content: &str,
path: Option<&str>,
emb: Vec<f32>,
) -> Result<(), String> {
let mut tx =
state.db_pool.begin().await.map_err(|e| {
format!("Failed to begin transaction: {}", e)
})?;
sqlx::query("UPDATE items SET content = ?, path = ? WHERE id = ?")
.bind(content)
.bind(path)
.bind(id)
.execute(&mut *tx)
.await
.map_err(|e| format!("Failed to update item: {}", e))?;
sqlx::query("UPDATE vec_items SET embedding = ? WHERE id = ?")
.bind(serde_json::to_string(&emb).unwrap_or("[]".to_string()))
.bind(id)
.execute(&mut *tx)
.await
.map_err(|e| format!("Failed to update vector: {}", e))?;
// LSA ベクトルの更新
let lsa_guard = state.lsa_model.read().await;
if let Some(model) = lsa_guard.as_ref() {
let mut query_counts = HashMap::new();
let tokens = state.tokenizer.tokenize_to_vec(content).unwrap_or_default();
for token in tokens {
if let Some(&tid) = model.vocabulary.get(&token) {
*query_counts.entry(tid).or_insert(0.0) += 1.0;
}
}
let mut query_vec = ndarray::Array1::zeros(model.vocabulary.len());
for (tid, count) in query_counts {
query_vec[tid] = count;
}
if let Ok(projected) = model.project_query(&query_vec) {
let vector_blob = bincode::serialize(&projected.to_vec()).unwrap_or_default();
sqlx::query("INSERT OR REPLACE INTO items_lsa (id, vector) VALUES (?, ?)")
.bind(id)
.bind(vector_blob)
.execute(&mut *tx)
.await
.map_err(|e| format!("Failed to update LSA vector: {}", e))?;
}
}
tx.commit()
.await
.map_err(|e| format!("Failed to commit transaction: {}", e))?;
Ok(())
}
if let Err(e) = update_item_inner(&state, id, content, path, emb).await
{
Some(serde_json::json!({ "error": e }))
} else {
let _ = state.tx.send("data_changed".to_string());
Some(
serde_json::json!({ "content": [{ "type": "text", "text": format!("Successfully updated item {}", id) }] }),
)
}
}
Err(e) => {
Some(serde_json::json!({ "error": format!("Embedding failed: {}", e) }))
}
}
}
"delete_item" => {
let id = args.get("id").and_then(|v| v.as_i64()).unwrap_or(0);
async fn delete_item_inner(state: &AppState, id: i64) -> Result<(), String> {
let mut tx = state
.db_pool
.begin()
.await
.map_err(|e| format!("Failed to begin transaction: {}", e))?;
sqlx::query("DELETE FROM items WHERE id = ?")
.bind(id)
.execute(&mut *tx)
.await
.map_err(|e| format!("Failed to delete item: {}", e))?;
sqlx::query("DELETE FROM vec_items WHERE id = ?")
.bind(id)
.execute(&mut *tx)
.await
.map_err(|e| format!("Failed to delete vector: {}", e))?;
tx.commit()
.await
.map_err(|e| format!("Failed to commit transaction: {}", e))?;
Ok(())
}
if let Err(e) = delete_item_inner(&state, id).await {
Some(serde_json::json!({ "error": e }))
} else {
let _ = state.tx.send("data_changed".to_string());
Some(
serde_json::json!({ "content": [{ "type": "text", "text": format!("Successfully deleted item {}", id) }] }),
)
}
}
"lsa_retrain" => {
log::info!("Manual LSA retrain triggered.");
let state_clone = state.clone();
tokio::spawn(async move {
if let Ok(rows) = sqlx::query("SELECT id, content FROM items").fetch_all(&state_clone.db_pool).await {
if !rows.is_empty() {
let mut builder = crate::utils::lsa::TermDocumentMatrixBuilder::new();
let mut ids = Vec::new();
for row in rows {
let id: i64 = row.get(0);
let content: String = row.get(1);
let tokens = state_clone.tokenizer.tokenize_to_vec(&content).unwrap_or_default();
builder.add_document(tokens);
ids.push(id);
}
let matrix = builder.build_matrix();
match LsaModel::train(&matrix, builder.vocabulary, 50) {
Ok(model) => {
// 全ドキュメントのベクトルを再計算して DB に保存
let mut tx = state_clone.db_pool.begin().await.unwrap();
sqlx::query("DELETE FROM items_lsa").execute(&mut *tx).await.unwrap();
for (i, &id) in ids.iter().enumerate() {
// 文書 i のベクトルは VT[.., i] * Sigma
// project_query は U^T * TF なので、全文書一括なら U や VT を使った方が早いが
// ここでは一貫性のために各文書の TF を作って射影する
let mut doc_tf = ndarray::Array1::zeros(model.vocabulary.len());
for (&tid, &count) in &builder.counts[i] {
doc_tf[tid] = count;
}
if let Ok(projected) = model.project_query(&doc_tf) {
let vector_blob = bincode::serialize(&projected.to_vec()).unwrap_or_default();
sqlx::query("INSERT INTO items_lsa (id, vector) VALUES (?, ?)")
.bind(id)
.bind(vector_blob)
.execute(&mut *tx)
.await
.unwrap();
}
}
tx.commit().await.unwrap();
let mut lsa = state_clone.lsa_model.write().await;
*lsa = Some(model);
log::info!("Manual LSA retrain completed successfully.");
}
Err(e) => log::error!("Manual LSA training failed: {}", e),
}
}
}
});
Some(serde_json::json!({ "content": [{ "type": "text", "text": "LSA retrain started in background." }] }))
}
_ => Some(serde_json::json!({ "error": "Unknown tool" })),
}
}
_ => Some(serde_json::json!({ "error": "Not implemented" })),
};
// Notifications (id == null) MUST NOT receive a response
if req.id.is_none() || req.id.as_ref().map_or(false, |v| v.is_null()) {
log::info!("MCP Notification received: {} (No response sent)", method);
return axum::http::StatusCode::NO_CONTENT.into_response();
}
if let Some(id_val) = req.id {
let resp = JsonRpcResponse {
jsonrpc: "2.0",
result,
error: None,
id: Some(id_val),
};
if let Some(sid) = query.session_id {
// MCP Client (SSE Mode)
let resp_str = serde_json::to_string(&resp).unwrap();
log::info!("Sending MCP Response (Session: {}, ID: {:?}): {}", sid, resp.id, resp_str);
let sessions = state.sessions.read().await;
if let Some(tx) = sessions.get(&sid) {
let _ = tx.send(resp_str);
}
axum::http::StatusCode::ACCEPTED.into_response()
} else {
// App UI (Direct Mode)
resp.into_response()
}
} else {
axum::http::StatusCode::NO_CONTENT.into_response()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Split `text` into chunks of at most `chunk_size` characters, mirroring
    /// the chunking performed by the `add_item_text` tool (character count,
    /// not bytes, so multi-byte text splits correctly).
    fn chunk_text(text: &str, chunk_size: usize) -> Vec<String> {
        text.chars()
            .collect::<Vec<char>>()
            .chunks(chunk_size)
            .map(|c| c.iter().collect())
            .collect()
    }

    /// Verify the 800-character chunking boundaries used by add_item_text.
    /// (The previous version copy-pasted the chunking pipeline four times;
    /// it is now factored into `chunk_text`.)
    #[test]
    fn test_text_chunking_logic() {
        let chunk_size = 800;

        // 1. Exactly 800 characters -> a single full chunk.
        let chunks = chunk_text(&"a".repeat(800), chunk_size);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].len(), 800);

        // 2. 801 characters -> one full chunk plus a 1-character remainder.
        let chunks = chunk_text(&"a".repeat(801), chunk_size);
        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].len(), 800);
        assert_eq!(chunks[1].len(), 1);

        // 3. 1600 characters -> exactly two full chunks.
        assert_eq!(chunk_text(&"a".repeat(1600), chunk_size).len(), 2);

        // 4. Empty input -> no chunks at all.
        assert!(chunk_text("", chunk_size).is_empty());
    }
}