diff --git a/docs/database.md b/docs/database.md new file mode 100644 index 0000000..5d4892d --- /dev/null +++ b/docs/database.md @@ -0,0 +1,55 @@ +# データベース設計とベクトル検索仕様 + +本システムは SQLite を、メタデータ管理とベクトル検索エンジンの両方として利用します。 + +## ER図 + +```mermaid +erDiagram + items { + integer id PK "自動インクリメント" + text content "文章本文" + text document_name "ドキュメント名・タイトル等" + datetime created_at "登録日時" + datetime updated_at "更新日時" + blob embedding "768次元ベクトル (f32 LE)" + } +``` + +## テーブル詳細仕様 + +### 1. `items` テーブル (SeaORM 管理) + +アプリケーションの主要なデータを保存します。 + +| カラム名 | 型 | 説明 | +| :--- | :--- | :--- | +| `id` | INTEGER | プライマリキー(自動インクリメント)。 | +| `content` | TEXT | ベクトルの元となったテキストデータ。 | +| `document_name` | TEXT | (オプション)ソースファイルのパスやタイトル。 | +| `created_at` | DATETIME | 作成時刻。 | +| `updated_at` | DATETIME | 更新時刻。 | +| `embedding` | BLOB | ベクトルデータ。`vector_init` で初期化された 768次元の浮動小数点配列。 | + +## ベクトル検索の仕組み + +検索には `vector_quantize_scan` 関数を使用した JOIN を使用します。これにより、インデックスがなくても高速な類似検索が可能です。 + +```sql +SELECT + i.id, + i.content, + v.distance +FROM items i +JOIN vector_quantize_scan('items', 'embedding', ?, 10) AS v ON i.rowid = v.rowid +ORDER BY distance; +``` + +### 距離計算アルゴリズム + +デフォルトでは **L2 距離 (Euclidean distance)** が使用されます。`vector_init` 時にパラメータで変更可能です。 + +## 注意事項 + +- **初期化**: 起動時に `SELECT vector_init(...)` を実行して、BLOBカラムをベクトルとして認識させる必要があります。 +- **次元数**: 使用するモデル (Gemma 3 300M) に合わせて 768 次定数として定義されています。 diff --git a/src-backend/src/db.rs b/src-backend/src/db.rs new file mode 100644 index 0000000..6b81117 --- /dev/null +++ b/src-backend/src/db.rs @@ -0,0 +1,80 @@ +use sea_orm::{DatabaseConnection, SqlxSqliteConnector, ConnectionTrait, Statement, DatabaseBackend}; +use sea_orm::sqlx::sqlite::{SqliteConnectOptions, SqlitePoolOptions}; +use std::str::FromStr; +use std::time::Duration; + +pub async fn init_db(db_path: &str, extension_path: &str, vec_dim: usize) -> anyhow::Result { + let db_url = format!("sqlite:{}?mode=rwc", db_path); + + let options = SqliteConnectOptions::from_str(&db_url)? + .create_if_missing(true) + .extension(extension_path.to_owned()); + + let pool = SqlitePoolOptions::new() + .max_connections(10) + .min_connections(5) + .acquire_timeout(Duration::from_secs(8)) + .max_lifetime(Duration::from_secs(8)) + .connect_with(options) + .await?; + + let db = SqlxSqliteConnector::from_sqlx_sqlite_pool(pool); + + // Schema Initialization + // 1. Standard items table + db.execute(Statement::from_string( + DatabaseBackend::Sqlite, + "CREATE TABLE IF NOT EXISTS items ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + content TEXT NOT NULL, + document_name TEXT, + created_at TEXT DEFAULT (datetime('now', 'localtime')), + updated_at TEXT DEFAULT (datetime('now', 'localtime')) + );".to_string(), + )).await?; + + // 2. Add embedding column to items (ALTER TABLE does not support IF NOT EXISTS in many sqlite versions) + // We attempt it and ignore errors if column exists + let _ = db.execute(Statement::from_string( + DatabaseBackend::Sqlite, + "ALTER TABLE items ADD COLUMN embedding BLOB;".to_string(), + )).await; + + // 3. Initialize vector engine for the column + db.execute(Statement::from_string( + DatabaseBackend::Sqlite, + format!( + "SELECT vector_init('items', 'embedding', 'type=FLOAT32,dimension={}');", + vec_dim + ), + )).await?; + + // triggers + db.execute(Statement::from_string( + DatabaseBackend::Sqlite, + "CREATE TRIGGER IF NOT EXISTS update_items_updated_at + AFTER UPDATE ON items + FOR EACH ROW + BEGIN + UPDATE items SET updated_at = datetime('now', 'localtime') WHERE id = OLD.id; + END;".to_string(), + )).await?; + + Ok(db) +} + +#[cfg(test)] +mod tests { + use super::*; + use sea_orm::{ConnectOptions, Database}; + + #[tokio::test] + async fn test_init_db_basic() { + // NOTE: We test with a dummy extension path and expect it to fail if it's not found, + // or we test just the SeaORM part if we could separation. + // For now, let's just ensure we can connect to an in-memory sqlite via SeaORM. + let mut opt = ConnectOptions::new("sqlite::memory:"); + let db = Database::connect(opt).await; + assert!(db.is_ok()); + } +} diff --git a/src-backend/src/entities/items.rs b/src-backend/src/entities/items.rs new file mode 100644 index 0000000..6c0236b --- /dev/null +++ b/src-backend/src/entities/items.rs @@ -0,0 +1,21 @@ +use sea_orm::entity::prelude::*; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Serialize, Deserialize)] +#[sea_orm(table_name = "items")] +pub struct Model { + #[sea_orm(primary_key)] + pub id: i32, + pub content: String, + pub document_name: Option, + pub created_at: DateTimeLocal, + pub updated_at: DateTimeLocal, + #[sea_orm(column_type = "Blob", nullable)] + #[serde(skip_serializing_if = "Option::is_none")] + pub embedding: Option>, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/src-backend/src/lib.rs b/src-backend/src/lib.rs index 0c28255..1e7a450 100644 --- a/src-backend/src/lib.rs +++ b/src-backend/src/lib.rs @@ -75,7 +75,7 @@ } // 2. Critical Files Check - let critical_files = ["mcp.json", "config.json", "vec0.dll", "llama.dll"]; + let critical_files = ["mcp.json", "config.json", "vector.dll", "llama.dll"]; for file in critical_files { match resolve_resource_path(app_handle, file) { Some(p) if p.exists() => log::info!("✅ found {}: {:?}", file, p), @@ -308,8 +308,8 @@ } fn resolve_extension_path(app_handle: &tauri::AppHandle) -> String { - // 1. Check verified resource path (build_assets/vec0.dll) - if let Some(path) = resolve_resource_path(app_handle, "vec0.dll") { + // 1. Check verified resource path (build_assets/vector.dll) + if let Some(path) = resolve_resource_path(app_handle, "vector.dll") { if path.exists() { return path.to_string_lossy().to_string(); } @@ -317,23 +317,29 @@ // 2. Search relative to EXE for local dev if let Ok(exe_path) = env::current_exe() { - if let Some(mut p) = exe_path.parent() { + if let Some(p) = exe_path.parent() { let mut pr = p.to_path_buf(); for _ in 0..10 { // Check in bin/ - let bin_cand = pr.join("bin").join("vec0.dll"); + let bin_cand = pr.join("bin").join("vector.dll"); if bin_cand.exists() { return bin_cand.to_string_lossy().to_string(); } - // Check in node_modules (legacy/other) - let nm_cand = pr.join("node_modules/sqlite-vec-windows-x64/vec0.dll"); - if nm_cand.exists() { return nm_cand.to_string_lossy().to_string(); } + // Check in node_modules + let nm_packages = [ + "@sqliteai/sqlite-vector-win32-x86_64/vector.dll", + "sqlite-vec-windows-x64/vec0.dll" // fallback for older structure if needed + ]; + for pkg in nm_packages { + let nm_cand = pr.join("node_modules").join(pkg); + if nm_cand.exists() { return nm_cand.to_string_lossy().to_string(); } + } if !pr.pop() { break; } } } } - "vec0.dll".to_string() + "vector.dll".to_string() } fn spawn_llama_server(app_handle: &tauri::AppHandle, _config: &serde_json::Value) { diff --git a/src-backend/src/mcp/handlers.rs b/src-backend/src/mcp/handlers.rs new file mode 100644 index 0000000..81ae808 --- /dev/null +++ b/src-backend/src/mcp/handlers.rs @@ -0,0 +1,215 @@ +use crate::entities::items; +use crate::AppState; +use sea_orm::*; + +use tauri::Emitter; + +pub async fn handle_save_document( + state: &AppState, + content: &str, + document_name: &str, +) -> anyhow::Result { + let embedding = state.llama.get_embedding(content).await?; + + // SeaORM insert + let new_item = items::ActiveModel { + content: Set(content.to_owned()), + document_name: Set(Some(document_name.to_owned())), + ..Default::default() + }; + + let db = &state.db; + let res = new_item.insert(db).await?; + let id = res.id; + + // embedding を items テーブルに直接更新 + let embedding_bytes: Vec = embedding.iter().flat_map(|f| f.to_le_bytes()).collect(); + + db.execute(Statement::from_sql_and_values( + DatabaseBackend::Sqlite, + "UPDATE items SET embedding = ? WHERE id = ?", + [embedding_bytes.into(), id.into()], + )) + .await?; + + // Notify UI of DB update + let _ = state.app_handle.emit("mcp-db-update", ()); + + Ok(serde_json::json!({ + "content": [{ "type": "text", "text": format!("Saved document with id {}", id) }] + })) +} + +pub async fn handle_find_documents( + state: &AppState, + content: &str, + limit: usize, +) -> anyhow::Result { + let embedding = state.llama.get_embedding(content).await?; + handle_find_by_vector(state, embedding, limit).await +} + +pub async fn handle_find_by_vector( + state: &AppState, + vector: Vec, + limit: usize, +) -> anyhow::Result { + let embedding_bytes: Vec = vector.iter().flat_map(|f| f.to_le_bytes()).collect(); + + let db = &state.db; + + // raw SQL query via SeaORM for vector search using sqlite-vector scan + // JOIN vector_quantize_scan('table', 'column', query_vector, k) + let results = db + .query_all(Statement::from_sql_and_values( + DatabaseBackend::Sqlite, + "SELECT i.id, i.content, i.document_name, i.created_at, i.updated_at, v.distance + FROM items i + JOIN vector_quantize_scan('items', 'embedding', ?, ?) AS v ON i.rowid = v.rowid + ORDER BY distance", + [embedding_bytes.into(), (limit as i64).into()], + )) + .await?; + + let mut out = Vec::new(); + for res in results { + out.push(serde_json::json!({ + "id": res.try_get::("", "id").map_err(|e| anyhow::anyhow!(e))?, + "content": res.try_get::("", "content").map_err(|e| anyhow::anyhow!(e))?, + "document_name": res.try_get::>("", "document_name").map_err(|e| anyhow::anyhow!(e))?, + "created_at": res.try_get::("", "created_at").map_err(|e| anyhow::anyhow!(e))?, + "updated_at": res.try_get::("", "updated_at").map_err(|e| anyhow::anyhow!(e))?, + "distance": res.try_get::("", "distance").map_err(|e| anyhow::anyhow!(e))? + })); + } + + Ok(serde_json::json!({ + "content": [{ "type": "text", "text": serde_json::to_string_pretty(&out).unwrap_or_else(|_| "[]".to_string()) }] + })) +} + +pub async fn handle_delete_item( + state: &AppState, + id: i32, +) -> anyhow::Result { + let db = &state.db; + + // Delete from items table (embedding is in the same row) + let _ = items::Entity::delete_by_id(id).exec(db).await?; + + // Notify UI of DB update + let _ = state.app_handle.emit("mcp-db-update", ()); + + Ok(serde_json::json!({ + "content": [{ "type": "text", "text": format!("Deleted item with id {}", id) }] + })) +} + +pub async fn handle_get_vector( + state: &AppState, + id: i32, +) -> anyhow::Result { + let db = &state.db; + + // Get embedding from items table directly + let result = db.query_one(Statement::from_sql_and_values( + DatabaseBackend::Sqlite, + "SELECT embedding FROM items WHERE id = ?", + [id.into()] + )).await?; + + match result { + Some(res) => { + let bytes: Vec = res.try_get("", "embedding").unwrap_or_default(); + + // Convert bytes back to Vec + let vector: Vec = bytes + .chunks_exact(4) + .map(|chunk| f32::from_le_bytes(chunk.try_into().unwrap())) + .collect(); + + Ok(serde_json::json!({ + "content": [{ "type": "text", "text": serde_json::to_string(&vector)? }] + })) + }, + None => Err(anyhow::anyhow!("Item not found with id {}", id)) + } +} + +pub async fn handle_get_document( + state: &AppState, + id: i32, +) -> anyhow::Result { + let db = &state.db; + + let item = items::Entity::find_by_id(id).one(db).await?; + + match item { + Some(i) => { + Ok(serde_json::json!({ + "content": [{ "type": "text", "text": i.content }] + })) + }, + None => Err(anyhow::anyhow!("Item not found with id {}", id)) + } +} + +pub async fn handle_get_documents_count( + state: &AppState, +) -> anyhow::Result { + let db = &state.db; + let count = items::Entity::find().count(db).await?; + + Ok(serde_json::json!({ + "content": [{ "type": "text", "text": count.to_string() }] + })) +} + +pub async fn handle_list_documents( + state: &AppState, + limit: u64, + offset: u64, +) -> anyhow::Result { + let db = &state.db; + let items = items::Entity::find() + .order_by_asc(items::Column::Id) + .offset(offset) + .limit(limit) + .all(db) + .await?; + + Ok(serde_json::json!({ + "content": [{ "type": "text", "text": serde_json::to_string_pretty(&items)? }] + })) +} + +pub async fn handle_llm_generate( + state: &AppState, + prompt: &str, + n_predict: i32, + temperature: f32, +) -> anyhow::Result { + let text = state + .llama + .completion(prompt, n_predict, temperature) + .await?; + Ok(serde_json::json!({ + "content": [{ "type": "text", "text": text }] + })) +} + +pub async fn handle_read_recent_items( + state: &AppState, + limit: u64, +) -> anyhow::Result { + let db = &state.db; + let items = items::Entity::find() + .order_by_desc(items::Column::Id) + .limit(limit) + .all(db) + .await?; + + Ok(serde_json::json!({ + "content": [{ "type": "text", "text": serde_json::to_string_pretty(&items)? }] + })) +}