// TelosDB / src/llama-client.js
// Added in commit "Add MCP server with llama.cpp integration and docs".
// Default address of a locally running llama.cpp server (llama-server).
const DEFAULT_BASE_URL = "http://127.0.0.1:8080";

// Server base URL, overridable via the LLAMA_CPP_BASE_URL environment variable.
function getBaseUrl() {
  return process.env.LLAMA_CPP_BASE_URL ?? DEFAULT_BASE_URL;
}

// Optional model name to send with embedding requests.
function getEmbeddingModel() {
  return process.env.LLAMA_CPP_EMBEDDING_MODEL;
}

// Optional model name to send with completion requests.
function getCompletionModel() {
  return process.env.LLAMA_CPP_MODEL;
}
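
// Example configuration (hypothetical values; adjust to your local setup):
//   LLAMA_CPP_BASE_URL="http://127.0.0.1:8080"
//   LLAMA_CPP_EMBEDDING_MODEL="nomic-embed-text-v1.5"   // assumed model name
//   LLAMA_CPP_MODEL="llama-3.1-8b-instruct"             // assumed model name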

/**
 * Fetch an embedding vector for `text` from the llama.cpp server.
 * Handles both the OpenAI-style response shape ({ data: [{ embedding }] })
 * and a flat { embedding } shape.
 */
export async function llamaEmbedding(text) {
  const baseUrl = getBaseUrl();
  const model = getEmbeddingModel();

  const res = await fetch(`${baseUrl}/embeddings`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model,
      input: text,
    }),
  });

  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`llama.cpp embeddings error: ${res.status} ${detail}`);
  }

  const data = await res.json();
  // Prefer the OpenAI-compatible shape, then fall back to a flat field.
  const embedding = data?.data?.[0]?.embedding ?? data?.embedding;

  if (!Array.isArray(embedding)) {
    throw new Error("llama.cpp embeddings response is missing embedding array");
  }

  return embedding;
}

/**
 * Request a non-streaming text completion for `prompt`.
 * Supported options: n_predict (max tokens, default 128) and
 * temperature (default 0.2).
 */
export async function llamaCompletion(prompt, options = {}) {
  const baseUrl = getBaseUrl();
  const model = getCompletionModel();

  const res = await fetch(`${baseUrl}/completion`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model,
      prompt,
      n_predict: options.n_predict ?? 128,
      temperature: options.temperature ?? 0.2,
      stream: false,
    }),
  });

  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`llama.cpp completion error: ${res.status} ${detail}`);
  }

  const data = await res.json();
  // llama.cpp's /completion returns `content`; fall back to other common field names.
  const text = data?.content ?? data?.completion ?? data?.response;

  if (typeof text !== "string") {
    throw new Error("llama.cpp completion response is missing text");
  }

  return text;
}
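
// Example usage (a minimal sketch; assumes a llama-server instance is already
// running at the configured base URL with suitable models loaded):
//
//   import { llamaEmbedding, llamaCompletion } from "./llama-client.js";
//
//   const vector = await llamaEmbedding("hello world");
//   console.log(vector.length);
//
//   const answer = await llamaCompletion("Q: What does TelosDB store?\nA:", {
//     n_predict: 64,
//     temperature: 0.1,
//   });
//   console.log(answer.trim());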