const DEFAULT_BASE_URL = "http://127.0.0.1:8080";
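
// All configuration is read from environment variables, with the constant
// above as the fallback base URL for a locally running llama.cpp server.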
function getBaseUrl() {
  return process.env.LLAMA_CPP_BASE_URL ?? DEFAULT_BASE_URL;
}

function getEmbeddingModel() {
  return process.env.LLAMA_CPP_EMBEDDING_MODEL;
}

function getCompletionModel() {
  return process.env.LLAMA_CPP_MODEL;
}
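
/**
 * Fetch an embedding vector for `text` from a llama.cpp server.
 *
 * Uses the global fetch (Node 18+). The server is assumed to be running with
 * embeddings enabled. The parsing below accepts both an OpenAI-style shape
 * ({ data: [{ embedding }] }) and a bare `embedding` field, since llama.cpp
 * response shapes vary across endpoints and versions.
 */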
export async function llamaEmbedding(text) {
  const baseUrl = getBaseUrl();
  const model = getEmbeddingModel();
  const res = await fetch(`${baseUrl}/embeddings`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model,
      input: text,
    }),
  });
  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`llama.cpp embeddings error: ${res.status} ${detail}`);
  }
  const data = await res.json();
  // Accept either the OpenAI-compatible shape or a bare `embedding` field.
  const embedding = data?.data?.[0]?.embedding ?? data?.embedding;
  if (!Array.isArray(embedding)) {
    throw new Error("llama.cpp embeddings response is missing embedding array");
  }
  return embedding;
}
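
/**
 * Run a non-streaming text completion against llama.cpp's native /completion
 * endpoint. `options.n_predict` caps the number of generated tokens (default
 * 128) and `options.temperature` controls sampling randomness (default 0.2).
 * The model name from LLAMA_CPP_MODEL is forwarded in the request body.
 */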
export async function llamaCompletion(prompt, options = {}) {
  const baseUrl = getBaseUrl();
  const model = getCompletionModel();
  const res = await fetch(`${baseUrl}/completion`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model,
      prompt,
      n_predict: options.n_predict ?? 128,
      temperature: options.temperature ?? 0.2,
      stream: false,
    }),
  });
  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`llama.cpp completion error: ${res.status} ${detail}`);
  }
  const data = await res.json();
  // llama.cpp's native endpoint returns the generated text under `content`;
  // the extra keys are defensive fallbacks for other server builds/proxies.
  const text = data?.content ?? data?.completion ?? data?.response;
  if (typeof text !== "string") {
    throw new Error("llama.cpp completion response is missing text");
  }
  return text;
}
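
// Usage sketch (assumes a llama.cpp server is already listening at the
// configured base URL; the prompt and option values are hypothetical):
//
//   const vector = await llamaEmbedding("hello world");
//   console.log(vector.length); // embedding dimension depends on the model
//
//   const answer = await llamaCompletion("Q: What is 2 + 2?\nA:", {
//     n_predict: 16,
//     temperature: 0,
//   });
//   console.log(answer.trim());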