diff options
Diffstat (limited to 'llama.cpp/tools/server/chat.mjs')
| -rw-r--r-- | llama.cpp/tools/server/chat.mjs | 131 |
1 files changed, 131 insertions, 0 deletions
import * as readline from 'node:readline';
import { stdin, stdout } from 'node:process';
import { readFileSync } from 'node:fs';
import { SchemaConverter } from './public_legacy/json-schema-to-grammar.mjs';

const args = process.argv.slice(2);

/**
 * Returns the command-line argument that immediately follows `flag`.
 *
 * @param {string} flag - Option name, e.g. "--grammar".
 * @returns {string | undefined} The following argument, or undefined when the
 *   flag is absent or is the last argument.
 */
function arg_value(flag) {
    return args.find((_, index) => args[index - 1] === flag);
}

const grammarJsonSchemaFile = arg_value("--grammar-json-schema");

// NOTE: this option takes a value ("--no-cache-prompt true"). Prompt caching
// is requested only while the value is exactly "false", which is also the
// default when the option is not given.
const no_cached_prompt = arg_value("--no-cache-prompt") ?? "false";

const grammarFile = arg_value("--grammar");

// Example usage: function,arguments
const grammarJsonSchemaPropOrder = arg_value("--grammar-json-schema-prop-order");

// Maps each listed property name to its position in the comma-separated list.
const propOrder = grammarJsonSchemaPropOrder
    ? Object.fromEntries(
        grammarJsonSchemaPropOrder.split(",").map((name, index) => [name, index])
    )
    : {};

let grammar = null;
if (grammarJsonSchemaFile) {
    let schema = JSON.parse(readFileSync(grammarJsonSchemaFile, 'utf-8'));
    const converter = new SchemaConverter({ prop_order: propOrder, allow_fetch: true });
    schema = await converter.resolveRefs(schema, grammarJsonSchemaFile);
    converter.visit(schema, '');
    grammar = converter.formatGrammar();
}
// An explicit grammar file takes precedence over a grammar derived from a schema.
if (grammarFile) {
    grammar = readFileSync(grammarFile, 'utf-8');
}

// for cached prompt
let slot_id = -1;

const API_URL = 'http://127.0.0.1:8080';

// Conversation history; seeded with two example exchanges and extended after
// every completed answer.
const chat = [
    {
        human: "Hello, Assistant.",
        assistant: "Hello. How may I help you today?"
    },
    {
        human: "Please tell me the largest city in Europe.",
        assistant: "Sure. The largest city in Europe is Moscow, the capital of Russia."
    },
];

const instruction = `A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.`;

/**
 * Builds the full prompt: the instruction, every recorded exchange, then the
 * new question followed by an open "### Assistant:" turn.
 *
 * @param {string} question - The user's new message.
 * @returns {string} The prompt text sent to the completion endpoint.
 */
function format_prompt(question) {
    const history = chat
        .map(m => `### Human: ${m.human}\n### Assistant: ${m.assistant}`)
        .join("\n");
    return `${instruction}\n${history}\n### Human: ${question}\n### Assistant:`;
}

/**
 * Asks the server to tokenize `content`.
 *
 * @param {string} content - Text to tokenize.
 * @returns {Promise<Array>} The `tokens` field of the response, or an empty
 *   array when the request is not successful or the field is missing.
 */
async function tokenize(content) {
    const result = await fetch(`${API_URL}/tokenize`, {
        method: 'POST',
        body: JSON.stringify({ content })
    });

    if (!result.ok) {
        return [];
    }

    // The body must be awaited BEFORE reading `.tokens`; reading the property
    // off the pending Promise always yields undefined.
    const payload = await result.json();
    return payload.tokens ?? [];
}

// Same precedence concern as above: await the call first, then take `.length`.
const n_keep = (await tokenize(instruction)).length;

/**
 * Yields each JSON payload carried by a "data: " line of a streamed response.
 *
 * Chunks are buffered and split on newlines because a network chunk may hold
 * several events or only part of one, and a multi-byte UTF-8 character may be
 * split across two chunks.
 *
 * @param {AsyncIterable<Uint8Array>} body - The streamed response body.
 * @yields {object} One parsed message per "data: " line.
 */
async function* read_sse_messages(body) {
    const prefix = 'data: ';
    const decoder = new TextDecoder('utf-8');
    let pending = '';

    for await (const chunk of body) {
        pending += decoder.decode(chunk, { stream: true });
        const lines = pending.split('\n');
        // The last element is an incomplete line (or ''); keep it for the next chunk.
        pending = lines.pop();
        for (const line of lines) {
            if (line.startsWith(prefix)) {
                yield JSON.parse(line.substring(prefix.length));
            }
        }
    }

    // Flush whatever remains once the stream has ended.
    pending += decoder.decode();
    if (pending.startsWith(prefix)) {
        yield JSON.parse(pending.substring(prefix.length));
    }
}

/**
 * Sends `question` to the completion endpoint, echoes the streamed answer to
 * stdout as it arrives, and appends the finished exchange to `chat`.
 *
 * @param {string} question - The user's new message.
 * @returns {Promise<void>}
 */
async function chat_completion(question) {
    const result = await fetch(`${API_URL}/completion`, {
        method: 'POST',
        body: JSON.stringify({
            prompt: format_prompt(question),
            temperature: 0.2,
            top_k: 40,
            top_p: 0.9,
            n_keep: n_keep,
            n_predict: 256,
            cache_prompt: no_cached_prompt === "false",
            slot_id: slot_id,
            stop: ["\n### Human:"], // stop completion after generating this
            grammar,
            stream: true,
        })
    });

    if (!result.ok) {
        console.error(`completion request failed: ${result.status} ${result.statusText}`);
        return;
    }

    let answer = '';

    for await (const message of read_sse_messages(result.body)) {
        // Keep the previous slot when a message does not carry one.
        slot_id = message.slot_id ?? slot_id;
        const content = message.content ?? '';
        answer += content;
        process.stdout.write(content);
        if (message.stop) {
            if (message.truncated) {
                // Drop the oldest exchange so the next prompt is shorter.
                chat.shift();
            }
            break;
        }
    }

    process.stdout.write('\n');
    chat.push({ human: question, assistant: answer.trimStart() });
}

const rl = readline.createInterface({ input: stdin, output: stdout });

// Promise wrapper around the callback-based rl.question().
const readlineQuestion = (rl, query, options) => new Promise((resolve) => {
    rl.question(query, options, resolve);
});

while (true) {
    const question = await readlineQuestion(rl, '> ');
    await chat_completion(question);
}
