1import * as readline from 'node:readline'
  2import { stdin, stdout } from 'node:process'
  3import { readFileSync } from 'node:fs'
  4import { SchemaConverter }  from './public_legacy/json-schema-to-grammar.mjs'
  5
  6const args = process.argv.slice(2);
  7const grammarJsonSchemaFile = args.find(
  8    (_, index) => args[index - 1] === "--grammar-json-schema"
  9);
 10
 11const no_cached_prompt = args.find(
 12    (_, index) => args[index - 1] === "--no-cache-prompt"
 13) ?? "false";
 14
 15const grammarFile = args.find((_, index) => args[index - 1] === "--grammar");
 16
 17// Example usage: function,arguments
 18const grammarJsonSchemaPropOrder = args.find(
 19    (_, index) => args[index - 1] === "--grammar-json-schema-prop-order"
 20);
 21const propOrder = grammarJsonSchemaPropOrder
 22    ? grammarJsonSchemaPropOrder
 23          .split(",")
 24          .reduce((acc, cur, index) => ({ ...acc, [cur]: index }), {})
 25    : {};
 26
 27let grammar = null
 28if (grammarJsonSchemaFile) {
 29    let schema = JSON.parse(readFileSync(grammarJsonSchemaFile, 'utf-8'))
 30    const converter = new SchemaConverter({prop_order: propOrder, allow_fetch: true})
 31    schema = await converter.resolveRefs(schema, grammarJsonSchemaFile)
 32    converter.visit(schema, '')
 33    grammar = converter.formatGrammar()
 34}
 35if (grammarFile) {
 36    grammar = readFileSync(grammarFile, 'utf-8')
 37}
 38
 39// for cached prompt
 40let slot_id = -1;
 41
 42const API_URL = 'http://127.0.0.1:8080'
 43
 44const chat = [
 45    {
 46        human: "Hello, Assistant.",
 47        assistant: "Hello. How may I help you today?"
 48    },
 49    {
 50        human: "Please tell me the largest city in Europe.",
 51        assistant: "Sure. The largest city in Europe is Moscow, the capital of Russia."
 52    },
 53]
 54
 55const instruction = `A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.`
 56
 57function format_prompt(question) {
 58    return `${instruction}\n${
 59        chat.map(m =>`### Human: ${m.human}\n### Assistant: ${m.assistant}`).join("\n")
 60    }\n### Human: ${question}\n### Assistant:`
 61}
 62
 63async function tokenize(content) {
 64    const result = await fetch(`${API_URL}/tokenize`, {
 65        method: 'POST',
 66        body: JSON.stringify({ content })
 67    })
 68
 69    if (!result.ok) {
 70        return []
 71    }
 72
 73    return await result.json().tokens
 74}
 75
 76const n_keep = await tokenize(instruction).length
 77
 78async function chat_completion(question) {
 79    const result = await fetch(`${API_URL}/completion`, {
 80        method: 'POST',
 81        body: JSON.stringify({
 82            prompt: format_prompt(question),
 83            temperature: 0.2,
 84            top_k: 40,
 85            top_p: 0.9,
 86            n_keep: n_keep,
 87            n_predict: 256,
 88            cache_prompt: no_cached_prompt === "false",
 89            slot_id: slot_id,
 90            stop: ["\n### Human:"], // stop completion after generating this
 91            grammar,
 92            stream: true,
 93        })
 94    })
 95
 96    if (!result.ok) {
 97        return
 98    }
 99
100    let answer = ''
101
102    for await (var chunk of result.body) {
103        const t = Buffer.from(chunk).toString('utf8')
104        if (t.startsWith('data: ')) {
105            const message = JSON.parse(t.substring(6))
106            slot_id = message.slot_id
107            answer += message.content
108            process.stdout.write(message.content)
109            if (message.stop) {
110                if (message.truncated) {
111                    chat.shift()
112                }
113                break
114            }
115        }
116    }
117
118    process.stdout.write('\n')
119    chat.push({ human: question, assistant: answer.trimStart() })
120}
121
// Console line reader for the interactive prompt loop.
const rl = readline.createInterface({ input: stdin, output: stdout });

/**
 * Awaitable wrapper around rl.question().
 * @param {object} rl - A readline Interface (or compatible object).
 * @param {string} query - Prompt text shown to the user.
 * @param {object} [options] - Forwarded to rl.question().
 * @returns {Promise<string>} Resolves with the line the user entered.
 */
const readlineQuestion = (rl, query, options) =>
    new Promise((resolve) => rl.question(query, options, resolve));
127
// Interactive REPL: read a question, stream the model's answer, repeat forever.
for (;;) {
    const question = await readlineQuestion(rl, '> ');
    await chat_completion(question);
}