const paramDefaults = {
  stream: true,
  n_predict: 500,
  temperature: 0.2,
  stop: ["</s>"]
};
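
// These defaults can be overridden per request via the `params` argument,
// which is merged over `paramDefaults` inside `llama` below. For example
// (illustrative values, not special to the API):
//
//   llama("Hello", { temperature: 0.8, n_predict: 128 })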

let generation_settings = null;

// Completes the prompt as a generator. Recommended for most use cases.
//
// Example:
//
//   import { llama } from '/completion.js'
//
//   const request = llama("Tell me a joke", { n_predict: 800 })
//   for await (const chunk of request) {
//     document.write(chunk.data.content)
//   }
//
export async function* llama(prompt, params = {}, config = {}) {
  let controller = config.controller;
  const api_url = config.api_url?.replace(/\/+$/, '') || "";

  if (!controller) {
    controller = new AbortController();
  }

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      ...(params.api_key ? { 'Authorization': `Bearer ${params.api_key}` } : {})
    },
    signal: controller.signal,
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let content = "";
  let leftover = ""; // Buffer for partially read lines

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // Add any leftover data to the current chunk of data.
      // `stream: true` keeps multi-byte UTF-8 sequences that are split
      // across chunks from being decoded as replacement characters.
      const text = leftover + decoder.decode(result.value, { stream: true });

      // Check if the last character is a line break
      const endsWithLineBreak = text.endsWith('\n');

      // Split the text into lines
      let lines = text.split('\n');

      // If the text doesn't end with a line break, the last line is incomplete;
      // store it in leftover to be prepended to the next chunk of data
      if (!endsWithLineBreak) {
        leftover = lines.pop();
      } else {
        leftover = ""; // Reset leftover if we have a line break at the end
      }

      // Parse all SSE events and add them to result.
      // Note: the regex must not carry the /g flag; a global regex keeps its
      // lastIndex between exec() calls and would silently skip later lines.
      const regex = /^(\S+):\s(.*)$/;
      for (const line of lines) {
        const match = regex.exec(line);
        if (match) {
          result[match[1]] = match[2];
          if (result.data === '[DONE]') {
            cont = false;
            break;
          }

          // since we know this is llama.cpp, let's just decode the json in data
          if (result.data) {
            result.data = JSON.parse(result.data);
            content += result.data.content;

            // yield
            yield result;

            // if we got a stop token from server, we will break here
            if (result.data.stop) {
              if (result.data.generation_settings) {
                generation_settings = result.data.generation_settings;
              }
              cont = false;
              break;
            }
          }
          if (result.error) {
            let parsedError;
            try {
              parsedError = JSON.parse(result.error);
            } catch (parseError) {
              // The error payload was not JSON; log it verbatim
              console.error(`llama.cpp error ${result.error}`);
            }
            if (parsedError) {
              result.error = parsedError;
              if (result.error.message.includes('slot unavailable')) {
                // Throw an error to be caught by upstream callers.
                // JSON.parse runs above so this throw is not swallowed
                // by its catch block.
                throw new Error('slot unavailable');
              } else {
                console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
              }
            }
          }
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  } finally {
    controller.abort();
  }

  return content;
}
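
// Example: cancel a generation early by passing your own AbortController in
// `config` (a usage sketch; the 5s timeout is arbitrary):
//
//   const controller = new AbortController();
//   setTimeout(() => controller.abort(), 5000);
//   try {
//     for await (const chunk of llama("Write a story", {}, { controller })) {
//       document.write(chunk.data.content)
//     }
//   } catch (e) {
//     if (e.name !== 'AbortError') throw e; // ignore our own cancellation
//   }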

// Call llama, return an event target that you can subscribe to
//
// Example:
//
//   import { llamaEventTarget } from '/completion.js'
//
//   const conn = llamaEventTarget(prompt)
//   conn.addEventListener("message", (chunk) => {
//     document.write(chunk.detail.content)
//   })
//
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  (async () => {
    let content = "";
    for await (const chunk of llama(prompt, params, config)) {
      // Guard all accesses behind chunk.data so a chunk without a data
      // field cannot dereference undefined
      if (chunk.data) {
        content += chunk.data.content;
        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
        if (chunk.data.generation_settings) {
          eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
        }
        if (chunk.data.timings) {
          eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
        }
      }
    }
    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
  })();
  return eventTarget;
}
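
// Besides "message", the target also emits "generation_settings", "timings",
// and a final "done" event (all dispatched above). For example:
//
//   const conn = llamaEventTarget(prompt)
//   conn.addEventListener("done", (e) => {
//     console.log("finished:", e.detail.content)
//   })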

// Call llama, return a promise that resolves to the completed text. This
// does not support streaming.
//
// Example:
//
//   llamaPromise(prompt).then((content) => {
//     document.write(content)
//   })
//
// or
//
//   const content = await llamaPromise(prompt)
//   document.write(content)
//
export const llamaPromise = async (prompt, params = {}, config = {}) => {
  let content = "";
  for await (const chunk of llama(prompt, params, config)) {
    content += chunk.data.content;
  }
  return content;
};

/**
 * @deprecated Use the `llama` generator above instead.
 */
export const llamaComplete = async (params, controller, callback) => {
  for await (const chunk of llama(params.prompt, params, { controller })) {
    callback(chunk);
  }
};
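
// Migration sketch for the deprecated helper above: a call like
//
//   llamaComplete({ prompt: "Hi" }, controller, (chunk) => { ... })
//
// becomes
//
//   for await (const chunk of llama("Hi", {}, { controller })) { ... }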

// Get the model info from the server. This is useful for getting the context
// window and so on.
export const llamaModelInfo = async (config = {}) => {
  if (!generation_settings) {
    const api_url = config.api_url?.replace(/\/+$/, '') || "";
    const props = await fetch(`${api_url}/props`).then(r => r.json());
    generation_settings = props.default_generation_settings;
  }
  return generation_settings;
}
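
// Example usage (assumes the server's default_generation_settings includes an
// n_ctx field; exact fields depend on the llama.cpp server version):
//
//   const info = await llamaModelInfo()
//   console.log("context window:", info.n_ctx)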