summaryrefslogtreecommitdiff
path: root/llama.cpp/tools/server/public_simplechat
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp/tools/server/public_simplechat')
-rw-r--r--llama.cpp/tools/server/public_simplechat/datautils.mjs266
-rw-r--r--llama.cpp/tools/server/public_simplechat/index.html51
-rw-r--r--llama.cpp/tools/server/public_simplechat/readme.md286
-rw-r--r--llama.cpp/tools/server/public_simplechat/simplechat.css79
-rw-r--r--llama.cpp/tools/server/public_simplechat/simplechat.js929
-rw-r--r--llama.cpp/tools/server/public_simplechat/simplechat_screens.webpbin0 -> 21376 bytes
-rw-r--r--llama.cpp/tools/server/public_simplechat/ui.mjs211
7 files changed, 1822 insertions, 0 deletions
diff --git a/llama.cpp/tools/server/public_simplechat/datautils.mjs b/llama.cpp/tools/server/public_simplechat/datautils.mjs
new file mode 100644
index 0000000..75159d6
--- /dev/null
+++ b/llama.cpp/tools/server/public_simplechat/datautils.mjs
@@ -0,0 +1,266 @@
+//@ts-check
+// Helpers to work with different data types
+// by Humans for All
+//
+
+/**
+ * Given the limited context size of local LLMs, many a time when the context gets filled
+ * between the prompt and the response, it can lead to repeating text garbage generation.
+ * And many a times setting penalty wrt repeatation leads to over-intelligent garbage
+ * repeatation with slight variations. These garbage inturn can lead to overloading of the
+ * available model context, leading to less valuable response for subsequent prompts/queries,
+ * if chat history is sent to ai model.
+ *
+ * So two simple minded garbage trimming logics are experimented below.
+ * * one based on progressively-larger-substring-based-repeat-matching-with-partial-skip and
+ * * another based on char-histogram-driven garbage trimming.
+ * * in future characteristic of histogram over varying lengths could be used to allow for
+ * a more aggressive and adaptive trimming logic.
+ */
+
+
/**
 * Simple minded logic to help remove repeating garbage at end of the string.
 * The repetition needs to be perfectly matching.
 *
 * The logic progressively goes on probing for longer and longer substring based
 * repetition, till there is no longer repetition. Inturn picks the one with
 * the longest matched chain.
 *
 * @param {string} sIn - string to inspect.
 * @param {number} maxSubL - repeat-unit lengths 1..maxSubL-1 are probed.
 * @param {number} maxMatchLenThreshold - minimum total matched length required
 *  before any trimming is actually applied.
 * @returns {{trimmed: boolean, data: string}} whether trimming occurred and the
 *  (possibly shortened) string.
 */
export function trim_repeat_garbage_at_end(sIn, maxSubL=10, maxMatchLenThreshold=40) {
    let rCnt = [0];
    let maxMatchLen = maxSubL;
    let iMML = -1;
    for(let subL=1; subL < maxSubL; subL++) {
        rCnt.push(0);
        let i;
        // Reference pattern: the last subL chars of the string.
        let refS = sIn.substring(sIn.length-subL, sIn.length);
        for(i=sIn.length; i > 0; i -= subL) {
            let curS = sIn.substring(i-subL, i);
            if (refS != curS) {
                break;
            }
            rCnt[subL] += 1;
        }
        // Record the best matched length for this repeat-unit size.
        // NOTE: doing this after the scan, rather than only inside the
        // mismatch branch, also covers the case where the entire string is
        // an exact repetition of the unit (string length an exact multiple
        // of subL), which the mismatch-only bookkeeping silently ignored.
        let curMatchLen = rCnt[subL]*subL;
        if (maxMatchLen < curMatchLen) {
            maxMatchLen = curMatchLen;
            iMML = subL;
        }
    }
    console.debug("DBUG:DU:TrimRepeatGarbage:", rCnt);
    if ((iMML == -1) || (maxMatchLen < maxMatchLenThreshold)) {
        return {trimmed: false, data: sIn};
    }
    console.debug("DBUG:TrimRepeatGarbage:TrimmedCharLen:", maxMatchLen);
    let iEnd = sIn.length - maxMatchLen;
    return { trimmed: true, data: sIn.substring(0, iEnd) };
}
+
+
/**
 * Repeatedly strip repeating garbage from the end of the string, till no more
 * can be stripped. When trimming stalls, blindly drop one trailing char and
 * retry, upto skipMax times in a row, so that even multiple garbage runs with
 * different patterns get munched through.
 *
 * @param {string} sIn
 * @param {number} maxSubL
 * @param {number | undefined} [maxMatchLenThreshold]
 * @param {number} [skipMax]
 */
export function trim_repeat_garbage_at_end_loop(sIn, maxSubL, maxMatchLenThreshold, skipMax=16) {
    let sCur = sIn;
    let sSaved = "";
    let skipsInARow = 0;
    for (;;) {
        const res = trim_repeat_garbage_at_end(sCur, maxSubL, maxMatchLenThreshold);
        if (res.trimmed) {
            // Progress was made; reset the skip counter and keep trimming.
            skipsInARow = 0;
            sCur = res.data;
            continue;
        }
        // Could not trim. Remember the string as it stood before skipping
        // started, so it can be returned if skipping never helps.
        if (skipsInARow === 0) {
            sSaved = res.data;
        }
        skipsInARow += 1;
        if (skipsInARow >= skipMax) {
            return sSaved;
        }
        // Drop one trailing char and retry.
        sCur = res.data.substring(0, res.data.length - 1);
    }
}
+
+
/**
 * A simple minded try trim garbage at end using histogram driven characteristics.
 * There can be variation in the repetitions, as long as no new char props up.
 *
 * This tracks the chars and their frequency in a specified length of substring at the end
 * and inturn checks if moving further into the generated text from the end remains within
 * the same char subset or goes beyond it and based on that either trims the string at the
 * end or not. This allows to filter garbage at the end, including even if there are certain
 * kind of small variations in the repeated text wrt position of seen chars.
 *
 * Allow the garbage to contain upto maxUniq chars, but at the same time ensure that
 * a given type of char ie numerals or alphabets or other types dont cross the specified
 * maxType limit. This allows intermixed text garbage to be identified and trimmed.
 *
 * ALERT: This is not perfect and only provides a rough garbage identification logic.
 * Also it currently only differentiates between character classes wrt english.
 *
 * @param {string} sIn - string to inspect.
 * @param {number} maxType - max distinct chars allowed per char class (alpha/num/other).
 * @param {number} maxUniq - max distinct chars allowed overall in the learn window.
 * @param {number} maxMatchLenThreshold - size of the learn window at the end of
 *  the string; strings shorter than this are never trimmed.
 * @returns {{trimmed: boolean, data: string}}
 */
export function trim_hist_garbage_at_end(sIn, maxType, maxUniq, maxMatchLenThreshold) {
    if (sIn.length < maxMatchLenThreshold) {
        return { trimmed: false, data: sIn };
    }
    let iAlp = 0;
    let iNum = 0;
    let iOth = 0;
    // Learn: build a histogram of chars seen in the tail window.
    /** @type {Object<string, number>} */
    let hist = {};
    let iUniq = 0;
    for(let i=0; i<maxMatchLenThreshold; i++) {
        let c = sIn[sIn.length-1-i];
        if (c in hist) {
            hist[c] += 1;
        } else {
            if(c.match(/[0-9]/) != null) {
                iNum += 1;
            } else if(c.match(/[A-Za-z]/) != null) {
                iAlp += 1;
            } else {
                iOth += 1;
            }
            iUniq += 1;
            if (iUniq >= maxUniq) {
                // Too many distinct chars for this to look like garbage. The
                // char which pushed us over is deliberately left out of hist,
                // so the catch phase below will bail out with trimmed:false.
                break;
            }
            hist[c] = 1;
        }
    }
    console.debug("DBUG:TrimHistGarbage:", hist);
    if ((iAlp > maxType) || (iNum > maxType) || (iOth > maxType)) {
        return { trimmed: false, data: sIn };
    }
    // Catch and Trim: walk back from the end while chars stay within the
    // learned subset; the first char outside it marks the garbage boundary.
    for(let i=0; i < sIn.length; i++) {
        let c = sIn[sIn.length-1-i];
        if (!(c in hist)) {
            if (i < maxMatchLenThreshold) {
                return { trimmed: false, data: sIn };
            }
            console.debug("DBUG:TrimHistGarbage:TrimmedCharLen:", i);
            // Keep everything upto and including this non-garbage char, ie
            // trim exactly the i trailing garbage chars. (The earlier
            // substring(0, sIn.length-i+1) was off by one and left one
            // garbage char behind, contradicting the debug message above.)
            return { trimmed: true, data: sIn.substring(0, sIn.length-i) };
        }
    }
    console.debug("DBUG:TrimHistGarbage:Trimmed fully");
    return { trimmed: true, data: "" };
}
+
/**
 * Keep trimming repeatedly using the hist_garbage logic, till it no longer
 * trims anything. This ensures that even if there are multiple runs of
 * garbage with different patterns, the logic still munches through them.
 *
 * @param {string} sIn
 * @param {number} maxType
 * @param {number} maxUniq
 * @param {number} maxMatchLenThreshold
 */
export function trim_hist_garbage_at_end_loop(sIn, maxType, maxUniq, maxMatchLenThreshold) {
    let result = trim_hist_garbage_at_end(sIn, maxType, maxUniq, maxMatchLenThreshold);
    while (result.trimmed) {
        result = trim_hist_garbage_at_end(result.data, maxType, maxUniq, maxMatchLenThreshold);
    }
    return result.data;
}
+
/**
 * Try trim garbage at the end by combining the hist-driven-garbage-trimming
 * with the skip-a-bit-if-reqd-then-repeat-pattern-based-garbage-trimming,
 * blindly running both passes twice over the string.
 * @param {string} sIn
 */
export function trim_garbage_at_end(sIn) {
    let sTrimmed = sIn;
    const passes = 2;
    for (let pass = 0; pass < passes; pass++) {
        sTrimmed = trim_hist_garbage_at_end_loop(sTrimmed, 8, 24, 72);
        sTrimmed = trim_repeat_garbage_at_end_loop(sTrimmed, 32, 72, 12);
    }
    return sTrimmed;
}
+
+
/**
 * NewLines array helper.
 * Maintains an ordered list of lines, where only the last entry may be a
 * partial line (no trailing newline) that gets built up part by part
 * across successive add_append calls.
 */
export class NewLines {

    constructor() {
        /** @type {string[]} oldest-first; only the last entry may lack "\n" */
        this.lines = [];
    }

    /**
     * Extracts lines from the passed string and inturn either
     * append to a previous partial line or add a new line.
     * @param {string} sLines
     */
    add_append(sLines) {
        if (sLines.length == 0) {
            // Nothing to add; also avoids pushing a spurious "" partial entry.
            return;
        }
        let aLines = sLines.split("\n");
        let lCnt = 0;
        for(let line of aLines) {
            lCnt += 1;
            if (lCnt < aLines.length) {
                // Add back the newline removed during split.
                line += "\n";
            } else if (sLines.endsWith("\n")) {
                // The final split element after a trailing newline is the empty
                // string; it carries no content, so drop it instead of pushing
                // a spurious "\n"-only line (which corrupted the stream with
                // phantom blank lines, eg "abc\n" became ["abc\n", "\n"]).
                continue;
            }
            // The first extracted piece continues any pending partial line.
            if (lCnt == 1) {
                let lastLine = this.lines[this.lines.length-1];
                if ((lastLine != undefined) && (!lastLine.endsWith("\n"))) {
                    this.lines[this.lines.length-1] += line;
                    continue;
                }
            }
            // Add new line
            this.lines.push(line);
        }
    }

    /**
     * Shift the oldest/earliest/0th line in the array. [Old-New|Earliest-Latest]
     * @param {boolean} bFullWithNewLineOnly - if true (default), only full lines
     *  (ie those with newline at end) are returned; a trailing partial line
     *  yields undefined. If false, the partial last line may also be returned.
     * @returns {string | undefined}
     */
    shift(bFullWithNewLineOnly=true) {
        let line = this.lines[0];
        if (line == undefined) {
            return undefined;
        }
        if ((line[line.length-1] != "\n") && bFullWithNewLineOnly){
            return undefined;
        }
        return this.lines.shift();
    }

}
diff --git a/llama.cpp/tools/server/public_simplechat/index.html b/llama.cpp/tools/server/public_simplechat/index.html
new file mode 100644
index 0000000..f641301
--- /dev/null
+++ b/llama.cpp/tools/server/public_simplechat/index.html
@@ -0,0 +1,51 @@
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <title>SimpleChat LlamaCppEtal </title>
+ <meta charset="UTF-8" />
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
+ <meta name="message" content="Save Nature Save Earth" />
+ <meta name="description" content="SimpleChat: trigger LLM web service endpoints /chat/completions and /completions, single/multi chat sessions" />
+ <meta name="author" content="by Humans for All" />
+ <meta http-equiv="Cache-Control" content="no-cache, no-store, must-revalidate" />
+ <script type="importmap">
+ {
+ "imports": {
+ "datautils": "./datautils.mjs",
+ "ui": "./ui.mjs"
+ }
+ }
+ </script>
+ <script src="simplechat.js" type="module" defer></script>
+ <link rel="stylesheet" href="simplechat.css" />
+ </head>
+ <body>
+ <div class="samecolumn" id="fullbody">
+
+ <div class="sameline" id="heading">
+ <p class="heading flex-grow" > <b> SimpleChat </b> </p>
+ <button id="settings">Settings</button>
+ </div>
+
+ <div id="sessions-div" class="sameline"></div>
+
+ <hr>
+ <div class="sameline">
+ <label for="system-in">System</label>
+ <textarea name="system" id="system-in" rows="2" placeholder="e.g. you are a helpful ai assistant, who provides concise answers" class="flex-grow"></textarea>
+ </div>
+
+ <hr>
+ <div id="chat-div">
+ <p> You need to have javascript enabled.</p>
+ </div>
+
+ <hr>
+ <div class="sameline">
+ <textarea id="user-in" class="flex-grow" rows="2" placeholder="enter your query to the ai model here" ></textarea>
+ <button id="user-btn">submit</button>
+ </div>
+
+ </div>
+ </body>
+</html>
diff --git a/llama.cpp/tools/server/public_simplechat/readme.md b/llama.cpp/tools/server/public_simplechat/readme.md
new file mode 100644
index 0000000..24e026d
--- /dev/null
+++ b/llama.cpp/tools/server/public_simplechat/readme.md
@@ -0,0 +1,286 @@
+
+# SimpleChat
+
+by Humans for All.
+
+## quickstart
+
+To run from the build dir
+
+bin/llama-server -m path/model.gguf --path ../tools/server/public_simplechat
+
+Continue reading for the details.
+
+## overview
+
+This simple web frontend, allows triggering/testing the server's /completions or /chat/completions endpoints
+in a simple way with minimal code from a common code base. Inturn additionally it tries to allow single or
+multiple independent back and forth chatting to an extent, with the ai llm model at a basic level, with their
+own system prompts.
+
+This allows seeing the generated text / ai-model response in oneshot at the end, after it is fully generated,
+or potentially as it is being generated, in a streamed manner from the server/ai-model.
+
+![Chat and Settings screens](./simplechat_screens.webp "Chat and Settings screens")
+
+Auto saves the chat session locally as and when the chat is progressing and inturn at a later time when you
+open SimpleChat, option is provided to restore the old chat session, if a matching one exists.
+
+The UI follows a responsive web design so that the layout can adapt to available display space in a usable
+enough manner, in general.
+
+Allows developer/end-user to control some of the behaviour by updating gMe members from browser's devel-tool
+console. Parallely some of the directly useful to end-user settings can also be changed using the provided
+settings ui.
+
+NOTE: Current web service api doesnt expose the model context length directly, so client logic doesnt provide
+any adaptive culling of old messages nor of replacing them with summary of their content etal. However there
+is a optional sliding window based chat logic, which provides a simple minded culling of old messages from
+the chat history before sending to the ai model.
+
+NOTE: Wrt options sent with the request, it mainly sets temperature, max_tokens and optionaly stream for now.
+However if someone wants they can update the js file or equivalent member in gMe as needed.
+
+NOTE: One may be able to use this to chat with openai api web-service /chat/completions endpoint, in a very
+limited / minimal way. One will need to set model, openai url and authorization bearer key in settings ui.
+
+
+## usage
+
+One could run this web frontend directly using server itself or if anyone is thinking of adding a built in web
+frontend to configure the server over http(s) or so, then run this web frontend using something like python's
+http module.
+
+### running using tools/server
+
+./llama-server -m path/model.gguf --path tools/server/public_simplechat [--port PORT]
+
+### running using python3's server module
+
+first run tools/server
+* ./llama-server -m path/model.gguf
+
+next run this web front end in tools/server/public_simplechat
+* cd ../tools/server/public_simplechat
+* python3 -m http.server PORT
+
+### using the front end
+
+Open this simple web front end from your local browser
+
+* http://127.0.0.1:PORT/index.html
+
+Once inside
+
+* If you want to, you can change many of the default global settings
+ * the base url (ie ip addr / domain name, port)
+ * chat (default) vs completion mode
+ * try trim garbage in response or not
+ * amount of chat history in the context sent to server/ai-model
+ * oneshot or streamed mode.
+
+* In completion mode
+ * one normally doesnt use a system prompt in completion mode.
+ * logic by default doesnt insert any role specific "ROLE: " prefix wrt each role's message.
+ If the model requires any prefix wrt user role messages, then the end user has to
+ explicitly add the needed prefix, when they enter their chat message.
+ Similarly if the model requires any prefix to trigger assistant/ai-model response,
+ then the end user needs to enter the same.
+ This keeps the logic simple, while still giving flexibility to the end user to
+ manage any templating/tagging requirement wrt their messages to the model.
+ * the logic doesnt insert newline at the begining and end wrt the prompt message generated.
+ However if the chat being sent to /completions end point has more than one role's message,
+ then insert newline when moving from one role's message to the next role's message, so
+ that it can be clearly identified/distinguished.
+ * given that /completions endpoint normally doesnt add additional chat-templating of its
+ own, the above ensures that end user can create a custom single/multi message combo with
+ any tags/special-tokens related chat templating to test out model handshake. Or enduser
+ can use it just for normal completion related/based query.
+
+* If you want to provide a system prompt, then ideally enter it first, before entering any user query.
+ Normally Completion mode doesnt need system prompt, while Chat mode can generate better/interesting
+ responses with a suitable system prompt.
+ * if chat.add_system_begin is used
+ * you cant change the system prompt, after it has been submitted once along with user query.
+ * you cant set a system prompt, after you have submitted any user query
+ * if chat.add_system_anytime is used
+ * one can change the system prompt any time during chat, by changing the contents of system prompt.
+ * inturn the updated/changed system prompt will be inserted into the chat session.
+ * this allows for the subsequent user chatting to be driven by the new system prompt set above.
+
+* Enter your query and either press enter or click on the submit button.
+ If you want to insert enter (\n) as part of your chat/query to ai model, use shift+enter.
+
+* Wait for the logic to communicate with the server and get the response.
+ * the user is not allowed to enter any fresh query during this time.
+ * the user input box will be disabled and a working message will be shown in it.
+ * if trim garbage is enabled, the logic will try to trim repeating text kind of garbage to some extent.
+
+* just refresh the page, to reset wrt the chat history and or system prompt and start afresh.
+
+* Using NewChat one can start independent chat sessions.
+ * two independent chat sessions are setup by default.
+
+* When you want to print, switching ChatHistoryInCtxt to Full and clicking on the chat session button of
+ interest, will display the full chat history till then wrt same, if you want full history for printing.
+
+
+## Devel note
+
+### Reason behind this
+
+The idea is to be easy enough to use for basic purposes, while also being simple and easily discernable
+by developers who may not be from web frontend background (so inturn may not be familiar with template /
+end-use-specific-language-extensions driven flows) so that they can use it to explore/experiment things.
+
+And given that the idea is also to help explore/experiment for developers, some flexibility is provided
+to change behaviour easily using the devel-tools/console or provided minimal settings ui (wrt few aspects).
+Skeletal logic has been implemented to explore some of the end points and ideas/implications around them.
+
+
+### General
+
+Me/gMe consolidates the settings which control the behaviour into one object.
+One can see the current settings, as well as change/update them using browsers devel-tool/console.
+It is attached to the document object. Some of these can also be updated using the Settings UI.
+
+ baseURL - the domain-name/ip-address and inturn the port to send the request.
+
+ bStream - control between oneshot-at-end and live-stream-as-its-generated collating and showing
+ of the generated response.
+
+ the logic assumes that the text sent from the server follows utf-8 encoding.
+
+ in streaming mode - if there is any exception, the logic traps the same and tries to ensure
+ that text generated till then is not lost.
+
+ if a very long text is being generated, which leads to no user interaction for sometime and
+ inturn the machine goes into power saving mode or so, the platform may stop network connection,
+ leading to exception.
+
+ apiEP - select between /completions and /chat/completions endpoint provided by the server/ai-model.
+
+ bCompletionFreshChatAlways - whether Completion mode collates complete/sliding-window history when
+ communicating with the server or only sends the latest user query/message.
+
+ bCompletionInsertStandardRolePrefix - whether Completion mode inserts role related prefix wrt the
+ messages that get inserted into prompt field wrt /Completion endpoint.
+
+ bTrimGarbage - whether garbage repeatation at the end of the generated ai response, should be
+ trimmed or left as is. If enabled, it will be trimmed so that it wont be sent back as part of
+ subsequent chat history. At the same time the actual trimmed text is shown to the user, once
+ when it was generated, so user can check if any useful info/data was there in the response.
+
+ One may be able to request the ai-model to continue (wrt the last response) (if chat-history
+ is enabled as part of the chat-history-in-context setting), and chances are the ai-model will
+ continue starting from the trimmed part, thus allows long response to be recovered/continued
+ indirectly, in many cases.
+
+ The histogram/freq based trimming logic is currently tuned for english language wrt its
+ is-it-a-alpabetic|numeral-char regex match logic.
+
+ apiRequestOptions - maintains the list of options/fields to send along with api request,
+ irrespective of whether /chat/completions or /completions endpoint.
+
+ If you want to add additional options/fields to send to the server/ai-model, and or
+ modify the existing options value or remove them, for now you can update this global var
+ using browser's development-tools/console.
+
+ For string, numeric and boolean fields in apiRequestOptions, including even those added by a
+ user at runtime by directly modifying gMe.apiRequestOptions, setting ui entries will be auto
+ created.
+
+ cache_prompt option supported by example/server is allowed to be controlled by user, so that
+ any caching supported wrt system-prompt and chat history, if usable can get used. When chat
+ history sliding window is enabled, cache_prompt logic may or may not kick in at the backend
+ wrt same, based on aspects related to model, positional encoding, attention mechanism etal.
+ However system prompt should ideally get the benefit of caching.
+
+ headers - maintains the list of http headers sent when request is made to the server. By default
+ Content-Type is set to application/json. Additionally Authorization entry is provided, which can
+ be set if needed using the settings ui.
+
+ iRecentUserMsgCnt - a simple minded SlidingWindow to limit context window load at Ai Model end.
+ This is disabled by default. However if enabled, then in addition to latest system message, only
+ the last/latest iRecentUserMsgCnt user messages after the latest system prompt and its responses
+ from the ai model will be sent to the ai-model, when querying for a new response. IE if enabled,
+ only user messages after the latest system message/prompt will be considered.
+
+ This specified sliding window user message count also includes the latest user query.
+ <0 : Send entire chat history to server
+ 0 : Send only the system message if any to the server
+ >0 : Send the latest chat history from the latest system prompt, limited to specified cnt.
+
+
+By using gMe's iRecentUserMsgCnt and apiRequestOptions.max_tokens/n_predict one can try to control
+the implications of loading of the ai-model's context window by chat history, wrt chat response to
+some extent in a simple crude way. You may also want to control the context size enabled when the
+server loads ai-model, on the server end.
+
+
+Sometimes the browser may be stubborn with caching of the file, so your updates to html/css/js
+may not be visible. Also remember that just refreshing/reloading page in browser or for that
+matter clearing site data, dont directly override site caching in all cases. Worst case you may
+have to change port. Or in dev tools of browser, you may be able to disable caching fully.
+
+
+Currently the server to communicate with is maintained globally and not as part of a specific
+chat session. So if one changes the server ip/url in setting, then all chat sessions will auto
+switch to this new server, when you try using those sessions.
+
+
+By switching between chat.add_system_begin/anytime, one can control whether one can change
+the system prompt, anytime during the conversation or only at the beginning.
+
+
+### Default setup
+
+By default things are setup to try and make the user experience a bit better, if possible.
+However a developer when testing the server of ai-model may want to change these value.
+
+Using iRecentUserMsgCnt reduce chat history context sent to the server/ai-model to be
+just the system-prompt, prev-user-request-and-ai-response and cur-user-request, instead of
+full chat history. This way if there is any response with garbage/repeatation, it doesnt
+mess with things beyond the next question/request/query, in some ways. The trim garbage
+option also tries to help avoid issues with garbage in the context to an extent.
+
+Set max_tokens to 1024, so that a relatively large previous reponse doesnt eat up the space
+available wrt next query-response. However dont forget that the server when started should
+also be started with a model context size of 1k or more, to be on safe side.
+
+ The /completions endpoint of tools/server doesnt take max_tokens, instead it takes the
+ internal n_predict, for now add the same here on the client side, maybe later add max_tokens
+ to /completions endpoint handling code on server side.
+
+NOTE: One may want to experiment with frequency/presence penalty fields in apiRequestOptions
+wrt the set of fields sent to server along with the user query, to check how the model behaves
+wrt repeatations in general in the generated text response.
+
+A end-user can change these behaviour by editing gMe from browser's devel-tool/console or by
+using the provided settings ui (for settings exposed through the ui).
+
+
+### OpenAi / Equivalent API WebService
+
+One may be able to handshake with OpenAI/Equivalent api web service's /chat/completions endpoint
+for a minimal chatting experimentation by setting the below.
+
+* the baseUrl in settings ui
+ * https://api.openai.com/v1 or similar
+
+* Wrt request body - gMe.apiRequestOptions
+ * model (settings ui)
+ * any additional fields if required in future
+
+* Wrt request headers - gMe.headers
+ * Authorization (available through settings ui)
+ * Bearer THE_OPENAI_API_KEY
+ * any additional optional header entries like "OpenAI-Organization", "OpenAI-Project" or so
+
+NOTE: Not tested, as there is no free tier api testing available. However logically this might
+work.
+
+
+## At the end
+
+Also a thank you to all open source and open model developers, who strive for the common good.
diff --git a/llama.cpp/tools/server/public_simplechat/simplechat.css b/llama.cpp/tools/server/public_simplechat/simplechat.css
new file mode 100644
index 0000000..13bfb80
--- /dev/null
+++ b/llama.cpp/tools/server/public_simplechat/simplechat.css
@@ -0,0 +1,79 @@
+/**
+ * the styling of the simplechat web frontend
+ * by Humans for All
+ */
+
+#fullbody {
+ height: 98vh;
+}
+
+.heading {
+ background-color: lightgray;
+}
+
+.session-selected {
+ background-color: lightblue;
+}
+
+.role-system {
+ background-color: lightblue;
+}
+.role-user {
+ background-color: lightgray;
+}
+.role-trim {
+ background-color: lightpink;
+}
+
+.gridx2 {
+ display: grid;
+ grid-template-columns: repeat(2, 1fr);
+ border-bottom-style: dotted;
+ border-bottom-width: thin;
+ border-bottom-color: lightblue;
+}
+
+.flex-grow {
+ flex-grow: 1;
+}
+.float-right {
+ float: right;
+}
+
+#chat-div {
+ overflow: scroll;
+ flex-grow: 1;
+ flex-shrink: 1;
+ min-height: 40vh;
+}
+button {
+ min-width: 8vw;
+}
+
+.sameline {
+ display: flex;
+ flex-direction: row;
+}
+.samecolumn {
+ display: flex;
+ flex-direction: column;
+}
+
+.ul1 {
+ padding-inline-start: 2vw;
+}
+.ul2 {
+ padding-inline-start: 2vw;
+}
+
+* {
+ margin: 0.6vmin;
+}
+
+@media print {
+
+ #fullbody {
+ height: auto;
+ }
+
+}
diff --git a/llama.cpp/tools/server/public_simplechat/simplechat.js b/llama.cpp/tools/server/public_simplechat/simplechat.js
new file mode 100644
index 0000000..2fcd24a
--- /dev/null
+++ b/llama.cpp/tools/server/public_simplechat/simplechat.js
@@ -0,0 +1,929 @@
+// @ts-check
+// A simple completions and chat/completions test related web front end logic
+// by Humans for All
+
+import * as du from "./datautils.mjs";
+import * as ui from "./ui.mjs"
+
// Message role identifiers, matching the role strings expected by
// OpenAI-compatible chat/completions web service endpoints.
class Roles {
    static System = "system";
    static User = "user";
    static Assistant = "assistant";
}
+
// The two supported web service endpoint types and their url suffixes.
class ApiEP {
    // Endpoint type ids used throughout the app and in the settings ui.
    static Type = {
        Chat: "chat",
        Completion: "completion",
    }
    // Url path suffix for each endpoint type id.
    static UrlSuffix = {
        'chat': `/chat/completions`,
        'completion': `/completions`,
    }

    /**
     * Build the full endpoint url from the given base url and endpoint type id.
     * A single trailing slash on the base url is dropped to avoid a double slash.
     * @param {string} baseUrl
     * @param {string} apiEP one of the ApiEP.Type values
     */
    static Url(baseUrl, apiEP) {
        const base = baseUrl.endsWith("/") ? baseUrl.slice(0, -1) : baseUrl;
        return `${base}${this.UrlSuffix[apiEP]}`;
    }

}
+
+
// Static usage-help markup shown in an empty chat view (see SimpleChat.show).
// NOTE: assigned via innerHTML - keep this a trusted constant, never user data.
let gUsageMsg = `
    <p class="role-system">Usage</p>
    <ul class="ul1">
    <li> System prompt above, to try control ai response characteristics.</li>
    <ul class="ul2">
    <li> Completion mode - no system prompt normally.</li>
    </ul>
    <li> Use shift+enter for inserting enter/newline.</li>
    <li> Enter your query to ai assistant below.</li>
    <li> Default ContextWindow = [System, Last Query+Resp, Cur Query].</li>
    <ul class="ul2">
    <li> ChatHistInCtxt, MaxTokens, ModelCtxt window to expand</li>
    </ul>
    </ul>
`;
+
+
+/** @typedef {{role: string, content: string}[]} ChatMessages */
+
+/** @typedef {{iLastSys: number, xchat: ChatMessages}} SimpleChatODS */
+
class SimpleChat {

    /**
     * A single chat session: its messages, the index of the latest system
     * prompt, and any in-flight streamed response text.
     * @param {string} chatId
     */
    constructor(chatId) {
        this.chatId = chatId;
        /**
         * Maintain in a form suitable for common LLM web service chat/completions' messages entry
         * @type {ChatMessages}
         */
        this.xchat = [];
        // Index into xchat of the latest system message, -1 if none yet.
        this.iLastSys = -1;
        // Accumulates the partial assistant response while streaming.
        this.latestResponse = "";
    }

    /** Forget all messages of this session (in memory only; localStorage is untouched). */
    clear() {
        this.xchat = [];
        this.iLastSys = -1;
    }

    /** The localStorage key under which this session is saved. */
    ods_key() {
        return `SimpleChat-${this.chatId}`
    }

    /** Persist the messages and system prompt index to localStorage. */
    save() {
        /** @type {SimpleChatODS} */
        let ods = {iLastSys: this.iLastSys, xchat: this.xchat};
        localStorage.setItem(this.ods_key(), JSON.stringify(ods));
    }

    /** Restore a previously saved session from localStorage, if one exists. */
    load() {
        let sods = localStorage.getItem(this.ods_key());
        if (sods == null) {
            return;
        }
        /** @type {SimpleChatODS} */
        let ods = JSON.parse(sods);
        this.iLastSys = ods.iLastSys;
        this.xchat = ods.xchat;
    }

    /**
     * Recent chat messages.
     * If iRecentUserMsgCnt < 0
     *   Then return the full chat history
     * Else
     *   Return chat messages from latest going back till the last/latest system prompt.
     *   While keeping track that the number of user queries/messages doesnt exceed iRecentUserMsgCnt.
     * The latest system prompt, if any, is always included at the front.
     * @param {number} iRecentUserMsgCnt
     */
    recent_chat(iRecentUserMsgCnt) {
        if (iRecentUserMsgCnt < 0) {
            return this.xchat;
        }
        if (iRecentUserMsgCnt == 0) {
            console.warn("WARN:SimpleChat:SC:RecentChat:iRecentUsermsgCnt of 0 means no user message/query sent");
        }
        /** @type{ChatMessages} */
        let rchat = [];
        let sysMsg = this.get_system_latest();
        if (sysMsg.length != 0) {
            rchat.push({role: Roles.System, content: sysMsg});
        }
        // Walk backwards from the end to find where the window of the last
        // iRecentUserMsgCnt user messages starts (never crossing the latest system msg).
        let iUserCnt = 0;
        let iStart = this.xchat.length;
        for(let i=this.xchat.length-1; i > this.iLastSys; i--) {
            if (iUserCnt >= iRecentUserMsgCnt) {
                break;
            }
            let msg = this.xchat[i];
            if (msg.role == Roles.User) {
                iStart = i;
                iUserCnt += 1;
            }
        }
        // Copy that window forward, skipping system messages (already handled above).
        for(let i = iStart; i < this.xchat.length; i++) {
            let msg = this.xchat[i];
            if (msg.role == Roles.System) {
                continue;
            }
            rchat.push({role: msg.role, content: msg.content});
        }
        return rchat;
    }

    /**
     * Collate the latest response from the server/ai-model, as it is becoming available.
     * This is mainly useful for the stream mode.
     * @param {string} content
     */
    append_response(content) {
        this.latestResponse += content;
    }

    /**
     * Add an entry into xchat. Persists the session on success.
     * @param {string} role
     * @param {string|undefined|null} content
     * @returns {boolean} false if content is empty/missing and nothing was added
     */
    add(role, content) {
        if ((content == undefined) || (content == null) || (content == "")) {
            return false;
        }
        this.xchat.push( {role: role, content: content} );
        if (role == Roles.System) {
            this.iLastSys = this.xchat.length - 1;
        }
        this.save();
        return true;
    }

    /**
     * Show the contents in the specified div.
     * If the chat window is empty, show usage help and the restore-session ui instead.
     * @param {HTMLDivElement} div
     * @param {boolean} bClear
     * @returns the last message element shown, if any
     */
    show(div, bClear=true) {
        if (bClear) {
            div.replaceChildren();
        }
        let last = undefined;
        for(const x of this.recent_chat(gMe.iRecentUserMsgCnt)) {
            let entry = ui.el_create_append_p(`${x.role}: ${x.content}`, div);
            entry.className = `role-${x.role}`;
            last = entry;
        }
        if (last !== undefined) {
            last.scrollIntoView(false);
        } else {
            if (bClear) {
                div.innerHTML = gUsageMsg;
                gMe.setup_load(div, this);
                gMe.show_info(div);
            }
        }
        return last;
    }

    /**
     * Setup the fetch headers.
     * It picks the headers from gMe.headers.
     * It inserts Authorization only if its non-empty.
     * @param {string} apiEP currently unused; kept for future endpoint-specific headers
     */
    fetch_headers(apiEP) {
        let headers = new Headers();
        for(let k in gMe.headers) {
            let v = gMe.headers[k];
            if ((k == "Authorization") && (v.trim() == "")) {
                continue;
            }
            headers.append(k, v);
        }
        return headers;
    }

    /**
     * Add needed fields wrt json object to be sent wrt LLM web services completions endpoint.
     * The needed fields/options are picked from a global object.
     * Add optional stream flag, if required.
     * Convert the json into string.
     * @param {Object} obj
     */
    request_jsonstr_extend(obj) {
        for(let k in gMe.apiRequestOptions) {
            obj[k] = gMe.apiRequestOptions[k];
        }
        if (gMe.bStream) {
            obj["stream"] = true;
        }
        return JSON.stringify(obj);
    }

    /**
     * Return a string form of json object suitable for chat/completions
     */
    request_messages_jsonstr() {
        let req = {
            messages: this.recent_chat(gMe.iRecentUserMsgCnt),
        }
        return this.request_jsonstr_extend(req);
    }

    /**
     * Return a string form of json object suitable for /completions
     * @param {boolean} bInsertStandardRolePrefix Insert "<THE_ROLE>: " as prefix wrt each role's message
     */
    request_prompt_jsonstr(bInsertStandardRolePrefix) {
        let prompt = "";
        let iCnt = 0;
        for(const chat of this.recent_chat(gMe.iRecentUserMsgCnt)) {
            iCnt += 1;
            if (iCnt > 1) {
                prompt += "\n";
            }
            if (bInsertStandardRolePrefix) {
                prompt += `${chat.role}: `;
            }
            prompt += `${chat.content}`;
        }
        let req = {
            prompt: prompt,
        }
        return this.request_jsonstr_extend(req);
    }

    /**
     * Return a string form of json object suitable for specified api endpoint.
     * @param {string} apiEP
     */
    request_jsonstr(apiEP) {
        if (apiEP == ApiEP.Type.Chat) {
            return this.request_messages_jsonstr();
        } else {
            return this.request_prompt_jsonstr(gMe.bCompletionInsertStandardRolePrefix);
        }
    }

    /**
     * Extract the ai-model/assistant's response from the http response got.
     * Optionally trim the message wrt any garbage at the end.
     * @param {any} respBody
     * @param {string} apiEP
     */
    response_extract(respBody, apiEP) {
        let assistant = "";
        if (apiEP == ApiEP.Type.Chat) {
            assistant = respBody["choices"][0]["message"]["content"];
        } else {
            try {
                assistant = respBody["choices"][0]["text"];
            } catch {
                assistant = respBody["content"];
            }
        }
        return assistant;
    }

    /**
     * Extract the ai-model/assistant's response from the http response got in streaming mode.
     * @param {any} respBody
     * @param {string} apiEP
     */
    response_extract_stream(respBody, apiEP) {
        let assistant = "";
        if (apiEP == ApiEP.Type.Chat) {
            if (respBody["choices"][0]["finish_reason"] !== "stop") {
                // Streaming deltas may omit content (eg an initial role-only chunk),
                // guard so the literal string "undefined" is never appended.
                let delta = respBody["choices"][0]["delta"];
                if ((delta !== undefined) && (delta["content"] !== undefined) && (delta["content"] !== null)) {
                    assistant = delta["content"];
                }
            }
        } else {
            try {
                assistant = respBody["choices"][0]["text"];
            } catch {
                assistant = respBody["content"];
            }
        }
        return assistant;
    }

    /**
     * Allow setting of system prompt, but only at begining.
     * @param {string} sysPrompt
     * @param {string} msgTag
     */
    add_system_begin(sysPrompt, msgTag) {
        if (this.xchat.length == 0) {
            if (sysPrompt.length > 0) {
                return this.add(Roles.System, sysPrompt);
            }
        } else {
            if (sysPrompt.length > 0) {
                if (this.xchat[0].role !== Roles.System) {
                    console.error(`ERRR:SimpleChat:SC:${msgTag}:You need to specify system prompt before any user query, ignoring...`);
                } else {
                    if (this.xchat[0].content !== sysPrompt) {
                        console.error(`ERRR:SimpleChat:SC:${msgTag}:You cant change system prompt, mid way through, ignoring...`);
                    }
                }
            }
        }
        return false;
    }

    /**
     * Allow setting of system prompt, at any time.
     * Only adds a new system message if it differs from the current latest one.
     * @param {string} sysPrompt
     * @param {string} msgTag
     */
    add_system_anytime(sysPrompt, msgTag) {
        if (sysPrompt.length <= 0) {
            return false;
        }

        if (this.iLastSys < 0) {
            return this.add(Roles.System, sysPrompt);
        }

        let lastSys = this.xchat[this.iLastSys].content;
        if (lastSys !== sysPrompt) {
            return this.add(Roles.System, sysPrompt);
        }
        return false;
    }

    /**
     * Retrieve the latest system prompt, or "" if none has been set.
     */
    get_system_latest() {
        if (this.iLastSys == -1) {
            return "";
        }
        let sysPrompt = this.xchat[this.iLastSys].content;
        return sysPrompt;
    }


    /**
     * Handle the multipart response from server/ai-model.
     * Decodes the SSE-style "data:" lines chunk by chunk, collating the partial
     * response into latestResponse and updating the shown paragraph as it grows.
     * @param {Response} resp
     * @param {string} apiEP
     * @param {HTMLDivElement} elDiv
     */
    async handle_response_multipart(resp, apiEP, elDiv) {
        let elP = ui.el_create_append_p("", elDiv);
        if (!resp.body) {
            throw Error("ERRR:SimpleChat:SC:HandleResponseMultiPart:No body...");
        }
        let tdUtf8 = new TextDecoder("utf-8");
        let rr = resp.body.getReader();
        this.latestResponse = "";
        let xLines = new du.NewLines();
        while(true) {
            let { value: cur, done: done } = await rr.read();
            if (cur) {
                // stream:true keeps multi-byte utf8 sequences split across chunks intact.
                let curBody = tdUtf8.decode(cur, {stream: true});
                console.debug("DBUG:SC:PART:Str:", curBody);
                xLines.add_append(curBody);
            }
            while(true) {
                let curLine = xLines.shift(!done);
                if (curLine == undefined) {
                    break;
                }
                if (curLine.trim() == "") {
                    continue;
                }
                if (curLine.startsWith("data:")) {
                    curLine = curLine.substring(5);
                }
                if (curLine.trim() === "[DONE]") {
                    // end-of-stream marker; stop parsing lines from this chunk.
                    break;
                }
                let curJson = JSON.parse(curLine);
                console.debug("DBUG:SC:PART:Json:", curJson);
                this.append_response(this.response_extract_stream(curJson, apiEP));
            }
            elP.innerText = this.latestResponse;
            elP.scrollIntoView(false);
            if (done) {
                break;
            }
        }
        console.debug("DBUG:SC:PART:Full:", this.latestResponse);
        return this.latestResponse;
    }

    /**
     * Handle the oneshot response from server/ai-model
     * @param {Response} resp
     * @param {string} apiEP
     */
    async handle_response_oneshot(resp, apiEP) {
        let respBody = await resp.json();
        console.debug(`DBUG:SimpleChat:SC:${this.chatId}:HandleUserSubmit:RespBody:${JSON.stringify(respBody)}`);
        return this.response_extract(respBody, apiEP);
    }

    /**
     * Handle the response from the server be it in oneshot or multipart/stream mode.
     * Also take care of the optional garbage trimming.
     * On a streaming error, whatever partial response was collected is still
     * saved to the chat before the error is rethrown.
     * @param {Response} resp
     * @param {string} apiEP
     * @param {HTMLDivElement} elDiv
     */
    async handle_response(resp, apiEP, elDiv) {
        let theResp = {
            assistant: "",
            trimmed: "",
        }
        if (gMe.bStream) {
            try {
                theResp.assistant = await this.handle_response_multipart(resp, apiEP, elDiv);
                this.latestResponse = "";
            } catch (error) {
                theResp.assistant = this.latestResponse;
                this.add(Roles.Assistant, theResp.assistant);
                this.latestResponse = "";
                throw error;
            }
        } else {
            theResp.assistant = await this.handle_response_oneshot(resp, apiEP);
        }
        if (gMe.bTrimGarbage) {
            let origMsg = theResp.assistant;
            theResp.assistant = du.trim_garbage_at_end(origMsg);
            theResp.trimmed = origMsg.substring(theResp.assistant.length);
        }
        this.add(Roles.Assistant, theResp.assistant);
        return theResp;
    }

}
+
+
class MultiChatUI {

    /**
     * Manages multiple SimpleChat sessions and the shared ui elements.
     * Throws if any required element is missing from the html.
     */
    constructor() {
        /** @type {Object<string, SimpleChat>} */
        this.simpleChats = {};
        /** @type {string} */
        this.curChatId = "";

        // the ui elements
        this.elInSystem = /** @type{HTMLInputElement} */(document.getElementById("system-in"));
        this.elDivChat = /** @type{HTMLDivElement} */(document.getElementById("chat-div"));
        this.elBtnUser = /** @type{HTMLButtonElement} */(document.getElementById("user-btn"));
        this.elInUser = /** @type{HTMLInputElement} */(document.getElementById("user-in"));
        this.elDivHeading = /** @type{HTMLSelectElement} */(document.getElementById("heading"));
        this.elDivSessions = /** @type{HTMLDivElement} */(document.getElementById("sessions-div"));
        this.elBtnSettings = /** @type{HTMLButtonElement} */(document.getElementById("settings"));

        this.validate_element(this.elInSystem, "system-in");
        this.validate_element(this.elDivChat, "chat-div");
        this.validate_element(this.elBtnUser, "user-btn");
        this.validate_element(this.elInUser, "user-in");
        this.validate_element(this.elDivHeading, "heading");
        this.validate_element(this.elDivSessions, "sessions-div");
        this.validate_element(this.elBtnSettings, "settings");
    }

    /**
     * Check if the element got
     * @param {HTMLElement | null} el
     * @param {string} msgTag
     */
    validate_element(el, msgTag) {
        if (el == null) {
            throw Error(`ERRR:SimpleChat:MCUI:${msgTag} element missing in html...`);
        } else {
            console.debug(`INFO:SimpleChat:MCUI:${msgTag} Id[${el.id}] Name[${el["name"]}]`);
        }
    }

    /**
     * Reset user input ui.
     * * clear user input
     * * enable user input
     * * set focus to user input
     */
    ui_reset_userinput() {
        this.elInUser.value = "";
        this.elInUser.disabled = false;
        this.elInUser.focus();
    }

    /**
     * Setup the needed callbacks wrt UI, curChatId to defaultChatId and
     * optionally switch to specified defaultChatId.
     * @param {string} defaultChatId
     * @param {boolean} bSwitchSession
     */
    setup_ui(defaultChatId, bSwitchSession=false) {

        this.curChatId = defaultChatId;
        if (bSwitchSession) {
            this.handle_session_switch(this.curChatId);
        }

        this.elBtnSettings.addEventListener("click", (ev)=>{
            this.elDivChat.replaceChildren();
            gMe.show_settings(this.elDivChat);
        });

        this.elBtnUser.addEventListener("click", (ev)=>{
            if (this.elInUser.disabled) {
                return;
            }
            this.handle_user_submit(this.curChatId, gMe.apiEP).catch((/** @type{Error} */reason)=>{
                let msg = `ERRR:SimpleChat\nMCUI:HandleUserSubmit:${this.curChatId}\n${reason.name}:${reason.message}`;
                console.error(msg.replace("\n", ":"));
                alert(msg);
                this.ui_reset_userinput();
            });
        });

        this.elInUser.addEventListener("keyup", (ev)=> {
            // allow user to insert enter into their message using shift+enter.
            // while just pressing enter key will lead to submitting.
            if ((ev.key === "Enter") && (!ev.shiftKey)) {
                let value = this.elInUser.value;
                this.elInUser.value = value.substring(0,value.length-1);
                this.elBtnUser.click();
                ev.preventDefault();
            }
        });

        this.elInSystem.addEventListener("keyup", (ev)=> {
            // allow user to insert enter into the system prompt using shift+enter.
            // while just pressing enter key will lead to setting the system prompt.
            if ((ev.key === "Enter") && (!ev.shiftKey)) {
                let value = this.elInSystem.value;
                this.elInSystem.value = value.substring(0,value.length-1);
                let chat = this.simpleChats[this.curChatId];
                chat.add_system_anytime(this.elInSystem.value, this.curChatId);
                chat.show(this.elDivChat);
                ev.preventDefault();
            }
        });

    }

    /**
     * Setup a new chat session and optionally switch to it.
     * @param {string} chatId
     * @param {boolean} bSwitchSession
     */
    new_chat_session(chatId, bSwitchSession=false) {
        this.simpleChats[chatId] = new SimpleChat(chatId);
        if (bSwitchSession) {
            this.handle_session_switch(chatId);
        }
    }


    /**
     * Handle user query submit request, wrt specified chat session.
     * @param {string} chatId
     * @param {string} apiEP
     */
    async handle_user_submit(chatId, apiEP) {

        let chat = this.simpleChats[chatId];

        // In completion mode, if configured, clear any previous chat history.
        // So if user wants to simulate a multi-chat based completion query,
        // they will have to enter the full thing, as a suitable multiline
        // user input/query.
        if ((apiEP == ApiEP.Type.Completion) && (gMe.bCompletionFreshChatAlways)) {
            chat.clear();
        }

        chat.add_system_anytime(this.elInSystem.value, chatId);

        let content = this.elInUser.value;
        if (!chat.add(Roles.User, content)) {
            console.debug(`WARN:SimpleChat:MCUI:${chatId}:HandleUserSubmit:Ignoring empty user input...`);
            return;
        }
        chat.show(this.elDivChat);

        let theUrl = ApiEP.Url(gMe.baseURL, apiEP);
        let theBody = chat.request_jsonstr(apiEP);

        // Disable input while the request is in flight; re-enabled in ui_reset_userinput.
        this.elInUser.value = "working...";
        this.elInUser.disabled = true;
        console.debug(`DBUG:SimpleChat:MCUI:${chatId}:HandleUserSubmit:${theUrl}:ReqBody:${theBody}`);
        let theHeaders = chat.fetch_headers(apiEP);
        let resp = await fetch(theUrl, {
            method: "POST",
            headers: theHeaders,
            body: theBody,
        });

        let theResp = await chat.handle_response(resp, apiEP, this.elDivChat);
        // The user may have switched sessions while awaiting; only refresh the
        // chat view if this session is still the one being shown.
        if (chatId == this.curChatId) {
            chat.show(this.elDivChat);
            if (theResp.trimmed.length > 0) {
                let p = ui.el_create_append_p(`TRIMMED:${theResp.trimmed}`, this.elDivChat);
                p.className="role-trim";
            }
        } else {
            console.debug(`DBUG:SimpleChat:MCUI:HandleUserSubmit:ChatId has changed:[${chatId}] [${this.curChatId}]`);
        }
        this.ui_reset_userinput();
    }

    /**
     * Show buttons for NewChat and available chat sessions, in the passed elDiv.
     * If elDiv is undefined/null, then use this.elDivSessions.
     * Take care of highlighting the selected chat-session's btn.
     * @param {HTMLDivElement | undefined} elDiv
     */
    show_sessions(elDiv=undefined) {
        if (!elDiv) {
            elDiv = this.elDivSessions;
        }
        elDiv.replaceChildren();
        // Btn for creating new chat session
        let btnNew = ui.el_create_button("New CHAT", (ev)=> {
            if (this.elInUser.disabled) {
                console.error(`ERRR:SimpleChat:MCUI:NewChat:Current session [${this.curChatId}] awaiting response, ignoring request...`);
                alert("ERRR:SimpleChat\nMCUI:NewChat\nWait for response to pending query, before starting new chat session");
                return;
            }
            let chatId = `Chat${Object.keys(this.simpleChats).length}`;
            let chatIdGot = prompt("INFO:SimpleChat\nMCUI:NewChat\nEnter id for new chat session", chatId);
            if (!chatIdGot) {
                console.error("ERRR:SimpleChat:MCUI:NewChat:Skipping based on user request...");
                return;
            }
            this.new_chat_session(chatIdGot, true);
            this.create_session_btn(elDiv, chatIdGot);
            ui.el_children_config_class(elDiv, chatIdGot, "session-selected", "");
        });
        elDiv.appendChild(btnNew);
        // Btns for existing chat sessions
        let chatIds = Object.keys(this.simpleChats);
        for(let cid of chatIds) {
            let btn = this.create_session_btn(elDiv, cid);
            if (cid == this.curChatId) {
                btn.className = "session-selected";
            }
        }
    }

    /**
     * Create a button for the given chat session id and append it to elDiv.
     * Clicking it switches to that session, unless a query is pending.
     * @param {HTMLDivElement} elDiv
     * @param {string} cid
     */
    create_session_btn(elDiv, cid) {
        let btn = ui.el_create_button(cid, (ev)=>{
            let target = /** @type{HTMLButtonElement} */(ev.target);
            console.debug(`DBUG:SimpleChat:MCUI:SessionClick:${target.id}`);
            if (this.elInUser.disabled) {
                console.error(`ERRR:SimpleChat:MCUI:SessionClick:${target.id}:Current session [${this.curChatId}] awaiting response, ignoring switch...`);
                alert("ERRR:SimpleChat\nMCUI:SessionClick\nWait for response to pending query, before switching");
                return;
            }
            this.handle_session_switch(target.id);
            ui.el_children_config_class(elDiv, target.id, "session-selected", "");
        });
        elDiv.appendChild(btn);
        return btn;
    }

    /**
     * Switch ui to the specified chatId and set curChatId to same.
     * @param {string} chatId
     */
    async handle_session_switch(chatId) {
        let chat = this.simpleChats[chatId];
        if (chat == undefined) {
            console.error(`ERRR:SimpleChat:MCUI:HandleSessionSwitch:${chatId} missing...`);
            return;
        }
        this.elInSystem.value = chat.get_system_latest();
        this.elInUser.value = "";
        chat.show(this.elDivChat);
        this.elInUser.focus();
        this.curChatId = chatId;
        console.log(`INFO:SimpleChat:MCUI:HandleSessionSwitch:${chatId} entered...`);
    }

}
+
+
class Me {

    /**
     * Global app state: configurable parameters, request options/headers,
     * and the multi chat session ui manager.
     */
    constructor() {
        this.baseURL = "http://127.0.0.1:8080";
        this.defaultChatIds = [ "Default", "Other" ];
        this.multiChat = new MultiChatUI();
        this.bStream = true;
        this.bCompletionFreshChatAlways = true;
        this.bCompletionInsertStandardRolePrefix = false;
        this.bTrimGarbage = true;
        // How many recent user messages (plus latest system prompt) go into the request context.
        this.iRecentUserMsgCnt = 2;
        // Display-label to count mapping for the ChatHistoryInCtxt setting.
        this.sRecentUserMsgCnt = {
            "Full": -1,
            "Last0": 1,
            "Last1": 2,
            "Last2": 3,
            "Last4": 5,
        };
        this.apiEP = ApiEP.Type.Chat;
        this.headers = {
            "Content-Type": "application/json",
            "Authorization": "", // Authorization: Bearer OPENAI_API_KEY
        }
        // Add needed fields wrt json object to be sent wrt LLM web services completions endpoint.
        this.apiRequestOptions = {
            "model": "gpt-3.5-turbo",
            "temperature": 0.7,
            "max_tokens": 1024,
            "n_predict": 1024,
            "cache_prompt": false,
            //"frequency_penalty": 1.2,
            //"presence_penalty": 1.2,
        };
    }

    /**
     * Disable console.debug by mapping it to a empty function.
     * The original is kept in this.console_debug, should it need restoring.
     */
    debug_disable() {
        this.console_debug = console.debug;
        console.debug = () => {

        };
    }

    /**
     * Setup the load saved chat ui.
     * Shown only when a saved session exists in localStorage for this chat.
     * @param {HTMLDivElement} div
     * @param {SimpleChat} chat
     */
    setup_load(div, chat) {
        if (!(chat.ods_key() in localStorage)) {
            return;
        }
        div.innerHTML += `<p class="role-system">Restore</p>
            <p>Load previously saved chat session, if available</p>`;
        let btn = ui.el_create_button(chat.ods_key(), (ev)=>{
            // use console.debug so the DBUG message honours debug_disable.
            console.debug("DBUG:SimpleChat:SC:Load", chat);
            chat.load();
            queueMicrotask(()=>{
                chat.show(div);
                this.multiChat.elInSystem.value = chat.get_system_latest();
            });
        });
        div.appendChild(btn);
    }

    /**
     * Show the configurable parameters info in the passed Div element.
     * @param {HTMLDivElement} elDiv
     * @param {boolean} bAll
     */
    show_info(elDiv, bAll=false) {

        let p = ui.el_create_append_p("Settings (devel-tools-console document[gMe])", elDiv);
        p.className = "role-system";

        if (bAll) {

            ui.el_create_append_p(`baseURL:${this.baseURL}`, elDiv);

            ui.el_create_append_p(`Authorization:${this.headers["Authorization"]}`, elDiv);

            ui.el_create_append_p(`bStream:${this.bStream}`, elDiv);

            ui.el_create_append_p(`bTrimGarbage:${this.bTrimGarbage}`, elDiv);

            ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv);

            ui.el_create_append_p(`iRecentUserMsgCnt:${this.iRecentUserMsgCnt}`, elDiv);

            ui.el_create_append_p(`bCompletionFreshChatAlways:${this.bCompletionFreshChatAlways}`, elDiv);

            ui.el_create_append_p(`bCompletionInsertStandardRolePrefix:${this.bCompletionInsertStandardRolePrefix}`, elDiv);

        }

        ui.el_create_append_p(`apiRequestOptions:${JSON.stringify(this.apiRequestOptions, null, " - ")}`, elDiv);
        ui.el_create_append_p(`headers:${JSON.stringify(this.headers, null, " - ")}`, elDiv);

    }

    /**
     * Auto create ui input elements for fields in apiRequestOptions
     * Currently supports text, number and boolean field types.
     * @param {HTMLDivElement} elDiv
     */
    show_settings_apirequestoptions(elDiv) {
        let typeDict = {
            "string": "text",
            "number": "number",
        };
        let fs = document.createElement("fieldset");
        let legend = document.createElement("legend");
        legend.innerText = "ApiRequestOptions";
        fs.appendChild(legend);
        elDiv.appendChild(fs);
        for(const k in this.apiRequestOptions) {
            let val = this.apiRequestOptions[k];
            let type = typeof(val);
            if (((type == "string") || (type == "number"))) {
                let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.apiRequestOptions[k], (val)=>{
                    if (type == "number") {
                        val = Number(val);
                    }
                    this.apiRequestOptions[k] = val;
                });
                fs.appendChild(inp.div);
            } else if (type == "boolean") {
                // NOTE: was `Set{k}` (missing $), which gave every bool option the same literal id.
                let bbtn = ui.el_creatediv_boolbutton(`Set${k}`, k, {true: "true", false: "false"}, val, (userVal)=>{
                    this.apiRequestOptions[k] = userVal;
                });
                fs.appendChild(bbtn.div);
            }
        }
    }

    /**
     * Show settings ui for configurable parameters, in the passed Div element.
     * @param {HTMLDivElement} elDiv
     */
    show_settings(elDiv) {

        let inp = ui.el_creatediv_input("SetBaseURL", "BaseURL", "text", this.baseURL, (val)=>{
            this.baseURL = val;
        });
        elDiv.appendChild(inp.div);

        inp = ui.el_creatediv_input("SetAuthorization", "Authorization", "text", this.headers["Authorization"], (val)=>{
            this.headers["Authorization"] = val;
        });
        inp.el.placeholder = "Bearer OPENAI_API_KEY";
        elDiv.appendChild(inp.div);

        let bb = ui.el_creatediv_boolbutton("SetStream", "Stream", {true: "[+] yes stream", false: "[-] do oneshot"}, this.bStream, (val)=>{
            this.bStream = val;
        });
        elDiv.appendChild(bb.div);

        bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{
            this.bTrimGarbage = val;
        });
        elDiv.appendChild(bb.div);

        this.show_settings_apirequestoptions(elDiv);

        let sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{
            this.apiEP = ApiEP.Type[val];
        });
        elDiv.appendChild(sel.div);

        sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{
            this.iRecentUserMsgCnt = this.sRecentUserMsgCnt[val];
        });
        elDiv.appendChild(sel.div);

        bb = ui.el_creatediv_boolbutton("SetCompletionFreshChatAlways", "CompletionFreshChatAlways", {true: "[+] yes fresh", false: "[-] no, with history"}, this.bCompletionFreshChatAlways, (val)=>{
            this.bCompletionFreshChatAlways = val;
        });
        elDiv.appendChild(bb.div);

        bb = ui.el_creatediv_boolbutton("SetCompletionInsertStandardRolePrefix", "CompletionInsertStandardRolePrefix", {true: "[+] yes insert", false: "[-] dont insert"}, this.bCompletionInsertStandardRolePrefix, (val)=>{
            this.bCompletionInsertStandardRolePrefix = val;
        });
        elDiv.appendChild(bb.div);

    }

}
+
+
/** @type {Me} */
let gMe;

/** Entry point: create global state, the default chat sessions, and wire up the ui. */
function startme() {
    console.log("INFO:SimpleChat:StartMe:Starting...");
    gMe = new Me();
    gMe.debug_disable();
    // Expose globals on document, for poking at from the devel-tools console.
    document["gMe"] = gMe;
    document["du"] = du;
    for (let cid of gMe.defaultChatIds) {
        gMe.multiChat.new_chat_session(cid);
    }
    gMe.multiChat.setup_ui(gMe.defaultChatIds[0], true);
    gMe.multiChat.show_sessions();
}

document.addEventListener("DOMContentLoaded", startme);
diff --git a/llama.cpp/tools/server/public_simplechat/simplechat_screens.webp b/llama.cpp/tools/server/public_simplechat/simplechat_screens.webp
new file mode 100644
index 0000000..ccea443
--- /dev/null
+++ b/llama.cpp/tools/server/public_simplechat/simplechat_screens.webp
Binary files differ
diff --git a/llama.cpp/tools/server/public_simplechat/ui.mjs b/llama.cpp/tools/server/public_simplechat/ui.mjs
new file mode 100644
index 0000000..b2d5b9a
--- /dev/null
+++ b/llama.cpp/tools/server/public_simplechat/ui.mjs
@@ -0,0 +1,211 @@
+//@ts-check
+// Helpers to work with html elements
+// by Humans for All
+//
+
+
/**
 * Walk the direct children of elBase and set each child's class:
 * the child whose id matches idSelected gets classSelected, every
 * other child gets classUnSelected.
 * @param {HTMLDivElement} elBase
 * @param {string} idSelected
 * @param {string} classSelected
 * @param {string} classUnSelected
 */
export function el_children_config_class(elBase, idSelected, classSelected, classUnSelected="") {
    for (let child of elBase.children) {
        child.className = (child.id == idSelected) ? classSelected : classUnSelected;
    }
}
+
/**
 * Create a button element, wire its click handler and return it.
 * name and innerText fall back to the id when not given.
 * @param {string} id
 * @param {(this: HTMLButtonElement, ev: MouseEvent) => any} callback
 * @param {string | undefined} name
 * @param {string | undefined} innerText
 */
export function el_create_button(id, callback, name=undefined, innerText=undefined) {
    const btn = document.createElement("button");
    btn.id = id;
    btn.name = name || id;
    btn.innerText = innerText || id;
    btn.addEventListener("click", callback);
    return btn;
}
+
/**
 * Create a paragraph element holding the given text; optionally give it
 * an id and append it to a parent element.
 * @param {string} text
 * @param {HTMLElement | undefined} elParent
 * @param {string | undefined} id
 */
export function el_create_append_p(text, elParent=undefined, id=undefined) {
    const para = document.createElement("p");
    if (id) {
        para.id = id;
    }
    para.innerText = text;
    if (elParent) {
        elParent.appendChild(para);
    }
    return para;
}
+
/**
 * Create a button which represents a bool value using the specified text
 * for true and false. Whenever the user clicks it, the value toggles, the
 * shown text updates and cb is invoked with the new value.
 *
 * @param {string} id
 * @param {{true: string, false: string}} texts
 * @param {boolean} defaultValue
 * @param {function(boolean):void} cb
 */
export function el_create_boolbutton(id, texts, defaultValue, cb) {
    const el = document.createElement("button");
    if (id) {
        el.id = id;
    }
    // Stash state on the element itself, so the click handler is self-contained.
    el["xtexts"] = structuredClone(texts);
    el["xbool"] = defaultValue;
    el.innerText = el["xtexts"][String(defaultValue)];
    el.addEventListener('click', () => {
        const flipped = !el["xbool"];
        el["xbool"] = flipped;
        el.innerText = el["xtexts"][String(flipped)];
        cb(flipped);
    });
    return el;
}
+
/**
 * Create a labelled bool-toggle button, wrapped together in a div.
 * @param {string} id
 * @param {string} label
 * @param {{ true: string; false: string; }} texts
 * @param {boolean} defaultValue
 * @param {(arg0: boolean) => void} cb
 * @param {string} className
 */
export function el_creatediv_boolbutton(id, label, texts, defaultValue, cb, className="gridx2") {
    const div = document.createElement("div");
    div.className = className;
    const lbl = document.createElement("label");
    lbl.innerText = label;
    lbl.setAttribute("for", id);
    div.appendChild(lbl);
    const btn = el_create_boolbutton(id, texts, defaultValue, cb);
    div.appendChild(btn);
    return { div, el: btn };
}
+
+
/**
 * Create a select ui element, with a set of options to select from.
 * * options: an object which contains name-value pairs
 * * defaultOption: the value whose name should be chosen, by default.
 * * cb : the callback receives the name string of the option selected.
 *
 * @param {string} id
 * @param {Object<string,*>} options
 * @param {*} defaultOption
 * @param {function(string):void} cb
 */
export function el_create_select(id, options, defaultOption, cb) {
    const el = document.createElement("select");
    // Remember the initial choice and a private copy of the options.
    el["xselected"] = defaultOption;
    el["xoptions"] = structuredClone(options);
    for (const [name, value] of Object.entries(options)) {
        const op = document.createElement("option");
        op.value = name;
        op.innerText = name;
        if (value == defaultOption) {
            op.selected = true;
        }
        el.appendChild(op);
    }
    if (id) {
        el.id = id;
        el.name = id;
    }
    el.addEventListener('change', (ev) => {
        const target = /** @type{HTMLSelectElement} */(ev.target);
        console.log("DBUG:UI:Select:", id, ":", target.value);
        cb(target.value);
    });
    return el;
}
+
/**
 * Create a labelled select ui element with a set of options, wrapped in a div.
 *
 * @param {string} id
 * @param {any} label
 * @param {{ [x: string]: any; }} options
 * @param {any} defaultOption
 * @param {(arg0: string) => void} cb
 * @param {string} className
 */
export function el_creatediv_select(id, label, options, defaultOption, cb, className="gridx2") {
    const div = document.createElement("div");
    div.className = className;
    const lbl = document.createElement("label");
    lbl.innerText = label;
    lbl.setAttribute("for", id);
    div.appendChild(lbl);
    const sel = el_create_select(id, options, defaultOption, cb);
    div.appendChild(sel);
    return { div, el: sel };
}
+
+
/**
 * Create an input ui element of the given type, preloaded with defaultValue.
 * cb receives the element's current value on every change event.
 *
 * @param {string} id
 * @param {string} type
 * @param {any} defaultValue
 * @param {function(any):void} cb
 */
export function el_create_input(id, type, defaultValue, cb) {
    const el = document.createElement("input");
    if (id) {
        el.id = id;
    }
    el.type = type;
    el.value = defaultValue;
    el.addEventListener('change', () => {
        cb(el.value);
    });
    return el;
}
+
/**
 * Create a labelled input ui element, wrapped together in a div.
 *
 * @param {string} id
 * @param {string} label
 * @param {string} type
 * @param {any} defaultValue
 * @param {function(any):void} cb
 * @param {string} className
 */
export function el_creatediv_input(id, label, type, defaultValue, cb, className="gridx2") {
    const div = document.createElement("div");
    div.className = className;
    const lbl = document.createElement("label");
    lbl.innerText = label;
    lbl.setAttribute("for", id);
    div.appendChild(lbl);
    const inp = el_create_input(id, type, defaultValue, cb);
    div.appendChild(inp);
    return { div, el: inp };
}