summary refs log tree commit diff
path: root/llama.cpp/tools/server/server-http.h
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp/tools/server/server-http.h')
-rw-r--r-- llama.cpp/tools/server/server-http.h 78
1 files changed, 78 insertions, 0 deletions
diff --git a/llama.cpp/tools/server/server-http.h b/llama.cpp/tools/server/server-http.h
new file mode 100644
index 0000000..24c0b40
--- /dev/null
+++ b/llama.cpp/tools/server/server-http.h
@@ -0,0 +1,78 @@
+#pragma once
+
+#include <atomic>
+#include <functional>
+#include <map>
+#include <string>
+#include <thread>
+
+struct common_params;
+
+// Generator-like API for HTTP response generation.
+// A response object operates in one of 2 modes:
+// 1) normal response: `data` contains the full response body
+// 2) streaming response: each call to next(output) generates the next chunk;
+//    when next(output) returns false, no more data follows the current chunk
+// note: some chunks can be empty, in which case no data is sent for that chunk
+struct server_http_res {
+ std::string content_type = "application/json; charset=utf-8"; // defaults to JSON, UTF-8
+ int status = 200; // response status code; defaults to 200
+ std::string data; // full response body (normal mode)
+ std::map<std::string, std::string> headers; // presumably extra response headers, name -> value — confirm in the implementation
+ // Chunk generator for streaming mode; defaults to empty, in which case is_stream() is false.
+ // TODO: move this to a virtual function once we have proper polymorphism support
+ std::function<bool(std::string &)> next = nullptr;
+ bool is_stream() const { // true when a `next` generator has been set
+ return next != nullptr;
+ }
+
+ virtual ~server_http_res() = default; // virtual: the struct can serve as a polymorphic base
+};
+
+// Owning pointer to a response; used by set_chunked_content_provider, because
+// httplib requires the stream provider to be stored on the heap.
+using server_http_res_ptr = std::unique_ptr<server_http_res>; // NOTE(review): std::unique_ptr needs <memory>, which is not in this header's include list — confirm it arrives transitively
+
+struct server_http_req { // request data handed to a handler (see server_http_context::handler_t)
+ std::map<std::string, std::string> params; // path_params + query_params
+ std::map<std::string, std::string> headers; // reserved for future use
+ std::string path; // reserved for future use
+ std::string body; // request body
+ const std::function<bool()> & should_stop; // reference member: the referenced callable must outlive this request; presumably reports whether processing should be aborted — confirm with the caller
+ // Returns a copy of the value stored under `key` in `params`, or `def` (empty string by default) when the key is absent.
+ std::string get_param(const std::string & key, const std::string & def = "") const {
+ auto it = params.find(key);
+ if (it != params.end()) {
+ return it->second;
+ }
+ return def;
+ }
+};
+
+struct server_http_context { // HTTP server state and route registration; implementation details live in Impl
+ class Impl; // forward declaration only; defined outside this header
+ std::unique_ptr<Impl> pimpl; // owning pointer to the hidden implementation
+
+ std::thread thread; // server thread
+ std::atomic<bool> is_ready = false; // starts false; NOTE(review): who sets it is not visible in this header
+
+ std::string path_prefix;
+ std::string hostname;
+ int port; // NOTE(review): no in-class initializer — indeterminate unless the constructor or init() assigns it; confirm
+ // Constructor and destructor are declared here and defined out of line, since Impl is incomplete in this header.
+ server_http_context();
+ ~server_http_context();
+ // Lifecycle entry points; only the declarations are visible here (bool results presumably signal success — confirm).
+ bool init(const common_params & params);
+ bool start();
+ void stop() const;
+
+ // note: the handler should never throw exceptions
+ using handler_t = std::function<server_http_res_ptr(const server_http_req & req)>;
+ // Associate `handler` with `path`; presumably for HTTP GET and POST requests respectively — confirm in the implementation.
+ void get(const std::string & path, const handler_t & handler) const;
+ void post(const std::string & path, const handler_t & handler) const;
+
+ // for debugging
+ std::string listening_address;
+};