Engage!

author: Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-12 20:57:17 +0100
committer: Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-12 20:57:17 +0100
commit: b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree: 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/common/speculative.h
download: llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
1 files changed, 41 insertions, 0 deletions
diff --git a/llama.cpp/common/speculative.h b/llama.cpp/common/speculative.h
new file mode 100644
index 0000000..876cde3
--- /dev/null
+++ b/llama.cpp/common/speculative.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include "llama.h"
+#include "common.h"
+
+struct common_speculative;
+
+// comma separated list of all types
+std::string common_speculative_type_name_str();
+
+// convert string to type
+enum common_speculative_type common_speculative_type_from_name(const std::string & name);
+
+// convert type to string
+std::string common_speculative_type_to_str(enum common_speculative_type type);
+
+// check if the llama_context is compatible for speculative decoding
+// note: clears the memory of the context
+bool common_speculative_is_compat(llama_context * ctx_tgt);
+
+common_speculative * common_speculative_init(
+        common_params_speculative & params,
+        llama_context             * ctx_tgt);
+
+void common_speculative_free(common_speculative * spec);
+
+// optionally call once at the beginning of a new generation
+void common_speculative_begin(common_speculative * spec, const llama_tokens & prompt);
+
+// sample up to n_draft tokens and add them to the batch using the draft model
+llama_tokens common_speculative_draft(
+                     common_speculative * spec,
+        const common_params_speculative & params,
+                     const llama_tokens & prompt,
+                            llama_token   id_last);
+
+// informs the speculative decoder that n_accepted tokens were accepted by the target model
+void common_speculative_accept(common_speculative * spec, uint16_t n_accepted);
+
+// print statistics about the speculative decoding
+void common_speculative_print_stats(const common_speculative * spec);
author	Mitja Felicijan <mitja.felicijan@gmail.com>	2026-02-12 20:57:17 +0100
committer	Mitja Felicijan <mitja.felicijan@gmail.com>	2026-02-12 20:57:17 +0100
commit	b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree	211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/common/speculative.h
download	llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz