summary | refs | log | tree | commit | diff
path: root/llama.cpp/common/speculative.h
diff options
context:
space:
mode:
author Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-12 20:57:17 +0100
committer Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-12 20:57:17 +0100
commit b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/common/speculative.h
download llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/common/speculative.h')
-rw-r--r-- llama.cpp/common/speculative.h 41
1 files changed, 41 insertions, 0 deletions
diff --git a/llama.cpp/common/speculative.h b/llama.cpp/common/speculative.h
new file mode 100644
index 0000000..876cde3
--- /dev/null
+++ b/llama.cpp/common/speculative.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include "llama.h"
+#include "common.h"
+
+struct common_speculative;
+
+// comma-separated list of all type names
+std::string common_speculative_type_name_str();
+
+// convert string to type
+enum common_speculative_type common_speculative_type_from_name(const std::string & name);
+
+// convert type to string
+std::string common_speculative_type_to_str(enum common_speculative_type type);
+
+// check if the llama_context is compatible with speculative decoding
+// note: clears the memory of the context
+bool common_speculative_is_compat(llama_context * ctx_tgt);
+
+common_speculative * common_speculative_init(
+ common_params_speculative & params,
+ llama_context * ctx_tgt);
+
+void common_speculative_free(common_speculative * spec);
+
+// optionally call once at the beginning of a new generation
+void common_speculative_begin(common_speculative * spec, const llama_tokens & prompt);
+
+// sample up to n_draft tokens and add them to the batch using the draft model
+llama_tokens common_speculative_draft(
+ common_speculative * spec,
+ const common_params_speculative & params,
+ const llama_tokens & prompt,
+ llama_token id_last);
+
+// informs the speculative decoder that n_accepted tokens were accepted by the target model
+void common_speculative_accept(common_speculative * spec, uint16_t n_accepted);
+
+// print statistics about the speculative decoding
+void common_speculative_print_stats(const common_speculative * spec);