summaryrefslogtreecommitdiff
path: root/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h')
-rw-r--r--llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h52
1 files changed, 52 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h b/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h
new file mode 100644
index 0000000..3acbc6a
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h
@@ -0,0 +1,52 @@
+// helper functions for ggml-metal that are too difficult to implement in Objective-C
+
+#pragma once
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ggml_tensor;
+struct ggml_cgraph;
+
+enum ggml_mem_range_type {
+ MEM_RANGE_TYPE_SRC = 0,
+ MEM_RANGE_TYPE_DST = 1,
+};
+
+// a helper object that can be used for reordering operations to improve concurrency
+//
+// the fundamental idea is that a set of tasks (either ggml ops, or something else) can run concurrently if they
+// don't write to a memory that is being read by another task or written to by another task in the set
+//
+// with this structure, we can add tasks to the set, setting memory constraints. we can also check if a new task
+// can be added to the set without violating the constraints (i.e. if it can be executed concurrently with the
+// tasks already in the set)
+//
+typedef struct ggml_mem_ranges * ggml_mem_ranges_t;
+
+ggml_mem_ranges_t ggml_mem_ranges_init(int debug);
+void ggml_mem_ranges_free(ggml_mem_ranges_t mrs);
+
+// remove all ranges from the set
+void ggml_mem_ranges_reset(ggml_mem_ranges_t mrs);
+
+// add src or dst ranges to track
+bool ggml_mem_ranges_add(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor);
+
+// return false if:
+// - new src range overlaps with any existing dst range
+// - new dst range overlaps with any existing range (src or dst)
+bool ggml_mem_ranges_check(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor);
+
+// reorder the nodes in the graph to improve concurrency, while respecting fusion
+//
+// note: this implementation is generic and not specific to metal
+// if it proves to work well, we can start using it for other backends in the future
+void ggml_graph_optimize(struct ggml_cgraph * gf);
+
+#ifdef __cplusplus
+}
+#endif