diff options
Diffstat (limited to 'llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h b/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h new file mode 100644 index 0000000..3acbc6a --- /dev/null +++ b/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h @@ -0,0 +1,52 @@ +// helper functions for ggml-metal that are too difficult to implement in Objective-C + +#pragma once + +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct ggml_tensor; +struct ggml_cgraph; + +enum ggml_mem_range_type { + MEM_RANGE_TYPE_SRC = 0, + MEM_RANGE_TYPE_DST = 1, +}; + +// a helper object that can be used for reordering operations to improve concurrency +// +// the fundamental idea is that a set of tasks (either ggml ops, or something else) can run concurrently if they +// don't write to a memory that is being read by another task or written to by another task in the set +// +// with this structure, we can add tasks to the set, setting memory constraints. we can also check if a new task +// can be added to the set without violating the constraints (i.e. if it can be executed concurrently with the +// tasks already in the set) +// +typedef struct ggml_mem_ranges * ggml_mem_ranges_t; + +ggml_mem_ranges_t ggml_mem_ranges_init(int debug); +void ggml_mem_ranges_free(ggml_mem_ranges_t mrs); + +// remove all ranges from the set +void ggml_mem_ranges_reset(ggml_mem_ranges_t mrs); + +// add src or dst ranges to track +bool ggml_mem_ranges_add(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor); + +// return false if: +// - new src range overlaps with any existing dst range +// - new dst range overlaps with any existing range (src or dst) +bool ggml_mem_ranges_check(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor); + +// reorder the nodes in the graph to improve concurrency, while respecting fusion +// +// note: this implementation is generic and not specific to metal +// if it proves to work well, we can start using it for other backends in the future +void ggml_graph_optimize(struct ggml_cgraph * gf); + +#ifdef __cplusplus +} +#endif |
