diff options
Diffstat (limited to 'llama.cpp/ggml/src/ggml-backend-impl.h')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-backend-impl.h | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-backend-impl.h b/llama.cpp/ggml/src/ggml-backend-impl.h new file mode 100644 index 0000000..59190b7 --- /dev/null +++ b/llama.cpp/ggml/src/ggml-backend-impl.h @@ -0,0 +1,255 @@ +#pragma once + +// ggml-backend internal header + +#include "ggml-backend.h" + +#ifdef __cplusplus +extern "C" { +#endif + + #define GGML_BACKEND_API_VERSION 2 + + // + // Backend buffer type + // + + struct ggml_backend_buffer_type_i { + const char * (*get_name) (ggml_backend_buffer_type_t buft); + // allocate a buffer of this type + ggml_backend_buffer_t (*alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size); + // tensor alignment + size_t (*get_alignment) (ggml_backend_buffer_type_t buft); + // (optional) max buffer size that can be allocated (defaults to SIZE_MAX) + size_t (*get_max_size) (ggml_backend_buffer_type_t buft); + // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes) + size_t (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); + // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false) + bool (*is_host) (ggml_backend_buffer_type_t buft); + }; + + struct ggml_backend_buffer_type { + struct ggml_backend_buffer_type_i iface; + ggml_backend_dev_t device; + void * context; + }; + + // + // Backend buffer + // + + struct ggml_backend_buffer_i { + // (optional) free the buffer + void (*free_buffer) (ggml_backend_buffer_t buffer); + // base address of the buffer + void * (*get_base) (ggml_backend_buffer_t buffer); + // (optional) initialize a tensor in the buffer (eg. add tensor extras) + enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); + // tensor data access + void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size); + void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); + void (*get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); + // (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported) + bool (*cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst); + // clear the entire buffer + void (*clear) (ggml_backend_buffer_t buffer, uint8_t value); + // (optional) reset any internal state due to tensor initialization, such as tensor extras + void (*reset) (ggml_backend_buffer_t buffer); + }; + + struct ggml_backend_buffer { + struct ggml_backend_buffer_i iface; + ggml_backend_buffer_type_t buft; + void * context; + size_t size; + enum ggml_backend_buffer_usage usage; + }; + + GGML_API ggml_backend_buffer_t ggml_backend_buffer_init( + ggml_backend_buffer_type_t buft, + struct ggml_backend_buffer_i iface, + void * context, + size_t size); + + // do not use directly, use ggml_backend_tensor_copy instead + GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst); + + // multi-buffer + // buffer that contains a collection of buffers + GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers); + GGML_API bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer); + GGML_API void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage); + + // + // Backend (stream) + // + + struct ggml_backend_i { + const char * (*get_name)(ggml_backend_t backend); + + void (*free)(ggml_backend_t backend); + + // (optional) asynchronous tensor data access + void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); + void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); + bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst); + + // (optional) complete all pending operations (required if the backend supports async operations) + void (*synchronize)(ggml_backend_t backend); + + // (optional) graph plans (not used currently) + // compute graph with a plan + ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph); + void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan); + // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology + void (*graph_plan_update) (ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph); + // compute the graph with the plan + enum ggml_status (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan); + + // compute graph (always async if supported by the backend) + enum ggml_status (*graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph); + + // (optional) event synchronization + // record an event on this stream + void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event); + // wait for an event on on a different stream + void (*event_wait) (ggml_backend_t backend, ggml_backend_event_t event); + + // (optional) sort/optimize the nodes in the graph + void (*graph_optimize) (ggml_backend_t backend, struct ggml_cgraph * cgraph); + }; + + struct ggml_backend { + ggml_guid_t guid; + struct ggml_backend_i iface; + ggml_backend_dev_t device; + void * context; + }; + + struct ggml_backend_event { + struct ggml_backend_device * device; + void * context; + }; + + // + // Backend device + // + + // Note: if additional properties are needed, we should add a struct with all of them + // the current functions to obtain the properties can remain, since they are more convenient for often used properties + struct ggml_backend_device_i { + // device name: short identifier for this device, such as "CPU" or "CUDA0" + const char * (*get_name)(ggml_backend_dev_t dev); + + // device description: short informative description of the device, could be the model name + const char * (*get_description)(ggml_backend_dev_t dev); + + // device memory in bytes: 0 bytes to indicate no memory to report + void (*get_memory)(ggml_backend_dev_t dev, size_t * free, size_t * total); + + // device type + enum ggml_backend_dev_type (*get_type)(ggml_backend_dev_t dev); + + // device properties + void (*get_props)(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props); + + // backend (stream) initialization + ggml_backend_t (*init_backend)(ggml_backend_dev_t dev, const char * params); + + // preferred buffer type + ggml_backend_buffer_type_t (*get_buffer_type)(ggml_backend_dev_t dev); + + // (optional) host buffer type (in system memory, typically this is a pinned memory buffer for faster transfers between host and device) + ggml_backend_buffer_type_t (*get_host_buffer_type)(ggml_backend_dev_t dev); + + // (optional) buffer from pointer: create a buffer from a host pointer (useful for memory mapped models and importing data from other libraries) + ggml_backend_buffer_t (*buffer_from_host_ptr)(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size); + + // check if the backend can compute an operation + bool (*supports_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op); + + // check if the backend can use tensors allocated in a buffer type + bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft); + + // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer + // these should be expensive operations that may benefit from running on this backend instead of the CPU backend + bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op); + + // (optional) event synchronization + ggml_backend_event_t (*event_new) (ggml_backend_dev_t dev); + void (*event_free) (ggml_backend_dev_t dev, ggml_backend_event_t event); + void (*event_synchronize) (ggml_backend_dev_t dev, ggml_backend_event_t event); + }; + + struct ggml_backend_device { + struct ggml_backend_device_i iface; + ggml_backend_reg_t reg; + void * context; + }; + + // + // Backend (reg) + // + + struct ggml_backend_reg_i { + const char * (*get_name)(ggml_backend_reg_t reg); + + // enumerate available devices + size_t (*get_device_count)(ggml_backend_reg_t reg); + ggml_backend_dev_t (*get_device)(ggml_backend_reg_t reg, size_t index); + + // (optional) get a pointer to a function in the backend + // backends can add custom functions that are not part of the standard ggml-backend interface + void * (*get_proc_address)(ggml_backend_reg_t reg, const char * name); + }; + + struct ggml_backend_reg { + int api_version; // initialize to GGML_BACKEND_API_VERSION + struct ggml_backend_reg_i iface; + void * context; + }; + + // Add backend dynamic loading support to the backend + + // Initialize the backend + typedef ggml_backend_reg_t (*ggml_backend_init_t)(void); + // Optional: obtain a score for the backend based on the system configuration + // Higher scores are preferred, 0 means the backend is not supported in the current system + typedef int (*ggml_backend_score_t)(void); + +#ifdef GGML_BACKEND_DL +# ifdef __cplusplus +# define GGML_BACKEND_DL_IMPL(reg_fn) \ + extern "C" { \ + GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \ + } \ + ggml_backend_reg_t ggml_backend_init(void) { \ + return reg_fn(); \ + } +# define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \ + extern "C" { \ + GGML_BACKEND_API int ggml_backend_score(void); \ + } \ + int ggml_backend_score(void) { \ + return score_fn(); \ + } +# else +# define GGML_BACKEND_DL_IMPL(reg_fn) \ + GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \ + ggml_backend_reg_t ggml_backend_init(void) { \ + return reg_fn(); \ + } +# define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \ + GGML_BACKEND_API int ggml_backend_score(void); \ + int ggml_backend_score(void) { \ + return score_fn(); \ + } +# endif +#else +# define GGML_BACKEND_DL_IMPL(reg_fn) +# define GGML_BACKEND_DL_SCORE_IMPL(score_fn) +#endif + +#ifdef __cplusplus +} +#endif |
