#pragma once

#include <cstddef>
#include <string>
#include <utility>
#include <vector>

// Forward declaration only; the full definition is not part of this header.
struct common_params_model;

// A single header entry, stored as a pair of strings.
// NOTE(review): presumably <name, value> of an HTTP header - confirm against the implementation.
using common_header      = std::pair<std::string, std::string>;

// Ordered collection of header entries.
using common_header_list = std::vector<common_header>;

11struct common_remote_params {
12 common_header_list headers;
13 long timeout = 0; // in seconds, 0 means no timeout
14 long max_size = 0; // unlimited if 0
15};
16
17// get remote file content, returns <http_code, raw_response_body>
18std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
19
// split HF repo with tag into <repo, tag>
//   with a tag:    "user/model:tag" -> <"user/model", "tag">
//   without a tag: "user/model"     -> <"user/model", "latest">  (the tag defaults to "latest")
std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag);

// Description of one cached model entry (see common_list_cached_models()).
struct common_cached_model_info {
    std::string manifest_path;
    std::string user;
    std::string model;
    std::string tag;
    size_t      size = 0; // GGUF size in bytes

    // return string representation like "user/model:tag"
    // if tag is "latest", it will be omitted
    std::string to_string() const {
        std::string repr = user + "/" + model;
        if (tag != "latest") {
            repr += ":" + tag;
        }
        return repr;
    }
};

// Result of common_get_hf_file().
struct common_hf_file_res {
    std::string repo;       // repo name with ":tag" removed
    std::string ggufFile;   // NOTE(review): presumably the resolved GGUF file within the repo - confirm
    std::string mmprojFile; // NOTE(review): presumably a companion mmproj file, may be empty - confirm
};

45/**
46 * Allow getting the HF file from the HF repo with tag (like ollama), for example:
47 * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
48 * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
49 * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
50 * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
51 *
52 * Return pair of <repo, file> (with "repo" already having tag removed)
53 *
54 * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
55 */
56common_hf_file_res common_get_hf_file(
57 const std::string & hf_repo_with_tag,
58 const std::string & bearer_token,
59 bool offline,
60 const common_header_list & headers = {}
61);
62
63// returns true if download succeeded
64bool common_download_model(
65 const common_params_model & model,
66 const std::string & bearer_token,
67 bool offline,
68 const common_header_list & headers = {}
69);
70
71// returns list of cached models
72std::vector<common_cached_model_info> common_list_cached_models();
73
74// download single file from url to local path
75// returns status code or -1 on error
76int common_download_file_single(const std::string & url,
77 const std::string & path,
78 const std::string & bearer_token,
79 bool offline,
80 const common_header_list & headers = {});
81
// resolve and download model from Docker registry
//   docker - NOTE(review): presumably the Docker model reference to resolve - confirm expected format
// return local path to downloaded model file
std::string common_docker_resolve_model(const std::string & docker);