diff options
Diffstat (limited to 'llama.cpp/common/jinja/string.cpp')
| -rw-r--r-- | llama.cpp/common/jinja/string.cpp | 213 |
1 files changed, 213 insertions, 0 deletions
diff --git a/llama.cpp/common/jinja/string.cpp b/llama.cpp/common/jinja/string.cpp new file mode 100644 index 0000000..8087e15 --- /dev/null +++ b/llama.cpp/common/jinja/string.cpp @@ -0,0 +1,213 @@ +#include "jinja/string.h" +#include "jinja/value.h" + +#include <algorithm> +#include <functional> +#include <optional> +#include <sstream> +#include <string> +#include <vector> + +namespace jinja { + +// +// string_part +// + +bool string_part::is_uppercase() const { + for (char c : val) { + if (std::islower(static_cast<unsigned char>(c))) { + return false; + } + } + return true; +} + +bool string_part::is_lowercase() const { + for (char c : val) { + if (std::isupper(static_cast<unsigned char>(c))) { + return false; + } + } + return true; +} + +// +// string +// + +void string::mark_input() { + for (auto & part : parts) { + part.is_input = true; + } +} + +std::string string::str() const { + if (parts.size() == 1) { + return parts[0].val; + } + std::ostringstream oss; + for (const auto & part : parts) { + oss << part.val; + } + return oss.str(); +} + +size_t string::length() const { + size_t len = 0; + for (const auto & part : parts) { + len += part.val.length(); + } + return len; +} + +void string::hash_update(hasher & hash) const noexcept { + for (const auto & part : parts) { + hash.update(part.val.data(), part.val.length()); + } +} + +bool string::all_parts_are_input() const { + for (const auto & part : parts) { + if (!part.is_input) { + return false; + } + } + return true; +} + +bool string::is_uppercase() const { + for (const auto & part : parts) { + if (!part.is_uppercase()) { + return false; + } + } + return true; +} + +bool string::is_lowercase() const { + for (const auto & part : parts) { + if (!part.is_lowercase()) { + return false; + } + } + return true; +} + +// mark this string as input if other has ALL parts as input +void string::mark_input_based_on(const string & other) { + if (other.all_parts_are_input()) { + for (auto & part : parts) { + part.is_input = true; + } + } +} + +string string::append(const string & other) { + for (const auto & part : other.parts) { + parts.push_back(part); + } + return *this; +} + +// in-place transformation + +using transform_fn = std::function<std::string(const std::string&)>; +static string apply_transform(string & self, const transform_fn & fn) { + for (auto & part : self.parts) { + part.val = fn(part.val); + } + return self; +} + +string string::uppercase() { + return apply_transform(*this, [](const std::string & s) { + std::string res = s; + std::transform(res.begin(), res.end(), res.begin(), ::toupper); + return res; + }); +} +string string::lowercase() { + return apply_transform(*this, [](const std::string & s) { + std::string res = s; + std::transform(res.begin(), res.end(), res.begin(), ::tolower); + return res; + }); +} +string string::capitalize() { + return apply_transform(*this, [](const std::string & s) { + if (s.empty()) return s; + std::string res = s; + res[0] = ::toupper(static_cast<unsigned char>(res[0])); + std::transform(res.begin() + 1, res.end(), res.begin() + 1, ::tolower); + return res; + }); +} +string string::titlecase() { + return apply_transform(*this, [](const std::string & s) { + std::string res = s; + bool capitalize_next = true; + for (char &c : res) { + if (isspace(static_cast<unsigned char>(c))) { + capitalize_next = true; + } else if (capitalize_next) { + c = ::toupper(static_cast<unsigned char>(c)); + capitalize_next = false; + } else { + c = ::tolower(static_cast<unsigned char>(c)); + } + } + return res; + }); +} +string string::strip(bool left, bool right, std::optional<const std::string_view> chars) { + static auto strip_part = [](const std::string & s, bool left, bool right, std::optional<const std::string_view> chars) -> std::string { + size_t start = 0; + size_t end = s.length(); + auto match_char = [&chars](unsigned char c) -> bool { + return chars ? (*chars).find(c) != std::string::npos : isspace(c); + }; + if (left) { + while (start < end && match_char(static_cast<unsigned char>(s[start]))) { + ++start; + } + } + if (right) { + while (end > start && match_char(static_cast<unsigned char>(s[end - 1]))) { + --end; + } + } + return s.substr(start, end - start); + }; + if (parts.empty()) { + return *this; + } + if (left) { + for (size_t i = 0; i < parts.size(); ++i) { + parts[i].val = strip_part(parts[i].val, true, false, chars); + if (parts[i].val.empty()) { + // remove empty part + parts.erase(parts.begin() + i); + --i; + continue; + } else { + break; + } + } + } + if (right) { + for (size_t i = parts.size(); i-- > 0;) { + parts[i].val = strip_part(parts[i].val, false, true, chars); + if (parts[i].val.empty()) { + // remove empty part + parts.erase(parts.begin() + i); + continue; + } else { + break; + } + } + } + return *this; +} + +} // namespace jinja |
