summaryrefslogtreecommitdiff
path: root/llama.cpp/common/jinja/string.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp/common/jinja/string.cpp')
-rw-r--r--llama.cpp/common/jinja/string.cpp213
1 files changed, 213 insertions, 0 deletions
diff --git a/llama.cpp/common/jinja/string.cpp b/llama.cpp/common/jinja/string.cpp
new file mode 100644
index 0000000..8087e15
--- /dev/null
+++ b/llama.cpp/common/jinja/string.cpp
@@ -0,0 +1,213 @@
+#include "jinja/string.h"
+#include "jinja/value.h"
+
+#include <algorithm>
+#include <functional>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace jinja {
+
+//
+// string_part
+//
+
+bool string_part::is_uppercase() const {
+ for (char c : val) {
+ if (std::islower(static_cast<unsigned char>(c))) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool string_part::is_lowercase() const {
+ for (char c : val) {
+ if (std::isupper(static_cast<unsigned char>(c))) {
+ return false;
+ }
+ }
+ return true;
+}
+
+//
+// string
+//
+
+void string::mark_input() {
+ for (auto & part : parts) {
+ part.is_input = true;
+ }
+}
+
+std::string string::str() const {
+ if (parts.size() == 1) {
+ return parts[0].val;
+ }
+ std::ostringstream oss;
+ for (const auto & part : parts) {
+ oss << part.val;
+ }
+ return oss.str();
+}
+
+size_t string::length() const {
+ size_t len = 0;
+ for (const auto & part : parts) {
+ len += part.val.length();
+ }
+ return len;
+}
+
+void string::hash_update(hasher & hash) const noexcept {
+ for (const auto & part : parts) {
+ hash.update(part.val.data(), part.val.length());
+ }
+}
+
+bool string::all_parts_are_input() const {
+ for (const auto & part : parts) {
+ if (!part.is_input) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool string::is_uppercase() const {
+ for (const auto & part : parts) {
+ if (!part.is_uppercase()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool string::is_lowercase() const {
+ for (const auto & part : parts) {
+ if (!part.is_lowercase()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// mark this string as input if other has ALL parts as input
+void string::mark_input_based_on(const string & other) {
+ if (other.all_parts_are_input()) {
+ for (auto & part : parts) {
+ part.is_input = true;
+ }
+ }
+}
+
+string string::append(const string & other) {
+ for (const auto & part : other.parts) {
+ parts.push_back(part);
+ }
+ return *this;
+}
+
+// in-place transformation
+
+using transform_fn = std::function<std::string(const std::string&)>;
+static string apply_transform(string & self, const transform_fn & fn) {
+ for (auto & part : self.parts) {
+ part.val = fn(part.val);
+ }
+ return self;
+}
+
+string string::uppercase() {
+ return apply_transform(*this, [](const std::string & s) {
+ std::string res = s;
+ std::transform(res.begin(), res.end(), res.begin(), ::toupper);
+ return res;
+ });
+}
+string string::lowercase() {
+ return apply_transform(*this, [](const std::string & s) {
+ std::string res = s;
+ std::transform(res.begin(), res.end(), res.begin(), ::tolower);
+ return res;
+ });
+}
+string string::capitalize() {
+ return apply_transform(*this, [](const std::string & s) {
+ if (s.empty()) return s;
+ std::string res = s;
+ res[0] = ::toupper(static_cast<unsigned char>(res[0]));
+ std::transform(res.begin() + 1, res.end(), res.begin() + 1, ::tolower);
+ return res;
+ });
+}
+string string::titlecase() {
+ return apply_transform(*this, [](const std::string & s) {
+ std::string res = s;
+ bool capitalize_next = true;
+ for (char &c : res) {
+ if (isspace(static_cast<unsigned char>(c))) {
+ capitalize_next = true;
+ } else if (capitalize_next) {
+ c = ::toupper(static_cast<unsigned char>(c));
+ capitalize_next = false;
+ } else {
+ c = ::tolower(static_cast<unsigned char>(c));
+ }
+ }
+ return res;
+ });
+}
+string string::strip(bool left, bool right, std::optional<const std::string_view> chars) {
+ static auto strip_part = [](const std::string & s, bool left, bool right, std::optional<const std::string_view> chars) -> std::string {
+ size_t start = 0;
+ size_t end = s.length();
+ auto match_char = [&chars](unsigned char c) -> bool {
+ return chars ? (*chars).find(c) != std::string::npos : isspace(c);
+ };
+ if (left) {
+ while (start < end && match_char(static_cast<unsigned char>(s[start]))) {
+ ++start;
+ }
+ }
+ if (right) {
+ while (end > start && match_char(static_cast<unsigned char>(s[end - 1]))) {
+ --end;
+ }
+ }
+ return s.substr(start, end - start);
+ };
+ if (parts.empty()) {
+ return *this;
+ }
+ if (left) {
+ for (size_t i = 0; i < parts.size(); ++i) {
+ parts[i].val = strip_part(parts[i].val, true, false, chars);
+ if (parts[i].val.empty()) {
+ // remove empty part
+ parts.erase(parts.begin() + i);
+ --i;
+ continue;
+ } else {
+ break;
+ }
+ }
+ }
+ if (right) {
+ for (size_t i = parts.size(); i-- > 0;) {
+ parts[i].val = strip_part(parts[i].val, false, true, chars);
+ if (parts[i].val.empty()) {
+ // remove empty part
+ parts.erase(parts.begin() + i);
+ continue;
+ } else {
+ break;
+ }
+ }
+ }
+ return *this;
+}
+
+} // namespace jinja