Added vendor lock on deps (HEAD, master)

author: Mitja Felicijan <mitja.felicijan@gmail.com> 2024-10-25 00:47:47 +0200
committer: Mitja Felicijan <mitja.felicijan@gmail.com> 2024-10-25 00:47:47 +0200
commit: c6cc0108ca7738023b45e0eeac0fa2390532dd93 (patch)
tree: 36890e6cd3091bbab8efbe686cc56f467f645bfd /vendor/github.com/DavidBelicza/TextRank/v2/parse
parent: 0130404a1dc663d4aa68d780c9bcb23a4243e68d (diff)
download: jbmafp-c6cc0108ca7738023b45e0eeac0fa2390532dd93.tar.gz
3 files changed, 159 insertions, 0 deletions
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/parse/rule.go b/vendor/github.com/DavidBelicza/TextRank/v2/parse/rule.go
new file mode 100644
index 0000000..0f6ec91
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/parse/rule.go
@@ -0,0 +1,52 @@
+package parse
+
+// Rule is the pluggable policy the tokenizer consults to decide whether a
+// given rune ends a word or ends a sentence.
+type Rule interface {
+	IsWordSeparator(rune rune) bool
+	IsSentenceSeparator(rune rune) bool
+}
+
+// RuleDefault is the built-in Rule implementation. It keeps its separators as
+// strings in two fixed-size arrays and matches incoming runes against them.
+type RuleDefault struct {
+	wordSeparators     [21]string
+	sentenceSeparators [3]string
+}
+
+// NewRule returns a pointer to a RuleDefault filled with the default separator sets.
+func NewRule() *RuleDefault {
+	return &RuleDefault{ // positional literal: wordSeparators first, then sentenceSeparators
+		[21]string{" ", ",", "'", "’", "\"", ")", "(", "[", "]", "{", "}", "\"", ";", "\n", ">", "<", "%", "@", "&", "=", "#"}, // the double-quote entry is listed twice
+		[3]string{"!", ".", "?"},
+	}
+}
+
+// IsWordSeparator reports whether the given rune separates two words. It is
+// true when the rune matches an entry of wordSeparators; otherwise the result
+// of IsSentenceSeparator is returned, so sentence separators also split words.
+func (r *RuleDefault) IsWordSeparator(rune rune) bool {
+	chr := string(rune) // UTF-8 string form of the rune, compared against the stored strings
+
+	for _, val := range r.wordSeparators {
+		if chr == val {
+			return true
+		}
+	}
+
+	return r.IsSentenceSeparator(rune)
+}
+
+// IsSentenceSeparator reports whether the given rune ends a sentence, i.e.
+// whether it matches one of the entries of sentenceSeparators.
+func (r *RuleDefault) IsSentenceSeparator(rune rune) bool {
+	chr := string(rune) // UTF-8 string form of the rune, compared against the stored strings
+
+	for _, val := range r.sentenceSeparators {
+		if chr == val {
+			return true
+		}
+	}
+
+	return false
+}
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/parse/text.go b/vendor/github.com/DavidBelicza/TextRank/v2/parse/text.go
new file mode 100644
index 0000000..aab27c3
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/parse/text.go
@@ -0,0 +1,44 @@
+package parse
+
+// Text holds the result of tokenizing a text: its parsed sentences, in append order.
+type Text struct {
+	parsedSentences []ParsedSentence
+}
+
+// ParsedSentence pairs one original raw sentence with the words extracted from it.
+type ParsedSentence struct {
+	original string
+	words    []string
+}
+
+// Append wraps rawSentence and its words in a ParsedSentence and appends it to
+// the Text. A sentence with an empty words slice is ignored, not stored.
+func (text *Text) Append(rawSentence string, words []string) {
+	if len(words) > 0 {
+		parsedSentence := ParsedSentence{
+			original: rawSentence,
+			words:    words,
+		}
+
+		text.parsedSentences = append(
+			text.parsedSentences,
+			parsedSentence,
+		)
+	}
+}
+
+// GetSentences returns the Text's ParsedSentence slice (the stored slice itself, not a copy).
+func (text *Text) GetSentences() []ParsedSentence {
+	return text.parsedSentences
+}
+
+// GetWords returns the words slice stored in the ParsedSentence.
+func (parsedSentence *ParsedSentence) GetWords() []string {
+	return parsedSentence.words
+}
+
+// GetOriginal returns the original, unmodified sentence string stored in the
+// ParsedSentence.
+func (parsedSentence *ParsedSentence) GetOriginal() string {
+	return parsedSentence.original
+}
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/parse/tokenizer.go b/vendor/github.com/DavidBelicza/TextRank/v2/parse/tokenizer.go
new file mode 100644
index 0000000..003460e
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/parse/tokenizer.go
@@ -0,0 +1,63 @@
+package parse
+
+import (
+	"strings"
+)
+
+// TokenizeText parses rawText using the given Rule and returns the parsed
+// sentences and words as a Text value. It delegates to findSentences.
+func TokenizeText(rawText string, rule Rule) Text {
+	return findSentences(rawText, rule)
+}
+
+func findSentences(rawText string, rule Rule) Text { // splits rawText into sentences and tokenizes each into words
+	text := Text{}
+
+	var sentence string
+	var i int             // byte offset where the current sentence starts
+	slen := len(rawText) // length in bytes, not runes
+
+	for j, chr := range rawText {
+		j += len(string(chr)) // j = byte offset just past chr; NOTE(review): assumes valid UTF-8 (an invalid byte ranges as U+FFFD, which encodes to 3 bytes, not 1)
+		// Cut at a sentence separator or at the end of the input; the separator stays inside the sentence.
+		if rule.IsSentenceSeparator(chr) || j == slen {
+			sentence = rawText[i:j]
+			if len(sentence) > 0 {
+				text.Append(sentence, findWords(sentence, rule)) // Append drops sentences that yield no words
+			}
+
+			sentence = ""
+			i = j // the next sentence starts right after this one
+		}
+	}
+
+	return text
+}
+
+func findWords(rawSentence string, rule Rule) (words []string) { // splits rawSentence into lower-cased words
+	words = []string{} // non-nil even when no word is found
+
+	var word string
+	var i int                // byte offset where the current word starts
+	slen := len(rawSentence) // length in bytes, not runes
+
+	for j, chr := range rawSentence {
+		chrlen := len(string(chr)) // encoded width of chr in bytes; NOTE(review): assumes valid UTF-8 input
+		j += chrlen                // j = byte offset just past chr
+		// Emit a word at a word separator or at the end of the sentence.
+		if sep := rule.IsWordSeparator(chr); sep || j == slen {
+			if sep {
+				word = rawSentence[i : j-chrlen] // exclude the separator itself
+			} else {
+				word = rawSentence[i:j] // final rune is not a separator, so it belongs to the word
+			}
+			if len(word) > 0 { // skip empty tokens, e.g. between adjacent separators
+				words = append(words, strings.ToLower(word))
+			}
+			word = ""
+			i = j // the next word starts right after this rune
+		}
+	}
+
+	return // naked return of the named result words
+}
author	Mitja Felicijan <mitja.felicijan@gmail.com>	2024-10-25 00:47:47 +0200
committer	Mitja Felicijan <mitja.felicijan@gmail.com>	2024-10-25 00:47:47 +0200
commit	c6cc0108ca7738023b45e0eeac0fa2390532dd93 (patch)
tree	36890e6cd3091bbab8efbe686cc56f467f645bfd /vendor/github.com/DavidBelicza/TextRank/v2/parse
parent	0130404a1dc663d4aa68d780c9bcb23a4243e68d (diff)
download	jbmafp-c6cc0108ca7738023b45e0eeac0fa2390532dd93.tar.gz