summaryrefslogtreecommitdiff
path: root/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go')
-rw-r--r--vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go147
1 files changed, 147 insertions, 0 deletions
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go
new file mode 100644
index 0000000..3bcef7c
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go
@@ -0,0 +1,147 @@
+package rank
+
+// Rank struct contains every original raw sentences, words, tokens, phrases,
+// indexes, word hits, phrase hits and minimum-maximum values.
+//
+// Max is the occurrence of the most used word.
+//
+// Min is the occurrence of the less used word. It is always greater then 0.
+//
+// Relation is the Relation object, contains phrases.
+//
+// SentenceMap contains raw sentences. Index is the sentence ID, value is the
+// sentence itself.
+//
+// Words contains Word objects. Index is the word ID, value is the word/token
+// itself.
+//
+// WordValID contains words. Index is the word/token, value is the ID.
+type Rank struct {
+ Max float32
+ Min float32
+ Relation Relation
+ SentenceMap map[int]string
+ Words map[int]*Word
+ WordValID map[string]int
+}
+
// Word holds everything that is tracked about a single unique word.
//
// When a word occurs several times in the text, all occurrences share one
// ID, so every Word is unique.
type Word struct {
	// ID is the identifier of the word.
	ID int
	// SentenceIDs lists the IDs of all sentences that contain the word.
	SentenceIDs []int
	// ConnectionLeft holds the words connected to this word on its left
	// side. The key is the ID of the related word, the value is the number
	// of occurrences of that connection.
	ConnectionLeft map[int]int
	// ConnectionRight holds the words connected to this word on its right
	// side. The key is the ID of the related word, the value is the number
	// of occurrences of that connection.
	ConnectionRight map[int]int
	// Token is the word itself in its tokenized form, not the original
	// spelling.
	Token string
	// Qty is the number of occurrences of the word.
	Qty int
	// Weight is the weight of the word, documented as lying between 0.00
	// and 1.00. The code visible in this file only ever sets it to 0.
	Weight float32
}
+
+// NewRank constructor retrieves a Rank pointer.
+func NewRank() *Rank {
+ return &Rank{
+ 0,
+ 0,
+ Relation{
+ 0,
+ 0,
+ make(map[int]map[int]Score),
+ },
+ make(map[int]string),
+ make(map[int]*Word),
+ make(map[string]int),
+ }
+}
+
+// IsWordExist method retrieves true when the given word is already in the rank.
+func (rank *Rank) IsWordExist(word string) bool {
+ _, find := rank.WordValID[word]
+
+ return find
+}
+
+// AddNewWord method adds a new word to the rank object and it defines its ID.
+func (rank *Rank) AddNewWord(word string, prevWordIdx int, sentenceID int) (wordID int) {
+ wordID = len(rank.Words)
+ connectionLeft := make(map[int]int)
+
+ if prevWordIdx >= 0 {
+ connectionLeft[prevWordIdx] = 1
+ }
+
+ newWord := &Word{
+ ID: wordID,
+ SentenceIDs: []int{sentenceID},
+ ConnectionLeft: connectionLeft,
+ ConnectionRight: make(map[int]int),
+ Token: word,
+ Qty: 1,
+ Weight: 0,
+ }
+
+ rank.Words[wordID] = newWord
+ rank.WordValID[word] = wordID
+
+ return
+}
+
+// UpdateWord method update a word what already exists in the rank object. It
+// retrieves its ID.
+func (rank *Rank) UpdateWord(word string, prevWordIdx int, sentenceID int) (wordID int) {
+ wordID = rank.WordValID[word]
+
+ found := false
+
+ for _, oldSentenceID := range rank.Words[wordID].SentenceIDs {
+ if sentenceID == oldSentenceID {
+ found = true
+ break
+ }
+ }
+
+ if !found {
+ rank.Words[wordID].SentenceIDs = append(
+ rank.Words[wordID].SentenceIDs,
+ sentenceID,
+ )
+ }
+
+ rank.Words[wordID].Qty++
+
+ if prevWordIdx >= 0 {
+ rank.Words[wordID].ConnectionLeft[prevWordIdx]++
+ }
+
+ return
+}
+
+// UpdateRightConnection method adds the right connection to the word. It always
+// can be used after a word has added and the next word is known.
+func (rank *Rank) UpdateRightConnection(wordID int, rightWordID int) {
+ if wordID >= 0 {
+ rank.Words[wordID].ConnectionRight[rightWordID]++
+ }
+}
+
+// GetWordData method retrieves all words as a pointer.
+func (rank *Rank) GetWordData() map[int]*Word {
+ return rank.Words
+}