diff options
Diffstat (limited to 'vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go')
| -rw-r--r-- | vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go | 147 |
1 files changed, 147 insertions, 0 deletions
// diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go
// new file mode 100644
// index 0000000..3bcef7c
// --- /dev/null
// +++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go
// @@ -0,0 +1,147 @@

package rank

// Rank struct contains all original raw sentences, words, tokens, phrases,
// indexes, word hits, phrase hits and minimum-maximum values.
//
// Max is the occurrence of the most used word.
//
// Min is the occurrence of the least used word. It is always greater than 0.
//
// Relation is the Relation object; it contains the phrases.
//
// SentenceMap contains the raw sentences. The key is the sentence ID, the
// value is the sentence itself.
//
// Words contains the Word objects. The key is the word ID, the value is the
// Word describing that word/token.
//
// WordValID contains the words. The key is the word/token, the value is its
// ID.
type Rank struct {
	Max         float32
	Min         float32
	Relation    Relation
	SentenceMap map[int]string
	Words       map[int]*Word
	WordValID   map[string]int
}

// Word struct contains all data about a word.
//
// If a word occurs multiple times in the text, all of its occurrences point to
// the same ID, so every Word is unique.
//
// SentenceIDs contains the IDs of all sentences that contain the word.
//
// ConnectionLeft contains all words that are connected to this word on the
// left side. The map key is the ID of the related word and the value is the
// number of occurrences.
//
// ConnectionRight contains all words that are connected to this word on the
// right side. The map key is the ID of the related word and the value is the
// number of occurrences.
//
// Token is the word itself, but not the original: it is tokenized.
//
// Qty is the number of occurrences of the word.
//
// Weight is the weight of the word between 0.00 and 1.00.
+type Word struct { + ID int + SentenceIDs []int + ConnectionLeft map[int]int + ConnectionRight map[int]int + Token string + Qty int + Weight float32 +} + +// NewRank constructor retrieves a Rank pointer. +func NewRank() *Rank { + return &Rank{ + 0, + 0, + Relation{ + 0, + 0, + make(map[int]map[int]Score), + }, + make(map[int]string), + make(map[int]*Word), + make(map[string]int), + } +} + +// IsWordExist method retrieves true when the given word is already in the rank. +func (rank *Rank) IsWordExist(word string) bool { + _, find := rank.WordValID[word] + + return find +} + +// AddNewWord method adds a new word to the rank object and it defines its ID. +func (rank *Rank) AddNewWord(word string, prevWordIdx int, sentenceID int) (wordID int) { + wordID = len(rank.Words) + connectionLeft := make(map[int]int) + + if prevWordIdx >= 0 { + connectionLeft[prevWordIdx] = 1 + } + + newWord := &Word{ + ID: wordID, + SentenceIDs: []int{sentenceID}, + ConnectionLeft: connectionLeft, + ConnectionRight: make(map[int]int), + Token: word, + Qty: 1, + Weight: 0, + } + + rank.Words[wordID] = newWord + rank.WordValID[word] = wordID + + return +} + +// UpdateWord method update a word what already exists in the rank object. It +// retrieves its ID. +func (rank *Rank) UpdateWord(word string, prevWordIdx int, sentenceID int) (wordID int) { + wordID = rank.WordValID[word] + + found := false + + for _, oldSentenceID := range rank.Words[wordID].SentenceIDs { + if sentenceID == oldSentenceID { + found = true + break + } + } + + if !found { + rank.Words[wordID].SentenceIDs = append( + rank.Words[wordID].SentenceIDs, + sentenceID, + ) + } + + rank.Words[wordID].Qty++ + + if prevWordIdx >= 0 { + rank.Words[wordID].ConnectionLeft[prevWordIdx]++ + } + + return +} + +// UpdateRightConnection method adds the right connection to the word. It always +// can be used after a word has added and the next word is known. 
+func (rank *Rank) UpdateRightConnection(wordID int, rightWordID int) { + if wordID >= 0 { + rank.Words[wordID].ConnectionRight[rightWordID]++ + } +} + +// GetWordData method retrieves all words as a pointer. +func (rank *Rank) GetWordData() map[int]*Word { + return rank.Words +} |
