summaryrefslogtreecommitdiff
path: root/vendor/github.com/DavidBelicza/TextRank/v2/rank
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/DavidBelicza/TextRank/v2/rank')
-rw-r--r--vendor/github.com/DavidBelicza/TextRank/v2/rank/algorithm.go99
-rw-r--r--vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go147
-rw-r--r--vendor/github.com/DavidBelicza/TextRank/v2/rank/ranking.go66
-rw-r--r--vendor/github.com/DavidBelicza/TextRank/v2/rank/relation.go77
-rw-r--r--vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go202
5 files changed, 591 insertions, 0 deletions
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/algorithm.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/algorithm.go
new file mode 100644
index 0000000..8f9345f
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/algorithm.go
@@ -0,0 +1,99 @@
+package rank
+
+import (
+ "math"
+)
+
+// Algorithm interface and its methods make possible the polimorf usage of
+// weighting process.
+type Algorithm interface {
+ WeightingRelation(
+ word1ID int,
+ word2ID int,
+ rank *Rank,
+ ) float32
+
+ WeightingHits(
+ wordID int,
+ rank *Rank,
+ ) float32
+}
+
+// AlgorithmDefault struct is the basic implementation of Algorithm. It can
+// weight a word or phrase by comparing them.
+type AlgorithmDefault struct{}
+
+// NewAlgorithmDefault constructor retrieves an AlgorithmDefault pointer.
+func NewAlgorithmDefault() *AlgorithmDefault {
+ return &AlgorithmDefault{}
+}
+
+// WeightingRelation method is the traditional algorithm of text rank to
+// weighting a phrase.
+func (a *AlgorithmDefault) WeightingRelation(
+ word1ID int,
+ word2ID int,
+ rank *Rank,
+) float32 {
+ relationQty := rank.Relation.Node[word1ID][word2ID].Qty
+
+ return float32(relationQty)
+}
+
+// WeightingHits method ranks the words by their occurrence.
+func (a *AlgorithmDefault) WeightingHits(
+ wordID int,
+ rank *Rank,
+) float32 {
+ weight := rank.Words[wordID].Qty
+
+ return float32(weight)
+}
+
+// AlgorithmChain struct is the combined implementation of Algorithm. It is a
+// good example how weighting can be changed by a different implementations. It
+// can weight a word or phrase by comparing them.
+type AlgorithmChain struct{}
+
+// NewAlgorithmChain constructor retrieves an AlgorithmChain pointer.
+func NewAlgorithmChain() *AlgorithmChain {
+ return &AlgorithmChain{}
+}
+
+// WeightingRelation method is a combined algorithm of text rank and word
+// occurrence, it weights a phrase.
+func (a *AlgorithmChain) WeightingRelation(
+ word1ID int,
+ word2ID int,
+ rank *Rank,
+) float32 {
+ relationQty := rank.Relation.Node[word1ID][word2ID].Qty
+ word1Qty := rank.Words[word1ID].Qty
+ word2Qty := rank.Words[word2ID].Qty
+
+ qDiff := float32(math.Abs(float64(word1Qty)-float64(word2Qty))) / 100
+ weight := float32(relationQty) + qDiff
+
+ return weight
+}
+
+// WeightingHits method ranks the words by their occurrence.
+func (a *AlgorithmChain) WeightingHits(
+ wordID int,
+ rank *Rank,
+) float32 {
+ word := rank.Words[wordID]
+ qty := 0
+
+ for leftWordID, leftWordQty := range word.ConnectionLeft {
+ qty += rank.Words[leftWordID].Qty * leftWordQty
+ }
+
+ for rightWordID, rightWordQty := range word.ConnectionRight {
+ qty += rank.Words[rightWordID].Qty * rightWordQty
+ }
+
+ weight := float32(word.Qty) + (float32(qty))
+
+ return float32(weight)
+}
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go
new file mode 100644
index 0000000..3bcef7c
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go
@@ -0,0 +1,147 @@
+package rank
+
+// Rank struct contains every original raw sentences, words, tokens, phrases,
+// indexes, word hits, phrase hits and minimum-maximum values.
+//
+// Max is the occurrence of the most used word.
+//
+// Min is the occurrence of the less used word. It is always greater then 0.
+//
+// Relation is the Relation object, contains phrases.
+//
+// SentenceMap contains raw sentences. Index is the sentence ID, value is the
+// sentence itself.
+//
+// Words contains Word objects. Index is the word ID, value is the word/token
+// itself.
+//
+// WordValID contains words. Index is the word/token, value is the ID.
+type Rank struct {
+ Max float32
+ Min float32
+ Relation Relation
+ SentenceMap map[int]string
+ Words map[int]*Word
+ WordValID map[string]int
+}
+
+// Word struct contains all data about the words.
+//
+// If a word is multiple times in the text then the multiple words point to the
+// same ID. So Word is unique.
+//
+// SentenceIDs contains all IDs of sentences what contain the word.
+//
+// ConnectionLeft contains all words what are connected to this word on the left
+// side. The map index is the ID of the related word and its value is the
+// occurrence.
+//
+// ConnectionRight contains all words what are connected to this word on the
+// right side. The map index is the ID of the related word and its value is the
+// occurrence.
+//
+// Token is the word itself, but not the original, it is tokenized.
+//
+// Qty is the number of occurrence of the word.
+//
+// Weight is the weight of the word between 0.00 and 1.00.
+type Word struct {
+ ID int
+ SentenceIDs []int
+ ConnectionLeft map[int]int
+ ConnectionRight map[int]int
+ Token string
+ Qty int
+ Weight float32
+}
+
+// NewRank constructor retrieves a Rank pointer.
+func NewRank() *Rank {
+ return &Rank{
+ 0,
+ 0,
+ Relation{
+ 0,
+ 0,
+ make(map[int]map[int]Score),
+ },
+ make(map[int]string),
+ make(map[int]*Word),
+ make(map[string]int),
+ }
+}
+
+// IsWordExist method retrieves true when the given word is already in the rank.
+func (rank *Rank) IsWordExist(word string) bool {
+ _, find := rank.WordValID[word]
+
+ return find
+}
+
+// AddNewWord method adds a new word to the rank object and it defines its ID.
+func (rank *Rank) AddNewWord(word string, prevWordIdx int, sentenceID int) (wordID int) {
+ wordID = len(rank.Words)
+ connectionLeft := make(map[int]int)
+
+ if prevWordIdx >= 0 {
+ connectionLeft[prevWordIdx] = 1
+ }
+
+ newWord := &Word{
+ ID: wordID,
+ SentenceIDs: []int{sentenceID},
+ ConnectionLeft: connectionLeft,
+ ConnectionRight: make(map[int]int),
+ Token: word,
+ Qty: 1,
+ Weight: 0,
+ }
+
+ rank.Words[wordID] = newWord
+ rank.WordValID[word] = wordID
+
+ return
+}
+
+// UpdateWord method update a word what already exists in the rank object. It
+// retrieves its ID.
+func (rank *Rank) UpdateWord(word string, prevWordIdx int, sentenceID int) (wordID int) {
+ wordID = rank.WordValID[word]
+
+ found := false
+
+ for _, oldSentenceID := range rank.Words[wordID].SentenceIDs {
+ if sentenceID == oldSentenceID {
+ found = true
+ break
+ }
+ }
+
+ if !found {
+ rank.Words[wordID].SentenceIDs = append(
+ rank.Words[wordID].SentenceIDs,
+ sentenceID,
+ )
+ }
+
+ rank.Words[wordID].Qty++
+
+ if prevWordIdx >= 0 {
+ rank.Words[wordID].ConnectionLeft[prevWordIdx]++
+ }
+
+ return
+}
+
+// UpdateRightConnection method adds the right connection to the word. It always
+// can be used after a word has added and the next word is known.
+func (rank *Rank) UpdateRightConnection(wordID int, rightWordID int) {
+ if wordID >= 0 {
+ rank.Words[wordID].ConnectionRight[rightWordID]++
+ }
+}
+
+// GetWordData method retrieves all words as a pointer.
+func (rank *Rank) GetWordData() map[int]*Word {
+ return rank.Words
+}
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/ranking.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/ranking.go
new file mode 100644
index 0000000..5fd2dfa
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/ranking.go
@@ -0,0 +1,66 @@
+package rank
+
+// Calculate function ranking words by the given algorithm implementation.
+func Calculate(ranks *Rank, algorithm Algorithm) {
+ updateRanks(ranks, algorithm)
+}
+
+func updateRanks(ranks *Rank, algorithm Algorithm) {
+ for _, word := range ranks.Words {
+ weight := algorithm.WeightingHits(word.ID, ranks)
+ word.Weight = weight
+
+ if ranks.Max < word.Weight {
+ ranks.Max = word.Weight
+ }
+
+ if ranks.Min > word.Weight || ranks.Min == 0 {
+ ranks.Min = word.Weight
+ }
+ }
+
+ for _, word := range ranks.Words {
+ word.Weight = normalize(word.Weight, ranks.Min, ranks.Max)
+ }
+
+ for x, xMap := range ranks.Relation.Node {
+ for y := range xMap {
+ sentenceIDs := ranks.Relation.Node[x][y].SentenceIDs
+ weight := algorithm.WeightingRelation(x, y, ranks)
+
+ ranks.Relation.Node[x][y] = Score{
+ ranks.Relation.Node[x][y].Qty,
+ weight,
+ sentenceIDs,
+ }
+
+ if ranks.Relation.Max < weight {
+ ranks.Relation.Max = weight
+ }
+
+ if ranks.Relation.Min > weight || ranks.Relation.Min == 0 {
+ ranks.Relation.Min = weight
+ }
+ }
+ }
+
+ for x, xMap := range ranks.Relation.Node {
+ for y := range xMap {
+ weight := normalize(
+ ranks.Relation.Node[x][y].Weight,
+ ranks.Relation.Min,
+ ranks.Relation.Max,
+ )
+
+ ranks.Relation.Node[x][y] = Score{
+ ranks.Relation.Node[x][y].Qty,
+ weight,
+ ranks.Relation.Node[x][y].SentenceIDs,
+ }
+ }
+ }
+}
+
+func normalize(weight float32, min float32, max float32) float32 {
+ return (weight - min) / (max - min)
+}
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/relation.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/relation.go
new file mode 100644
index 0000000..cb8b97e
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/relation.go
@@ -0,0 +1,77 @@
+package rank
+
+// Relation struct contains the phrase data.
+//
+// Max is the occurrence of the most used phrase.
+//
+// Min is the occurrence of the less used phrase. It is always greater then 0.
+//
+// Node is contains the Scores. Firs ID is the word 1, second ID is the word 2,
+// and the value is the Score what contains the data about their relation.
+type Relation struct {
+ Max float32
+ Min float32
+ Node map[int]map[int]Score
+}
+
+// Score struct contains data about a relation of two words.
+//
+// Qty is the occurrence of the phrase.
+//
+// Weight is the weight of the phrase between 0.00 and 1.00.
+//
+// SentenceIDs contains all IDs of sentences what contain the phrase.
+type Score struct {
+ Qty int
+ Weight float32
+ SentenceIDs []int
+}
+
+// AddRelation method adds a new relation to Relation object.
+func (relation *Relation) AddRelation(wordID int, relatedWordID int, sentenceID int) {
+ if relatedWordID == -1 {
+ return
+ }
+
+ if relation.updateRelation(relatedWordID, wordID, true, sentenceID) {
+ return
+ }
+
+ if relation.extendRelation(wordID, relatedWordID, true, sentenceID) {
+ return
+ }
+
+ relation.createRelation(wordID, relatedWordID, sentenceID)
+}
+
+func (relation *Relation) updateRelation(x int, y int, r bool, sentenceID int) bool {
+ if _, ok := relation.Node[x][y]; ok {
+ count := relation.Node[x][y].Qty + 1
+ weight := relation.Node[x][y].Weight
+ sentenceIDs := append(relation.Node[x][y].SentenceIDs, sentenceID)
+ relation.Node[x][y] = Score{count, weight, sentenceIDs}
+
+ return true
+ } else if r {
+ return relation.updateRelation(y, x, false, sentenceID)
+ }
+
+ return false
+}
+
+func (relation *Relation) extendRelation(x int, y int, r bool, sentenceID int) bool {
+ if _, ok := relation.Node[x]; ok {
+ relation.Node[x][y] = Score{1, 0, []int{sentenceID}}
+
+ return true
+ } else if r {
+ return relation.extendRelation(y, x, false, sentenceID)
+ }
+
+ return false
+}
+
+func (relation *Relation) createRelation(x int, y int, sentenceID int) {
+ relation.Node[x] = map[int]Score{}
+ relation.Node[x][y] = Score{1, 0, []int{sentenceID}}
+}
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go
new file mode 100644
index 0000000..6d00a97
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go
@@ -0,0 +1,202 @@
+package rank
+
+import (
+ "sort"
+)
+
+// Phrase struct contains a single phrase and its data.
+//
+// LeftID is the ID of the word 1.
+//
+// RightID is the ID of the word 2.
+//
+// Left is the token of the word 1.
+//
+// Right is the token of the word 2.
+//
+// Weight is between 0.00 and 1.00.
+//
+// Qty is the occurrence of the phrase.
+type Phrase struct {
+ LeftID int
+ RightID int
+ Left string
+ Right string
+ Weight float32
+ Qty int
+}
+
+// FindPhrases function has wrapper textrank.FindPhrases. Use the wrapper
+// instead.
+func FindPhrases(ranks *Rank) []Phrase {
+ var phrases []Phrase
+
+ for x, xMap := range ranks.Relation.Node {
+ for y := range xMap {
+ phrases = append(phrases, Phrase{
+ ranks.Words[x].ID,
+ ranks.Words[y].ID,
+ ranks.Words[x].Token,
+ ranks.Words[y].Token,
+ ranks.Relation.Node[x][y].Weight,
+ ranks.Relation.Node[x][y].Qty,
+ })
+ }
+ }
+
+ sort.Slice(phrases, func(i, j int) bool {
+ return phrases[i].Weight > phrases[j].Weight
+ })
+
+ return phrases
+}
+
+// SingleWord struct contains a single word and its data.
+//
+// ID of the word.
+//
+// Word itself, the token.
+//
+// Weight of the word between 0.00 and 1.00.
+//
+// Quantity of the word.
+type SingleWord struct {
+ ID int
+ Word string
+ Weight float32
+ Qty int
+}
+
+// FindSingleWords function has wrapper textrank.FindSingleWords. Use the
+// wrapper instead.
+func FindSingleWords(ranks *Rank) []SingleWord {
+ var singleWords []SingleWord
+
+ for _, word := range ranks.Words {
+ singleWords = append(singleWords, SingleWord{
+ word.ID,
+ word.Token,
+ word.Weight,
+ word.Qty,
+ })
+ }
+
+ sort.Slice(singleWords, func(i, j int) bool {
+ return singleWords[i].Weight > singleWords[j].Weight
+ })
+
+ return singleWords
+}
+
+// Sentence struct contains a single sentence and its data.
+type Sentence struct {
+ ID int
+ Value string
+}
+
+// ByQty filter by occurrence of word.
+const ByQty = 0
+
+// ByRelation filter by phrase weight.
+const ByRelation = 1
+
+// FindSentences function has wrappers textrank.FindSentencesByRelationWeight
+// and textrank.FindSentencesByWordQtyWeight. Use the wrappers instead.
+func FindSentences(ranks *Rank, kind int, limit int) []Sentence {
+ var sentences []Sentence
+
+ cache := make(map[int]bool)
+
+ collect := func(sentenceIDs []int) bool {
+ for _, id := range sentenceIDs {
+ if len(sentences) >= limit {
+ return true
+ }
+
+ if !cache[id] {
+ sentences = append(sentences, Sentence{id, ranks.SentenceMap[id]})
+ cache[id] = true
+ }
+ }
+
+ return false
+ }
+
+ if kind == ByQty {
+ singleWords := FindSingleWords(ranks)
+
+ for _, singleWord := range singleWords {
+ sentenceIDs := ranks.Words[singleWord.ID].SentenceIDs
+
+ if collect(sentenceIDs) {
+ return sentences
+ }
+ }
+ } else if kind == ByRelation {
+ phrases := FindPhrases(ranks)
+
+ for _, phrase := range phrases {
+ sentenceIDs := ranks.Relation.Node[phrase.LeftID][phrase.RightID].SentenceIDs
+
+ if collect(sentenceIDs) {
+ return sentences
+ }
+ }
+ }
+
+ return sentences
+}
+
+// FindSentencesByPhrases function has wrapper
+// textrank.FindSentencesByPhraseChain. Use the wrapper instead.
+func FindSentencesByPhrases(ranks *Rank, words []string) []Sentence {
+ var sentences []Sentence
+
+ reqMatch := len(words) - 1
+ sentenceIDs := make(map[int]int)
+
+ for _, i := range words {
+ for _, j := range words {
+ x := ranks.WordValID[i]
+ y := ranks.WordValID[j]
+
+ if _, ok := ranks.Relation.Node[x][y]; ok {
+ curSentenceIDs := ranks.Relation.Node[x][y].SentenceIDs
+
+ for _, id := range curSentenceIDs {
+ if _, ok := sentenceIDs[id]; ok {
+ sentenceIDs[id]++
+ } else {
+ sentenceIDs[id] = 1
+ }
+ }
+ }
+ }
+ }
+
+ for sentenceID, v := range sentenceIDs {
+ if v >= reqMatch {
+ sentences = append(sentences, Sentence{sentenceID, ranks.SentenceMap[sentenceID]})
+ }
+ }
+
+ sort.Slice(sentences, func(i, j int) bool {
+ return sentences[i].ID < sentences[j].ID
+ })
+
+ return sentences
+}
+
+// FindSentencesFrom function has wrapper textrank.FindSentencesFrom. Use the
+// wrapper instead.
+func FindSentencesFrom(ranks *Rank, id int, limit int) []Sentence {
+ var sentences []Sentence
+
+ limit = id + limit - 1
+
+ for i := id; i <= limit; i++ {
+ sentences = append(sentences, Sentence{i, ranks.SentenceMap[i]})
+ }
+
+ return sentences
+}