5 files changed, 591 insertions, 0 deletions
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/algorithm.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/algorithm.go
new file mode 100644
index 0000000..8f9345f
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/algorithm.go
@@ -0,0 +1,99 @@
+package rank
+
+import (
+	"math"
+)
+
+// Algorithm interface and its methods make possible the polimorf usage of
+// weighting process.
+type Algorithm interface {
+	WeightingRelation(
+		word1ID int,
+		word2ID int,
+		rank *Rank,
+	) float32
+
+	WeightingHits(
+		wordID int,
+		rank *Rank,
+	) float32
+}
+
+// AlgorithmDefault struct is the basic implementation of Algorithm. It can
+// weight a word or phrase by comparing them.
+type AlgorithmDefault struct{}
+
+// NewAlgorithmDefault constructor retrieves an AlgorithmDefault pointer.
+func NewAlgorithmDefault() *AlgorithmDefault {
+	return &AlgorithmDefault{}
+}
+
+// WeightingRelation method is the traditional algorithm of text rank to
+// weighting a phrase.
+func (a *AlgorithmDefault) WeightingRelation(
+	word1ID int,
+	word2ID int,
+	rank *Rank,
+) float32 {
+	relationQty := rank.Relation.Node[word1ID][word2ID].Qty
+
+	return float32(relationQty)
+}
+
+// WeightingHits method ranks the words by their occurrence.
+func (a *AlgorithmDefault) WeightingHits(
+	wordID int,
+	rank *Rank,
+) float32 {
+	weight := rank.Words[wordID].Qty
+
+	return float32(weight)
+}
+
+// AlgorithmChain struct is the combined implementation of Algorithm. It is a
+// good example how weighting can be changed by a different implementations. It
+// can weight a word or phrase by comparing them.
+type AlgorithmChain struct{}
+
+// NewAlgorithmChain constructor retrieves an AlgorithmChain pointer.
+func NewAlgorithmChain() *AlgorithmChain {
+	return &AlgorithmChain{}
+}
+
+// WeightingRelation method is a combined algorithm of text rank and word
+// occurrence, it weights a phrase.
+func (a *AlgorithmChain) WeightingRelation(
+	word1ID int,
+	word2ID int,
+	rank *Rank,
+) float32 {
+	relationQty := rank.Relation.Node[word1ID][word2ID].Qty
+	word1Qty := rank.Words[word1ID].Qty
+	word2Qty := rank.Words[word2ID].Qty
+
+	qDiff := float32(math.Abs(float64(word1Qty)-float64(word2Qty))) / 100
+	weight := float32(relationQty) + qDiff
+
+	return weight
+}
+
+// WeightingHits method ranks the words by their occurrence.
+func (a *AlgorithmChain) WeightingHits(
+	wordID int,
+	rank *Rank,
+) float32 {
+	word := rank.Words[wordID]
+	qty := 0
+
+	for leftWordID, leftWordQty := range word.ConnectionLeft {
+		qty += rank.Words[leftWordID].Qty * leftWordQty
+	}
+
+	for rightWordID, rightWordQty := range word.ConnectionRight {
+		qty += rank.Words[rightWordID].Qty * rightWordQty
+	}
+
+	weight := float32(word.Qty) + (float32(qty))
+
+	return float32(weight)
+}
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go
new file mode 100644
index 0000000..3bcef7c
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/rank.go
@@ -0,0 +1,147 @@
+package rank
+
+// Rank struct contains every original raw sentences, words, tokens, phrases,
+// indexes, word hits, phrase hits and minimum-maximum values.
+//
+// Max is the occurrence of the most used word.
+//
+// Min is the occurrence of the less used word. It is always greater then 0.
+//
+// Relation is the Relation object, contains phrases.
+//
+// SentenceMap contains raw sentences. Index is the sentence ID, value is the
+// sentence itself.
+//
+// Words contains Word objects. Index is the word ID, value is the word/token
+// itself.
+//
+// WordValID contains words. Index is the word/token, value is the ID.
+type Rank struct {
+	Max         float32
+	Min         float32
+	Relation    Relation
+	SentenceMap map[int]string
+	Words       map[int]*Word
+	WordValID   map[string]int
+}
+
+// Word struct contains all data about the words.
+//
+// If a word is multiple times in the text then the multiple words point to the
+// same ID. So Word is unique.
+//
+// SentenceIDs contains all IDs of sentences what contain the word.
+//
+// ConnectionLeft contains all words what are connected to this word on the left
+// side. The map index is the ID of the related word and its value is the
+// occurrence.
+//
+// ConnectionRight contains all words what are connected to this word on the
+// right side. The map index is the ID of the related word and its value is the
+// occurrence.
+//
+// Token is the word itself, but not the original, it is tokenized.
+//
+// Qty is the number of occurrence of the word.
+//
+// Weight is the weight of the word between 0.00 and 1.00.
+type Word struct {
+	ID              int
+	SentenceIDs     []int
+	ConnectionLeft  map[int]int
+	ConnectionRight map[int]int
+	Token           string
+	Qty             int
+	Weight          float32
+}
+
+// NewRank constructor retrieves a Rank pointer.
+func NewRank() *Rank {
+	return &Rank{
+		0,
+		0,
+		Relation{
+			0,
+			0,
+			make(map[int]map[int]Score),
+		},
+		make(map[int]string),
+		make(map[int]*Word),
+		make(map[string]int),
+	}
+}
+
+// IsWordExist method retrieves true when the given word is already in the rank.
+func (rank *Rank) IsWordExist(word string) bool {
+	_, find := rank.WordValID[word]
+
+	return find
+}
+
+// AddNewWord method adds a new word to the rank object and it defines its ID.
+func (rank *Rank) AddNewWord(word string, prevWordIdx int, sentenceID int) (wordID int) {
+	wordID = len(rank.Words)
+	connectionLeft := make(map[int]int)
+
+	if prevWordIdx >= 0 {
+		connectionLeft[prevWordIdx] = 1
+	}
+
+	newWord := &Word{
+		ID:              wordID,
+		SentenceIDs:     []int{sentenceID},
+		ConnectionLeft:  connectionLeft,
+		ConnectionRight: make(map[int]int),
+		Token:           word,
+		Qty:             1,
+		Weight:          0,
+	}
+
+	rank.Words[wordID] = newWord
+	rank.WordValID[word] = wordID
+
+	return
+}
+
+// UpdateWord method update a word what already exists in the rank object. It
+// retrieves its ID.
+func (rank *Rank) UpdateWord(word string, prevWordIdx int, sentenceID int) (wordID int) {
+	wordID = rank.WordValID[word]
+
+	found := false
+
+	for _, oldSentenceID := range rank.Words[wordID].SentenceIDs {
+		if sentenceID == oldSentenceID {
+			found = true
+			break
+		}
+	}
+
+	if !found {
+		rank.Words[wordID].SentenceIDs = append(
+			rank.Words[wordID].SentenceIDs,
+			sentenceID,
+		)
+	}
+
+	rank.Words[wordID].Qty++
+
+	if prevWordIdx >= 0 {
+		rank.Words[wordID].ConnectionLeft[prevWordIdx]++
+	}
+
+	return
+}
+
+// UpdateRightConnection method adds the right connection to the word. It always
+// can be used after a word has added and the next word is known.
+func (rank *Rank) UpdateRightConnection(wordID int, rightWordID int) {
+	if wordID >= 0 {
+		rank.Words[wordID].ConnectionRight[rightWordID]++
+	}
+}
+
+// GetWordData method retrieves all words as a pointer.
+func (rank *Rank) GetWordData() map[int]*Word {
+	return rank.Words
+}
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/ranking.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/ranking.go
new file mode 100644
index 0000000..5fd2dfa
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/ranking.go
@@ -0,0 +1,66 @@
+package rank
+
+// Calculate function ranking words by the given algorithm implementation.
+func Calculate(ranks *Rank, algorithm Algorithm) {
+	updateRanks(ranks, algorithm)
+}
+
+func updateRanks(ranks *Rank, algorithm Algorithm) {
+	for _, word := range ranks.Words {
+		weight := algorithm.WeightingHits(word.ID, ranks)
+		word.Weight = weight
+
+		if ranks.Max < word.Weight {
+			ranks.Max = word.Weight
+		}
+
+		if ranks.Min > word.Weight || ranks.Min == 0 {
+			ranks.Min = word.Weight
+		}
+	}
+
+	for _, word := range ranks.Words {
+		word.Weight = normalize(word.Weight, ranks.Min, ranks.Max)
+	}
+
+	for x, xMap := range ranks.Relation.Node {
+		for y := range xMap {
+			sentenceIDs := ranks.Relation.Node[x][y].SentenceIDs
+			weight := algorithm.WeightingRelation(x, y, ranks)
+
+			ranks.Relation.Node[x][y] = Score{
+				ranks.Relation.Node[x][y].Qty,
+				weight,
+				sentenceIDs,
+			}
+
+			if ranks.Relation.Max < weight {
+				ranks.Relation.Max = weight
+			}
+
+			if ranks.Relation.Min > weight || ranks.Relation.Min == 0 {
+				ranks.Relation.Min = weight
+			}
+		}
+	}
+
+	for x, xMap := range ranks.Relation.Node {
+		for y := range xMap {
+			weight := normalize(
+				ranks.Relation.Node[x][y].Weight,
+				ranks.Relation.Min,
+				ranks.Relation.Max,
+			)
+
+			ranks.Relation.Node[x][y] = Score{
+				ranks.Relation.Node[x][y].Qty,
+				weight,
+				ranks.Relation.Node[x][y].SentenceIDs,
+			}
+		}
+	}
+}
+
+func normalize(weight float32, min float32, max float32) float32 {
+	return (weight - min) / (max - min)
+}
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/relation.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/relation.go
new file mode 100644
index 0000000..cb8b97e
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/relation.go
@@ -0,0 +1,77 @@
+package rank
+
+// Relation struct contains the phrase data.
+//
+// Max is the occurrence of the most used phrase.
+//
+// Min is the occurrence of the less used phrase. It is always greater then 0.
+//
+// Node is contains the Scores. Firs ID is the word 1, second ID is the word 2,
+// and the value is the Score what contains the data about their relation.
+type Relation struct {
+	Max  float32
+	Min  float32
+	Node map[int]map[int]Score
+}
+
+// Score struct contains data about a relation of two words.
+//
+// Qty is the occurrence of the phrase.
+//
+// Weight is the weight of the phrase between 0.00 and 1.00.
+//
+// SentenceIDs contains all IDs of sentences what contain the phrase.
+type Score struct {
+	Qty         int
+	Weight      float32
+	SentenceIDs []int
+}
+
+// AddRelation method adds a new relation to Relation object.
+func (relation *Relation) AddRelation(wordID int, relatedWordID int, sentenceID int) {
+	if relatedWordID == -1 {
+		return
+	}
+
+	if relation.updateRelation(relatedWordID, wordID, true, sentenceID) {
+		return
+	}
+
+	if relation.extendRelation(wordID, relatedWordID, true, sentenceID) {
+		return
+	}
+
+	relation.createRelation(wordID, relatedWordID, sentenceID)
+}
+
+func (relation *Relation) updateRelation(x int, y int, r bool, sentenceID int) bool {
+	if _, ok := relation.Node[x][y]; ok {
+		count := relation.Node[x][y].Qty + 1
+		weight := relation.Node[x][y].Weight
+		sentenceIDs := append(relation.Node[x][y].SentenceIDs, sentenceID)
+		relation.Node[x][y] = Score{count, weight, sentenceIDs}
+
+		return true
+	} else if r {
+		return relation.updateRelation(y, x, false, sentenceID)
+	}
+
+	return false
+}
+
+func (relation *Relation) extendRelation(x int, y int, r bool, sentenceID int) bool {
+	if _, ok := relation.Node[x]; ok {
+		relation.Node[x][y] = Score{1, 0, []int{sentenceID}}
+
+		return true
+	} else if r {
+		return relation.extendRelation(y, x, false, sentenceID)
+	}
+
+	return false
+}
+
+func (relation *Relation) createRelation(x int, y int, sentenceID int) {
+	relation.Node[x] = map[int]Score{}
+	relation.Node[x][y] = Score{1, 0, []int{sentenceID}}
+}
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go
new file mode 100644
index 0000000..6d00a97
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go
@@ -0,0 +1,202 @@
+package rank
+
+import (
+	"sort"
+)
+
+// Phrase struct contains a single phrase and its data.
+//
+// LeftID is the ID of the word 1.
+//
+// RightID is the ID of the word 2.
+//
+// Left is the token of the word 1.
+//
+// Right is the token of the word 2.
+//
+// Weight is between 0.00 and 1.00.
+//
+// Qty is the occurrence of the phrase.
+type Phrase struct {
+	LeftID  int
+	RightID int
+	Left    string
+	Right   string
+	Weight  float32
+	Qty     int
+}
+
+// FindPhrases function has wrapper textrank.FindPhrases. Use the wrapper
+// instead.
+func FindPhrases(ranks *Rank) []Phrase {
+	var phrases []Phrase
+
+	for x, xMap := range ranks.Relation.Node {
+		for y := range xMap {
+			phrases = append(phrases, Phrase{
+				ranks.Words[x].ID,
+				ranks.Words[y].ID,
+				ranks.Words[x].Token,
+				ranks.Words[y].Token,
+				ranks.Relation.Node[x][y].Weight,
+				ranks.Relation.Node[x][y].Qty,
+			})
+		}
+	}
+
+	sort.Slice(phrases, func(i, j int) bool {
+		return phrases[i].Weight > phrases[j].Weight
+	})
+
+	return phrases
+}
+
+// SingleWord struct contains a single word and its data.
+//
+// ID of the word.
+//
+// Word itself, the token.
+//
+// Weight of the word between 0.00 and 1.00.
+//
+// Quantity of the word.
+type SingleWord struct {
+	ID     int
+	Word   string
+	Weight float32
+	Qty    int
+}
+
+// FindSingleWords function has wrapper textrank.FindSingleWords. Use the
+// wrapper instead.
+func FindSingleWords(ranks *Rank) []SingleWord {
+	var singleWords []SingleWord
+
+	for _, word := range ranks.Words {
+		singleWords = append(singleWords, SingleWord{
+			word.ID,
+			word.Token,
+			word.Weight,
+			word.Qty,
+		})
+	}
+
+	sort.Slice(singleWords, func(i, j int) bool {
+		return singleWords[i].Weight > singleWords[j].Weight
+	})
+
+	return singleWords
+}
+
+// Sentence struct contains a single sentence and its data.
+type Sentence struct {
+	ID    int
+	Value string
+}
+
+// ByQty filter by occurrence of word.
+const ByQty = 0
+
+// ByRelation filter by phrase weight.
+const ByRelation = 1
+
+// FindSentences function has wrappers textrank.FindSentencesByRelationWeight
+// and textrank.FindSentencesByWordQtyWeight. Use the wrappers instead.
+func FindSentences(ranks *Rank, kind int, limit int) []Sentence {
+	var sentences []Sentence
+
+	cache := make(map[int]bool)
+
+	collect := func(sentenceIDs []int) bool {
+		for _, id := range sentenceIDs {
+			if len(sentences) >= limit {
+				return true
+			}
+
+			if !cache[id] {
+				sentences = append(sentences, Sentence{id, ranks.SentenceMap[id]})
+				cache[id] = true
+			}
+		}
+
+		return false
+	}
+
+	if kind == ByQty {
+		singleWords := FindSingleWords(ranks)
+
+		for _, singleWord := range singleWords {
+			sentenceIDs := ranks.Words[singleWord.ID].SentenceIDs
+
+			if collect(sentenceIDs) {
+				return sentences
+			}
+		}
+	} else if kind == ByRelation {
+		phrases := FindPhrases(ranks)
+
+		for _, phrase := range phrases {
+			sentenceIDs := ranks.Relation.Node[phrase.LeftID][phrase.RightID].SentenceIDs
+
+			if collect(sentenceIDs) {
+				return sentences
+			}
+		}
+	}
+
+	return sentences
+}
+
+// FindSentencesByPhrases function has wrapper
+// textrank.FindSentencesByPhraseChain. Use the wrapper instead.
+func FindSentencesByPhrases(ranks *Rank, words []string) []Sentence {
+	var sentences []Sentence
+
+	reqMatch := len(words) - 1
+	sentenceIDs := make(map[int]int)
+
+	for _, i := range words {
+		for _, j := range words {
+			x := ranks.WordValID[i]
+			y := ranks.WordValID[j]
+
+			if _, ok := ranks.Relation.Node[x][y]; ok {
+				curSentenceIDs := ranks.Relation.Node[x][y].SentenceIDs
+
+				for _, id := range curSentenceIDs {
+					if _, ok := sentenceIDs[id]; ok {
+						sentenceIDs[id]++
+					} else {
+						sentenceIDs[id] = 1
+					}
+				}
+			}
+		}
+	}
+
+	for sentenceID, v := range sentenceIDs {
+		if v >= reqMatch {
+			sentences = append(sentences, Sentence{sentenceID, ranks.SentenceMap[sentenceID]})
+		}
+	}
+
+	sort.Slice(sentences, func(i, j int) bool {
+		return sentences[i].ID < sentences[j].ID
+	})
+
+	return sentences
+}
+
+// FindSentencesFrom function has wrapper textrank.FindSentencesFrom. Use the
+// wrapper instead.
+func FindSentencesFrom(ranks *Rank, id int, limit int) []Sentence {
+	var sentences []Sentence
+
+	limit = id + limit - 1
+
+	for i := id; i <= limit; i++ {
+		sentences = append(sentences, Sentence{i, ranks.SentenceMap[i]})
+	}
+
+	return sentences
+}