jbmafp - vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go

Path: jbmafp / vendor / github.com / DavidBelicza / TextRank / v2 / rank / sorting.go (raw)
  1package rank
  2
  3import (
  4	"sort"
  5)
  6
  7// Phrase struct contains a single phrase and its data.
  8//
  9// LeftID is the ID of the word 1.
 10//
 11// RightID is the ID of the word 2.
 12//
 13// Left is the token of the word 1.
 14//
 15// Right is the token of the word 2.
 16//
 17// Weight is between 0.00 and 1.00.
 18//
 19// Qty is the occurrence of the phrase.
 20type Phrase struct {
 21	LeftID  int
 22	RightID int
 23	Left    string
 24	Right   string
 25	Weight  float32
 26	Qty     int
 27}
 28
 29// FindPhrases function has wrapper textrank.FindPhrases. Use the wrapper
 30// instead.
 31func FindPhrases(ranks *Rank) []Phrase {
 32	var phrases []Phrase
 33
 34	for x, xMap := range ranks.Relation.Node {
 35		for y := range xMap {
 36			phrases = append(phrases, Phrase{
 37				ranks.Words[x].ID,
 38				ranks.Words[y].ID,
 39				ranks.Words[x].Token,
 40				ranks.Words[y].Token,
 41				ranks.Relation.Node[x][y].Weight,
 42				ranks.Relation.Node[x][y].Qty,
 43			})
 44		}
 45	}
 46
 47	sort.Slice(phrases, func(i, j int) bool {
 48		return phrases[i].Weight > phrases[j].Weight
 49	})
 50
 51	return phrases
 52}
 53
 54// SingleWord struct contains a single word and its data.
 55//
 56// ID of the word.
 57//
 58// Word itself, the token.
 59//
 60// Weight of the word between 0.00 and 1.00.
 61//
 62// Quantity of the word.
 63type SingleWord struct {
 64	ID     int
 65	Word   string
 66	Weight float32
 67	Qty    int
 68}
 69
 70// FindSingleWords function has wrapper textrank.FindSingleWords. Use the
 71// wrapper instead.
 72func FindSingleWords(ranks *Rank) []SingleWord {
 73	var singleWords []SingleWord
 74
 75	for _, word := range ranks.Words {
 76		singleWords = append(singleWords, SingleWord{
 77			word.ID,
 78			word.Token,
 79			word.Weight,
 80			word.Qty,
 81		})
 82	}
 83
 84	sort.Slice(singleWords, func(i, j int) bool {
 85		return singleWords[i].Weight > singleWords[j].Weight
 86	})
 87
 88	return singleWords
 89}
 90
 91// Sentence struct contains a single sentence and its data.
 92type Sentence struct {
 93	ID    int
 94	Value string
 95}
 96
 97// ByQty filter by occurrence of word.
 98const ByQty = 0
 99
100// ByRelation filter by phrase weight.
101const ByRelation = 1
102
103// FindSentences function has wrappers textrank.FindSentencesByRelationWeight
104// and textrank.FindSentencesByWordQtyWeight. Use the wrappers instead.
105func FindSentences(ranks *Rank, kind int, limit int) []Sentence {
106	var sentences []Sentence
107
108	cache := make(map[int]bool)
109
110	collect := func(sentenceIDs []int) bool {
111		for _, id := range sentenceIDs {
112			if len(sentences) >= limit {
113				return true
114			}
115
116			if !cache[id] {
117				sentences = append(sentences, Sentence{id, ranks.SentenceMap[id]})
118				cache[id] = true
119			}
120		}
121
122		return false
123	}
124
125	if kind == ByQty {
126		singleWords := FindSingleWords(ranks)
127
128		for _, singleWord := range singleWords {
129			sentenceIDs := ranks.Words[singleWord.ID].SentenceIDs
130
131			if collect(sentenceIDs) {
132				return sentences
133			}
134		}
135	} else if kind == ByRelation {
136		phrases := FindPhrases(ranks)
137
138		for _, phrase := range phrases {
139			sentenceIDs := ranks.Relation.Node[phrase.LeftID][phrase.RightID].SentenceIDs
140
141			if collect(sentenceIDs) {
142				return sentences
143			}
144		}
145	}
146
147	return sentences
148}
149
150// FindSentencesByPhrases function has wrapper
151// textrank.FindSentencesByPhraseChain. Use the wrapper instead.
152func FindSentencesByPhrases(ranks *Rank, words []string) []Sentence {
153	var sentences []Sentence
154
155	reqMatch := len(words) - 1
156	sentenceIDs := make(map[int]int)
157
158	for _, i := range words {
159		for _, j := range words {
160			x := ranks.WordValID[i]
161			y := ranks.WordValID[j]
162
163			if _, ok := ranks.Relation.Node[x][y]; ok {
164				curSentenceIDs := ranks.Relation.Node[x][y].SentenceIDs
165
166				for _, id := range curSentenceIDs {
167					if _, ok := sentenceIDs[id]; ok {
168						sentenceIDs[id]++
169					} else {
170						sentenceIDs[id] = 1
171					}
172				}
173			}
174		}
175	}
176
177	for sentenceID, v := range sentenceIDs {
178		if v >= reqMatch {
179			sentences = append(sentences, Sentence{sentenceID, ranks.SentenceMap[sentenceID]})
180		}
181	}
182
183	sort.Slice(sentences, func(i, j int) bool {
184		return sentences[i].ID < sentences[j].ID
185	})
186
187	return sentences
188}
189
190// FindSentencesFrom function has wrapper textrank.FindSentencesFrom. Use the
191// wrapper instead.
192func FindSentencesFrom(ranks *Rank, id int, limit int) []Sentence {
193	var sentences []Sentence
194
195	limit = id + limit - 1
196
197	for i := id; i <= limit; i++ {
198		sentences = append(sentences, Sentence{i, ranks.SentenceMap[i]})
199	}
200
201	return sentences
202}