summary | refs | log | tree | commit | diff
path: root/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go
diff options
context:
space:
mode:
author: Mitja Felicijan <mitja.felicijan@gmail.com> 2024-10-25 00:47:47 +0200
committer: Mitja Felicijan <mitja.felicijan@gmail.com> 2024-10-25 00:47:47 +0200
commit: c6cc0108ca7738023b45e0eeac0fa2390532dd93 (patch)
tree: 36890e6cd3091bbab8efbe686cc56f467f645bfd /vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go
parent: 0130404a1dc663d4aa68d780c9bcb23a4243e68d (diff)
download: jbmafp-master.tar.gz
Added vendor lock on deps (HEAD, master)
Diffstat (limited to 'vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go')
-rw-r--r-- vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go 202
1 files changed, 202 insertions, 0 deletions
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go
new file mode 100644
index 0000000..6d00a97
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go
@@ -0,0 +1,202 @@
1package rank
2
3import (
4 "sort"
5)
6
// Phrase describes a pair of related words together with its ranking data.
//
// LeftID and RightID are the IDs of the first and the second word.
//
// Left and Right are the tokens of the first and the second word.
//
// Weight is the weight of the phrase, between 0.00 and 1.00.
//
// Qty is the number of occurrences of the phrase.
type Phrase struct {
	LeftID, RightID int
	Left, Right     string
	Weight          float32
	Qty             int
}
28
29// FindPhrases function has wrapper textrank.FindPhrases. Use the wrapper
30// instead.
31func FindPhrases(ranks *Rank) []Phrase {
32 var phrases []Phrase
33
34 for x, xMap := range ranks.Relation.Node {
35 for y := range xMap {
36 phrases = append(phrases, Phrase{
37 ranks.Words[x].ID,
38 ranks.Words[y].ID,
39 ranks.Words[x].Token,
40 ranks.Words[y].Token,
41 ranks.Relation.Node[x][y].Weight,
42 ranks.Relation.Node[x][y].Qty,
43 })
44 }
45 }
46
47 sort.Slice(phrases, func(i, j int) bool {
48 return phrases[i].Weight > phrases[j].Weight
49 })
50
51 return phrases
52}
53
// SingleWord describes one word together with its ranking data.
//
// ID is the identifier of the word.
//
// Word is the token, the word itself.
//
// Weight is the weight of the word, between 0.00 and 1.00.
//
// Qty is the quantity of the word.
type SingleWord struct {
	ID     int
	Word   string
	Weight float32
	Qty    int
}
69
70// FindSingleWords function has wrapper textrank.FindSingleWords. Use the
71// wrapper instead.
72func FindSingleWords(ranks *Rank) []SingleWord {
73 var singleWords []SingleWord
74
75 for _, word := range ranks.Words {
76 singleWords = append(singleWords, SingleWord{
77 word.ID,
78 word.Token,
79 word.Weight,
80 word.Qty,
81 })
82 }
83
84 sort.Slice(singleWords, func(i, j int) bool {
85 return singleWords[i].Weight > singleWords[j].Weight
86 })
87
88 return singleWords
89}
90
// Sentence pairs the ID of a sentence with its text.
//
// ID is the identifier of the sentence.
//
// Value is the sentence itself.
type Sentence struct {
	ID    int
	Value string
}
96
// Values accepted by the kind argument of FindSentences.
const (
	// ByQty filter by occurrence of word: sentences are taken in the order
	// of the words returned by FindSingleWords.
	ByQty = 0

	// ByRelation filter by phrase weight: sentences are taken in the order
	// of the phrases returned by FindPhrases.
	ByRelation = 1
)
102
103// FindSentences function has wrappers textrank.FindSentencesByRelationWeight
104// and textrank.FindSentencesByWordQtyWeight. Use the wrappers instead.
105func FindSentences(ranks *Rank, kind int, limit int) []Sentence {
106 var sentences []Sentence
107
108 cache := make(map[int]bool)
109
110 collect := func(sentenceIDs []int) bool {
111 for _, id := range sentenceIDs {
112 if len(sentences) >= limit {
113 return true
114 }
115
116 if !cache[id] {
117 sentences = append(sentences, Sentence{id, ranks.SentenceMap[id]})
118 cache[id] = true
119 }
120 }
121
122 return false
123 }
124
125 if kind == ByQty {
126 singleWords := FindSingleWords(ranks)
127
128 for _, singleWord := range singleWords {
129 sentenceIDs := ranks.Words[singleWord.ID].SentenceIDs
130
131 if collect(sentenceIDs) {
132 return sentences
133 }
134 }
135 } else if kind == ByRelation {
136 phrases := FindPhrases(ranks)
137
138 for _, phrase := range phrases {
139 sentenceIDs := ranks.Relation.Node[phrase.LeftID][phrase.RightID].SentenceIDs
140
141 if collect(sentenceIDs) {
142 return sentences
143 }
144 }
145 }
146
147 return sentences
148}
149
150// FindSentencesByPhrases function has wrapper
151// textrank.FindSentencesByPhraseChain. Use the wrapper instead.
152func FindSentencesByPhrases(ranks *Rank, words []string) []Sentence {
153 var sentences []Sentence
154
155 reqMatch := len(words) - 1
156 sentenceIDs := make(map[int]int)
157
158 for _, i := range words {
159 for _, j := range words {
160 x := ranks.WordValID[i]
161 y := ranks.WordValID[j]
162
163 if _, ok := ranks.Relation.Node[x][y]; ok {
164 curSentenceIDs := ranks.Relation.Node[x][y].SentenceIDs
165
166 for _, id := range curSentenceIDs {
167 if _, ok := sentenceIDs[id]; ok {
168 sentenceIDs[id]++
169 } else {
170 sentenceIDs[id] = 1
171 }
172 }
173 }
174 }
175 }
176
177 for sentenceID, v := range sentenceIDs {
178 if v >= reqMatch {
179 sentences = append(sentences, Sentence{sentenceID, ranks.SentenceMap[sentenceID]})
180 }
181 }
182
183 sort.Slice(sentences, func(i, j int) bool {
184 return sentences[i].ID < sentences[j].ID
185 })
186
187 return sentences
188}
189
190// FindSentencesFrom function has wrapper textrank.FindSentencesFrom. Use the
191// wrapper instead.
192func FindSentencesFrom(ranks *Rank, id int, limit int) []Sentence {
193 var sentences []Sentence
194
195 limit = id + limit - 1
196
197 for i := id; i <= limit; i++ {
198 sentences = append(sentences, Sentence{i, ranks.SentenceMap[i]})
199 }
200
201 return sentences
202}