summaryrefslogtreecommitdiff
path: root/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go
blob: 6d00a97b527be24980b209e9b7d55317fe21a658 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
package rank

import (
	"sort"
)

// Phrase describes a pair of related words (a two-word phrase).
//
// LeftID and RightID are the IDs of the first and the second word.
//
// Left and Right are the tokens of the first and the second word.
//
// Weight is the weight of the relation between the two words; it is expected
// to lie between 0.00 and 1.00.
//
// Qty is the number of occurrences of the phrase.
//
// The field order is significant: the struct is also built with positional
// composite literals.
type Phrase struct {
	LeftID, RightID int
	Left, Right     string
	Weight          float32
	Qty             int
}

// FindPhrases builds one Phrase for every (x, y) entry found in
// ranks.Relation.Node and returns them ordered by descending Weight.
//
// Phrases with the same Weight are ordered by ascending LeftID and then by
// ascending RightID. Without that tie-break the order of equally weighted
// phrases would depend on map iteration order and on sort.Slice, which does
// not keep equal elements in their original order.
//
// The result is nil when there are no relations.
//
// FindPhrases has the wrapper textrank.FindPhrases. Use the wrapper instead.
func FindPhrases(ranks *Rank) []Phrase {
	var phrases []Phrase

	for x, xMap := range ranks.Relation.Node {
		left := ranks.Words[x]

		// Take the relation entry straight from the range clause instead of
		// looking ranks.Relation.Node[x][y] up again for every field.
		for y, score := range xMap {
			right := ranks.Words[y]

			phrases = append(phrases, Phrase{
				left.ID,
				right.ID,
				left.Token,
				right.Token,
				score.Weight,
				score.Qty,
			})
		}
	}

	sort.Slice(phrases, func(i, j int) bool {
		a, b := phrases[i], phrases[j]

		if a.Weight != b.Weight {
			return a.Weight > b.Weight
		}

		// Deterministic order for equal weights.
		if a.LeftID != b.LeftID {
			return a.LeftID < b.LeftID
		}

		return a.RightID < b.RightID
	})

	return phrases
}

// SingleWord struct contains a single word and its data.
//
// ID is the ID of the word.
//
// Word is the word itself, the token.
//
// Weight is the weight of the word; it is expected to lie between 0.00 and
// 1.00.
//
// Qty is the quantity (number of occurrences) of the word.
//
// The field order is significant: FindSingleWords builds the struct with a
// positional composite literal.
type SingleWord struct {
	ID     int
	Word   string
	Weight float32
	Qty    int
}

// FindSingleWords converts every entry of ranks.Words into a SingleWord and
// returns them ordered by descending Weight.
//
// Words with the same Weight are ordered by ascending ID. Without that
// tie-break the order of equally weighted words would not be reproducible,
// because sort.Slice does not keep equal elements in their original order.
//
// The result is nil when ranks.Words is empty.
//
// FindSingleWords has the wrapper textrank.FindSingleWords. Use the wrapper
// instead.
func FindSingleWords(ranks *Rank) []SingleWord {
	var singleWords []SingleWord

	for _, word := range ranks.Words {
		singleWords = append(singleWords, SingleWord{
			word.ID,
			word.Token,
			word.Weight,
			word.Qty,
		})
	}

	sort.Slice(singleWords, func(i, j int) bool {
		a, b := singleWords[i], singleWords[j]

		if a.Weight != b.Weight {
			return a.Weight > b.Weight
		}

		// Deterministic order for equal weights.
		return a.ID < b.ID
	})

	return singleWords
}

// Sentence struct contains a single sentence and its data.
//
// ID is the ID of the sentence; within this file it is the key used to look
// the sentence up in Rank.SentenceMap.
//
// Value is the entry stored in Rank.SentenceMap under that ID.
type Sentence struct {
	ID    int
	Value string
}

// Selection kinds accepted by FindSentences.
const (
	// ByQty makes FindSentences pick sentences by walking the words returned
	// by FindSingleWords.
	ByQty = 0

	// ByRelation makes FindSentences pick sentences by walking the phrases
	// returned by FindPhrases.
	ByRelation = 1
)

// FindSentences returns up to limit distinct sentences.
//
// With kind == ByQty the candidates are the sentence IDs attached to the
// words, visited in the order produced by FindSingleWords. With
// kind == ByRelation the candidates are the sentence IDs attached to the
// relations, visited in the order produced by FindPhrases. Any other kind
// yields a nil result.
//
// Every sentence ID is returned at most once, in the order it was first met.
//
// FindSentences has the wrappers textrank.FindSentencesByRelationWeight and
// textrank.FindSentencesByWordQtyWeight. Use the wrappers instead.
func FindSentences(ranks *Rank, kind int, limit int) []Sentence {
	var picked []Sentence

	seen := make(map[int]struct{})

	// take appends the not yet seen sentences of ids to picked. It reports
	// true as soon as it examines an ID while picked already holds limit
	// entries, which tells the caller to stop.
	take := func(ids []int) bool {
		for _, sentenceID := range ids {
			if len(picked) >= limit {
				return true
			}

			if _, dup := seen[sentenceID]; dup {
				continue
			}

			picked = append(picked, Sentence{sentenceID, ranks.SentenceMap[sentenceID]})
			seen[sentenceID] = struct{}{}
		}

		return false
	}

	switch kind {
	case ByQty:
		for _, w := range FindSingleWords(ranks) {
			if take(ranks.Words[w.ID].SentenceIDs) {
				return picked
			}
		}
	case ByRelation:
		for _, p := range FindPhrases(ranks) {
			if take(ranks.Relation.Node[p.LeftID][p.RightID].SentenceIDs) {
				return picked
			}
		}
	}

	return picked
}

// FindSentencesByPhrases returns the sentences that contain relations between
// the given words.
//
// Every ordered pair of the given words is looked up in ranks.Relation.Node.
// For each relation found, the hit counter of every sentence ID attached to it
// is incremented. A sentence is returned when its counter is at least
// len(words)-1. The result is ordered by ascending sentence ID and is nil when
// nothing matches.
//
// Words that are missing from ranks.WordValID are skipped. A plain map read
// would yield the zero ID for them, and they would then be treated as the word
// that holds ID 0.
//
// FindSentencesByPhrases has the wrapper textrank.FindSentencesByPhraseChain.
// Use the wrapper instead.
func FindSentencesByPhrases(ranks *Rank, words []string) []Sentence {
	var sentences []Sentence

	reqMatch := len(words) - 1
	sentenceIDs := make(map[int]int)

	for _, i := range words {
		x, xKnown := ranks.WordValID[i]
		if !xKnown {
			continue
		}

		for _, j := range words {
			y, yKnown := ranks.WordValID[j]
			if !yKnown {
				continue
			}

			// Single lookup: fetch the relation and test its presence at once.
			score, ok := ranks.Relation.Node[x][y]
			if !ok {
				continue
			}

			for _, id := range score.SentenceIDs {
				// A missing key reads as 0, so the first hit stores 1.
				sentenceIDs[id]++
			}
		}
	}

	for sentenceID, v := range sentenceIDs {
		if v >= reqMatch {
			sentences = append(sentences, Sentence{sentenceID, ranks.SentenceMap[sentenceID]})
		}
	}

	// Map iteration order is random; sort to return a stable result.
	sort.Slice(sentences, func(i, j int) bool {
		return sentences[i].ID < sentences[j].ID
	})

	return sentences
}

// FindSentencesFrom returns the sentences with the consecutive IDs
// id, id+1, ..., id+limit-1, in that order.
//
// Each Value is read from ranks.SentenceMap without checking that the ID is
// present there. The result is nil when limit is not positive.
//
// FindSentencesFrom has the wrapper textrank.FindSentencesFrom. Use the
// wrapper instead.
func FindSentencesFrom(ranks *Rank, id int, limit int) []Sentence {
	var window []Sentence

	// Inclusive upper bound of the requested ID range.
	lastID := id + limit - 1

	for current := id; current <= lastID; current++ {
		window = append(window, Sentence{ID: current, Value: ranks.SentenceMap[current]})
	}

	return window
}