diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2024-10-25 00:47:47 +0200 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2024-10-25 00:47:47 +0200 |
| commit | c6cc0108ca7738023b45e0eeac0fa2390532dd93 (patch) | |
| tree | 36890e6cd3091bbab8efbe686cc56f467f645bfd /vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go | |
| parent | 0130404a1dc663d4aa68d780c9bcb23a4243e68d (diff) | |
| download | jbmafp-master.tar.gz | |
Diffstat (limited to 'vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go')
| -rw-r--r-- | vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go b/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go new file mode 100644 index 0000000..6d00a97 --- /dev/null +++ b/vendor/github.com/DavidBelicza/TextRank/v2/rank/sorting.go | |||
| @@ -0,0 +1,202 @@ | |||
| 1 | package rank | ||
| 2 | |||
| 3 | import ( | ||
| 4 | "sort" | ||
| 5 | ) | ||
| 6 | |||
// Phrase holds one two-word phrase together with the data attached to it.
type Phrase struct {
	// LeftID and RightID are the IDs of the first and the second word.
	LeftID, RightID int
	// Left and Right are the tokens of the first and the second word.
	Left, Right string
	// Weight is between 0.00 and 1.00.
	Weight float32
	// Qty is the occurrence of the phrase.
	Qty int
}
| 28 | |||
| 29 | // FindPhrases function has wrapper textrank.FindPhrases. Use the wrapper | ||
| 30 | // instead. | ||
| 31 | func FindPhrases(ranks *Rank) []Phrase { | ||
| 32 | var phrases []Phrase | ||
| 33 | |||
| 34 | for x, xMap := range ranks.Relation.Node { | ||
| 35 | for y := range xMap { | ||
| 36 | phrases = append(phrases, Phrase{ | ||
| 37 | ranks.Words[x].ID, | ||
| 38 | ranks.Words[y].ID, | ||
| 39 | ranks.Words[x].Token, | ||
| 40 | ranks.Words[y].Token, | ||
| 41 | ranks.Relation.Node[x][y].Weight, | ||
| 42 | ranks.Relation.Node[x][y].Qty, | ||
| 43 | }) | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | sort.Slice(phrases, func(i, j int) bool { | ||
| 48 | return phrases[i].Weight > phrases[j].Weight | ||
| 49 | }) | ||
| 50 | |||
| 51 | return phrases | ||
| 52 | } | ||
| 53 | |||
// SingleWord holds one word together with the data attached to it.
type SingleWord struct {
	// ID of the word.
	ID int
	// Word itself, the token.
	Word string
	// Weight of the word between 0.00 and 1.00.
	Weight float32
	// Qty is the quantity of the word.
	Qty int
}
| 69 | |||
| 70 | // FindSingleWords function has wrapper textrank.FindSingleWords. Use the | ||
| 71 | // wrapper instead. | ||
| 72 | func FindSingleWords(ranks *Rank) []SingleWord { | ||
| 73 | var singleWords []SingleWord | ||
| 74 | |||
| 75 | for _, word := range ranks.Words { | ||
| 76 | singleWords = append(singleWords, SingleWord{ | ||
| 77 | word.ID, | ||
| 78 | word.Token, | ||
| 79 | word.Weight, | ||
| 80 | word.Qty, | ||
| 81 | }) | ||
| 82 | } | ||
| 83 | |||
| 84 | sort.Slice(singleWords, func(i, j int) bool { | ||
| 85 | return singleWords[i].Weight > singleWords[j].Weight | ||
| 86 | }) | ||
| 87 | |||
| 88 | return singleWords | ||
| 89 | } | ||
| 90 | |||
// Sentence holds one sentence together with its identifier.
type Sentence struct {
	// ID is the identifier of the sentence.
	ID int
	// Value is the text of the sentence.
	Value string
}
| 96 | |||
// Kinds accepted by FindSentences.
const (
	// ByQty filter by occurrence of word.
	ByQty = 0

	// ByRelation filter by phrase weight.
	ByRelation = 1
)
| 102 | |||
| 103 | // FindSentences function has wrappers textrank.FindSentencesByRelationWeight | ||
| 104 | // and textrank.FindSentencesByWordQtyWeight. Use the wrappers instead. | ||
| 105 | func FindSentences(ranks *Rank, kind int, limit int) []Sentence { | ||
| 106 | var sentences []Sentence | ||
| 107 | |||
| 108 | cache := make(map[int]bool) | ||
| 109 | |||
| 110 | collect := func(sentenceIDs []int) bool { | ||
| 111 | for _, id := range sentenceIDs { | ||
| 112 | if len(sentences) >= limit { | ||
| 113 | return true | ||
| 114 | } | ||
| 115 | |||
| 116 | if !cache[id] { | ||
| 117 | sentences = append(sentences, Sentence{id, ranks.SentenceMap[id]}) | ||
| 118 | cache[id] = true | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | return false | ||
| 123 | } | ||
| 124 | |||
| 125 | if kind == ByQty { | ||
| 126 | singleWords := FindSingleWords(ranks) | ||
| 127 | |||
| 128 | for _, singleWord := range singleWords { | ||
| 129 | sentenceIDs := ranks.Words[singleWord.ID].SentenceIDs | ||
| 130 | |||
| 131 | if collect(sentenceIDs) { | ||
| 132 | return sentences | ||
| 133 | } | ||
| 134 | } | ||
| 135 | } else if kind == ByRelation { | ||
| 136 | phrases := FindPhrases(ranks) | ||
| 137 | |||
| 138 | for _, phrase := range phrases { | ||
| 139 | sentenceIDs := ranks.Relation.Node[phrase.LeftID][phrase.RightID].SentenceIDs | ||
| 140 | |||
| 141 | if collect(sentenceIDs) { | ||
| 142 | return sentences | ||
| 143 | } | ||
| 144 | } | ||
| 145 | } | ||
| 146 | |||
| 147 | return sentences | ||
| 148 | } | ||
| 149 | |||
| 150 | // FindSentencesByPhrases function has wrapper | ||
| 151 | // textrank.FindSentencesByPhraseChain. Use the wrapper instead. | ||
| 152 | func FindSentencesByPhrases(ranks *Rank, words []string) []Sentence { | ||
| 153 | var sentences []Sentence | ||
| 154 | |||
| 155 | reqMatch := len(words) - 1 | ||
| 156 | sentenceIDs := make(map[int]int) | ||
| 157 | |||
| 158 | for _, i := range words { | ||
| 159 | for _, j := range words { | ||
| 160 | x := ranks.WordValID[i] | ||
| 161 | y := ranks.WordValID[j] | ||
| 162 | |||
| 163 | if _, ok := ranks.Relation.Node[x][y]; ok { | ||
| 164 | curSentenceIDs := ranks.Relation.Node[x][y].SentenceIDs | ||
| 165 | |||
| 166 | for _, id := range curSentenceIDs { | ||
| 167 | if _, ok := sentenceIDs[id]; ok { | ||
| 168 | sentenceIDs[id]++ | ||
| 169 | } else { | ||
| 170 | sentenceIDs[id] = 1 | ||
| 171 | } | ||
| 172 | } | ||
| 173 | } | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | for sentenceID, v := range sentenceIDs { | ||
| 178 | if v >= reqMatch { | ||
| 179 | sentences = append(sentences, Sentence{sentenceID, ranks.SentenceMap[sentenceID]}) | ||
| 180 | } | ||
| 181 | } | ||
| 182 | |||
| 183 | sort.Slice(sentences, func(i, j int) bool { | ||
| 184 | return sentences[i].ID < sentences[j].ID | ||
| 185 | }) | ||
| 186 | |||
| 187 | return sentences | ||
| 188 | } | ||
| 189 | |||
| 190 | // FindSentencesFrom function has wrapper textrank.FindSentencesFrom. Use the | ||
| 191 | // wrapper instead. | ||
| 192 | func FindSentencesFrom(ranks *Rank, id int, limit int) []Sentence { | ||
| 193 | var sentences []Sentence | ||
| 194 | |||
| 195 | limit = id + limit - 1 | ||
| 196 | |||
| 197 | for i := id; i <= limit; i++ { | ||
| 198 | sentences = append(sentences, Sentence{i, ranks.SentenceMap[i]}) | ||
| 199 | } | ||
| 200 | |||
| 201 | return sentences | ||
| 202 | } | ||
