diff options
Diffstat (limited to 'vendor/github.com/DavidBelicza/TextRank/v2/convert/language.go')
| -rw-r--r-- | vendor/github.com/DavidBelicza/TextRank/v2/convert/language.go | 71 |
1 files changed, 71 insertions, 0 deletions
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/convert/language.go b/vendor/github.com/DavidBelicza/TextRank/v2/convert/language.go new file mode 100644 index 0000000..fdad698 --- /dev/null +++ b/vendor/github.com/DavidBelicza/TextRank/v2/convert/language.go | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | package convert | ||
| 2 | |||
| 3 | import "unicode/utf8" | ||
| 4 | |||
// Language abstracts language-specific features behind a common set of
// methods, so that custom implementations can be used polymorphically in
// place of the default one.
type Language interface {
	// IsStopWord reports whether word is to be treated as a stop word.
	IsStopWord(word string) bool
	// FindRootWord looks up the root form of word ("apples" -> "apple", for
	// example). The boolean result reports whether a root was found; the
	// string result carries that root.
	FindRootWord(word string) (bool, string)
	// SetActiveLanguage selects a language by its code.
	SetActiveLanguage(code string)
	// SetWords supplies the word list associated with the language code.
	SetWords(code string, words []string)
}
| 13 | |||
// LanguageDefault is an implementation of the Language interface. It keeps
// the stop words of every loaded language and can tell whether a token is a
// stop word.
type LanguageDefault struct {
	// defaultLang is the code of the currently active language; it is the
	// key used to look up a stop-word list in languages.
	defaultLang string
	// languages maps a language code to the stop words stored for it.
	languages map[string][]string
}
| 20 | |||
| 21 | // NewLanguage constructor of the LanguageDefault Retrieves a pointer | ||
| 22 | // LanguageDefault. It has setup to English by default. | ||
| 23 | func NewLanguage() *LanguageDefault { | ||
| 24 | lang := &LanguageDefault{ | ||
| 25 | "en", | ||
| 26 | make(map[string][]string), | ||
| 27 | } | ||
| 28 | |||
| 29 | words := getDefaultEnglish() | ||
| 30 | |||
| 31 | lang.SetWords("en", words) | ||
| 32 | |||
| 33 | return lang | ||
| 34 | } | ||
| 35 | |||
| 36 | // IsStopWord method retrieves true when the given word is in the stop word | ||
| 37 | // list or when the word has less character then 2. | ||
| 38 | func (lang *LanguageDefault) IsStopWord(word string) bool { | ||
| 39 | if utf8.RuneCountInString(word) <= 2 { | ||
| 40 | return true | ||
| 41 | } | ||
| 42 | |||
| 43 | if stopWords, ok := lang.languages[lang.defaultLang]; ok { | ||
| 44 | for _, val := range stopWords { | ||
| 45 | if val == word { | ||
| 46 | return true | ||
| 47 | } | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 51 | return false | ||
| 52 | } | ||
| 53 | |||
| 54 | // FindRootWord method gets a word as an input, "apples" for example and it | ||
| 55 | // retrieves the root-word of this given word, "apple" for example. The first | ||
| 56 | // return parameter is true when a word-root has found, otherwise it's false. | ||
| 57 | func (lang *LanguageDefault) FindRootWord(word string) (bool, string) { | ||
| 58 | return false, "" | ||
| 59 | } | ||
| 60 | |||
| 61 | // SetActiveLanguage method switch between languages by the language's code. The | ||
| 62 | // language code is not standard, it can be anything. | ||
| 63 | func (lang *LanguageDefault) SetActiveLanguage(code string) { | ||
| 64 | lang.defaultLang = code | ||
| 65 | } | ||
| 66 | |||
| 67 | // SetWords method set stop words into the LanguageDefault struct by the | ||
| 68 | // language's code. | ||
| 69 | func (lang *LanguageDefault) SetWords(code string, words []string) { | ||
| 70 | lang.languages[code] = words | ||
| 71 | } | ||
