summaryrefslogtreecommitdiff
path: root/vendor/github.com/DavidBelicza/TextRank/v2/convert/language.go
blob: fdad6989640b3c287fbbf83a5fc23bdbf2221c11 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
package convert

import "unicode/utf8"

// Language interface and its methods make possible the polymorphic usage of
// language-specific features by custom implementations.
type Language interface {
	// IsStopWord reports whether the given word is treated as a stop word.
	IsStopWord(word string) bool
	// FindRootWord looks up the root form of the given word. The bool result
	// is true when a root word was found; the string result is that root word.
	FindRootWord(word string) (bool, string)
	// SetActiveLanguage selects the active language by its code.
	SetActiveLanguage(code string)
	// SetWords registers the stop words of the language identified by code.
	SetWords(code string, words []string)
}

// LanguageDefault struct is an implementation of the Language interface. It
// stores the stop words of the loaded languages, keyed by language code, and
// can find stop words by tokens.
type LanguageDefault struct {
	defaultLang string              // code of the currently active language
	languages   map[string][]string // stop word lists keyed by language code
}

// NewLanguage is the constructor of LanguageDefault. It returns a pointer to
// a LanguageDefault whose active language is English ("en") and whose "en"
// stop word list is filled from getDefaultEnglish.
func NewLanguage() *LanguageDefault {
	lang := &LanguageDefault{
		defaultLang: "en",
		languages:   map[string][]string{},
	}
	lang.SetWords("en", getDefaultEnglish())

	return lang
}

// IsStopWord method returns true when the given word has 2 or fewer
// characters (counted as runes, not bytes) or when it is found in the stop
// word list of the active language. The comparison against the list is an
// exact, case-sensitive string match. When no list is registered for the
// active language, only the length rule applies.
func (lang *LanguageDefault) IsStopWord(word string) bool {
	if utf8.RuneCountInString(word) < 3 {
		return true
	}

	// A missing language code yields a nil slice, so the loop simply does
	// not run in that case.
	for _, stopWord := range lang.languages[lang.defaultLang] {
		if stopWord == word {
			return true
		}
	}

	return false
}

// FindRootWord method gets a word as an input, "apples" for example, and is
// meant to retrieve the root word of the given word, "apple" for example. The
// first return parameter is true when a root word has been found, otherwise
// it's false. This default implementation performs no lookup: it ignores the
// given word and always returns false together with an empty string.
func (lang *LanguageDefault) FindRootWord(word string) (bool, string) {
	return false, ""
}

// SetActiveLanguage method switches between languages by the language's code.
// The language code is not standardized, it can be anything. The code is
// stored as given, without checking that stop words were registered for it.
func (lang *LanguageDefault) SetActiveLanguage(code string) {
	lang.defaultLang = code
}

// SetWords method sets the stop words of a language in the LanguageDefault
// struct by the language's code. An existing list for the same code is
// replaced. The words slice is stored as given, without copying, so later
// changes made by the caller to its elements are visible here too.
func (lang *LanguageDefault) SetWords(code string, words []string) {
	// A LanguageDefault that was not built by NewLanguage (a zero value) has
	// a nil map, and assigning into a nil map panics, so allocate it lazily.
	if lang.languages == nil {
		lang.languages = make(map[string][]string)
	}

	lang.languages[code] = words
}