summaryrefslogtreecommitdiff
path: root/vendor/github.com/DavidBelicza/TextRank/v2/parse/rule.go
blob: 0f6ec91418307ad1e59d4c149abc3f1f5d83f1c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
package parse

// Rule abstracts how separator characters are recognised, so the code that
// retrieves tokens from text can be used polymorphically with any
// implementation.
type Rule interface {
	// IsWordSeparator reports whether char separates words.
	IsWordSeparator(char rune) bool
	// IsSentenceSeparator reports whether char separates sentences.
	IsSentenceSeparator(char rune) bool
}

// RuleDefault is the default separator rule; its pointer type provides both
// methods of the Rule interface. It stores the separator characters as
// fixed-size arrays of strings and can decide whether a character is a
// separator or not.
type RuleDefault struct {
	wordSeparators     [21]string // strings matched by IsWordSeparator
	sentenceSeparators [3]string  // strings matched by IsSentenceSeparator
}

// NewRule builds a RuleDefault preloaded with the default word and sentence
// separator sets and returns a pointer to it.
func NewRule() *RuleDefault {
	rule := RuleDefault{
		// The double quote appears twice in this list; the array length
		// (21) counts both occurrences.
		wordSeparators: [21]string{
			" ", ",", "'", "’", "\"", ")", "(",
			"[", "]", "{", "}", "\"", ";", "\n",
			">", "<", "%", "@", "&", "=", "#",
		},
		sentenceSeparators: [3]string{"!", ".", "?"},
	}

	return &rule
}

// IsWordSeparator reports whether char separates words from each other. It
// returns true when the string form of char equals one of the receiver's
// wordSeparators entries, and otherwise defers to IsSentenceSeparator, so
// every sentence separator is also treated as a word separator.
func (r *RuleDefault) IsWordSeparator(char rune) bool {
	// The separators are stored as strings, so compare in string form.
	chr := string(char)

	for _, sep := range r.wordSeparators {
		if sep == chr {
			return true
		}
	}

	return r.IsSentenceSeparator(char)
}

// IsSentenceSeparator reports whether char separates sentences from each
// other, that is, whether the string form of char equals one of the
// receiver's sentenceSeparators entries.
func (r *RuleDefault) IsSentenceSeparator(char rune) bool {
	// The separators are stored as strings, so compare in string form.
	chr := string(char)

	for _, sep := range r.sentenceSeparators {
		if sep == chr {
			return true
		}
	}

	return false
}