1 files changed, 194 insertions, 0 deletions
diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/textrank.go b/vendor/github.com/DavidBelicza/TextRank/v2/textrank.go
new file mode 100644
index 0000000..ed48ce3
--- /dev/null
+++ b/vendor/github.com/DavidBelicza/TextRank/v2/textrank.go
@@ -0,0 +1,194 @@
+package textrank
+import (
+        "github.com/DavidBelicza/TextRank/v2/convert"
+        "github.com/DavidBelicza/TextRank/v2/parse"
+        "github.com/DavidBelicza/TextRank/v2/rank"
+)
+// TextRank is the wrapper around the text ranking functionality; it holds the
+// rank.Rank data object that this package's methods and functions pass around.
+type TextRank struct {
+        rank *rank.Rank
+}
+// NewTextRank creates a TextRank whose Rank data object comes from
+// rank.NewRank. This is the 1st step to use TextRank.
+func NewTextRank() *TextRank {
+        tr := new(TextRank)
+        tr.rank = rank.NewRank()
+        return tr
+}
+// NewDefaultRule returns the default Rule built by parse.NewRule. A Rule tells
+// how raw text is split into sentences and words; the default suits English
+// and similar Latin languages like French or Spanish. Rule is an interface, so
+// a different one can be injected. This is the 2nd step to use TextRank.
+func NewDefaultRule() *parse.RuleDefault {
+        defaultRule := parse.NewRule()
+        return defaultRule
+}
+// NewDefaultLanguage returns the default Language built by convert.NewLanguage.
+// A Language decides which words are real and which are Stop Words or useless
+// Junk Words. The default uses English Stop Words, but other Stop Words can be
+// set for English or any other language. Language is an interface, so a
+// different one can be injected. This is the 3rd step to use TextRank.
+func NewDefaultLanguage() *convert.LanguageDefault {
+        defaultLanguage := convert.NewLanguage()
+        return defaultLanguage
+}
+// NewDefaultAlgorithm returns the Algorithm built by rank.NewAlgorithmDefault.
+// An Algorithm defines how the ranking is weighted; this is the general text
+// rank that weights the connections between words to find the strongest
+// phrases. Algorithm is an interface, so a different one can be injected. This
+// is the 4th step to use TextRank.
+func NewDefaultAlgorithm() *rank.AlgorithmDefault {
+        algorithm := rank.NewAlgorithmDefault()
+        return algorithm
+}
+// NewChainAlgorithm returns the Algorithm built by rank.NewAlgorithmChain. It
+// is an alternative weighting that ranks words by the number of the words.
+// Algorithm is an interface, so a different one can be injected. This is the
+// 4th step to use TextRank.
+func NewChainAlgorithm() *rank.AlgorithmChain {
+        chain := rank.NewAlgorithmChain()
+        return chain
+}
+// Populate adds a raw text to the text-ranking graph: it tokenizes the text
+// with rule and converts each sentence into the Rank data for later weighting
+// and scoring. A new raw text can be appended to an existing one even if the
+// previous text is already ranked. This is the 5th step to use TextRank.
+//
+// text string must be plain text from TXT, PDF or any document. It may contain
+// new lines, line breaks or unnecessary text parts, but it should not contain
+// HTML tags or code.
+//
+// lang Language object can be loaded from NewDefaultLanguage function.
+//
+// rule Rule object can be loaded from NewDefaultRule function.
+func (textRank *TextRank) Populate(
+        text string,
+        lang convert.Language,
+        rule parse.Rule,
+) {
+        tokenized := parse.TokenizeText(text, rule)
+        sentences := tokenized.GetSentences()
+        for _, parsedSentence := range sentences {
+                convert.TextToRank(parsedSentence, lang, textRank.rank)
+        }
+}
+// Ranking counts the words and the connections between them, weights the
+// numbers and normalizes them to float32 values between 0.00 and 1.00 by
+// running rank.Calculate with the given algorithm, the object of the weighting
+// and scoring methods. This is the 6th step to use TextRank.
+func (textRank *TextRank) Ranking(algorithm rank.Algorithm) {
+        data := textRank.rank
+        rank.Calculate(data, algorithm)
+}
+// GetRankData returns the underlying Rank data object, for developers who want
+// access to the whole graph (sentences, words, weights and all other data) to
+// analyze it or to implement a new search logic or finder method.
+func (textRank *TextRank) GetRankData() *rank.Rank {
+        data := textRank.rank
+        return data
+}
+// FindPhrases returns the phrases found by rank.FindPhrases for the given
+// TextRank. The result holds the phrases with IDs, words, weights and
+// quantities, sorted by weight from 1 to 0. Weight is calculated from the
+// quantity of relations between two words. A phrase is exactly two words; use
+// FindSentencesByPhraseChain to search for chains of phrases.
+func FindPhrases(textRank *TextRank) []rank.Phrase {
+        data := textRank.rank
+        return rank.FindPhrases(data)
+}
+// FindSingleWords returns the single words of the given TextRank as found by
+// rank.FindSingleWords: IDs, words, weights and quantities, sorted by weight
+// from 1 to 0. Weight is calculated from the quantity of the word.
+func FindSingleWords(textRank *TextRank) []rank.SingleWord {
+        data := textRank.rank
+        return rank.FindSingleWords(data)
+}
+// FindSentencesByRelationWeight returns the sentences of the given TextRank
+// via rank.FindSentences with rank.ByRelation, passing limit through. Each
+// Sentence carries its ID and text; the slice is sorted by weight of phrases
+// from 1 to 0.
+func FindSentencesByRelationWeight(
+        textRank *TextRank,
+        limit int,
+) []rank.Sentence {
+        data := textRank.rank
+        return rank.FindSentences(data, rank.ByRelation, limit)
+}
+// FindSentencesByWordQtyWeight returns the sentences of the given TextRank
+// via rank.FindSentences with rank.ByQty, passing limit through. Each Sentence
+// carries its ID and text; the slice is sorted by weight of word quantities
+// from 1 to 0.
+func FindSentencesByWordQtyWeight(
+        textRank *TextRank,
+        limit int,
+) []rank.Sentence {
+        data := textRank.rank
+        return rank.FindSentences(data, rank.ByQty, limit)
+}
+// FindSentencesByPhraseChain returns the sentences found by
+// rank.FindSentencesByPhrases for the given TextRank and slice of phrases. Each
+// Sentence carries its ID and text; the slice is sorted by weight of word
+// quantities from 1 to 0.
+//
+// textRank *TextRank is the TextRank object to search in.
+//
+// phrases []string is a slice of phrases. A single phrase is from two words, so
+// when the slice contains 3 words the inner method will search for two phrases.
+// The search algorithm seeks for "len(phrases)!" combinations. In case of three
+// items the possible combinations are 3 factorial (3!) = 3 * 2 * 1.
+//
+//    rawText := "Long raw text, lorem ipsum..."
+//    rule := NewDefaultRule()
+//    language := NewDefaultLanguage()
+//    algorithm := NewDefaultAlgorithm()
+//
+//    textRank := NewTextRank()
+//    textRank.Populate(rawText, language, rule)
+//    textRank.Ranking(algorithm)
+//    FindSentencesByPhraseChain(textRank, []string{
+//        "captain",
+//        "james",
+//        "kirk",
+//    })
+//
+// The above code searches the graph for the combinations captain james kirk,
+// captain kirk james, james captain kirk, james kirk captain, kirk captain
+// james and kirk james captain. The 3 words have to be related to each other
+// in the same sentence, but the search ignores the stop words. So the sentence
+// "James Kirk is the Captain of the Enterprise." will be returned because the
+// words "is" and "the" are stop words.
+func FindSentencesByPhraseChain(
+        textRank *TextRank,
+        phrases []string,
+) []rank.Sentence {
+        data := textRank.rank
+        return rank.FindSentencesByPhrases(data, phrases)
+}
+// FindSentencesFrom returns the sentences found by rank.FindSentencesFrom for
+// the given TextRank, sentence ID and limit. Each Sentence carries the sentence
+// text; the slice is sorted by sentence ID in ascending order, starting from
+// the given sentence ID.
+func FindSentencesFrom(
+        textRank *TextRank,
+        sentenceID int,
+        limit int,
+) []rank.Sentence {
+        data := textRank.rank
+        return rank.FindSentencesFrom(data, sentenceID, limit)
+}

diff --git a/vendor/github.com/DavidBelicza/TextRank/v2/textrank.go b/vendor/github.com/DavidBelicza/TextRank/v2/textrank.go new file mode 100644 index 0000000..ed48ce3 --- /dev/null +++ b/vendor/github.com/DavidBelicza/TextRank/v2/textrank.go
@@ -0,0 +1,194 @@
	1	package textrank
	2
	3	import (
	4	"github.com/DavidBelicza/TextRank/v2/convert"
	5	"github.com/DavidBelicza/TextRank/v2/parse"
	6	"github.com/DavidBelicza/TextRank/v2/rank"
	7	)
	8
	9	// TextRank is the wrapper around the text ranking functionality; it holds the
	10	// rank.Rank data object that this package's methods and functions pass around.
	11	type TextRank struct {
	12	rank *rank.Rank
	13	}
	14
	15	// NewTextRank creates a TextRank whose Rank data object comes from
	16	// rank.NewRank. This is the 1st step to use TextRank.
	17	func NewTextRank() *TextRank {
	18		tr := new(TextRank)
	19		tr.rank = rank.NewRank()
	20		return tr
	21	}
	22
	23	// NewDefaultRule returns the default Rule built by parse.NewRule. A Rule tells
	24	// how raw text is split into sentences and words; the default suits English
	25	// and similar Latin languages like French or Spanish. Rule is an interface, so
	26	// a different one can be injected. This is the 2nd step to use TextRank.
	27	func NewDefaultRule() *parse.RuleDefault {
	28		defaultRule := parse.NewRule()
	29		return defaultRule
	30	}
	31
	32	// NewDefaultLanguage returns the default Language built by convert.NewLanguage.
	33	// A Language decides which words are real and which are Stop Words or useless
	34	// Junk Words. The default uses English Stop Words, but other Stop Words can be
	35	// set for English or any other language. Language is an interface, so a
	36	// different one can be injected. This is the 3rd step to use TextRank.
	37	func NewDefaultLanguage() *convert.LanguageDefault {
	38		defaultLanguage := convert.NewLanguage()
	39		return defaultLanguage
	40	}
	41
	42	// NewDefaultAlgorithm returns the Algorithm built by rank.NewAlgorithmDefault.
	43	// An Algorithm defines how the ranking is weighted; this is the general text
	44	// rank that weights the connections between words to find the strongest
	45	// phrases. Algorithm is an interface, so a different one can be injected. This
	46	// is the 4th step to use TextRank.
	47	func NewDefaultAlgorithm() *rank.AlgorithmDefault {
	48		algorithm := rank.NewAlgorithmDefault()
	49		return algorithm
	50	}
	51
	52	// NewChainAlgorithm returns the Algorithm built by rank.NewAlgorithmChain. It
	53	// is an alternative weighting that ranks words by the number of the words.
	54	// Algorithm is an interface, so a different one can be injected. This is the
	55	// 4th step to use TextRank.
	56	func NewChainAlgorithm() *rank.AlgorithmChain {
	57		chain := rank.NewAlgorithmChain()
	58		return chain
	59	}
	60
	61	// Populate adds a raw text to the text-ranking graph: it tokenizes the text
	62	// with rule and converts each sentence into the Rank data for later weighting
	63	// and scoring. A new raw text can be appended to an existing one even if the
	64	// previous text is already ranked. This is the 5th step to use TextRank.
	65	//
	66	// text string must be plain text from TXT, PDF or any document. It may contain
	67	// new lines, line breaks or unnecessary text parts, but it should not contain
	68	// HTML tags or code.
	69	//
	70	// lang Language object can be loaded from NewDefaultLanguage function.
	71	//
	72	// rule Rule object can be loaded from NewDefaultRule function.
	73	func (textRank *TextRank) Populate(
	74		text string,
	75		lang convert.Language,
	76		rule parse.Rule,
	77	) {
	78		tokenized := parse.TokenizeText(text, rule)
	79		sentences := tokenized.GetSentences()
	80		for _, parsedSentence := range sentences {
	81			convert.TextToRank(parsedSentence, lang, textRank.rank)
	82		}
	83	}
	84
	85	// Ranking counts the words and the connections between them, weights the
	86	// numbers and normalizes them to float32 values between 0.00 and 1.00 by
	87	// running rank.Calculate with the given algorithm, the object of the weighting
	88	// and scoring methods. This is the 6th step to use TextRank.
	89	func (textRank *TextRank) Ranking(algorithm rank.Algorithm) {
	90		data := textRank.rank
	91		rank.Calculate(data, algorithm)
	92	}
	93
	94	// GetRankData returns the underlying Rank data object, for developers who want
	95	// access to the whole graph (sentences, words, weights and all other data) to
	96	// analyze it or to implement a new search logic or finder method.
	97	func (textRank *TextRank) GetRankData() *rank.Rank {
	98		return textRank.rank
	99	}
	100
	101	// FindPhrases returns the phrases found by rank.FindPhrases for the given
	102	// TextRank. The result holds the phrases with IDs, words, weights and
	103	// quantities, sorted by weight from 1 to 0. Weight is calculated from the
	104	// quantity of relations between two words. A phrase is exactly two words; use
	105	// FindSentencesByPhraseChain to search for chains of phrases.
	106	func FindPhrases(textRank *TextRank) []rank.Phrase {
	107		data := textRank.rank
	108		return rank.FindPhrases(data)
	109	}
	110
	111	// FindSingleWords returns the single words of the given TextRank as found by
	112	// rank.FindSingleWords: IDs, words, weights and quantities, sorted by weight
	113	// from 1 to 0. Weight is calculated from the quantity of the word.
	114	func FindSingleWords(textRank *TextRank) []rank.SingleWord {
	115		data := textRank.rank
	116		return rank.FindSingleWords(data)
	117	}
	118
	119	// FindSentencesByRelationWeight returns the sentences of the given TextRank
	120	// via rank.FindSentences with rank.ByRelation, passing limit through. Each
	121	// Sentence carries its ID and text; the slice is sorted by weight of phrases
	122	// from 1 to 0.
	123	func FindSentencesByRelationWeight(
	124		textRank *TextRank,
	125		limit int,
	126	) []rank.Sentence {
	127		data := textRank.rank
	128		return rank.FindSentences(data, rank.ByRelation, limit)
	129	}
	130
	131	// FindSentencesByWordQtyWeight returns the sentences of the given TextRank
	132	// via rank.FindSentences with rank.ByQty, passing limit through. Each Sentence
	133	// carries its ID and text; the slice is sorted by weight of word quantities
	134	// from 1 to 0.
	135	func FindSentencesByWordQtyWeight(
	136		textRank *TextRank,
	137		limit int,
	138	) []rank.Sentence {
	139		data := textRank.rank
	140		return rank.FindSentences(data, rank.ByQty, limit)
	141	}
	142
	143	// FindSentencesByPhraseChain returns the sentences found by
	144	// rank.FindSentencesByPhrases for the given TextRank and slice of phrases. Each
	145	// Sentence carries its ID and text; the slice is sorted by weight of word
	146	// quantities from 1 to 0.
	147	//
	148	// textRank *TextRank is the TextRank object to search in.
	149	//
	150	// phrases []string is a slice of phrases. A single phrase is from two words, so
	151	// when the slice contains 3 words the inner method will search for two phrases.
	152	// The search algorithm seeks for "len(phrases)!" combinations. In case of three
	153	// items the possible combinations are 3 factorial (3!) = 3 * 2 * 1.
	154	//
	155	//    rawText := "Long raw text, lorem ipsum..."
	156	//    rule := NewDefaultRule()
	157	//    language := NewDefaultLanguage()
	158	//    algorithm := NewDefaultAlgorithm()
	159	//
	160	//    textRank := NewTextRank()
	161	//    textRank.Populate(rawText, language, rule)
	162	//    textRank.Ranking(algorithm)
	163	//    FindSentencesByPhraseChain(textRank, []string{
	164	//        "captain",
	165	//        "james",
	166	//        "kirk",
	167	//    })
	168	//
	169	// The above code searches the graph for the combinations captain james kirk,
	170	// captain kirk james, james captain kirk, james kirk captain, kirk captain
	171	// james and kirk james captain. The 3 words have to be related to each other
	172	// in the same sentence, but the search ignores the stop words. So the sentence
	173	// "James Kirk is the Captain of the Enterprise." will be returned because the
	174	// words "is" and "the" are stop words.
	175	func FindSentencesByPhraseChain(
	176		textRank *TextRank,
	177		phrases []string,
	178	) []rank.Sentence {
	179		data := textRank.rank
	180		return rank.FindSentencesByPhrases(data, phrases)
	181	}
	182
	183	// FindSentencesFrom returns the sentences found by rank.FindSentencesFrom for
	184	// the given TextRank, sentence ID and limit. Each Sentence carries the sentence
	185	// text; the slice is sorted by sentence ID in ascending order, starting from
	186	// the given sentence ID.
	187	func FindSentencesFrom(
	188		textRank *TextRank,
	189		sentenceID int,
	190		limit int,
	191	) []rank.Sentence {
	192		data := textRank.rank
	193		return rank.FindSentencesFrom(data, sentenceID, limit)
	194	}