1package rank
  2
  3// Rank struct contains every original raw sentences, words, tokens, phrases,
  4// indexes, word hits, phrase hits and minimum-maximum values.
  5//
  6// Max is the occurrence of the most used word.
  7//
  8// Min is the occurrence of the less used word. It is always greater then 0.
  9//
 10// Relation is the Relation object, contains phrases.
 11//
 12// SentenceMap contains raw sentences. Index is the sentence ID, value is the
 13// sentence itself.
 14//
 15// Words contains Word objects. Index is the word ID, value is the word/token
 16// itself.
 17//
 18// WordValID contains words. Index is the word/token, value is the ID.
 19type Rank struct {
 20	Max         float32
 21	Min         float32
 22	Relation    Relation
 23	SentenceMap map[int]string
 24	Words       map[int]*Word
 25	WordValID   map[string]int
 26}
 27
 28// Word struct contains all data about the words.
 29//
 30// If a word is multiple times in the text then the multiple words point to the
 31// same ID. So Word is unique.
 32//
 33// SentenceIDs contains all IDs of sentences what contain the word.
 34//
 35// ConnectionLeft contains all words what are connected to this word on the left
 36// side. The map index is the ID of the related word and its value is the
 37// occurrence.
 38//
 39// ConnectionRight contains all words what are connected to this word on the
 40// right side. The map index is the ID of the related word and its value is the
 41// occurrence.
 42//
 43// Token is the word itself, but not the original, it is tokenized.
 44//
 45// Qty is the number of occurrence of the word.
 46//
 47// Weight is the weight of the word between 0.00 and 1.00.
 48type Word struct {
 49	ID              int
 50	SentenceIDs     []int
 51	ConnectionLeft  map[int]int
 52	ConnectionRight map[int]int
 53	Token           string
 54	Qty             int
 55	Weight          float32
 56}
 57
 58// NewRank constructor retrieves a Rank pointer.
 59func NewRank() *Rank {
 60	return &Rank{
 61		0,
 62		0,
 63		Relation{
 64			0,
 65			0,
 66			make(map[int]map[int]Score),
 67		},
 68		make(map[int]string),
 69		make(map[int]*Word),
 70		make(map[string]int),
 71	}
 72}
 73
 74// IsWordExist method retrieves true when the given word is already in the rank.
 75func (rank *Rank) IsWordExist(word string) bool {
 76	_, find := rank.WordValID[word]
 77
 78	return find
 79}
 80
 81// AddNewWord method adds a new word to the rank object and it defines its ID.
 82func (rank *Rank) AddNewWord(word string, prevWordIdx int, sentenceID int) (wordID int) {
 83	wordID = len(rank.Words)
 84	connectionLeft := make(map[int]int)
 85
 86	if prevWordIdx >= 0 {
 87		connectionLeft[prevWordIdx] = 1
 88	}
 89
 90	newWord := &Word{
 91		ID:              wordID,
 92		SentenceIDs:     []int{sentenceID},
 93		ConnectionLeft:  connectionLeft,
 94		ConnectionRight: make(map[int]int),
 95		Token:           word,
 96		Qty:             1,
 97		Weight:          0,
 98	}
 99
100	rank.Words[wordID] = newWord
101	rank.WordValID[word] = wordID
102
103	return
104}
105
106// UpdateWord method update a word what already exists in the rank object. It
107// retrieves its ID.
108func (rank *Rank) UpdateWord(word string, prevWordIdx int, sentenceID int) (wordID int) {
109	wordID = rank.WordValID[word]
110
111	found := false
112
113	for _, oldSentenceID := range rank.Words[wordID].SentenceIDs {
114		if sentenceID == oldSentenceID {
115			found = true
116			break
117		}
118	}
119
120	if !found {
121		rank.Words[wordID].SentenceIDs = append(
122			rank.Words[wordID].SentenceIDs,
123			sentenceID,
124		)
125	}
126
127	rank.Words[wordID].Qty++
128
129	if prevWordIdx >= 0 {
130		rank.Words[wordID].ConnectionLeft[prevWordIdx]++
131	}
132
133	return
134}
135
136// UpdateRightConnection method adds the right connection to the word. It always
137// can be used after a word has added and the next word is known.
138func (rank *Rank) UpdateRightConnection(wordID int, rightWordID int) {
139	if wordID >= 0 {
140		rank.Words[wordID].ConnectionRight[rightWordID]++
141	}
142}
143
144// GetWordData method retrieves all words as a pointer.
145func (rank *Rank) GetWordData() map[int]*Word {
146	return rank.Words
147}