class Lingo::Attendee::MultiWorder
Public Instance Methods
control(cmd, *)
click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 118
#
# Handles a control command. On +:RECORD+ or +:EOF+ the buffer is drained:
# while at least two non-punctuation items are buffered, #process_buffer
# runs; whatever is left is then forwarded as-is. +@eof_handling+ is true
# for the duration of the drain. Any other command is ignored.
def control(cmd, *)
  case cmd
  when :RECORD, :EOF
    @eof_handling = true

    process_buffer while process_buffer?(2)
    forward_number_of_token

    @eof_handling = false
  end
end
init()
click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 83
#
# Reads the attendee configuration and sets up the dictionaries used for
# multiword detection.
#
# The 'combine' key controls whether lexical variants are combined:
#
#   false = old behaviour
#   true  = first match
#   'all' = all matches
def init
  @combine = get_key('combine', false)
  @all     = @combine.is_a?(String) && @combine.downcase == 'all'

  databases = lingo.dictionary_config['databases']
  sources   = get_ary('source')

  lex_src = nil
  lex_mod = nil

  # Every multiword source names the lexicon it was built from ('use-lex');
  # a source naming a different lexicon than the one already picked up is
  # reported and otherwise ignored.
  sources.each do |name|
    use_lex, mode = databases[name].values_at('use-lex', 'lex-mode')

    unless lex_src.nil? || lex_src == use_lex
      warn "#{self.class}: Dictionaries don't match: #{sources.join(',')}"
      next
    end

    lex_src = use_lex
    lex_mod = mode
  end

  lex_src = lex_src.split(SEP_RE)
  # An explicit 'lex-mode' key wins over the mode taken from the sources.
  lex_mod = get_key('lex-mode', lex_mod || 'first')

  @mul_dic = dictionary(sources, get_key('mode', 'all'))
  @lex_dic = dictionary(lex_src, lex_mod)
  @lex_gra = grammar(lex_src, lex_mod)

  # A synonym dictionary is only set up when combining is enabled and
  # 'use-syn' is configured; otherwise @syn_dic is nil.
  with_synonyms = @combine && has_key?('use-syn')
  @syn_dic = with_synonyms ? dictionary(get_ary('use-syn'), get_key('syn-mode', 'all')) : nil

  @expected_tokens_in_buffer, @eof_handling = 3, false
end
Private Instance Methods
check_multiword(len, lst = nil)
click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 201
#
# Looks up the first +len+ non-punctuation buffer entries in the multiword
# dictionary.
#
# len:: number of entries to build the lookup key from
# lst:: optional array; when given, it receives the number of collected
#       entries plus one value per match (the word count of a Lexical
#       match, or the match itself otherwise), sorted in descending order
#       without duplicates
#
# Returns the array of matches, or nil when the buffer holds fewer than
# +len+ non-punctuation items or nothing matched.
def check_multiword(len, lst = nil)
  return unless process_buffer?(len)

  separator = ' '
  sequence  = []
  matches   = []

  @buffer.each do |item|
    # Anything that is not a WordForm is taken over unchanged.
    unless item.is_a?(WordForm)
      sequence << [item]
      next
    end

    form = item.form
    next if form == CHAR_PUNCT

    word     = find_word(form, @lex_dic, @lex_gra)
    lexicals = word.lexicals

    variants =
      if word.attr == WA_COMPOUND
        [lexicals.first]
      elsif lexicals.empty?
        [word]
      else
        lexicals.dup
      end

    @syn_dic.find_synonyms(word, variants) if @syn_dic

    variants.map! { |variant| Unicode.downcase(variant.form) }
    variants.uniq!

    sequence << variants
    break if sequence.length >= len
  end

  if @combine
    # Try every combination of variants. Unless all matches were requested,
    # the scan stops at the first hit; breaking out makes the call evaluate
    # to nil, so duplicates are only removed after a scan that ran through.
    finished = combinations(*sequence) do |key|
      @mul_dic.select(key.join(separator), matches)
      break unless @all || matches.empty?
    end

    matches.uniq! if finished
  else
    # Old behaviour: only the first variant of each position is used.
    @mul_dic.select(sequence.map!(&:first).join(separator), matches)
  end

  if lst
    lst.push(sequence.size)
    lst.concat(matches.map { |match|
      match.is_a?(Lexical) ? match.form.count(separator) + 1 : match
    })

    lst.sort!
    lst.reverse!
    lst.uniq!
  end

  matches.empty? ? nil : matches
end
create_and_forward_multiword(len, lex = check_multiword(len))
click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 178
#
# Builds a multiword from the first +len+ word forms in the buffer and
# forwards it.
#
# len:: number of word forms the multiword consists of
# lex:: dictionary matches for those forms; defaults to the result of
#       check_multiword(len)
#
# Returns nil without touching the buffer when +lex+ is nil/false,
# otherwise whatever #forward returns.
def create_and_forward_multiword(len, lex = check_multiword(len))
  return unless lex

  index = 0
  parts = []

  # Runs at least once, like the do-while it replaces.
  loop do
    form = form_at(index)

    if form
      entry = @buffer[index]
      entry.attr = WA_UNKMULPART if entry.unknown?

      parts << form
      index += 1
    else
      # No usable form at this position: drop the entry and attach the
      # punctuation character to the preceding part.
      @buffer.delete_at(index)
      parts[-1] += CHAR_PUNCT
    end

    break unless index < len
  end

  lexicals  = lex.select { |candidate| candidate.is_a?(Lexical) }
  multiword = Word.new_lexicals(parts.join(' '), WA_MULTIWORD, lexicals)

  # The multiword takes over the token of its last part.
  multiword.token = @buffer[index - 1].token

  forward(multiword)
end
form_at(index)
click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 134
#
# Returns the form of the buffer entry at +index+, or nil when that entry
# is not a WordForm or its form equals CHAR_PUNCT.
def form_at(index)
  entry = @buffer[index]
  return unless entry.is_a?(WordForm)

  form = entry.form
  form unless form == CHAR_PUNCT
end
forward_number_of_token(len = default = @buffer.size, punct = !default)
click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 139
#
# Forwards items from the front of the buffer.
#
# len::   number of items to forward; when omitted, the current buffer
#         size is used
# punct:: when true, items whose form equals CHAR_PUNCT are forwarded but
#         not counted against +len+. Defaults to true when +len+ is given
#         explicitly and to false when it is omitted (+default+ is only
#         assigned in the latter case, so <tt>!default</tt> tells the two
#         apart).
#
# At least one item is forwarded if the buffer is not empty, even for a
# non-positive +len+. Forwarding stops as soon as the buffer is exhausted;
# previously the loop kept spinning forever when the buffer ran empty
# before +len+ items had been counted.
#
# Returns nil.
def forward_number_of_token(len = default = @buffer.size, punct = !default)
  until @buffer.empty?
    forward(item = @buffer.delete_at(0))
    len -= 1 unless punct && item.form == CHAR_PUNCT

    break unless len > 0
  end
end
process_buffer()
click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 152
#
# Processes the front of the buffer: tries to recognise a multiword
# starting at the first entry and forwards the consumed entries.
#
# A three-word lookup is attempted first. Its result also reports the
# longest candidate length; if that is longer than three and the buffer is
# still too short (and we are not draining at EOF), nothing is forwarded
# and +@expected_tokens_in_buffer+ is raised so more input is collected
# first. Without a three-word hit a two-word multiword is tried, and in any
# case one more entry is forwarded afterwards.
def process_buffer
  if form_at(0)
    lengths = []
    hit     = check_multiword(3, lengths)

    if hit
      longest = lengths.first

      if longest <= 3
        create_and_forward_multiword(3, hit)
        forward_number_of_token(3)
      elsif @eof_handling || @buffer.size >= longest
        # Try the candidate lengths in order; the first one that yields a
        # multiword decides how many entries are forwarded, otherwise one.
        consumed = lengths.find { |length| create_and_forward_multiword(length) }
        forward_number_of_token(consumed || 1)

        @expected_tokens_in_buffer = 3
        process_buffer if process_buffer?
      else
        # Not enough entries buffered yet for the longest candidate.
        @expected_tokens_in_buffer = longest
      end

      return
    end

    forward_number_of_token(1) if create_and_forward_multiword(2)
  end

  forward_number_of_token(1, false)
  @expected_tokens_in_buffer = 3
end
process_buffer?(num = @expected_tokens_in_buffer)
click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 148
#
# Tells whether the buffer holds at least +num+ items whose form is not
# CHAR_PUNCT. +num+ defaults to +@expected_tokens_in_buffer+.
def process_buffer?(num = @expected_tokens_in_buffer)
  significant = @buffer.reject { |item| item.form == CHAR_PUNCT }
  significant.size >= num
end