class Lingo::Attendee::MultiWorder

Public Instance Methods

control(cmd, *) click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 119
# Reacts to control commands passing through this attendee.
#
# On :RECORD and :EOF the buffer is drained: while at least two
# non-punctuation items are buffered they are processed, then whatever is
# left over is forwarded. @eof_handling is true for the duration of the
# drain. Every other command is ignored.
#
# cmd:: the command symbol; any further arguments are ignored.
def control(cmd, *)
  return unless [:RECORD, :EOF].include?(cmd)

  @eof_handling = true

  process_buffer while process_buffer?(2)
  forward_number_of_token

  @eof_handling = false
end
init() click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 84
# Reads this attendee's configuration and sets up the dictionaries and the
# grammar used for multiword detection.
#
# The lexical source ('use-lex') and mode ('lex-mode') are taken from the
# database configuration of the configured multiword sources; a warning is
# emitted when a source declares a different 'use-lex' than the one already
# adopted. The buffer bookkeeping (@expected_tokens_in_buffer and
# @eof_handling) is reset at the end.
def init
  # combine lexical variants?
  #
  # false = old behaviour
  # true  = first match
  # 'all' = all matches
  @combine = get_key('combine', false)
  @all     = @combine.is_a?(String) && @combine.downcase == 'all'

  databases = lingo.dictionary_config['databases']
  sources   = get_ary('source')

  lex_src = lex_mod = nil

  sources.each do |name|
    use_lex, mode = databases[name].values_at('use-lex', 'lex-mode')

    if lex_src.nil? || lex_src == use_lex
      lex_src = use_lex
      lex_mod = mode
    else
      warn "#{self.class}: Dictionaries don't match: #{sources.join(',')}"
    end
  end

  lex_src = lex_src.split(SEP_RE)

  # An explicit 'lex-mode' setting wins over the one found in the databases.
  lex_mod = get_key('lex-mode', lex_mod || 'first')

  @mul_dic = dictionary(sources, get_key('mode', 'all'))
  @lex_dic = dictionary(lex_src, lex_mod)
  @lex_gra = grammar(lex_src, lex_mod)

  # A synonym dictionary is only set up when variants are combined.
  use_syn  = @combine && has_key?('use-syn')
  @syn_dic = use_syn ? dictionary(get_ary('use-syn'), get_key('syn-mode', 'all')) : nil

  @expected_tokens_in_buffer, @eof_handling = 3, false
end

Private Instance Methods

check_multiword(len, lst = nil) click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 202
# Looks up multiword entries in @mul_dic for the sequence that starts at the
# head of the buffer.
#
# len:: number of sequence positions to collect; the lookup is skipped
#       unless process_buffer?(len) holds.
# lst:: optional Array; when given, the collected sequence size and one
#       number per match are added to it, and it is left sorted in
#       descending order without duplicates.
#
# Returns the Array of matches, or nil when the buffer check fails or
# nothing matched.
def check_multiword(len, lst = nil)
  return unless process_buffer?(len)

  # seq holds one array of candidate key parts per position, mul the matches.
  seq, mul, sep = [], [], ' '

  @buffer.each { |obj|
    # Anything that is not a word form occupies a position on its own.
    next seq << [obj] unless obj.is_a?(WordForm)
    # Punctuation is left out of the lookup key.
    next if (form = obj.form) == CHAR_PUNCT

    w = find_word(form, @lex_dic, @lex_gra)
    l = w.lexicals

    # Compounds contribute only their first lexical, words without lexicals
    # contribute themselves, everything else a copy of all its lexicals.
    i = w.attr == WA_COMPOUND ? [l.first] : l.empty? ? [w] : l.dup

    # NOTE(review): find_synonyms presumably adds synonyms to i -- confirm.
    @syn_dic.find_synonyms(w, i) if @syn_dic
    # Reduce the candidates to their distinct lower-cased forms.
    i.map! { |j| Unicode.downcase(j.form) }.uniq!

    seq << i

    # Stop collecting once len positions are filled (this is only checked
    # after a word form has been added).
    break unless seq.length < len
  }

  if @combine
    # NOTE(review): combinations presumably yields every combination of one
    # candidate per position -- confirm. Unless all matches are wanted
    # (@all), the search stops at the first key that produced a match; the
    # break makes the call evaluate to nil, so mul.uniq! is skipped then.
    combinations(*seq) { |key|
      @mul_dic.select(key.join(sep), mul)
      break unless @all || mul.empty?
    } && mul.uniq!
  else
    # Old behaviour: only the first candidate of each position is used.
    @mul_dic.select(seq.map! { |i,| i }.join(sep), mul)
  end

  # A Lexical match is counted by the number of sep-separated parts in its
  # form; any other match is added to lst unchanged.
  lst.push(seq.size).concat(mul.map { |r| r.is_a?(Lexical) ?
    r.form.count(sep) + 1 : r }).sort!.reverse!.uniq! if lst

  mul unless mul.empty?
end
create_and_forward_multiword(len, lex = check_multiword(len)) click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 179
# Builds a multiword from the first +len+ word forms at the head of the
# buffer and forwards it.
#
# len:: number of word forms that make up the multiword.
# lex:: the matches for the multiword; defaults to the result of
#       check_multiword(len). Nothing happens when it is nil or false.
#
# Buffer entries for which form_at yields nothing are deleted from the
# buffer and represented by CHAR_PUNCT appended to the preceding part.
# Unknown word forms taking part are re-tagged as WA_UNKMULPART. The new
# word takes over the token of the last word form that was used.
#
# Returns the result of forward, or nil when +lex+ is not set.
def create_and_forward_multiword(len, lex = check_multiword(len))
  return unless lex

  parts = []
  pos   = 0

  # The body has to run at least once, hence the trailing exit check.
  loop do
    form = form_at(pos)

    if form
      entry = @buffer[pos]
      entry.attr = WA_UNKMULPART if entry.unknown?

      parts << form
      pos += 1
    else
      @buffer.delete_at(pos)
      # Build a new string so the buffered form itself is left untouched.
      parts[-1] += CHAR_PUNCT
    end

    break unless pos < len
  end

  lexicals = lex.select { |candidate| candidate.is_a?(Lexical) }

  multiword = Word.new_lexicals(parts.join(' '), WA_MULTIWORD, lexicals)
  multiword.token = @buffer[pos - 1].token

  forward(multiword)
end
form_at(index) click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 135
# Returns the form of the buffer entry at +index+, or nil when that entry is
# not a WordForm or its form equals CHAR_PUNCT.
def form_at(index)
  entry = @buffer[index]
  return unless entry.is_a?(WordForm)

  form = entry.form
  form unless form == CHAR_PUNCT
end
forward_number_of_token(len = default = @buffer.size, punct = !default) click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 140
# Forwards items from the head of the buffer.
#
# len::   number of items to forward. When omitted, the current buffer size
#         is used, i.e. the whole buffer is flushed.
# punct:: when true, items whose form equals CHAR_PUNCT are forwarded but
#         not counted against +len+. The default depends on how the method
#         is called: with an explicit +len+ the helper local +default+ stays
#         nil, so punct is true; without it +default+ holds the buffer size,
#         so punct is false.
#
# As long as the buffer is not empty at least one item is forwarded, even
# for a +len+ of zero. Forwarding stops as soon as the buffer runs empty, so
# asking for more items than are available cannot loop forever.
#
# Returns nil.
def forward_number_of_token(len = default = @buffer.size, punct = !default)
  until @buffer.empty?
    forward(item = @buffer.shift)
    len -= 1 unless punct && item.form == CHAR_PUNCT

    # Exit check after the body: one item always goes out first.
    break unless len > 0
  end

  nil
end
process_buffer() click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 153
# Handles the head of the buffer: either forwards a multiword starting
# there together with its parts, waits for more input, or forwards a single
# item.
#
# When the entry at position 0 has a form, a three-part lookup is done:
#
# * all match lengths are at most 3: the three-part multiword is created
#   and three items are forwarded;
# * a longer match is possible but the buffer is still too short (and we
#   are not draining at EOF): @expected_tokens_in_buffer is raised so more
#   items get collected first;
# * otherwise the candidate lengths are tried from the longest on, the
#   first one that yields a multiword (or 1) determines how many items are
#   forwarded, and processing continues while enough items are buffered.
#
# Without any match a two-part multiword is tried. Finally one item is
# forwarded and @expected_tokens_in_buffer is reset to 3.
def process_buffer
  if form_at(0)
    sizes = []
    found = check_multiword(3, sizes)

    if found
      longest = sizes.first

      if longest <= 3
        create_and_forward_multiword(3, found)
        forward_number_of_token(3)
      elsif !@eof_handling && @buffer.size < longest
        @expected_tokens_in_buffer = longest
      else
        hit = sizes.find { |size| create_and_forward_multiword(size) }
        forward_number_of_token(hit || 1)

        @expected_tokens_in_buffer = 3
        process_buffer if process_buffer?
      end

      return
    end

    forward_number_of_token(1) if create_and_forward_multiword(2)
  end

  forward_number_of_token(1, false)
  @expected_tokens_in_buffer = 3
end
process_buffer?(num = @expected_tokens_in_buffer) click to toggle source
# File lib/lingo/attendee/multi_worder.rb, line 149
# Tells whether the buffer holds at least +num+ items whose form is not
# CHAR_PUNCT.
#
# num:: required number of such items; defaults to
#       @expected_tokens_in_buffer.
def process_buffer?(num = @expected_tokens_in_buffer)
  countable = @buffer.count { |entry| entry.form != CHAR_PUNCT }
  countable >= num
end