class Lingo::Attendee::Sequencer

Constants

CLASS_RE
FULL_CLASS_RE
FULL_REGEX_RE
NUM
REGEX_RE
UNK

Public Instance Methods

control(cmd, *)
# File lib/lingo/attendee/sequencer.rb, line 153
# Handles a control command.
#
# Only +:RECORD+ and +:EOF+ have an effect: either one hands the buffered
# objects to #process_buffer. Every other command is ignored.
#
# cmd:: the command; any additional arguments are accepted and discarded.
#
# Returns the result of #process_buffer, or +nil+ for an ignored command.
def control(cmd, *)
  case cmd
  when :RECORD, :EOF then process_buffer
  end
end
init()
# File lib/lingo/attendee/sequencer.rb, line 122
# Reads the configuration and compiles the sequence definitions.
#
# State set up here:
# @stopper:: result of <tt>get_ary('stopper', DEFAULT_SKIP)</tt> with
#            WA_UNKNOWN and WA_UNKMULPART appended.
# @mwc::     result of <tt>get_key('multiword', LA_MULTIWORD)</tt>.
# @cls::     the distinct CLASS_RE matches found in all sequence strings.
# @seq::     one <tt>[pattern, classes, format]</tt> triple per compiled
#            sequence; +classes+ is +nil+ when the pattern is a Regexp.
#
# Raises MissingConfigError when no sequence was registered.
def init
  @stopper = get_ary('stopper', DEFAULT_SKIP).push(
    WA_UNKNOWN, WA_UNKMULPART
  )

  @mwc = get_key('multiword', LA_MULTIWORD)

  @cls = []
  @seq = []

  get_key('sequences').each do |pattern, format|
    # Normalise the format: +true+ selects the '|' separator, any other
    # truthy value has each run of digits N turned into the positional
    # placeholder %N$s, and everything else means "no format".
    format =
      if format == true
        '|'
      elsif format
        format.gsub(/\d+/, '%\&$s')
      end

    # Appends the normalised format to whatever is being registered.
    register = ->(*entry) { @seq << entry.push(format) }

    pattern = pattern.downcase
    classes = pattern.scan(CLASS_RE)
    @cls.concat(classes)

    case pattern
    when FULL_CLASS_RE
      register.call(pattern, classes)
    when FULL_REGEX_RE
      # Expand the pattern into every concrete class sequence it covers.
      alternatives = []

      pattern.scan(REGEX_RE) do |plain, set, group|
        if plain
          plain.each_char { |char| alternatives << [char] }
        elsif set
          alternatives << set.chars
        else
          alternatives << group.split('|').map(&:chars)
        end
      end

      combinations(*alternatives) do |combo|
        register.call(combo.join, combo.flatten)
      end
    else
      register.call(Regexp.new(pattern), nil)
    end
  end

  @cls.uniq!

  raise MissingConfigError.new(:sequences) if @seq.empty?
end
process_buffer()
# File lib/lingo/attendee/sequencer.rb, line 161
# Searches the buffer for sequences and then flushes it.
#
# The sequence search (#process_seq) is skipped when the buffer holds
# fewer than two objects; the buffer is passed to +flush+ in either case.
#
# Returns the result of <tt>flush(@buffer)</tt>.
def process_buffer
  process_seq unless @buffer.size <= 1
  flush(@buffer)
end
process_buffer?()
# File lib/lingo/attendee/sequencer.rb, line 157
# Tells whether the buffer should be processed now.
#
# True only when the most recently buffered object is a WordForm whose
# +attr+ is listed in <tt>@stopper</tt>; false otherwise, including for an
# empty buffer.
def process_buffer?
  newest = @buffer.last
  return false unless newest.is_a?(WordForm)

  @stopper.include?(newest.attr)
end

Private Instance Methods

find_form(obj, wc, objs, args)
# File lib/lingo/attendee/sequencer.rb, line 247
# Looks up the form of +obj+ for class +wc+ and records it.
#
# For a Word the form of its first lexical whose +attr+ equals +wc+ is
# used; for any other object its own +form+. When a form is found, +obj+
# is appended to +objs+ and the form to +args+.
#
# obj::  the buffered object to inspect.
# wc::   the class the object has to provide a form for.
# objs:: collects the matched objects (modified in place).
# args:: collects the matched forms (modified in place).
#
# Returns +args+ on success and +nil+ (leaving both arrays untouched)
# when no form is available.
def find_form(obj, wc, objs, args)
  form =
    if obj.is_a?(Word)
      match = obj.lexicals.find { |lex| lex.attr == wc }
      match&.form
    else
      obj.form
    end

  return unless form

  objs << obj
  args << form
end
find_seq(buf, map)
# File lib/lingo/attendee/sequencer.rb, line 207
# Searches a collected run of objects for every entry of <tt>@seq</tt> and
# forwards each hit through #forward_seq. Both +buf+ and +map+ are emptied
# in place afterwards.
#
# buf:: the run of buffered objects to examine.
# map:: parallel to +buf+; entry i holds the attributes the object at
#       position i was admitted with (see the +att+ value in #process_seq).
#
# Returns +nil+ right away for an empty +buf+, otherwise the result of
# <tt>map.clear</tt>.
def find_seq(buf, map)
  return if buf.empty?

  # Scratch arrays, cleared and refilled for every candidate match.
  objs, args = [], []

  @seq.each { |str, cls, fmt|
    if cls
      # Entry with an explicit class list: slide a window of that length
      # over +buf+ and +map+ in lockstep.
      len = cls.size

      buf.each_cons(len).zip(map.each_cons(len)) { |_buf, _map|
        # External enumerator, so each class position pulls the next object.
        obj = _buf.each; objs.clear; args.clear

        # +zip+ with a block returns nil, so the window is skipped only when
        # the block breaks out with +true+, i.e. as soon as one position
        # lacks the required class or find_form finds no form for it.
        next if _map.zip(cls) { |_wc, wc|
          break true unless _wc.include?(wc) &&
            find_form(obj.next, wc, objs, args)
        }

        forward_seq(fmt, str, objs, args)
      }
    else
      # Entry without a class list (#init stores a Regexp as +str+ for
      # these): try every combination of the per-object attributes, joined
      # into one string, and search it for all match positions.
      combinations(*map) { |q|
        q, pos = q.join, -1

        while pos = q.index(str, pos += 1)
          objs.clear; args.clear

          # $& is the text matched by the q.index call above. The block
          # breaks (yielding nil) when any matched position has no form,
          # which makes +next+ continue with the following match position.
          # NOTE(review): buf[pos + i] assumes one character of +q+ per
          # object, i.e. single-character attributes -- confirm.
          next unless $&.each_char.with_index { |wc, i|
            find_form(buf[pos + i], wc, objs, args) or break
          }

          forward_seq(fmt, $&, objs, args)
        end
      }
    end
  }

  # The run has been consumed; reset both arrays for the caller.
  buf.clear
  map.clear
end
forward_seq(fmt, str, objs, args)
# File lib/lingo/attendee/sequencer.rb, line 255
# Builds a sequence Word from a match and appends it to <tt>@buffer</tt>.
#
# fmt::  how to render the lexical form:
#        * contains a digit: '%0$s' is replaced by +str+ and the result is
#          applied to +args+ as a format string;
#        * any other truthy value: "<str>:" followed by +args+ joined
#          with +fmt+;
#        * otherwise: +args+ joined with a single space.
# str::  the matched pattern; also stored as the word's +pattern+.
# objs:: the matched objects; their forms, joined with a space, become the
#        word form, and the first object's +token+ is taken over.
# args:: the matched forms.
#
# Returns <tt>@buffer</tt>.
def forward_seq(fmt, str, objs, args)
  surface = objs.map(&:form).join(' ')

  rendered =
    if fmt =~ /\d/
      fmt.gsub('%0$s', str) % args
    elsif fmt
      "#{str}:#{args.join(fmt)}"
    else
      args.join(' ')
    end

  sequence = Word.new(surface, WA_SEQUENCE)
  sequence << Lexical.new(rendered, LA_SEQUENCE)

  # Read the token before touching the word, as the original assignment did.
  token = objs.first.token
  sequence.pattern = str
  sequence.token = token

  @buffer << sequence
end
process_seq()
# File lib/lingo/attendee/sequencer.rb, line 168
# Walks <tt>@buffer</tt>, collects consecutive objects that carry at least
# one attribute used by the configured sequences, and hands every such run
# to #find_seq.
#
# Takes no arguments; reads <tt>@buffer</tt>, <tt>@cls</tt> and
# <tt>@mwc</tt>. Returns the result of the final #find_seq call.
def process_seq
  # buf: objects of the current run; map: their attributes, in parallel.
  buf, map = [], []

  # iter:   external enumerator over the buffer, yielding [object, index].
  # skip:   buffer index to resume from once the current run is finished
  #         (set when a multiword lexical is met, see below); 0 = none.
  # rewind: restarts +iter+, advances it by +skip+ items and resets +skip+.
  iter, skip, rewind = @buffer.each_with_index, 0, lambda {
    iter.rewind; skip.times { iter.next }; skip = 0
  }

  loop {
    obj, idx = begin
      iter.next
    rescue StopIteration
      # End of buffer: re-raise (which terminates +loop+) unless a resume
      # position is pending.
      raise unless skip > 0

      # Drop the first +skip+ entries of the run before going back.
      buf.slice!(0, skip)
      map.slice!(0, skip)

      # NOTE(review): the value of this rescue branch is the lambda's
      # return value (0), so +obj+ becomes 0 and +idx+ nil for this
      # iteration rather than a buffer element -- confirm this is intended.
      rewind.call
    end

    # Candidate attributes: NUM or UNK for a Token (depending on number?),
    # compound_attrs for a known Word, UNK for everything else.
    att = (tok = obj.is_a?(Token)) ? obj.number? ? NUM : UNK :
      obj.is_a?(Word) && !obj.unknown? ? obj.compound_attrs : UNK

    # Keep only attributes that occur in some configured sequence.
    if (att &= @cls).empty?
      # Nothing usable: the run ends here.
      find_seq(buf, map)
      rewind.call if skip > 0
    else
      # For the first lexical yielded by each_lex(@mwc) (presumably the
      # multiword lexicals -- verify against each_lex): advance +iter+ by
      # one item per word of its form and remember idx + 1 as the position
      # to come back to. A StopIteration raised by iter.next here is not
      # rescued locally and therefore ends +loop+.
      obj.each_lex(@mwc) { |lex|
        lex.form.count(' ').succ.times { iter.next }
        break skip = idx + 1
      } unless tok

      buf << obj
      map << att
    end
  }

  # Process whatever run is still pending at the end of the buffer.
  find_seq(buf, map)
end