class Lingo::Attendee::Sequencer
Constants
- CLASS_RE
- FULL_CLASS_RE
- FULL_REGEX_RE
- NUM
- REGEX_RE
- UNK
Public Instance Methods
control(cmd, *)
click to toggle source
# File lib/lingo/attendee/sequencer.rb, line 153
# Reacts to control commands: on :RECORD and :EOF the buffer is processed;
# every other command is ignored (and nil is returned).
def control(cmd, *)
  case cmd
  when :RECORD, :EOF then process_buffer
  end
end
init()
click to toggle source
# File lib/lingo/attendee/sequencer.rb, line 122
# Reads the attendee configuration and compiles the 'sequences' entries into
# @seq (a list of [pattern, classes, format] triples) and @cls (the unique
# class characters scanned from all patterns).
#
# Raises MissingConfigError if no sequence could be registered.
def init
  stoppers = get_ary('stopper', DEFAULT_SKIP)
  @stopper = stoppers.push(WA_UNKNOWN, WA_UNKMULPART)

  @mwc = get_key('multiword', LA_MULTIWORD)

  @cls = []
  @seq = []

  get_key('sequences').each do |spec, fmt|
    # Normalise the format: +true+ means the '|' separator, a string has
    # every number N rewritten to the positional reference '%N$s', and
    # anything falsy stays nil.
    format =
      if fmt == true
        '|'
      elsif fmt
        fmt.gsub(/\d+/, '%\&$s')
      end

    register = lambda { |pattern, classes| @seq << [pattern, classes, format] }

    spec = spec.downcase
    classes = spec.scan(CLASS_RE)
    @cls.concat(classes)

    case spec
    when FULL_CLASS_RE
      register[spec, classes]
    when FULL_REGEX_RE
      slots = []

      # Each REGEX_RE match contributes to the slots handed to combinations:
      # the first capture adds one single-character slot per character, the
      # second adds one slot holding its characters, the third adds one slot
      # holding the character lists of its '|'-separated parts.
      spec.scan(REGEX_RE) do |singles, choices, branches|
        if singles
          singles.each_char { |char| slots << [char] }
        elsif choices
          slots << choices.chars
        else
          slots << branches.split('|').map(&:chars)
        end
      end

      combinations(*slots) { |combo| register[combo.join, combo.flatten] }
    else
      # Anything else is kept as a regular expression without a class list.
      register[Regexp.new(spec), nil]
    end
  end

  @cls.uniq!

  raise MissingConfigError.new(:sequences) if @seq.empty?
end
process_buffer()
click to toggle source
# File lib/lingo/attendee/sequencer.rb, line 161
# Searches the buffer for sequences when it holds more than one object,
# then flushes the buffer in any case.
def process_buffer
  if @buffer.size > 1
    process_seq
  end

  flush(@buffer)
end
process_buffer?()
click to toggle source
# File lib/lingo/attendee/sequencer.rb, line 157
# True when the most recently buffered object is a WordForm whose attribute
# is one of the configured stoppers.
def process_buffer?
  latest = @buffer.last
  latest.is_a?(WordForm) && @stopper.include?(latest.attr)
end
Private Instance Methods
find_form(obj, wc, objs, args)
click to toggle source
# File lib/lingo/attendee/sequencer.rb, line 247
# Looks up the form of +obj+ for word class +wc+ and, when one is found,
# records +obj+ in +objs+ and the form in +args+.
#
# For a Word the form of its first lexical with attribute +wc+ is used;
# for any other object its own form is used.
#
# Returns +args+ on success and nil when no form is available (in which
# case neither array is touched).
def find_form(obj, wc, objs, args)
  form =
    if obj.is_a?(Word)
      lexical = obj.lexicals.find { |lex| lex.attr == wc }
      lexical.form if lexical
    else
      obj.form
    end

  return unless form

  objs << obj
  args << form
end
find_seq(buf, map)
click to toggle source
# File lib/lingo/attendee/sequencer.rb, line 207 def find_seq(buf, map) return if buf.empty? objs, args = [], [] @seq.each { |str, cls, fmt| if cls len = cls.size buf.each_cons(len).zip(map.each_cons(len)) { |_buf, _map| obj = _buf.each; objs.clear; args.clear next if _map.zip(cls) { |_wc, wc| break true unless _wc.include?(wc) && find_form(obj.next, wc, objs, args) } forward_seq(fmt, str, objs, args) } else combinations(*map) { |q| q, pos = q.join, -1 while pos = q.index(str, pos += 1) objs.clear; args.clear next unless $&.each_char.with_index { |wc, i| find_form(buf[pos + i], wc, objs, args) or break } forward_seq(fmt, $&, objs, args) end } end } buf.clear map.clear end
forward_seq(fmt, str, objs, args)
click to toggle source
# File lib/lingo/attendee/sequencer.rb, line 255
# Builds a sequence Word from the matched objects and appends it to @buffer.
#
# fmt::  format of the lexical form: a string containing a digit is used as
#        a format string for +args+ (with '%0$s' replaced by +str+); any
#        other truthy value is used as the separator after a "str:" prefix;
#        nil joins +args+ with spaces
# str::  the matched pattern, also stored as the word's pattern
# objs:: the matched objects; their forms make up the word form and the
#        first one supplies the token
# args:: the forms collected for the match
def forward_seq(fmt, str, objs, args)
  surface = objs.map(&:form).join(' ')

  lexical_form =
    if fmt =~ /\d/
      fmt.gsub('%0$s', str) % args
    elsif fmt
      "#{str}:#{args.join(fmt)}"
    else
      args.join(' ')
    end

  sequence = Word.new(surface, WA_SEQUENCE)
  sequence << Lexical.new(lexical_form, LA_SEQUENCE)

  first_token = objs.first.token
  sequence.pattern = str
  sequence.token = first_token

  @buffer << sequence
end
process_seq()
click to toggle source
# File lib/lingo/attendee/sequencer.rb, line 168 def process_seq buf, map = [], [] iter, skip, rewind = @buffer.each_with_index, 0, lambda { iter.rewind; skip.times { iter.next }; skip = 0 } loop { obj, idx = begin iter.next rescue StopIteration raise unless skip > 0 buf.slice!(0, skip) map.slice!(0, skip) rewind.call end att = (tok = obj.is_a?(Token)) ? obj.number? ? NUM : UNK : obj.is_a?(Word) && !obj.unknown? ? obj.compound_attrs : UNK if (att &= @cls).empty? find_seq(buf, map) rewind.call if skip > 0 else obj.each_lex(@mwc) { |lex| lex.form.count(' ').succ.times { iter.next } break skip = idx + 1 } unless tok buf << obj map << att end } find_seq(buf, map) end