class Lingo::Language::Grammar
Constants
- DEFAULTS
- HYPHEN_RE
Public Class Methods
new(config, lingo)
click to toggle source
# File lib/lingo/language/grammar.rb, line 53
#
# Sets up the grammar: opens a Dictionary for +config+/+lingo+ and reads the
# compound settings from the 'compound' section of the dictionary config.
#
# Every key of DEFAULTS becomes an instance variable of the same name; its
# value is looked up under the dash-separated form of the key, falls back to
# the default and is converted with +to_i+.
def initialize(config, lingo)
  @dic         = Dictionary.new(config, lingo)
  @suggestions = []

  cfg = lingo.dictionary_config['compound']

  DEFAULTS.each do |name, default|
    option = name.to_s.tr('_', '-')
    instance_variable_set("@#{name}", cfg.fetch(option, default).to_i)
  end

  #--
  # The word class of a compound part can be marked separately in order to
  # tell it apart from the word classes of ordinary words, e.g.
  # Hausmeister => ['haus/s', 'meister/s'] or
  # Hausmeister => ['haus/s+', 'meister/s+'] with append-wordclass = '+'
  #++
  @append_wc = cfg.fetch('append-wordclass', '')

  #--
  # Certain sequences can be recognised as invalid compounds, e.g. a
  # compound made of two adjectives is not a compound, hence a skip
  # sequence of 'aa'. The configured list is downcased in place.
  #++
  @sequences = cfg.fetch('skip-sequences', []).map!(&:downcase)
end
open(*args) { |grammar = new(*args)| ... }
click to toggle source
# File lib/lingo/language/grammar.rb, line 47
#
# Creates a grammar from +args+, yields it to the block and returns the
# block's value. The grammar is closed afterwards, even when the block
# raises; nothing is closed if construction itself failed.
def self.open(*args)
  grammar = new(*args)
  yield grammar
ensure
  grammar.close if grammar
end
Public Instance Methods
close()
click to toggle source
# File lib/lingo/language/grammar.rb, line 77
#
# Closes the underlying dictionary and returns whatever its +close+ returns.
def close
  @dic.close
end
find_compound(str, level = 1, tail = false)
click to toggle source
# File lib/lingo/language/grammar.rb, line 81
#
# Tries to decompose +str+ into a compound.
#
# At +level+ 1 the result is a Word (initially WA_UNKNOWN) that is memoized
# per +str+ in <tt>@_compound</tt>. At any other level nothing is cached and
# the starting value handed to +permute_compound+ is the empty triple
# <tt>[[], [], '']</tt>.
def find_compound(str, level = 1, tail = false)
  return permute_compound([[], [], ''], str, level, tail) unless level == 1

  @_compound ||= {}
  @_compound[str] ||=
    permute_compound(Word.new(str, WA_UNKNOWN), str, level, tail)
end
find_compound_head(str)
click to toggle source
# File lib/lingo/language/grammar.rb, line 87
#
# Looks up the compound for +str+ and returns its +head+, or the compound
# itself when it has no head. Returns +nil+ when there is no compound or
# the compound is still unknown.
def find_compound_head(str)
  compound = find_compound(str)
  return if !compound || compound.unknown?

  compound.head || compound
end
Private Instance Methods
permute_compound(ret, str, level, tail)
click to toggle source
# File lib/lingo/language/grammar.rb, line 94
#
# Searches for a decomposition of +str+ and, if an acceptable one is found,
# either returns it (+level+ > 1) or records it on +ret+ via +identify+
# (+level+ 1). In every other case +ret+ is returned untouched.
#
# Strings no longer than <tt>@min_word_size</tt> are not examined at all.
# A string matching HYPHEN_RE is split at the position given by the match;
# otherwise every split position is tried from left to right.
def permute_compound(ret, str, level, tail)
  len = str.length
  return ret unless len > @min_word_size

  str = Unicode.downcase(str)

  res =
    if str =~ HYPHEN_RE
      test_compound($1, '-', $2, level, tail)
    else
      sug = @suggestions[level] ||= []

      catch(:res) do
        1.upto(len - 1) do |i|
          tst   = test_compound(str[0, i], '', str[i, len], level, tail)
          parts = tst.first
          next if parts.empty?

          # The first split whose last lexical is not "take it as is" wins
          # immediately; the others are only remembered as suggestions.
          throw(:res, tst) unless parts.last.attr == LA_TAKEITASIS
          sug << tst
        end

        # No definite hit: fall back to the first suggestion (and forget
        # the collected ones), or to the empty triple.
        sug.empty? ? [[], [], ''] : sug.first.tap { sug.clear }
      end
    end

  lex, sta, seq = res

  # Order matters: the emptiness check guards the checks on +sta+.
  acceptable = !lex.empty? &&
               sta.size <= @max_parts &&
               sta.min >= @min_part_size &&
               str.length / sta.size >= @min_avg_part_size &&
               (@sequences.empty? || !@sequences.include?(seq))
  return ret unless acceptable

  return res if level > 1

  # Top level: tag every non-compound part with the configured word class
  # suffix and mark +ret+ as a compound.
  lex.each { |l| l.attr += @append_wc unless l.attr == LA_COMPOUND }
  ret.identify(lex, WA_COMPOUND, seq)

  ret
end
test_compound(fstr, infix, bstr, level = 1, tail = false)
click to toggle source
# File lib/lingo/language/grammar.rb, line 128
#
# Checks whether +fstr+ (front part) and +bstr+ (back part), joined by
# +infix+, form a compound.
#
# Returns a triple <tt>[lexicals, sta, seq]</tt>: the compound lexicals
# followed by the non-compound part lexicals, the part lengths, and the
# joined attribute sequence. Returns <tt>[[], [], '']</tt> when either part
# cannot be resolved. The back part is resolved first, then the front part.
def test_compound(fstr, infix, bstr, level = 1, tail = false)
  sta   = [fstr.length, bstr.length]
  seq   = %w[? ?]
  empty = [[], [], '']

  hyphenated = infix == '-'

  # --- back part ---
  blex = @dic.select_with_suffix(bstr)

  if !blex.empty?
    # 1. Word w/ suffix
    bform  = tail ? bstr : blex.first.form
    seq[1] = blex.first.attr
  elsif tail && !(blex = @dic.select_with_infix(bstr)).empty?
    # 2. Word w/ infix (only tried when +tail+ is set)
    bform  = bstr
    seq[1] = blex.first.attr
  elsif hyphenated
    blex, bsta, bseq = find_compound(bstr, level + 1, tail)

    if blex.empty?
      # 4. Take it as is
      bform  = bstr
      seq[1] = LA_TAKEITASIS
      blex   = [Lexical.new(bstr, LA_TAKEITASIS)]
    else
      # 3. Compound: splice its part lengths in place of the back length
      bform       = blex.first.form
      seq[1]      = bseq
      sta[1..-1]  = bsta
    end
  else
    return empty
  end

  # --- front part ---
  flex = @dic.select_with_infix(fstr)

  if flex.empty?
    flex, fsta, fseq = find_compound(fstr, level + 1, true)

    if !flex.empty?
      # 2. Compound: splice its part lengths in place of the front length
      fform     = flex.first.form
      seq[0]    = fseq
      sta[0..0] = fsta
    elsif hyphenated
      # 3. Take it as is
      fform  = fstr
      seq[0] = LA_TAKEITASIS
      flex   = [Lexical.new(fstr, LA_TAKEITASIS)]
    else
      return empty
    end
  else
    # 1. Word w/ infix
    fform  = fstr
    seq[0] = flex.first.attr
  end

  # Remember the originating form on every lexical that has no source yet.
  [[flex, fform], [blex, bform]].each do |lexicals, form|
    lexicals.each { |lex| lex.src ||= form }
  end

  # Merge both parts (in place) and drop nested compound lexicals.
  flex.concat(blex).delete_if { |lex| lex.attr == LA_COMPOUND }

  compounds = combinations([fform], [bform]).map do |front, back|
    Lexical.new(front + infix + back, LA_COMPOUND)
  end

  [compounds.concat(flex), sta, seq.join]
end