class Lingo::Language::Grammar

Constants

DEFAULTS
HYPHEN_RE

Public Class Methods

new(config, lingo) click to toggle source
# File lib/lingo/language/grammar.rb, line 53
# Sets up the grammar: opens the backing dictionary and reads the
# 'compound' section of the dictionary configuration.
#
# config:: passed through unchanged to Dictionary.new
# lingo::  passed to Dictionary.new; also supplies +dictionary_config+
def initialize(config, lingo)
  @dic         = Dictionary.new(config, lingo)
  @suggestions = []

  cfg = lingo.dictionary_config['compound']

  # Every DEFAULTS entry becomes an integer instance variable. The config
  # key is the DEFAULTS key with underscores replaced by hyphens; the
  # DEFAULTS value is used when the config does not provide that key.
  DEFAULTS.each do |name, default|
    key = name.to_s.tr('_', '-')
    instance_variable_set("@#{name}", cfg.fetch(key, default).to_i)
  end

  #--
  # The word class of a compound part can be marked separately so that it
  # can be told apart from the word classes of ordinary words, e.g.
  # Hausmeister => ['haus/s', 'meister/s'] or Hausmeister
  # => ['haus/s+', 'meister/s+'] with append-wordclass = '+'
  #++
  @append_wc = cfg.fetch('append-wordclass', '')

  #--
  # Certain sequences can be recognized as invalid compounds, e.g. a
  # compound made up of two adjectives is not a compound, hence
  # skip-sequence = 'aa'
  #++
  # NOTE: map! downcases in place, so a list taken from the config is
  # itself modified.
  @sequences = cfg.fetch('skip-sequences', []).map!(&:downcase)
end
open(*args) { |grammar = new(*args)| ... } click to toggle source
# File lib/lingo/language/grammar.rb, line 47
# Creates a grammar from +args+, yields it to the block and closes it
# afterwards, even when the block raises. Returns the block's value.
def self.open(*args)
  grammar = new(*args)
  yield grammar
ensure
  # +grammar+ is still nil when +new+ itself raised.
  grammar.close if grammar
end

Public Instance Methods

close() click to toggle source
# File lib/lingo/language/grammar.rb, line 77
# Closes the dictionary this grammar was built on and returns whatever
# the dictionary's own +close+ returns.
def close
  @dic.close
end
find_compound(str, level = 1, tail = false) click to toggle source
# File lib/lingo/language/grammar.rb, line 81
# Looks up +str+ as a compound.
#
# str::   the string to analyse
# level:: recursion depth; 1 for a top-level lookup
# tail::  forwarded to permute_compound (and from there to test_compound)
#
# At level 1 the search is seeded with a fresh <tt>Word.new(str, WA_UNKNOWN)</tt>
# and the result of permute_compound is memoized. At any other level the
# search is seeded with an empty <tt>[[], [], '']</tt> triple and nothing
# is cached.
def find_compound(str, level = 1, tail = false)
  return permute_compound([[], [], ''], str, level, tail) unless level == 1

  # +tail+ influences the outcome of the search, so tail lookups get a cache
  # of their own instead of sharing (and being answered from) the cache of
  # non-tail lookups. The default path keeps using @_compound as before.
  cache = tail ? (@_compound_tail ||= {}) : (@_compound ||= {})

  cache[str] ||= permute_compound(Word.new(str, WA_UNKNOWN), str, level, tail)
end
find_compound_head(str) click to toggle source
# File lib/lingo/language/grammar.rb, line 87
# Returns the head of the compound found for +str+, or the compound itself
# when it has no head. Returns nil when nothing was found or the result is
# still flagged as unknown.
def find_compound_head(str)
  word = find_compound(str)
  return if !word || word.unknown?

  word.head || word
end

Private Instance Methods

permute_compound(ret, str, level, tail) click to toggle source
# File lib/lingo/language/grammar.rb, line 94
# Tries to decompose +str+ into compound parts.
#
# ret::   value returned when no acceptable decomposition is found; at
#         level 1 it must respond to +identify+ and is updated in place
# str::   the string to decompose (downcased before testing)
# level:: recursion depth; selects the per-level suggestion list
# tail::  forwarded to test_compound
#
# Strings not longer than @min_word_size are not examined. A string that
# matches HYPHEN_RE is split at its two capture groups; any other string is
# tried at every split position from left to right.
#
# Returns +ret+ (identified as WA_COMPOUND at level 1), or, at deeper
# levels, the <tt>[lexicals, part sizes, sequence]</tt> triple obtained from
# test_compound when it passes the size and sequence checks.
def permute_compound(ret, str, level, tail)
  if (len = str.length) > @min_word_size
    str = Unicode.downcase(str)

    lex, sta, seq = res = if str =~ HYPHEN_RE
      test_compound($1, '-', $2, level, tail)
    else
      # Scratch list of "take it as is" candidates for this level. It is
      # emptied on entry: leaving the search through +throw+ below skips the
      # clean-up at the end of the block, and left-over candidates would
      # otherwise be handed out for an unrelated string later on.
      sug = (@suggestions[level] ||= []).clear

      catch(:res) {
        1.upto(len - 1) { |i|
          tst = test_compound(str[0, i], '', str[i, len], level, tail)

          unless (cand = tst.first).empty?
            # The first split whose last lexical is not "take it as is"
            # wins immediately; the others are only remembered.
            cand.last.attr == LA_TAKEITASIS ? sug << tst : throw(:res, tst)
          end
        }

        # No regular split found: fall back to the first remembered
        # candidate, if any.
        sug.empty? ? [[], [], ''] : sug.first.tap { sug.clear }
      }
    end

    acceptable = !lex.empty? &&
      sta.size              <= @max_parts         &&
      sta.min               >= @min_part_size     &&
      str.length / sta.size >= @min_avg_part_size &&
      (@sequences.empty? || !@sequences.include?(seq))

    if acceptable
      if level > 1
        # Nested call: hand the raw triple back to test_compound.
        ret = res
      else
        # Top level: tag the parts' word classes and record the result.
        lex.each { |l| l.attr += @append_wc unless l.attr == LA_COMPOUND }
        ret.identify(lex, WA_COMPOUND, seq)
      end
    end
  end

  ret
end
test_compound(fstr, infix, bstr, level = 1, tail = false) click to toggle source
# File lib/lingo/language/grammar.rb, line 128
# Tests whether front part +fstr+ and back part +bstr+, joined by +infix+,
# can be read as a compound.
#
# fstr::  front part of the candidate split
# infix:: string placed between the parts ('-' enables the "take it as is"
#         fallbacks below)
# bstr::  back part of the candidate split
# level:: recursion depth, passed on (incremented) to find_compound
# tail::  when truthy, the back part keeps its surface form and may also
#         be looked up with select_with_infix
#
# The back part is resolved first, then the front part. Returns
# <tt>[lexicals, part sizes, sequence string]</tt>, or <tt>[[], [], '']</tt>
# as soon as one of the parts cannot be resolved.
def test_compound(fstr, infix, bstr, level = 1, tail = false)
  # sta: lengths of the parts; seq: one word-class marker per part ('?'
  # until resolved); empty: the "no result" triple.
  sta, seq, empty = [fstr.length, bstr.length], %w[? ?], [[], [], '']

  if !(blex = @dic.select_with_suffix(bstr)).empty?
    # 1. Word w/ suffix
    bform, seq[1] = tail ? bstr : blex.first.form, blex.first.attr
  elsif tail && !(blex = @dic.select_with_infix(bstr)).empty?
    # 2. Word w/ infix -- only attempted when +tail+ is truthy.
    # NOTE(review): this comment used to read "unless tail part", which
    # contradicts the condition above; confirm the intended semantics.
    bform, seq[1] = bstr, blex.first.attr
  elsif infix == '-'
    blex, bsta, bseq = find_compound(bstr, level + 1, tail)

    if !blex.empty?
      # 3. Compound -- splice the nested part sizes in place of the single
      # back-part length.
      bform, seq[1], sta[1..-1] = blex.first.form, bseq, bsta
    else
      # 4. Take it as is
      blex = [Lexical.new(bform = bstr, seq[1] = LA_TAKEITASIS)]
    end
  else
    return empty
  end

  if !(flex = @dic.select_with_infix(fstr)).empty?
    # 1. Word w/ infix
    fform, seq[0] = fstr, flex.first.attr
  else
    # The front part is searched with tail = true.
    flex, fsta, fseq = find_compound(fstr, level + 1, true)

    if !flex.empty?
      # 2. Compound -- splice the nested part sizes in place of the single
      # front-part length.
      fform, seq[0], sta[0..0] = flex.first.form, fseq, fsta
    elsif infix == '-'
      # 3. Take it as is
      flex = [Lexical.new(fform = fstr, seq[0] = LA_TAKEITASIS)]
    else
      return empty
    end
  end

  # Give every lexical without a source its part's form as +src+. The
  # +shift+ removes the lexical list from each pair, so +forms+ ends up as
  # [[fform], [bform]].
  forms = [[flex, fform], [blex, bform]].each { |ary|
    ary.shift.each { |lex| lex.src ||= ary.first }
  }

  # Merge both parts' lexicals (in place, into +flex+) and drop nested
  # compound entries.
  flex.concat(blex).delete_if { |lex| lex.attr == LA_COMPOUND }

  # +combinations+ is defined outside this block; it presumably yields
  # every front/back pairing of the given forms -- verify against its
  # definition.
  [combinations(*forms).map { |front, back|
    Lexical.new(front + infix + back, LA_COMPOUND)
  }.concat(flex), sta, seq.join]
end