module Lingo::Attendee::Stemmer::Porter

Constants

GOTO_RE
RULES
RULE_RE

Public Instance Methods

stem(word, found = false)
# File lib/lingo/attendee/stemmer/porter.rb, line 283
# Stems +word+ by applying the rewrite rules found in RULES.
#
# RULES is walked group by group (+key+ => list of rule strings). A rule
# string is either a rewrite rule (matched by RULE_RE; its captures are used
# as optional condition, suffix, replacement and optional goto label) or a
# bare jump (matched by GOTO_RE). Within one group at most one rewrite is
# applied. A goto label makes the walk skip all following groups until the
# one whose key equals the label.
#
# word::  The String to stem. It is not modified in place.
# found:: Initial value of the "a rule has been applied" flag. When a true
#         value is passed, +word+ is returned even if no rule applies.
#
# Returns the (possibly rewritten) word if a rule applied (or +found+ was
# passed as true), +nil+ otherwise.
def stem(word, found = false)
  goto = nil

  # Applies every pattern => replacement pair of +map+ to +str+ in place.
  conv = lambda { |str, map| map.each { |pat, rep| str.gsub!(pat, rep.to_s) } }

  RULES.each { |key, rules|
    # A pending goto skips every group until the labelled one is reached.
    next if goto && goto != key.to_s

    rules.each { |rule|
      case rule
        when RULE_RE
          # NOTE: goto is taken over from the rule before the suffix and
          # condition are checked, so it stays set even if this rule ends up
          # not applying (until the next rule examined overwrites it).
          cond, suffix, repl, goto = $1, $2, $3, $4

          # The suffix must end the whole string (\z, not the line anchor $)
          # and is matched literally, whatever characters it contains.
          base = word[/(.+)#{Regexp.escape(Unicode.downcase(suffix))}\z/, 1]
          next unless base
        when GOTO_RE
          goto = $1
          break
        else
          # Neither a rewrite rule nor a jump: nothing to apply.
          next
      end

      # Shadow of the remaining stem: 'c' for consonants, 'v' for vowels;
      # a "y" following a consonant counts as a vowel, any other "y" as a
      # consonant.
      shad = base.dup
      conv[shad,
        /[^aeiouy]/ => 'c',
        /[aeiou]/   => 'v',
        /cy/        => 'cv',
        /y/         => 'c'
      ]

      if cond
        # Translate the rule's condition into a Ruby expression. +last+ is
        # only assigned (to the stem's final character) when the shadow ends
        # in a consonant; the '*o' entry relies on that assignment.
        conv[cond,
          'm'   => shad.scan(/vc/).size,
          '*v*' => shad.include?('v'),
          '*d'  => shad.end_with?('c') && (last = base[-1]) == base[-2],
          '*o'  => shad.end_with?('cvc') && !'wxy'.include?(last),
          'and' => '&&',
          'or'  => '||',
          'not' => '!',
          '='   => '=='
        ]

        # Remaining "*X" terms test whether the stem ends in the letter X.
        last.upcase! if last
        cond.gsub!(/\*(\w)/) { last == $1 }

        # NOTE(review): +cond+ is derived solely from RULES; this eval is
        # only safe as long as RULES holds trusted rule strings.
        next unless eval(cond)
      end

      # A numeric replacement truncates the stem (e.g. "-1" drops its last
      # character); anything else is appended in lower case.
      found, word = true, begin
        base[0...Integer(repl)]
      rescue ArgumentError
        base << Unicode.downcase(repl)
      end

      break
    }
  }

  word if found
end