class Lingo::Attendee::TextReader

Public Instance Methods

control(cmd, *) click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 123
# Reacts to the :TALK control command by reading every configured input
# and emitting its content downstream. Any other command is ignored
# (the method returns nil).
#
# For each entry in @files the sequence is: FILE command, the lines of the
# (optionally filtered) input, EOF command. After the last input an EOT
# command is sent.
#
# Returns :skip_command once all inputs have been processed.
def control(cmd, *)
  return unless cmd == :TALK

  # Announce record mode up front when a record pattern is configured.
  command(:LIR) if @lir

  @files.each { |path|
    command(:FILE, path)

    # `name` is only assigned for real paths; for stdin it stays nil.
    io = stdin?(path) ? open_stdin : open_path(name = path)

    # The size is only passed on when progress reporting is enabled.
    Progress.new(self, @progress && io.size, name) { |progress|
      # `io` is the local from the enclosing block, so it now refers to
      # whatever the filter returned. `pos` stays nil when that object
      # reports a position itself; otherwise bytes are counted manually,
      # starting at 0.
      pos = 0 unless pos?(io = filter(io, path, progress))

      io.each { |line|
        # Offset is either the running byte count or the io's own position.
        progress << offset = pos ? pos += line.bytesize : io.pos

        # Three-way dispatch per line:
        # 1. lines matching @skip are dropped,
        # 2. lines matching @lir become RECORD commands carrying the first
        #    capture group (or the whole match if there is none),
        # 3. everything else has the @cut prefix removed (if configured)
        #    and is forwarded unless it ended up empty.
        line =~ @skip ? nil : line =~ @lir ?
          command(:RECORD, $1 || $&) : begin
            line.sub!(@cut, '') if @cut
            forward(line, offset) unless line.empty?
          end
      }
    }

    # Closes the object `io` currently refers to, i.e. the filter result.
    # NOTE(review): if the filter returned a different object, the handle
    # opened above may remain open; likewise when an exception or throw
    # leaves the block early -- confirm whether that matters here.
    io.close unless stdin?(path)

    command(:EOF, path)
  }

  command(:EOT)
  :skip_command
end
init() click to toggle source

TODO: FILE/LIR-FILE (?)

# File lib/lingo/attendee/text_reader.rb, line 110
# Reads the attendee configuration into instance state.
#
# Collects the input files first, then the encoding, the filter and
# progress switches, and finally the three line patterns:
#
# @lir::  record marker; the pattern passed along matches a leading
#         "[<digits>.]".
# @cut::  field prefix to strip; requested with +true+ as second argument
#         whenever a record pattern is in effect. The pattern passed along
#         matches a leading "<text>:" plus trailing whitespace.
# @skip:: lines to ignore; no pattern is passed along.
#
# NOTE(review): the exact meaning of the second and third arguments to
# +get_re+ is defined outside this view -- presumably default and fallback
# pattern; confirm.
def init
  get_files

  @encoding = get_enc
  @filter   = get_key('filter', false)
  @progress = get_key('progress', false)

  record_pattern = get_re('records', nil, %r{^\[(\d+)\.\]})
  @lir = record_pattern

  # Field cutting is switched on by default only in record mode.
  cut_by_default = record_pattern ? true : false
  @cut = get_re('fields', cut_by_default, %r{^.+?:\s*})

  @skip = get_re('skip', nil)
end

Private Instance Methods

add_files(path, glob, recursive = false) click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 244
# Expands +path+ and appends the resulting input files to @files.
#
# +path+ is expanded with Dir.[] and the matches are processed in sorted
# order:
#
# * a non-directory entry is added as is;
# * a directory, when +recursive+ is false, is expanded again using
#   "<directory>/<glob>";
# * a directory, when +recursive+ is true, is walked with Find.find and
#   every regular file whose path matches +glob+ is added.
#
# Raises FileNotFoundError when +path+ expands to nothing.
def add_files(path, glob, recursive = false)
  entries = Dir[path].sort
  raise FileNotFoundError.new(path) if entries.empty?

  entries.each do |entry|
    if !File.directory?(entry)
      @files << entry
    elsif recursive
      Find.find(entry) do |match|
        next unless File.file?(match) && File.fnmatch?(glob, match)
        @files << match
      end
    else
      add_files(File.join(entry, glob), glob)
    end
  end
end
cancel(msg) click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 211
# Aborts the current operation by throwing :cancel with +msg+ as payload.
# Control returns to whichever enclosing +catch(:cancel)+ is active.
def cancel(msg)
  throw :cancel, msg
end
cancel_filter(type, name, what = :gem) click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 203
# Cancels because one specific filter cannot be used.
#
# type:: label of the filter, used as the message prefix
# name:: thing the user has to install
# what:: kind of thing to install (defaults to :gem)
def cancel_filter(type, name, what = :gem)
  hint = please_install(what, name)
  cancel("#{type} filter not available. #{hint}")
end
cancel_filters(msg) click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 199
# Cancels because filtering as a whole is unavailable; +msg+ is appended
# to the fixed "Filters not available." prefix.
def cancel_filters(msg)
  reason = "Filters not available. #{msg}"
  cancel(reason)
end
file_type(io, path) click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 190
# Determines the content type of an input, used to pick a filter.
#
# Strategy, in order of preference:
#
# 1. If FileMagic is loaded and +io+ supports +pos=+, the type is taken
#    from the stream itself.
# 2. Otherwise, if MIME::Types is loaded, the type is looked up by +path+;
#    the operation is cancelled when no type is found.
# 3. Otherwise the operation is cancelled with a hint to install either
#    library.
def file_type(io, path)
  if Object.const_defined?(:FileMagic) && io.respond_to?(:pos=)
    FileMagic.fm(:mime, simplified: true).io(io, 256, true)
  elsif Object.const_defined?(:MIME) && MIME.const_defined?(:Types)
    type = MIME::Types.of(path).first

    if type
      type.content_type
    else
      cancel_filters('File type could not be determined.')
    end
  else
    cancel_filters(please_install(:gem, 'ruby-filemagic', 'mime-types'))
  end
end
filter(io, path, progress) click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 158
# Wraps +io+ in the filter selected by the +filter+ setting.
#
# When @filter is exactly +true+ the type is detected via +file_type+;
# otherwise the setting's string form is used. The selection is then:
#
# * exactly "pdftotext"  -> external pdftotext conversion
# * contains "html"      -> HTML filter
# * contains "xml"       -> XML filter
# * contains "pdf"       -> PDF filter
# * anything else        -> +io+ is returned untouched
#
# The pattern checks are case-insensitive and evaluated in this order, so
# a type containing both "html" and "xml" is treated as HTML.
def filter(io, path, progress)
  kind = if @filter == true
    file_type(io, path)
  else
    @filter.to_s
  end

  case kind
  when 'pdftotext'
    filter_pdftotext(io, path, progress)
  when /html/i
    filter_xml(io, :HTML)
  when /xml/i
    filter_xml(io)
  when /pdf/i
    filter_pdf(io)
  else
    io
  end
end
filter_pdf(io) click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 180
# Wraps +io+ in a Filter::PDF using the configured encoding.
#
# Cancels with an install hint for the `pdf-reader' gem when no top-level
# PDF constant is defined.
def filter_pdf(io)
  unless Object.const_defined?(:PDF)
    cancel_filter(:PDF, 'pdf-reader')
  end

  Filter::PDF.new(io, @encoding)
end
filter_pdftotext(io, path, progress, name = 'pdftotext') click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 168
# Converts a PDF input to text by running the external +name+ command
# (default: pdftotext) and returns a handle on the resulting text file.
#
# When the input comes from stdin, its content is first written to a
# temporary PDF file; otherwise +path+ is handed to the command directly.
# If progress reporting is enabled, +progress+ is re-initialised with the
# size of the generated text file.
#
# Cancels with an install hint when the command cannot be located.
def filter_pdftotext(io, path, progress, name = 'pdftotext')
  cmd = File.which(name)
  cancel_filter(:PDF, name, :command) unless cmd

  with_tempfile(name) do |tempfile|
    pdf_path = stdin?(path) ? tempfile[:pdf, io] : path
    txt_path = tempfile[:txt]

    # '-q' is passed through to the command as its first argument.
    system(cmd, '-q', pdf_path, txt_path)

    progress.init(File.size(txt_path)) if @progress

    open_path(txt_path)
  end
end
filter_xml(io, type = :XML) click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 185
# Wraps +io+ in the markup filter named by +type+ (a constant under
# Filter, :XML by default), using the configured encoding.
#
# Cancels with an install hint for nokogiri when no top-level Nokogiri
# constant is defined.
def filter_xml(io, type = :XML)
  nokogiri_loaded = Object.const_defined?(:Nokogiri)
  cancel_filter(type, :nokogiri) unless nokogiri_loaded

  filter_class = Filter.const_get(type)
  filter_class.new(io, @encoding)
end
get_files() click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 235
# Builds the list of inputs in @files from the configuration.
#
# Reads the +glob+ (default '*.txt'), +recursive+ (default false) and
# +files+ (default '-') settings. Entries recognised by +stdin?+ are kept
# verbatim; every other entry is expanded to an absolute path and resolved
# through +add_files+.
def get_files
  glob      = get_key('glob', '*.txt')
  recursive = get_key('recursive', false)

  @files = []

  Array(get_key('files', '-')).each do |path|
    if stdin?(path)
      @files << path
    else
      add_files(File.expand_path(path), glob, recursive)
    end
  end
end
please_install(what, *names) click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 207
# Builds an installation hint.
#
# what::  kind of thing to install (e.g. :gem or :command)
# names:: one or more alternatives, joined with "or"
#
# Each name is wrapped in a backtick/apostrophe pair.
def please_install(what, *names)
  alternatives = names.join("' or `")
  "Please install the `#{alternatives}' #{what}."
end
pos?(io) click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 215
# Returns the current position of +io+, or nil when +io+ has no +pos+
# method or asking for the position raises Errno::ESPIPE.
def pos?(io)
  return unless io.respond_to?(:pos)

  io.pos
rescue Errno::ESPIPE
  nil
end
with_tempfile(name) { |lambda| ... } click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 220
# Yields a factory for temporary files and removes every file it created
# once the block has finished, whether normally or not.
#
# The factory is a lambda taking an extension and an optional +io+. It
# creates a temp file named after +name+ with that extension, copies the
# content of +io+ into it when given, closes it and returns its path.
#
# Returns the value of the block.
def with_tempfile(name)
  # Loaded on demand; only needed when this code path is actually used.
  require 'tempfile'

  created = []

  factory = ->(ext, io = nil) do
    file = Tempfile.new([name, ".#{ext}"])
    created.push(file)

    file.write(io.read) if io
    file.close

    file.path
  end

  yield factory
ensure
  created.each { |file| file.unlink }
end