class Lingo::Attendee::TextReader
Public Instance Methods
control(cmd, *)
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 122
#
# Reacts to the +:TALK+ control command by reading every entry of +@files+
# line by line; any other command is ignored (returns +nil+).
#
# Per file it emits +:FILE+, then for each line either skips it (matches
# +@skip+), emits +:RECORD+ (matches +@lir+; first capture group, or the
# whole match if there is none), or strips +@cut+ and forwards the non-empty
# remainder together with its offset. +:EOF+ follows each file and +:EOT+
# the whole run.
#
# The stream is closed in an +ensure+ clause, so an aborted run (for example
# +throw(:cancel)+ raised from #filter) does not leak the file handle.
# Streams for which <tt>stdin?(path)</tt> holds are never closed here.
#
# Returns +:skip_command+ after handling +:TALK+.
def control(cmd, *)
  return unless cmd == :TALK

  command(:LIR) if @lir

  @files.each do |path|
    command(:FILE, path)

    io = name = nil

    begin
      if stdin?(path)
        io = open_stdin
      else
        io = open_path(name = path)
      end

      Progress.new(self, @progress && io.size, name) do |progress|
        io = filter(io, path, progress)

        # When the stream cannot report its own position, keep a running
        # byte count instead.
        pos = pos?(io) ? nil : 0

        io.each do |line|
          offset = if pos
            pos += line.bytesize
          else
            io.pos
          end

          progress << offset

          next if line =~ @skip

          if line =~ @lir
            command(:RECORD, $1 || $&)
          else
            line.sub!(@cut, '') if @cut
            forward(line, offset) unless line.empty?
          end
        end
      end
    ensure
      io.close if io && !stdin?(path)
    end

    command(:EOF, path)
  end

  command(:EOT)
  :skip_command
end
init()
click to toggle source
TODO: FILE/LIR-FILE (?)
# File lib/lingo/attendee/text_reader.rb, line 109
#
# Collects the input files and reads this attendee's settings into instance
# variables: +@encoding+, +@filter+, +@progress+ and the three patterns
# +@lir+ ('records'), +@cut+ ('fields') and +@skip+ ('skip').
#
# The default handed to 'fields' is +true+ exactly when a 'records' pattern
# was obtained, so +@lir+ has to be resolved first.
def init
  get_files

  @encoding = get_enc
  @filter, @progress = %w[filter progress].map { |key| get_key(key, false) }

  @lir = get_re('records', nil, %r{^\[(\d+)\.\]})

  has_records = @lir ? true : false
  @cut = get_re('fields', has_records, %r{^.+?:\s*})

  @skip = get_re('skip', nil)
end
Private Instance Methods
add_files(path, glob, recursive = false)
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 243
#
# Expands +path+ (which may contain glob characters) and appends the
# resulting files to +@files+, in sorted order.
#
# Plain entries are added as they are. A directory is either searched with
# +glob+ one level down (default) or, when +recursive+ is true, walked with
# Find.find, keeping every regular file whose path matches +glob+.
#
# Raises FileNotFoundError if +path+ expands to nothing.
def add_files(path, glob, recursive = false)
  entries = Dir[path]
  entries.sort!

  raise FileNotFoundError.new(path) if entries.empty?

  entries.each do |entry|
    unless File.directory?(entry)
      @files << entry
      next
    end

    if recursive
      Find.find(entry) do |match|
        next unless File.file?(match) && File.fnmatch?(glob, match)
        @files << match
      end
    else
      add_files(File.join(entry, glob), glob)
    end
  end
end
cancel(msg)
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 210
#
# Aborts the current operation by throwing +:cancel+ with +msg+ as its
# value; a matching <tt>catch(:cancel)</tt> must be active further up the
# call stack.
def cancel(msg)
  throw :cancel, msg
end
cancel_filter(type, name, what = :gem)
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 202
#
# Cancels with a message saying that the +type+ filter is not available,
# followed by an installation hint for +name+ (a +what+, by default a gem).
def cancel_filter(type, name, what = :gem)
  hint = please_install(what, name)
  cancel("#{type} filter not available. #{hint}")
end
cancel_filters(msg)
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 198
#
# Cancels with a message saying that filters are not available at all,
# followed by the explanation given in +msg+.
def cancel_filters(msg)
  prefix = 'Filters not available.'
  cancel("#{prefix} #{msg}")
end
file_type(io, path)
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 189
#
# Determines the content type used to pick a filter for +io+ / +path+.
#
# Uses FileMagic on the stream when that constant is defined and +io+
# responds to <tt>pos=</tt>; otherwise falls back to MIME::Types, which
# looks at +path+ only. Cancels (see #cancel_filters) when MIME::Types
# yields no type, or when neither library is loaded.
def file_type(io, path)
  if Object.const_defined?(:FileMagic) && io.respond_to?(:pos=)
    FileMagic.fm(:mime, simplified: true).io(io, 256, true)
  elsif Object.const_defined?(:MIME) && MIME.const_defined?(:Types)
    type = MIME::Types.of(path).first

    if type
      type.content_type
    else
      cancel_filters('File type could not be determined.')
    end
  else
    cancel_filters(please_install(:gem, 'ruby-filemagic', 'mime-types'))
  end
end
filter(io, path, progress)
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 157
#
# Wraps +io+ in the filter selected by +@filter+ and returns the result.
#
# When +@filter+ is exactly +true+ the type is detected via #file_type;
# otherwise the string form of +@filter+ is used. Matching is attempted in
# this order: the literal 'pdftotext', then anything containing "html",
# "xml" or "pdf" (case-insensitive). Without a match +io+ is returned
# unchanged.
def filter(io, path, progress)
  kind = if @filter == true
    file_type(io, path)
  else
    @filter.to_s
  end

  case kind
  when 'pdftotext'
    filter_pdftotext(io, path, progress)
  when /html/i
    filter_xml(io, :HTML)
  when /xml/i
    filter_xml(io)
  when /pdf/i
    filter_pdf(io)
  else
    io
  end
end
filter_pdf(io)
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 179
#
# Wraps +io+ in a Filter::PDF using +@encoding+. Cancels with an install
# hint for the 'pdf-reader' gem when no top-level +PDF+ constant is defined.
def filter_pdf(io)
  available = Object.const_defined?(:PDF)
  cancel_filter(:PDF, 'pdf-reader') unless available

  Filter::PDF.new(io, @encoding)
end
filter_pdftotext(io, path, progress, name = 'pdftotext')
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 167
#
# Converts a PDF to text with the external +name+ command (default
# 'pdftotext') and returns a stream opened on the resulting text file.
#
# Cancels with an install hint when File.which cannot locate the command.
# When <tt>stdin?(path)</tt> holds, +io+ is first spooled to a temporary
# '.pdf' file; otherwise +path+ is handed to the command directly. With
# +@progress+ set, +progress+ is re-initialised with the size of the text
# output.
def filter_pdftotext(io, path, progress, name = 'pdftotext')
  cmd = File.which(name)
  cancel_filter(:PDF, name, :command) unless cmd

  with_tempfile(name) do |tempfile|
    pdf_path = stdin?(path) ? tempfile[:pdf, io] : path
    txt_path = tempfile[:txt]

    system(cmd, '-q', pdf_path, txt_path)

    progress.init(File.size(txt_path)) if @progress

    open_path(txt_path)
  end
end
filter_xml(io, type = :XML)
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 184
#
# Wraps +io+ in the Filter class named by +type+ (+:XML+ by default), using
# +@encoding+. Cancels with an install hint for nokogiri when no top-level
# +Nokogiri+ constant is defined.
def filter_xml(io, type = :XML)
  available = Object.const_defined?(:Nokogiri)
  cancel_filter(type, :nokogiri) unless available

  filter_class = Filter.const_get(type)
  filter_class.new(io, @encoding)
end
get_files()
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 234
#
# Rebuilds +@files+ from the 'files' setting (default '-').
#
# Entries for which <tt>stdin?</tt> holds are stored verbatim; every other
# entry is expanded to an absolute path and handed to #add_files together
# with the 'glob' (default '*.txt') and 'recursive' (default +false+)
# settings.
def get_files
  glob      = get_key('glob', '*.txt')
  recursive = get_key('recursive', false)

  @files = []

  Array(get_key('files', '-')).each do |path|
    if stdin?(path)
      @files << path
    else
      add_files(File.expand_path(path), glob, recursive)
    end
  end
end
please_install(what, *names)
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 206
#
# Builds an installation hint. +what+ names the kind of thing to install
# (e.g. +:gem+ or +:command+); +names+ are the alternatives, each quoted and
# joined by "or".
def please_install(what, *names)
  alternatives = names.join("' or `")
  "Please install the `#{alternatives}' #{what}."
end
pos?(io)
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 214
#
# Returns the current position of +io+, or +nil+ when +io+ has no +pos+
# method or asking for it raises Errno::ESPIPE. Note that a position of 0
# is still truthy in Ruby.
def pos?(io)
  return nil unless io.respond_to?(:pos)

  io.pos
rescue Errno::ESPIPE
  nil
end
with_tempfile(name) { |lambda| ... }
click to toggle source
# File lib/lingo/attendee/text_reader.rb, line 219
#
# Yields a lambda that creates temporary files, and removes all of them
# once the block has finished, whether it returned normally or was left
# through an exception or +throw+.
#
# The lambda takes an extension +ext+ and an optional +io+. It creates a
# temp file named after +name+ with that extension, copies +io+ into it if
# given, closes it and returns its path.
#
# The copy is made in binary mode, since the payload may be binary (this
# class spools PDF input through it) and must not be subject to newline or
# encoding conversion. Cleanup uses <tt>close!</tt> so that a handle left
# open by a failed write is closed as well as unlinked.
#
# Returns the value of the block.
def with_tempfile(name)
  # Assigned before anything can raise, so the ensure clause never sees nil.
  tempfiles = []

  require 'tempfile'

  yield lambda { |ext, io = nil|
    temp = Tempfile.new([name, ".#{ext}"])
    tempfiles << temp

    if io
      temp.binmode
      temp.write(io.read)
    end

    temp.close
    temp.path
  }
ensure
  tempfiles.each(&:close!)
end