class Fluent::Plugin::CSVParser

Public Instance Methods

configure(conf) click to toggle source
Calls superclass method Fluent::Plugin::Parser#configure
# File lib/fluent/plugin/parser_csv.rb, line 33
# Applies the configuration via the superclass and, when the fast engine is
# selected, prepares the state used by the fast path and reroutes #parse.
#
# With @parser_engine == :fast this sets @quote_char and @escape_pattern
# (a doubled quote character) and overrides #parse on this instance only,
# so that calls to #parse are served by #parse_fast.
#
# @param conf the configuration object, passed through to the superclass
def configure(conf)
  super

  return unless @parser_engine == :fast

  @quote_char = '"'
  @escape_pattern = Regexp.compile(@quote_char * 2)

  # Per-instance override: other parser instances keep the CSV-based #parse.
  define_singleton_method(:parse, method(:parse_fast))
end
parse(text) { |time, record| ... } click to toggle source
# File lib/fluent/plugin/parser_csv.rb, line 48
# Parses one line of CSV with the standard CSV library and yields the result.
#
# The fields are paired positionally with @keys; the resulting hash is passed
# through parse_time and convert_values (both defined outside this method)
# before being handed to the block.
#
# @param text [String] a single CSV line, split on @delimiter
# @yield [time, record] the pair produced by convert_values
# @return the value returned by the block
def parse(text, &block)
  fields = CSV.parse_line(text, col_sep: @delimiter)
  row = @keys.zip(fields).to_h
  time, record = convert_values(parse_time(row), row)
  yield time, record
end
parse_fast(text) { |time, record| ... } click to toggle source
# File lib/fluent/plugin/parser_csv.rb, line 55
# Fast-path counterpart of #parse: tokenises the line with
# parse_fast_internal instead of the CSV library, then applies the same
# parse_time / convert_values post-processing and yields the result.
#
# @param text [String] a single CSV line
# @yield [time, record] the pair produced by convert_values
# @return the value returned by the block
def parse_fast(text, &block)
  row = parse_fast_internal(text)
  time, record = convert_values(parse_time(row), row)
  yield time, record
end
parse_fast_internal(text) click to toggle source

CSV.parse_line is too slow because it initializes lots of objects, and the CSV module doesn't provide an efficient method for parsing a single line. This method avoids the overhead of CSV.parse_line for typical patterns.

# File lib/fluent/plugin/parser_csv.rb, line 64
# Splits one CSV line into a record hash without going through CSV.parse_line.
#
# The line is split on @delimiter and each piece is classified by how many
# quote characters it contains:
#   0     - plain value; an empty value becomes nil
#   1     - a piece opening with a quote starts a quoted field that contains
#           the delimiter; following pieces are merged until one ends with a quote
#   2     - a fully quoted value has its surrounding quotes stripped
#   other - surrounding quotes are stripped and doubled quotes are unescaped
#
# @param text [String] one CSV line; a trailing line terminator is ignored.
#   The string itself is left untouched, so frozen strings are accepted.
# @return [Hash] values keyed by the entries of @keys in column order;
#   an empty hash when the line is empty
def parse_fast_internal(text)
  record = {}
  # Non-destructive chomp: chomp! would modify the caller's string and raise
  # FrozenError when the caller passes a frozen string.
  line = text.chomp

  return record if line.empty?

  # use while because while is now faster than each_with_index
  columns = line.split(@delimiter, -1) # -1 keeps trailing empty columns
  num_columns = columns.size
  i = 0 # index into @keys (one per logical field)
  j = 0 # index into columns (one per split piece)
  while j < num_columns
    column = columns[j]

    case column.count(@quote_char)
    when 0
      column = nil if column.empty?
    when 1
      if column.start_with?(@quote_char)
        # Quoted field containing the delimiter: glue pieces back together
        # until the piece that carries the closing quote.
        to_merge = [column]
        j += 1
        while j < num_columns
          merged_col = columns[j]
          to_merge << merged_col
          break if merged_col.end_with?(@quote_char)
          j += 1
        end
        # NOTE: doubled quotes inside a merged field are not unescaped here.
        column = to_merge.join(@delimiter)[1..-2]
      end
    when 2
      if column.start_with?(@quote_char) && column.end_with?(@quote_char)
        column = column[1..-2]
      end
    else
      if column.start_with?(@quote_char) && column.end_with?(@quote_char)
        column = column[1..-2]
      end
      column.gsub!(@escape_pattern, @quote_char)
    end

    record[@keys[i]] = column
    j += 1
    i += 1
  end
  record
end