class MaRuKu::In::Markdown::SpanLevelParser::HTMLHelper

This class helps read and sanitize HTML blocks.

Constants

CData
CDataEnd
CommentEnd
CommentStart
EverythingElse
PartialTag
TO_SANITIZE
Tag

Attributes

first_tag[R]
rest[R]
state[RW]

Public Class Methods

new() click to toggle source
# File lib/maruku/input/html_helper.rb, line 17
# Set up an empty reader: nothing consumed yet (@already), nothing pending
# (@rest), no regexp match (@m), no open tags (@tag_stack), and the state
# machine positioned at :inside_element.
def initialize
  @already = ""
  @rest = ""
  @m = nil
  @tag_stack = []
  self.state = :inside_element
end

Public Instance Methods

eat_this(line) click to toggle source
# File lib/maruku/input/html_helper.rb, line 27
# Feed one more piece of input to the reader and advance the state machine.
#
# +line+ is placed in front of whatever is still pending in @rest, and the
# combined text is then consumed piece by piece. Consumed text is appended to
# @already (readable through stuff_you_read); unconsumed text stays in @rest.
#
# The loop stops when @rest is empty, or as soon as is_finished? is true and
# at least one comment start or tag has been read during this call. Malformed
# input is reported through #error, which raises.
#
# The regexp constants used below (CommentStart, CommentEnd, Tag, CData,
# CDataEnd, PartialTag, EverythingElse) are defined elsewhere in the class.
def eat_this(line)
  @rest = line + @rest
  # Counts comment starts and tags seen in this call; used by the break below.
  things_read = 0
  until @rest.empty?
    case self.state
    when :inside_comment
      # The assignments inside the conditions are intentional: @m keeps the
      # MatchData for the branch body (and for inspect/debug_state).
      if @m = CommentEnd.match(@rest)
        debug_state 'Comment End'
        # Workaround for https://bugs.ruby-lang.org/issues/9277 and another bug in 1.9.2 where even a
        # single dash in a comment will cause REXML to error.
        # The gsub adds a space after every dash that is followed by another
        # dash or by the end of the text.
        @already << @m.pre_match.gsub(/-(?![^\-])/, '- ') << @m.to_s
        @rest = @m.post_match
        self.state = :inside_element
      else
        # No comment terminator yet: consume everything and stay in the comment.
        @already << @rest.gsub(/-(?![^\-])/, '- ') # Workaround for https://bugs.ruby-lang.org/issues/9277
        @rest = ""
        self.state = :inside_comment
      end
    when :inside_element
      # The order of these tests matters: the first pattern that matches wins.
      if @m = CommentStart.match(@rest)
        debug_state 'Comment'
        things_read += 1
        @already << @m.pre_match << @m.to_s
        @rest = @m.post_match
        self.state = :inside_comment
      elsif @m = Tag.match(@rest)
        debug_state 'Tag'
        things_read += 1
        self.state = :inside_element
        # handle_tag consumes the match held in @m and updates @already/@rest.
        handle_tag
      elsif @m = CData.match(@rest)
        debug_state 'CDATA'
        @already << @m.pre_match
        # Inside <script>/<style>, flush the buffered element content before
        # the CDATA section; buffering resumes when the section ends.
        close_script_style if script_style?
        @already << @m.to_s
        @rest = @m.post_match
        self.state = :inside_cdata
      elsif @m = PartialTag.match(@rest)
        debug_state 'PartialTag'
        @already << @m.pre_match
        @rest = @m.post_match
        # Keep the incomplete tag aside until its closing '>' arrives.
        @partial_tag = @m.to_s
        self.state = :inside_tag
      elsif @m = EverythingElse.match(@rest)
        debug_state 'EverythingElse'
        @already << @m.pre_match << @m.to_s
        @rest = @m.post_match
        self.state = :inside_element
      else
        error "Malformed HTML: not complete: #{@rest.inspect}"
      end
    when :inside_tag
      if @m = /^[^>]*>/.match(@rest)
        # The tag is now complete: put the reassembled tag back at the front
        # of @rest and try to handle it as a normal tag.
        @partial_tag << @m.to_s
        @rest = @partial_tag + @m.post_match
        @partial_tag = nil
        self.state = :inside_element
        if @m = Tag.match(@rest)
          things_read += 1
          handle_tag
        end
      else
        # Still no '>': absorb all pending text into the partial tag.
        @partial_tag << @rest
        @rest = ""
        self.state = :inside_tag
      end
    when :inside_cdata
      if @m = CDataEnd.match(@rest)
        self.state = :inside_element
        @already << @m.pre_match << @m.to_s
        @rest = @m.post_match
        # Resume buffering script/style content after the CDATA section.
        start_script_style if script_style?
      else
        @already << @rest
        @rest = ""
        self.state = :inside_cdata
      end
    else
      raise "Bug bug: state = #{self.state.inspect}"
    end

    # Stop early once the tag stack is empty again, but only after something
    # (a comment start or a tag) has actually been read in this call.
    break if is_finished? && things_read > 0
  end
end
handle_tag() click to toggle source
# File lib/maruku/input/html_helper.rb, line 112
# Consume the tag held in the current match (@m).
#
# Text before the tag is appended to @already and text after it becomes
# @rest. Capture 1 of the match is treated as the closing-tag marker,
# capture 2 as the tag name and capture 3 as the raw attribute text.
#
# * Tags listed in TO_SANITIZE are re-emitted in self-closing form.
# * A closing tag must match the top of @tag_stack, otherwise #error is
#   called; the tag is then emitted and popped.
# * Any other tag is emitted verbatim and pushed on @tag_stack unless it is
#   self-closing.
#
# Script/style buffering is started or finished around the stack changes.
def handle_tag
  match = @m
  @already << match.pre_match
  @rest = match.post_match

  closing = match[1] ? true : false
  name = match[2]
  @first_tag ||= name
  attrs = match[3].to_s

  # A trailing slash in the attribute text marks a self-closing tag.
  self_closing = (attrs[-1, 1] == '/')
  attrs = attrs.chop if self_closing

  if TO_SANITIZE.include?(name)
    attrs = attrs.strip
    @already << (attrs.empty? ? format('<%s />', name) : format('<%s %s />', name, attrs))
  elsif closing
    if @tag_stack.empty?
      error "Malformed: closing tag #{name.inspect} in empty list"
    elsif @tag_stack.last != name
      error "Malformed: tag <#{name}> closes <#{@tag_stack.last}>"
    end

    # Flush buffered script/style content before emitting the closing tag.
    close_script_style if script_style?

    @already << match.to_s
    @tag_stack.pop
  else
    @already << match.to_s
    @tag_stack.push(name) unless self_closing

    # If a script/style element was just opened, start buffering its body.
    start_script_style if script_style?
  end
end
is_finished?() click to toggle source
# File lib/maruku/input/html_helper.rb, line 157
# True when the reader is at element level (not inside a comment, CDATA
# section or partial tag) and every opened tag has been closed.
def is_finished?
  return false unless state == :inside_element

  @tag_stack.empty?
end
stuff_you_read() click to toggle source
# File lib/maruku/input/html_helper.rb, line 153
# Returns the @already buffer, i.e. the text consumed so far.
# The buffer itself is returned, not a copy.
def stuff_you_read
  @already
end

Private Instance Methods

close_script_style() click to toggle source

Finish script or style tag content, wrapping it in CDATA if necessary, and add it to our original @already buffer.

# File lib/maruku/input/html_helper.rb, line 209
# Finish the body of the script/style element on top of the tag stack.
#
# While such an element is open, @already holds only its body and the text
# read before it is parked in @before_already. If the body contains
# characters that are not allowed inside an element, it is wrapped in a
# commented-out CDATA section. The (possibly wrapped) body is then appended
# to @before_already, which becomes @already again.
def close_script_style
  element = @tag_stack.last
  body = @already

  # See http://www.w3.org/TR/xhtml1/#C_4 for character sequences not allowed within an element body.
  if body =~ /<|&|\]\]>|--/
    wrapped = script_style_cdata_start(element)
    wrapped << "\n" unless body.start_with?("\n")
    wrapped << body
    wrapped << "\n" unless body.end_with?("\n")
    wrapped << script_style_cdata_end(element)
    body = wrapped
  end

  @before_already << body
  @already = @before_already
end
debug_state(note) click to toggle source
# File lib/maruku/input/html_helper.rb, line 163
# Send a trace line to my_debug containing the current state, the given
# +note+, and the text of the current match (@m).
def debug_state(note)
  matched = @m.to_s.inspect
  my_debug "#{@state}: #{note}: #{matched}"
end
error(s) click to toggle source
# File lib/maruku/input/html_helper.rb, line 171
# Abort parsing by raising a RuntimeError.
#
# The message is "Error: <s> \n" followed by the reader dump from #inspect.
# The backtrace is set to +caller+, so it points at the code that called
# #error rather than at this helper.
#
# The exception class is named explicitly because Kernel#raise only turns a
# String into a RuntimeError when the String is its sole argument. With a
# second argument, `raise "msg", caller` raises
# "TypeError: exception class/object expected" and the message is lost.
#
# s:: description of the problem.
def error(s)
  raise RuntimeError, "Error: #{s} \n" + inspect, caller
end
inspect() click to toggle source
# File lib/maruku/input/html_helper.rb, line 175
# Multi-line dump of the reader for error messages and debugging: current
# state, text of the last match, the open-tag stack, the text consumed so far
# ("Before") and the text still pending ("After"). Every line of the two
# buffers is prefixed with '|'.
def inspect
  before = @already.gsub(/^/, '|')
  after = @rest.gsub(/^/, '|')

  report = "HTML READER\n state=#{state} "
  report << "match=#{@m.to_s.inspect}\n"
  report << "Tag stack = #{@tag_stack.inspect} \n"
  report << "Before:\n" << before << "\n"
  report << "After:\n" << after << "\n"
  report
end
my_debug(s) click to toggle source
# File lib/maruku/input/html_helper.rb, line 167
# Debug hook used by debug_state. Tracing is currently disabled: the body is
# commented out, so the method does nothing with +s+ and returns nil.
# Uncomment the line below to print the reader dump together with +s+.
def my_debug(s)
  #    puts "---" * 10 + "\n" + inspect + "\t>>>\t" + s
end
script_style?() click to toggle source

Are we within a script or style tag?

# File lib/maruku/input/html_helper.rb, line 197
# True when the innermost open tag (top of @tag_stack) is "script" or
# "style"; false otherwise, including when no tag is open.
def script_style?
  innermost = @tag_stack.last
  innermost == 'script' || innermost == 'style'
end
script_style_cdata_end(tag) click to toggle source
# File lib/maruku/input/html_helper.rb, line 229
# Closing CDATA marker, hidden inside a comment suited to the element: a
# line comment for "script", a block comment for anything else.
# Returns a new String on every call.
def script_style_cdata_end(tag)
  if tag == 'script'
    "//]]>"
  else
    "/*]]>*/"
  end
end
script_style_cdata_start(tag) click to toggle source
# File lib/maruku/input/html_helper.rb, line 225
# Opening CDATA marker, hidden inside a comment suited to the element: a
# line comment for "script", a block comment for anything else.
# Returns a new String on every call; close_script_style appends to it.
def script_style_cdata_start(tag)
  if tag == 'script'
    "//<![CDATA["
  else
    "/*<![CDATA[*/"
  end
end
start_script_style() click to toggle source

Save our @already buffer elsewhere, and switch to using @already for the contents of this script or style tag.

# File lib/maruku/input/html_helper.rb, line 203
# Begin buffering the body of a script/style element: park the current
# @already buffer in @before_already and point @already at a fresh empty
# string. close_script_style later appends the body back onto the parked
# buffer.
def start_script_style
  @before_already, @already = @already, ""
end