# Adapter that sanitizes HTML through Crystal's `XML` module (libxml2).
#
# The class methods parse the input into an `XML::Node` tree, walk it with a
# `Visitor` that reports every text and element node to a `Processor`, and
# serialize whatever the processor emits through an `XML::Builder`.
# An instance of this struct is the sink handed to the processor: it forwards
# `start_tag`, `end_tag` and `write_text` to the wrapped builder.
struct Sanitize::Adapter::LibXML2
  include Adapter

  # Sanitizes the string *html* according to *policy* and returns the
  # serialized result.
  #
  # Empty input short-circuits to an empty string. When *fragment* is `true`
  # the input is treated as an HTML fragment instead of a full document.
  def self.process(policy : Policy, html : String, fragment : Bool = false)
    return "" if html.empty?

    process(policy, parse(html, fragment), fragment)
  end

  # Sanitizes an already parsed *node* and returns the serialized result as a
  # string (see `.build` for the post-processing applied to it).
  def self.process(policy : Policy, node : XML::Node, fragment : Bool = false)
    build(fragment) do |builder|
      process(policy, node, builder, fragment)
    end
  end

  # Sanitizes *node*, writing the output to *builder*.
  #
  # The document is ended and the builder flushed before returning, so the
  # builder must not be written to afterwards.
  def self.process(policy : Policy, node : XML::Node, builder : XML::Builder, fragment : Bool = false)
    processor = Processor.new(policy, new(builder))
    visit(processor, node, fragment)
    builder.end_document
    builder.flush
  end

  # Parses *html* with libxml2's HTML parser and returns the document node.
  #
  # `NOIMPLIED` keeps the parser from inserting implied `html`/`body`
  # elements and `NODEFDTD` from adding a default doctype. In fragment mode
  # the markup is therefore wrapped in an explicit `html`/`body` pair; the
  # `Visitor` passes through exactly these two elements when *fragment* is set.
  def self.parse(html : String, fragment : Bool)
    html = "<html><body>#{html}</body></html>" if fragment

    options = XML::HTMLParserOptions.default |
              XML::HTMLParserOptions::NOIMPLIED |
              XML::HTMLParserOptions::NODEFDTD
    XML.parse_html(html, options)
  end

  # Yields an `XML::Builder` writing to an in-memory buffer and returns the
  # cleaned-up buffer contents.
  #
  # In fragment mode the output is wrapped in a synthetic `fragment` element
  # while building (presumably so the builder always has a single root
  # element) and that wrapper is removed again from the result.
  def self.build(fragment : Bool)
    result = String.build do |io|
      builder = XML::Builder.new(io)
      builder.start_element("fragment") if fragment
      yield(builder)
    end

    if fragment
      # Nothing was written inside the wrapper: it was serialized as a
      # self-closing tag, so the fragment is empty.
      result = "" if result == "<fragment/>\n"
      # Otherwise drop the wrapper's opening tag and its closing tag together
      # with the newline written after it.
      result = result.lchop("<fragment>").rchop("</fragment>\n")
    end

    # strip trailing non-linebreak whitespace
    if result.ends_with?("\n")
      result
    else
      result.rstrip
    end
  end

  # Walks *node* and its descendants, feeding them to *processor*.
  def self.visit(processor : Processor, node : XML::Node, fragment : Bool)
    Visitor.new(processor, fragment).visit(node)
  end

  # :nodoc:
  #
  # Depth-first walker over an `XML::Node` tree that forwards text and
  # element nodes to a `Processor`.
  struct Visitor
    # Scratch hash reused for every element to avoid allocating a new hash
    # per node. It is cleared before each use, so the processor must not hold
    # on to it beyond the `process_element` call.
    @attributes = Hash(String, String).new

    def initialize(@processor : Processor, @fragment : Bool)
    end

    # :nodoc:
    #
    # Dispatches on the node type. DTD, comment and CDATA nodes are dropped;
    # any node type not listed raises.
    def visit(node : XML::Node)
      case node.type
      when .html_document_node?
        visit_children(node)
      when .dtd_node?
        # skip DTD
      when .text_node?
        visit_text(node)
      when .element_node?
        visit_element(node)
      when .comment_node?
        # skip comments
      when .cdata_section_node?
        # skip CDATA
      else
        raise "Not implemented for: #{node.type}:#{node.name}:#{node.content}"
      end
    end

    # Visits every direct child of *node* in document order.
    def visit_children(node)
      node.children.each do |child|
        visit(child)
      end
    end

    # Passes the text content of *node* to the processor.
    def visit_text(node)
      @processor.process_text(node.content)
    end

    # Passes the element *node* to the processor; its children are visited
    # from inside the block given to `process_element`.
    def visit_element(node)
      if @fragment && node.name.in?({"html", "body"})
        # In fragment mode `html` and `body` are only the wrapper added by
        # `parse`. They are reported without attributes and with
        # `Processor::CONTINUE`.
        # NOTE(review): presumably CONTINUE makes the processor emit only the
        # children; confirm against `Processor`.
        @attributes.clear
        @processor.process_element(node.name, @attributes, Processor::CONTINUE) do
          visit_children(node)
        end
        return
      end

      @attributes.clear
      node.attributes.each do |attribute|
        @attributes[attribute.name] = attribute.content
      end

      name = node.name
      # Qualify the name with the namespace *prefix*. Interpolating the
      # `XML::Namespace` object itself would embed its object representation
      # rather than the prefix. Namespaces without a prefix leave the name
      # untouched.
      if prefix = node.namespace.try(&.prefix)
        name = "#{prefix}:#{name}"
      end

      @processor.process_element(name, @attributes) do
        visit_children(node)
      end
    end
  end

  # Creates an adapter instance that writes to *builder*.
  def initialize(@builder : XML::Builder)
  end

  # Opens element *name* on the builder and writes its *attributes*.
  def start_tag(name : String, attributes : Hash(String, String)) : Nil
    @builder.start_element(name)
    @builder.attributes(attributes)
  end

  # Closes the element most recently opened on the builder. *name* and
  # *attributes* are not used here.
  def end_tag(name : String, attributes : Hash(String, String)) : Nil
    @builder.end_element
  end

  # Writes *text* as a text node through the builder.
  def write_text(text : String) : Nil
    @builder.text(text)
  end
end