require "./xml/libxml2"
# The XML module allows parsing and generating [XML](https://www.w3.org/XML/) documents.
#
# NOTE: To use `XML`, you must explicitly import it with `require "xml"`
#
# ### Parsing
#
# `XML.parse` parses XML from a `String` or an `IO` and returns the document as an `XML::Node`, the type that represents all kinds of XML nodes.
#
# Example:
#
# ```
# require "xml"
#
# xml = <<-XML
#  <person id="1">
#   <firstname>Jane</firstname>
#   <lastname>Doe</lastname>
#  </person>
#  XML
#
# document = XML.parse(xml) # : XML::Node
# person = document.first_element_child # : XML::Node?
# if person
# puts person["id"] # "1" : String?
#
# puts typeof(person.children) # XML::NodeSet
# person.children.select(&.element?).each do |child| # Select only element children
# puts typeof(child) # XML::Node
# puts child.name # firstname : String
# puts child.content # Jane : String?
# end
# end
# ```
#
# ### Generating
#
# Use `XML.build`, which uses an `XML::Builder`:
#
# ```
# require "xml"
#
# string = XML.build(indent: " ") do |xml|
# xml.element("person", id: 1) do
# xml.element("firstname") { xml.text "Jane" }
# xml.element("lastname") { xml.text "Doe" }
# end
# end
#
# string # => "<?xml version=\"1.0\"?>\n<person id=\"1\">\n <firstname>Jane</firstname>\n <lastname>Doe</lastname>\n</person>\n"
# ```
module XML
# Runs the given block with a freshly allocated XML parser context.
#
# The context is yielded to the block (through `from_ptr`, which turns the
# block's result into a document) and is always freed afterwards, whether
# the block returns normally or raises.
private def self.parse_xml(&)
  context = LibXML.xmlNewParserCtxt
  begin
    from_ptr(context) do
      yield context
    end
  ensure
    # Release the native context on every exit path.
    LibXML.xmlFreeParserCtxt(context)
  end
end
# Runs the given block with a freshly allocated HTML parser context.
#
# The context is yielded to the block (through `from_ptr`, which turns the
# block's result into a document) and is always freed afterwards, whether
# the block returns normally or raises.
private def self.parse_html(&)
  context = LibXML.htmlNewParserCtxt
  begin
    from_ptr(context) do
      yield context
    end
  ensure
    # Release the native context on every exit path.
    LibXML.htmlFreeParserCtxt(context)
  end
end
# Parses the XML document contained in *string* using *options* and returns
# the resulting `Document`.
#
# Raises `XML::Error` if *string* is empty.
#
# See `ParserOptions.default` for default options.
def self.parse(string : String, options : ParserOptions = ParserOptions.default) : Document
  if string.empty?
    raise XML::Error.new("Document is empty", 0)
  end

  parse_xml do |context|
    LibXML.xmlCtxtReadMemory(context, string, string.bytesize, nil, nil, options)
  end
end
# Parses the XML document read from *io* using *options* and returns the
# resulting `Document`.
#
# The data is pulled from *io* through `read_callback`.
#
# See `ParserOptions.default` for default options.
def self.parse(io : IO, options : ParserOptions = ParserOptions.default) : Document
  parse_xml do |context|
    # The boxed IO is handed back to the read/close callbacks as their `data` argument.
    boxed_io = Box(IO).box(io)
    LibXML.xmlCtxtReadIO(context, ->read_callback, ->close_callback, boxed_io, nil, nil, options)
  end
end
# Parses the HTML document contained in *string* using *options* and returns
# the resulting `Document`.
#
# The content is handed to libxml2 with `"utf-8"` as the encoding argument.
#
# Raises `XML::Error` if *string* is empty.
#
# See `HTMLParserOptions.default` for default options.
def self.parse_html(string : String, options : HTMLParserOptions = HTMLParserOptions.default) : Document
  if string.empty?
    raise XML::Error.new("Document is empty", 0)
  end

  parse_html do |context|
    LibXML.htmlCtxtReadMemory(context, string, string.bytesize, nil, "utf-8", options)
  end
end
# Parses the HTML document read from *io* using *options* and returns the
# resulting `Document`.
#
# The data is pulled from *io* through `read_callback` and handed to libxml2
# with `"utf-8"` as the encoding argument.
#
# See `HTMLParserOptions.default` for default options.
def self.parse_html(io : IO, options : HTMLParserOptions = HTMLParserOptions.default) : Document
  parse_html do |context|
    # The boxed IO is handed back to the read/close callbacks as their `data` argument.
    boxed_io = Box(IO).box(io)
    LibXML.htmlCtxtReadIO(context, ->read_callback, ->close_callback, boxed_io, nil, "utf-8", options)
  end
end
# Read callback passed to libxml2 for IO-based parsing.
#
# *data* is the boxed `IO`, *buffer* and *len* describe the destination
# memory. Reads from the IO into that memory and returns the value of
# `IO#read` converted to `LibC::Int`.
protected def self.read_callback(data : Void*, buffer : UInt8*, len : LibC::Int) : LibC::Int
  source = Box(IO).unbox(data)
  target = Slice.new(buffer, len)

  {% if LibXML.has_method?(:xmlCtxtSetErrorHandler) %}
    bytes_read = source.read(target)
  {% else %}
    # Without per-context error handlers the read is wrapped in
    # `XML::Error.default_handlers`.
    bytes_read = XML::Error.default_handlers { source.read(target) }
  {% end %}

  LibC::Int.new(bytes_read)
end
# Close callback passed to libxml2 for IO-based parsing.
#
# Does nothing with *data* and always returns `0`.
protected def self.close_callback(data : Void*) : LibC::Int
  LibC::Int.zero
end
# Runs the block, which must return a `LibXML::Doc*`, while gathering the
# errors reported during its execution, and wraps the result in a `Document`
# together with those errors.
#
# Raises `Error` built from `LibXML.xmlGetLastError` when the block does not
# return a document.
protected def self.from_ptr(ctxt, & : -> LibXML::Doc*)
  collected = [] of XML::Error

  {% if LibXML.has_method?(:xmlCtxtSetErrorHandler) %}
    # Register a handler on the context itself, with the error list as its data.
    LibXML.xmlCtxtSetErrorHandler(ctxt, ->Error.structured_callback, Box.box(collected))
    doc = yield
  {% else %}
    doc = XML::Error.unsafe_collect(collected) { yield }
  {% end %}

  if doc
    Document.new(doc, collected)
  else
    raise Error.new(LibXML.xmlGetLastError)
  end
end
{% unless LibXML.has_method?(:xmlSaveSetIndentString) %}
  # NOTE: These helpers are for internal compatibility with libxml < 2.14.
  #
  # Sets libxml2's indent-tree-output setting to `1` (*indent* is true) or `0`
  # for the duration of the block, restoring the previous value afterwards.
  protected def self.with_indent_tree_output(indent : Bool, &)
    save_indent_tree_output do
      LibXML.__xmlIndentTreeOutput.value =
        if indent
          1
        else
          0
        end
      yield
    end
  end

  # Remembers the current indent-tree-output setting, runs the block and
  # puts the remembered value back, even if the block raises.
  protected def self.save_indent_tree_output(&)
    previous = LibXML.__xmlIndentTreeOutput.value
    begin
      yield
    ensure
      LibXML.__xmlIndentTreeOutput.value = previous
    end
  end

  # Points libxml2's tree indent string at *string* for the duration of the
  # block and puts the previous pointer back afterwards, even if the block
  # raises.
  protected def self.with_tree_indent_string(string : String, &)
    previous = LibXML.__xmlTreeIndentString.value
    LibXML.__xmlTreeIndentString.value = string.to_unsafe
    begin
      yield
    ensure
      LibXML.__xmlTreeIndentString.value = previous
    end
  end
{% end %}
class_getter libxml2_version : String do
  raw = String.new(LibXML.xmlParserVersion)

  # Extra information may follow the numeric part of the version string;
  # `strict: false` makes the conversion ignore such a trailing suffix.
  encoded = raw.to_i(strict: false)

  # Split the number into its three components and format them as
  # "major.minor.patch".
  major = encoded // 10_000
  minor = (encoded % 10_000) // 100
  patch = encoded % 100

  "#{major}.#{minor}.#{patch}"
end
# :nodoc:
#
# Returns the content of *node* as a `String`, or an empty string when
# libxml2 returns no content. The buffer obtained from libxml2 is released
# with `LibXML.xmlFree` after it has been copied.
protected def self.node_content_to_string(node : LibXML::Node*) : String
  content = LibXML.xmlNodeGetContent(node)
  return "" unless content

  begin
    String.new(content)
  ensure
    free_func = LibXML.xmlFree
    {% if flag?(:interpreted) %}
      # FIXME: calling xmlFree directly crashes the interpreter (https://github.com/crystal-lang/crystal/issues/12495)
      free_func = LibXML::FreeFunc.new(free_func.pointer, Pointer(Void).null)
    {% end %}
    free_func.call(content.as(Void*))
  end
end
end
require "./xml/*"