require "./spec_helper"
require "html"

# Specs for `HTML.escape` and `HTML.unescape`.
#
# NOTE(review): entity references in the fixtures below are written out
# literally (`&lt;`, `&#x87;`, ...). If a tool decodes HTML entities in this
# file, the inputs collapse into their own expected values and several string
# literals stop parsing, so they must stay escaped. The inputs were
# reconstructed from the expected values; confirm them against upstream.
describe "HTML" do
  describe ".escape" do
    it "does not change a safe string" do
      HTML.escape("safe_string").should eq("safe_string")
    end

    it "escapes dangerous characters from a string" do
      HTML.escape("< & > ' \"").should eq("&lt; &amp; &gt; &#39; &quot;")
    end
  end

  pending_wasm32 describe: ".unescape" do
    it "identity" do
      # `be` (not `eq`): asserts the very same object comes back when there is
      # nothing to unescape.
      HTML.unescape("safe_string").should be("safe_string")
    end

    it "empty entity" do
      HTML.unescape("foo&;bar").should eq "foo&;bar"
    end

    context "numeric entities" do
      it "decimal" do
        # Mixes terminated, zero-padded and unterminated references.
        HTML.unescape("3 &#43; 2 &#00061 5 &#9;").should eq("3 + 2 = 5 \t")
      end

      it "hex" do
        HTML.unescape("&#x000033; &#x2B; 2 &#x0003D &#x000035 &#9;").should eq("3 + 2 = 5 \t")
      end

      it "early termination" do
        # `&#` and `&#x` carry no digits and stay as-is; the remaining
        # references end at the first non-digit character or at end of input.
        HTML.unescape("&# &#x &#128;43 &#169f &#xa9").should eq "&# &#x €43 ©f ©"
      end

      it "ISO-8859-1 replacement" do
        HTML.unescape("&#x87;").should eq "‡"
      end

      it "does not unescape Char::MAX_CODEPOINT" do
        # U+10FFFF and U+10FFFE are noncharacter and are not replaced
        HTML.unescape("limit &#x10FFFF;").should eq("limit &#x10FFFF;")
        HTML.unescape("limit &#x10FFFE;").should eq("limit &#x10FFFE;")
        HTML.unescape("limit &#x10FFFD;").should eq("limit \u{10FFFD}")
      end

      it "does not unescape characters above Char::MAX_CODEPOINT" do
        HTML.unescape("limit &#x110000;").should eq("limit \uFFFD")
        HTML.unescape("limit &#x110001;").should eq("limit \uFFFD")
        # NOTE(review): third out-of-range value is a reconstruction; verify
        # which value the original fixture used.
        HTML.unescape("limit &#x7FFFFFFF;").should eq("limit \uFFFD")
      end

      it "ignores leading zeros" do
        HTML.unescape("&#0000065;").should eq("A")
        HTML.unescape("&#x00000065;").should eq("e")
      end

      it "space characters" do
        # 0x80 and 0x9F are expected to come back as U+20AC and U+0178.
        HTML.unescape("&#x0020;&#x0009;&#x000A;&#x000C;&#x0080;&#x009F;").should eq(" \t\n\f\u20AC\u0178")
      end

      it "does not escape non-space unicode control characters" do
        # NOTE(review): reconstructed. Control-character references are
        # expected to pass through untouched, while U+0000 is expected to
        # become U+FFFD — confirm against the implementation.
        HTML.unescape("&#x0001;-&#x001F; &#x000D; &#x007F; &#x0000;").should eq("&#x0001;-&#x001F; &#x000D; &#x007F; \uFFFD")
      end

      it "does not escape noncharacter codepoints" do
        # noncharacters http://www.unicode.org/faq/private_use.html
        string = "&#xFDD0;-&#xFDEF; &#xFFFE; &#FFFF; &#x1FFFE; &#x1FFFF; &#x2FFFE; &#x10FFFF;"
        HTML.unescape(string).should eq(string)
      end

      it "does not escape unicode surrogate characters" do
        HTML.unescape("&#xD800;-&#xDFFF;").should eq("\uFFFD-\uFFFD")
      end
    end

    context "named entities" do
      it "simple named entities" do
        HTML.unescape("&lt; &amp; &gt;").should eq("< & >")
        HTML.unescape("nbsp&nbsp;space ").should eq("nbsp\u{0000A0}space ")
      end

      it "without trailing semicolon" do
        # `&amp` is expected to be recognised as a prefix even though more
        # letters follow it.
        HTML.unescape("&amphello").should eq("&hello")
      end

      it "end of string" do
        HTML.unescape("&amp; &amp").should eq("& &")
      end

      it "multi codepoint" do
        HTML.unescape(" &NotSquareSuperset; ").should eq(" ⊐̸ ")
      end

      it "invalid entities" do
        # A bare `&`, unterminated `&amp` / `&quot`, and two unknown names.
        HTML.unescape("&&lt;&amp&gt;&quot&abcdefghijklmn &ThisIsNotAnEntity;").should eq("&<&>\"&abcdefghijklmn &ThisIsNotAnEntity;")
      end

      it "entity with numerical characters" do
        HTML.unescape("&frac34;").should eq("\u00BE")
      end
    end

    it "unescapes javascript example from a string" do
      HTML.unescape("&lt;script&gt;alert&#40;&#39;You are being hacked&#39;&#41;&lt;/script&gt;").should eq("<script>alert('You are being hacked')</script>")
    end

    it "invalid utf-8" do
      HTML.unescape("test \xff\xfe").should eq "test \xff\xfe"
    end
  end
end