about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Arseny Kapoulkine <arseny.kapoulkine@gmail.com> 2019-09-11 21:35:03 -0700
committer Arseny Kapoulkine <arseny.kapoulkine@gmail.com> 2019-09-11 21:35:03 -0700
commit c6607740a0ebc9abfc74169d54236e5a3c5b84f3 (patch)
tree 36c4e6af0fd5772a61fd77292fb58eea479050db
parent 946de603b13514a28fdb4f3beaf80ebd0d27b5f3 (diff)
download pugixml-c6607740a0ebc9abfc74169d54236e5a3c5b84f3.zip
pugixml-c6607740a0ebc9abfc74169d54236e5a3c5b84f3.tar.gz
pugixml-c6607740a0ebc9abfc74169d54236e5a3c5b84f3.tar.bz2
Never escape > in attribute values
According to the XML spec, > sometimes needs to be escaped in PCDATA (when it occurs as part of a ]]> sequence), but it doesn't need to be escaped in attribute values. Contributes to #272.
-rw-r--r-- src/pugixml.cpp 4
-rw-r--r-- tests/test_write.cpp 8
2 files changed, 6 insertions, 6 deletions
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index afe2321..90c48b2 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -1861,7 +1861,7 @@ PUGI__NS_BEGIN
enum chartypex_t
{
ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
- ctx_special_attr = 2, // Any symbol >= 0 and < 32, &, <, >, ", '
+ ctx_special_attr = 2, // Any symbol >= 0 and < 32, &, <, ", '
ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
ctx_digit = 8, // 0-9
ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
@@ -1872,7 +1872,7 @@ PUGI__NS_BEGIN
3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, // 0-15
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
0, 0, 2, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 1, 0, // 48-63
0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
diff --git a/tests/test_write.cpp b/tests/test_write.cpp
index 797ddd0..0410e82 100644
--- a/tests/test_write.cpp
+++ b/tests/test_write.cpp
@@ -193,8 +193,8 @@ TEST_XML(write_escape, "<node attr=''>text</node>")
doc.child(STR("node")).attribute(STR("attr")) = STR("<>'\"&\x04\r\n\t");
doc.child(STR("node")).first_child().set_value(STR("<>'\"&\x04\r\n\t"));
- CHECK_NODE(doc, STR("<node attr=\"&lt;&gt;'&quot;&amp;&#04;&#13;&#10;&#09;\">&lt;&gt;'\"&amp;&#04;\r\n\t</node>"));
- CHECK_NODE_EX(doc, STR("<node attr='&lt;&gt;&apos;\"&amp;&#04;&#13;&#10;&#09;'>&lt;&gt;'\"&amp;&#04;\r\n\t</node>"), STR(""), format_raw | format_attribute_single_quote);
+ CHECK_NODE(doc, STR("<node attr=\"&lt;>'&quot;&amp;&#04;&#13;&#10;&#09;\">&lt;&gt;'\"&amp;&#04;\r\n\t</node>"));
+ CHECK_NODE_EX(doc, STR("<node attr='&lt;>&apos;\"&amp;&#04;&#13;&#10;&#09;'>&lt;&gt;'\"&amp;&#04;\r\n\t</node>"), STR(""), format_raw | format_attribute_single_quote);
}
TEST_XML(write_escape_roundtrip, "<node attr=''>text</node>")
@@ -208,8 +208,8 @@ TEST_XML(write_escape_roundtrip, "<node attr=''>text</node>")
// Note: this string is almost identical to the string from write_escape with the exception of \r
// \r in PCDATA doesn't roundtrip because it has to go through newline conversion (which could be disabled, but is active by default)
- CHECK_NODE(doc, STR("<node attr=\"&lt;&gt;'&quot;&amp;&#04;&#13;&#10;&#09;\">&lt;&gt;'\"&amp;&#04;\n\t</node>"));
- CHECK_NODE_EX(doc, STR("<node attr='&lt;&gt;&apos;\"&amp;&#04;&#13;&#10;&#09;'>&lt;&gt;'\"&amp;&#04;\n\t</node>"), STR(""), format_raw | format_attribute_single_quote);
+ CHECK_NODE(doc, STR("<node attr=\"&lt;>'&quot;&amp;&#04;&#13;&#10;&#09;\">&lt;&gt;'\"&amp;&#04;\n\t</node>"));
+ CHECK_NODE_EX(doc, STR("<node attr='&lt;>&apos;\"&amp;&#04;&#13;&#10;&#09;'>&lt;&gt;'\"&amp;&#04;\n\t</node>"), STR(""), format_raw | format_attribute_single_quote);
}
TEST_XML(write_escape_unicode, "<node attr='&#x3c00;'/>")