aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArseny Kapoulkine <arseny.kapoulkine@gmail.com>2020-09-10 09:53:25 -0700
committerGitHub <noreply@github.com>2020-09-10 09:53:25 -0700
commita3fad540a70bbba9349e8361aff8bff4dfdf7e67 (patch)
tree5725aa4668fd7dd23a7eca691453c7af285e70fb
parent22401bafaff996baecfb694ddc754855e184d377 (diff)
parent20aef1cd4b436a8859a54b41cc992db517c61d2d (diff)
downloadpugixml-a3fad540a70bbba9349e8361aff8bff4dfdf7e67.zip
pugixml-a3fad540a70bbba9349e8361aff8bff4dfdf7e67.tar.gz
pugixml-a3fad540a70bbba9349e8361aff8bff4dfdf7e67.tar.bz2
Merge pull request #369 from zeux/xpath-rec
XPath: Restrict AST depth to prevent stack overflow
-rw-r--r--src/pugiconfig.hpp3
-rw-r--r--src/pugixml.cpp54
-rw-r--r--tests/test_xpath_parse.cpp22
3 files changed, 77 insertions, 2 deletions
diff --git a/src/pugiconfig.hpp b/src/pugiconfig.hpp
index 2fb6918..c9c40e2 100644
--- a/src/pugiconfig.hpp
+++ b/src/pugiconfig.hpp
@@ -40,6 +40,9 @@
// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
+// Tune this constant to adjust max nesting for XPath queries
+// #define PUGIXML_XPATH_DEPTH_LIMIT 1024
+
// Uncomment this to switch to header-only version
// #define PUGIXML_HEADER_ONLY
diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 4d0c2c9..e166458 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -11143,6 +11143,14 @@ PUGI__NS_BEGIN
}
};
+ static const size_t xpath_ast_depth_limit =
+ #ifdef PUGIXML_XPATH_DEPTH_LIMIT
+ PUGIXML_XPATH_DEPTH_LIMIT
+ #else
+ 1024
+ #endif
+ ;
+
struct xpath_parser
{
xpath_allocator* _alloc;
@@ -11155,6 +11163,8 @@ PUGI__NS_BEGIN
char_t _scratch[32];
+ size_t _depth;
+
xpath_ast_node* error(const char* message)
{
_result->error = message;
@@ -11171,6 +11181,11 @@ PUGI__NS_BEGIN
return 0;
}
+ xpath_ast_node* error_rec()
+ {
+ return error("Exceeded maximum allowed query depth");
+ }
+
void* alloc_node()
{
return _alloc->allocate(sizeof(xpath_ast_node));
@@ -11563,10 +11578,15 @@ PUGI__NS_BEGIN
xpath_ast_node* n = parse_primary_expression();
if (!n) return 0;
+ size_t old_depth = _depth;
+
while (_lexer.current() == lex_open_square_brace)
{
_lexer.next();
+ if (++_depth > xpath_ast_depth_limit)
+ return error_rec();
+
if (n->rettype() != xpath_type_node_set)
return error("Predicate has to be applied to node set");
@@ -11582,6 +11602,8 @@ PUGI__NS_BEGIN
_lexer.next();
}
+ _depth = old_depth;
+
return n;
}
@@ -11733,12 +11755,17 @@ PUGI__NS_BEGIN
xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
if (!n) return 0;
+ size_t old_depth = _depth;
+
xpath_ast_node* last = 0;
while (_lexer.current() == lex_open_square_brace)
{
_lexer.next();
+ if (++_depth > xpath_ast_depth_limit)
+ return error_rec();
+
xpath_ast_node* expr = parse_expression();
if (!expr) return 0;
@@ -11755,6 +11782,8 @@ PUGI__NS_BEGIN
last = pred;
}
+ _depth = old_depth;
+
return n;
}
@@ -11764,11 +11793,16 @@ PUGI__NS_BEGIN
xpath_ast_node* n = parse_step(set);
if (!n) return 0;
+ size_t old_depth = _depth;
+
while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
{
lexeme_t l = _lexer.current();
_lexer.next();
+ if (++_depth > xpath_ast_depth_limit)
+ return error_rec();
+
if (l == lex_double_slash)
{
n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
@@ -11779,6 +11813,8 @@ PUGI__NS_BEGIN
if (!n) return 0;
}
+ _depth = old_depth;
+
return n;
}
@@ -11964,6 +12000,9 @@ PUGI__NS_BEGIN
{
_lexer.next();
+ if (++_depth > xpath_ast_depth_limit)
+ return error_rec();
+
xpath_ast_node* rhs = parse_path_or_unary_expression();
if (!rhs) return 0;
@@ -12009,13 +12048,22 @@ PUGI__NS_BEGIN
// | MultiplicativeExpr 'mod' UnaryExpr
xpath_ast_node* parse_expression(int limit = 0)
{
+ size_t old_depth = _depth;
+
+ if (++_depth > xpath_ast_depth_limit)
+ return error_rec();
+
xpath_ast_node* n = parse_path_or_unary_expression();
if (!n) return 0;
- return parse_expression_rec(n, limit);
+ n = parse_expression_rec(n, limit);
+
+ _depth = old_depth;
+
+ return n;
}
- xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
+ xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0)
{
}
@@ -12024,6 +12072,8 @@ PUGI__NS_BEGIN
xpath_ast_node* n = parse_expression();
if (!n) return 0;
+ assert(_depth == 0);
+
// check if there are unparsed tokens left
if (_lexer.current() != lex_eof)
return error("Incorrect query");
diff --git a/tests/test_xpath_parse.cpp b/tests/test_xpath_parse.cpp
index daa12bf..d0b0bac 100644
--- a/tests/test_xpath_parse.cpp
+++ b/tests/test_xpath_parse.cpp
@@ -381,6 +381,28 @@ TEST(xpath_parse_oom_propagation)
}
}
+static std::basic_string<char_t> rep(const std::basic_string<char_t>& base, size_t count)
+{
+ std::basic_string<char_t> result;
+ result.reserve(base.size() * count);
+
+ for (size_t i = 0; i < count; ++i)
+ result += base;
+
+ return result;
+}
+
+TEST(xpath_parse_depth_limit)
+{
+ const size_t limit = 1500;
+
+ CHECK_XPATH_FAIL((rep(STR("("), limit) + STR("1") + rep(STR(")"), limit)).c_str());
+ CHECK_XPATH_FAIL((STR("(id('a'))") + rep(STR("[1]"), limit)).c_str());
+ CHECK_XPATH_FAIL((STR("/foo") + rep(STR("[1]"), limit)).c_str());
+ CHECK_XPATH_FAIL((STR("/foo") + rep(STR("/x"), limit)).c_str());
+ CHECK_XPATH_FAIL((STR("1") + rep(STR("+1"), limit)).c_str());
+}
+
TEST_XML(xpath_parse_location_path, "<node><child/></node>")
{
CHECK_XPATH_NODESET(doc, STR("/node")) % 2;