From c27eba57c03cbaa6efa74b69f6b4053f7d2e28c3 Mon Sep 17 00:00:00 2001 From: "K.Kosako" Date: Sun, 19 Jan 2020 00:00:31 +0900 Subject: optimize look-behind by reducing upper range of quantifier for simple repetition --- src/regcomp.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/regcomp.c b/src/regcomp.c index b291edf..1290d44 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -4287,6 +4287,49 @@ divide_look_behind_alternatives(Node* node) return 0; } +static int +quant_reduce_in_look_behind(Node* node) +{ + NodeType type; + Node* body; + + if (NODE_TYPE(node) != NODE_QUANT) return 0; + + body = NODE_BODY(node); + type = NODE_TYPE(body); + if (type == NODE_STRING || type == NODE_CTYPE || + type == NODE_CCLASS || type == NODE_BACKREF) { + QuantNode* qn = QUANT_(node); + qn->upper = qn->lower; + } + + return 0; +} + +static int +tree_reduce_in_look_behind(Node* node, regex_t* reg, ScanEnv* env) +{ + int r; + + switch (NODE_TYPE(node)) { + case NODE_QUANT: + r = quant_reduce_in_look_behind(node); + break; + + case NODE_ALT: + do { + r = quant_reduce_in_look_behind(NODE_CAR(node)); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); + break; + + default: + r = 0; + break; + } + + return r; +} + static int tune_tree(Node* node, regex_t* reg, int state, ScanEnv* env); static int @@ -4308,6 +4351,9 @@ tune_look_behind(Node* node, regex_t* reg, int state, ScanEnv* env) r = tune_tree(NODE_ANCHOR_BODY(an), reg, state1, env); if (r != 0) return r; + r = tree_reduce_in_look_behind(NODE_ANCHOR_BODY(an), reg, env); + if (r != 0) return r; + r = node_char_len(NODE_ANCHOR_BODY(an), reg, &ci, env); if (r >= 0) { if (r == CHAR_LEN_TOP_ALT_FIXED) { -- cgit v1.1