aboutsummaryrefslogtreecommitdiff
path: root/libphobos/src/std/regex/internal
diff options
context:
space:
mode:
Diffstat (limited to 'libphobos/src/std/regex/internal')
-rw-r--r--libphobos/src/std/regex/internal/backtracking.d1388
-rw-r--r--libphobos/src/std/regex/internal/generator.d2
-rw-r--r--libphobos/src/std/regex/internal/ir.d565
-rw-r--r--libphobos/src/std/regex/internal/kickstart.d14
-rw-r--r--libphobos/src/std/regex/internal/parser.d792
-rw-r--r--libphobos/src/std/regex/internal/tests.d36
-rw-r--r--libphobos/src/std/regex/internal/tests2.d159
-rw-r--r--libphobos/src/std/regex/internal/thompson.d158
8 files changed, 1542 insertions, 1572 deletions
diff --git a/libphobos/src/std/regex/internal/backtracking.d b/libphobos/src/std/regex/internal/backtracking.d
index ffc9779..21766fc 100644
--- a/libphobos/src/std/regex/internal/backtracking.d
+++ b/libphobos/src/std/regex/internal/backtracking.d
@@ -9,760 +9,783 @@ package(std.regex):
import core.stdc.stdlib, std.range.primitives, std.traits, std.typecons;
import std.regex.internal.ir;
+import core.memory : pureMalloc, pureFree;
+
/+
BacktrackingMatcher implements backtracking scheme of matching
regular expressions.
+/
-template BacktrackingMatcher(bool CTregex)
+@trusted class BacktrackingMatcher(Char, Stream = Input!Char) : Matcher!Char
+if (is(Char : dchar))
{
- @trusted struct BacktrackingMatcher(Char, Stream = Input!Char)
- if (is(Char : dchar))
- {
- alias DataIndex = Stream.DataIndex;
- struct State
- {//top bit in pc is set if saved along with matches
- DataIndex index;
- uint pc, counter, infiniteNesting;
- }
- static assert(State.sizeof % size_t.sizeof == 0);
- enum stateSize = State.sizeof / size_t.sizeof;
- enum initialStack = 1 << 11; // items in a block of segmented stack
- alias String = const(Char)[];
- alias RegEx = Regex!Char;
- alias MatchFn = bool function (ref BacktrackingMatcher!(Char, Stream));
- RegEx re; //regex program
- static if (CTregex)
- MatchFn nativeFn; //native code for that program
- //Stream state
- Stream s;
+ alias DataIndex = Stream.DataIndex;
+ struct State
+ {//top bit in pc is set if saved along with matches
DataIndex index;
- dchar front;
- bool exhausted;
- //backtracking machine state
- uint pc, counter;
- DataIndex lastState = 0; //top of state stack
- static if (!CTregex)
- uint infiniteNesting;
- size_t[] memory;
- Trace[] merge;
- static struct Trace
- {
- ulong mask;
- size_t offset;
+ uint pc, counter, infiniteNesting;
+ }
+ static assert(State.sizeof % size_t.sizeof == 0);
+ enum stateSize = State.sizeof / size_t.sizeof;
+ enum initialStack = 1 << 11; // items in a block of segmented stack
+ alias String = const(Char)[];
+ alias RegEx = Regex!Char;
+ alias MatchFn = bool function(BacktrackingMatcher) pure;
+ const RegEx re; // regex program
+ MatchFn nativeFn; // native code for that program
+ // Stream state
+ Stream s;
+ DataIndex index;
+ dchar front;
+ bool exhausted;
+ // Backtracking machine state
+ uint pc, counter;
+ DataIndex lastState = 0; // Top of state stack
+ uint infiniteNesting;
+ size_t[] memory;
+ Trace[] merge;
+ static struct Trace
+ {
+ ulong mask;
+ size_t offset;
- bool mark(size_t idx)
+ bool mark(size_t idx)
+ {
+ immutable d = idx - offset;
+ if (d < 64) // including overflow
{
- immutable d = idx - offset;
- if (d < 64) // including overflow
- {
- immutable p = mask & (1UL << d);
- mask |= 1UL << d;
- return p != 0;
- }
- else
- {
- offset = idx;
- mask = 1;
- return false;
- }
+ immutable p = mask & (1UL << d);
+ mask |= 1UL << d;
+ return p != 0;
+ }
+ else
+ {
+ offset = idx;
+ mask = 1;
+ return false;
}
}
- //local slice of matches, global for backref
- Group!DataIndex[] matches, backrefed;
+ }
+ //local slice of matches, global for backref
+ Group!DataIndex[] matches, backrefed;
+ size_t _refCount;
+final:
- static if (__traits(hasMember,Stream, "search"))
- {
- enum kicked = true;
- }
- else
- enum kicked = false;
+ override @property ref size_t refCount() { return _refCount; }
+ override @property ref const(RegEx) pattern(){ return re; }
- static size_t initialMemory(const ref RegEx re)
- {
- return stackSize(re)*size_t.sizeof + re.hotspotTableSize*Trace.sizeof;
- }
+ static if (__traits(hasMember,Stream, "search"))
+ {
+ enum kicked = true;
+ }
+ else
+ enum kicked = false;
- static size_t stackSize(const ref RegEx re)
- {
- size_t itemSize = stateSize
- + re.ngroup * (Group!DataIndex).sizeof / size_t.sizeof;
- return initialStack * itemSize + 2;
- }
+ static size_t initialMemory(const ref RegEx re)
+ {
+ return stackSize(re)*size_t.sizeof + re.hotspotTableSize*Trace.sizeof;
+ }
+
+ static size_t stackSize(const ref RegEx re)
+ {
+ size_t itemSize = stateSize
+ + re.ngroup * (Group!DataIndex).sizeof / size_t.sizeof;
+ return initialStack * itemSize + 2;
+ }
- @property bool atStart(){ return index == 0; }
+ @property bool atStart(){ return index == 0; }
- @property bool atEnd(){ return index == s.lastIndex && s.atEnd; }
+ @property bool atEnd(){ return index == s.lastIndex && s.atEnd; }
- void next()
- {
- if (!s.nextChar(front, index))
- index = s.lastIndex;
- }
+ void next()
+ {
+ if (!s.nextChar(front, index))
+ index = s.lastIndex;
+ }
- void search()
+ void search()
+ {
+ static if (kicked)
{
- static if (kicked)
+ if (!s.search(re.kickstart, front, index))
{
- if (!s.search(re.kickstart, front, index))
- {
- index = s.lastIndex;
- }
+ index = s.lastIndex;
}
- else
- next();
}
+ else
+ next();
+ }
- //
- void newStack()
- {
- auto chunk = mallocArray!(size_t)(stackSize(re));
- chunk[0] = cast(size_t)(memory.ptr);
- chunk[1] = lastState;
- memory = chunk[2..$];
- lastState = 0;
- }
+ //
+ void newStack()
+ {
+ auto chunk = mallocArray!(size_t)(stackSize(re));
+ chunk[0] = cast(size_t)(memory.ptr);
+ chunk[1] = lastState;
+ memory = chunk[2..$];
+ lastState = 0;
+ }
- bool prevStack()
+ bool prevStack()
+ {
+ // pointer to previous block
+ size_t* prev = cast(size_t*) memory.ptr[-2];
+ if (!prev)
{
- // pointer to previous block
- size_t* prev = cast(size_t*) memory.ptr[-2];
- if (!prev)
- {
- // The last segment is freed in RegexMatch
- return false;
- }
- else
- {
- import core.stdc.stdlib : free;
- // memory used in previous block
- size_t size = memory.ptr[-1];
- free(memory.ptr-2);
- memory = prev[0 .. size];
- lastState = size;
- return true;
- }
+ // The last segment is freed in RegexMatch
+ return false;
}
-
- void initExternalMemory(void[] memBlock)
+ else
{
- merge = arrayInChunk!(Trace)(re.hotspotTableSize, memBlock);
- merge[] = Trace.init;
- memory = cast(size_t[]) memBlock;
- memory[0] = 0; // hidden pointer
- memory[1] = 0; // used size
- memory = memory[2..$];
+ import core.memory : pureFree;
+ // memory used in previous block
+ size_t size = memory.ptr[-1];
+ pureFree(memory.ptr-2);
+ memory = prev[0 .. size];
+ lastState = size;
+ return true;
}
+ }
- void initialize(ref RegEx program, Stream stream, void[] memBlock)
- {
- re = program;
- s = stream;
- exhausted = false;
- initExternalMemory(memBlock);
- backrefed = null;
- }
+ void initExternalMemory(void[] memBlock)
+ {
+ merge = arrayInChunk!(Trace)(re.hotspotTableSize, memBlock);
+ merge[] = Trace.init;
+ memory = cast(size_t[]) memBlock;
+ memory[0] = 0; // hidden pointer
+ memory[1] = 0; // used size
+ memory = memory[2..$];
+ }
- auto dupTo(void[] memory)
- {
- typeof(this) tmp = this;
- tmp.initExternalMemory(memory);
- return tmp;
- }
+ void initialize(ref const RegEx program, Stream stream, void[] memBlock)
+ {
+ s = stream;
+ exhausted = false;
+ initExternalMemory(memBlock);
+ backrefed = null;
+ }
- this(ref RegEx program, Stream stream, void[] memBlock, dchar ch, DataIndex idx)
- {
- initialize(program, stream, memBlock);
- front = ch;
- index = idx;
- }
+ override void dupTo(Matcher!Char m, void[] memBlock)
+ {
+ auto backtracking = cast(BacktrackingMatcher) m;
+ backtracking.s = s;
+ backtracking.front = front;
+ backtracking.index = index;
+ backtracking.exhausted = exhausted;
+ backtracking.initExternalMemory(memBlock);
+ }
- this(ref RegEx program, Stream stream, void[] memBlock)
- {
- initialize(program, stream, memBlock);
- next();
- }
+ override Matcher!Char rearm(in Char[] data)
+ {
+ merge[] = Trace.init;
+ exhausted = false;
+ s = Stream(data);
+ next();
+ return this;
+ }
- auto fwdMatcher(ref BacktrackingMatcher matcher, void[] memBlock)
- {
- alias BackMatcherTempl = .BacktrackingMatcher!(CTregex);
- alias BackMatcher = BackMatcherTempl!(Char, Stream);
- auto fwdMatcher = BackMatcher(matcher.re, s, memBlock, front, index);
- return fwdMatcher;
+ this(ref const RegEx program, Stream stream, void[] memBlock, dchar ch, DataIndex idx)
+ {
+ _refCount = 1;
+ re = program;
+ nativeFn = null;
+ initialize(program, stream, memBlock);
+ front = ch;
+ index = idx;
+ }
+
+ this(ref const RegEx program, MatchFn func, Stream stream, void[] memBlock)
+ {
+ _refCount = 1;
+ re = program;
+ initialize(program, stream, memBlock);
+ nativeFn = func;
+ next();
+ }
+
+ this(ref const RegEx program, Stream stream, void[] memBlock)
+ {
+ _refCount = 1;
+ re = program;
+ nativeFn = null;
+ initialize(program, stream, memBlock);
+ next();
+ }
+
+ auto fwdMatcher(ref const RegEx re, void[] memBlock)
+ {
+ alias BackMatcher = BacktrackingMatcher!(Char, Stream);
+ auto fwdMatcher = new BackMatcher(re, s, memBlock, front, index);
+ return fwdMatcher;
+ }
+
+ auto bwdMatcher(ref const RegEx re, void[] memBlock)
+ {
+ alias BackMatcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index)));
+ auto fwdMatcher =
+ new BackMatcher(re, s.loopBack(index), memBlock);
+ return fwdMatcher;
+ }
+
+ //
+ int matchFinalize()
+ {
+ immutable start = index;
+ immutable val = matchImpl();
+ if (val)
+ {//stream is updated here
+ matches[0].begin = start;
+ matches[0].end = index;
+ if (!(re.flags & RegexOption.global) || atEnd)
+ exhausted = true;
+ if (start == index)//empty match advances input
+ next();
+ return val;
}
+ else
+ return 0;
+ }
- auto bwdMatcher(ref BacktrackingMatcher matcher, void[] memBlock)
+ //lookup next match, fill matches with indices into input
+ override int match(Group!DataIndex[] matches)
+ {
+ debug(std_regex_matcher)
{
- alias BackMatcherTempl = .BacktrackingMatcher!(CTregex);
- alias BackMatcher = BackMatcherTempl!(Char, typeof(s.loopBack(index)));
- auto fwdMatcher =
- BackMatcher(matcher.re, s.loopBack(index), memBlock);
- return fwdMatcher;
+ writeln("------------------------------------------");
}
-
- //
- int matchFinalize()
+ if (exhausted) //all matches collected
+ return false;
+ this.matches = matches;
+ if (re.flags & RegexInfo.oneShot)
{
- immutable start = index;
- immutable val = matchImpl();
- if (val)
- {//stream is updated here
+ exhausted = true;
+ const DataIndex start = index;
+ immutable m = matchImpl();
+ if (m)
+ {
matches[0].begin = start;
matches[0].end = index;
- if (!(re.flags & RegexOption.global) || atEnd)
- exhausted = true;
- if (start == index)//empty match advances input
- next();
- return val;
}
- else
- return 0;
+ return m;
}
-
- //lookup next match, fill matches with indices into input
- int match(Group!DataIndex[] matches)
+ static if (kicked)
{
- debug(std_regex_matcher)
- {
- writeln("------------------------------------------");
- }
- if (exhausted) //all matches collected
- return false;
- this.matches = matches;
- if (re.flags & RegexInfo.oneShot)
- {
- exhausted = true;
- const DataIndex start = index;
- immutable m = matchImpl();
- if (m)
- {
- matches[0].begin = start;
- matches[0].end = index;
- }
- return m;
- }
- static if (kicked)
+ if (!re.kickstart.empty)
{
- if (!re.kickstart.empty)
+ for (;;)
{
- for (;;)
+ immutable val = matchFinalize();
+ if (val)
+ return val;
+ else
{
- immutable val = matchFinalize();
- if (val)
- return val;
- else
+ if (atEnd)
+ break;
+ search();
+ if (atEnd)
{
- if (atEnd)
- break;
- search();
- if (atEnd)
- {
- exhausted = true;
- return matchFinalize();
- }
+ exhausted = true;
+ return matchFinalize();
}
}
- exhausted = true;
- return 0; //early return
}
+ exhausted = true;
+ return 0; //early return
}
- //no search available - skip a char at a time
- for (;;)
+ }
+ //no search available - skip a char at a time
+ for (;;)
+ {
+ immutable val = matchFinalize();
+ if (val)
+ return val;
+ else
{
- immutable val = matchFinalize();
- if (val)
- return val;
- else
+ if (atEnd)
+ break;
+ next();
+ if (atEnd)
{
- if (atEnd)
- break;
- next();
- if (atEnd)
- {
- exhausted = true;
- return matchFinalize();
- }
+ exhausted = true;
+ return matchFinalize();
}
}
- exhausted = true;
- return 0;
}
+ exhausted = true;
+ return 0;
+ }
- /+
- match subexpression against input,
- results are stored in matches
- +/
- int matchImpl()
+ /+
+ match subexpression against input,
+ results are stored in matches
+ +/
+ int matchImpl() pure
+ {
+ if (nativeFn)
{
- static if (CTregex && is(typeof(nativeFn(this))))
- {
- debug(std_regex_ctr) writeln("using C-T matcher");
- return nativeFn(this);
- }
- else
+ debug(std_regex_ctr) writeln("using C-T matcher");
+ return nativeFn(this);
+ }
+ else
+ {
+ pc = 0;
+ counter = 0;
+ lastState = 0;
+ infiniteNesting = 0;
+ matches[] = Group!DataIndex.init;
+ auto start = s._index;
+ debug(std_regex_matcher)
+ writeln("Try match starting at ", s[index .. s.lastIndex]);
+ for (;;)
{
- pc = 0;
- counter = 0;
- lastState = 0;
- matches[] = Group!DataIndex.init;
- auto start = s._index;
debug(std_regex_matcher)
- writeln("Try match starting at ", s[index .. s.lastIndex]);
- for (;;)
+ writefln("PC: %s\tCNT: %s\t%s \tfront: %s src: %s",
+ pc, counter, disassemble(re.ir, pc, re.dict),
+ front, s._index);
+ switch (re.ir[pc].code)
{
- debug(std_regex_matcher)
- writefln("PC: %s\tCNT: %s\t%s \tfront: %s src: %s",
- pc, counter, disassemble(re.ir, pc, re.dict),
- front, s._index);
- switch (re.ir[pc].code)
+ case IR.OrChar://assumes IRL!(OrChar) == 1
+ if (atEnd)
+ goto L_backtrack;
+ uint len = re.ir[pc].sequence;
+ uint end = pc + len;
+ if (re.ir[pc].data != front && re.ir[pc+1].data != front)
{
- case IR.OrChar://assumes IRL!(OrChar) == 1
- if (atEnd)
- goto L_backtrack;
- uint len = re.ir[pc].sequence;
- uint end = pc + len;
- if (re.ir[pc].data != front && re.ir[pc+1].data != front)
- {
- for (pc = pc+2; pc < end; pc++)
- if (re.ir[pc].data == front)
- break;
- if (pc == end)
- goto L_backtrack;
- }
- pc = end;
- next();
- break;
- case IR.Char:
- if (atEnd || front != re.ir[pc].data)
+ for (pc = pc+2; pc < end; pc++)
+ if (re.ir[pc].data == front)
+ break;
+ if (pc == end)
goto L_backtrack;
- pc += IRL!(IR.Char);
- next();
+ }
+ pc = end;
+ next();
break;
- case IR.Any:
- if (atEnd)
- goto L_backtrack;
- pc += IRL!(IR.Any);
- next();
- break;
- case IR.CodepointSet:
- if (atEnd || !re.charsets[re.ir[pc].data].scanFor(front))
- goto L_backtrack;
- next();
- pc += IRL!(IR.CodepointSet);
+ case IR.Char:
+ if (atEnd || front != re.ir[pc].data)
+ goto L_backtrack;
+ pc += IRL!(IR.Char);
+ next();
+ break;
+ case IR.Any:
+ if (atEnd)
+ goto L_backtrack;
+ pc += IRL!(IR.Any);
+ next();
+ break;
+ case IR.CodepointSet:
+ if (atEnd || !re.charsets[re.ir[pc].data].scanFor(front))
+ goto L_backtrack;
+ next();
+ pc += IRL!(IR.CodepointSet);
+ break;
+ case IR.Trie:
+ if (atEnd || !re.matchers[re.ir[pc].data][front])
+ goto L_backtrack;
+ next();
+ pc += IRL!(IR.Trie);
+ break;
+ case IR.Wordboundary:
+ dchar back;
+ DataIndex bi;
+ //at start & end of input
+ if (atStart && wordMatcher[front])
+ {
+ pc += IRL!(IR.Wordboundary);
break;
- case IR.Trie:
- if (atEnd || !re.matchers[re.ir[pc].data][front])
- goto L_backtrack;
- next();
- pc += IRL!(IR.Trie);
+ }
+ else if (atEnd && s.loopBack(index).nextChar(back, bi)
+ && wordMatcher[back])
+ {
+ pc += IRL!(IR.Wordboundary);
break;
- case IR.Wordboundary:
- dchar back;
- DataIndex bi;
- //at start & end of input
- if (atStart && wordMatcher[front])
- {
- pc += IRL!(IR.Wordboundary);
- break;
- }
- else if (atEnd && s.loopBack(index).nextChar(back, bi)
- && wordMatcher[back])
+ }
+ else if (s.loopBack(index).nextChar(back, bi))
+ {
+ immutable af = wordMatcher[front];
+ immutable ab = wordMatcher[back];
+ if (af ^ ab)
{
pc += IRL!(IR.Wordboundary);
break;
}
- else if (s.loopBack(index).nextChar(back, bi))
- {
- immutable af = wordMatcher[front];
- immutable ab = wordMatcher[back];
- if (af ^ ab)
- {
- pc += IRL!(IR.Wordboundary);
- break;
- }
- }
+ }
+ goto L_backtrack;
+ case IR.Notwordboundary:
+ dchar back;
+ DataIndex bi;
+ //at start & end of input
+ if (atStart && wordMatcher[front])
goto L_backtrack;
- case IR.Notwordboundary:
- dchar back;
- DataIndex bi;
- //at start & end of input
- if (atStart && wordMatcher[front])
- goto L_backtrack;
- else if (atEnd && s.loopBack(index).nextChar(back, bi)
- && wordMatcher[back])
- goto L_backtrack;
- else if (s.loopBack(index).nextChar(back, bi))
- {
- immutable af = wordMatcher[front];
- immutable ab = wordMatcher[back];
- if (af ^ ab)
- goto L_backtrack;
- }
- pc += IRL!(IR.Wordboundary);
- break;
- case IR.Bof:
- if (atStart)
- pc += IRL!(IR.Bol);
- else
- goto L_backtrack;
- break;
- case IR.Bol:
- dchar back;
- DataIndex bi;
- if (atStart)
- pc += IRL!(IR.Bol);
- else if (s.loopBack(index).nextChar(back,bi)
- && endOfLine(back, front == '\n'))
- {
- pc += IRL!(IR.Bol);
- }
- else
- goto L_backtrack;
- break;
- case IR.Eof:
- if (atEnd)
- pc += IRL!(IR.Eol);
- else
- goto L_backtrack;
- break;
- case IR.Eol:
- dchar back;
- DataIndex bi;
- debug(std_regex_matcher) writefln("EOL (front 0x%x) %s", front, s[index .. s.lastIndex]);
- //no matching inside \r\n
- if (atEnd || (endOfLine(front, s.loopBack(index).nextChar(back,bi)
- && back == '\r')))
- {
- pc += IRL!(IR.Eol);
- }
- else
+ else if (atEnd && s.loopBack(index).nextChar(back, bi)
+ && wordMatcher[back])
+ goto L_backtrack;
+ else if (s.loopBack(index).nextChar(back, bi))
+ {
+ immutable af = wordMatcher[front];
+ immutable ab = wordMatcher[back];
+ if (af ^ ab)
goto L_backtrack;
- break;
- case IR.InfiniteStart, IR.InfiniteQStart:
- pc += re.ir[pc].data + IRL!(IR.InfiniteStart);
- //now pc is at end IR.Infinite(Q)End
- uint len = re.ir[pc].data;
- if (re.ir[pc].code == IR.InfiniteEnd)
- {
- pushState(pc+IRL!(IR.InfiniteEnd), counter);
- pc -= len;
- }
- else
- {
- pushState(pc - len, counter);
- pc += IRL!(IR.InfiniteEnd);
- }
- break;
- case IR.InfiniteBloomStart:
- pc += re.ir[pc].data + IRL!(IR.InfiniteBloomStart);
- //now pc is at end IR.InfiniteBloomEnd
- immutable len = re.ir[pc].data;
- immutable filterIdx = re.ir[pc+2].raw;
- if (re.filters[filterIdx][front])
- pushState(pc+IRL!(IR.InfiniteBloomEnd), counter);
+ }
+ pc += IRL!(IR.Wordboundary);
+ break;
+ case IR.Bof:
+ if (atStart)
+ pc += IRL!(IR.Bol);
+ else
+ goto L_backtrack;
+ break;
+ case IR.Bol:
+ dchar back;
+ DataIndex bi;
+ if (atStart)
+ pc += IRL!(IR.Bol);
+ else if (s.loopBack(index).nextChar(back,bi)
+ && endOfLine(back, front == '\n'))
+ {
+ pc += IRL!(IR.Bol);
+ }
+ else
+ goto L_backtrack;
+ break;
+ case IR.Eof:
+ if (atEnd)
+ pc += IRL!(IR.Eol);
+ else
+ goto L_backtrack;
+ break;
+ case IR.Eol:
+ dchar back;
+ DataIndex bi;
+ debug(std_regex_matcher) writefln("EOL (front 0x%x) %s", front, s[index .. s.lastIndex]);
+ //no matching inside \r\n
+ if (atEnd || (endOfLine(front, s.loopBack(index).nextChar(back,bi)
+ && back == '\r')))
+ {
+ pc += IRL!(IR.Eol);
+ }
+ else
+ goto L_backtrack;
+ break;
+ case IR.InfiniteStart, IR.InfiniteQStart:
+ pc += re.ir[pc].data + IRL!(IR.InfiniteStart);
+ //now pc is at end IR.Infinite(Q)End
+ uint len = re.ir[pc].data;
+ if (re.ir[pc].code == IR.InfiniteEnd)
+ {
+ pushState(pc+IRL!(IR.InfiniteEnd), counter);
pc -= len;
- break;
- case IR.RepeatStart, IR.RepeatQStart:
- pc += re.ir[pc].data + IRL!(IR.RepeatStart);
- break;
- case IR.RepeatEnd:
- case IR.RepeatQEnd:
- if (merge[re.ir[pc + 1].raw+counter].mark(index))
- {
- // merged!
- goto L_backtrack;
- }
- //len, step, min, max
- immutable len = re.ir[pc].data;
- immutable step = re.ir[pc+2].raw;
- immutable min = re.ir[pc+3].raw;
- immutable max = re.ir[pc+4].raw;
- if (counter < min)
+ }
+ else
+ {
+ pushState(pc - len, counter);
+ pc += IRL!(IR.InfiniteEnd);
+ }
+ break;
+ case IR.InfiniteBloomStart:
+ pc += re.ir[pc].data + IRL!(IR.InfiniteBloomStart);
+ //now pc is at end IR.InfiniteBloomEnd
+ immutable len = re.ir[pc].data;
+ immutable filterIdx = re.ir[pc+2].raw;
+ if (re.filters[filterIdx][front])
+ pushState(pc+IRL!(IR.InfiniteBloomEnd), counter);
+ pc -= len;
+ break;
+ case IR.RepeatStart, IR.RepeatQStart:
+ pc += re.ir[pc].data + IRL!(IR.RepeatStart);
+ break;
+ case IR.RepeatEnd:
+ case IR.RepeatQEnd:
+ if (merge[re.ir[pc + 1].raw+counter].mark(index))
+ {
+ // merged!
+ goto L_backtrack;
+ }
+ //len, step, min, max
+ immutable len = re.ir[pc].data;
+ immutable step = re.ir[pc+2].raw;
+ immutable min = re.ir[pc+3].raw;
+ immutable max = re.ir[pc+4].raw;
+ if (counter < min)
+ {
+ counter += step;
+ pc -= len;
+ }
+ else if (counter < max)
+ {
+ if (re.ir[pc].code == IR.RepeatEnd)
{
+ pushState(pc + IRL!(IR.RepeatEnd), counter%step);
counter += step;
pc -= len;
}
- else if (counter < max)
- {
- if (re.ir[pc].code == IR.RepeatEnd)
- {
- pushState(pc + IRL!(IR.RepeatEnd), counter%step);
- counter += step;
- pc -= len;
- }
- else
- {
- pushState(pc - len, counter + step);
- counter = counter%step;
- pc += IRL!(IR.RepeatEnd);
- }
- }
else
{
+ pushState(pc - len, counter + step);
counter = counter%step;
pc += IRL!(IR.RepeatEnd);
}
- break;
- case IR.InfiniteEnd:
- case IR.InfiniteQEnd:
- debug(std_regex_matcher) writeln("Infinited nesting:", infiniteNesting);
- if (merge[re.ir[pc + 1].raw+counter].mark(index))
- {
- // merged!
- goto L_backtrack;
- }
- immutable len = re.ir[pc].data;
- if (re.ir[pc].code == IR.InfiniteEnd)
- {
- pushState(pc + IRL!(IR.InfiniteEnd), counter);
- pc -= len;
- }
- else
- {
- pushState(pc-len, counter);
- pc += IRL!(IR.InfiniteEnd);
- }
- break;
- case IR.InfiniteBloomEnd:
- debug(std_regex_matcher) writeln("Infinited nesting:", infiniteNesting);
- if (merge[re.ir[pc + 1].raw+counter].mark(index))
- {
- // merged!
- goto L_backtrack;
- }
- immutable len = re.ir[pc].data;
- immutable filterIdx = re.ir[pc+2].raw;
- if (re.filters[filterIdx][front])
- {
- infiniteNesting--;
- pushState(pc + IRL!(IR.InfiniteBloomEnd), counter);
- infiniteNesting++;
- }
+ }
+ else
+ {
+ counter = counter%step;
+ pc += IRL!(IR.RepeatEnd);
+ }
+ break;
+ case IR.InfiniteEnd:
+ case IR.InfiniteQEnd:
+ debug(std_regex_matcher) writeln("Infinited nesting:", infiniteNesting);
+ if (merge[re.ir[pc + 1].raw+counter].mark(index))
+ {
+ // merged!
+ goto L_backtrack;
+ }
+ immutable len = re.ir[pc].data;
+ if (re.ir[pc].code == IR.InfiniteEnd)
+ {
+ pushState(pc + IRL!(IR.InfiniteEnd), counter);
pc -= len;
- break;
- case IR.OrEnd:
- if (merge[re.ir[pc + 1].raw+counter].mark(index))
- {
- // merged!
- goto L_backtrack;
- }
- pc += IRL!(IR.OrEnd);
- break;
- case IR.OrStart:
- pc += IRL!(IR.OrStart);
- goto case;
- case IR.Option:
- immutable len = re.ir[pc].data;
- if (re.ir[pc+len].code == IR.GotoEndOr)//not a last one
- {
- pushState(pc + len + IRL!(IR.Option), counter); //remember 2nd branch
- }
- pc += IRL!(IR.Option);
- break;
- case IR.GotoEndOr:
- pc = pc + re.ir[pc].data + IRL!(IR.GotoEndOr);
- break;
- case IR.GroupStart:
- immutable n = re.ir[pc].data;
- matches[n].begin = index;
- debug(std_regex_matcher) writefln("IR group #%u starts at %u", n, index);
- pc += IRL!(IR.GroupStart);
- break;
- case IR.GroupEnd:
- immutable n = re.ir[pc].data;
- matches[n].end = index;
- debug(std_regex_matcher) writefln("IR group #%u ends at %u", n, index);
- pc += IRL!(IR.GroupEnd);
- break;
- case IR.LookaheadStart:
- case IR.NeglookaheadStart:
- immutable len = re.ir[pc].data;
- auto save = index;
- immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
- auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)];
- scope(exit) free(mem.ptr);
- static if (Stream.isLoopback)
- {
- auto matcher = bwdMatcher(this, mem);
- }
- else
- {
- auto matcher = fwdMatcher(this, mem);
- }
- matcher.matches = matches[ms .. me];
- matcher.backrefed = backrefed.empty ? matches : backrefed;
- matcher.re.ir = re.ir[
- pc+IRL!(IR.LookaheadStart) .. pc+IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd)
- ];
- immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookaheadStart);
- s.reset(save);
+ }
+ else
+ {
+ pushState(pc-len, counter);
+ pc += IRL!(IR.InfiniteEnd);
+ }
+ break;
+ case IR.InfiniteBloomEnd:
+ debug(std_regex_matcher) writeln("Infinited nesting:", infiniteNesting);
+ if (merge[re.ir[pc + 1].raw+counter].mark(index))
+ {
+ // merged!
+ goto L_backtrack;
+ }
+ immutable len = re.ir[pc].data;
+ immutable filterIdx = re.ir[pc+2].raw;
+ if (re.filters[filterIdx][front])
+ {
+ infiniteNesting--;
+ pushState(pc + IRL!(IR.InfiniteBloomEnd), counter);
+ infiniteNesting++;
+ }
+ pc -= len;
+ break;
+ case IR.OrEnd:
+ if (merge[re.ir[pc + 1].raw+counter].mark(index))
+ {
+ // merged!
+ goto L_backtrack;
+ }
+ pc += IRL!(IR.OrEnd);
+ break;
+ case IR.OrStart:
+ pc += IRL!(IR.OrStart);
+ goto case;
+ case IR.Option:
+ immutable len = re.ir[pc].data;
+ if (re.ir[pc+len].code == IR.GotoEndOr)//not a last one
+ {
+ pushState(pc + len + IRL!(IR.Option), counter); //remember 2nd branch
+ }
+ pc += IRL!(IR.Option);
+ break;
+ case IR.GotoEndOr:
+ pc = pc + re.ir[pc].data + IRL!(IR.GotoEndOr);
+ break;
+ case IR.GroupStart:
+ immutable n = re.ir[pc].data;
+ matches[n].begin = index;
+ debug(std_regex_matcher) writefln("IR group #%u starts at %u", n, index);
+ pc += IRL!(IR.GroupStart);
+ break;
+ case IR.GroupEnd:
+ immutable n = re.ir[pc].data;
+ matches[n].end = index;
+ debug(std_regex_matcher) writefln("IR group #%u ends at %u", n, index);
+ pc += IRL!(IR.GroupEnd);
+ break;
+ case IR.LookaheadStart:
+ case IR.NeglookaheadStart:
+ immutable len = re.ir[pc].data;
+ auto save = index;
+ immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
+ auto mem = pureMalloc(initialMemory(re))[0 .. initialMemory(re)];
+ scope(exit) pureFree(mem.ptr);
+ auto slicedRe = re.withCode(re.ir[
+ pc+IRL!(IR.LookaheadStart) .. pc+IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd)
+ ]);
+ static if (Stream.isLoopback)
+ {
+ auto matcher = bwdMatcher(slicedRe, mem);
+ }
+ else
+ {
+ auto matcher = fwdMatcher(slicedRe, mem);
+ }
+ matcher.matches = matches[ms .. me];
+ matcher.backrefed = backrefed.empty ? matches : backrefed;
+ immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookaheadStart);
+ s.reset(save);
+ next();
+ if (!match)
+ goto L_backtrack;
+ else
+ {
+ pc += IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd);
+ }
+ break;
+ case IR.LookbehindStart:
+ case IR.NeglookbehindStart:
+ immutable len = re.ir[pc].data;
+ immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
+ auto mem = pureMalloc(initialMemory(re))[0 .. initialMemory(re)];
+ scope(exit) pureFree(mem.ptr);
+ auto slicedRe = re.withCode(re.ir[
+ pc + IRL!(IR.LookbehindStart) .. pc + IRL!(IR.LookbehindStart) + len + IRL!(IR.LookbehindEnd)
+ ]);
+ static if (Stream.isLoopback)
+ {
+ alias Matcher = BacktrackingMatcher!(Char, Stream);
+ auto matcher = new Matcher(slicedRe, s, mem, front, index);
+ }
+ else
+ {
+ alias Matcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index)));
+ auto matcher = new Matcher(slicedRe, s.loopBack(index), mem);
+ }
+ matcher.matches = matches[ms .. me];
+ matcher.backrefed = backrefed.empty ? matches : backrefed;
+ immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookbehindStart);
+ if (!match)
+ goto L_backtrack;
+ else
+ {
+ pc += IRL!(IR.LookbehindStart)+len+IRL!(IR.LookbehindEnd);
+ }
+ break;
+ case IR.Backref:
+ immutable n = re.ir[pc].data;
+ auto referenced = re.ir[pc].localRef
+ ? s[matches[n].begin .. matches[n].end]
+ : s[backrefed[n].begin .. backrefed[n].end];
+ while (!atEnd && !referenced.empty && front == referenced.front)
+ {
next();
- if (!match)
- goto L_backtrack;
- else
- {
- pc += IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd);
- }
- break;
- case IR.LookbehindStart:
- case IR.NeglookbehindStart:
- immutable len = re.ir[pc].data;
- immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
- auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)];
- scope(exit) free(mem.ptr);
- static if (Stream.isLoopback)
- {
- alias Matcher = BacktrackingMatcher!(Char, Stream);
- auto matcher = Matcher(re, s, mem, front, index);
- }
- else
- {
- alias Matcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index)));
- auto matcher = Matcher(re, s.loopBack(index), mem);
- }
- matcher.matches = matches[ms .. me];
- matcher.re.ir = re.ir[
- pc + IRL!(IR.LookbehindStart) .. pc + IRL!(IR.LookbehindStart) + len + IRL!(IR.LookbehindEnd)
- ];
- matcher.backrefed = backrefed.empty ? matches : backrefed;
- immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookbehindStart);
- if (!match)
- goto L_backtrack;
- else
- {
- pc += IRL!(IR.LookbehindStart)+len+IRL!(IR.LookbehindEnd);
- }
- break;
- case IR.Backref:
- immutable n = re.ir[pc].data;
- auto referenced = re.ir[pc].localRef
- ? s[matches[n].begin .. matches[n].end]
- : s[backrefed[n].begin .. backrefed[n].end];
- while (!atEnd && !referenced.empty && front == referenced.front)
- {
- next();
- referenced.popFront();
- }
- if (referenced.empty)
- pc++;
- else
- goto L_backtrack;
- break;
- case IR.Nop:
- pc += IRL!(IR.Nop);
- break;
- case IR.LookaheadEnd:
- case IR.NeglookaheadEnd:
- case IR.LookbehindEnd:
- case IR.NeglookbehindEnd:
- case IR.End:
- // cleanup stale stack blocks if any
- while (prevStack()) {}
- return re.ir[pc].data;
- default:
- debug printBytecode(re.ir[0..$]);
- assert(0);
- L_backtrack:
- if (!popState())
- {
- s.reset(start);
- return 0;
- }
+ referenced.popFront();
+ }
+ if (referenced.empty)
+ pc++;
+ else
+ goto L_backtrack;
+ break;
+ case IR.Nop:
+ pc += IRL!(IR.Nop);
+ break;
+ case IR.LookaheadEnd:
+ case IR.NeglookaheadEnd:
+ case IR.LookbehindEnd:
+ case IR.NeglookbehindEnd:
+ case IR.End:
+ // cleanup stale stack blocks if any
+ while (prevStack()) {}
+ return re.ir[pc].data;
+ default:
+ debug printBytecode(re.ir[0..$]);
+ assert(0);
+ L_backtrack:
+ if (!popState())
+ {
+ s.reset(start);
+ return 0;
}
}
}
- assert(0);
}
+ assert(0);
+ }
- @property size_t stackAvail()
- {
- return memory.length - lastState;
- }
+ @property size_t stackAvail()
+ {
+ return memory.length - lastState;
+ }
- void stackPush(T)(T val)
- if (!isDynamicArray!T)
- {
- *cast(T*)&memory[lastState] = val;
- enum delta = (T.sizeof+size_t.sizeof/2)/size_t.sizeof;
- lastState += delta;
- debug(std_regex_matcher) writeln("push element SP= ", lastState);
- }
+ void stackPush(T)(T val)
+ if (!isDynamicArray!T)
+ {
+ *cast(T*)&memory[lastState] = val;
+ enum delta = (T.sizeof+size_t.sizeof/2)/size_t.sizeof;
+ lastState += delta;
+ debug(std_regex_matcher) writeln("push element SP= ", lastState);
+ }
- void stackPush(T)(T[] val)
- {
- static assert(T.sizeof % size_t.sizeof == 0);
- (cast(T*)&memory[lastState])[0 .. val.length]
- = val[0..$];
- lastState += val.length*(T.sizeof/size_t.sizeof);
- debug(std_regex_matcher) writeln("push array SP= ", lastState);
- }
+ void stackPush(T)(T[] val)
+ {
+ static assert(T.sizeof % size_t.sizeof == 0);
+ (cast(T*)&memory[lastState])[0 .. val.length]
+ = val[0..$];
+ lastState += val.length*(T.sizeof/size_t.sizeof);
+ debug(std_regex_matcher) writeln("push array SP= ", lastState);
+ }
- void stackPop(T)(ref T val)
- if (!isDynamicArray!T)
- {
- enum delta = (T.sizeof+size_t.sizeof/2)/size_t.sizeof;
- lastState -= delta;
- val = *cast(T*)&memory[lastState];
- debug(std_regex_matcher) writeln("pop element SP= ", lastState);
- }
+ void stackPop(T)(ref T val)
+ if (!isDynamicArray!T)
+ {
+ enum delta = (T.sizeof+size_t.sizeof/2)/size_t.sizeof;
+ lastState -= delta;
+ val = *cast(T*)&memory[lastState];
+ debug(std_regex_matcher) writeln("pop element SP= ", lastState);
+ }
- void stackPop(T)(T[] val)
- {
- stackPop(val); // call ref version
- }
- void stackPop(T)(ref T[] val)
+ void stackPop(T)(T[] val)
+ {
+ stackPop(val); // call ref version
+ }
+ void stackPop(T)(ref T[] val)
+ {
+ lastState -= val.length*(T.sizeof/size_t.sizeof);
+ val[0..$] = (cast(T*)&memory[lastState])[0 .. val.length];
+ debug(std_regex_matcher) writeln("pop array SP= ", lastState);
+ }
+ //helper function, saves engine state
+ void pushState(uint pc, uint counter)
+ {
+ if (stateSize + 2 * matches.length > stackAvail)
{
- lastState -= val.length*(T.sizeof/size_t.sizeof);
- val[0..$] = (cast(T*)&memory[lastState])[0 .. val.length];
- debug(std_regex_matcher) writeln("pop array SP= ", lastState);
+ newStack();
}
+ *cast(State*)&memory[lastState] =
+ State(index, pc, counter, infiniteNesting);
+ lastState += stateSize;
+ memory[lastState .. lastState + 2 * matches.length] = (cast(size_t[]) matches)[];
+ lastState += 2*matches.length;
+ debug(std_regex_matcher)
+ writefln("Saved(pc=%s) front: %s src: %s",
+ pc, front, s[index .. s.lastIndex]);
+ }
- static if (!CTregex)
+ //helper function, restores engine state
+ bool popState()
+ {
+ if (!lastState && !prevStack())
+ return false;
+ lastState -= 2*matches.length;
+ auto pm = cast(size_t[]) matches;
+ pm[] = memory[lastState .. lastState + 2 * matches.length];
+ lastState -= stateSize;
+ State* state = cast(State*)&memory[lastState];
+ index = state.index;
+ pc = state.pc;
+ counter = state.counter;
+ infiniteNesting = state.infiniteNesting;
+ debug(std_regex_matcher)
{
- //helper function, saves engine state
- void pushState(uint pc, uint counter)
- {
- if (stateSize + 2 * matches.length > stackAvail)
- {
- newStack();
- }
- *cast(State*)&memory[lastState] =
- State(index, pc, counter, infiniteNesting);
- lastState += stateSize;
- memory[lastState .. lastState + 2 * matches.length] = (cast(size_t[]) matches)[];
- lastState += 2*matches.length;
- debug(std_regex_matcher)
- writefln("Saved(pc=%s) front: %s src: %s",
- pc, front, s[index .. s.lastIndex]);
- }
-
- //helper function, restores engine state
- bool popState()
- {
- if (!lastState && !prevStack())
- return false;
- lastState -= 2*matches.length;
- auto pm = cast(size_t[]) matches;
- pm[] = memory[lastState .. lastState + 2 * matches.length];
- lastState -= stateSize;
- State* state = cast(State*)&memory[lastState];
- index = state.index;
- pc = state.pc;
- counter = state.counter;
- infiniteNesting = state.infiniteNesting;
- debug(std_regex_matcher)
- {
- writefln("Restored matches", front, s[index .. s.lastIndex]);
- foreach (i, m; matches)
- writefln("Sub(%d) : %s..%s", i, m.begin, m.end);
- }
- s.reset(index);
- next();
- debug(std_regex_matcher)
- writefln("Backtracked (pc=%s) front: %s src: %s",
- pc, front, s[index .. s.lastIndex]);
- return true;
- }
+ writefln("Restored matches", front, s[index .. s.lastIndex]);
+ foreach (i, m; matches)
+ writefln("Sub(%d) : %s..%s", i, m.begin, m.end);
}
+ s.reset(index);
+ next();
+ debug(std_regex_matcher)
+ writefln("Backtracked (pc=%s) front: %s src: %s",
+ pc, front, s[index .. s.lastIndex]);
+ return true;
}
}
@@ -795,8 +818,6 @@ template BacktrackingMatcher(bool CTregex)
return format;
}
-alias Sequence(int B, int E) = staticIota!(B, E);
-
struct CtContext
{
import std.conv : to, text;
@@ -805,7 +826,7 @@ struct CtContext
//to mark the portion of matches to save
int match, total_matches;
int reserved;
- CodepointSet[] charsets;
+ const(CodepointInterval)[][] charsets;
//state of codegenerator
@@ -815,12 +836,15 @@ struct CtContext
int addr;
}
- this(Char)(Regex!Char re)
+ this(Char)(ref const Regex!Char re)
{
match = 1;
reserved = 1; //first match is skipped
total_matches = re.ngroup;
- charsets = re.charsets;
+ foreach (ref set; re.charsets)
+ {
+ charsets ~= set.intervals;
+ }
}
CtContext lookaround(uint s, uint e)
@@ -876,7 +900,7 @@ struct CtContext
}
//
- CtState ctGenBlock(Bytecode[] ir, int addr)
+ CtState ctGenBlock(const(Bytecode)[] ir, int addr)
{
CtState result;
result.addr = addr;
@@ -890,7 +914,7 @@ struct CtContext
}
//
- CtState ctGenGroup(ref Bytecode[] ir, int addr)
+ CtState ctGenGroup(ref const(Bytecode)[] ir, int addr)
{
import std.algorithm.comparison : max;
auto bailOut = "goto L_backtrack;";
@@ -932,10 +956,10 @@ struct CtContext
immutable len = ir[0].data;
immutable behind = ir[0].code == IR.LookbehindStart || ir[0].code == IR.NeglookbehindStart;
immutable negative = ir[0].code == IR.NeglookaheadStart || ir[0].code == IR.NeglookbehindStart;
- string fwdType = "typeof(fwdMatcher(matcher, []))";
- string bwdType = "typeof(bwdMatcher(matcher, []))";
- string fwdCreate = "fwdMatcher(matcher, mem)";
- string bwdCreate = "bwdMatcher(matcher, mem)";
+ string fwdType = "typeof(fwdMatcher(re, []))";
+ string bwdType = "typeof(bwdMatcher(re, []))";
+ string fwdCreate = "fwdMatcher(re, mem)";
+ string bwdCreate = "bwdMatcher(re, mem)";
immutable start = IRL!(IR.LookbehindStart);
immutable end = IRL!(IR.LookbehindStart)+len+IRL!(IR.LookaheadEnd);
CtContext context = lookaround(ir[1].raw, ir[2].raw); //split off new context
@@ -946,15 +970,15 @@ struct CtContext
alias Lookaround = $$;
else
alias Lookaround = $$;
- static bool matcher_$$(ref Lookaround matcher) @trusted
+ static bool matcher_$$(Lookaround matcher) @trusted
{
//(neg)lookaround piece start
$$
//(neg)lookaround piece ends
}
auto save = index;
- auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)];
- scope(exit) free(mem.ptr);
+ auto mem = pureMalloc(initialMemory(re))[0 .. initialMemory(re)];
+ scope(exit) pureFree(mem.ptr);
static if (typeof(matcher.s).isLoopback)
auto lookaround = $$;
else
@@ -992,7 +1016,7 @@ struct CtContext
}
//generate source for bytecode contained in OrStart ... OrEnd
- CtState ctGenAlternation(Bytecode[] ir, int addr)
+ CtState ctGenAlternation(const(Bytecode)[] ir, int addr)
{
CtState[] pieces;
CtState r;
@@ -1032,11 +1056,11 @@ struct CtContext
// generate fixup code for instruction in ir,
// fixup means it has an alternative way for control flow
- string ctGenFixupCode(Bytecode[] ir, int addr, int fixup)
+ string ctGenFixupCode(const(Bytecode)[] ir, int addr, int fixup)
{
return ctGenFixupCode(ir, addr, fixup); // call ref Bytecode[] version
}
- string ctGenFixupCode(ref Bytecode[] ir, int addr, int fixup)
+ string ctGenFixupCode(ref const(Bytecode)[] ir, int addr, int fixup)
{
string r;
string testCode;
@@ -1190,7 +1214,7 @@ struct CtContext
}
- string ctQuickTest(Bytecode[] ir, int id)
+ string ctQuickTest(const(Bytecode)[] ir, int id)
{
uint pc = 0;
while (pc < ir.length && ir[pc].isAtom)
@@ -1217,7 +1241,7 @@ struct CtContext
}
//process & generate source for simple bytecodes at front of ir using address addr
- CtState ctGenAtom(ref Bytecode[] ir, int addr)
+ CtState ctGenAtom(ref const(Bytecode)[] ir, int addr)
{
CtState result;
result.code = ctAtomCode(ir, addr);
@@ -1227,7 +1251,7 @@ struct CtContext
}
//D code for atom at ir using address addr, addr < 0 means quickTest
- string ctAtomCode(Bytecode[] ir, int addr)
+ string ctAtomCode(const(Bytecode)[] ir, int addr)
{
string code;
string bailOut, nextInstr;
@@ -1282,7 +1306,7 @@ struct CtContext
if (charsets.length)
{
string name = `func_`~to!string(addr+1);
- string funcCode = charsets[ir[0].data].toSourceCode(name);
+ string funcCode = CodepointSet.toSourceCode(charsets[ir[0].data], name);
code ~= ctSub( `
static $$
if (atEnd || !$$(front))
@@ -1298,7 +1322,7 @@ struct CtContext
$$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
break;
case IR.Trie:
- if (charsets.length && charsets[ir[0].data].byInterval.length <= 8)
+ if (charsets.length && charsets[ir[0].data].length <= 8)
goto case IR.CodepointSet;
code ~= ctSub( `
if (atEnd || !re.matchers[$$][front])
@@ -1439,11 +1463,11 @@ struct CtContext
}
//generate D code for the whole regex
- public string ctGenRegEx(Bytecode[] ir)
+ public string ctGenRegEx(const(Bytecode)[] ir)
{
auto bdy = ctGenBlock(ir, 0);
auto r = `
- import core.stdc.stdlib;
+ import core.memory : pureMalloc, pureFree;
with(matcher)
{
pc = 0;
@@ -1488,7 +1512,7 @@ struct CtContext
}
-string ctGenRegExCode(Char)(Regex!Char re)
+string ctGenRegExCode(Char)(const Regex!Char re)
{
auto context = CtContext(re);
return context.ctGenRegEx(re.ir);
diff --git a/libphobos/src/std/regex/internal/generator.d b/libphobos/src/std/regex/internal/generator.d
index 6831e59..c1fe4cd 100644
--- a/libphobos/src/std/regex/internal/generator.d
+++ b/libphobos/src/std/regex/internal/generator.d
@@ -13,7 +13,7 @@ module std.regex.internal.generator;
@trusted private struct SampleGenerator(Char)
{
import std.array : appender, Appender;
- import std.format : formattedWrite;
+ import std.format.write : formattedWrite;
import std.random : Xorshift;
import std.regex.internal.ir : Regex, IR, IRL;
import std.utf : isValidDchar, byChar;
diff --git a/libphobos/src/std/regex/internal/ir.d b/libphobos/src/std/regex/internal/ir.d
index 28b1998..ec0cb66 100644
--- a/libphobos/src/std/regex/internal/ir.d
+++ b/libphobos/src/std/regex/internal/ir.d
@@ -30,7 +30,9 @@ CharMatcher[CodepointSet] matcherCache;
//accessor with caching
@trusted CharMatcher getMatcher(CodepointSet set)
-{// @@@BUG@@@ 6357 almost all properties of AA are not @safe
+{
+ // almost all properties of AA are not @safe
+ // https://issues.dlang.org/show_bug.cgi?id=6357
if (__ctfe || maxCachedMatchers == 0)
return CharMatcher(set);
else
@@ -47,40 +49,15 @@ CharMatcher[CodepointSet] matcherCache;
}
}
-@trusted auto memoizeExpr(string expr)()
-{
- if (__ctfe)
- return mixin(expr);
- alias T = typeof(mixin(expr));
- static T slot;
- static bool initialized;
- if (!initialized)
- {
- slot = mixin(expr);
- initialized = true;
- }
- return slot;
-}
-
-//property for \w character class
-@property CodepointSet wordCharacter()
+@property ref wordMatcher()()
{
- return memoizeExpr!("unicode.Alphabetic | unicode.Mn | unicode.Mc
- | unicode.Me | unicode.Nd | unicode.Pc")();
-}
-
-@property CharMatcher wordMatcher()
-{
- return memoizeExpr!("CharMatcher(wordCharacter)")();
+ static immutable CharMatcher matcher = CharMatcher(wordCharacter);
+ return matcher;
}
// some special Unicode white space characters
private enum NEL = '\u0085', LS = '\u2028', PS = '\u2029';
-// Characters that need escaping in string posed as regular expressions
-alias Escapables = AliasSeq!('[', ']', '\\', '^', '$', '.', '|', '?', ',', '-',
- ';', ':', '#', '&', '%', '/', '<', '>', '`', '*', '+', '(', ')', '{', '}', '~');
-
//Regular expression engine/parser options:
// global - search all nonoverlapping matches in input
// casefold - case insensitive matching, do casefolding on match in unicode mode
@@ -97,6 +74,20 @@ enum RegexOption: uint {
//do not reorder this list
alias RegexOptionNames = AliasSeq!('g', 'i', 'x', 'U', 'm', 's');
static assert( RegexOption.max < 0x80);
+
+package(std) string regexOptionsToString()(uint flags) nothrow pure @safe
+{
+ flags &= (RegexOption.max << 1) - 1;
+ if (!flags)
+ return "";
+ char[RegexOptionNames.length] buffer = void;
+ size_t pos = 0;
+ foreach (i, flag; __traits(allMembers, RegexOption))
+ if (flags & __traits(getMember, RegexOption, flag))
+ buffer[pos++] = RegexOptionNames[i];
+ return buffer[0 .. pos].idup;
+}
+
// flags that allow guide execution of engine
enum RegexInfo : uint { oneShot = 0x80 }
@@ -173,7 +164,8 @@ template IRL(IR code)
static assert(IRL!(IR.LookaheadStart) == 3);
//how many parameters follow the IR, should be optimized fixing some IR bits
-int immediateParamsIR(IR i){
+int immediateParamsIR(IR i) @safe pure nothrow @nogc
+{
switch (i)
{
case IR.OrEnd,IR.InfiniteEnd,IR.InfiniteQEnd:
@@ -190,49 +182,50 @@ int immediateParamsIR(IR i){
}
//full length of IR instruction inlcuding all parameters that might follow it
-int lengthOfIR(IR i)
+int lengthOfIR(IR i) @safe pure nothrow @nogc
{
return 1 + immediateParamsIR(i);
}
//full length of the paired IR instruction inlcuding all parameters that might follow it
-int lengthOfPairedIR(IR i)
+int lengthOfPairedIR(IR i) @safe pure nothrow @nogc
{
return 1 + immediateParamsIR(pairedIR(i));
}
//if the operation has a merge point (this relies on the order of the ops)
-bool hasMerge(IR i)
+bool hasMerge(IR i) @safe pure nothrow @nogc
{
return (i&0b11)==0b10 && i <= IR.RepeatQEnd;
}
//is an IR that opens a "group"
-bool isStartIR(IR i)
+bool isStartIR(IR i) @safe pure nothrow @nogc
{
return (i&0b11)==0b01;
}
//is an IR that ends a "group"
-bool isEndIR(IR i)
+bool isEndIR(IR i) @safe pure nothrow @nogc
{
return (i&0b11)==0b10;
}
//is a standalone IR
-bool isAtomIR(IR i)
+bool isAtomIR(IR i) @safe pure nothrow @nogc
{
return (i&0b11)==0b00;
}
//makes respective pair out of IR i, swapping start/end bits of instruction
-IR pairedIR(IR i)
+IR pairedIR(IR i) @safe pure nothrow @nogc
{
assert(isStartIR(i) || isEndIR(i));
- return cast(IR)(i ^ 0b11);
+ return cast(IR) (i ^ 0b11);
}
//encoded IR instruction
+@safe pure
struct Bytecode
{
uint raw;
@@ -241,6 +234,7 @@ struct Bytecode
enum maxData = 1 << 22;
enum maxRaw = 1 << 31;
+@safe pure:
this(IR code, uint data)
{
assert(data < (1 << 22) && code < 256);
@@ -262,8 +256,8 @@ struct Bytecode
return t;
}
- //bit twiddling helpers
- //0-arg template due to @@@BUG@@@ 10985
+ // bit twiddling helpers
+ // 0-arg template due to https://issues.dlang.org/show_bug.cgi?id=10985
@property uint data()() const { return raw & 0x003f_ffff; }
@property void data()(uint val)
@@ -271,12 +265,12 @@ struct Bytecode
raw = (raw & ~0x003f_ffff) | (val & 0x003f_ffff);
}
- //ditto
- //0-arg template due to @@@BUG@@@ 10985
+ // ditto
+ // 0-arg template due to https://issues.dlang.org/show_bug.cgi?id=10985
@property uint sequence()() const { return 2 + (raw >> 22 & 0x3); }
- //ditto
- //0-arg template due to @@@BUG@@@ 10985
+ // ditto
+ // 0-arg template due to https://issues.dlang.org/show_bug.cgi?id=10985
@property IR code()() const { return cast(IR)(raw >> 24); }
//ditto
@@ -369,11 +363,20 @@ struct NamedGroup
//holds pair of start-end markers for a submatch
struct Group(DataIndex)
{
- DataIndex begin, end;
+ DataIndex begin = DataIndex.max;
+ DataIndex end = DataIndex.min;
+
+ bool opCast(T : bool)() const
+ {
+ return begin <= end;
+ }
+
@trusted string toString()() const
{
+ if (begin < end)
+ return "(unmatched)";
import std.array : appender;
- import std.format : formattedWrite;
+ import std.format.write : formattedWrite;
auto a = appender!string();
formattedWrite(a, "%s..%s", begin, end);
return a.data;
@@ -384,7 +387,7 @@ struct Group(DataIndex)
@trusted string disassemble(in Bytecode[] irb, uint pc, in NamedGroup[] dict=[])
{
import std.array : appender;
- import std.format : formattedWrite;
+ import std.format.write : formattedWrite;
auto output = appender!string();
formattedWrite(output,"%s", irb[pc].mnemonic);
switch (irb[pc].code)
@@ -452,9 +455,169 @@ struct Group(DataIndex)
writeln("\t", disassemble(slice, pc, dict));
}
+// Encapsulates memory management, explicit ref counting
+// and the exact type of engine created
+// there is a single instance per engine combination type x Char
+// In future may also maintain a (TLS?) cache of memory
+interface MatcherFactory(Char)
+{
+@safe:
+ Matcher!Char create(const ref Regex!Char, in Char[] input) const;
+ Matcher!Char dup(Matcher!Char m, in Char[] input) const;
+ size_t incRef(Matcher!Char m) const;
+ size_t decRef(Matcher!Char m) const;
+}
+
+// Only memory management, no compile-time vs run-time specialities
+abstract class GenericFactory(alias EngineType, Char) : MatcherFactory!Char
+{
+ import core.memory : pureFree;
+ import std.internal.memory : enforceMalloc;
+ import core.memory : GC;
+ // round up to next multiple of size_t for alignment purposes
+ enum classSize = (__traits(classInstanceSize, EngineType!Char) + size_t.sizeof - 1) & ~(size_t.sizeof - 1);
+
+ EngineType!Char construct(const ref Regex!Char re, in Char[] input, void[] memory) const;
+
+ override EngineType!Char create(const ref Regex!Char re, in Char[] input) const @trusted
+ {
+ immutable size = EngineType!Char.initialMemory(re) + classSize;
+ auto memory = enforceMalloc(size)[0 .. size];
+ scope(failure) pureFree(memory.ptr);
+ GC.addRange(memory.ptr, classSize);
+ auto engine = construct(re, input, memory);
+ assert(engine.refCount == 1);
+ assert(cast(void*) engine == memory.ptr);
+ return engine;
+ }
+
+ override EngineType!Char dup(Matcher!Char engine, in Char[] input) const @trusted
+ {
+ immutable size = EngineType!Char.initialMemory(engine.pattern) + classSize;
+ auto memory = enforceMalloc(size)[0 .. size];
+ scope(failure) pureFree(memory.ptr);
+ auto copy = construct(engine.pattern, input, memory);
+ GC.addRange(memory.ptr, classSize);
+ engine.dupTo(copy, memory[classSize .. size]);
+ assert(copy.refCount == 1);
+ return copy;
+ }
+
+ override size_t incRef(Matcher!Char m) const
+ {
+ return ++m.refCount;
+ }
+
+ override size_t decRef(Matcher!Char m) const @trusted
+ {
+ assert(m.refCount != 0);
+ auto cnt = --m.refCount;
+ if (cnt == 0)
+ {
+ void* ptr = cast(void*) m;
+ GC.removeRange(ptr);
+ pureFree(ptr);
+ }
+ return cnt;
+ }
+}
+
+// A factory for run-time engines
+class RuntimeFactory(alias EngineType, Char) : GenericFactory!(EngineType, Char)
+{
+ override EngineType!Char construct(const ref Regex!Char re, in Char[] input, void[] memory) const
+ {
+ import core.lifetime : emplace;
+ return emplace!(EngineType!Char)(memory[0 .. classSize],
+ re, Input!Char(input), memory[classSize .. $]);
+ }
+}
+
+// A factory for compile-time engine
+class CtfeFactory(alias EngineType, Char, alias func) : GenericFactory!(EngineType, Char)
+{
+ override EngineType!Char construct(const ref Regex!Char re, in Char[] input, void[] memory) const
+ {
+ import core.lifetime : emplace;
+ return emplace!(EngineType!Char)(memory[0 .. classSize],
+ re, &func, Input!Char(input), memory[classSize .. $]);
+ }
+}
+
+private auto defaultFactoryImpl(Char)(const ref Regex!Char re)
+{
+ import std.regex.internal.backtracking : BacktrackingMatcher;
+ import std.regex.internal.thompson : ThompsonMatcher;
+ import std.algorithm.searching : canFind;
+ static MatcherFactory!Char backtrackingFactory;
+ static MatcherFactory!Char thompsonFactory;
+ if (re.backrefed.canFind!"a != 0")
+ {
+ if (backtrackingFactory is null)
+ backtrackingFactory = new RuntimeFactory!(BacktrackingMatcher, Char);
+ return backtrackingFactory;
+ }
+ else
+ {
+ if (thompsonFactory is null)
+ thompsonFactory = new RuntimeFactory!(ThompsonMatcher, Char);
+ return thompsonFactory;
+ }
+}
+
+// Used to generate a pure wrapper for defaultFactoryImpl. Based on the example in
+// the std.traits.SetFunctionAttributes documentation.
+auto assumePureFunction(T)(T t)
+if (isFunctionPointer!T)
+{
+ enum attrs = functionAttributes!T | FunctionAttribute.pure_;
+ return cast(SetFunctionAttributes!(T, functionLinkage!T, attrs)) t;
+}
+
+// A workaround for R-T enum re = regex(...)
+template defaultFactory(Char)
+{
+ // defaultFactory is constructed as a safe, pure wrapper over defaultFactoryImpl.
+ // It can be faked as pure because the static mutable variables are used to cache
+ // the key and character matcher. The technique used avoids delegates and GC.
+ @property MatcherFactory!Char defaultFactory(const ref Regex!Char re) @safe pure
+ {
+ static auto impl(const ref Regex!Char re)
+ {
+ return defaultFactoryImpl(re);
+ }
+
+ static auto pureImpl(const ref Regex!Char re) @trusted
+ {
+ auto p = assumePureFunction(&impl);
+ return p(re);
+ }
+
+ return pureImpl(re);
+ }
+}
+
+// Defining it as an interface has the undesired side-effect:
+// casting any class to an interface silently adjusts pointer to point to a nested vtbl
+abstract class Matcher(Char)
+{
+abstract:
+ // Get a (next) match
+ int match(Group!size_t[] matches) pure;
+ // This only maintains internal ref-count,
+ // deallocation happens inside MatcherFactory
+ @property ref size_t refCount() @safe;
+ // Copy internal state to another engine, using memory arena 'memory'
+ void dupTo(Matcher!Char m, void[] memory);
+ // The pattern loaded
+ @property ref const(Regex!Char) pattern() @safe;
+ // Re-arm the engine with new Input
+ Matcher rearm(in Char[] stream);
+}
+
/++
- $(D Regex) object holds regular expression pattern in compiled form.
- Instances of this object are constructed via calls to $(D regex).
+ `Regex` object holds regular expression pattern in compiled form.
+ Instances of this object are constructed via calls to `regex`.
This is an intended form for caching and storage of frequently
used regular expressions.
+/
@@ -466,17 +629,19 @@ struct Regex(Char)
@safe @property bool empty() const nothrow { return ir is null; }
-
+ /++
+ `namedCaptures` returns a range of all named captures in a given regular expression.
+ +/
@safe @property auto namedCaptures()
{
static struct NamedGroupRange
{
private:
- NamedGroup[] groups;
+ const(NamedGroup)[] groups;
size_t start;
size_t end;
public:
- this(NamedGroup[] g, size_t s, size_t e)
+ this(const(NamedGroup)[] g, size_t s, size_t e)
{
assert(s <= e);
assert(e <= g.length);
@@ -514,7 +679,7 @@ struct Regex(Char)
package(std.regex):
import std.regex.internal.kickstart : Kickstart; //TODO: get rid of this dependency
- NamedGroup[] dict; // maps name -> user group number
+ const(NamedGroup)[] dict; // maps name -> user group number
uint ngroup; // number of internal groups
uint maxCounterDepth; // max depth of nested {n,m} repetitions
uint hotspotTableSize; // number of entries in merge table
@@ -524,6 +689,36 @@ package(std.regex):
public const(BitTable)[] filters; // bloom filters for conditional loops
uint[] backrefed; // bit array of backreferenced submatches
Kickstart!Char kickstart;
+ MatcherFactory!Char factory; // produces optimal matcher for this pattern
+ immutable(Char)[] pattern; // copy of pattern to serve as cache key
+
+ const(Regex) withFactory(MatcherFactory!Char factory) pure const @trusted
+ {
+ auto r = cast() this;
+ r.factory = factory;
+ return r;
+ }
+
+ const(Regex) withFlags(uint newFlags) pure const @trusted
+ {
+ auto r = cast() this;
+ r.flags = newFlags;
+ return r;
+ }
+
+ const(Regex) withCode(const(Bytecode)[] code) pure const @trusted
+ {
+ auto r = cast() this;
+ r.ir = code.dup; // TODO: sidestep const instead?
+ return r;
+ }
+
+ const(Regex) withNGroup(uint nGroup) pure const @trusted
+ {
+ auto r = cast() this;
+ r.ngroup = nGroup;
+ return r;
+ }
//bit access helper
uint isBackref(uint n)
@@ -564,26 +759,20 @@ package(std.regex):
writeln("Max counter nesting depth: ", maxCounterDepth);
}
-}
-
-//@@@BUG@@@ (unreduced) - public makes it inaccessible in std.regex.package (!)
-/*public*/ struct StaticRegex(Char)
-{
-package(std.regex):
- import std.regex.internal.backtracking : BacktrackingMatcher;
- alias Matcher = BacktrackingMatcher!(true);
- alias MatchFn = bool function(ref Matcher!Char) @trusted;
- MatchFn nativeFn;
-public:
- Regex!Char _regex;
- alias _regex this;
- this(Regex!Char re, MatchFn fn)
+ public string toString()() const
{
- _regex = re;
- nativeFn = fn;
-
+ import std.format : format;
+ static if (is(typeof(pattern) : string))
+ alias patternString = pattern;
+ else
+ {
+ import std.conv : to;
+ auto patternString = conv.to!string(pattern);
+ }
+ auto quotedEscapedPattern = format("%(%s %)", [patternString]);
+ auto flagString = regexOptionsToString(flags);
+ return "Regex!" ~ Char.stringof ~ "(" ~ quotedEscapedPattern ~ ", \"" ~ flagString ~ "\")";
}
-
}
// The stuff below this point is temporarrily part of IR module
@@ -622,7 +811,7 @@ if (is(Char :dchar))
@property bool atEnd(){
return _index == _origin.length;
}
- bool search(Kickstart)(ref Kickstart kick, ref dchar res, ref size_t pos)
+ bool search(Kickstart)(ref const Kickstart kick, ref dchar res, ref size_t pos)
{
size_t idx = kick.search(_origin, _index);
_index = idx;
@@ -653,6 +842,11 @@ struct BackLooperImpl(Input)
_origin = input._origin;
_index = index;
}
+ this(String input)
+ {
+ _origin = input;
+ _index = input.length;
+ }
@trusted bool nextChar(ref dchar res,ref size_t pos)
{
pos = _index;
@@ -690,10 +884,10 @@ template BackLooper(E)
//both helpers below are internal, on its own are quite "explosive"
//unsafe, no initialization of elements
-@system T[] mallocArray(T)(size_t len)
+@system pure T[] mallocArray(T)(size_t len)
{
- import core.stdc.stdlib : malloc;
- return (cast(T*) malloc(len * T.sizeof))[0 .. len];
+ import core.memory : pureMalloc;
+ return (cast(T*) pureMalloc(len * T.sizeof))[0 .. len];
}
//very unsafe, no initialization
@@ -705,7 +899,7 @@ template BackLooper(E)
}
//
-@trusted uint lookupNamedGroup(String)(NamedGroup[] dict, String name)
+@trusted uint lookupNamedGroup(String)(const(NamedGroup)[] dict, String name)
{//equal is @system?
import std.algorithm.comparison : equal;
import std.algorithm.iteration : map;
@@ -718,16 +912,15 @@ template BackLooper(E)
return dict[fnd].group;
}
-//whether ch is one of unicode newline sequences
-//0-arg template due to @@@BUG@@@ 10985
+// whether ch is one of unicode newline sequences
+// 0-arg template due to https://issues.dlang.org/show_bug.cgi?id=10985
bool endOfLine()(dchar front, bool seenCr)
{
return ((front == '\n') ^ seenCr) || front == '\r'
|| front == NEL || front == LS || front == PS;
}
-//
-//0-arg template due to @@@BUG@@@ 10985
+// 0-arg template due to https://issues.dlang.org/show_bug.cgi?id=10985
bool startOfLine()(dchar back, bool seenNl)
{
return ((back == '\r') ^ seenNl) || back == '\n'
@@ -786,3 +979,215 @@ struct CharMatcher {
return trie[ch];
}
}
+
+// Internal non-resizeble array, switches between inline storage and CoW
+// POD-only
+struct SmallFixedArray(T, uint SMALL=3)
+if (!hasElaborateDestructor!T)
+{
+ import std.internal.memory : enforceMalloc;
+ import core.memory : pureFree;
+ static struct Payload
+ {
+ size_t refcount;
+ T[0] placeholder;
+ inout(T)* ptr() inout { return placeholder.ptr; }
+ }
+ static assert(Payload.sizeof == size_t.sizeof);
+ union
+ {
+ Payload* big;
+ T[SMALL] small;
+ }
+ size_t _sizeMask;
+ enum BIG_MASK = size_t(1)<<(8*size_t.sizeof-1);
+ enum SIZE_MASK = ~BIG_MASK;
+
+ @property bool isBig() const { return (_sizeMask & BIG_MASK) != 0; }
+ @property size_t length() const { return _sizeMask & SIZE_MASK; }
+
+ this(size_t size)
+ {
+ if (size <= SMALL)
+ {
+ small[] = T.init;
+ _sizeMask = size;
+ }
+ else
+ {
+ big = cast(Payload*) enforceMalloc(Payload.sizeof + T.sizeof*size);
+ big.refcount = 1;
+ _sizeMask = size | BIG_MASK;
+ }
+ }
+
+ private @trusted @property inout(T)[] internalSlice() inout
+ {
+ return isBig ? big.ptr[0 .. length] : small[0 .. length];
+ }
+
+ this(this)
+ {
+ if (isBig)
+ {
+ big.refcount++;
+ }
+ }
+
+ bool opEquals(SmallFixedArray a)
+ {
+ return internalSlice[] == a.internalSlice[];
+ }
+
+ size_t toHash() const
+ {
+ return hashOf(internalSlice[]);
+ }
+
+ ref inout(T) opIndex(size_t idx) inout
+ {
+ return internalSlice[idx];
+ }
+
+ // accesses big to test self-referencing so not @safe
+ @trusted ref opAssign(SmallFixedArray arr)
+ {
+ if (isBig)
+ {
+ if (arr.isBig)
+ {
+ if (big is arr.big) return this; // self-assign
+ else
+ {
+ abandonRef();
+ _sizeMask = arr._sizeMask;
+ big = arr.big;
+ big.refcount++;
+ }
+ }
+ else
+ {
+ abandonRef();
+ _sizeMask = arr._sizeMask;
+ small = arr.small;
+ }
+ }
+ else
+ {
+ if (arr.isBig)
+ {
+ _sizeMask = arr._sizeMask;
+ big = arr.big;
+ big.refcount++;
+ }
+ else
+ {
+ _sizeMask = arr._sizeMask;
+ small = arr.small;
+ }
+ }
+ return this;
+ }
+
+ void mutate(scope void delegate(T[]) pure filler)
+ {
+ if (isBig && big.refcount != 1) // copy on write
+ {
+ auto oldSizeMask = _sizeMask;
+ auto newbig = cast(Payload*) enforceMalloc(Payload.sizeof + T.sizeof*length);
+ newbig.refcount = 1;
+ abandonRef();
+ big = newbig;
+ _sizeMask = oldSizeMask;
+ }
+ filler(internalSlice);
+ }
+
+ ~this()
+ {
+ if (isBig)
+ {
+ abandonRef();
+ }
+ }
+
+ @trusted private void abandonRef()
+ {
+ assert(isBig);
+ if (--big.refcount == 0)
+ {
+ pureFree(big);
+ _sizeMask = 0;
+ assert(!isBig);
+ }
+ }
+}
+
+@system unittest
+{
+ alias SA = SmallFixedArray!(int, 2);
+ SA create(int[] data)
+ {
+ SA a = SA(data.length);
+ a.mutate((slice) { slice[] = data[]; });
+ assert(a.internalSlice == data);
+ return a;
+ }
+
+ {
+ SA a;
+ a = SA(1);
+ assert(a.length == 1);
+ a = SA.init;
+ assert(a.length == 0);
+ }
+
+ {
+ SA a, b, c, d;
+ assert(a.length == 0);
+ assert(a.internalSlice == b.internalSlice);
+ a = create([1]);
+ assert(a.internalSlice == [1]);
+ b = create([2, 3]);
+ assert(b.internalSlice == [2, 3]);
+ c = create([3, 4, 5]);
+ d = create([5, 6, 7, 8]);
+ assert(c.isBig);
+ a = c;
+ assert(a.isBig);
+ assert(a.big is c.big);
+ assert(a.big.refcount == 2);
+ assert(a.internalSlice == [3, 4, 5]);
+ assert(c.internalSlice == [3, 4, 5]);
+ a = b;
+ assert(!a.isBig);
+ assert(a.internalSlice == [2, 3]);
+ assert(c.big.refcount == 1);
+ a = c;
+ assert(c.big.refcount == 2);
+
+ // mutate copies on write if ref-count is not 1
+ a.mutate((slice){ slice[] = 1; });
+ assert(a.internalSlice == [1, 1, 1]);
+ assert(c.internalSlice == [3, 4, 5]);
+ assert(a.isBig && c.isBig);
+ assert(a.big.refcount == 1);
+ assert(c.big.refcount == 1);
+
+ auto e = d;
+ assert(e.big.refcount == 2);
+ auto f = d;
+ f = a;
+ assert(f.isBig);
+ assert(f.internalSlice == [1, 1, 1]);
+ assert(f.big.refcount == 2); // a & f
+ assert(e.big.refcount == 2); // d & e
+ a = c;
+ assert(f.big.refcount == 1); // f
+ assert(e.big.refcount == 2); // d & e
+ a = a;
+ a = a;
+ a = a;
+ assert(a.big.refcount == 2); // a & c
+ }
+}
diff --git a/libphobos/src/std/regex/internal/kickstart.d b/libphobos/src/std/regex/internal/kickstart.d
index f303b43..4529205 100644
--- a/libphobos/src/std/regex/internal/kickstart.d
+++ b/libphobos/src/std/regex/internal/kickstart.d
@@ -393,7 +393,7 @@ public:
// has a useful trait: if supplied with valid UTF indexes,
// returns only valid UTF indexes
// (that given the haystack in question is valid UTF string)
- @trusted size_t search(const(Char)[] haystack, size_t idx)
+ @trusted size_t search(const(Char)[] haystack, size_t idx) const
{//@BUG: apparently assumes little endian machines
import core.stdc.string : memchr;
import std.conv : text;
@@ -517,8 +517,8 @@ public:
import std.conv, std.regex;
@trusted void test_fixed(alias Kick)()
{
- foreach (i, v; AliasSeq!(char, wchar, dchar))
- {
+ static foreach (i, v; AliasSeq!(char, wchar, dchar))
+ {{
alias Char = v;
alias String = immutable(v)[];
auto r = regex(to!String(`abc$`));
@@ -539,12 +539,12 @@ public:
assert(x == 3, text(Kick.stringof,v.stringof," == ", kick.length));
x = kick.search("aabaacaa", x+1);
assert(x == 8, text(Kick.stringof,v.stringof," == ", kick.length));
- }
+ }}
}
@trusted void test_flex(alias Kick)()
{
- foreach (i, v; AliasSeq!(char, wchar, dchar))
- {
+ static foreach (i, v; AliasSeq!(char, wchar, dchar))
+ {{
alias Char = v;
alias String = immutable(v)[];
auto r = regex(to!String(`abc[a-z]`));
@@ -570,7 +570,7 @@ public:
assert(kick.search("ababx",0) == 2);
assert(kick.search("abaacba",0) == 3);//expected inexact
- }
+ }}
}
test_fixed!(ShiftOr)();
test_flex!(ShiftOr)();
diff --git a/libphobos/src/std/regex/internal/parser.d b/libphobos/src/std/regex/internal/parser.d
index 8cabae5..41ca687 100644
--- a/libphobos/src/std/regex/internal/parser.d
+++ b/libphobos/src/std/regex/internal/parser.d
@@ -4,15 +4,19 @@
*/
module std.regex.internal.parser;
-static import std.ascii;
+import std.regex.internal.ir;
import std.range.primitives, std.uni, std.meta,
std.traits, std.typecons, std.exception;
-import std.regex.internal.ir;
+static import std.ascii;
// package relevant info from parser into a regex object
auto makeRegex(S, CG)(Parser!(S, CG) p)
{
- Regex!(BasicElementOf!S) re;
+ import std.regex.internal.backtracking : BacktrackingMatcher;
+ import std.regex.internal.thompson : ThompsonMatcher;
+ import std.algorithm.searching : canFind;
+ alias Char = BasicElementOf!S;
+ Regex!Char re;
auto g = p.g;
with(re)
{
@@ -24,7 +28,14 @@ auto makeRegex(S, CG)(Parser!(S, CG) p)
charsets = g.charsets;
matchers = g.matchers;
backrefed = g.backrefed;
+ re.pattern = p.origin.idup;
re.postprocess();
+ // check if we have backreferences, if so - use backtracking
+ if (__ctfe) factory = null; // allows us to use the awful enum re = regex(...);
+ else if (re.backrefed.canFind!"a != 0")
+ factory = new RuntimeFactory!(BacktrackingMatcher, Char);
+ else
+ factory = new RuntimeFactory!(ThompsonMatcher, Char);
debug(std_regex_parser)
{
__ctfe || print();
@@ -157,98 +168,6 @@ if (isSomeString!S)
code[] = rev[];
}
-//test if a given string starts with hex number of maxDigit that's a valid codepoint
-//returns it's value and skips these maxDigit chars on success, throws on failure
-dchar parseUniHex(Char)(ref Char[] str, size_t maxDigit)
-{
- //std.conv.parse is both @system and bogus
- enforce(str.length >= maxDigit,"incomplete escape sequence");
- uint val;
- for (int k = 0; k < maxDigit; k++)
- {
- immutable current = str[k];//accepts ascii only, so it's OK to index directly
- if ('0' <= current && current <= '9')
- val = val * 16 + current - '0';
- else if ('a' <= current && current <= 'f')
- val = val * 16 + current -'a' + 10;
- else if ('A' <= current && current <= 'F')
- val = val * 16 + current - 'A' + 10;
- else
- throw new Exception("invalid escape sequence");
- }
- enforce(val <= 0x10FFFF, "invalid codepoint");
- str = str[maxDigit..$];
- return val;
-}
-
-@system unittest //BUG canFind is system
-{
- import std.algorithm.searching : canFind;
- string[] non_hex = [ "000j", "000z", "FffG", "0Z"];
- string[] hex = [ "01", "ff", "00af", "10FFFF" ];
- int[] value = [ 1, 0xFF, 0xAF, 0x10FFFF ];
- foreach (v; non_hex)
- assert(collectException(parseUniHex(v, v.length)).msg
- .canFind("invalid escape sequence"));
- foreach (i, v; hex)
- assert(parseUniHex(v, v.length) == value[i]);
- string over = "0011FFFF";
- assert(collectException(parseUniHex(over, over.length)).msg
- .canFind("invalid codepoint"));
-}
-
-auto caseEnclose(CodepointSet set)
-{
- auto cased = set & unicode.LC;
- foreach (dchar ch; cased.byCodepoint)
- {
- foreach (c; simpleCaseFoldings(ch))
- set |= c;
- }
- return set;
-}
-
-/+
- fetch codepoint set corresponding to a name (InBlock or binary property)
-+/
-@trusted CodepointSet getUnicodeSet(in char[] name, bool negated, bool casefold)
-{
- CodepointSet s = unicode(name);
- //FIXME: caseEnclose for new uni as Set | CaseEnclose(SET && LC)
- if (casefold)
- s = caseEnclose(s);
- if (negated)
- s = s.inverted;
- return s;
-}
-
-//basic stack, just in case it gets used anywhere else then Parser
-@trusted struct Stack(T)
-{
- T[] data;
- @property bool empty(){ return data.empty; }
-
- @property size_t length(){ return data.length; }
-
- void push(T val){ data ~= val; }
-
- T pop()
- {
- assert(!empty);
- auto val = data[$ - 1];
- data = data[0 .. $ - 1];
- if (!__ctfe)
- cast(void) data.assumeSafeAppend();
- return val;
- }
-
- @property ref T top()
- {
- assert(!empty);
- return data[$ - 1];
- }
-}
-
struct CodeGen
{
Bytecode[] ir; // resulting bytecode
@@ -616,7 +535,7 @@ enum infinite = ~0u;
struct Parser(R, Generator)
if (isForwardRange!R && is(ElementType!R : dchar))
{
- dchar _current;
+ dchar front;
bool empty;
R pat, origin; //keep full pattern for pretty printing error messages
uint re_flags = 0; //global flags e.g. multiline + internal ones
@@ -628,8 +547,8 @@ if (isForwardRange!R && is(ElementType!R : dchar))
pat = origin = pattern;
//reserve slightly more then avg as sampled from unittests
parseFlags(flags);
- _current = ' ';//a safe default for freeform parsing
- next();
+ front = ' ';//a safe default for freeform parsing
+ popFront();
g.start(cast(uint) pat.length);
try
{
@@ -642,61 +561,47 @@ if (isForwardRange!R && is(ElementType!R : dchar))
g.endPattern(1);
}
- @property dchar current(){ return _current; }
-
- bool _next()
+ void _popFront()
{
if (pat.empty)
{
empty = true;
- return false;
}
- _current = pat.front;
- pat.popFront();
- return true;
+ else
+ {
+ front = pat.front;
+ pat.popFront();
+ }
}
void skipSpace()
{
- while (isWhite(current) && _next()){ }
+ while (!empty && isWhite(front)) _popFront();
}
- bool next()
+ void popFront()
{
- if (re_flags & RegexOption.freeform)
- {
- immutable r = _next();
- skipSpace();
- return r;
- }
- else
- return _next();
+ _popFront();
+ if (re_flags & RegexOption.freeform) skipSpace();
}
+ auto save(){ return this; }
+
//parsing number with basic overflow check
uint parseDecimal()
{
uint r = 0;
- while (std.ascii.isDigit(current))
+ while (std.ascii.isDigit(front))
{
if (r >= (uint.max/10))
error("Overflow in decimal number");
- r = 10*r + cast(uint)(current-'0');
- if (!next())
- break;
+ r = 10*r + cast(uint)(front-'0');
+ popFront();
+ if (empty) break;
}
return r;
}
- //parse control code of form \cXXX, c assumed to be the current symbol
- dchar parseControlCode()
- {
- enforce(next(), "Unfinished escape sequence");
- enforce(('a' <= current && current <= 'z') || ('A' <= current && current <= 'Z'),
- "Only letters are allowed after \\c");
- return current & 0x1f;
- }
-
//
@trusted void parseFlags(S)(S flags)
{//@@@BUG@@@ text is @system
@@ -730,73 +635,74 @@ if (isForwardRange!R && is(ElementType!R : dchar))
{
debug(std_regex_parser)
__ctfe || writeln("*LR*\nSource: ", pat, "\nStack: ",fixupStack.data);
- switch (current)
+ switch (front)
{
case '(':
- next();
- if (current == '?')
+ popFront();
+ if (front == '?')
{
- next();
- switch (current)
+ popFront();
+ switch (front)
{
case '#':
for (;;)
{
- if (!next())
- error("Unexpected end of pattern");
- if (current == ')')
+ popFront();
+ enforce(!empty, "Unexpected end of pattern");
+ if (front == ')')
{
- next();
+ popFront();
break;
}
}
break;
case ':':
g.genLogicGroup();
- next();
+ popFront();
break;
case '=':
g.genLookaround(IR.LookaheadStart);
- next();
+ popFront();
break;
case '!':
g.genLookaround(IR.NeglookaheadStart);
- next();
+ popFront();
break;
case 'P':
- next();
- if (current != '<')
- error("Expected '<' in named group");
+ popFront();
+ enforce(front == '<', "Expected '<' in named group");
string name;
- if (!next() || !(isAlpha(current) || current == '_'))
+ popFront();
+ if (empty || !(isAlpha(front) || front == '_'))
error("Expected alpha starting a named group");
- name ~= current;
- while (next() && (isAlpha(current) ||
- current == '_' || std.ascii.isDigit(current)))
+ name ~= front;
+ popFront();
+ while (!empty && (isAlpha(front) ||
+ front == '_' || std.ascii.isDigit(front)))
{
- name ~= current;
+ name ~= front;
+ popFront();
}
- if (current != '>')
- error("Expected '>' closing named group");
- next();
+ enforce(front == '>', "Expected '>' closing named group");
+ popFront();
g.genNamedGroup(name);
break;
case '<':
- next();
- if (current == '=')
+ popFront();
+ if (front == '=')
g.genLookaround(IR.LookbehindStart);
- else if (current == '!')
+ else if (front == '!')
g.genLookaround(IR.NeglookbehindStart);
else
error("'!' or '=' expected after '<'");
- next();
+ popFront();
break;
default:
uint enableFlags, disableFlags;
bool enable = true;
do
{
- switch (current)
+ switch (front)
{
case 's':
if (enable)
@@ -830,9 +736,9 @@ if (isForwardRange!R && is(ElementType!R : dchar))
default:
error(" 's', 'x', 'i', 'm' or '-' expected after '(?' ");
}
- next();
- }while (current != ')');
- next();
+ popFront();
+ }while (front != ')');
+ popFront();
re_flags |= enableFlags;
re_flags &= ~disableFlags;
}
@@ -844,13 +750,13 @@ if (isForwardRange!R && is(ElementType!R : dchar))
break;
case ')':
enforce(g.nesting, "Unmatched ')'");
- next();
+ popFront();
auto pair = g.onClose();
if (pair[0])
parseQuantifier(pair[1]);
break;
case '|':
- next();
+ popFront();
g.fixAlternation();
break;
default://no groups or whatever
@@ -875,7 +781,7 @@ if (isForwardRange!R && is(ElementType!R : dchar))
if (empty)
return g.fixRepetition(offset);
uint min, max;
- switch (current)
+ switch (front)
{
case '*':
min = 0;
@@ -890,28 +796,27 @@ if (isForwardRange!R && is(ElementType!R : dchar))
max = infinite;
break;
case '{':
- enforce(next(), "Unexpected end of regex pattern");
- enforce(std.ascii.isDigit(current), "First number required in repetition");
+ popFront();
+ enforce(!empty, "Unexpected end of regex pattern");
+ enforce(std.ascii.isDigit(front), "First number required in repetition");
min = parseDecimal();
- if (current == '}')
+ if (front == '}')
max = min;
- else if (current == ',')
+ else if (front == ',')
{
- next();
- if (std.ascii.isDigit(current))
+ popFront();
+ if (std.ascii.isDigit(front))
max = parseDecimal();
- else if (current == '}')
+ else if (front == '}')
max = infinite;
else
error("Unexpected symbol in regex pattern");
skipSpace();
- if (current != '}')
- error("Unmatched '{' in regex pattern");
+ enforce(front == '}', "Unmatched '{' in regex pattern");
}
else
error("Unexpected symbol in regex pattern");
- if (min > max)
- error("Illegal {n,m} quantifier");
+ enforce(min <= max, "Illegal {n,m} quantifier");
break;
default:
g.fixRepetition(offset);
@@ -919,10 +824,11 @@ if (isForwardRange!R && is(ElementType!R : dchar))
}
bool greedy = true;
//check only if we managed to get new symbol
- if (next() && current == '?')
+ popFront();
+ if (!empty && front == '?')
{
greedy = false;
- next();
+ popFront();
}
g.fixRepetition(offset, min, max, greedy);
}
@@ -932,11 +838,10 @@ if (isForwardRange!R && is(ElementType!R : dchar))
{
if (empty)
return;
- switch (current)
+ switch (front)
{
case '*', '?', '+', '|', '{', '}':
error("'*', '+', '?', '{', '}' not allowed in atom");
- break;
case '.':
if (re_flags & RegexOption.singleline)
g.put(Bytecode(IR.Any, 0));
@@ -945,13 +850,14 @@ if (isForwardRange!R && is(ElementType!R : dchar))
CodepointSet set;
g.charsetToIr(set.add('\n','\n'+1).add('\r', '\r'+1).inverted);
}
- next();
+ popFront();
break;
case '[':
parseCharset();
break;
case '\\':
- enforce(_next(), "Unfinished escape sequence");
+ _popFront();
+ enforce(!empty, "Unfinished escape sequence");
parseEscape();
break;
case '^':
@@ -959,20 +865,19 @@ if (isForwardRange!R && is(ElementType!R : dchar))
g.put(Bytecode(IR.Bol, 0));
else
g.put(Bytecode(IR.Bof, 0));
- next();
+ popFront();
break;
case '$':
if (re_flags & RegexOption.multiline)
g.put(Bytecode(IR.Eol, 0));
else
g.put(Bytecode(IR.Eof, 0));
- next();
+ popFront();
break;
default:
- //FIXME: getCommonCasing in new std uni
if (re_flags & RegexOption.casefold)
{
- auto range = simpleCaseFoldings(current);
+ auto range = simpleCaseFoldings(front);
assert(range.length <= 5);
if (range.length == 1)
g.put(Bytecode(IR.Char, range.front));
@@ -981,507 +886,96 @@ if (isForwardRange!R && is(ElementType!R : dchar))
g.put(Bytecode(IR.OrChar, v, cast(uint) range.length));
}
else
- g.put(Bytecode(IR.Char, current));
- next();
+ g.put(Bytecode(IR.Char, front));
+ popFront();
}
}
-
-
- //CodepointSet operations relatively in order of priority
- enum Operator:uint {
- Open = 0, Negate, Difference, SymDifference, Intersection, Union, None
- }
-
- //parse unit of CodepointSet spec, most notably escape sequences and char ranges
- //also fetches next set operation
- Tuple!(CodepointSet,Operator) parseCharTerm()
- {
- enum State{ Start, Char, Escape, CharDash, CharDashEscape,
- PotentialTwinSymbolOperator }
- Operator op = Operator.None;
- dchar last;
- CodepointSet set;
- State state = State.Start;
-
- static void addWithFlags(ref CodepointSet set, uint ch, uint re_flags)
- {
- if (re_flags & RegexOption.casefold)
- {
- auto range = simpleCaseFoldings(ch);
- foreach (v; range)
- set |= v;
- }
- else
- set |= ch;
- }
-
- static Operator twinSymbolOperator(dchar symbol)
- {
- switch (symbol)
- {
- case '|':
- return Operator.Union;
- case '-':
- return Operator.Difference;
- case '~':
- return Operator.SymDifference;
- case '&':
- return Operator.Intersection;
- default:
- assert(false);
- }
- }
-
- L_CharTermLoop:
- for (;;)
- {
- final switch (state)
- {
- case State.Start:
- switch (current)
- {
- case '|':
- case '-':
- case '~':
- case '&':
- state = State.PotentialTwinSymbolOperator;
- last = current;
- break;
- case '[':
- op = Operator.Union;
- goto case;
- case ']':
- break L_CharTermLoop;
- case '\\':
- state = State.Escape;
- break;
- default:
- state = State.Char;
- last = current;
- }
- break;
- case State.Char:
- // xxx last current xxx
- switch (current)
- {
- case '|':
- case '~':
- case '&':
- // then last is treated as normal char and added as implicit union
- state = State.PotentialTwinSymbolOperator;
- addWithFlags(set, last, re_flags);
- last = current;
- break;
- case '-': // still need more info
- state = State.CharDash;
- break;
- case '\\':
- set |= last;
- state = State.Escape;
- break;
- case '[':
- op = Operator.Union;
- goto case;
- case ']':
- addWithFlags(set, last, re_flags);
- break L_CharTermLoop;
- default:
- state = State.Char;
- addWithFlags(set, last, re_flags);
- last = current;
- }
- break;
- case State.PotentialTwinSymbolOperator:
- // xxx last current xxxx
- // where last = [|-&~]
- if (current == last)
- {
- op = twinSymbolOperator(last);
- next();//skip second twin char
- break L_CharTermLoop;
- }
- goto case State.Char;
- case State.Escape:
- // xxx \ current xxx
- switch (current)
- {
- case 'f':
- last = '\f';
- state = State.Char;
- break;
- case 'n':
- last = '\n';
- state = State.Char;
- break;
- case 'r':
- last = '\r';
- state = State.Char;
- break;
- case 't':
- last = '\t';
- state = State.Char;
- break;
- case 'v':
- last = '\v';
- state = State.Char;
- break;
- case 'c':
- last = parseControlCode();
- state = State.Char;
- break;
- foreach (val; Escapables)
- {
- case val:
- }
- last = current;
- state = State.Char;
- break;
- case 'p':
- set.add(parseUnicodePropertySpec(false));
- state = State.Start;
- continue L_CharTermLoop; //next char already fetched
- case 'P':
- set.add(parseUnicodePropertySpec(true));
- state = State.Start;
- continue L_CharTermLoop; //next char already fetched
- case 'x':
- last = parseUniHex(pat, 2);
- state = State.Char;
- break;
- case 'u':
- last = parseUniHex(pat, 4);
- state = State.Char;
- break;
- case 'U':
- last = parseUniHex(pat, 8);
- state = State.Char;
- break;
- case 'd':
- set.add(unicode.Nd);
- state = State.Start;
- break;
- case 'D':
- set.add(unicode.Nd.inverted);
- state = State.Start;
- break;
- case 's':
- set.add(unicode.White_Space);
- state = State.Start;
- break;
- case 'S':
- set.add(unicode.White_Space.inverted);
- state = State.Start;
- break;
- case 'w':
- set.add(wordCharacter);
- state = State.Start;
- break;
- case 'W':
- set.add(wordCharacter.inverted);
- state = State.Start;
- break;
- default:
- if (current >= privateUseStart && current <= privateUseEnd)
- enforce(false, "no matching ']' found while parsing character class");
- enforce(false, "invalid escape sequence");
- }
- break;
- case State.CharDash:
- // xxx last - current xxx
- switch (current)
- {
- case '[':
- op = Operator.Union;
- goto case;
- case ']':
- //means dash is a single char not an interval specifier
- addWithFlags(set, last, re_flags);
- addWithFlags(set, '-', re_flags);
- break L_CharTermLoop;
- case '-'://set Difference again
- addWithFlags(set, last, re_flags);
- op = Operator.Difference;
- next();//skip '-'
- break L_CharTermLoop;
- case '\\':
- state = State.CharDashEscape;
- break;
- default:
- enforce(last <= current, "inverted range");
- if (re_flags & RegexOption.casefold)
- {
- for (uint ch = last; ch <= current; ch++)
- addWithFlags(set, ch, re_flags);
- }
- else
- set.add(last, current + 1);
- state = State.Start;
- }
- break;
- case State.CharDashEscape:
- //xxx last - \ current xxx
- uint end;
- switch (current)
- {
- case 'f':
- end = '\f';
- break;
- case 'n':
- end = '\n';
- break;
- case 'r':
- end = '\r';
- break;
- case 't':
- end = '\t';
- break;
- case 'v':
- end = '\v';
- break;
- foreach (val; Escapables)
- {
- case val:
- }
- end = current;
- break;
- case 'c':
- end = parseControlCode();
- break;
- case 'x':
- end = parseUniHex(pat, 2);
- break;
- case 'u':
- end = parseUniHex(pat, 4);
- break;
- case 'U':
- end = parseUniHex(pat, 8);
- break;
- default:
- if (current >= privateUseStart && current <= privateUseEnd)
- enforce(false, "no matching ']' found while parsing character class");
- error("invalid escape sequence");
- }
- // Lookahead to check if it's a \T
- // where T is sub-pattern terminator in multi-pattern scheme
- if (end == '\\' && !pat.empty)
- {
- if (pat.front >= privateUseStart && pat.front <= privateUseEnd)
- enforce(false, "invalid escape sequence");
- }
- enforce(last <= end,"inverted range");
- set.add(last, end + 1);
- state = State.Start;
- break;
- }
- enforce(next(), "unexpected end of CodepointSet");
- }
- return tuple(set, op);
- }
-
- alias ValStack = Stack!(CodepointSet);
- alias OpStack = Stack!(Operator);
-
//parse and store IR for CodepointSet
void parseCharset()
{
const save = re_flags;
re_flags &= ~RegexOption.freeform; // stop ignoring whitespace if we did
- parseCharsetImpl();
+ bool casefold = cast(bool)(re_flags & RegexOption.casefold);
+ g.charsetToIr(unicode.parseSet(this, casefold));
re_flags = save;
- // Last next() in parseCharsetImp is executed w/o freeform flag
+ // Last next() in parseCharset is executed w/o freeform flag
if (re_flags & RegexOption.freeform) skipSpace();
}
- void parseCharsetImpl()
- {
- ValStack vstack;
- OpStack opstack;
- import std.functional : unaryFun;
- //
- static bool apply(Operator op, ref ValStack stack)
- {
- switch (op)
- {
- case Operator.Negate:
- enforce(!stack.empty, "no operand for '^'");
- stack.top = stack.top.inverted;
- break;
- case Operator.Union:
- auto s = stack.pop();//2nd operand
- enforce(!stack.empty, "no operand for '||'");
- stack.top.add(s);
- break;
- case Operator.Difference:
- auto s = stack.pop();//2nd operand
- enforce(!stack.empty, "no operand for '--'");
- stack.top.sub(s);
- break;
- case Operator.SymDifference:
- auto s = stack.pop();//2nd operand
- enforce(!stack.empty, "no operand for '~~'");
- stack.top ~= s;
- break;
- case Operator.Intersection:
- auto s = stack.pop();//2nd operand
- enforce(!stack.empty, "no operand for '&&'");
- stack.top.intersect(s);
- break;
- default:
- return false;
- }
- return true;
- }
- static bool unrollWhile(alias cond)(ref ValStack vstack, ref OpStack opstack)
- {
- while (cond(opstack.top))
- {
- if (!apply(opstack.pop(),vstack))
- return false;//syntax error
- if (opstack.empty)
- return false;
- }
- return true;
- }
-
- L_CharsetLoop:
- do
- {
- switch (current)
- {
- case '[':
- opstack.push(Operator.Open);
- enforce(next(), "unexpected end of character class");
- if (current == '^')
- {
- opstack.push(Operator.Negate);
- enforce(next(), "unexpected end of character class");
- }
- else if (current == ']') // []...] is special cased
- {
- enforce(next(), "wrong character set");
- auto pair = parseCharTerm();
- pair[0].add(']', ']'+1);
- if (pair[1] != Operator.None)
- {
- if (opstack.top == Operator.Union)
- unrollWhile!(unaryFun!"a == a.Union")(vstack, opstack);
- opstack.push(pair[1]);
- }
- vstack.push(pair[0]);
- }
- break;
- case ']':
- enforce(unrollWhile!(unaryFun!"a != a.Open")(vstack, opstack),
- "character class syntax error");
- enforce(!opstack.empty, "unmatched ']'");
- opstack.pop();
- if (!next() || opstack.empty)
- break L_CharsetLoop;
- auto pair = parseCharTerm();
- if (!pair[0].empty)//not only operator e.g. -- or ~~
- {
- vstack.top.add(pair[0]);//apply union
- }
- if (pair[1] != Operator.None)
- {
- if (opstack.top == Operator.Union)
- unrollWhile!(unaryFun!"a == a.Union")(vstack, opstack);
- opstack.push(pair[1]);
- }
- break;
- //
- default://yet another pair of term(op)?
- auto pair = parseCharTerm();
- if (pair[1] != Operator.None)
- {
- if (opstack.top == Operator.Union)
- unrollWhile!(unaryFun!"a == a.Union")(vstack, opstack);
- opstack.push(pair[1]);
- }
- vstack.push(pair[0]);
- }
- }while (!empty || !opstack.empty);
- while (!opstack.empty)
- {
- enforce(opstack.top != Operator.Open,
- "no matching ']' found while parsing character class");
- apply(opstack.pop(), vstack);
- }
- assert(vstack.length == 1);
- g.charsetToIr(vstack.top);
- }
-
//parse and generate IR for escape stand alone escape sequence
@trusted void parseEscape()
{//accesses array of appender
import std.algorithm.iteration : sum;
- switch (current)
+ switch (front)
{
- case 'f': next(); g.put(Bytecode(IR.Char, '\f')); break;
- case 'n': next(); g.put(Bytecode(IR.Char, '\n')); break;
- case 'r': next(); g.put(Bytecode(IR.Char, '\r')); break;
- case 't': next(); g.put(Bytecode(IR.Char, '\t')); break;
- case 'v': next(); g.put(Bytecode(IR.Char, '\v')); break;
+ case 'f': popFront(); g.put(Bytecode(IR.Char, '\f')); break;
+ case 'n': popFront(); g.put(Bytecode(IR.Char, '\n')); break;
+ case 'r': popFront(); g.put(Bytecode(IR.Char, '\r')); break;
+ case 't': popFront(); g.put(Bytecode(IR.Char, '\t')); break;
+ case 'v': popFront(); g.put(Bytecode(IR.Char, '\v')); break;
case 'd':
- next();
+ popFront();
g.charsetToIr(unicode.Nd);
break;
case 'D':
- next();
+ popFront();
g.charsetToIr(unicode.Nd.inverted);
break;
- case 'b': next(); g.put(Bytecode(IR.Wordboundary, 0)); break;
- case 'B': next(); g.put(Bytecode(IR.Notwordboundary, 0)); break;
+ case 'b': popFront(); g.put(Bytecode(IR.Wordboundary, 0)); break;
+ case 'B': popFront(); g.put(Bytecode(IR.Notwordboundary, 0)); break;
case 's':
- next();
+ popFront();
g.charsetToIr(unicode.White_Space);
break;
case 'S':
- next();
+ popFront();
g.charsetToIr(unicode.White_Space.inverted);
break;
case 'w':
- next();
+ popFront();
g.charsetToIr(wordCharacter);
break;
case 'W':
- next();
+ popFront();
g.charsetToIr(wordCharacter.inverted);
break;
case 'p': case 'P':
- auto CodepointSet = parseUnicodePropertySpec(current == 'P');
- g.charsetToIr(CodepointSet);
+ bool casefold = cast(bool)(re_flags & RegexOption.casefold);
+ auto set = unicode.parsePropertySpec(this, front == 'P', casefold);
+ g.charsetToIr(set);
break;
case 'x':
immutable code = parseUniHex(pat, 2);
- next();
+ popFront();
g.put(Bytecode(IR.Char,code));
break;
case 'u': case 'U':
- immutable code = parseUniHex(pat, current == 'u' ? 4 : 8);
- next();
+ immutable code = parseUniHex(pat, front == 'u' ? 4 : 8);
+ popFront();
g.put(Bytecode(IR.Char, code));
break;
case 'c': //control codes
- Bytecode code = Bytecode(IR.Char, parseControlCode());
- next();
+ Bytecode code = Bytecode(IR.Char, unicode.parseControlCode(this));
+ popFront();
g.put(code);
break;
case '0':
- next();
+ popFront();
g.put(Bytecode(IR.Char, 0));//NUL character
break;
case '1': .. case '9':
- uint nref = cast(uint) current - '0';
+ uint nref = cast(uint) front - '0';
immutable maxBackref = sum(g.groupStack.data);
enforce(nref < maxBackref, "Backref to unseen group");
//perl's disambiguation rule i.e.
//get next digit only if there is such group number
- while (nref < maxBackref && next() && std.ascii.isDigit(current))
+ popFront();
+ while (nref < maxBackref && !empty && std.ascii.isDigit(front))
{
- nref = nref * 10 + current - '0';
+ nref = nref * 10 + front - '0';
+ popFront();
}
if (nref >= maxBackref)
nref /= 10;
@@ -1497,57 +991,27 @@ if (isForwardRange!R && is(ElementType!R : dchar))
g.markBackref(nref);
break;
default:
- // Lookahead to check if it's a \T
- // where T is sub-pattern terminator in multi-pattern scheme
- if (current == '\\' && !pat.empty)
+ if (front == '\\' && !pat.empty)
{
- if (pat.front >= privateUseStart && current <= privateUseEnd)
+ if (pat.front >= privateUseStart && pat.front <= privateUseEnd)
enforce(false, "invalid escape sequence");
}
- if (current >= privateUseStart && current <= privateUseEnd)
+ if (front >= privateUseStart && front <= privateUseEnd)
{
- g.endPattern(current - privateUseStart + 1);
+ g.endPattern(front - privateUseStart + 1);
break;
}
- auto op = Bytecode(IR.Char, current);
- next();
+ auto op = Bytecode(IR.Char, front);
+ popFront();
g.put(op);
}
}
- //parse and return a CodepointSet for \p{...Property...} and \P{...Property..},
- //\ - assumed to be processed, p - is current
- CodepointSet parseUnicodePropertySpec(bool negated)
- {
- enum MAX_PROPERTY = 128;
- char[MAX_PROPERTY] result;
- uint k = 0;
- enforce(next(), "eof parsing unicode property spec");
- if (current == '{')
- {
- while (k < MAX_PROPERTY && next() && current !='}' && current !=':')
- if (current != '-' && current != ' ' && current != '_')
- result[k++] = cast(char) std.ascii.toLower(current);
- enforce(k != MAX_PROPERTY, "invalid property name");
- enforce(current == '}', "} expected ");
- }
- else
- {//single char properties e.g.: \pL, \pN ...
- enforce(current < 0x80, "invalid property name");
- result[k++] = cast(char) current;
- }
- auto s = getUnicodeSet(result[0 .. k], negated,
- cast(bool)(re_flags & RegexOption.casefold));
- enforce(!s.empty, "unrecognized unicode property spec");
- next();
- return s;
- }
-
//
@trusted void error(string msg)
{
import std.array : appender;
- import std.format : formattedWrite;
+ import std.format.write : formattedWrite;
auto app = appender!string();
formattedWrite(app, "%s\nPattern with error: `%s` <--HERE-- `%s`",
msg, origin[0..$-pat.length], pat);
diff --git a/libphobos/src/std/regex/internal/tests.d b/libphobos/src/std/regex/internal/tests.d
index fe75ce0..8a0fec1 100644
--- a/libphobos/src/std/regex/internal/tests.d
+++ b/libphobos/src/std/regex/internal/tests.d
@@ -8,9 +8,9 @@ package(std.regex):
import std.conv, std.exception, std.meta, std.range,
std.typecons, std.regex;
-import std.regex.internal.ir : Escapables; // characters that need escaping
+import std.uni : Escapables; // characters that need escaping
-alias Sequence(int B, int E) = staticIota!(B, E);
+debug(std_regex_test) import std.stdio;
@safe unittest
{//sanity checks
@@ -353,8 +353,8 @@ alias Sequence(int B, int E) = staticIota!(B, E);
void run_tests(alias matchFn)()
{
int i;
- foreach (Char; AliasSeq!( char, wchar, dchar))
- (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
+ static foreach (Char; AliasSeq!( char, wchar, dchar))
+ {{
alias String = immutable(Char)[];
String produceExpected(M,Range)(auto ref M m, Range fmt)
{
@@ -397,7 +397,7 @@ alias Sequence(int B, int E) = staticIota!(B, E);
}
}
}
- }();
+ }}
debug(std_regex_test) writeln("!!! FReD bulk test done "~matchFn.stringof~" !!!");
}
@@ -408,28 +408,28 @@ alias Sequence(int B, int E) = staticIota!(B, E);
version (std_regex_ct1)
{
pragma(msg, "Testing 1st part of ctRegex");
- alias Tests = Sequence!(0, 155);
+ enum Tests = iota(0, 155);
}
else version (std_regex_ct2)
{
pragma(msg, "Testing 2nd part of ctRegex");
- alias Tests = Sequence!(155, 174);
+ enum Tests = iota(155, 174);
}
//FIXME: #174-178 contains CTFE parser bug
else version (std_regex_ct3)
{
pragma(msg, "Testing 3rd part of ctRegex");
- alias Tests = Sequence!(178, 220);
+ enum Tests = iota(178, 220);
}
else version (std_regex_ct4)
{
pragma(msg, "Testing 4th part of ctRegex");
- alias Tests = Sequence!(220, tv.length);
+ enum Tests = iota(220, tv.length);
}
else
- alias Tests = AliasSeq!(Sequence!(0, 30), Sequence!(235, tv.length-5));
- foreach (a, v; Tests)
- (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
+ enum Tests = chain(iota(0, 30), iota(235, tv.length-5));
+ static foreach (v; Tests)
+ {{
enum tvd = tv[v];
static if (tvd.result == "c")
{
@@ -443,22 +443,20 @@ alias Sequence(int B, int E) = staticIota!(B, E);
auto r = ctRegex!(tv[v].pattern, tv[v].flags);
auto nr = regex(tvd.pattern, tvd.flags);
assert(equal(r.ir, nr.ir),
- text("!C-T regex! failed to compile pattern #", a ,": ", tvd.pattern));
+ text("!C-T regex! failed to compile pattern #", v ,": ", tvd.pattern));
auto m = match(tvd.input, r);
auto c = tvd.result[0];
bool ok = (c == 'y') ^ m.empty;
assert(ok, text("ctRegex: failed to match pattern #",
- a ,": ", tvd.pattern));
+ v ,": ", tvd.pattern));
if (c == 'y')
{
- import std.stdio;
auto result = produceExpected(m, tvd.format);
- if (result != tvd.replace)
- writeln("ctRegex mismatch pattern #", a, ": ", tvd.pattern," expected: ",
- tvd.replace, " vs ", result);
+ assert(result == tvd.replace, text("ctRegex mismatch pattern #", v,
+ ": ", tvd.pattern," expected: ", tvd.replace, " vs ", result));
}
}
- }();
+ }}
debug(std_regex_test) writeln("!!! FReD C-T test done !!!");
}
diff --git a/libphobos/src/std/regex/internal/tests2.d b/libphobos/src/std/regex/internal/tests2.d
index 420f8d3..0c9f23d 100644
--- a/libphobos/src/std/regex/internal/tests2.d
+++ b/libphobos/src/std/regex/internal/tests2.d
@@ -7,7 +7,7 @@ package(std.regex):
import std.conv, std.exception, std.meta, std.range,
std.typecons, std.regex;
-import std.regex.internal.ir : Escapables; // characters that need escaping
+import std.uni : Escapables; // characters that need escaping
@safe unittest
{
@@ -60,11 +60,11 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
{
import std.algorithm.comparison : equal;
auto rtr = regex("a|b|c");
- enum ctr = regex("a|b|c");
+ static ctr = regex("a|b|c");
assert(equal(rtr.ir,ctr.ir));
//CTFE parser BUG is triggered by group
//in the middle of alternation (at least not first and not last)
- enum testCT = regex(`abc|(edf)|xyz`);
+ static testCT = regex(`abc|(edf)|xyz`);
auto testRT = regex(`abc|(edf)|xyz`);
assert(equal(testCT.ir,testRT.ir));
}
@@ -149,7 +149,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
import std.algorithm.iteration : map;
void test_body(alias matchFn)()
{
- //issue 5857
+ // https://issues.dlang.org/show_bug.cgi?id=5857
//matching goes out of control if ... in (...){x} has .*/.+
auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
assert(c[0] == "axxxzayyyyyzd");
@@ -157,7 +157,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
assert(c2[0] == "axxxayyyyyd");
assert(c2[1] == "ayyyyy");
- //issue 2108
+ // https://issues.dlang.org/show_bug.cgi?id=2108
//greedy vs non-greedy
auto nogreed = regex("<packet.*?/packet>");
assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit
@@ -165,7 +165,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
auto greed = regex("<packet.*/packet>");
assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit
== "<packet>text</packet><packet>text</packet>");
- //issue 4574
+ // https://issues.dlang.org/show_bug.cgi?id=4574
//empty successful match still advances the input
string[] pres, posts, hits;
foreach (m; matchFn("abcabc", regex("","g")))
@@ -195,7 +195,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
];
assert(pres == array(retro(heads)));
assert(posts == tails);
- //issue 6076
+ // https://issues.dlang.org/show_bug.cgi?id=6076
//regression on .*
auto re = regex("c.*|d");
auto m = matchFn("mm", re);
@@ -214,7 +214,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(!match(to!string(ch),regex(`[^\`~ch~`]`)));
assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
}
- //bugzilla 7718
+ // https://issues.dlang.org/show_bug.cgi?id=7718
string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
auto reStrCmd = regex (`(".*")|('.*')`, "g");
assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
@@ -231,8 +231,8 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
{
import std.uni : toUpper;
- foreach (i, v; AliasSeq!(string, wstring, dstring))
- {
+ static foreach (i, v; AliasSeq!(string, wstring, dstring))
+ {{
auto baz(Cap)(Cap m)
if (is(Cap == Captures!(Cap.String)))
{
@@ -251,7 +251,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."),
regex(to!String("[ar]"), "g"));
assert(s == "StRAp A Rocket engine on A chicken.");
- }
+ }}
debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~" !!!");
}
test!(bmatch)();
@@ -293,24 +293,29 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(equal(split(s1, regex(", *")), w1[]));
}
+// https://issues.dlang.org/show_bug.cgi?id=7141
@safe unittest
-{ // bugzilla 7141
+{
string pattern = `[a\--b]`;
assert(match("-", pattern));
assert(match("b", pattern));
string pattern2 = `[&-z]`;
assert(match("b", pattern2));
}
+
+// https://issues.dlang.org/show_bug.cgi?id=7111
@safe unittest
-{//bugzilla 7111
+{
assert(match("", regex("^")));
}
+
+// https://issues.dlang.org/show_bug.cgi?id=7300
@safe unittest
-{//bugzilla 7300
+{
assert(!match("a"d, "aa"d));
}
-// bugzilla 7551
+// https://issues.dlang.org/show_bug.cgi?id=7551
@safe unittest
{
auto r = regex("[]abc]*");
@@ -320,25 +325,30 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert("]ac".matchFirst(r2).hit == "]");
}
+// https://issues.dlang.org/show_bug.cgi?id=7674
@safe unittest
-{//bugzilla 7674
+{
assert("1234".replace(regex("^"), "$$") == "$1234");
assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?");
assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?");
}
+
+// https://issues.dlang.org/show_bug.cgi?id=7679
@safe unittest
-{// bugzilla 7679
+{
import std.algorithm.comparison : equal;
- foreach (S; AliasSeq!(string, wstring, dstring))
- (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
+ static foreach (S; AliasSeq!(string, wstring, dstring))
+ {{
enum re = ctRegex!(to!S(r"\."));
auto str = to!S("a.b");
assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")]));
assert(split(str, re) == [to!S("a"), to!S("b")]);
- }();
+ }}
}
+
+// https://issues.dlang.org/show_bug.cgi?id=8203
@safe unittest
-{//bugzilla 8203
+{
string data = "
NAME = XPAW01_STA:STATION
NAME = XPAW01_STA
@@ -353,13 +363,15 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
match("аллея Театральная", r2);
}
+
+// https://issues.dlang.org/show_bug.cgi?id=8637 purity of enforce
@safe unittest
-{// bugzilla 8637 purity of enforce
+{
auto m = match("hello world", regex("world"));
enforce(m);
}
-// bugzilla 8725
+// https://issues.dlang.org/show_bug.cgi?id=8725
@safe unittest
{
static italic = regex( r"\*
@@ -372,7 +384,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
"this * is* interesting, <i>very</i> interesting");
}
-// bugzilla 8349
+// https://issues.dlang.org/show_bug.cgi?id=8349
@safe unittest
{
enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
@@ -381,7 +393,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex));
}
-// bugzilla 9211
+// https://issues.dlang.org/show_bug.cgi?id=9211
@safe unittest
{
import std.algorithm.comparison : equal;
@@ -393,7 +405,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(equal(m2.front, ["1234", "3", "4"]));
}
-// bugzilla 9280
+// https://issues.dlang.org/show_bug.cgi?id=9280
@safe unittest
{
string tomatch = "a!b@c";
@@ -406,7 +418,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
}
-// bugzilla 9579
+// https://issues.dlang.org/show_bug.cgi?id=9579
@safe unittest
{
char[] input = ['a', 'b', 'c'];
@@ -417,14 +429,14 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(r == "(a)bc");
}
-// bugzilla 9634
+// https://issues.dlang.org/show_bug.cgi?id=9634
@safe unittest
{
auto re = ctRegex!"(?:a+)";
assert(match("aaaa", re).hit == "aaaa");
}
-//bugzilla 10798
+// https://issues.dlang.org/show_bug.cgi?id=10798
@safe unittest
{
auto cr = ctRegex!("[abcd--c]*");
@@ -433,7 +445,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(m.hit == "ab");
}
-// bugzilla 10913
+// https://issues.dlang.org/show_bug.cgi?id=10913
@system unittest
{
@system static string foo(const(char)[] s)
@@ -452,7 +464,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
}();
}
-// bugzilla 11262
+// https://issues.dlang.org/show_bug.cgi?id=11262
@safe unittest
{
enum reg = ctRegex!(r",", "g");
@@ -461,13 +473,13 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(str == "This-List");
}
-// bugzilla 11775
+// https://issues.dlang.org/show_bug.cgi?id=11775
@safe unittest
{
assert(collectException(regex("a{1,0}")));
}
-// bugzilla 11839
+// https://issues.dlang.org/show_bug.cgi?id=11839
@safe unittest
{
import std.algorithm.comparison : equal;
@@ -478,7 +490,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"]));
}
-// bugzilla 12076
+// https://issues.dlang.org/show_bug.cgi?id=12076
@safe unittest
{
auto RE = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
@@ -486,7 +498,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
auto m = match(s, RE);
}
-// bugzilla 12105
+// https://issues.dlang.org/show_bug.cgi?id=12105
@safe unittest
{
auto r = ctRegex!`.*?(?!a)`;
@@ -495,14 +507,14 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert("aaab".matchFirst(r2).hit == "aaab");
}
-//bugzilla 11784
+// https://issues.dlang.org/show_bug.cgi?id=11784
@safe unittest
{
assert("abcdefghijklmnopqrstuvwxyz"
.matchFirst("[a-z&&[^aeiuo]]").hit == "b");
}
-//bugzilla 12366
+// https://issues.dlang.org/show_bug.cgi?id=12366
@safe unittest
{
auto re = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);
@@ -510,27 +522,27 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(!"xxxx".match(re).empty);
}
-// bugzilla 12582
+// https://issues.dlang.org/show_bug.cgi?id=12582
@safe unittest
{
auto r = regex(`(?P<a>abc)`);
assert(collectException("abc".matchFirst(r)["b"]));
}
-// bugzilla 12691
+// https://issues.dlang.org/show_bug.cgi?id=12691
@safe unittest
{
assert(bmatch("e@", "^([a-z]|)*$").empty);
assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty);
}
-//bugzilla 12713
+// https://issues.dlang.org/show_bug.cgi?id=12713
@safe unittest
{
assertThrown(regex("[[a-z]([a-z]|(([[a-z])))"));
}
-//bugzilla 12747
+// https://issues.dlang.org/show_bug.cgi?id=12747
@safe unittest
{
assertThrown(regex(`^x(\1)`));
@@ -538,14 +550,44 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assertThrown(regex(`^((x)(?=\1))`));
}
-// bugzilla 14504
+// https://issues.dlang.org/show_bug.cgi?id=13532
+version (none) // TODO: revist once we have proper benchmark framework
+@safe unittest
+{
+ import std.datetime.stopwatch : StopWatch, AutoStart;
+ import std.math.algebraic : abs;
+ import std.conv : to;
+ enum re1 = ctRegex!`[0-9][0-9]`;
+ immutable static re2 = ctRegex!`[0-9][0-9]`;
+ immutable iterations = 1_000_000;
+ size_t result1 = 0, result2 = 0;
+ auto sw = StopWatch(AutoStart.yes);
+ foreach (_; 0 .. iterations)
+ {
+ result1 += matchFirst("12345678", re1).length;
+ }
+ const staticTime = sw.peek();
+ sw.reset();
+ foreach (_; 0 .. iterations)
+ {
+ result2 += matchFirst("12345678", re2).length;
+ }
+ const enumTime = sw.peek();
+ assert(result1 == result2);
+ auto ratio = 1.0 * enumTime.total!"usecs" / staticTime.total!"usecs";
+ // enum is faster or the diff is less < 30%
+ assert(ratio < 1.0 || abs(ratio - 1.0) < 0.75,
+ "enum regex to static regex ratio "~to!string(ratio));
+}
+
+// https://issues.dlang.org/show_bug.cgi?id=14504
@safe unittest
{
auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" ~
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
}
-// bugzilla 14529
+// https://issues.dlang.org/show_bug.cgi?id=14529
@safe unittest
{
auto ctPat2 = regex(r"^[CDF]$", "i");
@@ -553,7 +595,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(matchAll(v, ctPat2).front.hit == v);
}
-// bugzilla 14615
+// https://issues.dlang.org/show_bug.cgi?id=14615
@safe unittest
{
import std.array : appender;
@@ -572,14 +614,14 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(sink.data == "Hello, world!Hello, world!");
}
-// bugzilla 15573
+// https://issues.dlang.org/show_bug.cgi?id=15573
@safe unittest
{
auto rx = regex("[c d]", "x");
assert("a b".matchFirst(rx));
}
-// bugzilla 15864
+// https://issues.dlang.org/show_bug.cgi?id=15864
@safe unittest
{
regex(`(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`);
@@ -592,7 +634,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assertThrown(regex("(?#..."));
}
-// bugzilla 17075
+// https://issues.dlang.org/show_bug.cgi?id=17075
@safe unittest
{
enum titlePattern = `<title>(.+)</title>`;
@@ -601,14 +643,14 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(input.matchFirst(titleRegex).empty);
}
-// bugzilla 17212
+// https://issues.dlang.org/show_bug.cgi?id=17212
@safe unittest
{
auto r = regex(" [a] ", "x");
assert("a".matchFirst(r));
}
-// bugzilla 17157
+// https://issues.dlang.org/show_bug.cgi?id=17157
@safe unittest
{
import std.algorithm.comparison : equal;
@@ -625,7 +667,7 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(equal!equal(s.bmatch(r), outcomes));
}
-// bugzilla 17667
+// https://issues.dlang.org/show_bug.cgi?id=17667
@safe unittest
{
import std.algorithm.searching : canFind;
@@ -637,19 +679,19 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class");
willThrow([r"[\", r"123"], "no matching ']' found while parsing character class");
willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class");
- willThrow([r"[a-\", r"123"], "invalid escape sequence");
+ willThrow([r"[a-\", r"123"], "no matching ']' found while parsing character class");
willThrow([r"\", r"123"], "invalid escape sequence");
}
-// bugzilla 17668
+// https://issues.dlang.org/show_bug.cgi?id=17668
@safe unittest
{
import std.algorithm.searching;
auto e = collectException!RegexException(regex(q"<[^]>"));
- assert(e.msg.canFind("no operand for '^'"));
+ assert(e.msg.canFind("no operand for '^'"), e.msg);
}
-// bugzilla 17673
+// https://issues.dlang.org/show_bug.cgi?id=17673
@safe unittest
{
string str = `<">`;
@@ -660,3 +702,12 @@ import std.regex.internal.ir : Escapables; // characters that need escaping
assert(c.whichPattern == 2);
}
+// https://issues.dlang.org/show_bug.cgi?id=18692
+@safe unittest
+{
+ auto rx = regex("()()()");
+ auto ma = "".matchFirst(rx);
+ auto ma2 = ma;
+ ma = ma2;
+ assert(ma[1] == "");
+}
diff --git a/libphobos/src/std/regex/internal/thompson.d b/libphobos/src/std/regex/internal/thompson.d
index 4d7deaa..ab28d37 100644
--- a/libphobos/src/std/regex/internal/thompson.d
+++ b/libphobos/src/std/regex/internal/thompson.d
@@ -70,7 +70,7 @@ struct ThreadList(DataIndex)
this(ThreadList tlist){ ct = tlist.tip; }
@property bool empty(){ return ct is null; }
@property const(Thread!DataIndex)* front(){ return ct; }
- @property popFront()
+ void popFront()
{
assert(ct);
ct = ct.next;
@@ -89,7 +89,7 @@ struct ThreadList(DataIndex)
template ThompsonOps(E, S, bool withInput:true)
{
@trusted:
- static bool op(IR code:IR.End)(E* e, S* state)
+ static bool op(IR code:IR.End)(E e, S* state)
{
with(e) with(state)
{
@@ -105,7 +105,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.Wordboundary)(E* e, S* state)
+ static bool op(IR code:IR.Wordboundary)(E e, S* state)
{
with(e) with(state)
{
@@ -137,7 +137,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.Notwordboundary)(E* e, S* state)
+ static bool op(IR code:IR.Notwordboundary)(E e, S* state)
{
with(e) with(state)
{
@@ -167,7 +167,7 @@ template ThompsonOps(E, S, bool withInput:true)
return true;
}
- static bool op(IR code:IR.Bof)(E* e, S* state)
+ static bool op(IR code:IR.Bof)(E e, S* state)
{
with(e) with(state)
{
@@ -183,7 +183,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.Bol)(E* e, S* state)
+ static bool op(IR code:IR.Bol)(E e, S* state)
{
with(e) with(state)
{
@@ -203,7 +203,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.Eof)(E* e, S* state)
+ static bool op(IR code:IR.Eof)(E e, S* state)
{
with(e) with(state)
{
@@ -219,7 +219,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.Eol)(E* e, S* state)
+ static bool op(IR code:IR.Eol)(E e, S* state)
{
with(e) with(state)
{
@@ -240,42 +240,42 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.InfiniteStart)(E* e, S* state)
+ static bool op(IR code:IR.InfiniteStart)(E e, S* state)
{
with(e) with(state)
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteStart);
return op!(IR.InfiniteEnd)(e,state);
}
- static bool op(IR code:IR.InfiniteBloomStart)(E* e, S* state)
+ static bool op(IR code:IR.InfiniteBloomStart)(E e, S* state)
{
with(e) with(state)
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteBloomStart);
return op!(IR.InfiniteBloomEnd)(e,state);
}
- static bool op(IR code:IR.InfiniteQStart)(E* e, S* state)
+ static bool op(IR code:IR.InfiniteQStart)(E e, S* state)
{
with(e) with(state)
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteQStart);
return op!(IR.InfiniteQEnd)(e,state);
}
- static bool op(IR code:IR.RepeatStart)(E* e, S* state)
+ static bool op(IR code:IR.RepeatStart)(E e, S* state)
{
with(e) with(state)
t.pc += re.ir[t.pc].data + IRL!(IR.RepeatStart);
return op!(IR.RepeatEnd)(e,state);
}
- static bool op(IR code:IR.RepeatQStart)(E* e, S* state)
+ static bool op(IR code:IR.RepeatQStart)(E e, S* state)
{
with(e) with(state)
t.pc += re.ir[t.pc].data + IRL!(IR.RepeatQStart);
return op!(IR.RepeatQEnd)(e,state);
}
- static bool op(IR code)(E* e, S* state)
+ static bool op(IR code)(E e, S* state)
if (code == IR.RepeatEnd || code == IR.RepeatQEnd)
{
with(e) with(state)
@@ -330,7 +330,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code)(E* e, S* state)
+ static bool op(IR code)(E e, S* state)
if (code == IR.InfiniteEnd || code == IR.InfiniteQEnd)
{
with(e) with(state)
@@ -365,7 +365,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code)(E* e, S* state)
+ static bool op(IR code)(E e, S* state)
if (code == IR.InfiniteBloomEnd)
{
with(e) with(state)
@@ -394,7 +394,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.OrEnd)(E* e, S* state)
+ static bool op(IR code:IR.OrEnd)(E e, S* state)
{
with(e) with(state)
{
@@ -415,7 +415,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.OrStart)(E* e, S* state)
+ static bool op(IR code:IR.OrStart)(E e, S* state)
{
with(e) with(state)
{
@@ -424,7 +424,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.Option)(E* e, S* state)
+ static bool op(IR code:IR.Option)(E e, S* state)
{
with(e) with(state)
{
@@ -439,7 +439,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.GotoEndOr)(E* e, S* state)
+ static bool op(IR code:IR.GotoEndOr)(E e, S* state)
{
with(e) with(state)
{
@@ -448,7 +448,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.GroupStart)(E* e, S* state)
+ static bool op(IR code:IR.GroupStart)(E e, S* state)
{
with(e) with(state)
{
@@ -458,7 +458,7 @@ template ThompsonOps(E, S, bool withInput:true)
return true;
}
}
- static bool op(IR code:IR.GroupEnd)(E* e, S* state)
+ static bool op(IR code:IR.GroupEnd)(E e, S* state)
{
with(e) with(state)
{
@@ -469,7 +469,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.Backref)(E* e, S* state)
+ static bool op(IR code:IR.Backref)(E e, S* state)
{
with(e) with(state)
{
@@ -506,7 +506,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
- static bool op(IR code)(E* e, S* state)
+ static bool op(IR code)(E e, S* state)
if (code == IR.LookbehindStart || code == IR.NeglookbehindStart)
{
with(e) with(state)
@@ -516,10 +516,9 @@ template ThompsonOps(E, S, bool withInput:true)
uint end = t.pc + len + IRL!(IR.LookbehindEnd) + IRL!(IR.LookbehindStart);
bool positive = re.ir[t.pc].code == IR.LookbehindStart;
static if (Stream.isLoopback)
- auto matcher = fwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
+ auto matcher = fwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
else
- auto matcher = bwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
- matcher.re.ngroup = me - ms;
+ auto matcher = bwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
matcher.backrefed = backrefed.empty ? t.matches : backrefed;
//backMatch
auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookbehindStart));
@@ -534,7 +533,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code)(E* e, S* state)
+ static bool op(IR code)(E e, S* state)
if (code == IR.LookaheadStart || code == IR.NeglookaheadStart)
{
with(e) with(state)
@@ -545,10 +544,9 @@ template ThompsonOps(E, S, bool withInput:true)
uint end = t.pc+len+IRL!(IR.LookaheadEnd)+IRL!(IR.LookaheadStart);
bool positive = re.ir[t.pc].code == IR.LookaheadStart;
static if (Stream.isLoopback)
- auto matcher = bwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
+ auto matcher = bwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
else
- auto matcher = fwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
- matcher.re.ngroup = me - ms;
+ auto matcher = fwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
matcher.backrefed = backrefed.empty ? t.matches : backrefed;
auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookaheadStart));
freelist = matcher.freelist;
@@ -564,7 +562,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code)(E* e, S* state)
+ static bool op(IR code)(E e, S* state)
if (code == IR.LookaheadEnd || code == IR.NeglookaheadEnd ||
code == IR.LookbehindEnd || code == IR.NeglookbehindEnd)
{
@@ -579,13 +577,13 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.Nop)(E* e, S* state)
+ static bool op(IR code:IR.Nop)(E e, S* state)
{
with(state) t.pc += IRL!(IR.Nop);
return true;
}
- static bool op(IR code:IR.OrChar)(E* e, S* state)
+ static bool op(IR code:IR.OrChar)(E e, S* state)
{
with(e) with(state)
{
@@ -607,7 +605,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.Char)(E* e, S* state)
+ static bool op(IR code:IR.Char)(E e, S* state)
{
with(e) with(state)
{
@@ -623,7 +621,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.Any)(E* e, S* state)
+ static bool op(IR code:IR.Any)(E e, S* state)
{
with(e) with(state)
{
@@ -634,7 +632,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.CodepointSet)(E* e, S* state)
+ static bool op(IR code:IR.CodepointSet)(E e, S* state)
{
with(e) with(state)
{
@@ -652,7 +650,7 @@ template ThompsonOps(E, S, bool withInput:true)
}
}
- static bool op(IR code:IR.Trie)(E* e, S* state)
+ static bool op(IR code:IR.Trie)(E e, S* state)
{
with(e) with(state)
{
@@ -676,7 +674,7 @@ template ThompsonOps(E,S, bool withInput:false)
{
@trusted:
// can't match these without input
- static bool op(IR code)(E* e, S* state)
+ static bool op(IR code)(E e, S* state)
if (code == IR.Char || code == IR.OrChar || code == IR.CodepointSet
|| code == IR.Trie || code == IR.Char || code == IR.Any)
{
@@ -684,7 +682,7 @@ template ThompsonOps(E,S, bool withInput:false)
}
// special case of zero-width backref
- static bool op(IR code:IR.Backref)(E* e, S* state)
+ static bool op(IR code:IR.Backref)(E e, S* state)
{
with(e) with(state)
{
@@ -702,7 +700,7 @@ template ThompsonOps(E,S, bool withInput:false)
}
// forward all control flow to normal versions
- static bool op(IR code)(E* e, S* state)
+ static bool op(IR code)(E e, S* state)
if (code != IR.Char && code != IR.OrChar && code != IR.CodepointSet
&& code != IR.Trie && code != IR.Char && code != IR.Any && code != IR.Backref)
{
@@ -714,19 +712,19 @@ template ThompsonOps(E,S, bool withInput:false)
Thomspon matcher does all matching in lockstep,
never looking at the same char twice
+/
-@trusted struct ThompsonMatcher(Char, StreamType = Input!Char)
+@trusted class ThompsonMatcher(Char, StreamType = Input!Char): Matcher!Char
if (is(Char : dchar))
{
alias DataIndex = Stream.DataIndex;
alias Stream = StreamType;
- alias OpFunc = bool function(ThompsonMatcher*, State*);
+ alias OpFunc = bool function(ThompsonMatcher, State*) pure;
alias BackMatcher = ThompsonMatcher!(Char, BackLooper!(Stream));
- alias OpBackFunc = bool function(BackMatcher*, BackMatcher.State*);
+ alias OpBackFunc = bool function(BackMatcher, BackMatcher.State*) pure;
Thread!DataIndex* freelist;
ThreadList!DataIndex clist, nlist;
DataIndex[] merge;
Group!DataIndex[] backrefed;
- Regex!Char re; //regex program
+ const Regex!Char re; //regex program
Stream s;
dchar front;
DataIndex index;
@@ -737,16 +735,19 @@ if (is(Char : dchar))
OpBackFunc[] opCacheBackTrue; // ditto
OpBackFunc[] opCacheBackFalse; // ditto
size_t threadSize;
+ size_t _refCount;
int matched;
bool exhausted;
+final:
+ pure
static struct State
{
Thread!DataIndex* t;
ThreadList!DataIndex worklist;
Group!DataIndex[] matches;
- bool popState(E)(E* e)
+ bool popState(E)(E e)
{
with(e)
{
@@ -784,6 +785,10 @@ if (is(Char : dchar))
//true if it's end of input
@property bool atEnd(){ return index == s.lastIndex && s.atEnd; }
+ override @property ref size_t refCount() @safe { return _refCount; }
+
+ override @property ref const(Regex!Char) pattern() @safe { return re; }
+
bool next()
{
if (!s.nextChar(front, index))
@@ -843,19 +848,36 @@ if (is(Char : dchar))
}
}
- this()(Regex!Char program, Stream stream, void[] memory)
+ override Matcher!Char rearm(in Char[] data)
+ {
+ exhausted = false;
+ matched = 0;
+ s = Stream(data);
+ return this;
+ }
+
+ this()(ref const Regex!Char program, Stream stream, void[] memory)
{
+ // We are emplace'd to malloced memory w/o blitting T.init over it\
+ // make sure we initialize all fields explicitly
+ _refCount = 1;
+ subCounters = null;
+ backrefed = null;
+ exhausted = false;
+ matched = 0;
re = program;
s = stream;
initExternalMemory(memory);
genCounter = 0;
}
- this(ref ThompsonMatcher matcher, size_t lo, size_t hi, Stream stream)
+ this(ThompsonMatcher matcher, size_t lo, size_t hi, uint nGroup, Stream stream)
{
+ _refCount = 1;
+ subCounters = matcher.subCounters;
s = stream;
- re = matcher.re;
- re.ir = re.ir[lo .. hi];
+ auto code = matcher.re.ir[lo .. hi];
+ re = matcher.re.withCode(code).withNGroup(nGroup);
threadSize = matcher.threadSize;
merge = matcher.merge;
freelist = matcher.freelist;
@@ -867,11 +889,13 @@ if (is(Char : dchar))
index = matcher.index;
}
- this(ref BackMatcher matcher, size_t lo, size_t hi, Stream stream)
+ this(BackMatcher matcher, size_t lo, size_t hi, uint nGroup, Stream stream)
{
+ _refCount = 1;
+ subCounters = matcher.subCounters;
s = stream;
- re = matcher.re;
- re.ir = re.ir[lo .. hi];
+ auto code = matcher.re.ir[lo .. hi];
+ re = matcher.re.withCode(code).withNGroup(nGroup);
threadSize = matcher.threadSize;
merge = matcher.merge;
freelist = matcher.freelist;
@@ -883,31 +907,35 @@ if (is(Char : dchar))
index = matcher.index;
}
- auto fwdMatcher()(size_t lo, size_t hi, size_t counter)
+ auto fwdMatcher()(size_t lo, size_t hi, uint nGroup, size_t counter)
{
- auto m = ThompsonMatcher!(Char, Stream)(this, lo, hi, s);
+ auto m = new ThompsonMatcher!(Char, Stream)(this, lo, hi, nGroup, s);
m.genCounter = counter;
return m;
}
- auto bwdMatcher()(size_t lo, size_t hi, size_t counter)
+ auto bwdMatcher()(size_t lo, size_t hi, uint nGroup, size_t counter)
{
alias BackLooper = typeof(s.loopBack(index));
- auto m = ThompsonMatcher!(Char, BackLooper)(this, lo, hi, s.loopBack(index));
+ auto m = new ThompsonMatcher!(Char, BackLooper)(this, lo, hi, nGroup, s.loopBack(index));
m.genCounter = counter;
m.next();
return m;
}
- auto dupTo(void[] memory)
+ override void dupTo(Matcher!Char engine, void[] memory)
{
- typeof(this) tmp = this;//bitblit
- tmp.initExternalMemory(memory);
- tmp.genCounter = 0;
- return tmp;
+ auto thompson = cast(ThompsonMatcher) engine;
+ thompson.s = s;
+ thompson.subCounters = null;
+ thompson.front = front;
+ thompson.index = index;
+ thompson.matched = matched;
+ thompson.exhausted = exhausted;
+ thompson.initExternalMemory(memory);
}
- int match(Group!DataIndex[] matches)
+ override int match(Group!DataIndex[] matches)
{
debug(std_regex_matcher)
writeln("------------------------------------------");
@@ -1052,9 +1080,9 @@ if (is(Char : dchar))
{
debug(std_regex_matcher) writeln("---- Evaluating thread");
static if (withInput)
- while (opCacheTrue.ptr[state.t.pc](&this, state)){}
+ while (opCacheTrue.ptr[state.t.pc](this, state)){}
else
- while (opCacheFalse.ptr[state.t.pc](&this, state)){}
+ while (opCacheFalse.ptr[state.t.pc](this, state)){}
}
enum uint RestartPc = uint.max;
//match the input, evaluating IR without searching