aboutsummaryrefslogtreecommitdiff
path: root/libphobos/src/std/regex/internal/backtracking.d
diff options
context:
space:
mode:
authorIain Buclaw <ibuclaw@gcc.gnu.org>2018-10-28 19:51:47 +0000
committerIain Buclaw <ibuclaw@gcc.gnu.org>2018-10-28 19:51:47 +0000
commitb4c522fabd0df7be08882d2207df8b2765026110 (patch)
treeb5ffc312b0a441c1ba24323152aec463fdbe5e9f /libphobos/src/std/regex/internal/backtracking.d
parent01ce9e31a02c8039d88e90f983735104417bf034 (diff)
downloadgcc-b4c522fabd0df7be08882d2207df8b2765026110.zip
gcc-b4c522fabd0df7be08882d2207df8b2765026110.tar.gz
gcc-b4c522fabd0df7be08882d2207df8b2765026110.tar.bz2
Add D front-end, libphobos library, and D2 testsuite.
ChangeLog: * Makefile.def (target_modules): Add libphobos. (flags_to_pass): Add GDC, GDCFLAGS, GDC_FOR_TARGET and GDCFLAGS_FOR_TARGET. (dependencies): Make libphobos depend on libatomic, libbacktrace configure, and zlib configure. (language): Add language d. * Makefile.in: Rebuild. * Makefile.tpl (BUILD_EXPORTS): Add GDC and GDCFLAGS. (HOST_EXPORTS): Add GDC. (POSTSTAGE1_HOST_EXPORTS): Add GDC and GDC_FOR_BUILD. (BASE_TARGET_EXPORTS): Add GDC. (GDC_FOR_BUILD, GDC, GDCFLAGS): New variables. (GDC_FOR_TARGET, GDC_FLAGS_FOR_TARGET): New variables. (EXTRA_HOST_FLAGS): Add GDC. (STAGE1_FLAGS_TO_PASS): Add GDC. (EXTRA_TARGET_FLAGS): Add GDC and GDCFLAGS. * config-ml.in: Treat GDC and GDCFLAGS like other compiler/flag environment variables. * configure: Rebuild. * configure.ac: Add target-libphobos to target_libraries. Set and substitute GDC_FOR_BUILD and GDC_FOR_TARGET. config/ChangeLog: * multi.m4: Set GDC. gcc/ChangeLog: * Makefile.in (tm_d_file_list, tm_d_include_list): New variables. (TM_D_H, D_TARGET_DEF, D_TARGET_H, D_TARGET_OBJS): New variables. (tm_d.h, cs-tm_d.h, default-d.o): New rules. (d/d-target-hooks-def.h, s-d-target-hooks-def-h): New rules. (s-tm-texi): Also check timestamp on d-target.def. (generated_files): Add TM_D_H and d-target-hooks-def.h. (build/genhooks.o): Also depend on D_TARGET_DEF. * config.gcc (tm_d_file, d_target_objs, target_has_targetdm): New variables. * config/aarch64/aarch64-d.c: New file. * config/aarch64/aarch64-linux.h (GNU_USER_TARGET_D_CRITSEC_SIZE): Define. * config/aarch64/aarch64-protos.h (aarch64_d_target_versions): New prototype. * config/aarch64/aarch64.h (TARGET_D_CPU_VERSIONS): Define. * config/aarch64/t-aarch64 (aarch64-d.o): New rule. * config/arm/arm-d.c: New file. * config/arm/arm-protos.h (arm_d_target_versions): New prototype. * config/arm/arm.h (TARGET_D_CPU_VERSIONS): Define. * config/arm/linux-eabi.h (EXTRA_TARGET_D_OS_VERSIONS): Define. * config/arm/t-arm (arm-d.o): New rule. * config/default-d.c: New file. * config/glibc-d.c: New file. * config/gnu.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/i386/i386-d.c: New file. * config/i386/i386-protos.h (ix86_d_target_versions): New prototype. * config/i386/i386.h (TARGET_D_CPU_VERSIONS): Define. * config/i386/linux-common.h (EXTRA_TARGET_D_OS_VERSIONS): Define. (GNU_USER_TARGET_D_CRITSEC_SIZE): Define. * config/i386/t-i386 (i386-d.o): New rule. * config/kfreebsd-gnu.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/kopensolaris-gnu.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/linux-android.h (ANDROID_TARGET_D_OS_VERSIONS): Define. * config/linux.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/mips/linux-common.h (EXTRA_TARGET_D_OS_VERSIONS): Define. * config/mips/mips-d.c: New file. * config/mips/mips-protos.h (mips_d_target_versions): New prototype. * config/mips/mips.h (TARGET_D_CPU_VERSIONS): Define. * config/mips/t-mips (mips-d.o): New rule. * config/powerpcspe/linux.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/powerpcspe/linux64.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/powerpcspe/powerpcspe-d.c: New file. * config/powerpcspe/powerpcspe-protos.h (rs6000_d_target_versions): New prototype. * config/powerpcspe/powerpcspe.c (rs6000_output_function_epilogue): Support GNU D by using 0 as the language type. * config/powerpcspe/powerpcspe.h (TARGET_D_CPU_VERSIONS): Define. * config/powerpcspe/t-powerpcspe (powerpcspe-d.o): New rule. * config/riscv/riscv-d.c: New file. * config/riscv/riscv-protos.h (riscv_d_target_versions): New prototype. * config/riscv/riscv.h (TARGET_D_CPU_VERSIONS): Define. * config/riscv/t-riscv (riscv-d.o): New rule. * config/rs6000/linux.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/rs6000/linux64.h (GNU_USER_TARGET_D_OS_VERSIONS): Define. * config/rs6000/rs6000-d.c: New file. * config/rs6000/rs6000-protos.h (rs6000_d_target_versions): New prototype. * config/rs6000/rs6000.c (rs6000_output_function_epilogue): Support GNU D by using 0 as the language type. * config/rs6000/rs6000.h (TARGET_D_CPU_VERSIONS): Define. * config/rs6000/t-rs6000 (rs6000-d.o): New rule. * config/s390/s390-d.c: New file. * config/s390/s390-protos.h (s390_d_target_versions): New prototype. * config/s390/s390.h (TARGET_D_CPU_VERSIONS): Define. * config/s390/t-s390 (s390-d.o): New rule. * config/sparc/sparc-d.c: New file. * config/sparc/sparc-protos.h (sparc_d_target_versions): New prototype. * config/sparc/sparc.h (TARGET_D_CPU_VERSIONS): Define. * config/sparc/t-sparc (sparc-d.o): New rule. * config/t-glibc (glibc-d.o): New rule. * configure: Regenerated. * configure.ac (tm_d_file): New variable. (tm_d_file_list, tm_d_include_list, d_target_objs): Add substitutes. * doc/contrib.texi (Contributors): Add self for the D frontend. * doc/frontends.texi (G++ and GCC): Mention D as a supported language. * doc/install.texi (Configuration): Mention libphobos as an option for --enable-shared. Mention d as an option for --enable-languages. (Testing): Mention check-d as a target. * doc/invoke.texi (Overall Options): Mention .d, .dd, and .di as file name suffixes. Mention d as a -x option. * doc/sourcebuild.texi (Top Level): Mention libphobos. * doc/standards.texi (Standards): Add section on D language. * doc/tm.texi: Regenerated. * doc/tm.texi.in: Add @node for D language and ABI, and @hook for TARGET_CPU_VERSIONS, TARGET_D_OS_VERSIONS, and TARGET_D_CRITSEC_SIZE. * dwarf2out.c (is_dlang): New function. (gen_compile_unit_die): Use DW_LANG_D for D. (declare_in_namespace): Return module die for D, instead of adding extra declarations into the namespace. (gen_namespace_die): Generate DW_TAG_module for D. (gen_decl_die): Handle CONST_DECLSs for D. (dwarf2out_decl): Likewise. (prune_unused_types_walk_local_classes): Handle DW_tag_interface_type. (prune_unused_types_walk): Handle DW_tag_interface_type same as other kinds of aggregates. * gcc.c (default_compilers): Add entries for .d, .dd and .di. * genhooks.c: Include d/d-target.def. gcc/po/ChangeLog: * EXCLUDES: Add sources from d/dmd. gcc/testsuite/ChangeLog: * gcc.misc-tests/help.exp: Add D to option descriptions check. * gdc.dg/asan/asan.exp: New file. * gdc.dg/asan/gdc272.d: New test. * gdc.dg/compilable.d: New test. * gdc.dg/dg.exp: New file. * gdc.dg/gdc254.d: New test. * gdc.dg/gdc260.d: New test. * gdc.dg/gdc270a.d: New test. * gdc.dg/gdc270b.d: New test. * gdc.dg/gdc282.d: New test. * gdc.dg/gdc283.d: New test. * gdc.dg/imports/gdc170.d: New test. * gdc.dg/imports/gdc231.d: New test. * gdc.dg/imports/gdc239.d: New test. * gdc.dg/imports/gdc241a.d: New test. * gdc.dg/imports/gdc241b.d: New test. * gdc.dg/imports/gdc251a.d: New test. * gdc.dg/imports/gdc251b.d: New test. * gdc.dg/imports/gdc253.d: New test. * gdc.dg/imports/gdc254a.d: New test. * gdc.dg/imports/gdc256.d: New test. * gdc.dg/imports/gdc27.d: New test. * gdc.dg/imports/gdcpkg256/package.d: New test. * gdc.dg/imports/runnable.d: New test. * gdc.dg/link.d: New test. * gdc.dg/lto/lto.exp: New file. * gdc.dg/lto/ltotests_0.d: New test. * gdc.dg/lto/ltotests_1.d: New test. * gdc.dg/runnable.d: New test. * gdc.dg/simd.d: New test. * gdc.test/gdc-test.exp: New file. * lib/gdc-dg.exp: New file. * lib/gdc.exp: New file. libphobos/ChangeLog: * Makefile.am: New file. * Makefile.in: New file. * acinclude.m4: New file. * aclocal.m4: New file. * config.h.in: New file. * configure: New file. * configure.ac: New file. * d_rules.am: New file. * libdruntime/Makefile.am: New file. * libdruntime/Makefile.in: New file. * libdruntime/__entrypoint.di: New file. * libdruntime/__main.di: New file. * libdruntime/gcc/attribute.d: New file. * libdruntime/gcc/backtrace.d: New file. * libdruntime/gcc/builtins.d: New file. * libdruntime/gcc/config.d.in: New file. * libdruntime/gcc/deh.d: New file. * libdruntime/gcc/libbacktrace.d.in: New file. * libdruntime/gcc/unwind/arm.d: New file. * libdruntime/gcc/unwind/arm_common.d: New file. * libdruntime/gcc/unwind/c6x.d: New file. * libdruntime/gcc/unwind/generic.d: New file. * libdruntime/gcc/unwind/package.d: New file. * libdruntime/gcc/unwind/pe.d: New file. * m4/autoconf.m4: New file. * m4/druntime.m4: New file. * m4/druntime/cpu.m4: New file. * m4/druntime/libraries.m4: New file. * m4/druntime/os.m4: New file. * m4/gcc_support.m4: New file. * m4/gdc.m4: New file. * m4/libtool.m4: New file. * src/Makefile.am: New file. * src/Makefile.in: New file. * src/libgphobos.spec.in: New file. * testsuite/Makefile.am: New file. * testsuite/Makefile.in: New file. * testsuite/config/default.exp: New file. * testsuite/lib/libphobos-dg.exp: New file. * testsuite/lib/libphobos.exp: New file. * testsuite/testsuite_flags.in: New file. From-SVN: r265573
Diffstat (limited to 'libphobos/src/std/regex/internal/backtracking.d')
-rw-r--r--libphobos/src/std/regex/internal/backtracking.d1495
1 files changed, 1495 insertions, 0 deletions
diff --git a/libphobos/src/std/regex/internal/backtracking.d b/libphobos/src/std/regex/internal/backtracking.d
new file mode 100644
index 0000000..ffc9779
--- /dev/null
+++ b/libphobos/src/std/regex/internal/backtracking.d
@@ -0,0 +1,1495 @@
+/*
+ Implementation of backtracking std.regex engine.
+ Contains both compile-time and run-time versions.
+*/
+module std.regex.internal.backtracking;
+
+package(std.regex):
+
+import core.stdc.stdlib, std.range.primitives, std.traits, std.typecons;
+import std.regex.internal.ir;
+
+/+
+ BacktrackingMatcher implements backtracking scheme of matching
+ regular expressions.
++/
+template BacktrackingMatcher(bool CTregex)
+{
+ @trusted struct BacktrackingMatcher(Char, Stream = Input!Char)
+ if (is(Char : dchar))
+ {
+ alias DataIndex = Stream.DataIndex;
+ struct State
+ {//top bit in pc is set if saved along with matches
+ DataIndex index;
+ uint pc, counter, infiniteNesting;
+ }
+ static assert(State.sizeof % size_t.sizeof == 0);
+ enum stateSize = State.sizeof / size_t.sizeof;
+ enum initialStack = 1 << 11; // items in a block of segmented stack
+ alias String = const(Char)[];
+ alias RegEx = Regex!Char;
+ alias MatchFn = bool function (ref BacktrackingMatcher!(Char, Stream));
+ RegEx re; //regex program
+ static if (CTregex)
+ MatchFn nativeFn; //native code for that program
+ //Stream state
+ Stream s;
+ DataIndex index;
+ dchar front;
+ bool exhausted;
+ //backtracking machine state
+ uint pc, counter;
+ DataIndex lastState = 0; //top of state stack
+ static if (!CTregex)
+ uint infiniteNesting;
+ size_t[] memory;
+ Trace[] merge;
+ static struct Trace
+ {
+ ulong mask;
+ size_t offset;
+
+ bool mark(size_t idx)
+ {
+ immutable d = idx - offset;
+ if (d < 64) // including overflow
+ {
+ immutable p = mask & (1UL << d);
+ mask |= 1UL << d;
+ return p != 0;
+ }
+ else
+ {
+ offset = idx;
+ mask = 1;
+ return false;
+ }
+ }
+ }
+ //local slice of matches, global for backref
+ Group!DataIndex[] matches, backrefed;
+
+ static if (__traits(hasMember,Stream, "search"))
+ {
+ enum kicked = true;
+ }
+ else
+ enum kicked = false;
+
+ static size_t initialMemory(const ref RegEx re)
+ {
+ return stackSize(re)*size_t.sizeof + re.hotspotTableSize*Trace.sizeof;
+ }
+
+ static size_t stackSize(const ref RegEx re)
+ {
+ size_t itemSize = stateSize
+ + re.ngroup * (Group!DataIndex).sizeof / size_t.sizeof;
+ return initialStack * itemSize + 2;
+ }
+
+ @property bool atStart(){ return index == 0; }
+
+ @property bool atEnd(){ return index == s.lastIndex && s.atEnd; }
+
+ void next()
+ {
+ if (!s.nextChar(front, index))
+ index = s.lastIndex;
+ }
+
+ void search()
+ {
+ static if (kicked)
+ {
+ if (!s.search(re.kickstart, front, index))
+ {
+ index = s.lastIndex;
+ }
+ }
+ else
+ next();
+ }
+
+ //
+ void newStack()
+ {
+ auto chunk = mallocArray!(size_t)(stackSize(re));
+ chunk[0] = cast(size_t)(memory.ptr);
+ chunk[1] = lastState;
+ memory = chunk[2..$];
+ lastState = 0;
+ }
+
+ bool prevStack()
+ {
+ // pointer to previous block
+ size_t* prev = cast(size_t*) memory.ptr[-2];
+ if (!prev)
+ {
+ // The last segment is freed in RegexMatch
+ return false;
+ }
+ else
+ {
+ import core.stdc.stdlib : free;
+ // memory used in previous block
+ size_t size = memory.ptr[-1];
+ free(memory.ptr-2);
+ memory = prev[0 .. size];
+ lastState = size;
+ return true;
+ }
+ }
+
+ void initExternalMemory(void[] memBlock)
+ {
+ merge = arrayInChunk!(Trace)(re.hotspotTableSize, memBlock);
+ merge[] = Trace.init;
+ memory = cast(size_t[]) memBlock;
+ memory[0] = 0; // hidden pointer
+ memory[1] = 0; // used size
+ memory = memory[2..$];
+ }
+
+ void initialize(ref RegEx program, Stream stream, void[] memBlock)
+ {
+ re = program;
+ s = stream;
+ exhausted = false;
+ initExternalMemory(memBlock);
+ backrefed = null;
+ }
+
+ auto dupTo(void[] memory)
+ {
+ typeof(this) tmp = this;
+ tmp.initExternalMemory(memory);
+ return tmp;
+ }
+
+ this(ref RegEx program, Stream stream, void[] memBlock, dchar ch, DataIndex idx)
+ {
+ initialize(program, stream, memBlock);
+ front = ch;
+ index = idx;
+ }
+
+ this(ref RegEx program, Stream stream, void[] memBlock)
+ {
+ initialize(program, stream, memBlock);
+ next();
+ }
+
+ auto fwdMatcher(ref BacktrackingMatcher matcher, void[] memBlock)
+ {
+ alias BackMatcherTempl = .BacktrackingMatcher!(CTregex);
+ alias BackMatcher = BackMatcherTempl!(Char, Stream);
+ auto fwdMatcher = BackMatcher(matcher.re, s, memBlock, front, index);
+ return fwdMatcher;
+ }
+
+ auto bwdMatcher(ref BacktrackingMatcher matcher, void[] memBlock)
+ {
+ alias BackMatcherTempl = .BacktrackingMatcher!(CTregex);
+ alias BackMatcher = BackMatcherTempl!(Char, typeof(s.loopBack(index)));
+ auto fwdMatcher =
+ BackMatcher(matcher.re, s.loopBack(index), memBlock);
+ return fwdMatcher;
+ }
+
+ //
+ int matchFinalize()
+ {
+ immutable start = index;
+ immutable val = matchImpl();
+ if (val)
+ {//stream is updated here
+ matches[0].begin = start;
+ matches[0].end = index;
+ if (!(re.flags & RegexOption.global) || atEnd)
+ exhausted = true;
+ if (start == index)//empty match advances input
+ next();
+ return val;
+ }
+ else
+ return 0;
+ }
+
+ //lookup next match, fill matches with indices into input
+ int match(Group!DataIndex[] matches)
+ {
+ debug(std_regex_matcher)
+ {
+ writeln("------------------------------------------");
+ }
+ if (exhausted) //all matches collected
+ return false;
+ this.matches = matches;
+ if (re.flags & RegexInfo.oneShot)
+ {
+ exhausted = true;
+ const DataIndex start = index;
+ immutable m = matchImpl();
+ if (m)
+ {
+ matches[0].begin = start;
+ matches[0].end = index;
+ }
+ return m;
+ }
+ static if (kicked)
+ {
+ if (!re.kickstart.empty)
+ {
+ for (;;)
+ {
+ immutable val = matchFinalize();
+ if (val)
+ return val;
+ else
+ {
+ if (atEnd)
+ break;
+ search();
+ if (atEnd)
+ {
+ exhausted = true;
+ return matchFinalize();
+ }
+ }
+ }
+ exhausted = true;
+ return 0; //early return
+ }
+ }
+ //no search available - skip a char at a time
+ for (;;)
+ {
+ immutable val = matchFinalize();
+ if (val)
+ return val;
+ else
+ {
+ if (atEnd)
+ break;
+ next();
+ if (atEnd)
+ {
+ exhausted = true;
+ return matchFinalize();
+ }
+ }
+ }
+ exhausted = true;
+ return 0;
+ }
+
+ /+
+ match subexpression against input,
+ results are stored in matches
+ +/
+ int matchImpl()
+ {
+ static if (CTregex && is(typeof(nativeFn(this))))
+ {
+ debug(std_regex_ctr) writeln("using C-T matcher");
+ return nativeFn(this);
+ }
+ else
+ {
+ pc = 0;
+ counter = 0;
+ lastState = 0;
+ matches[] = Group!DataIndex.init;
+ auto start = s._index;
+ debug(std_regex_matcher)
+ writeln("Try match starting at ", s[index .. s.lastIndex]);
+ for (;;)
+ {
+ debug(std_regex_matcher)
+ writefln("PC: %s\tCNT: %s\t%s \tfront: %s src: %s",
+ pc, counter, disassemble(re.ir, pc, re.dict),
+ front, s._index);
+ switch (re.ir[pc].code)
+ {
+ case IR.OrChar://assumes IRL!(OrChar) == 1
+ if (atEnd)
+ goto L_backtrack;
+ uint len = re.ir[pc].sequence;
+ uint end = pc + len;
+ if (re.ir[pc].data != front && re.ir[pc+1].data != front)
+ {
+ for (pc = pc+2; pc < end; pc++)
+ if (re.ir[pc].data == front)
+ break;
+ if (pc == end)
+ goto L_backtrack;
+ }
+ pc = end;
+ next();
+ break;
+ case IR.Char:
+ if (atEnd || front != re.ir[pc].data)
+ goto L_backtrack;
+ pc += IRL!(IR.Char);
+ next();
+ break;
+ case IR.Any:
+ if (atEnd)
+ goto L_backtrack;
+ pc += IRL!(IR.Any);
+ next();
+ break;
+ case IR.CodepointSet:
+ if (atEnd || !re.charsets[re.ir[pc].data].scanFor(front))
+ goto L_backtrack;
+ next();
+ pc += IRL!(IR.CodepointSet);
+ break;
+ case IR.Trie:
+ if (atEnd || !re.matchers[re.ir[pc].data][front])
+ goto L_backtrack;
+ next();
+ pc += IRL!(IR.Trie);
+ break;
+ case IR.Wordboundary:
+ dchar back;
+ DataIndex bi;
+ //at start & end of input
+ if (atStart && wordMatcher[front])
+ {
+ pc += IRL!(IR.Wordboundary);
+ break;
+ }
+ else if (atEnd && s.loopBack(index).nextChar(back, bi)
+ && wordMatcher[back])
+ {
+ pc += IRL!(IR.Wordboundary);
+ break;
+ }
+ else if (s.loopBack(index).nextChar(back, bi))
+ {
+ immutable af = wordMatcher[front];
+ immutable ab = wordMatcher[back];
+ if (af ^ ab)
+ {
+ pc += IRL!(IR.Wordboundary);
+ break;
+ }
+ }
+ goto L_backtrack;
+ case IR.Notwordboundary:
+ dchar back;
+ DataIndex bi;
+ //at start & end of input
+ if (atStart && wordMatcher[front])
+ goto L_backtrack;
+ else if (atEnd && s.loopBack(index).nextChar(back, bi)
+ && wordMatcher[back])
+ goto L_backtrack;
+ else if (s.loopBack(index).nextChar(back, bi))
+ {
+ immutable af = wordMatcher[front];
+ immutable ab = wordMatcher[back];
+ if (af ^ ab)
+ goto L_backtrack;
+ }
+ pc += IRL!(IR.Wordboundary);
+ break;
+ case IR.Bof:
+ if (atStart)
+ pc += IRL!(IR.Bol);
+ else
+ goto L_backtrack;
+ break;
+ case IR.Bol:
+ dchar back;
+ DataIndex bi;
+ if (atStart)
+ pc += IRL!(IR.Bol);
+ else if (s.loopBack(index).nextChar(back,bi)
+ && endOfLine(back, front == '\n'))
+ {
+ pc += IRL!(IR.Bol);
+ }
+ else
+ goto L_backtrack;
+ break;
+ case IR.Eof:
+ if (atEnd)
+ pc += IRL!(IR.Eol);
+ else
+ goto L_backtrack;
+ break;
+ case IR.Eol:
+ dchar back;
+ DataIndex bi;
+ debug(std_regex_matcher) writefln("EOL (front 0x%x) %s", front, s[index .. s.lastIndex]);
+ //no matching inside \r\n
+ if (atEnd || (endOfLine(front, s.loopBack(index).nextChar(back,bi)
+ && back == '\r')))
+ {
+ pc += IRL!(IR.Eol);
+ }
+ else
+ goto L_backtrack;
+ break;
+ case IR.InfiniteStart, IR.InfiniteQStart:
+ pc += re.ir[pc].data + IRL!(IR.InfiniteStart);
+ //now pc is at end IR.Infinite(Q)End
+ uint len = re.ir[pc].data;
+ if (re.ir[pc].code == IR.InfiniteEnd)
+ {
+ pushState(pc+IRL!(IR.InfiniteEnd), counter);
+ pc -= len;
+ }
+ else
+ {
+ pushState(pc - len, counter);
+ pc += IRL!(IR.InfiniteEnd);
+ }
+ break;
+ case IR.InfiniteBloomStart:
+ pc += re.ir[pc].data + IRL!(IR.InfiniteBloomStart);
+ //now pc is at end IR.InfiniteBloomEnd
+ immutable len = re.ir[pc].data;
+ immutable filterIdx = re.ir[pc+2].raw;
+ if (re.filters[filterIdx][front])
+ pushState(pc+IRL!(IR.InfiniteBloomEnd), counter);
+ pc -= len;
+ break;
+ case IR.RepeatStart, IR.RepeatQStart:
+ pc += re.ir[pc].data + IRL!(IR.RepeatStart);
+ break;
+ case IR.RepeatEnd:
+ case IR.RepeatQEnd:
+ if (merge[re.ir[pc + 1].raw+counter].mark(index))
+ {
+ // merged!
+ goto L_backtrack;
+ }
+ //len, step, min, max
+ immutable len = re.ir[pc].data;
+ immutable step = re.ir[pc+2].raw;
+ immutable min = re.ir[pc+3].raw;
+ immutable max = re.ir[pc+4].raw;
+ if (counter < min)
+ {
+ counter += step;
+ pc -= len;
+ }
+ else if (counter < max)
+ {
+ if (re.ir[pc].code == IR.RepeatEnd)
+ {
+ pushState(pc + IRL!(IR.RepeatEnd), counter%step);
+ counter += step;
+ pc -= len;
+ }
+ else
+ {
+ pushState(pc - len, counter + step);
+ counter = counter%step;
+ pc += IRL!(IR.RepeatEnd);
+ }
+ }
+ else
+ {
+ counter = counter%step;
+ pc += IRL!(IR.RepeatEnd);
+ }
+ break;
+ case IR.InfiniteEnd:
+ case IR.InfiniteQEnd:
+ debug(std_regex_matcher) writeln("Infinited nesting:", infiniteNesting);
+ if (merge[re.ir[pc + 1].raw+counter].mark(index))
+ {
+ // merged!
+ goto L_backtrack;
+ }
+ immutable len = re.ir[pc].data;
+ if (re.ir[pc].code == IR.InfiniteEnd)
+ {
+ pushState(pc + IRL!(IR.InfiniteEnd), counter);
+ pc -= len;
+ }
+ else
+ {
+ pushState(pc-len, counter);
+ pc += IRL!(IR.InfiniteEnd);
+ }
+ break;
+ case IR.InfiniteBloomEnd:
+ debug(std_regex_matcher) writeln("Infinited nesting:", infiniteNesting);
+ if (merge[re.ir[pc + 1].raw+counter].mark(index))
+ {
+ // merged!
+ goto L_backtrack;
+ }
+ immutable len = re.ir[pc].data;
+ immutable filterIdx = re.ir[pc+2].raw;
+ if (re.filters[filterIdx][front])
+ {
+ infiniteNesting--;
+ pushState(pc + IRL!(IR.InfiniteBloomEnd), counter);
+ infiniteNesting++;
+ }
+ pc -= len;
+ break;
+ case IR.OrEnd:
+ if (merge[re.ir[pc + 1].raw+counter].mark(index))
+ {
+ // merged!
+ goto L_backtrack;
+ }
+ pc += IRL!(IR.OrEnd);
+ break;
+ case IR.OrStart:
+ pc += IRL!(IR.OrStart);
+ goto case;
+ case IR.Option:
+ immutable len = re.ir[pc].data;
+ if (re.ir[pc+len].code == IR.GotoEndOr)//not a last one
+ {
+ pushState(pc + len + IRL!(IR.Option), counter); //remember 2nd branch
+ }
+ pc += IRL!(IR.Option);
+ break;
+ case IR.GotoEndOr:
+ pc = pc + re.ir[pc].data + IRL!(IR.GotoEndOr);
+ break;
+ case IR.GroupStart:
+ immutable n = re.ir[pc].data;
+ matches[n].begin = index;
+ debug(std_regex_matcher) writefln("IR group #%u starts at %u", n, index);
+ pc += IRL!(IR.GroupStart);
+ break;
+ case IR.GroupEnd:
+ immutable n = re.ir[pc].data;
+ matches[n].end = index;
+ debug(std_regex_matcher) writefln("IR group #%u ends at %u", n, index);
+ pc += IRL!(IR.GroupEnd);
+ break;
+ case IR.LookaheadStart:
+ case IR.NeglookaheadStart:
+ immutable len = re.ir[pc].data;
+ auto save = index;
+ immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
+ auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)];
+ scope(exit) free(mem.ptr);
+ static if (Stream.isLoopback)
+ {
+ auto matcher = bwdMatcher(this, mem);
+ }
+ else
+ {
+ auto matcher = fwdMatcher(this, mem);
+ }
+ matcher.matches = matches[ms .. me];
+ matcher.backrefed = backrefed.empty ? matches : backrefed;
+ matcher.re.ir = re.ir[
+ pc+IRL!(IR.LookaheadStart) .. pc+IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd)
+ ];
+ immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookaheadStart);
+ s.reset(save);
+ next();
+ if (!match)
+ goto L_backtrack;
+ else
+ {
+ pc += IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd);
+ }
+ break;
+ case IR.LookbehindStart:
+ case IR.NeglookbehindStart:
+ immutable len = re.ir[pc].data;
+ immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
+ auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)];
+ scope(exit) free(mem.ptr);
+ static if (Stream.isLoopback)
+ {
+ alias Matcher = BacktrackingMatcher!(Char, Stream);
+ auto matcher = Matcher(re, s, mem, front, index);
+ }
+ else
+ {
+ alias Matcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index)));
+ auto matcher = Matcher(re, s.loopBack(index), mem);
+ }
+ matcher.matches = matches[ms .. me];
+ matcher.re.ir = re.ir[
+ pc + IRL!(IR.LookbehindStart) .. pc + IRL!(IR.LookbehindStart) + len + IRL!(IR.LookbehindEnd)
+ ];
+ matcher.backrefed = backrefed.empty ? matches : backrefed;
+ immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookbehindStart);
+ if (!match)
+ goto L_backtrack;
+ else
+ {
+ pc += IRL!(IR.LookbehindStart)+len+IRL!(IR.LookbehindEnd);
+ }
+ break;
+ case IR.Backref:
+ immutable n = re.ir[pc].data;
+ auto referenced = re.ir[pc].localRef
+ ? s[matches[n].begin .. matches[n].end]
+ : s[backrefed[n].begin .. backrefed[n].end];
+ while (!atEnd && !referenced.empty && front == referenced.front)
+ {
+ next();
+ referenced.popFront();
+ }
+ if (referenced.empty)
+ pc++;
+ else
+ goto L_backtrack;
+ break;
+ case IR.Nop:
+ pc += IRL!(IR.Nop);
+ break;
+ case IR.LookaheadEnd:
+ case IR.NeglookaheadEnd:
+ case IR.LookbehindEnd:
+ case IR.NeglookbehindEnd:
+ case IR.End:
+ // cleanup stale stack blocks if any
+ while (prevStack()) {}
+ return re.ir[pc].data;
+ default:
+ debug printBytecode(re.ir[0..$]);
+ assert(0);
+ L_backtrack:
+ if (!popState())
+ {
+ s.reset(start);
+ return 0;
+ }
+ }
+ }
+ }
+ assert(0);
+ }
+
+ @property size_t stackAvail()
+ {
+ return memory.length - lastState;
+ }
+
+ void stackPush(T)(T val)
+ if (!isDynamicArray!T)
+ {
+ *cast(T*)&memory[lastState] = val;
+ enum delta = (T.sizeof+size_t.sizeof/2)/size_t.sizeof;
+ lastState += delta;
+ debug(std_regex_matcher) writeln("push element SP= ", lastState);
+ }
+
+ void stackPush(T)(T[] val)
+ {
+ static assert(T.sizeof % size_t.sizeof == 0);
+ (cast(T*)&memory[lastState])[0 .. val.length]
+ = val[0..$];
+ lastState += val.length*(T.sizeof/size_t.sizeof);
+ debug(std_regex_matcher) writeln("push array SP= ", lastState);
+ }
+
+ void stackPop(T)(ref T val)
+ if (!isDynamicArray!T)
+ {
+ enum delta = (T.sizeof+size_t.sizeof/2)/size_t.sizeof;
+ lastState -= delta;
+ val = *cast(T*)&memory[lastState];
+ debug(std_regex_matcher) writeln("pop element SP= ", lastState);
+ }
+
+ void stackPop(T)(T[] val)
+ {
+ stackPop(val); // call ref version
+ }
+ void stackPop(T)(ref T[] val)
+ {
+ lastState -= val.length*(T.sizeof/size_t.sizeof);
+ val[0..$] = (cast(T*)&memory[lastState])[0 .. val.length];
+ debug(std_regex_matcher) writeln("pop array SP= ", lastState);
+ }
+
+ static if (!CTregex)
+ {
+ //helper function, saves engine state
+ void pushState(uint pc, uint counter)
+ {
+ if (stateSize + 2 * matches.length > stackAvail)
+ {
+ newStack();
+ }
+ *cast(State*)&memory[lastState] =
+ State(index, pc, counter, infiniteNesting);
+ lastState += stateSize;
+ memory[lastState .. lastState + 2 * matches.length] = (cast(size_t[]) matches)[];
+ lastState += 2*matches.length;
+ debug(std_regex_matcher)
+ writefln("Saved(pc=%s) front: %s src: %s",
+ pc, front, s[index .. s.lastIndex]);
+ }
+
+ //helper function, restores engine state
+ bool popState()
+ {
+ if (!lastState && !prevStack())
+ return false;
+ lastState -= 2*matches.length;
+ auto pm = cast(size_t[]) matches;
+ pm[] = memory[lastState .. lastState + 2 * matches.length];
+ lastState -= stateSize;
+ State* state = cast(State*)&memory[lastState];
+ index = state.index;
+ pc = state.pc;
+ counter = state.counter;
+ infiniteNesting = state.infiniteNesting;
+ debug(std_regex_matcher)
+ {
+ writefln("Restored matches", front, s[index .. s.lastIndex]);
+ foreach (i, m; matches)
+ writefln("Sub(%d) : %s..%s", i, m.begin, m.end);
+ }
+ s.reset(index);
+ next();
+ debug(std_regex_matcher)
+ writefln("Backtracked (pc=%s) front: %s src: %s",
+ pc, front, s[index .. s.lastIndex]);
+ return true;
+ }
+ }
+ }
+}
+
+//very shitty string formatter, $$ replaced with next argument converted to string
+@trusted string ctSub( U...)(string format, U args)
+{
+ import std.conv : to;
+ bool seenDollar;
+ foreach (i, ch; format)
+ {
+ if (ch == '$')
+ {
+ if (seenDollar)
+ {
+ static if (args.length > 0)
+ {
+ return format[0 .. i - 1] ~ to!string(args[0])
+ ~ ctSub(format[i + 1 .. $], args[1 .. $]);
+ }
+ else
+ assert(0);
+ }
+ else
+ seenDollar = true;
+ }
+ else
+ seenDollar = false;
+
+ }
+ return format;
+}
+
+alias Sequence(int B, int E) = staticIota!(B, E);
+
+struct CtContext
+{
+ import std.conv : to, text;
+ //dirty flags
+ bool counter;
+ //to mark the portion of matches to save
+ int match, total_matches;
+ int reserved;
+ CodepointSet[] charsets;
+
+
+ //state of codegenerator
+ static struct CtState
+ {
+ string code;
+ int addr;
+ }
+
+ this(Char)(Regex!Char re)
+ {
+ match = 1;
+ reserved = 1; //first match is skipped
+ total_matches = re.ngroup;
+ charsets = re.charsets;
+ }
+
+ CtContext lookaround(uint s, uint e)
+ {
+ CtContext ct;
+ ct.total_matches = e - s;
+ ct.match = 1;
+ return ct;
+ }
+
+ //restore state having current context
+ string restoreCode()
+ {
+ string text;
+ //stack is checked in L_backtrack
+ text ~= counter
+ ? "
+ stackPop(counter);"
+ : "
+ counter = 0;";
+ if (match < total_matches)
+ {
+ text ~= ctSub("
+ stackPop(matches[$$..$$]);", reserved, match);
+ text ~= ctSub("
+ matches[$$..$] = typeof(matches[0]).init;", match);
+ }
+ else
+ text ~= ctSub("
+ stackPop(matches[$$..$]);", reserved);
+ return text;
+ }
+
+ //save state having current context
+ string saveCode(uint pc, string count_expr="counter")
+ {
+ string text = ctSub("
+ if (stackAvail < $$*(Group!(DataIndex)).sizeof/size_t.sizeof + $$)
+ {
+ newStack();
+ }", match - reserved, cast(int) counter + 2);
+ if (match < total_matches)
+ text ~= ctSub("
+ stackPush(matches[$$..$$]);", reserved, match);
+ else
+ text ~= ctSub("
+ stackPush(matches[$$..$]);", reserved);
+ text ~= counter ? ctSub("
+ stackPush($$);", count_expr) : "";
+ text ~= ctSub("
+ stackPush(index); stackPush($$); \n", pc);
+ return text;
+ }
+
+ //
+ CtState ctGenBlock(Bytecode[] ir, int addr)
+ {
+ CtState result;
+ result.addr = addr;
+ while (!ir.empty)
+ {
+ auto n = ctGenGroup(ir, result.addr);
+ result.code ~= n.code;
+ result.addr = n.addr;
+ }
+ return result;
+ }
+
+ //
+ CtState ctGenGroup(ref Bytecode[] ir, int addr)
+ {
+ import std.algorithm.comparison : max;
+ auto bailOut = "goto L_backtrack;";
+ auto nextInstr = ctSub("goto case $$;", addr+1);
+ CtState r;
+ assert(!ir.empty);
+ switch (ir[0].code)
+ {
+ case IR.InfiniteStart, IR.InfiniteBloomStart,IR.InfiniteQStart, IR.RepeatStart, IR.RepeatQStart:
+ immutable infLoop =
+ ir[0].code == IR.InfiniteStart || ir[0].code == IR.InfiniteQStart ||
+ ir[0].code == IR.InfiniteBloomStart;
+
+ counter = counter ||
+ ir[0].code == IR.RepeatStart || ir[0].code == IR.RepeatQStart;
+ immutable len = ir[0].data;
+ auto nir = ir[ir[0].length .. ir[0].length+len];
+ r = ctGenBlock(nir, addr+1);
+ //start/end codegen
+ //r.addr is at last test+ jump of loop, addr+1 is body of loop
+ nir = ir[ir[0].length + len .. $];
+ r.code = ctGenFixupCode(ir[0 .. ir[0].length], addr, r.addr) ~ r.code;
+ r.code ~= ctGenFixupCode(nir, r.addr, addr+1);
+ r.addr += 2; //account end instruction + restore state
+ ir = nir;
+ break;
+ case IR.OrStart:
+ immutable len = ir[0].data;
+ auto nir = ir[ir[0].length .. ir[0].length+len];
+ r = ctGenAlternation(nir, addr);
+ ir = ir[ir[0].length + len .. $];
+ assert(ir[0].code == IR.OrEnd);
+ ir = ir[ir[0].length..$];
+ break;
+ case IR.LookaheadStart:
+ case IR.NeglookaheadStart:
+ case IR.LookbehindStart:
+ case IR.NeglookbehindStart:
+ immutable len = ir[0].data;
+ immutable behind = ir[0].code == IR.LookbehindStart || ir[0].code == IR.NeglookbehindStart;
+ immutable negative = ir[0].code == IR.NeglookaheadStart || ir[0].code == IR.NeglookbehindStart;
+ string fwdType = "typeof(fwdMatcher(matcher, []))";
+ string bwdType = "typeof(bwdMatcher(matcher, []))";
+ string fwdCreate = "fwdMatcher(matcher, mem)";
+ string bwdCreate = "bwdMatcher(matcher, mem)";
+ immutable start = IRL!(IR.LookbehindStart);
+ immutable end = IRL!(IR.LookbehindStart)+len+IRL!(IR.LookaheadEnd);
+ CtContext context = lookaround(ir[1].raw, ir[2].raw); //split off new context
+ auto slice = ir[start .. end];
+ r.code ~= ctSub(`
+ case $$: //fake lookaround "atom"
+ static if (typeof(matcher.s).isLoopback)
+ alias Lookaround = $$;
+ else
+ alias Lookaround = $$;
+ static bool matcher_$$(ref Lookaround matcher) @trusted
+ {
+ //(neg)lookaround piece start
+ $$
+ //(neg)lookaround piece ends
+ }
+ auto save = index;
+ auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)];
+ scope(exit) free(mem.ptr);
+ static if (typeof(matcher.s).isLoopback)
+ auto lookaround = $$;
+ else
+ auto lookaround = $$;
+ lookaround.matches = matches[$$..$$];
+ lookaround.backrefed = backrefed.empty ? matches : backrefed;
+ lookaround.nativeFn = &matcher_$$; //hookup closure's binary code
+ int match = $$;
+ s.reset(save);
+ next();
+ if (match)
+ $$
+ else
+ $$`, addr,
+ behind ? fwdType : bwdType, behind ? bwdType : fwdType,
+ addr, context.ctGenRegEx(slice),
+ behind ? fwdCreate : bwdCreate, behind ? bwdCreate : fwdCreate,
+ ir[1].raw, ir[2].raw, //start - end of matches slice
+ addr,
+ negative ? "!lookaround.matchImpl()" : "lookaround.matchImpl()",
+ nextInstr, bailOut);
+ ir = ir[end .. $];
+ r.addr = addr + 1;
+ break;
+ case IR.LookaheadEnd: case IR.NeglookaheadEnd:
+ case IR.LookbehindEnd: case IR.NeglookbehindEnd:
+ ir = ir[IRL!(IR.LookaheadEnd) .. $];
+ r.addr = addr;
+ break;
+ default:
+ assert(ir[0].isAtom, text(ir[0].mnemonic));
+ r = ctGenAtom(ir, addr);
+ }
+ return r;
+ }
+
+ //generate source for bytecode contained in OrStart ... OrEnd
+ CtState ctGenAlternation(Bytecode[] ir, int addr)
+ {
+ CtState[] pieces;
+ CtState r;
+ enum optL = IRL!(IR.Option);
+ for (;;)
+ {
+ assert(ir[0].code == IR.Option);
+ auto len = ir[0].data;
+ if (optL+len < ir.length && ir[optL+len].code == IR.Option)//not a last option
+ {
+ auto nir = ir[optL .. optL+len-IRL!(IR.GotoEndOr)];
+ r = ctGenBlock(nir, addr+2);//space for Option + restore state
+ //r.addr+1 to account GotoEndOr at end of branch
+ r.code = ctGenFixupCode(ir[0 .. ir[0].length], addr, r.addr+1) ~ r.code;
+ addr = r.addr+1;//leave space for GotoEndOr
+ pieces ~= r;
+ ir = ir[optL + len .. $];
+ }
+ else
+ {
+ pieces ~= ctGenBlock(ir[optL..$], addr);
+ addr = pieces[$-1].addr;
+ break;
+ }
+ }
+ r = pieces[0];
+ for (uint i = 1; i < pieces.length; i++)
+ {
+ r.code ~= ctSub(`
+ case $$:
+ goto case $$; `, pieces[i-1].addr, addr);
+ r.code ~= pieces[i].code;
+ }
+ r.addr = addr;
+ return r;
+ }
+
+ // generate fixup code for instruction in ir,
+ // fixup means it has an alternative way for control flow
+ string ctGenFixupCode(Bytecode[] ir, int addr, int fixup)
+ {
+ return ctGenFixupCode(ir, addr, fixup); // call ref Bytecode[] version
+ }
+ string ctGenFixupCode(ref Bytecode[] ir, int addr, int fixup)
+ {
+ string r;
+ string testCode;
+ r = ctSub(`
+ case $$: debug(std_regex_matcher) writeln("#$$");`,
+ addr, addr);
+ switch (ir[0].code)
+ {
+ case IR.InfiniteStart, IR.InfiniteQStart, IR.InfiniteBloomStart:
+ r ~= ctSub( `
+ goto case $$;`, fixup);
+ ir = ir[ir[0].length..$];
+ break;
+ case IR.InfiniteEnd:
+ testCode = ctQuickTest(ir[IRL!(IR.InfiniteEnd) .. $],addr + 1);
+ r ~= ctSub( `
+ if (merge[$$+counter].mark(index))
+ {
+ // merged!
+ goto L_backtrack;
+ }
+
+ $$
+ {
+ $$
+ }
+ goto case $$;
+ case $$: //restore state and go out of loop
+ $$
+ goto case;`, ir[1].raw, testCode, saveCode(addr+1), fixup,
+ addr+1, restoreCode());
+ ir = ir[ir[0].length..$];
+ break;
+ case IR.InfiniteBloomEnd:
+ //TODO: check bloom filter and skip on failure
+ testCode = ctQuickTest(ir[IRL!(IR.InfiniteBloomEnd) .. $],addr + 1);
+ r ~= ctSub( `
+ if (merge[$$+counter].mark(index))
+ {
+ // merged!
+ goto L_backtrack;
+ }
+
+ $$
+ {
+ $$
+ }
+ goto case $$;
+ case $$: //restore state and go out of loop
+ $$
+ goto case;`, ir[1].raw, testCode, saveCode(addr+1), fixup,
+ addr+1, restoreCode());
+ ir = ir[ir[0].length..$];
+ break;
+ case IR.InfiniteQEnd:
+ testCode = ctQuickTest(ir[IRL!(IR.InfiniteEnd) .. $],addr + 1);
+ auto altCode = testCode.length ? ctSub("else goto case $$;", fixup) : "";
+ r ~= ctSub( `
+ if (merge[$$+counter].mark(index))
+ {
+ // merged!
+ goto L_backtrack;
+ }
+
+ $$
+ {
+ $$
+ goto case $$;
+ }
+ $$
+ case $$://restore state and go inside loop
+ $$
+ goto case $$;`, ir[1].raw,
+ testCode, saveCode(addr+1), addr+2, altCode,
+ addr+1, restoreCode(), fixup);
+ ir = ir[ir[0].length..$];
+ break;
+ case IR.RepeatStart, IR.RepeatQStart:
+ r ~= ctSub( `
+ goto case $$;`, fixup);
+ ir = ir[ir[0].length..$];
+ break;
+ case IR.RepeatEnd, IR.RepeatQEnd:
+ //len, step, min, max
+ immutable len = ir[0].data;
+ immutable step = ir[2].raw;
+ immutable min = ir[3].raw;
+ immutable max = ir[4].raw;
+ r ~= ctSub(`
+ if (merge[$$+counter].mark(index))
+ {
+ // merged!
+ goto L_backtrack;
+ }
+ if (counter < $$)
+ {
+ debug(std_regex_matcher) writeln("RepeatEnd min case pc=", $$);
+ counter += $$;
+ goto case $$;
+ }`, ir[1].raw, min, addr, step, fixup);
+ if (ir[0].code == IR.RepeatEnd)
+ {
+ string counter_expr = ctSub("counter % $$", step);
+ r ~= ctSub(`
+ else if (counter < $$)
+ {
+ $$
+ counter += $$;
+ goto case $$;
+ }`, max, saveCode(addr+1, counter_expr), step, fixup);
+ }
+ else
+ {
+ string counter_expr = ctSub("counter % $$", step);
+ r ~= ctSub(`
+ else if (counter < $$)
+ {
+ $$
+ counter = counter % $$;
+ goto case $$;
+ }`, max, saveCode(addr+1,counter_expr), step, addr+2);
+ }
+ r ~= ctSub(`
+ else
+ {
+ counter = counter % $$;
+ goto case $$;
+ }
+ case $$: //restore state
+ $$
+ goto case $$;`, step, addr+2, addr+1, restoreCode(),
+ ir[0].code == IR.RepeatEnd ? addr+2 : fixup );
+ ir = ir[ir[0].length..$];
+ break;
+ case IR.Option:
+ r ~= ctSub( `
+ {
+ $$
+ }
+ goto case $$;
+ case $$://restore thunk to go to the next group
+ $$
+ goto case $$;`, saveCode(addr+1), addr+2,
+ addr+1, restoreCode(), fixup);
+ ir = ir[ir[0].length..$];
+ break;
+ default:
+ assert(0, text(ir[0].mnemonic));
+ }
+ return r;
+ }
+
+
+ string ctQuickTest(Bytecode[] ir, int id)
+ {
+ uint pc = 0;
+ while (pc < ir.length && ir[pc].isAtom)
+ {
+ if (ir[pc].code == IR.GroupStart || ir[pc].code == IR.GroupEnd)
+ {
+ pc++;
+ }
+ else if (ir[pc].code == IR.Backref)
+ break;
+ else
+ {
+ auto code = ctAtomCode(ir[pc..$], -1);
+ return ctSub(`
+ int test_$$()
+ {
+ $$ //$$
+ }
+ if (test_$$() >= 0)`, id, code.ptr ? code : "return 0;",
+ ir[pc].mnemonic, id);
+ }
+ }
+ return "";
+ }
+
+ //process & generate source for simple bytecodes at front of ir using address addr
+ CtState ctGenAtom(ref Bytecode[] ir, int addr)
+ {
+ CtState result;
+ result.code = ctAtomCode(ir, addr);
+ ir.popFrontN(ir[0].code == IR.OrChar ? ir[0].sequence : ir[0].length);
+ result.addr = addr + 1;
+ return result;
+ }
+
+ //D code for atom at ir using address addr, addr < 0 means quickTest
+ string ctAtomCode(Bytecode[] ir, int addr)
+ {
+ string code;
+ string bailOut, nextInstr;
+ if (addr < 0)
+ {
+ bailOut = "return -1;";
+ nextInstr = "return 0;";
+ }
+ else
+ {
+ bailOut = "goto L_backtrack;";
+ nextInstr = ctSub("goto case $$;", addr+1);
+ code ~= ctSub( `
+ case $$: debug(std_regex_matcher) writeln("#$$");
+ `, addr, addr);
+ }
+ switch (ir[0].code)
+ {
+ case IR.OrChar://assumes IRL!(OrChar) == 1
+ code ~= ctSub(`
+ if (atEnd)
+ $$`, bailOut);
+ immutable len = ir[0].sequence;
+ for (uint i = 0; i < len; i++)
+ {
+ code ~= ctSub( `
+ if (front == $$)
+ {
+ $$
+ $$
+ }`, ir[i].data, addr >= 0 ? "next();" :"", nextInstr);
+ }
+ code ~= ctSub( `
+ $$`, bailOut);
+ break;
+ case IR.Char:
+ code ~= ctSub( `
+ if (atEnd || front != $$)
+ $$
+ $$
+ $$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
+ break;
+ case IR.Any:
+ code ~= ctSub( `
+ if (atEnd || (!(re.flags & RegexOption.singleline)
+ && (front == '\r' || front == '\n')))
+ $$
+ $$
+ $$`, bailOut, addr >= 0 ? "next();" :"",nextInstr);
+ break;
+ case IR.CodepointSet:
+ if (charsets.length)
+ {
+ string name = `func_`~to!string(addr+1);
+ string funcCode = charsets[ir[0].data].toSourceCode(name);
+ code ~= ctSub( `
+ static $$
+ if (atEnd || !$$(front))
+ $$
+ $$
+ $$`, funcCode, name, bailOut, addr >= 0 ? "next();" :"", nextInstr);
+ }
+ else
+ code ~= ctSub( `
+ if (atEnd || !re.charsets[$$].scanFor(front))
+ $$
+ $$
+ $$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
+ break;
+ case IR.Trie:
+ if (charsets.length && charsets[ir[0].data].byInterval.length <= 8)
+ goto case IR.CodepointSet;
+ code ~= ctSub( `
+ if (atEnd || !re.matchers[$$][front])
+ $$
+ $$
+ $$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
+ break;
+ case IR.Wordboundary:
+ code ~= ctSub( `
+ dchar back;
+ DataIndex bi;
+ if (atStart && wordMatcher[front])
+ {
+ $$
+ }
+ else if (atEnd && s.loopBack(index).nextChar(back, bi)
+ && wordMatcher[back])
+ {
+ $$
+ }
+ else if (s.loopBack(index).nextChar(back, bi))
+ {
+ bool af = wordMatcher[front];
+ bool ab = wordMatcher[back];
+ if (af ^ ab)
+ {
+ $$
+ }
+ }
+ $$`, nextInstr, nextInstr, nextInstr, bailOut);
+ break;
+ case IR.Notwordboundary:
+ code ~= ctSub( `
+ dchar back;
+ DataIndex bi;
+ //at start & end of input
+ if (atStart && wordMatcher[front])
+ $$
+ else if (atEnd && s.loopBack(index).nextChar(back, bi)
+ && wordMatcher[back])
+ $$
+ else if (s.loopBack(index).nextChar(back, bi))
+ {
+ bool af = wordMatcher[front];
+ bool ab = wordMatcher[back];
+ if (af ^ ab)
+ $$
+ }
+ $$`, bailOut, bailOut, bailOut, nextInstr);
+
+ break;
+ case IR.Bol:
+ code ~= ctSub(`
+ dchar back;
+ DataIndex bi;
+ if (atStart || (s.loopBack(index).nextChar(back,bi)
+ && endOfLine(back, front == '\n')))
+ {
+ debug(std_regex_matcher) writeln("BOL matched");
+ $$
+ }
+ else
+ $$`, nextInstr, bailOut);
+
+ break;
+ case IR.Bof:
+ code ~= ctSub(`
+ if (atStart)
+ {
+ debug(std_regex_matcher) writeln("BOF matched");
+ $$
+ }
+ else
+ $$`, nextInstr, bailOut);
+ break;
+ case IR.Eol:
+ code ~= ctSub(`
+ dchar back;
+ DataIndex bi;
+ debug(std_regex_matcher) writefln("EOL (front 0x%x) %s", front, s[index .. s.lastIndex]);
+ //no matching inside \r\n
+ if (atEnd || (endOfLine(front, s.loopBack(index).nextChar(back,bi)
+ && back == '\r')))
+ {
+ debug(std_regex_matcher) writeln("EOL matched");
+ $$
+ }
+ else
+ $$`, nextInstr, bailOut);
+ break;
+ case IR.Eof:
+ code ~= ctSub(`
+ if (atEnd)
+ {
+ debug(std_regex_matcher) writeln("BOF matched");
+ $$
+ }
+ else
+ $$`, nextInstr, bailOut);
+ break;
+ case IR.GroupStart:
+ code ~= ctSub(`
+ matches[$$].begin = index;
+ $$`, ir[0].data, nextInstr);
+ match = ir[0].data+1;
+ break;
+ case IR.GroupEnd:
+ code ~= ctSub(`
+ matches[$$].end = index;
+ $$`, ir[0].data, nextInstr);
+ break;
+ case IR.Backref:
+ string mStr = "auto referenced = ";
+ mStr ~= ir[0].localRef
+ ? ctSub("s[matches[$$].begin .. matches[$$].end];",
+ ir[0].data, ir[0].data)
+ : ctSub("s[backrefed[$$].begin .. backrefed[$$].end];",
+ ir[0].data, ir[0].data);
+ code ~= ctSub( `
+ $$
+ while (!atEnd && !referenced.empty && front == referenced.front)
+ {
+ next();
+ referenced.popFront();
+ }
+ if (referenced.empty)
+ $$
+ else
+ $$`, mStr, nextInstr, bailOut);
+ break;
+ case IR.Nop:
+ case IR.End:
+ break;
+ default:
+ assert(0, text(ir[0].mnemonic, " is not supported yet"));
+ }
+ return code;
+ }
+
+ //generate D code for the whole regex
+ public string ctGenRegEx(Bytecode[] ir)
+ {
+ auto bdy = ctGenBlock(ir, 0);
+ auto r = `
+ import core.stdc.stdlib;
+ with(matcher)
+ {
+ pc = 0;
+ counter = 0;
+ lastState = 0;
+ matches[] = Group!DataIndex.init;
+ auto start = s._index;`;
+ r ~= `
+ goto StartLoop;
+ debug(std_regex_matcher) writeln("Try CT matching starting at ",s[index .. s.lastIndex]);
+ L_backtrack:
+ if (lastState || prevStack())
+ {
+ stackPop(pc);
+ stackPop(index);
+ s.reset(index);
+ next();
+ }
+ else
+ {
+ s.reset(start);
+ return false;
+ }
+ StartLoop:
+ switch (pc)
+ {
+ `;
+ r ~= bdy.code;
+ r ~= ctSub(`
+ case $$: break;`,bdy.addr);
+ r ~= `
+ default:
+ assert(0);
+ }
+ // cleanup stale stack blocks
+ while (prevStack()) {}
+ return true;
+ }
+ `;
+ return r;
+ }
+
+}
+
+string ctGenRegExCode(Char)(Regex!Char re)
+{
+ auto context = CtContext(re);
+ return context.ctGenRegEx(re.ir);
+}