diff options
author | Iain Buclaw <ibuclaw@gdcproject.org> | 2021-04-02 13:29:22 +0200 |
---|---|---|
committer | Iain Buclaw <ibuclaw@gdcproject.org> | 2021-04-04 01:26:20 +0200 |
commit | 5a0aa603b2452dca48ad86d97d4b918187d259fc (patch) | |
tree | 67439099b8129248a45826b5791912b20ff3c3fb /libphobos/src/std/regex | |
parent | ba0f6902666430e5d065a92b3d5292cba91813d3 (diff) | |
download | gcc-5a0aa603b2452dca48ad86d97d4b918187d259fc.zip gcc-5a0aa603b2452dca48ad86d97d4b918187d259fc.tar.gz gcc-5a0aa603b2452dca48ad86d97d4b918187d259fc.tar.bz2 |
d: Merge upstream dmd 3b808e838, druntime 483bc129, phobos f89dc217a
D front-end changes:
- Explicit package visibility attribute is now always applied to
introducing scopes.
- Added `__traits(totype, string)' to convert mangled type string to an
existing type.
- Printf-like and scanf-like functions are now detected by prefixing
them with `pragma(printf)' for printf-like functions or
`pragma(scanf)' for scanf-like functions.
- Added `__c_wchar_t', `__c_complex_float', `__c_complex_double', and
`__c_complex_real' types for interfacing with C and C++.
- Template alias parameters can now be instantiated with basic types,
such as `int` or `void function()`.
- Mixins can now be used as types in the form `mixin(string) var'.
- Mixin expressions can take an argument list, same as `pragma(msg)'.
- Implement DIP1034, add `typeof(*null)' types to represent `noreturn'.
- `pragma(msg)' can print expressions of type `void'.
- It is now an error to use private variables selectively imported from
other modules. Due to a bug, some imported private members were
visible from other modules, violating the specification.
- Added new syntax to declare an alias to a function type using the
`alias' syntax based on the assignment operator.
- Function literals can now return a value by reference.
Phobos changes:
- Synchronize C bindings with the latest port fixes in upstream druntime.
- Added alias for a `noreturn' type in object.d
- Make use of the new `pragma(printf)' and `pragma(scanf)' pragmas, fix
all code that got flagged as being incorrect.
- Fixed code that relied on bugs in the D import package system.
Reviewed-on: https://github.com/dlang/dmd/pull/12339
https://github.com/dlang/druntime/pull/3422
https://github.com/dlang/phobos/pull/7932
gcc/d/ChangeLog:
* dmd/MERGE: Merge upstream dmd 3b808e838.
* Make-lang.in (D_FRONTEND_OBJS): Add d/chkformat.o.
* d-codegen.cc (build_struct_literal): Handle special enums.
* d-convert.cc (convert_expr): Handle noreturn type.
(convert_for_condition): Likewise.
* d-target.cc (Target::_init): Set type for wchar_t.
(TargetCPP::derivedClassOffset): New method.
(Target::libraryObjectMonitors): New method.
* decl.cc (get_symbol_decl): Set TREE_THIS_VOLATILE for functions of
type noreturn.
* toir.cc (IRVisitor::visit (ReturnStatement *)): Handle returning
noreturn types.
* types.cc (TypeVisitor::visit (TypeNoreturn *)): New method.
(TypeVisitor::visit (TypeEnum *)): Handle special enums.
libphobos/ChangeLog:
* libdruntime/MERGE: Merge upstream druntime 483bc129.
* libdruntime/Makefile.am (DRUNTIME_DSOURCES_DARWIN): Add
core/sys/darwin/fcntl.d.
(DRUNTIME_DSOURCES_OPENBSD): Add core/sys/openbsd/unistd.d.
(DRUNTIME_DSOURCES_WINDOWS): Add core/sys/windows/stdc/malloc.d.
* libdruntime/Makefile.in: Regenerate.
* src/MERGE: Merge upstream phobos f89dc217a.
* src/Makefile.am (PHOBOS_DSOURCES): Add std/regex/internal/tests2.d.
* src/Makefile.in: Regenerate.
* testsuite/libphobos.exceptions/chain.d: Fix format arguments.
* testsuite/libphobos.exceptions/line_trace.d: Likewise.
Diffstat (limited to 'libphobos/src/std/regex')
-rw-r--r-- | libphobos/src/std/regex/internal/tests.d | 653 | ||||
-rw-r--r-- | libphobos/src/std/regex/internal/tests2.d | 662 |
2 files changed, 663 insertions, 652 deletions
diff --git a/libphobos/src/std/regex/internal/tests.d b/libphobos/src/std/regex/internal/tests.d index 1c4f295..fe75ce0 100644 --- a/libphobos/src/std/regex/internal/tests.d +++ b/libphobos/src/std/regex/internal/tests.d @@ -8,7 +8,7 @@ package(std.regex): import std.conv, std.exception, std.meta, std.range, std.typecons, std.regex; -import std.regex.internal.parser : Escapables; // characters that need escaping +import std.regex.internal.ir : Escapables; // characters that need escaping alias Sequence(int B, int E) = staticIota!(B, E); @@ -467,654 +467,3 @@ alias Sequence(int B, int E) = staticIota!(B, E); run_tests!match(); //thompson VM } -@safe unittest -{ - auto cr = ctRegex!("abc"); - assert(bmatch("abc",cr).hit == "abc"); - auto cr2 = ctRegex!("ab*c"); - assert(bmatch("abbbbc",cr2).hit == "abbbbc"); -} -@safe unittest -{ - auto cr3 = ctRegex!("^abc$"); - assert(bmatch("abc",cr3).hit == "abc"); - auto cr4 = ctRegex!(`\b(a\B[a-z]b)\b`); - assert(array(match("azb",cr4).captures) == ["azb", "azb"]); -} - -@safe unittest -{ - auto cr5 = ctRegex!("(?:a{2,4}b{1,3}){1,2}"); - assert(bmatch("aaabaaaabbb", cr5).hit == "aaabaaaabbb"); - auto cr6 = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w); - assert(bmatch("aaabaaaabbb"w, cr6).hit == "aaab"w); -} - -@safe unittest -{ - auto cr7 = ctRegex!(`\r.*?$`,"sm"); - assert(bmatch("abc\r\nxy", cr7).hit == "\r\nxy"); - auto greed = ctRegex!("<packet.*?/packet>"); - assert(bmatch("<packet>text</packet><packet>text</packet>", greed).hit - == "<packet>text</packet>"); -} - -@safe unittest -{ - import std.algorithm.comparison : equal; - auto cr8 = ctRegex!("^(a)(b)?(c*)"); - auto m8 = bmatch("abcc",cr8); - assert(m8); - assert(m8.captures[1] == "a"); - assert(m8.captures[2] == "b"); - assert(m8.captures[3] == "cc"); - auto cr9 = ctRegex!("q(a|b)*q"); - auto m9 = match("xxqababqyy",cr9); - assert(m9); - assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"])); -} - -@safe unittest -{ - import std.algorithm.comparison : equal; - auto rtr = regex("a|b|c"); - enum ctr = regex("a|b|c"); - assert(equal(rtr.ir,ctr.ir)); - //CTFE parser BUG is triggered by group - //in the middle of alternation (at least not first and not last) - enum testCT = regex(`abc|(edf)|xyz`); - auto testRT = regex(`abc|(edf)|xyz`); - assert(equal(testCT.ir,testRT.ir)); -} - -@safe unittest -{ - import std.algorithm.comparison : equal; - import std.algorithm.iteration : map; - enum cx = ctRegex!"(A|B|C)"; - auto mx = match("B",cx); - assert(mx); - assert(equal(mx.captures, [ "B", "B"])); - enum cx2 = ctRegex!"(A|B)*"; - assert(match("BAAA",cx2)); - - enum cx3 = ctRegex!("a{3,4}","i"); - auto mx3 = match("AaA",cx3); - assert(mx3); - assert(mx3.captures[0] == "AaA"); - enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i"); - auto mx4 = match("aaaabc", cx4); - assert(mx4); - assert(mx4.captures[0] == "aaaab"); - auto cr8 = ctRegex!("(a)(b)?(c*)"); - auto m8 = bmatch("abcc",cr8); - assert(m8); - assert(m8.captures[1] == "a"); - assert(m8.captures[2] == "b"); - assert(m8.captures[3] == "cc"); - auto cr9 = ctRegex!(".*$", "gm"); - auto m9 = match("First\rSecond", cr9); - assert(m9); - assert(equal(map!"a.hit"(m9), ["First", "", "Second"])); -} - -@safe unittest -{ - import std.algorithm.comparison : equal; - import std.algorithm.iteration : map; -//global matching - void test_body(alias matchFn)() - { - string s = "a quick brown fox jumps over a lazy dog"; - auto r1 = regex("\\b[a-z]+\\b","g"); - string[] test; - foreach (m; matchFn(s, r1)) - test ~= m.hit; - assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"])); - auto free_reg = regex(` - - abc - \s+ - " - ( - [^"]+ - | \\ " - )+ - " - z - `, "x"); - auto m = match(`abc "quoted string with \" inside"z`,free_reg); - assert(m); - string mails = " hey@you.com no@spam.net "; - auto rm = regex(`@(?<=\S+@)\S+`,"g"); - assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"])); - auto m2 = matchFn("First line\nSecond line",regex(".*$","gm")); - assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"])); - auto m2a = matchFn("First line\nSecond line",regex(".+$","gm")); - assert(equal(map!"a[0]"(m2a), ["First line", "Second line"])); - auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm")); - assert(equal(map!"a[0]"(m2b), ["First line", "Second line"])); - debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!"); - } - test_body!bmatch(); - test_body!match(); -} - -//tests for accumulated std.regex issues and other regressions -@safe unittest -{ - import std.algorithm.comparison : equal; - import std.algorithm.iteration : map; - void test_body(alias matchFn)() - { - //issue 5857 - //matching goes out of control if ... in (...){x} has .*/.+ - auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures; - assert(c[0] == "axxxzayyyyyzd"); - assert(c[1] == "ayyyyyz"); - auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures; - assert(c2[0] == "axxxayyyyyd"); - assert(c2[1] == "ayyyyy"); - //issue 2108 - //greedy vs non-greedy - auto nogreed = regex("<packet.*?/packet>"); - assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit - == "<packet>text</packet>"); - auto greed = regex("<packet.*/packet>"); - assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit - == "<packet>text</packet><packet>text</packet>"); - //issue 4574 - //empty successful match still advances the input - string[] pres, posts, hits; - foreach (m; matchFn("abcabc", regex("","g"))) - { - pres ~= m.pre; - posts ~= m.post; - assert(m.hit.empty); - - } - auto heads = [ - "abcabc", - "abcab", - "abca", - "abc", - "ab", - "a", - "" - ]; - auto tails = [ - "abcabc", - "bcabc", - "cabc", - "abc", - "bc", - "c", - "" - ]; - assert(pres == array(retro(heads))); - assert(posts == tails); - //issue 6076 - //regression on .* - auto re = regex("c.*|d"); - auto m = matchFn("mm", re); - assert(!m); - debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!"); - auto rprealloc = regex(`((.){5}.{1,10}){5}`); - auto arr = array(repeat('0',100)); - auto m2 = matchFn(arr, rprealloc); - assert(m2); - assert(collectException( - regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$") - ) is null); - foreach (ch; [Escapables]) - { - assert(match(to!string(ch),regex(`[\`~ch~`]`))); - assert(!match(to!string(ch),regex(`[^\`~ch~`]`))); - assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`))); - } - //bugzilla 7718 - string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'"; - auto reStrCmd = regex (`(".*")|('.*')`, "g"); - assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)), - [`"/GIT/Ruby Apps/sec"`, `'notimer'`])); - } - test_body!bmatch(); - test_body!match(); -} - -// tests for replace -@safe unittest -{ - void test(alias matchFn)() - { - import std.uni : toUpper; - - foreach (i, v; AliasSeq!(string, wstring, dstring)) - { - auto baz(Cap)(Cap m) - if (is(Cap == Captures!(Cap.String))) - { - return toUpper(m.hit); - } - alias String = v; - assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c")) - == to!String("ack rapacity")); - assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c")) - == to!String("ack capacity")); - assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]")) - == to!String("[n]oon")); - assert(std.regex.replace!(matchFn)( - to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'") - ) == to!String(": test2 test1 :")); - auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."), - regex(to!String("[ar]"), "g")); - assert(s == "StRAp A Rocket engine on A chicken."); - } - debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~" !!!"); - } - test!(bmatch)(); - test!(match)(); -} - -// tests for splitter -@safe unittest -{ - import std.algorithm.comparison : equal; - auto s1 = ", abc, de, fg, hi, "; - auto sp1 = splitter(s1, regex(", *")); - auto w1 = ["", "abc", "de", "fg", "hi", ""]; - assert(equal(sp1, w1)); - - auto s2 = ", abc, de, fg, hi"; - auto sp2 = splitter(s2, regex(", *")); - auto w2 = ["", "abc", "de", "fg", "hi"]; - - uint cnt; - foreach (e; sp2) - { - assert(w2[cnt++] == e); - } - assert(equal(sp2, w2)); -} - -@safe unittest -{ - char[] s1 = ", abc, de, fg, hi, ".dup; - auto sp2 = splitter(s1, regex(", *")); -} - -@safe unittest -{ - import std.algorithm.comparison : equal; - auto s1 = ", abc, de, fg, hi, "; - auto w1 = ["", "abc", "de", "fg", "hi", ""]; - assert(equal(split(s1, regex(", *")), w1[])); -} - -@safe unittest -{ // bugzilla 7141 - string pattern = `[a\--b]`; - assert(match("-", pattern)); - assert(match("b", pattern)); - string pattern2 = `[&-z]`; - assert(match("b", pattern2)); -} -@safe unittest -{//bugzilla 7111 - assert(match("", regex("^"))); -} -@safe unittest -{//bugzilla 7300 - assert(!match("a"d, "aa"d)); -} - -// bugzilla 7551 -@safe unittest -{ - auto r = regex("[]abc]*"); - assert("]ab".matchFirst(r).hit == "]ab"); - assertThrown(regex("[]")); - auto r2 = regex("[]abc--ab]*"); - assert("]ac".matchFirst(r2).hit == "]"); -} - -@safe unittest -{//bugzilla 7674 - assert("1234".replace(regex("^"), "$$") == "$1234"); - assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?"); - assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?"); -} -@safe unittest -{// bugzilla 7679 - import std.algorithm.comparison : equal; - foreach (S; AliasSeq!(string, wstring, dstring)) - (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 - enum re = ctRegex!(to!S(r"\.")); - auto str = to!S("a.b"); - assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")])); - assert(split(str, re) == [to!S("a"), to!S("b")]); - }(); -} -@safe unittest -{//bugzilla 8203 - string data = " - NAME = XPAW01_STA:STATION - NAME = XPAW01_STA - "; - auto uniFileOld = data; - auto r = regex( - r"^NAME = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm"); - auto uniCapturesNew = match(uniFileOld, r); - for (int i = 0; i < 20; i++) - foreach (matchNew; uniCapturesNew) {} - //a second issue with same symptoms - auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`); - match("аллея Театральная", r2); -} -@safe unittest -{// bugzilla 8637 purity of enforce - auto m = match("hello world", regex("world")); - enforce(m); -} - -// bugzilla 8725 -@safe unittest -{ - static italic = regex( r"\* - (?!\s+) - (.*?) - (?!\s+) - \*", "gx" ); - string input = "this * is* interesting, *very* interesting"; - assert(replace(input, italic, "<i>$1</i>") == - "this * is* interesting, <i>very</i> interesting"); -} - -// bugzilla 8349 -@safe unittest -{ - enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>"; - enum peakRegex = ctRegex!(peakRegexStr); - //note that the regex pattern itself is probably bogus - assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex)); -} - -// bugzilla 9211 -@safe unittest -{ - import std.algorithm.comparison : equal; - auto rx_1 = regex(r"^(\w)*(\d)"); - auto m = match("1234", rx_1); - assert(equal(m.front, ["1234", "3", "4"])); - auto rx_2 = regex(r"^([0-9])*(\d)"); - auto m2 = match("1234", rx_2); - assert(equal(m2.front, ["1234", "3", "4"])); -} - -// bugzilla 9280 -@safe unittest -{ - string tomatch = "a!b@c"; - static r = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$"); - auto nm = match(tomatch, r); - assert(nm); - auto c = nm.captures; - assert(c[1] == "a"); - assert(c["nick"] == "a"); -} - - -// bugzilla 9579 -@safe unittest -{ - char[] input = ['a', 'b', 'c']; - string format = "($1)"; - // used to give a compile error: - auto re = regex(`(a)`, "g"); - auto r = replace(input, re, format); - assert(r == "(a)bc"); -} - -// bugzilla 9634 -@safe unittest -{ - auto re = ctRegex!"(?:a+)"; - assert(match("aaaa", re).hit == "aaaa"); -} - -//bugzilla 10798 -@safe unittest -{ - auto cr = ctRegex!("[abcd--c]*"); - auto m = "abc".match(cr); - assert(m); - assert(m.hit == "ab"); -} - -// bugzilla 10913 -@system unittest -{ - @system static string foo(const(char)[] s) - { - return s.dup; - } - @safe static string bar(const(char)[] s) - { - return s.dup; - } - () @system { - replace!((a) => foo(a.hit))("blah", regex(`a`)); - }(); - () @safe { - replace!((a) => bar(a.hit))("blah", regex(`a`)); - }(); -} - -// bugzilla 11262 -@safe unittest -{ - enum reg = ctRegex!(r",", "g"); - auto str = "This,List"; - str = str.replace(reg, "-"); - assert(str == "This-List"); -} - -// bugzilla 11775 -@safe unittest -{ - assert(collectException(regex("a{1,0}"))); -} - -// bugzilla 11839 -@safe unittest -{ - import std.algorithm.comparison : equal; - assert(regex(`(?P<var1>\w+)`).namedCaptures.equal(["var1"])); - assert(collectException(regex(`(?P<1>\w+)`))); - assert(regex(`(?P<v1>\w+)`).namedCaptures.equal(["v1"])); - assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"])); - assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"])); -} - -// bugzilla 12076 -@safe unittest -{ - auto RE = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)"); - string s = "one two"; - auto m = match(s, RE); -} - -// bugzilla 12105 -@safe unittest -{ - auto r = ctRegex!`.*?(?!a)`; - assert("aaab".matchFirst(r).hit == "aaa"); - auto r2 = ctRegex!`.*(?!a)`; - assert("aaab".matchFirst(r2).hit == "aaab"); -} - -//bugzilla 11784 -@safe unittest -{ - assert("abcdefghijklmnopqrstuvwxyz" - .matchFirst("[a-z&&[^aeiuo]]").hit == "b"); -} - -//bugzilla 12366 -@safe unittest -{ - auto re = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`); - assert("xxxxxxxx".match(re).empty); - assert(!"xxxx".match(re).empty); -} - -// bugzilla 12582 -@safe unittest -{ - auto r = regex(`(?P<a>abc)`); - assert(collectException("abc".matchFirst(r)["b"])); -} - -// bugzilla 12691 -@safe unittest -{ - assert(bmatch("e@", "^([a-z]|)*$").empty); - assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty); -} - -//bugzilla 12713 -@safe unittest -{ - assertThrown(regex("[[a-z]([a-z]|(([[a-z])))")); -} - -//bugzilla 12747 -@safe unittest -{ - assertThrown(regex(`^x(\1)`)); - assertThrown(regex(`^(x(\1))`)); - assertThrown(regex(`^((x)(?=\1))`)); -} - -// bugzilla 14504 -@safe unittest -{ - auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" ~ - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); -} - -// bugzilla 14529 -@safe unittest -{ - auto ctPat2 = regex(r"^[CDF]$", "i"); - foreach (v; ["C", "c", "D", "d", "F", "f"]) - assert(matchAll(v, ctPat2).front.hit == v); -} - -// bugzilla 14615 -@safe unittest -{ - import std.array : appender; - import std.regex : replaceFirst, replaceFirstInto, regex; - import std.stdio : writeln; - - auto example = "Hello, world!"; - auto pattern = regex("^Hello, (bug)"); // won't find this one - auto result = replaceFirst(example, pattern, "$1 Sponge Bob"); - assert(result == "Hello, world!"); // Ok. - - auto sink = appender!string; - replaceFirstInto(sink, example, pattern, "$1 Sponge Bob"); - assert(sink.data == "Hello, world!"); - replaceAllInto(sink, example, pattern, "$1 Sponge Bob"); - assert(sink.data == "Hello, world!Hello, world!"); -} - -// bugzilla 15573 -@safe unittest -{ - auto rx = regex("[c d]", "x"); - assert("a b".matchFirst(rx)); -} - -// bugzilla 15864 -@safe unittest -{ - regex(`(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`); -} - -@safe unittest -{ - auto r = regex("(?# comment)abc(?# comment2)"); - assert("abc".matchFirst(r)); - assertThrown(regex("(?#...")); -} - -// bugzilla 17075 -@safe unittest -{ - enum titlePattern = `<title>(.+)</title>`; - static titleRegex = ctRegex!titlePattern; - string input = "<title>" ~ "<".repeat(100_000).join; - assert(input.matchFirst(titleRegex).empty); -} - -// bugzilla 17212 -@safe unittest -{ - auto r = regex(" [a] ", "x"); - assert("a".matchFirst(r)); -} - -// bugzilla 17157 -@safe unittest -{ - import std.algorithm.comparison : equal; - auto ctr = ctRegex!"(a)|(b)|(c)|(d)"; - auto r = regex("(a)|(b)|(c)|(d)", "g"); - auto s = "--a--b--c--d--"; - auto outcomes = [ - ["a", "a", "", "", ""], - ["b", "", "b", "", ""], - ["c", "", "", "c", ""], - ["d", "", "", "", "d"] - ]; - assert(equal!equal(s.matchAll(ctr), outcomes)); - assert(equal!equal(s.bmatch(r), outcomes)); -} - -// bugzilla 17667 -@safe unittest -{ - import std.algorithm.searching : canFind; - void willThrow(T, size_t line = __LINE__)(T arg, string msg) - { - auto e = collectException(regex(arg)); - assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg); - } - willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class"); - willThrow([r"[\", r"123"], "no matching ']' found while parsing character class"); - willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class"); - willThrow([r"[a-\", r"123"], "invalid escape sequence"); - willThrow([r"\", r"123"], "invalid escape sequence"); -} - -// bugzilla 17668 -@safe unittest -{ - import std.algorithm.searching; - auto e = collectException!RegexException(regex(q"<[^]>")); - assert(e.msg.canFind("no operand for '^'")); -} - -// bugzilla 17673 -@safe unittest -{ - string str = `<">`; - string[] regexps = ["abc", "\"|x"]; - auto regexp = regex(regexps); - auto c = matchFirst(str, regexp); - assert(c); - assert(c.whichPattern == 2); -} - diff --git a/libphobos/src/std/regex/internal/tests2.d b/libphobos/src/std/regex/internal/tests2.d new file mode 100644 index 0000000..420f8d3 --- /dev/null +++ b/libphobos/src/std/regex/internal/tests2.d @@ -0,0 +1,662 @@ +// Split-up due to DMD's enormous memory consumption + +module std.regex.internal.tests2; + +package(std.regex): + +import std.conv, std.exception, std.meta, std.range, + std.typecons, std.regex; + +import std.regex.internal.ir : Escapables; // characters that need escaping + +@safe unittest +{ + auto cr = ctRegex!("abc"); + assert(bmatch("abc",cr).hit == "abc"); + auto cr2 = ctRegex!("ab*c"); + assert(bmatch("abbbbc",cr2).hit == "abbbbc"); +} +@safe unittest +{ + auto cr3 = ctRegex!("^abc$"); + assert(bmatch("abc",cr3).hit == "abc"); + auto cr4 = ctRegex!(`\b(a\B[a-z]b)\b`); + assert(array(match("azb",cr4).captures) == ["azb", "azb"]); +} + +@safe unittest +{ + auto cr5 = ctRegex!("(?:a{2,4}b{1,3}){1,2}"); + assert(bmatch("aaabaaaabbb", cr5).hit == "aaabaaaabbb"); + auto cr6 = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w); + assert(bmatch("aaabaaaabbb"w, cr6).hit == "aaab"w); +} + +@safe unittest +{ + auto cr7 = ctRegex!(`\r.*?$`,"sm"); + assert(bmatch("abc\r\nxy", cr7).hit == "\r\nxy"); + auto greed = ctRegex!("<packet.*?/packet>"); + assert(bmatch("<packet>text</packet><packet>text</packet>", greed).hit + == "<packet>text</packet>"); +} + +@safe unittest +{ + import std.algorithm.comparison : equal; + auto cr8 = ctRegex!("^(a)(b)?(c*)"); + auto m8 = bmatch("abcc",cr8); + assert(m8); + assert(m8.captures[1] == "a"); + assert(m8.captures[2] == "b"); + assert(m8.captures[3] == "cc"); + auto cr9 = ctRegex!("q(a|b)*q"); + auto m9 = match("xxqababqyy",cr9); + assert(m9); + assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"])); +} + +@safe unittest +{ + import std.algorithm.comparison : equal; + auto rtr = regex("a|b|c"); + enum ctr = regex("a|b|c"); + assert(equal(rtr.ir,ctr.ir)); + //CTFE parser BUG is triggered by group + //in the middle of alternation (at least not first and not last) + enum testCT = regex(`abc|(edf)|xyz`); + auto testRT = regex(`abc|(edf)|xyz`); + assert(equal(testCT.ir,testRT.ir)); +} + +@safe unittest +{ + import std.algorithm.comparison : equal; + import std.algorithm.iteration : map; + enum cx = ctRegex!"(A|B|C)"; + auto mx = match("B",cx); + assert(mx); + assert(equal(mx.captures, [ "B", "B"])); + enum cx2 = ctRegex!"(A|B)*"; + assert(match("BAAA",cx2)); + + enum cx3 = ctRegex!("a{3,4}","i"); + auto mx3 = match("AaA",cx3); + assert(mx3); + assert(mx3.captures[0] == "AaA"); + enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i"); + auto mx4 = match("aaaabc", cx4); + assert(mx4); + assert(mx4.captures[0] == "aaaab"); + auto cr8 = ctRegex!("(a)(b)?(c*)"); + auto m8 = bmatch("abcc",cr8); + assert(m8); + assert(m8.captures[1] == "a"); + assert(m8.captures[2] == "b"); + assert(m8.captures[3] == "cc"); + auto cr9 = ctRegex!(".*$", "gm"); + auto m9 = match("First\rSecond", cr9); + assert(m9); + assert(equal(map!"a.hit"(m9), ["First", "", "Second"])); +} + +@safe unittest +{ + import std.algorithm.comparison : equal; + import std.algorithm.iteration : map; +//global matching + void test_body(alias matchFn)() + { + string s = "a quick brown fox jumps over a lazy dog"; + auto r1 = regex("\\b[a-z]+\\b","g"); + string[] test; + foreach (m; matchFn(s, r1)) + test ~= m.hit; + assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"])); + auto free_reg = regex(` + + abc + \s+ + " + ( + [^"]+ + | \\ " + )+ + " + z + `, "x"); + auto m = match(`abc "quoted string with \" inside"z`,free_reg); + assert(m); + string mails = " hey@you.com no@spam.net "; + auto rm = regex(`@(?<=\S+@)\S+`,"g"); + assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"])); + auto m2 = matchFn("First line\nSecond line",regex(".*$","gm")); + assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"])); + auto m2a = matchFn("First line\nSecond line",regex(".+$","gm")); + assert(equal(map!"a[0]"(m2a), ["First line", "Second line"])); + auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm")); + assert(equal(map!"a[0]"(m2b), ["First line", "Second line"])); + debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!"); + } + test_body!bmatch(); + test_body!match(); +} + +//tests for accumulated std.regex issues and other regressions +@safe unittest +{ + import std.algorithm.comparison : equal; + import std.algorithm.iteration : map; + void test_body(alias matchFn)() + { + //issue 5857 + //matching goes out of control if ... in (...){x} has .*/.+ + auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures; + assert(c[0] == "axxxzayyyyyzd"); + assert(c[1] == "ayyyyyz"); + auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures; + assert(c2[0] == "axxxayyyyyd"); + assert(c2[1] == "ayyyyy"); + //issue 2108 + //greedy vs non-greedy + auto nogreed = regex("<packet.*?/packet>"); + assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit + == "<packet>text</packet>"); + auto greed = regex("<packet.*/packet>"); + assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit + == "<packet>text</packet><packet>text</packet>"); + //issue 4574 + //empty successful match still advances the input + string[] pres, posts, hits; + foreach (m; matchFn("abcabc", regex("","g"))) + { + pres ~= m.pre; + posts ~= m.post; + assert(m.hit.empty); + + } + auto heads = [ + "abcabc", + "abcab", + "abca", + "abc", + "ab", + "a", + "" + ]; + auto tails = [ + "abcabc", + "bcabc", + "cabc", + "abc", + "bc", + "c", + "" + ]; + assert(pres == array(retro(heads))); + assert(posts == tails); + //issue 6076 + //regression on .* + auto re = regex("c.*|d"); + auto m = matchFn("mm", re); + assert(!m); + debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!"); + auto rprealloc = regex(`((.){5}.{1,10}){5}`); + auto arr = array(repeat('0',100)); + auto m2 = matchFn(arr, rprealloc); + assert(m2); + assert(collectException( + regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$") + ) is null); + foreach (ch; [Escapables]) + { + assert(match(to!string(ch),regex(`[\`~ch~`]`))); + assert(!match(to!string(ch),regex(`[^\`~ch~`]`))); + assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`))); + } + //bugzilla 7718 + string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'"; + auto reStrCmd = regex (`(".*")|('.*')`, "g"); + assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)), + [`"/GIT/Ruby Apps/sec"`, `'notimer'`])); + } + test_body!bmatch(); + test_body!match(); +} + +// tests for replace +@safe unittest +{ + void test(alias matchFn)() + { + import std.uni : toUpper; + + foreach (i, v; AliasSeq!(string, wstring, dstring)) + { + auto baz(Cap)(Cap m) + if (is(Cap == Captures!(Cap.String))) + { + return toUpper(m.hit); + } + alias String = v; + assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c")) + == to!String("ack rapacity")); + assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c")) + == to!String("ack capacity")); + assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]")) + == to!String("[n]oon")); + assert(std.regex.replace!(matchFn)( + to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'") + ) == to!String(": test2 test1 :")); + auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."), + regex(to!String("[ar]"), "g")); + assert(s == "StRAp A Rocket engine on A chicken."); + } + debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~" !!!"); + } + test!(bmatch)(); + test!(match)(); +} + +// tests for splitter +@safe unittest +{ + import std.algorithm.comparison : equal; + auto s1 = ", abc, de, fg, hi, "; + auto sp1 = splitter(s1, regex(", *")); + auto w1 = ["", "abc", "de", "fg", "hi", ""]; + assert(equal(sp1, w1)); + + auto s2 = ", abc, de, fg, hi"; + auto sp2 = splitter(s2, regex(", *")); + auto w2 = ["", "abc", "de", "fg", "hi"]; + + uint cnt; + foreach (e; sp2) + { + assert(w2[cnt++] == e); + } + assert(equal(sp2, w2)); +} + +@safe unittest +{ + char[] s1 = ", abc, de, fg, hi, ".dup; + auto sp2 = splitter(s1, regex(", *")); +} + +@safe unittest +{ + import std.algorithm.comparison : equal; + auto s1 = ", abc, de, fg, hi, "; + auto w1 = ["", "abc", "de", "fg", "hi", ""]; + assert(equal(split(s1, regex(", *")), w1[])); +} + +@safe unittest +{ // bugzilla 7141 + string pattern = `[a\--b]`; + assert(match("-", pattern)); + assert(match("b", pattern)); + string pattern2 = `[&-z]`; + assert(match("b", pattern2)); +} +@safe unittest +{//bugzilla 7111 + assert(match("", regex("^"))); +} +@safe unittest +{//bugzilla 7300 + assert(!match("a"d, "aa"d)); +} + +// bugzilla 7551 +@safe unittest +{ + auto r = regex("[]abc]*"); + assert("]ab".matchFirst(r).hit == "]ab"); + assertThrown(regex("[]")); + auto r2 = regex("[]abc--ab]*"); + assert("]ac".matchFirst(r2).hit == "]"); +} + +@safe unittest +{//bugzilla 7674 + assert("1234".replace(regex("^"), "$$") == "$1234"); + assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?"); + assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?"); +} +@safe unittest +{// bugzilla 7679 + import std.algorithm.comparison : equal; + foreach (S; AliasSeq!(string, wstring, dstring)) + (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 + enum re = ctRegex!(to!S(r"\.")); + auto str = to!S("a.b"); + assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")])); + assert(split(str, re) == [to!S("a"), to!S("b")]); + }(); +} +@safe unittest +{//bugzilla 8203 + string data = " + NAME = XPAW01_STA:STATION + NAME = XPAW01_STA + "; + auto uniFileOld = data; + auto r = regex( + r"^NAME = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm"); + auto uniCapturesNew = match(uniFileOld, r); + for (int i = 0; i < 20; i++) + foreach (matchNew; uniCapturesNew) {} + //a second issue with same symptoms + auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`); + match("аллея Театральная", r2); +} +@safe unittest +{// bugzilla 8637 purity of enforce + auto m = match("hello world", regex("world")); + enforce(m); +} + +// bugzilla 8725 +@safe unittest +{ + static italic = regex( r"\* + (?!\s+) + (.*?) + (?!\s+) + \*", "gx" ); + string input = "this * is* interesting, *very* interesting"; + assert(replace(input, italic, "<i>$1</i>") == + "this * is* interesting, <i>very</i> interesting"); +} + +// bugzilla 8349 +@safe unittest +{ + enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>"; + enum peakRegex = ctRegex!(peakRegexStr); + //note that the regex pattern itself is probably bogus + assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex)); +} + +// bugzilla 9211 +@safe unittest +{ + import std.algorithm.comparison : equal; + auto rx_1 = regex(r"^(\w)*(\d)"); + auto m = match("1234", rx_1); + assert(equal(m.front, ["1234", "3", "4"])); + auto rx_2 = regex(r"^([0-9])*(\d)"); + auto m2 = match("1234", rx_2); + assert(equal(m2.front, ["1234", "3", "4"])); +} + +// bugzilla 9280 +@safe unittest +{ + string tomatch = "a!b@c"; + static r = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$"); + auto nm = match(tomatch, r); + assert(nm); + auto c = nm.captures; + assert(c[1] == "a"); + assert(c["nick"] == "a"); +} + + +// bugzilla 9579 +@safe unittest +{ + char[] input = ['a', 'b', 'c']; + string format = "($1)"; + // used to give a compile error: + auto re = regex(`(a)`, "g"); + auto r = replace(input, re, format); + assert(r == "(a)bc"); +} + +// bugzilla 9634 +@safe unittest +{ + auto re = ctRegex!"(?:a+)"; + assert(match("aaaa", re).hit == "aaaa"); +} + +//bugzilla 10798 +@safe unittest +{ + auto cr = ctRegex!("[abcd--c]*"); + auto m = "abc".match(cr); + assert(m); + assert(m.hit == "ab"); +} + +// bugzilla 10913 +@system unittest +{ + @system static string foo(const(char)[] s) + { + return s.dup; + } + @safe static string bar(const(char)[] s) + { + return s.dup; + } + () @system { + replace!((a) => foo(a.hit))("blah", regex(`a`)); + }(); + () @safe { + replace!((a) => bar(a.hit))("blah", regex(`a`)); + }(); +} + +// bugzilla 11262 +@safe unittest +{ + enum reg = ctRegex!(r",", "g"); + auto str = "This,List"; + str = str.replace(reg, "-"); + assert(str == "This-List"); +} + +// bugzilla 11775 +@safe unittest +{ + assert(collectException(regex("a{1,0}"))); +} + +// bugzilla 11839 +@safe unittest +{ + import std.algorithm.comparison : equal; + assert(regex(`(?P<var1>\w+)`).namedCaptures.equal(["var1"])); + assert(collectException(regex(`(?P<1>\w+)`))); + assert(regex(`(?P<v1>\w+)`).namedCaptures.equal(["v1"])); + assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"])); + assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"])); +} + +// bugzilla 12076 +@safe unittest +{ + auto RE = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)"); + string s = "one two"; + auto m = match(s, RE); +} + +// bugzilla 12105 +@safe unittest +{ + auto r = ctRegex!`.*?(?!a)`; + assert("aaab".matchFirst(r).hit == "aaa"); + auto r2 = ctRegex!`.*(?!a)`; + assert("aaab".matchFirst(r2).hit == "aaab"); +} + +//bugzilla 11784 +@safe unittest +{ + assert("abcdefghijklmnopqrstuvwxyz" + .matchFirst("[a-z&&[^aeiuo]]").hit == "b"); +} + +//bugzilla 12366 +@safe unittest +{ + auto re = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`); + assert("xxxxxxxx".match(re).empty); + assert(!"xxxx".match(re).empty); +} + +// bugzilla 12582 +@safe unittest +{ + auto r = regex(`(?P<a>abc)`); + assert(collectException("abc".matchFirst(r)["b"])); +} + +// bugzilla 12691 +@safe unittest +{ + assert(bmatch("e@", "^([a-z]|)*$").empty); + assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty); +} + +//bugzilla 12713 +@safe unittest +{ + assertThrown(regex("[[a-z]([a-z]|(([[a-z])))")); +} + +//bugzilla 12747 +@safe unittest +{ + assertThrown(regex(`^x(\1)`)); + assertThrown(regex(`^(x(\1))`)); + assertThrown(regex(`^((x)(?=\1))`)); +} + +// bugzilla 14504 +@safe unittest +{ + auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" ~ + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); +} + +// bugzilla 14529 +@safe unittest +{ + auto ctPat2 = regex(r"^[CDF]$", "i"); + foreach (v; ["C", "c", "D", "d", "F", "f"]) + assert(matchAll(v, ctPat2).front.hit == v); +} + +// bugzilla 14615 +@safe unittest +{ + import std.array : appender; + import std.regex : replaceFirst, replaceFirstInto, regex; + import std.stdio : writeln; + + auto example = "Hello, world!"; + auto pattern = regex("^Hello, (bug)"); // won't find this one + auto result = replaceFirst(example, pattern, "$1 Sponge Bob"); + assert(result == "Hello, world!"); // Ok. + + auto sink = appender!string; + replaceFirstInto(sink, example, pattern, "$1 Sponge Bob"); + assert(sink.data == "Hello, world!"); + replaceAllInto(sink, example, pattern, "$1 Sponge Bob"); + assert(sink.data == "Hello, world!Hello, world!"); +} + +// bugzilla 15573 +@safe unittest +{ + auto rx = regex("[c d]", "x"); + assert("a b".matchFirst(rx)); +} + +// bugzilla 15864 +@safe unittest +{ + regex(`(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`); +} + +@safe unittest +{ + auto r = regex("(?# comment)abc(?# comment2)"); + assert("abc".matchFirst(r)); + assertThrown(regex("(?#...")); +} + +// bugzilla 17075 +@safe unittest +{ + enum titlePattern = `<title>(.+)</title>`; + static titleRegex = ctRegex!titlePattern; + string input = "<title>" ~ "<".repeat(100_000).join; + assert(input.matchFirst(titleRegex).empty); +} + +// bugzilla 17212 +@safe unittest +{ + auto r = regex(" [a] ", "x"); + assert("a".matchFirst(r)); +} + +// bugzilla 17157 +@safe unittest +{ + import std.algorithm.comparison : equal; + auto ctr = ctRegex!"(a)|(b)|(c)|(d)"; + auto r = regex("(a)|(b)|(c)|(d)", "g"); + auto s = "--a--b--c--d--"; + auto outcomes = [ + ["a", "a", "", "", ""], + ["b", "", "b", "", ""], + ["c", "", "", "c", ""], + ["d", "", "", "", "d"] + ]; + assert(equal!equal(s.matchAll(ctr), outcomes)); + assert(equal!equal(s.bmatch(r), outcomes)); +} + +// bugzilla 17667 +@safe unittest +{ + import std.algorithm.searching : canFind; + void willThrow(T, size_t line = __LINE__)(T arg, string msg) + { + auto e = collectException(regex(arg)); + assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg); + } + willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class"); + willThrow([r"[\", r"123"], "no matching ']' found while parsing character class"); + willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class"); + willThrow([r"[a-\", r"123"], "invalid escape sequence"); + willThrow([r"\", r"123"], "invalid escape sequence"); +} + +// bugzilla 17668 +@safe unittest +{ + import std.algorithm.searching; + auto e = collectException!RegexException(regex(q"<[^]>")); + assert(e.msg.canFind("no operand for '^'")); +} + +// bugzilla 17673 +@safe unittest +{ + string str = `<">`; + string[] regexps = ["abc", "\"|x"]; + auto regexp = regex(regexps); + auto c = matchFirst(str, regexp); + assert(c); + assert(c.whichPattern == 2); +} + |