aboutsummaryrefslogtreecommitdiff
path: root/libphobos/src/std/regex/package.d
diff options
context:
space:
mode:
Diffstat (limited to 'libphobos/src/std/regex/package.d')
-rw-r--r--libphobos/src/std/regex/package.d1735
1 files changed, 1735 insertions, 0 deletions
diff --git a/libphobos/src/std/regex/package.d b/libphobos/src/std/regex/package.d
new file mode 100644
index 0000000..bfc7d7f
--- /dev/null
+++ b/libphobos/src/std/regex/package.d
@@ -0,0 +1,1735 @@
+/++
+ $(LINK2 https://en.wikipedia.org/wiki/Regular_expression, Regular expressions)
+ are a commonly used method of pattern matching
+ on strings, with $(I regex) being a catchy word for a pattern in this domain
+ specific language. Typical problems usually solved by regular expressions
+ include validation of user input and the ubiquitous find $(AMP) replace
+ in text processing utilities.
+
+$(SCRIPT inhibitQuickIndex = 1;)
+$(BOOKTABLE,
+$(TR $(TH Category) $(TH Functions))
+$(TR $(TD Matching) $(TD
+ $(LREF bmatch)
+ $(LREF match)
+ $(LREF matchAll)
+ $(LREF matchFirst)
+))
+$(TR $(TD Building) $(TD
+ $(LREF ctRegex)
+ $(LREF escaper)
+ $(LREF _regex)
+))
+$(TR $(TD Replace) $(TD
+ $(LREF replace)
+ $(LREF replaceAll)
+ $(LREF replaceAllInto)
+ $(LREF replaceFirst)
+ $(LREF replaceFirstInto)
+))
+$(TR $(TD Split) $(TD
+ $(LREF split)
+ $(LREF splitter)
+))
+$(TR $(TD Objects) $(TD
+ $(LREF Captures)
+ $(LREF Regex)
+ $(LREF RegexException)
+ $(LREF RegexMatch)
+ $(LREF Splitter)
+ $(LREF StaticRegex)
+))
+)
+
+ $(SECTION Synopsis)
+ ---
+ import std.regex;
+ import std.stdio;
+ void main()
+ {
+ // Print out all possible dd/mm/yy(yy) dates found in user input.
+ auto r = regex(r"\b[0-9][0-9]?/[0-9][0-9]?/[0-9][0-9](?:[0-9][0-9])?\b");
+ foreach (line; stdin.byLine)
+ {
+ // matchAll() returns a range that can be iterated
+ // to get all subsequent matches.
+ foreach (c; matchAll(line, r))
+ writeln(c.hit);
+ }
+ }
+ ...
+
+ // Create a static regex at compile-time, which contains fast native code.
+ auto ctr = ctRegex!(`^.*/([^/]+)/?$`);
+
+ // It works just like a normal regex:
+ auto c2 = matchFirst("foo/bar", ctr); // First match found here, if any
+ assert(!c2.empty); // Be sure to check if there is a match before examining contents!
+ assert(c2[1] == "bar"); // Captures is a range of submatches: 0 = full match.
+
+ ...
+ // multi-pattern regex
+ auto multi = regex([`(\d+),\d+`,`([a-z]+):(\d+)`]);
+ auto m = "abc:43 12,34".matchAll(multi);
+ assert(m.front.whichPattern == 2);
+ assert(m.front[1] == "abc");
+ assert(m.front[2] == "43");
+ m.popFront();
+ assert(m.front.whichPattern == 1);
+ assert(m.front[1] == "12");
+ ...
+
+ // The result of the `matchAll/matchFirst` is directly testable with if/assert/while.
+ // e.g. test if a string consists of letters:
+ assert(matchFirst("Letter", `^\p{L}+$`));
+ ---
+
+ $(SECTION Syntax and general information)
+ The general usage guideline is to keep regex complexity on the side of simplicity,
+ as its capabilities reside in purely character-level manipulation.
+ As such it's ill-suited for tasks involving higher level invariants
+ like matching an integer number $(U bounded) in an [a,b] interval.
+ Checks of this sort are better addressed by additional post-processing.
+
+ The basic syntax shouldn't surprise experienced users of regular expressions.
+ For an introduction to $(D std.regex) see a
+ $(HTTP dlang.org/regular-expression.html, short tour) of the module API
+ and its abilities.
+
+ There are other web resources on regular expressions to help newcomers,
+ and a good $(HTTP www.regular-expressions.info, reference with tutorial)
+ can easily be found.
+
+ This library uses a remarkably common ECMAScript syntax flavor
+ with the following extensions:
+ $(UL
+ $(LI Named subexpressions, with Python syntax. )
+ $(LI Unicode properties such as Scripts, Blocks and common binary properties e.g. Alphabetic, White_Space, Hex_Digit etc.)
+ $(LI Arbitrary length and complexity lookbehind, including lookahead in lookbehind and vice versa.)
+ )
+
+ $(REG_START Pattern syntax )
+ $(I std.regex operates on codepoint level,
+ 'character' in this table denotes a single Unicode codepoint.)
+ $(REG_TABLE
+ $(REG_TITLE Pattern element, Semantics )
+ $(REG_TITLE Atoms, Match single characters )
+ $(REG_ROW any character except [{|*+?()^$, Matches the character itself. )
+ $(REG_ROW ., In single line mode matches any character.
+ Otherwise it matches any character except '\n' and '\r'. )
+ $(REG_ROW [class], Matches a single character
+ that belongs to this character class. )
+ $(REG_ROW [^class], Matches a single character that
+ does $(U not) belong to this character class.)
+ $(REG_ROW \cC, Matches the control character corresponding to letter C)
+ $(REG_ROW \xXX, Matches a character with hexadecimal value of XX. )
+ $(REG_ROW \uXXXX, Matches a character with hexadecimal value of XXXX. )
+ $(REG_ROW \U00YYYYYY, Matches a character with hexadecimal value of YYYYYY. )
+ $(REG_ROW \f, Matches a formfeed character. )
+ $(REG_ROW \n, Matches a linefeed character. )
+ $(REG_ROW \r, Matches a carriage return character. )
+ $(REG_ROW \t, Matches a tab character. )
+ $(REG_ROW \v, Matches a vertical tab character. )
+ $(REG_ROW \d, Matches any Unicode digit. )
+ $(REG_ROW \D, Matches any character except Unicode digits. )
+ $(REG_ROW \w, Matches any word character (note: this includes numbers).)
+ $(REG_ROW \W, Matches any non-word character.)
+ $(REG_ROW \s, Matches whitespace, same as \p{White_Space}.)
+ $(REG_ROW \S, Matches any character except those recognized as $(I \s ). )
+ $(REG_ROW \\, Matches \ character. )
+ $(REG_ROW \c where c is one of [|*+?(), Matches the character c itself. )
+ $(REG_ROW \p{PropertyName}, Matches a character that belongs
+ to the Unicode PropertyName set.
+ Single letter abbreviations can be used without surrounding {,}. )
+ $(REG_ROW \P{PropertyName}, Matches a character that does not belong
+ to the Unicode PropertyName set.
+ Single letter abbreviations can be used without surrounding {,}. )
+ $(REG_ROW \p{InBasicLatin}, Matches any character that is part of
+ the BasicLatin Unicode $(U block).)
+ $(REG_ROW \P{InBasicLatin}, Matches any character except ones in
+ the BasicLatin Unicode $(U block).)
+ $(REG_ROW \p{Cyrillic}, Matches any character that is part of
+ Cyrillic $(U script).)
+ $(REG_ROW \P{Cyrillic}, Matches any character except ones in
+ Cyrillic $(U script).)
+ $(REG_TITLE Quantifiers, Specify repetition of other elements)
+ $(REG_ROW *, Matches previous character/subexpression 0 or more times.
+ Greedy version - tries as many times as possible.)
+ $(REG_ROW *?, Matches previous character/subexpression 0 or more times.
+ Lazy version - stops as early as possible.)
+ $(REG_ROW +, Matches previous character/subexpression 1 or more times.
+ Greedy version - tries as many times as possible.)
+ $(REG_ROW +?, Matches previous character/subexpression 1 or more times.
+ Lazy version - stops as early as possible.)
+ $(REG_ROW {n}, Matches previous character/subexpression exactly n times. )
+ $(REG_ROW {n$(COMMA)}, Matches previous character/subexpression n times or more.
+ Greedy version - tries as many times as possible. )
+ $(REG_ROW {n$(COMMA)}?, Matches previous character/subexpression n times or more.
+ Lazy version - stops as early as possible.)
+ $(REG_ROW {n$(COMMA)m}, Matches previous character/subexpression n to m times.
+ Greedy version - tries as many times as possible, but no more than m times. )
+ $(REG_ROW {n$(COMMA)m}?, Matches previous character/subexpression n to m times.
+ Lazy version - stops as early as possible, but no less than n times.)
+ $(REG_TITLE Other, Subexpressions $(AMP) alternations )
+ $(REG_ROW (regex), Matches subexpression regex,
+ saving matched portion of text for later retrieval. )
+ $(REG_ROW (?#comment), An inline comment that is ignored while matching.)
+ $(REG_ROW (?:regex), Matches subexpression regex,
+ $(U not) saving matched portion of text. Useful to speed up matching. )
+ $(REG_ROW A|B, Matches subexpression A, or failing that, matches B. )
+ $(REG_ROW (?P$(LT)name$(GT)regex), Matches named subexpression
+ regex labeling it with name 'name'.
+ When referring to a matched portion of text,
+ names work like aliases in addition to direct numbers.
+ )
+ $(REG_TITLE Assertions, Match position rather than character )
+ $(REG_ROW ^, Matches at the beginning of input or line (in multiline mode).)
+ $(REG_ROW $, Matches at the end of input or line (in multiline mode). )
+ $(REG_ROW \b, Matches at word boundary. )
+ $(REG_ROW \B, Matches when $(U not) at word boundary. )
+ $(REG_ROW (?=regex), Zero-width lookahead assertion.
+ Matches at a point where the subexpression
+ regex could be matched starting from the current position.
+ )
+ $(REG_ROW (?!regex), Zero-width negative lookahead assertion.
+ Matches at a point where the subexpression
+ regex could $(U not) be matched starting from the current position.
+ )
+ $(REG_ROW (?<=regex), Zero-width lookbehind assertion. Matches at a point
+ where the subexpression regex could be matched ending
+ at the current position (matching goes backwards).
+ )
+ $(REG_ROW (?<!regex), Zero-width negative lookbehind assertion.
+ Matches at a point where the subexpression regex could $(U not)
+ be matched ending at the current position (matching goes backwards).
+ )
+ )
+
+ $(REG_START Character classes )
+ $(REG_TABLE
+ $(REG_TITLE Pattern element, Semantics )
+ $(REG_ROW Any atom, Has the same meaning as outside of a character class.)
+ $(REG_ROW a-z, Includes characters a, b, c, ..., z. )
+ $(REG_ROW [a||b]$(COMMA) [a--b]$(COMMA) [a~~b]$(COMMA) [a$(AMP)$(AMP)b],
+ Where a, b are arbitrary classes, means union, set difference,
+ symmetric set difference, and intersection respectively.
+ $(I Any sequence of character class elements implicitly forms a union.) )
+ )
+
+ $(REG_START Regex flags )
+ $(REG_TABLE
+ $(REG_TITLE Flag, Semantics )
+ $(REG_ROW g, Global regex, repeat over the whole input. )
+ $(REG_ROW i, Case insensitive matching. )
+ $(REG_ROW m, Multi-line mode, match ^, $ on start and end line separators
+ as well as start and end of input.)
+ $(REG_ROW s, Single-line mode, makes . match '\n' and '\r' as well. )
+ $(REG_ROW x, Free-form syntax, ignores whitespace in pattern,
+ useful for formatting complex regular expressions. )
+ )
+
+ $(SECTION Unicode support)
+
+ This library provides full Level 1 support* according to
+ $(HTTP unicode.org/reports/tr18/, UTS 18). Specifically:
+ $(UL
+ $(LI 1.1 Hex notation via any of \uxxxx, \U00YYYYYY, \xZZ.)
+ $(LI 1.2 Unicode properties.)
+ $(LI 1.3 Character classes with set operations.)
+ $(LI 1.4 Word boundaries use the full set of "word" characters.)
+ $(LI 1.5 Using simple casefolding to match case
+ insensitively across the full range of codepoints.)
+ $(LI 1.6 Respecting line breaks as any of
+ \u000A | \u000B | \u000C | \u000D | \u0085 | \u2028 | \u2029 | \u000D\u000A.)
+ $(LI 1.7 Operating on codepoint level.)
+ )
+ *With exception of point 1.1.1, as of yet, normalization of input
+ is expected to be enforced by user.
+
+ $(SECTION Replace format string)
+
+ A set of functions in this module that do the substitution rely
+ on a simple format to guide the process. In particular the table below
+ applies to the $(D format) argument of
+ $(LREF replaceFirst) and $(LREF replaceAll).
+
+ The format string can reference parts of match using the following notation.
+ $(REG_TABLE
+ $(REG_TITLE Format specifier, Replaced by )
+ $(REG_ROW $$(AMP), the whole match. )
+ $(REG_ROW $(DOLLAR)$(BACKTICK), part of input $(I preceding) the match. )
+ $(REG_ROW $', part of input $(I following) the match. )
+ $(REG_ROW $$, '$' character. )
+ $(REG_ROW \c $(COMMA) where c is any character, the character c itself. )
+ $(REG_ROW \\, '\' character. )
+ $(REG_ROW $(DOLLAR)1 .. $(DOLLAR)99, submatch number 1 to 99 respectively. )
+ )
+
+ $(SECTION Slicing and zero memory allocations orientation)
+
+ All matches returned by pattern matching functionality in this library
+ are slices of the original input. The notable exception is the $(D replace)
+ family of functions that generate a new string from the input.
+
+ In cases where producing the replacement is the ultimate goal
+ $(LREF replaceFirstInto) and $(LREF replaceAllInto) could come in handy
+ as functions that avoid allocations even for replacement.
+
+ Copyright: Copyright Dmitry Olshansky, 2011-
+
+ License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
+
+ Authors: Dmitry Olshansky,
+
+ API and utility constructs are modeled after the original $(D std.regex)
+ by Walter Bright and Andrei Alexandrescu.
+
+ Source: $(PHOBOSSRC std/_regex/_package.d)
+
+Macros:
+ REG_ROW = $(TR $(TD $(I $1 )) $(TD $+) )
+ REG_TITLE = $(TR $(TD $(B $1)) $(TD $(B $2)) )
+ REG_TABLE = <table border="1" cellspacing="0" cellpadding="5" > $0 </table>
+ REG_START = <h3><div align="center"> $0 </div></h3>
+ SECTION = <h3><a id="$1" href="#$1" class="anchor">$0</a></h3>
+ S_LINK = <a href="#$1">$+</a>
+ +/
+module std.regex;
+
+import std.range.primitives, std.traits;
+import std.regex.internal.ir;
+import std.regex.internal.thompson; //TODO: get rid of this dependency
+import std.typecons; // : Flag, Yes, No;
+
+/++
+ $(D Regex) object holds regular expression pattern in compiled form.
+
+ Instances of this object are constructed via calls to $(D regex).
+ This is an intended form for caching and storage of frequently
+ used regular expressions.
+
+ Example:
+
+ Test if this object doesn't contain any compiled pattern.
+ ---
+ Regex!char r;
+ assert(r.empty);
+ r = regex(""); // Note: "" is a valid regex pattern.
+ assert(!r.empty);
+ ---
+
+ Getting a range of all the named captures in the regex.
+ ----
+ import std.range;
+ import std.algorithm;
+
+ auto re = regex(`(?P<name>\w+) = (?P<var>\d+)`);
+ auto nc = re.namedCaptures;
+ static assert(isRandomAccessRange!(typeof(nc)));
+ assert(!nc.empty);
+ assert(nc.length == 2);
+ assert(nc.equal(["name", "var"]));
+ assert(nc[0] == "name");
+ assert(nc[1..$].equal(["var"]));
+ ----
++/
+public alias Regex(Char) = std.regex.internal.ir.Regex!(Char);
+
+/++
+ A $(D StaticRegex) is a $(D Regex) object that contains D code specially
+ generated at compile-time to speed up matching.
+
+ Implicitly convertible to normal $(D Regex),
+ however doing so will result in losing this additional capability.
++/
+public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char);
+
+/++
+    Compile regular expression pattern for the later execution.
+    Returns: $(D Regex) object that works on inputs having
+    the same character width as $(D pattern).
+
+    Params:
+    pattern = A single regular expression to match.
+    patterns = An array of regular expression strings.
+      The resulting `Regex` object will match any expression;
+      use $(LREF whichPattern) to know which.
+    flags = The _attributes (g, i, m, s and x accepted)
+
+    Throws: $(D RegexException) if there were any errors during compilation,
+    or if $(D patterns) is empty.
++/
+@trusted public auto regex(S)(S[] patterns, const(char)[] flags="")
+if (isSomeString!(S))
+{
+    import std.array : appender;
+    import std.functional : memoize;
+    enum cacheSize = 8; //TODO: invent nice interface to control regex caching
+
+    // An empty pattern list has no meaningful compiled form; reject it here
+    // instead of indexing patterns[0] out of bounds further down.
+    if (patterns.length == 0)
+        throw new RegexException("regex: at least one pattern is required");
+
+    S pat;
+    if (patterns.length > 1)
+    {
+        // Several patterns are glued into a single alternation:
+        //   (?:p1\<mark1>)\<mark1>|(?:p2\<mark2>)\<mark2>|...
+        // where <markN> is the character privateUseStart + index of the pattern.
+        auto app = appender!S();
+        foreach (i, p; patterns)
+        {
+            if (i != 0)
+                app.put("|");
+            app.put("(?:");
+            app.put(p);
+            // terminator for the pattern
+            // to detect if the pattern unexpectedly ends
+            app.put("\\");
+            app.put(cast(dchar)(privateUseStart+i));
+            app.put(")");
+            // another one to return correct whichPattern
+            // for all of potential alternatives in the patterns[i]
+            app.put("\\");
+            app.put(cast(dchar)(privateUseStart+i));
+        }
+        pat = app.data;
+    }
+    else
+        pat = patterns[0];
+
+    // memoize is bypassed during compile-time evaluation; at run time the
+    // compiled regex is cached (up to cacheSize entries).
+    if (__ctfe)
+        return regexImpl(pat, flags);
+    return memoize!(regexImpl!S, cacheSize)(pat, flags);
+}
+
+///ditto
+@trusted public auto regex(S)(S pattern, const(char)[] flags="")
+if (isSomeString!(S))
+{
+    // Wrap the lone pattern into a one-element array and
+    // defer to the multi-pattern overload.
+    auto patterns = [pattern];
+    return regex(patterns, flags);
+}
+
+///
+@system unittest
+{
+ // multi-pattern regex example
+ auto multi = regex([`([a-z]+):(\d+)`, `(\d+),\d+`]); // multi regex
+ auto m = "abc:43 12,34".matchAll(multi);
+ // whichPattern counts from 1: "abc:43" is matched by the first pattern
+ assert(m.front.whichPattern == 1);
+ assert(m.front[1] == "abc");
+ assert(m.front[2] == "43");
+ m.popFront();
+ // the next match, "12,34", comes from the second pattern
+ assert(m.front.whichPattern == 2);
+ assert(m.front[1] == "12");
+}
+
+// Runs the parser over `pattern` with the given `flags` and
+// returns the program it produced.
+public auto regexImpl(S)(S pattern, const(char)[] flags="")
+if (isSomeString!(S))
+{
+    import std.regex.internal.parser : Parser, CodeGen;
+    alias PatternType = Unqual!(typeof(pattern));
+    alias Compiler = Parser!(PatternType, CodeGen);
+    auto compiler = Compiler(pattern, flags);
+    auto compiled = compiler.program;
+    return compiled;
+}
+
+
+// Compile-time helper behind ctRegex: compiles `pattern` during compilation,
+// generates matcher source for it and packs both into a StaticRegex (`nr`).
+template ctRegexImpl(alias pattern, string flags=[])
+{
+ import std.regex.internal.backtracking, std.regex.internal.parser;
+ // initialising an enum forces regex() to be evaluated at compile time
+ enum r = regex(pattern, flags);
+ alias Char = BasicElementOf!(typeof(pattern));
+ // text returned by ctGenRegExCode, mixed in below as the body of func
+ enum source = ctGenRegExCode(r);
+ alias Matcher = BacktrackingMatcher!(true);
+ @trusted bool func(ref Matcher!Char matcher)
+ {
+ // print the generated source when built with -debug=std_regex_ctr
+ debug(std_regex_ctr) pragma(msg, source);
+ mixin(source);
+ }
+ // the result pairs the compiled program with a pointer to func
+ enum nr = StaticRegex!Char(r, &func);
+}
+
+/++
+ Compile regular expression using CTFE
+ and generate optimized native machine code for matching it.
+
+ Returns: StaticRegex object for faster matching.
+
+ Params:
+ pattern = Regular expression
+ flags = The _attributes (g, i, m, s and x accepted)
++/
+public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).nr;
+
+// True when RegEx is exactly Regex or StaticRegex instantiated
+// with the basic element type of R.
+enum isRegexFor(RegEx, R) = is(RegEx == Regex!(BasicElementOf!R))
+ || is(RegEx == StaticRegex!(BasicElementOf!R));
+
+
+/++
+ $(D Captures) object contains submatches captured during a call
+ to $(D match) or iteration over $(D RegexMatch) range.
+
+ First element of range is the whole match.
++/
+@trusted public struct Captures(R, DIndex = size_t)
+if (isSomeString!R)
+{//@trusted because of union inside
+ alias DataIndex = DIndex;
+ alias String = R;
+private:
+ import std.conv : text;
+ // the searched string; every submatch is returned as a slice of it
+ R _input;
+ // 0 when nothing matched, otherwise the value reported by whichPattern
+ int _nMatch;
+ // up to this many groups are kept inline in small_matches
+ enum smallString = 3;
+ // SMALL_MASK set in _refcount marks inline storage;
+ // REF_MASK is not referenced anywhere inside this struct
+ enum SMALL_MASK = 0x8000_0000, REF_MASK= 0x1FFF_FFFF;
+ // group storage: a C-heap slice (big_matches) or an inline array (small_matches);
+ // which member is live is decided by SMALL_MASK in _refcount
+ union
+ {
+ Group!DataIndex[] big_matches;
+ Group!DataIndex[smallString] small_matches;
+ }
+ // range bounds into matches: index of the front and one past the back
+ uint _f, _b;
+ uint _refcount; // ref count or SMALL MASK + num groups
+ NamedGroup[] _names;
+
+ // Creates storage for n groups over `input`; `named` is kept for name lookup.
+ this()(R input, uint n, NamedGroup[] named)
+ {
+ _input = input;
+ _names = named;
+ newMatches(n);
+ _b = n;
+ _f = 0;
+ }
+
+ // Same, taking input, named groups and group count from a RegexMatch.
+ this(alias Engine)(ref RegexMatch!(R,Engine) rmatch)
+ {
+ _input = rmatch._input;
+ _names = rmatch._engine.re.dict;
+ immutable n = rmatch._engine.re.ngroup;
+ newMatches(n);
+ _b = n;
+ _f = 0;
+ }
+
+ // The live group storage: in small mode the low 8 bits of _refcount
+ // give the number of groups held in small_matches.
+ @property inout(Group!DataIndex[]) matches() inout
+ {
+ return (_refcount & SMALL_MASK) ? small_matches[0 .. _refcount & 0xFF] : big_matches;
+ }
+
+ // (Re)initialises storage for n groups. More than smallString groups go to
+ // a zeroed calloc block with the count set to 1; otherwise only
+ // _refcount is rewritten and the inline array is used as is.
+ void newMatches(uint n)
+ {
+ import core.stdc.stdlib : calloc;
+ import std.exception : enforce;
+ if (n > smallString)
+ {
+ auto p = cast(Group!DataIndex*) enforce(
+ calloc(Group!DataIndex.sizeof,n),
+ "Failed to allocate Captures struct"
+ );
+ big_matches = p[0 .. n];
+ _refcount = 1;
+ }
+ else
+ {
+ _refcount = SMALL_MASK | n;
+ }
+ }
+
+ // True for inline storage or when this instance's count equals 1.
+ bool unique()
+ {
+ return (_refcount & SMALL_MASK) || _refcount == 1;
+ }
+
+public:
+ // NOTE(review): _refcount is a field of each instance, so this postblit bumps
+ // only the new copy's counter; the instance copied from keeps its old value.
+ // Copies therefore do not share one count for the heap block - confirm that
+ // lifetimes of copies holding big_matches are handled as intended.
+ this(this)
+ {
+ if (!(_refcount & SMALL_MASK))
+ {
+ _refcount++;
+ }
+ }
+ // Frees the heap block once this instance's counter drops to zero;
+ // inline storage needs no cleanup.
+ ~this()
+ {
+ import core.stdc.stdlib : free;
+ if (!(_refcount & SMALL_MASK))
+ {
+ if (--_refcount == 0)
+ {
+ free(big_matches.ptr);
+ big_matches = null;
+ }
+ }
+ }
+ ///Slice of input prior to the match.
+ @property R pre()
+ {
+ return _nMatch == 0 ? _input[] : _input[0 .. matches[0].begin];
+ }
+
+ ///Slice of input immediately after the match.
+ @property R post()
+ {
+ return _nMatch == 0 ? _input[] : _input[matches[0].end .. $];
+ }
+
+ ///Slice of matched portion of input.
+ @property R hit()
+ {
+ assert(_nMatch, "attempted to get hit of an empty match");
+ return _input[matches[0].begin .. matches[0].end];
+ }
+
+ ///Range interface.
+ @property R front()
+ {
+ assert(_nMatch, "attempted to get front of an empty match");
+ return _input[matches[_f].begin .. matches[_f].end];
+ }
+
+ ///ditto
+ @property R back()
+ {
+ assert(_nMatch, "attempted to get back of an empty match");
+ return _input[matches[_b - 1].begin .. matches[_b - 1].end];
+ }
+
+ ///ditto
+ void popFront()
+ {
+ assert(!empty);
+ ++_f;
+ }
+
+ ///ditto
+ void popBack()
+ {
+ assert(!empty);
+ --_b;
+ }
+
+ ///ditto
+ @property bool empty() const { return _nMatch == 0 || _f >= _b; }
+
+ ///ditto
+ inout(R) opIndex()(size_t i) inout
+ {
+ // i is relative to the current front, not to the whole match
+ assert(_f + i < _b,text("requested submatch number ", i," is out of range"));
+ assert(matches[_f + i].begin <= matches[_f + i].end,
+ text("wrong match: ", matches[_f + i].begin, "..", matches[_f + i].end));
+ return _input[matches[_f + i].begin .. matches[_f + i].end];
+ }
+
+ /++
+ Explicit cast to bool.
+ Useful as a shorthand for !(x.empty) in if and assert statements.
+
+ ---
+ import std.regex;
+
+ assert(!matchFirst("nothing", "something"));
+ ---
+ +/
+
+ @safe bool opCast(T:bool)() const nothrow { return _nMatch != 0; }
+
+ /++
+ Number of the pattern that matched, counting from 1 (1 is the first pattern).
+ Returns 0 on no match.
+ +/
+
+ @safe @property int whichPattern() const nothrow { return _nMatch; }
+
+ ///
+ @system unittest
+ {
+ import std.regex;
+ assert(matchFirst("abc", "[0-9]+", "[a-z]+").whichPattern == 2);
+ }
+
+ /++
+ Lookup named submatch.
+
+ ---
+ import std.regex;
+ import std.range;
+
+ auto c = matchFirst("a = 42;", regex(`(?P<var>\w+)\s*=\s*(?P<value>\d+);`));
+ assert(c["var"] == "a");
+ assert(c["value"] == "42");
+ popFrontN(c, 2);
+ //named groups are unaffected by range primitives
+ assert(c["var"] =="a");
+ assert(c.front == "42");
+ ----
+ +/
+ R opIndex(String)(String i) /*const*/ //@@@BUG@@@
+ if (isSomeString!String)
+ {
+ // the index comes straight from lookupNamedGroup and is not offset by _f
+ size_t index = lookupNamedGroup(_names, i);
+ return _input[matches[index].begin .. matches[index].end];
+ }
+
+ ///Number of submatches left in this range (0 if there was no match).
+ @property size_t length() const { return _nMatch == 0 ? 0 : _b - _f; }
+
+ ///A hook for compatibility with original std.regex.
+ @property ref captures(){ return this; }
+}
+
+///
+@system unittest
+{
+ import std.range.primitives : popFrontN;
+
+ auto c = matchFirst("@abc#", regex(`(\w)(\w)(\w)`));
+ assert(c.pre == "@"); // Part of input preceding match
+ assert(c.post == "#"); // Immediately after match
+ assert(c.hit == c[0] && c.hit == "abc"); // The whole match
+ assert(c[2] == "b"); // Index 0 is the whole match, so 2 is the second group
+ assert(c.front == "abc");
+ c.popFront();
+ assert(c.front == "a");
+ assert(c.back == "c");
+ c.popBack();
+ assert(c.back == "b");
+ popFrontN(c, 2);
+ assert(c.empty);
+
+ // Captures converts to false when nothing matched
+ assert(!matchFirst("nothing", "something"));
+}
+
+/++
+    A regex engine state, as returned by $(D match) family of functions.
+
+    Effectively it's a forward range of Captures!R, produced
+    by lazily searching for matches in a given input.
+
+    $(D alias Engine) specifies an engine type to use during matching,
+    and is automatically deduced in a call to $(D match)/$(D bmatch).
++/
+@trusted public struct RegexMatch(R, alias Engine = ThompsonMatcher)
+if (isSomeString!R)
+{
+private:
+    import core.stdc.stdlib : malloc, free;
+    alias Char = BasicElementOf!R;
+    alias EngineType = Engine!Char;
+    EngineType _engine;
+    R _input;
+    Captures!(R,EngineType.DataIndex) _captures;
+    // malloc'ed chunk: a size_t reference count followed by the engine's memory
+    void[] _memory;//is ref-counted
+
+    // Allocates the engine's memory, builds the engine over `input`
+    // and runs the first match so that front/empty are valid right away.
+    this(RegEx)(R input, RegEx prog)
+    {
+        import std.exception : enforce;
+        _input = input;
+        // one extra size_t in front of the engine's memory holds the count
+        immutable size = EngineType.initialMemory(prog)+size_t.sizeof;
+        _memory = (enforce(malloc(size), "malloc failed")[0 .. size]);
+        scope(failure) free(_memory.ptr);
+        *cast(size_t*)_memory.ptr = 1;
+        _engine = EngineType(prog, Input!Char(input), _memory[size_t.sizeof..$]);
+        static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
+            _engine.nativeFn = prog.nativeFn;
+        _captures = Captures!(R,EngineType.DataIndex)(this);
+        _captures._nMatch = _engine.match(_captures.matches);
+        debug(std_regex_allocation) writefln("RefCount (ctor): %x %d", _memory.ptr, counter);
+    }
+
+    // The reference count stored at the very start of _memory.
+    @property ref size_t counter(){ return *cast(size_t*)_memory.ptr; }
+public:
+    // A copy shares _memory with the original, so only the count is bumped.
+    this(this)
+    {
+        if (_memory.ptr)
+        {
+            ++counter;
+            debug(std_regex_allocation) writefln("RefCount (postblit): %x %d",
+                _memory.ptr, *cast(size_t*)_memory.ptr);
+        }
+    }
+
+    // Drops this instance's reference; the last one frees the chunk.
+    ~this()
+    {
+        if (_memory.ptr && --*cast(size_t*)_memory.ptr == 0)
+        {
+            debug(std_regex_allocation) writefln("RefCount (dtor): %x %d",
+                _memory.ptr, *cast(size_t*)_memory.ptr);
+            free(cast(void*)_memory.ptr);
+        }
+    }
+
+    ///Shorthands for front.pre, front.post, front.hit.
+    @property R pre()
+    {
+        return _captures.pre;
+    }
+
+    ///ditto
+    @property R post()
+    {
+        return _captures.post;
+    }
+
+    ///ditto
+    @property R hit()
+    {
+        return _captures.hit;
+    }
+
+    /++
+        Functionality for processing subsequent matches of global regexes via range interface:
+        ---
+        import std.regex;
+        auto m = matchAll("Hello, world!", regex(`\w+`));
+        assert(m.front.hit == "Hello");
+        m.popFront();
+        assert(m.front.hit == "world");
+        m.popFront();
+        assert(m.empty);
+        ---
+    +/
+    @property auto front()
+    {
+        return _captures;
+    }
+
+    ///ditto
+    void popFront()
+    {
+        import std.exception : enforce;
+        if (counter != 1)
+        {//do cow magic first
+            immutable size = EngineType.initialMemory(_engine.re)+size_t.sizeof;
+            // Allocate and initialise the private chunk BEFORE giving up the
+            // shared one: if malloc fails and enforce throws, the shared count
+            // must still include this instance, otherwise the destructor
+            // would decrement it a second time.
+            auto fresh = enforce(malloc(size), "malloc failed")[0 .. size];
+            *cast(size_t*)fresh.ptr = 1;//the new chunk belongs to this instance only
+            counter--;//we abandon this reference
+            _memory = fresh;
+            _engine = _engine.dupTo(_memory[size_t.sizeof .. size]);
+        }
+
+        if (!_captures.unique)
+        {
+            // has external references - allocate new space
+            _captures.newMatches(_engine.re.ngroup);
+        }
+        _captures._nMatch = _engine.match(_captures.matches);
+    }
+
+    ///ditto
+    auto save(){ return this; }
+
+    ///Test if this match object is empty.
+    @property bool empty() const { return _captures._nMatch == 0; }
+
+    ///Same as !(x.empty), provided for its convenience in conditional statements.
+    T opCast(T:bool)(){ return !empty; }
+
+    /// Same as .front, provided for compatibility with original std.regex.
+    @property auto captures() inout { return _captures; }
+
+}
+
+// Performs a single match of `re` against `input` using a temporary engine
+// and hands back the resulting Captures.
+private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, RegEx re)
+{
+    import core.stdc.stdlib : malloc, free;
+    import std.exception : enforce;
+    alias Char = BasicElementOf!R;
+    alias EngineType = Engine!Char;
+    alias Result = Captures!(R, EngineType.DataIndex);
+
+    // working memory for the engine, released on every exit path
+    size_t nbytes = EngineType.initialMemory(re);
+    void[] scratch = enforce(malloc(nbytes), "malloc failed")[0 .. nbytes];
+    scope(exit) free(scratch.ptr);
+
+    auto result = Result(input, re.ngroup, re.dict);
+    auto matcher = EngineType(re, Input!Char(input), scratch);
+    // a static regex also carries the natively compiled matcher function
+    static if (is(RegEx == StaticRegex!Char))
+        matcher.nativeFn = re.nativeFn;
+    result._nMatch = matcher.match(result.matches);
+    return result;
+}
+
+// Sets the global option on `re` and wraps input and regex into
+// a RegexMatch driven by the given Engine.
+private auto matchMany(alias Engine, RegEx, R)(R input, RegEx re)
+{
+ // `re` is passed by value (no `ref`), so the flag is set on this function's parameter
+ re.flags |= RegexOption.global;
+ return RegexMatch!(R, Engine)(input, re);
+}
+
+@system unittest
+{
+ //sanity checks for new API
+ auto re = regex("abc");
+ // matchOnce is callable via UFCS; element 0 of the result is the whole match
+ assert(!"abc".matchOnce!(ThompsonMatcher)(re).empty);
+ assert("abc".matchOnce!(ThompsonMatcher)(re)[0] == "abc");
+}
+
+
+// True when `fun` can be called with a single Captures!R argument,
+// i.e. it is the "mutator" kind of replacement callback.
+private enum isReplaceFunctor(alias fun, R) =
+ __traits(compiles, (Captures!R c) { fun(c); });
+
+// Lowest-level helper: writes `input` into `sink`, with the single match held
+// in `captures` replaced by whatever `output` produces.
+private @trusted void replaceCapturesInto(alias output, Sink, R, T)
+        (ref Sink sink, R input, T captures)
+if (isOutputRange!(Sink, dchar) && isSomeString!R)
+{
+    if (!captures.empty)
+    {
+        sink.put(captures.pre);
+        // a hack to get around bogus errors, should be simply output(captures, sink)
+        // "is a nested function and cannot be accessed from"
+        static if (isReplaceFunctor!(output, R))
+            sink.put(output(captures)); //"mutator" type of function
+        else
+            output(captures, sink); //"output" type of function
+        sink.put(captures.post);
+    }
+    else
+    {
+        // no match: the input passes through unchanged
+        sink.put(input);
+    }
+}
+
+// Counterpart of replaceCapturesInto for a whole range of matches:
+// copies the text between matches and substitutes every match.
+private void replaceMatchesInto(alias output, Sink, R, T)
+        (ref Sink sink, R input, T matches)
+if (isOutputRange!(Sink, dchar) && isSomeString!R)
+{
+    // how much of `input` has already been emitted or replaced
+    size_t consumed = 0;
+    foreach (m; matches)
+    {
+        // text between the previous match and this one
+        sink.put(m.pre[consumed .. $]);
+        // same hack, see replaceCapturesInto
+        static if (isReplaceFunctor!(output, R))
+            sink.put(output(m)); //"mutator" type of function
+        else
+            output(m, sink); //"output" type of function
+        consumed = m.pre.length + m.hit.length;
+    }
+    // whatever follows the last match
+    sink.put(input[consumed .. $]);
+}
+
+// General skeleton of replaceFirst: find the first match and, if there is
+// one, build the result through replaceCapturesInto.
+private R replaceFirstWith(alias output, R, RegEx)(R input, RegEx re)
+if (isSomeString!R && isRegexFor!(RegEx, R))
+{
+    import std.array : appender;
+    auto found = matchFirst(input, re);
+    if (!found.empty)
+    {
+        auto buffer = appender!(R)();
+        replaceCapturesInto!output(buffer, input, found);
+        return buffer.data;
+    }
+    // nothing matched: the input itself is the result
+    return input;
+}
+
+// General skeleton of replaceAll.
+// The `method` parameter lets the old API ride on the back of the new one.
+private R replaceAllWith(alias output,
+        alias method=matchAll, R, RegEx)(R input, RegEx re)
+if (isSomeString!R && isRegexFor!(RegEx, R))
+{
+    import std.array : appender;
+    auto found = method(input, re); //inout(C)[] fails
+    if (!found.empty)
+    {
+        auto buffer = appender!(R)();
+        replaceMatchesInto!output(buffer, input, found);
+        return buffer.data;
+    }
+    // nothing matched: the input itself is the result
+    return input;
+}
+
+
+/++
+ Start matching $(D input) to regex pattern $(D re),
+ using Thompson NFA matching scheme.
+
+ The use of this function is $(RED discouraged) - use either of
+ $(LREF matchAll) or $(LREF matchFirst).
+
+ Delegating the kind of operation
+ to "g" flag is soon to be phased out along with the
+ ability to choose the exact matching scheme. The choice of
+ matching scheme to use depends highly on the pattern kind and
+ can be done automatically on a case-by-case basis.
+
+ Returns: a $(D RegexMatch) object holding engine state after first match.
++/
+
+public auto match(R, RegEx)(R input, RegEx re)
+if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
+{
+    import std.regex.internal.thompson : ThompsonMatcher;
+    // run-time compiled regex: matched with the Thompson engine
+    alias Matches = RegexMatch!(Unqual!(typeof(input)), ThompsonMatcher);
+    return Matches(input, re);
+}
+
+///ditto
+public auto match(R, String)(R input, String re)
+if (isSomeString!R && isSomeString!String)
+{
+    import std.regex.internal.thompson : ThompsonMatcher;
+    // plain string pattern: compiled with regex() first
+    alias Matches = RegexMatch!(Unqual!(typeof(input)), ThompsonMatcher);
+    return Matches(input, regex(re));
+}
+
+// StaticRegex overload: matched with the backtracking engine
+public auto match(R, RegEx)(R input, RegEx re)
+if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
+{
+    import std.regex.internal.backtracking : BacktrackingMatcher;
+    alias Matches = RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!true);
+    return Matches(input, re);
+}
+
+/++
+ Find the first (leftmost) slice of the $(D input) that
+ matches the pattern $(D re). This function picks the most suitable
+ regular expression engine depending on the pattern properties.
+
+ $(D re) parameter can be one of three types:
+ $(UL
+ $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
+ $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
+ compiled bytecode. )
+ $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
+ compiled native machine code. )
+ )
+
+ Returns:
+ $(LREF Captures) containing the extent of a match together with all submatches
+ if there was a match, otherwise an empty $(LREF Captures) object.
++/
+public auto matchFirst(R, RegEx)(R input, RegEx re)
+if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
+{
+    import std.regex.internal.thompson : ThompsonMatcher;
+    // run-time compiled regex: a single match with the Thompson engine
+    return input.matchOnce!ThompsonMatcher(re);
+}
+
+///ditto
+public auto matchFirst(R, String)(R input, String re)
+if (isSomeString!R && isSomeString!String)
+{
+    import std.regex.internal.thompson : ThompsonMatcher;
+    // plain string pattern: compiled with regex() first
+    return input.matchOnce!ThompsonMatcher(regex(re));
+}
+
+///ditto
+public auto matchFirst(R, String)(R input, String[] re...)
+if (isSomeString!R && isSomeString!String)
+{
+    import std.regex.internal.thompson : ThompsonMatcher;
+    // several string patterns: compiled into one multi-pattern regex
+    return input.matchOnce!ThompsonMatcher(regex(re));
+}
+
+// StaticRegex overload: a single match with the backtracking engine
+public auto matchFirst(R, RegEx)(R input, RegEx re)
+if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
+{
+    import std.regex.internal.backtracking : BacktrackingMatcher;
+    return input.matchOnce!(BacktrackingMatcher!true)(re);
+}
+
+/++
+    Starts a search for every non-overlapping match of the pattern $(D re)
+    in the given $(D input). Matches are produced lazily, as a range, in the
+    order they are encountered going through the input from left to right.
+
+    The matching engine is selected on the caller's behalf according to
+    the pattern; no engine has to be named explicitly.
+
+    The $(D re) argument may be any one of the following:
+    $(UL
+      $(LI One or more plain strings; these are compiled to bytecode
+        before matching starts. )
+      $(LI A Regex!char (wchar/dchar), i.e. a pattern held as
+        compiled bytecode. )
+      $(LI A StaticRegex!char (wchar/dchar), i.e. a pattern held as
+        compiled native machine code. )
+    )
+
+    Returns:
+    A $(LREF RegexMatch) object representing the matcher state after the
+    first match was found, or an empty one if there is no match.
++/
+public auto matchAll(R, RegEx)(R input, RegEx re)
+if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
+{
+    import std.regex.internal.thompson : ThompsonMatcher;
+    return input.matchMany!ThompsonMatcher(re);
+}
+
+///ditto
+public auto matchAll(R, String)(R input, String re)
+if (isSomeString!R && isSomeString!String)
+{
+    // Textual pattern: compile it on the fly, then iterate over all matches.
+    import std.regex.internal.thompson : ThompsonMatcher;
+    return input.matchMany!ThompsonMatcher(regex(re));
+}
+
+///ditto
+public auto matchAll(R, String)(R input, String[] re...)
+if (isSomeString!R && isSomeString!String)
+{
+    // Several textual patterns: regex() receives the whole array and the
+    // compiled result is used to iterate over all matches.
+    import std.regex.internal.thompson : ThompsonMatcher;
+    return input.matchMany!ThompsonMatcher(regex(re));
+}
+
+public auto matchAll(R, RegEx)(R input, RegEx re)
+if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
+{
+    // StaticRegex pattern: all matches through BacktrackingMatcher!true.
+    import std.regex.internal.backtracking : BacktrackingMatcher;
+    return input.matchMany!(BacktrackingMatcher!true)(re);
+}
+
+// another set of tests just to cover the new API
+// Runs matchFirst/matchAll with string patterns, a multi-pattern Regex and a
+// ctRegex, once for each of string, wstring and const(dchar)[].
+@system unittest
+{
+ import std.algorithm.comparison : equal;
+ import std.algorithm.iteration : map;
+ import std.conv : to;
+
+ foreach (String; AliasSeq!(string, wstring, const(dchar)[]))
+ {
+ // pattern given as a plain string of the same type as the input
+ auto str1 = "blah-bleh".to!String();
+ auto pat1 = "bl[ae]h".to!String();
+ auto mf = matchFirst(str1, pat1);
+ assert(mf.equal(["blah".to!String()]));
+ auto mAll = matchAll(str1, pat1);
+ assert(mAll.equal!((a,b) => a.equal(b))
+ ([["blah".to!String()], ["bleh".to!String()]]));
+
+ // Regex built from two alternative patterns; only the date one matches here
+ auto str2 = "1/03/12 - 3/03/12".to!String();
+ auto pat2 = regex([r"(\d+)/(\d+)/(\d+)".to!String(), "abc".to!String]);
+ auto mf2 = matchFirst(str2, pat2);
+ assert(mf2.equal(["1/03/12", "1", "03", "12"].map!(to!String)()));
+ auto mAll2 = matchAll(str2, pat2);
+ // the first element of matchAll agrees with matchFirst
+ assert(mAll2.front.equal(mf2));
+ mAll2.popFront();
+ assert(mAll2.front.equal(["3/03/12", "3", "03", "12"].map!(to!String)()));
+ // the captures are themselves a range: dropping three leaves the last group
+ mf2.popFrontN(3);
+ assert(mf2.equal(["12".to!String()]));
+
+ // compile-time regex with named groups
+ auto ctPat = ctRegex!(`(?P<Quot>\d+)/(?P<Denom>\d+)`.to!String());
+ auto str = "2 + 34/56 - 6/1".to!String();
+ auto cmf = matchFirst(str, ctPat);
+ assert(cmf.equal(["34/56", "34", "56"].map!(to!String)()));
+ assert(cmf["Quot"] == "34".to!String());
+ assert(cmf["Denom"] == "56".to!String());
+
+ auto cmAll = matchAll(str, ctPat);
+ assert(cmAll.front.equal(cmf));
+ cmAll.popFront();
+ assert(cmAll.front.equal(["6/1", "6", "1"].map!(to!String)()));
+ }
+}
+
+/++
+    Begins matching $(D input) against the regex pattern $(D re) with the
+    traditional $(LINK2 https://en.wikipedia.org/wiki/Backtracking,
+    backtracking) matching scheme.
+
+    Use of this function is $(RED discouraged); prefer
+    $(LREF matchAll) or $(LREF matchFirst).
+
+    Selecting the kind of operation through the "g" flag is soon to be
+    phased out, together with the ability to pick the exact matching scheme.
+    Which scheme works best depends heavily on the kind of pattern and
+    can be decided automatically on a case by case basis.
+
+    Returns: a $(D RegexMatch) object holding the engine
+    state after the first match.
+
++/
+public auto bmatch(R, RegEx)(R input, RegEx re)
+if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
+{
+    import std.regex.internal.backtracking : BacktrackingMatcher;
+    alias Matcher = RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false);
+    return Matcher(input, re);
+}
+
+///ditto
+public auto bmatch(R, String)(R input, String re)
+if (isSomeString!R && isSomeString!String)
+{
+    // Textual pattern: compile it with regex() before handing it to the
+    // backtracking matcher.
+    import std.regex.internal.backtracking : BacktrackingMatcher;
+    alias Matcher = RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false);
+    return Matcher(input, regex(re));
+}
+
+public auto bmatch(R, RegEx)(R input, RegEx re)
+if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
+{
+    // StaticRegex pattern: uses BacktrackingMatcher!true rather than the
+    // `false` instantiation chosen by the other bmatch overloads.
+    import std.regex.internal.backtracking : BacktrackingMatcher;
+    alias Matcher = RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!true);
+    return Matcher(input, re);
+}
+
+// produces replacement string from format using captures for substitution
+//
+// Walks `format` with a two-state machine (Normal / Dollar) and writes the
+// expanded text to `sink`:
+//   $N       submatch number N (decimal)
+//   ${name}  named submatch; the name must consist of ASCII letters only
+//   $&       the whole match (captures[0])
+//   $`       captures.pre
+//   $'       captures.post
+//   $$       a literal '$'
+// Params:
+//   format        = replacement format string, consumed as it is processed
+//   captures      = captures of the match being replaced
+//   sink          = output range receiving the generated text
+//   ignoreBadSubs = when true, an out-of-range $N is skipped instead of
+//                   making enforce throw
+// Throws (through enforce): on an out-of-range submatch number (unless
+// ignoreBadSubs), on a malformed ${...}, and when the format ends right
+// after a '$'.
+package void replaceFmt(R, Capt, OutR)
+ (R format, Capt captures, OutR sink, bool ignoreBadSubs = false)
+if (isOutputRange!(OutR, ElementEncodingType!R[]) &&
+ isOutputRange!(OutR, ElementEncodingType!(Capt.String)[]))
+{
+ import std.algorithm.searching : find;
+ import std.ascii : isDigit, isAlpha;
+ import std.conv : text, parse;
+ import std.exception : enforce;
+ enum State { Normal, Dollar }
+ auto state = State.Normal;
+ size_t offset;
+L_Replace_Loop:
+ while (!format.empty)
+ final switch (state)
+ {
+ case State.Normal:
+ // scan code unit by code unit for the next '$'
+ for (offset = 0; offset < format.length; offset++)//no decoding
+ {
+ if (format[offset] == '$')
+ {
+ state = State.Dollar;
+ // emit the literal text before the '$', then drop it and the '$'
+ sink.put(format[0 .. offset]);
+ format = format[offset+1 .. $];//ditto
+ continue L_Replace_Loop;
+ }
+ }
+ // no '$' left: the remainder is literal text; format becomes empty
+ sink.put(format[0 .. offset]);
+ format = format[offset .. $];
+ break;
+ case State.Dollar:
+ // format is non-empty here (loop condition), so format[0] is valid
+ if (isDigit(format[0]))
+ {
+ // parse() advances `format` past the digits it consumed
+ uint digit = parse!uint(format);
+ enforce(ignoreBadSubs || digit < captures.length, text("invalid submatch number ", digit));
+ if (digit < captures.length)
+ sink.put(captures[digit]);
+ }
+ else if (format[0] == '{')
+ {
+ // x starts at the first non-letter after '{'; it has to be '}'
+ auto x = find!(a => !isAlpha(a))(format[1..$]);
+ enforce(!x.empty && x[0] == '}', "no matching '}' in replacement format");
+ auto name = format[1 .. $ - x.length];
+ format = x[1..$];
+ enforce(!name.empty, "invalid name in ${...} replacement format");
+ sink.put(captures[name]);
+ }
+ else if (format[0] == '&')
+ {
+ sink.put(captures[0]);
+ format = format[1 .. $];
+ }
+ else if (format[0] == '`')
+ {
+ sink.put(captures.pre);
+ format = format[1 .. $];
+ }
+ else if (format[0] == '\'')
+ {
+ sink.put(captures.post);
+ format = format[1 .. $];
+ }
+ else if (format[0] == '$')
+ {
+ sink.put(format[0 .. 1]);
+ format = format[1 .. $];
+ }
+ // NOTE(review): any other character after '$' matches no branch, so the
+ // '$' is dropped silently and that character is then emitted as literal
+ // text by the Normal state - confirm this is intended.
+ state = State.Normal;
+ break;
+ }
+ // still in Dollar state means the format ended with a dangling '$'
+ enforce(state == State.Normal, "invalid format string in regex replace");
+}
+
+/++
+    Builds a new string from $(D input) in which the first match of $(D re)
+    is replaced by text generated from that match according to the
+    $(D format) specifier.
+
+    Use $(LREF replaceAll) to replace every match.
+
+    Params:
+    input = string to search
+    re = compiled regular expression to use
+    format = _format string to generate replacements from,
+    see $(S_LINK Replace _format string, the _format string).
+
+    Returns:
+    A string of the same type with the first match (if any) replaced.
+    When nothing matches, the input string itself is returned.
++/
+public R replaceFirst(R, C, RegEx)(R input, RegEx re, const(C)[] format)
+if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
+{
+    // the replacement text is produced by expanding `format` for the captures
+    return replaceFirstWith!((caps, output) => replaceFmt(format, caps, output))(input, re);
+}
+
+///
+@system unittest
+{
+    // only the leading "n" is affected
+    auto bracketed = replaceFirst("noon", regex("n"), "[$&]");
+    assert(bracketed == "[n]oon");
+}
+
+/++
+    A general replacement tool that constructs a new string by replacing
+    a match of pattern $(D re) in the $(D input). Unlike the other overload
+    there is no format string; instead the captures are passed
+    to a user-defined functor $(D fun) that returns the string
+    to use as the replacement.
+
+    This version replaces only the first match in $(D input),
+    see $(LREF replaceAll) to replace all of the matches.
+
+    Returns:
+    A new string of the same type as $(D input) with the first match
+    replaced by the return value of $(D fun). If no match is found
+    the $(D input) itself is returned.
++/
+public R replaceFirst(alias fun, R, RegEx)(R input, RegEx re)
+if (isSomeString!R && isRegexFor!(RegEx, R))
+{
+    // whatever `fun` returns for the captures is written to the output
+    return replaceFirstWith!((caps, output) => output.put(fun(caps)))(input, re);
+}
+
+///
+@system unittest
+{
+    import std.conv : to;
+    // bump the first number found in the text, leave the second one alone
+    string original = "#21 out of 46";
+    auto digits = regex(`[0-9]+`);
+    string updated = replaceFirst!(m => to!string(to!int(m.hit) + 1))(original, digits);
+    assert(updated == "#22 out of 46");
+}
+
+/++
+    A variation on $(LREF replaceFirst) that, rather than allocating a new
+    string on each call, writes the result piece by piece to the $(D sink).
+    In particular this makes it possible to build up a final output
+    incrementally and efficiently.
+
+    As with the $(LREF replaceFirst) family of functions there is one overload
+    where the substitution is guided by the $(D format) string
+    and one that takes a user-defined callback.
++/
+public @trusted void replaceFirstInto(Sink, R, C, RegEx)
+    (ref Sink sink, R input, RegEx re, const(C)[] format)
+if (isOutputRange!(Sink, dchar) && isSomeString!R
+    && is(C : dchar) && isRegexFor!(RegEx, R))
+{
+    // expand `format` for the captures of the first match, straight into sink
+    replaceCapturesInto!((caps, output) => replaceFmt(format, caps, output))
+        (sink, input, matchFirst(input, re));
+}
+
+///ditto
+public @trusted void replaceFirstInto(alias fun, Sink, R, RegEx)
+    (Sink sink, R input, RegEx re)
+if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
+{
+    // callback flavour: `fun` is forwarded as-is together with the first match
+    sink.replaceCapturesInto!fun(input, matchFirst(input, re));
+}
+
+///
+@system unittest
+{
+    import std.array;
+    auto output = appender!string();
+    string firstLine = "first message\n";
+    string secondLine = "second message\n";
+    // format-string flavour
+    replaceFirstInto(output, firstLine, regex(`([a-z]+) message`), "$1");
+    //equivalent of the above with user-defined callback
+    replaceFirstInto!(m => m[1])(output, secondLine, regex(`([a-z]+) message`));
+    // both results were appended to the same sink
+    assert(output.data == "first\nsecond\n");
+}
+
+//examples for replaceFirst
+@system unittest
+{
+    import std.array;
+    import std.conv;
+
+    // callback overload: increment the first number only
+    string original = "#21 out of 46";
+    string updated = replaceFirst!(m => to!string(to!int(m.hit) + 1))
+        (original, regex(`[0-9]+`));
+    assert(updated == "#22 out of 46");
+
+    // sink overloads: format string first, then the equivalent callback
+    auto output = appender!string();
+    string firstLine = "first message\n";
+    string secondLine = "second message\n";
+    replaceFirstInto(output, firstLine, regex(`([a-z]+) message`), "$1");
+    replaceFirstInto!(m => m[1])(output, secondLine, regex(`([a-z]+) message`));
+    assert(output.data == "first\nsecond\n");
+}
+
+/++
+    Builds a new string from $(D input) in which every fragment that
+    matches the pattern $(D re) is replaced by text generated from
+    that match according to the $(D format) specifier.
+
+    Use $(LREF replaceFirst) to replace only the first match.
+
+    Params:
+    input = string to search
+    re = compiled regular expression to use
+    format = _format string to generate replacements from,
+    see $(S_LINK Replace _format string, the _format string).
+
+    Returns:
+    A string of the same type as $(D input) with all
+    of the matches (if any) replaced.
+    When nothing matches, the input string itself is returned.
++/
+public @trusted R replaceAll(R, C, RegEx)(R input, RegEx re, const(C)[] format)
+if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
+{
+    // the replacement text is produced by expanding `format` for each match
+    return replaceAllWith!((caps, output) => replaceFmt(format, caps, output))(input, re);
+}
+
+///
+@system unittest
+{
+    // insert comma as thousands delimiter
+    auto thousands = regex(r"(?<=\d)(?=(\d\d\d)+\b)","g");
+    auto formatted = replaceAll("12000 + 42100 = 54100", thousands, ",");
+    assert(formatted == "12,000 + 42,100 = 54,100");
+}
+
+/++
+    A general replacement tool that constructs a new string by replacing
+    matches of pattern $(D re) in the $(D input). Unlike the other overload
+    there is no format string; instead the captures are passed
+    to a user-defined functor $(D fun) that returns the string
+    to use as the replacement.
+
+    This version replaces all of the matches found in $(D input),
+    see $(LREF replaceFirst) to replace the first match only.
+
+    Returns:
+    A new string of the same type as $(D input) with all matches
+    replaced by the return values of $(D fun). If no matches are found
+    the $(D input) itself is returned.
+
+    Params:
+    input = string to search
+    re = compiled regular expression
+    fun = delegate to use
++/
+public @trusted R replaceAll(alias fun, R, RegEx)(R input, RegEx re)
+if (isSomeString!R && isRegexFor!(RegEx, R))
+{
+    // whatever `fun` returns for each match is written to the output
+    return replaceAllWith!((caps, output) => output.put(fun(caps)))(input, re);
+}
+
+///
+@system unittest
+{
+    import std.string : toUpper;
+    // replacement callback: upper-case whatever was matched
+    string shout(Captures!(string) m)
+    {
+        return toUpper(m.hit);
+    }
+    // Capitalize the letters 'a' and 'r':
+    auto capitalized = replaceAll!(shout)("Strap a rocket engine on a chicken.",
+            regex("[ar]"));
+    assert(capitalized == "StRAp A Rocket engine on A chicken.");
+}
+
+/++
+    A variation on $(LREF replaceAll) that, rather than allocating a new
+    string on each call, writes the result piece by piece to the $(D sink).
+    In particular this makes it possible to build up a final output
+    incrementally and efficiently.
+
+    As with $(LREF replaceAll) there are 2 overloads - one taking a format
+    string, the other taking a user-defined functor.
++/
+public @trusted void replaceAllInto(Sink, R, C, RegEx)
+    (Sink sink, R input, RegEx re, const(C)[] format)
+if (isOutputRange!(Sink, dchar) && isSomeString!R
+    && is(C : dchar) && isRegexFor!(RegEx, R))
+{
+    // expand `format` for the captures of every match, straight into sink
+    replaceMatchesInto!((caps, output) => replaceFmt(format, caps, output))
+        (sink, input, matchAll(input, re));
+}
+
+///ditto
+public @trusted void replaceAllInto(alias fun, Sink, R, RegEx)
+    (Sink sink, R input, RegEx re)
+if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
+{
+    // callback flavour: `fun` is forwarded as-is together with all matches
+    sink.replaceMatchesInto!fun(input, matchAll(input, re));
+}
+
+///
+@system unittest
+{
+ // insert comma as thousands delimiter in fifty randomly produced big numbers
+ import std.array, std.conv, std.random, std.range;
+ static re = regex(`(?<=\d)(?=(\d\d\d)+\b)`, "g");
+ auto sink = appender!(char [])();
+ // bounds handed to uniform() below
+ enum ulong min = 10UL ^^ 10, max = 10UL ^^ 19;
+ foreach (i; 0 .. 50)
+ {
+ // reuse the same appender for every number
+ sink.clear();
+ replaceAllInto(sink, text(uniform(min, max)), re, ",");
+ // walk backwards from the 4th-last character in steps of 4:
+ // every one of those positions has to hold a comma
+ foreach (pos; iota(sink.data.length - 4, 0, -4))
+ assert(sink.data[pos] == ',');
+ }
+}
+
+// exercise all of the replace APIs
+// Covers replaceFirst/replaceAll (format string and callback overloads) and
+// replaceFirstInto/replaceAllInto for immutable and mutable strings of every
+// character width.
+@system unittest
+{
+ import std.array : appender;
+ import std.conv;
+ // try and check first/all simple substitution
+ foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
+ {
+ // inputs (s1, s2) and the expected results of replacing the
+ // first match (t?F) and all matches (t?A)
+ S s1 = "curt trial".to!S();
+ S s2 = "round dome".to!S();
+ S t1F = "court trial".to!S();
+ S t2F = "hound dome".to!S();
+ S t1A = "court trial".to!S();
+ S t2A = "hound home".to!S();
+ auto re1 = regex("curt".to!S());
+ auto re2 = regex("[dr]o".to!S());
+
+ // format-string overloads
+ assert(replaceFirst(s1, re1, "court") == t1F);
+ assert(replaceFirst(s2, re2, "ho") == t2F);
+ assert(replaceAll(s1, re1, "court") == t1A);
+ assert(replaceAll(s2, re2, "ho") == t2A);
+
+ // callback overloads; the first callback inserts "o" after the
+ // first character of the match
+ auto rep1 = replaceFirst!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(s1, re1);
+ assert(rep1 == t1F);
+ assert(replaceFirst!(cap => "ho".to!S())(s2, re2) == t2F);
+ auto rep1A = replaceAll!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(s1, re1);
+ assert(rep1A == t1A);
+ assert(replaceAll!(cap => "ho".to!S())(s2, re2) == t2A);
+
+ // sink overloads: each call appends to the same sink, so the
+ // expected data grows by one result per call
+ auto sink = appender!S();
+ replaceFirstInto(sink, s1, re1, "court");
+ assert(sink.data == t1F);
+ replaceFirstInto(sink, s2, re2, "ho");
+ assert(sink.data == t1F~t2F);
+ replaceAllInto(sink, s1, re1, "court");
+ assert(sink.data == t1F~t2F~t1A);
+ replaceAllInto(sink, s2, re2, "ho");
+ assert(sink.data == t1F~t2F~t1A~t2A);
+ }
+}
+
+/++
+    Old API for replacement, operation depends on flags of pattern $(D re).
+    With "g" flag it performs the equivalent of $(LREF replaceAll) otherwise it
+    works the same as $(LREF replaceFirst).
+
+    The matches are located with the $(D scheme) function, which defaults to
+    $(LREF match); the replacement text for each of them is generated from
+    the $(D format) string.
+
+    The use of this function is $(RED discouraged), please use $(LREF replaceAll)
+    or $(LREF replaceFirst) explicitly.
++/
+public R replace(alias scheme = match, R, C, RegEx)(R input, RegEx re, const(C)[] format)
+if (isSomeString!R && isRegexFor!(RegEx, R))
+{
+    // Forward the caller-selected matching function. It used to be ignored
+    // in favour of a hard-coded `match`, so e.g. replace!bmatch silently
+    // ran `match`. With the default argument the behaviour is unchanged.
+    return replaceAllWith!((m, sink) => replaceFmt(format, m, sink), scheme)(input, re);
+}
+
+///ditto
+public R replace(alias fun, R, RegEx)(R input, RegEx re)
+if (isSomeString!R && isRegexFor!(RegEx, R))
+{
+    // callback flavour of the old API: delegates to replaceAllWith with
+    // `fun` as the replacement and `match` as the matching function
+    return input.replaceAllWith!(fun, match)(re);
+}
+
+/**
+Splits a string `r` using a regular expression `pat` as a separator.
+
+Params:
+ keepSeparators = flag to specify if the matches should be in the resulting range
+ r = the string to split
+ pat = the pattern to split on
+Returns:
+ A lazy range of strings
+*/
+public struct Splitter(Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, alias RegEx = Regex)
+if (isSomeString!Range && isRegexFor!(RegEx, Range))
+{
+private:
+ // the string being split
+ Range _input;
+ // index into _input where the current piece starts; a value past
+ // _input.length marks the range as exhausted (see empty)
+ size_t _offset;
+ // type and state of the separator search, as produced by match()
+ alias Rx = typeof(match(Range.init,RegEx.init));
+ Rx _match;
+
+ // keepSeparators only: true while front is a separator rather than a piece
+ static if (keepSeparators) bool onMatch = false;
+
+ @trusted this(Range input, RegEx separator)
+ {//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
+ _input = input;
+ // set the global flag on the local copy of the pattern
+ // (presumably so that _match steps through every separator - confirm)
+ separator.flags |= RegexOption.global;
+ if (_input.empty)
+ {
+ //there is nothing to match at all, make _offset > 0
+ _offset = 1;
+ }
+ else
+ {
+ _match = Rx(_input, separator);
+
+ // input starts with a separator: advance once so that the separator
+ // itself, not an empty leading piece, becomes the first element
+ static if (keepSeparators)
+ if (_match.pre.empty)
+ popFront();
+ }
+ }
+
+public:
+ // slicing yields an independent copy of the range
+ auto ref opSlice()
+ {
+ return this.save;
+ }
+
+ ///Forward range primitives.
+ @property Range front()
+ {
+ import std.algorithm.comparison : min;
+
+ assert(!empty && _offset <= _match.pre.length
+ && _match.pre.length <= _input.length);
+
+ static if (keepSeparators)
+ {
+ // alternate between the piece before the current match and the match
+ if (!onMatch)
+ return _input[_offset .. min($, _match.pre.length)];
+ else
+ return _match.hit();
+ }
+ else
+ {
+ // piece from the current offset up to the start of the current match
+ return _input[_offset .. min($, _match.pre.length)];
+ }
+ }
+
+ ///ditto
+ @property bool empty()
+ {
+ // without keepSeparators, _offset == _input.length still counts as
+ // non-empty, which leaves room for an empty final piece
+ static if (keepSeparators)
+ return _offset >= _input.length;
+ else
+ return _offset > _input.length;
+ }
+
+ ///ditto
+ void popFront()
+ {
+ assert(!empty);
+ if (_match.empty)
+ {
+ //No more separators, work is done here
+ _offset = _input.length + 1;
+ }
+ else
+ {
+ static if (keepSeparators)
+ {
+ if (!onMatch)
+ {
+ //skip past the separator
+ _offset = _match.pre.length;
+ }
+ else
+ {
+ // leaving a separator: move past it and advance to the next match
+ _offset += _match.hit.length;
+ _match.popFront();
+ }
+
+ onMatch = !onMatch;
+ }
+ else
+ {
+ //skip past the separator
+ _offset = _match.pre.length + _match.hit.length;
+ _match.popFront();
+ }
+ }
+ }
+
+ ///ditto
+ @property auto save()
+ {
+ return this;
+ }
+}
+
+/// ditto
+public Splitter!(keepSeparators, Range, RegEx) splitter(
+    Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)(Range r, RegEx pat)
+if (is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
+{
+    // convenience constructor: spares the caller from spelling out the
+    // template arguments of Splitter
+    alias Result = Splitter!(keepSeparators, Range, RegEx);
+    return Result(r, pat);
+}
+
+///
+@system unittest
+{
+    import std.algorithm.comparison : equal;
+    // separators at both ends produce empty first and last pieces
+    auto text = ", abc, de, fg, hi, ";
+    auto pieces = splitter(text, regex(", *"));
+    assert(equal(pieces, ["", "abc", "de", "fg", "hi", ""]));
+}
+
+/// Split on a pattern, but keep the matches in the resulting range
+@system unittest
+{
+    import std.algorithm.comparison : equal;
+    import std.typecons : Yes;
+
+    auto separators = regex(`([\.,])`);
+
+    // separators show up between the pieces they delimit
+    auto dateParts = "2003.04.05".splitter!(Yes.keepSeparators)(separators);
+    assert(dateParts.equal(["2003", ".", "04", ".", "05"]));
+
+    // a leading separator comes first, with no empty piece in front of it
+    auto listParts = ",1,2,3".splitter!(Yes.keepSeparators)(separators);
+    assert(listParts.equal([",", "1", ",", "2", ",", "3"]));
+}
+
+///An eager counterpart of $(D splitter) that returns an array holding the slices of $(D input) it produces.
+public @trusted String[] split(String, RegEx)(String input, RegEx rx)
+if (isSomeString!String && isRegexFor!(RegEx, String))
+{
+    import std.array : appender;
+    auto pieces = appender!(String[])();
+    // drain the lazy splitter range into the appender
+    for (auto parts = splitter(input, rx); !parts.empty; parts.popFront())
+        pieces.put(parts.front);
+    return pieces.data;
+}
+
+///Exception object thrown in case of errors during regex compilation.
+// Public alias of the exception type declared in std.regex.internal.ir.
+public alias RegexException = std.regex.internal.ir.RegexException;
+
+/++
+    Wraps $(D r) in a range that lazily produces the same text escaped
+    for use inside of a regular expression: a backslash is emitted in front
+    of every element that appears in the list of escapable characters.
++/
+auto escaper(Range)(Range r)
+{
+    import std.algorithm.searching : find;
+    static immutable escapables = [Escapables];
+    static struct Escaper // template to deduce attributes
+    {
+        Range r;
+        // true while the backslash for r.front has not been consumed yet
+        bool escaped;
+
+        @property ElementType!Range front()
+        {
+            if (!escaped)
+                return r.front;
+            return '\\';
+        }
+
+        @property bool empty()
+        {
+            return r.empty;
+        }
+
+        void popFront()
+        {
+            if (escaped)
+            {
+                // backslash consumed; the escaped element itself comes next
+                escaped = false;
+                return;
+            }
+            r.popFront();
+            // decide whether the new front needs a backslash first
+            escaped = !r.empty && !escapables.find(r.front).empty;
+        }
+
+        @property auto save()
+        {
+            return Escaper(r.save, escaped);
+        }
+    }
+
+    // the very first element may already need escaping
+    immutable startsEscaped = !r.empty && !escapables.find(r.front).empty;
+    return Escaper(r, startsEscaped);
+}
+
+///
+@system unittest
+{
+    import std.algorithm.comparison;
+    import std.regex;
+    // braces and asterisks each gain a leading backslash
+    string raw = `This is {unfriendly} to *regex*`;
+    auto escapedText = raw.escaper;
+    assert(escapedText.equal(`This is \{unfriendly\} to \*regex\*`));
+}
+
+// escaper over every string width: a lone special character and empty input
+@system unittest
+{
+    import std.algorithm.comparison;
+    import std.conv;
+    foreach (S; AliasSeq!(string, wstring, dstring))
+    {
+        // a single special character gains a leading backslash
+        auto s = "^".to!S;
+        assert(s.escaper.equal(`\^`));
+        // Empty input stays empty. The literal is converted to S so that each
+        // width is really exercised; it used to be a plain `string` on every
+        // iteration.
+        auto s2 = "".to!S;
+        assert(s2.escaper.equal(""));
+    }
+}