// Builds CodepointSet values from boundary lists and from a range of
// CodepointInterval, then probes membership by indexing.
pure @safe unittest
{
    import std.algorithm.comparison : equal;
    import std.uni;

    // boundaries are given pairwise: Latin lowercase, then Cyrillic lowercase
    auto letters = CodepointSet('a', 'z'+1, 'а', 'я'+1);
    foreach (cp; 'a'..'z'+1)
        assert(letters[cp]);
    foreach (cp; 'а'..'я'+1)
        assert(letters[cp]);

    // pairs may come in any order and may intersect one another ...
    auto shuffled = CodepointSet('а', 'я'+1, 'a', 'd', 'b', 'z'+1);
    // ... and still produce the same intervals
    assert(equal(shuffled.byInterval, letters.byInterval));

    // constructor taking a range of intervals
    auto whitePieces = CodepointInterval(9812, 9818);
    auto blackPieces = CodepointInterval(9818, 9824);
    auto chess = CodepointSet([whitePieces, blackPieces]);
    foreach (cp; '♔'..'♟'+1)
        assert(chess[cp]);
}

// Fetches the Gothic set by name and tests two code points against it.
pure @safe unittest
{
    import std.uni;

    enum dchar ahsa = '\U00010330'; // Gothic letter ahsa
    auto gothicSet = unicode.Gothic;

    assert(gothicSet[ahsa]);
    // an ASCII character is not part of the Gothic set
    assert(!gothicSet['$']);
}

// Exercises the set operators on code point sets:
// `&` (intersection), `-` (difference), `~` (symmetric difference), `|` (union).
pure @safe unittest
{
    import std.algorithm.comparison : equal;
    import std.range : iota;
    import std.uni;

    auto asciiSet = unicode.ASCII;
    auto lowerSet = unicode.LowerCase;
    auto upperSet = unicode.UpperCase;

    // nothing is both lowercase and uppercase
    auto both = lowerSet & upperSet;
    assert(both.empty);

    // lowercase restricted to ASCII is exactly 'a'..'z'
    auto asciiLower = lowerSet & asciiSet;
    assert(equal(asciiLower.byCodepoint, iota('a', 'z'+1)));

    // ASCII with the lowercase letters removed
    auto asciiWithoutLower = asciiSet - lowerSet;
    assert(asciiWithoutLower.length == 128 - 26);

    // members of exactly one of the two sets
    auto exactlyOne = lowerSet ~ asciiSet;
    assert(!exactlyOne['Δ']); // in neither set
    assert(exactlyOne['$']);  // ASCII only
    assert(!exactlyOne['a']); // in both sets
    assert(exactlyOne['я']);  // lowercase only

    // ASCII with every cased letter removed
    auto casedLetters = lowerSet | upperSet;
    auto asciiNonLetters = asciiSet - casedLetters;
    assert(asciiNonLetters.length == 128 - 26*2);
}

// Membership can also be tested with the `in` operator.
pure @safe unittest
{
    import std.uni;

    auto cyrillic = unicode.Cyrillic;
    immutable bool hasYa = 'я' in cyrillic;
    immutable bool hasZ = 'z' in cyrillic;

    assert(hasYa);
    assert(!hasZ);
}

// Iterating the ASCII set code point by code point yields 0 .. 0x7F in order.
pure @safe unittest
{
    import std.uni;

    import std.algorithm.comparison : equal;
    import std.range : iota;

    auto set = unicode.ASCII;
    // The comparison result must be asserted; a bare call to `equal`
    // discards it and the test would verify nothing.
    assert(set.byCodepoint.equal(iota(0, 0x80)));
}

// Textual form of a code point set through `to!string` and `format`.
pure @safe unittest
{
    import std.uni;

    import std.conv : to;
    import std.format : format;

    // This was originally using Cyrillic script.
    // Unfortunately this is a pretty active range for changes,
    // and hence broke in an update.
    // Therefore the Basic Latin block is used instead, as it is
    // unlikely to ever change.

    assert(unicode.InBasic_latin.to!string == "[0..128)");

    // The specs '%s' and '%d' are equivalent to the to!string call above.
    assert(format("%s", unicode.InBasic_latin) == unicode.InBasic_latin.to!string);
    assert(format("%d", unicode.InBasic_latin) == unicode.InBasic_latin.to!string);

    // hexadecimal bounds, with the case of the prefix following the spec
    assert(format("%#x", unicode.InBasic_latin) == "[0..0x80)");
    assert(format("%#X", unicode.InBasic_latin) == "[0..0X80)");
}

// Grows a set one interval at a time with `add`.
pure @safe unittest
{
    import std.uni;

    CodepointSet digitsAndCaps;
    digitsAndCaps.add('0', '5');
    digitsAndCaps.add('A', 'Z'+1);
    // continues where the first interval stopped
    digitsAndCaps.add('5', '9'+1);

    foreach (dchar probe; "059Z")
        assert(digitsAndCaps[probe]);
}

// A set and its inverse are disjoint and together hold 0x110000 code points.
pure @safe unittest
{
    import std.uni;

    auto ascii = unicode.ASCII;
    auto everythingElse = ascii.inverted;

    // no overlap between the two
    auto overlap = ascii & everythingElse;
    assert(overlap.empty);

    // their union has 0x110000 members
    auto whole = ascii | everythingElse;
    assert(whole.length == 0x110000);
}

// A default-initialized set has no members.
pure @safe unittest
{
    import std.uni;

    CodepointSet nothing;
    assert(nothing.empty);
    assert(nothing.length == 0);
}

// Walks a UTF-8 string with a matcher built from unicode.Number, showing how
// `match`, `skip` and `test` differ in what they do to their argument.
pure @safe unittest
{
    import std.uni;

    auto numbers = utfMatcher!char(unicode.Number);
    string input = "2² = 4";

    // match: consumes the code point when it is in the set
    assert(numbers.match(input));   // '2'
    assert(input == "² = 4");
    assert(numbers.match(input));   // superscript two
    // ... and leaves the input alone when it is not
    assert(!numbers.match(input));  // ' '
    assert(input == " = 4");

    // skip: same verdict, but always consumes one code point
    assert(!numbers.skip(input));
    assert(input == "= 4");

    // test: only reports, never consumes
    assert(!numbers.test(input));   // '='
    assert(input == "= 4");
}

// Looks sets up by name, both at compile time (`unicode.name`) and at run
// time (`unicode("name")`).
@safe unittest
{
    import std.exception : collectException;
    import std.uni;

    auto asciiSet = unicode.ASCII;
    assert(asciiSet['A']);
    assert(asciiSet['~']);
    // the same letter, once as an escape and once as a literal
    assert(!asciiSet['\u00e0']);
    assert(!asciiSet['à']);
    // letter case in the name does not matter
    assert(asciiSet == unicode.ascII);

    // neither do underscores, '-' and whitespace
    auto latin1 = unicode.in_latin1_Supplement;
    assert(latin1['à']);
    assert(!latin1['$']);
    // Latin 1 Supplement is a block, which is why the name starts with "In"
    assert(latin1 == unicode("In Latin 1 Supplement"));

    // the run-time form throws when no set has the given name
    auto failure = collectException(unicode("InCyrilliac"));
    assert(failure !is null);
}

// `unicode.block.X` names the same set as the "In"-prefixed shortcut.
@safe unittest
{
    import std.uni;

    auto explicitForm = unicode.block.Greek_and_Coptic;
    auto prefixedForm = unicode.InGreek_and_Coptic;
    assert(explicitForm == prefixedForm);
}

// A script and a block may share a name while being different sets.
@safe unittest
{
    import std.uni;

    auto blockSet = unicode.block.arabic;
    auto scriptSet = unicode.script.arabic;

    // this code point belongs to both
    assert(scriptSet['؁']);
    assert(blockSet['؁']);
    // yet the two sets are not equal
    assert(blockSet != scriptSet);

    // shortcuts: "in" + name gives the block, the bare name gives the script
    assert(blockSet == unicode.inArabic);
    assert(scriptSet == unicode.arabic);
}

// Looks up a Hangul syllable type set by run-time name and by shortcut.
@safe unittest
{
    import std.uni;

    // "L" here names a Hangul syllable type, not Letter as in the unicode.L shortcut
    auto typeL = unicode.hangulSyllableType("L");

    // spot-check a run of code points that carry this type
    foreach (cp; '\u1110'..'\u115F')
        assert(typeL[cp]);

    // the compile-time spelling names the same set
    assert(typeL == unicode.hangulSyllableType.L);
}

// Parses a bracketed set from the front of a string; the parsed part is
// consumed from the argument.
@safe unittest
{
    import std.uni;

    string input = "[a-zA-Z0-9]hello";
    auto alnum = unicode.parseSet(input);

    // only the text after the closing bracket is left
    assert(input == "hello");

    // sample one code point from each listed range
    assert(alnum['a']);
    assert(alnum['A']);
    assert(alnum['9']);
}

// graphemeStride returns the length, in code units, of the grapheme cluster
// that starts at the given index.
@safe unittest
{
    import std.uni;

    assert(graphemeStride("  ", 1) == 1);

    // 'A' followed by a combining ring above
    string city = "A\u030Arhus";
    immutable size_t clusterLen = graphemeStride(city, 0);

    // one code unit for 'A' plus two UTF-8 code units for \u030A
    assert(clusterLen == 3);
    assert(city[0 .. clusterLen] == "A\u030A");
    assert(city[clusterLen .. $] == "rhus");
}

// popGrapheme drops one grapheme cluster from the front of its argument and
// returns how many elements it removed.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : filter;
    import std.uni;

    // the same two flags (two regional indicators each) in all three encodings
    string narrow = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
    wstring wide = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
    dstring utf32 = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";

    // for strings the result counts code units, not code points
    assert(narrow.popGrapheme() == 8);
    assert(narrow == "\U0001F1EC\U0001F1E7");

    assert(wide.popGrapheme() == 4);
    assert(wide == "\U0001F1EC\U0001F1E7");

    assert(utf32.popGrapheme() == 2);
    assert(utf32 == "\U0001F1EC\U0001F1E7");

    // Ranges without random access work too, provided the
    // element type is a 32-bit character.
    auto nonLetters = "\r\nhello!"d.filter!(ch => !isAlpha(ch));
    // a Windows-style line ending: two code points, one grapheme
    assert(nonLetters.popGrapheme() == 2);
    assert(equal(nonLetters, "!"d));
}

// byGrapheme iterates a string one grapheme cluster at a time.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.range : drop, take;
    import std.range.primitives : walkLength;
    import std.uni;

    // "noël" spelled with 'e' + combining diaeresis
    auto word = "noe\u0308l";
    assert(walkLength(word) == 5);      // five code points ...

    auto clusters = word.byGrapheme;
    assert(walkLength(clusters) == 4);  // ... but four graphemes

    // the first three clusters, then the remaining one
    auto head = clusters.take(3);
    assert(equal(head, "noe\u0308".byGrapheme));
    auto tail = clusters.drop(3);
    assert(equal(tail, "l".byGrapheme));
}

// Reverses a string grapheme by grapheme, so a combining mark stays attached
// to its base character.
@safe unittest
{
    import std.array : array;
    import std.conv : text;
    import std.range : retro;
    import std.uni;

    string word = "noe\u0308l"; // noël

    // collect the clusters into an array so they can be walked backwards
    auto clusters = word.byGrapheme.array;
    // flatten the reversed clusters back into code points, then into a string
    string reversed = text(clusters.retro.byCodePoint);

    assert(reversed == "le\u0308on"); // lëon
}

// Code points of a Grapheme can be read and overwritten by index;
// `valid` reports whether the content is still a single cluster.
@safe unittest
{
    import std.uni;

    auto cluster = Grapheme("A\u0302");
    assert(cluster.valid);
    assert(cluster[0] == 'A');

    // the ASCII tilde is not a combining mark
    cluster[1] = '~';
    assert(cluster[1] == '~');
    assert(!cluster.valid);
}

// Appending to a Grapheme with `~=`, using a single code point and a string.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.uni;

    auto cluster = Grapheme("A");
    assert(cluster.valid);

    // adding a combining mark keeps it one cluster
    cluster ~= '\u0301';
    assert(cluster.valid);
    assert(equal(cluster[], "A\u0301"));

    // adding another base letter does not
    cluster ~= "B";
    assert(!cluster.valid);
    // the code points are all still there
    assert(equal(cluster[], "A\u0301B"));
}

// Decodes grapheme clusters off the front of a string and treats them as
// ranges of code points; then appends to a Grapheme by hand.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : filter;
    import std.range : isRandomAccessRange;
    import std.uni;

    string word = "ku\u0308hn";

    // decodeGrapheme takes its argument by ref
    auto head = decodeGrapheme(word);
    assert(head.length == 1);
    assert(head[0] == 'k');

    // the following cluster holds two code points
    auto cluster = decodeGrapheme(word);
    assert(cluster.length == 2);
    // a slice of a grapheme is a random-access range of dchar
    static assert(isRandomAccessRange!(typeof(cluster[])));
    assert(equal(cluster[], "u\u0308"));

    // so the usual range algorithms apply to it
    assert(equal(filter!isMark(cluster[]), "\u0308"));

    auto built = Grapheme("A");
    assert(built.valid);

    // a combining mark keeps it one cluster
    built ~= '\u0301';
    assert(built.valid);
    assert(equal(built[], "A\u0301"));

    // a second base letter does not
    built ~= "B";
    assert(!built.valid);
    // the code points are all still there
    assert(equal(built[], "A\u0301B"));
}

// Compares sicmp and icmp on input with and without a one-to-many case mapping.
@safe @nogc pure nothrow unittest
{
    import std.uni;

    // Greek small letter iota with dialytika and tonos:
    // once as a single code point, once as a three code point sequence
    enum precomposed = "ΐ";
    enum sequence = "\u03B9\u0308\u0301";

    // case differences alone are handled by both functions
    assert(sicmp("Август", "авгусТ") == 0);
    assert(sicmp("ΌΎ", "όύ") == 0);
    assert(icmp("ΌΎ", "όύ") == 0);

    // sicmp does not treat the two spellings as equal ...
    assert(sicmp(precomposed, sequence) != 0);
    // ... while icmp does
    assert(icmp(precomposed, sequence) == 0);
}

// icmp treats strings as equal when they differ only by case mappings,
// including mappings to more than one code point.
@safe @nogc pure nothrow unittest
{
    import std.uni;

    immutable int german = icmp("Rußland", "Russland");
    immutable int greek = icmp("ᾩ -> \u1F70\u03B9", "\u1F61\u03B9 -> ᾲ");

    assert(german == 0);
    assert(greek == 0);
}

// icmp also accepts ranges of dchar, not only strings.
@safe @nogc nothrow pure unittest
{
    import std.uni;
    import std.utf : byDchar;

    auto germanLeft = byDchar("Rußland");
    auto germanRight = byDchar("Russland");
    assert(icmp(germanLeft, germanRight) == 0);

    auto greekLeft = byDchar("ᾩ -> \u1F70\u03B9");
    auto greekRight = byDchar("\u1F61\u03B9 -> ᾲ");
    assert(icmp(greekLeft, greekRight) == 0);
}

// combiningClass returns the numeric combining class of a code point.
@safe unittest
{
    import std.uni;

    enum dchar tilde = '\u0303';     // combining tilde
    enum dchar ringBelow = '\u0325'; // combining ring below

    assert(combiningClass(tilde) == 230);
    assert(combiningClass(ringBelow) == 220);
    // the simple consequence is that the tilde should be placed
    // after the ring below in a sequence
}

// compose merges two code points into one, or yields dchar.init when the
// pair does not compose.
@safe unittest
{
    import std.uni;

    // pairs that compose
    assert(compose('A', '\u0308') == '\u00C4');
    assert(compose('C', '\u0301') == '\u0106');

    // pairs that do not
    assert(compose('A', 'B') == dchar.init);
    // the starter has to come first, so the swapped order fails as well
    assert(compose('\u0308', 'A') == dchar.init);
}

// compose and decompose side by side.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.uni;

    // pairs that compose
    assert(compose('A', '\u0308') == '\u00C4');
    assert(compose('C', '\u0301') == '\u0106');
    // pairs that do not; the starter has to be the first argument
    assert(compose('A', 'B') == dchar.init);
    assert(compose('\u0308', 'A') == dchar.init);

    // decompose returns a value whose slice is the resulting code points
    auto accented = decompose('Ĉ');
    assert(equal(accented[], "C\u0302"));

    // a plain letter comes back as itself
    auto plain = decompose('D');
    assert(equal(plain[], "D"));

    // a Hangul syllable splits into three code points
    auto syllable = decompose('\uD4DC');
    assert(equal(syllable[], "\u1111\u1171\u11B7"));

    // decomposition type selected explicitly
    auto superscript = decompose!Compatibility('¹');
    assert(equal(superscript[], "1"));
}

// decomposeHangul splits a Hangul syllable into its constituent code points.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.uni;

    auto parts = decomposeHangul('\uD4DB');
    assert(equal(parts[], "\u1111\u1171\u11B6"));
}

// composeJamo assembles a Hangul syllable from two or three code points.
@safe unittest
{
    import std.uni;

    enum dchar lead = '\u1111';
    enum dchar vowel = '\u1171';
    enum dchar trail = '\u11B6';

    // all three parts
    assert(composeJamo(lead, vowel, trail) == '\uD4DB');

    // with the third argument left out, or set to a code point that is
    // not a trailing consonant, the result is an LV-syllable
    assert(composeJamo(lead, vowel) == '\uD4CC');
    assert(composeJamo(lead, vowel, ' ') == '\uD4CC');

    // an unsuitable first or second argument yields dchar.init
    assert(composeJamo(lead, 'A') == dchar.init);
    assert(composeJamo('A', vowel) == dchar.init);
}

// normalize converts a string to the requested normalization form.
@safe pure unittest
{
    import std.uni;

    // works for any encoding; here the very same slice comes back
    wstring greeting = "Hello world";
    auto normalized = normalize(greeting);
    assert(normalized is greeting);

    // A character whose four forms all differ:
    // Greek upsilon with acute and hook symbol (code point 0x03D3)
    enum upsilon = "ϓ";
    assert(normalize!NFC(upsilon) == "\u03D3");
    assert(normalize!NFD(upsilon) == "\u03D2\u0301");
    assert(normalize!NFKC(upsilon) == "\u038E");
    assert(normalize!NFKD(upsilon) == "\u03A5\u0301");
}

// allowedIn tells whether a code point may appear in a normalization form.
@safe unittest
{
    import std.uni;

    // a Cyrillic letter is allowed in all four forms
    enum dchar ya = 'я';
    assert(allowedIn!NFC(ya));
    assert(allowedIn!NFD(ya));
    assert(allowedIn!NFKC(ya));
    assert(allowedIn!NFKD(ya));

    // an ASCII letter is allowed in NFC as well
    assert(allowedIn!NFC('Z'));
}

// asUpperCase yields a range with every character upper-cased.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.uni;

    auto shouted = asUpperCase("hEllo");
    assert(equal(shouted, "HELLO"));
}

// asCapitalized yields a range with the first character upper-cased and the
// rest lower-cased.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.uni;

    auto titled = asCapitalized("hEllo");
    assert(equal(titled, "Hello"));
}

// Upper-cases a string element by element and gathers the result in an appender.
@safe unittest
{
    import std.algorithm.iteration : map;
    import std.algorithm.mutation : copy;
    import std.array : appender;
    import std.uni;

    auto sink = appender!(char[])();
    auto shouted = "hello".map!toUpper;
    copy(shouted, sink);

    assert(sink.data == "HELLO");
}