diff options
Diffstat (limited to 'libphobos/src/std/ascii.d')
-rw-r--r-- | libphobos/src/std/ascii.d | 729 |
1 files changed, 729 insertions, 0 deletions
diff --git a/libphobos/src/std/ascii.d b/libphobos/src/std/ascii.d new file mode 100644 index 0000000..b430114 --- /dev/null +++ b/libphobos/src/std/ascii.d @@ -0,0 +1,729 @@ +// Written in the D programming language. + +/++ + Functions which operate on ASCII characters. + + All of the functions in std._ascii accept Unicode characters but + effectively ignore them if they're not ASCII. All $(D isX) functions return + $(D false) for non-ASCII characters, and all $(D toX) functions do nothing + to non-ASCII characters. + + For functions which operate on Unicode characters, see + $(MREF std, uni). + +$(SCRIPT inhibitQuickIndex = 1;) +$(DIVC quickindex, +$(BOOKTABLE, +$(TR $(TH Category) $(TH Functions)) +$(TR $(TD Validation) $(TD + $(LREF isAlpha) + $(LREF isAlphaNum) + $(LREF isASCII) + $(LREF isControl) + $(LREF isDigit) + $(LREF isGraphical) + $(LREF isHexDigit) + $(LREF isOctalDigit) + $(LREF isPrintable) + $(LREF isPunctuation) + $(LREF isUpper) + $(LREF isWhite) +)) +$(TR $(TD Conversions) $(TD + $(LREF toLower) + $(LREF toUpper) +)) +$(TR $(TD Constants) $(TD + $(LREF digits) + $(LREF fullHexDigits) + $(LREF hexDigits) + $(LREF letters) + $(LREF lowercase) + $(LREF lowerHexDigits) + $(LREF newline) + $(LREF octalDigits) + $(LREF uppercase) + $(LREF whitespace) +)) +$(TR $(TD Enums) $(TD + $(LREF LetterCase) +)) +)) + References: + $(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table), + $(HTTP en.wikipedia.org/wiki/Ascii, Wikipedia) + + License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). + Authors: $(HTTP digitalmars.com, Walter Bright) and Jonathan M Davis + Source: $(PHOBOSSRC std/_ascii.d) + +/ +module std.ascii; + +version (unittest) +{ + // FIXME: When dmd bug #314 is fixed, make these selective. + import std.meta; // : AliasSeq; + import std.range; // : chain; + import std.traits; // : functionAttributes, FunctionAttribute, isSafe; +} + + +immutable fullHexDigits = "0123456789ABCDEFabcdef"; /// 0 .. 9A .. Fa .. f +immutable hexDigits = fullHexDigits[0 .. 16]; /// 0 .. 9A .. F +immutable lowerHexDigits = "0123456789abcdef"; /// 0 .. 9a .. f +immutable digits = hexDigits[0 .. 10]; /// 0 .. 9 +immutable octalDigits = digits[0 .. 8]; /// 0 .. 7 +immutable letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; /// A .. Za .. z +immutable uppercase = letters[0 .. 26]; /// A .. Z +immutable lowercase = letters[26 .. 52]; /// a .. z +immutable whitespace = " \t\v\r\n\f"; /// ASCII _whitespace + +/++ + Letter case specifier. + +/ +enum LetterCase : bool +{ + upper, /// Upper case letters + lower /// Lower case letters +} + +/// +@safe unittest +{ + import std.conv : to; + + assert(42.to!string(16, LetterCase.upper) == "2A"); + assert(42.to!string(16, LetterCase.lower) == "2a"); +} + +/// +@system unittest +{ + import std.digest.hmac : hmac; + import std.digest.digest : toHexString; + import std.digest.sha : SHA1; + import std.string : representation; + + const sha1HMAC = "A very long phrase".representation + .hmac!SHA1("secret".representation) + .toHexString!(LetterCase.lower); + assert(sha1HMAC == "49f2073c7bf58577e8c9ae59fe8cfd37c9ab94e5"); +} + +/// Newline sequence for this system. +version (Windows) + immutable newline = "\r\n"; +else version (Posix) + immutable newline = "\n"; +else + static assert(0, "Unsupported OS"); + + +/++ + Params: c = The character to test. + Returns: Whether $(D c) is a letter or a number (0 .. 9, a .. z, A .. Z). + +/ +bool isAlphaNum(dchar c) @safe pure nothrow @nogc +{ + return c <= 'z' && c >= '0' && (c <= '9' || c >= 'a' || (c >= 'A' && c <= 'Z')); +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isAlphaNum('A')); + assert( isAlphaNum('1')); + assert(!isAlphaNum('#')); + + // N.B.: does not return true for non-ASCII Unicode alphanumerics: + assert(!isAlphaNum('á')); +} + +@safe unittest +{ + foreach (c; chain(digits, octalDigits, fullHexDigits, letters, lowercase, uppercase)) + assert(isAlphaNum(c)); + + foreach (c; whitespace) + assert(!isAlphaNum(c)); +} + + +/++ + Params: c = The character to test. + Returns: Whether $(D c) is an ASCII letter (A .. Z, a .. z). + +/ +bool isAlpha(dchar c) @safe pure nothrow @nogc +{ + // Optimizer can turn this into a bitmask operation on 64 bit code + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isAlpha('A')); + assert(!isAlpha('1')); + assert(!isAlpha('#')); + + // N.B.: does not return true for non-ASCII Unicode alphabetic characters: + assert(!isAlpha('á')); +} + +@safe unittest +{ + foreach (c; chain(letters, lowercase, uppercase)) + assert(isAlpha(c)); + + foreach (c; chain(digits, octalDigits, whitespace)) + assert(!isAlpha(c)); +} + + +/++ + Params: c = The character to test. + Returns: Whether $(D c) is a lowercase ASCII letter (a .. z). + +/ +bool isLower(dchar c) @safe pure nothrow @nogc +{ + return c >= 'a' && c <= 'z'; +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isLower('a')); + assert(!isLower('A')); + assert(!isLower('#')); + + // N.B.: does not return true for non-ASCII Unicode lowercase letters + assert(!isLower('á')); + assert(!isLower('Á')); +} + +@safe unittest +{ + foreach (c; lowercase) + assert(isLower(c)); + + foreach (c; chain(digits, uppercase, whitespace)) + assert(!isLower(c)); +} + + +/++ + Params: c = The character to test. + Returns: Whether $(D c) is an uppercase ASCII letter (A .. Z). + +/ +bool isUpper(dchar c) @safe pure nothrow @nogc +{ + return c <= 'Z' && 'A' <= c; +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isUpper('A')); + assert(!isUpper('a')); + assert(!isUpper('#')); + + // N.B.: does not return true for non-ASCII Unicode uppercase letters + assert(!isUpper('á')); + assert(!isUpper('Á')); +} + +@safe unittest +{ + foreach (c; uppercase) + assert(isUpper(c)); + + foreach (c; chain(digits, lowercase, whitespace)) + assert(!isUpper(c)); +} + + +/++ + Params: c = The character to test. + Returns: Whether $(D c) is a digit (0 .. 9). + +/ +bool isDigit(dchar c) @safe pure nothrow @nogc +{ + return '0' <= c && c <= '9'; +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isDigit('3')); + assert( isDigit('8')); + assert(!isDigit('B')); + assert(!isDigit('#')); + + // N.B.: does not return true for non-ASCII Unicode numbers + assert(!isDigit('0')); // full-width digit zero (U+FF10) + assert(!isDigit('4')); // full-width digit four (U+FF14) +} + +@safe unittest +{ + foreach (c; digits) + assert(isDigit(c)); + + foreach (c; chain(letters, whitespace)) + assert(!isDigit(c)); +} + + +/++ + Params: c = The character to test. + Returns: Whether $(D c) is a digit in base 8 (0 .. 7). + +/ +bool isOctalDigit(dchar c) @safe pure nothrow @nogc +{ + return c >= '0' && c <= '7'; +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isOctalDigit('0')); + assert( isOctalDigit('7')); + assert(!isOctalDigit('8')); + assert(!isOctalDigit('A')); + assert(!isOctalDigit('#')); +} + +@safe unittest +{ + foreach (c; octalDigits) + assert(isOctalDigit(c)); + + foreach (c; chain(letters, ['8', '9'], whitespace)) + assert(!isOctalDigit(c)); +} + + +/++ + Params: c = The character to test. + Returns: Whether $(D c) is a digit in base 16 (0 .. 9, A .. F, a .. f). + +/ +bool isHexDigit(dchar c) @safe pure nothrow @nogc +{ + return c <= 'f' && c >= '0' && (c <= '9' || c >= 'a' || (c >= 'A' && c <= 'F')); +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isHexDigit('0')); + assert( isHexDigit('A')); + assert( isHexDigit('f')); // lowercase hex digits are accepted + assert(!isHexDigit('g')); + assert(!isHexDigit('G')); + assert(!isHexDigit('#')); +} + +@safe unittest +{ + foreach (c; fullHexDigits) + assert(isHexDigit(c)); + + foreach (c; chain(lowercase[6 .. $], uppercase[6 .. $], whitespace)) + assert(!isHexDigit(c)); +} + + +/++ + Params: c = The character to test. + Returns: Whether or not $(D c) is a whitespace character. That includes the + space, tab, vertical tab, form feed, carriage return, and linefeed + characters. + +/ +bool isWhite(dchar c) @safe pure nothrow @nogc +{ + return c == ' ' || (c >= 0x09 && c <= 0x0D); +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isWhite(' ')); + assert( isWhite('\t')); + assert( isWhite('\n')); + assert(!isWhite('1')); + assert(!isWhite('a')); + assert(!isWhite('#')); + + // N.B.: Does not return true for non-ASCII Unicode whitespace characters. + static import std.uni; + assert(std.uni.isWhite('\u00A0')); + assert(!isWhite('\u00A0')); // std.ascii.isWhite +} + +@safe unittest +{ + foreach (c; whitespace) + assert(isWhite(c)); + + foreach (c; chain(digits, letters)) + assert(!isWhite(c)); +} + + +/++ + Params: c = The character to test. + Returns: Whether $(D c) is a control character. + +/ +bool isControl(dchar c) @safe pure nothrow @nogc +{ + return c < 0x20 || c == 0x7F; +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isControl('\0')); + assert( isControl('\022')); + assert( isControl('\n')); // newline is both whitespace and control + assert(!isControl(' ')); + assert(!isControl('1')); + assert(!isControl('a')); + assert(!isControl('#')); + + // N.B.: non-ASCII Unicode control characters are not recognized: + assert(!isControl('\u0080')); + assert(!isControl('\u2028')); + assert(!isControl('\u2029')); +} + +@safe unittest +{ + foreach (dchar c; 0 .. 32) + assert(isControl(c)); + assert(isControl(127)); + + foreach (c; chain(digits, letters, [' '])) + assert(!isControl(c)); +} + + +/++ + Params: c = The character to test. + Returns: Whether or not $(D c) is a punctuation character. That includes + all ASCII characters which are not control characters, letters, digits, or + whitespace. + +/ +bool isPunctuation(dchar c) @safe pure nothrow @nogc +{ + return c <= '~' && c >= '!' && !isAlphaNum(c); +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isPunctuation('.')); + assert( isPunctuation(',')); + assert( isPunctuation(':')); + assert( isPunctuation('!')); + assert( isPunctuation('#')); + assert( isPunctuation('~')); + assert( isPunctuation('+')); + assert( isPunctuation('_')); + + assert(!isPunctuation('1')); + assert(!isPunctuation('a')); + assert(!isPunctuation(' ')); + assert(!isPunctuation('\n')); + assert(!isPunctuation('\0')); + + // N.B.: Non-ASCII Unicode punctuation characters are not recognized. + assert(!isPunctuation('\u2012')); // (U+2012 = en-dash) +} + +@safe unittest +{ + foreach (dchar c; 0 .. 128) + { + if (isControl(c) || isAlphaNum(c) || c == ' ') + assert(!isPunctuation(c)); + else + assert(isPunctuation(c)); + } +} + + +/++ + Params: c = The character to test. + Returns: Whether or not $(D c) is a printable character other than the + space character. + +/ +bool isGraphical(dchar c) @safe pure nothrow @nogc +{ + return '!' <= c && c <= '~'; +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isGraphical('1')); + assert( isGraphical('a')); + assert( isGraphical('#')); + assert(!isGraphical(' ')); // whitespace is not graphical + assert(!isGraphical('\n')); + assert(!isGraphical('\0')); + + // N.B.: Unicode graphical characters are not regarded as such. + assert(!isGraphical('á')); +} + +@safe unittest +{ + foreach (dchar c; 0 .. 128) + { + if (isControl(c) || c == ' ') + assert(!isGraphical(c)); + else + assert(isGraphical(c)); + } +} + + +/++ + Params: c = The character to test. + Returns: Whether or not $(D c) is a printable character - including the + space character. + +/ +bool isPrintable(dchar c) @safe pure nothrow @nogc +{ + return c >= ' ' && c <= '~'; +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isPrintable(' ')); // whitespace is printable + assert( isPrintable('1')); + assert( isPrintable('a')); + assert( isPrintable('#')); + assert(!isPrintable('\0')); // control characters are not printable + + // N.B.: Printable non-ASCII Unicode characters are not recognized. + assert(!isPrintable('á')); +} + +@safe unittest +{ + foreach (dchar c; 0 .. 128) + { + if (isControl(c)) + assert(!isPrintable(c)); + else + assert(isPrintable(c)); + } +} + + +/++ + Params: c = The character to test. + Returns: Whether or not $(D c) is in the ASCII character set - i.e. in the + range 0 .. 0x7F. + +/ +pragma(inline, true) +bool isASCII(dchar c) @safe pure nothrow @nogc +{ + return c <= 0x7F; +} + +/// +@safe pure nothrow @nogc unittest +{ + assert( isASCII('a')); + assert(!isASCII('á')); +} + +@safe unittest +{ + foreach (dchar c; 0 .. 128) + assert(isASCII(c)); + + assert(!isASCII(128)); +} + + +/++ + Converts an ASCII letter to lowercase. + + Params: c = A character of any type that implicitly converts to $(D dchar). + In the case where it's a built-in type, or an enum of a built-in type, + $(D Unqual!(OriginalType!C)) is returned, whereas if it's a user-defined + type, $(D dchar) is returned. + + Returns: The corresponding lowercase letter, if $(D c) is an uppercase + ASCII character, otherwise $(D c) itself. + +/ +auto toLower(C)(C c) +if (is(C : dchar)) +{ + import std.traits : isAggregateType, OriginalType, Unqual; + + alias OC = OriginalType!C; + static if (isAggregateType!OC) + alias R = dchar; + else + alias R = Unqual!OC; + + return isUpper(c) ? cast(R)(cast(R) c + 'a' - 'A') : cast(R) c; +} + +/// +@safe pure nothrow @nogc unittest +{ + assert(toLower('a') == 'a'); + assert(toLower('A') == 'a'); + assert(toLower('#') == '#'); + + // N.B.: Non-ASCII Unicode uppercase letters are not converted. + assert(toLower('Á') == 'Á'); +} + +@safe pure nothrow unittest +{ + + foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte)) + { + foreach (i, c; uppercase) + assert(toLower(cast(C) c) == lowercase[i]); + + foreach (C c; 0 .. 128) + { + if (c < 'A' || c > 'Z') + assert(toLower(c) == c); + else + assert(toLower(c) != c); + } + + foreach (C c; 128 .. C.max) + assert(toLower(c) == c); + + //CTFE + static assert(toLower(cast(C)'a') == 'a'); + static assert(toLower(cast(C)'A') == 'a'); + } +} + + +/++ + Converts an ASCII letter to uppercase. + + Params: c = Any type which implicitly converts to $(D dchar). In the case + where it's a built-in type, or an enum of a built-in type, + $(D Unqual!(OriginalType!C)) is returned, whereas if it's a user-defined + type, $(D dchar) is returned. + + Returns: The corresponding uppercase letter, if $(D c) is a lowercase ASCII + character, otherwise $(D c) itself. + +/ +auto toUpper(C)(C c) +if (is(C : dchar)) +{ + import std.traits : isAggregateType, OriginalType, Unqual; + + alias OC = OriginalType!C; + static if (isAggregateType!OC) + alias R = dchar; + else + alias R = Unqual!OC; + + return isLower(c) ? cast(R)(cast(R) c - ('a' - 'A')) : cast(R) c; +} + +/// +@safe pure nothrow @nogc unittest +{ + assert(toUpper('a') == 'A'); + assert(toUpper('A') == 'A'); + assert(toUpper('#') == '#'); + + // N.B.: Non-ASCII Unicode lowercase letters are not converted. + assert(toUpper('á') == 'á'); +} + +@safe pure nothrow unittest +{ + foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte)) + { + foreach (i, c; lowercase) + assert(toUpper(cast(C) c) == uppercase[i]); + + foreach (C c; 0 .. 128) + { + if (c < 'a' || c > 'z') + assert(toUpper(c) == c); + else + assert(toUpper(c) != c); + } + + foreach (C c; 128 .. C.max) + assert(toUpper(c) == c); + + //CTFE + static assert(toUpper(cast(C)'a') == 'A'); + static assert(toUpper(cast(C)'A') == 'A'); + } +} + + +@safe unittest //Test both toUpper and toLower with non-builtin +{ + //User Defined [Char|Wchar|Dchar] + static struct UDC { char c; alias c this; } + static struct UDW { wchar c; alias c this; } + static struct UDD { dchar c; alias c this; } + //[Char|Wchar|Dchar] Enum + enum CE : char {a = 'a', A = 'A'} + enum WE : wchar {a = 'a', A = 'A'} + enum DE : dchar {a = 'a', A = 'A'} + //User Defined [Char|Wchar|Dchar] Enum + enum UDCE : UDC {a = UDC('a'), A = UDC('A')} + enum UDWE : UDW {a = UDW('a'), A = UDW('A')} + enum UDDE : UDD {a = UDD('a'), A = UDD('A')} + + //User defined types with implicit cast to dchar test. + foreach (Char; AliasSeq!(UDC, UDW, UDD)) + { + assert(toLower(Char('a')) == 'a'); + assert(toLower(Char('A')) == 'a'); + static assert(toLower(Char('a')) == 'a'); + static assert(toLower(Char('A')) == 'a'); + static assert(toUpper(Char('a')) == 'A'); + static assert(toUpper(Char('A')) == 'A'); + } + + //Various enum tests. + foreach (Enum; AliasSeq!(CE, WE, DE, UDCE, UDWE, UDDE)) + { + assert(toLower(Enum.a) == 'a'); + assert(toLower(Enum.A) == 'a'); + assert(toUpper(Enum.a) == 'A'); + assert(toUpper(Enum.A) == 'A'); + static assert(toLower(Enum.a) == 'a'); + static assert(toLower(Enum.A) == 'a'); + static assert(toUpper(Enum.a) == 'A'); + static assert(toUpper(Enum.A) == 'A'); + } + + //Return value type tests for enum of non-UDT. These should be the original type. + foreach (T; AliasSeq!(CE, WE, DE)) + { + alias C = OriginalType!T; + static assert(is(typeof(toLower(T.init)) == C)); + static assert(is(typeof(toUpper(T.init)) == C)); + } + + //Return value tests for UDT and enum of UDT. These should be dchar + foreach (T; AliasSeq!(UDC, UDW, UDD, UDCE, UDWE, UDDE)) + { + static assert(is(typeof(toLower(T.init)) == dchar)); + static assert(is(typeof(toUpper(T.init)) == dchar)); + } +} |