diff options
Diffstat (limited to 'gcc/d/dmd/identifier.d')
-rw-r--r-- | gcc/d/dmd/identifier.d | 75 |
1 files changed, 65 insertions, 10 deletions
diff --git a/gcc/d/dmd/identifier.d b/gcc/d/dmd/identifier.d
index 8ace310..6fd0d3a 100644
--- a/gcc/d/dmd/identifier.d
+++ b/gcc/d/dmd/identifier.d
@@ -269,12 +269,12 @@ nothrow:
     /********************************************
      * Create an identifier in the string table.
      */
-    static Identifier idPool(const(char)* s, uint len)
+    static Identifier idPool(scope const(char)* s, uint len)
     {
         return idPool(s[0 .. len]);
     }
 
-    extern (D) static Identifier idPool(const(char)[] s, bool isAnonymous = false)
+    extern (D) static Identifier idPool(scope const(char)[] s, bool isAnonymous = false)
     {
         auto sv = stringtable.update(s);
         auto id = sv.value;
@@ -292,7 +292,7 @@ nothrow:
      * s = string for keyword
      * value = TOK.xxxx for the keyword
      */
-    extern (D) static void idPool(const(char)[] s, TOK value)
+    extern (D) static void idPool(scope const(char)[] s, TOK value)
     {
         auto sv = stringtable.insert(s, null);
         assert(sv);
@@ -315,28 +315,83 @@ nothrow:
     /**********************************
      * ditto
      */
-    extern (D) static bool isValidIdentifier(const(char)[] str) @safe
+    extern (D) static bool isValidIdentifier(const(char)[] str) @trusted // raw pointer walk below cannot be @safe
     {
+        import dmd.common.charactertables;
+
         if (str.length == 0 ||
             (str[0] >= '0' && str[0] <= '9')) // beware of isdigit() on signed chars
         {
             return false;
         }
 
-        size_t idx = 0;
-        while (idx < str.length)
+        // The previous implementation decoded every character and classified it
+        // via libc. Instead, mirror the lexer's approach to identifier validation:
+        // ASCII takes the isidchar() fast path; only bytes >= 0x80 are UTF-8 decoded.
+
+        const(char)* p = &str[0], pEnd = str.ptr + str.length;
+
+        // handle the start character (a leading digit was already rejected above)
         {
-            dchar dc;
-            const s = utf_decodeChar(str, idx, dc);
-            if (s ||
-                !((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_'))
+            const c = *p;
+
+            if (isidchar(c)) // handles ASCII subset
+                p++;
+            else if (c & 0x80) // lead byte of a multi-byte UTF-8 sequence
             {
+                size_t countDecoded;
+                dchar decoded;
+
+                if (utf_decodeChar(p[0 .. pEnd - p], countDecoded, decoded) is null && // null: decoded OK
+                    isAnyStart(decoded)) // and the code point may start an identifier
+                    p += countDecoded;
+                else
+                    return false;
+            }
+            else
                 return false;
+        }
+
+        // handle continue characters: every remaining character must be valid
+        while(p !is pEnd)
+        {
+            const c = *p;
+
+            if (isidchar(c)) // handles ASCII subset
+            {
+                p++;
+                continue;
             }
+            else if (c & 0x80) // lead byte of a multi-byte UTF-8 sequence
+            {
+                size_t countDecoded;
+                dchar decoded;
+
+                if (utf_decodeChar(p[0 .. pEnd - p], countDecoded, decoded) is null && // null: decoded OK
+                    isAnyContinue(decoded)) // and the code point may continue an identifier
+                {
+                    p += countDecoded;
+                    continue;
+                }
+                else
+                    return false;
+            }
+            else
+                return false;
         }
+
         return true;
     }
 
+    ///
+    unittest
+    {
+        assert(Identifier.isValidIdentifier("tes123_t".ptr));
+        assert(!Identifier.isValidIdentifier("tes123_^t".ptr));
+        assert(Identifier.isValidIdentifier("te123s_ğt".ptr));
+        assert(!Identifier.isValidIdentifier("t^e123s_ğt".ptr));
+    }
+
     extern (D) static Identifier lookup(const(char)* s, size_t len)
     {
         return lookup(s[0 .. len]);