path: root/gcc/d/dmd/identifier.d
diff options
Diffstat (limited to 'gcc/d/dmd/identifier.d')
1 files changed, 65 insertions, 10 deletions
diff --git a/gcc/d/dmd/identifier.d b/gcc/d/dmd/identifier.d
index 8ace310..6fd0d3a 100644
--- a/gcc/d/dmd/identifier.d
+++ b/gcc/d/dmd/identifier.d
@@ -269,12 +269,12 @@ nothrow:
* Create an identifier in the string table.
- static Identifier idPool(const(char)* s, uint len)
+ static Identifier idPool(scope const(char)* s, uint len)
return idPool(s[0 .. len]);
- extern (D) static Identifier idPool(const(char)[] s, bool isAnonymous = false)
+ extern (D) static Identifier idPool(scope const(char)[] s, bool isAnonymous = false)
auto sv = stringtable.update(s);
auto id = sv.value;
@@ -292,7 +292,7 @@ nothrow:
* s = string for keyword
* value = TOK.xxxx for the keyword
- extern (D) static void idPool(const(char)[] s, TOK value)
+ extern (D) static void idPool(scope const(char)[] s, TOK value)
auto sv = stringtable.insert(s, null);
@@ -315,28 +315,83 @@ nothrow:
* ditto
- extern (D) static bool isValidIdentifier(const(char)[] str) @safe
+ extern (D) static bool isValidIdentifier(const(char)[] str) @trusted
+ import dmd.common.charactertables;
if (str.length == 0 ||
(str[0] >= '0' && str[0] <= '9')) // beware of isdigit() on signed chars
return false;
- size_t idx = 0;
- while (idx < str.length)
+ // The previous implementation classified characters with libc helpers.
+ // This version instead mirrors the lexer's approach to identifier validation:
+ // bytes accepted by isidchar() are consumed directly, bytes with the high bit
+ // set are UTF-8 decoded and checked against the identifier character tables.
+ const(char)* p = &str[0], pEnd = str.ptr + str.length;
+ // handle start characters
- dchar dc;
- const s = utf_decodeChar(str, idx, dc);
- if (s ||
- !((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_'))
+ const c = *p;
+ if (isidchar(c))
+ p++;
+ else if (c & 0x80)
+ size_t countDecoded;
+ dchar decoded;
+ // Accept only when decoding succeeded (utf_decodeChar returned null)
+ // AND the code point is a permitted start character. Joining the two
+ // tests with `||` let every well-formed non-ASCII sequence through
+ // unchecked and could accept a sequence that failed to decode.
+ if (utf_decodeChar(p[0 .. pEnd - p], countDecoded, decoded) is null &&
+ isAnyStart(decoded))
+ p += countDecoded;
+ else
+ return false;
+ }
+ else
return false;
+ }
+ // handle continue characters
+ while(p !is pEnd)
+ {
+ const c = *p;
+ if (isidchar(c)) // handles ASCII subset
+ {
+ p++;
+ continue;
+ else if (c & 0x80)
+ {
+ size_t countDecoded;
+ dchar decoded;
+ // Same rule as for the start character: the decode must succeed
+ // and the code point must be a permitted continue character.
+ if (utf_decodeChar(p[0 .. pEnd - p], countDecoded, decoded) is null &&
+ isAnyContinue(decoded))
+ {
+ p += countDecoded;
+ continue;
+ }
+ else
+ return false;
+ }
+ else
+ return false;
return true;
+ ///
+ unittest
+ {
+ // Exercises isValidIdentifier with ASCII-only and non-ASCII inputs.
+ // NOTE(review): the arguments are `.ptr` values, so these calls presumably
+ // resolve to a pointer-taking overload that is not visible in this hunk - confirm.
+ // ASCII letters, digits and '_' are expected to be accepted.
+ assert(Identifier.isValidIdentifier("tes123_t".ptr));
+ // '^' is expected to make the string invalid.
+ assert(!Identifier.isValidIdentifier("tes123_^t".ptr));
+ // A non-ASCII letter ('ğ') is expected to be accepted.
+ assert(Identifier.isValidIdentifier("te123s_ğt".ptr));
+ // '^' is expected to be rejected even when a non-ASCII letter is also present.
+ assert(!Identifier.isValidIdentifier("t^e123s_ğt".ptr));
+ }
extern (D) static Identifier lookup(const(char)* s, size_t len)
return lookup(s[0 .. len]);