diff options
author | Jordan Rose <jordan_rose@apple.com> | 2013-01-24 20:50:50 +0000 |
---|---|---|
committer | Jordan Rose <jordan_rose@apple.com> | 2013-01-24 20:50:50 +0000 |
commit | 4246ae0089571a1cbf357d7c4570e07c0193733c (patch) | |
tree | 60f3f47fe28d7d72063d9224427faaec232aef41 /clang/lib/Lex/Lexer.cpp | |
parent | 7f43dddae0669f0700cf04bf520c12d3301cd809 (diff) | |
download | llvm-4246ae0089571a1cbf357d7c4570e07c0193733c.zip llvm-4246ae0089571a1cbf357d7c4570e07c0193733c.tar.gz llvm-4246ae0089571a1cbf357d7c4570e07c0193733c.tar.bz2 |
As an extension, treat Unicode whitespace characters as whitespace.
llvm-svn: 173370
Diffstat (limited to 'clang/lib/Lex/Lexer.cpp')
-rw-r--r-- | clang/lib/Lex/Lexer.cpp | 23 |
1 file changed, 23 insertions, 0 deletions
```diff
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index e6ffca9..2a57e6f 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2791,7 +2791,30 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
   return CodePoint;
 }
 
+static bool isUnicodeWhitespace(uint32_t C) {
+  return (C == 0x0085 || C == 0x00A0 || C == 0x1680 ||
+          C == 0x180E || (C >= 0x2000 && C <= 0x200A) ||
+          C == 0x2028 || C == 0x2029 || C == 0x202F ||
+          C == 0x205F || C == 0x3000);
+}
+
 void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
+  if (isUnicodeWhitespace(C)) {
+    if (!isLexingRawMode()) {
+      CharSourceRange CharRange =
+        CharSourceRange::getCharRange(getSourceLocation(),
+                                      getSourceLocation(CurPtr));
+      Diag(BufferPtr, diag::ext_unicode_whitespace)
+        << CharRange;
+    }
+
+    Result.setFlag(Token::LeadingSpace);
+    if (SkipWhitespace(Result, CurPtr))
+      return; // KeepWhitespaceMode
+
+    return LexTokenInternal(Result);
+  }
+
   if (isAllowedIDChar(C) && isAllowedInitiallyIDChar(C)) {
     MIOpt.ReadToken();
     return LexIdentifier(Result, CurPtr);
```