diff options
author | paulhoad <mydeveloperday@gmail.com> | 2019-10-24 20:24:03 +0100 |
---|---|---|
committer | paulhoad <mydeveloperday@gmail.com> | 2019-10-24 20:24:44 +0100 |
commit | 8fa5e98fd191d02fc7e0e220d74af267b9140e6a (patch) | |
tree | a59a3d86aa396dc88e3655cdfb96529e988ec300 /clang/lib/Basic/SourceManager.cpp | |
parent | 78700ef8866db7f5cea113fa81d810a28b5b7438 (diff) | |
download | llvm-8fa5e98fd191d02fc7e0e220d74af267b9140e6a.zip llvm-8fa5e98fd191d02fc7e0e220d74af267b9140e6a.tar.gz llvm-8fa5e98fd191d02fc7e0e220d74af267b9140e6a.tar.bz2 |
[clang-format] Remove duplciate code from Invalid BOM detection
Summary:
Review comments on {D68767} asked that this duplicated code in clang-format was moved to one central location that being SourceManager (where it had originally be copied from I assume)
Moved function into static function ContentCache::getInvalidBOM(...) - (closest class to where it was defined before)
Updated clang-format to call this static function
Added unit tests for said new function in BasicTests
Sorry not my normal code area so may have the wrong reviewers. (but your names were on the recent history)
Reviewers: bruno, arphaman, klimek, owenpan, mitchell-stellar, dexonsmith
Reviewed By: owenpan
Subscribers: cfe-commits
Tags: #clang, #clang-format, #clang-tools-extra
Differential Revision: https://reviews.llvm.org/D68914
Diffstat (limited to 'clang/lib/Basic/SourceManager.cpp')
-rw-r--r-- | clang/lib/Basic/SourceManager.cpp | 38 |
1 files changed, 24 insertions, 14 deletions
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 58b9528..5f457d6 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -95,6 +95,29 @@ void ContentCache::replaceBuffer(const llvm::MemoryBuffer *B, bool DoNotFree) { Buffer.setInt((B && DoNotFree) ? DoNotFreeFlag : 0); } +const char *ContentCache::getInvalidBOM(StringRef BufStr) { + // If the buffer is valid, check to see if it has a UTF Byte Order Mark + // (BOM). We only support UTF-8 with and without a BOM right now. See + // http://en.wikipedia.org/wiki/Byte_order_mark for more information. + const char *InvalidBOM = + llvm::StringSwitch<const char *>(BufStr) + .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"), + "UTF-32 (BE)") + .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"), + "UTF-32 (LE)") + .StartsWith("\xFE\xFF", "UTF-16 (BE)") + .StartsWith("\xFF\xFE", "UTF-16 (LE)") + .StartsWith("\x2B\x2F\x76", "UTF-7") + .StartsWith("\xF7\x64\x4C", "UTF-1") + .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC") + .StartsWith("\x0E\xFE\xFF", "SCSU") + .StartsWith("\xFB\xEE\x28", "BOCU-1") + .StartsWith("\x84\x31\x95\x33", "GB-18030") + .Default(nullptr); + + return InvalidBOM; +} + const llvm::MemoryBuffer *ContentCache::getBuffer(DiagnosticsEngine &Diag, FileManager &FM, SourceLocation Loc, @@ -190,20 +213,7 @@ const llvm::MemoryBuffer *ContentCache::getBuffer(DiagnosticsEngine &Diag, // (BOM). We only support UTF-8 with and without a BOM right now. See // http://en.wikipedia.org/wiki/Byte_order_mark for more information. StringRef BufStr = Buffer.getPointer()->getBuffer(); - const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr) - .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"), - "UTF-32 (BE)") - .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"), - "UTF-32 (LE)") - .StartsWith("\xFE\xFF", "UTF-16 (BE)") - .StartsWith("\xFF\xFE", "UTF-16 (LE)") - .StartsWith("\x2B\x2F\x76", "UTF-7") - .StartsWith("\xF7\x64\x4C", "UTF-1") - .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC") - .StartsWith("\x0E\xFE\xFF", "SCSU") - .StartsWith("\xFB\xEE\x28", "BOCU-1") - .StartsWith("\x84\x31\x95\x33", "GB-18030") - .Default(nullptr); + const char *InvalidBOM = getInvalidBOM(BufStr); if (InvalidBOM) { Diag.Report(Loc, diag::err_unsupported_bom) |