aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Analysis/FormatString.cpp
diff options
context:
space:
mode:
authorBruno Cardoso Lopes <bruno.cardoso@gmail.com>2016-03-29 17:35:02 +0000
committerBruno Cardoso Lopes <bruno.cardoso@gmail.com>2016-03-29 17:35:02 +0000
commit0c18d03d9157090cb379219a8b91f3104869f358 (patch)
tree4691c44c47d7b0f79928619e73b170efb9047c65 /clang/lib/Analysis/FormatString.cpp
parentac400900da8e0f756a73739a85151b01e42500ea (diff)
downloadllvm-0c18d03d9157090cb379219a8b91f3104869f358.zip
llvm-0c18d03d9157090cb379219a8b91f3104869f358.tar.gz
llvm-0c18d03d9157090cb379219a8b91f3104869f358.tar.bz2
[Sema] Handle UTF-8 invalid format string specifiers
Improve invalid format string specifier handling by printing out invalid specifier characters with \x, \u and \U. Previously clang would print garbage whenever the character is unprintable. Example, before: NSLog(@"%\u25B9"); => warning: invalid conversion specifier ' [-Wformat-invalid-specifier] after: NSLog(@"%\u25B9"); => warning: invalid conversion specifier '\u25b9' [-Wformat-invalid-specifier] Differential Revision: http://reviews.llvm.org/D18296 rdar://problem/24672159 llvm-svn: 264752
Diffstat (limited to 'clang/lib/Analysis/FormatString.cpp')
-rw-r--r--clang/lib/Analysis/FormatString.cpp23
1 files changed, 23 insertions, 0 deletions
diff --git a/clang/lib/Analysis/FormatString.cpp b/clang/lib/Analysis/FormatString.cpp
index 1c42ec0..badc710 100644
--- a/clang/lib/Analysis/FormatString.cpp
+++ b/clang/lib/Analysis/FormatString.cpp
@@ -15,6 +15,7 @@
#include "FormatStringParsing.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetInfo.h"
+#include "llvm/Support/ConvertUTF.h"
using clang::analyze_format_string::ArgType;
using clang::analyze_format_string::FormatStringHandler;
@@ -260,6 +261,28 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
return true;
}
+bool clang::analyze_format_string::ParseUTF8InvalidSpecifier( // Sets Len and returns true only for a multibyte sequence that fits before FmtStrEnd.
+ const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) { // Len is written only on the true path.
+ if (SpecifierBegin + 1 >= FmtStrEnd) // No byte follows SpecifierBegin inside the string.
+ return false;
+
+ const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1); // Byte right after SpecifierBegin.
+ const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd); // End bound used for the fit check below.
+ const char FirstByte = *SB; // Byte handed to getNumBytesForUTF8 to size the sequence.
+
+ // If the byte after SpecifierBegin starts a multibyte UTF-8 sequence,
+ // report the total length through Len so that the caller can treat the
+ // whole sequence as one (invalid) conversion specifier.
+ unsigned NumBytes = getNumBytesForUTF8(FirstByte); // Presumably the sequence length implied by the lead byte -- confirm in ConvertUTF.h.
+ if (NumBytes == 1) // Single-byte character: nothing multibyte to report.
+ return false;
+ if (SB + NumBytes > SE) // Sequence would run past FmtStrEnd. NOTE(review): SB + NumBytes may point beyond one-past-the-end -- confirm this is acceptable.
+ return false;
+
+ Len = NumBytes + 1; // The extra 1 covers the byte at SpecifierBegin, since SB starts one past it.
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Methods on ArgType.
//===----------------------------------------------------------------------===//