diff options
author | Michael Benfield <mbenfield@google.com> | 2021-10-14 20:02:28 +0000 |
---|---|---|
committer | Michael Benfield <mbenfield@google.com> | 2021-10-28 02:52:03 +0000 |
commit | 15e3d39110fa4449be4f56196af3bc81b623f3ab (patch) | |
tree | 809db65d5baae70f631166c3d2f61a2d24ab424a /clang/lib/Sema/SemaChecking.cpp | |
parent | fa592180b3f4f97efdb702c0db0163bc4916988e (diff) | |
download | llvm-15e3d39110fa4449be4f56196af3bc81b623f3ab.zip llvm-15e3d39110fa4449be4f56196af3bc81b623f3ab.tar.gz llvm-15e3d39110fa4449be4f56196af3bc81b623f3ab.tar.bz2 |
[clang] Fortify warning for scanf calls with field width too big.
Differential Revision: https://reviews.llvm.org/D111833
Diffstat (limited to 'clang/lib/Sema/SemaChecking.cpp')
-rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 132 |
1 files changed, 120 insertions, 12 deletions
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 147f50a..e119660 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -408,6 +408,50 @@ static bool SemaBuiltinCallWithStaticChain(Sema &S, CallExpr *BuiltinCall) { namespace { +class ScanfDiagnosticFormatHandler + : public analyze_format_string::FormatStringHandler { + // Accepts the argument index (relative to the first destination index) of the + // argument whose size we want. + using ComputeSizeFunction = + llvm::function_ref<Optional<llvm::APSInt>(unsigned)>; + + // Accepts the argument index (relative to the first destination index), the + // destination size, and the source size). + using DiagnoseFunction = + llvm::function_ref<void(unsigned, unsigned, unsigned)>; + + ComputeSizeFunction ComputeSizeArgument; + DiagnoseFunction Diagnose; + +public: + ScanfDiagnosticFormatHandler(ComputeSizeFunction ComputeSizeArgument, + DiagnoseFunction Diagnose) + : ComputeSizeArgument(ComputeSizeArgument), Diagnose(Diagnose) {} + + bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, + const char *StartSpecifier, + unsigned specifierLen) override { + auto OptionalFW = FS.getFieldWidth(); + if (OptionalFW.getHowSpecified() != + analyze_format_string::OptionalAmount::HowSpecified::Constant) + return true; + + // We have to write the data plus a NUL byte. + unsigned SourceSize = OptionalFW.getConstantAmount() + 1; + + auto DestSizeAPS = ComputeSizeArgument(FS.getArgIndex()); + if (!DestSizeAPS) + return true; + + unsigned DestSize = DestSizeAPS->getZExtValue(); + + if (DestSize < SourceSize) + Diagnose(FS.getArgIndex(), DestSize, SourceSize); + + return true; + } +}; + class EstimateSizeFormatHandler : public analyze_format_string::FormatStringHandler { size_t Size; @@ -615,9 +659,12 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, // (potentially) more strict checking mode. Otherwise, conservatively assume // type 0. int BOSType = 0; - if (const auto *POS = - FD->getParamDecl(Index)->getAttr<PassObjectSizeAttr>()) - BOSType = POS->getType(); + // This check can fail for variadic functions. + if (Index < FD->getNumParams()) { + if (const auto *POS = + FD->getParamDecl(Index)->getAttr<PassObjectSizeAttr>()) + BOSType = POS->getType(); + } const Expr *ObjArg = TheCall->getArg(Index); uint64_t Result; @@ -642,6 +689,20 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, unsigned DiagID = 0; bool IsChkVariant = false; + auto GetFunctionName = [&]() { + StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID); + // Skim off the details of whichever builtin was called to produce a better + // diagnostic, as it's unlikely that the user wrote the __builtin + // explicitly. + if (IsChkVariant) { + FunctionName = FunctionName.drop_front(std::strlen("__builtin___")); + FunctionName = FunctionName.drop_back(std::strlen("_chk")); + } else if (FunctionName.startswith("__builtin_")) { + FunctionName = FunctionName.drop_front(std::strlen("__builtin_")); + } + return FunctionName; + }; + switch (BuiltinID) { default: return; @@ -661,6 +722,61 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, break; } + case Builtin::BIscanf: + case Builtin::BIfscanf: + case Builtin::BIsscanf: { + unsigned FormatIndex = 1; + unsigned DataIndex = 2; + if (BuiltinID == Builtin::BIscanf) { + FormatIndex = 0; + DataIndex = 1; + } + + const auto *FormatExpr = + TheCall->getArg(FormatIndex)->IgnoreParenImpCasts(); + + const auto *Format = dyn_cast<StringLiteral>(FormatExpr); + if (!Format) + return; + + if (!Format->isAscii() && !Format->isUTF8()) + return; + + auto Diagnose = [&](unsigned ArgIndex, unsigned DestSize, + unsigned SourceSize) { + DiagID = diag::warn_fortify_scanf_overflow; + StringRef FunctionName = GetFunctionName(); + DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, + PDiag(DiagID) + << FunctionName << (ArgIndex + DataIndex + 1) + << DestSize << SourceSize); + }; + + StringRef FormatStrRef = Format->getString(); + auto ShiftedComputeSizeArgument = [&](unsigned Index) { + return ComputeSizeArgument(Index + DataIndex); + }; + ScanfDiagnosticFormatHandler H(ShiftedComputeSizeArgument, Diagnose); + const char *FormatBytes = FormatStrRef.data(); + const ConstantArrayType *T = + Context.getAsConstantArrayType(Format->getType()); + assert(T && "String literal not of constant array type!"); + size_t TypeSize = T->getSize().getZExtValue(); + + // In case there's a null byte somewhere. + size_t StrLen = + std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0)); + + analyze_format_string::ParseScanfString(H, FormatBytes, + FormatBytes + StrLen, getLangOpts(), + Context.getTargetInfo()); + + // Unlike the other cases, in this one we have already issued the diagnostic + // here, so no need to continue (because unlike the other cases, here the + // diagnostic refers to the argument number). + return; + } + case Builtin::BIsprintf: case Builtin::BI__builtin___sprintf_chk: { size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3; @@ -771,15 +887,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, SourceSize.getValue().ule(DestinationSize.getValue())) return; - StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID); - // Skim off the details of whichever builtin was called to produce a better - // diagnostic, as it's unlikely that the user wrote the __builtin explicitly. - if (IsChkVariant) { - FunctionName = FunctionName.drop_front(std::strlen("__builtin___")); - FunctionName = FunctionName.drop_back(std::strlen("_chk")); - } else if (FunctionName.startswith("__builtin_")) { - FunctionName = FunctionName.drop_front(std::strlen("__builtin_")); - } + StringRef FunctionName = GetFunctionName(); SmallString<16> DestinationStr; SmallString<16> SourceStr; |