[llvm] Improve llvm.objectsize computation by computing GEP, alloca a… (#117849)

…nd malloc parameters bound. Using a naive expression walker, it is possible to compute valuable information for allocation functions, GEP and alloca, even in the presence of some dynamic information. We don't rely on computeConstantRange to avoid taking advantage of undefined behavior, which would be counter-productive wrt. usual llvm.objectsize usage. llvm.objectsize plays an important role in _FORTIFY_SOURCE definitions, so improving its diagnostic in turn improves the security of compiled applications. As a side note, as a result of recent optimization improvements, clang no longer passes https://github.com/serge-sans-paille/builtin_object_size-test-suite — this commit restores the situation and greatly improves the scope of code handled by the static version of __builtin_object_size. This is a recommit of https://github.com/llvm/llvm-project/pull/115522 with a fix applied.
author: serge-sans-paille <sguelton@mozilla.com> 2024-12-10 06:28:03 +0000
committer: GitHub <noreply@github.com> 2024-12-10 06:28:03 +0000
commit: f8c1a22220345dc85bf700c8a8ca6a28839edabb (patch)
tree: b8e4e4a3e5e4805142750a8f97638b4aa8599ac9 /llvm/lib/Analysis/MemoryBuiltins.cpp
parent: ef2e590e7b6fb5b0478e5e087006895a07d185c8 (diff)
download: llvm-f8c1a22220345dc85bf700c8a8ca6a28839edabb.zip
llvm-f8c1a22220345dc85bf700c8a8ca6a28839edabb.tar.gz
llvm-f8c1a22220345dc85bf700c8a8ca6a28839edabb.tar.bz2
1 files changed, 100 insertions, 4 deletions
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index 4028d5f..57b9799 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -670,6 +670,65 @@ STATISTIC(ObjectVisitorArgument,
 STATISTIC(ObjectVisitorLoad,
           "Number of load instructions with unsolved size and offset");
 
+static std::optional<APInt>
+combinePossibleConstantValues(std::optional<APInt> LHS,
+                              std::optional<APInt> RHS,
+                              ObjectSizeOpts::Mode EvalMode) {
+  if (!LHS || !RHS)
+    return std::nullopt;
+  if (EvalMode == ObjectSizeOpts::Mode::Max)
+    return LHS->sge(*RHS) ? *LHS : *RHS;
+  else
+    return LHS->sle(*RHS) ? *LHS : *RHS;
+}
+
+static std::optional<APInt> aggregatePossibleConstantValuesImpl(
+    const Value *V, ObjectSizeOpts::Mode EvalMode, unsigned recursionDepth) {
+  constexpr unsigned maxRecursionDepth = 4;
+  if (recursionDepth == maxRecursionDepth)
+    return std::nullopt;
+
+  if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+    return CI->getValue();
+  } else if (const auto *SI = dyn_cast<SelectInst>(V)) {
+    return combinePossibleConstantValues(
+        aggregatePossibleConstantValuesImpl(SI->getTrueValue(), EvalMode,
+                                            recursionDepth + 1),
+        aggregatePossibleConstantValuesImpl(SI->getFalseValue(), EvalMode,
+                                            recursionDepth + 1),
+        EvalMode);
+  } else if (const auto *PN = dyn_cast<PHINode>(V)) {
+    unsigned Count = PN->getNumIncomingValues();
+    if (Count == 0)
+      return std::nullopt;
+    auto Acc = aggregatePossibleConstantValuesImpl(
+        PN->getIncomingValue(0), EvalMode, recursionDepth + 1);
+    for (unsigned I = 1; Acc && I < Count; ++I) {
+      auto Tmp = aggregatePossibleConstantValuesImpl(
+          PN->getIncomingValue(I), EvalMode, recursionDepth + 1);
+      Acc = combinePossibleConstantValues(Acc, Tmp, EvalMode);
+    }
+    return Acc;
+  }
+
+  return std::nullopt;
+}
+
+static std::optional<APInt>
+aggregatePossibleConstantValues(const Value *V, ObjectSizeOpts::Mode EvalMode) {
+  if (auto *CI = dyn_cast<ConstantInt>(V))
+    return CI->getValue();
+
+  if (EvalMode != ObjectSizeOpts::Mode::Min &&
+      EvalMode != ObjectSizeOpts::Mode::Max)
+    return std::nullopt;
+
+  // Not using computeConstantRange here because we cannot guarantee it's not
+  // doing optimization based on UB which we want to avoid when expanding
+  // __builtin_object_size.
+  return aggregatePossibleConstantValuesImpl(V, EvalMode, 0u);
+}
+
 /// Align \p Size according to \p Alignment. If \p Size is greater than
 /// getSignedMaxValue(), set it as unknown as we can only represent signed value
 /// in OffsetSpan.
@@ -717,11 +776,36 @@ OffsetSpan ObjectSizeOffsetVisitor::computeImpl(Value *V) {
   V = V->stripAndAccumulateConstantOffsets(
       DL, Offset, /* AllowNonInbounds */ true, /* AllowInvariantGroup */ true);
 
+  // Give it another try with approximated analysis. We don't start with this
+  // one because stripAndAccumulateConstantOffsets behaves differently wrt.
+  // overflows if we provide an external Analysis.
+  if ((Options.EvalMode == ObjectSizeOpts::Mode::Min ||
+       Options.EvalMode == ObjectSizeOpts::Mode::Max) &&
+      isa<GEPOperator>(V)) {
+    // External Analysis used to compute the Min/Max value of individual Offsets
+    // within a GEP.
+    ObjectSizeOpts::Mode EvalMode =
+        Options.EvalMode == ObjectSizeOpts::Mode::Min
+            ? ObjectSizeOpts::Mode::Max
+            : ObjectSizeOpts::Mode::Min;
+    auto OffsetRangeAnalysis = [EvalMode](Value &VOffset, APInt &Offset) {
+      if (auto PossibleOffset =
+              aggregatePossibleConstantValues(&VOffset, EvalMode)) {
+        Offset = *PossibleOffset;
+        return true;
+      }
+      return false;
+    };
+
+    V = V->stripAndAccumulateConstantOffsets(
+        DL, Offset, /* AllowNonInbounds */ true, /* AllowInvariantGroup */ true,
+        /*ExternalAnalysis=*/OffsetRangeAnalysis);
+  }
+
   // Later we use the index type size and zero but it will match the type of the
   // value that is passed to computeImpl.
   IntTyBits = DL.getIndexTypeSizeInBits(V->getType());
   Zero = APInt::getZero(IntTyBits);
-
   OffsetSpan ORT = computeValue(V);
 
   bool IndexTypeSizeChanged = InitialIntTyBits != IntTyBits;
@@ -813,8 +897,9 @@ OffsetSpan ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
     return OffsetSpan(Zero, align(Size, I.getAlign()));
 
   Value *ArraySize = I.getArraySize();
-  if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) {
-    APInt NumElems = C->getValue();
+  if (auto PossibleSize =
+          aggregatePossibleConstantValues(ArraySize, Options.EvalMode)) {
+    APInt NumElems = *PossibleSize;
     if (!CheckedZextOrTrunc(NumElems))
       return ObjectSizeOffsetVisitor::unknown();
 
@@ -840,7 +925,18 @@ OffsetSpan ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
 }
 
 OffsetSpan ObjectSizeOffsetVisitor::visitCallBase(CallBase &CB) {
-  if (std::optional<APInt> Size = getAllocSize(&CB, TLI)) {
+  auto Mapper = [this](const Value *V) -> const Value * {
+    if (!V->getType()->isIntegerTy())
+      return V;
+
+    if (auto PossibleBound =
+            aggregatePossibleConstantValues(V, Options.EvalMode))
+      return ConstantInt::get(V->getType(), *PossibleBound);
+
+    return V;
+  };
+
+  if (std::optional<APInt> Size = getAllocSize(&CB, TLI, Mapper)) {
     // Very large unsigned value cannot be represented as OffsetSpan.
     if (Size->isNegative())
       return ObjectSizeOffsetVisitor::unknown();
author	serge-sans-paille <sguelton@mozilla.com>	2024-12-10 06:28:03 +0000
committer	GitHub <noreply@github.com>	2024-12-10 06:28:03 +0000
commit	f8c1a22220345dc85bf700c8a8ca6a28839edabb (patch)
tree	b8e4e4a3e5e4805142750a8f97638b4aa8599ac9 /llvm/lib/Analysis/MemoryBuiltins.cpp
parent	ef2e590e7b6fb5b0478e5e087006895a07d185c8 (diff)
download	llvm-f8c1a22220345dc85bf700c8a8ca6a28839edabb.zip llvm-f8c1a22220345dc85bf700c8a8ca6a28839edabb.tar.gz llvm-f8c1a22220345dc85bf700c8a8ca6a28839edabb.tar.bz2