Reland "[dataflow] Add dedicated representation of boolean formulas"

This reverts commit 7a72ce98224be76d9328e65eee472381f7c8e7fe. Test problems were due to unspecified order of function arg evaluation. Reland "[dataflow] Replace most BoolValue subclasses with references to Formula (and AtomicBoolValue => Atom and BoolValue => Formula where appropriate)" This properly frees the Value hierarchy from managing boolean formulas. We still distinguish AtomicBoolValue; this type is used in client code. However we expect to convert such uses to BoolValue (where the distinction is not needed) or Atom (where atomic identity is intended), and then fold AtomicBoolValue into FormulaBoolValue. We also distinguish TopBoolValue; this has distinct rules for widen/join/equivalence, and top-ness is not represented in Formula. It'd be nice to find a cleaner representation (e.g. the absence of a formula), but no immediate plans. For now, BoolValues with the same Formula are deduplicated. This doesn't seem desirable, as Values are mutable by their creators (properties). We can probably drop this for FormulaBoolValue immediately (not in this patch, to isolate changes). For AtomicBoolValue we first need to update clients to stop using value pointers for atom identity. The data structures around flow conditions are updated: - flow condition tokens are Atom, rather than AtomicBoolValue* - conditions are Formula, rather than BoolValue Most APIs were changed directly, some with many clients had a new version added and the existing one deprecated. The factories for BoolValues in Environment keep their existing signatures for now (e.g. makeOr(BoolValue, BoolValue) => BoolValue) and are not deprecated. These have very many clients and finding the most ergonomic API & migration path still needs some thought. Differential Revision: https://reviews.llvm.org/D153469
author: Sam McCall <sam.mccall@gmail.com> 2023-07-05 11:35:06 +0200
committer: Sam McCall <sam.mccall@gmail.com> 2023-07-07 11:58:33 +0200
commit: fc9821a877d4e1443603d67539e4369b3ec72890 (patch)
tree: 16074680f7b637e18f5206b2163e5d9c45e56fb2 /clang/lib
parent: 74eac85dae01ddda40233329700c9a0426b3ed22 (diff)
download: llvm-fc9821a877d4e1443603d67539e4369b3ec72890.zip
llvm-fc9821a877d4e1443603d67539e4369b3ec72890.tar.gz
llvm-fc9821a877d4e1443603d67539e4369b3ec72890.tar.bz2
9 files changed, 373 insertions, 557 deletions
diff --git a/clang/lib/Analysis/FlowSensitive/Arena.cpp b/clang/lib/Analysis/FlowSensitive/Arena.cpp
index cff6c45..a12da2d 100644
--- a/clang/lib/Analysis/FlowSensitive/Arena.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Arena.cpp
@@ -7,65 +7,75 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Analysis/FlowSensitive/Arena.h"
+#include "clang/Analysis/FlowSensitive/Value.h"
 
 namespace clang::dataflow {
 
-static std::pair<BoolValue *, BoolValue *>
-makeCanonicalBoolValuePair(BoolValue &LHS, BoolValue &RHS) {
+static std::pair<const Formula *, const Formula *>
+canonicalFormulaPair(const Formula &LHS, const Formula &RHS) {
   auto Res = std::make_pair(&LHS, &RHS);
-  if (&RHS < &LHS)
+  if (&RHS < &LHS) // FIXME: use a deterministic order instead
     std::swap(Res.first, Res.second);
   return Res;
 }
 
-BoolValue &Arena::makeAnd(BoolValue &LHS, BoolValue &RHS) {
+const Formula &Arena::makeAtomRef(Atom A) {
+  auto [It, Inserted] = AtomRefs.try_emplace(A);
+  if (Inserted)
+    It->second =
+        &Formula::create(Alloc, Formula::AtomRef, {}, static_cast<unsigned>(A));
+  return *It->second;
+}
+
+const Formula &Arena::makeAnd(const Formula &LHS, const Formula &RHS) {
   if (&LHS == &RHS)
     return LHS;
 
-  auto Res = ConjunctionVals.try_emplace(makeCanonicalBoolValuePair(LHS, RHS),
-                                         nullptr);
-  if (Res.second)
-    Res.first->second = &create<ConjunctionValue>(LHS, RHS);
-  return *Res.first->second;
+  auto [It, Inserted] =
+      Ands.try_emplace(canonicalFormulaPair(LHS, RHS), nullptr);
+  if (Inserted)
+    It->second = &Formula::create(Alloc, Formula::And, {&LHS, &RHS});
+  return *It->second;
 }
 
-BoolValue &Arena::makeOr(BoolValue &LHS, BoolValue &RHS) {
+const Formula &Arena::makeOr(const Formula &LHS, const Formula &RHS) {
   if (&LHS == &RHS)
     return LHS;
 
-  auto Res = DisjunctionVals.try_emplace(makeCanonicalBoolValuePair(LHS, RHS),
-                                         nullptr);
-  if (Res.second)
-    Res.first->second = &create<DisjunctionValue>(LHS, RHS);
-  return *Res.first->second;
+  auto [It, Inserted] =
+      Ors.try_emplace(canonicalFormulaPair(LHS, RHS), nullptr);
+  if (Inserted)
+    It->second = &Formula::create(Alloc, Formula::Or, {&LHS, &RHS});
+  return *It->second;
 }
 
-BoolValue &Arena::makeNot(BoolValue &Val) {
-  auto Res = NegationVals.try_emplace(&Val, nullptr);
-  if (Res.second)
-    Res.first->second = &create<NegationValue>(Val);
-  return *Res.first->second;
+const Formula &Arena::makeNot(const Formula &Val) {
+  auto [It, Inserted] = Nots.try_emplace(&Val, nullptr);
+  if (Inserted)
+    It->second = &Formula::create(Alloc, Formula::Not, {&Val});
+  return *It->second;
 }
 
-BoolValue &Arena::makeImplies(BoolValue &LHS, BoolValue &RHS) {
+const Formula &Arena::makeImplies(const Formula &LHS, const Formula &RHS) {
   if (&LHS == &RHS)
     return makeLiteral(true);
 
-  auto Res = ImplicationVals.try_emplace(std::make_pair(&LHS, &RHS), nullptr);
-  if (Res.second)
-    Res.first->second = &create<ImplicationValue>(LHS, RHS);
-  return *Res.first->second;
+  auto [It, Inserted] =
+      Implies.try_emplace(std::make_pair(&LHS, &RHS), nullptr);
+  if (Inserted)
+    It->second = &Formula::create(Alloc, Formula::Implies, {&LHS, &RHS});
+  return *It->second;
 }
 
-BoolValue &Arena::makeEquals(BoolValue &LHS, BoolValue &RHS) {
+const Formula &Arena::makeEquals(const Formula &LHS, const Formula &RHS) {
   if (&LHS == &RHS)
     return makeLiteral(true);
 
-  auto Res = BiconditionalVals.try_emplace(makeCanonicalBoolValuePair(LHS, RHS),
-                                           nullptr);
-  if (Res.second)
-    Res.first->second = &create<BiconditionalValue>(LHS, RHS);
-  return *Res.first->second;
+  auto [It, Inserted] =
+      Equals.try_emplace(canonicalFormulaPair(LHS, RHS), nullptr);
+  if (Inserted)
+    It->second = &Formula::create(Alloc, Formula::Equal, {&LHS, &RHS});
+  return *It->second;
 }
 
 IntegerValue &Arena::makeIntLiteral(llvm::APInt Value) {
@@ -76,4 +86,13 @@ IntegerValue &Arena::makeIntLiteral(llvm::APInt Value) {
   return *It->second;
 }
 
+BoolValue &Arena::makeBoolValue(const Formula &F) {
+  auto [It, Inserted] = FormulaValues.try_emplace(&F);
+  if (Inserted)
+    It->second = (F.kind() == Formula::AtomRef)
+                     ? (BoolValue *)&create<AtomicBoolValue>(F)
+                     : &create<FormulaBoolValue>(F);
+  return *It->second;
+}
+
 } // namespace clang::dataflow
diff --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
index d59bebf..171884a 100644
--- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
+++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
@@ -3,6 +3,7 @@ add_clang_library(clangAnalysisFlowSensitive
   ControlFlowContext.cpp
   DataflowAnalysisContext.cpp
   DataflowEnvironment.cpp
+  Formula.cpp
   HTMLLogger.cpp
   Logger.cpp
   RecordOps.cpp
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
index 37bcc8b..a807ef8 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
@@ -15,6 +15,7 @@
 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
+#include "clang/Analysis/FlowSensitive/Formula.h"
 #include "clang/Analysis/FlowSensitive/Logger.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/SetOperations.h"
@@ -23,9 +24,12 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <memory>
+#include <string>
 #include <utility>
+#include <vector>
 
 static llvm::cl::opt<std::string> DataflowLog(
     "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
@@ -98,108 +102,114 @@ DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
 }
 
 void DataflowAnalysisContext::addFlowConditionConstraint(
-    AtomicBoolValue &Token, BoolValue &Constraint) {
-  auto Res = FlowConditionConstraints.try_emplace(&Token, &Constraint);
+    Atom Token, const Formula &Constraint) {
+  auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
   if (!Res.second) {
     Res.first->second =
         &arena().makeAnd(*Res.first->second, Constraint);
   }
 }
 
-AtomicBoolValue &
-DataflowAnalysisContext::forkFlowCondition(AtomicBoolValue &Token) {
-  auto &ForkToken = arena().makeFlowConditionToken();
-  FlowConditionDeps[&ForkToken].insert(&Token);
-  addFlowConditionConstraint(ForkToken, Token);
+Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
+  Atom ForkToken = arena().makeFlowConditionToken();
+  FlowConditionDeps[ForkToken].insert(Token);
+  addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
   return ForkToken;
 }
 
-AtomicBoolValue &
-DataflowAnalysisContext::joinFlowConditions(AtomicBoolValue &FirstToken,
-                                            AtomicBoolValue &SecondToken) {
-  auto &Token = arena().makeFlowConditionToken();
-  FlowConditionDeps[&Token].insert(&FirstToken);
-  FlowConditionDeps[&Token].insert(&SecondToken);
-  addFlowConditionConstraint(
-      Token, arena().makeOr(FirstToken, SecondToken));
+Atom 
+DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
+                                            Atom SecondToken) {
+  Atom Token = arena().makeFlowConditionToken();
+  FlowConditionDeps[Token].insert(FirstToken);
+  FlowConditionDeps[Token].insert(SecondToken);
+  addFlowConditionConstraint(Token,
+                             arena().makeOr(arena().makeAtomRef(FirstToken),
+                                            arena().makeAtomRef(SecondToken)));
   return Token;
 }
 
-Solver::Result
-DataflowAnalysisContext::querySolver(llvm::SetVector<BoolValue *> Constraints) {
+Solver::Result DataflowAnalysisContext::querySolver(
+    llvm::SetVector<const Formula *> Constraints) {
   Constraints.insert(&arena().makeLiteral(true));
   Constraints.insert(&arena().makeNot(arena().makeLiteral(false)));
   return S->solve(Constraints.getArrayRef());
 }
 
-bool DataflowAnalysisContext::flowConditionImplies(AtomicBoolValue &Token,
-                                                   BoolValue &Val) {
+bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
+                                                   const Formula &Val) {
   // Returns true if and only if truth assignment of the flow condition implies
   // that `Val` is also true. We prove whether or not this property holds by
   // reducing the problem to satisfiability checking. In other words, we attempt
   // to show that assuming `Val` is false makes the constraints induced by the
   // flow condition unsatisfiable.
-  llvm::SetVector<BoolValue *> Constraints;
-  Constraints.insert(&Token);
+  llvm::SetVector<const Formula *> Constraints;
+  Constraints.insert(&arena().makeAtomRef(Token));
   Constraints.insert(&arena().makeNot(Val));
-  llvm::DenseSet<AtomicBoolValue *> VisitedTokens;
+  llvm::DenseSet<Atom> VisitedTokens;
   addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
   return isUnsatisfiable(std::move(Constraints));
 }
 
-bool DataflowAnalysisContext::flowConditionIsTautology(AtomicBoolValue &Token) {
+bool DataflowAnalysisContext::flowConditionIsTautology(Atom Token) {
   // Returns true if and only if we cannot prove that the flow condition can
   // ever be false.
-  llvm::SetVector<BoolValue *> Constraints;
-  Constraints.insert(&arena().makeNot(Token));
-  llvm::DenseSet<AtomicBoolValue *> VisitedTokens;
+  llvm::SetVector<const Formula *> Constraints;
+  Constraints.insert(&arena().makeNot(arena().makeAtomRef(Token)));
+  llvm::DenseSet<Atom> VisitedTokens;
   addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
   return isUnsatisfiable(std::move(Constraints));
 }
 
-bool DataflowAnalysisContext::equivalentBoolValues(BoolValue &Val1,
-                                                   BoolValue &Val2) {
-  llvm::SetVector<BoolValue*> Constraints;
+bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
+                                                 const Formula &Val2) {
+  llvm::SetVector<const Formula *> Constraints;
   Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2)));
   return isUnsatisfiable(std::move(Constraints));
 }
 
 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
-    AtomicBoolValue &Token, llvm::SetVector<BoolValue *> &Constraints,
-    llvm::DenseSet<AtomicBoolValue *> &VisitedTokens) {
-  auto Res = VisitedTokens.insert(&Token);
+    Atom Token, llvm::SetVector<const Formula *> &Constraints,
+    llvm::DenseSet<Atom> &VisitedTokens) {
+  auto Res = VisitedTokens.insert(Token);
   if (!Res.second)
     return;
 
-  auto ConstraintsIt = FlowConditionConstraints.find(&Token);
+  auto ConstraintsIt = FlowConditionConstraints.find(Token);
   if (ConstraintsIt == FlowConditionConstraints.end()) {
-    Constraints.insert(&Token);
+    Constraints.insert(&arena().makeAtomRef(Token));
   } else {
     // Bind flow condition token via `iff` to its set of constraints:
     // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
-    Constraints.insert(&arena().makeEquals(Token, *ConstraintsIt->second));
+    Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
+                                           *ConstraintsIt->second));
   }
 
-  auto DepsIt = FlowConditionDeps.find(&Token);
+  auto DepsIt = FlowConditionDeps.find(Token);
   if (DepsIt != FlowConditionDeps.end()) {
-    for (AtomicBoolValue *DepToken : DepsIt->second) {
-      addTransitiveFlowConditionConstraints(*DepToken, Constraints,
+    for (Atom DepToken : DepsIt->second) {
+      addTransitiveFlowConditionConstraints(DepToken, Constraints,
                                             VisitedTokens);
     }
   }
 }
 
-void DataflowAnalysisContext::dumpFlowCondition(AtomicBoolValue &Token,
+void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
                                                 llvm::raw_ostream &OS) {
-  llvm::SetVector<BoolValue *> Constraints;
-  Constraints.insert(&Token);
-  llvm::DenseSet<AtomicBoolValue *> VisitedTokens;
+  llvm::SetVector<const Formula *> Constraints;
+  Constraints.insert(&arena().makeAtomRef(Token));
+  llvm::DenseSet<Atom> VisitedTokens;
   addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
 
-  llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = {
-      {&arena().makeLiteral(false), "False"},
-      {&arena().makeLiteral(true), "True"}};
-  OS << debugString(Constraints.getArrayRef(), AtomNames);
+  // TODO: have formulas know about true/false directly instead
+  Atom True = arena().makeLiteral(true).getAtom();
+  Atom False = arena().makeLiteral(false).getAtom();
+  Formula::AtomNames Names = {{False, "false"}, {True, "true"}};
+
+  for (const auto *Constraint : Constraints) {
+    Constraint->print(OS, &Names);
+    OS << "\n";
+  }
 }
 
 const ControlFlowContext *
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 4a11c09..11bb7da 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -107,15 +107,16 @@ static Value *mergeDistinctValues(QualType Type, Value &Val1,
     // if (o.has_value())
     //   x = o.value();
     // ```
-    auto *Expr1 = cast<BoolValue>(&Val1);
-    auto *Expr2 = cast<BoolValue>(&Val2);
-    auto &MergedVal = MergedEnv.makeAtomicBoolValue();
-    MergedEnv.addToFlowCondition(MergedEnv.makeOr(
-        MergedEnv.makeAnd(Env1.getFlowConditionToken(),
-                          MergedEnv.makeIff(MergedVal, *Expr1)),
-        MergedEnv.makeAnd(Env2.getFlowConditionToken(),
-                          MergedEnv.makeIff(MergedVal, *Expr2))));
-    return &MergedVal;
+    auto &Expr1 = cast<BoolValue>(Val1).formula();
+    auto &Expr2 = cast<BoolValue>(Val2).formula();
+    auto &A = MergedEnv.arena();
+    auto &MergedVal = A.makeAtomRef(A.makeAtom());
+    MergedEnv.addToFlowCondition(
+        A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
+                           A.makeEquals(MergedVal, Expr1)),
+                 A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
+                           A.makeEquals(MergedVal, Expr2))));
+    return &A.makeBoolValue(MergedVal);
   }
 
   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
@@ -269,11 +270,11 @@ void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
 
 Environment::Environment(DataflowAnalysisContext &DACtx)
     : DACtx(&DACtx),
-      FlowConditionToken(&DACtx.arena().makeFlowConditionToken()) {}
+      FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
 
 Environment Environment::fork() const {
   Environment Copy(*this);
-  Copy.FlowConditionToken = &DACtx->forkFlowCondition(*FlowConditionToken);
+  Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
   return Copy;
 }
 
@@ -587,8 +588,8 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
 
   // FIXME: update join to detect backedges and simplify the flow condition
   // accordingly.
-  JoinedEnv.FlowConditionToken = &EnvA.DACtx->joinFlowConditions(
-      *EnvA.FlowConditionToken, *EnvB.FlowConditionToken);
+  JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
+      EnvA.FlowConditionToken, EnvB.FlowConditionToken);
 
   for (auto &Entry : EnvA.LocToVal) {
     const StorageLocation *Loc = Entry.first;
@@ -819,7 +820,7 @@ Value *Environment::createValueUnlessSelfReferential(
     // with integers, and so distinguishing them serves no purpose, but could
     // prevent convergence.
     CreatedValuesCount++;
-    return &DACtx->arena().create<IntegerValue>();
+    return &arena().create<IntegerValue>();
   }
 
   if (Type->isReferenceType() || Type->isPointerType()) {
@@ -837,9 +838,9 @@ Value *Environment::createValueUnlessSelfReferential(
     }
 
     if (Type->isReferenceType())
-      return &DACtx->arena().create<ReferenceValue>(PointeeLoc);
+      return &arena().create<ReferenceValue>(PointeeLoc);
     else
-      return &DACtx->arena().create<PointerValue>(PointeeLoc);
+      return &arena().create<PointerValue>(PointeeLoc);
   }
 
   if (Type->isRecordType()) {
@@ -859,7 +860,7 @@ Value *Environment::createValueUnlessSelfReferential(
       Visited.erase(FieldType.getCanonicalType());
     }
 
-    return &DACtx->arena().create<StructValue>(std::move(FieldValues));
+    return &arena().create<StructValue>(std::move(FieldValues));
   }
 
   return nullptr;
@@ -884,12 +885,18 @@ const StorageLocation &Environment::skip(const StorageLocation &Loc,
   return skip(*const_cast<StorageLocation *>(&Loc), SP);
 }
 
+void Environment::addToFlowCondition(const Formula &Val) {
+  DACtx->addFlowConditionConstraint(FlowConditionToken, Val);
+}
 void Environment::addToFlowCondition(BoolValue &Val) {
-  DACtx->addFlowConditionConstraint(*FlowConditionToken, Val);
+  addToFlowCondition(Val.formula());
 }
 
+bool Environment::flowConditionImplies(const Formula &Val) const {
+  return DACtx->flowConditionImplies(FlowConditionToken, Val);
+}
 bool Environment::flowConditionImplies(BoolValue &Val) const {
-  return DACtx->flowConditionImplies(*FlowConditionToken, Val);
+  return flowConditionImplies(Val.formula());
 }
 
 void Environment::dump(raw_ostream &OS) const {
@@ -909,7 +916,7 @@ void Environment::dump(raw_ostream &OS) const {
   }
 
   OS << "FlowConditionToken:\n";
-  DACtx->dumpFlowCondition(*FlowConditionToken, OS);
+  DACtx->dumpFlowCondition(FlowConditionToken, OS);
 }
 
 void Environment::dump() const {
diff --git a/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp b/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp
index 25225ed..f8a049a 100644
--- a/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp
@@ -16,22 +16,12 @@
 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
 #include "clang/Analysis/FlowSensitive/Solver.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormatAdapters.h"
-#include "llvm/Support/FormatCommon.h"
-#include "llvm/Support/FormatVariadic.h"
 
 namespace clang {
 namespace dataflow {
 
-using llvm::AlignStyle;
-using llvm::fmt_pad;
-using llvm::formatv;
-
 llvm::StringRef debugString(Value::Kind Kind) {
   switch (Kind) {
   case Value::Kind::Integer:
@@ -46,26 +36,19 @@ llvm::StringRef debugString(Value::Kind Kind) {
     return "AtomicBool";
   case Value::Kind::TopBool:
     return "TopBool";
-  case Value::Kind::Conjunction:
-    return "Conjunction";
-  case Value::Kind::Disjunction:
-    return "Disjunction";
-  case Value::Kind::Negation:
-    return "Negation";
-  case Value::Kind::Implication:
-    return "Implication";
-  case Value::Kind::Biconditional:
-    return "Biconditional";
+  case Value::Kind::FormulaBool:
+    return "FormulaBool";
   }
   llvm_unreachable("Unhandled value kind");
 }
 
-llvm::StringRef debugString(Solver::Result::Assignment Assignment) {
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                              Solver::Result::Assignment Assignment) {
   switch (Assignment) {
   case Solver::Result::Assignment::AssignedFalse:
-    return "False";
+    return OS << "False";
   case Solver::Result::Assignment::AssignedTrue:
-    return "True";
+    return OS << "True";
   }
   llvm_unreachable("Booleans can only be assigned true/false");
 }
@@ -82,174 +65,16 @@ llvm::StringRef debugString(Solver::Result::Status Status) {
   llvm_unreachable("Unhandled SAT check result status");
 }
 
-namespace {
-
-class DebugStringGenerator {
-public:
-  explicit DebugStringGenerator(
-      llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNamesArg)
-      : Counter(0), AtomNames(std::move(AtomNamesArg)) {
-#ifndef NDEBUG
-    llvm::StringSet<> Names;
-    for (auto &N : AtomNames) {
-      assert(Names.insert(N.second).second &&
-             "The same name must not assigned to different atoms");
-    }
-#endif
-  }
-
-  /// Returns a string representation of a boolean value `B`.
-  std::string debugString(const BoolValue &B, size_t Depth = 0) {
-    std::string S;
-    switch (B.getKind()) {
-    case Value::Kind::AtomicBool: {
-      S = getAtomName(&cast<AtomicBoolValue>(B));
-      break;
-    }
-    case Value::Kind::TopBool: {
-      S = "top";
-      break;
-    }
-    case Value::Kind::Conjunction: {
-      auto &C = cast<ConjunctionValue>(B);
-      auto L = debugString(C.getLeftSubValue(), Depth + 1);
-      auto R = debugString(C.getRightSubValue(), Depth + 1);
-      S = formatv("(and\n{0}\n{1})", L, R);
-      break;
-    }
-    case Value::Kind::Disjunction: {
-      auto &D = cast<DisjunctionValue>(B);
-      auto L = debugString(D.getLeftSubValue(), Depth + 1);
-      auto R = debugString(D.getRightSubValue(), Depth + 1);
-      S = formatv("(or\n{0}\n{1})", L, R);
-      break;
-    }
-    case Value::Kind::Negation: {
-      auto &N = cast<NegationValue>(B);
-      S = formatv("(not\n{0})", debugString(N.getSubVal(), Depth + 1));
-      break;
-    }
-    case Value::Kind::Implication: {
-      auto &IV = cast<ImplicationValue>(B);
-      auto L = debugString(IV.getLeftSubValue(), Depth + 1);
-      auto R = debugString(IV.getRightSubValue(), Depth + 1);
-      S = formatv("(=>\n{0}\n{1})", L, R);
-      break;
-    }
-    case Value::Kind::Biconditional: {
-      auto &BV = cast<BiconditionalValue>(B);
-      auto L = debugString(BV.getLeftSubValue(), Depth + 1);
-      auto R = debugString(BV.getRightSubValue(), Depth + 1);
-      S = formatv("(=\n{0}\n{1})", L, R);
-      break;
-    }
-    default:
-      llvm_unreachable("Unhandled value kind");
-    }
-    auto Indent = Depth * 4;
-    return formatv("{0}", fmt_pad(S, Indent, 0));
-  }
-
-  std::string debugString(const llvm::ArrayRef<BoolValue *> &Constraints) {
-    std::string Result;
-    for (const BoolValue *S : Constraints) {
-      Result += debugString(*S);
-      Result += '\n';
-    }
-    return Result;
-  }
-
-  /// Returns a string representation of a set of boolean `Constraints` and the
-  /// `Result` of satisfiability checking on the `Constraints`.
-  std::string debugString(ArrayRef<BoolValue *> &Constraints,
-                          const Solver::Result &Result) {
-    auto Template = R"(
-Constraints
-------------
-{0:$[
-
-]}
-------------
-{1}.
-{2}
-)";
-
-    std::vector<std::string> ConstraintsStrings;
-    ConstraintsStrings.reserve(Constraints.size());
-    for (auto &Constraint : Constraints) {
-      ConstraintsStrings.push_back(debugString(*Constraint));
-    }
-
-    auto StatusString = clang::dataflow::debugString(Result.getStatus());
-    auto Solution = Result.getSolution();
-    auto SolutionString = Solution ? "\n" + debugString(*Solution) : "";
-
-    return formatv(
-        Template,
-        llvm::make_range(ConstraintsStrings.begin(), ConstraintsStrings.end()),
-        StatusString, SolutionString);
-  }
-
-private:
-  /// Returns a string representation of a truth assignment to atom booleans.
-  std::string debugString(
-      const llvm::DenseMap<AtomicBoolValue *, Solver::Result::Assignment>
-          &AtomAssignments) {
-    size_t MaxNameLength = 0;
-    for (auto &AtomName : AtomNames) {
-      MaxNameLength = std::max(MaxNameLength, AtomName.second.size());
-    }
-
-    std::vector<std::string> Lines;
-    for (auto &AtomAssignment : AtomAssignments) {
-      auto Line = formatv("{0} = {1}",
-                          fmt_align(getAtomName(AtomAssignment.first),
-                                    AlignStyle::Left, MaxNameLength),
-                          clang::dataflow::debugString(AtomAssignment.second));
-      Lines.push_back(Line);
-    }
-    llvm::sort(Lines);
-
-    return formatv("{0:$[\n]}", llvm::make_range(Lines.begin(), Lines.end()));
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Solver::Result &R) {
+  OS << debugString(R.getStatus()) << "\n";
+  if (auto Solution = R.getSolution()) {
+    std::vector<std::pair<Atom, Solver::Result::Assignment>> Sorted = {
+        Solution->begin(), Solution->end()};
+    llvm::sort(Sorted);
+    for (const auto &Entry : Sorted)
+      OS << Entry.first << " = " << Entry.second << "\n";
   }
-
-  /// Returns the name assigned to `Atom`, either user-specified or created by
-  /// default rules (B0, B1, ...).
-  std::string getAtomName(const AtomicBoolValue *Atom) {
-    auto Entry = AtomNames.try_emplace(Atom, formatv("B{0}", Counter));
-    if (Entry.second) {
-      Counter++;
-    }
-    return Entry.first->second;
-  }
-
-  // Keep track of number of atoms without a user-specified name, used to assign
-  // non-repeating default names to such atoms.
-  size_t Counter;
-
-  // Keep track of names assigned to atoms.
-  llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames;
-};
-
-} // namespace
-
-std::string
-debugString(const BoolValue &B,
-            llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) {
-  return DebugStringGenerator(std::move(AtomNames)).debugString(B);
-}
-
-std::string
-debugString(llvm::ArrayRef<BoolValue *> Constraints,
-            llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) {
-  return DebugStringGenerator(std::move(AtomNames)).debugString(Constraints);
-}
-
-std::string
-debugString(ArrayRef<BoolValue *> Constraints, const Solver::Result &Result,
-            llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) {
-  return DebugStringGenerator(std::move(AtomNames))
-      .debugString(Constraints, Result);
+  return OS;
 }
 
 } // namespace dataflow
diff --git a/clang/lib/Analysis/FlowSensitive/Formula.cpp b/clang/lib/Analysis/FlowSensitive/Formula.cpp
new file mode 100644
index 0000000..504ad2f
--- /dev/null
+++ b/clang/lib/Analysis/FlowSensitive/Formula.cpp
@@ -0,0 +1,82 @@
+//===- Formula.cpp ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/Formula.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+
+namespace clang::dataflow {
+
+Formula &Formula::create(llvm::BumpPtrAllocator &Alloc, Kind K,
+                         ArrayRef<const Formula *> Operands, unsigned Value) {
+  assert(Operands.size() == numOperands(K));
+  if (Value != 0) // Currently, formulas have values or operands, not both.
+    assert(numOperands(K) == 0);
+  void *Mem = Alloc.Allocate(sizeof(Formula) +
+                                 Operands.size() * sizeof(Operands.front()),
+                             alignof(Formula));
+  Formula *Result = new (Mem) Formula();
+  Result->FormulaKind = K;
+  Result->Value = Value;
+  // Operands are stored as `const Formula *`s after the formula itself.
+  // We don't need to construct an object as pointers are trivial types.
+  // Formula is alignas(const Formula *), so alignment is satisfied.
+  llvm::copy(Operands, reinterpret_cast<const Formula **>(Result + 1));
+  return *Result;
+}
+
+static llvm::StringLiteral sigil(Formula::Kind K) {
+  switch (K) {
+  case Formula::AtomRef:
+    return "";
+  case Formula::Not:
+    return "!";
+  case Formula::And:
+    return " & ";
+  case Formula::Or:
+    return " | ";
+  case Formula::Implies:
+    return " => ";
+  case Formula::Equal:
+    return " = ";
+  }
+  llvm_unreachable("unhandled formula kind");
+}
+
+void Formula::print(llvm::raw_ostream &OS, const AtomNames *Names) const {
+  if (Names && kind() == AtomRef)
+    if (auto It = Names->find(getAtom()); It != Names->end()) {
+      OS << It->second;
+      return;
+    }
+
+  switch (numOperands(kind())) {
+  case 0:
+    OS << getAtom();
+    break;
+  case 1:
+    OS << sigil(kind());
+    operands()[0]->print(OS, Names);
+    break;
+  case 2:
+    OS << '(';
+    operands()[0]->print(OS, Names);
+    OS << sigil(kind());
+    operands()[1]->print(OS, Names);
+    OS << ')';
+    break;
+  default:
+    llvm_unreachable("unhandled formula arity");
+  }
+}
+
+} // namespace clang::dataflow
+\ No newline at end of file
diff --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
index 14293a3..0c4e838 100644
--- a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
+++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
@@ -97,6 +97,7 @@ public:
     case Value::Kind::Integer:
     case Value::Kind::TopBool:
     case Value::Kind::AtomicBool:
+    case Value::Kind::FormulaBool:
       break;
     case Value::Kind::Reference:
       JOS.attributeObject(
@@ -111,35 +112,6 @@ public:
         JOS.attributeObject("f:" + Child.first->getNameAsString(),
                             [&] { dump(*Child.second); });
       break;
-    case Value::Kind::Disjunction: {
-      auto &VV = cast<DisjunctionValue>(V);
-      JOS.attributeObject("lhs", [&] { dump(VV.getLeftSubValue()); });
-      JOS.attributeObject("rhs", [&] { dump(VV.getRightSubValue()); });
-      break;
-    }
-    case Value::Kind::Conjunction: {
-      auto &VV = cast<ConjunctionValue>(V);
-      JOS.attributeObject("lhs", [&] { dump(VV.getLeftSubValue()); });
-      JOS.attributeObject("rhs", [&] { dump(VV.getRightSubValue()); });
-      break;
-    }
-    case Value::Kind::Negation: {
-      auto &VV = cast<NegationValue>(V);
-      JOS.attributeObject("not", [&] { dump(VV.getSubVal()); });
-      break;
-    }
-    case Value::Kind::Implication: {
-      auto &VV = cast<ImplicationValue>(V);
-      JOS.attributeObject("if", [&] { dump(VV.getLeftSubValue()); });
-      JOS.attributeObject("then", [&] { dump(VV.getRightSubValue()); });
-      break;
-    }
-    case Value::Kind::Biconditional: {
-      auto &VV = cast<BiconditionalValue>(V);
-      JOS.attributeObject("lhs", [&] { dump(VV.getLeftSubValue()); });
-      JOS.attributeObject("rhs", [&] { dump(VV.getRightSubValue()); });
-      break;
-    }
     }
 
     for (const auto& Prop : V.properties())
@@ -149,10 +121,12 @@ public:
     // Running the SAT solver is expensive, but knowing which booleans are
     // guaranteed true/false here is valuable and hard to determine by hand.
     if (auto *B = llvm::dyn_cast<BoolValue>(&V)) {
-      JOS.attribute("truth", Env.flowConditionImplies(*B) ? "true"
-                             : Env.flowConditionImplies(Env.makeNot(*B))
-                                 ? "false"
-                                 : "unknown");
+      JOS.attribute("formula", llvm::to_string(B->formula()));
+      JOS.attribute(
+          "truth", Env.flowConditionImplies(B->formula()) ? "true"
+                   : Env.flowConditionImplies(Env.arena().makeNot(B->formula()))
+                       ? "false"
+                       : "unknown");
     }
   }
   void dump(const StorageLocation &L) {
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index d69e0f5..ac971f6 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -62,64 +62,12 @@ static BoolValue &evaluateBooleanEquality(const Expr &LHS, const Expr &RHS,
   return Env.makeAtomicBoolValue();
 }
 
-// Functionally updates `V` such that any instances of `TopBool` are replaced
-// with fresh atomic bools. Note: This implementation assumes that `B` is a
-// tree; if `B` is a DAG, it will lose any sharing between subvalues that was
-// present in the original .
-static BoolValue &unpackValue(BoolValue &V, Environment &Env);
-
-template <typename Derived, typename M>
-BoolValue &unpackBinaryBoolValue(Environment &Env, BoolValue &B, M build) {
-  auto &V = *cast<Derived>(&B);
-  BoolValue &Left = V.getLeftSubValue();
-  BoolValue &Right = V.getRightSubValue();
-  BoolValue &ULeft = unpackValue(Left, Env);
-  BoolValue &URight = unpackValue(Right, Env);
-
-  if (&ULeft == &Left && &URight == &Right)
-    return V;
-
-  return (Env.*build)(ULeft, URight);
-}
-
 static BoolValue &unpackValue(BoolValue &V, Environment &Env) {
-  switch (V.getKind()) {
-  case Value::Kind::Integer:
-  case Value::Kind::Reference:
-  case Value::Kind::Pointer:
-  case Value::Kind::Struct:
-    llvm_unreachable("BoolValue cannot have any of these kinds.");
-
-  case Value::Kind::AtomicBool:
-    return V;
-
-  case Value::Kind::TopBool:
-    // Unpack `TopBool` into a fresh atomic bool.
-    return Env.makeAtomicBoolValue();
-
-  case Value::Kind::Negation: {
-    auto &N = *cast<NegationValue>(&V);
-    BoolValue &Sub = N.getSubVal();
-    BoolValue &USub = unpackValue(Sub, Env);
-
-    if (&USub == &Sub)
-      return V;
-    return Env.makeNot(USub);
-  }
-  case Value::Kind::Conjunction:
-    return unpackBinaryBoolValue<ConjunctionValue>(Env, V,
-                                                   &Environment::makeAnd);
-  case Value::Kind::Disjunction:
-    return unpackBinaryBoolValue<DisjunctionValue>(Env, V,
-                                                   &Environment::makeOr);
-  case Value::Kind::Implication:
-    return unpackBinaryBoolValue<ImplicationValue>(
-        Env, V, &Environment::makeImplication);
-  case Value::Kind::Biconditional:
-    return unpackBinaryBoolValue<BiconditionalValue>(Env, V,
-                                                     &Environment::makeIff);
+  if (auto *Top = llvm::dyn_cast<TopBoolValue>(&V)) {
+    auto &A = Env.getDataflowAnalysisContext().arena();
+    return A.makeBoolValue(A.makeAtomRef(Top->getAtom()));
   }
-  llvm_unreachable("All reachable cases in switch return");
+  return V;
 }
 
 // Unpacks the value (if any) associated with `E` and updates `E` to the new
diff --git a/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp b/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
index db2e1d6..037886d 100644
--- a/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
+++ b/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
@@ -17,8 +17,8 @@
 #include <queue>
 #include <vector>
 
+#include "clang/Analysis/FlowSensitive/Formula.h"
 #include "clang/Analysis/FlowSensitive/Solver.h"
-#include "clang/Analysis/FlowSensitive/Value.h"
 #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -79,7 +79,7 @@ using ClauseID = uint32_t;
 static constexpr ClauseID NullClause = 0;
 
 /// A boolean formula in conjunctive normal form.
-struct BooleanFormula {
+struct CNFFormula {
   /// `LargestVar` is equal to the largest positive integer that represents a
   /// variable in the formula.
   const Variable LargestVar;
@@ -121,12 +121,12 @@ struct BooleanFormula {
   /// clauses in the formula start from the element at index 1.
   std::vector<ClauseID> NextWatched;
 
-  /// Stores the variable identifier and value location for atomic booleans in
-  /// the formula.
-  llvm::DenseMap<Variable, AtomicBoolValue *> Atomics;
+  /// Stores the variable identifier and Atom for atomic booleans in the
+  /// formula.
+  llvm::DenseMap<Variable, Atom> Atomics;
 
-  explicit BooleanFormula(Variable LargestVar,
-                          llvm::DenseMap<Variable, AtomicBoolValue *> Atomics)
+  explicit CNFFormula(Variable LargestVar,
+                      llvm::DenseMap<Variable, Atom> Atomics)
       : LargestVar(LargestVar), Atomics(std::move(Atomics)) {
     Clauses.push_back(0);
     ClauseStarts.push_back(0);
@@ -144,8 +144,8 @@ struct BooleanFormula {
   ///
   ///  All literals in the input that are not `NullLit` must be distinct.
   void addClause(Literal L1, Literal L2 = NullLit, Literal L3 = NullLit) {
-    // The literals are guaranteed to be distinct from properties of BoolValue
-    // and the construction in `buildBooleanFormula`.
+    // The literals are guaranteed to be distinct from properties of Formula
+    // and the construction in `buildCNF`.
     assert(L1 != NullLit && L1 != L2 && L1 != L3 &&
            (L2 != L3 || L2 == NullLit));
 
@@ -178,98 +178,59 @@ struct BooleanFormula {
 
 /// Converts the conjunction of `Vals` into a formula in conjunctive normal
 /// form where each clause has at least one and at most three literals.
-BooleanFormula buildBooleanFormula(const llvm::ArrayRef<BoolValue *> &Vals) {
+CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
   // The general strategy of the algorithm implemented below is to map each
   // of the sub-values in `Vals` to a unique variable and use these variables in
   // the resulting CNF expression to avoid exponential blow up. The number of
   // literals in the resulting formula is guaranteed to be linear in the number
-  // of sub-values in `Vals`.
+  // of sub-formulas in `Vals`.
 
-  // Map each sub-value in `Vals` to a unique variable.
-  llvm::DenseMap<BoolValue *, Variable> SubValsToVar;
-  // Store variable identifiers and value location of atomic booleans.
-  llvm::DenseMap<Variable, AtomicBoolValue *> Atomics;
+  // Map each sub-formula in `Vals` to a unique variable.
+  llvm::DenseMap<const Formula *, Variable> SubValsToVar;
+  // Store variable identifiers and Atom of atomic booleans.
+  llvm::DenseMap<Variable, Atom> Atomics;
   Variable NextVar = 1;
   {
-    std::queue<BoolValue *> UnprocessedSubVals;
-    for (BoolValue *Val : Vals)
+    std::queue<const Formula *> UnprocessedSubVals;
+    for (const Formula *Val : Vals)
       UnprocessedSubVals.push(Val);
     while (!UnprocessedSubVals.empty()) {
       Variable Var = NextVar;
-      BoolValue *Val = UnprocessedSubVals.front();
+      const Formula *Val = UnprocessedSubVals.front();
       UnprocessedSubVals.pop();
 
       if (!SubValsToVar.try_emplace(Val, Var).second)
         continue;
       ++NextVar;
 
-      // Visit the sub-values of `Val`.
-      switch (Val->getKind()) {
-      case Value::Kind::Conjunction: {
-        auto *C = cast<ConjunctionValue>(Val);
-        UnprocessedSubVals.push(&C->getLeftSubValue());
-        UnprocessedSubVals.push(&C->getRightSubValue());
-        break;
-      }
-      case Value::Kind::Disjunction: {
-        auto *D = cast<DisjunctionValue>(Val);
-        UnprocessedSubVals.push(&D->getLeftSubValue());
-        UnprocessedSubVals.push(&D->getRightSubValue());
-        break;
-      }
-      case Value::Kind::Negation: {
-        auto *N = cast<NegationValue>(Val);
-        UnprocessedSubVals.push(&N->getSubVal());
-        break;
-      }
-      case Value::Kind::Implication: {
-        auto *I = cast<ImplicationValue>(Val);
-        UnprocessedSubVals.push(&I->getLeftSubValue());
-        UnprocessedSubVals.push(&I->getRightSubValue());
-        break;
-      }
-      case Value::Kind::Biconditional: {
-        auto *B = cast<BiconditionalValue>(Val);
-        UnprocessedSubVals.push(&B->getLeftSubValue());
-        UnprocessedSubVals.push(&B->getRightSubValue());
-        break;
-      }
-      case Value::Kind::TopBool:
-        // Nothing more to do. This `TopBool` instance has already been mapped
-        // to a fresh solver variable (`NextVar`, above) and is thereafter
-        // anonymous. The solver never sees `Top`.
-        break;
-      case Value::Kind::AtomicBool: {
-        Atomics[Var] = cast<AtomicBoolValue>(Val);
-        break;
-      }
-      default:
-        llvm_unreachable("buildBooleanFormula: unhandled value kind");
-      }
+      for (const Formula *F : Val->operands())
+        UnprocessedSubVals.push(F);
+      if (Val->kind() == Formula::AtomRef)
+        Atomics[Var] = Val->getAtom();
     }
   }
 
-  auto GetVar = [&SubValsToVar](const BoolValue *Val) {
+  auto GetVar = [&SubValsToVar](const Formula *Val) {
     auto ValIt = SubValsToVar.find(Val);
     assert(ValIt != SubValsToVar.end());
     return ValIt->second;
   };
 
-  BooleanFormula Formula(NextVar - 1, std::move(Atomics));
+  CNFFormula CNF(NextVar - 1, std::move(Atomics));
   std::vector<bool> ProcessedSubVals(NextVar, false);
 
-  // Add a conjunct for each variable that represents a top-level conjunction
-  // value in `Vals`.
-  for (BoolValue *Val : Vals)
-    Formula.addClause(posLit(GetVar(Val)));
+  // Add a conjunct for each variable that represents a top-level formula in
+  // `Vals`.
+  for (const Formula *Val : Vals)
+    CNF.addClause(posLit(GetVar(Val)));
 
   // Add conjuncts that represent the mapping between newly-created variables
-  // and their corresponding sub-values.
-  std::queue<BoolValue *> UnprocessedSubVals;
-  for (BoolValue *Val : Vals)
+  // and their corresponding sub-formulas.
+  std::queue<const Formula *> UnprocessedSubVals;
+  for (const Formula *Val : Vals)
     UnprocessedSubVals.push(Val);
   while (!UnprocessedSubVals.empty()) {
-    const BoolValue *Val = UnprocessedSubVals.front();
+    const Formula *Val = UnprocessedSubVals.front();
     UnprocessedSubVals.pop();
     const Variable Var = GetVar(Val);
 
@@ -277,117 +238,107 @@ BooleanFormula buildBooleanFormula(const llvm::ArrayRef<BoolValue *> &Vals) {
       continue;
     ProcessedSubVals[Var] = true;
 
-    if (auto *C = dyn_cast<ConjunctionValue>(Val)) {
-      const Variable LeftSubVar = GetVar(&C->getLeftSubValue());
-      const Variable RightSubVar = GetVar(&C->getRightSubValue());
+    switch (Val->kind()) {
+    case Formula::AtomRef:
+      break;
+    case Formula::And: {
+      const Variable LHS = GetVar(Val->operands()[0]);
+      const Variable RHS = GetVar(Val->operands()[1]);
 
-      if (LeftSubVar == RightSubVar) {
+      if (LHS == RHS) {
         // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is
         // already in conjunctive normal form. Below we add each of the
         // conjuncts of the latter expression to the result.
-        Formula.addClause(negLit(Var), posLit(LeftSubVar));
-        Formula.addClause(posLit(Var), negLit(LeftSubVar));
-
-        // Visit a sub-value of `Val` (pick any, they are identical).
-        UnprocessedSubVals.push(&C->getLeftSubValue());
+        CNF.addClause(negLit(Var), posLit(LHS));
+        CNF.addClause(posLit(Var), negLit(LHS));
       } else {
         // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v !B)`
         // which is already in conjunctive normal form. Below we add each of the
         // conjuncts of the latter expression to the result.
-        Formula.addClause(negLit(Var), posLit(LeftSubVar));
-        Formula.addClause(negLit(Var), posLit(RightSubVar));
-        Formula.addClause(posLit(Var), negLit(LeftSubVar), negLit(RightSubVar));
-
-        // Visit the sub-values of `Val`.
-        UnprocessedSubVals.push(&C->getLeftSubValue());
-        UnprocessedSubVals.push(&C->getRightSubValue());
+        CNF.addClause(negLit(Var), posLit(LHS));
+        CNF.addClause(negLit(Var), posLit(RHS));
+        CNF.addClause(posLit(Var), negLit(LHS), negLit(RHS));
       }
-    } else if (auto *D = dyn_cast<DisjunctionValue>(Val)) {
-      const Variable LeftSubVar = GetVar(&D->getLeftSubValue());
-      const Variable RightSubVar = GetVar(&D->getRightSubValue());
+      break;
+    }
+    case Formula::Or: {
+      const Variable LHS = GetVar(Val->operands()[0]);
+      const Variable RHS = GetVar(Val->operands()[1]);
 
-      if (LeftSubVar == RightSubVar) {
+      if (LHS == RHS) {
         // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is
         // already in conjunctive normal form. Below we add each of the
         // conjuncts of the latter expression to the result.
-        Formula.addClause(negLit(Var), posLit(LeftSubVar));
-        Formula.addClause(posLit(Var), negLit(LeftSubVar));
-
-        // Visit a sub-value of `Val` (pick any, they are identical).
-        UnprocessedSubVals.push(&D->getLeftSubValue());
+        CNF.addClause(negLit(Var), posLit(LHS));
+        CNF.addClause(posLit(Var), negLit(LHS));
       } else {
-        // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v !B)`
-        // which is already in conjunctive normal form. Below we add each of the
-        // conjuncts of the latter expression to the result.
-        Formula.addClause(negLit(Var), posLit(LeftSubVar), posLit(RightSubVar));
-        Formula.addClause(posLit(Var), negLit(LeftSubVar));
-        Formula.addClause(posLit(Var), negLit(RightSubVar));
-
-        // Visit the sub-values of `Val`.
-        UnprocessedSubVals.push(&D->getLeftSubValue());
-        UnprocessedSubVals.push(&D->getRightSubValue());
+        // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v
+        // !B)` which is already in conjunctive normal form. Below we add each
+        // of the conjuncts of the latter expression to the result.
+        CNF.addClause(negLit(Var), posLit(LHS), posLit(RHS));
+        CNF.addClause(posLit(Var), negLit(LHS));
+        CNF.addClause(posLit(Var), negLit(RHS));
       }
-    } else if (auto *N = dyn_cast<NegationValue>(Val)) {
-      const Variable SubVar = GetVar(&N->getSubVal());
-
-      // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is already in
-      // conjunctive normal form. Below we add each of the conjuncts of the
-      // latter expression to the result.
-      Formula.addClause(negLit(Var), negLit(SubVar));
-      Formula.addClause(posLit(Var), posLit(SubVar));
-
-      // Visit the sub-values of `Val`.
-      UnprocessedSubVals.push(&N->getSubVal());
-    } else if (auto *I = dyn_cast<ImplicationValue>(Val)) {
-      const Variable LeftSubVar = GetVar(&I->getLeftSubValue());
-      const Variable RightSubVar = GetVar(&I->getRightSubValue());
+      break;
+    }
+    case Formula::Not: {
+      const Variable Operand = GetVar(Val->operands()[0]);
+
+      // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is
+      // already in conjunctive normal form. Below we add each of the
+      // conjuncts of the latter expression to the result.
+      CNF.addClause(negLit(Var), negLit(Operand));
+      CNF.addClause(posLit(Var), posLit(Operand));
+      break;
+    }
+    case Formula::Implies: {
+      const Variable LHS = GetVar(Val->operands()[0]);
+      const Variable RHS = GetVar(Val->operands()[1]);
 
       // `X <=> (A => B)` is equivalent to
       // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in
-      // conjunctive normal form. Below we add each of the conjuncts of the
-      // latter expression to the result.
-      Formula.addClause(posLit(Var), posLit(LeftSubVar));
-      Formula.addClause(posLit(Var), negLit(RightSubVar));
-      Formula.addClause(negLit(Var), negLit(LeftSubVar), posLit(RightSubVar));
-
-      // Visit the sub-values of `Val`.
-      UnprocessedSubVals.push(&I->getLeftSubValue());
-      UnprocessedSubVals.push(&I->getRightSubValue());
-    } else if (auto *B = dyn_cast<BiconditionalValue>(Val)) {
-      const Variable LeftSubVar = GetVar(&B->getLeftSubValue());
-      const Variable RightSubVar = GetVar(&B->getRightSubValue());
-
-      if (LeftSubVar == RightSubVar) {
+      // conjunctive normal form. Below we add each of the conjuncts of
+      // the latter expression to the result.
+      CNF.addClause(posLit(Var), posLit(LHS));
+      CNF.addClause(posLit(Var), negLit(RHS));
+      CNF.addClause(negLit(Var), negLit(LHS), posLit(RHS));
+      break;
+    }
+    case Formula::Equal: {
+      const Variable LHS = GetVar(Val->operands()[0]);
+      const Variable RHS = GetVar(Val->operands()[1]);
+
+      if (LHS == RHS) {
         // `X <=> (A <=> A)` is equvalent to `X` which is already in
         // conjunctive normal form. Below we add each of the conjuncts of the
         // latter expression to the result.
-        Formula.addClause(posLit(Var));
+        CNF.addClause(posLit(Var));
 
         // No need to visit the sub-values of `Val`.
-      } else {
-        // `X <=> (A <=> B)` is equivalent to
-        // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which is
-        // already in conjunctive normal form. Below we add each of the conjuncts
-        // of the latter expression to the result.
-        Formula.addClause(posLit(Var), posLit(LeftSubVar), posLit(RightSubVar));
-        Formula.addClause(posLit(Var), negLit(LeftSubVar), negLit(RightSubVar));
-        Formula.addClause(negLit(Var), posLit(LeftSubVar), negLit(RightSubVar));
-        Formula.addClause(negLit(Var), negLit(LeftSubVar), posLit(RightSubVar));
-
-        // Visit the sub-values of `Val`.
-        UnprocessedSubVals.push(&B->getLeftSubValue());
-        UnprocessedSubVals.push(&B->getRightSubValue());
+        continue;
       }
+      // `X <=> (A <=> B)` is equivalent to
+      // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which
+      // is already in conjunctive normal form. Below we add each of the
+      // conjuncts of the latter expression to the result.
+      CNF.addClause(posLit(Var), posLit(LHS), posLit(RHS));
+      CNF.addClause(posLit(Var), negLit(LHS), negLit(RHS));
+      CNF.addClause(negLit(Var), posLit(LHS), negLit(RHS));
+      CNF.addClause(negLit(Var), negLit(LHS), posLit(RHS));
+      break;
+    }
     }
+    for (const Formula *Child : Val->operands())
+      UnprocessedSubVals.push(Child);
   }
 
-  return Formula;
+  return CNF;
 }
 
 class WatchedLiteralsSolverImpl {
   /// A boolean formula in conjunctive normal form that the solver will attempt
   /// to prove satisfiable. The formula will be modified in the process.
-  BooleanFormula Formula;
+  CNFFormula CNF;
 
   /// The search for a satisfying assignment of the variables in `Formula` will
   /// proceed in levels, starting from 1 and going up to `Formula.LargestVar`
@@ -439,9 +390,10 @@ class WatchedLiteralsSolverImpl {
   std::vector<Variable> ActiveVars;
 
 public:
-  explicit WatchedLiteralsSolverImpl(const llvm::ArrayRef<BoolValue *> &Vals)
-      : Formula(buildBooleanFormula(Vals)), LevelVars(Formula.LargestVar + 1),
-        LevelStates(Formula.LargestVar + 1) {
+  explicit WatchedLiteralsSolverImpl(
+      const llvm::ArrayRef<const Formula *> &Vals)
+      : CNF(buildCNF(Vals)), LevelVars(CNF.LargestVar + 1),
+        LevelStates(CNF.LargestVar + 1) {
     assert(!Vals.empty());
 
     // Initialize the state at the root level to a decision so that in
@@ -450,10 +402,10 @@ public:
     LevelStates[0] = State::Decision;
 
     // Initialize all variables as unassigned.
-    VarAssignments.resize(Formula.LargestVar + 1, Assignment::Unassigned);
+    VarAssignments.resize(CNF.LargestVar + 1, Assignment::Unassigned);
 
     // Initialize the active variables.
-    for (Variable Var = Formula.LargestVar; Var != NullVar; --Var) {
+    for (Variable Var = CNF.LargestVar; Var != NullVar; --Var) {
       if (isWatched(posLit(Var)) || isWatched(negLit(Var)))
         ActiveVars.push_back(Var);
     }
@@ -474,7 +426,7 @@ public:
       // 3. Unassigned variables that form watched literals are active.
       // FIXME: Consider replacing these with test cases that fail if the any
       // of the invariants is broken. That might not be easy due to the
-      // transformations performed by `buildBooleanFormula`.
+      // transformations performed by `buildCNF`.
       assert(activeVarsAreUnassigned());
       assert(activeVarsFormWatchedLiterals());
       assert(unassignedVarsFormingWatchedLiteralsAreActive());
@@ -555,12 +507,10 @@ public:
   }
 
 private:
-  /// Returns a satisfying truth assignment to the atomic values in the boolean
-  /// formula.
-  llvm::DenseMap<AtomicBoolValue *, Solver::Result::Assignment>
-  buildSolution() {
-    llvm::DenseMap<AtomicBoolValue *, Solver::Result::Assignment> Solution;
-    for (auto &Atomic : Formula.Atomics) {
+  /// Returns a satisfying truth assignment to the atoms in the boolean formula.
+  llvm::DenseMap<Atom, Solver::Result::Assignment> buildSolution() {
+    llvm::DenseMap<Atom, Solver::Result::Assignment> Solution;
+    for (auto &Atomic : CNF.Atomics) {
       // A variable may have a definite true/false assignment, or it may be
       // unassigned indicating its truth value does not affect the result of
       // the formula. Unassigned variables are assigned to true as a default.
@@ -596,24 +546,24 @@ private:
     const Literal FalseLit = VarAssignments[Var] == Assignment::AssignedTrue
                                  ? negLit(Var)
                                  : posLit(Var);
-    ClauseID FalseLitWatcher = Formula.WatchedHead[FalseLit];
-    Formula.WatchedHead[FalseLit] = NullClause;
+    ClauseID FalseLitWatcher = CNF.WatchedHead[FalseLit];
+    CNF.WatchedHead[FalseLit] = NullClause;
     while (FalseLitWatcher != NullClause) {
-      const ClauseID NextFalseLitWatcher = Formula.NextWatched[FalseLitWatcher];
+      const ClauseID NextFalseLitWatcher = CNF.NextWatched[FalseLitWatcher];
 
       // Pick the first non-false literal as the new watched literal.
-      const size_t FalseLitWatcherStart = Formula.ClauseStarts[FalseLitWatcher];
+      const size_t FalseLitWatcherStart = CNF.ClauseStarts[FalseLitWatcher];
       size_t NewWatchedLitIdx = FalseLitWatcherStart + 1;
-      while (isCurrentlyFalse(Formula.Clauses[NewWatchedLitIdx]))
+      while (isCurrentlyFalse(CNF.Clauses[NewWatchedLitIdx]))
         ++NewWatchedLitIdx;
-      const Literal NewWatchedLit = Formula.Clauses[NewWatchedLitIdx];
+      const Literal NewWatchedLit = CNF.Clauses[NewWatchedLitIdx];
       const Variable NewWatchedLitVar = var(NewWatchedLit);
 
       // Swap the old watched literal for the new one in `FalseLitWatcher` to
       // maintain the invariant that the watched literal is at the beginning of
       // the clause.
-      Formula.Clauses[NewWatchedLitIdx] = FalseLit;
-      Formula.Clauses[FalseLitWatcherStart] = NewWatchedLit;
+      CNF.Clauses[NewWatchedLitIdx] = FalseLit;
+      CNF.Clauses[FalseLitWatcherStart] = NewWatchedLit;
 
       // If the new watched literal isn't watched by any other clause and its
       // variable isn't assigned we need to add it to the active variables.
@@ -621,8 +571,8 @@ private:
           VarAssignments[NewWatchedLitVar] == Assignment::Unassigned)
         ActiveVars.push_back(NewWatchedLitVar);
 
-      Formula.NextWatched[FalseLitWatcher] = Formula.WatchedHead[NewWatchedLit];
-      Formula.WatchedHead[NewWatchedLit] = FalseLitWatcher;
+      CNF.NextWatched[FalseLitWatcher] = CNF.WatchedHead[NewWatchedLit];
+      CNF.WatchedHead[NewWatchedLit] = FalseLitWatcher;
 
       // Go to the next clause that watches `FalseLit`.
       FalseLitWatcher = NextFalseLitWatcher;
@@ -632,16 +582,15 @@ private:
   /// Returns true if and only if one of the clauses that watch `Lit` is a unit
   /// clause.
   bool watchedByUnitClause(Literal Lit) const {
-    for (ClauseID LitWatcher = Formula.WatchedHead[Lit];
-         LitWatcher != NullClause;
-         LitWatcher = Formula.NextWatched[LitWatcher]) {
-      llvm::ArrayRef<Literal> Clause = Formula.clauseLiterals(LitWatcher);
+    for (ClauseID LitWatcher = CNF.WatchedHead[Lit]; LitWatcher != NullClause;
+         LitWatcher = CNF.NextWatched[LitWatcher]) {
+      llvm::ArrayRef<Literal> Clause = CNF.clauseLiterals(LitWatcher);
 
       // Assert the invariant that the watched literal is always the first one
       // in the clause.
       // FIXME: Consider replacing this with a test case that fails if the
       // invariant is broken by `updateWatchedLiterals`. That might not be easy
-      // due to the transformations performed by `buildBooleanFormula`.
+      // due to the transformations performed by `buildCNF`.
       assert(Clause.front() == Lit);
 
       if (isUnit(Clause))
@@ -665,7 +614,7 @@ private:
 
   /// Returns true if and only if `Lit` is watched by a clause in `Formula`.
   bool isWatched(Literal Lit) const {
-    return Formula.WatchedHead[Lit] != NullClause;
+    return CNF.WatchedHead[Lit] != NullClause;
   }
 
   /// Returns an assignment for an unassigned variable.
@@ -678,8 +627,8 @@ private:
   /// Returns a set of all watched literals.
   llvm::DenseSet<Literal> watchedLiterals() const {
     llvm::DenseSet<Literal> WatchedLiterals;
-    for (Literal Lit = 2; Lit < Formula.WatchedHead.size(); Lit++) {
-      if (Formula.WatchedHead[Lit] == NullClause)
+    for (Literal Lit = 2; Lit < CNF.WatchedHead.size(); Lit++) {
+      if (CNF.WatchedHead[Lit] == NullClause)
         continue;
       WatchedLiterals.insert(Lit);
     }
@@ -719,7 +668,8 @@ private:
   }
 };
 
-Solver::Result WatchedLiteralsSolver::solve(llvm::ArrayRef<BoolValue *> Vals) {
+Solver::Result
+WatchedLiteralsSolver::solve(llvm::ArrayRef<const Formula *> Vals) {
   if (Vals.empty())
     return Solver::Result::Satisfiable({{}});
   auto [Res, Iterations] =
author	Sam McCall <sam.mccall@gmail.com>	2023-07-05 11:35:06 +0200
committer	Sam McCall <sam.mccall@gmail.com>	2023-07-07 11:58:33 +0200
commit	fc9821a877d4e1443603d67539e4369b3ec72890 (patch)
tree	16074680f7b637e18f5206b2163e5d9c45e56fb2 /clang/lib
parent	74eac85dae01ddda40233329700c9a0426b3ed22 (diff)
download	llvm-fc9821a877d4e1443603d67539e4369b3ec72890.zip llvm-fc9821a877d4e1443603d67539e4369b3ec72890.tar.gz llvm-fc9821a877d4e1443603d67539e4369b3ec72890.tar.bz2