author    | Michael Kruse <llvm-project@meinersbur.de> | 2025-04-10 17:19:12 +0200
committer | Michael Kruse <llvm-project@meinersbur.de> | 2025-04-10 17:19:12 +0200
commit    | 03f7d72e45f94c0506d5cb0cf7375b388c0d6557 (patch)
tree      | a3c7db1fe51a25f751b233ceb4fb2e5191100c7a
parent    | b072cabc62d380729206bdd92938c13061a85b20 (diff)
download  | llvm-users/meinersbur/llvm_buildbuiltins.zip
            llvm-users/meinersbur/llvm_buildbuiltins.tar.gz
            llvm-users/meinersbur/llvm_buildbuiltins.tar.bz2
Address review (branch: users/meinersbur/llvm_buildbuiltins)
-rw-r--r-- | llvm/include/llvm/Transforms/Utils/BuildBuiltins.h |  80
-rw-r--r-- | llvm/lib/Transforms/Utils/BuildBuiltins.cpp       | 128
2 files changed, 110 insertions, 98 deletions
diff --git a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
index 65765ad..9f8712a 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
@@ -22,7 +22,7 @@
 // availability depends on the target triple (e.g. GPU devices cannot
 // implement a global lock by design).
 //
-// Whe want to mimic Clang's behaviour:
+// We want to mimic Clang's behaviour:
 //
 // * Prefer atomic instructions over libcall functions whenever possible. When a
 //   target backend does not support atomic instructions natively,
@@ -42,19 +42,19 @@
 // Clang also assumes that the maximum supported data size of atomic instruction
 // is 16, despite this is target-dependent and should be queried using
 // TargetLowing::getMaxAtomicSizeInBitsSupported(). However, TargetMachine
-// (which is a factory for TargetLowing) is not available during Clang's CodeGen
-// phase, it is only created for the LLVM pass pipeline.
+// (which is a factory for TargetLowering) is not available during Clang's
+// CodeGen phase, it is only created for the LLVM pass pipeline.
 //
 // The functions in this file are intended to handle the complexity of builtins
-// so frontends do not need to care about the details. A major difference betwee
-// the cases is that the IR instructions take values directly as an llvm::Value
-// (except the atomic address of course), but the libcall functions almost
-// always take pointers to those values. Since we cannot assume that everything
-// can be passed an llvm::Value (LLVM does not handle large types such as i4096
-// well), our abstraction passes everything as pointer which is load'ed when
-// needed. The caller is responsible to emit a temporary AllocaInst and store if
-// it needs to pass an llvm::Value. Mem2Reg/SROA will easily remove any
-// unnecessary store/load pairs.
+// so frontends do not need to care about the details. A major difference
+// between the cases is that the IR instructions take values directly as an
+// llvm::Value (except the atomic address of course), but the libcall functions
+// almost always take pointers to those values. Since we cannot assume that
+// everything can be passed an llvm::Value (LLVM does not handle large types
+// such as i4096 well), our abstraction passes everything as pointer which is
+// loaded when needed. The caller is responsible to emit a temporary AllocaInst
+// and store if it needs to pass an llvm::Value. Mem2Reg/SROA will easily remove
+// any unnecessary store/load pairs.
 //
 // In the future LLVM may introduce more generic atomic constructs that is
 // lowered by an LLVM pass, such as AtomicExpandPass. Once this exist, the
@@ -107,14 +107,14 @@ struct AtomicEmitOptions {
   /// * cmpxchg
   /// * atomicrmw
   ///
-  /// Atomic LLVM intructions have several restructions on when they can be
+  /// Atomic LLVM instructions have several restrictions on when they can be
   /// used, including:
-  /// * Properties such as IsWeak,Memorder,Scope must be constant.
+  /// * Properties such as IsVolatile,IsWeak,Memorder,Scope must be constant.
   /// * Must be an integer or pointer type. Some cases also allow float types.
   /// * Size must be a power-of-two number of bytes.
   /// * Size must be at most the size of atomics supported by the target.
   /// * Size should not be too large (e.g. i4096) since LLVM does not scale
-  ///   will with huge types.
+  ///   well with huge types.
   ///
   /// Even with all these limitations adhered to, AtomicExpandPass may still
   /// lower the instruction to a libcall function if the target does not support
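To make the pointer-based convention described in the header comment concrete, here is a minimal frontend-side sketch (assuming a `Builder` and a previously computed `llvm::Value *Val` are in scope; all names are illustrative, not part of the patch):

```cpp
// Sketch: spill an llvm::Value into a temporary alloca so it can be passed
// by pointer, as the BuildBuiltins functions expect. If an atomic
// instruction (rather than a libcall) ends up being emitted, Mem2Reg/SROA
// will remove the redundant store/load pair again.
llvm::AllocaInst *ValPtr =
    Builder.CreateAlloca(Val->getType(), /*ArraySize=*/nullptr, "atomic.tmp");
Builder.CreateStore(Val, ValPtr);
// ValPtr can now be passed wherever the emit* functions declared below
// expect a pointer argument (e.g. the ValPtr parameter of
// emitAtomicStoreBuiltin).
```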
@@ -176,6 +176,29 @@
 /// * https://llvm.org/docs/LangRef.html#load-instruction
 /// * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
 /// * https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
+///
+/// @param AtomicPtr   The memory location accessed atomically.
+/// @param RetPtr      Pointer to the memory that receives the value loaded
+///                    from \p AtomicPtr.
+/// @param TypeOrSize  Type of the value to be accessed. cmpxchg
+///                    supports integer and pointers only, other atomics also
+///                    support floats. If any other type or omitted, type-puns
+///                    to an integer that holds at least \p DataSize bytes.
+///                    Alternatively, the number of bytes can be specified in
+///                    which case an integer is also used.
+/// @param IsVolatile  Whether to mark the access as volatile.
+/// @param Memorder    Memory model to be used for the affected atomic address.
+/// @param Scope       (optional) The synchronization scope (domain of threads
+///                    where this access has to be atomic, e.g. CUDA
+///                    warp/block/grid-level atomics) of this access. Defaults
+///                    to system scope.
+/// @param Align       (optional) Known alignment of \p AtomicPtr. If omitted,
+///                    alignment is inferred from \p AtomicPtr itself or falls
+///                    back to no alignment.
+/// @param Builder     Used to emit instructions.
+/// @param EmitOptions For controlling what IR is emitted.
+/// @param Name        (optional) Stem for generated instruction names.
+///
+/// @return An error if the atomic operation could not be emitted.
 Error emitAtomicLoadBuiltin(
     Value *AtomicPtr, Value *RetPtr,
     std::variant<Type *, uint64_t> TypeOrSize, bool IsVolatile,
@@ -193,6 +216,29 @@
 /// * https://llvm.org/docs/LangRef.html#store-instruction
 /// * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
 /// * https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
+///
+/// @param AtomicPtr   The memory location accessed atomically.
+/// @param ValPtr      Pointer to the value to be stored at \p AtomicPtr.
+/// @param TypeOrSize  Type of the value to be accessed. cmpxchg
+///                    supports integer and pointers only, other atomics also
+///                    support floats. If any other type or omitted, type-puns
+///                    to an integer that holds at least \p DataSize bytes.
+///                    Alternatively, the number of bytes can be specified in
+///                    which case an integer is also used.
+/// @param IsVolatile  Whether to mark the access as volatile.
+/// @param Memorder    Memory model to be used for the affected atomic address.
+/// @param Scope       (optional) The synchronization scope (domain of threads
+///                    where this access has to be atomic, e.g. CUDA
+///                    warp/block/grid-level atomics) of this access. Defaults
+///                    to system scope.
+/// @param Align       (optional) Known alignment of \p AtomicPtr. If omitted,
+///                    alignment is inferred from \p AtomicPtr itself or falls
+///                    back to no alignment.
+/// @param Builder     Used to emit instructions.
+/// @param EmitOptions For controlling what IR is emitted.
+/// @param Name        (optional) Stem for generated instruction names.
+///
+/// @return An error if the atomic operation could not be emitted.
 Error emitAtomicStoreBuiltin(
     Value *AtomicPtr, Value *ValPtr,
     std::variant<Type *, uint64_t> TypeOrSize, bool IsVolatile,
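As a hedged illustration of the `TypeOrSize` parameter documented above (a fragment; it assumes a `Builder` in scope and is not taken from the patch itself):

```cpp
// Either alternative of the variant may be passed. When a byte count is
// given, the implementation type-puns to an integer of at least that size.
std::variant<llvm::Type *, uint64_t> ByType = Builder.getInt32Ty(); // typed
std::variant<llvm::Type *, uint64_t> BySize = uint64_t{16};         // 16 bytes
```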
@@ -253,9 +299,9 @@
 ///                    call returns \p ExpectedPtr/\p ActualPtr will be the
 ///                    value as defined above (in contrast to being undefined).
 /// @param Align       (optional) Known alignment of /p Ptr. If omitted,
-///                    alignment is inferred from /p Ptr itself and falls back
-///                    to no alignment.
-/// @param Builder     User to emit instructions.
+///                    alignment is inferred from \p Ptr itself or falls back
+///                    to no alignment.
+/// @param Builder     Used to emit instructions.
 /// @param EmitOptions For controlling what IR is emitted.
 /// @param Name        (optional) Stem for generated instruction names.
 ///
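The hunk above belongs to the doc comment of the compare-exchange emitter, whose declaration lies outside the diff context. A sketch of the `ExpectedPtr` convention it describes, assuming `Builder` and an `llvm::Value *Expected` exist (the emitter call itself is elided since its signature is not shown in this diff):

```cpp
// The expected value is passed by pointer; after the emitter returns, the
// pointee holds the value actually observed at the atomic location, so a
// retry loop can reload it instead of reading undefined memory.
llvm::AllocaInst *ExpectedPtr = Builder.CreateAlloca(Expected->getType());
Builder.CreateStore(Expected, ExpectedPtr);
// ... emit the compare-exchange builtin with ExpectedPtr here ...
llvm::Value *Observed =
    Builder.CreateLoad(Expected->getType(), ExpectedPtr, "cmpxchg.observed");
```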
diff --git a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
index 4128218..6ef275e 100644
--- a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
+++ b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
@@ -204,6 +204,9 @@ protected:
       return emitInst(IsWeak, SuccessMemorder, *FailureMemorderConst);
     }
 
+    Type *BoolTy = Builder.getInt1Ty();
+    IntegerType *Int32Ty = Builder.getInt32Ty();
+
     // Create all the relevant BB's
     BasicBlock *ContBB =
         splitBB(Builder, /*CreateBranch=*/false,
@@ -215,43 +218,33 @@ protected:
     // MonotonicBB is arbitrarily chosen as the default case; in practice,
     // this doesn't matter unless someone is crazy enough to use something
     // that doesn't fold to a constant for the ordering.
-    SwitchInst *SI = Builder.CreateSwitch(FailureMemorderCABI, MonotonicBB);
-    // Implemented as acquire, since it's the closest in LLVM.
-    SI->addCase(
-        Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::consume)),
-        AcquireBB);
-    SI->addCase(
-        Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::acquire)),
-        AcquireBB);
-    SI->addCase(
-        Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::seq_cst)),
-        SeqCstBB);
+    Value *Order = Builder.CreateIntCast(FailureMemorderCABI, Int32Ty, false);
+    SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
 
     // TODO: Do not insert PHINode if operation cannot fail
     Builder.SetInsertPoint(ContBB, ContBB->begin());
     PHINode *Result =
-        Builder.CreatePHI(Builder.getInt1Ty(), 3,
+        Builder.CreatePHI(BoolTy, /*NumReservedValues=*/3,
                           Name + "." + getBuiltinSig() + ".failorder.success");
     IRBuilderBase::InsertPoint ContIP = Builder.saveIP();
 
-    // Emit all the different atomics
-    Builder.SetInsertPoint(MonotonicBB);
-    Value *MonotonicResult =
-        emitInst(IsWeak, SuccessMemorder, AtomicOrdering::Monotonic);
-    Builder.CreateBr(ContBB);
-    Result->addIncoming(MonotonicResult, Builder.GetInsertBlock());
-
-    Builder.SetInsertPoint(AcquireBB);
-    Value *AcquireResult =
-        emitInst(IsWeak, SuccessMemorder, AtomicOrdering::Acquire);
-    Builder.CreateBr(ContBB);
-    Result->addIncoming(AcquireResult, Builder.GetInsertBlock());
+    auto EmitCaseImpl = [&](BasicBlock *CaseBB, AtomicOrdering AO,
+                            bool IsDefault = false) {
+      if (!IsDefault) {
+        for (auto CABI : seq<int>(0, 6)) {
+          if (fromCABI(CABI) == AO)
+            SI->addCase(Builder.getInt32(CABI), CaseBB);
+        }
+      }
+      Builder.SetInsertPoint(CaseBB);
+      Value *AtomicResult = emitInst(IsWeak, SuccessMemorder, AO);
+      Builder.CreateBr(ContBB);
+      Result->addIncoming(AtomicResult, Builder.GetInsertBlock());
+    };
 
-    Builder.SetInsertPoint(SeqCstBB);
-    Value *SeqCstResult = emitInst(IsWeak, SuccessMemorder,
-                                   AtomicOrdering::SequentiallyConsistent);
-    Builder.CreateBr(ContBB);
-    Result->addIncoming(SeqCstResult, Builder.GetInsertBlock());
+    EmitCaseImpl(MonotonicBB, AtomicOrdering::Monotonic, /*IsDefault=*/true);
+    EmitCaseImpl(AcquireBB, AtomicOrdering::Acquire);
+    EmitCaseImpl(SeqCstBB, AtomicOrdering::SequentiallyConsistent);
 
     Builder.restoreIP(ContIP);
     return Result;
@@ -262,6 +255,7 @@ protected:
       return emitFailureMemorderSwitch(IsWeak, *SuccessMemorderConst);
 
     Type *BoolTy = Builder.getInt1Ty();
+    IntegerType *Int32Ty = Builder.getInt32Ty();
 
     // Create all the relevant BB's
    BasicBlock *ContBB =
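The `seq<int>(0, 6)` loop enumerates the six C ABI orderings (relaxed, consume, acquire, release, acq_rel, seq_cst) and attaches a switch case for every value that maps to the ordering being emitted; consume thereby shares `AcquireBB` with acquire, subsuming the removed "Implemented as acquire" special case. A sketch of the mapping the loop relies on (the patch's actual `fromCABI` helper is not shown in this diff and may differ):

```cpp
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

// Assumed CABI-to-LLVM ordering mapping. consume lowers to acquire, which
// is why one AtomicOrdering can collect two case labels in the loop above.
static AtomicOrdering fromCABISketch(int CABI) {
  switch (static_cast<AtomicOrderingCABI>(CABI)) {
  case AtomicOrderingCABI::relaxed:
    return AtomicOrdering::Monotonic;
  case AtomicOrderingCABI::consume:
  case AtomicOrderingCABI::acquire:
    return AtomicOrdering::Acquire;
  case AtomicOrderingCABI::release:
    return AtomicOrdering::Release;
  case AtomicOrderingCABI::acq_rel:
    return AtomicOrdering::AcquireRelease;
  case AtomicOrderingCABI::seq_cst:
    return AtomicOrdering::SequentiallyConsistent;
  }
  llvm_unreachable("invalid C ABI memory ordering");
}
```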
@@ -282,67 +276,39 @@ protected:
     // MonotonicBB is arbitrarily chosen as the default case; in practice,
     // this doesn't matter unless someone is crazy enough to use something
     // that doesn't fold to a constant for the ordering.
-    IntegerType *IntTy = getIntTy(Builder, EmitOptions.TLI);
-    Value *Order = Builder.CreateIntCast(SuccessMemorderCABI, IntTy, false);
+    Value *Order = Builder.CreateIntCast(SuccessMemorderCABI, Int32Ty, false);
     SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
 
     // TODO: No PHI if operation cannot fail
     Builder.SetInsertPoint(ContBB, ContBB->begin());
-    PHINode *Result = Builder.CreatePHI(
-        BoolTy, 5, Name + "." + getBuiltinSig() + ".memorder.success");
+    PHINode *Result =
+        Builder.CreatePHI(BoolTy, /*NumReservedValues=*/5,
+                          Name + "." + getBuiltinSig() + ".memorder.success");
     IRBuilderBase::InsertPoint ContIP = Builder.saveIP();
 
-    // Emit all the different atomics
-    Builder.SetInsertPoint(MonotonicBB);
-    Value *MonotonicResult =
-        emitFailureMemorderSwitch(IsWeak, AtomicOrdering::Monotonic);
-    Builder.CreateBr(ContBB);
-    Result->addIncoming(MonotonicResult, Builder.GetInsertBlock());
-
-    if (AcquireBB) {
-      SI->addCase(
-          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::consume)),
-          AcquireBB);
-      SI->addCase(
-          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acquire)),
-          AcquireBB);
-      Builder.SetInsertPoint(AcquireBB);
-      Value *AcquireResult =
-          emitFailureMemorderSwitch(IsWeak, AtomicOrdering::Acquire);
-      Builder.CreateBr(ContBB);
-      Result->addIncoming(AcquireResult, Builder.GetInsertBlock());
-    }
-
-    if (ReleaseBB) {
-      SI->addCase(
-          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::release)),
-          ReleaseBB);
-      Builder.SetInsertPoint(ReleaseBB);
-      Value *ReleaseResult =
-          emitFailureMemorderSwitch(IsWeak, AtomicOrdering::Release);
-      Builder.CreateBr(ContBB);
-      Result->addIncoming(ReleaseResult, Builder.GetInsertBlock());
-    }
+    auto EmitCaseImpl = [&](BasicBlock *CaseBB, AtomicOrdering AO,
+                            bool IsDefault = false) {
+      if (!CaseBB)
+        return;
 
-    if (AcqRelBB) {
-      SI->addCase(
-          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acq_rel)),
-          AcqRelBB);
-      Builder.SetInsertPoint(AcqRelBB);
-      Value *AcqRelResult =
-          emitFailureMemorderSwitch(IsWeak, AtomicOrdering::AcquireRelease);
+      if (!IsDefault) {
+        for (auto CABI : seq<int>(0, 6)) {
+          if (fromCABI(CABI) == AO)
+            SI->addCase(Builder.getInt32(CABI), CaseBB);
+        }
+      }
+      Builder.SetInsertPoint(CaseBB);
+      Value *AtomicResult = emitFailureMemorderSwitch(IsWeak, AO);
       Builder.CreateBr(ContBB);
-      Result->addIncoming(AcqRelResult, Builder.GetInsertBlock());
-    }
+      Result->addIncoming(AtomicResult, Builder.GetInsertBlock());
+    };
 
-    SI->addCase(
-        Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::seq_cst)),
-        SeqCstBB);
-    Builder.SetInsertPoint(SeqCstBB);
-    Value *SeqCstResult = emitFailureMemorderSwitch(
-        IsWeak, AtomicOrdering::SequentiallyConsistent);
-    Builder.CreateBr(ContBB);
-    Result->addIncoming(SeqCstResult, Builder.GetInsertBlock());
+    // Emit all the different atomics.
+    EmitCaseImpl(MonotonicBB, AtomicOrdering::Monotonic, /*IsDefault=*/true);
+    EmitCaseImpl(AcquireBB, AtomicOrdering::Acquire);
+    EmitCaseImpl(ReleaseBB, AtomicOrdering::Release);
+    EmitCaseImpl(AcqRelBB, AtomicOrdering::AcquireRelease);
+    EmitCaseImpl(SeqCstBB, AtomicOrdering::SequentiallyConsistent);
 
     Builder.restoreIP(ContIP);
     return Result;
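Taken together, both refactored functions follow the same shape. Below is a self-contained sketch of that switch-over-dynamic-ordering pattern, with `EmitOp` and `FromCABI` as hypothetical stand-ins for `emitInst`/`emitFailureMemorderSwitch` and the patch's `fromCABI` helper (this is an illustration under those assumptions, not the patch's exact code):

```cpp
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace llvm;

// Switch over a non-constant C ABI memory order, emit the operation once per
// supported ordering, and merge the per-case results in a PHI.
static Value *emitOrderSwitchSketch(
    IRBuilderBase &Builder, Value *OrderCABI,
    function_ref<Value *(AtomicOrdering)> EmitOp,
    function_ref<AtomicOrdering(int)> FromCABI) {
  LLVMContext &Ctx = Builder.getContext();
  Function *F = Builder.GetInsertBlock()->getParent();
  BasicBlock *MonotonicBB = BasicBlock::Create(Ctx, "monotonic", F);
  BasicBlock *AcquireBB = BasicBlock::Create(Ctx, "acquire", F);
  BasicBlock *SeqCstBB = BasicBlock::Create(Ctx, "seqcst", F);
  BasicBlock *ContBB = BasicBlock::Create(Ctx, "cont", F);

  // Normalize the dynamic ordering to i32; MonotonicBB doubles as the
  // default case for out-of-range values.
  Value *Order = Builder.CreateIntCast(OrderCABI, Builder.getInt32Ty(),
                                       /*isSigned=*/false);
  SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);

  Builder.SetInsertPoint(ContBB);
  PHINode *Result = Builder.CreatePHI(Builder.getInt1Ty(), 3, "order.result");

  auto EmitCase = [&](BasicBlock *CaseBB, AtomicOrdering AO, bool IsDefault) {
    if (!IsDefault)
      for (int CABI : seq<int>(0, 6)) // the six C ABI orderings
        if (FromCABI(CABI) == AO)
          SI->addCase(Builder.getInt32(CABI), CaseBB);
    Builder.SetInsertPoint(CaseBB);
    Value *V = EmitOp(AO);
    Builder.CreateBr(ContBB);
    Result->addIncoming(V, Builder.GetInsertBlock());
  };

  EmitCase(MonotonicBB, AtomicOrdering::Monotonic, /*IsDefault=*/true);
  EmitCase(AcquireBB, AtomicOrdering::Acquire, /*IsDefault=*/false);
  EmitCase(SeqCstBB, AtomicOrdering::SequentiallyConsistent,
           /*IsDefault=*/false);

  // Leave the builder in the continuation block, mirroring the original
  // code's saveIP/restoreIP dance.
  Builder.SetInsertPoint(ContBB);
  return Result;
}
```

The consume and acquire C ABI values both map to `AtomicOrdering::Acquire`, so the inner loop naturally adds two case labels for `AcquireBB`; that is the behavior the removed hand-written `SI->addCase` sequences encoded explicitly.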