author     Michael Kruse <llvm-project@meinersbur.de>  2025-04-10 17:19:12 +0200
committer  Michael Kruse <llvm-project@meinersbur.de>  2025-04-10 17:19:12 +0200
commit     03f7d72e45f94c0506d5cb0cf7375b388c0d6557 (patch)
tree       a3c7db1fe51a25f751b233ceb4fb2e5191100c7a
parent     b072cabc62d380729206bdd92938c13061a85b20 (diff)
-rw-r--r--  llvm/include/llvm/Transforms/Utils/BuildBuiltins.h   80
-rw-r--r--  llvm/lib/Transforms/Utils/BuildBuiltins.cpp         128
2 files changed, 110 insertions, 98 deletions
diff --git a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
index 65765ad..9f8712a 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
@@ -22,7 +22,7 @@
// availability depends on the target triple (e.g. GPU devices cannot
// implement a global lock by design).
//
-// Whe want to mimic Clang's behaviour:
+// We want to mimic Clang's behaviour:
//
// * Prefer atomic instructions over libcall functions whenever possible. When a
// target backend does not support atomic instructions natively,
@@ -42,19 +42,19 @@
// Clang also assumes that the maximum supported data size of atomic instruction
// is 16, despite this is target-dependent and should be queried using
// TargetLowing::getMaxAtomicSizeInBitsSupported(). However, TargetMachine
-// (which is a factory for TargetLowing) is not available during Clang's CodeGen
-// phase, it is only created for the LLVM pass pipeline.
+// (which is a factory for TargetLowering) is not available during Clang's
+// CodeGen phase, it is only created for the LLVM pass pipeline.
//
// The functions in this file are intended to handle the complexity of builtins
-// so frontends do not need to care about the details. A major difference betwee
-// the cases is that the IR instructions take values directly as an llvm::Value
-// (except the atomic address of course), but the libcall functions almost
-// always take pointers to those values. Since we cannot assume that everything
-// can be passed an llvm::Value (LLVM does not handle large types such as i4096
-// well), our abstraction passes everything as pointer which is load'ed when
-// needed. The caller is responsible to emit a temporary AllocaInst and store if
-// it needs to pass an llvm::Value. Mem2Reg/SROA will easily remove any
-// unnecessary store/load pairs.
+// so frontends do not need to care about the details. A major difference
+// between the cases is that the IR instructions take values directly as an
+// llvm::Value (except the atomic address of course), but the libcall functions
+// almost always take pointers to those values. Since we cannot assume that
+// everything can be passed an llvm::Value (LLVM does not handle large types
+// such as i4096 well), our abstraction passes everything as a pointer which is
+// loaded when needed. The caller is responsible for emitting a temporary
+// AllocaInst and store if it needs to pass an llvm::Value. Mem2Reg/SROA removes
+// any unnecessary store/load pairs.
//
// In the future LLVM may introduce more generic atomic constructs that is
// lowered by an LLVM pass, such as AtomicExpandPass. Once this exist, the
@@ -107,14 +107,14 @@ struct AtomicEmitOptions {
/// * cmpxchg
/// * atomicrmw
///
- /// Atomic LLVM intructions have several restructions on when they can be
+ /// Atomic LLVM instructions have several restrictions on when they can be
/// used, including:
- /// * Properties such as IsWeak,Memorder,Scope must be constant.
+ /// * Properties such as IsVolatile,IsWeak,Memorder,Scope must be constant.
/// * Must be an integer or pointer type. Some cases also allow float types.
/// * Size must be a power-of-two number of bytes.
/// * Size must be at most the size of atomics supported by the target.
/// * Size should not be too large (e.g. i4096) since LLVM does not scale
- /// will with huge types.
+ /// well with huge types.
///
/// Even with all these limitations adhered to, AtomicExpandPass may still
/// lower the instruction to a libcall function if the target does not support
@@ -176,6 +176,29 @@ struct AtomicEmitOptions {
/// * https://llvm.org/docs/LangRef.html#load-instruction
/// * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
/// * https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
+///
+/// @param AtomicPtr The memory location accessed atomically.
+/// @param RetPtr Pointer to where the value loaded from \p AtomicPtr is stored.
+/// @param TypeOrSize Type of the value to be accessed. cmpxchg
+/// supports integers and pointers only, other atomics also
+/// support floats. If any other type or omitted, the access is
+/// type-punned to an integer that holds at least \p DataSize bytes.
+/// Alternatively, the number of bytes can be specified, in
+/// which case an integer is also used.
+/// @param IsVolatile Whether to mark the access as volatile.
+/// @param Memorder Memory model to be used for the affected atomic address.
+/// @param Scope (optional) The synchronization scope (domain of threads
+/// where this access has to be atomic, e.g. CUDA
+/// warp/block/grid-level atomics) of this access. Defaults
+/// to system scope.
+/// @param Align (optional) Known alignment of \p AtomicPtr. If omitted,
+/// alignment is inferred from \p AtomicPtr itself or falls back
+/// to no alignment.
+/// @param Builder Used to emit instructions.
+/// @param EmitOptions For controlling what IR is emitted.
+/// @param Name (optional) Stem for generated instruction names.
+///
+/// @return An error if the atomic operation could not be emitted.
Error emitAtomicLoadBuiltin(
Value *AtomicPtr, Value *RetPtr, std::variant<Type *, uint64_t> TypeOrSize,
bool IsVolatile,
@@ -193,6 +216,29 @@ Error emitAtomicLoadBuiltin(
/// * https://llvm.org/docs/LangRef.html#store-instruction
/// * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
/// * https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
+///
+/// @param AtomicPtr The memory location accessed atomically.
+/// @param ValPtr Pointer to the data to be stored at \p AtomicPtr.
+/// @param TypeOrSize Type of the value to be accessed. cmpxchg
+/// supports integers and pointers only, other atomics also
+/// support floats. If any other type or omitted, the access is
+/// type-punned to an integer that holds at least \p DataSize bytes.
+/// Alternatively, the number of bytes can be specified, in
+/// which case an integer is also used.
+/// @param IsVolatile Whether to mark the access as volatile.
+/// @param Memorder Memory model to be used for the affected atomic address.
+/// @param Scope (optional) The synchronization scope (domain of threads
+/// where this access has to be atomic, e.g. CUDA
+/// warp/block/grid-level atomics) of this access. Defaults
+/// to system scope.
+/// @param Align (optional) Known alignment of \p AtomicPtr. If omitted,
+/// alignment is inferred from \p AtomicPtr itself or falls back
+/// to no alignment.
+/// @param Builder Used to emit instructions.
+/// @param EmitOptions For controlling what IR is emitted.
+/// @param Name (optional) Stem for generated instruction names.
+///
+/// @return An error if the atomic operation could not be emitted.
Error emitAtomicStoreBuiltin(
Value *AtomicPtr, Value *ValPtr, std::variant<Type *, uint64_t> TypeOrSize,
bool IsVolatile,
@@ -253,9 +299,9 @@ Error emitAtomicStoreBuiltin(
/// call returns \p ExpectedPtr/\p ActualPtr will be the
/// value as defined above (in contrast to being undefined).
/// @param Align (optional) Known alignment of /p Ptr. If omitted,
-/// alignment is inferred from /p Ptr itself and falls back
+/// alignment is inferred from /p Ptr itself or falls back
/// to no alignment.
-/// @param Builder User to emit instructions.
+/// @param Builder Used to emit instructions.
/// @param EmitOptions For controlling what IR is emitted.
/// @param Name (optional) Stem for generated instruction names.
///
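
Illustration (not part of the patch): the pointer-based convention documented above means a frontend that only has a value in an SSA register spills it to a temporary alloca before calling the builtin helpers, and reloads it afterwards if a value is needed again. The sketch below shows that pattern under stated assumptions; passValueThroughMemory is a hypothetical helper name, and the actual emitAtomicLoadBuiltin/emitAtomicStoreBuiltin call is elided because its full argument list is not visible in this diff.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

// Spill an SSA value to a temporary alloca so it can be passed to the
// pointer-based emitAtomic*Builtin helpers, then reload it afterwards.
// Mem2Reg/SROA removes the store/load pair whenever it is unnecessary.
static llvm::Value *passValueThroughMemory(llvm::IRBuilderBase &Builder,
                                           llvm::Type *ValTy,
                                           llvm::Value *Val) {
  llvm::AllocaInst *Tmp =
      Builder.CreateAlloca(ValTy, /*ArraySize=*/nullptr, "atomic.tmp");
  Builder.CreateStore(Val, Tmp);
  // ... pass Tmp as ValPtr/RetPtr to emitAtomicStoreBuiltin or
  // emitAtomicLoadBuiltin here; the call itself is elided since its full
  // argument list is not visible in this diff ...
  return Builder.CreateLoad(ValTy, Tmp, "atomic.reload");
}
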
diff --git a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
index 4128218..6ef275e 100644
--- a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
+++ b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
@@ -204,6 +204,9 @@ protected:
return emitInst(IsWeak, SuccessMemorder, *FailureMemorderConst);
}
+ Type *BoolTy = Builder.getInt1Ty();
+ IntegerType *Int32Ty = Builder.getInt32Ty();
+
// Create all the relevant BB's
BasicBlock *ContBB =
splitBB(Builder, /*CreateBranch=*/false,
@@ -215,43 +218,33 @@ protected:
// MonotonicBB is arbitrarily chosen as the default case; in practice,
// this doesn't matter unless someone is crazy enough to use something
// that doesn't fold to a constant for the ordering.
- SwitchInst *SI = Builder.CreateSwitch(FailureMemorderCABI, MonotonicBB);
- // Implemented as acquire, since it's the closest in LLVM.
- SI->addCase(
- Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::consume)),
- AcquireBB);
- SI->addCase(
- Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::acquire)),
- AcquireBB);
- SI->addCase(
- Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::seq_cst)),
- SeqCstBB);
+ Value *Order = Builder.CreateIntCast(FailureMemorderCABI, Int32Ty, false);
+ SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
// TODO: Do not insert PHINode if operation cannot fail
Builder.SetInsertPoint(ContBB, ContBB->begin());
PHINode *Result =
- Builder.CreatePHI(Builder.getInt1Ty(), 3,
+ Builder.CreatePHI(BoolTy, /*NumReservedValues=*/3,
Name + "." + getBuiltinSig() + ".failorder.success");
IRBuilderBase::InsertPoint ContIP = Builder.saveIP();
- // Emit all the different atomics
- Builder.SetInsertPoint(MonotonicBB);
- Value *MonotonicResult =
- emitInst(IsWeak, SuccessMemorder, AtomicOrdering::Monotonic);
- Builder.CreateBr(ContBB);
- Result->addIncoming(MonotonicResult, Builder.GetInsertBlock());
-
- Builder.SetInsertPoint(AcquireBB);
- Value *AcquireResult =
- emitInst(IsWeak, SuccessMemorder, AtomicOrdering::Acquire);
- Builder.CreateBr(ContBB);
- Result->addIncoming(AcquireResult, Builder.GetInsertBlock());
+ auto EmitCaseImpl = [&](BasicBlock *CaseBB, AtomicOrdering AO,
+ bool IsDefault = false) {
+ if (!IsDefault) {
+ for (auto CABI : seq<int>(0, 6)) {
+ if (fromCABI(CABI) == AO)
+ SI->addCase(Builder.getInt32(CABI), CaseBB);
+ }
+ }
+ Builder.SetInsertPoint(CaseBB);
+ Value *AtomicResult = emitInst(IsWeak, SuccessMemorder, AO);
+ Builder.CreateBr(ContBB);
+ Result->addIncoming(AtomicResult, Builder.GetInsertBlock());
+ };
- Builder.SetInsertPoint(SeqCstBB);
- Value *SeqCstResult = emitInst(IsWeak, SuccessMemorder,
- AtomicOrdering::SequentiallyConsistent);
- Builder.CreateBr(ContBB);
- Result->addIncoming(SeqCstResult, Builder.GetInsertBlock());
+ EmitCaseImpl(MonotonicBB, AtomicOrdering::Monotonic, /*IsDefault=*/true);
+ EmitCaseImpl(AcquireBB, AtomicOrdering::Acquire);
+ EmitCaseImpl(SeqCstBB, AtomicOrdering::SequentiallyConsistent);
Builder.restoreIP(ContIP);
return Result;
@@ -262,6 +255,7 @@ protected:
return emitFailureMemorderSwitch(IsWeak, *SuccessMemorderConst);
Type *BoolTy = Builder.getInt1Ty();
+ IntegerType *Int32Ty = Builder.getInt32Ty();
// Create all the relevant BB's
BasicBlock *ContBB =
@@ -282,67 +276,39 @@ protected:
// MonotonicBB is arbitrarily chosen as the default case; in practice,
// this doesn't matter unless someone is crazy enough to use something
// that doesn't fold to a constant for the ordering.
- IntegerType *IntTy = getIntTy(Builder, EmitOptions.TLI);
- Value *Order = Builder.CreateIntCast(SuccessMemorderCABI, IntTy, false);
+ Value *Order = Builder.CreateIntCast(SuccessMemorderCABI, Int32Ty, false);
SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
// TODO: No PHI if operation cannot fail
Builder.SetInsertPoint(ContBB, ContBB->begin());
- PHINode *Result = Builder.CreatePHI(
- BoolTy, 5, Name + "." + getBuiltinSig() + ".memorder.success");
+ PHINode *Result =
+ Builder.CreatePHI(BoolTy, /*NumReservedValues=*/5,
+ Name + "." + getBuiltinSig() + ".memorder.success");
IRBuilderBase::InsertPoint ContIP = Builder.saveIP();
- // Emit all the different atomics
- Builder.SetInsertPoint(MonotonicBB);
- Value *MonotonicResult =
- emitFailureMemorderSwitch(IsWeak, AtomicOrdering::Monotonic);
- Builder.CreateBr(ContBB);
- Result->addIncoming(MonotonicResult, Builder.GetInsertBlock());
-
- if (AcquireBB) {
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::consume)),
- AcquireBB);
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acquire)),
- AcquireBB);
- Builder.SetInsertPoint(AcquireBB);
- Value *AcquireResult =
- emitFailureMemorderSwitch(IsWeak, AtomicOrdering::Acquire);
- Builder.CreateBr(ContBB);
- Result->addIncoming(AcquireResult, Builder.GetInsertBlock());
- }
-
- if (ReleaseBB) {
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::release)),
- ReleaseBB);
- Builder.SetInsertPoint(ReleaseBB);
- Value *ReleaseResult =
- emitFailureMemorderSwitch(IsWeak, AtomicOrdering::Release);
- Builder.CreateBr(ContBB);
- Result->addIncoming(ReleaseResult, Builder.GetInsertBlock());
- }
+ auto EmitCaseImpl = [&](BasicBlock *CaseBB, AtomicOrdering AO,
+ bool IsDefault = false) {
+ if (!CaseBB)
+ return;
- if (AcqRelBB) {
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acq_rel)),
- AcqRelBB);
- Builder.SetInsertPoint(AcqRelBB);
- Value *AcqRelResult =
- emitFailureMemorderSwitch(IsWeak, AtomicOrdering::AcquireRelease);
+ if (!IsDefault) {
+ for (auto CABI : seq<int>(0, 6)) {
+ if (fromCABI(CABI) == AO)
+ SI->addCase(Builder.getInt32(CABI), CaseBB);
+ }
+ }
+ Builder.SetInsertPoint(CaseBB);
+ Value *AtomicResult = emitFailureMemorderSwitch(IsWeak, AO);
Builder.CreateBr(ContBB);
- Result->addIncoming(AcqRelResult, Builder.GetInsertBlock());
- }
+ Result->addIncoming(AtomicResult, Builder.GetInsertBlock());
+ };
- SI->addCase(
- Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::seq_cst)),
- SeqCstBB);
- Builder.SetInsertPoint(SeqCstBB);
- Value *SeqCstResult = emitFailureMemorderSwitch(
- IsWeak, AtomicOrdering::SequentiallyConsistent);
- Builder.CreateBr(ContBB);
- Result->addIncoming(SeqCstResult, Builder.GetInsertBlock());
+ // Emit all the different atomics.
+ EmitCaseImpl(MonotonicBB, AtomicOrdering::Monotonic, /*IsDefault=*/true);
+ EmitCaseImpl(AcquireBB, AtomicOrdering::Acquire);
+ EmitCaseImpl(ReleaseBB, AtomicOrdering::Release);
+ EmitCaseImpl(AcqRelBB, AtomicOrdering::AcquireRelease);
+ EmitCaseImpl(SeqCstBB, AtomicOrdering::SequentiallyConsistent);
Builder.restoreIP(ContIP);
return Result;
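
Illustration (not part of the patch): the EmitCaseImpl refactoring above follows one shape in both places: cast the non-constant C ABI memory order to i32, switch on it with Monotonic as the default, emit one block per distinct AtomicOrdering, and merge the per-ordering results through a PHI in the continuation block. The standalone sketch below restates that shape under stated assumptions; orderingFromCABI is a hypothetical stand-in for the fromCABI mapping the patch relies on, and EmitBody stands in for emitInst/emitFailureMemorderSwitch.

#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"

using namespace llvm;

// Map a C ABI memory-order value (0..5) to an LLVM AtomicOrdering; consume is
// emitted as acquire, the closest ordering LLVM has. Hypothetical stand-in for
// the fromCABI() mapping used by the patch.
static AtomicOrdering orderingFromCABI(int CABI) {
  switch (static_cast<AtomicOrderingCABI>(CABI)) {
  case AtomicOrderingCABI::relaxed:
    return AtomicOrdering::Monotonic;
  case AtomicOrderingCABI::consume:
  case AtomicOrderingCABI::acquire:
    return AtomicOrdering::Acquire;
  case AtomicOrderingCABI::release:
    return AtomicOrdering::Release;
  case AtomicOrderingCABI::acq_rel:
    return AtomicOrdering::AcquireRelease;
  case AtomicOrderingCABI::seq_cst:
    return AtomicOrdering::SequentiallyConsistent;
  }
  return AtomicOrdering::Monotonic;
}

// Emit a switch over a non-constant memory order plus an i1 PHI that merges
// the per-ordering results (e.g. the cmpxchg success bit). EmitBody stands in
// for emitInst()/emitFailureMemorderSwitch() from the patch.
static Value *emitMemorderDispatch(IRBuilderBase &Builder, Value *MemorderCABI,
                                   function_ref<Value *(AtomicOrdering)> EmitBody) {
  Function *F = Builder.GetInsertBlock()->getParent();
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *ContBB = BasicBlock::Create(Ctx, "memorder.cont", F);
  BasicBlock *MonotonicBB = BasicBlock::Create(Ctx, "memorder.monotonic", F, ContBB);
  BasicBlock *AcquireBB = BasicBlock::Create(Ctx, "memorder.acquire", F, ContBB);
  BasicBlock *SeqCstBB = BasicBlock::Create(Ctx, "memorder.seqcst", F, ContBB);

  // Monotonic is the default case; any out-of-range order falls back to it.
  Value *Order = Builder.CreateIntCast(MemorderCABI, Builder.getInt32Ty(),
                                       /*isSigned=*/false);
  SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);

  Builder.SetInsertPoint(ContBB);
  PHINode *Result = Builder.CreatePHI(Builder.getInt1Ty(),
                                      /*NumReservedValues=*/3, "memorder.success");

  // One case block per distinct ordering; every C ABI value mapping to that
  // ordering becomes a switch case, mirroring the EmitCaseImpl lambdas above.
  auto EmitCase = [&](BasicBlock *CaseBB, AtomicOrdering AO, bool IsDefault) {
    if (!IsDefault)
      for (int CABI = 0; CABI < 6; ++CABI)
        if (orderingFromCABI(CABI) == AO)
          SI->addCase(Builder.getInt32(CABI), CaseBB);
    Builder.SetInsertPoint(CaseBB);
    Value *V = EmitBody(AO);
    Builder.CreateBr(ContBB);
    Result->addIncoming(V, Builder.GetInsertBlock());
  };

  EmitCase(MonotonicBB, AtomicOrdering::Monotonic, /*IsDefault=*/true);
  EmitCase(AcquireBB, AtomicOrdering::Acquire, /*IsDefault=*/false);
  EmitCase(SeqCstBB, AtomicOrdering::SequentiallyConsistent, /*IsDefault=*/false);

  // Leave the builder at the end of the continuation block, after the PHI.
  Builder.SetInsertPoint(ContBB);
  return Result;
}
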