author    | Michael Kruse <llvm-project@meinersbur.de> | 2025-04-10 17:19:12 +0200
committer | Michael Kruse <llvm-project@meinersbur.de> | 2025-04-10 17:19:12 +0200
commit    | 03f7d72e45f94c0506d5cb0cf7375b388c0d6557 (patch)
tree      | a3c7db1fe51a25f751b233ceb4fb2e5191100c7a
parent    | b072cabc62d380729206bdd92938c13061a85b20 (diff)
download  | llvm-users/meinersbur/llvm_buildbuiltins.zip
            llvm-users/meinersbur/llvm_buildbuiltins.tar.gz
            llvm-users/meinersbur/llvm_buildbuiltins.tar.bz2
Address review (branch: users/meinersbur/llvm_buildbuiltins)
-rw-r--r-- | llvm/include/llvm/Transforms/Utils/BuildBuiltins.h |  80
-rw-r--r-- | llvm/lib/Transforms/Utils/BuildBuiltins.cpp       | 128
2 files changed, 110 insertions, 98 deletions
diff --git a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
index 65765ad..9f8712a 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildBuiltins.h
@@ -22,7 +22,7 @@
 // availability depends on the target triple (e.g. GPU devices cannot
 // implement a global lock by design).
 //
-// Whe want to mimic Clang's behaviour:
+// We want to mimic Clang's behaviour:
 //
 // * Prefer atomic instructions over libcall functions whenever possible. When a
 //   target backend does not support atomic instructions natively,
@@ -42,19 +42,19 @@
 // Clang also assumes that the maximum supported data size of atomic instruction
 // is 16, despite this is target-dependent and should be queried using
 // TargetLowing::getMaxAtomicSizeInBitsSupported(). However, TargetMachine
-// (which is a factory for TargetLowing) is not available during Clang's CodeGen
-// phase, it is only created for the LLVM pass pipeline.
+// (which is a factory for TargetLowering) is not available during Clang's
+// CodeGen phase, it is only created for the LLVM pass pipeline.
 //
 // The functions in this file are intended to handle the complexity of builtins
-// so frontends do not need to care about the details. A major difference betwee
-// the cases is that the IR instructions take values directly as an llvm::Value
-// (except the atomic address of course), but the libcall functions almost
-// always take pointers to those values. Since we cannot assume that everything
-// can be passed an llvm::Value (LLVM does not handle large types such as i4096
-// well), our abstraction passes everything as pointer which is load'ed when
-// needed. The caller is responsible to emit a temporary AllocaInst and store if
-// it needs to pass an llvm::Value. Mem2Reg/SROA will easily remove any
-// unnecessary store/load pairs.
+// so frontends do not need to care about the details. A major difference
+// between the cases is that the IR instructions take values directly as an
+// llvm::Value (except the atomic address of course), but the libcall functions
+// almost always take pointers to those values. Since we cannot assume that
+// everything can be passed an llvm::Value (LLVM does not handle large types
+// such as i4096 well), our abstraction passes everything as pointer which is
+// loaded when needed. The caller is responsible to emit a temporary AllocaInst
+// and store if it needs to pass an llvm::Value. Mem2Reg/SROA will easily remove
+// any unnecessary store/load pairs.
 //
 // In the future LLVM may introduce more generic atomic constructs that is
 // lowered by an LLVM pass, such as AtomicExpandPass. Once this exist, the
@@ -107,14 +107,14 @@ struct AtomicEmitOptions {
   /// * cmpxchg
   /// * atomicrmw
   ///
-  /// Atomic LLVM intructions have several restructions on when they can be
+  /// Atomic LLVM instructions have several restrictions on when they can be
   /// used, including:
-  /// * Properties such as IsWeak,Memorder,Scope must be constant.
+  /// * Properties such as IsVolatile,IsWeak,Memorder,Scope must be constant.
   /// * Must be an integer or pointer type. Some cases also allow float types.
   /// * Size must be a power-of-two number of bytes.
   /// * Size must be at most the size of atomics supported by the target.
   /// * Size should not be too large (e.g. i4096) since LLVM does not scale
-  ///   will with huge types.
+  ///   well with huge types.
   ///
   /// Even with all these limitations adhered to, AtomicExpandPass may still
   /// lower the instruction to a libcall function if the target does not support
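To make the pointer-based convention described in the header comment concrete, here is a minimal frontend-side sketch (assuming a `Builder` and a previously computed `llvm::Value *Val` are in scope; all names are illustrative, not part of the patch):

```cpp
// Sketch: spill an llvm::Value into a temporary alloca so it can be passed
// by pointer, as the BuildBuiltins functions expect. If an atomic
// instruction (rather than a libcall) ends up being emitted, Mem2Reg/SROA
// will remove the redundant store/load pair again.
llvm::AllocaInst *ValPtr =
    Builder.CreateAlloca(Val->getType(), /*ArraySize=*/nullptr, "atomic.tmp");
Builder.CreateStore(Val, ValPtr);
// ValPtr can now be passed wherever the emit* functions declared below
// expect a pointer argument (e.g. the ValPtr parameter of
// emitAtomicStoreBuiltin).
```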
@@ -176,6 +176,29 @@
 /// * https://llvm.org/docs/LangRef.html#load-instruction
 /// * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
 /// * https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
+///
+/// @param AtomicPtr   The memory location accessed atomically.
+/// @param RetPtr      Pointer to the memory that receives the value loaded
+///                    from \p AtomicPtr.
+/// @param TypeOrSize  Type of the value to be accessed. cmpxchg
+///                    supports integer and pointers only, other atomics also
+///                    support floats. If any other type or omitted, type-puns
+///                    to an integer that holds at least \p DataSize bytes.
+///                    Alternatively, the number of bytes can be specified in
+///                    which case an integer is also used.
+/// @param IsVolatile  Whether to mark the access as volatile.
+/// @param Memorder    Memory model to be used for the affected atomic address.
+/// @param Scope       (optional) The synchronization scope (domain of threads
+///                    where this access has to be atomic, e.g. CUDA
+///                    warp/block/grid-level atomics) of this access. Defaults
+///                    to system scope.
+/// @param Align       (optional) Known alignment of \p AtomicPtr. If omitted,
+///                    alignment is inferred from \p AtomicPtr itself or falls
+///                    back to no alignment.
+/// @param Builder     Used to emit instructions.
+/// @param EmitOptions For controlling what IR is emitted.
+/// @param Name        (optional) Stem for generated instruction names.
+///
+/// @return An error if the atomic operation could not be emitted.
 Error emitAtomicLoadBuiltin(
     Value *AtomicPtr, Value *RetPtr,
     std::variant<Type *, uint64_t> TypeOrSize, bool IsVolatile,
@@ -193,6 +216,29 @@
 /// * https://llvm.org/docs/LangRef.html#store-instruction
 /// * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
 /// * https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#GCC_intrinsics
+///
+/// @param AtomicPtr   The memory location accessed atomically.
+/// @param ValPtr      Pointer to the value to be stored at \p AtomicPtr.
+/// @param TypeOrSize  Type of the value to be accessed. cmpxchg
+///                    supports integer and pointers only, other atomics also
+///                    support floats. If any other type or omitted, type-puns
+///                    to an integer that holds at least \p DataSize bytes.
+///                    Alternatively, the number of bytes can be specified in
+///                    which case an integer is also used.
+/// @param IsVolatile  Whether to mark the access as volatile.
+/// @param Memorder    Memory model to be used for the affected atomic address.
+/// @param Scope       (optional) The synchronization scope (domain of threads
+///                    where this access has to be atomic, e.g. CUDA
+///                    warp/block/grid-level atomics) of this access. Defaults
+///                    to system scope.
+/// @param Align       (optional) Known alignment of \p AtomicPtr. If omitted,
+///                    alignment is inferred from \p AtomicPtr itself or falls
+///                    back to no alignment.
+/// @param Builder     Used to emit instructions.
+/// @param EmitOptions For controlling what IR is emitted.
+/// @param Name        (optional) Stem for generated instruction names.
+///
+/// @return An error if the atomic operation could not be emitted.
 Error emitAtomicStoreBuiltin(
     Value *AtomicPtr, Value *ValPtr,
     std::variant<Type *, uint64_t> TypeOrSize, bool IsVolatile,
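As a hedged illustration of the `TypeOrSize` parameter documented above (a fragment; it assumes a `Builder` in scope and is not taken from the patch itself):

```cpp
// Either alternative of the variant may be passed. When a byte count is
// given, the implementation type-puns to an integer of at least that size.
std::variant<llvm::Type *, uint64_t> ByType = Builder.getInt32Ty(); // typed
std::variant<llvm::Type *, uint64_t> BySize = uint64_t{16};         // 16 bytes
```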
@@ -253,9 +299,9 @@
 ///                    call returns \p ExpectedPtr/\p ActualPtr will be the
 ///                    value as defined above (in contrast to being undefined).
 /// @param Align       (optional) Known alignment of /p Ptr. If omitted,
-///                    alignment is inferred from /p Ptr itself and falls back
-///                    to no alignment.
-/// @param Builder     User to emit instructions.
+///                    alignment is inferred from \p Ptr itself or falls back
+///                    to no alignment.
+/// @param Builder     Used to emit instructions.
 /// @param EmitOptions For controlling what IR is emitted.
 /// @param Name        (optional) Stem for generated instruction names.
 ///
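The hunk above belongs to the doc comment of the compare-exchange emitter, whose declaration lies outside the diff context. A sketch of the `ExpectedPtr` convention it describes, assuming `Builder` and an `llvm::Value *Expected` exist (the emitter call itself is elided since its signature is not shown in this diff):

```cpp
// The expected value is passed by pointer; after the emitter returns, the
// pointee holds the value actually observed at the atomic location, so a
// retry loop can reload it instead of reading undefined memory.
llvm::AllocaInst *ExpectedPtr = Builder.CreateAlloca(Expected->getType());
Builder.CreateStore(Expected, ExpectedPtr);
// ... emit the compare-exchange builtin with ExpectedPtr here ...
llvm::Value *Observed =
    Builder.CreateLoad(Expected->getType(), ExpectedPtr, "cmpxchg.observed");
```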
diff --git a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
index 4128218..6ef275e 100644
--- a/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
+++ b/llvm/lib/Transforms/Utils/BuildBuiltins.cpp
@@ -204,6 +204,9 @@ protected:
       return emitInst(IsWeak, SuccessMemorder, *FailureMemorderConst);
     }
 
+    Type *BoolTy = Builder.getInt1Ty();
+    IntegerType *Int32Ty = Builder.getInt32Ty();
+
     // Create all the relevant BB's
     BasicBlock *ContBB =
         splitBB(Builder, /*CreateBranch=*/false,
@@ -215,43 +218,33 @@ protected:
     // MonotonicBB is arbitrarily chosen as the default case; in practice,
     // this doesn't matter unless someone is crazy enough to use something
     // that doesn't fold to a constant for the ordering.
-    SwitchInst *SI = Builder.CreateSwitch(FailureMemorderCABI, MonotonicBB);
-    // Implemented as acquire, since it's the closest in LLVM.
-    SI->addCase(
-        Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::consume)),
-        AcquireBB);
-    SI->addCase(
-        Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::acquire)),
-        AcquireBB);
-    SI->addCase(
-        Builder.getInt32(static_cast<int32_t>(AtomicOrderingCABI::seq_cst)),
-        SeqCstBB);
+    Value *Order = Builder.CreateIntCast(FailureMemorderCABI, Int32Ty, false);
+    SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
 
     // TODO: Do not insert PHINode if operation cannot fail
     Builder.SetInsertPoint(ContBB, ContBB->begin());
     PHINode *Result =
-        Builder.CreatePHI(Builder.getInt1Ty(), 3,
+        Builder.CreatePHI(BoolTy, /*NumReservedValues=*/3,
                           Name + "." + getBuiltinSig() + ".failorder.success");
     IRBuilderBase::InsertPoint ContIP = Builder.saveIP();
 
-    // Emit all the different atomics
-    Builder.SetInsertPoint(MonotonicBB);
-    Value *MonotonicResult =
-        emitInst(IsWeak, SuccessMemorder, AtomicOrdering::Monotonic);
-    Builder.CreateBr(ContBB);
-    Result->addIncoming(MonotonicResult, Builder.GetInsertBlock());
-
-    Builder.SetInsertPoint(AcquireBB);
-    Value *AcquireResult =
-        emitInst(IsWeak, SuccessMemorder, AtomicOrdering::Acquire);
-    Builder.CreateBr(ContBB);
-    Result->addIncoming(AcquireResult, Builder.GetInsertBlock());
+    auto EmitCaseImpl = [&](BasicBlock *CaseBB, AtomicOrdering AO,
+                            bool IsDefault = false) {
+      if (!IsDefault) {
+        for (auto CABI : seq<int>(0, 6)) {
+          if (fromCABI(CABI) == AO)
+            SI->addCase(Builder.getInt32(CABI), CaseBB);
+        }
+      }
+      Builder.SetInsertPoint(CaseBB);
+      Value *AtomicResult = emitInst(IsWeak, SuccessMemorder, AO);
+      Builder.CreateBr(ContBB);
+      Result->addIncoming(AtomicResult, Builder.GetInsertBlock());
+    };
 
-    Builder.SetInsertPoint(SeqCstBB);
-    Value *SeqCstResult = emitInst(IsWeak, SuccessMemorder,
-                                   AtomicOrdering::SequentiallyConsistent);
-    Builder.CreateBr(ContBB);
-    Result->addIncoming(SeqCstResult, Builder.GetInsertBlock());
+    EmitCaseImpl(MonotonicBB, AtomicOrdering::Monotonic, /*IsDefault=*/true);
+    EmitCaseImpl(AcquireBB, AtomicOrdering::Acquire);
+    EmitCaseImpl(SeqCstBB, AtomicOrdering::SequentiallyConsistent);
 
     Builder.restoreIP(ContIP);
     return Result;
@@ -262,6 +255,7 @@ protected:
       return emitFailureMemorderSwitch(IsWeak, *SuccessMemorderConst);
 
     Type *BoolTy = Builder.getInt1Ty();
+    IntegerType *Int32Ty = Builder.getInt32Ty();
 
     // Create all the relevant BB's
    BasicBlock *ContBB =
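The `seq<int>(0, 6)` loop enumerates the six C ABI orderings (relaxed, consume, acquire, release, acq_rel, seq_cst) and attaches a switch case for every value that maps to the ordering being emitted; consume thereby shares `AcquireBB` with acquire, subsuming the removed "Implemented as acquire" special case. A sketch of the mapping the loop relies on (the patch's actual `fromCABI` helper is not shown in this diff and may differ):

```cpp
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

// Assumed CABI-to-LLVM ordering mapping. consume lowers to acquire, which
// is why one AtomicOrdering can collect two case labels in the loop above.
static AtomicOrdering fromCABISketch(int CABI) {
  switch (static_cast<AtomicOrderingCABI>(CABI)) {
  case AtomicOrderingCABI::relaxed:
    return AtomicOrdering::Monotonic;
  case AtomicOrderingCABI::consume:
  case AtomicOrderingCABI::acquire:
    return AtomicOrdering::Acquire;
  case AtomicOrderingCABI::release:
    return AtomicOrdering::Release;
  case AtomicOrderingCABI::acq_rel:
    return AtomicOrdering::AcquireRelease;
  case AtomicOrderingCABI::seq_cst:
    return AtomicOrdering::SequentiallyConsistent;
  }
  llvm_unreachable("invalid C ABI memory ordering");
}
```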
@@ -282,67 +276,39 @@ protected:
     // MonotonicBB is arbitrarily chosen as the default case; in practice,
     // this doesn't matter unless someone is crazy enough to use something
     // that doesn't fold to a constant for the ordering.
-    IntegerType *IntTy = getIntTy(Builder, EmitOptions.TLI);
-    Value *Order = Builder.CreateIntCast(SuccessMemorderCABI, IntTy, false);
+    Value *Order = Builder.CreateIntCast(SuccessMemorderCABI, Int32Ty, false);
     SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
 
     // TODO: No PHI if operation cannot fail
     Builder.SetInsertPoint(ContBB, ContBB->begin());
-    PHINode *Result = Builder.CreatePHI(
-        BoolTy, 5, Name + "." + getBuiltinSig() + ".memorder.success");
+    PHINode *Result =
+        Builder.CreatePHI(BoolTy, /*NumReservedValues=*/5,
+                          Name + "." + getBuiltinSig() + ".memorder.success");
     IRBuilderBase::InsertPoint ContIP = Builder.saveIP();
 
-    // Emit all the different atomics
-    Builder.SetInsertPoint(MonotonicBB);
-    Value *MonotonicResult =
-        emitFailureMemorderSwitch(IsWeak, AtomicOrdering::Monotonic);
-    Builder.CreateBr(ContBB);
-    Result->addIncoming(MonotonicResult, Builder.GetInsertBlock());
-
-    if (AcquireBB) {
-      SI->addCase(
-          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::consume)),
-          AcquireBB);
-      SI->addCase(
-          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acquire)),
-          AcquireBB);
-      Builder.SetInsertPoint(AcquireBB);
-      Value *AcquireResult =
-          emitFailureMemorderSwitch(IsWeak, AtomicOrdering::Acquire);
-      Builder.CreateBr(ContBB);
-      Result->addIncoming(AcquireResult, Builder.GetInsertBlock());
-    }
-
-    if (ReleaseBB) {
-      SI->addCase(
-          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::release)),
-          ReleaseBB);
-      Builder.SetInsertPoint(ReleaseBB);
-      Value *ReleaseResult =
-          emitFailureMemorderSwitch(IsWeak, AtomicOrdering::Release);
-      Builder.CreateBr(ContBB);
-      Result->addIncoming(ReleaseResult, Builder.GetInsertBlock());
-    }
+    auto EmitCaseImpl = [&](BasicBlock *CaseBB, AtomicOrdering AO,
+                            bool IsDefault = false) {
+      if (!CaseBB)
+        return;
 
-    if (AcqRelBB) {
-      SI->addCase(
-          Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::acq_rel)),
-          AcqRelBB);
-      Builder.SetInsertPoint(AcqRelBB);
-      Value *AcqRelResult =
-          emitFailureMemorderSwitch(IsWeak, AtomicOrdering::AcquireRelease);
+      if (!IsDefault) {
+        for (auto CABI : seq<int>(0, 6)) {
+          if (fromCABI(CABI) == AO)
+            SI->addCase(Builder.getInt32(CABI), CaseBB);
+        }
+      }
+      Builder.SetInsertPoint(CaseBB);
+      Value *AtomicResult = emitFailureMemorderSwitch(IsWeak, AO);
       Builder.CreateBr(ContBB);
-      Result->addIncoming(AcqRelResult, Builder.GetInsertBlock());
-    }
+      Result->addIncoming(AtomicResult, Builder.GetInsertBlock());
+    };
 
-    SI->addCase(
-        Builder.getInt32(static_cast<uint32_t>(AtomicOrderingCABI::seq_cst)),
-        SeqCstBB);
-    Builder.SetInsertPoint(SeqCstBB);
-    Value *SeqCstResult = emitFailureMemorderSwitch(
-        IsWeak, AtomicOrdering::SequentiallyConsistent);
-    Builder.CreateBr(ContBB);
-    Result->addIncoming(SeqCstResult, Builder.GetInsertBlock());
+    // Emit all the different atomics.
+    EmitCaseImpl(MonotonicBB, AtomicOrdering::Monotonic, /*IsDefault=*/true);
+    EmitCaseImpl(AcquireBB, AtomicOrdering::Acquire);
+    EmitCaseImpl(ReleaseBB, AtomicOrdering::Release);
+    EmitCaseImpl(AcqRelBB, AtomicOrdering::AcquireRelease);
+    EmitCaseImpl(SeqCstBB, AtomicOrdering::SequentiallyConsistent);
 
     Builder.restoreIP(ContIP);
     return Result;
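Taken together, both refactored functions follow the same shape. Below is a self-contained sketch of that switch-over-dynamic-ordering pattern, with `EmitOp` and `FromCABI` as hypothetical stand-ins for `emitInst`/`emitFailureMemorderSwitch` and the patch's `fromCABI` helper (this is an illustration under those assumptions, not the patch's exact code):

```cpp
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace llvm;

// Switch over a non-constant C ABI memory order, emit the operation once per
// supported ordering, and merge the per-case results in a PHI.
static Value *emitOrderSwitchSketch(
    IRBuilderBase &Builder, Value *OrderCABI,
    function_ref<Value *(AtomicOrdering)> EmitOp,
    function_ref<AtomicOrdering(int)> FromCABI) {
  LLVMContext &Ctx = Builder.getContext();
  Function *F = Builder.GetInsertBlock()->getParent();
  BasicBlock *MonotonicBB = BasicBlock::Create(Ctx, "monotonic", F);
  BasicBlock *AcquireBB = BasicBlock::Create(Ctx, "acquire", F);
  BasicBlock *SeqCstBB = BasicBlock::Create(Ctx, "seqcst", F);
  BasicBlock *ContBB = BasicBlock::Create(Ctx, "cont", F);

  // Normalize the dynamic ordering to i32; MonotonicBB doubles as the
  // default case for out-of-range values.
  Value *Order = Builder.CreateIntCast(OrderCABI, Builder.getInt32Ty(),
                                       /*isSigned=*/false);
  SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);

  Builder.SetInsertPoint(ContBB);
  PHINode *Result = Builder.CreatePHI(Builder.getInt1Ty(), 3, "order.result");

  auto EmitCase = [&](BasicBlock *CaseBB, AtomicOrdering AO, bool IsDefault) {
    if (!IsDefault)
      for (int CABI : seq<int>(0, 6)) // the six C ABI orderings
        if (FromCABI(CABI) == AO)
          SI->addCase(Builder.getInt32(CABI), CaseBB);
    Builder.SetInsertPoint(CaseBB);
    Value *V = EmitOp(AO);
    Builder.CreateBr(ContBB);
    Result->addIncoming(V, Builder.GetInsertBlock());
  };

  EmitCase(MonotonicBB, AtomicOrdering::Monotonic, /*IsDefault=*/true);
  EmitCase(AcquireBB, AtomicOrdering::Acquire, /*IsDefault=*/false);
  EmitCase(SeqCstBB, AtomicOrdering::SequentiallyConsistent,
           /*IsDefault=*/false);

  // Leave the builder in the continuation block, mirroring the original
  // code's saveIP/restoreIP dance.
  Builder.SetInsertPoint(ContBB);
  return Result;
}
```

The consume and acquire C ABI values both map to `AtomicOrdering::Acquire`, so the inner loop naturally adds two case labels for `AcquireBB`; that is the behavior the removed hand-written `SI->addCase` sequences encoded explicitly.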