Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--  clang/lib/CodeGen/CGAtomic.cpp       | 153
-rw-r--r--  clang/lib/CodeGen/CGHLSLBuiltins.cpp |  23
-rw-r--r--  clang/lib/CodeGen/CGHLSLRuntime.h    |   1
-rw-r--r--  clang/lib/CodeGen/CodeGenTBAA.cpp    |   3
-rw-r--r--  clang/lib/CodeGen/Targets/AMDGPU.cpp |  20
5 files changed, 138 insertions, 62 deletions
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index d95dab3..5bf1816 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -374,10 +374,9 @@ bool AtomicInfo::emitMemSetZeroIfNecessary() const {
 }
 
 static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
-                              Address Dest, Address Ptr,
-                              Address Val1, Address Val2,
-                              uint64_t Size,
-                              llvm::AtomicOrdering SuccessOrder,
+                              Address Dest, Address Ptr, Address Val1,
+                              Address Val2, Address ExpectedResult,
+                              uint64_t Size, llvm::AtomicOrdering SuccessOrder,
                               llvm::AtomicOrdering FailureOrder,
                               llvm::SyncScope::ID Scope) {
   // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment.
@@ -411,8 +410,30 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
   CGF.Builder.SetInsertPoint(StoreExpectedBB);
   // Update the memory at Expected with Old's value.
-  auto *I = CGF.Builder.CreateStore(Old, Val1);
-  CGF.addInstToCurrentSourceAtom(I, Old);
+  llvm::Type *ExpectedType = ExpectedResult.getElementType();
+  const llvm::DataLayout &DL = CGF.CGM.getDataLayout();
+  uint64_t ExpectedSizeInBytes = DL.getTypeStoreSize(ExpectedType);
+
+  if (ExpectedSizeInBytes == Size) {
+    // Sizes match: store directly
+    auto *I = CGF.Builder.CreateStore(Old, ExpectedResult);
+    CGF.addInstToCurrentSourceAtom(I, Old);
+  } else {
+    // store only the first ExpectedSizeInBytes bytes of Old
+    llvm::Type *OldType = Old->getType();
+
+    // Allocate temporary storage for Old value
+    Address OldTmp =
+        CGF.CreateTempAlloca(OldType, Ptr.getAlignment(), "old.tmp");
+
+    // Store Old into this temporary
+    auto *I = CGF.Builder.CreateStore(Old, OldTmp);
+    CGF.addInstToCurrentSourceAtom(I, Old);
+
+    // Perform memcpy for first ExpectedSizeInBytes bytes
+    CGF.Builder.CreateMemCpy(ExpectedResult, OldTmp, ExpectedSizeInBytes,
+                             /*isVolatile=*/false);
+  }
 
   // Finally, branch to the exit point.
   CGF.Builder.CreateBr(ContinueBB);
@@ -425,13 +446,11 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
 /// Given an ordering required on success, emit all possible cmpxchg
 /// instructions to cope with the provided (but possibly only dynamically known)
 /// FailureOrder.
-static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
-                                        bool IsWeak, Address Dest, Address Ptr,
-                                        Address Val1, Address Val2,
-                                        llvm::Value *FailureOrderVal,
-                                        uint64_t Size,
-                                        llvm::AtomicOrdering SuccessOrder,
-                                        llvm::SyncScope::ID Scope) {
+static void emitAtomicCmpXchgFailureSet(
+    CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak, Address Dest, Address Ptr,
+    Address Val1, Address Val2, Address ExpectedResult,
+    llvm::Value *FailureOrderVal, uint64_t Size,
+    llvm::AtomicOrdering SuccessOrder, llvm::SyncScope::ID Scope) {
   llvm::AtomicOrdering FailureOrder;
   if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
     auto FOS = FO->getSExtValue();
@@ -458,8 +477,8 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
     // success argument". This condition has been lifted and the only
     // precondition is 31.7.2.18. Effectively treat this as a DR and skip
    // language version checks.
-    emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
-                      FailureOrder, Scope);
+    emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, ExpectedResult,
+                      Size, SuccessOrder, FailureOrder, Scope);
     return;
   }
 
@@ -483,18 +502,19 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
 
   // Emit all the different atomics
   CGF.Builder.SetInsertPoint(MonotonicBB);
-  emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
-                    Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
+  emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, ExpectedResult, Size,
+                    SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
   CGF.Builder.CreateBr(ContBB);
 
   CGF.Builder.SetInsertPoint(AcquireBB);
-  emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
-                    llvm::AtomicOrdering::Acquire, Scope);
+  emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, ExpectedResult, Size,
+                    SuccessOrder, llvm::AtomicOrdering::Acquire, Scope);
   CGF.Builder.CreateBr(ContBB);
 
   CGF.Builder.SetInsertPoint(SeqCstBB);
-  emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
-                    llvm::AtomicOrdering::SequentiallyConsistent, Scope);
+  emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, ExpectedResult, Size,
+                    SuccessOrder, llvm::AtomicOrdering::SequentiallyConsistent,
+                    Scope);
   CGF.Builder.CreateBr(ContBB);
 
   CGF.Builder.SetInsertPoint(ContBB);
@@ -538,8 +558,9 @@ static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder,
 
 static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
                          Address Ptr, Address Val1, Address Val2,
-                         llvm::Value *IsWeak, llvm::Value *FailureOrder,
-                         uint64_t Size, llvm::AtomicOrdering Order,
+                         Address ExpectedResult, llvm::Value *IsWeak,
+                         llvm::Value *FailureOrder, uint64_t Size,
+                         llvm::AtomicOrdering Order,
                          llvm::SyncScope::ID Scope) {
   llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
   bool PostOpMinMax = false;
@@ -554,13 +575,15 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
   case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
   case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
     emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
-                                FailureOrder, Size, Order, Scope);
+                                ExpectedResult, FailureOrder, Size, Order,
+                                Scope);
     return;
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
   case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
   case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
     emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
-                                FailureOrder, Size, Order, Scope);
+                                ExpectedResult, FailureOrder, Size, Order,
+                                Scope);
     return;
   case AtomicExpr::AO__atomic_compare_exchange:
   case AtomicExpr::AO__atomic_compare_exchange_n:
@@ -568,7 +591,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
  case AtomicExpr::AO__scoped_atomic_compare_exchange_n: {
    if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
      emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
-                                  Val1, Val2, FailureOrder, Size, Order, Scope);
+                                  Val1, Val2, ExpectedResult, FailureOrder,
+                                  Size, Order, Scope);
    } else {
      // Create all the relevant BB's
      llvm::BasicBlock *StrongBB =
@@ -582,12 +606,14 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
 
      CGF.Builder.SetInsertPoint(StrongBB);
      emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
-                                  FailureOrder, Size, Order, Scope);
+                                  ExpectedResult, FailureOrder, Size, Order,
+                                  Scope);
      CGF.Builder.CreateBr(ContBB);
 
      CGF.Builder.SetInsertPoint(WeakBB);
      emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
-                                  FailureOrder, Size, Order, Scope);
+                                  ExpectedResult, FailureOrder, Size, Order,
+                                  Scope);
      CGF.Builder.CreateBr(ContBB);
 
      CGF.Builder.SetInsertPoint(ContBB);
@@ -797,9 +823,9 @@ EmitValToTemp(CodeGenFunction &CGF, Expr *E) {
 
 static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
                          Address Ptr, Address Val1, Address Val2,
-                         llvm::Value *IsWeak, llvm::Value *FailureOrder,
-                         uint64_t Size, llvm::AtomicOrdering Order,
-                         llvm::Value *Scope) {
+                         Address OriginalVal1, llvm::Value *IsWeak,
+                         llvm::Value *FailureOrder, uint64_t Size,
+                         llvm::AtomicOrdering Order, llvm::Value *Scope) {
   auto ScopeModel = Expr->getScopeModel();
 
   // LLVM atomic instructions always have sync scope. If clang atomic
@@ -816,8 +842,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
           Order, CGF.getLLVMContext());
     else
       SS = llvm::SyncScope::System;
-    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
-                 Order, SS);
+    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+                 FailureOrder, Size, Order, SS);
     return;
   }
 
@@ -826,8 +852,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
    auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID(
        CGF.CGM.getLangOpts(), ScopeModel->map(SC->getZExtValue()), Order,
        CGF.CGM.getLLVMContext());
-    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
-                 Order, SCID);
+    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+                 FailureOrder, Size, Order, SCID);
     return;
   }
 
@@ -852,12 +878,11 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
    SI->addCase(Builder.getInt32(S), B);
 
    Builder.SetInsertPoint(B);
-    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
-                 Order,
-                 CGF.getTargetHooks().getLLVMSyncScopeID(CGF.CGM.getLangOpts(),
-                                                         ScopeModel->map(S),
-                                                         Order,
-                                                         CGF.getLLVMContext()));
+    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+                 FailureOrder, Size, Order,
+                 CGF.getTargetHooks().getLLVMSyncScopeID(
+                     CGF.CGM.getLangOpts(), ScopeModel->map(S), Order,
+                     CGF.getLLVMContext()));
    Builder.CreateBr(ContBB);
  }
 
@@ -1058,6 +1083,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   LValue AtomicVal = MakeAddrLValue(Ptr, AtomicTy);
   AtomicInfo Atomics(*this, AtomicVal);
 
+  Address OriginalVal1 = Val1;
   if (ShouldCastToIntPtrTy) {
     Ptr = Atomics.castToAtomicIntPointer(Ptr);
     if (Val1.isValid())
@@ -1301,30 +1327,32 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
    if (llvm::isValidAtomicOrderingCABI(ord))
      switch ((llvm::AtomicOrderingCABI)ord) {
      case llvm::AtomicOrderingCABI::relaxed:
-        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Monotonic, Scope);
+        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+                     OrderFail, Size, llvm::AtomicOrdering::Monotonic, Scope);
        break;
      case llvm::AtomicOrderingCABI::consume:
      case llvm::AtomicOrderingCABI::acquire:
        if (IsStore)
          break; // Avoid crashing on code with undefined behavior
-        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Acquire, Scope);
+        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+                     OrderFail, Size, llvm::AtomicOrdering::Acquire, Scope);
        break;
      case llvm::AtomicOrderingCABI::release:
        if (IsLoad)
          break; // Avoid crashing on code with undefined behavior
-        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Release, Scope);
+        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+                     OrderFail, Size, llvm::AtomicOrdering::Release, Scope);
        break;
      case llvm::AtomicOrderingCABI::acq_rel:
        if (IsLoad || IsStore)
          break; // Avoid crashing on code with undefined behavior
-        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::AcquireRelease, Scope);
+        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+                     OrderFail, Size, llvm::AtomicOrdering::AcquireRelease,
+                     Scope);
        break;
      case llvm::AtomicOrderingCABI::seq_cst:
-        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+                     OrderFail, Size,
                      llvm::AtomicOrdering::SequentiallyConsistent, Scope);
        break;
      }
@@ -1360,13 +1388,13 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
 
   // Emit all the different atomics
   Builder.SetInsertPoint(MonotonicBB);
-  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-               llvm::AtomicOrdering::Monotonic, Scope);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail,
+               Size, llvm::AtomicOrdering::Monotonic, Scope);
   Builder.CreateBr(ContBB);
   if (!IsStore) {
     Builder.SetInsertPoint(AcquireBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                 llvm::AtomicOrdering::Acquire, Scope);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+                 OrderFail, Size, llvm::AtomicOrdering::Acquire, Scope);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                 AcquireBB);
@@ -1375,23 +1403,23 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   }
   if (!IsLoad) {
     Builder.SetInsertPoint(ReleaseBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                 llvm::AtomicOrdering::Release, Scope);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+                 OrderFail, Size, llvm::AtomicOrdering::Release, Scope);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
                 ReleaseBB);
   }
   if (!IsLoad && !IsStore) {
     Builder.SetInsertPoint(AcqRelBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                 llvm::AtomicOrdering::AcquireRelease, Scope);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+                 OrderFail, Size, llvm::AtomicOrdering::AcquireRelease, Scope);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
                 AcqRelBB);
   }
   Builder.SetInsertPoint(SeqCstBB);
-  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-               llvm::AtomicOrdering::SequentiallyConsistent, Scope);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail,
+               Size, llvm::AtomicOrdering::SequentiallyConsistent, Scope);
   Builder.CreateBr(ContBB);
   SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
               SeqCstBB);
@@ -1417,6 +1445,11 @@ Address AtomicInfo::convertToAtomicIntPointer(Address Addr) const {
   uint64_t SourceSizeInBits = CGF.CGM.getDataLayout().getTypeSizeInBits(Ty);
   if (SourceSizeInBits != AtomicSizeInBits) {
     Address Tmp = CreateTempAlloca();
+    CGF.Builder.CreateMemSet(
+        Tmp.emitRawPointer(CGF), llvm::ConstantInt::get(CGF.Int8Ty, 0),
+        CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits).getQuantity(),
+        Tmp.getAlignment().getAsAlign());
+
     CGF.Builder.CreateMemCpy(Tmp, Addr, std::min(AtomicSizeInBits,
                                                  SourceSizeInBits) / 8);
     Addr = Tmp;
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 4f2f5a76..384bd59 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -160,6 +160,16 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
   return LastInst;
 }
 
+static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
+                               LValue &Stride) {
+  // Figure out the stride of the buffer elements from the handle type.
+  auto *HandleTy =
+      cast<HLSLAttributedResourceType>(HandleExpr->getType().getTypePtr());
+  QualType ElementTy = HandleTy->getContainedType();
+  Value *StrideValue = CGF->getTypeSize(ElementTy);
+  return CGF->Builder.CreateStore(StrideValue, Stride.getAddress());
+}
+
 // Return dot product intrinsic that corresponds to the QT scalar type
 static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
   if (QT->isFloatingType())
@@ -372,6 +382,19 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         RetTy, CGM.getHLSLRuntime().getNonUniformResourceIndexIntrinsic(),
         ArrayRef<Value *>{IndexOp});
   }
+  case Builtin::BI__builtin_hlsl_resource_getdimensions_x: {
+    Value *Handle = EmitScalarExpr(E->getArg(0));
+    LValue Dim = EmitLValue(E->getArg(1));
+    llvm::Type *RetTy = llvm::Type::getInt32Ty(getLLVMContext());
+    Value *DimValue = Builder.CreateIntrinsic(
+        RetTy, CGM.getHLSLRuntime().getGetDimensionsXIntrinsic(),
+        ArrayRef<Value *>{Handle});
+    return Builder.CreateStore(DimValue, Dim.getAddress());
+  }
+  case Builtin::BI__builtin_hlsl_resource_getstride: {
+    LValue Stride = EmitLValue(E->getArg(1));
+    return emitBufferStride(this, E->getArg(0), Stride);
+  }
   case Builtin::BI__builtin_hlsl_all: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     return Builder.CreateIntrinsic(
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 7c6c285..103b4a9 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -135,6 +135,7 @@ public:
   GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter)
   GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync,
                                    group_memory_barrier_with_group_sync)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x)
 
   //===----------------------------------------------------------------------===//
   // End of reserved area for HLSL intrinsic getters.
diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 4e29d8a..cd08f3e 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -609,8 +609,7 @@ llvm::MDNode *CodeGenTBAA::getValidBaseTypeInfo(QualType QTy) {
   // First calculate the metadata, before recomputing the insertion point, as
   // the helper can recursively call us.
   llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty);
-  LLVM_ATTRIBUTE_UNUSED auto inserted =
-      BaseTypeMetadataCache.insert({Ty, TypeNode});
+  [[maybe_unused]] auto inserted = BaseTypeMetadataCache.insert({Ty, TypeNode});
   assert(inserted.second && "BaseType metadata was already inserted");
 
   return TypeNode;
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0fcbf7e..16d5919 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -402,6 +402,26 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
 
     F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
   }
+
+  if (auto *Attr = FD->getAttr<CUDAClusterDimsAttr>()) {
+    auto GetExprVal = [&](const auto &E) {
+      return E ? E->EvaluateKnownConstInt(M.getContext()).getExtValue() : 1;
+    };
+    unsigned X = GetExprVal(Attr->getX());
+    unsigned Y = GetExprVal(Attr->getY());
+    unsigned Z = GetExprVal(Attr->getZ());
+    llvm::SmallString<32> AttrVal;
+    llvm::raw_svector_ostream OS(AttrVal);
+    OS << X << ',' << Y << ',' << Z;
+    F->addFnAttr("amdgpu-cluster-dims", AttrVal.str());
+  }
+
+  // OpenCL doesn't support cluster feature.
+  const TargetInfo &TTI = M.getContext().getTargetInfo();
+  if ((IsOpenCLKernel &&
+       TTI.hasFeatureEnabled(TTI.getTargetOpts().FeatureMap, "clusters")) ||
+      FD->hasAttr<CUDANoClusterAttr>())
+    F->addFnAttr("amdgpu-cluster-dims", "0,0,0");
 }
 
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
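
A minimal C11 sketch of the situation the CGAtomic.cpp change addresses (a hypothetical illustration, not code from the patch or its tests): for an _Atomic type whose value size is not a power of two, clang may pad the atomic representation, so the compare-exchange below can operate on a widened 4-byte integer while the caller's expected object is only 3 bytes; on failure, only those 3 bytes may be written back.

#include <stdatomic.h>
#include <stdbool.h>

struct S { char a, b, c; };   /* 3 bytes of payload */
_Atomic struct S Obj;         /* atomic representation padded to 4 bytes */

bool try_swap(struct S *expected, struct S desired) {
  /* The cmpxchg is emitted on the padded 4-byte representation. On failure,
     the patched codegen spills the widened old value to a temporary and
     memcpys only sizeof(struct S) == 3 bytes back into *expected, instead of
     storing the full 4-byte integer over the smaller object. The memset added
     in convertToAtomicIntPointer likewise zero-fills the widened temporaries
     so padding bytes have a defined value before the comparison. */
  return atomic_compare_exchange_strong(&Obj, expected, desired);
}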