Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--  clang/lib/CodeGen/CGAtomic.cpp        | 153
-rw-r--r--  clang/lib/CodeGen/CGHLSLBuiltins.cpp  |  23
-rw-r--r--  clang/lib/CodeGen/CGHLSLRuntime.h     |   1
-rw-r--r--  clang/lib/CodeGen/CodeGenTBAA.cpp     |   3
-rw-r--r--  clang/lib/CodeGen/Targets/AMDGPU.cpp  |  20
5 files changed, 138 insertions(+), 62 deletions(-)
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index d95dab3..5bf1816 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -374,10 +374,9 @@ bool AtomicInfo::emitMemSetZeroIfNecessary() const {
}
static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
- Address Dest, Address Ptr,
- Address Val1, Address Val2,
- uint64_t Size,
- llvm::AtomicOrdering SuccessOrder,
+ Address Dest, Address Ptr, Address Val1,
+ Address Val2, Address ExpectedResult,
+ uint64_t Size, llvm::AtomicOrdering SuccessOrder,
llvm::AtomicOrdering FailureOrder,
llvm::SyncScope::ID Scope) {
// Note that cmpxchg doesn't support weak cmpxchg, at least at the moment.
@@ -411,8 +410,30 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
CGF.Builder.SetInsertPoint(StoreExpectedBB);
// Update the memory at Expected with Old's value.
- auto *I = CGF.Builder.CreateStore(Old, Val1);
- CGF.addInstToCurrentSourceAtom(I, Old);
+ llvm::Type *ExpectedType = ExpectedResult.getElementType();
+ const llvm::DataLayout &DL = CGF.CGM.getDataLayout();
+ uint64_t ExpectedSizeInBytes = DL.getTypeStoreSize(ExpectedType);
+
+ if (ExpectedSizeInBytes == Size) {
+ // Sizes match: store directly
+ auto *I = CGF.Builder.CreateStore(Old, ExpectedResult);
+ CGF.addInstToCurrentSourceAtom(I, Old);
+ } else {
+ // Store only the first ExpectedSizeInBytes bytes of Old.
+ llvm::Type *OldType = Old->getType();
+
+ // Allocate temporary storage for Old value
+ Address OldTmp =
+ CGF.CreateTempAlloca(OldType, Ptr.getAlignment(), "old.tmp");
+
+ // Store Old into this temporary
+ auto *I = CGF.Builder.CreateStore(Old, OldTmp);
+ CGF.addInstToCurrentSourceAtom(I, Old);
+
+ // Perform memcpy for first ExpectedSizeInBytes bytes
+ CGF.Builder.CreateMemCpy(ExpectedResult, OldTmp, ExpectedSizeInBytes,
+ /*isVolatile=*/false);
+ }
// Finally, branch to the exit point.
CGF.Builder.CreateBr(ContinueBB);
@@ -425,13 +446,11 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
/// Given an ordering required on success, emit all possible cmpxchg
/// instructions to cope with the provided (but possibly only dynamically known)
/// FailureOrder.
-static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
- bool IsWeak, Address Dest, Address Ptr,
- Address Val1, Address Val2,
- llvm::Value *FailureOrderVal,
- uint64_t Size,
- llvm::AtomicOrdering SuccessOrder,
- llvm::SyncScope::ID Scope) {
+static void emitAtomicCmpXchgFailureSet(
+ CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak, Address Dest, Address Ptr,
+ Address Val1, Address Val2, Address ExpectedResult,
+ llvm::Value *FailureOrderVal, uint64_t Size,
+ llvm::AtomicOrdering SuccessOrder, llvm::SyncScope::ID Scope) {
llvm::AtomicOrdering FailureOrder;
if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
auto FOS = FO->getSExtValue();
@@ -458,8 +477,8 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
// success argument". This condition has been lifted and the only
// precondition is 31.7.2.18. Effectively treat this as a DR and skip
// language version checks.
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
- FailureOrder, Scope);
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, ExpectedResult,
+ Size, SuccessOrder, FailureOrder, Scope);
return;
}
@@ -483,18 +502,19 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
// Emit all the different atomics
CGF.Builder.SetInsertPoint(MonotonicBB);
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
- Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, ExpectedResult, Size,
+ SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
CGF.Builder.CreateBr(ContBB);
CGF.Builder.SetInsertPoint(AcquireBB);
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
- llvm::AtomicOrdering::Acquire, Scope);
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, ExpectedResult, Size,
+ SuccessOrder, llvm::AtomicOrdering::Acquire, Scope);
CGF.Builder.CreateBr(ContBB);
CGF.Builder.SetInsertPoint(SeqCstBB);
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
- llvm::AtomicOrdering::SequentiallyConsistent, Scope);
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, ExpectedResult, Size,
+ SuccessOrder, llvm::AtomicOrdering::SequentiallyConsistent,
+ Scope);
CGF.Builder.CreateBr(ContBB);
CGF.Builder.SetInsertPoint(ContBB);
@@ -538,8 +558,9 @@ static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder,
static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
Address Ptr, Address Val1, Address Val2,
- llvm::Value *IsWeak, llvm::Value *FailureOrder,
- uint64_t Size, llvm::AtomicOrdering Order,
+ Address ExpectedResult, llvm::Value *IsWeak,
+ llvm::Value *FailureOrder, uint64_t Size,
+ llvm::AtomicOrdering Order,
llvm::SyncScope::ID Scope) {
llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
bool PostOpMinMax = false;
@@ -554,13 +575,15 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order, Scope);
+ ExpectedResult, FailureOrder, Size, Order,
+ Scope);
return;
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order, Scope);
+ ExpectedResult, FailureOrder, Size, Order,
+ Scope);
return;
case AtomicExpr::AO__atomic_compare_exchange:
case AtomicExpr::AO__atomic_compare_exchange_n:
@@ -568,7 +591,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__scoped_atomic_compare_exchange_n: {
if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
- Val1, Val2, FailureOrder, Size, Order, Scope);
+ Val1, Val2, ExpectedResult, FailureOrder,
+ Size, Order, Scope);
} else {
// Create all the relevant BB's
llvm::BasicBlock *StrongBB =
@@ -582,12 +606,14 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
CGF.Builder.SetInsertPoint(StrongBB);
emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order, Scope);
+ ExpectedResult, FailureOrder, Size, Order,
+ Scope);
CGF.Builder.CreateBr(ContBB);
CGF.Builder.SetInsertPoint(WeakBB);
emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order, Scope);
+ ExpectedResult, FailureOrder, Size, Order,
+ Scope);
CGF.Builder.CreateBr(ContBB);
CGF.Builder.SetInsertPoint(ContBB);
@@ -797,9 +823,9 @@ EmitValToTemp(CodeGenFunction &CGF, Expr *E) {
static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
Address Ptr, Address Val1, Address Val2,
- llvm::Value *IsWeak, llvm::Value *FailureOrder,
- uint64_t Size, llvm::AtomicOrdering Order,
- llvm::Value *Scope) {
+ Address OriginalVal1, llvm::Value *IsWeak,
+ llvm::Value *FailureOrder, uint64_t Size,
+ llvm::AtomicOrdering Order, llvm::Value *Scope) {
auto ScopeModel = Expr->getScopeModel();
// LLVM atomic instructions always have sync scope. If clang atomic
@@ -816,8 +842,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
Order, CGF.getLLVMContext());
else
SS = llvm::SyncScope::System;
- EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
- Order, SS);
+ EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+ FailureOrder, Size, Order, SS);
return;
}
@@ -826,8 +852,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID(
CGF.CGM.getLangOpts(), ScopeModel->map(SC->getZExtValue()),
Order, CGF.CGM.getLLVMContext());
- EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
- Order, SCID);
+ EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+ FailureOrder, Size, Order, SCID);
return;
}
@@ -852,12 +878,11 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
SI->addCase(Builder.getInt32(S), B);
Builder.SetInsertPoint(B);
- EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
- Order,
- CGF.getTargetHooks().getLLVMSyncScopeID(CGF.CGM.getLangOpts(),
- ScopeModel->map(S),
- Order,
- CGF.getLLVMContext()));
+ EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+ FailureOrder, Size, Order,
+ CGF.getTargetHooks().getLLVMSyncScopeID(
+ CGF.CGM.getLangOpts(), ScopeModel->map(S), Order,
+ CGF.getLLVMContext()));
Builder.CreateBr(ContBB);
}
@@ -1058,6 +1083,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
LValue AtomicVal = MakeAddrLValue(Ptr, AtomicTy);
AtomicInfo Atomics(*this, AtomicVal);
+ Address OriginalVal1 = Val1;
if (ShouldCastToIntPtrTy) {
Ptr = Atomics.castToAtomicIntPointer(Ptr);
if (Val1.isValid())
@@ -1301,30 +1327,32 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
if (llvm::isValidAtomicOrderingCABI(ord))
switch ((llvm::AtomicOrderingCABI)ord) {
case llvm::AtomicOrderingCABI::relaxed:
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::Monotonic, Scope);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+ OrderFail, Size, llvm::AtomicOrdering::Monotonic, Scope);
break;
case llvm::AtomicOrderingCABI::consume:
case llvm::AtomicOrderingCABI::acquire:
if (IsStore)
break; // Avoid crashing on code with undefined behavior
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::Acquire, Scope);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+ OrderFail, Size, llvm::AtomicOrdering::Acquire, Scope);
break;
case llvm::AtomicOrderingCABI::release:
if (IsLoad)
break; // Avoid crashing on code with undefined behavior
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::Release, Scope);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+ OrderFail, Size, llvm::AtomicOrdering::Release, Scope);
break;
case llvm::AtomicOrderingCABI::acq_rel:
if (IsLoad || IsStore)
break; // Avoid crashing on code with undefined behavior
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::AcquireRelease, Scope);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+ OrderFail, Size, llvm::AtomicOrdering::AcquireRelease,
+ Scope);
break;
case llvm::AtomicOrderingCABI::seq_cst:
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+ OrderFail, Size,
llvm::AtomicOrdering::SequentiallyConsistent, Scope);
break;
}
@@ -1360,13 +1388,13 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// Emit all the different atomics
Builder.SetInsertPoint(MonotonicBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::Monotonic, Scope);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail,
+ Size, llvm::AtomicOrdering::Monotonic, Scope);
Builder.CreateBr(ContBB);
if (!IsStore) {
Builder.SetInsertPoint(AcquireBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::Acquire, Scope);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+ OrderFail, Size, llvm::AtomicOrdering::Acquire, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
AcquireBB);
@@ -1375,23 +1403,23 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
}
if (!IsLoad) {
Builder.SetInsertPoint(ReleaseBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::Release, Scope);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+ OrderFail, Size, llvm::AtomicOrdering::Release, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
ReleaseBB);
}
if (!IsLoad && !IsStore) {
Builder.SetInsertPoint(AcqRelBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::AcquireRelease, Scope);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak,
+ OrderFail, Size, llvm::AtomicOrdering::AcquireRelease, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
AcqRelBB);
}
Builder.SetInsertPoint(SeqCstBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::SequentiallyConsistent, Scope);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail,
+ Size, llvm::AtomicOrdering::SequentiallyConsistent, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
SeqCstBB);
@@ -1417,6 +1445,11 @@ Address AtomicInfo::convertToAtomicIntPointer(Address Addr) const {
uint64_t SourceSizeInBits = CGF.CGM.getDataLayout().getTypeSizeInBits(Ty);
if (SourceSizeInBits != AtomicSizeInBits) {
Address Tmp = CreateTempAlloca();
+ CGF.Builder.CreateMemSet(
+ Tmp.emitRawPointer(CGF), llvm::ConstantInt::get(CGF.Int8Ty, 0),
+ CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits).getQuantity(),
+ Tmp.getAlignment().getAsAlign());
+
CGF.Builder.CreateMemCpy(Tmp, Addr,
std::min(AtomicSizeInBits, SourceSizeInBits) / 8);
Addr = Tmp;
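
For context on the CGAtomic.cpp changes above: they affect compare-exchange on types whose store size is smaller than the padded atomic width. On failure, the observed value must be copied back into the caller's expected object without writing past its end (the new memcpy path), and the widened temporary is zero-filled so its padding bytes are defined (the new CreateMemSet in convertToAtomicIntPointer). A minimal C++ sketch of the affected source pattern, using clang's C11 atomic builtins (_Atomic in C++ is a clang extension; the struct is illustrative):

// A 3-byte struct: clang pads its _Atomic version to 4 bytes so the
// compare-exchange can be lowered to a 32-bit cmpxchg.
struct Small { char a, b, c; };

bool update(_Atomic(Small) *obj, Small *expected, Small desired) {
  // On failure, only sizeof(Small) == 3 bytes of the observed value may be
  // written back into *expected; the padding byte added for the widened
  // cmpxchg must not spill past the 3-byte object.
  return __c11_atomic_compare_exchange_strong(obj, expected, desired,
                                              __ATOMIC_SEQ_CST,
                                              __ATOMIC_SEQ_CST);
}
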
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 4f2f5a76..384bd59 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -160,6 +160,16 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
return LastInst;
}
+static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
+ LValue &Stride) {
+ // Figure out the stride of the buffer elements from the handle type.
+ auto *HandleTy =
+ cast<HLSLAttributedResourceType>(HandleExpr->getType().getTypePtr());
+ QualType ElementTy = HandleTy->getContainedType();
+ Value *StrideValue = CGF->getTypeSize(ElementTy);
+ return CGF->Builder.CreateStore(StrideValue, Stride.getAddress());
+}
+
// Return dot product intrinsic that corresponds to the QT scalar type
static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
if (QT->isFloatingType())
@@ -372,6 +382,19 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
RetTy, CGM.getHLSLRuntime().getNonUniformResourceIndexIntrinsic(),
ArrayRef<Value *>{IndexOp});
}
+ case Builtin::BI__builtin_hlsl_resource_getdimensions_x: {
+ Value *Handle = EmitScalarExpr(E->getArg(0));
+ LValue Dim = EmitLValue(E->getArg(1));
+ llvm::Type *RetTy = llvm::Type::getInt32Ty(getLLVMContext());
+ Value *DimValue = Builder.CreateIntrinsic(
+ RetTy, CGM.getHLSLRuntime().getGetDimensionsXIntrinsic(),
+ ArrayRef<Value *>{Handle});
+ return Builder.CreateStore(DimValue, Dim.getAddress());
+ }
+ case Builtin::BI__builtin_hlsl_resource_getstride: {
+ LValue Stride = EmitLValue(E->getArg(1));
+ return emitBufferStride(this, E->getArg(0), Stride);
+ }
case Builtin::BI__builtin_hlsl_all: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
return Builder.CreateIntrinsic(
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 7c6c285..103b4a9 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -135,6 +135,7 @@ public:
GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter)
GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync,
group_memory_barrier_with_group_sync)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x)
//===----------------------------------------------------------------------===//
// End of reserved area for HLSL intrinsic getters.
diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 4e29d8a..cd08f3e 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -609,8 +609,7 @@ llvm::MDNode *CodeGenTBAA::getValidBaseTypeInfo(QualType QTy) {
// First calculate the metadata, before recomputing the insertion point, as
// the helper can recursively call us.
llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty);
- LLVM_ATTRIBUTE_UNUSED auto inserted =
- BaseTypeMetadataCache.insert({Ty, TypeNode});
+ [[maybe_unused]] auto inserted = BaseTypeMetadataCache.insert({Ty, TypeNode});
assert(inserted.second && "BaseType metadata was already inserted");
return TypeNode;
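
The CodeGenTBAA.cpp change replaces the LLVM_ATTRIBUTE_UNUSED macro with standard C++17 [[maybe_unused]]. A minimal standalone illustration of the same pattern (std::map stands in for BaseTypeMetadataCache):

#include <cassert>
#include <map>

int main() {
  std::map<int, int> cache; // stands in for BaseTypeMetadataCache
  // [[maybe_unused]] keeps -Wunused-variable quiet in NDEBUG builds, where
  // the assert below compiles away and 'inserted' is otherwise unreferenced.
  [[maybe_unused]] auto inserted = cache.insert({1, 42});
  assert(inserted.second && "entry was already inserted");
  return 0;
}
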
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0fcbf7e..16d5919 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -402,6 +402,26 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
}
+
+ if (auto *Attr = FD->getAttr<CUDAClusterDimsAttr>()) {
+ auto GetExprVal = [&](const auto &E) {
+ return E ? E->EvaluateKnownConstInt(M.getContext()).getExtValue() : 1;
+ };
+ unsigned X = GetExprVal(Attr->getX());
+ unsigned Y = GetExprVal(Attr->getY());
+ unsigned Z = GetExprVal(Attr->getZ());
+ llvm::SmallString<32> AttrVal;
+ llvm::raw_svector_ostream OS(AttrVal);
+ OS << X << ',' << Y << ',' << Z;
+ F->addFnAttr("amdgpu-cluster-dims", AttrVal.str());
+ }
+
+ // OpenCL doesn't support the cluster feature.
+ const TargetInfo &TTI = M.getContext().getTargetInfo();
+ if ((IsOpenCLKernel &&
+ TTI.hasFeatureEnabled(TTI.getTargetOpts().FeatureMap, "clusters")) ||
+ FD->hasAttr<CUDANoClusterAttr>())
+ F->addFnAttr("amdgpu-cluster-dims", "0,0,0");
}
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
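
For context on the AMDGPU.cpp hunk: the cluster dimensions are serialized into the "amdgpu-cluster-dims" function attribute as an "X,Y,Z" string, unspecified dimensions default to 1, and "0,0,0" marks functions that must not use clusters (OpenCL kernels on cluster-capable targets, or functions carrying CUDANoClusterAttr). A minimal standalone sketch of that string formatting, not clang code:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // A declaration that only specifies X (e.g. a cluster-dims value of 2) is
  // recorded as "2,1,1", since unspecified dimensions default to 1 above.
  unsigned X = 2, Y = 1, Z = 1;
  llvm::SmallString<32> AttrVal;
  llvm::raw_svector_ostream OS(AttrVal);
  OS << X << ',' << Y << ',' << Z;
  llvm::outs() << "amdgpu-cluster-dims=\"" << AttrVal.str() << "\"\n";
  return 0;
}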