aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--clang/lib/CodeGen/CGHLSLBuiltins.cpp23
-rw-r--r--clang/lib/CodeGen/CGHLSLRuntime.h1
-rw-r--r--clang/lib/CodeGen/CodeGenTBAA.cpp3
-rw-r--r--clang/lib/CodeGen/Targets/AMDGPU.cpp20
4 files changed, 45 insertions, 2 deletions
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 4f2f5a76..384bd59 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -160,6 +160,16 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
return LastInst;
}
+static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
+ LValue &Stride) {
+ // Figure out the stride of the buffer elements from the handle type.
+ auto *HandleTy =
+ cast<HLSLAttributedResourceType>(HandleExpr->getType().getTypePtr());
+ QualType ElementTy = HandleTy->getContainedType();
+ Value *StrideValue = CGF->getTypeSize(ElementTy);
+ return CGF->Builder.CreateStore(StrideValue, Stride.getAddress());
+}
+
// Return dot product intrinsic that corresponds to the QT scalar type
static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
if (QT->isFloatingType())
@@ -372,6 +382,19 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
RetTy, CGM.getHLSLRuntime().getNonUniformResourceIndexIntrinsic(),
ArrayRef<Value *>{IndexOp});
}
+ case Builtin::BI__builtin_hlsl_resource_getdimensions_x: {
+ Value *Handle = EmitScalarExpr(E->getArg(0));
+ LValue Dim = EmitLValue(E->getArg(1));
+ llvm::Type *RetTy = llvm::Type::getInt32Ty(getLLVMContext());
+ Value *DimValue = Builder.CreateIntrinsic(
+ RetTy, CGM.getHLSLRuntime().getGetDimensionsXIntrinsic(),
+ ArrayRef<Value *>{Handle});
+ return Builder.CreateStore(DimValue, Dim.getAddress());
+ }
+ case Builtin::BI__builtin_hlsl_resource_getstride: {
+ LValue Stride = EmitLValue(E->getArg(1));
+ return emitBufferStride(this, E->getArg(0), Stride);
+ }
case Builtin::BI__builtin_hlsl_all: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
return Builder.CreateIntrinsic(
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 7c6c285..103b4a9 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -135,6 +135,7 @@ public:
GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter)
GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync,
group_memory_barrier_with_group_sync)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x)
//===----------------------------------------------------------------------===//
// End of reserved area for HLSL intrinsic getters.
diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 4e29d8a..cd08f3e 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -609,8 +609,7 @@ llvm::MDNode *CodeGenTBAA::getValidBaseTypeInfo(QualType QTy) {
// First calculate the metadata, before recomputing the insertion point, as
// the helper can recursively call us.
llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty);
- LLVM_ATTRIBUTE_UNUSED auto inserted =
- BaseTypeMetadataCache.insert({Ty, TypeNode});
+ [[maybe_unused]] auto inserted = BaseTypeMetadataCache.insert({Ty, TypeNode});
assert(inserted.second && "BaseType metadata was already inserted");
return TypeNode;
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0fcbf7e..16d5919 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -402,6 +402,26 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
}
+
+ if (auto *Attr = FD->getAttr<CUDAClusterDimsAttr>()) {
+ auto GetExprVal = [&](const auto &E) {
+ return E ? E->EvaluateKnownConstInt(M.getContext()).getExtValue() : 1;
+ };
+ unsigned X = GetExprVal(Attr->getX());
+ unsigned Y = GetExprVal(Attr->getY());
+ unsigned Z = GetExprVal(Attr->getZ());
+ llvm::SmallString<32> AttrVal;
+ llvm::raw_svector_ostream OS(AttrVal);
+ OS << X << ',' << Y << ',' << Z;
+ F->addFnAttr("amdgpu-cluster-dims", AttrVal.str());
+ }
+
+ // OpenCL doesn't support cluster feature.
+ const TargetInfo &TTI = M.getContext().getTargetInfo();
+ if ((IsOpenCLKernel &&
+ TTI.hasFeatureEnabled(TTI.getTargetOpts().FeatureMap, "clusters")) ||
+ FD->hasAttr<CUDANoClusterAttr>())
+ F->addFnAttr("amdgpu-cluster-dims", "0,0,0");
}
void AMDGPUTargetCodeGenInfo::setTargetAttributes(