diff options
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGHLSLBuiltins.cpp | 23 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGHLSLRuntime.h | 1 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenTBAA.cpp | 3 | ||||
-rw-r--r-- | clang/lib/CodeGen/Targets/AMDGPU.cpp | 20 |
4 files changed, 45 insertions, 2 deletions
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 4f2f5a76..384bd59 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -160,6 +160,16 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
   return LastInst;
 }
 
+static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
+                               LValue &Stride) {
+  // Figure out the stride of the buffer elements from the handle type.
+  auto *HandleTy =
+      cast<HLSLAttributedResourceType>(HandleExpr->getType().getTypePtr());
+  QualType ElementTy = HandleTy->getContainedType();
+  Value *StrideValue = CGF->getTypeSize(ElementTy);
+  return CGF->Builder.CreateStore(StrideValue, Stride.getAddress());
+}
+
 // Return dot product intrinsic that corresponds to the QT scalar type
 static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
   if (QT->isFloatingType())
@@ -372,6 +382,19 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         RetTy, CGM.getHLSLRuntime().getNonUniformResourceIndexIntrinsic(),
         ArrayRef<Value *>{IndexOp});
   }
+  case Builtin::BI__builtin_hlsl_resource_getdimensions_x: {
+    Value *Handle = EmitScalarExpr(E->getArg(0));
+    LValue Dim = EmitLValue(E->getArg(1));
+    llvm::Type *RetTy = llvm::Type::getInt32Ty(getLLVMContext());
+    Value *DimValue = Builder.CreateIntrinsic(
+        RetTy, CGM.getHLSLRuntime().getGetDimensionsXIntrinsic(),
+        ArrayRef<Value *>{Handle});
+    return Builder.CreateStore(DimValue, Dim.getAddress());
+  }
+  case Builtin::BI__builtin_hlsl_resource_getstride: {
+    LValue Stride = EmitLValue(E->getArg(1));
+    return emitBufferStride(this, E->getArg(0), Stride);
+  }
   case Builtin::BI__builtin_hlsl_all: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     return Builder.CreateIntrinsic(
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 7c6c285..103b4a9 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -135,6 +135,7 @@ public:
   GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter)
   GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync,
                                    group_memory_barrier_with_group_sync)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x)
 
   //===----------------------------------------------------------------------===//
   // End of reserved area for HLSL intrinsic getters.
diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 4e29d8a..cd08f3e 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -609,8 +609,7 @@ llvm::MDNode *CodeGenTBAA::getValidBaseTypeInfo(QualType QTy) {
   // First calculate the metadata, before recomputing the insertion point, as
   // the helper can recursively call us.
   llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty);
-  LLVM_ATTRIBUTE_UNUSED auto inserted =
-      BaseTypeMetadataCache.insert({Ty, TypeNode});
+  [[maybe_unused]] auto inserted = BaseTypeMetadataCache.insert({Ty, TypeNode});
   assert(inserted.second && "BaseType metadata was already inserted");
 
   return TypeNode;
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0fcbf7e..16d5919 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -402,6 +402,26 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
 
     F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
   }
+
+  if (auto *Attr = FD->getAttr<CUDAClusterDimsAttr>()) {
+    auto GetExprVal = [&](const auto &E) {
+      return E ? E->EvaluateKnownConstInt(M.getContext()).getExtValue() : 1;
+    };
+    unsigned X = GetExprVal(Attr->getX());
+    unsigned Y = GetExprVal(Attr->getY());
+    unsigned Z = GetExprVal(Attr->getZ());
+    llvm::SmallString<32> AttrVal;
+    llvm::raw_svector_ostream OS(AttrVal);
+    OS << X << ',' << Y << ',' << Z;
+    F->addFnAttr("amdgpu-cluster-dims", AttrVal.str());
+  }
+
+  // OpenCL doesn't support cluster feature.
+  const TargetInfo &TTI = M.getContext().getTargetInfo();
+  if ((IsOpenCLKernel &&
+       TTI.hasFeatureEnabled(TTI.getTargetOpts().FeatureMap, "clusters")) ||
+      FD->hasAttr<CUDANoClusterAttr>())
+    F->addFnAttr("amdgpu-cluster-dims", "0,0,0");
 }
 
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(