author     Nick Sarnie <nick.sarnie@intel.com>   2025-06-05 10:15:38 -0400
committer  GitHub <noreply@github.com>           2025-06-05 14:15:38 +0000
commit     3b9ebe92011b033523217a9b9a2f03f4c8c37aab (patch)
tree       bb519b3f1df97024db7a14874c82ae0fd65b6d7d /clang/lib
parent     b2379bd5d59993c0d859ad90f9f5cdfcfce26e71 (diff)
[clang] Simplify device kernel attributes (#137882)
We have several different attributes in clang representing device kernels for specific targets and languages. Refactor them into one attribute with different spellings so it scales more easily to new languages and targets.

Signed-off-by: Sarnie, Nick <nick.sarnie@intel.com>
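For context, these are the kinds of declarations that end up under the single DeviceKernelAttr after this change. The __kernel, amdgpu_kernel, nvptx_kernel and sycl_kernel spellings already existed as separate attribute classes; device_kernel is the generic name seen in this diff. Which spelling is accepted in which language/target mode is governed by the Attr.td changes outside clang/lib, so treat this as an illustrative sketch rather than a complete list:

    // OpenCL C: the __kernel keyword spelling.
    __kernel void ocl_kern(__global int *dst) { dst[0] = 1; }

    // Pre-existing GNU-style target spellings, previously separate attributes.
    __attribute__((amdgpu_kernel)) void amd_kern(void);
    __attribute__((nvptx_kernel)) void ptx_kern(void);

    // Generic spelling used by the merged attribute and calling convention.
    __attribute__((device_kernel)) void generic_kern(void);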
Diffstat (limited to 'clang/lib')
-rw-r--r--  clang/lib/AST/Decl.cpp | 5
-rw-r--r--  clang/lib/AST/ItaniumMangle.cpp | 6
-rw-r--r--  clang/lib/AST/MicrosoftMangle.cpp | 4
-rw-r--r--  clang/lib/AST/Type.cpp | 8
-rw-r--r--  clang/lib/AST/TypePrinter.cpp | 9
-rw-r--r--  clang/lib/Basic/Targets/AArch64.cpp | 4
-rw-r--r--  clang/lib/Basic/Targets/AMDGPU.h | 3
-rw-r--r--  clang/lib/Basic/Targets/ARM.cpp | 4
-rw-r--r--  clang/lib/Basic/Targets/BPF.h | 2
-rw-r--r--  clang/lib/Basic/Targets/Mips.cpp | 2
-rw-r--r--  clang/lib/Basic/Targets/SPIR.h | 2
-rw-r--r--  clang/lib/Basic/Targets/SystemZ.h | 2
-rw-r--r--  clang/lib/Basic/Targets/X86.h | 23
-rw-r--r--  clang/lib/CodeGen/CGCall.cpp | 30
-rw-r--r--  clang/lib/CodeGen/CGDebugInfo.cpp | 5
-rw-r--r--  clang/lib/CodeGen/CGExpr.cpp | 4
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.cpp | 5
-rw-r--r--  clang/lib/CodeGen/CodeGenModule.cpp | 11
-rw-r--r--  clang/lib/CodeGen/TargetInfo.cpp | 2
-rw-r--r--  clang/lib/CodeGen/Targets/AMDGPU.cpp | 6
-rw-r--r--  clang/lib/CodeGen/Targets/NVPTX.cpp | 45
-rw-r--r--  clang/lib/CodeGen/Targets/SPIR.cpp | 2
-rw-r--r--  clang/lib/CodeGen/Targets/TCE.cpp | 2
-rw-r--r--  clang/lib/Sema/SemaDecl.cpp | 14
-rw-r--r--  clang/lib/Sema/SemaDeclAttr.cpp | 61
-rw-r--r--  clang/lib/Sema/SemaSYCL.cpp | 2
-rw-r--r--  clang/lib/Sema/SemaTemplateInstantiateDecl.cpp | 8
-rw-r--r--  clang/lib/Sema/SemaType.cpp | 43
28 files changed, 185 insertions, 129 deletions
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 8425e40..aad2d82 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -3541,7 +3541,7 @@ bool FunctionDecl::isExternC() const {
}
bool FunctionDecl::isInExternCContext() const {
- if (hasAttr<OpenCLKernelAttr>())
+ if (DeviceKernelAttr::isOpenCLSpelling(getAttr<DeviceKernelAttr>()))
return true;
return getLexicalDeclContext()->isExternCContext();
}
@@ -5510,7 +5510,8 @@ FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) {
}
bool FunctionDecl::isReferenceableKernel() const {
- return hasAttr<CUDAGlobalAttr>() || hasAttr<OpenCLKernelAttr>();
+ return hasAttr<CUDAGlobalAttr>() ||
+ DeviceKernelAttr::isOpenCLSpelling(getAttr<DeviceKernelAttr>());
}
BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) {
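Call sites that used to ask hasAttr<OpenCLKernelAttr>() now ask whether the merged attribute, if present, was written with its OpenCL spelling. The real helper is generated from the Attr.td changes outside clang/lib; the sketch below only shows the shape implied by these call sites (null-tolerant, because the result of getAttr<DeviceKernelAttr>() is passed in directly) and uses a placeholder spelling check, not the actual implementation:

    // Hypothetical sketch, not the generated helper.
    static bool isOpenCLSpellingSketch(const clang::DeviceKernelAttr *A) {
      if (!A)
        return false; // getAttr<DeviceKernelAttr>() yields null when absent.
      llvm::StringRef Sp = A->getSpelling();
      return Sp == "kernel" || Sp == "__kernel"; // placeholder for the real spelling check
    }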
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index f7c620d..ecf5be2 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -1557,7 +1557,8 @@ void CXXNameMangler::mangleUnqualifiedName(
FD && FD->hasAttr<CUDAGlobalAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
bool IsOCLDeviceStub =
- FD && FD->hasAttr<OpenCLKernelAttr>() &&
+ FD &&
+ DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()) &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
if (IsDeviceStub)
mangleDeviceStubName(II);
@@ -3532,10 +3533,9 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) {
case CC_AAPCS_VFP:
case CC_AArch64VectorCall:
case CC_AArch64SVEPCS:
- case CC_AMDGPUKernelCall:
case CC_IntelOclBicc:
case CC_SpirFunction:
- case CC_OpenCLKernel:
+ case CC_DeviceKernel:
case CC_PreserveMost:
case CC_PreserveAll:
case CC_M68kRTD:
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index d633902..bc47e05 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -1164,7 +1164,9 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
->hasAttr<CUDAGlobalAttr>())) &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
bool IsOCLDeviceStub =
- ND && isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>() &&
+ ND && isa<FunctionDecl>(ND) &&
+ DeviceKernelAttr::isOpenCLSpelling(
+ ND->getAttr<DeviceKernelAttr>()) &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
if (IsDeviceStub)
mangleSourceName(
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 5c990b1..5bb39b1 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -3606,14 +3606,12 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) {
return "aarch64_vector_pcs";
case CC_AArch64SVEPCS:
return "aarch64_sve_pcs";
- case CC_AMDGPUKernelCall:
- return "amdgpu_kernel";
case CC_IntelOclBicc:
return "intel_ocl_bicc";
case CC_SpirFunction:
return "spir_function";
- case CC_OpenCLKernel:
- return "opencl_kernel";
+ case CC_DeviceKernel:
+ return "device_kernel";
case CC_Swift:
return "swiftcall";
case CC_SwiftAsync:
@@ -4328,7 +4326,7 @@ bool AttributedType::isCallingConv() const {
case attr::VectorCall:
case attr::AArch64VectorPcs:
case attr::AArch64SVEPcs:
- case attr::AMDGPUKernelCall:
+ case attr::DeviceKernel:
case attr::Pascal:
case attr::MSABI:
case attr::SysVABI:
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 694cd121..330cfcd 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -1100,8 +1100,8 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
case CC_AArch64SVEPCS:
OS << "__attribute__((aarch64_sve_pcs))";
break;
- case CC_AMDGPUKernelCall:
- OS << "__attribute__((amdgpu_kernel))";
+ case CC_DeviceKernel:
+ OS << "__attribute__((device_kernel))";
break;
case CC_IntelOclBicc:
OS << " __attribute__((intel_ocl_bicc))";
@@ -1116,7 +1116,6 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
OS << " __attribute__((regcall))";
break;
case CC_SpirFunction:
- case CC_OpenCLKernel:
// Do nothing. These CCs are not available as attributes.
break;
case CC_Swift:
@@ -2069,7 +2068,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
}
case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break;
case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break;
- case attr::AMDGPUKernelCall: OS << "amdgpu_kernel"; break;
+ case attr::DeviceKernel:
+ OS << T->getAttr()->getSpelling();
+ break;
case attr::IntelOclBicc: OS << "inteloclbicc"; break;
case attr::PreserveMost:
OS << "preserve_most";
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index d0dde3d..e8abdf9 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1400,7 +1400,7 @@ AArch64TargetInfo::checkCallingConvention(CallingConv CC) const {
case CC_PreserveMost:
case CC_PreserveAll:
case CC_PreserveNone:
- case CC_OpenCLKernel:
+ case CC_DeviceKernel:
case CC_AArch64VectorCall:
case CC_AArch64SVEPCS:
case CC_Win64:
@@ -1758,7 +1758,7 @@ WindowsARM64TargetInfo::checkCallingConvention(CallingConv CC) const {
case CC_X86FastCall:
return CCCR_Ignore;
case CC_C:
- case CC_OpenCLKernel:
+ case CC_DeviceKernel:
case CC_PreserveMost:
case CC_PreserveAll:
case CC_PreserveNone:
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 8ea544b..509128f 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -415,8 +415,7 @@ public:
default:
return CCCR_Warning;
case CC_C:
- case CC_OpenCLKernel:
- case CC_AMDGPUKernelCall:
+ case CC_DeviceKernel:
return CCCR_OK;
}
}
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index bd12350..65d4ed1 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -1404,7 +1404,7 @@ ARMTargetInfo::checkCallingConvention(CallingConv CC) const {
case CC_AAPCS_VFP:
case CC_Swift:
case CC_SwiftAsync:
- case CC_OpenCLKernel:
+ case CC_DeviceKernel:
return CCCR_OK;
default:
return CCCR_Warning;
@@ -1479,7 +1479,7 @@ WindowsARMTargetInfo::checkCallingConvention(CallingConv CC) const {
case CC_X86VectorCall:
return CCCR_Ignore;
case CC_C:
- case CC_OpenCLKernel:
+ case CC_DeviceKernel:
case CC_PreserveMost:
case CC_PreserveAll:
case CC_Swift:
diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h
index d1f68b84..d9e5cf4 100644
--- a/clang/lib/Basic/Targets/BPF.h
+++ b/clang/lib/Basic/Targets/BPF.h
@@ -94,7 +94,7 @@ public:
default:
return CCCR_Warning;
case CC_C:
- case CC_OpenCLKernel:
+ case CC_DeviceKernel:
return CCCR_OK;
}
}
diff --git a/clang/lib/Basic/Targets/Mips.cpp b/clang/lib/Basic/Targets/Mips.cpp
index d693b19..34837cc 100644
--- a/clang/lib/Basic/Targets/Mips.cpp
+++ b/clang/lib/Basic/Targets/Mips.cpp
@@ -336,7 +336,7 @@ WindowsMipsTargetInfo::checkCallingConvention(CallingConv CC) const {
case CC_X86VectorCall:
return CCCR_Ignore;
case CC_C:
- case CC_OpenCLKernel:
+ case CC_DeviceKernel:
case CC_PreserveMost:
case CC_PreserveAll:
case CC_Swift:
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 470e578..0eaf82e 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -193,7 +193,7 @@ public:
}
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
- return (CC == CC_SpirFunction || CC == CC_OpenCLKernel) ? CCCR_OK
+ return (CC == CC_SpirFunction || CC == CC_DeviceKernel) ? CCCR_OK
: CCCR_Warning;
}
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index 6431be0..1af6122 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -245,7 +245,7 @@ public:
switch (CC) {
case CC_C:
case CC_Swift:
- case CC_OpenCLKernel:
+ case CC_DeviceKernel:
return CCCR_OK;
case CC_SwiftAsync:
return CCCR_Error;
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index babea81..3d58be8 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -409,10 +409,11 @@ public:
case CC_Swift:
case CC_X86Pascal:
case CC_IntelOclBicc:
- case CC_OpenCLKernel:
return CCCR_OK;
case CC_SwiftAsync:
return CCCR_Error;
+ case CC_DeviceKernel:
+ return IsOpenCL ? CCCR_OK : CCCR_Warning;
default:
return CCCR_Warning;
}
@@ -440,7 +441,13 @@ public:
uint64_t getPointerAlignV(LangAS AddrSpace) const override {
return getPointerWidthV(AddrSpace);
}
+ void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
+ TargetInfo::adjust(Diags, Opts);
+ IsOpenCL = Opts.OpenCL;
+ }
+private:
+ bool IsOpenCL = false;
};
// X86-32 generic target
@@ -786,8 +793,9 @@ public:
case CC_PreserveAll:
case CC_PreserveNone:
case CC_X86RegCall:
- case CC_OpenCLKernel:
return CCCR_OK;
+ case CC_DeviceKernel:
+ return IsOpenCL ? CCCR_OK : CCCR_Warning;
default:
return CCCR_Warning;
}
@@ -818,7 +826,6 @@ public:
return X86TargetInfo::validateGlobalRegisterVariable(RegName, RegSize,
HasSizeMismatch);
}
-
void setMaxAtomicWidth() override {
if (hasFeature("cx16"))
MaxAtomicInlineWidth = 128;
@@ -830,6 +837,14 @@ public:
size_t getMaxBitIntWidth() const override {
return llvm::IntegerType::MAX_INT_BITS;
}
+
+ void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
+ TargetInfo::adjust(Diags, Opts);
+ IsOpenCL = Opts.OpenCL;
+ }
+
+private:
+ bool IsOpenCL = false;
};
// x86-64 UEFI target
@@ -915,7 +930,7 @@ public:
case CC_Swift:
case CC_SwiftAsync:
case CC_X86RegCall:
- case CC_OpenCLKernel:
+ case CC_DeviceKernel:
return CCCR_OK;
default:
return CCCR_Warning;
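checkCallingConvention() has no access to LangOptions, so the x86 targets cache Opts.OpenCL in their new adjust() overrides and accept the device-kernel convention only in OpenCL mode; otherwise they report CCCR_Warning, which Sema surfaces as the usual unsupported-calling-convention diagnostic. Roughly, at the source level (illustrative; the exact diagnostic text is not part of this diff):

    // Built as OpenCL for an x86 target: the convention is accepted.
    // Built as plain C/C++ for x86: it is diagnosed and ignored.
    __attribute__((device_kernel)) void k(void);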
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a67b0d8..46a5d64 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -81,12 +81,19 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
return llvm::CallingConv::AArch64_VectorCall;
case CC_AArch64SVEPCS:
return llvm::CallingConv::AArch64_SVE_VectorCall;
- case CC_AMDGPUKernelCall:
- return llvm::CallingConv::AMDGPU_KERNEL;
case CC_SpirFunction:
return llvm::CallingConv::SPIR_FUNC;
- case CC_OpenCLKernel:
- return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
+ case CC_DeviceKernel: {
+ if (CGM.getLangOpts().OpenCL)
+ return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
+ if (CGM.getTriple().isSPIROrSPIRV())
+ return llvm::CallingConv::SPIR_KERNEL;
+ if (CGM.getTriple().isAMDGPU())
+ return llvm::CallingConv::AMDGPU_KERNEL;
+ if (CGM.getTriple().isNVPTX())
+ return llvm::CallingConv::PTX_Kernel;
+ llvm_unreachable("Unknown kernel calling convention");
+ }
case CC_PreserveMost:
return llvm::CallingConv::PreserveMost;
case CC_PreserveAll:
@@ -284,8 +291,8 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
if (D->hasAttr<AArch64SVEPcsAttr>())
return CC_AArch64SVEPCS;
- if (D->hasAttr<AMDGPUKernelCallAttr>())
- return CC_AMDGPUKernelCall;
+ if (D->hasAttr<DeviceKernelAttr>())
+ return CC_DeviceKernel;
if (D->hasAttr<IntelOclBiccAttr>())
return CC_IntelOclBicc;
@@ -533,7 +540,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const GlobalDecl GD) {
assert(isa<FunctionType>(FTy));
setCUDAKernelCallingConvention(FTy, CGM, FD);
- if (FD->hasAttr<OpenCLKernelAttr>() &&
+ if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()) &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
const FunctionType *FT = FTy->getAs<FunctionType>();
CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT);
@@ -761,7 +768,7 @@ CodeGenTypes::arrangeSYCLKernelCallerDeclaration(QualType resultType,
return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None,
argTypes,
- FunctionType::ExtInfo(CC_OpenCLKernel),
+ FunctionType::ExtInfo(CC_DeviceKernel),
/*paramInfos=*/{}, RequiredArgs::All);
}
@@ -2536,7 +2543,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
NumElemsParam);
}
- if (TargetDecl->hasAttr<OpenCLKernelAttr>() &&
+ if (DeviceKernelAttr::isOpenCLSpelling(
+ TargetDecl->getAttr<DeviceKernelAttr>()) &&
CallingConv != CallingConv::CC_C &&
CallingConv != CallingConv::CC_SpirFunction) {
// Check CallingConv to avoid adding uniform-work-group-size attribute to
@@ -2919,7 +2927,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
// > For arguments to a __kernel function declared to be a pointer to a
// > data type, the OpenCL compiler can assume that the pointee is always
// > appropriately aligned as required by the data type.
- if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() &&
+ if (TargetDecl &&
+ DeviceKernelAttr::isOpenCLSpelling(
+ TargetDecl->getAttr<DeviceKernelAttr>()) &&
ParamType->isPointerType()) {
QualType PTy = ParamType->getPointeeType();
if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
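With CC_OpenCLKernel and CC_AMDGPUKernelCall folded into CC_DeviceKernel, the target triple now picks the LLVM calling convention at IR generation time, and OpenCL still defers to the target's getOpenCLKernelCallingConv(). A rough sketch of the expected lowering for a simple kernel (IR calling-convention names shown as comments, not taken from actual compiler output):

    __kernel void k(__global int *p) { p[0] = 42; }
    // spir64 / spirv64 targets -> spir_kernel
    // amdgcn targets           -> amdgpu_kernel
    // nvptx64 targets          -> ptx_kernel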
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 7cb5259..fbcc330a 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -1692,9 +1692,8 @@ static unsigned getDwarfCC(CallingConv CC) {
return llvm::dwarf::DW_CC_LLVM_IntelOclBicc;
case CC_SpirFunction:
return llvm::dwarf::DW_CC_LLVM_SpirFunction;
- case CC_OpenCLKernel:
- case CC_AMDGPUKernelCall:
- return llvm::dwarf::DW_CC_LLVM_OpenCLKernel;
+ case CC_DeviceKernel:
+ return llvm::dwarf::DW_CC_LLVM_DeviceKernel;
case CC_Swift:
return llvm::dwarf::DW_CC_LLVM_Swift;
case CC_SwiftAsync:
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 5fc98b6..1099a54 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5944,7 +5944,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
}
static GlobalDecl getGlobalDeclForDirectCall(const FunctionDecl *FD) {
- if (FD->hasAttr<OpenCLKernelAttr>())
+ if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()))
return GlobalDecl(FD, KernelReferenceKind::Stub);
return GlobalDecl(FD);
}
@@ -6375,7 +6375,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
const auto *FnType = cast<FunctionType>(PointeeType);
if (const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
- FD && FD->hasAttr<OpenCLKernelAttr>())
+ FD && DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()))
CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FnType);
bool CFIUnchecked =
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 2ac7e9d..3302abad 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -626,7 +626,7 @@ CodeGenFunction::getUBSanFunctionTypeHash(QualType Ty) const {
void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
llvm::Function *Fn) {
- if (!FD->hasAttr<OpenCLKernelAttr>() && !FD->hasAttr<CUDAGlobalAttr>())
+ if (!FD->hasAttr<DeviceKernelAttr>() && !FD->hasAttr<CUDAGlobalAttr>())
return;
llvm::LLVMContext &Context = getLLVMContext();
@@ -1598,7 +1598,8 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
// Implicit copy-assignment gets the same special treatment as implicit
// copy-constructors.
emitImplicitAssignmentOperatorBody(Args);
- } else if (FD->hasAttr<OpenCLKernelAttr>() &&
+ } else if (DeviceKernelAttr::isOpenCLSpelling(
+ FD->getAttr<DeviceKernelAttr>()) &&
GD.getKernelReferenceKind() == KernelReferenceKind::Kernel) {
CallArgList CallArgs;
for (unsigned i = 0; i < Args.size(); ++i) {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 468fc6e..84166dd 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1913,7 +1913,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
} else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
Out << "__device_stub__" << II->getName();
- } else if (FD && FD->hasAttr<OpenCLKernelAttr>() &&
+ } else if (FD &&
+ DeviceKernelAttr::isOpenCLSpelling(
+ FD->getAttr<DeviceKernelAttr>()) &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
Out << "__clang_ocl_kern_imp_" << II->getName();
} else {
@@ -3930,7 +3932,8 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
// Ignore declarations, they will be emitted on their first use.
if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
- if (FD->hasAttr<OpenCLKernelAttr>() && FD->doesThisDeclarationHaveABody())
+ if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()) &&
+ FD->doesThisDeclarationHaveABody())
addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub));
// Update deferred annotations with the latest declaration if the function
@@ -4895,7 +4898,7 @@ CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable,
if (!Ty) {
const auto *FD = cast<FunctionDecl>(GD.getDecl());
Ty = getTypes().ConvertType(FD->getType());
- if (FD->hasAttr<OpenCLKernelAttr>() &&
+ if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()) &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
Ty = getTypes().GetFunctionType(FI);
@@ -6195,7 +6198,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
(CodeGenOpts.OptimizationLevel == 0) &&
!D->hasAttr<MinSizeAttr>();
- if (D->hasAttr<OpenCLKernelAttr>()) {
+ if (DeviceKernelAttr::isOpenCLSpelling(D->getAttr<DeviceKernelAttr>())) {
if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub &&
!D->hasAttr<NoInlineAttr>() &&
!Fn->hasFnAttribute(llvm::Attribute::NoInline) &&
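The mangling hunk above keeps the existing OpenCL kernel-stub scheme, now keyed off the merged attribute: when a kernel is referenced as an ordinary callee rather than as the kernel entry point, the separately emitted definition carries the __clang_ocl_kern_imp_ prefix. Sketch for a C-named kernel (C++ kernels go through the Itanium/Microsoft mangler changes earlier in this patch):

    __kernel void add_one(__global int *p) { p[0] += 1; }
    // kernel entry point:       add_one
    // callable stub definition: __clang_ocl_kern_imp_add_one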
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 7d176e4..f3df92c 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -191,7 +191,7 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel(
auto *F = llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, Name,
&CGF.CGM.getModule());
llvm::CallingConv::ID KernelCC =
- CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel);
+ CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_DeviceKernel);
F->setCallingConv(KernelCC);
llvm::AttrBuilder KernelAttrs(C);
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 452b2e6..8660373 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -337,7 +337,7 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D,
return false;
return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
- (D->hasAttr<OpenCLKernelAttr>() ||
+ (D->hasAttr<DeviceKernelAttr>() ||
(isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
(isa<VarDecl>(D) &&
(D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
@@ -350,7 +350,7 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
const auto *ReqdWGS =
M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
const bool IsOpenCLKernel =
- M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
+ M.getLangOpts().OpenCL && FD->hasAttr<DeviceKernelAttr>();
const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();
const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
@@ -572,7 +572,7 @@ bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
const FunctionType *&FT) const {
FT = getABIInfo().getContext().adjustFunctionType(
- FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
+ FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel));
}
/// Return IR struct type for rtinfo struct in rocm-device-libs used for device
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 0ceca61..ad802c9 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -260,40 +260,31 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
llvm::Function *F = cast<llvm::Function>(GV);
- // Perform special handling in OpenCL mode
- if (M.getLangOpts().OpenCL) {
- // Use OpenCL function attributes to check for kernel functions
+ // Perform special handling in OpenCL/CUDA mode
+ if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
+ // Use function attributes to check for kernel functions
// By default, all functions are device functions
- if (FD->hasAttr<OpenCLKernelAttr>()) {
- // OpenCL __kernel functions get kernel metadata
+ if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
+ // OpenCL/CUDA kernel functions get kernel metadata
// Create !{<func-ref>, metadata !"kernel", i32 1} node
- F->setCallingConv(llvm::CallingConv::PTX_Kernel);
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
+ if (FD->hasAttr<CUDAGlobalAttr>()) {
+ SmallVector<int, 10> GCI;
+ for (auto IV : llvm::enumerate(FD->parameters()))
+ if (IV.value()->hasAttr<CUDAGridConstantAttr>())
+ // For some reason arg indices are 1-based in NVVM
+ GCI.push_back(IV.index() + 1);
+ // Create !{<func-ref>, metadata !"kernel", i32 1} node
+ F->setCallingConv(llvm::CallingConv::PTX_Kernel);
+ addGridConstantNVVMMetadata(F, GCI);
+ }
+ if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
+ M.handleCUDALaunchBoundsAttr(F, Attr);
}
}
-
- // Perform special handling in CUDA mode.
- if (M.getLangOpts().CUDA) {
- // CUDA __global__ functions get a kernel metadata entry. Since
- // __global__ functions cannot be called from the device, we do not
- // need to set the noinline attribute.
- if (FD->hasAttr<CUDAGlobalAttr>()) {
- SmallVector<int, 10> GCI;
- for (auto IV : llvm::enumerate(FD->parameters()))
- if (IV.value()->hasAttr<CUDAGridConstantAttr>())
- // For some reason arg indices are 1-based in NVVM
- GCI.push_back(IV.index() + 1);
- // Create !{<func-ref>, metadata !"kernel", i32 1} node
- F->setCallingConv(llvm::CallingConv::PTX_Kernel);
- addGridConstantNVVMMetadata(F, GCI);
- }
- if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
- M.handleCUDALaunchBoundsAttr(F, Attr);
- }
-
// Attach kernel metadata directly if compiling for NVPTX.
- if (FD->hasAttr<NVPTXKernelAttr>()) {
+ if (FD->hasAttr<DeviceKernelAttr>()) {
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
}
}
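The NVPTX hook now handles OpenCL and CUDA in a single branch: any function carrying the merged attribute or __global__ is marked noinline, kernels get the PTX kernel calling convention, and CUDA kernels additionally record __grid_constant__ parameters (with 1-based indices, as the comment notes) and launch bounds. An illustrative CUDA kernel (hypothetical; the exact NVVM metadata layout is handled by addGridConstantNVVMMetadata):

    __global__ void kern(int a, __grid_constant__ const int b) {
      // 'b' is the second parameter, so it would be recorded as index 2
      // in the grid-constant metadata.
    }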
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 92ae462..2f1e43c 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -228,7 +228,7 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
// Convert HIP kernels to SPIR-V kernels.
if (getABIInfo().getContext().getLangOpts().HIP) {
FT = getABIInfo().getContext().adjustFunctionType(
- FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
+ FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel));
return;
}
}
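The same substitution applies on the HIP-to-SPIR-V path: a __global__ kernel compiled for a SPIR-V target is re-typed with the device-kernel convention so it is emitted as a SPIR-V kernel entry point, following the per-triple mapping in CGCall.cpp above. For example (hypothetical HIP source):

    __global__ void hip_kern(float *p) { *p = 1.0f; }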
diff --git a/clang/lib/CodeGen/Targets/TCE.cpp b/clang/lib/CodeGen/Targets/TCE.cpp
index f3685cc..df49aea 100644
--- a/clang/lib/CodeGen/Targets/TCE.cpp
+++ b/clang/lib/CodeGen/Targets/TCE.cpp
@@ -39,7 +39,7 @@ void TCETargetCodeGenInfo::setTargetAttributes(
llvm::Function *F = cast<llvm::Function>(GV);
if (M.getLangOpts().OpenCL) {
- if (FD->hasAttr<OpenCLKernelAttr>()) {
+ if (FD->hasAttr<DeviceKernelAttr>()) {
// OpenCL C Kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>();
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index c662b0e..60e911b 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -8789,7 +8789,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
FunctionDecl *FD = getCurFunctionDecl();
// OpenCL v1.1 s6.5.2 and s6.5.3: no local or constant variables
// in functions.
- if (FD && !FD->hasAttr<OpenCLKernelAttr>()) {
+ if (FD && !FD->hasAttr<DeviceKernelAttr>()) {
if (T.getAddressSpace() == LangAS::opencl_constant)
Diag(NewVD->getLocation(), diag::err_opencl_function_variable)
<< 0 /*non-kernel only*/ << "constant";
@@ -8801,7 +8801,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
}
// OpenCL v2.0 s6.5.2 and s6.5.3: local and constant variables must be
// in the outermost scope of a kernel function.
- if (FD && FD->hasAttr<OpenCLKernelAttr>()) {
+ if (FD && FD->hasAttr<DeviceKernelAttr>()) {
if (!getCurScope()->isFunctionScope()) {
if (T.getAddressSpace() == LangAS::opencl_constant)
Diag(NewVD->getLocation(), diag::err_opencl_addrspace_scope)
@@ -10930,9 +10930,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
MarkUnusedFileScopedDecl(NewFD);
-
-
- if (getLangOpts().OpenCL && NewFD->hasAttr<OpenCLKernelAttr>()) {
+ if (getLangOpts().OpenCL && NewFD->hasAttr<DeviceKernelAttr>()) {
// OpenCL v1.2 s6.8 static is invalid for kernel functions.
if (SC == SC_Static) {
Diag(D.getIdentifierLoc(), diag::err_static_kernel);
@@ -12437,7 +12435,7 @@ void Sema::CheckMain(FunctionDecl *FD, const DeclSpec &DS) {
if (getLangOpts().OpenCL) {
Diag(FD->getLocation(), diag::err_opencl_no_main)
- << FD->hasAttr<OpenCLKernelAttr>();
+ << FD->hasAttr<DeviceKernelAttr>();
FD->setInvalidDecl();
return;
}
@@ -15713,7 +15711,7 @@ ShouldWarnAboutMissingPrototype(const FunctionDecl *FD,
return false;
// Don't warn for OpenCL kernels.
- if (FD->hasAttr<OpenCLKernelAttr>())
+ if (FD->hasAttr<DeviceKernelAttr>())
return false;
// Don't warn on explicitly deleted functions.
@@ -20607,7 +20605,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD,
// SYCL functions can be template, so we check if they have appropriate
// attribute prior to checking if it is a template.
- if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>())
+ if (LangOpts.SYCLIsDevice && FD->hasAttr<DeviceKernelAttr>())
return FunctionEmissionStatus::Emitted;
// Templates are emitted when they're instantiated.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 334e112..da0e32657 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5108,8 +5108,8 @@ static void handleGlobalAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice)
S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD;
- if (AL.getKind() == ParsedAttr::AT_NVPTXKernel)
- D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL));
+ if (AL.getKind() == ParsedAttr::AT_DeviceKernel)
+ D->addAttr(::new (S.Context) DeviceKernelAttr(S.Context, AL));
else
D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
// In host compilation the kernel is emitted as a stub function, which is
@@ -5244,9 +5244,11 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
case ParsedAttr::AT_AArch64SVEPcs:
D->addAttr(::new (S.Context) AArch64SVEPcsAttr(S.Context, AL));
return;
- case ParsedAttr::AT_AMDGPUKernelCall:
- D->addAttr(::new (S.Context) AMDGPUKernelCallAttr(S.Context, AL));
+ case ParsedAttr::AT_DeviceKernel: {
+ // The attribute should already be applied.
+ assert(D->hasAttr<DeviceKernelAttr>() && "Expected attribute");
return;
+ }
case ParsedAttr::AT_IntelOclBicc:
D->addAttr(::new (S.Context) IntelOclBiccAttr(S.Context, AL));
return;
@@ -5289,6 +5291,33 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
}
+static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
+ bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate();
+ if (S.getLangOpts().SYCLIsDevice) {
+ if (!IsFunctionTemplate) {
+ S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str)
+ << AL << AL.isRegularKeywordAttribute() << "function templates";
+ } else {
+ S.SYCL().handleKernelAttr(D, AL);
+ }
+ } else if (DeviceKernelAttr::isSYCLSpelling(AL)) {
+ S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL;
+ } else if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) {
+ handleGlobalAttr(S, D, AL);
+ } else {
+ // OpenCL C++ will throw a more specific error.
+ if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
+ S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str)
+ << AL << AL.isRegularKeywordAttribute() << "functions";
+ }
+ handleSimpleAttribute<DeviceKernelAttr>(S, D, AL);
+ }
+ // Make sure we validate the CC with the target
+ // and warn/error if necessary.
+ handleCallConvAttr(S, D, AL);
+}
+
static void handleSuppressAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (AL.getAttributeSpellingListIndex() == SuppressAttr::CXX11_gsl_suppress) {
// Suppression attribute with GSL spelling requires at least 1 argument.
@@ -5453,9 +5482,6 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
case ParsedAttr::AT_AArch64SVEPcs:
CC = CC_AArch64SVEPCS;
break;
- case ParsedAttr::AT_AMDGPUKernelCall:
- CC = CC_AMDGPUKernelCall;
- break;
case ParsedAttr::AT_RegCall:
CC = CC_X86RegCall;
break;
@@ -5525,6 +5551,11 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
llvm::Log2_64(ABIVLen) - 5);
break;
}
+ case ParsedAttr::AT_DeviceKernel: {
+ // Validation was handled in handleDeviceKernelAttr.
+ CC = CC_DeviceKernel;
+ break;
+ }
default: llvm_unreachable("unexpected attribute kind");
}
@@ -7148,9 +7179,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_EnumExtensibility:
handleEnumExtensibilityAttr(S, D, AL);
break;
- case ParsedAttr::AT_SYCLKernel:
- S.SYCL().handleKernelAttr(D, AL);
- break;
case ParsedAttr::AT_SYCLKernelEntryPoint:
S.SYCL().handleKernelEntryPointAttr(D, AL);
break;
@@ -7175,7 +7203,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_CalledOnce:
handleCalledOnceAttr(S, D, AL);
break;
- case ParsedAttr::AT_NVPTXKernel:
case ParsedAttr::AT_CUDAGlobal:
handleGlobalAttr(S, D, AL);
break;
@@ -7439,13 +7466,15 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_PreserveAll:
case ParsedAttr::AT_AArch64VectorPcs:
case ParsedAttr::AT_AArch64SVEPcs:
- case ParsedAttr::AT_AMDGPUKernelCall:
case ParsedAttr::AT_M68kRTD:
case ParsedAttr::AT_PreserveNone:
case ParsedAttr::AT_RISCVVectorCC:
case ParsedAttr::AT_RISCVVLSCC:
handleCallConvAttr(S, D, AL);
break;
+ case ParsedAttr::AT_DeviceKernel:
+ handleDeviceKernelAttr(S, D, AL);
+ break;
case ParsedAttr::AT_Suppress:
handleSuppressAttr(S, D, AL);
break;
@@ -7764,9 +7793,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
static bool isKernelDecl(Decl *D) {
const FunctionType *FnTy = D->getFunctionType();
- return D->hasAttr<OpenCLKernelAttr>() ||
- (FnTy && FnTy->getCallConv() == CallingConv::CC_AMDGPUKernelCall) ||
- D->hasAttr<CUDAGlobalAttr>() || D->getAttr<NVPTXKernelAttr>();
+ return D->hasAttr<DeviceKernelAttr>() ||
+ (FnTy && FnTy->getCallConv() == CallingConv::CC_DeviceKernel) ||
+ D->hasAttr<CUDAGlobalAttr>();
}
void Sema::ProcessDeclAttributeList(
@@ -7793,7 +7822,7 @@ void Sema::ProcessDeclAttributeList(
// good to have a way to specify "these attributes must appear as a group",
// for these. Additionally, it would be good to have a way to specify "these
// attribute must never appear as a group" for attributes like cold and hot.
- if (!(D->hasAttr<OpenCLKernelAttr>() ||
+ if (!(D->hasAttr<DeviceKernelAttr>() ||
(D->hasAttr<CUDAGlobalAttr>() &&
Context.getTargetInfo().getTriple().isSPIRV()))) {
// These attributes cannot be applied to a non-kernel function.
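The new handleDeviceKernelAttr centralizes the per-language handling: in SYCL device compilation the attribute is only accepted on function templates (routed to SemaSYCL), the SYCL spelling is ignored outside SYCL device mode, NVPTX targets reuse the CUDA global-kernel path, and everything else goes through the simple-attribute and calling-convention checks. Source-level consequences, roughly (hypothetical declarations; the diagnostics are the pre-existing wrong-decl-type and attribute-ignored warnings referenced in the hunk):

    // SYCL device mode: accepted only on a function template.
    template <typename F>
    __attribute__((sycl_kernel)) void launch(F f) { f(); }   // OK
    __attribute__((sycl_kernel)) void not_a_template(void);  // warns: wrong decl type

    // Outside SYCL, the sycl_kernel spelling is simply ignored with a warning;
    // other spellings attach DeviceKernelAttr and are then validated as a
    // calling convention against the target.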
diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp
index 1969d7b..3e03cb4 100644
--- a/clang/lib/Sema/SemaSYCL.cpp
+++ b/clang/lib/Sema/SemaSYCL.cpp
@@ -199,7 +199,7 @@ void SemaSYCL::handleKernelAttr(Decl *D, const ParsedAttr &AL) {
return;
}
- handleSimpleAttribute<SYCLKernelAttr>(*this, D, AL);
+ handleSimpleAttribute<DeviceKernelAttr>(*this, D, AL);
}
void SemaSYCL::handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL) {
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index bcad815..b8e830c 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -676,9 +676,9 @@ static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr(
// This doesn't take any template parameters, but we have a custom action that
// needs to happen when the kernel itself is instantiated. We need to run the
// ItaniumMangler to mark the names required to name this kernel.
-static void instantiateDependentSYCLKernelAttr(
+static void instantiateDependentDeviceKernelAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
- const SYCLKernelAttr &Attr, Decl *New) {
+ const DeviceKernelAttr &Attr, Decl *New) {
New->addAttr(Attr.clone(S.getASTContext()));
}
@@ -920,8 +920,8 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
continue;
}
- if (auto *A = dyn_cast<SYCLKernelAttr>(TmplAttr)) {
- instantiateDependentSYCLKernelAttr(*this, TemplateArgs, *A, New);
+ if (auto *A = dyn_cast<DeviceKernelAttr>(TmplAttr)) {
+ instantiateDependentDeviceKernelAttr(*this, TemplateArgs, *A, New);
continue;
}
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index f863531..a0cd2d16 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -134,7 +134,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
case ParsedAttr::AT_VectorCall: \
case ParsedAttr::AT_AArch64VectorPcs: \
case ParsedAttr::AT_AArch64SVEPcs: \
- case ParsedAttr::AT_AMDGPUKernelCall: \
+ case ParsedAttr::AT_DeviceKernel: \
case ParsedAttr::AT_MSABI: \
case ParsedAttr::AT_SysVABI: \
case ParsedAttr::AT_Pcs: \
@@ -3755,18 +3755,7 @@ static CallingConv getCCForDeclaratorChunk(
CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic,
IsCXXInstanceMethod);
- // Attribute AT_OpenCLKernel affects the calling convention for SPIR
- // and AMDGPU targets, hence it cannot be treated as a calling
- // convention attribute. This is the simplest place to infer
- // calling convention for OpenCL kernels.
- if (S.getLangOpts().OpenCL) {
- for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
- if (AL.getKind() == ParsedAttr::AT_OpenCLKernel) {
- CC = CC_OpenCLKernel;
- break;
- }
- }
- } else if (S.getLangOpts().CUDA) {
+ if (S.getLangOpts().CUDA) {
// If we're compiling CUDA/HIP code and targeting HIPSPV we need to make
// sure the kernels will be marked with the right calling convention so that
// they will be visible by the APIs that ingest SPIR-V. We do not do this
@@ -3775,13 +3764,20 @@ static CallingConv getCCForDeclaratorChunk(
if (Triple.isSPIRV() && Triple.getVendor() != llvm::Triple::AMD) {
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
if (AL.getKind() == ParsedAttr::AT_CUDAGlobal) {
- CC = CC_OpenCLKernel;
+ CC = CC_DeviceKernel;
break;
}
}
}
}
-
+ if (!S.getLangOpts().isSYCL()) {
+ for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
+ if (AL.getKind() == ParsedAttr::AT_DeviceKernel) {
+ CC = CC_DeviceKernel;
+ break;
+ }
+ }
+ }
return CC;
}
@@ -7532,8 +7528,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
case ParsedAttr::AT_ArmStreaming:
return createSimpleAttr<ArmStreamingAttr>(Ctx, Attr);
- case ParsedAttr::AT_AMDGPUKernelCall:
- return createSimpleAttr<AMDGPUKernelCallAttr>(Ctx, Attr);
+ case ParsedAttr::AT_DeviceKernel:
+ return createSimpleAttr<DeviceKernelAttr>(Ctx, Attr);
case ParsedAttr::AT_Pcs: {
// The attribute may have had a fixit applied where we treated an
// identifier as a string literal. The contents of the string are valid,
@@ -8742,6 +8738,16 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State,
}
}
+static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) {
+ // The DeviceKernel attribute is shared for many targets, and
+ // it is only allowed to be a type attribute with the AMDGPU
+ // spelling, so skip processing the attr as a type attr
+ // unless it has that spelling.
+ if (Attr.getKind() != ParsedAttr::AT_DeviceKernel)
+ return true;
+ return DeviceKernelAttr::isAMDGPUSpelling(Attr);
+}
+
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
TypeAttrLocation TAL,
const ParsedAttributesView &attrs,
@@ -8995,6 +9001,9 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
break;
[[fallthrough]];
FUNCTION_TYPE_ATTRS_CASELIST:
+ if (!isMultiSubjectAttrAllowedOnType(attr))
+ break;
+
attr.setUsedAsTypeAttr();
// Attributes with standard syntax have strict rules for what they