Diffstat (limited to 'clang/lib/CodeGen')
 clang/lib/CodeGen/BackendConsumer.h      | 34
 clang/lib/CodeGen/BackendUtil.cpp        |  6
 clang/lib/CodeGen/CGBuiltin.cpp          | 80
 clang/lib/CodeGen/CGCall.cpp             | 37
 clang/lib/CodeGen/CGDebugInfo.cpp        |  8
 clang/lib/CodeGen/CGDebugInfo.h          |  2
 clang/lib/CodeGen/CGExprScalar.cpp       |  5
 clang/lib/CodeGen/CGHLSLRuntime.cpp      |  2
 clang/lib/CodeGen/CGHLSLRuntime.h        |  3
 clang/lib/CodeGen/CGStmt.cpp             | 52
 clang/lib/CodeGen/CodeGenAction.cpp      | 67
 clang/lib/CodeGen/CodeGenFunction.h      | 34
 clang/lib/CodeGen/CodeGenModule.cpp      | 18
 clang/lib/CodeGen/CoverageMappingGen.cpp | 26
 clang/lib/CodeGen/SanitizerMetadata.cpp  |  4
 clang/lib/CodeGen/Targets/AArch64.cpp    | 17
 clang/lib/CodeGen/Targets/AMDGPU.cpp     |  6
 clang/lib/CodeGen/Targets/NVPTX.cpp      | 39
 clang/lib/CodeGen/Targets/RISCV.cpp      |  8
 clang/lib/CodeGen/Targets/SPIR.cpp       | 37
 20 files changed, 292 insertions(+), 193 deletions(-)
diff --git a/clang/lib/CodeGen/BackendConsumer.h b/clang/lib/CodeGen/BackendConsumer.h
index a023d29..d932a78 100644
--- a/clang/lib/CodeGen/BackendConsumer.h
+++ b/clang/lib/CodeGen/BackendConsumer.h
@@ -29,17 +29,16 @@ class BackendConsumer : public ASTConsumer {
virtual void anchor();
DiagnosticsEngine &Diags;
- BackendAction Action;
const HeaderSearchOptions &HeaderSearchOpts;
const CodeGenOptions &CodeGenOpts;
const TargetOptions &TargetOpts;
const LangOptions &LangOpts;
std::unique_ptr<raw_pwrite_stream> AsmOutStream;
- ASTContext *Context;
+ ASTContext *Context = nullptr;
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;
llvm::Timer LLVMIRGeneration;
- unsigned LLVMIRGenerationRefCount;
+ unsigned LLVMIRGenerationRefCount = 0;
/// True if we've finished generating IR. This prevents us from generating
/// additional LLVM IR after emitting output in HandleTranslationUnit. This
@@ -48,6 +47,8 @@ class BackendConsumer : public ASTConsumer {
bool TimerIsEnabled = false;
+ BackendAction Action;
+
std::unique_ptr<CodeGenerator> Gen;
SmallVector<LinkModule, 4> LinkModules;
@@ -69,29 +70,12 @@ class BackendConsumer : public ASTConsumer {
llvm::Module *CurLinkModule = nullptr;
public:
- BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
- IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
- const HeaderSearchOptions &HeaderSearchOpts,
- const PreprocessorOptions &PPOpts,
- const CodeGenOptions &CodeGenOpts,
- const TargetOptions &TargetOpts, const LangOptions &LangOpts,
- const std::string &InFile,
- SmallVector<LinkModule, 4> LinkModules,
- std::unique_ptr<raw_pwrite_stream> OS, llvm::LLVMContext &C,
- CoverageSourceInfo *CoverageInfo = nullptr);
-
- // This constructor is used in installing an empty BackendConsumer
- // to use the clang diagnostic handler for IR input files. It avoids
- // initializing the OS field.
- BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+ BackendConsumer(const CompilerInstance &CI, BackendAction Action,
IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
- const HeaderSearchOptions &HeaderSearchOpts,
- const PreprocessorOptions &PPOpts,
- const CodeGenOptions &CodeGenOpts,
- const TargetOptions &TargetOpts, const LangOptions &LangOpts,
- llvm::Module *Module, SmallVector<LinkModule, 4> LinkModules,
- llvm::LLVMContext &C,
- CoverageSourceInfo *CoverageInfo = nullptr);
+ llvm::LLVMContext &C, SmallVector<LinkModule, 4> LinkModules,
+ StringRef InFile, std::unique_ptr<raw_pwrite_stream> OS,
+ CoverageSourceInfo *CoverageInfo,
+ llvm::Module *CurLinkModule = nullptr);
llvm::Module *getModule() const;
std::unique_ptr<llvm::Module> takeModule();
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 04358cd..2dbab78 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -736,10 +736,8 @@ static void addSanitizers(const Triple &TargetTriple,
MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
}
- if (LangOpts.Sanitize.has(SanitizerKind::Type)) {
- MPM.addPass(ModuleTypeSanitizerPass());
- MPM.addPass(createModuleToFunctionPassAdaptor(TypeSanitizerPass()));
- }
+ if (LangOpts.Sanitize.has(SanitizerKind::Type))
+ MPM.addPass(TypeSanitizerPass());
if (LangOpts.Sanitize.has(SanitizerKind::NumericalStability))
MPM.addPass(NumericalStabilitySanitizerPass());
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4d4b742..573be93 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -835,6 +835,38 @@ static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
return CGF.Builder.CreateExtractValue(Call, 0);
}
+static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E,
+ llvm::Intrinsic::ID IntrinsicID) {
+ llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *Dest0 = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Value *Dest1 = CGF.EmitScalarExpr(E->getArg(2));
+
+ llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {Val->getType()});
+ llvm::Value *Call = CGF.Builder.CreateCall(F, Val);
+
+ llvm::Value *SinResult = CGF.Builder.CreateExtractValue(Call, 0);
+ llvm::Value *CosResult = CGF.Builder.CreateExtractValue(Call, 1);
+
+ QualType DestPtrType = E->getArg(1)->getType()->getPointeeType();
+ LValue SinLV = CGF.MakeNaturalAlignAddrLValue(Dest0, DestPtrType);
+ LValue CosLV = CGF.MakeNaturalAlignAddrLValue(Dest1, DestPtrType);
+
+ llvm::StoreInst *StoreSin =
+ CGF.Builder.CreateStore(SinResult, SinLV.getAddress());
+ llvm::StoreInst *StoreCos =
+ CGF.Builder.CreateStore(CosResult, CosLV.getAddress());
+
+ // Mark the two stores as non-aliasing with each other. The order of stores
+ // emitted by this builtin is arbitrary; enforcing a particular order would
+ // prevent later optimizations.
+ llvm::MDBuilder MDHelper(CGF.getLLVMContext());
+ MDNode *Domain = MDHelper.createAnonymousAliasScopeDomain();
+ MDNode *AliasScope = MDHelper.createAnonymousAliasScope(Domain);
+ MDNode *AliasScopeList = MDNode::get(Call->getContext(), AliasScope);
+ StoreSin->setMetadata(LLVMContext::MD_alias_scope, AliasScopeList);
+ StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList);
+}
+
/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
@@ -3232,6 +3264,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
*this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
+ case Builtin::BI__builtin_sincos:
+ case Builtin::BI__builtin_sincosf:
+ case Builtin::BI__builtin_sincosf16:
+ case Builtin::BI__builtin_sincosl:
+ case Builtin::BI__builtin_sincosf128:
+ emitSincosBuiltin(*this, E, Intrinsic::sincos);
+ return RValue::get(nullptr);
+
case Builtin::BIsqrt:
case Builtin::BIsqrtf:
case Builtin::BIsqrtl:
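
A usage sketch for the new lowering (hypothetical demo function; assumes a
target where llvm.sincos is legal):

    /* Clang now emits a single @llvm.sincos call for the builtin, then
     * stores both results with alias.scope/noalias metadata so later
     * passes may reorder the two stores freely. */
    void demo(double x, double *s, double *c) {
      __builtin_sincos(x, s, c);
    }
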
@@ -6757,6 +6797,8 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
+ case llvm::Triple::spirv:
+ return CGF->EmitSPIRVBuiltinExpr(BuiltinID, E);
case llvm::Triple::spirv64:
if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
return nullptr;
@@ -11285,6 +11327,19 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
if (Builtin->LLVMIntrinsic == 0)
return nullptr;
+ if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
+ // If we already know the streaming mode, don't bother with the intrinsic
+ // and emit a constant instead
+ const auto *FD = cast<FunctionDecl>(CurFuncDecl);
+ if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) {
+ unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
+ if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
+ bool IsStreaming = SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
+ return ConstantInt::getBool(Builder.getContext(), IsStreaming);
+ }
+ }
+ }
+
// Predicates must match the main datatype.
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
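
A sketch of the fold (assumes an SME-enabled AArch64 target and arm_sme.h,
which exposes this builtin as __arm_in_streaming_mode()):

    #include <arm_sme.h>
    /* Inside a __arm_streaming function the mode is statically known and
     * not streaming-compatible, so the call folds to the constant true;
     * streaming-compatible functions still emit the intrinsic. */
    int known(void) __arm_streaming {
      return __arm_in_streaming_mode();
    }
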
@@ -19157,8 +19212,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
// TODO: Map to an hlsl_device address space.
llvm::Type *RetTy = llvm::PointerType::getUnqual(getLLVMContext());
- return Builder.CreateIntrinsic(RetTy, Intrinsic::dx_resource_getpointer,
- ArrayRef<Value *>{HandleOp, IndexOp});
+ return Builder.CreateIntrinsic(
+ RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
+ ArrayRef<Value *>{HandleOp, IndexOp});
}
case Builtin::BI__builtin_hlsl_all: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
@@ -20440,6 +20496,26 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
}
}
+Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ switch (BuiltinID) {
+ case SPIRV::BI__builtin_spirv_distance: {
+ Value *X = EmitScalarExpr(E->getArg(0));
+ Value *Y = EmitScalarExpr(E->getArg(1));
+ assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
+ E->getArg(1)->getType()->hasFloatingRepresentation() &&
+ "Distance operands must have a float representation");
+ assert(E->getArg(0)->getType()->isVectorType() &&
+ E->getArg(1)->getType()->isVectorType() &&
+ "Distance operands must be a vector");
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_distance,
+ ArrayRef<Value *>{X, Y}, nullptr, "spv.distance");
+ }
+ }
+ return nullptr;
+}
+
/// Handle a SystemZ function in which the final argument is a pointer
/// to an int that receives the post-instruction CC value. At the LLVM level
/// this is represented as a function that returns a {result, cc} pair.
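
A sketch exercising the new builtin (assumes a SPIR-V target and Clang's
ext_vector_type extension):

    typedef float float4 __attribute__((ext_vector_type(4)));
    /* Both operands must be floating-point vectors, per the asserts above;
     * the result is the scalar element type, via llvm.spv.distance. */
    float dist(float4 a, float4 b) {
      return __builtin_spirv_distance(a, b);
    }
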
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 50b9dfb..7b0ef4b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -3235,22 +3235,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
llvm::StructType *STy =
dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
- if (ArgI.isDirect() && !ArgI.getCanBeFlattened() && STy &&
- STy->getNumElements() > 1) {
- [[maybe_unused]] llvm::TypeSize StructSize =
- CGM.getDataLayout().getTypeAllocSize(STy);
- [[maybe_unused]] llvm::TypeSize PtrElementSize =
- CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(Ty));
- if (STy->containsHomogeneousScalableVectorTypes()) {
- assert(StructSize == PtrElementSize &&
- "Only allow non-fractional movement of structure with"
- "homogeneous scalable vector type");
-
- ArgVals.push_back(ParamValue::forDirect(AI));
- break;
- }
- }
-
Address Alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg),
Arg->getName());
@@ -4887,7 +4871,7 @@ llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee,
call->setCallingConv(getRuntimeCC());
if (CGM.shouldEmitConvergenceTokens() && call->isConvergent())
- return addControlledConvergenceToken(call);
+ return cast<llvm::CallInst>(addConvergenceControlToken(call));
return call;
}
@@ -5414,21 +5398,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::StructType *STy =
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
- if (STy && ArgInfo.isDirect() && !ArgInfo.getCanBeFlattened()) {
- llvm::Type *SrcTy = ConvertTypeForMem(I->Ty);
- [[maybe_unused]] llvm::TypeSize SrcTypeSize =
- CGM.getDataLayout().getTypeAllocSize(SrcTy);
- [[maybe_unused]] llvm::TypeSize DstTypeSize =
- CGM.getDataLayout().getTypeAllocSize(STy);
- if (STy->containsHomogeneousScalableVectorTypes()) {
- assert(SrcTypeSize == DstTypeSize &&
- "Only allow non-fractional movement of structure with "
- "homogeneous scalable vector type");
-
- IRCallArgs[FirstIRArg] = I->getKnownRValue().getScalarVal();
- break;
- }
- }
// FIXME: Avoid the conversion through memory if possible.
Address Src = Address::invalid();
@@ -5818,7 +5787,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
CI->setName("call");
if (CGM.shouldEmitConvergenceTokens() && CI->isConvergent())
- CI = addControlledConvergenceToken(CI);
+ CI = addConvergenceControlToken(CI);
// Update largest vector width from the return type.
LargestVectorWidth =
@@ -6121,6 +6090,8 @@ RValue CodeGenFunction::EmitVAArg(VAArgExpr *VE, Address &VAListAddr,
VAListAddr = VE->isMicrosoftABI() ? EmitMSVAListRef(VE->getSubExpr())
: EmitVAListRef(VE->getSubExpr());
QualType Ty = VE->getType();
+ if (Ty->isVariablyModifiedType())
+ EmitVariablyModifiedType(Ty);
if (VE->isMicrosoftABI())
return CGM.getABIInfo().EmitMSVAArg(*this, VAListAddr, Ty, Slot);
return CGM.getABIInfo().EmitVAArg(*this, VAListAddr, Ty, Slot);
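
Evaluating the variably modified type here, rather than only in the scalar
emitter (see the CGExprScalar.cpp hunk below), covers aggregate va_arg
results as well. A minimal C sketch of the affected pattern:

    #include <stdarg.h>
    void f(int n, ...) {
      va_list ap;
      va_start(ap, n);
      /* The size expression 'n' must be emitted before lowering va_arg. */
      int (*p)[n] = va_arg(ap, int (*)[n]);
      (void)p;
      va_end(ap);
    }
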
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index f29ddec..560d4ce 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -3492,6 +3492,11 @@ llvm::DIType *CGDebugInfo::CreateType(const PipeType *Ty, llvm::DIFile *U) {
return getOrCreateType(Ty->getElementType(), U);
}
+llvm::DIType *CGDebugInfo::CreateType(const HLSLAttributedResourceType *Ty,
+ llvm::DIFile *U) {
+ return getOrCreateType(Ty->getWrappedType(), U);
+}
+
llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) {
const EnumDecl *ED = Ty->getDecl();
@@ -3834,12 +3839,13 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) {
case Type::TemplateSpecialization:
return CreateType(cast<TemplateSpecializationType>(Ty), Unit);
+ case Type::HLSLAttributedResource:
+ return CreateType(cast<HLSLAttributedResourceType>(Ty), Unit);
case Type::CountAttributed:
case Type::Auto:
case Type::Attributed:
case Type::BTFTagAttributed:
- case Type::HLSLAttributedResource:
case Type::Adjusted:
case Type::Decayed:
case Type::DeducedTemplateSpecialization:
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 3fd0237..38f73ec 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -196,6 +196,8 @@ class CGDebugInfo {
llvm::DIType *CreateType(const PointerType *Ty, llvm::DIFile *F);
llvm::DIType *CreateType(const BlockPointerType *Ty, llvm::DIFile *F);
llvm::DIType *CreateType(const FunctionType *Ty, llvm::DIFile *F);
+ llvm::DIType *CreateType(const HLSLAttributedResourceType *Ty,
+ llvm::DIFile *F);
/// Get structure or union type.
llvm::DIType *CreateType(const RecordType *Tyg);
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 4159cee..0f27bd0 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -5455,11 +5455,6 @@ Value *ScalarExprEmitter::VisitChooseExpr(ChooseExpr *E) {
}
Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
- QualType Ty = VE->getType();
-
- if (Ty->isVariablyModifiedType())
- CGF.EmitVariablyModifiedType(Ty);
-
Address ArgValue = Address::invalid();
RValue ArgPtr = CGF.EmitVAArg(VE, ArgValue);
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index c354e58..5679bd7 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -395,7 +395,7 @@ llvm::Value *CGHLSLRuntime::emitInputSemantic(IRBuilder<> &B,
return buildVectorInput(B, GroupThreadIDIntrinsic, Ty);
}
if (D.hasAttr<HLSLSV_GroupIDAttr>()) {
- llvm::Function *GroupIDIntrinsic = CGM.getIntrinsic(Intrinsic::dx_group_id);
+ llvm::Function *GroupIDIntrinsic = CGM.getIntrinsic(getGroupIdIntrinsic());
return buildVectorInput(B, GroupIDIntrinsic, Ty);
}
assert(false && "Unhandled parameter attribute");
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index edb87f9..46e472f 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -87,6 +87,7 @@ public:
GENERATE_HLSL_INTRINSIC_FUNCTION(Radians, radians)
GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
GENERATE_HLSL_INTRINSIC_FUNCTION(GroupThreadId, thread_id_in_group)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(GroupId, group_id)
GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
@@ -103,6 +104,8 @@ public:
GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(CreateResourceGetPointer,
+ resource_getpointer)
GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding,
resource_handlefrombinding)
GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter)
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index be9605f..e9a8500 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -480,6 +480,12 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
case Stmt::OpenACCShutdownConstructClass:
EmitOpenACCShutdownConstruct(cast<OpenACCShutdownConstruct>(*S));
break;
+ case Stmt::OpenACCSetConstructClass:
+ EmitOpenACCSetConstruct(cast<OpenACCSetConstruct>(*S));
+ break;
+ case Stmt::OpenACCUpdateConstructClass:
+ EmitOpenACCUpdateConstruct(cast<OpenACCUpdateConstruct>(*S));
+ break;
}
}
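
A source sketch that now reaches these two cases (OpenACC input; both
constructs are currently lowered as no-ops):

    void g(int *x, int n) {
    #pragma acc set device_num(0)
    #pragma acc update self(x[0:n])
    }
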
@@ -1026,8 +1032,8 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S,
EmitBlock(LoopHeader.getBlock());
if (CGM.shouldEmitConvergenceTokens())
- ConvergenceTokenStack.push_back(emitConvergenceLoopToken(
- LoopHeader.getBlock(), ConvergenceTokenStack.back()));
+ ConvergenceTokenStack.push_back(
+ emitConvergenceLoopToken(LoopHeader.getBlock()));
// Create an exit block for when the condition fails, which will
// also become the break target.
@@ -1154,8 +1160,7 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S,
EmitBlockWithFallThrough(LoopBody, &S);
if (CGM.shouldEmitConvergenceTokens())
- ConvergenceTokenStack.push_back(
- emitConvergenceLoopToken(LoopBody, ConvergenceTokenStack.back()));
+ ConvergenceTokenStack.push_back(emitConvergenceLoopToken(LoopBody));
{
RunCleanupsScope BodyScope(*this);
@@ -1233,8 +1238,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S,
EmitBlock(CondBlock);
if (CGM.shouldEmitConvergenceTokens())
- ConvergenceTokenStack.push_back(
- emitConvergenceLoopToken(CondBlock, ConvergenceTokenStack.back()));
+ ConvergenceTokenStack.push_back(emitConvergenceLoopToken(CondBlock));
const SourceRange &R = S.getSourceRange();
LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs,
@@ -1371,8 +1375,7 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S,
EmitBlock(CondBlock);
if (CGM.shouldEmitConvergenceTokens())
- ConvergenceTokenStack.push_back(
- emitConvergenceLoopToken(CondBlock, ConvergenceTokenStack.back()));
+ ConvergenceTokenStack.push_back(emitConvergenceLoopToken(CondBlock));
const SourceRange &R = S.getSourceRange();
LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs,
@@ -3248,35 +3251,32 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) {
return F;
}
-namespace {
// Returns the first convergence entry/loop/anchor instruction found in |BB|.
// nullptr otherwise.
-llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) {
+static llvm::ConvergenceControlInst *getConvergenceToken(llvm::BasicBlock *BB) {
for (auto &I : *BB) {
- auto *II = dyn_cast<llvm::IntrinsicInst>(&I);
- if (II && llvm::isConvergenceControlIntrinsic(II->getIntrinsicID()))
- return II;
+ if (auto *CI = dyn_cast<llvm::ConvergenceControlInst>(&I))
+ return CI;
}
return nullptr;
}
-} // namespace
-
llvm::CallBase *
-CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input,
- llvm::Value *ParentToken) {
+CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input) {
+ llvm::ConvergenceControlInst *ParentToken = ConvergenceTokenStack.back();
+ assert(ParentToken);
+
llvm::Value *bundleArgs[] = {ParentToken};
llvm::OperandBundleDef OB("convergencectrl", bundleArgs);
- auto Output = llvm::CallBase::addOperandBundle(
+ auto *Output = llvm::CallBase::addOperandBundle(
Input, llvm::LLVMContext::OB_convergencectrl, OB, Input->getIterator());
Input->replaceAllUsesWith(Output);
Input->eraseFromParent();
return Output;
}
-llvm::IntrinsicInst *
-CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB,
- llvm::Value *ParentToken) {
+llvm::ConvergenceControlInst *
+CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB) {
CGBuilderTy::InsertPoint IP = Builder.saveIP();
if (BB->empty())
Builder.SetInsertPoint(BB);
@@ -3287,14 +3287,14 @@ CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB,
llvm::Intrinsic::experimental_convergence_loop, {}, {});
Builder.restoreIP(IP);
- llvm::CallBase *I = addConvergenceControlToken(CB, ParentToken);
- return cast<llvm::IntrinsicInst>(I);
+ CB = addConvergenceControlToken(CB);
+ return cast<llvm::ConvergenceControlInst>(CB);
}
-llvm::IntrinsicInst *
+llvm::ConvergenceControlInst *
CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) {
llvm::BasicBlock *BB = &F->getEntryBlock();
- llvm::IntrinsicInst *Token = getConvergenceToken(BB);
+ llvm::ConvergenceControlInst *Token = getConvergenceToken(BB);
if (Token)
return Token;
@@ -3309,5 +3309,5 @@ CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) {
assert(isa<llvm::IntrinsicInst>(I));
Builder.restoreIP(IP);
- return cast<llvm::IntrinsicInst>(I);
+ return cast<llvm::ConvergenceControlInst>(I);
}
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index cc927f4..f63cb9b 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -106,46 +106,19 @@ static void reportOptRecordError(Error E, DiagnosticsEngine &Diags,
}
BackendConsumer::BackendConsumer(
- BackendAction Action, DiagnosticsEngine &Diags,
- IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
- const HeaderSearchOptions &HeaderSearchOpts,
- const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts,
- const TargetOptions &TargetOpts, const LangOptions &LangOpts,
- const std::string &InFile, SmallVector<LinkModule, 4> LinkModules,
- std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
- CoverageSourceInfo *CoverageInfo)
- : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
- CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
- AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS),
- LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
- LLVMIRGenerationRefCount(0),
- Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts,
- PPOpts, CodeGenOpts, C, CoverageInfo)),
- LinkModules(std::move(LinkModules)) {
- TimerIsEnabled = CodeGenOpts.TimePasses;
- llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
- llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
-}
-
-// This constructor is used in installing an empty BackendConsumer
-// to use the clang diagnostic handler for IR input files. It avoids
-// initializing the OS field.
-BackendConsumer::BackendConsumer(
- BackendAction Action, DiagnosticsEngine &Diags,
- IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
- const HeaderSearchOptions &HeaderSearchOpts,
- const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts,
- const TargetOptions &TargetOpts, const LangOptions &LangOpts,
- llvm::Module *Module, SmallVector<LinkModule, 4> LinkModules,
- LLVMContext &C, CoverageSourceInfo *CoverageInfo)
- : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
- CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
- Context(nullptr), FS(VFS),
- LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
- LLVMIRGenerationRefCount(0),
- Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, PPOpts,
- CodeGenOpts, C, CoverageInfo)),
- LinkModules(std::move(LinkModules)), CurLinkModule(Module) {
+ const CompilerInstance &CI, BackendAction Action,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, LLVMContext &C,
+ SmallVector<LinkModule, 4> LinkModules, StringRef InFile,
+ std::unique_ptr<raw_pwrite_stream> OS, CoverageSourceInfo *CoverageInfo,
+ llvm::Module *CurLinkModule)
+ : Diags(CI.getDiagnostics()), HeaderSearchOpts(CI.getHeaderSearchOpts()),
+ CodeGenOpts(CI.getCodeGenOpts()), TargetOpts(CI.getTargetOpts()),
+ LangOpts(CI.getLangOpts()), AsmOutStream(std::move(OS)), FS(VFS),
+ LLVMIRGeneration("irgen", "LLVM IR Generation Time"), Action(Action),
+ Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS),
+ CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(),
+ CI.getCodeGenOpts(), C, CoverageInfo)),
+ LinkModules(std::move(LinkModules)), CurLinkModule(CurLinkModule) {
TimerIsEnabled = CodeGenOpts.TimePasses;
llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
@@ -1011,10 +984,8 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
CI.getPreprocessor());
std::unique_ptr<BackendConsumer> Result(new BackendConsumer(
- BA, CI.getDiagnostics(), &CI.getVirtualFileSystem(),
- CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(),
- CI.getTargetOpts(), CI.getLangOpts(), std::string(InFile),
- std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo));
+ CI, BA, &CI.getVirtualFileSystem(), *VMContext, std::move(LinkModules),
+ InFile, std::move(OS), CoverageInfo));
BEConsumer = Result.get();
// Enable generating macro debug info only when debug info is not disabled and
@@ -1182,11 +1153,9 @@ void CodeGenAction::ExecuteAction() {
// Set clang diagnostic handler. To do this we need to create a fake
// BackendConsumer.
- BackendConsumer Result(BA, CI.getDiagnostics(), &CI.getVirtualFileSystem(),
- CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(),
- CI.getCodeGenOpts(), CI.getTargetOpts(),
- CI.getLangOpts(), TheModule.get(),
- std::move(LinkModules), *VMContext, nullptr);
+ BackendConsumer Result(CI, BA, &CI.getVirtualFileSystem(), *VMContext,
+ std::move(LinkModules), "", nullptr, nullptr,
+ TheModule.get());
// Link in each pending link module.
if (!CodeGenOpts.LinkBitcodePostopt && Result.LinkInModules(&*TheModule))
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 87d016a..e2dc0b1 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -315,7 +315,7 @@ public:
SmallVector<const BinaryOperator *, 16> MCDCLogOpStack;
/// Stack to track the controlled convergence tokens.
- SmallVector<llvm::IntrinsicInst *, 4> ConvergenceTokenStack;
+ SmallVector<llvm::ConvergenceControlInst *, 4> ConvergenceTokenStack;
/// Number of nested loop to be consumed by the last surrounding
/// loop-associated directive.
@@ -4140,6 +4140,16 @@ public:
// but in the future we will implement some sort of IR.
}
+ void EmitOpenACCSetConstruct(const OpenACCSetConstruct &S) {
+ // TODO OpenACC: Implement this. It is currently implemented as a 'no-op',
+ // but in the future we will implement some sort of IR.
+ }
+
+ void EmitOpenACCUpdateConstruct(const OpenACCUpdateConstruct &S) {
+ // TODO OpenACC: Implement this. It is currently implemented as a 'no-op',
+ // but in the future we will implement some sort of IR.
+ }
+
//===--------------------------------------------------------------------===//
// LValue Expression Emission
//===--------------------------------------------------------------------===//
@@ -4763,6 +4773,7 @@ public:
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue);
+ llvm::Value *EmitSPIRVBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
const CallExpr *E);
llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
@@ -5241,29 +5252,20 @@ public:
llvm::Value *emitBoolVecConversion(llvm::Value *SrcVec,
unsigned NumElementsDst,
const llvm::Twine &Name = "");
- // Adds a convergence_ctrl token to |Input| and emits the required parent
- // convergence instructions.
- template <typename CallType>
- CallType *addControlledConvergenceToken(CallType *Input) {
- return cast<CallType>(
- addConvergenceControlToken(Input, ConvergenceTokenStack.back()));
- }
private:
// Emits a convergence_loop instruction for the given |BB|, with |ParentToken|
// as its parent convergence instr.
- llvm::IntrinsicInst *emitConvergenceLoopToken(llvm::BasicBlock *BB,
- llvm::Value *ParentToken);
+ llvm::ConvergenceControlInst *emitConvergenceLoopToken(llvm::BasicBlock *BB);
+
// Adds a convergence_ctrl token with |ParentToken| as parent convergence
// instr to the call |Input|.
- llvm::CallBase *addConvergenceControlToken(llvm::CallBase *Input,
- llvm::Value *ParentToken);
+ llvm::CallBase *addConvergenceControlToken(llvm::CallBase *Input);
+
// Find the convergence_entry instruction in |F|, or emit one if none exists.
// Returns the convergence instruction.
- llvm::IntrinsicInst *getOrEmitConvergenceEntryToken(llvm::Function *F);
- // Find the convergence_loop instruction for the loop defined by |LI|, or
- // emits one if none exists. Returns the convergence instruction.
- llvm::IntrinsicInst *getOrEmitConvergenceLoopToken(const LoopInfo *LI);
+ llvm::ConvergenceControlInst *
+ getOrEmitConvergenceEntryToken(llvm::Function *F);
private:
llvm::MDNode *getRangeForLoadFromType(QualType Ty);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c49f763..7db1ed7 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2748,7 +2748,21 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
Attrs.addAttribute("target-features", llvm::join(Features, ","));
AddedAttr = true;
}
-
+ if (getTarget().getTriple().isAArch64()) {
+ llvm::SmallVector<StringRef, 8> Feats;
+ if (TV)
+ TV->getFeatures(Feats);
+ else if (TC)
+ TC->getFeatures(Feats, GD.getMultiVersionIndex());
+ if (!Feats.empty()) {
+ llvm::sort(Feats);
+ std::string FMVFeatures;
+ for (StringRef F : Feats)
+ FMVFeatures.append(",+" + F.str());
+ Attrs.addAttribute("fmv-features", FMVFeatures.substr(1));
+ AddedAttr = true;
+ }
+ }
return AddedAttr;
}
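
A sketch of the resulting attribute (AArch64 function multi-versioning;
feature names are illustrative):

    /* With the loop above, the second definition would carry a sorted,
     * comma-joined attribute such as "fmv-features"="+sve,+sve2". */
    __attribute__((target_version("default"))) int f(void) { return 0; }
    __attribute__((target_version("sve+sve2"))) int f(void) { return 1; }
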
@@ -4227,7 +4241,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
llvm::Function *NewFn);
-static unsigned getFMVPriority(const TargetInfo &TI,
+static uint64_t getFMVPriority(const TargetInfo &TI,
const CodeGenFunction::FMVResolverOption &RO) {
llvm::SmallVector<StringRef, 8> Features{RO.Features};
if (RO.Architecture)
diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp
index 438982f..f091577 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -935,18 +935,36 @@ struct CounterCoverageMappingBuilder
}
struct BranchCounterPair {
- Counter Executed;
- Counter Skipped;
+ Counter Executed; ///< The Counter previously assigned.
+ Counter Skipped; ///< An expression (Parent-Executed), or equivalent to it.
};
+ /// Retrieve or assign the pair of Counter(s).
+ ///
+ /// This returns BranchCounterPair {Executed, Skipped}.
+ /// Executed is the Counter associated with S assigned by an earlier
+ /// CounterMapping pass.
+ /// Skipped may be an expression (ParentCnt - Executed) or newly
+ /// assigned Counter in EnableSingleByteCoverage, as subtract
+ /// expressions are not available in this mode.
+ ///
+ /// \param S Key to the CounterMap
+ /// \param ParentCnt The Counter representing how many times S is evaluated.
+ /// \param SkipCntForOld (To be removed later) Optional fake Counter
+ /// to override Skipped for adjustment of
+ /// expressions in the old behavior of
+ /// EnableSingleByteCoverage that is unaware of
+ /// Branch coverage.
BranchCounterPair
getBranchCounterPair(const Stmt *S, Counter ParentCnt,
std::optional<Counter> SkipCntForOld = std::nullopt) {
Counter ExecCnt = getRegionCounter(S);
- // The old behavior of SingleByte shouldn't emit Branches.
+ // The old behavior of SingleByte is unaware of Branches.
+ // Will be pruned after the migration of SingleByte.
if (llvm::EnableSingleByteCoverage) {
- assert(SkipCntForOld);
+ assert(SkipCntForOld &&
+ "SingleByte must provide SkipCntForOld as a fake Skipped count.");
return {ExecCnt, *SkipCntForOld};
}
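
For reference, a sketch of the fall-through (non-SingleByte) path using the
builder's existing subtractCounters helper: the skipped count is derived
rather than separately instrumented.

    // Executed + Skipped always sums to ParentCnt.
    Counter SkipCnt = subtractCounters(ParentCnt, ExecCnt);
    return {ExecCnt, SkipCnt};
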
diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp
index 61fdf33..b7b212b 100644
--- a/clang/lib/CodeGen/SanitizerMetadata.cpp
+++ b/clang/lib/CodeGen/SanitizerMetadata.cpp
@@ -145,7 +145,9 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV, const VarDecl &D,
for (auto *Attr : D.specific_attrs<NoSanitizeAttr>())
NoSanitizeMask |= Attr->getMask();
- if (D.hasExternalStorage())
+ // External definitions and incomplete types get handled at the place they
+ // are defined.
+ if (D.hasExternalStorage() || D.getType()->isIncompleteType())
NoSanitizeMask |= SanitizerKind::Type;
return NoSanitizeMask;
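
A sketch of a declaration that is now skipped (with -fsanitize=type; an
array of unknown bound is an incomplete type):

    /* No tysan global metadata is attached here; the TU holding the
     * complete definition is responsible for it. */
    extern char buf[];
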
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index ad7f405..7db67ec 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -662,7 +662,7 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
bool AArch64ABIInfo::passAsAggregateType(QualType Ty) const {
if (Kind == AArch64ABIKind::AAPCS && Ty->isSVESizelessBuiltinType()) {
- const auto *BT = Ty->getAs<BuiltinType>();
+ const auto *BT = Ty->castAs<BuiltinType>();
return !BT->isSVECount() &&
getContext().getBuiltinVectorTypeInfo(BT).NumVectors > 1;
}
@@ -1169,8 +1169,9 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
enum class ArmSMEInlinability : uint8_t {
Ok = 0,
ErrorCalleeRequiresNewZA = 1 << 0,
- WarnIncompatibleStreamingModes = 1 << 1,
- ErrorIncompatibleStreamingModes = 1 << 2,
+ ErrorCalleeRequiresNewZT0 = 1 << 1,
+ WarnIncompatibleStreamingModes = 1 << 2,
+ ErrorIncompatibleStreamingModes = 1 << 3,
IncompatibleStreamingModes =
WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes,
@@ -1198,9 +1199,12 @@ static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
else
Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes;
}
- if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
+ if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) {
if (NewAttr->isNewZA())
Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA;
+ if (NewAttr->isNewZT0())
+ Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZT0;
+ }
return Inlinability;
}
@@ -1227,6 +1231,11 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
ArmSMEInlinability::ErrorCalleeRequiresNewZA)
CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
<< Callee->getDeclName();
+
+ if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZT0) ==
+ ArmSMEInlinability::ErrorCalleeRequiresNewZT0)
+ CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_zt0)
+ << Callee->getDeclName();
}
// If the target does not have floating-point registers, but we are using a
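
A sketch that triggers the new diagnostic (assumes SME2; __arm_new is the
ACLE keyword attribute):

    /* An always_inline callee creating a new ZT0 state cannot be inlined,
     * so the call below now reports err_function_always_inline_new_zt0,
     * mirroring the existing ZA check. */
    __attribute__((always_inline))
    void callee(void) __arm_new("zt0") {}
    void caller(void) { callee(); }
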
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 56ad050..fa07e68 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -537,7 +537,11 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
break;
}
- if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
+ // OpenCL assumes by default that atomic scopes are per-address space for
+ // non-sequentially consistent operations.
+ if (Scope >= SyncScope::OpenCLWorkGroup &&
+ Scope <= SyncScope::OpenCLSubGroup &&
+ Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
if (!Name.empty())
Name = Twine(Twine(Name) + Twine("-")).str();
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 0431d2c..b82e4dd 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -9,6 +9,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
using namespace clang;
@@ -79,13 +80,11 @@ public:
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
- int Operand,
- const SmallVectorImpl<int> &GridConstantArgs);
+ int Operand);
- static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
- int Operand) {
- addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
- }
+ static void
+ addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
+ const SmallVectorImpl<int> &GridConstantArgs);
private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
@@ -259,7 +258,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
if (FD->hasAttr<OpenCLKernelAttr>()) {
// OpenCL __kernel functions get kernel metadata
// Create !{<func-ref>, metadata !"kernel", i32 1} node
- addNVVMMetadata(F, "kernel", 1);
+ F->setCallingConv(llvm::CallingConv::PTX_Kernel);
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
}
@@ -277,7 +276,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// For some reason arg indices are 1-based in NVVM
GCI.push_back(IV.index() + 1);
// Create !{<func-ref>, metadata !"kernel", i32 1} node
- addNVVMMetadata(F, "kernel", 1, GCI);
+ F->setCallingConv(llvm::CallingConv::PTX_Kernel);
+ addGridConstantNVVMMetadata(F, GCI);
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
M.handleCUDALaunchBoundsAttr(F, Attr);
@@ -285,13 +285,12 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// Attach kernel metadata directly if compiling for NVPTX.
if (FD->hasAttr<NVPTXKernelAttr>()) {
- addNVVMMetadata(F, "kernel", 1);
+ F->setCallingConv(llvm::CallingConv::PTX_Kernel);
}
}
-void NVPTXTargetCodeGenInfo::addNVVMMetadata(
- llvm::GlobalValue *GV, StringRef Name, int Operand,
- const SmallVectorImpl<int> &GridConstantArgs) {
+void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
+ StringRef Name, int Operand) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();
@@ -302,6 +301,21 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
+
+ // Append metadata to nvvm.annotations
+ MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+}
+
+void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
+ llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {
+
+ llvm::Module *M = GV->getParent();
+ llvm::LLVMContext &Ctx = M->getContext();
+
+ // Get "nvvm.annotations" metadata node
+ llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
+
+ SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
if (!GridConstantArgs.empty()) {
SmallVector<llvm::Metadata *, 10> GCM;
for (int I : GridConstantArgs)
@@ -310,6 +324,7 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
llvm::MDNode::get(Ctx, GCM)});
}
+
// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
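
A sketch of the effect (plain C++ compiled for an nvptx64 target):

    /* Previously this kernel got a !nvvm.annotations operand
     * {..., !"kernel", i32 1}; it is now marked with the ptx_kernel
     * calling convention instead. Grid-constant info still goes through
     * addGridConstantNVVMMetadata. */
    __attribute__((nvptx_kernel)) void k(int *out) { *out = 42; }
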
diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp
index b04e436..873e696 100644
--- a/clang/lib/CodeGen/Targets/RISCV.cpp
+++ b/clang/lib/CodeGen/Targets/RISCV.cpp
@@ -495,13 +495,7 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
- ABIArgInfo Info = ABIArgInfo::getDirect();
-
- // If it is tuple type, it can't be flattened.
- if (llvm::StructType *STy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty)))
- Info.setCanBeFlattened(!STy->containsHomogeneousScalableVectorTypes());
-
- return Info;
+ return ABIArgInfo::getDirect();
}
if (const VectorType *VT = Ty->getAs<VectorType>())
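
The tuple special case is gone because the matching no-flatten paths in
CGCall.cpp (removed above) no longer exist, so such aggregates are passed
directly again. A sketch (RVV intrinsics, assumes a 'v' extension target):

    #include <riscv_vector.h>
    /* A segment-load tuple type, passed and returned directly. */
    vint32m1x2_t pass(vint32m1x2_t t) { return t; }
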
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index a48fe9d..5c75e98 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -64,6 +64,8 @@ public:
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override;
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &M) const override;
llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
SyncScope Scope,
llvm::AtomicOrdering Ordering,
@@ -245,6 +247,41 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
return DefaultGlobalAS;
}
+void SPIRVTargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+ if (!M.getLangOpts().HIP ||
+ M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+ return;
+ if (GV->isDeclaration())
+ return;
+
+ auto F = dyn_cast<llvm::Function>(GV);
+ if (!F)
+ return;
+
+ auto FD = dyn_cast_or_null<FunctionDecl>(D);
+ if (!FD)
+ return;
+ if (!FD->hasAttr<CUDAGlobalAttr>())
+ return;
+
+ unsigned N = M.getLangOpts().GPUMaxThreadsPerBlock;
+ if (auto FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>())
+ N = FlatWGS->getMax()->EvaluateKnownConstInt(M.getContext()).getExtValue();
+
+ // We encode the maximum flat WG size in the first component of the 3D
+ // max_work_group_size attribute, which will get reverse translated into the
+ // original AMDGPU attribute when targeting AMDGPU.
+ auto Int32Ty = llvm::IntegerType::getInt32Ty(M.getLLVMContext());
+ llvm::Metadata *AttrMDArgs[] = {
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, N)),
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1)),
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1))};
+
+ F->setMetadata("max_work_group_size",
+ llvm::MDNode::get(M.getLLVMContext(), AttrMDArgs));
+}
+
llvm::SyncScope::ID
SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope,
llvm::AtomicOrdering,
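
A sketch of the resulting metadata (HIP for an AMD SPIR-V target; attribute
spellings as in upstream Clang):

    /* This kernel gets !max_work_group_size !{i32 256, i32 1, i32 1},
     * which is reverse-translated to the AMDGPU flat work-group-size
     * attribute when the SPIR-V is lowered for AMDGPU. */
    __attribute__((amdgpu_flat_work_group_size(1, 256)))
    __attribute__((global)) void kernel() {}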