diff options
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/BackendConsumer.h | 34 | ||||
-rw-r--r-- | clang/lib/CodeGen/BackendUtil.cpp | 6 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 80 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGCall.cpp | 6 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGDebugInfo.cpp | 8 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGDebugInfo.h | 2 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGExprScalar.cpp | 5 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGHLSLRuntime.cpp | 2 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGHLSLRuntime.h | 3 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmt.cpp | 52 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenAction.cpp | 67 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h | 34 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 18 | ||||
-rw-r--r-- | clang/lib/CodeGen/CoverageMappingGen.cpp | 23 | ||||
-rw-r--r-- | clang/lib/CodeGen/SanitizerMetadata.cpp | 4 | ||||
-rw-r--r-- | clang/lib/CodeGen/Targets/AArch64.cpp | 17 | ||||
-rw-r--r-- | clang/lib/CodeGen/Targets/AMDGPU.cpp | 6 | ||||
-rw-r--r-- | clang/lib/CodeGen/Targets/NVPTX.cpp | 39 | ||||
-rw-r--r-- | clang/lib/CodeGen/Targets/SPIR.cpp | 37 |
19 files changed, 289 insertions, 154 deletions
diff --git a/clang/lib/CodeGen/BackendConsumer.h b/clang/lib/CodeGen/BackendConsumer.h index a023d29..d932a78 100644 --- a/clang/lib/CodeGen/BackendConsumer.h +++ b/clang/lib/CodeGen/BackendConsumer.h @@ -29,17 +29,16 @@ class BackendConsumer : public ASTConsumer { virtual void anchor(); DiagnosticsEngine &Diags; - BackendAction Action; const HeaderSearchOptions &HeaderSearchOpts; const CodeGenOptions &CodeGenOpts; const TargetOptions &TargetOpts; const LangOptions &LangOpts; std::unique_ptr<raw_pwrite_stream> AsmOutStream; - ASTContext *Context; + ASTContext *Context = nullptr; IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS; llvm::Timer LLVMIRGeneration; - unsigned LLVMIRGenerationRefCount; + unsigned LLVMIRGenerationRefCount = 0; /// True if we've finished generating IR. This prevents us from generating /// additional LLVM IR after emitting output in HandleTranslationUnit. This @@ -48,6 +47,8 @@ class BackendConsumer : public ASTConsumer { bool TimerIsEnabled = false; + BackendAction Action; + std::unique_ptr<CodeGenerator> Gen; SmallVector<LinkModule, 4> LinkModules; @@ -69,29 +70,12 @@ class BackendConsumer : public ASTConsumer { llvm::Module *CurLinkModule = nullptr; public: - BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, - const HeaderSearchOptions &HeaderSearchOpts, - const PreprocessorOptions &PPOpts, - const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, const LangOptions &LangOpts, - const std::string &InFile, - SmallVector<LinkModule, 4> LinkModules, - std::unique_ptr<raw_pwrite_stream> OS, llvm::LLVMContext &C, - CoverageSourceInfo *CoverageInfo = nullptr); - - // This constructor is used in installing an empty BackendConsumer - // to use the clang diagnostic handler for IR input files. It avoids - // initializing the OS field. - BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, + BackendConsumer(const CompilerInstance &CI, BackendAction Action, IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, - const HeaderSearchOptions &HeaderSearchOpts, - const PreprocessorOptions &PPOpts, - const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, const LangOptions &LangOpts, - llvm::Module *Module, SmallVector<LinkModule, 4> LinkModules, - llvm::LLVMContext &C, - CoverageSourceInfo *CoverageInfo = nullptr); + llvm::LLVMContext &C, SmallVector<LinkModule, 4> LinkModules, + StringRef InFile, std::unique_ptr<raw_pwrite_stream> OS, + CoverageSourceInfo *CoverageInfo, + llvm::Module *CurLinkModule = nullptr); llvm::Module *getModule() const; std::unique_ptr<llvm::Module> takeModule(); diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 04358cd..2dbab78 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -736,10 +736,8 @@ static void addSanitizers(const Triple &TargetTriple, MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass())); } - if (LangOpts.Sanitize.has(SanitizerKind::Type)) { - MPM.addPass(ModuleTypeSanitizerPass()); - MPM.addPass(createModuleToFunctionPassAdaptor(TypeSanitizerPass())); - } + if (LangOpts.Sanitize.has(SanitizerKind::Type)) + MPM.addPass(TypeSanitizerPass()); if (LangOpts.Sanitize.has(SanitizerKind::NumericalStability)) MPM.addPass(NumericalStabilitySanitizerPass()); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4d4b742..573be93 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -835,6 +835,38 @@ static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, return CGF.Builder.CreateExtractValue(Call, 0); } +static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E, + llvm::Intrinsic::ID IntrinsicID) { + llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Dest0 = CGF.EmitScalarExpr(E->getArg(1)); + llvm::Value *Dest1 = CGF.EmitScalarExpr(E->getArg(2)); + + llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {Val->getType()}); + llvm::Value *Call = CGF.Builder.CreateCall(F, Val); + + llvm::Value *SinResult = CGF.Builder.CreateExtractValue(Call, 0); + llvm::Value *CosResult = CGF.Builder.CreateExtractValue(Call, 1); + + QualType DestPtrType = E->getArg(1)->getType()->getPointeeType(); + LValue SinLV = CGF.MakeNaturalAlignAddrLValue(Dest0, DestPtrType); + LValue CosLV = CGF.MakeNaturalAlignAddrLValue(Dest1, DestPtrType); + + llvm::StoreInst *StoreSin = + CGF.Builder.CreateStore(SinResult, SinLV.getAddress()); + llvm::StoreInst *StoreCos = + CGF.Builder.CreateStore(CosResult, CosLV.getAddress()); + + // Mark the two stores as non-aliasing with each other. The order of stores + // emitted by this builtin is arbitrary, enforcing a particular order will + // prevent optimizations later on. + llvm::MDBuilder MDHelper(CGF.getLLVMContext()); + MDNode *Domain = MDHelper.createAnonymousAliasScopeDomain(); + MDNode *AliasScope = MDHelper.createAnonymousAliasScope(Domain); + MDNode *AliasScopeList = MDNode::get(Call->getContext(), AliasScope); + StoreSin->setMetadata(LLVMContext::MD_alias_scope, AliasScopeList); + StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList); +} + /// EmitFAbs - Emit a call to @llvm.fabs(). static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); @@ -3232,6 +3264,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh)); + case Builtin::BI__builtin_sincos: + case Builtin::BI__builtin_sincosf: + case Builtin::BI__builtin_sincosf16: + case Builtin::BI__builtin_sincosl: + case Builtin::BI__builtin_sincosf128: + emitSincosBuiltin(*this, E, Intrinsic::sincos); + return RValue::get(nullptr); + case Builtin::BIsqrt: case Builtin::BIsqrtf: case Builtin::BIsqrtl: @@ -6757,6 +6797,8 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, case llvm::Triple::riscv32: case llvm::Triple::riscv64: return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); + case llvm::Triple::spirv: + return CGF->EmitSPIRVBuiltinExpr(BuiltinID, E); case llvm::Triple::spirv64: if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA) return nullptr; @@ -11285,6 +11327,19 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, if (Builtin->LLVMIntrinsic == 0) return nullptr; + if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) { + // If we already know the streaming mode, don't bother with the intrinsic + // and emit a constant instead + const auto *FD = cast<FunctionDecl>(CurFuncDecl); + if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) { + unsigned SMEAttrs = FPT->getAArch64SMEAttributes(); + if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) { + bool IsStreaming = SMEAttrs & FunctionType::SME_PStateSMEnabledMask; + return ConstantInt::getBool(Builder.getContext(), IsStreaming); + } + } + } + // Predicates must match the main datatype. for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType())) @@ -19157,8 +19212,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, // TODO: Map to an hlsl_device address space. llvm::Type *RetTy = llvm::PointerType::getUnqual(getLLVMContext()); - return Builder.CreateIntrinsic(RetTy, Intrinsic::dx_resource_getpointer, - ArrayRef<Value *>{HandleOp, IndexOp}); + return Builder.CreateIntrinsic( + RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(), + ArrayRef<Value *>{HandleOp, IndexOp}); } case Builtin::BI__builtin_hlsl_all: { Value *Op0 = EmitScalarExpr(E->getArg(0)); @@ -20440,6 +20496,26 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } } +Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + switch (BuiltinID) { + case SPIRV::BI__builtin_spirv_distance: { + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + E->getArg(1)->getType()->hasFloatingRepresentation() && + "Distance operands must have a float representation"); + assert(E->getArg(0)->getType()->isVectorType() && + E->getArg(1)->getType()->isVectorType() && + "Distance operands must be a vector"); + return Builder.CreateIntrinsic( + /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_distance, + ArrayRef<Value *>{X, Y}, nullptr, "spv.distance"); + } + } + return nullptr; +} + /// Handle a SystemZ function in which the final argument is a pointer /// to an int that receives the post-instruction CC value. At the LLVM level /// this is represented as a function that returns a {result, cc} pair. diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index f139c30..7b0ef4b 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -4871,7 +4871,7 @@ llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, call->setCallingConv(getRuntimeCC()); if (CGM.shouldEmitConvergenceTokens() && call->isConvergent()) - return addControlledConvergenceToken(call); + return cast<llvm::CallInst>(addConvergenceControlToken(call)); return call; } @@ -5787,7 +5787,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CI->setName("call"); if (CGM.shouldEmitConvergenceTokens() && CI->isConvergent()) - CI = addControlledConvergenceToken(CI); + CI = addConvergenceControlToken(CI); // Update largest vector width from the return type. LargestVectorWidth = @@ -6090,6 +6090,8 @@ RValue CodeGenFunction::EmitVAArg(VAArgExpr *VE, Address &VAListAddr, VAListAddr = VE->isMicrosoftABI() ? EmitMSVAListRef(VE->getSubExpr()) : EmitVAListRef(VE->getSubExpr()); QualType Ty = VE->getType(); + if (Ty->isVariablyModifiedType()) + EmitVariablyModifiedType(Ty); if (VE->isMicrosoftABI()) return CGM.getABIInfo().EmitMSVAArg(*this, VAListAddr, Ty, Slot); return CGM.getABIInfo().EmitVAArg(*this, VAListAddr, Ty, Slot); diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index f29ddec..560d4ce 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -3492,6 +3492,11 @@ llvm::DIType *CGDebugInfo::CreateType(const PipeType *Ty, llvm::DIFile *U) { return getOrCreateType(Ty->getElementType(), U); } +llvm::DIType *CGDebugInfo::CreateType(const HLSLAttributedResourceType *Ty, + llvm::DIFile *U) { + return getOrCreateType(Ty->getWrappedType(), U); +} + llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) { const EnumDecl *ED = Ty->getDecl(); @@ -3834,12 +3839,13 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::TemplateSpecialization: return CreateType(cast<TemplateSpecializationType>(Ty), Unit); + case Type::HLSLAttributedResource: + return CreateType(cast<HLSLAttributedResourceType>(Ty), Unit); case Type::CountAttributed: case Type::Auto: case Type::Attributed: case Type::BTFTagAttributed: - case Type::HLSLAttributedResource: case Type::Adjusted: case Type::Decayed: case Type::DeducedTemplateSpecialization: diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 3fd0237..38f73ec 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -196,6 +196,8 @@ class CGDebugInfo { llvm::DIType *CreateType(const PointerType *Ty, llvm::DIFile *F); llvm::DIType *CreateType(const BlockPointerType *Ty, llvm::DIFile *F); llvm::DIType *CreateType(const FunctionType *Ty, llvm::DIFile *F); + llvm::DIType *CreateType(const HLSLAttributedResourceType *Ty, + llvm::DIFile *F); /// Get structure or union type. llvm::DIType *CreateType(const RecordType *Tyg); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 4159cee..0f27bd0 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -5455,11 +5455,6 @@ Value *ScalarExprEmitter::VisitChooseExpr(ChooseExpr *E) { } Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) { - QualType Ty = VE->getType(); - - if (Ty->isVariablyModifiedType()) - CGF.EmitVariablyModifiedType(Ty); - Address ArgValue = Address::invalid(); RValue ArgPtr = CGF.EmitVAArg(VE, ArgValue); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index c354e58..5679bd7 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -395,7 +395,7 @@ llvm::Value *CGHLSLRuntime::emitInputSemantic(IRBuilder<> &B, return buildVectorInput(B, GroupThreadIDIntrinsic, Ty); } if (D.hasAttr<HLSLSV_GroupIDAttr>()) { - llvm::Function *GroupIDIntrinsic = CGM.getIntrinsic(Intrinsic::dx_group_id); + llvm::Function *GroupIDIntrinsic = CGM.getIntrinsic(getGroupIdIntrinsic()); return buildVectorInput(B, GroupIDIntrinsic, Ty); } assert(false && "Unhandled parameter attribute"); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index edb87f9..46e472f 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -87,6 +87,7 @@ public: GENERATE_HLSL_INTRINSIC_FUNCTION(Radians, radians) GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id) GENERATE_HLSL_INTRINSIC_FUNCTION(GroupThreadId, thread_id_in_group) + GENERATE_HLSL_INTRINSIC_FUNCTION(GroupId, group_id) GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot) GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot) GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot) @@ -103,6 +104,8 @@ public: GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp) GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp) + GENERATE_HLSL_INTRINSIC_FUNCTION(CreateResourceGetPointer, + resource_getpointer) GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, resource_handlefrombinding) GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter) diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 13223ec..c551506 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -480,6 +480,12 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::OpenACCShutdownConstructClass: EmitOpenACCShutdownConstruct(cast<OpenACCShutdownConstruct>(*S)); break; + case Stmt::OpenACCSetConstructClass: + EmitOpenACCSetConstruct(cast<OpenACCSetConstruct>(*S)); + break; + case Stmt::OpenACCUpdateConstructClass: + EmitOpenACCUpdateConstruct(cast<OpenACCUpdateConstruct>(*S)); + break; } } @@ -1026,8 +1032,8 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S, EmitBlock(LoopHeader.getBlock()); if (CGM.shouldEmitConvergenceTokens()) - ConvergenceTokenStack.push_back(emitConvergenceLoopToken( - LoopHeader.getBlock(), ConvergenceTokenStack.back())); + ConvergenceTokenStack.push_back( + emitConvergenceLoopToken(LoopHeader.getBlock())); // Create an exit block for when the condition fails, which will // also become the break target. @@ -1141,8 +1147,7 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S, EmitBlockWithFallThrough(LoopBody, &S); if (CGM.shouldEmitConvergenceTokens()) - ConvergenceTokenStack.push_back( - emitConvergenceLoopToken(LoopBody, ConvergenceTokenStack.back())); + ConvergenceTokenStack.push_back(emitConvergenceLoopToken(LoopBody)); { RunCleanupsScope BodyScope(*this); @@ -1221,8 +1226,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, EmitBlock(CondBlock); if (CGM.shouldEmitConvergenceTokens()) - ConvergenceTokenStack.push_back( - emitConvergenceLoopToken(CondBlock, ConvergenceTokenStack.back())); + ConvergenceTokenStack.push_back(emitConvergenceLoopToken(CondBlock)); const SourceRange &R = S.getSourceRange(); LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs, @@ -1346,8 +1350,7 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, EmitBlock(CondBlock); if (CGM.shouldEmitConvergenceTokens()) - ConvergenceTokenStack.push_back( - emitConvergenceLoopToken(CondBlock, ConvergenceTokenStack.back())); + ConvergenceTokenStack.push_back(emitConvergenceLoopToken(CondBlock)); const SourceRange &R = S.getSourceRange(); LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs, @@ -3216,35 +3219,32 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) { return F; } -namespace { // Returns the first convergence entry/loop/anchor instruction found in |BB|. // std::nullptr otherwise. -llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) { +static llvm::ConvergenceControlInst *getConvergenceToken(llvm::BasicBlock *BB) { for (auto &I : *BB) { - auto *II = dyn_cast<llvm::IntrinsicInst>(&I); - if (II && llvm::isConvergenceControlIntrinsic(II->getIntrinsicID())) - return II; + if (auto *CI = dyn_cast<llvm::ConvergenceControlInst>(&I)) + return CI; } return nullptr; } -} // namespace - llvm::CallBase * -CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input, - llvm::Value *ParentToken) { +CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input) { + llvm::ConvergenceControlInst *ParentToken = ConvergenceTokenStack.back(); + assert(ParentToken); + llvm::Value *bundleArgs[] = {ParentToken}; llvm::OperandBundleDef OB("convergencectrl", bundleArgs); - auto Output = llvm::CallBase::addOperandBundle( + auto *Output = llvm::CallBase::addOperandBundle( Input, llvm::LLVMContext::OB_convergencectrl, OB, Input->getIterator()); Input->replaceAllUsesWith(Output); Input->eraseFromParent(); return Output; } -llvm::IntrinsicInst * -CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB, - llvm::Value *ParentToken) { +llvm::ConvergenceControlInst * +CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB) { CGBuilderTy::InsertPoint IP = Builder.saveIP(); if (BB->empty()) Builder.SetInsertPoint(BB); @@ -3255,14 +3255,14 @@ CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB, llvm::Intrinsic::experimental_convergence_loop, {}, {}); Builder.restoreIP(IP); - llvm::CallBase *I = addConvergenceControlToken(CB, ParentToken); - return cast<llvm::IntrinsicInst>(I); + CB = addConvergenceControlToken(CB); + return cast<llvm::ConvergenceControlInst>(CB); } -llvm::IntrinsicInst * +llvm::ConvergenceControlInst * CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { llvm::BasicBlock *BB = &F->getEntryBlock(); - llvm::IntrinsicInst *Token = getConvergenceToken(BB); + llvm::ConvergenceControlInst *Token = getConvergenceToken(BB); if (Token) return Token; @@ -3277,5 +3277,5 @@ CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { assert(isa<llvm::IntrinsicInst>(I)); Builder.restoreIP(IP); - return cast<llvm::IntrinsicInst>(I); + return cast<llvm::ConvergenceControlInst>(I); } diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index cc927f4..f63cb9b 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -106,46 +106,19 @@ static void reportOptRecordError(Error E, DiagnosticsEngine &Diags, } BackendConsumer::BackendConsumer( - BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, - const HeaderSearchOptions &HeaderSearchOpts, - const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, const LangOptions &LangOpts, - const std::string &InFile, SmallVector<LinkModule, 4> LinkModules, - std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C, - CoverageSourceInfo *CoverageInfo) - : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), - CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), - AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS), - LLVMIRGeneration("irgen", "LLVM IR Generation Time"), - LLVMIRGenerationRefCount(0), - Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts, - PPOpts, CodeGenOpts, C, CoverageInfo)), - LinkModules(std::move(LinkModules)) { - TimerIsEnabled = CodeGenOpts.TimePasses; - llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses; - llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun; -} - -// This constructor is used in installing an empty BackendConsumer -// to use the clang diagnostic handler for IR input files. It avoids -// initializing the OS field. -BackendConsumer::BackendConsumer( - BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, - const HeaderSearchOptions &HeaderSearchOpts, - const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, const LangOptions &LangOpts, - llvm::Module *Module, SmallVector<LinkModule, 4> LinkModules, - LLVMContext &C, CoverageSourceInfo *CoverageInfo) - : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), - CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), - Context(nullptr), FS(VFS), - LLVMIRGeneration("irgen", "LLVM IR Generation Time"), - LLVMIRGenerationRefCount(0), - Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, PPOpts, - CodeGenOpts, C, CoverageInfo)), - LinkModules(std::move(LinkModules)), CurLinkModule(Module) { + const CompilerInstance &CI, BackendAction Action, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, LLVMContext &C, + SmallVector<LinkModule, 4> LinkModules, StringRef InFile, + std::unique_ptr<raw_pwrite_stream> OS, CoverageSourceInfo *CoverageInfo, + llvm::Module *CurLinkModule) + : Diags(CI.getDiagnostics()), HeaderSearchOpts(CI.getHeaderSearchOpts()), + CodeGenOpts(CI.getCodeGenOpts()), TargetOpts(CI.getTargetOpts()), + LangOpts(CI.getLangOpts()), AsmOutStream(std::move(OS)), FS(VFS), + LLVMIRGeneration("irgen", "LLVM IR Generation Time"), Action(Action), + Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), + CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), + CI.getCodeGenOpts(), C, CoverageInfo)), + LinkModules(std::move(LinkModules)), CurLinkModule(CurLinkModule) { TimerIsEnabled = CodeGenOpts.TimePasses; llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses; llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun; @@ -1011,10 +984,8 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { CI.getPreprocessor()); std::unique_ptr<BackendConsumer> Result(new BackendConsumer( - BA, CI.getDiagnostics(), &CI.getVirtualFileSystem(), - CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(), - CI.getTargetOpts(), CI.getLangOpts(), std::string(InFile), - std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo)); + CI, BA, &CI.getVirtualFileSystem(), *VMContext, std::move(LinkModules), + InFile, std::move(OS), CoverageInfo)); BEConsumer = Result.get(); // Enable generating macro debug info only when debug info is not disabled and @@ -1182,11 +1153,9 @@ void CodeGenAction::ExecuteAction() { // Set clang diagnostic handler. To do this we need to create a fake // BackendConsumer. - BackendConsumer Result(BA, CI.getDiagnostics(), &CI.getVirtualFileSystem(), - CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), - CI.getCodeGenOpts(), CI.getTargetOpts(), - CI.getLangOpts(), TheModule.get(), - std::move(LinkModules), *VMContext, nullptr); + BackendConsumer Result(CI, BA, &CI.getVirtualFileSystem(), *VMContext, + std::move(LinkModules), "", nullptr, nullptr, + TheModule.get()); // Link in each pending link module. if (!CodeGenOpts.LinkBitcodePostopt && Result.LinkInModules(&*TheModule)) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 6145c6a..cdeff1e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -315,7 +315,7 @@ public: SmallVector<const BinaryOperator *, 16> MCDCLogOpStack; /// Stack to track the controlled convergence tokens. - SmallVector<llvm::IntrinsicInst *, 4> ConvergenceTokenStack; + SmallVector<llvm::ConvergenceControlInst *, 4> ConvergenceTokenStack; /// Number of nested loop to be consumed by the last surrounding /// loop-associated directive. @@ -4157,6 +4157,16 @@ public: // but in the future we will implement some sort of IR. } + void EmitOpenACCSetConstruct(const OpenACCSetConstruct &S) { + // TODO OpenACC: Implement this. It is currently implemented as a 'no-op', + // but in the future we will implement some sort of IR. + } + + void EmitOpenACCUpdateConstruct(const OpenACCUpdateConstruct &S) { + // TODO OpenACC: Implement this. It is currently implemented as a 'no-op', + // but in the future we will implement some sort of IR. + } + //===--------------------------------------------------------------------===// // LValue Expression Emission //===--------------------------------------------------------------------===// @@ -4780,6 +4790,7 @@ public: llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); + llvm::Value *EmitSPIRVBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E); llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E); @@ -5258,29 +5269,20 @@ public: llvm::Value *emitBoolVecConversion(llvm::Value *SrcVec, unsigned NumElementsDst, const llvm::Twine &Name = ""); - // Adds a convergence_ctrl token to |Input| and emits the required parent - // convergence instructions. - template <typename CallType> - CallType *addControlledConvergenceToken(CallType *Input) { - return cast<CallType>( - addConvergenceControlToken(Input, ConvergenceTokenStack.back())); - } private: // Emits a convergence_loop instruction for the given |BB|, with |ParentToken| // as it's parent convergence instr. - llvm::IntrinsicInst *emitConvergenceLoopToken(llvm::BasicBlock *BB, - llvm::Value *ParentToken); + llvm::ConvergenceControlInst *emitConvergenceLoopToken(llvm::BasicBlock *BB); + // Adds a convergence_ctrl token with |ParentToken| as parent convergence // instr to the call |Input|. - llvm::CallBase *addConvergenceControlToken(llvm::CallBase *Input, - llvm::Value *ParentToken); + llvm::CallBase *addConvergenceControlToken(llvm::CallBase *Input); + // Find the convergence_entry instruction |F|, or emits ones if none exists. // Returns the convergence instruction. - llvm::IntrinsicInst *getOrEmitConvergenceEntryToken(llvm::Function *F); - // Find the convergence_loop instruction for the loop defined by |LI|, or - // emits one if none exists. Returns the convergence instruction. - llvm::IntrinsicInst *getOrEmitConvergenceLoopToken(const LoopInfo *LI); + llvm::ConvergenceControlInst * + getOrEmitConvergenceEntryToken(llvm::Function *F); private: llvm::MDNode *getRangeForLoadFromType(QualType Ty); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index c49f763..7db1ed7 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2748,7 +2748,21 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, Attrs.addAttribute("target-features", llvm::join(Features, ",")); AddedAttr = true; } - + if (getTarget().getTriple().isAArch64()) { + llvm::SmallVector<StringRef, 8> Feats; + if (TV) + TV->getFeatures(Feats); + else if (TC) + TC->getFeatures(Feats, GD.getMultiVersionIndex()); + if (!Feats.empty()) { + llvm::sort(Feats); + std::string FMVFeatures; + for (StringRef F : Feats) + FMVFeatures.append(",+" + F.str()); + Attrs.addAttribute("fmv-features", FMVFeatures.substr(1)); + AddedAttr = true; + } + } return AddedAttr; } @@ -4227,7 +4241,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn); -static unsigned getFMVPriority(const TargetInfo &TI, +static uint64_t getFMVPriority(const TargetInfo &TI, const CodeGenFunction::FMVResolverOption &RO) { llvm::SmallVector<StringRef, 8> Features{RO.Features}; if (RO.Architecture) diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 9c870c6..8fcfd8b 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -938,17 +938,34 @@ struct CounterCoverageMappingBuilder } struct BranchCounterPair { - Counter Executed; - Counter Skipped; + Counter Executed; ///< The Counter previously assigned. + Counter Skipped; ///< An expression (Parent-Executed), or equivalent to it. }; + /// Retrieve or assign the pair of Counter(s). + /// + /// This returns BranchCounterPair {Executed, Skipped}. + /// Executed is the Counter associated with S assigned by an earlier + /// CounterMapping pass. + /// Skipped may be an expression (Executed - ParentCnt) or newly + /// assigned Counter in EnableSingleByteCoverage, as subtract + /// expressions are not available in this mode. + /// + /// \param S Key to the CounterMap + /// \param ParentCnt The Counter representing how many times S is evaluated. + /// \param SkipCntForOld (To be removed later) Optional fake Counter + /// to override Skipped for adjustment of + /// expressions in the old behavior of + /// EnableSingleByteCoverage that is unaware of + /// Branch coverage. BranchCounterPair getBranchCounterPair(const Stmt *S, Counter ParentCnt, std::optional<Counter> SkipCntForOld = std::nullopt) { auto &TheMap = CounterMap[S]; auto ExecCnt = Counter::getCounter(TheMap.Executed); - // The old behavior of SingleByte shouldn't emit Branches. + // The old behavior of SingleByte is unaware of Branches. + // Will be pruned after the migration of SingleByte. if (llvm::EnableSingleByteCoverage && SkipCntForOld) return {ExecCnt, *SkipCntForOld}; diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp index 61fdf33..b7b212b 100644 --- a/clang/lib/CodeGen/SanitizerMetadata.cpp +++ b/clang/lib/CodeGen/SanitizerMetadata.cpp @@ -145,7 +145,9 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV, const VarDecl &D, for (auto *Attr : D.specific_attrs<NoSanitizeAttr>()) NoSanitizeMask |= Attr->getMask(); - if (D.hasExternalStorage()) + // External definitions and incomplete types get handled at the place they + // are defined. + if (D.hasExternalStorage() || D.getType()->isIncompleteType()) NoSanitizeMask |= SanitizerKind::Type; return NoSanitizeMask; diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index ad7f405..7db67ec 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -662,7 +662,7 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() bool AArch64ABIInfo::passAsAggregateType(QualType Ty) const { if (Kind == AArch64ABIKind::AAPCS && Ty->isSVESizelessBuiltinType()) { - const auto *BT = Ty->getAs<BuiltinType>(); + const auto *BT = Ty->castAs<BuiltinType>(); return !BT->isSVECount() && getContext().getBuiltinVectorTypeInfo(BT).NumVectors > 1; } @@ -1169,8 +1169,9 @@ void AArch64TargetCodeGenInfo::checkFunctionABI( enum class ArmSMEInlinability : uint8_t { Ok = 0, ErrorCalleeRequiresNewZA = 1 << 0, - WarnIncompatibleStreamingModes = 1 << 1, - ErrorIncompatibleStreamingModes = 1 << 2, + ErrorCalleeRequiresNewZT0 = 1 << 1, + WarnIncompatibleStreamingModes = 1 << 2, + ErrorIncompatibleStreamingModes = 1 << 3, IncompatibleStreamingModes = WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes, @@ -1198,9 +1199,12 @@ static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller, else Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes; } - if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) + if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) { if (NewAttr->isNewZA()) Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA; + if (NewAttr->isNewZT0()) + Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZT0; + } return Inlinability; } @@ -1227,6 +1231,11 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming( ArmSMEInlinability::ErrorCalleeRequiresNewZA) CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za) << Callee->getDeclName(); + + if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZT0) == + ArmSMEInlinability::ErrorCalleeRequiresNewZT0) + CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_zt0) + << Callee->getDeclName(); } // If the target does not have floating-point registers, but we are using a diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 56ad050..fa07e68 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -537,7 +537,11 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, break; } - if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) { + // OpenCL assumes by default that atomic scopes are per-address space for + // non-sequentially consistent operations. + if (Scope >= SyncScope::OpenCLWorkGroup && + Scope <= SyncScope::OpenCLSubGroup && + Ordering != llvm::AtomicOrdering::SequentiallyConsistent) { if (!Name.empty()) Name = Twine(Twine(Name) + Twine("-")).str(); diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 0431d2c..b82e4dd 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -9,6 +9,7 @@ #include "ABIInfoImpl.h" #include "TargetInfo.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/IntrinsicsNVPTX.h" using namespace clang; @@ -79,13 +80,11 @@ public: // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, - int Operand, - const SmallVectorImpl<int> &GridConstantArgs); + int Operand); - static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, - int Operand) { - addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0)); - } + static void + addGridConstantNVVMMetadata(llvm::GlobalValue *GV, + const SmallVectorImpl<int> &GridConstantArgs); private: static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, @@ -259,7 +258,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( if (FD->hasAttr<OpenCLKernelAttr>()) { // OpenCL __kernel functions get kernel metadata // Create !{<func-ref>, metadata !"kernel", i32 1} node - addNVVMMetadata(F, "kernel", 1); + F->setCallingConv(llvm::CallingConv::PTX_Kernel); // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); } @@ -277,7 +276,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( // For some reason arg indices are 1-based in NVVM GCI.push_back(IV.index() + 1); // Create !{<func-ref>, metadata !"kernel", i32 1} node - addNVVMMetadata(F, "kernel", 1, GCI); + F->setCallingConv(llvm::CallingConv::PTX_Kernel); + addGridConstantNVVMMetadata(F, GCI); } if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) M.handleCUDALaunchBoundsAttr(F, Attr); @@ -285,13 +285,12 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( // Attach kernel metadata directly if compiling for NVPTX. if (FD->hasAttr<NVPTXKernelAttr>()) { - addNVVMMetadata(F, "kernel", 1); + F->setCallingConv(llvm::CallingConv::PTX_Kernel); } } -void NVPTXTargetCodeGenInfo::addNVVMMetadata( - llvm::GlobalValue *GV, StringRef Name, int Operand, - const SmallVectorImpl<int> &GridConstantArgs) { +void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, + StringRef Name, int Operand) { llvm::Module *M = GV->getParent(); llvm::LLVMContext &Ctx = M->getContext(); @@ -302,6 +301,21 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata( llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; + + // Append metadata to nvvm.annotations + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); +} + +void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata( + llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) { + + llvm::Module *M = GV->getParent(); + llvm::LLVMContext &Ctx = M->getContext(); + + // Get "nvvm.annotations" metadata node + llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); + + SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)}; if (!GridConstantArgs.empty()) { SmallVector<llvm::Metadata *, 10> GCM; for (int I : GridConstantArgs) @@ -310,6 +324,7 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata( MDVals.append({llvm::MDString::get(Ctx, "grid_constant"), llvm::MDNode::get(Ctx, GCM)}); } + // Append metadata to nvvm.annotations MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index a48fe9d..5c75e98 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -64,6 +64,8 @@ public: void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const override; + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, SyncScope Scope, llvm::AtomicOrdering Ordering, @@ -245,6 +247,41 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, return DefaultGlobalAS; } +void SPIRVTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (!M.getLangOpts().HIP || + M.getTarget().getTriple().getVendor() != llvm::Triple::AMD) + return; + if (GV->isDeclaration()) + return; + + auto F = dyn_cast<llvm::Function>(GV); + if (!F) + return; + + auto FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) + return; + if (!FD->hasAttr<CUDAGlobalAttr>()) + return; + + unsigned N = M.getLangOpts().GPUMaxThreadsPerBlock; + if (auto FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) + N = FlatWGS->getMax()->EvaluateKnownConstInt(M.getContext()).getExtValue(); + + // We encode the maximum flat WG size in the first component of the 3D + // max_work_group_size attribute, which will get reverse translated into the + // original AMDGPU attribute when targeting AMDGPU. + auto Int32Ty = llvm::IntegerType::getInt32Ty(M.getLLVMContext()); + llvm::Metadata *AttrMDArgs[] = { + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, N)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1))}; + + F->setMetadata("max_work_group_size", + llvm::MDNode::get(M.getLLVMContext(), AttrMDArgs)); +} + llvm::SyncScope::ID SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope, llvm::AtomicOrdering, |