diff options
Diffstat (limited to 'clang/lib')
41 files changed, 564 insertions, 1329 deletions
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 687cd46..2669f62 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12403,6 +12403,11 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context, // Read the base type. switch (*Str++) { default: llvm_unreachable("Unknown builtin type letter!"); + case 'e': + assert(HowLong == 0 && !Signed && !Unsigned && + "Bad modifiers used with 'e'!"); + Type = Context.getLangOpts().OpenCL ? Context.HalfTy : Context.Float16Ty; + break; case 'x': assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers used with 'x'!"); diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b3ab82d..8b57b96 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3411,7 +3411,7 @@ static bool interp__builtin_x86_byteshift( static bool interp__builtin_ia32_shuffle_generic( InterpState &S, CodePtr OpPC, const CallExpr *Call, - llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> + llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)> GetSourceIndex) { assert(Call->getNumArgs() == 3); @@ -3428,8 +3428,19 @@ static bool interp__builtin_ia32_shuffle_generic( for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) { auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); - const Pointer &Src = (SrcVecIdx == 0) ? A : B; - TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); }); + + if (SrcIdx < 0) { + // Zero out this element + if (ElemT == PT_Float) { + Dst.elem<Floating>(DstIdx) = Floating( + S.getASTContext().getFloatTypeSemantics(VecT->getElementType())); + } else { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); }); + } + } else { + const Pointer &Src = (SrcVecIdx == 0) ? A : B; + TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); }); + } } Dst.initializeAllElements(); @@ -4382,7 +4393,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0; unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; + return std::pair<unsigned, int>{SrcIdx, + static_cast<int>(LaneOffset + Index)}; }); case X86::BI__builtin_ia32_shufpd: case X86::BI__builtin_ia32_shufpd256: @@ -4400,7 +4412,27 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0; unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; + return std::pair<unsigned, int>{SrcIdx, + static_cast<int>(LaneOffset + Index)}; + }); + case X86::BI__builtin_ia32_insertps128: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Mask) { + // Bits [3:0]: zero mask - if bit is set, zero this element + if ((Mask & (1 << DstIdx)) != 0) { + return std::pair<unsigned, int>{0, -1}; + } + // Bits [7:6]: select element from source vector Y (0-3) + // Bits [5:4]: select destination position (0-3) + unsigned SrcElem = (Mask >> 6) & 0x3; + unsigned DstElem = (Mask >> 4) & 0x3; + if (DstIdx == DstElem) { + // Insert element from source vector (B) at this position + return std::pair<unsigned, int>{1, static_cast<int>(SrcElem)}; + } else { + // Copy from destination vector (A) + return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)}; + } }); case X86::BI__builtin_ia32_pshufb128: case X86::BI__builtin_ia32_pshufb256: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index d0404b9..97eeba8 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11621,7 +11621,7 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result, static bool evalShuffleGeneric( EvalInfo &Info, const CallExpr *Call, APValue &Out, - llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> + llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)> GetSourceIndex) { const auto *VT = Call->getType()->getAs<VectorType>(); @@ -11644,8 +11644,16 @@ static bool evalShuffleGeneric( for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) { auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); - const APValue &Src = (SrcVecIdx == 0) ? A : B; - ResultElements.push_back(Src.getVectorElt(SrcIdx)); + + if (SrcIdx < 0) { + // Zero out this element + QualType ElemTy = VT->getElementType(); + ResultElements.push_back( + APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)))); + } else { + const APValue &Src = (SrcVecIdx == 0) ? A : B; + ResultElements.push_back(Src.getVectorElt(SrcIdx)); + } } Out = APValue(ResultElements.data(), ResultElements.size()); @@ -12438,7 +12446,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, - unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { + unsigned ShuffleMask) -> std::pair<unsigned, int> { constexpr unsigned LaneBits = 128u; unsigned NumElemPerLane = LaneBits / 32; unsigned NumSelectableElems = NumElemPerLane / 2; @@ -12451,7 +12459,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return {SrcIdx, LaneOffset + Index}; + return {SrcIdx, static_cast<int>(LaneOffset + Index)}; })) return false; return Success(R, E); @@ -12463,7 +12471,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, - unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { + unsigned ShuffleMask) -> std::pair<unsigned, int> { constexpr unsigned LaneBits = 128u; unsigned NumElemPerLane = LaneBits / 64; unsigned NumSelectableElems = NumElemPerLane / 2; @@ -12476,7 +12484,31 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return {SrcIdx, LaneOffset + Index}; + return {SrcIdx, static_cast<int>(LaneOffset + Index)}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_insertps128: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + // Bits [3:0]: zero mask - if bit is set, zero this element + if ((Mask & (1 << DstIdx)) != 0) { + return {0, -1}; + } + // Bits [7:6]: select element from source vector Y (0-3) + // Bits [5:4]: select destination position (0-3) + unsigned SrcElem = (Mask >> 6) & 0x3; + unsigned DstElem = (Mask >> 4) & 0x3; + if (DstIdx == DstElem) { + // Insert element from source vector (B) at this position + return {1, static_cast<int>(SrcElem)}; + } else { + // Copy from destination vector (A) + return {0, static_cast<int>(DstIdx)}; + } })) return false; return Success(R, E); diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index d4de704..d4d696b 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -356,12 +356,6 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, if (hasFastFMA()) Builder.defineMacro("FP_FAST_FMA"); - Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize), - "compile-time-constant access to the wavefront size will " - "be removed in a future release"); - Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize), - "compile-time-constant access to the wavefront size will " - "be removed in a future release"); Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode)); } diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 846b240..d2eb9c5 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -445,27 +445,17 @@ public: LongWidth = LongAlign = PointerWidth = PointerAlign = 64; IntMaxType = SignedLong; Int64Type = SignedLong; - std::string DataLayout; if (Triple.isOSAIX()) { // TODO: Set appropriate ABI for AIX platform. - DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64"; LongDoubleWidth = 64; LongDoubleAlign = DoubleAlign = 32; LongDoubleFormat = &llvm::APFloat::IEEEdouble(); - } else if ((Triple.getArch() == llvm::Triple::ppc64le)) { - DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64"; + } else if ((Triple.getArch() == llvm::Triple::ppc64le) || + Triple.isPPC64ELFv2ABI()) { ABI = "elfv2"; } else { - DataLayout = "E-m:e"; - if (Triple.isPPC64ELFv2ABI()) { - ABI = "elfv2"; - DataLayout += "-Fn32"; - } else { - ABI = "elfv1"; - DataLayout += "-Fi64"; - } - DataLayout += "-i64:64-i128:128-n32:64"; + ABI = "elfv1"; } if (Triple.isOSFreeBSD() || Triple.isOSOpenBSD() || Triple.isMusl()) { @@ -473,14 +463,12 @@ public: LongDoubleFormat = &llvm::APFloat::IEEEdouble(); } - if (Triple.isOSAIX() || Triple.isOSLinux()) - DataLayout += "-S128-v256:256:256-v512:512:512"; - resetDataLayout(DataLayout); - // Newer PPC64 instruction sets support atomics up to 16 bytes. MaxAtomicPromoteWidth = 128; // Baseline PPC64 supports inlining atomics up to 8 bytes. MaxAtomicInlineWidth = 64; + + calculateDataLayout(); } void setMaxAtomicWidth() override { @@ -495,10 +483,33 @@ public: return TargetInfo::CharPtrBuiltinVaList; } + void calculateDataLayout() { + std::string DataLayout; + + if (getTriple().isOSAIX()) { + DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64"; + } else if ((getTriple().getArch() == llvm::Triple::ppc64le)) { + DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64"; + } else { + DataLayout = "E-m:e"; + if (ABI == "elfv2") { + DataLayout += "-Fn32"; + } else { + DataLayout += "-Fi64"; + } + DataLayout += "-i64:64-i128:128-n32:64"; + } + + if (getTriple().isOSAIX() || getTriple().isOSLinux()) + DataLayout += "-S128-v256:256:256-v512:512:512"; + resetDataLayout(DataLayout); + } + // PPC64 Linux-specific ABI options. bool setABI(const std::string &Name) override { if (Name == "elfv1" || Name == "elfv2") { ABI = Name; + calculateDataLayout(); return true; } return false; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index e71f10c..7a90c89 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -396,8 +396,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasAMXFP8 = true; } else if (Feature == "+amx-movrs") { HasAMXMOVRS = true; - } else if (Feature == "+amx-transpose") { - HasAMXTRANSPOSE = true; } else if (Feature == "+amx-avx512") { HasAMXAVX512 = true; } else if (Feature == "+amx-tf32") { @@ -925,8 +923,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__AMX_FP8__"); if (HasAMXMOVRS) Builder.defineMacro("__AMX_MOVRS__"); - if (HasAMXTRANSPOSE) - Builder.defineMacro("__AMX_TRANSPOSE__"); if (HasAMXAVX512) Builder.defineMacro("__AMX_AVX512__"); if (HasAMXTF32) @@ -1068,7 +1064,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("amx-movrs", true) .Case("amx-tf32", true) .Case("amx-tile", true) - .Case("amx-transpose", true) .Case("avx", true) .Case("avx10.1", true) .Case("avx10.2", true) @@ -1189,7 +1184,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("amx-movrs", HasAMXMOVRS) .Case("amx-tf32", HasAMXTF32) .Case("amx-tile", HasAMXTILE) - .Case("amx-transpose", HasAMXTRANSPOSE) .Case("avx", SSELevel >= AVX) .Case("avx10.1", HasAVX10_1) .Case("avx10.2", HasAVX10_2) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index be3a473..e7da262 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -160,7 +160,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasAMXCOMPLEX = false; bool HasAMXFP8 = false; bool HasAMXMOVRS = false; - bool HasAMXTRANSPOSE = false; bool HasAMXAVX512 = false; bool HasAMXTF32 = false; bool HasSERIALIZE = false; diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 3c9c7ec..0198a9d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -771,14 +771,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI_WriteBarrier: case X86::BI_AddressOfReturnAddress: case X86::BI__stosb: - case X86::BI__builtin_ia32_t2rpntlvwz0_internal: - case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal: - case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal: - case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal: - case X86::BI__builtin_ia32_t2rpntlvwz1_internal: - case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal: - case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal: - case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: case X86::BI__ud2: case X86::BI__int2c: case X86::BI__readfsbyte: diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index 71ff20a..5d5209b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -242,12 +242,19 @@ void CIRGenFunction::LexicalScope::cleanup() { } }; - if (returnBlock != nullptr) { - // Write out the return block, which loads the value from `__retval` and - // issues the `cir.return`. + // Cleanup are done right before codegen resumes a scope. This is where + // objects are destroyed. Process all return blocks. + // TODO(cir): Handle returning from a switch statement through a cleanup + // block. We can't simply jump to the cleanup block, because the cleanup block + // is not part of the case region. Either reemit all cleanups in the return + // block or wait for MLIR structured control flow to support early exits. + llvm::SmallVector<mlir::Block *> retBlocks; + for (mlir::Block *retBlock : localScope->getRetBlocks()) { mlir::OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointToEnd(returnBlock); - (void)emitReturn(*returnLoc); + builder.setInsertionPointToEnd(retBlock); + retBlocks.push_back(retBlock); + mlir::Location retLoc = localScope->getRetLoc(retBlock); + emitReturn(retLoc); } auto insertCleanupAndLeave = [&](mlir::Block *insPt) { @@ -274,19 +281,22 @@ void CIRGenFunction::LexicalScope::cleanup() { if (localScope->depth == 0) { // Reached the end of the function. - if (returnBlock != nullptr) { - if (returnBlock->getUses().empty()) { - returnBlock->erase(); + // Special handling only for single return block case + if (localScope->getRetBlocks().size() == 1) { + mlir::Block *retBlock = localScope->getRetBlocks()[0]; + mlir::Location retLoc = localScope->getRetLoc(retBlock); + if (retBlock->getUses().empty()) { + retBlock->erase(); } else { // Thread return block via cleanup block. if (cleanupBlock) { - for (mlir::BlockOperand &blockUse : returnBlock->getUses()) { + for (mlir::BlockOperand &blockUse : retBlock->getUses()) { cir::BrOp brOp = mlir::cast<cir::BrOp>(blockUse.getOwner()); brOp.setSuccessor(cleanupBlock); } } - cir::BrOp::create(builder, *returnLoc, returnBlock); + cir::BrOp::create(builder, retLoc, retBlock); return; } } @@ -324,8 +334,10 @@ void CIRGenFunction::LexicalScope::cleanup() { bool entryBlock = builder.getInsertionBlock()->isEntryBlock(); if (!entryBlock && curBlock->empty()) { curBlock->erase(); - if (returnBlock != nullptr && returnBlock->getUses().empty()) - returnBlock->erase(); + for (mlir::Block *retBlock : retBlocks) { + if (retBlock->getUses().empty()) + retBlock->erase(); + } return; } diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index c3fcd1a6..e5cecaa5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1103,44 +1103,69 @@ public: // --- private: - // `returnBlock`, `returnLoc`, and all the functions that deal with them - // will change and become more complicated when `switch` statements are - // upstreamed. `case` statements within the `switch` are in the same scope - // but have their own regions. Therefore the LexicalScope will need to - // keep track of multiple return blocks. - mlir::Block *returnBlock = nullptr; - std::optional<mlir::Location> returnLoc; - - // See the comment on `getOrCreateRetBlock`. + // On switches we need one return block per region, since cases don't + // have their own scopes but are distinct regions nonetheless. + + // TODO: This implementation should change once we have support for early + // exits in MLIR structured control flow (llvm-project#161575) + llvm::SmallVector<mlir::Block *> retBlocks; + llvm::DenseMap<mlir::Block *, mlir::Location> retLocs; + llvm::DenseMap<cir::CaseOp, unsigned> retBlockInCaseIndex; + std::optional<unsigned> normalRetBlockIndex; + + // There's usually only one ret block per scope, but this needs to be + // get or create because of potential unreachable return statements, note + // that for those, all source location maps to the first one found. mlir::Block *createRetBlock(CIRGenFunction &cgf, mlir::Location loc) { - assert(returnBlock == nullptr && "only one return block per scope"); - // Create the cleanup block but don't hook it up just yet. + assert((isa_and_nonnull<cir::CaseOp>( + cgf.builder.getBlock()->getParentOp()) || + retBlocks.size() == 0) && + "only switches can hold more than one ret block"); + + // Create the return block but don't hook it up just yet. mlir::OpBuilder::InsertionGuard guard(cgf.builder); - returnBlock = - cgf.builder.createBlock(cgf.builder.getBlock()->getParent()); - updateRetLoc(returnBlock, loc); - return returnBlock; + auto *b = cgf.builder.createBlock(cgf.builder.getBlock()->getParent()); + retBlocks.push_back(b); + updateRetLoc(b, loc); + return b; } cir::ReturnOp emitReturn(mlir::Location loc); void emitImplicitReturn(); public: - mlir::Block *getRetBlock() { return returnBlock; } - mlir::Location getRetLoc(mlir::Block *b) { return *returnLoc; } - void updateRetLoc(mlir::Block *b, mlir::Location loc) { returnLoc = loc; } - - // Create the return block for this scope, or return the existing one. - // This get-or-create logic is necessary to handle multiple return - // statements within the same scope, which can happen if some of them are - // dead code or if there is a `goto` into the middle of the scope. + llvm::ArrayRef<mlir::Block *> getRetBlocks() { return retBlocks; } + mlir::Location getRetLoc(mlir::Block *b) { return retLocs.at(b); } + void updateRetLoc(mlir::Block *b, mlir::Location loc) { + retLocs.insert_or_assign(b, loc); + } + mlir::Block *getOrCreateRetBlock(CIRGenFunction &cgf, mlir::Location loc) { - if (returnBlock == nullptr) { - returnBlock = createRetBlock(cgf, loc); - return returnBlock; + // Check if we're inside a case region + if (auto caseOp = mlir::dyn_cast_if_present<cir::CaseOp>( + cgf.builder.getBlock()->getParentOp())) { + auto iter = retBlockInCaseIndex.find(caseOp); + if (iter != retBlockInCaseIndex.end()) { + // Reuse existing return block + mlir::Block *ret = retBlocks[iter->second]; + updateRetLoc(ret, loc); + return ret; + } + // Create new return block + mlir::Block *ret = createRetBlock(cgf, loc); + retBlockInCaseIndex[caseOp] = retBlocks.size() - 1; + return ret; } - updateRetLoc(returnBlock, loc); - return returnBlock; + + if (normalRetBlockIndex) { + mlir::Block *ret = retBlocks[*normalRetBlockIndex]; + updateRetLoc(ret, loc); + return ret; + } + + mlir::Block *ret = createRetBlock(cgf, loc); + normalRetBlockIndex = retBlocks.size() - 1; + return ret; } mlir::Block *getEntryBlock() { return entryBlock; } diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index fd2f6dc..ca579c9 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -345,7 +345,7 @@ void CGDebugInfo::setLocation(SourceLocation Loc) { if (Loc.isInvalid()) return; - CurLoc = CGM.getContext().getSourceManager().getExpansionLoc(Loc); + CurLoc = CGM.getContext().getSourceManager().getFileLoc(Loc); // If we've changed files in the middle of a lexical scope go ahead // and create a new lexical scope with file node if it's different @@ -572,7 +572,7 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { FileName = TheCU->getFile()->getFilename(); CSInfo = TheCU->getFile()->getChecksum(); } else { - PresumedLoc PLoc = SM.getPresumedLoc(Loc); + PresumedLoc PLoc = SM.getPresumedLoc(SM.getFileLoc(Loc)); FileName = PLoc.getFilename(); if (FileName.empty()) { @@ -599,7 +599,8 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { if (CSKind) CSInfo.emplace(*CSKind, Checksum); } - return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc))); + return createFile(FileName, CSInfo, + getSource(SM, SM.getFileID(SM.getFileLoc(Loc)))); } llvm::DIFile *CGDebugInfo::createFile( @@ -654,7 +655,7 @@ unsigned CGDebugInfo::getLineNumber(SourceLocation Loc) { if (Loc.isInvalid()) return 0; SourceManager &SM = CGM.getContext().getSourceManager(); - return SM.getPresumedLoc(Loc).getLine(); + return SM.getPresumedLoc(SM.getFileLoc(Loc)).getLine(); } unsigned CGDebugInfo::getColumnNumber(SourceLocation Loc, bool Force) { @@ -666,7 +667,8 @@ unsigned CGDebugInfo::getColumnNumber(SourceLocation Loc, bool Force) { if (Loc.isInvalid() && CurLoc.isInvalid()) return 0; SourceManager &SM = CGM.getContext().getSourceManager(); - PresumedLoc PLoc = SM.getPresumedLoc(Loc.isValid() ? Loc : CurLoc); + PresumedLoc PLoc = + SM.getPresumedLoc(Loc.isValid() ? SM.getFileLoc(Loc) : CurLoc); return PLoc.isValid() ? PLoc.getColumn() : 0; } @@ -5002,7 +5004,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { // Update our current location setLocation(Loc); - if (CurLoc.isInvalid() || CurLoc.isMacroID() || LexicalBlockStack.empty()) + if (CurLoc.isInvalid() || LexicalBlockStack.empty()) return; llvm::MDNode *Scope = LexicalBlockStack.back(); @@ -6278,7 +6280,8 @@ void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV, void CGDebugInfo::AddStringLiteralDebugInfo(llvm::GlobalVariable *GV, const StringLiteral *S) { SourceLocation Loc = S->getStrTokenLoc(0); - PresumedLoc PLoc = CGM.getContext().getSourceManager().getPresumedLoc(Loc); + SourceManager &SM = CGM.getContext().getSourceManager(); + PresumedLoc PLoc = SM.getPresumedLoc(SM.getFileLoc(Loc)); if (!PLoc.isValid()) return; diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index f49a5af..9eab709 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -647,8 +647,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ballot_w64: { llvm::Type *ResultType = ConvertType(E->getType()); llvm::Value *Src = EmitScalarExpr(E->getArg(0)); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType }); - return Builder.CreateCall(F, { Src }); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + return Builder.CreateCall(F, {Src}); } case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32: case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: { @@ -1139,6 +1139,83 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: return emitAMDGCNImageOverloadedReturnType( *this, E, Intrinsic::amdgcn_image_sample_cube, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_1d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_1d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_2d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_2d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_3d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_3d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_cube, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_2darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false); case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4: case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8); diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index b924407..2381b2e 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -2931,74 +2931,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // instruction, but it will create a memset that won't be optimized away. return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true); } - // Corresponding to intrisics which will return 2 tiles (tile0_tile1). - case X86::BI__builtin_ia32_t2rpntlvwz0_internal: - case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal: - case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal: - case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal: - case X86::BI__builtin_ia32_t2rpntlvwz1_internal: - case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal: - case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal: - case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: { - Intrinsic::ID IID; - switch (BuiltinID) { - default: - llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_t2rpntlvwz0_internal: - IID = Intrinsic::x86_t2rpntlvwz0_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal: - IID = Intrinsic::x86_t2rpntlvwz0rs_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal: - IID = Intrinsic::x86_t2rpntlvwz0t1_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal: - IID = Intrinsic::x86_t2rpntlvwz0rst1_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz1_internal: - IID = Intrinsic::x86_t2rpntlvwz1_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal: - IID = Intrinsic::x86_t2rpntlvwz1rs_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal: - IID = Intrinsic::x86_t2rpntlvwz1t1_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: - IID = Intrinsic::x86_t2rpntlvwz1rst1_internal; - break; - } - - // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride) - Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), - {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]}); - - auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>(); - assert(PtrTy && "arg3 must be of pointer type"); - QualType PtreeTy = PtrTy->getPointeeType(); - llvm::Type *TyPtee = ConvertType(PtreeTy); - - // Bitcast amx type (x86_amx) to vector type (256 x i32) - // Then store tile0 into DstPtr0 - Value *T0 = Builder.CreateExtractValue(Call, 0); - Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, - {TyPtee}, {T0}); - Builder.CreateDefaultAlignedStore(VecT0, Ops[3]); - - // Then store tile1 into DstPtr1 - Value *T1 = Builder.CreateExtractValue(Call, 1); - Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, - {TyPtee}, {T1}); - Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]); - - // Note: Here we escape directly use x86_tilestored64_internal to store - // the results due to it can't make sure the Mem written scope. This may - // cause shapes reloads after first amx intrinsic, which current amx reg- - // ister allocation has no ability to handle it. - - return Store; - } case X86::BI__ud2: // llvm.trap makes a ud2a instruction on x86. return EmitTrapCall(Intrinsic::trap); diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 15d0b35..abd049a 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -260,7 +260,8 @@ CommonSPIRTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, LangAS AS = QT->getUnqualifiedDesugaredType()->isNullPtrType() ? LangAS::Default : QT->getPointeeType().getAddressSpace(); - if (AS == LangAS::Default || AS == LangAS::opencl_generic) + if (AS == LangAS::Default || AS == LangAS::opencl_generic || + AS == LangAS::opencl_constant) return llvm::ConstantPointerNull::get(PT); auto &Ctx = CGM.getContext(); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 4e8f63e..d3ab6f1 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3708,6 +3708,7 @@ static void RenderHLSLOptions(const ArgList &Args, ArgStringList &CmdArgs, options::OPT_emit_obj, options::OPT_disable_llvm_passes, options::OPT_fnative_half_type, + options::OPT_fnative_int16_type, options::OPT_hlsl_entrypoint, options::OPT_fdx_rootsignature_define, options::OPT_fdx_rootsignature_version, diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp b/clang/lib/Driver/ToolChains/HLSL.cpp index 20a320e..8d3fba7 100644 --- a/clang/lib/Driver/ToolChains/HLSL.cpp +++ b/clang/lib/Driver/ToolChains/HLSL.cpp @@ -498,6 +498,15 @@ HLSLToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, continue; } + if (A->getOption().getID() == options::OPT_enable_16bit_types) { + // Translate -enable-16bit-types into -fnative-half-type and + // -fnative-int16-type + DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_fnative_half_type)); + DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_fnative_int16_type)); + A->claim(); + continue; + } + DAL->append(A); } diff --git a/clang/lib/Driver/ToolChains/ZOS.cpp b/clang/lib/Driver/ToolChains/ZOS.cpp index 57bcb3c..9a3c453 100644 --- a/clang/lib/Driver/ToolChains/ZOS.cpp +++ b/clang/lib/Driver/ToolChains/ZOS.cpp @@ -75,7 +75,7 @@ void zos::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } static std::string getLEHLQ(const ArgList &Args) { @@ -213,7 +213,7 @@ void zos::Linker::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } ToolChain::RuntimeLibType ZOS::GetDefaultRuntimeLibType() const { diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index e5abf83..9ab024a 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -356,9 +356,11 @@ bool ContinuationIndenter::canBreak(const LineState &State) { return CurrentState.BreakBeforeClosingBrace; } - // Allow breaking before the right parens with block indentation if there was - // a break after the left parens, which is tracked by BreakBeforeClosingParen. - if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent && + // Check need to break before the right parens if there was a break after + // the left parens, which is tracked by BreakBeforeClosingParen. + if ((Style.BreakBeforeCloseBracketFunction || + Style.BreakBeforeCloseBracketIf || Style.BreakBeforeCloseBracketLoop || + Style.BreakBeforeCloseBracketSwitch) && Current.is(tok::r_paren)) { return CurrentState.BreakBeforeClosingParen; } @@ -837,32 +839,38 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, return Tok.is(tok::l_brace) && Tok.isNot(BK_Block) && Style.Cpp11BracedListStyle != FormatStyle::BLS_Block; }; - if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) && - !IsStartOfBracedList()) { + if (IsStartOfBracedList()) + return Style.BreakAfterOpenBracketBracedList; + if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square)) return false; - } if (!Tok.Previous) return true; if (Tok.Previous->isIf()) - return Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak; - return Tok.Previous->isNoneOf(TT_CastRParen, tok::kw_for, tok::kw_while, - tok::kw_switch) && - !(Style.isJavaScript() && Tok.Previous->is(Keywords.kw_await)); + return Style.BreakAfterOpenBracketIf; + if (Tok.Previous->isLoop(Style)) + return Style.BreakAfterOpenBracketLoop; + if (Tok.Previous->is(tok::kw_switch)) + return Style.BreakAfterOpenBracketSwitch; + if (Style.BreakAfterOpenBracketFunction) { + return !Tok.Previous->is(TT_CastRParen) && + !(Style.isJavaScript() && Tok.is(Keywords.kw_await)); + } + return false; }; auto IsFunctionCallParen = [](const FormatToken &Tok) { return Tok.is(tok::l_paren) && Tok.ParameterCount > 0 && Tok.Previous && Tok.Previous->is(tok::identifier); }; - auto IsInTemplateString = [this](const FormatToken &Tok) { + auto IsInTemplateString = [this](const FormatToken &Tok, bool NestBlocks) { if (!Style.isJavaScript()) return false; for (const auto *Prev = &Tok; Prev; Prev = Prev->Previous) { if (Prev->is(TT_TemplateString) && Prev->opensScope()) return true; - if (Prev->opensScope() || - (Prev->is(TT_TemplateString) && Prev->closesScope())) { - break; - } + if (Prev->opensScope() && !NestBlocks) + return false; + if (Prev->is(TT_TemplateString) && Prev->closesScope()) + return false; } return false; }; @@ -884,21 +892,25 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, Tok.isOneOf(tok::ellipsis, Keywords.kw_await))) { return true; } - if (const auto *Previous = Tok.Previous; - !Previous || (Previous->isNoneOf(TT_FunctionDeclarationLParen, - TT_LambdaDefinitionLParen) && - !IsFunctionCallParen(*Previous))) { + const auto *Previous = TokAfterLParen.Previous; + assert(Previous); // IsOpeningBracket(Previous) + if (Previous->Previous && + (Previous->Previous->isIf() || Previous->Previous->isLoop(Style) || + Previous->Previous->is(tok::kw_switch))) { + return false; + } + if (Previous->isNoneOf(TT_FunctionDeclarationLParen, + TT_LambdaDefinitionLParen) && + !IsFunctionCallParen(*Previous)) { return true; } - if (IsOpeningBracket(Tok) || IsInTemplateString(Tok)) + if (IsOpeningBracket(Tok) || IsInTemplateString(Tok, true)) return true; const auto *Next = Tok.Next; return !Next || Next->isMemberAccess() || Next->is(TT_FunctionDeclarationLParen) || IsFunctionCallParen(*Next); }; - if ((Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak || - Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) && - IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) && + if (IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) && // Don't do this for simple (no expressions) one-argument function calls // as that feels like needlessly wasting whitespace, e.g.: // @@ -920,7 +932,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, // Note: This doesn't apply to macro expansion lines, which are MACRO( , , ) // with args as children of the '(' and ',' tokens. It does not make sense to // align the commas with the opening paren. - if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign && + if (Style.AlignAfterOpenBracket && !CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() && Previous.isNoneOf(TT_ObjCMethodExpr, TT_RequiresClause, TT_TableGenDAGArgOpener, @@ -933,7 +945,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, Previous.Previous->isNoneOf(tok::identifier, tok::l_paren, BK_BracedInit))) || Previous.is(TT_VerilogMultiLineListLParen)) && - !IsInTemplateString(Current)) { + !IsInTemplateString(Current, false)) { CurrentState.Indent = State.Column + Spaces; CurrentState.IsAligned = true; } @@ -1271,8 +1283,20 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, } if (PreviousNonComment && PreviousNonComment->is(tok::l_paren)) { - CurrentState.BreakBeforeClosingParen = - Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent; + if (auto Previous = PreviousNonComment->Previous) { + if (Previous->isIf()) { + CurrentState.BreakBeforeClosingParen = Style.BreakBeforeCloseBracketIf; + } else if (Previous->isLoop(Style)) { + CurrentState.BreakBeforeClosingParen = + Style.BreakBeforeCloseBracketLoop; + } else if (Previous->is(tok::kw_switch)) { + CurrentState.BreakBeforeClosingParen = + Style.BreakBeforeCloseBracketSwitch; + } else { + CurrentState.BreakBeforeClosingParen = + Style.BreakBeforeCloseBracketFunction; + } + } } if (PreviousNonComment && PreviousNonComment->is(TT_TemplateOpener)) @@ -1416,13 +1440,17 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { State.Stack.size() > 1) { return State.Stack[State.Stack.size() - 2].LastSpace; } - if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent && - (Current.is(tok::r_paren) || - (Current.is(tok::r_brace) && Current.MatchingParen && - Current.MatchingParen->is(BK_BracedInit))) && + if (Style.BreakBeforeCloseBracketBracedList && Current.is(tok::r_brace) && + Current.MatchingParen && Current.MatchingParen->is(BK_BracedInit) && State.Stack.size() > 1) { return State.Stack[State.Stack.size() - 2].LastSpace; } + if ((Style.BreakBeforeCloseBracketFunction || + Style.BreakBeforeCloseBracketIf || Style.BreakBeforeCloseBracketLoop || + Style.BreakBeforeCloseBracketSwitch) && + Current.is(tok::r_paren) && State.Stack.size() > 1) { + return State.Stack[State.Stack.size() - 2].LastSpace; + } if (Style.BreakBeforeTemplateCloser && Current.is(TT_TemplateCloser) && State.Stack.size() > 1) { return State.Stack[State.Stack.size() - 2].LastSpace; @@ -1844,8 +1872,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, PrecedenceLevel < prec::Assignment) && (!Previous || Previous->isNot(tok::kw_return) || (!Style.isJava() && PrecedenceLevel > 0)) && - (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign || - PrecedenceLevel > prec::Comma || Current.NestingLevel == 0) && + (Style.AlignAfterOpenBracket || PrecedenceLevel > prec::Comma || + Current.NestingLevel == 0) && (!Style.isTableGen() || (Previous && Previous->isOneOf(TT_TableGenDAGArgListComma, TT_TableGenDAGArgListCommaToBreak)))) { @@ -1885,8 +1913,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, if (PrecedenceLevel > prec::Unknown) NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column); if (PrecedenceLevel != prec::Conditional && - Current.isNot(TT_UnaryOperator) && - Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) { + Current.isNot(TT_UnaryOperator) && Style.AlignAfterOpenBracket) { NewParenState.StartOfFunctionCall = State.Column; } diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index edd126c..dd14fcd 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -32,6 +32,13 @@ using clang::format::FormatStyle; LLVM_YAML_IS_SEQUENCE_VECTOR(FormatStyle::RawStringFormat) +enum BracketAlignmentStyle : int8_t { + BAS_Align, + BAS_DontAlign, + BAS_AlwaysBreak, + BAS_BlockIndent +}; + namespace llvm { namespace yaml { template <> @@ -204,16 +211,16 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> { } }; -template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> { - static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) { - IO.enumCase(Value, "Align", FormatStyle::BAS_Align); - IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign); - IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak); - IO.enumCase(Value, "BlockIndent", FormatStyle::BAS_BlockIndent); +template <> struct ScalarEnumerationTraits<BracketAlignmentStyle> { + static void enumeration(IO &IO, BracketAlignmentStyle &Value) { + IO.enumCase(Value, "Align", BAS_Align); + IO.enumCase(Value, "DontAlign", BAS_DontAlign); // For backward compatibility. - IO.enumCase(Value, "true", FormatStyle::BAS_Align); - IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign); + IO.enumCase(Value, "true", BAS_Align); + IO.enumCase(Value, "false", BAS_DontAlign); + IO.enumCase(Value, "AlwaysBreak", BAS_AlwaysBreak); + IO.enumCase(Value, "BlockIndent", BAS_BlockIndent); } }; @@ -979,6 +986,54 @@ template <> struct MappingTraits<FormatStyle> { bool SpacesInCStyleCastParentheses = false; bool SpacesInParentheses = false; + if (IO.outputting()) { + IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); + } else { + // For backward compatibility. + BracketAlignmentStyle LocalBAS = BAS_Align; + if (IsGoogleOrChromium) { + FormatStyle::LanguageKind Language = Style.Language; + if (Language == FormatStyle::LK_None) + Language = ((FormatStyle *)IO.getContext())->Language; + if (Language == FormatStyle::LK_JavaScript) + LocalBAS = BAS_AlwaysBreak; + else if (Language == FormatStyle::LK_Java) + LocalBAS = BAS_DontAlign; + } else if (BasedOnStyle.equals_insensitive("webkit")) { + LocalBAS = BAS_DontAlign; + } + IO.mapOptional("AlignAfterOpenBracket", LocalBAS); + Style.BreakAfterOpenBracketBracedList = false; + Style.BreakAfterOpenBracketFunction = false; + Style.BreakAfterOpenBracketIf = false; + Style.BreakAfterOpenBracketLoop = false; + Style.BreakAfterOpenBracketSwitch = false; + Style.BreakBeforeCloseBracketBracedList = false; + Style.BreakBeforeCloseBracketFunction = false; + Style.BreakBeforeCloseBracketIf = false; + Style.BreakBeforeCloseBracketLoop = false; + Style.BreakBeforeCloseBracketSwitch = false; + + switch (LocalBAS) { + case BAS_DontAlign: + Style.AlignAfterOpenBracket = false; + break; + case BAS_BlockIndent: + Style.BreakBeforeCloseBracketBracedList = true; + Style.BreakBeforeCloseBracketFunction = true; + Style.BreakBeforeCloseBracketIf = true; + [[fallthrough]]; + case BAS_AlwaysBreak: + Style.BreakAfterOpenBracketBracedList = true; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakAfterOpenBracketIf = true; + [[fallthrough]]; + case BAS_Align: + Style.AlignAfterOpenBracket = true; + break; + } + } + // For backward compatibility. if (!IO.outputting()) { IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlines); @@ -1014,7 +1069,6 @@ template <> struct MappingTraits<FormatStyle> { } IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); - IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); IO.mapOptional("AlignArrayOfStructures", Style.AlignArrayOfStructures); IO.mapOptional("AlignConsecutiveAssignments", Style.AlignConsecutiveAssignments); @@ -1079,10 +1133,29 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("BreakAfterAttributes", Style.BreakAfterAttributes); IO.mapOptional("BreakAfterJavaFieldAnnotations", Style.BreakAfterJavaFieldAnnotations); + IO.mapOptional("BreakAfterOpenBracketBracedList", + Style.BreakAfterOpenBracketBracedList); + IO.mapOptional("BreakAfterOpenBracketFunction", + Style.BreakAfterOpenBracketFunction); + IO.mapOptional("BreakAfterOpenBracketIf", Style.BreakAfterOpenBracketIf); + IO.mapOptional("BreakAfterOpenBracketLoop", + Style.BreakAfterOpenBracketLoop); + IO.mapOptional("BreakAfterOpenBracketSwitch", + Style.BreakAfterOpenBracketSwitch); IO.mapOptional("BreakAfterReturnType", Style.BreakAfterReturnType); IO.mapOptional("BreakArrays", Style.BreakArrays); IO.mapOptional("BreakBeforeBinaryOperators", Style.BreakBeforeBinaryOperators); + IO.mapOptional("BreakBeforeCloseBracketBracedList", + Style.BreakBeforeCloseBracketBracedList); + IO.mapOptional("BreakBeforeCloseBracketFunction", + Style.BreakBeforeCloseBracketFunction); + IO.mapOptional("BreakBeforeCloseBracketIf", + Style.BreakBeforeCloseBracketIf); + IO.mapOptional("BreakBeforeCloseBracketLoop", + Style.BreakBeforeCloseBracketLoop); + IO.mapOptional("BreakBeforeCloseBracketSwitch", + Style.BreakBeforeCloseBracketSwitch); IO.mapOptional("BreakBeforeConceptDeclarations", Style.BreakBeforeConceptDeclarations); IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); @@ -1561,7 +1634,7 @@ static void expandPresetsSpacesInParens(FormatStyle &Expanded) { FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { FormatStyle LLVMStyle; LLVMStyle.AccessModifierOffset = -2; - LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align; + LLVMStyle.AlignAfterOpenBracket = true; LLVMStyle.AlignArrayOfStructures = FormatStyle::AIAS_None; LLVMStyle.AlignConsecutiveAssignments = {}; LLVMStyle.AlignConsecutiveAssignments.PadOperators = true; @@ -1621,10 +1694,20 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.BreakAdjacentStringLiterals = true; LLVMStyle.BreakAfterAttributes = FormatStyle::ABS_Leave; LLVMStyle.BreakAfterJavaFieldAnnotations = false; + LLVMStyle.BreakAfterOpenBracketBracedList = false; + LLVMStyle.BreakAfterOpenBracketFunction = false; + LLVMStyle.BreakAfterOpenBracketIf = false; + LLVMStyle.BreakAfterOpenBracketLoop = false; + LLVMStyle.BreakAfterOpenBracketSwitch = false; LLVMStyle.BreakAfterReturnType = FormatStyle::RTBS_None; LLVMStyle.BreakArrays = true; LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; + LLVMStyle.BreakBeforeCloseBracketBracedList = false; + LLVMStyle.BreakBeforeCloseBracketFunction = false; + LLVMStyle.BreakBeforeCloseBracketIf = false; + LLVMStyle.BreakBeforeCloseBracketLoop = false; + LLVMStyle.BreakBeforeCloseBracketSwitch = false; LLVMStyle.BreakBeforeConceptDeclarations = FormatStyle::BBCDS_Always; LLVMStyle.BreakBeforeInlineASMColon = FormatStyle::BBIAS_OnlyMultiline; LLVMStyle.BreakBeforeTemplateCloser = false; @@ -1877,7 +1960,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; if (Language == FormatStyle::LK_Java) { - GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + GoogleStyle.AlignAfterOpenBracket = false; GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign; GoogleStyle.AlignTrailingComments = {}; GoogleStyle.AlignTrailingComments.Kind = FormatStyle::TCAS_Never; @@ -1889,7 +1972,9 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.SpaceAfterCStyleCast = true; GoogleStyle.SpacesBeforeTrailingComments = 1; } else if (Language == FormatStyle::LK_JavaScript) { - GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + GoogleStyle.BreakAfterOpenBracketBracedList = true; + GoogleStyle.BreakAfterOpenBracketFunction = true; + GoogleStyle.BreakAfterOpenBracketIf = true; GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign; GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; // TODO: still under discussion whether to switch to SLS_All. @@ -2026,7 +2111,7 @@ FormatStyle getMozillaStyle() { FormatStyle getWebKitStyle() { FormatStyle Style = getLLVMStyle(); Style.AccessModifierOffset = -4; - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; Style.AlignOperands = FormatStyle::OAS_DontAlign; Style.AlignTrailingComments = {}; Style.AlignTrailingComments.Kind = FormatStyle::TCAS_Never; diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index d1c6264..28fdbcb 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -68,7 +68,7 @@ bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const { assert(MatchingParen); assert(MatchingParen->is(tok::l_brace)); if (Style.Cpp11BracedListStyle == FormatStyle::BLS_Block || - Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) { + !Style.BreakBeforeCloseBracketBracedList) { return false; } const auto *LBrace = MatchingParen; @@ -198,7 +198,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { return; // Column format doesn't really make sense if we don't align after brackets. - if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) + if (!Style.AlignAfterOpenBracket) return; FormatToken *ItemBegin = Token->Next; diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 6f3d24a..d833130 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -666,6 +666,12 @@ public: (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro); } + bool isLoop(const FormatStyle &Style) const { + return isOneOf(tok::kw_for, tok::kw_while) || + (Style.isJavaScript() && isNot(tok::l_paren) && Previous && + Previous->is(tok::kw_for)); + } + bool closesScopeAfterBlock() const { if (getBlockKind() == BK_Block) return true; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 021d8c6..8e227da 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4427,10 +4427,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0) return Style.PenaltyBreakOpenParenthesis; - if (Left.is(tok::l_paren) && InFunctionDecl && - Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) { + if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket) return 100; - } if (Left.is(tok::l_paren) && Left.Previous && (Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) || Left.Previous->isIf())) { @@ -4446,7 +4444,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, // If we aren't aligning after opening parens/braces we can always break // here unless the style does not want us to place all arguments on the // next line. - if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign && + if (!Style.AlignAfterOpenBracket && (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) { return 0; } @@ -6226,24 +6224,31 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, (Right.isBlockIndentedInitRBrace(Style))); } - // We only break before r_paren if we're in a block indented context. + // We can break before r_paren if we're in a block indented context or + // a control statement with an explicit style option. if (Right.is(tok::r_paren)) { - if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent || - !Right.MatchingParen) { + if (!Right.MatchingParen) return false; - } auto Next = Right.Next; if (Next && Next->is(tok::r_paren)) Next = Next->Next; if (Next && Next->is(tok::l_paren)) return false; const FormatToken *Previous = Right.MatchingParen->Previous; - return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf())); + if (!Previous) + return false; + if (Previous->isIf()) + return Style.BreakBeforeCloseBracketIf; + if (Previous->isLoop(Style)) + return Style.BreakBeforeCloseBracketLoop; + if (Previous->is(tok::kw_switch)) + return Style.BreakBeforeCloseBracketSwitch; + return Style.BreakBeforeCloseBracketFunction; } if (Left.isOneOf(tok::r_paren, TT_TrailingAnnotation) && Right.is(TT_TrailingAnnotation) && - Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) { + Style.BreakBeforeCloseBracketFunction) { return false; } diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index bd36eb4..1951e7f 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4600,7 +4600,8 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, // Validate that if fnative-half-type is given, that // the language standard is at least hlsl2018, and that // the target shader model is at least 6.2. - if (Args.getLastArg(OPT_fnative_half_type)) { + if (Args.getLastArg(OPT_fnative_half_type) || + Args.getLastArg(OPT_fnative_int16_type)) { const LangStandard &Std = LangStandard::getLangStandardForKind(Opts.LangStd); if (!(Opts.LangStd >= LangStandard::lang_hlsl2018 && @@ -4614,12 +4615,16 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, Diags.Report(diag::err_drv_hlsl_bad_shader_unsupported) << VulkanEnv << T.getOSName() << T.str(); } - if (Args.getLastArg(OPT_fnative_half_type)) { + if (Args.getLastArg(OPT_fnative_half_type) || + Args.getLastArg(OPT_fnative_int16_type)) { + const char *Str = Args.getLastArg(OPT_fnative_half_type) + ? "-fnative-half-type" + : "-fnative-int16-type"; const LangStandard &Std = LangStandard::getLangStandardForKind(Opts.LangStd); if (!(Opts.LangStd >= LangStandard::lang_hlsl2018)) Diags.Report(diag::err_drv_hlsl_16bit_types_unsupported) - << "-fnative-half-type" << false << Std.getName(); + << Str << false << Std.getName(); } } else { llvm_unreachable("expected DXIL or SPIR-V target"); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 47f1d5a..8602be1 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -399,7 +399,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, Builder.defineMacro("__HLSL_202y", Twine((unsigned)LangOptions::HLSLLangStd::HLSL_202y)); - if (LangOpts.NativeHalfType) + if (LangOpts.NativeHalfType && LangOpts.NativeInt16Type) Builder.defineMacro("__HLSL_ENABLE_16_BIT", "1"); // Shader target information diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 1858912..33fff76 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -162,18 +162,12 @@ set(x86_files adxintrin.h ammintrin.h amxavx512intrin.h - amxbf16transposeintrin.h amxcomplexintrin.h - amxcomplextransposeintrin.h amxfp16intrin.h - amxfp16transposeintrin.h amxfp8intrin.h amxintrin.h amxmovrsintrin.h - amxmovrstransposeintrin.h amxtf32intrin.h - amxtf32transposeintrin.h - amxtransposeintrin.h avx10_2_512bf16intrin.h avx10_2_512convertintrin.h avx10_2_512minmaxintrin.h diff --git a/clang/lib/Headers/amxbf16transposeintrin.h b/clang/lib/Headers/amxbf16transposeintrin.h deleted file mode 100644 index 86f09f2..0000000 --- a/clang/lib/Headers/amxbf16transposeintrin.h +++ /dev/null @@ -1,94 +0,0 @@ -/*===----- amxbf16transposeintrin.h - AMX-BF16 and AMX-TRANSPOSE ------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error \ - "Never use <amxbf16transposeintrin.h> directly; use <immintrin.h> instead." -#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_BF16TRANSPOSEINTRIN_H -#define __AMX_BF16TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-bf16,amx-transpose"))) - -/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in -/// tiles \a a and \a b, accumulating the intermediate single-precision -/// (32-bit) floating-point elements with elements in \a dst, and store the -/// 32-bit result back to tile \a dst. -/// -/// \headerfile <immintrin.h> -/// -/// \code -/// void _tile_tdpbf16ps (__tile dst, __tile a, __tile b) -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -/// tmp := dst.row[m] -/// FOR k := 0 TO (a.colsb / 4) - 1 -/// FOR n := 0 TO (dst.colsb / 4) - 1 -/// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+0]) * -/// FP32(b.row[k].bf16[2*n+0]) -/// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+1]) * -/// FP32(b.row[k].bf16[2*n+1]) -/// ENDFOR -/// ENDFOR -/// write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTDPBF16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b)) - -/// This is internal intrinsic. C/C++ user should avoid calling it directly. -static __inline__ _tile1024i __DEFAULT_FN_ATTRS -_tile_tdpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k, - _tile1024i dst, _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_ttdpbf16ps_internal(m, n, k, dst, src1, src2); -} - -/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in -/// tiles src0 and src1, accumulating the intermediate single-precision -/// (32-bit) floating-point elements with elements in "dst", and store the -/// 32-bit result back to tile "dst". -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTDPBF16PS </c> instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static __inline__ void __tile_tdpbf16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_tdpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile, - src0.tile, src1.tile); -} - -#undef __DEFAULT_FN_ATTRS - -#endif /* __x86_64__ */ -#endif /* __AMX_BF16TRANSPOSEINTRIN_H */ diff --git a/clang/lib/Headers/amxcomplextransposeintrin.h b/clang/lib/Headers/amxcomplextransposeintrin.h deleted file mode 100644 index 11abaf9..0000000 --- a/clang/lib/Headers/amxcomplextransposeintrin.h +++ /dev/null @@ -1,303 +0,0 @@ -/*===----- amxcomplextransposeintrin.h - AMX-COMPLEX and AMX-TRANSPOSE ------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error \ - "Never use <amxcomplextransposeintrin.h> directly; include <immintrin.h> instead." -#endif // __IMMINTRIN_H - -#ifndef __AMX_COMPLEXTRANSPOSEINTRIN_H -#define __AMX_COMPLEXTRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-complex,amx-transpose"))) - -/// Perform matrix multiplication of two tiles containing complex elements and -/// accumulate the results into a packed single precision tile. Each dword -/// element in input tiles \a a and \a b is interpreted as a complex number -/// with FP16 real part and FP16 imaginary part. -/// Calculates the imaginary part of the result. For each possible combination -/// of (transposed column of \a a, column of \a b), it performs a set of -/// multiplication and accumulations on all corresponding complex numbers -/// (one from \a a and one from \a b). The imaginary part of the \a a element -/// is multiplied with the real part of the corresponding \a b element, and -/// the real part of the \a a element is multiplied with the imaginary part -/// of the corresponding \a b elements. The two accumulated results are -/// added, and then accumulated into the corresponding row and column of -/// \a dst. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// void _tile_tcmmimfp16ps(__tile dst, __tile a, __tile b); -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -/// tmp := dst.row[m] -/// FOR k := 0 TO a.rows - 1 -/// FOR n := 0 TO (dst.colsb / 4) - 1 -/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1]) -/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0]) -/// ENDFOR -/// ENDFOR -/// write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tcmmimfp16ps(dst, a, b) \ - __builtin_ia32_ttcmmimfp16ps((dst), (a), (b)) - -/// Perform matrix multiplication of two tiles containing complex elements and -/// accumulate the results into a packed single precision tile. Each dword -/// element in input tiles \a a and \a b is interpreted as a complex number -/// with FP16 real part and FP16 imaginary part. -/// Calculates the real part of the result. For each possible combination -/// of (rtransposed colum of \a a, column of \a b), it performs a set of -/// multiplication and accumulations on all corresponding complex numbers -/// (one from \a a and one from \a b). The real part of the \a a element is -/// multiplied with the real part of the corresponding \a b element, and the -/// negated imaginary part of the \a a element is multiplied with the -/// imaginary part of the corresponding \a b elements. The two accumulated -/// results are added, and then accumulated into the corresponding row and -/// column of \a dst. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// void _tile_tcmmrlfp16ps(__tile dst, __tile a, __tile b); -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -/// tmp := dst.row[m] -/// FOR k := 0 TO a.rows - 1 -/// FOR n := 0 TO (dst.colsb / 4) - 1 -/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0]) -/// tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1]) -/// ENDFOR -/// ENDFOR -/// write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tcmmrlfp16ps(dst, a, b) \ - __builtin_ia32_ttcmmrlfp16ps((dst), (a), (b)) - -/// Perform matrix conjugate transpose and multiplication of two tiles -/// containing complex elements and accumulate the results into a packed -/// single precision tile. Each dword element in input tiles \a a and \a b -/// is interpreted as a complex number with FP16 real part and FP16 imaginary -/// part. -/// Calculates the imaginary part of the result. For each possible combination -/// of (transposed column of \a a, column of \a b), it performs a set of -/// multiplication and accumulations on all corresponding complex numbers -/// (one from \a a and one from \a b). The negated imaginary part of the \a a -/// element is multiplied with the real part of the corresponding \a b -/// element, and the real part of the \a a element is multiplied with the -/// imaginary part of the corresponding \a b elements. The two accumulated -/// results are added, and then accumulated into the corresponding row and -/// column of \a dst. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// void _tile_conjtcmmimfp16ps(__tile dst, __tile a, __tile b); -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -/// tmp := dst.row[m] -/// FOR k := 0 TO a.rows - 1 -/// FOR n := 0 TO (dst.colsb / 4) - 1 -/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1]) -/// tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0]) -/// ENDFOR -/// ENDFOR -/// write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TCONJTCMMIMFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_conjtcmmimfp16ps(dst, a, b) \ - __builtin_ia32_tconjtcmmimfp16ps((dst), (a), (b)) - -/// Perform conjugate transpose of an FP16-pair of complex elements from \a a -/// and writes the result to \a dst. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// void _tile_conjtfp16(__tile dst, __tile a); -/// \endcode -/// -/// \code{.operation} -/// FOR i := 0 TO dst.rows - 1 -/// FOR j := 0 TO (dst.colsb / 4) - 1 -/// tmp.fp16[2*j+0] := a.row[j].fp16[2*i+0] -/// tmp.fp16[2*j+1] := -a.row[j].fp16[2*i+1] -/// ENDFOR -/// write_row_and_zero(dst, i, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TCONJTFP16 instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The source tile. Max size is 1024 Bytes. -#define _tile_conjtfp16(dst, a) __builtin_ia32_tconjtfp16((dst), (a)) - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmimfp16ps_internal( - unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, - _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_ttcmmimfp16ps_internal(m, n, k, dst, src1, src2); -} - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmrlfp16ps_internal( - unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, - _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_ttcmmrlfp16ps_internal(m, n, k, dst, src1, src2); -} - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_conjtcmmimfp16ps_internal( - unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, - _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_tconjtcmmimfp16ps_internal(m, n, k, dst, src1, src2); -} - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS -_tile_conjtfp16_internal(unsigned short m, unsigned short n, _tile1024i src) { - return __builtin_ia32_tconjtfp16_internal(m, n, src); -} - -/// Perform matrix multiplication of two tiles containing complex elements and -/// accumulate the results into a packed single precision tile. Each dword -/// element in input tiles src0 and src1 is interpreted as a complex number -/// with FP16 real part and FP16 imaginary part. -/// This function calculates the imaginary part of the result. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTCMMIMFP16PS </c> instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static void __tile_tcmmimfp16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_tcmmimfp16ps_internal(src0.row, src1.col, src0.col, - dst->tile, src0.tile, src1.tile); -} - -/// Perform matrix multiplication of two tiles containing complex elements and -/// accumulate the results into a packed single precision tile. Each dword -/// element in input tiles src0 and src1 is interpreted as a complex number -/// with FP16 real part and FP16 imaginary part. -/// This function calculates the real part of the result. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTCMMRLFP16PS </c> instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static void __tile_tcmmrlfp16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_tcmmrlfp16ps_internal(src0.row, src1.col, src0.col, - dst->tile, src0.tile, src1.tile); -} - -/// Perform matrix conjugate transpose and multiplication of two tiles -/// containing complex elements and accumulate the results into a packed -/// single precision tile. Each dword element in input tiles src0 and src1 -/// is interpreted as a complex number with FP16 real part and FP16 imaginary -/// part. -/// This function calculates the imaginary part of the result. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TCONJTCMMIMFP16PS </c> instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static void __tile_conjtcmmimfp16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_conjtcmmimfp16ps_internal(src0.row, src1.col, src0.col, - dst->tile, src0.tile, src1.tile); -} - -/// Perform conjugate transpose of an FP16-pair of complex elements from src and -/// writes the result to dst. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TCONJTFP16 </c> instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src -/// The source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static void __tile_conjtfp16(__tile1024i *dst, __tile1024i src) { - dst->tile = _tile_conjtfp16_internal(src.row, src.col, src.tile); -} - -#undef __DEFAULT_FN_ATTRS - -#endif // __x86_64__ -#endif // __AMX_COMPLEXTRANSPOSEINTRIN_H diff --git a/clang/lib/Headers/amxfp16transposeintrin.h b/clang/lib/Headers/amxfp16transposeintrin.h deleted file mode 100644 index 191f8c6..0000000 --- a/clang/lib/Headers/amxfp16transposeintrin.h +++ /dev/null @@ -1,94 +0,0 @@ -/*===----- amxfp16transposeintrin.h - AMX-FP16 and AMX-TRANSPOSE ------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error \ - "Never use <amxfp16transposeintrin.h> directly; use <immintrin.h> instead." -#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_FP16TRANSPOSEINTRIN_H -#define __AMX_FP16TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-fp16,amx-transpose"))) - -/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in -/// tiles \a a and \a b, accumulating the intermediate single-precision -/// (32-bit) floating-point elements with elements in \a dst, and store the -/// 32-bit result back to tile \a dst. -/// -/// \headerfile <immintrin.h> -/// -/// \code -/// void _tile_tdpfp16ps (__tile dst, __tile a, __tile b) -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -/// tmp := dst.row[m] -/// FOR k := 0 TO (a.colsb / 4) - 1 -/// FOR n := 0 TO (dst.colsb / 4) - 1 -/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * -/// FP32(b.row[k].fp16[2*n+0]) -/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * -/// FP32(b.row[k].fp16[2*n+1]) -/// ENDFOR -/// ENDFOR -/// write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTDPFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tdpfp16ps(dst, a, b) __builtin_ia32_ttdpfp16ps((dst), (a), (b)) - -/// This is internal intrinsic. C/C++ user should avoid calling it directly. -static __inline__ _tile1024i __DEFAULT_FN_ATTRS -_tile_tdpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k, - _tile1024i dst, _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_ttdpfp16ps_internal(m, n, k, dst, src1, src2); -} - -/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in -/// tiles src0 and src1, accumulating the intermediate single-precision -/// (32-bit) floating-point elements with elements in "dst", and store the -/// 32-bit result back to tile "dst". -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTDPFP16PS </c> instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static __inline__ void __tile_tdpfp16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_tdpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, - src0.tile, src1.tile); -} - -#undef __DEFAULT_FN_ATTRS - -#endif /* __x86_64__ */ -#endif /* __AMX_FP16TRANSPOSEINTRIN_H */ diff --git a/clang/lib/Headers/amxintrin.h b/clang/lib/Headers/amxintrin.h index a7da10d..208aa358 100644 --- a/clang/lib/Headers/amxintrin.h +++ b/clang/lib/Headers/amxintrin.h @@ -230,8 +230,6 @@ static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_release(void) { /// bytes. Since there is no 2D type in llvm IR, we use vector type to /// represent 2D tile and the fixed size is maximum amx tile register size. typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64))); -typedef int _tile1024i_1024a - __attribute__((__vector_size__(1024), __aligned__(1024))); /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE diff --git a/clang/lib/Headers/amxmovrstransposeintrin.h b/clang/lib/Headers/amxmovrstransposeintrin.h deleted file mode 100644 index 5f48cba..0000000 --- a/clang/lib/Headers/amxmovrstransposeintrin.h +++ /dev/null @@ -1,200 +0,0 @@ -/* ===--- amxmovrstransposeintrin.h - AMX_MOVRS_TRANSPOSE intrinsics --------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * ===-----------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error \ - "Never use <amxmovrstransposeintrin.h> directly; use <immintrin.h> instead." -#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_MOVRS_TRANSPOSEINTRIN_H -#define __AMX_MOVRS_TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-transpose,amx-movrs"))) - -#define _tile_2rpntlvwz0rs(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz0rs(tdst, base, stride) -#define _tile_2rpntlvwz0rst1(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz0rst1(tdst, base, stride) -#define _tile_2rpntlvwz1rs(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz1rs(tdst, base, stride) -#define _tile_2rpntlvwz1rst1(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz1rst1(tdst, base, stride) - -static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rs_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - // Use __tile1024i_1024a* to escape the alignment check in - // clang/test/Headers/x86-intrinsics-headers-clean.cpp - __builtin_ia32_t2rpntlvwz0rs_internal( - row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rst1_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - __builtin_ia32_t2rpntlvwz0rst1_internal( - row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rs_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - __builtin_ia32_t2rpntlvwz1rs_internal( - row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rst1_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - __builtin_ia32_t2rpntlvwz1rst1_internal( - row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. -/// Provides a hint to the implementation that the data will likely become -/// read shared in the near future and the data caching can be optimized. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ0RS </c> instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS -static void __tile_2rpntlvwz0rs(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz0rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1RS </c> instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS -static void __tile_2rpntlvwz0rst1(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz0rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. The last row will be not be read from memory but instead -/// filled with zeros. -/// Provides a hint to the implementation that the data will likely become -/// read shared in the near future and the data caching can be optimized. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS -static void __tile_2rpntlvwz1rs(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz1rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. The last row will be not be read from memory but instead -/// filled with zeros. -/// Provides a hint to the implementation that the data will likely become -/// read shared in the near future and the data caching can be optimized. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1RS </c> instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS -static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz1rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -#undef __DEFAULT_FN_ATTRS -#endif /* __x86_64__ */ -#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */ diff --git a/clang/lib/Headers/amxtf32transposeintrin.h b/clang/lib/Headers/amxtf32transposeintrin.h deleted file mode 100644 index e1b90c1..0000000 --- a/clang/lib/Headers/amxtf32transposeintrin.h +++ /dev/null @@ -1,105 +0,0 @@ -/*===--------- amxtf32transposeintrin.h - AMX-TF32 and AMX-TRANSPOSE --------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ -#ifndef __IMMINTRIN_H -#error \ - "Never use <amxtf32transposeintrin.h> directly; include <immintrin.h> instead." -#endif // __IMMINTRIN_H - -#ifndef __AMX_TF32TRANSPOSEINTRIN_H -#define __AMX_TF32TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS_TF32_TRANSPOSE \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-tf32,amx-transpose"))) - -/// \code -/// void _tile_tmmultf32ps(constexpr int srcdst, constexpr int a, \ -/// constexpr int b); -/// \endcode -/// -/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction. -/// -/// \param srcdst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. -/// -/// \code{.operation} -/// DEFINE zero_lower_mantissa_bits_fp32(x[31:0]) { -/// dword[12:0] := 0 -/// dword[31:13] := x[31:13] -/// return dword -/// } -/// -/// DEFINE silence_snan_fp32(x[31:0]) { -/// IF (x.exponent == 255 and x.fraction != 0 and x.fraction[22] == 0) -/// x.fraction[22] := 1 -/// return x -/// } -/// -/// elements_dest:= srcdst.colsb/4 -/// -/// FOR m := 0 TO (srcdst.rows-1) -/// tmp[511:0] := 0 -/// FOR k := 0 TO (a.rows-1) -/// FOR n := 0 TO (elements_dest-1) -/// a1e := silence_snan_fp32(a.row[k].fp32[m]) -/// a2e := silence_snan_fp32(b.row[k].fp32[n]) -/// s1e := zero_lower_mantissa_bits_fp32(a1e) -/// s2e := zero_lower_mantissa_bits_fp32(a2e) -/// tmp.fp32[n] += s1e * s2e -/// ENDFOR -/// ENDFOR -/// -/// FOR n := 0 TO (elements_dest-1) -/// tmp.fp32[n] += srcdst.row[m].fp32[n] -/// ENDFOR -/// write_row_and_zero(srcdst, m, tmp, srcdst.colsb) -/// -/// ENDFOR -/// -/// zero_upper_rows(srcdst, srcdst.rows) -/// zero_tileconfig_start() -/// \endcode -#define _tile_tmmultf32ps(srcdst, a, b) \ - __builtin_ia32_ttmmultf32ps((srcdst), (a), (b)) - -// dst = m x n (srcdest), src1 = k x m, src2 = k x n -static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TF32_TRANSPOSE -_tile_tmmultf32ps_internal(unsigned short m, unsigned short n, unsigned short k, - _tile1024i dst, _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_ttmmultf32ps_internal(m, n, k, dst, src1, src2); -} - -/// Compute transpose and do Matrix Multiplication of src0 and src1, and then do -/// Matrix Plus with dst. All the calculation is base on float32 but with the -/// lower 13-bit set to 0. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS_TF32_TRANSPOSE -static void __tile_tmmultf32ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_tmmultf32ps_internal(src0.row, src1.col, src0.col, - dst->tile, src0.tile, src1.tile); -} - -#endif // __x86_64__ -#endif // __AMX_TF32TRANSPOSEINTRIN_H diff --git a/clang/lib/Headers/amxtransposeintrin.h b/clang/lib/Headers/amxtransposeintrin.h deleted file mode 100644 index b3fa37d..0000000 --- a/clang/lib/Headers/amxtransposeintrin.h +++ /dev/null @@ -1,248 +0,0 @@ -/* ===--- amxtransposeintrin.h - AMX_TRANSPOSE intrinsics -*- C++ -*---------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * ===-----------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error "Never use <amxtransposeintrin.h> directly; use <immintrin.h> instead." -#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_TRANSPOSEINTRIN_H -#define __AMX_TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS_TRANSPOSE \ - __attribute__((__always_inline__, __nodebug__, __target__("amx-transpose"))) - -#define _tile_2rpntlvwz0(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz0(tdst, base, stride) -#define _tile_2rpntlvwz0t1(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz0t1(tdst, base, stride) -#define _tile_2rpntlvwz1(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz1(tdst, base, stride) -#define _tile_2rpntlvwz1t1(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz1t1(tdst, base, stride) - -/// Transpose 32-bit elements from \a src and write the result to \a dst. -/// -/// \headerfile <immintrin.h> -/// -/// \code -/// void _tile_transposed(__tile dst, __tile src); -/// \endcode -/// -/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src -/// The source tile. Max size is 1024 Bytes. -/// -/// \code{.operation} -/// -/// FOR i := 0 TO (dst.rows-1) -/// tmp[511:0] := 0 -/// FOR j := 0 TO (dst.colsb/4-1) -/// tmp.dword[j] := src.row[j].dword[i] -/// ENDFOR -/// dst.row[i] := tmp -/// ENDFOR -/// -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -#define _tile_transposed(dst, src) __builtin_ia32_ttransposed(dst, src) - -static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - // Use __tile1024i_1024a* to escape the alignment check in - // clang/test/Headers/x86-intrinsics-headers-clean.cpp - __builtin_ia32_t2rpntlvwz0_internal(row, col0, col1, (_tile1024i_1024a *)dst0, - (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0t1_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - __builtin_ia32_t2rpntlvwz0t1_internal( - row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - __builtin_ia32_t2rpntlvwz1_internal(row, col0, col1, (_tile1024i_1024a *)dst0, - (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1t1_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - __builtin_ia32_t2rpntlvwz1t1_internal( - row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -// This is internal intrinsic. C/C++ user should avoid calling it directly. -static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TRANSPOSE -_tile_transposed_internal(unsigned short m, unsigned short n, _tile1024i src) { - return __builtin_ia32_ttransposed_internal(m, n, src); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. -/// Provides a hint to the implementation that the data will likely not be -/// reused in the near future and the data caching can be optimized. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ0 </c> instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_2rpntlvwz0(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz0_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1 </c> instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_2rpntlvwz0t1(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz0t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. The last row will be not be read from memory but instead -/// filled with zeros. -/// Provides a hint to the implementation that the data will likely not be -/// reused in the near future and the data caching can be optimized. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_2rpntlvwz1(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. The last row will be not be read from memory but instead -/// filled with zeros. -/// Provides a hint to the implementation that the data will likely not be -/// reused in the near future and the data caching can be optimized. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1 </c> instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_2rpntlvwz1t1(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz1t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Transpose 32-bit elements from src and write the result to dst. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src -/// The source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_transposed(__tile1024i *dst, __tile1024i src) { - dst->tile = _tile_transposed_internal(dst->row, dst->col, src.tile); -} - -#endif /* __x86_64__ */ -#endif /* __AMX_TRANSPOSEINTRIN_H */ diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 35f012c..19064a4 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -475,24 +475,12 @@ _storebe_i64(void * __P, long long __D) { #include <amxfp8intrin.h> -#include <amxtransposeintrin.h> - #include <amxmovrsintrin.h> -#include <amxmovrstransposeintrin.h> - #include <amxavx512intrin.h> #include <amxtf32intrin.h> -#include <amxtf32transposeintrin.h> - -#include <amxbf16transposeintrin.h> - -#include <amxfp16transposeintrin.h> - -#include <amxcomplextransposeintrin.h> - #include <avx512vp2intersectintrin.h> #include <avx512vlvp2intersectintrin.h> diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 65c324c..f05c28fd 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -221,7 +221,7 @@ std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName, // file. for (const std::string &Dir : HSOpts.PrebuiltModulePaths) { SmallString<256> Result(Dir); - llvm::sys::fs::make_absolute(Result); + FileMgr.makeAbsolutePath(Result); if (ModuleName.contains(':')) // The separator of C++20 modules partitions (':') is not good for file // systems, here clang and gcc choose '-' by default since it is not a @@ -246,7 +246,7 @@ std::string HeaderSearch::getPrebuiltImplicitModuleFileName(Module *Module) { StringRef ModuleCacheHash = HSOpts.DisableModuleHash ? "" : getModuleHash(); for (const std::string &Dir : HSOpts.PrebuiltModulePaths) { SmallString<256> CachePath(Dir); - llvm::sys::fs::make_absolute(CachePath); + FileMgr.makeAbsolutePath(CachePath); llvm::sys::path::append(CachePath, ModuleCacheHash); std::string FileName = getCachedModuleFileNameImpl(ModuleName, ModuleMapPath, CachePath); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index e4b158e..7e4a164 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -4248,6 +4248,13 @@ void Parser::ParseDeclarationSpecifiers( // type-specifier case tok::kw_short: + if (!getLangOpts().NativeInt16Type) { + Diag(Tok, diag::err_unknown_typename) << Tok.getName(); + DS.SetTypeSpecError(); + DS.SetRangeEnd(Tok.getLocation()); + ConsumeToken(); + goto DoneWithDeclSpec; + } isInvalid = DS.SetTypeSpecWidth(TypeSpecifierWidth::Short, Loc, PrevSpec, DiagID, Policy); break; diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index e32f437..139c4ab 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -153,7 +153,48 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32: - case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: { + case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: { StringRef FeatureList( getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); if (!Builtin::evaluateRequiredTargetFeatures(FeatureList, diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index f451787..ad2c2e4 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3542,9 +3542,7 @@ bool Sema::ValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum) { bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx, unsigned FirstArg, FormatStringInfo *FSI) { - bool IsCXXMember = false; - if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) - IsCXXMember = MD->isInstance(); + bool HasImplicitThisParam = hasImplicitObjectParameter(D); bool IsVariadic = false; if (const FunctionType *FnTy = D->getFunctionType()) IsVariadic = cast<FunctionProtoType>(FnTy)->isVariadic(); @@ -3553,11 +3551,12 @@ bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx, else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) IsVariadic = OMD->isVariadic(); - return getFormatStringInfo(FormatIdx, FirstArg, IsCXXMember, IsVariadic, FSI); + return getFormatStringInfo(FormatIdx, FirstArg, HasImplicitThisParam, + IsVariadic, FSI); } bool Sema::getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg, - bool IsCXXMember, bool IsVariadic, + bool HasImplicitThisParam, bool IsVariadic, FormatStringInfo *FSI) { if (FirstArg == 0) FSI->ArgPassingKind = FAPK_VAList; @@ -3571,7 +3570,7 @@ bool Sema::getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg, // The way the format attribute works in GCC, the implicit this argument // of member functions is counted. However, it doesn't appear in our own // lists, so decrement format_idx in that case. - if (IsCXXMember) { + if (HasImplicitThisParam) { if(FSI->FormatIdx == 0) return false; --FSI->FormatIdx; diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 0514d10..aa93507 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -10208,6 +10208,24 @@ void SemaCodeCompletion::CodeCompletePreprocessorDirective(bool InConditional) { Builder.AddPlaceholderChunk("message"); Results.AddResult(Builder.TakeString()); + if (getLangOpts().C23) { + // #embed "file" + Builder.AddTypedTextChunk("embed"); + Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + Builder.AddTextChunk("\""); + Builder.AddPlaceholderChunk("file"); + Builder.AddTextChunk("\""); + Results.AddResult(Builder.TakeString()); + + // #embed <file> + Builder.AddTypedTextChunk("embed"); + Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + Builder.AddTextChunk("<"); + Builder.AddPlaceholderChunk("file"); + Builder.AddTextChunk(">"); + Results.AddResult(Builder.TakeString()); + } + // Note: #ident and #sccs are such crazy anachronisms that we don't provide // completions for them. And __include_macros is a Clang-internal extension // that we don't want to encourage anyone to use. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 964a2a7..a9e7b44 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3785,7 +3785,7 @@ static bool handleFormatAttrCommon(Sema &S, Decl *D, const ParsedAttr &AL, // In C++ the implicit 'this' function parameter also counts, and they are // counted from one. - bool HasImplicitThisParam = isInstanceMethod(D); + bool HasImplicitThisParam = hasImplicitObjectParameter(D); Info->NumArgs = getFunctionOrMethodNumParams(D) + HasImplicitThisParam; Info->Identifier = AL.getArgAsIdent(0)->getIdentifierInfo(); @@ -3926,7 +3926,7 @@ static void handleCallbackAttr(Sema &S, Decl *D, const ParsedAttr &AL) { return; } - bool HasImplicitThisParam = isInstanceMethod(D); + bool HasImplicitThisParam = hasImplicitObjectParameter(D); int32_t NumArgs = getFunctionOrMethodNumParams(D); FunctionDecl *FD = D->getAsFunction(); @@ -4110,7 +4110,7 @@ static void handleLifetimeCaptureByAttr(Sema &S, Decl *D, } void Sema::LazyProcessLifetimeCaptureByParams(FunctionDecl *FD) { - bool HasImplicitThisParam = isInstanceMethod(FD); + bool HasImplicitThisParam = hasImplicitObjectParameter(FD); SmallVector<LifetimeCaptureByAttr *, 1> Attrs; for (ParmVarDecl *PVD : FD->parameters()) if (auto *A = PVD->getAttr<LifetimeCaptureByAttr>()) diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp index 850bcb1..2f61bdd 100644 --- a/clang/lib/Sema/SemaX86.cpp +++ b/clang/lib/Sema/SemaX86.cpp @@ -489,14 +489,6 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_tileloaddrst164: case X86::BI__builtin_ia32_tilestored64: case X86::BI__builtin_ia32_tilezero: - case X86::BI__builtin_ia32_t2rpntlvwz0: - case X86::BI__builtin_ia32_t2rpntlvwz0t1: - case X86::BI__builtin_ia32_t2rpntlvwz1: - case X86::BI__builtin_ia32_t2rpntlvwz1t1: - case X86::BI__builtin_ia32_t2rpntlvwz0rst1: - case X86::BI__builtin_ia32_t2rpntlvwz1rs: - case X86::BI__builtin_ia32_t2rpntlvwz1rst1: - case X86::BI__builtin_ia32_t2rpntlvwz0rs: case X86::BI__builtin_ia32_tcvtrowps2bf16h: case X86::BI__builtin_ia32_tcvtrowps2bf16l: case X86::BI__builtin_ia32_tcvtrowps2phh: @@ -516,17 +508,8 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_tdpbhf8ps: case X86::BI__builtin_ia32_tdphbf8ps: case X86::BI__builtin_ia32_tdphf8ps: - case X86::BI__builtin_ia32_ttdpbf16ps: - case X86::BI__builtin_ia32_ttdpfp16ps: - case X86::BI__builtin_ia32_ttcmmimfp16ps: - case X86::BI__builtin_ia32_ttcmmrlfp16ps: - case X86::BI__builtin_ia32_tconjtcmmimfp16ps: case X86::BI__builtin_ia32_tmmultf32ps: - case X86::BI__builtin_ia32_ttmmultf32ps: return CheckBuiltinTileRangeAndDuplicate(TheCall, {0, 1, 2}); - case X86::BI__builtin_ia32_ttransposed: - case X86::BI__builtin_ia32_tconjtfp16: - return CheckBuiltinTileArgumentsRange(TheCall, {0, 1}); } } static bool isX86_32Builtin(unsigned BuiltinID) { diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp index 171c786..b4bdec1 100644 --- a/clang/lib/Tooling/Transformer/RangeSelector.cpp +++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp @@ -205,8 +205,12 @@ RangeSelector transformer::name(std::string ID) { // `foo<int>` for which this range will be too short. Doing so will // require subcasing `NamedDecl`, because it doesn't provide virtual // access to the \c DeclarationNameInfo. - if (tooling::getText(R, *Result.Context) != D->getName()) - return CharSourceRange(); + StringRef Text = tooling::getText(R, *Result.Context); + if (Text != D->getName()) + return llvm::make_error<StringError>( + llvm::errc::not_supported, + "range selected by name(node id=" + ID + "): '" + Text + + "' is different from decl name '" + D->getName() + "'"); return R; } if (const auto *E = Node.get<DeclRefExpr>()) { |
