Diffstat (limited to 'clang/lib')
55 files changed, 1013 insertions, 1797 deletions
| diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 687cd46..2669f62 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12403,6 +12403,11 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,    // Read the base type.    switch (*Str++) {    default: llvm_unreachable("Unknown builtin type letter!"); +  case 'e': +    assert(HowLong == 0 && !Signed && !Unsigned && +           "Bad modifiers used with 'e'!"); +    Type = Context.getLangOpts().OpenCL ? Context.HalfTy : Context.Float16Ty; +    break;    case 'x':      assert(HowLong == 0 && !Signed && !Unsigned &&             "Bad modifiers used with 'x'!"); diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b3ab82d..8b57b96 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3411,7 +3411,7 @@ static bool interp__builtin_x86_byteshift(  static bool interp__builtin_ia32_shuffle_generic(      InterpState &S, CodePtr OpPC, const CallExpr *Call, -    llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> +    llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>          GetSourceIndex) {    assert(Call->getNumArgs() == 3); @@ -3428,8 +3428,19 @@ static bool interp__builtin_ia32_shuffle_generic(    for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {      auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); -    const Pointer &Src = (SrcVecIdx == 0) ? A : B; -    TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); }); + +    if (SrcIdx < 0) { +      // Zero out this element +      if (ElemT == PT_Float) { +        Dst.elem<Floating>(DstIdx) = Floating( +            S.getASTContext().getFloatTypeSemantics(VecT->getElementType())); +      } else { +        INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); }); +      } +    } else { +      const Pointer &Src = (SrcVecIdx == 0) ? A : B; +      TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); }); +    }    }    Dst.initializeAllElements(); @@ -4382,7 +4393,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,            unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;            unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;            unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; -          return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; +          return std::pair<unsigned, int>{SrcIdx, +                                          static_cast<int>(LaneOffset + Index)};          });    case X86::BI__builtin_ia32_shufpd:    case X86::BI__builtin_ia32_shufpd256: @@ -4400,7 +4412,27 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,            unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 
1 : 0;            unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;            unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; -          return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; +          return std::pair<unsigned, int>{SrcIdx, +                                          static_cast<int>(LaneOffset + Index)}; +        }); +  case X86::BI__builtin_ia32_insertps128: +    return interp__builtin_ia32_shuffle_generic( +        S, OpPC, Call, [](unsigned DstIdx, unsigned Mask) { +          // Bits [3:0]: zero mask - if bit is set, zero this element +          if ((Mask & (1 << DstIdx)) != 0) { +            return std::pair<unsigned, int>{0, -1}; +          } +          // Bits [7:6]: select element from source vector Y (0-3) +          // Bits [5:4]: select destination position (0-3) +          unsigned SrcElem = (Mask >> 6) & 0x3; +          unsigned DstElem = (Mask >> 4) & 0x3; +          if (DstIdx == DstElem) { +            // Insert element from source vector (B) at this position +            return std::pair<unsigned, int>{1, static_cast<int>(SrcElem)}; +          } else { +            // Copy from destination vector (A) +            return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)}; +          }          });    case X86::BI__builtin_ia32_pshufb128:    case X86::BI__builtin_ia32_pshufb256: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index d0404b9..97eeba8 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11621,7 +11621,7 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,  static bool evalShuffleGeneric(      EvalInfo &Info, const CallExpr *Call, APValue &Out, -    llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> +    llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>          GetSourceIndex) {    const auto *VT = Call->getType()->getAs<VectorType>(); @@ -11644,8 +11644,16 @@ static bool evalShuffleGeneric(    for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) {      auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); -    const APValue &Src = (SrcVecIdx == 0) ? A : B; -    ResultElements.push_back(Src.getVectorElt(SrcIdx)); + +    if (SrcIdx < 0) { +      // Zero out this element +      QualType ElemTy = VT->getElementType(); +      ResultElements.push_back( +          APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)))); +    } else { +      const APValue &Src = (SrcVecIdx == 0) ? A : B; +      ResultElements.push_back(Src.getVectorElt(SrcIdx)); +    }    }    Out = APValue(ResultElements.data(), ResultElements.size()); @@ -12438,7 +12446,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {      if (!evalShuffleGeneric(              Info, E, R,              [](unsigned DstIdx, -               unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { +               unsigned ShuffleMask) -> std::pair<unsigned, int> {                constexpr unsigned LaneBits = 128u;                unsigned NumElemPerLane = LaneBits / 32;                unsigned NumSelectableElems = NumElemPerLane / 2; @@ -12451,7 +12459,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {                unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;                unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 
0 : 1;                unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; -              return {SrcIdx, LaneOffset + Index}; +              return {SrcIdx, static_cast<int>(LaneOffset + Index)};              }))        return false;      return Success(R, E); @@ -12463,7 +12471,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {      if (!evalShuffleGeneric(              Info, E, R,              [](unsigned DstIdx, -               unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { +               unsigned ShuffleMask) -> std::pair<unsigned, int> {                constexpr unsigned LaneBits = 128u;                unsigned NumElemPerLane = LaneBits / 64;                unsigned NumSelectableElems = NumElemPerLane / 2; @@ -12476,7 +12484,31 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {                unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;                unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1;                unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; -              return {SrcIdx, LaneOffset + Index}; +              return {SrcIdx, static_cast<int>(LaneOffset + Index)}; +            })) +      return false; +    return Success(R, E); +  } +  case X86::BI__builtin_ia32_insertps128: { +    APValue R; +    if (!evalShuffleGeneric( +            Info, E, R, +            [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { +              // Bits [3:0]: zero mask - if bit is set, zero this element +              if ((Mask & (1 << DstIdx)) != 0) { +                return {0, -1}; +              } +              // Bits [7:6]: select element from source vector Y (0-3) +              // Bits [5:4]: select destination position (0-3) +              unsigned SrcElem = (Mask >> 6) & 0x3; +              unsigned DstElem = (Mask >> 4) & 0x3; +              if (DstIdx == DstElem) { +                // Insert element from source vector (B) at this position +                return {1, static_cast<int>(SrcElem)}; +              } else { +                // Copy from destination vector (A) +                return {0, static_cast<int>(DstIdx)}; +              }              }))        return false;      return Success(R, E); diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 791df7e..59d9459 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -124,6 +124,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) {    case OMPC_nowait:    case OMPC_untied:    case OMPC_mergeable: +  case OMPC_threadset:    case OMPC_threadprivate:    case OMPC_groupprivate:    case OMPC_flush: @@ -2035,6 +2036,13 @@ void OMPClausePrinter::VisitOMPDefaultClause(OMPDefaultClause *Node) {    OS << ")";  } +void OMPClausePrinter::VisitOMPThreadsetClause(OMPThreadsetClause *Node) { +  OS << "threadset(" +     << getOpenMPSimpleClauseTypeName(OMPC_threadset, +                                      unsigned(Node->getThreadsetKind())) +     << ")"; +} +  void OMPClausePrinter::VisitOMPProcBindClause(OMPProcBindClause *Node) {    OS << "proc_bind("       << getOpenMPSimpleClauseTypeName(OMPC_proc_bind, diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 05b64cc..c909e1b 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -546,6 +546,8 @@ void OMPClauseProfiler::VisitOMPNocontextClause(const OMPNocontextClause *C) {  void OMPClauseProfiler::VisitOMPDefaultClause(const OMPDefaultClause *C) { } +void 
OMPClauseProfiler::VisitOMPThreadsetClause(const OMPThreadsetClause *C) {} +  void OMPClauseProfiler::VisitOMPProcBindClause(const OMPProcBindClause *C) { }  void OMPClauseProfiler::VisitOMPUnifiedAddressClause( diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 64b2bff..3d41f2d 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -210,6 +210,15 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,  #define OPENMP_ALLOCATE_MODIFIER(Name) .Case(#Name, OMPC_ALLOCATE_##Name)  #include "clang/Basic/OpenMPKinds.def"          .Default(OMPC_ALLOCATE_unknown); +  case OMPC_threadset: { +    unsigned Type = llvm::StringSwitch<unsigned>(Str) +#define OPENMP_THREADSET_KIND(Name) .Case(#Name, OMPC_THREADSET_##Name) +#include "clang/Basic/OpenMPKinds.def" +                        .Default(OMPC_THREADSET_unknown); +    if (LangOpts.OpenMP < 60) +      return OMPC_THREADSET_unknown; +    return Type; +  }    case OMPC_num_threads: {      unsigned Type = llvm::StringSwitch<unsigned>(Str)  #define OPENMP_NUMTHREADS_MODIFIER(Name) .Case(#Name, OMPC_NUMTHREADS_##Name) @@ -565,6 +574,16 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,  #include "clang/Basic/OpenMPKinds.def"      }      llvm_unreachable("Invalid OpenMP 'num_threads' clause modifier"); +  case OMPC_threadset: +    switch (Type) { +    case OMPC_THREADSET_unknown: +      return "unknown"; +#define OPENMP_THREADSET_KIND(Name)                                            \ +  case OMPC_THREADSET_##Name:                                                  \ +    return #Name; +#include "clang/Basic/OpenMPKinds.def" +    } +    llvm_unreachable("Invalid OpenMP 'threadset' clause modifier");    case OMPC_unknown:    case OMPC_threadprivate:    case OMPC_groupprivate: diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index d8ec837..938c648 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -608,8 +608,7 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,      return FileID::get(LoadedID);    }    unsigned FileSize = File.getSize(); -  llvm::ErrorOr<bool> NeedConversion = -      llvm::needConversion(Filename.str().c_str()); +  llvm::ErrorOr<bool> NeedConversion = llvm::needConversion(Filename);    if (NeedConversion && *NeedConversion) {      // Buffer size may increase due to potential z/OS EBCDIC to UTF-8      // conversion. 
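The __builtin_ia32_insertps128 cases added to InterpBuiltin.cpp and ExprConstant.cpp above decode the INSERTPS immediate the same way: bits [7:6] pick the element of the second source, bits [5:4] pick the destination slot, and bits [3:0] zero individual lanes (a negative source index now meaning "zero this element"). A minimal standalone model of that decoding, not taken from the patch and using hypothetical helper names, is:

#include <array>
#include <cstdio>

// Model of the INSERTPS imm8 handling used by the constant evaluators above:
// each destination lane is either zeroed, takes the selected element of B,
// or keeps the corresponding lane of A.
static std::array<float, 4> insertpsModel(std::array<float, 4> A,
                                          std::array<float, 4> B,
                                          unsigned Mask) {
  std::array<float, 4> R{};
  unsigned SrcElem = (Mask >> 6) & 0x3; // element selected from B
  unsigned DstElem = (Mask >> 4) & 0x3; // slot it is inserted into
  for (unsigned DstIdx = 0; DstIdx != 4; ++DstIdx) {
    if (Mask & (1u << DstIdx))  // zero-mask bit set: lane becomes 0.0f
      R[DstIdx] = 0.0f;
    else if (DstIdx == DstElem) // inserted slot: take B[SrcElem]
      R[DstIdx] = B[SrcElem];
    else                        // otherwise keep A's lane
      R[DstIdx] = A[DstIdx];
  }
  return R;
}

int main() {
  // 0x4A = 0b01'00'1010: insert B[1] into slot 0, zero lanes 1 and 3.
  auto R = insertpsModel({1.f, 2.f, 3.f, 4.f}, {10.f, 20.f, 30.f, 40.f}, 0x4A);
  std::printf("%g %g %g %g\n", R[0], R[1], R[2], R[3]); // prints: 20 0 3 0
}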
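The threadset clause wired up in OpenMPClause.cpp, StmtProfile.cpp and OpenMPKinds.cpp above (and given codegen effect via FreeAgentFlag further down, in CGOpenMPRuntime.cpp) is the OpenMP 6.0 task clause. A minimal usage sketch, assuming an OpenMP-6.0-enabled invocation such as -fopenmp -fopenmp-version=60 (the exact version-flag spelling is an assumption):

#include <cstdio>

int main() {
#pragma omp parallel
#pragma omp single
  {
    // threadset(omp_pool) asks the runtime to allow this task to run on a
    // free-agent thread; per the CGOpenMPRuntime.cpp hunk later in this
    // diff, the clause ORs FreeAgentFlag (0x80) into the kmp_task flags.
#pragma omp task threadset(omp_pool)
    std::printf("task eligible for a free-agent thread\n");
#pragma omp taskwait
  }
}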
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index d4de704..d4d696b 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -356,12 +356,6 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,    if (hasFastFMA())      Builder.defineMacro("FP_FAST_FMA"); -  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize), -                      "compile-time-constant access to the wavefront size will " -                      "be removed in a future release"); -  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize), -                      "compile-time-constant access to the wavefront size will " -                      "be removed in a future release");    Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));  } diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 846b240..d2eb9c5 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -445,27 +445,17 @@ public:      LongWidth = LongAlign = PointerWidth = PointerAlign = 64;      IntMaxType = SignedLong;      Int64Type = SignedLong; -    std::string DataLayout;      if (Triple.isOSAIX()) {        // TODO: Set appropriate ABI for AIX platform. -      DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64";        LongDoubleWidth = 64;        LongDoubleAlign = DoubleAlign = 32;        LongDoubleFormat = &llvm::APFloat::IEEEdouble(); -    } else if ((Triple.getArch() == llvm::Triple::ppc64le)) { -      DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64"; +    } else if ((Triple.getArch() == llvm::Triple::ppc64le) || +               Triple.isPPC64ELFv2ABI()) {        ABI = "elfv2";      } else { -      DataLayout = "E-m:e"; -      if (Triple.isPPC64ELFv2ABI()) { -        ABI = "elfv2"; -        DataLayout += "-Fn32"; -      } else { -        ABI = "elfv1"; -        DataLayout += "-Fi64"; -      } -      DataLayout += "-i64:64-i128:128-n32:64"; +      ABI = "elfv1";      }      if (Triple.isOSFreeBSD() || Triple.isOSOpenBSD() || Triple.isMusl()) { @@ -473,14 +463,12 @@ public:        LongDoubleFormat = &llvm::APFloat::IEEEdouble();      } -    if (Triple.isOSAIX() || Triple.isOSLinux()) -      DataLayout += "-S128-v256:256:256-v512:512:512"; -    resetDataLayout(DataLayout); -      // Newer PPC64 instruction sets support atomics up to 16 bytes.      MaxAtomicPromoteWidth = 128;      // Baseline PPC64 supports inlining atomics up to 8 bytes.      MaxAtomicInlineWidth = 64; + +    calculateDataLayout();    }    void setMaxAtomicWidth() override { @@ -495,10 +483,33 @@ public:      return TargetInfo::CharPtrBuiltinVaList;    } +  void calculateDataLayout() { +    std::string DataLayout; + +    if (getTriple().isOSAIX()) { +      DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64"; +    } else if ((getTriple().getArch() == llvm::Triple::ppc64le)) { +      DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64"; +    } else { +      DataLayout = "E-m:e"; +      if (ABI == "elfv2") { +        DataLayout += "-Fn32"; +      } else { +        DataLayout += "-Fi64"; +      } +      DataLayout += "-i64:64-i128:128-n32:64"; +    } + +    if (getTriple().isOSAIX() || getTriple().isOSLinux()) +      DataLayout += "-S128-v256:256:256-v512:512:512"; +    resetDataLayout(DataLayout); +  } +    // PPC64 Linux-specific ABI options.    
bool setABI(const std::string &Name) override {      if (Name == "elfv1" || Name == "elfv2") {        ABI = Name; +      calculateDataLayout();        return true;      }      return false; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index e71f10c..7a90c89 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -396,8 +396,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,        HasAMXFP8 = true;      } else if (Feature == "+amx-movrs") {        HasAMXMOVRS = true; -    } else if (Feature == "+amx-transpose") { -      HasAMXTRANSPOSE = true;      } else if (Feature == "+amx-avx512") {        HasAMXAVX512 = true;      } else if (Feature == "+amx-tf32") { @@ -925,8 +923,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,      Builder.defineMacro("__AMX_FP8__");    if (HasAMXMOVRS)      Builder.defineMacro("__AMX_MOVRS__"); -  if (HasAMXTRANSPOSE) -    Builder.defineMacro("__AMX_TRANSPOSE__");    if (HasAMXAVX512)      Builder.defineMacro("__AMX_AVX512__");    if (HasAMXTF32) @@ -1068,7 +1064,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {        .Case("amx-movrs", true)        .Case("amx-tf32", true)        .Case("amx-tile", true) -      .Case("amx-transpose", true)        .Case("avx", true)        .Case("avx10.1", true)        .Case("avx10.2", true) @@ -1189,7 +1184,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {        .Case("amx-movrs", HasAMXMOVRS)        .Case("amx-tf32", HasAMXTF32)        .Case("amx-tile", HasAMXTILE) -      .Case("amx-transpose", HasAMXTRANSPOSE)        .Case("avx", SSELevel >= AVX)        .Case("avx10.1", HasAVX10_1)        .Case("avx10.2", HasAVX10_2) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index be3a473..e7da262 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -160,7 +160,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {    bool HasAMXCOMPLEX = false;    bool HasAMXFP8 = false;    bool HasAMXMOVRS = false; -  bool HasAMXTRANSPOSE = false;    bool HasAMXAVX512 = false;    bool HasAMXTF32 = false;    bool HasSERIALIZE = false; diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 3c9c7ec..0198a9d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -771,14 +771,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,    case X86::BI_WriteBarrier:    case X86::BI_AddressOfReturnAddress:    case X86::BI__stosb: -  case X86::BI__builtin_ia32_t2rpntlvwz0_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz1_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:    case X86::BI__ud2:    case X86::BI__int2c:    case X86::BI__readfsbyte: diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index 71ff20a..5d5209b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -242,12 +242,19 @@ void CIRGenFunction::LexicalScope::cleanup() {      }    }; -  if (returnBlock != nullptr) { -    // Write out the return block, which loads the value from `__retval` and - 
   // issues the `cir.return`. +  // Cleanup are done right before codegen resumes a scope. This is where +  // objects are destroyed. Process all return blocks. +  // TODO(cir): Handle returning from a switch statement through a cleanup +  // block. We can't simply jump to the cleanup block, because the cleanup block +  // is not part of the case region. Either reemit all cleanups in the return +  // block or wait for MLIR structured control flow to support early exits. +  llvm::SmallVector<mlir::Block *> retBlocks; +  for (mlir::Block *retBlock : localScope->getRetBlocks()) {      mlir::OpBuilder::InsertionGuard guard(builder); -    builder.setInsertionPointToEnd(returnBlock); -    (void)emitReturn(*returnLoc); +    builder.setInsertionPointToEnd(retBlock); +    retBlocks.push_back(retBlock); +    mlir::Location retLoc = localScope->getRetLoc(retBlock); +    emitReturn(retLoc);    }    auto insertCleanupAndLeave = [&](mlir::Block *insPt) { @@ -274,19 +281,22 @@ void CIRGenFunction::LexicalScope::cleanup() {      if (localScope->depth == 0) {        // Reached the end of the function. -      if (returnBlock != nullptr) { -        if (returnBlock->getUses().empty()) { -          returnBlock->erase(); +      // Special handling only for single return block case +      if (localScope->getRetBlocks().size() == 1) { +        mlir::Block *retBlock = localScope->getRetBlocks()[0]; +        mlir::Location retLoc = localScope->getRetLoc(retBlock); +        if (retBlock->getUses().empty()) { +          retBlock->erase();          } else {            // Thread return block via cleanup block.            if (cleanupBlock) { -            for (mlir::BlockOperand &blockUse : returnBlock->getUses()) { +            for (mlir::BlockOperand &blockUse : retBlock->getUses()) {                cir::BrOp brOp = mlir::cast<cir::BrOp>(blockUse.getOwner());                brOp.setSuccessor(cleanupBlock);              }            } -          cir::BrOp::create(builder, *returnLoc, returnBlock); +          cir::BrOp::create(builder, retLoc, retBlock);            return;          }        } @@ -324,8 +334,10 @@ void CIRGenFunction::LexicalScope::cleanup() {    bool entryBlock = builder.getInsertionBlock()->isEntryBlock();    if (!entryBlock && curBlock->empty()) {      curBlock->erase(); -    if (returnBlock != nullptr && returnBlock->getUses().empty()) -      returnBlock->erase(); +    for (mlir::Block *retBlock : retBlocks) { +      if (retBlock->getUses().empty()) +        retBlock->erase(); +    }      return;    } diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index c3fcd1a6..e5cecaa5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1103,44 +1103,69 @@ public:      // ---    private: -    // `returnBlock`, `returnLoc`, and all the functions that deal with them -    // will change and become more complicated when `switch` statements are -    // upstreamed.  `case` statements within the `switch` are in the same scope -    // but have their own regions.  Therefore the LexicalScope will need to -    // keep track of multiple return blocks. -    mlir::Block *returnBlock = nullptr; -    std::optional<mlir::Location> returnLoc; - -    // See the comment on `getOrCreateRetBlock`. +    // On switches we need one return block per region, since cases don't +    // have their own scopes but are distinct regions nonetheless. 
+ +    // TODO: This implementation should change once we have support for early +    //       exits in MLIR structured control flow (llvm-project#161575) +    llvm::SmallVector<mlir::Block *> retBlocks; +    llvm::DenseMap<mlir::Block *, mlir::Location> retLocs; +    llvm::DenseMap<cir::CaseOp, unsigned> retBlockInCaseIndex; +    std::optional<unsigned> normalRetBlockIndex; + +    // There's usually only one ret block per scope, but this needs to be +    // get or create because of potential unreachable return statements, note +    // that for those, all source location maps to the first one found.      mlir::Block *createRetBlock(CIRGenFunction &cgf, mlir::Location loc) { -      assert(returnBlock == nullptr && "only one return block per scope"); -      // Create the cleanup block but don't hook it up just yet. +      assert((isa_and_nonnull<cir::CaseOp>( +                  cgf.builder.getBlock()->getParentOp()) || +              retBlocks.size() == 0) && +             "only switches can hold more than one ret block"); + +      // Create the return block but don't hook it up just yet.        mlir::OpBuilder::InsertionGuard guard(cgf.builder); -      returnBlock = -          cgf.builder.createBlock(cgf.builder.getBlock()->getParent()); -      updateRetLoc(returnBlock, loc); -      return returnBlock; +      auto *b = cgf.builder.createBlock(cgf.builder.getBlock()->getParent()); +      retBlocks.push_back(b); +      updateRetLoc(b, loc); +      return b;      }      cir::ReturnOp emitReturn(mlir::Location loc);      void emitImplicitReturn();    public: -    mlir::Block *getRetBlock() { return returnBlock; } -    mlir::Location getRetLoc(mlir::Block *b) { return *returnLoc; } -    void updateRetLoc(mlir::Block *b, mlir::Location loc) { returnLoc = loc; } - -    // Create the return block for this scope, or return the existing one. -    // This get-or-create logic is necessary to handle multiple return -    // statements within the same scope, which can happen if some of them are -    // dead code or if there is a `goto` into the middle of the scope. 
+    llvm::ArrayRef<mlir::Block *> getRetBlocks() { return retBlocks; } +    mlir::Location getRetLoc(mlir::Block *b) { return retLocs.at(b); } +    void updateRetLoc(mlir::Block *b, mlir::Location loc) { +      retLocs.insert_or_assign(b, loc); +    } +      mlir::Block *getOrCreateRetBlock(CIRGenFunction &cgf, mlir::Location loc) { -      if (returnBlock == nullptr) { -        returnBlock = createRetBlock(cgf, loc); -        return returnBlock; +      // Check if we're inside a case region +      if (auto caseOp = mlir::dyn_cast_if_present<cir::CaseOp>( +              cgf.builder.getBlock()->getParentOp())) { +        auto iter = retBlockInCaseIndex.find(caseOp); +        if (iter != retBlockInCaseIndex.end()) { +          // Reuse existing return block +          mlir::Block *ret = retBlocks[iter->second]; +          updateRetLoc(ret, loc); +          return ret; +        } +        // Create new return block +        mlir::Block *ret = createRetBlock(cgf, loc); +        retBlockInCaseIndex[caseOp] = retBlocks.size() - 1; +        return ret;        } -      updateRetLoc(returnBlock, loc); -      return returnBlock; + +      if (normalRetBlockIndex) { +        mlir::Block *ret = retBlocks[*normalRetBlockIndex]; +        updateRetLoc(ret, loc); +        return ret; +      } + +      mlir::Block *ret = createRetBlock(cgf, loc); +      normalRetBlockIndex = retBlocks.size() - 1; +      return ret;      }      mlir::Block *getEntryBlock() { return entryBlock; } diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 6af8066..ca579c9 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -345,7 +345,7 @@ void CGDebugInfo::setLocation(SourceLocation Loc) {    if (Loc.isInvalid())      return; -  CurLoc = CGM.getContext().getSourceManager().getExpansionLoc(Loc); +  CurLoc = CGM.getContext().getSourceManager().getFileLoc(Loc);    // If we've changed files in the middle of a lexical scope go ahead    // and create a new lexical scope with file node if it's different @@ -572,7 +572,7 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {      FileName = TheCU->getFile()->getFilename();      CSInfo = TheCU->getFile()->getChecksum();    } else { -    PresumedLoc PLoc = SM.getPresumedLoc(Loc); +    PresumedLoc PLoc = SM.getPresumedLoc(SM.getFileLoc(Loc));      FileName = PLoc.getFilename();      if (FileName.empty()) { @@ -599,7 +599,8 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {      if (CSKind)        CSInfo.emplace(*CSKind, Checksum);    } -  return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc))); +  return createFile(FileName, CSInfo, +                    getSource(SM, SM.getFileID(SM.getFileLoc(Loc))));  }  llvm::DIFile *CGDebugInfo::createFile( @@ -654,7 +655,7 @@ unsigned CGDebugInfo::getLineNumber(SourceLocation Loc) {    if (Loc.isInvalid())      return 0;    SourceManager &SM = CGM.getContext().getSourceManager(); -  return SM.getPresumedLoc(Loc).getLine(); +  return SM.getPresumedLoc(SM.getFileLoc(Loc)).getLine();  }  unsigned CGDebugInfo::getColumnNumber(SourceLocation Loc, bool Force) { @@ -666,7 +667,8 @@ unsigned CGDebugInfo::getColumnNumber(SourceLocation Loc, bool Force) {    if (Loc.isInvalid() && CurLoc.isInvalid())      return 0;    SourceManager &SM = CGM.getContext().getSourceManager(); -  PresumedLoc PLoc = SM.getPresumedLoc(Loc.isValid() ? Loc : CurLoc); +  PresumedLoc PLoc = +      SM.getPresumedLoc(Loc.isValid() ? 
SM.getFileLoc(Loc) : CurLoc);    return PLoc.isValid() ? PLoc.getColumn() : 0;  } @@ -1174,14 +1176,16 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {  }  llvm::DIType *CGDebugInfo::CreateType(const BitIntType *Ty) { - -  StringRef Name = Ty->isUnsigned() ? "unsigned _BitInt" : "_BitInt"; +  SmallString<32> Name; +  llvm::raw_svector_ostream OS(Name); +  OS << (Ty->isUnsigned() ? "unsigned _BitInt(" : "_BitInt(") +     << Ty->getNumBits() << ")";    llvm::dwarf::TypeKind Encoding = Ty->isUnsigned()                                         ? llvm::dwarf::DW_ATE_unsigned                                         : llvm::dwarf::DW_ATE_signed; -    return DBuilder.createBasicType(Name, CGM.getContext().getTypeSize(Ty), -                                  Encoding); +                                  Encoding, llvm::DINode::FlagZero, 0, +                                  Ty->getNumBits());  }  llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) { @@ -5000,7 +5004,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {    // Update our current location    setLocation(Loc); -  if (CurLoc.isInvalid() || CurLoc.isMacroID() || LexicalBlockStack.empty()) +  if (CurLoc.isInvalid() || LexicalBlockStack.empty())      return;    llvm::MDNode *Scope = LexicalBlockStack.back(); @@ -6276,7 +6280,8 @@ void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,  void CGDebugInfo::AddStringLiteralDebugInfo(llvm::GlobalVariable *GV,                                              const StringLiteral *S) {    SourceLocation Loc = S->getStrTokenLoc(0); -  PresumedLoc PLoc = CGM.getContext().getSourceManager().getPresumedLoc(Loc); +  SourceManager &SM = CGM.getContext().getSourceManager(); +  PresumedLoc PLoc = SM.getPresumedLoc(SM.getFileLoc(Loc));    if (!PLoc.isValid())      return; diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 66fea92..121de42 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3731,6 +3731,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,      DestructorsFlag = 0x8,      PriorityFlag = 0x20,      DetachableFlag = 0x40, +    FreeAgentFlag = 0x80,    };    unsigned Flags = Data.Tied ? 
TiedFlag : 0;    bool NeedsCleanup = false; @@ -3740,6 +3741,11 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,      if (NeedsCleanup)        Flags = Flags | DestructorsFlag;    } +  if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) { +    OpenMPThreadsetKind Kind = Clause->getThreadsetKind(); +    if (Kind == OMPC_THREADSET_omp_pool) +      Flags = Flags | FreeAgentFlag; +  }    if (Data.Priority.getInt())      Flags = Flags | PriorityFlag;    if (D.hasClausesOfKind<OMPDetachClause>()) diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index f49a5af..9eab709 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -647,8 +647,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,    case AMDGPU::BI__builtin_amdgcn_ballot_w64: {      llvm::Type *ResultType = ConvertType(E->getType());      llvm::Value *Src = EmitScalarExpr(E->getArg(0)); -    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType }); -    return Builder.CreateCall(F, { Src }); +    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); +    return Builder.CreateCall(F, {Src});    }    case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:    case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: { @@ -1139,6 +1139,83 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,    case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:      return emitAMDGCNImageOverloadedReturnType(          *this, E, Intrinsic::amdgcn_image_sample_cube, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_l_1d, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_d_1d, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_l_2d, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_d_2d, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32: +    return 
emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_l_3d, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_d_3d, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_l_cube, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32: +  case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_sample_d_2darray, false); +  case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: +    return emitAMDGCNImageOverloadedReturnType( +        *this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false);    case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:    case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {      llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8); diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 60f9b86..15fa78d 100644 --- 
a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -1193,14 +1193,22 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {    NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),    NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),    NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), -  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), -  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), +  NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType), +  NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType), +  NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType), +  NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType), +  NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType), +  NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType),    NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),    NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), -  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), -  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), -  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), -  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), +  NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType), +  NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType), +  NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType), +  NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType), +  NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType), +  NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType), +  NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType), +  NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType),    NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),    NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),    NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), @@ -1243,27 +1251,43 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {    NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),    NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),    NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), -  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), -  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), +  NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType), +  NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType), +  NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType), +  NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType), +  NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType), +  NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType),    NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),    NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), -  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), -  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), +  NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType), +  NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType), +  NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType), +  NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType), +  NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType), +  NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType),    NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),    NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),    
NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),    NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), -  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), -  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), +  NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType), +  NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType), +  NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType), +  NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType), +  NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType), +  NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType),    NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),    NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), -  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), -  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), +  NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType), +  NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType), +  NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType), +  NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType), +  NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType), +  NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType),    NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),    NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),    NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), -  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), -  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), +  NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType), +  NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType),    NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),    NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),    NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), @@ -7067,127 +7091,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,      Int = Intrinsic::bitreverse;      return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");    } -  case NEON::BI__builtin_neon_vaddv_u8: -    // FIXME: These are handled by the AArch64 scalar code. -    usgn = true; -    [[fallthrough]]; -  case NEON::BI__builtin_neon_vaddv_s8: { -    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int8Ty, 8); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); -    return Builder.CreateTrunc(Ops[0], Int8Ty); -  } -  case NEON::BI__builtin_neon_vaddv_u16: -    usgn = true; -    [[fallthrough]]; -  case NEON::BI__builtin_neon_vaddv_s16: { -    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int16Ty, 4); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); -    return Builder.CreateTrunc(Ops[0], Int16Ty); -  } -  case NEON::BI__builtin_neon_vaddvq_u8: -    usgn = true; -    [[fallthrough]]; -  case NEON::BI__builtin_neon_vaddvq_s8: { -    Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int8Ty, 16); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); -    return Builder.CreateTrunc(Ops[0], Int8Ty); -  } -  case NEON::BI__builtin_neon_vaddvq_u16: -    usgn = true; -    [[fallthrough]]; -  case NEON::BI__builtin_neon_vaddvq_s16: { -    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int16Ty, 8); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); -    return Builder.CreateTrunc(Ops[0], Int16Ty); -  } -  case NEON::BI__builtin_neon_vmaxv_u8: { -    Int = Intrinsic::aarch64_neon_umaxv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int8Ty, 8); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); -    return Builder.CreateTrunc(Ops[0], Int8Ty); -  } -  case NEON::BI__builtin_neon_vmaxv_u16: { -    Int = Intrinsic::aarch64_neon_umaxv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int16Ty, 4); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); -    return Builder.CreateTrunc(Ops[0], Int16Ty); -  } -  case NEON::BI__builtin_neon_vmaxvq_u8: { -    Int = Intrinsic::aarch64_neon_umaxv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int8Ty, 16); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); -    return Builder.CreateTrunc(Ops[0], Int8Ty); -  } -  case NEON::BI__builtin_neon_vmaxvq_u16: { -    Int = Intrinsic::aarch64_neon_umaxv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int16Ty, 8); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); -    return Builder.CreateTrunc(Ops[0], Int16Ty); -  } -  case NEON::BI__builtin_neon_vmaxv_s8: { -    Int = Intrinsic::aarch64_neon_smaxv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int8Ty, 8); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); -    return Builder.CreateTrunc(Ops[0], Int8Ty); -  } -  case NEON::BI__builtin_neon_vmaxv_s16: { -    Int = Intrinsic::aarch64_neon_smaxv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int16Ty, 4); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); -    return Builder.CreateTrunc(Ops[0], Int16Ty); -  } -  case NEON::BI__builtin_neon_vmaxvq_s8: { -    Int = Intrinsic::aarch64_neon_smaxv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int8Ty, 16); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); -    return Builder.CreateTrunc(Ops[0], Int8Ty); -  } -  case NEON::BI__builtin_neon_vmaxvq_s16: { -    Int = Intrinsic::aarch64_neon_smaxv; -    Ty = Int32Ty; -    VTy = 
llvm::FixedVectorType::get(Int16Ty, 8); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); -    return Builder.CreateTrunc(Ops[0], Int16Ty); -  }    case NEON::BI__builtin_neon_vmaxv_f16: {      Int = Intrinsic::aarch64_neon_fmaxv;      Ty = HalfTy; @@ -7206,78 +7109,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,      Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");      return Builder.CreateTrunc(Ops[0], HalfTy);    } -  case NEON::BI__builtin_neon_vminv_u8: { -    Int = Intrinsic::aarch64_neon_uminv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int8Ty, 8); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); -    return Builder.CreateTrunc(Ops[0], Int8Ty); -  } -  case NEON::BI__builtin_neon_vminv_u16: { -    Int = Intrinsic::aarch64_neon_uminv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int16Ty, 4); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); -    return Builder.CreateTrunc(Ops[0], Int16Ty); -  } -  case NEON::BI__builtin_neon_vminvq_u8: { -    Int = Intrinsic::aarch64_neon_uminv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int8Ty, 16); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); -    return Builder.CreateTrunc(Ops[0], Int8Ty); -  } -  case NEON::BI__builtin_neon_vminvq_u16: { -    Int = Intrinsic::aarch64_neon_uminv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int16Ty, 8); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); -    return Builder.CreateTrunc(Ops[0], Int16Ty); -  } -  case NEON::BI__builtin_neon_vminv_s8: { -    Int = Intrinsic::aarch64_neon_sminv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int8Ty, 8); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); -    return Builder.CreateTrunc(Ops[0], Int8Ty); -  } -  case NEON::BI__builtin_neon_vminv_s16: { -    Int = Intrinsic::aarch64_neon_sminv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int16Ty, 4); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); -    return Builder.CreateTrunc(Ops[0], Int16Ty); -  } -  case NEON::BI__builtin_neon_vminvq_s8: { -    Int = Intrinsic::aarch64_neon_sminv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int8Ty, 16); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); -    return Builder.CreateTrunc(Ops[0], Int8Ty); -  } -  case NEON::BI__builtin_neon_vminvq_s16: { -    Int = Intrinsic::aarch64_neon_sminv; -    Ty = Int32Ty; -    VTy = llvm::FixedVectorType::get(Int16Ty, 8); -    llvm::Type *Tys[2] = { Ty, VTy }; -    Ops.push_back(EmitScalarExpr(E->getArg(0))); -    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); -    return Builder.CreateTrunc(Ops[0], Int16Ty); -  }    case 
NEON::BI__builtin_neon_vminv_f16: {      Int = Intrinsic::aarch64_neon_fminv;      Ty = HalfTy; diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index b924407..2381b2e 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -2931,74 +2931,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,      // instruction, but it will create a memset that won't be optimized away.      return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);    } -  // Corresponding to intrisics which will return 2 tiles (tile0_tile1). -  case X86::BI__builtin_ia32_t2rpntlvwz0_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz1_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal: -  case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: { -    Intrinsic::ID IID; -    switch (BuiltinID) { -    default: -      llvm_unreachable("Unsupported intrinsic!"); -    case X86::BI__builtin_ia32_t2rpntlvwz0_internal: -      IID = Intrinsic::x86_t2rpntlvwz0_internal; -      break; -    case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal: -      IID = Intrinsic::x86_t2rpntlvwz0rs_internal; -      break; -    case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal: -      IID = Intrinsic::x86_t2rpntlvwz0t1_internal; -      break; -    case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal: -      IID = Intrinsic::x86_t2rpntlvwz0rst1_internal; -      break; -    case X86::BI__builtin_ia32_t2rpntlvwz1_internal: -      IID = Intrinsic::x86_t2rpntlvwz1_internal; -      break; -    case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal: -      IID = Intrinsic::x86_t2rpntlvwz1rs_internal; -      break; -    case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal: -      IID = Intrinsic::x86_t2rpntlvwz1t1_internal; -      break; -    case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: -      IID = Intrinsic::x86_t2rpntlvwz1rst1_internal; -      break; -    } - -    // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride) -    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), -                                     {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]}); - -    auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>(); -    assert(PtrTy && "arg3 must be of pointer type"); -    QualType PtreeTy = PtrTy->getPointeeType(); -    llvm::Type *TyPtee = ConvertType(PtreeTy); - -    // Bitcast amx type (x86_amx) to vector type (256 x i32) -    // Then store tile0 into DstPtr0 -    Value *T0 = Builder.CreateExtractValue(Call, 0); -    Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, -                                           {TyPtee}, {T0}); -    Builder.CreateDefaultAlignedStore(VecT0, Ops[3]); - -    // Then store tile1 into DstPtr1 -    Value *T1 = Builder.CreateExtractValue(Call, 1); -    Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, -                                           {TyPtee}, {T1}); -    Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]); - -    // Note: Here we escape directly use x86_tilestored64_internal to store -    // the results due to it can't make sure the Mem written scope. This may -    // cause shapes reloads after first amx intrinsic, which current amx reg- -    // ister allocation has no ability to handle it. 
- -    return Store; -  }    case X86::BI__ud2:      // llvm.trap makes a ud2a instruction on x86.      return EmitTrapCall(Intrinsic::trap); diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 15d0b35..abd049a 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -260,7 +260,8 @@ CommonSPIRTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,    LangAS AS = QT->getUnqualifiedDesugaredType()->isNullPtrType()                    ? LangAS::Default                    : QT->getPointeeType().getAddressSpace(); -  if (AS == LangAS::Default || AS == LangAS::opencl_generic) +  if (AS == LangAS::Default || AS == LangAS::opencl_generic || +      AS == LangAS::opencl_constant)      return llvm::ConstantPointerNull::get(PT);    auto &Ctx = CGM.getContext(); diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 40ea513..71c5280 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -308,9 +308,18 @@ InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings,      auto ArgString = A->getAsString(Args);      std::string Nearest;      if (getOpts().findNearest(ArgString, Nearest, VisibilityMask) > 1) { -      if (!IsCLMode() && -          getOpts().findExact(ArgString, Nearest, -                              llvm::opt::Visibility(options::CC1Option))) { +      if (IsFlangMode()) { +        if (getOpts().findExact(ArgString, Nearest, +                                llvm::opt::Visibility(options::FC1Option))) { +          DiagID = diag::err_drv_unknown_argument_with_suggestion; +          Diags.Report(DiagID) << ArgString << "-Xflang " + Nearest; +        } else { +          DiagID = diag::err_drv_unknown_argument; +          Diags.Report(DiagID) << ArgString; +        } +      } else if (!IsCLMode() && getOpts().findExact(ArgString, Nearest, +                                                    llvm::opt::Visibility( +                                                        options::CC1Option))) {          DiagID = diag::err_drv_unknown_argument_with_suggestion;          Diags.Report(DiagID) << ArgString << "-Xclang " + Nearest;        } else { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 79edc56..d3ab6f1 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1414,17 +1414,18 @@ static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args,      GuardedControlStack = PBP.GuardedControlStack;    } -  bool HasPtrauthReturns = llvm::any_of(CmdArgs, [](const char *Arg) { -    return StringRef(Arg) == "-fptrauth-returns"; -  }); +  Arg *PtrauthReturnsArg = Args.getLastArg(options::OPT_fptrauth_returns, +                                           options::OPT_fno_ptrauth_returns); +  bool HasPtrauthReturns = +      PtrauthReturnsArg && +      PtrauthReturnsArg->getOption().matches(options::OPT_fptrauth_returns);    // GCS is currently untested with ptrauth-returns, but enabling this could be    // allowed in future after testing with a suitable system. 
-  if (HasPtrauthReturns && -      (Scope != "none" || BranchProtectionPAuthLR || GuardedControlStack)) { +  if (Scope != "none" || BranchProtectionPAuthLR || GuardedControlStack) {      if (Triple.getEnvironment() == llvm::Triple::PAuthTest)        D.Diag(diag::err_drv_unsupported_opt_for_target)            << A->getAsString(Args) << Triple.getTriple(); -    else +    else if (HasPtrauthReturns)        D.Diag(diag::err_drv_incompatible_options)            << A->getAsString(Args) << "-fptrauth-returns";    } @@ -1670,34 +1671,42 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args,    AddUnalignedAccessWarning(CmdArgs); -  Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_intrinsics, -                    options::OPT_fno_ptrauth_intrinsics); -  Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_calls, -                    options::OPT_fno_ptrauth_calls); -  Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_returns, -                    options::OPT_fno_ptrauth_returns); -  Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_auth_traps, -                    options::OPT_fno_ptrauth_auth_traps); -  Args.addOptInFlag( -      CmdArgs, options::OPT_fptrauth_vtable_pointer_address_discrimination, -      options::OPT_fno_ptrauth_vtable_pointer_address_discrimination); -  Args.addOptInFlag( -      CmdArgs, options::OPT_fptrauth_vtable_pointer_type_discrimination, -      options::OPT_fno_ptrauth_vtable_pointer_type_discrimination); -  Args.addOptInFlag( -      CmdArgs, options::OPT_fptrauth_type_info_vtable_pointer_discrimination, -      options::OPT_fno_ptrauth_type_info_vtable_pointer_discrimination); -  Args.addOptInFlag( -      CmdArgs, options::OPT_fptrauth_function_pointer_type_discrimination, -      options::OPT_fno_ptrauth_function_pointer_type_discrimination); - -  Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_indirect_gotos, -                    options::OPT_fno_ptrauth_indirect_gotos); -  Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_init_fini, -                    options::OPT_fno_ptrauth_init_fini); -  Args.addOptInFlag(CmdArgs, -                    options::OPT_fptrauth_init_fini_address_discrimination, -                    options::OPT_fno_ptrauth_init_fini_address_discrimination); +  if (Triple.isOSDarwin() || +      (Triple.isOSLinux() && +       Triple.getEnvironment() == llvm::Triple::PAuthTest)) { +    Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_intrinsics, +                      options::OPT_fno_ptrauth_intrinsics); +    Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_calls, +                      options::OPT_fno_ptrauth_calls); +    Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_returns, +                      options::OPT_fno_ptrauth_returns); +    Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_auth_traps, +                      options::OPT_fno_ptrauth_auth_traps); +    Args.addOptInFlag( +        CmdArgs, options::OPT_fptrauth_vtable_pointer_address_discrimination, +        options::OPT_fno_ptrauth_vtable_pointer_address_discrimination); +    Args.addOptInFlag( +        CmdArgs, options::OPT_fptrauth_vtable_pointer_type_discrimination, +        options::OPT_fno_ptrauth_vtable_pointer_type_discrimination); +    Args.addOptInFlag( +        CmdArgs, options::OPT_fptrauth_type_info_vtable_pointer_discrimination, +        options::OPT_fno_ptrauth_type_info_vtable_pointer_discrimination); +    Args.addOptInFlag( +        CmdArgs, options::OPT_fptrauth_function_pointer_type_discrimination, +        options::OPT_fno_ptrauth_function_pointer_type_discrimination); +    
Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_indirect_gotos, +                      options::OPT_fno_ptrauth_indirect_gotos); +  } +  if (Triple.isOSLinux() && +      Triple.getEnvironment() == llvm::Triple::PAuthTest) { +    Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_init_fini, +                      options::OPT_fno_ptrauth_init_fini); +    Args.addOptInFlag( +        CmdArgs, options::OPT_fptrauth_init_fini_address_discrimination, +        options::OPT_fno_ptrauth_init_fini_address_discrimination); +    Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_elf_got, +                      options::OPT_fno_ptrauth_elf_got); +  }    Args.addOptInFlag(CmdArgs, options::OPT_faarch64_jump_table_hardening,                      options::OPT_fno_aarch64_jump_table_hardening); @@ -3699,6 +3708,7 @@ static void RenderHLSLOptions(const ArgList &Args, ArgStringList &CmdArgs,        options::OPT_emit_obj,        options::OPT_disable_llvm_passes,        options::OPT_fnative_half_type, +      options::OPT_fnative_int16_type,        options::OPT_hlsl_entrypoint,        options::OPT_fdx_rootsignature_define,        options::OPT_fdx_rootsignature_version, diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp b/clang/lib/Driver/ToolChains/HLSL.cpp index 20a320e..8d3fba7 100644 --- a/clang/lib/Driver/ToolChains/HLSL.cpp +++ b/clang/lib/Driver/ToolChains/HLSL.cpp @@ -498,6 +498,15 @@ HLSLToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,        continue;      } +    if (A->getOption().getID() == options::OPT_enable_16bit_types) { +      // Translate -enable-16bit-types into -fnative-half-type and +      // -fnative-int16-type +      DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_fnative_half_type)); +      DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_fnative_int16_type)); +      A->claim(); +      continue; +    } +      DAL->append(A);    } diff --git a/clang/lib/Driver/ToolChains/ZOS.cpp b/clang/lib/Driver/ToolChains/ZOS.cpp index 57bcb3c..9a3c453 100644 --- a/clang/lib/Driver/ToolChains/ZOS.cpp +++ b/clang/lib/Driver/ToolChains/ZOS.cpp @@ -75,7 +75,7 @@ void zos::Assembler::ConstructJob(Compilation &C, const JobAction &JA,    const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));    C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), -                                         Exec, CmdArgs, Inputs)); +                                         Exec, CmdArgs, Inputs, Output));  }  static std::string getLEHLQ(const ArgList &Args) { @@ -213,7 +213,7 @@ void zos::Linker::ConstructJob(Compilation &C, const JobAction &JA,    const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());    C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), -                                         Exec, CmdArgs, Inputs)); +                                         Exec, CmdArgs, Inputs, Output));  }  ToolChain::RuntimeLibType ZOS::GetDefaultRuntimeLibType() const { diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index e5abf83..9ab024a 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -356,9 +356,11 @@ bool ContinuationIndenter::canBreak(const LineState &State) {      return CurrentState.BreakBeforeClosingBrace;    } -  // Allow breaking before the right parens with block indentation if there was -  // a break after the left parens, which is tracked by BreakBeforeClosingParen. 
-  if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent && +  // Check need to break before the right parens if there was a break after +  // the left parens, which is tracked by BreakBeforeClosingParen. +  if ((Style.BreakBeforeCloseBracketFunction || +       Style.BreakBeforeCloseBracketIf || Style.BreakBeforeCloseBracketLoop || +       Style.BreakBeforeCloseBracketSwitch) &&        Current.is(tok::r_paren)) {      return CurrentState.BreakBeforeClosingParen;    } @@ -837,32 +839,38 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,        return Tok.is(tok::l_brace) && Tok.isNot(BK_Block) &&               Style.Cpp11BracedListStyle != FormatStyle::BLS_Block;      }; -    if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) && -        !IsStartOfBracedList()) { +    if (IsStartOfBracedList()) +      return Style.BreakAfterOpenBracketBracedList; +    if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square))        return false; -    }      if (!Tok.Previous)        return true;      if (Tok.Previous->isIf()) -      return Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak; -    return Tok.Previous->isNoneOf(TT_CastRParen, tok::kw_for, tok::kw_while, -                                  tok::kw_switch) && -           !(Style.isJavaScript() && Tok.Previous->is(Keywords.kw_await)); +      return Style.BreakAfterOpenBracketIf; +    if (Tok.Previous->isLoop(Style)) +      return Style.BreakAfterOpenBracketLoop; +    if (Tok.Previous->is(tok::kw_switch)) +      return Style.BreakAfterOpenBracketSwitch; +    if (Style.BreakAfterOpenBracketFunction) { +      return !Tok.Previous->is(TT_CastRParen) && +             !(Style.isJavaScript() && Tok.is(Keywords.kw_await)); +    } +    return false;    };    auto IsFunctionCallParen = [](const FormatToken &Tok) {      return Tok.is(tok::l_paren) && Tok.ParameterCount > 0 && Tok.Previous &&             Tok.Previous->is(tok::identifier);    }; -  auto IsInTemplateString = [this](const FormatToken &Tok) { +  auto IsInTemplateString = [this](const FormatToken &Tok, bool NestBlocks) {      if (!Style.isJavaScript())        return false;      for (const auto *Prev = &Tok; Prev; Prev = Prev->Previous) {        if (Prev->is(TT_TemplateString) && Prev->opensScope())          return true; -      if (Prev->opensScope() || -          (Prev->is(TT_TemplateString) && Prev->closesScope())) { -        break; -      } +      if (Prev->opensScope() && !NestBlocks) +        return false; +      if (Prev->is(TT_TemplateString) && Prev->closesScope()) +        return false;      }      return false;    }; @@ -884,21 +892,25 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,           Tok.isOneOf(tok::ellipsis, Keywords.kw_await))) {        return true;      } -    if (const auto *Previous = Tok.Previous; -        !Previous || (Previous->isNoneOf(TT_FunctionDeclarationLParen, -                                         TT_LambdaDefinitionLParen) && -                      !IsFunctionCallParen(*Previous))) { +    const auto *Previous = TokAfterLParen.Previous; +    assert(Previous); // IsOpeningBracket(Previous) +    if (Previous->Previous && +        (Previous->Previous->isIf() || Previous->Previous->isLoop(Style) || +         Previous->Previous->is(tok::kw_switch))) { +      return false; +    } +    if (Previous->isNoneOf(TT_FunctionDeclarationLParen, +                           TT_LambdaDefinitionLParen) && +        !IsFunctionCallParen(*Previous)) {        return true;      
} -    if (IsOpeningBracket(Tok) || IsInTemplateString(Tok)) +    if (IsOpeningBracket(Tok) || IsInTemplateString(Tok, true))        return true;      const auto *Next = Tok.Next;      return !Next || Next->isMemberAccess() ||             Next->is(TT_FunctionDeclarationLParen) || IsFunctionCallParen(*Next);    }; -  if ((Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak || -       Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) && -      IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) && +  if (IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) &&        // Don't do this for simple (no expressions) one-argument function calls        // as that feels like needlessly wasting whitespace, e.g.:        // @@ -920,7 +932,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,    // Note: This doesn't apply to macro expansion lines, which are MACRO( , , )    // with args as children of the '(' and ',' tokens. It does not make sense to    // align the commas with the opening paren. -  if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign && +  if (Style.AlignAfterOpenBracket &&        !CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() &&        Previous.isNoneOf(TT_ObjCMethodExpr, TT_RequiresClause,                          TT_TableGenDAGArgOpener, @@ -933,7 +945,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,           Previous.Previous->isNoneOf(tok::identifier, tok::l_paren,                                       BK_BracedInit))) ||         Previous.is(TT_VerilogMultiLineListLParen)) && -      !IsInTemplateString(Current)) { +      !IsInTemplateString(Current, false)) {      CurrentState.Indent = State.Column + Spaces;      CurrentState.IsAligned = true;    } @@ -1271,8 +1283,20 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,    }    if (PreviousNonComment && PreviousNonComment->is(tok::l_paren)) { -    CurrentState.BreakBeforeClosingParen = -        Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent; +    if (auto Previous = PreviousNonComment->Previous) { +      if (Previous->isIf()) { +        CurrentState.BreakBeforeClosingParen = Style.BreakBeforeCloseBracketIf; +      } else if (Previous->isLoop(Style)) { +        CurrentState.BreakBeforeClosingParen = +            Style.BreakBeforeCloseBracketLoop; +      } else if (Previous->is(tok::kw_switch)) { +        CurrentState.BreakBeforeClosingParen = +            Style.BreakBeforeCloseBracketSwitch; +      } else { +        CurrentState.BreakBeforeClosingParen = +            Style.BreakBeforeCloseBracketFunction; +      } +    }    }    if (PreviousNonComment && PreviousNonComment->is(TT_TemplateOpener)) @@ -1416,13 +1440,17 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {        State.Stack.size() > 1) {      return State.Stack[State.Stack.size() - 2].LastSpace;    } -  if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent && -      (Current.is(tok::r_paren) || -       (Current.is(tok::r_brace) && Current.MatchingParen && -        Current.MatchingParen->is(BK_BracedInit))) && +  if (Style.BreakBeforeCloseBracketBracedList && Current.is(tok::r_brace) && +      Current.MatchingParen && Current.MatchingParen->is(BK_BracedInit) &&        State.Stack.size() > 1) {      return State.Stack[State.Stack.size() - 2].LastSpace;    } +  if ((Style.BreakBeforeCloseBracketFunction || +       Style.BreakBeforeCloseBracketIf || 
Style.BreakBeforeCloseBracketLoop || +       Style.BreakBeforeCloseBracketSwitch) && +      Current.is(tok::r_paren) && State.Stack.size() > 1) { +    return State.Stack[State.Stack.size() - 2].LastSpace; +  }    if (Style.BreakBeforeTemplateCloser && Current.is(TT_TemplateCloser) &&        State.Stack.size() > 1) {      return State.Stack[State.Stack.size() - 2].LastSpace; @@ -1844,8 +1872,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,           PrecedenceLevel < prec::Assignment) &&          (!Previous || Previous->isNot(tok::kw_return) ||           (!Style.isJava() && PrecedenceLevel > 0)) && -        (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign || -         PrecedenceLevel > prec::Comma || Current.NestingLevel == 0) && +        (Style.AlignAfterOpenBracket || PrecedenceLevel > prec::Comma || +         Current.NestingLevel == 0) &&          (!Style.isTableGen() ||           (Previous && Previous->isOneOf(TT_TableGenDAGArgListComma,                                          TT_TableGenDAGArgListCommaToBreak)))) { @@ -1885,8 +1913,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,      if (PrecedenceLevel > prec::Unknown)        NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column);      if (PrecedenceLevel != prec::Conditional && -        Current.isNot(TT_UnaryOperator) && -        Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) { +        Current.isNot(TT_UnaryOperator) && Style.AlignAfterOpenBracket) {        NewParenState.StartOfFunctionCall = State.Column;      } diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index edd126c..dd14fcd 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -32,6 +32,13 @@ using clang::format::FormatStyle;  LLVM_YAML_IS_SEQUENCE_VECTOR(FormatStyle::RawStringFormat) +enum BracketAlignmentStyle : int8_t { +  BAS_Align, +  BAS_DontAlign, +  BAS_AlwaysBreak, +  BAS_BlockIndent +}; +  namespace llvm {  namespace yaml {  template <> @@ -204,16 +211,16 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {    }  }; -template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> { -  static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) { -    IO.enumCase(Value, "Align", FormatStyle::BAS_Align); -    IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign); -    IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak); -    IO.enumCase(Value, "BlockIndent", FormatStyle::BAS_BlockIndent); +template <> struct ScalarEnumerationTraits<BracketAlignmentStyle> { +  static void enumeration(IO &IO, BracketAlignmentStyle &Value) { +    IO.enumCase(Value, "Align", BAS_Align); +    IO.enumCase(Value, "DontAlign", BAS_DontAlign);      // For backward compatibility. -    IO.enumCase(Value, "true", FormatStyle::BAS_Align); -    IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign); +    IO.enumCase(Value, "true", BAS_Align); +    IO.enumCase(Value, "false", BAS_DontAlign); +    IO.enumCase(Value, "AlwaysBreak", BAS_AlwaysBreak); +    IO.enumCase(Value, "BlockIndent", BAS_BlockIndent);    }  }; @@ -979,6 +986,54 @@ template <> struct MappingTraits<FormatStyle> {      bool SpacesInCStyleCastParentheses = false;      bool SpacesInParentheses = false; +    if (IO.outputting()) { +      IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); +    } else { +      // For backward compatibility. 
+      BracketAlignmentStyle LocalBAS = BAS_Align; +      if (IsGoogleOrChromium) { +        FormatStyle::LanguageKind Language = Style.Language; +        if (Language == FormatStyle::LK_None) +          Language = ((FormatStyle *)IO.getContext())->Language; +        if (Language == FormatStyle::LK_JavaScript) +          LocalBAS = BAS_AlwaysBreak; +        else if (Language == FormatStyle::LK_Java) +          LocalBAS = BAS_DontAlign; +      } else if (BasedOnStyle.equals_insensitive("webkit")) { +        LocalBAS = BAS_DontAlign; +      } +      IO.mapOptional("AlignAfterOpenBracket", LocalBAS); +      Style.BreakAfterOpenBracketBracedList = false; +      Style.BreakAfterOpenBracketFunction = false; +      Style.BreakAfterOpenBracketIf = false; +      Style.BreakAfterOpenBracketLoop = false; +      Style.BreakAfterOpenBracketSwitch = false; +      Style.BreakBeforeCloseBracketBracedList = false; +      Style.BreakBeforeCloseBracketFunction = false; +      Style.BreakBeforeCloseBracketIf = false; +      Style.BreakBeforeCloseBracketLoop = false; +      Style.BreakBeforeCloseBracketSwitch = false; + +      switch (LocalBAS) { +      case BAS_DontAlign: +        Style.AlignAfterOpenBracket = false; +        break; +      case BAS_BlockIndent: +        Style.BreakBeforeCloseBracketBracedList = true; +        Style.BreakBeforeCloseBracketFunction = true; +        Style.BreakBeforeCloseBracketIf = true; +        [[fallthrough]]; +      case BAS_AlwaysBreak: +        Style.BreakAfterOpenBracketBracedList = true; +        Style.BreakAfterOpenBracketFunction = true; +        Style.BreakAfterOpenBracketIf = true; +        [[fallthrough]]; +      case BAS_Align: +        Style.AlignAfterOpenBracket = true; +        break; +      } +    } +      // For backward compatibility.      
if (!IO.outputting()) {        IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlines); @@ -1014,7 +1069,6 @@ template <> struct MappingTraits<FormatStyle> {      }      IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); -    IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);      IO.mapOptional("AlignArrayOfStructures", Style.AlignArrayOfStructures);      IO.mapOptional("AlignConsecutiveAssignments",                     Style.AlignConsecutiveAssignments); @@ -1079,10 +1133,29 @@ template <> struct MappingTraits<FormatStyle> {      IO.mapOptional("BreakAfterAttributes", Style.BreakAfterAttributes);      IO.mapOptional("BreakAfterJavaFieldAnnotations",                     Style.BreakAfterJavaFieldAnnotations); +    IO.mapOptional("BreakAfterOpenBracketBracedList", +                   Style.BreakAfterOpenBracketBracedList); +    IO.mapOptional("BreakAfterOpenBracketFunction", +                   Style.BreakAfterOpenBracketFunction); +    IO.mapOptional("BreakAfterOpenBracketIf", Style.BreakAfterOpenBracketIf); +    IO.mapOptional("BreakAfterOpenBracketLoop", +                   Style.BreakAfterOpenBracketLoop); +    IO.mapOptional("BreakAfterOpenBracketSwitch", +                   Style.BreakAfterOpenBracketSwitch);      IO.mapOptional("BreakAfterReturnType", Style.BreakAfterReturnType);      IO.mapOptional("BreakArrays", Style.BreakArrays);      IO.mapOptional("BreakBeforeBinaryOperators",                     Style.BreakBeforeBinaryOperators); +    IO.mapOptional("BreakBeforeCloseBracketBracedList", +                   Style.BreakBeforeCloseBracketBracedList); +    IO.mapOptional("BreakBeforeCloseBracketFunction", +                   Style.BreakBeforeCloseBracketFunction); +    IO.mapOptional("BreakBeforeCloseBracketIf", +                   Style.BreakBeforeCloseBracketIf); +    IO.mapOptional("BreakBeforeCloseBracketLoop", +                   Style.BreakBeforeCloseBracketLoop); +    IO.mapOptional("BreakBeforeCloseBracketSwitch", +                   Style.BreakBeforeCloseBracketSwitch);      IO.mapOptional("BreakBeforeConceptDeclarations",                     Style.BreakBeforeConceptDeclarations);      IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); @@ -1561,7 +1634,7 @@ static void expandPresetsSpacesInParens(FormatStyle &Expanded) {  FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {    FormatStyle LLVMStyle;    LLVMStyle.AccessModifierOffset = -2; -  LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align; +  LLVMStyle.AlignAfterOpenBracket = true;    LLVMStyle.AlignArrayOfStructures = FormatStyle::AIAS_None;    LLVMStyle.AlignConsecutiveAssignments = {};    LLVMStyle.AlignConsecutiveAssignments.PadOperators = true; @@ -1621,10 +1694,20 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {    LLVMStyle.BreakAdjacentStringLiterals = true;    LLVMStyle.BreakAfterAttributes = FormatStyle::ABS_Leave;    LLVMStyle.BreakAfterJavaFieldAnnotations = false; +  LLVMStyle.BreakAfterOpenBracketBracedList = false; +  LLVMStyle.BreakAfterOpenBracketFunction = false; +  LLVMStyle.BreakAfterOpenBracketIf = false; +  LLVMStyle.BreakAfterOpenBracketLoop = false; +  LLVMStyle.BreakAfterOpenBracketSwitch = false;    LLVMStyle.BreakAfterReturnType = FormatStyle::RTBS_None;    LLVMStyle.BreakArrays = true;    LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;    LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; +  LLVMStyle.BreakBeforeCloseBracketBracedList = false; +  
LLVMStyle.BreakBeforeCloseBracketFunction = false; +  LLVMStyle.BreakBeforeCloseBracketIf = false; +  LLVMStyle.BreakBeforeCloseBracketLoop = false; +  LLVMStyle.BreakBeforeCloseBracketSwitch = false;    LLVMStyle.BreakBeforeConceptDeclarations = FormatStyle::BBCDS_Always;    LLVMStyle.BreakBeforeInlineASMColon = FormatStyle::BBIAS_OnlyMultiline;    LLVMStyle.BreakBeforeTemplateCloser = false; @@ -1877,7 +1960,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {    GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;    if (Language == FormatStyle::LK_Java) { -    GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; +    GoogleStyle.AlignAfterOpenBracket = false;      GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign;      GoogleStyle.AlignTrailingComments = {};      GoogleStyle.AlignTrailingComments.Kind = FormatStyle::TCAS_Never; @@ -1889,7 +1972,9 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {      GoogleStyle.SpaceAfterCStyleCast = true;      GoogleStyle.SpacesBeforeTrailingComments = 1;    } else if (Language == FormatStyle::LK_JavaScript) { -    GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; +    GoogleStyle.BreakAfterOpenBracketBracedList = true; +    GoogleStyle.BreakAfterOpenBracketFunction = true; +    GoogleStyle.BreakAfterOpenBracketIf = true;      GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign;      GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;      // TODO: still under discussion whether to switch to SLS_All. @@ -2026,7 +2111,7 @@ FormatStyle getMozillaStyle() {  FormatStyle getWebKitStyle() {    FormatStyle Style = getLLVMStyle();    Style.AccessModifierOffset = -4; -  Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; +  Style.AlignAfterOpenBracket = false;    Style.AlignOperands = FormatStyle::OAS_DontAlign;    Style.AlignTrailingComments = {};    Style.AlignTrailingComments.Kind = FormatStyle::TCAS_Never; diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index d1c6264..28fdbcb 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -68,7 +68,7 @@ bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const {    assert(MatchingParen);    assert(MatchingParen->is(tok::l_brace));    if (Style.Cpp11BracedListStyle == FormatStyle::BLS_Block || -      Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) { +      !Style.BreakBeforeCloseBracketBracedList) {      return false;    }    const auto *LBrace = MatchingParen; @@ -198,7 +198,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {      return;    // Column format doesn't really make sense if we don't align after brackets. 
-  if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) +  if (!Style.AlignAfterOpenBracket)      return;    FormatToken *ItemBegin = Token->Next; diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 6f3d24a..d833130 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -666,6 +666,12 @@ public:             (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro);    } +  bool isLoop(const FormatStyle &Style) const { +    return isOneOf(tok::kw_for, tok::kw_while) || +           (Style.isJavaScript() && isNot(tok::l_paren) && Previous && +            Previous->is(tok::kw_for)); +  } +    bool closesScopeAfterBlock() const {      if (getBlockKind() == BK_Block)        return true; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 021d8c6..8e227da 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4427,10 +4427,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,    if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0)      return Style.PenaltyBreakOpenParenthesis; -  if (Left.is(tok::l_paren) && InFunctionDecl && -      Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) { +  if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket)      return 100; -  }    if (Left.is(tok::l_paren) && Left.Previous &&        (Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) ||         Left.Previous->isIf())) { @@ -4446,7 +4444,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,      // If we aren't aligning after opening parens/braces we can always break      // here unless the style does not want us to place all arguments on the      // next line. -    if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign && +    if (!Style.AlignAfterOpenBracket &&          (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) {        return 0;      } @@ -6226,24 +6224,31 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,                                     (Right.isBlockIndentedInitRBrace(Style)));    } -  // We only break before r_paren if we're in a block indented context. +  // We can break before r_paren if we're in a block indented context or +  // a control statement with an explicit style option.    
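
Throughout these Format hunks the single AlignAfterOpenBracket enum is split up: alignment itself becomes a plain boolean, while breaking after the opening bracket and before the closing bracket is controlled per construct (function call, if, loop, switch, braced list). A minimal sketch, assuming only the boolean FormatStyle fields added by this change, of requesting the old BlockIndent behaviour for if-conditions programmatically:

  #include "clang/Format/Format.h"
  using clang::format::FormatStyle;
  using clang::format::getLLVMStyle;

  FormatStyle blockIndentIfStyle() {
    FormatStyle Style = getLLVMStyle();
    Style.AlignAfterOpenBracket = true;      // column alignment (old Align)
    Style.BreakAfterOpenBracketIf = true;    // break after "if ("; part of old AlwaysBreak/BlockIndent
    Style.BreakBeforeCloseBracketIf = true;  // break before ")"; old BlockIndent only
    return Style;
  }

Existing configuration files keep working because the YAML reader maps the old Align/DontAlign/AlwaysBreak/BlockIndent values onto these booleans, as in the Format.cpp hunk above.
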
if (Right.is(tok::r_paren)) { -    if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent || -        !Right.MatchingParen) { +    if (!Right.MatchingParen)        return false; -    }      auto Next = Right.Next;      if (Next && Next->is(tok::r_paren))        Next = Next->Next;      if (Next && Next->is(tok::l_paren))        return false;      const FormatToken *Previous = Right.MatchingParen->Previous; -    return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf())); +    if (!Previous) +      return false; +    if (Previous->isIf()) +      return Style.BreakBeforeCloseBracketIf; +    if (Previous->isLoop(Style)) +      return Style.BreakBeforeCloseBracketLoop; +    if (Previous->is(tok::kw_switch)) +      return Style.BreakBeforeCloseBracketSwitch; +    return Style.BreakBeforeCloseBracketFunction;    }    if (Left.isOneOf(tok::r_paren, TT_TrailingAnnotation) &&        Right.is(TT_TrailingAnnotation) && -      Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) { +      Style.BreakBeforeCloseBracketFunction) {      return false;    } diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index bd36eb4..1951e7f 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4600,7 +4600,8 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,          // Validate that if fnative-half-type is given, that          // the language standard is at least hlsl2018, and that          // the target shader model is at least 6.2. -        if (Args.getLastArg(OPT_fnative_half_type)) { +        if (Args.getLastArg(OPT_fnative_half_type) || +            Args.getLastArg(OPT_fnative_int16_type)) {            const LangStandard &Std =                LangStandard::getLangStandardForKind(Opts.LangStd);            if (!(Opts.LangStd >= LangStandard::lang_hlsl2018 && @@ -4614,12 +4615,16 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,            Diags.Report(diag::err_drv_hlsl_bad_shader_unsupported)                << VulkanEnv << T.getOSName() << T.str();          } -        if (Args.getLastArg(OPT_fnative_half_type)) { +        if (Args.getLastArg(OPT_fnative_half_type) || +            Args.getLastArg(OPT_fnative_int16_type)) { +          const char *Str = Args.getLastArg(OPT_fnative_half_type) +                                ? 
"-fnative-half-type" +                                : "-fnative-int16-type";            const LangStandard &Std =                LangStandard::getLangStandardForKind(Opts.LangStd);            if (!(Opts.LangStd >= LangStandard::lang_hlsl2018))              Diags.Report(diag::err_drv_hlsl_16bit_types_unsupported) -                << "-fnative-half-type" << false << Std.getName(); +                << Str << false << Std.getName();          }        } else {          llvm_unreachable("expected DXIL or SPIR-V target"); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 47f1d5a..8602be1 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -399,7 +399,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,      Builder.defineMacro("__HLSL_202y",                          Twine((unsigned)LangOptions::HLSLLangStd::HLSL_202y)); -    if (LangOpts.NativeHalfType) +    if (LangOpts.NativeHalfType && LangOpts.NativeInt16Type)        Builder.defineMacro("__HLSL_ENABLE_16_BIT", "1");      // Shader target information diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index f5add2a..aea3e72 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -22,22 +22,16 @@  using namespace clang; -static const enum raw_ostream::Colors noteColor = raw_ostream::CYAN; -static const enum raw_ostream::Colors remarkColor = -  raw_ostream::BLUE; -static const enum raw_ostream::Colors fixitColor = -  raw_ostream::GREEN; -static const enum raw_ostream::Colors caretColor = -  raw_ostream::GREEN; -static const enum raw_ostream::Colors warningColor = -  raw_ostream::MAGENTA; -static const enum raw_ostream::Colors templateColor = -  raw_ostream::CYAN; -static const enum raw_ostream::Colors errorColor = raw_ostream::RED; -static const enum raw_ostream::Colors fatalColor = raw_ostream::RED; +static constexpr raw_ostream::Colors NoteColor = raw_ostream::CYAN; +static constexpr raw_ostream::Colors RemarkColor = raw_ostream::BLUE; +static constexpr raw_ostream::Colors FixitColor = raw_ostream::GREEN; +static constexpr raw_ostream::Colors CaretColor = raw_ostream::GREEN; +static constexpr raw_ostream::Colors WarningColor = raw_ostream::MAGENTA; +static constexpr raw_ostream::Colors TemplateColor = raw_ostream::CYAN; +static constexpr raw_ostream::Colors ErrorColor = raw_ostream::RED; +static constexpr raw_ostream::Colors FatalColor = raw_ostream::RED;  // Used for changing only the bold attribute. -static const enum raw_ostream::Colors savedColor = -  raw_ostream::SAVEDCOLOR; +static constexpr raw_ostream::Colors SavedColor = raw_ostream::SAVEDCOLOR;  // Magenta is taken for 'warning'. Red is already 'error' and 'cyan'  // is already taken for 'note'. 
Green is already used to underline @@ -47,6 +41,43 @@ static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW;  static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;  static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; +namespace { +template <typename Sub> class ColumnsOrBytes { +public: +  int V = 0; +  ColumnsOrBytes(int V) : V(V) {} +  bool isValid() const { return V != -1; } +  Sub next() const { return Sub(V + 1); } +  Sub prev() const { return Sub(V - 1); } + +  bool operator>(Sub O) const { return V > O.V; } +  bool operator<(Sub O) const { return V < O.V; } +  bool operator<=(Sub B) const { return V <= B.V; } +  bool operator!=(Sub C) const { return C.V != V; } + +  Sub operator+(Sub B) const { return Sub(V + B.V); } +  Sub &operator+=(Sub B) { +    V += B.V; +    return *static_cast<Sub *>(this); +  } +  Sub operator-(Sub B) const { return Sub(V - B.V); } +  Sub &operator-=(Sub B) { +    V -= B.V; +    return *static_cast<Sub *>(this); +  } +}; + +class Bytes final : public ColumnsOrBytes<Bytes> { +public: +  Bytes(int V) : ColumnsOrBytes(V) {} +}; + +class Columns final : public ColumnsOrBytes<Columns> { +public: +  Columns(int V) : ColumnsOrBytes(V) {} +}; +} // namespace +  /// Add highlights to differences in template strings.  static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,                                        bool &Normal, bool Bold) { @@ -58,11 +89,11 @@ static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,      Str = Str.substr(Pos + 1);      if (Normal) -      OS.changeColor(templateColor, true); +      OS.changeColor(TemplateColor, true);      else {        OS.resetColor();        if (Bold) -        OS.changeColor(savedColor, true); +        OS.changeColor(SavedColor, true);      }      Normal = !Normal;    } @@ -109,8 +140,8 @@ printableTextForNextCharacter(StringRef SourceLine, size_t *I,    if (SourceLine[*I] == '\t') {      assert(0 < TabStop && TabStop <= DiagnosticOptions::MaxTabStop &&             "Invalid -ftabstop value"); -    unsigned Col = bytesSincePreviousTabOrLineBegin(SourceLine, *I); -    unsigned NumSpaces = TabStop - (Col % TabStop); +    unsigned LineBytes = bytesSincePreviousTabOrLineBegin(SourceLine, *I); +    unsigned NumSpaces = TabStop - (LineBytes % TabStop);      assert(0 < NumSpaces && NumSpaces <= TabStop             && "Invalid computation of space amt");      ++(*I); @@ -220,97 +251,99 @@ static void expandTabs(std::string &SourceLine, unsigned TabStop) {  ///  (\\u3042 is represented in UTF-8 by three bytes and takes two columns to  ///   display)  static void genColumnByteMapping(StringRef SourceLine, unsigned TabStop, -                                 SmallVectorImpl<int> &BytesOut, -                                 SmallVectorImpl<int> &ColumnsOut) { +                                 SmallVectorImpl<Bytes> &BytesOut, +                                 SmallVectorImpl<Columns> &ColumnsOut) {    assert(BytesOut.empty());    assert(ColumnsOut.empty());    if (SourceLine.empty()) { -    BytesOut.resize(1u, 0); -    ColumnsOut.resize(1u, 0); +    BytesOut.resize(1u, Bytes(0)); +    ColumnsOut.resize(1u, Columns(0));      return;    }    ColumnsOut.resize(SourceLine.size() + 1, -1); -  int Columns = 0; +  Columns NumColumns = 0;    size_t I = 0;    while (I < SourceLine.size()) { -    ColumnsOut[I] = Columns; -    BytesOut.resize(Columns + 1, -1); -    BytesOut.back() = I; +    ColumnsOut[I] = NumColumns; +    BytesOut.resize(NumColumns.V + 1, -1); +   
 BytesOut.back() = Bytes(I);      auto [Str, Printable] =          printableTextForNextCharacter(SourceLine, &I, TabStop); -    Columns += llvm::sys::locale::columnWidth(Str); +    NumColumns += Columns(llvm::sys::locale::columnWidth(Str));    } -  ColumnsOut.back() = Columns; -  BytesOut.resize(Columns + 1, -1); -  BytesOut.back() = I; +  ColumnsOut.back() = NumColumns; +  BytesOut.resize(NumColumns.V + 1, -1); +  BytesOut.back() = Bytes(I);  }  namespace {  struct SourceColumnMap {    SourceColumnMap(StringRef SourceLine, unsigned TabStop) -  : m_SourceLine(SourceLine) { +      : SourceLine(SourceLine) { -    genColumnByteMapping(SourceLine, TabStop, m_columnToByte, m_byteToColumn); +    genColumnByteMapping(SourceLine, TabStop, ColumnToByte, ByteToColumn); -    assert(m_byteToColumn.size()==SourceLine.size()+1); -    assert(0 < m_byteToColumn.size() && 0 < m_columnToByte.size()); -    assert(m_byteToColumn.size() -           == static_cast<unsigned>(m_columnToByte.back()+1)); -    assert(static_cast<unsigned>(m_byteToColumn.back()+1) -           == m_columnToByte.size()); +    assert(ByteToColumn.size() == SourceLine.size() + 1); +    assert(0 < ByteToColumn.size() && 0 < ColumnToByte.size()); +    assert(ByteToColumn.size() == +           static_cast<unsigned>(ColumnToByte.back().V + 1)); +    assert(static_cast<unsigned>(ByteToColumn.back().V + 1) == +           ColumnToByte.size());    } -  int columns() const { return m_byteToColumn.back(); } -  int bytes() const { return m_columnToByte.back(); } +  Columns columns() const { return ByteToColumn.back(); } +  Bytes bytes() const { return ColumnToByte.back(); }    /// Map a byte to the column which it is at the start of, or return -1    /// if it is not at the start of a column (for a UTF-8 trailing byte). -  int byteToColumn(int n) const { -    assert(0<=n && n<static_cast<int>(m_byteToColumn.size())); -    return m_byteToColumn[n]; +  Columns byteToColumn(Bytes N) const { +    assert(0 <= N.V && N.V < static_cast<int>(ByteToColumn.size())); +    return ByteToColumn[N.V];    }    /// Map a byte to the first column which contains it. -  int byteToContainingColumn(int N) const { -    assert(0 <= N && N < static_cast<int>(m_byteToColumn.size())); -    while (m_byteToColumn[N] == -1) -      --N; -    return m_byteToColumn[N]; +  Columns byteToContainingColumn(Bytes N) const { +    assert(0 <= N.V && N.V < static_cast<int>(ByteToColumn.size())); +    while (!ByteToColumn[N.V].isValid()) +      --N.V; +    return ByteToColumn[N.V];    }    /// Map a column to the byte which starts the column, or return -1 if    /// the column the second or subsequent column of an expanded tab or similar    /// multi-column entity. -  int columnToByte(int n) const { -    assert(0<=n && n<static_cast<int>(m_columnToByte.size())); -    return m_columnToByte[n]; +  Bytes columnToByte(Columns N) const { +    assert(0 <= N.V && N.V < static_cast<int>(ColumnToByte.size())); +    return ColumnToByte[N.V];    }    /// Map from a byte index to the next byte which starts a column. -  int startOfNextColumn(int N) const { -    assert(0 <= N && N < static_cast<int>(m_byteToColumn.size() - 1)); -    while (byteToColumn(++N) == -1) {} +  Bytes startOfNextColumn(Bytes N) const { +    assert(0 <= N.V && N.V < static_cast<int>(ByteToColumn.size() - 1)); +    N = N.next(); +    while (!byteToColumn(N).isValid()) +      N = N.next();      return N;    }    /// Map from a byte index to the previous byte which starts a column. 
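
The SourceColumnMap rework above replaces raw int indices with the Bytes and Columns wrappers so that byte offsets into the UTF-8 source line and printed column numbers can no longer be mixed silently. A self-contained analogue of that idea, with hypothetical names rather than the patch's types:

  #include <vector>

  struct ByteIdx { int V; };  // offset into the UTF-8 line
  struct ColIdx  { int V; };  // printed column

  // Assigning a ByteIdx to a ColIdx (or vice versa) does not compile; any
  // conversion has to go through an explicit byte-to-column mapping.
  ColIdx toColumn(ByteIdx B, const std::vector<int> &ByteToColumn) {
    return ColIdx{ByteToColumn[B.V]};
  }
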
-  int startOfPreviousColumn(int N) const { -    assert(0 < N && N < static_cast<int>(m_byteToColumn.size())); -    while (byteToColumn(--N) == -1) {} +  Bytes startOfPreviousColumn(Bytes N) const { +    assert(0 < N.V && N.V < static_cast<int>(ByteToColumn.size())); +    N = N.prev(); +    while (!byteToColumn(N).isValid()) +      N = N.prev();      return N;    } -  StringRef getSourceLine() const { -    return m_SourceLine; -  } +  StringRef getSourceLine() const { return SourceLine; }  private: -  const std::string m_SourceLine; -  SmallVector<int,200> m_byteToColumn; -  SmallVector<int,200> m_columnToByte; +  StringRef SourceLine; +  SmallVector<Columns, 200> ByteToColumn; +  SmallVector<Bytes, 200> ColumnToByte;  };  } // end anonymous namespace @@ -319,14 +352,15 @@ private:  static void selectInterestingSourceRegion(std::string &SourceLine,                                            std::string &CaretLine,                                            std::string &FixItInsertionLine, -                                          unsigned Columns, -                                          const SourceColumnMap &map) { -  unsigned CaretColumns = CaretLine.size(); -  unsigned FixItColumns = llvm::sys::locale::columnWidth(FixItInsertionLine); -  unsigned MaxColumns = std::max(static_cast<unsigned>(map.columns()), -                                 std::max(CaretColumns, FixItColumns)); +                                          Columns NonGutterColumns, +                                          const SourceColumnMap &Map) { +  Columns CaretColumns = Columns(CaretLine.size()); +  Columns FixItColumns = +      Columns(llvm::sys::locale::columnWidth(FixItInsertionLine)); +  Columns MaxColumns = +      std::max({Map.columns().V, CaretColumns.V, FixItColumns.V});    // if the number of columns is less than the desired number we're done -  if (MaxColumns <= Columns) +  if (MaxColumns <= NonGutterColumns)      return;    // No special characters are allowed in CaretLine. @@ -334,13 +368,13 @@ static void selectInterestingSourceRegion(std::string &SourceLine,    // Find the slice that we need to display the full caret line    // correctly. -  unsigned CaretStart = 0, CaretEnd = CaretLine.size(); -  for (; CaretStart != CaretEnd; ++CaretStart) -    if (!isWhitespace(CaretLine[CaretStart])) +  Columns CaretStart = 0, CaretEnd = CaretLine.size(); +  for (; CaretStart != CaretEnd; CaretStart = CaretStart.next()) +    if (!isWhitespace(CaretLine[CaretStart.V]))        break; -  for (; CaretEnd != CaretStart; --CaretEnd) -    if (!isWhitespace(CaretLine[CaretEnd - 1])) +  for (; CaretEnd != CaretStart; CaretEnd = CaretEnd.prev()) +    if (!isWhitespace(CaretLine[CaretEnd.V - 1]))        break;    // caret has already been inserted into CaretLine so the above whitespace @@ -349,39 +383,38 @@ static void selectInterestingSourceRegion(std::string &SourceLine,    // If we have a fix-it line, make sure the slice includes all of the    // fix-it information.    if (!FixItInsertionLine.empty()) { -    unsigned FixItStart = 0, FixItEnd = FixItInsertionLine.size(); -    for (; FixItStart != FixItEnd; ++FixItStart) -      if (!isWhitespace(FixItInsertionLine[FixItStart])) -        break; - -    for (; FixItEnd != FixItStart; --FixItEnd) -      if (!isWhitespace(FixItInsertionLine[FixItEnd - 1])) -        break; -      // We can safely use the byte offset FixItStart as the column offset      // because the characters up until FixItStart are all ASCII whitespace      // characters. 
-    unsigned FixItStartCol = FixItStart; -    unsigned FixItEndCol -      = llvm::sys::locale::columnWidth(FixItInsertionLine.substr(0, FixItEnd)); - -    CaretStart = std::min(FixItStartCol, CaretStart); -    CaretEnd = std::max(FixItEndCol, CaretEnd); +    Bytes FixItStart = 0; +    Bytes FixItEnd = Bytes(FixItInsertionLine.size()); +    while (FixItStart != FixItEnd && +           isWhitespace(FixItInsertionLine[FixItStart.V])) +      FixItStart = FixItStart.next(); + +    while (FixItEnd != FixItStart && +           isWhitespace(FixItInsertionLine[FixItEnd.V - 1])) +      FixItEnd = FixItEnd.prev(); + +    Columns FixItStartCol = Columns(FixItStart.V); +    Columns FixItEndCol = Columns(llvm::sys::locale::columnWidth( +        FixItInsertionLine.substr(0, FixItEnd.V))); + +    CaretStart = std::min(FixItStartCol.V, CaretStart.V); +    CaretEnd = std::max(FixItEndCol.V, CaretEnd.V);    }    // CaretEnd may have been set at the middle of a character    // If it's not at a character's first column then advance it past the current    //   character. -  while (static_cast<int>(CaretEnd) < map.columns() && -         -1 == map.columnToByte(CaretEnd)) -    ++CaretEnd; - -  assert((static_cast<int>(CaretStart) > map.columns() || -          -1!=map.columnToByte(CaretStart)) && -         "CaretStart must not point to a column in the middle of a source" -         " line character"); -  assert((static_cast<int>(CaretEnd) > map.columns() || -          -1!=map.columnToByte(CaretEnd)) && +  while (CaretEnd < Map.columns() && !Map.columnToByte(CaretEnd).isValid()) +    CaretEnd = CaretEnd.next(); + +  assert( +      (CaretStart > Map.columns() || Map.columnToByte(CaretStart).isValid()) && +      "CaretStart must not point to a column in the middle of a source" +      " line character"); +  assert((CaretEnd > Map.columns() || Map.columnToByte(CaretEnd).isValid()) &&           "CaretEnd must not point to a column in the middle of a source line"           " character"); @@ -390,70 +423,69 @@ static void selectInterestingSourceRegion(std::string &SourceLine,    // number of columns we have, try to grow the slice to encompass    // more context. 
-  unsigned SourceStart = map.columnToByte(std::min<unsigned>(CaretStart, -                                                             map.columns())); -  unsigned SourceEnd = map.columnToByte(std::min<unsigned>(CaretEnd, -                                                           map.columns())); +  Bytes SourceStart = Map.columnToByte(std::min(CaretStart.V, Map.columns().V)); +  Bytes SourceEnd = Map.columnToByte(std::min(CaretEnd.V, Map.columns().V)); -  unsigned CaretColumnsOutsideSource = CaretEnd-CaretStart -    - (map.byteToColumn(SourceEnd)-map.byteToColumn(SourceStart)); +  Columns CaretColumnsOutsideSource = +      CaretEnd - CaretStart - +      (Map.byteToColumn(SourceEnd) - Map.byteToColumn(SourceStart)); -  char const *front_ellipse = "  ..."; -  char const *front_space   = "     "; -  char const *back_ellipse = "..."; -  unsigned ellipses_space = strlen(front_ellipse) + strlen(back_ellipse); +  constexpr StringRef FrontEllipse = "  ..."; +  constexpr StringRef FrontSpace = "     "; +  constexpr StringRef BackEllipse = "..."; +  Columns EllipsesColumns = Columns(FrontEllipse.size() + BackEllipse.size()); -  unsigned TargetColumns = Columns; +  Columns TargetColumns = NonGutterColumns;    // Give us extra room for the ellipses    //  and any of the caret line that extends past the source -  if (TargetColumns > ellipses_space+CaretColumnsOutsideSource) -    TargetColumns -= ellipses_space+CaretColumnsOutsideSource; +  if (TargetColumns > EllipsesColumns + CaretColumnsOutsideSource) +    TargetColumns -= EllipsesColumns + CaretColumnsOutsideSource; -  while (SourceStart>0 || SourceEnd<SourceLine.size()) { +  while (SourceStart > 0 || SourceEnd < SourceLine.size()) {      bool ExpandedRegion = false; -    if (SourceStart>0) { -      unsigned NewStart = map.startOfPreviousColumn(SourceStart); +    if (SourceStart > 0) { +      Bytes NewStart = Map.startOfPreviousColumn(SourceStart);        // Skip over any whitespace we see here; we're looking for        // another bit of interesting text.        // FIXME: Detect non-ASCII whitespace characters too. -      while (NewStart && isWhitespace(SourceLine[NewStart])) -        NewStart = map.startOfPreviousColumn(NewStart); +      while (NewStart > 0 && isWhitespace(SourceLine[NewStart.V])) +        NewStart = Map.startOfPreviousColumn(NewStart);        // Skip over this bit of "interesting" text. -      while (NewStart) { -        unsigned Prev = map.startOfPreviousColumn(NewStart); -        if (isWhitespace(SourceLine[Prev])) +      while (NewStart > 0) { +        Bytes Prev = Map.startOfPreviousColumn(NewStart); +        if (isWhitespace(SourceLine[Prev.V]))            break;          NewStart = Prev;        } -      assert(map.byteToColumn(NewStart) != -1); -      unsigned NewColumns = map.byteToColumn(SourceEnd) - -                              map.byteToColumn(NewStart); +      assert(Map.byteToColumn(NewStart).isValid()); +      Columns NewColumns = +          Map.byteToColumn(SourceEnd) - Map.byteToColumn(NewStart);        if (NewColumns <= TargetColumns) {          SourceStart = NewStart;          ExpandedRegion = true;        }      } -    if (SourceEnd<SourceLine.size()) { -      unsigned NewEnd = map.startOfNextColumn(SourceEnd); +    if (SourceEnd < SourceLine.size()) { +      Bytes NewEnd = Map.startOfNextColumn(SourceEnd);        // Skip over any whitespace we see here; we're looking for        // another bit of interesting text.        // FIXME: Detect non-ASCII whitespace characters too. 
-      while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd])) -        NewEnd = map.startOfNextColumn(NewEnd); +      while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd.V])) +        NewEnd = Map.startOfNextColumn(NewEnd);        // Skip over this bit of "interesting" text. -      while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd])) -        NewEnd = map.startOfNextColumn(NewEnd); +      while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd.V])) +        NewEnd = Map.startOfNextColumn(NewEnd); -      assert(map.byteToColumn(NewEnd) != -1); -      unsigned NewColumns = map.byteToColumn(NewEnd) - -                              map.byteToColumn(SourceStart); +      assert(Map.byteToColumn(NewEnd).isValid()); +      Columns NewColumns = +          Map.byteToColumn(NewEnd) - Map.byteToColumn(SourceStart);        if (NewColumns <= TargetColumns) {          SourceEnd = NewEnd;          ExpandedRegion = true; @@ -464,39 +496,41 @@ static void selectInterestingSourceRegion(std::string &SourceLine,        break;    } -  CaretStart = map.byteToColumn(SourceStart); -  CaretEnd = map.byteToColumn(SourceEnd) + CaretColumnsOutsideSource; +  CaretStart = Map.byteToColumn(SourceStart); +  CaretEnd = Map.byteToColumn(SourceEnd) + CaretColumnsOutsideSource;    // [CaretStart, CaretEnd) is the slice we want. Update the various    // output lines to show only this slice. -  assert(CaretStart!=(unsigned)-1 && CaretEnd!=(unsigned)-1 && -         SourceStart!=(unsigned)-1 && SourceEnd!=(unsigned)-1); +  assert(CaretStart.isValid() && CaretEnd.isValid() && SourceStart.isValid() && +         SourceEnd.isValid());    assert(SourceStart <= SourceEnd);    assert(CaretStart <= CaretEnd); -  unsigned BackColumnsRemoved -    = map.byteToColumn(SourceLine.size())-map.byteToColumn(SourceEnd); -  unsigned FrontColumnsRemoved = CaretStart; -  unsigned ColumnsKept = CaretEnd-CaretStart; +  Columns BackColumnsRemoved = +      Map.byteToColumn(Bytes{static_cast<int>(SourceLine.size())}) - +      Map.byteToColumn(SourceEnd); +  Columns FrontColumnsRemoved = CaretStart; +  Columns ColumnsKept = CaretEnd - CaretStart;    // We checked up front that the line needed truncation -  assert(FrontColumnsRemoved+ColumnsKept+BackColumnsRemoved > Columns); +  assert(FrontColumnsRemoved + ColumnsKept + BackColumnsRemoved > +         NonGutterColumns);    // The line needs some truncation, and we'd prefer to keep the front    //  if possible, so remove the back -  if (BackColumnsRemoved > strlen(back_ellipse)) -    SourceLine.replace(SourceEnd, std::string::npos, back_ellipse); +  if (BackColumnsRemoved > Columns(BackEllipse.size())) +    SourceLine.replace(SourceEnd.V, std::string::npos, BackEllipse);    // If that's enough then we're done -  if (FrontColumnsRemoved+ColumnsKept <= Columns) +  if (FrontColumnsRemoved + ColumnsKept <= Columns(NonGutterColumns))      return;    // Otherwise remove the front as well -  if (FrontColumnsRemoved > strlen(front_ellipse)) { -    SourceLine.replace(0, SourceStart, front_ellipse); -    CaretLine.replace(0, CaretStart, front_space); +  if (FrontColumnsRemoved > Columns(FrontEllipse.size())) { +    SourceLine.replace(0, SourceStart.V, FrontEllipse); +    CaretLine.replace(0, CaretStart.V, FrontSpace);      if (!FixItInsertionLine.empty()) -      FixItInsertionLine.replace(0, CaretStart, front_space); +      FixItInsertionLine.replace(0, CaretStart.V, FrontSpace);    }  } @@ -690,11 +724,21 @@ TextDiagnostic::printDiagnosticLevel(raw_ostream 
&OS,      switch (Level) {      case DiagnosticsEngine::Ignored:        llvm_unreachable("Invalid diagnostic type"); -    case DiagnosticsEngine::Note:    OS.changeColor(noteColor, true); break; -    case DiagnosticsEngine::Remark:  OS.changeColor(remarkColor, true); break; -    case DiagnosticsEngine::Warning: OS.changeColor(warningColor, true); break; -    case DiagnosticsEngine::Error:   OS.changeColor(errorColor, true); break; -    case DiagnosticsEngine::Fatal:   OS.changeColor(fatalColor, true); break; +    case DiagnosticsEngine::Note: +      OS.changeColor(NoteColor, true); +      break; +    case DiagnosticsEngine::Remark: +      OS.changeColor(RemarkColor, true); +      break; +    case DiagnosticsEngine::Warning: +      OS.changeColor(WarningColor, true); +      break; +    case DiagnosticsEngine::Error: +      OS.changeColor(ErrorColor, true); +      break; +    case DiagnosticsEngine::Fatal: +      OS.changeColor(FatalColor, true); +      break;      }    } @@ -722,7 +766,7 @@ void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS,    if (ShowColors && !IsSupplemental) {      // Print primary diagnostic messages in bold and without color, to visually      // indicate the transition from continuation notes and other output. -    OS.changeColor(savedColor, true); +    OS.changeColor(SavedColor, true);      Bold = true;    } @@ -800,7 +844,7 @@ void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,      return;    if (DiagOpts.ShowColors) -    OS.changeColor(savedColor, true); +    OS.changeColor(SavedColor, true);    emitFilename(PLoc.getFilename(), Loc.getManager());    switch (DiagOpts.getFormat()) { @@ -961,41 +1005,40 @@ maybeAddRange(std::pair<unsigned, unsigned> A, std::pair<unsigned, unsigned> B,  struct LineRange {    unsigned LineNo; -  unsigned StartCol; -  unsigned EndCol; +  Bytes StartByte; +  Bytes EndByte;  };  /// Highlight \p R (with ~'s) on the current source line.  static void highlightRange(const LineRange &R, const SourceColumnMap &Map,                             std::string &CaretLine) {    // Pick the first non-whitespace column. -  unsigned StartColNo = R.StartCol; -  while (StartColNo < Map.getSourceLine().size() && -         (Map.getSourceLine()[StartColNo] == ' ' || -          Map.getSourceLine()[StartColNo] == '\t')) -    StartColNo = Map.startOfNextColumn(StartColNo); +  Bytes StartByte = R.StartByte; +  while (StartByte < Map.bytes() && (Map.getSourceLine()[StartByte.V] == ' ' || +                                     Map.getSourceLine()[StartByte.V] == '\t')) +    StartByte = Map.startOfNextColumn(StartByte);    // Pick the last non-whitespace column. -  unsigned EndColNo = -      std::min(static_cast<size_t>(R.EndCol), Map.getSourceLine().size()); -  while (EndColNo && (Map.getSourceLine()[EndColNo - 1] == ' ' || -                      Map.getSourceLine()[EndColNo - 1] == '\t')) -    EndColNo = Map.startOfPreviousColumn(EndColNo); +  Bytes EndByte = std::min(R.EndByte.V, Map.bytes().V); +  while (EndByte.V != 0 && (Map.getSourceLine()[EndByte.V - 1] == ' ' || +                            Map.getSourceLine()[EndByte.V - 1] == '\t')) +    EndByte = Map.startOfPreviousColumn(EndByte);    // If the start/end passed each other, then we are trying to highlight a    // range that just exists in whitespace. That most likely means we have    // a multi-line highlighting range that covers a blank line. 
-  if (StartColNo > EndColNo) +  if (StartByte > EndByte)      return; +  assert(StartByte <= EndByte && "Invalid range!");    // Fill the range with ~'s. -  StartColNo = Map.byteToContainingColumn(StartColNo); -  EndColNo = Map.byteToContainingColumn(EndColNo); +  Columns StartCol = Map.byteToContainingColumn(StartByte); +  Columns EndCol = Map.byteToContainingColumn(EndByte); + +  if (CaretLine.size() < static_cast<size_t>(EndCol.V)) +    CaretLine.resize(EndCol.V, ' '); -  assert(StartColNo <= EndColNo && "Invalid range!"); -  if (CaretLine.size() < EndColNo) -    CaretLine.resize(EndColNo, ' '); -  std::fill(CaretLine.begin() + StartColNo, CaretLine.begin() + EndColNo, '~'); +  std::fill(CaretLine.begin() + StartCol.V, CaretLine.begin() + EndCol.V, '~');  }  static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo, @@ -1006,7 +1049,7 @@ static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo,    std::string FixItInsertionLine;    if (Hints.empty() || !DiagOpts.ShowFixits)      return FixItInsertionLine; -  unsigned PrevHintEndCol = 0; +  Columns PrevHintEndCol = 0;    for (const auto &H : Hints) {      if (H.CodeToInsert.empty()) @@ -1024,12 +1067,13 @@ static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo,        // Note: When modifying this function, be very careful about what is a        // "column" (printed width, platform-dependent) and what is a        // "byte offset" (SourceManager "column"). -      unsigned HintByteOffset = -          SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1; +      Bytes HintByteOffset = +          Bytes(SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second)) +              .prev();        // The hint must start inside the source or right at the end -      assert(HintByteOffset < static_cast<unsigned>(map.bytes()) + 1); -      unsigned HintCol = map.byteToContainingColumn(HintByteOffset); +      assert(HintByteOffset < map.bytes().next()); +      Columns HintCol = map.byteToContainingColumn(HintByteOffset);        // If we inserted a long previous hint, push this one forwards, and add        // an extra space to show that this is not part of the previous @@ -1043,11 +1087,11 @@ static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo,        // This should NOT use HintByteOffset, because the source might have        // Unicode characters in earlier columns. 
-      unsigned NewFixItLineSize = FixItInsertionLine.size() + -                                  (HintCol - PrevHintEndCol) + -                                  H.CodeToInsert.size(); +      Columns NewFixItLineSize = Columns(FixItInsertionLine.size()) + +                                 (HintCol - PrevHintEndCol) + +                                 Columns(H.CodeToInsert.size());        if (NewFixItLineSize > FixItInsertionLine.size()) -        FixItInsertionLine.resize(NewFixItLineSize, ' '); +        FixItInsertionLine.resize(NewFixItLineSize.V, ' ');        std::copy(H.CodeToInsert.begin(), H.CodeToInsert.end(),                  FixItInsertionLine.end() - H.CodeToInsert.size()); @@ -1095,28 +1139,29 @@ prepareAndFilterRanges(const SmallVectorImpl<CharSourceRange> &Ranges,      if (EndLineNo < Lines.first || SM.getFileID(End) != FID)        continue; -    unsigned StartColumn = SM.getExpansionColumnNumber(Begin); -    unsigned EndColumn = SM.getExpansionColumnNumber(End); -    assert(StartColumn && "StartColumn must be valid, 0 is invalid"); -    assert(EndColumn && "EndColumn must be valid, 0 is invalid"); +    Bytes StartByte = SM.getExpansionColumnNumber(Begin); +    Bytes EndByte = SM.getExpansionColumnNumber(End); +    assert(StartByte.V != 0 && "StartByte must be valid, 0 is invalid"); +    assert(EndByte.V != 0 && "EndByte must be valid, 0 is invalid");      if (R.isTokenRange()) -      EndColumn += Lexer::MeasureTokenLength(End, SM, LangOpts); +      EndByte += Bytes(Lexer::MeasureTokenLength(End, SM, LangOpts));      // Only a single line.      if (StartLineNo == EndLineNo) { -      LineRanges.push_back({StartLineNo, StartColumn - 1, EndColumn - 1}); +      LineRanges.push_back({StartLineNo, StartByte.prev(), EndByte.prev()});        continue;      }      // Start line. -    LineRanges.push_back({StartLineNo, StartColumn - 1, ~0u}); +    LineRanges.push_back( +        {StartLineNo, StartByte.prev(), std::numeric_limits<int>::max()});      // Middle lines.      for (unsigned S = StartLineNo + 1; S != EndLineNo; ++S) -      LineRanges.push_back({S, 0, ~0u}); +      LineRanges.push_back({S, 0, std::numeric_limits<int>::max()});      // End line. -    LineRanges.push_back({EndLineNo, 0, EndColumn - 1}); +    LineRanges.push_back({EndLineNo, 0, EndByte.prev()});    }    return LineRanges; @@ -1226,8 +1271,7 @@ highlightLines(StringRef FileData, unsigned StartLineNumber,      if (TokenStartLine > EndLineNumber)        break; -    unsigned StartCol = -        SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; +    Bytes StartCol = SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;      if (Invalid)        continue; @@ -1235,14 +1279,14 @@ highlightLines(StringRef FileData, unsigned StartLineNumber,      if (TokenStartLine == TokenEndLine) {        SmallVector<TextDiagnostic::StyleRange> &LineRanges =            SnippetRanges[TokenStartLine - StartLineNumber]; -      appendStyle(LineRanges, T, StartCol, T.getLength()); +      appendStyle(LineRanges, T, StartCol.V, T.getLength());        continue;      }      assert((TokenEndLine - TokenStartLine) >= 1);      // For tokens that span multiple lines (think multiline comments), we      // divide them into multiple StyleRanges. 
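The split described in that comment follows directly from the token's start and end coordinates: the first line is styled from the token's start column to the end of that line, every middle line is styled in full, and the last line is styled from column zero to the token's end column, which is exactly the shape of the appendStyle calls below. A rough sketch of that partition, with a hypothetical per-line range type and, for brevity, a single LineLength (the real code queries each line's length individually):

#include <vector>

// Hypothetical (line, start, end) triple; clang's real StyleRange also records
// the highlight color chosen for the token kind.
struct LineStyle {
  unsigned Line;
  unsigned StartCol;
  unsigned EndCol;
};

// Partition a token spanning (StartLine, StartCol) .. (EndLine, EndCol) into
// one style range per source line it touches. The single-line case is handled
// separately by the caller, as in the surrounding code.
static std::vector<LineStyle> splitTokenAcrossLines(unsigned StartLine,
                                                    unsigned StartCol,
                                                    unsigned EndLine,
                                                    unsigned EndCol,
                                                    unsigned LineLength) {
  std::vector<LineStyle> Ranges;
  for (unsigned L = StartLine; L <= EndLine; ++L) {
    if (L == StartLine)
      Ranges.push_back({L, StartCol, LineLength}); // first line: tail only
    else if (L == EndLine)
      Ranges.push_back({L, 0, EndCol});            // last line: head only
    else
      Ranges.push_back({L, 0, LineLength});        // middle lines: in full
  }
  return Ranges;
}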
-    unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1; +    Bytes EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1;      if (Invalid)        continue; @@ -1258,9 +1302,9 @@ highlightLines(StringRef FileData, unsigned StartLineNumber,                SnippetRanges[L - StartLineNumber];            if (L == TokenStartLine) // First line -            appendStyle(LineRanges, T, StartCol, LineLength); +            appendStyle(LineRanges, T, StartCol.V, LineLength);            else if (L == TokenEndLine) // Last line -            appendStyle(LineRanges, T, 0, EndCol); +            appendStyle(LineRanges, T, 0, EndCol.V);            else              appendStyle(LineRanges, T, 0, LineLength);          } @@ -1315,11 +1359,11 @@ void TextDiagnostic::emitSnippetAndCaret(    const char *BufEnd = BufStart + BufData.size();    unsigned CaretLineNo = Loc.getLineNumber(); -  unsigned CaretColNo = Loc.getColumnNumber(); +  Bytes CaretByte = Loc.getColumnNumber();    // Arbitrarily stop showing snippets when the line is too long.    static const size_t MaxLineLengthToPrint = 4096; -  if (CaretColNo > MaxLineLengthToPrint) +  if (CaretByte > MaxLineLengthToPrint)      return;    // Find the set of lines to include. @@ -1379,35 +1423,37 @@ void TextDiagnostic::emitSnippetAndCaret(      std::string SourceLine(LineStart, LineEnd);      // Remove trailing null bytes.      while (!SourceLine.empty() && SourceLine.back() == '\0' && -           (LineNo != CaretLineNo || SourceLine.size() > CaretColNo)) +           (LineNo != CaretLineNo || +            SourceLine.size() > static_cast<size_t>(CaretByte.V)))        SourceLine.pop_back();      // Build the byte to column map. -    const SourceColumnMap sourceColMap(SourceLine, DiagOpts.TabStop); +    const SourceColumnMap SourceColMap(SourceLine, DiagOpts.TabStop);      std::string CaretLine;      // Highlight all of the characters covered by Ranges with ~ characters.      for (const auto &LR : LineRanges) {        if (LR.LineNo == LineNo) -        highlightRange(LR, sourceColMap, CaretLine); +        highlightRange(LR, SourceColMap, CaretLine);      }      // Next, insert the caret itself.      if (CaretLineNo == LineNo) { -      size_t Col = sourceColMap.byteToContainingColumn(CaretColNo - 1); -      CaretLine.resize(std::max(Col + 1, CaretLine.size()), ' '); -      CaretLine[Col] = '^'; +      Columns Col = SourceColMap.byteToContainingColumn(CaretByte.prev()); +      CaretLine.resize( +          std::max(static_cast<size_t>(Col.V) + 1, CaretLine.size()), ' '); +      CaretLine[Col.V] = '^';      }      std::string FixItInsertionLine = -        buildFixItInsertionLine(FID, LineNo, sourceColMap, Hints, SM, DiagOpts); +        buildFixItInsertionLine(FID, LineNo, SourceColMap, Hints, SM, DiagOpts);      // If the source line is too long for our terminal, select only the      // "interesting" source region within that line. -    unsigned Columns = DiagOpts.MessageLength; -    if (Columns) +    Columns MessageLength = DiagOpts.MessageLength; +    if (MessageLength.V != 0)        selectInterestingSourceRegion(SourceLine, CaretLine, FixItInsertionLine, -                                    Columns, sourceColMap); +                                    MessageLength, SourceColMap);      // If we are in -fdiagnostics-print-source-range-info mode, we are trying      // to produce easily machine parsable output.  
Add a space before the @@ -1425,7 +1471,7 @@ void TextDiagnostic::emitSnippetAndCaret(      if (!CaretLine.empty()) {        indentForLineNumbers();        if (DiagOpts.ShowColors) -        OS.changeColor(caretColor, true); +        OS.changeColor(CaretColor, true);        OS << CaretLine << '\n';        if (DiagOpts.ShowColors)          OS.resetColor(); @@ -1435,7 +1481,7 @@ void TextDiagnostic::emitSnippetAndCaret(        indentForLineNumbers();        if (DiagOpts.ShowColors)          // Print fixit line in color -        OS.changeColor(fixitColor, false); +        OS.changeColor(FixitColor, false);        if (DiagOpts.ShowSourceRanges)          OS << ' ';        OS << FixItInsertionLine << '\n'; diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 1858912..33fff76 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -162,18 +162,12 @@ set(x86_files    adxintrin.h    ammintrin.h    amxavx512intrin.h -  amxbf16transposeintrin.h    amxcomplexintrin.h -  amxcomplextransposeintrin.h    amxfp16intrin.h -  amxfp16transposeintrin.h    amxfp8intrin.h    amxintrin.h    amxmovrsintrin.h -  amxmovrstransposeintrin.h    amxtf32intrin.h -  amxtf32transposeintrin.h -  amxtransposeintrin.h    avx10_2_512bf16intrin.h    avx10_2_512convertintrin.h    avx10_2_512minmaxintrin.h diff --git a/clang/lib/Headers/amxbf16transposeintrin.h b/clang/lib/Headers/amxbf16transposeintrin.h deleted file mode 100644 index 86f09f2..0000000 --- a/clang/lib/Headers/amxbf16transposeintrin.h +++ /dev/null @@ -1,94 +0,0 @@ -/*===----- amxbf16transposeintrin.h - AMX-BF16 and AMX-TRANSPOSE ------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error                                                                         \ -    "Never use <amxbf16transposeintrin.h> directly; use <immintrin.h> instead." -#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_BF16TRANSPOSEINTRIN_H -#define __AMX_BF16TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS                                                     \ -  __attribute__((__always_inline__, __nodebug__,                               \ -                 __target__("amx-bf16,amx-transpose"))) - -/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in -///    tiles \a a and \a b, accumulating the intermediate single-precision -///    (32-bit) floating-point elements with elements in \a dst, and store the -///    32-bit result back to tile \a dst. -/// -/// \headerfile <immintrin.h> -/// -/// \code -/// void _tile_tdpbf16ps (__tile dst, __tile a, __tile b) -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -///	tmp := dst.row[m] -///	FOR k := 0 TO (a.colsb / 4) - 1 -///		FOR n := 0 TO (dst.colsb / 4) - 1 -///			tmp.bf32[n] += FP32(a.row[m].bf16[2*k+0]) * -///					FP32(b.row[k].bf16[2*n+0]) -///			tmp.bf32[n] += FP32(a.row[m].bf16[2*k+1]) * -///					FP32(b.row[k].bf16[2*n+1]) -///		ENDFOR -///	ENDFOR -///	write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTDPBF16PS instruction. 
-/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param a -///    The 1st source tile. Max size is 1024 Bytes. -/// \param b -///    The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b)) - -/// This is internal intrinsic. C/C++ user should avoid calling it directly. -static __inline__ _tile1024i __DEFAULT_FN_ATTRS -_tile_tdpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k, -                         _tile1024i dst, _tile1024i src1, _tile1024i src2) { -  return __builtin_ia32_ttdpbf16ps_internal(m, n, k, dst, src1, src2); -} - -/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in -///    tiles src0 and src1, accumulating the intermediate single-precision -///    (32-bit) floating-point elements with elements in "dst", and store the -///    32-bit result back to tile "dst". -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTDPBF16PS </c> instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param src0 -///    The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -///    The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static __inline__ void __tile_tdpbf16ps(__tile1024i *dst, __tile1024i src0, -                                        __tile1024i src1) { -  dst->tile = _tile_tdpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile, -                                       src0.tile, src1.tile); -} - -#undef __DEFAULT_FN_ATTRS - -#endif /* __x86_64__ */ -#endif /* __AMX_BF16TRANSPOSEINTRIN_H */ diff --git a/clang/lib/Headers/amxcomplextransposeintrin.h b/clang/lib/Headers/amxcomplextransposeintrin.h deleted file mode 100644 index 11abaf9..0000000 --- a/clang/lib/Headers/amxcomplextransposeintrin.h +++ /dev/null @@ -1,303 +0,0 @@ -/*===----- amxcomplextransposeintrin.h - AMX-COMPLEX and AMX-TRANSPOSE ------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error                                                                         \ -    "Never use <amxcomplextransposeintrin.h> directly; include <immintrin.h> instead." -#endif // __IMMINTRIN_H - -#ifndef __AMX_COMPLEXTRANSPOSEINTRIN_H -#define __AMX_COMPLEXTRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS                                                     \ -  __attribute__((__always_inline__, __nodebug__,                               \ -                 __target__("amx-complex,amx-transpose"))) - -/// Perform matrix multiplication of two tiles containing complex elements and -///    accumulate the results into a packed single precision tile. Each dword -///    element in input tiles \a a and \a b is interpreted as a complex number -///    with FP16 real part and FP16 imaginary part. -/// Calculates the imaginary part of the result. For each possible combination -///    of (transposed column of \a a, column of \a b), it performs a set of -///    multiplication and accumulations on all corresponding complex numbers -///    (one from \a a and one from \a b). 
The imaginary part of the \a a element -///    is multiplied with the real part of the corresponding \a b element, and -///    the real part of the \a a element is multiplied with the imaginary part -///    of the corresponding \a b elements. The two accumulated results are -///    added, and then accumulated into the corresponding row and column of -///    \a dst. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// void _tile_tcmmimfp16ps(__tile dst, __tile a, __tile b); -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -///	tmp := dst.row[m] -///	FOR k := 0 TO a.rows - 1 -///		FOR n := 0 TO (dst.colsb / 4) - 1 -///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1]) -///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0]) -///		ENDFOR -///	ENDFOR -///	write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param a -///    The 1st source tile. Max size is 1024 Bytes. -/// \param b -///    The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tcmmimfp16ps(dst, a, b)                                          \ -  __builtin_ia32_ttcmmimfp16ps((dst), (a), (b)) - -/// Perform matrix multiplication of two tiles containing complex elements and -///    accumulate the results into a packed single precision tile. Each dword -///    element in input tiles \a a and \a b is interpreted as a complex number -///    with FP16 real part and FP16 imaginary part. -/// Calculates the real part of the result. For each possible combination -///    of (rtransposed colum of \a a, column of \a b), it performs a set of -///    multiplication and accumulations on all corresponding complex numbers -///    (one from \a a and one from \a b). The real part of the \a a element is -///    multiplied with the real part of the corresponding \a b element, and the -///    negated imaginary part of the \a a element is multiplied with the -///    imaginary part of the corresponding \a b elements. The two accumulated -///    results are added, and then accumulated into the corresponding row and -///    column of \a dst. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// void _tile_tcmmrlfp16ps(__tile dst, __tile a, __tile b); -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -///	tmp := dst.row[m] -///	FOR k := 0 TO a.rows - 1 -///		FOR n := 0 TO (dst.colsb / 4) - 1 -///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0]) -///			tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1]) -///		ENDFOR -///	ENDFOR -///	write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param a -///    The 1st source tile. Max size is 1024 Bytes. -/// \param b -///    The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tcmmrlfp16ps(dst, a, b)                                          \ -  __builtin_ia32_ttcmmrlfp16ps((dst), (a), (b)) - -/// Perform matrix conjugate transpose and multiplication of two tiles -///    containing complex elements and accumulate the results into a packed -///    single precision tile. 
Each dword element in input tiles \a a and \a b -///    is interpreted as a complex number with FP16 real part and FP16 imaginary -///    part. -/// Calculates the imaginary part of the result. For each possible combination -///    of (transposed column of \a a, column of \a b), it performs a set of -///    multiplication and accumulations on all corresponding complex numbers -///    (one from \a a and one from \a b). The negated imaginary part of the \a a -///    element is multiplied with the real part of the corresponding \a b -///    element, and the real part of the \a a element is multiplied with the -///    imaginary part of the corresponding \a b elements. The two accumulated -///    results are added, and then accumulated into the corresponding row and -///    column of \a dst. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// void _tile_conjtcmmimfp16ps(__tile dst, __tile a, __tile b); -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -///	tmp := dst.row[m] -///	FOR k := 0 TO a.rows - 1 -///		FOR n := 0 TO (dst.colsb / 4) - 1 -///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1]) -///			tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0]) -///		ENDFOR -///	ENDFOR -///	write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TCONJTCMMIMFP16PS instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param a -///    The 1st source tile. Max size is 1024 Bytes. -/// \param b -///    The 2nd source tile. Max size is 1024 Bytes. -#define _tile_conjtcmmimfp16ps(dst, a, b)                                      \ -  __builtin_ia32_tconjtcmmimfp16ps((dst), (a), (b)) - -/// Perform conjugate transpose of an FP16-pair of complex elements from \a a -///    and writes the result to \a dst. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// void _tile_conjtfp16(__tile dst, __tile a); -/// \endcode -/// -/// \code{.operation} -/// FOR i := 0 TO dst.rows - 1 -///	FOR j := 0 TO (dst.colsb / 4) - 1 -///		tmp.fp16[2*j+0] := a.row[j].fp16[2*i+0] -///		tmp.fp16[2*j+1] := -a.row[j].fp16[2*i+1] -///	ENDFOR -///	write_row_and_zero(dst, i, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TCONJTFP16 instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param a -///    The source tile. Max size is 1024 Bytes. 
-#define _tile_conjtfp16(dst, a) __builtin_ia32_tconjtfp16((dst), (a)) - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmimfp16ps_internal( -    unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, -    _tile1024i src1, _tile1024i src2) { -  return __builtin_ia32_ttcmmimfp16ps_internal(m, n, k, dst, src1, src2); -} - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmrlfp16ps_internal( -    unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, -    _tile1024i src1, _tile1024i src2) { -  return __builtin_ia32_ttcmmrlfp16ps_internal(m, n, k, dst, src1, src2); -} - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_conjtcmmimfp16ps_internal( -    unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, -    _tile1024i src1, _tile1024i src2) { -  return __builtin_ia32_tconjtcmmimfp16ps_internal(m, n, k, dst, src1, src2); -} - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS -_tile_conjtfp16_internal(unsigned short m, unsigned short n, _tile1024i src) { -  return __builtin_ia32_tconjtfp16_internal(m, n, src); -} - -/// Perform matrix multiplication of two tiles containing complex elements and -///    accumulate the results into a packed single precision tile. Each dword -///    element in input tiles src0 and src1 is interpreted as a complex number -///    with FP16 real part and FP16 imaginary part. -///    This function calculates the imaginary part of the result. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTCMMIMFP16PS </c> instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param src0 -///    The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -///    The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static void __tile_tcmmimfp16ps(__tile1024i *dst, __tile1024i src0, -                                __tile1024i src1) { -  dst->tile = _tile_tcmmimfp16ps_internal(src0.row, src1.col, src0.col, -                                          dst->tile, src0.tile, src1.tile); -} - -/// Perform matrix multiplication of two tiles containing complex elements and -///    accumulate the results into a packed single precision tile. Each dword -///    element in input tiles src0 and src1 is interpreted as a complex number -///    with FP16 real part and FP16 imaginary part. -///    This function calculates the real part of the result. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTCMMRLFP16PS </c> instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param src0 -///    The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -///    The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static void __tile_tcmmrlfp16ps(__tile1024i *dst, __tile1024i src0, -                                __tile1024i src1) { -  dst->tile = _tile_tcmmrlfp16ps_internal(src0.row, src1.col, src0.col, -                                          dst->tile, src0.tile, src1.tile); -} - -/// Perform matrix conjugate transpose and multiplication of two tiles -///    containing complex elements and accumulate the results into a packed -///    single precision tile. Each dword element in input tiles src0 and src1 -///    is interpreted as a complex number with FP16 real part and FP16 imaginary -///    part. -///    This function calculates the imaginary part of the result. 
-/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TCONJTCMMIMFP16PS </c> instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param src0 -///    The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -///    The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static void __tile_conjtcmmimfp16ps(__tile1024i *dst, __tile1024i src0, -                                    __tile1024i src1) { -  dst->tile = _tile_conjtcmmimfp16ps_internal(src0.row, src1.col, src0.col, -                                              dst->tile, src0.tile, src1.tile); -} - -/// Perform conjugate transpose of an FP16-pair of complex elements from src and -///    writes the result to dst. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TCONJTFP16 </c> instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param src -///    The source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static void __tile_conjtfp16(__tile1024i *dst, __tile1024i src) { -  dst->tile = _tile_conjtfp16_internal(src.row, src.col, src.tile); -} - -#undef __DEFAULT_FN_ATTRS - -#endif // __x86_64__ -#endif // __AMX_COMPLEXTRANSPOSEINTRIN_H diff --git a/clang/lib/Headers/amxfp16transposeintrin.h b/clang/lib/Headers/amxfp16transposeintrin.h deleted file mode 100644 index 191f8c6..0000000 --- a/clang/lib/Headers/amxfp16transposeintrin.h +++ /dev/null @@ -1,94 +0,0 @@ -/*===----- amxfp16transposeintrin.h - AMX-FP16 and AMX-TRANSPOSE ------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error                                                                         \ -    "Never use <amxfp16transposeintrin.h> directly; use <immintrin.h> instead." -#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_FP16TRANSPOSEINTRIN_H -#define __AMX_FP16TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS                                                     \ -  __attribute__((__always_inline__, __nodebug__,                               \ -                 __target__("amx-fp16,amx-transpose"))) - -/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in -///    tiles \a a and \a b, accumulating the intermediate single-precision -///    (32-bit) floating-point elements with elements in \a dst, and store the -///    32-bit result back to tile \a dst. -/// -/// \headerfile <immintrin.h> -/// -/// \code -/// void _tile_tdpfp16ps (__tile dst, __tile a, __tile b) -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -///	tmp := dst.row[m] -///	FOR k := 0 TO (a.colsb / 4) - 1 -///		FOR n := 0 TO (dst.colsb / 4) - 1 -///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * -///					FP32(b.row[k].fp16[2*n+0]) -///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * -///					FP32(b.row[k].fp16[2*n+1]) -///		ENDFOR -///	ENDFOR -///	write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTDPFP16PS instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. 
-/// \param a -///    The 1st source tile. Max size is 1024 Bytes. -/// \param b -///    The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tdpfp16ps(dst, a, b) __builtin_ia32_ttdpfp16ps((dst), (a), (b)) - -/// This is internal intrinsic. C/C++ user should avoid calling it directly. -static __inline__ _tile1024i __DEFAULT_FN_ATTRS -_tile_tdpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k, -                         _tile1024i dst, _tile1024i src1, _tile1024i src2) { -  return __builtin_ia32_ttdpfp16ps_internal(m, n, k, dst, src1, src2); -} - -/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in -///    tiles src0 and src1, accumulating the intermediate single-precision -///    (32-bit) floating-point elements with elements in "dst", and store the -///    32-bit result back to tile "dst". -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTDPFP16PS </c> instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param src0 -///    The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -///    The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static __inline__ void __tile_tdpfp16ps(__tile1024i *dst, __tile1024i src0, -                                        __tile1024i src1) { -  dst->tile = _tile_tdpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, -                                       src0.tile, src1.tile); -} - -#undef __DEFAULT_FN_ATTRS - -#endif /* __x86_64__ */ -#endif /* __AMX_FP16TRANSPOSEINTRIN_H */ diff --git a/clang/lib/Headers/amxintrin.h b/clang/lib/Headers/amxintrin.h index a7da10d..208aa358 100644 --- a/clang/lib/Headers/amxintrin.h +++ b/clang/lib/Headers/amxintrin.h @@ -230,8 +230,6 @@ static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_release(void) {  /// bytes. Since there is no 2D type in llvm IR, we use vector type to  /// represent 2D tile and the fixed size is maximum amx tile register size.  typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64))); -typedef int _tile1024i_1024a -    __attribute__((__vector_size__(1024), __aligned__(1024)));  /// This is internal intrinsic. C/C++ user should avoid calling it directly.  static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE diff --git a/clang/lib/Headers/amxmovrstransposeintrin.h b/clang/lib/Headers/amxmovrstransposeintrin.h deleted file mode 100644 index 5f48cba..0000000 --- a/clang/lib/Headers/amxmovrstransposeintrin.h +++ /dev/null @@ -1,200 +0,0 @@ -/* ===--- amxmovrstransposeintrin.h - AMX_MOVRS_TRANSPOSE intrinsics --------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * ===-----------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error                                                                         \ -    "Never use <amxmovrstransposeintrin.h> directly; use <immintrin.h> instead." 
-#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_MOVRS_TRANSPOSEINTRIN_H -#define __AMX_MOVRS_TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS                                                     \ -  __attribute__((__always_inline__, __nodebug__,                               \ -                 __target__("amx-transpose,amx-movrs"))) - -#define _tile_2rpntlvwz0rs(tdst, base, stride)                                 \ -  __builtin_ia32_t2rpntlvwz0rs(tdst, base, stride) -#define _tile_2rpntlvwz0rst1(tdst, base, stride)                               \ -  __builtin_ia32_t2rpntlvwz0rst1(tdst, base, stride) -#define _tile_2rpntlvwz1rs(tdst, base, stride)                                 \ -  __builtin_ia32_t2rpntlvwz1rs(tdst, base, stride) -#define _tile_2rpntlvwz1rst1(tdst, base, stride)                               \ -  __builtin_ia32_t2rpntlvwz1rst1(tdst, base, stride) - -static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rs_internal( -    unsigned short row, unsigned short col0, unsigned short col1, -    _tile1024i *dst0, _tile1024i *dst1, const void *base, -    __SIZE_TYPE__ stride) { -  // Use __tile1024i_1024a* to escape the alignment check in -  // clang/test/Headers/x86-intrinsics-headers-clean.cpp -  __builtin_ia32_t2rpntlvwz0rs_internal( -      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, -      (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rst1_internal( -    unsigned short row, unsigned short col0, unsigned short col1, -    _tile1024i *dst0, _tile1024i *dst1, const void *base, -    __SIZE_TYPE__ stride) { -  __builtin_ia32_t2rpntlvwz0rst1_internal( -      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, -      (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rs_internal( -    unsigned short row, unsigned short col0, unsigned short col1, -    _tile1024i *dst0, _tile1024i *dst1, const void *base, -    __SIZE_TYPE__ stride) { -  __builtin_ia32_t2rpntlvwz1rs_internal( -      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, -      (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rst1_internal( -    unsigned short row, unsigned short col0, unsigned short col1, -    _tile1024i *dst0, _tile1024i *dst1, const void *base, -    __SIZE_TYPE__ stride) { -  __builtin_ia32_t2rpntlvwz1rst1_internal( -      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, -      (__SIZE_TYPE__)(stride)); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. -/// Provides a hint to the implementation that the data will likely become -/// read shared in the near future and the data caching can be optimized. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ0RS </c> instruction. 
-/// -/// \param dst0 -///    First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -///    Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -///    A pointer to base address. -/// \param stride -///    The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS -static void __tile_2rpntlvwz0rs(__tile1024i *dst0, __tile1024i *dst1, -                                const void *base, __SIZE_TYPE__ stride) { -  _tile_2rpntlvwz0rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, -                              &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1RS </c> instruction. -/// -/// \param dst0 -///    First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -///    Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -///    A pointer to base address. -/// \param stride -///    The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS -static void __tile_2rpntlvwz0rst1(__tile1024i *dst0, __tile1024i *dst1, -                                  const void *base, __SIZE_TYPE__ stride) { -  _tile_2rpntlvwz0rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, -                                &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. The last row will be not be read from memory but instead -/// filled with zeros. -/// Provides a hint to the implementation that the data will likely become -/// read shared in the near future and the data caching can be optimized. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction. -/// -/// \param dst0 -///    First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -///    Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -///    A pointer to base address. -/// \param stride -///    The stride between the rows' data to be loaded in memory. 
-__DEFAULT_FN_ATTRS -static void __tile_2rpntlvwz1rs(__tile1024i *dst0, __tile1024i *dst1, -                                const void *base, __SIZE_TYPE__ stride) { -  _tile_2rpntlvwz1rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, -                              &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. The last row will be not be read from memory but instead -/// filled with zeros. -/// Provides a hint to the implementation that the data will likely become -/// read shared in the near future and the data caching can be optimized. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1RS </c> instruction. -/// -/// \param dst0 -///    First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -///    Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -///    A pointer to base address. -/// \param stride -///    The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS -static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1, -                                  const void *base, __SIZE_TYPE__ stride) { -  _tile_2rpntlvwz1rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, -                                &dst1->tile, base, stride); -} - -#undef __DEFAULT_FN_ATTRS -#endif /* __x86_64__ */ -#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */ diff --git a/clang/lib/Headers/amxtf32transposeintrin.h b/clang/lib/Headers/amxtf32transposeintrin.h deleted file mode 100644 index e1b90c1..0000000 --- a/clang/lib/Headers/amxtf32transposeintrin.h +++ /dev/null @@ -1,105 +0,0 @@ -/*===--------- amxtf32transposeintrin.h - AMX-TF32 and AMX-TRANSPOSE --------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ -#ifndef __IMMINTRIN_H -#error                                                                         \ -    "Never use <amxtf32transposeintrin.h> directly; include <immintrin.h> instead." -#endif // __IMMINTRIN_H - -#ifndef __AMX_TF32TRANSPOSEINTRIN_H -#define __AMX_TF32TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS_TF32_TRANSPOSE                                      \ -  __attribute__((__always_inline__, __nodebug__,                               \ -                 __target__("amx-tf32,amx-transpose"))) - -/// \code -/// void _tile_tmmultf32ps(constexpr int srcdst, constexpr int a, \ -///                        constexpr int b); -/// \endcode -/// -/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction. -/// -/// \param srcdst -/// 	The destination tile. Max size is 1024 Bytes. 
-/// \param a -/// 	The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// 	The 2nd source tile. Max size is 1024 Bytes. -/// -/// \code{.operation} -/// DEFINE zero_lower_mantissa_bits_fp32(x[31:0]) { -/// 	dword[12:0] := 0 -/// 	dword[31:13] := x[31:13] -/// 	return dword -/// } -/// -/// DEFINE silence_snan_fp32(x[31:0]) { -/// 	IF (x.exponent == 255 and x.fraction != 0 and x.fraction[22] == 0) -/// 		x.fraction[22] := 1 -/// 	return x -/// } -/// -/// elements_dest:= srcdst.colsb/4 -/// -/// FOR m := 0 TO (srcdst.rows-1) -/// 	tmp[511:0] := 0 -/// 	FOR k := 0 TO (a.rows-1) -/// 		FOR n := 0 TO (elements_dest-1) -/// 			a1e := silence_snan_fp32(a.row[k].fp32[m]) -/// 			a2e := silence_snan_fp32(b.row[k].fp32[n]) -/// 			s1e := zero_lower_mantissa_bits_fp32(a1e) -/// 			s2e := zero_lower_mantissa_bits_fp32(a2e) -/// 			tmp.fp32[n] += s1e * s2e -/// 		ENDFOR -/// 	ENDFOR -/// -/// 	FOR n := 0 TO (elements_dest-1) -/// 		tmp.fp32[n] += srcdst.row[m].fp32[n] -/// 	ENDFOR -///	write_row_and_zero(srcdst, m, tmp, srcdst.colsb) -/// -/// ENDFOR -/// -/// zero_upper_rows(srcdst, srcdst.rows) -/// zero_tileconfig_start() -/// \endcode -#define _tile_tmmultf32ps(srcdst, a, b)                                        \ -  __builtin_ia32_ttmmultf32ps((srcdst), (a), (b)) - -// dst = m x n (srcdest), src1 = k x m, src2 = k x n -static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TF32_TRANSPOSE -_tile_tmmultf32ps_internal(unsigned short m, unsigned short n, unsigned short k, -                           _tile1024i dst, _tile1024i src1, _tile1024i src2) { -  return __builtin_ia32_ttmmultf32ps_internal(m, n, k, dst, src1, src2); -} - -/// Compute transpose and do Matrix Multiplication of src0 and src1, and then do -/// Matrix Plus with dst. All the calculation is base on float32 but with the -/// lower 13-bit set to 0. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param src0 -///    The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -///    The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS_TF32_TRANSPOSE -static void __tile_tmmultf32ps(__tile1024i *dst, __tile1024i src0, -                               __tile1024i src1) { -  dst->tile = _tile_tmmultf32ps_internal(src0.row, src1.col, src0.col, -                                         dst->tile, src0.tile, src1.tile); -} - -#endif // __x86_64__ -#endif // __AMX_TF32TRANSPOSEINTRIN_H diff --git a/clang/lib/Headers/amxtransposeintrin.h b/clang/lib/Headers/amxtransposeintrin.h deleted file mode 100644 index b3fa37d..0000000 --- a/clang/lib/Headers/amxtransposeintrin.h +++ /dev/null @@ -1,248 +0,0 @@ -/* ===--- amxtransposeintrin.h - AMX_TRANSPOSE intrinsics -*- C++ -*---------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * ===-----------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error "Never use <amxtransposeintrin.h> directly; use <immintrin.h> instead." 
-#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_TRANSPOSEINTRIN_H -#define __AMX_TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS_TRANSPOSE                                           \ -  __attribute__((__always_inline__, __nodebug__, __target__("amx-transpose"))) - -#define _tile_2rpntlvwz0(tdst, base, stride)                                   \ -  __builtin_ia32_t2rpntlvwz0(tdst, base, stride) -#define _tile_2rpntlvwz0t1(tdst, base, stride)                                 \ -  __builtin_ia32_t2rpntlvwz0t1(tdst, base, stride) -#define _tile_2rpntlvwz1(tdst, base, stride)                                   \ -  __builtin_ia32_t2rpntlvwz1(tdst, base, stride) -#define _tile_2rpntlvwz1t1(tdst, base, stride)                                 \ -  __builtin_ia32_t2rpntlvwz1t1(tdst, base, stride) - -/// Transpose 32-bit elements from \a src and write the result to \a dst. -/// -/// \headerfile <immintrin.h> -/// -/// \code -/// void _tile_transposed(__tile dst, __tile src); -/// \endcode -/// -/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction. -/// -/// \param dst -/// 	The destination tile. Max size is 1024 Bytes. -/// \param src -/// 	The source tile. Max size is 1024 Bytes. -/// -/// \code{.operation} -/// -/// FOR i := 0 TO (dst.rows-1) -/// 	tmp[511:0] := 0 -/// 	FOR j := 0 TO (dst.colsb/4-1) -/// 		tmp.dword[j] := src.row[j].dword[i] -/// 	ENDFOR -/// 	dst.row[i] := tmp -/// ENDFOR -/// -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -#define _tile_transposed(dst, src) __builtin_ia32_ttransposed(dst, src) - -static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0_internal( -    unsigned short row, unsigned short col0, unsigned short col1, -    _tile1024i *dst0, _tile1024i *dst1, const void *base, -    __SIZE_TYPE__ stride) { -  // Use __tile1024i_1024a* to escape the alignment check in -  // clang/test/Headers/x86-intrinsics-headers-clean.cpp -  __builtin_ia32_t2rpntlvwz0_internal(row, col0, col1, (_tile1024i_1024a *)dst0, -                                      (_tile1024i_1024a *)dst1, base, -                                      (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0t1_internal( -    unsigned short row, unsigned short col0, unsigned short col1, -    _tile1024i *dst0, _tile1024i *dst1, const void *base, -    __SIZE_TYPE__ stride) { -  __builtin_ia32_t2rpntlvwz0t1_internal( -      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, -      (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1_internal( -    unsigned short row, unsigned short col0, unsigned short col1, -    _tile1024i *dst0, _tile1024i *dst1, const void *base, -    __SIZE_TYPE__ stride) { -  __builtin_ia32_t2rpntlvwz1_internal(row, col0, col1, (_tile1024i_1024a *)dst0, -                                      (_tile1024i_1024a *)dst1, base, -                                      (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1t1_internal( -    unsigned short row, unsigned short col0, unsigned short col1, -    _tile1024i *dst0, _tile1024i *dst1, const void *base, -    __SIZE_TYPE__ stride) { -  __builtin_ia32_t2rpntlvwz1t1_internal( -      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, -      (__SIZE_TYPE__)(stride)); -} - -// This is internal intrinsic. C/C++ user should avoid calling it directly. 
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TRANSPOSE -_tile_transposed_internal(unsigned short m, unsigned short n, _tile1024i src) { -  return __builtin_ia32_ttransposed_internal(m, n, src); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. -/// Provides a hint to the implementation that the data will likely not be -/// reused in the near future and the data caching can be optimized. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ0 </c> instruction. -/// -/// \param dst0 -///    First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -///    Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -///    A pointer to base address. -/// \param stride -///    The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_2rpntlvwz0(__tile1024i *dst0, __tile1024i *dst1, -                              const void *base, __SIZE_TYPE__ stride) { -  _tile_2rpntlvwz0_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, -                            &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1 </c> instruction. -/// -/// \param dst0 -///    First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -///    Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -///    A pointer to base address. -/// \param stride -///    The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_2rpntlvwz0t1(__tile1024i *dst0, __tile1024i *dst1, -                                const void *base, __SIZE_TYPE__ stride) { -  _tile_2rpntlvwz0t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, -                              &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. 
-/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. The last row will be not be read from memory but instead -/// filled with zeros. -/// Provides a hint to the implementation that the data will likely not be -/// reused in the near future and the data caching can be optimized. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction. -/// -/// \param dst0 -///    First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -///    Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -///    A pointer to base address. -/// \param stride -///    The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_2rpntlvwz1(__tile1024i *dst0, __tile1024i *dst1, -                              const void *base, __SIZE_TYPE__ stride) { -  _tile_2rpntlvwz1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, -                            &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. The last row will be not be read from memory but instead -/// filled with zeros. -/// Provides a hint to the implementation that the data will likely not be -/// reused in the near future and the data caching can be optimized. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1 </c> instruction. -/// -/// \param dst0 -///    First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -///    Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -///    A pointer to base address. -/// \param stride -///    The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_2rpntlvwz1t1(__tile1024i *dst0, __tile1024i *dst1, -                                const void *base, __SIZE_TYPE__ stride) { -  _tile_2rpntlvwz1t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, -                              &dst1->tile, base, stride); -} - -/// Transpose 32-bit elements from src and write the result to dst. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction. -/// -/// \param dst -///    The destination tile. Max size is 1024 Bytes. -/// \param src -///    The source tile. Max size is 1024 Bytes. 
-__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_transposed(__tile1024i *dst, __tile1024i src) { -  dst->tile = _tile_transposed_internal(dst->row, dst->col, src.tile); -} - -#endif /* __x86_64__ */ -#endif /* __AMX_TRANSPOSEINTRIN_H */ diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 35f012c..19064a4 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -475,24 +475,12 @@ _storebe_i64(void * __P, long long __D) {  #include <amxfp8intrin.h> -#include <amxtransposeintrin.h> -  #include <amxmovrsintrin.h> -#include <amxmovrstransposeintrin.h> -  #include <amxavx512intrin.h>  #include <amxtf32intrin.h> -#include <amxtf32transposeintrin.h> - -#include <amxbf16transposeintrin.h> - -#include <amxfp16transposeintrin.h> - -#include <amxcomplextransposeintrin.h> -  #include <avx512vp2intersectintrin.h>  #include <avx512vlvp2intersectintrin.h> diff --git a/clang/lib/Interpreter/InterpreterValuePrinter.cpp b/clang/lib/Interpreter/InterpreterValuePrinter.cpp index 0ed02f3..cfa50ee 100644 --- a/clang/lib/Interpreter/InterpreterValuePrinter.cpp +++ b/clang/lib/Interpreter/InterpreterValuePrinter.cpp @@ -411,7 +411,8 @@ public:    }    InterfaceKind VisitReferenceType(const ReferenceType *Ty) { -    ExprResult AddrOfE = S.CreateBuiltinUnaryOp(SourceLocation(), UO_AddrOf, E); +    ExprResult AddrOfE = S.CreateBuiltinUnaryOp(SourceLocation(), UO_AddrOf, +                                                E->IgnoreImpCasts());      assert(!AddrOfE.isInvalid() && "Can not create unary expression");      Args.push_back(AddrOfE.get());      return InterfaceKind::NoAlloc; @@ -537,7 +538,7 @@ llvm::Expected<Expr *> Interpreter::convertExprToValue(Expr *E) {    QualType DesugaredTy = Ty.getDesugaredType(Ctx);    // For lvalue struct, we treat it as a reference. -  if (DesugaredTy->isRecordType() && E->isLValue()) { +  if (DesugaredTy->isRecordType() && E->IgnoreImpCasts()->isLValue()) {      DesugaredTy = Ctx.getLValueReferenceType(DesugaredTy);      Ty = Ctx.getLValueReferenceType(Ty);    } diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 65c324c..f05c28fd 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -221,7 +221,7 @@ std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName,    // file.    for (const std::string &Dir : HSOpts.PrebuiltModulePaths) {      SmallString<256> Result(Dir); -    llvm::sys::fs::make_absolute(Result); +    FileMgr.makeAbsolutePath(Result);      if (ModuleName.contains(':'))        // The separator of C++20 modules partitions (':') is not good for file        // systems, here clang and gcc choose '-' by default since it is not a @@ -246,7 +246,7 @@ std::string HeaderSearch::getPrebuiltImplicitModuleFileName(Module *Module) {    StringRef ModuleCacheHash = HSOpts.DisableModuleHash ? 
"" : getModuleHash();    for (const std::string &Dir : HSOpts.PrebuiltModulePaths) {      SmallString<256> CachePath(Dir); -    llvm::sys::fs::make_absolute(CachePath); +    FileMgr.makeAbsolutePath(CachePath);      llvm::sys::path::append(CachePath, ModuleCacheHash);      std::string FileName =          getCachedModuleFileNameImpl(ModuleName, ModuleMapPath, CachePath); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index e4b158e..7e4a164 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -4248,6 +4248,13 @@ void Parser::ParseDeclarationSpecifiers(      // type-specifier      case tok::kw_short: +      if (!getLangOpts().NativeInt16Type) { +        Diag(Tok, diag::err_unknown_typename) << Tok.getName(); +        DS.SetTypeSpecError(); +        DS.SetRangeEnd(Tok.getLocation()); +        ConsumeToken(); +        goto DoneWithDeclSpec; +      }        isInvalid = DS.SetTypeSpecWidth(TypeSpecifierWidth::Short, Loc, PrevSpec,                                        DiagID, Policy);        break; diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 25199c7..31bc941 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3221,6 +3221,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,      else        Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective);      break; +  case OMPC_threadset:    case OMPC_fail:    case OMPC_proc_bind:    case OMPC_atomic_default_mem_order: diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index e32f437..139c4ab 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -153,7 +153,48 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,    case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:    case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:    case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32: -  case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: { +  case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32: +  case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32: +  case 
+  case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: {
     StringRef FeatureList(
         getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
     if (!Builtin::evaluateRequiredTargetFeatures(FeatureList,
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f451787..ad2c2e4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3542,9 +3542,7 @@ bool Sema::ValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum) {
 bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx,
                                unsigned FirstArg, FormatStringInfo *FSI) {
-  bool IsCXXMember = false;
-  if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
-    IsCXXMember = MD->isInstance();
+  bool HasImplicitThisParam = hasImplicitObjectParameter(D);
   bool IsVariadic = false;
   if (const FunctionType *FnTy = D->getFunctionType())
     IsVariadic = cast<FunctionProtoType>(FnTy)->isVariadic();
@@ -3553,11 +3551,12 @@ bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx,
   else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D))
     IsVariadic = OMD->isVariadic();
 
-  return getFormatStringInfo(FormatIdx, FirstArg, IsCXXMember, IsVariadic, FSI);
+  return getFormatStringInfo(FormatIdx, FirstArg, HasImplicitThisParam,
+                             IsVariadic, FSI);
 }
 
 bool Sema::getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
-                               bool IsCXXMember, bool IsVariadic,
+                               bool HasImplicitThisParam, bool IsVariadic,
                                FormatStringInfo *FSI) {
   if (FirstArg == 0)
     FSI->ArgPassingKind = FAPK_VAList;
@@ -3571,7 +3570,7 @@ bool Sema::getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
   // The way the format attribute works in GCC, the implicit this argument
   // of member functions is counted. However, it doesn't appear in our own
   // lists, so decrement format_idx in that case.
-  if (IsCXXMember) {
+  if (HasImplicitThisParam) {
     if(FSI->FormatIdx == 0)
       return false;
     --FSI->FormatIdx;
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 0514d10..aa93507 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -10208,6 +10208,24 @@ void SemaCodeCompletion::CodeCompletePreprocessorDirective(bool InConditional) {
   Builder.AddPlaceholderChunk("message");
   Results.AddResult(Builder.TakeString());
 
+  if (getLangOpts().C23) {
+    // #embed "file"
+    Builder.AddTypedTextChunk("embed");
+    Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
+    Builder.AddTextChunk("\"");
+    Builder.AddPlaceholderChunk("file");
+    Builder.AddTextChunk("\"");
+    Results.AddResult(Builder.TakeString());
+
+    // #embed <file>
+    Builder.AddTypedTextChunk("embed");
+    Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
+    Builder.AddTextChunk("<");
+    Builder.AddPlaceholderChunk("file");
+    Builder.AddTextChunk(">");
+    Results.AddResult(Builder.TakeString());
+  }
+
   // Note: #ident and #sccs are such crazy anachronisms that we don't provide
   // completions for them. And __include_macros is a Clang-internal extension
   // that we don't want to encourage anyone to use.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 964a2a7..a9e7b44 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3785,7 +3785,7 @@ static bool handleFormatAttrCommon(Sema &S, Decl *D, const ParsedAttr &AL,
   // In C++ the implicit 'this' function parameter also counts, and they are
   // counted from one.
-  bool HasImplicitThisParam = isInstanceMethod(D);
+  bool HasImplicitThisParam = hasImplicitObjectParameter(D);
   Info->NumArgs = getFunctionOrMethodNumParams(D) + HasImplicitThisParam;
 
   Info->Identifier = AL.getArgAsIdent(0)->getIdentifierInfo();
@@ -3926,7 +3926,7 @@ static void handleCallbackAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     return;
   }
 
-  bool HasImplicitThisParam = isInstanceMethod(D);
+  bool HasImplicitThisParam = hasImplicitObjectParameter(D);
   int32_t NumArgs = getFunctionOrMethodNumParams(D);
 
   FunctionDecl *FD = D->getAsFunction();
@@ -4110,7 +4110,7 @@ static void handleLifetimeCaptureByAttr(Sema &S, Decl *D,
 }
 
 void Sema::LazyProcessLifetimeCaptureByParams(FunctionDecl *FD) {
-  bool HasImplicitThisParam = isInstanceMethod(FD);
+  bool HasImplicitThisParam = hasImplicitObjectParameter(FD);
   SmallVector<LifetimeCaptureByAttr *, 1> Attrs;
   for (ParmVarDecl *PVD : FD->parameters())
     if (auto *A = PVD->getAttr<LifetimeCaptureByAttr>())
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 6d5cb0f..256f952 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -17216,6 +17216,10 @@ OMPClause *SemaOpenMP::ActOnOpenMPSimpleClause(
         static_cast<OpenMPSeverityClauseKind>(Argument), ArgumentLoc, StartLoc,
         LParenLoc, EndLoc);
     break;
+  case OMPC_threadset:
+    Res = ActOnOpenMPThreadsetClause(static_cast<OpenMPThreadsetKind>(Argument),
+                                     ArgumentLoc, StartLoc, LParenLoc, EndLoc);
+    break;
   case OMPC_if:
   case OMPC_final:
   case OMPC_num_threads:
@@ -17355,6 +17359,23 @@ OMPClause *SemaOpenMP::ActOnOpenMPDefaultClause(
       OMPDefaultClause(M, MLoc, VCKind, VCKindLoc, StartLoc, LParenLoc, EndLoc);
 }
 
+OMPClause *SemaOpenMP::ActOnOpenMPThreadsetClause(OpenMPThreadsetKind Kind,
+                                                  SourceLocation KindLoc,
+                                                  SourceLocation StartLoc,
+                                                  SourceLocation LParenLoc,
+                                                  SourceLocation EndLoc) {
+  if (Kind == OMPC_THREADSET_unknown) {
+    Diag(KindLoc, diag::err_omp_unexpected_clause_value)
+        << getListOfPossibleValues(OMPC_threadset, /*First=*/0,
+                                   /*Last=*/unsigned(OMPC_THREADSET_unknown))
+        << getOpenMPClauseName(OMPC_threadset);
+    return nullptr;
+  }
+
+  return new (getASTContext())
+      OMPThreadsetClause(Kind, KindLoc, StartLoc, LParenLoc, EndLoc);
+}
+
 OMPClause *SemaOpenMP::ActOnOpenMPProcBindClause(ProcBindKind Kind,
                                                  SourceLocation KindKwLoc,
                                                  SourceLocation StartLoc,
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index 850bcb1..2f61bdd 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -489,14 +489,6 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_tileloaddrst164:
   case X86::BI__builtin_ia32_tilestored64:
   case X86::BI__builtin_ia32_tilezero:
-  case X86::BI__builtin_ia32_t2rpntlvwz0:
-  case X86::BI__builtin_ia32_t2rpntlvwz0t1:
-  case X86::BI__builtin_ia32_t2rpntlvwz1:
-  case X86::BI__builtin_ia32_t2rpntlvwz1t1:
-  case X86::BI__builtin_ia32_t2rpntlvwz0rst1:
-  case X86::BI__builtin_ia32_t2rpntlvwz1rs:
-  case X86::BI__builtin_ia32_t2rpntlvwz1rst1:
-  case X86::BI__builtin_ia32_t2rpntlvwz0rs:
   case X86::BI__builtin_ia32_tcvtrowps2bf16h:
   case X86::BI__builtin_ia32_tcvtrowps2bf16l:
   case X86::BI__builtin_ia32_tcvtrowps2phh:
@@ -516,17 +508,8 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_tdpbhf8ps:
   case X86::BI__builtin_ia32_tdphbf8ps:
   case X86::BI__builtin_ia32_tdphf8ps:
-  case X86::BI__builtin_ia32_ttdpbf16ps:
-  case X86::BI__builtin_ia32_ttdpfp16ps:
-  case X86::BI__builtin_ia32_ttcmmimfp16ps:
-  case X86::BI__builtin_ia32_ttcmmrlfp16ps:
-  case X86::BI__builtin_ia32_tconjtcmmimfp16ps:
   case X86::BI__builtin_ia32_tmmultf32ps:
-  case X86::BI__builtin_ia32_ttmmultf32ps:
     return CheckBuiltinTileRangeAndDuplicate(TheCall, {0, 1, 2});
-  case X86::BI__builtin_ia32_ttransposed:
-  case X86::BI__builtin_ia32_tconjtfp16:
-    return CheckBuiltinTileArgumentsRange(TheCall, {0, 1});
   }
 }
 
 static bool isX86_32Builtin(unsigned BuiltinID) {
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 0c8c1d1..8c20078 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -10624,6 +10624,13 @@ TreeTransform<Derived>::TransformOMPDefaultClause(OMPDefaultClause *C) {
 
 template <typename Derived>
 OMPClause *
+TreeTransform<Derived>::TransformOMPThreadsetClause(OMPThreadsetClause *C) {
+  // No need to rebuild this clause, no template-dependent parameters.
+  return C;
+}
+
+template <typename Derived>
+OMPClause *
 TreeTransform<Derived>::TransformOMPProcBindClause(OMPProcBindClause *C) {
   return getDerived().RebuildOMPProcBindClause(
       C->getProcBindKind(), C->getProcBindKindKwLoc(), C->getBeginLoc(),
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index c1b5cb7..e3106f8d 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11255,6 +11255,9 @@ OMPClause *OMPClauseReader::readClause() {
   case llvm::omp::OMPC_mergeable:
     C = new (Context) OMPMergeableClause();
     break;
+  case llvm::omp::OMPC_threadset:
+    C = new (Context) OMPThreadsetClause();
+    break;
   case llvm::omp::OMPC_read:
     C = new (Context) OMPReadClause();
     break;
@@ -11658,6 +11661,17 @@ void OMPClauseReader::VisitOMPDefaultClause(OMPDefaultClause *C) {
   C->setDefaultVariableCategoryLocation(Record.readSourceLocation());
 }
 
+// Read the parameter of threadset clause. This will have been saved when
+// OMPClauseWriter is called.
+void OMPClauseReader::VisitOMPThreadsetClause(OMPThreadsetClause *C) {
+  C->setLParenLoc(Record.readSourceLocation());
+  SourceLocation ThreadsetKindLoc = Record.readSourceLocation();
+  C->setThreadsetKindLoc(ThreadsetKindLoc);
+  OpenMPThreadsetKind TKind =
+      static_cast<OpenMPThreadsetKind>(Record.readInt());
+  C->setThreadsetKind(TKind);
+}
+
 void OMPClauseReader::VisitOMPProcBindClause(OMPProcBindClause *C) {
   C->setProcBindKind(static_cast<llvm::omp::ProcBindKind>(Record.readInt()));
   C->setLParenLoc(Record.readSourceLocation());
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 377e396..3ac338e 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7913,6 +7913,12 @@ void OMPClauseWriter::VisitOMPDefaultClause(OMPDefaultClause *C) {
   Record.AddSourceLocation(C->getDefaultVCLoc());
 }
 
+void OMPClauseWriter::VisitOMPThreadsetClause(OMPThreadsetClause *C) {
+  Record.AddSourceLocation(C->getLParenLoc());
+  Record.AddSourceLocation(C->getThreadsetKindLoc());
+  Record.writeEnum(C->getThreadsetKind());
+}
+
 void OMPClauseWriter::VisitOMPProcBindClause(OMPProcBindClause *C) {
   Record.push_back(unsigned(C->getProcBindKind()));
   Record.AddSourceLocation(C->getLParenLoc());
diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp
index 171c786..b4bdec1 100644
--- a/clang/lib/Tooling/Transformer/RangeSelector.cpp
+++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp
@@ -205,8 +205,12 @@ RangeSelector transformer::name(std::string ID) {
       // `foo<int>` for which this range will be too short.  Doing so will
       // require subcasing `NamedDecl`, because it doesn't provide virtual
       // access to the \c DeclarationNameInfo.
-      if (tooling::getText(R, *Result.Context) != D->getName())
-        return CharSourceRange();
+      StringRef Text = tooling::getText(R, *Result.Context);
+      if (Text != D->getName())
+        return llvm::make_error<StringError>(
+            llvm::errc::not_supported,
+            "range selected by name(node id=" + ID + "): '" + Text +
+                "' is different from decl name '" + D->getName() + "'");
       return R;
     }
     if (const auto *E = Node.get<DeclRefExpr>()) {
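
Note on the OpenMP threadset plumbing above (ParseOpenMP.cpp, SemaOpenMP.cpp, TreeTransform.h, ASTReader.cpp, ASTWriter.cpp): the diff only shows the clause being parsed, checked, and serialized, not the accepted arguments. A minimal sketch of source that this path is meant to handle, assuming the OpenMP 6.0 spellings omp_pool and omp_team (not confirmed by this diff):

// Hypothetical example, not part of this patch; assumes -fopenmp with a
// version that provides the threadset clause.
extern void background_work();

void enqueue() {
  // Assumed keyword 'omp_pool': request a free-agent thread from the pool;
  // 'omp_team' would keep the task on threads of the encountering team.
#pragma omp task threadset(omp_pool)
  background_work();
}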
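The SemaChecking.cpp and SemaDeclAttr.cpp hunks switch the argument counting for the format, callback, and lifetime_capture_by attributes from "is an instance method" to "has an implicit object parameter". A small sketch of why that count matters, using the usual GCC-style numbering (the example declaration is illustrative, not from the patch):

// The implicit 'this' of a non-static member function occupies argument
// slot 1, so the format string is argument 2 and the checked variadic
// arguments start at 3.
struct Logger {
  void log(const char *fmt, ...) __attribute__((format(printf, 2, 3)));
};

void demo(Logger &l) {
  l.log("%s: %d\n", "value", 42); // diagnosed against indices 2 and 3
}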
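The SemaCodeComplete.cpp hunk offers "embed" as a preprocessor-directive completion in C23 mode, in both quoted and angle-bracket forms. For reference, a sketch of the directive those completions lead to; the file names are placeholders:

/* C23: #embed expands to the file's bytes inside an initializer. */
static const unsigned char splash_png[] = {
#embed "splash.png"
};

static const unsigned char font_data[] = {
#embed <builtin_font.bin> /* angle-bracket form, searched like <...> includes */
};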
