Diffstat (limited to 'clang/lib')
-rw-r--r--  clang/lib/AST/ASTContext.cpp                      |   5
-rw-r--r--  clang/lib/AST/ByteCode/InterpBuiltin.cpp          |  42
-rw-r--r--  clang/lib/AST/ExprConstant.cpp                    |  46
-rw-r--r--  clang/lib/AST/OpenMPClause.cpp                    |   8
-rw-r--r--  clang/lib/AST/StmtProfile.cpp                     |   2
-rw-r--r--  clang/lib/Basic/OpenMPKinds.cpp                   |  19
-rw-r--r--  clang/lib/Basic/SourceManager.cpp                 |   3
-rw-r--r--  clang/lib/Basic/Targets/AMDGPU.cpp                |   6
-rw-r--r--  clang/lib/Basic/Targets/PPC.h                     |  45
-rw-r--r--  clang/lib/Basic/Targets/X86.cpp                   |   6
-rw-r--r--  clang/lib/Basic/Targets/X86.h                     |   1
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp        |   8
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenFunction.cpp          |  36
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenFunction.h            |  81
-rw-r--r--  clang/lib/CodeGen/CGDebugInfo.cpp                 |  27
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntime.cpp             |   6
-rw-r--r--  clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp       |  81
-rw-r--r--  clang/lib/CodeGen/TargetBuiltins/ARM.cpp          | 249
-rw-r--r--  clang/lib/CodeGen/TargetBuiltins/X86.cpp          |  68
-rw-r--r--  clang/lib/CodeGen/Targets/SPIR.cpp                |   3
-rw-r--r--  clang/lib/Driver/Driver.cpp                       |  15
-rw-r--r--  clang/lib/Driver/ToolChains/Clang.cpp             |  78
-rw-r--r--  clang/lib/Driver/ToolChains/HLSL.cpp              |   9
-rw-r--r--  clang/lib/Driver/ToolChains/ZOS.cpp               |   4
-rw-r--r--  clang/lib/Format/ContinuationIndenter.cpp         |  97
-rw-r--r--  clang/lib/Format/Format.cpp                       | 111
-rw-r--r--  clang/lib/Format/FormatToken.cpp                  |   4
-rw-r--r--  clang/lib/Format/FormatToken.h                    |   6
-rw-r--r--  clang/lib/Format/TokenAnnotator.cpp               |  25
-rw-r--r--  clang/lib/Frontend/CompilerInvocation.cpp         |  11
-rw-r--r--  clang/lib/Frontend/InitPreprocessor.cpp           |   2
-rw-r--r--  clang/lib/Frontend/TextDiagnostic.cpp             | 474
-rw-r--r--  clang/lib/Headers/CMakeLists.txt                  |   6
-rw-r--r--  clang/lib/Headers/amxbf16transposeintrin.h        |  94
-rw-r--r--  clang/lib/Headers/amxcomplextransposeintrin.h     | 303
-rw-r--r--  clang/lib/Headers/amxfp16transposeintrin.h        |  94
-rw-r--r--  clang/lib/Headers/amxintrin.h                     |   2
-rw-r--r--  clang/lib/Headers/amxmovrstransposeintrin.h       | 200
-rw-r--r--  clang/lib/Headers/amxtf32transposeintrin.h        | 105
-rw-r--r--  clang/lib/Headers/amxtransposeintrin.h            | 248
-rw-r--r--  clang/lib/Headers/immintrin.h                     |  12
-rw-r--r--  clang/lib/Interpreter/InterpreterValuePrinter.cpp |   5
-rw-r--r--  clang/lib/Lex/HeaderSearch.cpp                    |   4
-rw-r--r--  clang/lib/Parse/ParseDecl.cpp                     |   7
-rw-r--r--  clang/lib/Parse/ParseOpenMP.cpp                   |   1
-rw-r--r--  clang/lib/Sema/SemaAMDGPU.cpp                     |  43
-rw-r--r--  clang/lib/Sema/SemaChecking.cpp                   |  11
-rw-r--r--  clang/lib/Sema/SemaCodeComplete.cpp               |  18
-rw-r--r--  clang/lib/Sema/SemaDeclAttr.cpp                   |   6
-rw-r--r--  clang/lib/Sema/SemaOpenMP.cpp                     |  21
-rw-r--r--  clang/lib/Sema/SemaX86.cpp                        |  17
-rw-r--r--  clang/lib/Sema/TreeTransform.h                    |   7
-rw-r--r--  clang/lib/Serialization/ASTReader.cpp             |  14
-rw-r--r--  clang/lib/Serialization/ASTWriter.cpp             |   6
-rw-r--r--  clang/lib/Tooling/Transformer/RangeSelector.cpp   |   8
55 files changed, 1013 insertions(+), 1797 deletions(-)
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 687cd46..2669f62 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -12403,6 +12403,11 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
// Read the base type.
switch (*Str++) {
default: llvm_unreachable("Unknown builtin type letter!");
+ case 'e':
+ assert(HowLong == 0 && !Signed && !Unsigned &&
+ "Bad modifiers used with 'e'!");
+ Type = Context.getLangOpts().OpenCL ? Context.HalfTy : Context.Float16Ty;
+ break;
case 'x':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'x'!");
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b3ab82d..8b57b96 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3411,7 +3411,7 @@ static bool interp__builtin_x86_byteshift(
static bool interp__builtin_ia32_shuffle_generic(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
- llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)>
+ llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
GetSourceIndex) {
assert(Call->getNumArgs() == 3);
@@ -3428,8 +3428,19 @@ static bool interp__builtin_ia32_shuffle_generic(
for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
- const Pointer &Src = (SrcVecIdx == 0) ? A : B;
- TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
+
+ if (SrcIdx < 0) {
+ // Zero out this element
+ if (ElemT == PT_Float) {
+ Dst.elem<Floating>(DstIdx) = Floating(
+ S.getASTContext().getFloatTypeSemantics(VecT->getElementType()));
+ } else {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); });
+ }
+ } else {
+ const Pointer &Src = (SrcVecIdx == 0) ? A : B;
+ TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
+ }
}
Dst.initializeAllElements();
@@ -4382,7 +4393,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
+ return std::pair<unsigned, int>{SrcIdx,
+ static_cast<int>(LaneOffset + Index)};
});
case X86::BI__builtin_ia32_shufpd:
case X86::BI__builtin_ia32_shufpd256:
@@ -4400,7 +4412,27 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
+ return std::pair<unsigned, int>{SrcIdx,
+ static_cast<int>(LaneOffset + Index)};
+ });
+ case X86::BI__builtin_ia32_insertps128:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned Mask) {
+ // Bits [3:0]: zero mask - if bit is set, zero this element
+ if ((Mask & (1 << DstIdx)) != 0) {
+ return std::pair<unsigned, int>{0, -1};
+ }
+ // Bits [7:6]: select element from source vector Y (0-3)
+ // Bits [5:4]: select destination position (0-3)
+ unsigned SrcElem = (Mask >> 6) & 0x3;
+ unsigned DstElem = (Mask >> 4) & 0x3;
+ if (DstIdx == DstElem) {
+ // Insert element from source vector (B) at this position
+ return std::pair<unsigned, int>{1, static_cast<int>(SrcElem)};
+ } else {
+ // Copy from destination vector (A)
+ return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)};
+ }
});
case X86::BI__builtin_ia32_pshufb128:
case X86::BI__builtin_ia32_pshufb256:
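
The immediate decoded above follows the SSE4.1 INSERTPS encoding: bits [7:6] select the element of the second source, bits [5:4] select the destination lane, and bits [3:0] zero out destination lanes. A minimal sketch of what this enables at the source level (illustrative only; whether the _mm_insert_ps wrapper itself is usable in constant expressions depends on how the header marks it):

    #include <immintrin.h>  // build with -msse4.1

    __m128 insert_demo(void) {
      const __m128 a = {1.0f, 2.0f, 3.0f, 4.0f};
      const __m128 b = {10.0f, 20.0f, 30.0f, 40.0f};
      // Immediate 0x91 == 0b10'01'0001: take b[2] (bits 7:6), write it to
      // lane 1 of the result (bits 5:4), then zero lane 0 (bits 3:0).
      // Expected constant-folded result: {0.0f, 30.0f, 3.0f, 4.0f}.
      return _mm_insert_ps(a, b, 0x91);
    }
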
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d0404b9..97eeba8 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11621,7 +11621,7 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
static bool evalShuffleGeneric(
EvalInfo &Info, const CallExpr *Call, APValue &Out,
- llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)>
+ llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
GetSourceIndex) {
const auto *VT = Call->getType()->getAs<VectorType>();
@@ -11644,8 +11644,16 @@ static bool evalShuffleGeneric(
for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) {
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
- const APValue &Src = (SrcVecIdx == 0) ? A : B;
- ResultElements.push_back(Src.getVectorElt(SrcIdx));
+
+ if (SrcIdx < 0) {
+ // Zero out this element
+ QualType ElemTy = VT->getElementType();
+ ResultElements.push_back(
+ APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy))));
+ } else {
+ const APValue &Src = (SrcVecIdx == 0) ? A : B;
+ ResultElements.push_back(Src.getVectorElt(SrcIdx));
+ }
}
Out = APValue(ResultElements.data(), ResultElements.size());
@@ -12438,7 +12446,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!evalShuffleGeneric(
Info, E, R,
[](unsigned DstIdx,
- unsigned ShuffleMask) -> std::pair<unsigned, unsigned> {
+ unsigned ShuffleMask) -> std::pair<unsigned, int> {
constexpr unsigned LaneBits = 128u;
unsigned NumElemPerLane = LaneBits / 32;
unsigned NumSelectableElems = NumElemPerLane / 2;
@@ -12451,7 +12459,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return {SrcIdx, LaneOffset + Index};
+ return {SrcIdx, static_cast<int>(LaneOffset + Index)};
}))
return false;
return Success(R, E);
@@ -12463,7 +12471,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!evalShuffleGeneric(
Info, E, R,
[](unsigned DstIdx,
- unsigned ShuffleMask) -> std::pair<unsigned, unsigned> {
+ unsigned ShuffleMask) -> std::pair<unsigned, int> {
constexpr unsigned LaneBits = 128u;
unsigned NumElemPerLane = LaneBits / 64;
unsigned NumSelectableElems = NumElemPerLane / 2;
@@ -12476,7 +12484,31 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return {SrcIdx, LaneOffset + Index};
+ return {SrcIdx, static_cast<int>(LaneOffset + Index)};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_insertps128: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R,
+ [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+ // Bits [3:0]: zero mask - if bit is set, zero this element
+ if ((Mask & (1 << DstIdx)) != 0) {
+ return {0, -1};
+ }
+ // Bits [7:6]: select element from source vector Y (0-3)
+ // Bits [5:4]: select destination position (0-3)
+ unsigned SrcElem = (Mask >> 6) & 0x3;
+ unsigned DstElem = (Mask >> 4) & 0x3;
+ if (DstIdx == DstElem) {
+ // Insert element from source vector (B) at this position
+ return {1, static_cast<int>(SrcElem)};
+ } else {
+ // Copy from destination vector (A)
+ return {0, static_cast<int>(DstIdx)};
+ }
}))
return false;
return Success(R, E);
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 791df7e..59d9459 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -124,6 +124,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) {
case OMPC_nowait:
case OMPC_untied:
case OMPC_mergeable:
+ case OMPC_threadset:
case OMPC_threadprivate:
case OMPC_groupprivate:
case OMPC_flush:
@@ -2035,6 +2036,13 @@ void OMPClausePrinter::VisitOMPDefaultClause(OMPDefaultClause *Node) {
OS << ")";
}
+void OMPClausePrinter::VisitOMPThreadsetClause(OMPThreadsetClause *Node) {
+ OS << "threadset("
+ << getOpenMPSimpleClauseTypeName(OMPC_threadset,
+ unsigned(Node->getThreadsetKind()))
+ << ")";
+}
+
void OMPClausePrinter::VisitOMPProcBindClause(OMPProcBindClause *Node) {
OS << "proc_bind("
<< getOpenMPSimpleClauseTypeName(OMPC_proc_bind,
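
The printer above emits the source spelling of the new OpenMP 6.0 threadset clause. An illustrative use follows (hedged: requires -fopenmp -fopenmp-version=60, and the runtime effect depends on free-agent-thread support in the OpenMP runtime):

    void scale(int *data, int n) {
    #pragma omp parallel
    #pragma omp single
      {
        // threadset(omp_pool) allows the task to run on a free-agent thread;
        // the CGOpenMPRuntime change further down maps this to the 0x80
        // (FreeAgentFlag) bit in the task allocation flags.
    #pragma omp task threadset(omp_pool)
        for (int i = 0; i < n; ++i)
          data[i] *= 2;

        // threadset(omp_team) keeps the task on threads of the current team.
    #pragma omp task threadset(omp_team)
        data[0] += 1;
      }
    }
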
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 05b64cc..c909e1b 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -546,6 +546,8 @@ void OMPClauseProfiler::VisitOMPNocontextClause(const OMPNocontextClause *C) {
void OMPClauseProfiler::VisitOMPDefaultClause(const OMPDefaultClause *C) { }
+void OMPClauseProfiler::VisitOMPThreadsetClause(const OMPThreadsetClause *C) {}
+
void OMPClauseProfiler::VisitOMPProcBindClause(const OMPProcBindClause *C) { }
void OMPClauseProfiler::VisitOMPUnifiedAddressClause(
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index 64b2bff..3d41f2d 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -210,6 +210,15 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,
#define OPENMP_ALLOCATE_MODIFIER(Name) .Case(#Name, OMPC_ALLOCATE_##Name)
#include "clang/Basic/OpenMPKinds.def"
.Default(OMPC_ALLOCATE_unknown);
+ case OMPC_threadset: {
+ unsigned Type = llvm::StringSwitch<unsigned>(Str)
+#define OPENMP_THREADSET_KIND(Name) .Case(#Name, OMPC_THREADSET_##Name)
+#include "clang/Basic/OpenMPKinds.def"
+ .Default(OMPC_THREADSET_unknown);
+ if (LangOpts.OpenMP < 60)
+ return OMPC_THREADSET_unknown;
+ return Type;
+ }
case OMPC_num_threads: {
unsigned Type = llvm::StringSwitch<unsigned>(Str)
#define OPENMP_NUMTHREADS_MODIFIER(Name) .Case(#Name, OMPC_NUMTHREADS_##Name)
@@ -565,6 +574,16 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
#include "clang/Basic/OpenMPKinds.def"
}
llvm_unreachable("Invalid OpenMP 'num_threads' clause modifier");
+ case OMPC_threadset:
+ switch (Type) {
+ case OMPC_THREADSET_unknown:
+ return "unknown";
+#define OPENMP_THREADSET_KIND(Name) \
+ case OMPC_THREADSET_##Name: \
+ return #Name;
+#include "clang/Basic/OpenMPKinds.def"
+ }
+ llvm_unreachable("Invalid OpenMP 'threadset' clause modifier");
case OMPC_unknown:
case OMPC_threadprivate:
case OMPC_groupprivate:
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index d8ec837..938c648 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -608,8 +608,7 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,
return FileID::get(LoadedID);
}
unsigned FileSize = File.getSize();
- llvm::ErrorOr<bool> NeedConversion =
- llvm::needConversion(Filename.str().c_str());
+ llvm::ErrorOr<bool> NeedConversion = llvm::needConversion(Filename);
if (NeedConversion && *NeedConversion) {
// Buffer size may increase due to potential z/OS EBCDIC to UTF-8
// conversion.
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index d4de704..d4d696b 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -356,12 +356,6 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
if (hasFastFMA())
Builder.defineMacro("FP_FAST_FMA");
- Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize),
- "compile-time-constant access to the wavefront size will "
- "be removed in a future release");
- Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize),
- "compile-time-constant access to the wavefront size will "
- "be removed in a future release");
Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
}
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 846b240..d2eb9c5 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -445,27 +445,17 @@ public:
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
IntMaxType = SignedLong;
Int64Type = SignedLong;
- std::string DataLayout;
if (Triple.isOSAIX()) {
// TODO: Set appropriate ABI for AIX platform.
- DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64";
LongDoubleWidth = 64;
LongDoubleAlign = DoubleAlign = 32;
LongDoubleFormat = &llvm::APFloat::IEEEdouble();
- } else if ((Triple.getArch() == llvm::Triple::ppc64le)) {
- DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64";
+ } else if ((Triple.getArch() == llvm::Triple::ppc64le) ||
+ Triple.isPPC64ELFv2ABI()) {
ABI = "elfv2";
} else {
- DataLayout = "E-m:e";
- if (Triple.isPPC64ELFv2ABI()) {
- ABI = "elfv2";
- DataLayout += "-Fn32";
- } else {
- ABI = "elfv1";
- DataLayout += "-Fi64";
- }
- DataLayout += "-i64:64-i128:128-n32:64";
+ ABI = "elfv1";
}
if (Triple.isOSFreeBSD() || Triple.isOSOpenBSD() || Triple.isMusl()) {
@@ -473,14 +463,12 @@ public:
LongDoubleFormat = &llvm::APFloat::IEEEdouble();
}
- if (Triple.isOSAIX() || Triple.isOSLinux())
- DataLayout += "-S128-v256:256:256-v512:512:512";
- resetDataLayout(DataLayout);
-
// Newer PPC64 instruction sets support atomics up to 16 bytes.
MaxAtomicPromoteWidth = 128;
// Baseline PPC64 supports inlining atomics up to 8 bytes.
MaxAtomicInlineWidth = 64;
+
+ calculateDataLayout();
}
void setMaxAtomicWidth() override {
@@ -495,10 +483,33 @@ public:
return TargetInfo::CharPtrBuiltinVaList;
}
+ void calculateDataLayout() {
+ std::string DataLayout;
+
+ if (getTriple().isOSAIX()) {
+ DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64";
+ } else if ((getTriple().getArch() == llvm::Triple::ppc64le)) {
+ DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64";
+ } else {
+ DataLayout = "E-m:e";
+ if (ABI == "elfv2") {
+ DataLayout += "-Fn32";
+ } else {
+ DataLayout += "-Fi64";
+ }
+ DataLayout += "-i64:64-i128:128-n32:64";
+ }
+
+ if (getTriple().isOSAIX() || getTriple().isOSLinux())
+ DataLayout += "-S128-v256:256:256-v512:512:512";
+ resetDataLayout(DataLayout);
+ }
+
// PPC64 Linux-specific ABI options.
bool setABI(const std::string &Name) override {
if (Name == "elfv1" || Name == "elfv2") {
ABI = Name;
+ calculateDataLayout();
return true;
}
return false;
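
The refactor moves the layout computation into calculateDataLayout() so that setABI() can re-run it. Roughly, the strings it assembles per the branches above (with the -S128-v256:256:256-v512:512:512 suffix appended on AIX and Linux) are:

    // 64-bit PowerPC data layout strings produced by calculateDataLayout():
    //   AIX:                 E-m:a-Fi64-i64:64-i128:128-n32:64
    //   ppc64le (ELFv2):     e-m:e-Fn32-i64:64-i128:128-n32:64
    //   ppc64 BE, ABI elfv2: E-m:e-Fn32-i64:64-i128:128-n32:64
    //   ppc64 BE, ABI elfv1: E-m:e-Fi64-i64:64-i128:128-n32:64
    // Because setABI("elfv1"/"elfv2") now calls calculateDataLayout(), an
    // explicit -mabi= option updates the Fn32/Fi64 component instead of
    // keeping whatever layout was chosen at construction time.
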
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index e71f10c..7a90c89 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -396,8 +396,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAMXFP8 = true;
} else if (Feature == "+amx-movrs") {
HasAMXMOVRS = true;
- } else if (Feature == "+amx-transpose") {
- HasAMXTRANSPOSE = true;
} else if (Feature == "+amx-avx512") {
HasAMXAVX512 = true;
} else if (Feature == "+amx-tf32") {
@@ -925,8 +923,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AMX_FP8__");
if (HasAMXMOVRS)
Builder.defineMacro("__AMX_MOVRS__");
- if (HasAMXTRANSPOSE)
- Builder.defineMacro("__AMX_TRANSPOSE__");
if (HasAMXAVX512)
Builder.defineMacro("__AMX_AVX512__");
if (HasAMXTF32)
@@ -1068,7 +1064,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("amx-movrs", true)
.Case("amx-tf32", true)
.Case("amx-tile", true)
- .Case("amx-transpose", true)
.Case("avx", true)
.Case("avx10.1", true)
.Case("avx10.2", true)
@@ -1189,7 +1184,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("amx-movrs", HasAMXMOVRS)
.Case("amx-tf32", HasAMXTF32)
.Case("amx-tile", HasAMXTILE)
- .Case("amx-transpose", HasAMXTRANSPOSE)
.Case("avx", SSELevel >= AVX)
.Case("avx10.1", HasAVX10_1)
.Case("avx10.2", HasAVX10_2)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index be3a473..e7da262 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -160,7 +160,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAMXCOMPLEX = false;
bool HasAMXFP8 = false;
bool HasAMXMOVRS = false;
- bool HasAMXTRANSPOSE = false;
bool HasAMXAVX512 = false;
bool HasAMXTF32 = false;
bool HasSERIALIZE = false;
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 3c9c7ec..0198a9d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -771,14 +771,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI_WriteBarrier:
case X86::BI_AddressOfReturnAddress:
case X86::BI__stosb:
- case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
case X86::BI__ud2:
case X86::BI__int2c:
case X86::BI__readfsbyte:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 71ff20a..5d5209b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -242,12 +242,19 @@ void CIRGenFunction::LexicalScope::cleanup() {
}
};
- if (returnBlock != nullptr) {
- // Write out the return block, which loads the value from `__retval` and
- // issues the `cir.return`.
+  // Cleanups are done right before codegen resumes a scope. This is where
+ // objects are destroyed. Process all return blocks.
+ // TODO(cir): Handle returning from a switch statement through a cleanup
+ // block. We can't simply jump to the cleanup block, because the cleanup block
+ // is not part of the case region. Either reemit all cleanups in the return
+ // block or wait for MLIR structured control flow to support early exits.
+ llvm::SmallVector<mlir::Block *> retBlocks;
+ for (mlir::Block *retBlock : localScope->getRetBlocks()) {
mlir::OpBuilder::InsertionGuard guard(builder);
- builder.setInsertionPointToEnd(returnBlock);
- (void)emitReturn(*returnLoc);
+ builder.setInsertionPointToEnd(retBlock);
+ retBlocks.push_back(retBlock);
+ mlir::Location retLoc = localScope->getRetLoc(retBlock);
+ emitReturn(retLoc);
}
auto insertCleanupAndLeave = [&](mlir::Block *insPt) {
@@ -274,19 +281,22 @@ void CIRGenFunction::LexicalScope::cleanup() {
if (localScope->depth == 0) {
// Reached the end of the function.
- if (returnBlock != nullptr) {
- if (returnBlock->getUses().empty()) {
- returnBlock->erase();
+ // Special handling only for single return block case
+ if (localScope->getRetBlocks().size() == 1) {
+ mlir::Block *retBlock = localScope->getRetBlocks()[0];
+ mlir::Location retLoc = localScope->getRetLoc(retBlock);
+ if (retBlock->getUses().empty()) {
+ retBlock->erase();
} else {
// Thread return block via cleanup block.
if (cleanupBlock) {
- for (mlir::BlockOperand &blockUse : returnBlock->getUses()) {
+ for (mlir::BlockOperand &blockUse : retBlock->getUses()) {
cir::BrOp brOp = mlir::cast<cir::BrOp>(blockUse.getOwner());
brOp.setSuccessor(cleanupBlock);
}
}
- cir::BrOp::create(builder, *returnLoc, returnBlock);
+ cir::BrOp::create(builder, retLoc, retBlock);
return;
}
}
@@ -324,8 +334,10 @@ void CIRGenFunction::LexicalScope::cleanup() {
bool entryBlock = builder.getInsertionBlock()->isEntryBlock();
if (!entryBlock && curBlock->empty()) {
curBlock->erase();
- if (returnBlock != nullptr && returnBlock->getUses().empty())
- returnBlock->erase();
+ for (mlir::Block *retBlock : retBlocks) {
+ if (retBlock->getUses().empty())
+ retBlock->erase();
+ }
return;
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index c3fcd1a6..e5cecaa5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1103,44 +1103,69 @@ public:
// ---
private:
- // `returnBlock`, `returnLoc`, and all the functions that deal with them
- // will change and become more complicated when `switch` statements are
- // upstreamed. `case` statements within the `switch` are in the same scope
- // but have their own regions. Therefore the LexicalScope will need to
- // keep track of multiple return blocks.
- mlir::Block *returnBlock = nullptr;
- std::optional<mlir::Location> returnLoc;
-
- // See the comment on `getOrCreateRetBlock`.
+ // On switches we need one return block per region, since cases don't
+ // have their own scopes but are distinct regions nonetheless.
+
+ // TODO: This implementation should change once we have support for early
+ // exits in MLIR structured control flow (llvm-project#161575)
+ llvm::SmallVector<mlir::Block *> retBlocks;
+ llvm::DenseMap<mlir::Block *, mlir::Location> retLocs;
+ llvm::DenseMap<cir::CaseOp, unsigned> retBlockInCaseIndex;
+ std::optional<unsigned> normalRetBlockIndex;
+
+ // There's usually only one ret block per scope, but this needs to be
+ // get or create because of potential unreachable return statements, note
+ // that for those, all source location maps to the first one found.
mlir::Block *createRetBlock(CIRGenFunction &cgf, mlir::Location loc) {
- assert(returnBlock == nullptr && "only one return block per scope");
- // Create the cleanup block but don't hook it up just yet.
+ assert((isa_and_nonnull<cir::CaseOp>(
+ cgf.builder.getBlock()->getParentOp()) ||
+ retBlocks.size() == 0) &&
+ "only switches can hold more than one ret block");
+
+ // Create the return block but don't hook it up just yet.
mlir::OpBuilder::InsertionGuard guard(cgf.builder);
- returnBlock =
- cgf.builder.createBlock(cgf.builder.getBlock()->getParent());
- updateRetLoc(returnBlock, loc);
- return returnBlock;
+ auto *b = cgf.builder.createBlock(cgf.builder.getBlock()->getParent());
+ retBlocks.push_back(b);
+ updateRetLoc(b, loc);
+ return b;
}
cir::ReturnOp emitReturn(mlir::Location loc);
void emitImplicitReturn();
public:
- mlir::Block *getRetBlock() { return returnBlock; }
- mlir::Location getRetLoc(mlir::Block *b) { return *returnLoc; }
- void updateRetLoc(mlir::Block *b, mlir::Location loc) { returnLoc = loc; }
-
- // Create the return block for this scope, or return the existing one.
- // This get-or-create logic is necessary to handle multiple return
- // statements within the same scope, which can happen if some of them are
- // dead code or if there is a `goto` into the middle of the scope.
+ llvm::ArrayRef<mlir::Block *> getRetBlocks() { return retBlocks; }
+ mlir::Location getRetLoc(mlir::Block *b) { return retLocs.at(b); }
+ void updateRetLoc(mlir::Block *b, mlir::Location loc) {
+ retLocs.insert_or_assign(b, loc);
+ }
+
mlir::Block *getOrCreateRetBlock(CIRGenFunction &cgf, mlir::Location loc) {
- if (returnBlock == nullptr) {
- returnBlock = createRetBlock(cgf, loc);
- return returnBlock;
+ // Check if we're inside a case region
+ if (auto caseOp = mlir::dyn_cast_if_present<cir::CaseOp>(
+ cgf.builder.getBlock()->getParentOp())) {
+ auto iter = retBlockInCaseIndex.find(caseOp);
+ if (iter != retBlockInCaseIndex.end()) {
+ // Reuse existing return block
+ mlir::Block *ret = retBlocks[iter->second];
+ updateRetLoc(ret, loc);
+ return ret;
+ }
+ // Create new return block
+ mlir::Block *ret = createRetBlock(cgf, loc);
+ retBlockInCaseIndex[caseOp] = retBlocks.size() - 1;
+ return ret;
}
- updateRetLoc(returnBlock, loc);
- return returnBlock;
+
+ if (normalRetBlockIndex) {
+ mlir::Block *ret = retBlocks[*normalRetBlockIndex];
+ updateRetLoc(ret, loc);
+ return ret;
+ }
+
+ mlir::Block *ret = createRetBlock(cgf, loc);
+ normalRetBlockIndex = retBlocks.size() - 1;
+ return ret;
}
mlir::Block *getEntryBlock() { return entryBlock; }
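
A small source-level example of the situation this supports: each case of a switch becomes its own CaseOp region in CIR, so a return inside a case cannot branch to a block owned by the enclosing function scope and needs its own return block.

    int classify(int v) {
      switch (v) {
      case 0:
        return -1;   // return block owned by this case region
      case 1:
        return 1;    // a second, distinct return block (retBlockInCaseIndex)
      default:
        break;
      }
      return 0;      // the function-scope return block (normalRetBlockIndex)
    }
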
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 6af8066..ca579c9 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -345,7 +345,7 @@ void CGDebugInfo::setLocation(SourceLocation Loc) {
if (Loc.isInvalid())
return;
- CurLoc = CGM.getContext().getSourceManager().getExpansionLoc(Loc);
+ CurLoc = CGM.getContext().getSourceManager().getFileLoc(Loc);
// If we've changed files in the middle of a lexical scope go ahead
// and create a new lexical scope with file node if it's different
@@ -572,7 +572,7 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
FileName = TheCU->getFile()->getFilename();
CSInfo = TheCU->getFile()->getChecksum();
} else {
- PresumedLoc PLoc = SM.getPresumedLoc(Loc);
+ PresumedLoc PLoc = SM.getPresumedLoc(SM.getFileLoc(Loc));
FileName = PLoc.getFilename();
if (FileName.empty()) {
@@ -599,7 +599,8 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
if (CSKind)
CSInfo.emplace(*CSKind, Checksum);
}
- return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc)));
+ return createFile(FileName, CSInfo,
+ getSource(SM, SM.getFileID(SM.getFileLoc(Loc))));
}
llvm::DIFile *CGDebugInfo::createFile(
@@ -654,7 +655,7 @@ unsigned CGDebugInfo::getLineNumber(SourceLocation Loc) {
if (Loc.isInvalid())
return 0;
SourceManager &SM = CGM.getContext().getSourceManager();
- return SM.getPresumedLoc(Loc).getLine();
+ return SM.getPresumedLoc(SM.getFileLoc(Loc)).getLine();
}
unsigned CGDebugInfo::getColumnNumber(SourceLocation Loc, bool Force) {
@@ -666,7 +667,8 @@ unsigned CGDebugInfo::getColumnNumber(SourceLocation Loc, bool Force) {
if (Loc.isInvalid() && CurLoc.isInvalid())
return 0;
SourceManager &SM = CGM.getContext().getSourceManager();
- PresumedLoc PLoc = SM.getPresumedLoc(Loc.isValid() ? Loc : CurLoc);
+ PresumedLoc PLoc =
+ SM.getPresumedLoc(Loc.isValid() ? SM.getFileLoc(Loc) : CurLoc);
return PLoc.isValid() ? PLoc.getColumn() : 0;
}
@@ -1174,14 +1176,16 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
}
llvm::DIType *CGDebugInfo::CreateType(const BitIntType *Ty) {
-
- StringRef Name = Ty->isUnsigned() ? "unsigned _BitInt" : "_BitInt";
+ SmallString<32> Name;
+ llvm::raw_svector_ostream OS(Name);
+ OS << (Ty->isUnsigned() ? "unsigned _BitInt(" : "_BitInt(")
+ << Ty->getNumBits() << ")";
llvm::dwarf::TypeKind Encoding = Ty->isUnsigned()
? llvm::dwarf::DW_ATE_unsigned
: llvm::dwarf::DW_ATE_signed;
-
return DBuilder.createBasicType(Name, CGM.getContext().getTypeSize(Ty),
- Encoding);
+ Encoding, llvm::DINode::FlagZero, 0,
+ Ty->getNumBits());
}
llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) {
@@ -5000,7 +5004,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {
// Update our current location
setLocation(Loc);
- if (CurLoc.isInvalid() || CurLoc.isMacroID() || LexicalBlockStack.empty())
+ if (CurLoc.isInvalid() || LexicalBlockStack.empty())
return;
llvm::MDNode *Scope = LexicalBlockStack.back();
@@ -6276,7 +6280,8 @@ void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,
void CGDebugInfo::AddStringLiteralDebugInfo(llvm::GlobalVariable *GV,
const StringLiteral *S) {
SourceLocation Loc = S->getStrTokenLoc(0);
- PresumedLoc PLoc = CGM.getContext().getSourceManager().getPresumedLoc(Loc);
+ SourceManager &SM = CGM.getContext().getSourceManager();
+ PresumedLoc PLoc = SM.getPresumedLoc(SM.getFileLoc(Loc));
if (!PLoc.isValid())
return;
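
With the CreateType change above, the debug basic type for a bit-precise integer now carries its width in the name and passes the precise bit count to the DIBuilder alongside the storage size. A hedged illustration (clang accepts _BitInt in C++ as an extension; it is standard in C23):

    unsigned _BitInt(37) counter = 0;  // debug type name: "unsigned _BitInt(37)"
    signed _BitInt(7) tiny = -3;       // debug type name: "_BitInt(7)"
    // Previously both were named just "unsigned _BitInt" / "_BitInt", with
    // only the padded storage size recorded.
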
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 66fea92..121de42 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3731,6 +3731,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
DestructorsFlag = 0x8,
PriorityFlag = 0x20,
DetachableFlag = 0x40,
+ FreeAgentFlag = 0x80,
};
unsigned Flags = Data.Tied ? TiedFlag : 0;
bool NeedsCleanup = false;
@@ -3740,6 +3741,11 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
if (NeedsCleanup)
Flags = Flags | DestructorsFlag;
}
+ if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
+ OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
+ if (Kind == OMPC_THREADSET_omp_pool)
+ Flags = Flags | FreeAgentFlag;
+ }
if (Data.Priority.getInt())
Flags = Flags | PriorityFlag;
if (D.hasClausesOfKind<OMPDetachClause>())
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index f49a5af..9eab709 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -647,8 +647,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
llvm::Type *ResultType = ConvertType(E->getType());
llvm::Value *Src = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
- return Builder.CreateCall(F, { Src });
+ Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+ return Builder.CreateCall(F, {Src});
}
case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
@@ -1139,6 +1139,83 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
return emitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_sample_cube, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_1d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_1d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_2d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_2d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_3d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_3d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_cube, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_2darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false);
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 60f9b86..15fa78d 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -1193,14 +1193,22 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType),
NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType),
NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
@@ -1243,27 +1251,43 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType),
NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType),
NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
- NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
- NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType),
NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
- NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
- NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType),
NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
- NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
- NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType),
NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
@@ -7067,127 +7091,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::bitreverse;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
}
- case NEON::BI__builtin_neon_vaddv_u8:
- // FIXME: These are handled by the AArch64 scalar code.
- usgn = true;
- [[fallthrough]];
- case NEON::BI__builtin_neon_vaddv_s8: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vaddv_u16:
- usgn = true;
- [[fallthrough]];
- case NEON::BI__builtin_neon_vaddv_s16: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vaddvq_u8:
- usgn = true;
- [[fallthrough]];
- case NEON::BI__builtin_neon_vaddvq_s8: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vaddvq_u16:
- usgn = true;
- [[fallthrough]];
- case NEON::BI__builtin_neon_vaddvq_s16: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_u8: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_u16: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_u8: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_u16: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_s8: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_s16: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_s8: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_s16: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
case NEON::BI__builtin_neon_vmaxv_f16: {
Int = Intrinsic::aarch64_neon_fmaxv;
Ty = HalfTy;
@@ -7206,78 +7109,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
return Builder.CreateTrunc(Ops[0], HalfTy);
}
- case NEON::BI__builtin_neon_vminv_u8: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminv_u16: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminvq_u8: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminvq_u16: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminv_s8: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminv_s16: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminvq_s8: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminvq_s16: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
case NEON::BI__builtin_neon_vminv_f16: {
Int = Intrinsic::aarch64_neon_fminv;
Ty = HalfTy;
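
Together with the NEONMAP changes earlier in this file, the deleted special cases mean the integer across-vector reductions now lower through the target-independent llvm.vector.reduce.* intrinsics rather than aarch64.neon.{s,u}addv / {s,u}maxv / {s,u}minv; the AArch64 backend still selects the same ADDV/SMAXV/UMINV-style instructions. A hedged sketch of affected user code and the expected intrinsics:

    #include <arm_neon.h>

    int8_t   sum8(int8x8_t v)    { return vaddv_s8(v); }    // llvm.vector.reduce.add.v8i8
    uint16_t max16(uint16x8_t v) { return vmaxvq_u16(v); }  // llvm.vector.reduce.umax.v8i16
    int32_t  min32(int32x4_t v)  { return vminvq_s32(v); }  // llvm.vector.reduce.smin.v4i32
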
diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
index b924407..2381b2e 100644
--- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
@@ -2931,74 +2931,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// instruction, but it will create a memset that won't be optimized away.
return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
}
- // Corresponding to intrisics which will return 2 tiles (tile0_tile1).
- case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
- Intrinsic::ID IID;
- switch (BuiltinID) {
- default:
- llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
- IID = Intrinsic::x86_t2rpntlvwz0_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
- IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
- IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
- IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
- IID = Intrinsic::x86_t2rpntlvwz1_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
- IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
- IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
- IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
- break;
- }
-
- // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
- {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
-
- auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
- assert(PtrTy && "arg3 must be of pointer type");
- QualType PtreeTy = PtrTy->getPointeeType();
- llvm::Type *TyPtee = ConvertType(PtreeTy);
-
- // Bitcast amx type (x86_amx) to vector type (256 x i32)
- // Then store tile0 into DstPtr0
- Value *T0 = Builder.CreateExtractValue(Call, 0);
- Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
- {TyPtee}, {T0});
- Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
-
- // Then store tile1 into DstPtr1
- Value *T1 = Builder.CreateExtractValue(Call, 1);
- Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
- {TyPtee}, {T1});
- Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
-
- // Note: Here we escape directly use x86_tilestored64_internal to store
- // the results due to it can't make sure the Mem written scope. This may
- // cause shapes reloads after first amx intrinsic, which current amx reg-
- // ister allocation has no ability to handle it.
-
- return Store;
- }
case X86::BI__ud2:
// llvm.trap makes a ud2a instruction on x86.
return EmitTrapCall(Intrinsic::trap);
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 15d0b35..abd049a 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -260,7 +260,8 @@ CommonSPIRTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
LangAS AS = QT->getUnqualifiedDesugaredType()->isNullPtrType()
? LangAS::Default
: QT->getPointeeType().getAddressSpace();
- if (AS == LangAS::Default || AS == LangAS::opencl_generic)
+ if (AS == LangAS::Default || AS == LangAS::opencl_generic ||
+ AS == LangAS::opencl_constant)
return llvm::ConstantPointerNull::get(PT);
auto &Ctx = CGM.getContext();
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 40ea513..71c5280 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -308,9 +308,18 @@ InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings,
auto ArgString = A->getAsString(Args);
std::string Nearest;
if (getOpts().findNearest(ArgString, Nearest, VisibilityMask) > 1) {
- if (!IsCLMode() &&
- getOpts().findExact(ArgString, Nearest,
- llvm::opt::Visibility(options::CC1Option))) {
+ if (IsFlangMode()) {
+ if (getOpts().findExact(ArgString, Nearest,
+ llvm::opt::Visibility(options::FC1Option))) {
+ DiagID = diag::err_drv_unknown_argument_with_suggestion;
+ Diags.Report(DiagID) << ArgString << "-Xflang " + Nearest;
+ } else {
+ DiagID = diag::err_drv_unknown_argument;
+ Diags.Report(DiagID) << ArgString;
+ }
+ } else if (!IsCLMode() && getOpts().findExact(ArgString, Nearest,
+ llvm::opt::Visibility(
+ options::CC1Option))) {
DiagID = diag::err_drv_unknown_argument_with_suggestion;
Diags.Report(DiagID) << ArgString << "-Xclang " + Nearest;
} else {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 79edc56..d3ab6f1 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1414,17 +1414,18 @@ static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args,
GuardedControlStack = PBP.GuardedControlStack;
}
- bool HasPtrauthReturns = llvm::any_of(CmdArgs, [](const char *Arg) {
- return StringRef(Arg) == "-fptrauth-returns";
- });
+ Arg *PtrauthReturnsArg = Args.getLastArg(options::OPT_fptrauth_returns,
+ options::OPT_fno_ptrauth_returns);
+ bool HasPtrauthReturns =
+ PtrauthReturnsArg &&
+ PtrauthReturnsArg->getOption().matches(options::OPT_fptrauth_returns);
// GCS is currently untested with ptrauth-returns, but enabling this could be
// allowed in future after testing with a suitable system.
- if (HasPtrauthReturns &&
- (Scope != "none" || BranchProtectionPAuthLR || GuardedControlStack)) {
+ if (Scope != "none" || BranchProtectionPAuthLR || GuardedControlStack) {
if (Triple.getEnvironment() == llvm::Triple::PAuthTest)
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << Triple.getTriple();
- else
+ else if (HasPtrauthReturns)
D.Diag(diag::err_drv_incompatible_options)
<< A->getAsString(Args) << "-fptrauth-returns";
}
@@ -1670,34 +1671,42 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args,
AddUnalignedAccessWarning(CmdArgs);
- Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_intrinsics,
- options::OPT_fno_ptrauth_intrinsics);
- Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_calls,
- options::OPT_fno_ptrauth_calls);
- Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_returns,
- options::OPT_fno_ptrauth_returns);
- Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_auth_traps,
- options::OPT_fno_ptrauth_auth_traps);
- Args.addOptInFlag(
- CmdArgs, options::OPT_fptrauth_vtable_pointer_address_discrimination,
- options::OPT_fno_ptrauth_vtable_pointer_address_discrimination);
- Args.addOptInFlag(
- CmdArgs, options::OPT_fptrauth_vtable_pointer_type_discrimination,
- options::OPT_fno_ptrauth_vtable_pointer_type_discrimination);
- Args.addOptInFlag(
- CmdArgs, options::OPT_fptrauth_type_info_vtable_pointer_discrimination,
- options::OPT_fno_ptrauth_type_info_vtable_pointer_discrimination);
- Args.addOptInFlag(
- CmdArgs, options::OPT_fptrauth_function_pointer_type_discrimination,
- options::OPT_fno_ptrauth_function_pointer_type_discrimination);
-
- Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_indirect_gotos,
- options::OPT_fno_ptrauth_indirect_gotos);
- Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_init_fini,
- options::OPT_fno_ptrauth_init_fini);
- Args.addOptInFlag(CmdArgs,
- options::OPT_fptrauth_init_fini_address_discrimination,
- options::OPT_fno_ptrauth_init_fini_address_discrimination);
+ if (Triple.isOSDarwin() ||
+ (Triple.isOSLinux() &&
+ Triple.getEnvironment() == llvm::Triple::PAuthTest)) {
+ Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_intrinsics,
+ options::OPT_fno_ptrauth_intrinsics);
+ Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_calls,
+ options::OPT_fno_ptrauth_calls);
+ Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_returns,
+ options::OPT_fno_ptrauth_returns);
+ Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_auth_traps,
+ options::OPT_fno_ptrauth_auth_traps);
+ Args.addOptInFlag(
+ CmdArgs, options::OPT_fptrauth_vtable_pointer_address_discrimination,
+ options::OPT_fno_ptrauth_vtable_pointer_address_discrimination);
+ Args.addOptInFlag(
+ CmdArgs, options::OPT_fptrauth_vtable_pointer_type_discrimination,
+ options::OPT_fno_ptrauth_vtable_pointer_type_discrimination);
+ Args.addOptInFlag(
+ CmdArgs, options::OPT_fptrauth_type_info_vtable_pointer_discrimination,
+ options::OPT_fno_ptrauth_type_info_vtable_pointer_discrimination);
+ Args.addOptInFlag(
+ CmdArgs, options::OPT_fptrauth_function_pointer_type_discrimination,
+ options::OPT_fno_ptrauth_function_pointer_type_discrimination);
+ Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_indirect_gotos,
+ options::OPT_fno_ptrauth_indirect_gotos);
+ }
+ if (Triple.isOSLinux() &&
+ Triple.getEnvironment() == llvm::Triple::PAuthTest) {
+ Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_init_fini,
+ options::OPT_fno_ptrauth_init_fini);
+ Args.addOptInFlag(
+ CmdArgs, options::OPT_fptrauth_init_fini_address_discrimination,
+ options::OPT_fno_ptrauth_init_fini_address_discrimination);
+ Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_elf_got,
+ options::OPT_fno_ptrauth_elf_got);
+ }
Args.addOptInFlag(CmdArgs, options::OPT_faarch64_jump_table_hardening,
options::OPT_fno_aarch64_jump_table_hardening);
@@ -3699,6 +3708,7 @@ static void RenderHLSLOptions(const ArgList &Args, ArgStringList &CmdArgs,
options::OPT_emit_obj,
options::OPT_disable_llvm_passes,
options::OPT_fnative_half_type,
+ options::OPT_fnative_int16_type,
options::OPT_hlsl_entrypoint,
options::OPT_fdx_rootsignature_define,
options::OPT_fdx_rootsignature_version,
diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp b/clang/lib/Driver/ToolChains/HLSL.cpp
index 20a320e..8d3fba7 100644
--- a/clang/lib/Driver/ToolChains/HLSL.cpp
+++ b/clang/lib/Driver/ToolChains/HLSL.cpp
@@ -498,6 +498,15 @@ HLSLToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
continue;
}
+ if (A->getOption().getID() == options::OPT_enable_16bit_types) {
+ // Translate -enable-16bit-types into -fnative-half-type and
+ // -fnative-int16-type
+ DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_fnative_half_type));
+ DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_fnative_int16_type));
+ A->claim();
+ continue;
+ }
+
DAL->append(A);
}
diff --git a/clang/lib/Driver/ToolChains/ZOS.cpp b/clang/lib/Driver/ToolChains/ZOS.cpp
index 57bcb3c..9a3c453 100644
--- a/clang/lib/Driver/ToolChains/ZOS.cpp
+++ b/clang/lib/Driver/ToolChains/ZOS.cpp
@@ -75,7 +75,7 @@ void zos::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
- Exec, CmdArgs, Inputs));
+ Exec, CmdArgs, Inputs, Output));
}
static std::string getLEHLQ(const ArgList &Args) {
@@ -213,7 +213,7 @@ void zos::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
- Exec, CmdArgs, Inputs));
+ Exec, CmdArgs, Inputs, Output));
}
ToolChain::RuntimeLibType ZOS::GetDefaultRuntimeLibType() const {
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index e5abf83..9ab024a 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -356,9 +356,11 @@ bool ContinuationIndenter::canBreak(const LineState &State) {
return CurrentState.BreakBeforeClosingBrace;
}
- // Allow breaking before the right parens with block indentation if there was
- // a break after the left parens, which is tracked by BreakBeforeClosingParen.
- if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent &&
+  // Check whether we need to break before the right paren if there was a
+  // break after the left paren, which is tracked by BreakBeforeClosingParen.
+ if ((Style.BreakBeforeCloseBracketFunction ||
+ Style.BreakBeforeCloseBracketIf || Style.BreakBeforeCloseBracketLoop ||
+ Style.BreakBeforeCloseBracketSwitch) &&
Current.is(tok::r_paren)) {
return CurrentState.BreakBeforeClosingParen;
}
@@ -837,32 +839,38 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
return Tok.is(tok::l_brace) && Tok.isNot(BK_Block) &&
Style.Cpp11BracedListStyle != FormatStyle::BLS_Block;
};
- if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) &&
- !IsStartOfBracedList()) {
+ if (IsStartOfBracedList())
+ return Style.BreakAfterOpenBracketBracedList;
+ if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square))
return false;
- }
if (!Tok.Previous)
return true;
if (Tok.Previous->isIf())
- return Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak;
- return Tok.Previous->isNoneOf(TT_CastRParen, tok::kw_for, tok::kw_while,
- tok::kw_switch) &&
- !(Style.isJavaScript() && Tok.Previous->is(Keywords.kw_await));
+ return Style.BreakAfterOpenBracketIf;
+ if (Tok.Previous->isLoop(Style))
+ return Style.BreakAfterOpenBracketLoop;
+ if (Tok.Previous->is(tok::kw_switch))
+ return Style.BreakAfterOpenBracketSwitch;
+ if (Style.BreakAfterOpenBracketFunction) {
+ return !Tok.Previous->is(TT_CastRParen) &&
+ !(Style.isJavaScript() && Tok.is(Keywords.kw_await));
+ }
+ return false;
};
auto IsFunctionCallParen = [](const FormatToken &Tok) {
return Tok.is(tok::l_paren) && Tok.ParameterCount > 0 && Tok.Previous &&
Tok.Previous->is(tok::identifier);
};
- auto IsInTemplateString = [this](const FormatToken &Tok) {
+ auto IsInTemplateString = [this](const FormatToken &Tok, bool NestBlocks) {
if (!Style.isJavaScript())
return false;
for (const auto *Prev = &Tok; Prev; Prev = Prev->Previous) {
if (Prev->is(TT_TemplateString) && Prev->opensScope())
return true;
- if (Prev->opensScope() ||
- (Prev->is(TT_TemplateString) && Prev->closesScope())) {
- break;
- }
+ if (Prev->opensScope() && !NestBlocks)
+ return false;
+ if (Prev->is(TT_TemplateString) && Prev->closesScope())
+ return false;
}
return false;
};
@@ -884,21 +892,25 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
Tok.isOneOf(tok::ellipsis, Keywords.kw_await))) {
return true;
}
- if (const auto *Previous = Tok.Previous;
- !Previous || (Previous->isNoneOf(TT_FunctionDeclarationLParen,
- TT_LambdaDefinitionLParen) &&
- !IsFunctionCallParen(*Previous))) {
+ const auto *Previous = TokAfterLParen.Previous;
+ assert(Previous); // IsOpeningBracket(Previous)
+ if (Previous->Previous &&
+ (Previous->Previous->isIf() || Previous->Previous->isLoop(Style) ||
+ Previous->Previous->is(tok::kw_switch))) {
+ return false;
+ }
+ if (Previous->isNoneOf(TT_FunctionDeclarationLParen,
+ TT_LambdaDefinitionLParen) &&
+ !IsFunctionCallParen(*Previous)) {
return true;
}
- if (IsOpeningBracket(Tok) || IsInTemplateString(Tok))
+ if (IsOpeningBracket(Tok) || IsInTemplateString(Tok, true))
return true;
const auto *Next = Tok.Next;
return !Next || Next->isMemberAccess() ||
Next->is(TT_FunctionDeclarationLParen) || IsFunctionCallParen(*Next);
};
- if ((Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak ||
- Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) &&
- IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) &&
+ if (IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) &&
// Don't do this for simple (no expressions) one-argument function calls
// as that feels like needlessly wasting whitespace, e.g.:
//
@@ -920,7 +932,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
// Note: This doesn't apply to macro expansion lines, which are MACRO( , , )
// with args as children of the '(' and ',' tokens. It does not make sense to
// align the commas with the opening paren.
- if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign &&
+ if (Style.AlignAfterOpenBracket &&
!CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() &&
Previous.isNoneOf(TT_ObjCMethodExpr, TT_RequiresClause,
TT_TableGenDAGArgOpener,
@@ -933,7 +945,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
Previous.Previous->isNoneOf(tok::identifier, tok::l_paren,
BK_BracedInit))) ||
Previous.is(TT_VerilogMultiLineListLParen)) &&
- !IsInTemplateString(Current)) {
+ !IsInTemplateString(Current, false)) {
CurrentState.Indent = State.Column + Spaces;
CurrentState.IsAligned = true;
}
@@ -1271,8 +1283,20 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
}
if (PreviousNonComment && PreviousNonComment->is(tok::l_paren)) {
- CurrentState.BreakBeforeClosingParen =
- Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent;
+ if (auto Previous = PreviousNonComment->Previous) {
+ if (Previous->isIf()) {
+ CurrentState.BreakBeforeClosingParen = Style.BreakBeforeCloseBracketIf;
+ } else if (Previous->isLoop(Style)) {
+ CurrentState.BreakBeforeClosingParen =
+ Style.BreakBeforeCloseBracketLoop;
+ } else if (Previous->is(tok::kw_switch)) {
+ CurrentState.BreakBeforeClosingParen =
+ Style.BreakBeforeCloseBracketSwitch;
+ } else {
+ CurrentState.BreakBeforeClosingParen =
+ Style.BreakBeforeCloseBracketFunction;
+ }
+ }
}
if (PreviousNonComment && PreviousNonComment->is(TT_TemplateOpener))
@@ -1416,13 +1440,17 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
State.Stack.size() > 1) {
return State.Stack[State.Stack.size() - 2].LastSpace;
}
- if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent &&
- (Current.is(tok::r_paren) ||
- (Current.is(tok::r_brace) && Current.MatchingParen &&
- Current.MatchingParen->is(BK_BracedInit))) &&
+ if (Style.BreakBeforeCloseBracketBracedList && Current.is(tok::r_brace) &&
+ Current.MatchingParen && Current.MatchingParen->is(BK_BracedInit) &&
State.Stack.size() > 1) {
return State.Stack[State.Stack.size() - 2].LastSpace;
}
+ if ((Style.BreakBeforeCloseBracketFunction ||
+ Style.BreakBeforeCloseBracketIf || Style.BreakBeforeCloseBracketLoop ||
+ Style.BreakBeforeCloseBracketSwitch) &&
+ Current.is(tok::r_paren) && State.Stack.size() > 1) {
+ return State.Stack[State.Stack.size() - 2].LastSpace;
+ }
if (Style.BreakBeforeTemplateCloser && Current.is(TT_TemplateCloser) &&
State.Stack.size() > 1) {
return State.Stack[State.Stack.size() - 2].LastSpace;
@@ -1844,8 +1872,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
PrecedenceLevel < prec::Assignment) &&
(!Previous || Previous->isNot(tok::kw_return) ||
(!Style.isJava() && PrecedenceLevel > 0)) &&
- (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign ||
- PrecedenceLevel > prec::Comma || Current.NestingLevel == 0) &&
+ (Style.AlignAfterOpenBracket || PrecedenceLevel > prec::Comma ||
+ Current.NestingLevel == 0) &&
(!Style.isTableGen() ||
(Previous && Previous->isOneOf(TT_TableGenDAGArgListComma,
TT_TableGenDAGArgListCommaToBreak)))) {
@@ -1885,8 +1913,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
if (PrecedenceLevel > prec::Unknown)
NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column);
if (PrecedenceLevel != prec::Conditional &&
- Current.isNot(TT_UnaryOperator) &&
- Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) {
+ Current.isNot(TT_UnaryOperator) && Style.AlignAfterOpenBracket) {
NewParenState.StartOfFunctionCall = State.Column;
}
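
The ContinuationIndenter changes above route the old AlignAfterOpenBracket decisions through the new per-construct booleans. As a rough illustration only (identifiers are hypothetical, and the actual breaks still depend on ColumnLimit and penalties), a long condition with BreakAfterOpenBracketIf and BreakBeforeCloseBracketIf enabled would be expected to wrap along these lines:

    // Illustrative layout, not taken from the patch's tests.
    if (
        someFairlyLongCondition && anotherConditionThatDoesNotFit &&
        yetAnotherCondition
    ) {
      handleIt();
    }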
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index edd126c..dd14fcd 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -32,6 +32,13 @@ using clang::format::FormatStyle;
LLVM_YAML_IS_SEQUENCE_VECTOR(FormatStyle::RawStringFormat)
+enum BracketAlignmentStyle : int8_t {
+ BAS_Align,
+ BAS_DontAlign,
+ BAS_AlwaysBreak,
+ BAS_BlockIndent
+};
+
namespace llvm {
namespace yaml {
template <>
@@ -204,16 +211,16 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {
}
};
-template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> {
- static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) {
- IO.enumCase(Value, "Align", FormatStyle::BAS_Align);
- IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign);
- IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak);
- IO.enumCase(Value, "BlockIndent", FormatStyle::BAS_BlockIndent);
+template <> struct ScalarEnumerationTraits<BracketAlignmentStyle> {
+ static void enumeration(IO &IO, BracketAlignmentStyle &Value) {
+ IO.enumCase(Value, "Align", BAS_Align);
+ IO.enumCase(Value, "DontAlign", BAS_DontAlign);
// For backward compatibility.
- IO.enumCase(Value, "true", FormatStyle::BAS_Align);
- IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign);
+ IO.enumCase(Value, "true", BAS_Align);
+ IO.enumCase(Value, "false", BAS_DontAlign);
+ IO.enumCase(Value, "AlwaysBreak", BAS_AlwaysBreak);
+ IO.enumCase(Value, "BlockIndent", BAS_BlockIndent);
}
};
@@ -979,6 +986,54 @@ template <> struct MappingTraits<FormatStyle> {
bool SpacesInCStyleCastParentheses = false;
bool SpacesInParentheses = false;
+ if (IO.outputting()) {
+ IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
+ } else {
+ // For backward compatibility.
+ BracketAlignmentStyle LocalBAS = BAS_Align;
+ if (IsGoogleOrChromium) {
+ FormatStyle::LanguageKind Language = Style.Language;
+ if (Language == FormatStyle::LK_None)
+ Language = ((FormatStyle *)IO.getContext())->Language;
+ if (Language == FormatStyle::LK_JavaScript)
+ LocalBAS = BAS_AlwaysBreak;
+ else if (Language == FormatStyle::LK_Java)
+ LocalBAS = BAS_DontAlign;
+ } else if (BasedOnStyle.equals_insensitive("webkit")) {
+ LocalBAS = BAS_DontAlign;
+ }
+ IO.mapOptional("AlignAfterOpenBracket", LocalBAS);
+ Style.BreakAfterOpenBracketBracedList = false;
+ Style.BreakAfterOpenBracketFunction = false;
+ Style.BreakAfterOpenBracketIf = false;
+ Style.BreakAfterOpenBracketLoop = false;
+ Style.BreakAfterOpenBracketSwitch = false;
+ Style.BreakBeforeCloseBracketBracedList = false;
+ Style.BreakBeforeCloseBracketFunction = false;
+ Style.BreakBeforeCloseBracketIf = false;
+ Style.BreakBeforeCloseBracketLoop = false;
+ Style.BreakBeforeCloseBracketSwitch = false;
+
+ switch (LocalBAS) {
+ case BAS_DontAlign:
+ Style.AlignAfterOpenBracket = false;
+ break;
+ case BAS_BlockIndent:
+ Style.BreakBeforeCloseBracketBracedList = true;
+ Style.BreakBeforeCloseBracketFunction = true;
+ Style.BreakBeforeCloseBracketIf = true;
+ [[fallthrough]];
+ case BAS_AlwaysBreak:
+ Style.BreakAfterOpenBracketBracedList = true;
+ Style.BreakAfterOpenBracketFunction = true;
+ Style.BreakAfterOpenBracketIf = true;
+ [[fallthrough]];
+ case BAS_Align:
+ Style.AlignAfterOpenBracket = true;
+ break;
+ }
+ }
+
// For backward compatibility.
if (!IO.outputting()) {
IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlines);
@@ -1014,7 +1069,6 @@ template <> struct MappingTraits<FormatStyle> {
}
IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
- IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
IO.mapOptional("AlignArrayOfStructures", Style.AlignArrayOfStructures);
IO.mapOptional("AlignConsecutiveAssignments",
Style.AlignConsecutiveAssignments);
@@ -1079,10 +1133,29 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("BreakAfterAttributes", Style.BreakAfterAttributes);
IO.mapOptional("BreakAfterJavaFieldAnnotations",
Style.BreakAfterJavaFieldAnnotations);
+ IO.mapOptional("BreakAfterOpenBracketBracedList",
+ Style.BreakAfterOpenBracketBracedList);
+ IO.mapOptional("BreakAfterOpenBracketFunction",
+ Style.BreakAfterOpenBracketFunction);
+ IO.mapOptional("BreakAfterOpenBracketIf", Style.BreakAfterOpenBracketIf);
+ IO.mapOptional("BreakAfterOpenBracketLoop",
+ Style.BreakAfterOpenBracketLoop);
+ IO.mapOptional("BreakAfterOpenBracketSwitch",
+ Style.BreakAfterOpenBracketSwitch);
IO.mapOptional("BreakAfterReturnType", Style.BreakAfterReturnType);
IO.mapOptional("BreakArrays", Style.BreakArrays);
IO.mapOptional("BreakBeforeBinaryOperators",
Style.BreakBeforeBinaryOperators);
+ IO.mapOptional("BreakBeforeCloseBracketBracedList",
+ Style.BreakBeforeCloseBracketBracedList);
+ IO.mapOptional("BreakBeforeCloseBracketFunction",
+ Style.BreakBeforeCloseBracketFunction);
+ IO.mapOptional("BreakBeforeCloseBracketIf",
+ Style.BreakBeforeCloseBracketIf);
+ IO.mapOptional("BreakBeforeCloseBracketLoop",
+ Style.BreakBeforeCloseBracketLoop);
+ IO.mapOptional("BreakBeforeCloseBracketSwitch",
+ Style.BreakBeforeCloseBracketSwitch);
IO.mapOptional("BreakBeforeConceptDeclarations",
Style.BreakBeforeConceptDeclarations);
IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
@@ -1561,7 +1634,7 @@ static void expandPresetsSpacesInParens(FormatStyle &Expanded) {
FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
FormatStyle LLVMStyle;
LLVMStyle.AccessModifierOffset = -2;
- LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align;
+ LLVMStyle.AlignAfterOpenBracket = true;
LLVMStyle.AlignArrayOfStructures = FormatStyle::AIAS_None;
LLVMStyle.AlignConsecutiveAssignments = {};
LLVMStyle.AlignConsecutiveAssignments.PadOperators = true;
@@ -1621,10 +1694,20 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
LLVMStyle.BreakAdjacentStringLiterals = true;
LLVMStyle.BreakAfterAttributes = FormatStyle::ABS_Leave;
LLVMStyle.BreakAfterJavaFieldAnnotations = false;
+ LLVMStyle.BreakAfterOpenBracketBracedList = false;
+ LLVMStyle.BreakAfterOpenBracketFunction = false;
+ LLVMStyle.BreakAfterOpenBracketIf = false;
+ LLVMStyle.BreakAfterOpenBracketLoop = false;
+ LLVMStyle.BreakAfterOpenBracketSwitch = false;
LLVMStyle.BreakAfterReturnType = FormatStyle::RTBS_None;
LLVMStyle.BreakArrays = true;
LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
+ LLVMStyle.BreakBeforeCloseBracketBracedList = false;
+ LLVMStyle.BreakBeforeCloseBracketFunction = false;
+ LLVMStyle.BreakBeforeCloseBracketIf = false;
+ LLVMStyle.BreakBeforeCloseBracketLoop = false;
+ LLVMStyle.BreakBeforeCloseBracketSwitch = false;
LLVMStyle.BreakBeforeConceptDeclarations = FormatStyle::BBCDS_Always;
LLVMStyle.BreakBeforeInlineASMColon = FormatStyle::BBIAS_OnlyMultiline;
LLVMStyle.BreakBeforeTemplateCloser = false;
@@ -1877,7 +1960,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
if (Language == FormatStyle::LK_Java) {
- GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
+ GoogleStyle.AlignAfterOpenBracket = false;
GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign;
GoogleStyle.AlignTrailingComments = {};
GoogleStyle.AlignTrailingComments.Kind = FormatStyle::TCAS_Never;
@@ -1889,7 +1972,9 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
GoogleStyle.SpaceAfterCStyleCast = true;
GoogleStyle.SpacesBeforeTrailingComments = 1;
} else if (Language == FormatStyle::LK_JavaScript) {
- GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak;
+ GoogleStyle.BreakAfterOpenBracketBracedList = true;
+ GoogleStyle.BreakAfterOpenBracketFunction = true;
+ GoogleStyle.BreakAfterOpenBracketIf = true;
GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign;
GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
// TODO: still under discussion whether to switch to SLS_All.
@@ -2026,7 +2111,7 @@ FormatStyle getMozillaStyle() {
FormatStyle getWebKitStyle() {
FormatStyle Style = getLLVMStyle();
Style.AccessModifierOffset = -4;
- Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
+ Style.AlignAfterOpenBracket = false;
Style.AlignOperands = FormatStyle::OAS_DontAlign;
Style.AlignTrailingComments = {};
Style.AlignTrailingComments.Kind = FormatStyle::TCAS_Never;
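
For reference, the backward-compatibility switch above makes the old BlockIndent value equivalent to setting the new booleans by hand. A minimal C++ sketch of configuring a FormatStyle the same way in code (the field names are the ones added in this patch):

    #include "clang/Format/Format.h"

    clang::format::FormatStyle blockIndentishStyle() {
      // Start from LLVM style and mirror what the YAML reader now does for
      // "AlignAfterOpenBracket: BlockIndent" (see the switch above).
      clang::format::FormatStyle Style = clang::format::getLLVMStyle();
      Style.AlignAfterOpenBracket = true;
      Style.BreakAfterOpenBracketBracedList = true;
      Style.BreakAfterOpenBracketFunction = true;
      Style.BreakAfterOpenBracketIf = true;
      Style.BreakBeforeCloseBracketBracedList = true;
      Style.BreakBeforeCloseBracketFunction = true;
      Style.BreakBeforeCloseBracketIf = true;
      return Style;
    }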
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index d1c6264..28fdbcb 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -68,7 +68,7 @@ bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const {
assert(MatchingParen);
assert(MatchingParen->is(tok::l_brace));
if (Style.Cpp11BracedListStyle == FormatStyle::BLS_Block ||
- Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) {
+ !Style.BreakBeforeCloseBracketBracedList) {
return false;
}
const auto *LBrace = MatchingParen;
@@ -198,7 +198,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
return;
// Column format doesn't really make sense if we don't align after brackets.
- if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
+ if (!Style.AlignAfterOpenBracket)
return;
FormatToken *ItemBegin = Token->Next;
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 6f3d24a..d833130 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -666,6 +666,12 @@ public:
(endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro);
}
+ bool isLoop(const FormatStyle &Style) const {
+ return isOneOf(tok::kw_for, tok::kw_while) ||
+ (Style.isJavaScript() && isNot(tok::l_paren) && Previous &&
+ Previous->is(tok::kw_for));
+ }
+
bool closesScopeAfterBlock() const {
if (getBlockKind() == BK_Block)
return true;
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 021d8c6..8e227da 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -4427,10 +4427,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0)
return Style.PenaltyBreakOpenParenthesis;
- if (Left.is(tok::l_paren) && InFunctionDecl &&
- Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) {
+ if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket)
return 100;
- }
if (Left.is(tok::l_paren) && Left.Previous &&
(Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) ||
Left.Previous->isIf())) {
@@ -4446,7 +4444,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
// If we aren't aligning after opening parens/braces we can always break
// here unless the style does not want us to place all arguments on the
// next line.
- if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign &&
+ if (!Style.AlignAfterOpenBracket &&
(Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) {
return 0;
}
@@ -6226,24 +6224,31 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
(Right.isBlockIndentedInitRBrace(Style)));
}
- // We only break before r_paren if we're in a block indented context.
+ // We can break before r_paren if we're in a block indented context or
+ // a control statement with an explicit style option.
if (Right.is(tok::r_paren)) {
- if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent ||
- !Right.MatchingParen) {
+ if (!Right.MatchingParen)
return false;
- }
auto Next = Right.Next;
if (Next && Next->is(tok::r_paren))
Next = Next->Next;
if (Next && Next->is(tok::l_paren))
return false;
const FormatToken *Previous = Right.MatchingParen->Previous;
- return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf()));
+ if (!Previous)
+ return false;
+ if (Previous->isIf())
+ return Style.BreakBeforeCloseBracketIf;
+ if (Previous->isLoop(Style))
+ return Style.BreakBeforeCloseBracketLoop;
+ if (Previous->is(tok::kw_switch))
+ return Style.BreakBeforeCloseBracketSwitch;
+ return Style.BreakBeforeCloseBracketFunction;
}
if (Left.isOneOf(tok::r_paren, TT_TrailingAnnotation) &&
Right.is(TT_TrailingAnnotation) &&
- Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) {
+ Style.BreakBeforeCloseBracketFunction) {
return false;
}
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index bd36eb4..1951e7f 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4600,7 +4600,8 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
// Validate that if fnative-half-type is given, that
// the language standard is at least hlsl2018, and that
// the target shader model is at least 6.2.
- if (Args.getLastArg(OPT_fnative_half_type)) {
+ if (Args.getLastArg(OPT_fnative_half_type) ||
+ Args.getLastArg(OPT_fnative_int16_type)) {
const LangStandard &Std =
LangStandard::getLangStandardForKind(Opts.LangStd);
if (!(Opts.LangStd >= LangStandard::lang_hlsl2018 &&
@@ -4614,12 +4615,16 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
Diags.Report(diag::err_drv_hlsl_bad_shader_unsupported)
<< VulkanEnv << T.getOSName() << T.str();
}
- if (Args.getLastArg(OPT_fnative_half_type)) {
+ if (Args.getLastArg(OPT_fnative_half_type) ||
+ Args.getLastArg(OPT_fnative_int16_type)) {
+ const char *Str = Args.getLastArg(OPT_fnative_half_type)
+ ? "-fnative-half-type"
+ : "-fnative-int16-type";
const LangStandard &Std =
LangStandard::getLangStandardForKind(Opts.LangStd);
if (!(Opts.LangStd >= LangStandard::lang_hlsl2018))
Diags.Report(diag::err_drv_hlsl_16bit_types_unsupported)
- << "-fnative-half-type" << false << Std.getName();
+ << Str << false << Std.getName();
}
} else {
llvm_unreachable("expected DXIL or SPIR-V target");
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 47f1d5a..8602be1 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -399,7 +399,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__HLSL_202y",
Twine((unsigned)LangOptions::HLSLLangStd::HLSL_202y));
- if (LangOpts.NativeHalfType)
+ if (LangOpts.NativeHalfType && LangOpts.NativeInt16Type)
Builder.defineMacro("__HLSL_ENABLE_16_BIT", "1");
// Shader target information
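
With the InitPreprocessor change above, __HLSL_ENABLE_16_BIT is only defined when both native-half and native-int16 support are enabled; per the HLSL.cpp hunk, the driver's -enable-16bit-types now implies both. A small sketch (C-style, with a stand-in typedef rather than the real HLSL 16-bit type names) of code keyed off the macro:

    #ifdef __HLSL_ENABLE_16_BIT
    typedef unsigned short SmallIndex; // stands in for a native 16-bit type
    #else
    typedef unsigned int SmallIndex;   // fall back to 32-bit
    #endif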
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index f5add2a..aea3e72 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -22,22 +22,16 @@
using namespace clang;
-static const enum raw_ostream::Colors noteColor = raw_ostream::CYAN;
-static const enum raw_ostream::Colors remarkColor =
- raw_ostream::BLUE;
-static const enum raw_ostream::Colors fixitColor =
- raw_ostream::GREEN;
-static const enum raw_ostream::Colors caretColor =
- raw_ostream::GREEN;
-static const enum raw_ostream::Colors warningColor =
- raw_ostream::MAGENTA;
-static const enum raw_ostream::Colors templateColor =
- raw_ostream::CYAN;
-static const enum raw_ostream::Colors errorColor = raw_ostream::RED;
-static const enum raw_ostream::Colors fatalColor = raw_ostream::RED;
+static constexpr raw_ostream::Colors NoteColor = raw_ostream::CYAN;
+static constexpr raw_ostream::Colors RemarkColor = raw_ostream::BLUE;
+static constexpr raw_ostream::Colors FixitColor = raw_ostream::GREEN;
+static constexpr raw_ostream::Colors CaretColor = raw_ostream::GREEN;
+static constexpr raw_ostream::Colors WarningColor = raw_ostream::MAGENTA;
+static constexpr raw_ostream::Colors TemplateColor = raw_ostream::CYAN;
+static constexpr raw_ostream::Colors ErrorColor = raw_ostream::RED;
+static constexpr raw_ostream::Colors FatalColor = raw_ostream::RED;
// Used for changing only the bold attribute.
-static const enum raw_ostream::Colors savedColor =
- raw_ostream::SAVEDCOLOR;
+static constexpr raw_ostream::Colors SavedColor = raw_ostream::SAVEDCOLOR;
// Magenta is taken for 'warning'. Red is already 'error' and 'cyan'
// is already taken for 'note'. Green is already used to underline
@@ -47,6 +41,43 @@ static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW;
static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
+namespace {
+template <typename Sub> class ColumnsOrBytes {
+public:
+ int V = 0;
+ ColumnsOrBytes(int V) : V(V) {}
+ bool isValid() const { return V != -1; }
+ Sub next() const { return Sub(V + 1); }
+ Sub prev() const { return Sub(V - 1); }
+
+ bool operator>(Sub O) const { return V > O.V; }
+ bool operator<(Sub O) const { return V < O.V; }
+ bool operator<=(Sub B) const { return V <= B.V; }
+ bool operator!=(Sub C) const { return C.V != V; }
+
+ Sub operator+(Sub B) const { return Sub(V + B.V); }
+ Sub &operator+=(Sub B) {
+ V += B.V;
+ return *static_cast<Sub *>(this);
+ }
+ Sub operator-(Sub B) const { return Sub(V - B.V); }
+ Sub &operator-=(Sub B) {
+ V -= B.V;
+ return *static_cast<Sub *>(this);
+ }
+};
+
+class Bytes final : public ColumnsOrBytes<Bytes> {
+public:
+ Bytes(int V) : ColumnsOrBytes(V) {}
+};
+
+class Columns final : public ColumnsOrBytes<Columns> {
+public:
+ Columns(int V) : ColumnsOrBytes(V) {}
+};
+} // namespace
+
/// Add highlights to differences in template strings.
static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,
bool &Normal, bool Bold) {
@@ -58,11 +89,11 @@ static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,
Str = Str.substr(Pos + 1);
if (Normal)
- OS.changeColor(templateColor, true);
+ OS.changeColor(TemplateColor, true);
else {
OS.resetColor();
if (Bold)
- OS.changeColor(savedColor, true);
+ OS.changeColor(SavedColor, true);
}
Normal = !Normal;
}
@@ -109,8 +140,8 @@ printableTextForNextCharacter(StringRef SourceLine, size_t *I,
if (SourceLine[*I] == '\t') {
assert(0 < TabStop && TabStop <= DiagnosticOptions::MaxTabStop &&
"Invalid -ftabstop value");
- unsigned Col = bytesSincePreviousTabOrLineBegin(SourceLine, *I);
- unsigned NumSpaces = TabStop - (Col % TabStop);
+ unsigned LineBytes = bytesSincePreviousTabOrLineBegin(SourceLine, *I);
+ unsigned NumSpaces = TabStop - (LineBytes % TabStop);
assert(0 < NumSpaces && NumSpaces <= TabStop
&& "Invalid computation of space amt");
++(*I);
@@ -220,97 +251,99 @@ static void expandTabs(std::string &SourceLine, unsigned TabStop) {
/// (\\u3042 is represented in UTF-8 by three bytes and takes two columns to
/// display)
static void genColumnByteMapping(StringRef SourceLine, unsigned TabStop,
- SmallVectorImpl<int> &BytesOut,
- SmallVectorImpl<int> &ColumnsOut) {
+ SmallVectorImpl<Bytes> &BytesOut,
+ SmallVectorImpl<Columns> &ColumnsOut) {
assert(BytesOut.empty());
assert(ColumnsOut.empty());
if (SourceLine.empty()) {
- BytesOut.resize(1u, 0);
- ColumnsOut.resize(1u, 0);
+ BytesOut.resize(1u, Bytes(0));
+ ColumnsOut.resize(1u, Columns(0));
return;
}
ColumnsOut.resize(SourceLine.size() + 1, -1);
- int Columns = 0;
+ Columns NumColumns = 0;
size_t I = 0;
while (I < SourceLine.size()) {
- ColumnsOut[I] = Columns;
- BytesOut.resize(Columns + 1, -1);
- BytesOut.back() = I;
+ ColumnsOut[I] = NumColumns;
+ BytesOut.resize(NumColumns.V + 1, -1);
+ BytesOut.back() = Bytes(I);
auto [Str, Printable] =
printableTextForNextCharacter(SourceLine, &I, TabStop);
- Columns += llvm::sys::locale::columnWidth(Str);
+ NumColumns += Columns(llvm::sys::locale::columnWidth(Str));
}
- ColumnsOut.back() = Columns;
- BytesOut.resize(Columns + 1, -1);
- BytesOut.back() = I;
+ ColumnsOut.back() = NumColumns;
+ BytesOut.resize(NumColumns.V + 1, -1);
+ BytesOut.back() = Bytes(I);
}
namespace {
struct SourceColumnMap {
SourceColumnMap(StringRef SourceLine, unsigned TabStop)
- : m_SourceLine(SourceLine) {
+ : SourceLine(SourceLine) {
- genColumnByteMapping(SourceLine, TabStop, m_columnToByte, m_byteToColumn);
+ genColumnByteMapping(SourceLine, TabStop, ColumnToByte, ByteToColumn);
- assert(m_byteToColumn.size()==SourceLine.size()+1);
- assert(0 < m_byteToColumn.size() && 0 < m_columnToByte.size());
- assert(m_byteToColumn.size()
- == static_cast<unsigned>(m_columnToByte.back()+1));
- assert(static_cast<unsigned>(m_byteToColumn.back()+1)
- == m_columnToByte.size());
+ assert(ByteToColumn.size() == SourceLine.size() + 1);
+ assert(0 < ByteToColumn.size() && 0 < ColumnToByte.size());
+ assert(ByteToColumn.size() ==
+ static_cast<unsigned>(ColumnToByte.back().V + 1));
+ assert(static_cast<unsigned>(ByteToColumn.back().V + 1) ==
+ ColumnToByte.size());
}
- int columns() const { return m_byteToColumn.back(); }
- int bytes() const { return m_columnToByte.back(); }
+ Columns columns() const { return ByteToColumn.back(); }
+ Bytes bytes() const { return ColumnToByte.back(); }
/// Map a byte to the column which it is at the start of, or return -1
/// if it is not at the start of a column (for a UTF-8 trailing byte).
- int byteToColumn(int n) const {
- assert(0<=n && n<static_cast<int>(m_byteToColumn.size()));
- return m_byteToColumn[n];
+ Columns byteToColumn(Bytes N) const {
+ assert(0 <= N.V && N.V < static_cast<int>(ByteToColumn.size()));
+ return ByteToColumn[N.V];
}
/// Map a byte to the first column which contains it.
- int byteToContainingColumn(int N) const {
- assert(0 <= N && N < static_cast<int>(m_byteToColumn.size()));
- while (m_byteToColumn[N] == -1)
- --N;
- return m_byteToColumn[N];
+ Columns byteToContainingColumn(Bytes N) const {
+ assert(0 <= N.V && N.V < static_cast<int>(ByteToColumn.size()));
+ while (!ByteToColumn[N.V].isValid())
+ --N.V;
+ return ByteToColumn[N.V];
}
/// Map a column to the byte which starts the column, or return -1 if
/// the column the second or subsequent column of an expanded tab or similar
/// multi-column entity.
- int columnToByte(int n) const {
- assert(0<=n && n<static_cast<int>(m_columnToByte.size()));
- return m_columnToByte[n];
+ Bytes columnToByte(Columns N) const {
+ assert(0 <= N.V && N.V < static_cast<int>(ColumnToByte.size()));
+ return ColumnToByte[N.V];
}
/// Map from a byte index to the next byte which starts a column.
- int startOfNextColumn(int N) const {
- assert(0 <= N && N < static_cast<int>(m_byteToColumn.size() - 1));
- while (byteToColumn(++N) == -1) {}
+ Bytes startOfNextColumn(Bytes N) const {
+ assert(0 <= N.V && N.V < static_cast<int>(ByteToColumn.size() - 1));
+ N = N.next();
+ while (!byteToColumn(N).isValid())
+ N = N.next();
return N;
}
/// Map from a byte index to the previous byte which starts a column.
- int startOfPreviousColumn(int N) const {
- assert(0 < N && N < static_cast<int>(m_byteToColumn.size()));
- while (byteToColumn(--N) == -1) {}
+ Bytes startOfPreviousColumn(Bytes N) const {
+ assert(0 < N.V && N.V < static_cast<int>(ByteToColumn.size()));
+ N = N.prev();
+ while (!byteToColumn(N).isValid())
+ N = N.prev();
return N;
}
- StringRef getSourceLine() const {
- return m_SourceLine;
- }
+ StringRef getSourceLine() const { return SourceLine; }
private:
- const std::string m_SourceLine;
- SmallVector<int,200> m_byteToColumn;
- SmallVector<int,200> m_columnToByte;
+ StringRef SourceLine;
+ SmallVector<Columns, 200> ByteToColumn;
+ SmallVector<Bytes, 200> ColumnToByte;
};
} // end anonymous namespace
@@ -319,14 +352,15 @@ private:
static void selectInterestingSourceRegion(std::string &SourceLine,
std::string &CaretLine,
std::string &FixItInsertionLine,
- unsigned Columns,
- const SourceColumnMap &map) {
- unsigned CaretColumns = CaretLine.size();
- unsigned FixItColumns = llvm::sys::locale::columnWidth(FixItInsertionLine);
- unsigned MaxColumns = std::max(static_cast<unsigned>(map.columns()),
- std::max(CaretColumns, FixItColumns));
+ Columns NonGutterColumns,
+ const SourceColumnMap &Map) {
+ Columns CaretColumns = Columns(CaretLine.size());
+ Columns FixItColumns =
+ Columns(llvm::sys::locale::columnWidth(FixItInsertionLine));
+ Columns MaxColumns =
+ std::max({Map.columns().V, CaretColumns.V, FixItColumns.V});
// if the number of columns is less than the desired number we're done
- if (MaxColumns <= Columns)
+ if (MaxColumns <= NonGutterColumns)
return;
// No special characters are allowed in CaretLine.
@@ -334,13 +368,13 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
// Find the slice that we need to display the full caret line
// correctly.
- unsigned CaretStart = 0, CaretEnd = CaretLine.size();
- for (; CaretStart != CaretEnd; ++CaretStart)
- if (!isWhitespace(CaretLine[CaretStart]))
+ Columns CaretStart = 0, CaretEnd = CaretLine.size();
+ for (; CaretStart != CaretEnd; CaretStart = CaretStart.next())
+ if (!isWhitespace(CaretLine[CaretStart.V]))
break;
- for (; CaretEnd != CaretStart; --CaretEnd)
- if (!isWhitespace(CaretLine[CaretEnd - 1]))
+ for (; CaretEnd != CaretStart; CaretEnd = CaretEnd.prev())
+ if (!isWhitespace(CaretLine[CaretEnd.V - 1]))
break;
// caret has already been inserted into CaretLine so the above whitespace
@@ -349,39 +383,38 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
// If we have a fix-it line, make sure the slice includes all of the
// fix-it information.
if (!FixItInsertionLine.empty()) {
- unsigned FixItStart = 0, FixItEnd = FixItInsertionLine.size();
- for (; FixItStart != FixItEnd; ++FixItStart)
- if (!isWhitespace(FixItInsertionLine[FixItStart]))
- break;
-
- for (; FixItEnd != FixItStart; --FixItEnd)
- if (!isWhitespace(FixItInsertionLine[FixItEnd - 1]))
- break;
-
// We can safely use the byte offset FixItStart as the column offset
// because the characters up until FixItStart are all ASCII whitespace
// characters.
- unsigned FixItStartCol = FixItStart;
- unsigned FixItEndCol
- = llvm::sys::locale::columnWidth(FixItInsertionLine.substr(0, FixItEnd));
-
- CaretStart = std::min(FixItStartCol, CaretStart);
- CaretEnd = std::max(FixItEndCol, CaretEnd);
+ Bytes FixItStart = 0;
+ Bytes FixItEnd = Bytes(FixItInsertionLine.size());
+ while (FixItStart != FixItEnd &&
+ isWhitespace(FixItInsertionLine[FixItStart.V]))
+ FixItStart = FixItStart.next();
+
+ while (FixItEnd != FixItStart &&
+ isWhitespace(FixItInsertionLine[FixItEnd.V - 1]))
+ FixItEnd = FixItEnd.prev();
+
+ Columns FixItStartCol = Columns(FixItStart.V);
+ Columns FixItEndCol = Columns(llvm::sys::locale::columnWidth(
+ FixItInsertionLine.substr(0, FixItEnd.V)));
+
+ CaretStart = std::min(FixItStartCol.V, CaretStart.V);
+ CaretEnd = std::max(FixItEndCol.V, CaretEnd.V);
}
// CaretEnd may have been set at the middle of a character
// If it's not at a character's first column then advance it past the current
// character.
- while (static_cast<int>(CaretEnd) < map.columns() &&
- -1 == map.columnToByte(CaretEnd))
- ++CaretEnd;
-
- assert((static_cast<int>(CaretStart) > map.columns() ||
- -1!=map.columnToByte(CaretStart)) &&
- "CaretStart must not point to a column in the middle of a source"
- " line character");
- assert((static_cast<int>(CaretEnd) > map.columns() ||
- -1!=map.columnToByte(CaretEnd)) &&
+ while (CaretEnd < Map.columns() && !Map.columnToByte(CaretEnd).isValid())
+ CaretEnd = CaretEnd.next();
+
+ assert(
+ (CaretStart > Map.columns() || Map.columnToByte(CaretStart).isValid()) &&
+ "CaretStart must not point to a column in the middle of a source"
+ " line character");
+ assert((CaretEnd > Map.columns() || Map.columnToByte(CaretEnd).isValid()) &&
"CaretEnd must not point to a column in the middle of a source line"
" character");
@@ -390,70 +423,69 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
// number of columns we have, try to grow the slice to encompass
// more context.
- unsigned SourceStart = map.columnToByte(std::min<unsigned>(CaretStart,
- map.columns()));
- unsigned SourceEnd = map.columnToByte(std::min<unsigned>(CaretEnd,
- map.columns()));
+ Bytes SourceStart = Map.columnToByte(std::min(CaretStart.V, Map.columns().V));
+ Bytes SourceEnd = Map.columnToByte(std::min(CaretEnd.V, Map.columns().V));
- unsigned CaretColumnsOutsideSource = CaretEnd-CaretStart
- - (map.byteToColumn(SourceEnd)-map.byteToColumn(SourceStart));
+ Columns CaretColumnsOutsideSource =
+ CaretEnd - CaretStart -
+ (Map.byteToColumn(SourceEnd) - Map.byteToColumn(SourceStart));
- char const *front_ellipse = " ...";
- char const *front_space = " ";
- char const *back_ellipse = "...";
- unsigned ellipses_space = strlen(front_ellipse) + strlen(back_ellipse);
+ constexpr StringRef FrontEllipse = " ...";
+ constexpr StringRef FrontSpace = " ";
+ constexpr StringRef BackEllipse = "...";
+ Columns EllipsesColumns = Columns(FrontEllipse.size() + BackEllipse.size());
- unsigned TargetColumns = Columns;
+ Columns TargetColumns = NonGutterColumns;
// Give us extra room for the ellipses
// and any of the caret line that extends past the source
- if (TargetColumns > ellipses_space+CaretColumnsOutsideSource)
- TargetColumns -= ellipses_space+CaretColumnsOutsideSource;
+ if (TargetColumns > EllipsesColumns + CaretColumnsOutsideSource)
+ TargetColumns -= EllipsesColumns + CaretColumnsOutsideSource;
- while (SourceStart>0 || SourceEnd<SourceLine.size()) {
+ while (SourceStart > 0 || SourceEnd < SourceLine.size()) {
bool ExpandedRegion = false;
- if (SourceStart>0) {
- unsigned NewStart = map.startOfPreviousColumn(SourceStart);
+ if (SourceStart > 0) {
+ Bytes NewStart = Map.startOfPreviousColumn(SourceStart);
// Skip over any whitespace we see here; we're looking for
// another bit of interesting text.
// FIXME: Detect non-ASCII whitespace characters too.
- while (NewStart && isWhitespace(SourceLine[NewStart]))
- NewStart = map.startOfPreviousColumn(NewStart);
+ while (NewStart > 0 && isWhitespace(SourceLine[NewStart.V]))
+ NewStart = Map.startOfPreviousColumn(NewStart);
// Skip over this bit of "interesting" text.
- while (NewStart) {
- unsigned Prev = map.startOfPreviousColumn(NewStart);
- if (isWhitespace(SourceLine[Prev]))
+ while (NewStart > 0) {
+ Bytes Prev = Map.startOfPreviousColumn(NewStart);
+ if (isWhitespace(SourceLine[Prev.V]))
break;
NewStart = Prev;
}
- assert(map.byteToColumn(NewStart) != -1);
- unsigned NewColumns = map.byteToColumn(SourceEnd) -
- map.byteToColumn(NewStart);
+ assert(Map.byteToColumn(NewStart).isValid());
+ Columns NewColumns =
+ Map.byteToColumn(SourceEnd) - Map.byteToColumn(NewStart);
if (NewColumns <= TargetColumns) {
SourceStart = NewStart;
ExpandedRegion = true;
}
}
- if (SourceEnd<SourceLine.size()) {
- unsigned NewEnd = map.startOfNextColumn(SourceEnd);
+ if (SourceEnd < SourceLine.size()) {
+ Bytes NewEnd = Map.startOfNextColumn(SourceEnd);
// Skip over any whitespace we see here; we're looking for
// another bit of interesting text.
// FIXME: Detect non-ASCII whitespace characters too.
- while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd]))
- NewEnd = map.startOfNextColumn(NewEnd);
+ while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd.V]))
+ NewEnd = Map.startOfNextColumn(NewEnd);
// Skip over this bit of "interesting" text.
- while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd]))
- NewEnd = map.startOfNextColumn(NewEnd);
+ while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd.V]))
+ NewEnd = Map.startOfNextColumn(NewEnd);
- assert(map.byteToColumn(NewEnd) != -1);
- unsigned NewColumns = map.byteToColumn(NewEnd) -
- map.byteToColumn(SourceStart);
+ assert(Map.byteToColumn(NewEnd).isValid());
+ Columns NewColumns =
+ Map.byteToColumn(NewEnd) - Map.byteToColumn(SourceStart);
if (NewColumns <= TargetColumns) {
SourceEnd = NewEnd;
ExpandedRegion = true;
@@ -464,39 +496,41 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
break;
}
- CaretStart = map.byteToColumn(SourceStart);
- CaretEnd = map.byteToColumn(SourceEnd) + CaretColumnsOutsideSource;
+ CaretStart = Map.byteToColumn(SourceStart);
+ CaretEnd = Map.byteToColumn(SourceEnd) + CaretColumnsOutsideSource;
// [CaretStart, CaretEnd) is the slice we want. Update the various
// output lines to show only this slice.
- assert(CaretStart!=(unsigned)-1 && CaretEnd!=(unsigned)-1 &&
- SourceStart!=(unsigned)-1 && SourceEnd!=(unsigned)-1);
+ assert(CaretStart.isValid() && CaretEnd.isValid() && SourceStart.isValid() &&
+ SourceEnd.isValid());
assert(SourceStart <= SourceEnd);
assert(CaretStart <= CaretEnd);
- unsigned BackColumnsRemoved
- = map.byteToColumn(SourceLine.size())-map.byteToColumn(SourceEnd);
- unsigned FrontColumnsRemoved = CaretStart;
- unsigned ColumnsKept = CaretEnd-CaretStart;
+ Columns BackColumnsRemoved =
+ Map.byteToColumn(Bytes{static_cast<int>(SourceLine.size())}) -
+ Map.byteToColumn(SourceEnd);
+ Columns FrontColumnsRemoved = CaretStart;
+ Columns ColumnsKept = CaretEnd - CaretStart;
// We checked up front that the line needed truncation
- assert(FrontColumnsRemoved+ColumnsKept+BackColumnsRemoved > Columns);
+ assert(FrontColumnsRemoved + ColumnsKept + BackColumnsRemoved >
+ NonGutterColumns);
// The line needs some truncation, and we'd prefer to keep the front
// if possible, so remove the back
- if (BackColumnsRemoved > strlen(back_ellipse))
- SourceLine.replace(SourceEnd, std::string::npos, back_ellipse);
+ if (BackColumnsRemoved > Columns(BackEllipse.size()))
+ SourceLine.replace(SourceEnd.V, std::string::npos, BackEllipse);
// If that's enough then we're done
- if (FrontColumnsRemoved+ColumnsKept <= Columns)
+ if (FrontColumnsRemoved + ColumnsKept <= Columns(NonGutterColumns))
return;
// Otherwise remove the front as well
- if (FrontColumnsRemoved > strlen(front_ellipse)) {
- SourceLine.replace(0, SourceStart, front_ellipse);
- CaretLine.replace(0, CaretStart, front_space);
+ if (FrontColumnsRemoved > Columns(FrontEllipse.size())) {
+ SourceLine.replace(0, SourceStart.V, FrontEllipse);
+ CaretLine.replace(0, CaretStart.V, FrontSpace);
if (!FixItInsertionLine.empty())
- FixItInsertionLine.replace(0, CaretStart, front_space);
+ FixItInsertionLine.replace(0, CaretStart.V, FrontSpace);
}
}
@@ -690,11 +724,21 @@ TextDiagnostic::printDiagnosticLevel(raw_ostream &OS,
switch (Level) {
case DiagnosticsEngine::Ignored:
llvm_unreachable("Invalid diagnostic type");
- case DiagnosticsEngine::Note: OS.changeColor(noteColor, true); break;
- case DiagnosticsEngine::Remark: OS.changeColor(remarkColor, true); break;
- case DiagnosticsEngine::Warning: OS.changeColor(warningColor, true); break;
- case DiagnosticsEngine::Error: OS.changeColor(errorColor, true); break;
- case DiagnosticsEngine::Fatal: OS.changeColor(fatalColor, true); break;
+ case DiagnosticsEngine::Note:
+ OS.changeColor(NoteColor, true);
+ break;
+ case DiagnosticsEngine::Remark:
+ OS.changeColor(RemarkColor, true);
+ break;
+ case DiagnosticsEngine::Warning:
+ OS.changeColor(WarningColor, true);
+ break;
+ case DiagnosticsEngine::Error:
+ OS.changeColor(ErrorColor, true);
+ break;
+ case DiagnosticsEngine::Fatal:
+ OS.changeColor(FatalColor, true);
+ break;
}
}
@@ -722,7 +766,7 @@ void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS,
if (ShowColors && !IsSupplemental) {
// Print primary diagnostic messages in bold and without color, to visually
// indicate the transition from continuation notes and other output.
- OS.changeColor(savedColor, true);
+ OS.changeColor(SavedColor, true);
Bold = true;
}
@@ -800,7 +844,7 @@ void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
return;
if (DiagOpts.ShowColors)
- OS.changeColor(savedColor, true);
+ OS.changeColor(SavedColor, true);
emitFilename(PLoc.getFilename(), Loc.getManager());
switch (DiagOpts.getFormat()) {
@@ -961,41 +1005,40 @@ maybeAddRange(std::pair<unsigned, unsigned> A, std::pair<unsigned, unsigned> B,
struct LineRange {
unsigned LineNo;
- unsigned StartCol;
- unsigned EndCol;
+ Bytes StartByte;
+ Bytes EndByte;
};
/// Highlight \p R (with ~'s) on the current source line.
static void highlightRange(const LineRange &R, const SourceColumnMap &Map,
std::string &CaretLine) {
// Pick the first non-whitespace column.
- unsigned StartColNo = R.StartCol;
- while (StartColNo < Map.getSourceLine().size() &&
- (Map.getSourceLine()[StartColNo] == ' ' ||
- Map.getSourceLine()[StartColNo] == '\t'))
- StartColNo = Map.startOfNextColumn(StartColNo);
+ Bytes StartByte = R.StartByte;
+ while (StartByte < Map.bytes() && (Map.getSourceLine()[StartByte.V] == ' ' ||
+ Map.getSourceLine()[StartByte.V] == '\t'))
+ StartByte = Map.startOfNextColumn(StartByte);
// Pick the last non-whitespace column.
- unsigned EndColNo =
- std::min(static_cast<size_t>(R.EndCol), Map.getSourceLine().size());
- while (EndColNo && (Map.getSourceLine()[EndColNo - 1] == ' ' ||
- Map.getSourceLine()[EndColNo - 1] == '\t'))
- EndColNo = Map.startOfPreviousColumn(EndColNo);
+ Bytes EndByte = std::min(R.EndByte.V, Map.bytes().V);
+ while (EndByte.V != 0 && (Map.getSourceLine()[EndByte.V - 1] == ' ' ||
+ Map.getSourceLine()[EndByte.V - 1] == '\t'))
+ EndByte = Map.startOfPreviousColumn(EndByte);
// If the start/end passed each other, then we are trying to highlight a
// range that just exists in whitespace. That most likely means we have
// a multi-line highlighting range that covers a blank line.
- if (StartColNo > EndColNo)
+ if (StartByte > EndByte)
return;
+ assert(StartByte <= EndByte && "Invalid range!");
// Fill the range with ~'s.
- StartColNo = Map.byteToContainingColumn(StartColNo);
- EndColNo = Map.byteToContainingColumn(EndColNo);
+ Columns StartCol = Map.byteToContainingColumn(StartByte);
+ Columns EndCol = Map.byteToContainingColumn(EndByte);
+
+ if (CaretLine.size() < static_cast<size_t>(EndCol.V))
+ CaretLine.resize(EndCol.V, ' ');
- assert(StartColNo <= EndColNo && "Invalid range!");
- if (CaretLine.size() < EndColNo)
- CaretLine.resize(EndColNo, ' ');
- std::fill(CaretLine.begin() + StartColNo, CaretLine.begin() + EndColNo, '~');
+ std::fill(CaretLine.begin() + StartCol.V, CaretLine.begin() + EndCol.V, '~');
}
static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo,
@@ -1006,7 +1049,7 @@ static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo,
std::string FixItInsertionLine;
if (Hints.empty() || !DiagOpts.ShowFixits)
return FixItInsertionLine;
- unsigned PrevHintEndCol = 0;
+ Columns PrevHintEndCol = 0;
for (const auto &H : Hints) {
if (H.CodeToInsert.empty())
@@ -1024,12 +1067,13 @@ static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo,
// Note: When modifying this function, be very careful about what is a
// "column" (printed width, platform-dependent) and what is a
// "byte offset" (SourceManager "column").
- unsigned HintByteOffset =
- SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1;
+ Bytes HintByteOffset =
+ Bytes(SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second))
+ .prev();
// The hint must start inside the source or right at the end
- assert(HintByteOffset < static_cast<unsigned>(map.bytes()) + 1);
- unsigned HintCol = map.byteToContainingColumn(HintByteOffset);
+ assert(HintByteOffset < map.bytes().next());
+ Columns HintCol = map.byteToContainingColumn(HintByteOffset);
// If we inserted a long previous hint, push this one forwards, and add
// an extra space to show that this is not part of the previous
@@ -1043,11 +1087,11 @@ static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo,
// This should NOT use HintByteOffset, because the source might have
// Unicode characters in earlier columns.
- unsigned NewFixItLineSize = FixItInsertionLine.size() +
- (HintCol - PrevHintEndCol) +
- H.CodeToInsert.size();
+ Columns NewFixItLineSize = Columns(FixItInsertionLine.size()) +
+ (HintCol - PrevHintEndCol) +
+ Columns(H.CodeToInsert.size());
if (NewFixItLineSize > FixItInsertionLine.size())
- FixItInsertionLine.resize(NewFixItLineSize, ' ');
+ FixItInsertionLine.resize(NewFixItLineSize.V, ' ');
std::copy(H.CodeToInsert.begin(), H.CodeToInsert.end(),
FixItInsertionLine.end() - H.CodeToInsert.size());
@@ -1095,28 +1139,29 @@ prepareAndFilterRanges(const SmallVectorImpl<CharSourceRange> &Ranges,
if (EndLineNo < Lines.first || SM.getFileID(End) != FID)
continue;
- unsigned StartColumn = SM.getExpansionColumnNumber(Begin);
- unsigned EndColumn = SM.getExpansionColumnNumber(End);
- assert(StartColumn && "StartColumn must be valid, 0 is invalid");
- assert(EndColumn && "EndColumn must be valid, 0 is invalid");
+ Bytes StartByte = SM.getExpansionColumnNumber(Begin);
+ Bytes EndByte = SM.getExpansionColumnNumber(End);
+ assert(StartByte.V != 0 && "StartByte must be valid, 0 is invalid");
+ assert(EndByte.V != 0 && "EndByte must be valid, 0 is invalid");
if (R.isTokenRange())
- EndColumn += Lexer::MeasureTokenLength(End, SM, LangOpts);
+ EndByte += Bytes(Lexer::MeasureTokenLength(End, SM, LangOpts));
// Only a single line.
if (StartLineNo == EndLineNo) {
- LineRanges.push_back({StartLineNo, StartColumn - 1, EndColumn - 1});
+ LineRanges.push_back({StartLineNo, StartByte.prev(), EndByte.prev()});
continue;
}
// Start line.
- LineRanges.push_back({StartLineNo, StartColumn - 1, ~0u});
+ LineRanges.push_back(
+ {StartLineNo, StartByte.prev(), std::numeric_limits<int>::max()});
// Middle lines.
for (unsigned S = StartLineNo + 1; S != EndLineNo; ++S)
- LineRanges.push_back({S, 0, ~0u});
+ LineRanges.push_back({S, 0, std::numeric_limits<int>::max()});
// End line.
- LineRanges.push_back({EndLineNo, 0, EndColumn - 1});
+ LineRanges.push_back({EndLineNo, 0, EndByte.prev()});
}
return LineRanges;
@@ -1226,8 +1271,7 @@ highlightLines(StringRef FileData, unsigned StartLineNumber,
if (TokenStartLine > EndLineNumber)
break;
- unsigned StartCol =
- SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
+ Bytes StartCol = SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
if (Invalid)
continue;
@@ -1235,14 +1279,14 @@ highlightLines(StringRef FileData, unsigned StartLineNumber,
if (TokenStartLine == TokenEndLine) {
SmallVector<TextDiagnostic::StyleRange> &LineRanges =
SnippetRanges[TokenStartLine - StartLineNumber];
- appendStyle(LineRanges, T, StartCol, T.getLength());
+ appendStyle(LineRanges, T, StartCol.V, T.getLength());
continue;
}
assert((TokenEndLine - TokenStartLine) >= 1);
// For tokens that span multiple lines (think multiline comments), we
// divide them into multiple StyleRanges.
- unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1;
+ Bytes EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1;
if (Invalid)
continue;
@@ -1258,9 +1302,9 @@ highlightLines(StringRef FileData, unsigned StartLineNumber,
SnippetRanges[L - StartLineNumber];
if (L == TokenStartLine) // First line
- appendStyle(LineRanges, T, StartCol, LineLength);
+ appendStyle(LineRanges, T, StartCol.V, LineLength);
else if (L == TokenEndLine) // Last line
- appendStyle(LineRanges, T, 0, EndCol);
+ appendStyle(LineRanges, T, 0, EndCol.V);
else
appendStyle(LineRanges, T, 0, LineLength);
}
@@ -1315,11 +1359,11 @@ void TextDiagnostic::emitSnippetAndCaret(
const char *BufEnd = BufStart + BufData.size();
unsigned CaretLineNo = Loc.getLineNumber();
- unsigned CaretColNo = Loc.getColumnNumber();
+ Bytes CaretByte = Loc.getColumnNumber();
// Arbitrarily stop showing snippets when the line is too long.
static const size_t MaxLineLengthToPrint = 4096;
- if (CaretColNo > MaxLineLengthToPrint)
+ if (CaretByte > MaxLineLengthToPrint)
return;
// Find the set of lines to include.
@@ -1379,35 +1423,37 @@ void TextDiagnostic::emitSnippetAndCaret(
std::string SourceLine(LineStart, LineEnd);
// Remove trailing null bytes.
while (!SourceLine.empty() && SourceLine.back() == '\0' &&
- (LineNo != CaretLineNo || SourceLine.size() > CaretColNo))
+ (LineNo != CaretLineNo ||
+ SourceLine.size() > static_cast<size_t>(CaretByte.V)))
SourceLine.pop_back();
// Build the byte to column map.
- const SourceColumnMap sourceColMap(SourceLine, DiagOpts.TabStop);
+ const SourceColumnMap SourceColMap(SourceLine, DiagOpts.TabStop);
std::string CaretLine;
// Highlight all of the characters covered by Ranges with ~ characters.
for (const auto &LR : LineRanges) {
if (LR.LineNo == LineNo)
- highlightRange(LR, sourceColMap, CaretLine);
+ highlightRange(LR, SourceColMap, CaretLine);
}
// Next, insert the caret itself.
if (CaretLineNo == LineNo) {
- size_t Col = sourceColMap.byteToContainingColumn(CaretColNo - 1);
- CaretLine.resize(std::max(Col + 1, CaretLine.size()), ' ');
- CaretLine[Col] = '^';
+ Columns Col = SourceColMap.byteToContainingColumn(CaretByte.prev());
+ CaretLine.resize(
+ std::max(static_cast<size_t>(Col.V) + 1, CaretLine.size()), ' ');
+ CaretLine[Col.V] = '^';
}
std::string FixItInsertionLine =
- buildFixItInsertionLine(FID, LineNo, sourceColMap, Hints, SM, DiagOpts);
+ buildFixItInsertionLine(FID, LineNo, SourceColMap, Hints, SM, DiagOpts);
// If the source line is too long for our terminal, select only the
// "interesting" source region within that line.
- unsigned Columns = DiagOpts.MessageLength;
- if (Columns)
+ Columns MessageLength = DiagOpts.MessageLength;
+ if (MessageLength.V != 0)
selectInterestingSourceRegion(SourceLine, CaretLine, FixItInsertionLine,
- Columns, sourceColMap);
+ MessageLength, SourceColMap);
// If we are in -fdiagnostics-print-source-range-info mode, we are trying
// to produce easily machine parsable output. Add a space before the
@@ -1425,7 +1471,7 @@ void TextDiagnostic::emitSnippetAndCaret(
if (!CaretLine.empty()) {
indentForLineNumbers();
if (DiagOpts.ShowColors)
- OS.changeColor(caretColor, true);
+ OS.changeColor(CaretColor, true);
OS << CaretLine << '\n';
if (DiagOpts.ShowColors)
OS.resetColor();
@@ -1435,7 +1481,7 @@ void TextDiagnostic::emitSnippetAndCaret(
indentForLineNumbers();
if (DiagOpts.ShowColors)
// Print fixit line in color
- OS.changeColor(fixitColor, false);
+ OS.changeColor(FixitColor, false);
if (DiagOpts.ShowSourceRanges)
OS << ' ';
OS << FixItInsertionLine << '\n';
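The TextDiagnostic.cpp hunks above replace raw unsigned offsets with distinct Bytes and Columns wrappers so byte positions within the source line and rendered terminal columns can no longer be mixed up silently. The real definitions live elsewhere in this patch; as a hedged sketch only, assuming the member names used above (.V, .prev()), the wrappers behave roughly like:

  // Sketch under assumptions, not the shipped definitions; comparison and
  // arithmetic operators are omitted.
  struct Bytes {
    int V = 0;
    constexpr Bytes(int V = 0) : V(V) {}
    constexpr Bytes prev() const { return Bytes(V - 1); } // previous byte offset
  };
  struct Columns {
    int V = 0;
    constexpr Columns(int V = 0) : V(V) {}
  };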
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 1858912..33fff76 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -162,18 +162,12 @@ set(x86_files
adxintrin.h
ammintrin.h
amxavx512intrin.h
- amxbf16transposeintrin.h
amxcomplexintrin.h
- amxcomplextransposeintrin.h
amxfp16intrin.h
- amxfp16transposeintrin.h
amxfp8intrin.h
amxintrin.h
amxmovrsintrin.h
- amxmovrstransposeintrin.h
amxtf32intrin.h
- amxtf32transposeintrin.h
- amxtransposeintrin.h
avx10_2_512bf16intrin.h
avx10_2_512convertintrin.h
avx10_2_512minmaxintrin.h
diff --git a/clang/lib/Headers/amxbf16transposeintrin.h b/clang/lib/Headers/amxbf16transposeintrin.h
deleted file mode 100644
index 86f09f2..0000000
--- a/clang/lib/Headers/amxbf16transposeintrin.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*===----- amxbf16transposeintrin.h - AMX-BF16 and AMX-TRANSPOSE ------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error \
- "Never use <amxbf16transposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_BF16TRANSPOSEINTRIN_H
-#define __AMX_BF16TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("amx-bf16,amx-transpose")))
-
-/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
-/// tiles \a a and \a b, accumulating the intermediate single-precision
-/// (32-bit) floating-point elements with elements in \a dst, and store the
-/// 32-bit result back to tile \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_tdpbf16ps (__tile dst, __tile a, __tile b)
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-/// tmp := dst.row[m]
-/// FOR k := 0 TO (a.colsb / 4) - 1
-/// FOR n := 0 TO (dst.colsb / 4) - 1
-/// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+0]) *
-/// FP32(b.row[k].bf16[2*n+0])
-/// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+1]) *
-/// FP32(b.row[k].bf16[2*n+1])
-/// ENDFOR
-/// ENDFOR
-/// write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTDPBF16PS instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b))
-
-/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_tdpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k,
- _tile1024i dst, _tile1024i src1, _tile1024i src2) {
- return __builtin_ia32_ttdpbf16ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
-/// tiles src0 and src1, accumulating the intermediate single-precision
-/// (32-bit) floating-point elements with elements in "dst", and store the
-/// 32-bit result back to tile "dst".
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTDPBF16PS </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static __inline__ void __tile_tdpbf16ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
- dst->tile = _tile_tdpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile,
- src0.tile, src1.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif /* __x86_64__ */
-#endif /* __AMX_BF16TRANSPOSEINTRIN_H */
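For reference, the header deleted above exposed the AMX-TRANSPOSE BF16 dot-product both as a macro over the raw builtin and as a __tile1024i-based C API. A minimal, now-obsolete usage sketch against that removed API (function and variable names are illustrative):

  #include <immintrin.h>

  /* Compiles only against toolchains that still ship amxbf16transposeintrin.h. */
  static void tdp_bf16(__tile1024i *acc, __tile1024i a, __tile1024i b) {
    /* acc += transpose(a) * b, accumulating BF16 pairs into FP32 elements. */
    __tile_tdpbf16ps(acc, a, b);
  }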
diff --git a/clang/lib/Headers/amxcomplextransposeintrin.h b/clang/lib/Headers/amxcomplextransposeintrin.h
deleted file mode 100644
index 11abaf9..0000000
--- a/clang/lib/Headers/amxcomplextransposeintrin.h
+++ /dev/null
@@ -1,303 +0,0 @@
-/*===----- amxcomplextransposeintrin.h - AMX-COMPLEX and AMX-TRANSPOSE ------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error \
- "Never use <amxcomplextransposeintrin.h> directly; include <immintrin.h> instead."
-#endif // __IMMINTRIN_H
-
-#ifndef __AMX_COMPLEXTRANSPOSEINTRIN_H
-#define __AMX_COMPLEXTRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("amx-complex,amx-transpose")))
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-/// accumulate the results into a packed single precision tile. Each dword
-/// element in input tiles \a a and \a b is interpreted as a complex number
-/// with FP16 real part and FP16 imaginary part.
-/// Calculates the imaginary part of the result. For each possible combination
-/// of (transposed column of \a a, column of \a b), it performs a set of
-/// multiplication and accumulations on all corresponding complex numbers
-/// (one from \a a and one from \a b). The imaginary part of the \a a element
-/// is multiplied with the real part of the corresponding \a b element, and
-/// the real part of the \a a element is multiplied with the imaginary part
-/// of the corresponding \a b elements. The two accumulated results are
-/// added, and then accumulated into the corresponding row and column of
-/// \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_tcmmimfp16ps(__tile dst, __tile a, __tile b);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-/// tmp := dst.row[m]
-/// FOR k := 0 TO a.rows - 1
-/// FOR n := 0 TO (dst.colsb / 4) - 1
-/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1])
-/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0])
-/// ENDFOR
-/// ENDFOR
-/// write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tcmmimfp16ps(dst, a, b) \
- __builtin_ia32_ttcmmimfp16ps((dst), (a), (b))
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-/// accumulate the results into a packed single precision tile. Each dword
-/// element in input tiles \a a and \a b is interpreted as a complex number
-/// with FP16 real part and FP16 imaginary part.
-/// Calculates the real part of the result. For each possible combination
-/// of (transposed column of \a a, column of \a b), it performs a set of
-/// multiplication and accumulations on all corresponding complex numbers
-/// (one from \a a and one from \a b). The real part of the \a a element is
-/// multiplied with the real part of the corresponding \a b element, and the
-/// negated imaginary part of the \a a element is multiplied with the
-/// imaginary part of the corresponding \a b elements. The two accumulated
-/// results are added, and then accumulated into the corresponding row and
-/// column of \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_tcmmrlfp16ps(__tile dst, __tile a, __tile b);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-/// tmp := dst.row[m]
-/// FOR k := 0 TO a.rows - 1
-/// FOR n := 0 TO (dst.colsb / 4) - 1
-/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0])
-/// tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1])
-/// ENDFOR
-/// ENDFOR
-/// write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTCMMRLFP16PS instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tcmmrlfp16ps(dst, a, b) \
- __builtin_ia32_ttcmmrlfp16ps((dst), (a), (b))
-
-/// Perform matrix conjugate transpose and multiplication of two tiles
-/// containing complex elements and accumulate the results into a packed
-/// single precision tile. Each dword element in input tiles \a a and \a b
-/// is interpreted as a complex number with FP16 real part and FP16 imaginary
-/// part.
-/// Calculates the imaginary part of the result. For each possible combination
-/// of (transposed column of \a a, column of \a b), it performs a set of
-/// multiplication and accumulations on all corresponding complex numbers
-/// (one from \a a and one from \a b). The negated imaginary part of the \a a
-/// element is multiplied with the real part of the corresponding \a b
-/// element, and the real part of the \a a element is multiplied with the
-/// imaginary part of the corresponding \a b elements. The two accumulated
-/// results are added, and then accumulated into the corresponding row and
-/// column of \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_conjtcmmimfp16ps(__tile dst, __tile a, __tile b);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-/// tmp := dst.row[m]
-/// FOR k := 0 TO a.rows - 1
-/// FOR n := 0 TO (dst.colsb / 4) - 1
-/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1])
-/// tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0])
-/// ENDFOR
-/// ENDFOR
-/// write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCONJTCMMIMFP16PS instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_conjtcmmimfp16ps(dst, a, b) \
- __builtin_ia32_tconjtcmmimfp16ps((dst), (a), (b))
-
-/// Perform conjugate transpose of an FP16-pair of complex elements from \a a
-/// and writes the result to \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_conjtfp16(__tile dst, __tile a);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR i := 0 TO dst.rows - 1
-/// FOR j := 0 TO (dst.colsb / 4) - 1
-/// tmp.fp16[2*j+0] := a.row[j].fp16[2*i+0]
-/// tmp.fp16[2*j+1] := -a.row[j].fp16[2*i+1]
-/// ENDFOR
-/// write_row_and_zero(dst, i, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCONJTFP16 instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The source tile. Max size is 1024 Bytes.
-#define _tile_conjtfp16(dst, a) __builtin_ia32_tconjtfp16((dst), (a))
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmimfp16ps_internal(
- unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
- _tile1024i src1, _tile1024i src2) {
- return __builtin_ia32_ttcmmimfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmrlfp16ps_internal(
- unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
- _tile1024i src1, _tile1024i src2) {
- return __builtin_ia32_ttcmmrlfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_conjtcmmimfp16ps_internal(
- unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
- _tile1024i src1, _tile1024i src2) {
- return __builtin_ia32_tconjtcmmimfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_conjtfp16_internal(unsigned short m, unsigned short n, _tile1024i src) {
- return __builtin_ia32_tconjtfp16_internal(m, n, src);
-}
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-/// accumulate the results into a packed single precision tile. Each dword
-/// element in input tiles src0 and src1 is interpreted as a complex number
-/// with FP16 real part and FP16 imaginary part.
-/// This function calculates the imaginary part of the result.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTCMMIMFP16PS </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_tcmmimfp16ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
- dst->tile = _tile_tcmmimfp16ps_internal(src0.row, src1.col, src0.col,
- dst->tile, src0.tile, src1.tile);
-}
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-/// accumulate the results into a packed single precision tile. Each dword
-/// element in input tiles src0 and src1 is interpreted as a complex number
-/// with FP16 real part and FP16 imaginary part.
-/// This function calculates the real part of the result.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTCMMRLFP16PS </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_tcmmrlfp16ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
- dst->tile = _tile_tcmmrlfp16ps_internal(src0.row, src1.col, src0.col,
- dst->tile, src0.tile, src1.tile);
-}
-
-/// Perform matrix conjugate transpose and multiplication of two tiles
-/// containing complex elements and accumulate the results into a packed
-/// single precision tile. Each dword element in input tiles src0 and src1
-/// is interpreted as a complex number with FP16 real part and FP16 imaginary
-/// part.
-/// This function calculates the imaginary part of the result.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCONJTCMMIMFP16PS </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_conjtcmmimfp16ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
- dst->tile = _tile_conjtcmmimfp16ps_internal(src0.row, src1.col, src0.col,
- dst->tile, src0.tile, src1.tile);
-}
-
-/// Perform conjugate transpose of an FP16-pair of complex elements from src and
-/// writes the result to dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCONJTFP16 </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src
-/// The source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_conjtfp16(__tile1024i *dst, __tile1024i src) {
- dst->tile = _tile_conjtfp16_internal(src.row, src.col, src.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif // __x86_64__
-#endif // __AMX_COMPLEXTRANSPOSEINTRIN_H
diff --git a/clang/lib/Headers/amxfp16transposeintrin.h b/clang/lib/Headers/amxfp16transposeintrin.h
deleted file mode 100644
index 191f8c6..0000000
--- a/clang/lib/Headers/amxfp16transposeintrin.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*===----- amxfp16transposeintrin.h - AMX-FP16 and AMX-TRANSPOSE ------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error \
- "Never use <amxfp16transposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_FP16TRANSPOSEINTRIN_H
-#define __AMX_FP16TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("amx-fp16,amx-transpose")))
-
-/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in
-/// tiles \a a and \a b, accumulating the intermediate single-precision
-/// (32-bit) floating-point elements with elements in \a dst, and store the
-/// 32-bit result back to tile \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_tdpfp16ps (__tile dst, __tile a, __tile b)
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-/// tmp := dst.row[m]
-/// FOR k := 0 TO (a.colsb / 4) - 1
-/// FOR n := 0 TO (dst.colsb / 4) - 1
-/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) *
-/// FP32(b.row[k].fp16[2*n+0])
-/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) *
-/// FP32(b.row[k].fp16[2*n+1])
-/// ENDFOR
-/// ENDFOR
-/// write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTDPFP16PS instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tdpfp16ps(dst, a, b) __builtin_ia32_ttdpfp16ps((dst), (a), (b))
-
-/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_tdpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k,
- _tile1024i dst, _tile1024i src1, _tile1024i src2) {
- return __builtin_ia32_ttdpfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in
-/// tiles src0 and src1, accumulating the intermediate single-precision
-/// (32-bit) floating-point elements with elements in "dst", and store the
-/// 32-bit result back to tile "dst".
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTDPFP16PS </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static __inline__ void __tile_tdpfp16ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
- dst->tile = _tile_tdpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile,
- src0.tile, src1.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif /* __x86_64__ */
-#endif /* __AMX_FP16TRANSPOSEINTRIN_H */
diff --git a/clang/lib/Headers/amxintrin.h b/clang/lib/Headers/amxintrin.h
index a7da10d..208aa358 100644
--- a/clang/lib/Headers/amxintrin.h
+++ b/clang/lib/Headers/amxintrin.h
@@ -230,8 +230,6 @@ static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_release(void) {
/// bytes. Since there is no 2D type in llvm IR, we use vector type to
/// represent 2D tile and the fixed size is maximum amx tile register size.
typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64)));
-typedef int _tile1024i_1024a
- __attribute__((__vector_size__(1024), __aligned__(1024)));
/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE
diff --git a/clang/lib/Headers/amxmovrstransposeintrin.h b/clang/lib/Headers/amxmovrstransposeintrin.h
deleted file mode 100644
index 5f48cba..0000000
--- a/clang/lib/Headers/amxmovrstransposeintrin.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/* ===--- amxmovrstransposeintrin.h - AMX_MOVRS_TRANSPOSE intrinsics --------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- * ===-----------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error \
- "Never use <amxmovrstransposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_MOVRS_TRANSPOSEINTRIN_H
-#define __AMX_MOVRS_TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("amx-transpose,amx-movrs")))
-
-#define _tile_2rpntlvwz0rs(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz0rs(tdst, base, stride)
-#define _tile_2rpntlvwz0rst1(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz0rst1(tdst, base, stride)
-#define _tile_2rpntlvwz1rs(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz1rs(tdst, base, stride)
-#define _tile_2rpntlvwz1rst1(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz1rst1(tdst, base, stride)
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rs_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- // Use __tile1024i_1024a* to escape the alignment check in
- // clang/test/Headers/x86-intrinsics-headers-clean.cpp
- __builtin_ia32_t2rpntlvwz0rs_internal(
- row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rst1_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- __builtin_ia32_t2rpntlvwz0rst1_internal(
- row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rs_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- __builtin_ia32_t2rpntlvwz1rs_internal(
- row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rst1_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- __builtin_ia32_t2rpntlvwz1rst1_internal(
- row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-/// Provides a hint to the implementation that the data will likely become
-/// read shared in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0RS </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz0rs(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz0rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1RS </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz0rst1(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz0rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely become
-/// read shared in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz1rs(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz1rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely become
-/// read shared in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1RS </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz1rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-#undef __DEFAULT_FN_ATTRS
-#endif /* __x86_64__ */
-#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */
diff --git a/clang/lib/Headers/amxtf32transposeintrin.h b/clang/lib/Headers/amxtf32transposeintrin.h
deleted file mode 100644
index e1b90c1..0000000
--- a/clang/lib/Headers/amxtf32transposeintrin.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*===--------- amxtf32transposeintrin.h - AMX-TF32 and AMX-TRANSPOSE --------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-#ifndef __IMMINTRIN_H
-#error \
- "Never use <amxtf32transposeintrin.h> directly; include <immintrin.h> instead."
-#endif // __IMMINTRIN_H
-
-#ifndef __AMX_TF32TRANSPOSEINTRIN_H
-#define __AMX_TF32TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS_TF32_TRANSPOSE \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("amx-tf32,amx-transpose")))
-
-/// \code
-/// void _tile_tmmultf32ps(constexpr int srcdst, constexpr int a, \
-/// constexpr int b);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction.
-///
-/// \param srcdst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source tile. Max size is 1024 Bytes.
-///
-/// \code{.operation}
-/// DEFINE zero_lower_mantissa_bits_fp32(x[31:0]) {
-/// dword[12:0] := 0
-/// dword[31:13] := x[31:13]
-/// return dword
-/// }
-///
-/// DEFINE silence_snan_fp32(x[31:0]) {
-/// IF (x.exponent == 255 and x.fraction != 0 and x.fraction[22] == 0)
-/// x.fraction[22] := 1
-/// return x
-/// }
-///
-/// elements_dest:= srcdst.colsb/4
-///
-/// FOR m := 0 TO (srcdst.rows-1)
-/// tmp[511:0] := 0
-/// FOR k := 0 TO (a.rows-1)
-/// FOR n := 0 TO (elements_dest-1)
-/// a1e := silence_snan_fp32(a.row[k].fp32[m])
-/// a2e := silence_snan_fp32(b.row[k].fp32[n])
-/// s1e := zero_lower_mantissa_bits_fp32(a1e)
-/// s2e := zero_lower_mantissa_bits_fp32(a2e)
-/// tmp.fp32[n] += s1e * s2e
-/// ENDFOR
-/// ENDFOR
-///
-/// FOR n := 0 TO (elements_dest-1)
-/// tmp.fp32[n] += srcdst.row[m].fp32[n]
-/// ENDFOR
-/// write_row_and_zero(srcdst, m, tmp, srcdst.colsb)
-///
-/// ENDFOR
-///
-/// zero_upper_rows(srcdst, srcdst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-#define _tile_tmmultf32ps(srcdst, a, b) \
- __builtin_ia32_ttmmultf32ps((srcdst), (a), (b))
-
-// dst = m x n (srcdest), src1 = k x m, src2 = k x n
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TF32_TRANSPOSE
-_tile_tmmultf32ps_internal(unsigned short m, unsigned short n, unsigned short k,
- _tile1024i dst, _tile1024i src1, _tile1024i src2) {
- return __builtin_ia32_ttmmultf32ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute the transpose of src0, perform matrix multiplication with src1,
-/// and then add the result to dst. All calculations are based on float32,
-/// but with the lower 13 mantissa bits of each operand set to 0.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS_TF32_TRANSPOSE
-static void __tile_tmmultf32ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
- dst->tile = _tile_tmmultf32ps_internal(src0.row, src1.col, src0.col,
- dst->tile, src0.tile, src1.tile);
-}
-
-#endif // __x86_64__
-#endif // __AMX_TF32TRANSPOSEINTRIN_H
diff --git a/clang/lib/Headers/amxtransposeintrin.h b/clang/lib/Headers/amxtransposeintrin.h
deleted file mode 100644
index b3fa37d..0000000
--- a/clang/lib/Headers/amxtransposeintrin.h
+++ /dev/null
@@ -1,248 +0,0 @@
-/* ===--- amxtransposeintrin.h - AMX_TRANSPOSE intrinsics -*- C++ -*---------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- * ===-----------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error "Never use <amxtransposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_TRANSPOSEINTRIN_H
-#define __AMX_TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS_TRANSPOSE \
- __attribute__((__always_inline__, __nodebug__, __target__("amx-transpose")))
-
-#define _tile_2rpntlvwz0(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz0(tdst, base, stride)
-#define _tile_2rpntlvwz0t1(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz0t1(tdst, base, stride)
-#define _tile_2rpntlvwz1(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz1(tdst, base, stride)
-#define _tile_2rpntlvwz1t1(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz1t1(tdst, base, stride)
-
-/// Transpose 32-bit elements from \a src and write the result to \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_transposed(__tile dst, __tile src);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src
-/// The source tile. Max size is 1024 Bytes.
-///
-/// \code{.operation}
-///
-/// FOR i := 0 TO (dst.rows-1)
-/// tmp[511:0] := 0
-/// FOR j := 0 TO (dst.colsb/4-1)
-/// tmp.dword[j] := src.row[j].dword[i]
-/// ENDFOR
-/// dst.row[i] := tmp
-/// ENDFOR
-///
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-#define _tile_transposed(dst, src) __builtin_ia32_ttransposed(dst, src)
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- // Use __tile1024i_1024a* to escape the alignment check in
- // clang/test/Headers/x86-intrinsics-headers-clean.cpp
- __builtin_ia32_t2rpntlvwz0_internal(row, col0, col1, (_tile1024i_1024a *)dst0,
- (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0t1_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- __builtin_ia32_t2rpntlvwz0t1_internal(
- row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- __builtin_ia32_t2rpntlvwz1_internal(row, col0, col1, (_tile1024i_1024a *)dst0,
- (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1t1_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- __builtin_ia32_t2rpntlvwz1t1_internal(
- row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-// This is an internal intrinsic. C/C++ users should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TRANSPOSE
-_tile_transposed_internal(unsigned short m, unsigned short n, _tile1024i src) {
- return __builtin_ia32_ttransposed_internal(m, n, src);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-/// Provides a hint to the implementation that the data will likely not be
-/// reused in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0 </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz0(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz0_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1 </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz0t1(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz0t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely not be
-/// reused in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz1(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely not be
-/// reused in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1 </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz1t1(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz1t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Transpose 32-bit elements from src and write the result to dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src
-/// The source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_transposed(__tile1024i *dst, __tile1024i src) {
- dst->tile = _tile_transposed_internal(dst->row, dst->col, src.tile);
-}
-
-#endif /* __x86_64__ */
-#endif /* __AMX_TRANSPOSEINTRIN_H */
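The removed amxtransposeintrin.h combined the T2RPNTLVWZ* paired loads with the TTRANSPOSED transpose. A minimal, now-obsolete sketch of the removed C API (names and shapes illustrative):

  #include <stddef.h>
  #include <immintrin.h>

  /* Compiles only against toolchains that still ship amxtransposeintrin.h. */
  static void load_pair_and_transpose(__tile1024i *t0, __tile1024i *t1,
                                      __tile1024i *out, const void *base,
                                      size_t stride) {
    __tile_2rpntlvwz0(t0, t1, base, stride); /* load two VNNI tiles from memory */
    __tile_transposed(out, *t0);             /* out = transpose of the first tile */
  }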
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 35f012c..19064a4 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -475,24 +475,12 @@ _storebe_i64(void * __P, long long __D) {
#include <amxfp8intrin.h>
-#include <amxtransposeintrin.h>
-
#include <amxmovrsintrin.h>
-#include <amxmovrstransposeintrin.h>
-
#include <amxavx512intrin.h>
#include <amxtf32intrin.h>
-#include <amxtf32transposeintrin.h>
-
-#include <amxbf16transposeintrin.h>
-
-#include <amxfp16transposeintrin.h>
-
-#include <amxcomplextransposeintrin.h>
-
#include <avx512vp2intersectintrin.h>
#include <avx512vlvp2intersectintrin.h>
diff --git a/clang/lib/Interpreter/InterpreterValuePrinter.cpp b/clang/lib/Interpreter/InterpreterValuePrinter.cpp
index 0ed02f3..cfa50ee 100644
--- a/clang/lib/Interpreter/InterpreterValuePrinter.cpp
+++ b/clang/lib/Interpreter/InterpreterValuePrinter.cpp
@@ -411,7 +411,8 @@ public:
}
InterfaceKind VisitReferenceType(const ReferenceType *Ty) {
- ExprResult AddrOfE = S.CreateBuiltinUnaryOp(SourceLocation(), UO_AddrOf, E);
+ ExprResult AddrOfE = S.CreateBuiltinUnaryOp(SourceLocation(), UO_AddrOf,
+ E->IgnoreImpCasts());
assert(!AddrOfE.isInvalid() && "Can not create unary expression");
Args.push_back(AddrOfE.get());
return InterfaceKind::NoAlloc;
@@ -537,7 +538,7 @@ llvm::Expected<Expr *> Interpreter::convertExprToValue(Expr *E) {
QualType DesugaredTy = Ty.getDesugaredType(Ctx);
// For lvalue struct, we treat it as a reference.
- if (DesugaredTy->isRecordType() && E->isLValue()) {
+ if (DesugaredTy->isRecordType() && E->IgnoreImpCasts()->isLValue()) {
DesugaredTy = Ctx.getLValueReferenceType(DesugaredTy);
Ty = Ctx.getLValueReferenceType(Ty);
}
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index 65c324c..f05c28fd 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -221,7 +221,7 @@ std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName,
// file.
for (const std::string &Dir : HSOpts.PrebuiltModulePaths) {
SmallString<256> Result(Dir);
- llvm::sys::fs::make_absolute(Result);
+ FileMgr.makeAbsolutePath(Result);
if (ModuleName.contains(':'))
// The separator of C++20 modules partitions (':') is not good for file
// systems, here clang and gcc choose '-' by default since it is not a
@@ -246,7 +246,7 @@ std::string HeaderSearch::getPrebuiltImplicitModuleFileName(Module *Module) {
StringRef ModuleCacheHash = HSOpts.DisableModuleHash ? "" : getModuleHash();
for (const std::string &Dir : HSOpts.PrebuiltModulePaths) {
SmallString<256> CachePath(Dir);
- llvm::sys::fs::make_absolute(CachePath);
+ FileMgr.makeAbsolutePath(CachePath);
llvm::sys::path::append(CachePath, ModuleCacheHash);
std::string FileName =
getCachedModuleFileNameImpl(ModuleName, ModuleMapPath, CachePath);
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index e4b158e..7e4a164 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -4248,6 +4248,13 @@ void Parser::ParseDeclarationSpecifiers(
// type-specifier
case tok::kw_short:
+ if (!getLangOpts().NativeInt16Type) {
+ Diag(Tok, diag::err_unknown_typename) << Tok.getName();
+ DS.SetTypeSpecError();
+ DS.SetRangeEnd(Tok.getLocation());
+ ConsumeToken();
+ goto DoneWithDeclSpec;
+ }
isInvalid = DS.SetTypeSpecWidth(TypeSpecifierWidth::Short, Loc, PrevSpec,
DiagID, Policy);
break;
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 25199c7..31bc941 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -3221,6 +3221,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
else
Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective);
break;
+ case OMPC_threadset:
case OMPC_fail:
case OMPC_proc_bind:
case OMPC_atomic_default_mem_order:
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index e32f437..139c4ab 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -153,7 +153,48 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
- case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: {
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: {
StringRef FeatureList(
getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
if (!Builtin::evaluateRequiredTargetFeatures(FeatureList,
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f451787..ad2c2e4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3542,9 +3542,7 @@ bool Sema::ValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum) {
bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx,
unsigned FirstArg, FormatStringInfo *FSI) {
- bool IsCXXMember = false;
- if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
- IsCXXMember = MD->isInstance();
+ bool HasImplicitThisParam = hasImplicitObjectParameter(D);
bool IsVariadic = false;
if (const FunctionType *FnTy = D->getFunctionType())
IsVariadic = cast<FunctionProtoType>(FnTy)->isVariadic();
@@ -3553,11 +3551,12 @@ bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx,
else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D))
IsVariadic = OMD->isVariadic();
- return getFormatStringInfo(FormatIdx, FirstArg, IsCXXMember, IsVariadic, FSI);
+ return getFormatStringInfo(FormatIdx, FirstArg, HasImplicitThisParam,
+ IsVariadic, FSI);
}
bool Sema::getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
- bool IsCXXMember, bool IsVariadic,
+ bool HasImplicitThisParam, bool IsVariadic,
FormatStringInfo *FSI) {
if (FirstArg == 0)
FSI->ArgPassingKind = FAPK_VAList;
@@ -3571,7 +3570,7 @@ bool Sema::getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
// The way the format attribute works in GCC, the implicit this argument
// of member functions is counted. However, it doesn't appear in our own
// lists, so decrement format_idx in that case.
- if (IsCXXMember) {
+ if (HasImplicitThisParam) {
if(FSI->FormatIdx == 0)
return false;
--FSI->FormatIdx;
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 0514d10..aa93507 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -10208,6 +10208,24 @@ void SemaCodeCompletion::CodeCompletePreprocessorDirective(bool InConditional) {
Builder.AddPlaceholderChunk("message");
Results.AddResult(Builder.TakeString());
+ if (getLangOpts().C23) {
+ // #embed "file"
+ Builder.AddTypedTextChunk("embed");
+ Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
+ Builder.AddTextChunk("\"");
+ Builder.AddPlaceholderChunk("file");
+ Builder.AddTextChunk("\"");
+ Results.AddResult(Builder.TakeString());
+
+ // #embed <file>
+ Builder.AddTypedTextChunk("embed");
+ Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
+ Builder.AddTextChunk("<");
+ Builder.AddPlaceholderChunk("file");
+ Builder.AddTextChunk(">");
+ Results.AddResult(Builder.TakeString());
+ }
+
// Note: #ident and #sccs are such crazy anachronisms that we don't provide
// completions for them. And __include_macros is a Clang-internal extension
// that we don't want to encourage anyone to use.
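The completions added above surface the C23 #embed directive in both its quoted and angle-bracket forms. A short usage sketch of what the completed directive looks like in C23 source (the file name is illustrative):

  /* C23: splice the bytes of a file into an initializer. */
  static const unsigned char logo[] = {
  #embed "logo.png" /* or: #embed <logo.png> when found via the include paths */
  };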
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 964a2a7..a9e7b44 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3785,7 +3785,7 @@ static bool handleFormatAttrCommon(Sema &S, Decl *D, const ParsedAttr &AL,
// In C++ the implicit 'this' function parameter also counts, and they are
// counted from one.
- bool HasImplicitThisParam = isInstanceMethod(D);
+ bool HasImplicitThisParam = hasImplicitObjectParameter(D);
Info->NumArgs = getFunctionOrMethodNumParams(D) + HasImplicitThisParam;
Info->Identifier = AL.getArgAsIdent(0)->getIdentifierInfo();
@@ -3926,7 +3926,7 @@ static void handleCallbackAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
return;
}
- bool HasImplicitThisParam = isInstanceMethod(D);
+ bool HasImplicitThisParam = hasImplicitObjectParameter(D);
int32_t NumArgs = getFunctionOrMethodNumParams(D);
FunctionDecl *FD = D->getAsFunction();
@@ -4110,7 +4110,7 @@ static void handleLifetimeCaptureByAttr(Sema &S, Decl *D,
}
void Sema::LazyProcessLifetimeCaptureByParams(FunctionDecl *FD) {
- bool HasImplicitThisParam = isInstanceMethod(FD);
+ bool HasImplicitThisParam = hasImplicitObjectParameter(FD);
SmallVector<LifetimeCaptureByAttr *, 1> Attrs;
for (ParmVarDecl *PVD : FD->parameters())
if (auto *A = PVD->getAttr<LifetimeCaptureByAttr>())
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 6d5cb0f..256f952 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -17216,6 +17216,10 @@ OMPClause *SemaOpenMP::ActOnOpenMPSimpleClause(
static_cast<OpenMPSeverityClauseKind>(Argument), ArgumentLoc, StartLoc,
LParenLoc, EndLoc);
break;
+ case OMPC_threadset:
+ Res = ActOnOpenMPThreadsetClause(static_cast<OpenMPThreadsetKind>(Argument),
+ ArgumentLoc, StartLoc, LParenLoc, EndLoc);
+ break;
case OMPC_if:
case OMPC_final:
case OMPC_num_threads:
@@ -17355,6 +17359,23 @@ OMPClause *SemaOpenMP::ActOnOpenMPDefaultClause(
OMPDefaultClause(M, MLoc, VCKind, VCKindLoc, StartLoc, LParenLoc, EndLoc);
}
+OMPClause *SemaOpenMP::ActOnOpenMPThreadsetClause(OpenMPThreadsetKind Kind,
+ SourceLocation KindLoc,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ if (Kind == OMPC_THREADSET_unknown) {
+ Diag(KindLoc, diag::err_omp_unexpected_clause_value)
+ << getListOfPossibleValues(OMPC_threadset, /*First=*/0,
+ /*Last=*/unsigned(OMPC_THREADSET_unknown))
+ << getOpenMPClauseName(OMPC_threadset);
+ return nullptr;
+ }
+
+ return new (getASTContext())
+ OMPThreadsetClause(Kind, KindLoc, StartLoc, LParenLoc, EndLoc);
+}
+
OMPClause *SemaOpenMP::ActOnOpenMPProcBindClause(ProcBindKind Kind,
SourceLocation KindKwLoc,
SourceLocation StartLoc,
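
Note: an illustrative directive (not from this patch) of the kind the new
ActOnOpenMPThreadsetClause handling accepts; per OpenMP 6.0 the argument is
omp_team or omp_pool, and anything else is rejected with
err_omp_unexpected_clause_value.

  void spawn() {
  #pragma omp task threadset(omp_pool)
    {
      // task body
    }
  }
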
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index 850bcb1..2f61bdd 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -489,14 +489,6 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_tileloaddrst164:
case X86::BI__builtin_ia32_tilestored64:
case X86::BI__builtin_ia32_tilezero:
- case X86::BI__builtin_ia32_t2rpntlvwz0:
- case X86::BI__builtin_ia32_t2rpntlvwz0t1:
- case X86::BI__builtin_ia32_t2rpntlvwz1:
- case X86::BI__builtin_ia32_t2rpntlvwz1t1:
- case X86::BI__builtin_ia32_t2rpntlvwz0rst1:
- case X86::BI__builtin_ia32_t2rpntlvwz1rs:
- case X86::BI__builtin_ia32_t2rpntlvwz1rst1:
- case X86::BI__builtin_ia32_t2rpntlvwz0rs:
case X86::BI__builtin_ia32_tcvtrowps2bf16h:
case X86::BI__builtin_ia32_tcvtrowps2bf16l:
case X86::BI__builtin_ia32_tcvtrowps2phh:
@@ -516,17 +508,8 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_tdpbhf8ps:
case X86::BI__builtin_ia32_tdphbf8ps:
case X86::BI__builtin_ia32_tdphf8ps:
- case X86::BI__builtin_ia32_ttdpbf16ps:
- case X86::BI__builtin_ia32_ttdpfp16ps:
- case X86::BI__builtin_ia32_ttcmmimfp16ps:
- case X86::BI__builtin_ia32_ttcmmrlfp16ps:
- case X86::BI__builtin_ia32_tconjtcmmimfp16ps:
case X86::BI__builtin_ia32_tmmultf32ps:
- case X86::BI__builtin_ia32_ttmmultf32ps:
return CheckBuiltinTileRangeAndDuplicate(TheCall, {0, 1, 2});
- case X86::BI__builtin_ia32_ttransposed:
- case X86::BI__builtin_ia32_tconjtfp16:
- return CheckBuiltinTileArgumentsRange(TheCall, {0, 1});
}
}
static bool isX86_32Builtin(unsigned BuiltinID) {
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 0c8c1d1..8c20078 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -10624,6 +10624,13 @@ TreeTransform<Derived>::TransformOMPDefaultClause(OMPDefaultClause *C) {
template <typename Derived>
OMPClause *
+TreeTransform<Derived>::TransformOMPThreadsetClause(OMPThreadsetClause *C) {
+ // No need to rebuild this clause, no template-dependent parameters.
+ return C;
+}
+
+template <typename Derived>
+OMPClause *
TreeTransform<Derived>::TransformOMPProcBindClause(OMPProcBindClause *C) {
return getDerived().RebuildOMPProcBindClause(
C->getProcBindKind(), C->getProcBindKindKwLoc(), C->getBeginLoc(),
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index c1b5cb7..e3106f8d 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11255,6 +11255,9 @@ OMPClause *OMPClauseReader::readClause() {
case llvm::omp::OMPC_mergeable:
C = new (Context) OMPMergeableClause();
break;
+ case llvm::omp::OMPC_threadset:
+ C = new (Context) OMPThreadsetClause();
+ break;
case llvm::omp::OMPC_read:
C = new (Context) OMPReadClause();
break;
@@ -11658,6 +11661,17 @@ void OMPClauseReader::VisitOMPDefaultClause(OMPDefaultClause *C) {
C->setDefaultVariableCategoryLocation(Record.readSourceLocation());
}
+// Read the parameters of the threadset clause. These were saved earlier by
+// OMPClauseWriter::VisitOMPThreadsetClause.
+void OMPClauseReader::VisitOMPThreadsetClause(OMPThreadsetClause *C) {
+ C->setLParenLoc(Record.readSourceLocation());
+ SourceLocation ThreadsetKindLoc = Record.readSourceLocation();
+ C->setThreadsetKindLoc(ThreadsetKindLoc);
+ OpenMPThreadsetKind TKind =
+ static_cast<OpenMPThreadsetKind>(Record.readInt());
+ C->setThreadsetKind(TKind);
+}
+
void OMPClauseReader::VisitOMPProcBindClause(OMPProcBindClause *C) {
C->setProcBindKind(static_cast<llvm::omp::ProcBindKind>(Record.readInt()));
C->setLParenLoc(Record.readSourceLocation());
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 377e396..3ac338e 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7913,6 +7913,12 @@ void OMPClauseWriter::VisitOMPDefaultClause(OMPDefaultClause *C) {
Record.AddSourceLocation(C->getDefaultVCLoc());
}
+void OMPClauseWriter::VisitOMPThreadsetClause(OMPThreadsetClause *C) {
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getThreadsetKindLoc());
+ Record.writeEnum(C->getThreadsetKind());
+}
+
void OMPClauseWriter::VisitOMPProcBindClause(OMPProcBindClause *C) {
Record.push_back(unsigned(C->getProcBindKind()));
Record.AddSourceLocation(C->getLParenLoc());
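
Note: a toy illustration (not Clang API) of the invariant the reader and
writer above maintain; the serialized record is whatever the writer emits, so
the read order must mirror the write order field for field.

  #include <cstddef>
  #include <vector>

  struct ToyRecord {
    std::vector<unsigned> Data;
    std::size_t Idx = 0;
    void write(unsigned V) { Data.push_back(V); }
    unsigned read() { return Data[Idx++]; }
  };

  // Write order: LParenLoc, ThreadsetKindLoc, ThreadsetKind.
  void emit(ToyRecord &R, unsigned LParen, unsigned KindLoc, unsigned Kind) {
    R.write(LParen);
    R.write(KindLoc);
    R.write(Kind);
  }

  // Read order matches emit() exactly, as VisitOMPThreadsetClause does above.
  void parse(ToyRecord &R, unsigned &LParen, unsigned &KindLoc, unsigned &Kind) {
    LParen = R.read();
    KindLoc = R.read();
    Kind = R.read();
  }
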
diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp
index 171c786..b4bdec1 100644
--- a/clang/lib/Tooling/Transformer/RangeSelector.cpp
+++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp
@@ -205,8 +205,12 @@ RangeSelector transformer::name(std::string ID) {
// `foo<int>` for which this range will be too short. Doing so will
// require subcasing `NamedDecl`, because it doesn't provide virtual
// access to the \c DeclarationNameInfo.
- if (tooling::getText(R, *Result.Context) != D->getName())
- return CharSourceRange();
+ StringRef Text = tooling::getText(R, *Result.Context);
+ if (Text != D->getName())
+ return llvm::make_error<StringError>(
+ llvm::errc::not_supported,
+ "range selected by name(node id=" + ID + "): '" + Text +
+ "' is different from decl name '" + D->getName() + "'");
return R;
}
if (const auto *E = Node.get<DeclRefExpr>()) {