aboutsummaryrefslogtreecommitdiff
path: root/clang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/AST/ByteCode/InterpBuiltin.cpp42
-rw-r--r--clang/lib/AST/CommentSema.cpp2
-rw-r--r--clang/lib/AST/ExprConstant.cpp46
-rw-r--r--clang/lib/Basic/Targets/AVR.cpp34
-rw-r--r--clang/lib/Basic/Targets/NVPTX.cpp2
-rw-r--r--clang/lib/Basic/Targets/PPC.h52
-rw-r--r--clang/lib/Basic/Targets/X86.cpp6
-rw-r--r--clang/lib/Basic/Targets/X86.h1
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp8
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenFunction.cpp36
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenFunction.h81
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp2
-rw-r--r--clang/lib/CodeGen/BackendUtil.cpp2
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp57
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/X86.cpp68
-rw-r--r--clang/lib/CodeGen/Targets/SPIR.cpp3
-rw-r--r--clang/lib/Driver/Driver.cpp12
-rw-r--r--clang/lib/Driver/ToolChains/Arch/M68k.cpp12
-rw-r--r--clang/lib/Driver/ToolChains/Arch/Mips.cpp4
-rw-r--r--clang/lib/Driver/ToolChains/Clang.cpp1
-rw-r--r--clang/lib/Driver/ToolChains/Darwin.cpp12
-rw-r--r--clang/lib/Driver/ToolChains/HLSL.cpp9
-rw-r--r--clang/lib/Driver/ToolChains/Solaris.cpp2
-rw-r--r--clang/lib/Format/ContinuationIndenter.cpp97
-rw-r--r--clang/lib/Format/Format.cpp111
-rw-r--r--clang/lib/Format/FormatToken.cpp4
-rw-r--r--clang/lib/Format/FormatToken.h6
-rw-r--r--clang/lib/Format/TokenAnnotator.cpp25
-rw-r--r--clang/lib/Frontend/CompilerInvocation.cpp35
-rw-r--r--clang/lib/Frontend/InitPreprocessor.cpp2
-rw-r--r--clang/lib/Headers/CMakeLists.txt6
-rw-r--r--clang/lib/Headers/amxbf16transposeintrin.h94
-rw-r--r--clang/lib/Headers/amxcomplextransposeintrin.h303
-rw-r--r--clang/lib/Headers/amxfp16transposeintrin.h94
-rw-r--r--clang/lib/Headers/amxintrin.h2
-rw-r--r--clang/lib/Headers/amxmovrstransposeintrin.h200
-rw-r--r--clang/lib/Headers/amxtf32transposeintrin.h105
-rw-r--r--clang/lib/Headers/amxtransposeintrin.h248
-rw-r--r--clang/lib/Headers/hlsl/hlsl_compat_overloads.h2
-rw-r--r--clang/lib/Headers/hvx_hexagon_protos.h393
-rw-r--r--clang/lib/Headers/immintrin.h12
-rw-r--r--clang/lib/Parse/ParseDecl.cpp7
-rw-r--r--clang/lib/Sema/SemaChecking.cpp11
-rw-r--r--clang/lib/Sema/SemaCodeComplete.cpp18
-rw-r--r--clang/lib/Sema/SemaDeclAttr.cpp6
-rw-r--r--clang/lib/Sema/SemaX86.cpp17
-rw-r--r--clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp2
-rw-r--r--clang/lib/Tooling/Transformer/RangeSelector.cpp8
48 files changed, 917 insertions, 1385 deletions
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b3ab82d..8b57b96 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3411,7 +3411,7 @@ static bool interp__builtin_x86_byteshift(
static bool interp__builtin_ia32_shuffle_generic(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
- llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)>
+ llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
GetSourceIndex) {
assert(Call->getNumArgs() == 3);
@@ -3428,8 +3428,19 @@ static bool interp__builtin_ia32_shuffle_generic(
for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
- const Pointer &Src = (SrcVecIdx == 0) ? A : B;
- TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
+
+ if (SrcIdx < 0) {
+ // Zero out this element
+ if (ElemT == PT_Float) {
+ Dst.elem<Floating>(DstIdx) = Floating(
+ S.getASTContext().getFloatTypeSemantics(VecT->getElementType()));
+ } else {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); });
+ }
+ } else {
+ const Pointer &Src = (SrcVecIdx == 0) ? A : B;
+ TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
+ }
}
Dst.initializeAllElements();
@@ -4382,7 +4393,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
+ return std::pair<unsigned, int>{SrcIdx,
+ static_cast<int>(LaneOffset + Index)};
});
case X86::BI__builtin_ia32_shufpd:
case X86::BI__builtin_ia32_shufpd256:
@@ -4400,7 +4412,27 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
+ return std::pair<unsigned, int>{SrcIdx,
+ static_cast<int>(LaneOffset + Index)};
+ });
+ case X86::BI__builtin_ia32_insertps128:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned Mask) {
+ // Bits [3:0]: zero mask - if bit is set, zero this element
+ if ((Mask & (1 << DstIdx)) != 0) {
+ return std::pair<unsigned, int>{0, -1};
+ }
+ // Bits [7:6]: select element from source vector Y (0-3)
+ // Bits [5:4]: select destination position (0-3)
+ unsigned SrcElem = (Mask >> 6) & 0x3;
+ unsigned DstElem = (Mask >> 4) & 0x3;
+ if (DstIdx == DstElem) {
+ // Insert element from source vector (B) at this position
+ return std::pair<unsigned, int>{1, static_cast<int>(SrcElem)};
+ } else {
+ // Copy from destination vector (A)
+ return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)};
+ }
});
case X86::BI__builtin_ia32_pshufb128:
case X86::BI__builtin_ia32_pshufb256:
diff --git a/clang/lib/AST/CommentSema.cpp b/clang/lib/AST/CommentSema.cpp
index 27ff5ab..d5ba240 100644
--- a/clang/lib/AST/CommentSema.cpp
+++ b/clang/lib/AST/CommentSema.cpp
@@ -225,7 +225,7 @@ static ParamCommandPassDirection getParamPassDirection(StringRef Arg) {
return llvm::StringSwitch<ParamCommandPassDirection>(Arg)
.Case("[in]", ParamCommandPassDirection::In)
.Case("[out]", ParamCommandPassDirection::Out)
- .Cases("[in,out]", "[out,in]", ParamCommandPassDirection::InOut)
+ .Cases({"[in,out]", "[out,in]"}, ParamCommandPassDirection::InOut)
.Default(static_cast<ParamCommandPassDirection>(-1));
}
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d0404b9..97eeba8 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11621,7 +11621,7 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
static bool evalShuffleGeneric(
EvalInfo &Info, const CallExpr *Call, APValue &Out,
- llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)>
+ llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
GetSourceIndex) {
const auto *VT = Call->getType()->getAs<VectorType>();
@@ -11644,8 +11644,16 @@ static bool evalShuffleGeneric(
for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) {
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
- const APValue &Src = (SrcVecIdx == 0) ? A : B;
- ResultElements.push_back(Src.getVectorElt(SrcIdx));
+
+ if (SrcIdx < 0) {
+ // Zero out this element
+ QualType ElemTy = VT->getElementType();
+ ResultElements.push_back(
+ APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy))));
+ } else {
+ const APValue &Src = (SrcVecIdx == 0) ? A : B;
+ ResultElements.push_back(Src.getVectorElt(SrcIdx));
+ }
}
Out = APValue(ResultElements.data(), ResultElements.size());
@@ -12438,7 +12446,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!evalShuffleGeneric(
Info, E, R,
[](unsigned DstIdx,
- unsigned ShuffleMask) -> std::pair<unsigned, unsigned> {
+ unsigned ShuffleMask) -> std::pair<unsigned, int> {
constexpr unsigned LaneBits = 128u;
unsigned NumElemPerLane = LaneBits / 32;
unsigned NumSelectableElems = NumElemPerLane / 2;
@@ -12451,7 +12459,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return {SrcIdx, LaneOffset + Index};
+ return {SrcIdx, static_cast<int>(LaneOffset + Index)};
}))
return false;
return Success(R, E);
@@ -12463,7 +12471,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!evalShuffleGeneric(
Info, E, R,
[](unsigned DstIdx,
- unsigned ShuffleMask) -> std::pair<unsigned, unsigned> {
+ unsigned ShuffleMask) -> std::pair<unsigned, int> {
constexpr unsigned LaneBits = 128u;
unsigned NumElemPerLane = LaneBits / 64;
unsigned NumSelectableElems = NumElemPerLane / 2;
@@ -12476,7 +12484,31 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return {SrcIdx, LaneOffset + Index};
+ return {SrcIdx, static_cast<int>(LaneOffset + Index)};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_insertps128: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R,
+ [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+ // Bits [3:0]: zero mask - if bit is set, zero this element
+ if ((Mask & (1 << DstIdx)) != 0) {
+ return {0, -1};
+ }
+ // Bits [7:6]: select element from source vector Y (0-3)
+ // Bits [5:4]: select destination position (0-3)
+ unsigned SrcElem = (Mask >> 6) & 0x3;
+ unsigned DstElem = (Mask >> 4) & 0x3;
+ if (DstIdx == DstElem) {
+ // Insert element from source vector (B) at this position
+ return {1, static_cast<int>(SrcElem)};
+ } else {
+ // Copy from destination vector (A)
+ return {0, static_cast<int>(DstIdx)};
+ }
}))
return false;
return Success(R, E);
diff --git a/clang/lib/Basic/Targets/AVR.cpp b/clang/lib/Basic/Targets/AVR.cpp
index 2673669..90b4ac1 100644
--- a/clang/lib/Basic/Targets/AVR.cpp
+++ b/clang/lib/Basic/Targets/AVR.cpp
@@ -30,13 +30,13 @@ struct LLVM_LIBRARY_VISIBILITY MCUInfo {
// NOTE: This list has been synchronized with gcc-avr 5.4.0 and avr-libc 2.0.0.
static MCUInfo AVRMcus[] = {
- {"avr1", NULL, "1", 0},
+ {"avr1", nullptr, "1", 0},
{"at90s1200", "__AVR_AT90S1200__", "1", 0},
{"attiny11", "__AVR_ATtiny11__", "1", 0},
{"attiny12", "__AVR_ATtiny12__", "1", 0},
{"attiny15", "__AVR_ATtiny15__", "1", 0},
{"attiny28", "__AVR_ATtiny28__", "1", 0},
- {"avr2", NULL, "2", 1},
+ {"avr2", nullptr, "2", 1},
{"at90s2313", "__AVR_AT90S2313__", "2", 1},
{"at90s2323", "__AVR_AT90S2323__", "2", 1},
{"at90s2333", "__AVR_AT90S2333__", "2", 1},
@@ -50,7 +50,7 @@ static MCUInfo AVRMcus[] = {
{"at90s8515", "__AVR_AT90S8515__", "2", 1},
{"at90c8534", "__AVR_AT90c8534__", "2", 1},
{"at90s8535", "__AVR_AT90S8535__", "2", 1},
- {"avr25", NULL, "25", 1},
+ {"avr25", nullptr, "25", 1},
{"ata5272", "__AVR_ATA5272__", "25", 1},
{"ata6616c", "__AVR_ATA6616c__", "25", 1},
{"attiny13", "__AVR_ATtiny13__", "25", 1},
@@ -80,13 +80,13 @@ static MCUInfo AVRMcus[] = {
{"attiny48", "__AVR_ATtiny48__", "25", 1},
{"attiny88", "__AVR_ATtiny88__", "25", 1},
{"attiny828", "__AVR_ATtiny828__", "25", 1},
- {"avr3", NULL, "3", 1},
+ {"avr3", nullptr, "3", 1},
{"at43usb355", "__AVR_AT43USB355__", "3", 1},
{"at76c711", "__AVR_AT76C711__", "3", 1},
- {"avr31", NULL, "31", 1},
+ {"avr31", nullptr, "31", 1},
{"atmega103", "__AVR_ATmega103__", "31", 1},
{"at43usb320", "__AVR_AT43USB320__", "31", 1},
- {"avr35", NULL, "35", 1},
+ {"avr35", nullptr, "35", 1},
{"attiny167", "__AVR_ATtiny167__", "35", 1},
{"at90usb82", "__AVR_AT90USB82__", "35", 1},
{"at90usb162", "__AVR_AT90USB162__", "35", 1},
@@ -97,7 +97,7 @@ static MCUInfo AVRMcus[] = {
{"atmega16u2", "__AVR_ATmega16U2__", "35", 1},
{"atmega32u2", "__AVR_ATmega32U2__", "35", 1},
{"attiny1634", "__AVR_ATtiny1634__", "35", 1},
- {"avr4", NULL, "4", 1},
+ {"avr4", nullptr, "4", 1},
{"atmega8", "__AVR_ATmega8__", "4", 1},
{"ata6289", "__AVR_ATA6289__", "4", 1},
{"atmega8a", "__AVR_ATmega8A__", "4", 1},
@@ -123,7 +123,7 @@ static MCUInfo AVRMcus[] = {
{"at90pwm3", "__AVR_AT90PWM3__", "4", 1},
{"at90pwm3b", "__AVR_AT90PWM3B__", "4", 1},
{"at90pwm81", "__AVR_AT90PWM81__", "4", 1},
- {"avr5", NULL, "5", 1},
+ {"avr5", nullptr, "5", 1},
{"ata5702m322", "__AVR_ATA5702M322__", "5", 1},
{"ata5782", "__AVR_ATA5782__", "5", 1},
{"ata5790", "__AVR_ATA5790__", "5", 1},
@@ -230,7 +230,7 @@ static MCUInfo AVRMcus[] = {
{"at90scr100", "__AVR_AT90SCR100__", "5", 1},
{"at94k", "__AVR_AT94K__", "5", 1},
{"m3000", "__AVR_AT000__", "5", 1},
- {"avr51", NULL, "51", 2},
+ {"avr51", nullptr, "51", 2},
{"atmega128", "__AVR_ATmega128__", "51", 2},
{"atmega128a", "__AVR_ATmega128A__", "51", 2},
{"atmega1280", "__AVR_ATmega1280__", "51", 2},
@@ -243,12 +243,12 @@ static MCUInfo AVRMcus[] = {
{"at90can128", "__AVR_AT90CAN128__", "51", 2},
{"at90usb1286", "__AVR_AT90USB1286__", "51", 2},
{"at90usb1287", "__AVR_AT90USB1287__", "51", 2},
- {"avr6", NULL, "6", 4},
+ {"avr6", nullptr, "6", 4},
{"atmega2560", "__AVR_ATmega2560__", "6", 4},
{"atmega2561", "__AVR_ATmega2561__", "6", 4},
{"atmega256rfr2", "__AVR_ATmega256RFR2__", "6", 4},
{"atmega2564rfr2", "__AVR_ATmega2564RFR2__", "6", 4},
- {"avrxmega2", NULL, "102", 1},
+ {"avrxmega2", nullptr, "102", 1},
{"atxmega16a4", "__AVR_ATxmega16A4__", "102", 1},
{"atxmega16a4u", "__AVR_ATxmega16A4U__", "102", 1},
{"atxmega16c4", "__AVR_ATxmega16C4__", "102", 1},
@@ -262,7 +262,7 @@ static MCUInfo AVRMcus[] = {
{"atxmega32e5", "__AVR_ATxmega32E5__", "102", 1},
{"atxmega16e5", "__AVR_ATxmega16E5__", "102", 1},
{"atxmega8e5", "__AVR_ATxmega8E5__", "102", 1},
- {"avrxmega4", NULL, "104", 1},
+ {"avrxmega4", nullptr, "104", 1},
{"atxmega64a3", "__AVR_ATxmega64A3__", "104", 1},
{"atxmega64a3u", "__AVR_ATxmega64A3U__", "104", 1},
{"atxmega64a4u", "__AVR_ATxmega64A4U__", "104", 1},
@@ -271,10 +271,10 @@ static MCUInfo AVRMcus[] = {
{"atxmega64c3", "__AVR_ATxmega64C3__", "104", 1},
{"atxmega64d3", "__AVR_ATxmega64D3__", "104", 1},
{"atxmega64d4", "__AVR_ATxmega64D4__", "104", 1},
- {"avrxmega5", NULL, "105", 1},
+ {"avrxmega5", nullptr, "105", 1},
{"atxmega64a1", "__AVR_ATxmega64A1__", "105", 1},
{"atxmega64a1u", "__AVR_ATxmega64A1U__", "105", 1},
- {"avrxmega6", NULL, "106", 6},
+ {"avrxmega6", nullptr, "106", 6},
{"atxmega128a3", "__AVR_ATxmega128A3__", "106", 2},
{"atxmega128a3u", "__AVR_ATxmega128A3U__", "106", 2},
{"atxmega128b1", "__AVR_ATxmega128B1__", "106", 2},
@@ -294,11 +294,11 @@ static MCUInfo AVRMcus[] = {
{"atxmega256d3", "__AVR_ATxmega256D3__", "106", 4},
{"atxmega384c3", "__AVR_ATxmega384C3__", "106", 6},
{"atxmega384d3", "__AVR_ATxmega384D3__", "106", 6},
- {"avrxmega7", NULL, "107", 2},
+ {"avrxmega7", nullptr, "107", 2},
{"atxmega128a1", "__AVR_ATxmega128A1__", "107", 2},
{"atxmega128a1u", "__AVR_ATxmega128A1U__", "107", 2},
{"atxmega128a4u", "__AVR_ATxmega128A4U__", "107", 2},
- {"avrtiny", NULL, "100", 0},
+ {"avrtiny", nullptr, "100", 0},
{"attiny4", "__AVR_ATtiny4__", "100", 0},
{"attiny5", "__AVR_ATtiny5__", "100", 0},
{"attiny9", "__AVR_ATtiny9__", "100", 0},
@@ -307,7 +307,7 @@ static MCUInfo AVRMcus[] = {
{"attiny40", "__AVR_ATtiny40__", "100", 0},
{"attiny102", "__AVR_ATtiny102__", "100", 0},
{"attiny104", "__AVR_ATtiny104__", "100", 0},
- {"avrxmega3", NULL, "103", 1},
+ {"avrxmega3", nullptr, "103", 1},
{"attiny202", "__AVR_ATtiny202__", "103", 1},
{"attiny402", "__AVR_ATtiny402__", "103", 1},
{"attiny204", "__AVR_ATtiny204__", "103", 1},
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 9651c38..ec4e40b 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -171,7 +171,7 @@ ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
- .Cases("ptx", "nvptx", true)
+ .Cases({"ptx", "nvptx"}, true)
.Default(false);
}
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 846b240..d4ada2a 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -125,9 +125,8 @@ public:
.Cases({"power3", "pwr3"}, ArchDefinePpcgr)
.Cases({"power4", "pwr4"},
ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)
- .Cases("power5", "pwr5",
- ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr |
- ArchDefinePpcsq)
+ .Cases({"power5", "pwr5"}, ArchDefinePwr5 | ArchDefinePwr4 |
+ ArchDefinePpcgr | ArchDefinePpcsq)
.Cases({"power5x", "pwr5x"},
ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 |
ArchDefinePpcgr | ArchDefinePpcsq)
@@ -166,7 +165,7 @@ public:
ArchDefinePwr9 | ArchDefinePwr8 | ArchDefinePwr7 |
ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 |
ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)
- .Cases("8548", "e500", ArchDefineE500)
+ .Cases({"8548", "e500"}, ArchDefineE500)
.Default(ArchDefineNone);
}
return CPUKnown;
@@ -445,27 +444,17 @@ public:
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
IntMaxType = SignedLong;
Int64Type = SignedLong;
- std::string DataLayout;
if (Triple.isOSAIX()) {
// TODO: Set appropriate ABI for AIX platform.
- DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64";
LongDoubleWidth = 64;
LongDoubleAlign = DoubleAlign = 32;
LongDoubleFormat = &llvm::APFloat::IEEEdouble();
- } else if ((Triple.getArch() == llvm::Triple::ppc64le)) {
- DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64";
+ } else if ((Triple.getArch() == llvm::Triple::ppc64le) ||
+ Triple.isPPC64ELFv2ABI()) {
ABI = "elfv2";
} else {
- DataLayout = "E-m:e";
- if (Triple.isPPC64ELFv2ABI()) {
- ABI = "elfv2";
- DataLayout += "-Fn32";
- } else {
- ABI = "elfv1";
- DataLayout += "-Fi64";
- }
- DataLayout += "-i64:64-i128:128-n32:64";
+ ABI = "elfv1";
}
if (Triple.isOSFreeBSD() || Triple.isOSOpenBSD() || Triple.isMusl()) {
@@ -473,14 +462,12 @@ public:
LongDoubleFormat = &llvm::APFloat::IEEEdouble();
}
- if (Triple.isOSAIX() || Triple.isOSLinux())
- DataLayout += "-S128-v256:256:256-v512:512:512";
- resetDataLayout(DataLayout);
-
// Newer PPC64 instruction sets support atomics up to 16 bytes.
MaxAtomicPromoteWidth = 128;
// Baseline PPC64 supports inlining atomics up to 8 bytes.
MaxAtomicInlineWidth = 64;
+
+ calculateDataLayout();
}
void setMaxAtomicWidth() override {
@@ -495,10 +482,33 @@ public:
return TargetInfo::CharPtrBuiltinVaList;
}
+ void calculateDataLayout() {
+ std::string DataLayout;
+
+ if (getTriple().isOSAIX()) {
+ DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64";
+ } else if ((getTriple().getArch() == llvm::Triple::ppc64le)) {
+ DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64";
+ } else {
+ DataLayout = "E-m:e";
+ if (ABI == "elfv2") {
+ DataLayout += "-Fn32";
+ } else {
+ DataLayout += "-Fi64";
+ }
+ DataLayout += "-i64:64-i128:128-n32:64";
+ }
+
+ if (getTriple().isOSAIX() || getTriple().isOSLinux())
+ DataLayout += "-S128-v256:256:256-v512:512:512";
+ resetDataLayout(DataLayout);
+ }
+
// PPC64 Linux-specific ABI options.
bool setABI(const std::string &Name) override {
if (Name == "elfv1" || Name == "elfv2") {
ABI = Name;
+ calculateDataLayout();
return true;
}
return false;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index e71f10c..7a90c89 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -396,8 +396,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAMXFP8 = true;
} else if (Feature == "+amx-movrs") {
HasAMXMOVRS = true;
- } else if (Feature == "+amx-transpose") {
- HasAMXTRANSPOSE = true;
} else if (Feature == "+amx-avx512") {
HasAMXAVX512 = true;
} else if (Feature == "+amx-tf32") {
@@ -925,8 +923,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AMX_FP8__");
if (HasAMXMOVRS)
Builder.defineMacro("__AMX_MOVRS__");
- if (HasAMXTRANSPOSE)
- Builder.defineMacro("__AMX_TRANSPOSE__");
if (HasAMXAVX512)
Builder.defineMacro("__AMX_AVX512__");
if (HasAMXTF32)
@@ -1068,7 +1064,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("amx-movrs", true)
.Case("amx-tf32", true)
.Case("amx-tile", true)
- .Case("amx-transpose", true)
.Case("avx", true)
.Case("avx10.1", true)
.Case("avx10.2", true)
@@ -1189,7 +1184,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("amx-movrs", HasAMXMOVRS)
.Case("amx-tf32", HasAMXTF32)
.Case("amx-tile", HasAMXTILE)
- .Case("amx-transpose", HasAMXTRANSPOSE)
.Case("avx", SSELevel >= AVX)
.Case("avx10.1", HasAVX10_1)
.Case("avx10.2", HasAVX10_2)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index be3a473..e7da262 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -160,7 +160,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAMXCOMPLEX = false;
bool HasAMXFP8 = false;
bool HasAMXMOVRS = false;
- bool HasAMXTRANSPOSE = false;
bool HasAMXAVX512 = false;
bool HasAMXTF32 = false;
bool HasSERIALIZE = false;
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 3c9c7ec..0198a9d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -771,14 +771,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI_WriteBarrier:
case X86::BI_AddressOfReturnAddress:
case X86::BI__stosb:
- case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
case X86::BI__ud2:
case X86::BI__int2c:
case X86::BI__readfsbyte:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 71ff20a..5d5209b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -242,12 +242,19 @@ void CIRGenFunction::LexicalScope::cleanup() {
}
};
- if (returnBlock != nullptr) {
- // Write out the return block, which loads the value from `__retval` and
- // issues the `cir.return`.
+ // Cleanup are done right before codegen resumes a scope. This is where
+ // objects are destroyed. Process all return blocks.
+ // TODO(cir): Handle returning from a switch statement through a cleanup
+ // block. We can't simply jump to the cleanup block, because the cleanup block
+ // is not part of the case region. Either reemit all cleanups in the return
+ // block or wait for MLIR structured control flow to support early exits.
+ llvm::SmallVector<mlir::Block *> retBlocks;
+ for (mlir::Block *retBlock : localScope->getRetBlocks()) {
mlir::OpBuilder::InsertionGuard guard(builder);
- builder.setInsertionPointToEnd(returnBlock);
- (void)emitReturn(*returnLoc);
+ builder.setInsertionPointToEnd(retBlock);
+ retBlocks.push_back(retBlock);
+ mlir::Location retLoc = localScope->getRetLoc(retBlock);
+ emitReturn(retLoc);
}
auto insertCleanupAndLeave = [&](mlir::Block *insPt) {
@@ -274,19 +281,22 @@ void CIRGenFunction::LexicalScope::cleanup() {
if (localScope->depth == 0) {
// Reached the end of the function.
- if (returnBlock != nullptr) {
- if (returnBlock->getUses().empty()) {
- returnBlock->erase();
+ // Special handling only for single return block case
+ if (localScope->getRetBlocks().size() == 1) {
+ mlir::Block *retBlock = localScope->getRetBlocks()[0];
+ mlir::Location retLoc = localScope->getRetLoc(retBlock);
+ if (retBlock->getUses().empty()) {
+ retBlock->erase();
} else {
// Thread return block via cleanup block.
if (cleanupBlock) {
- for (mlir::BlockOperand &blockUse : returnBlock->getUses()) {
+ for (mlir::BlockOperand &blockUse : retBlock->getUses()) {
cir::BrOp brOp = mlir::cast<cir::BrOp>(blockUse.getOwner());
brOp.setSuccessor(cleanupBlock);
}
}
- cir::BrOp::create(builder, *returnLoc, returnBlock);
+ cir::BrOp::create(builder, retLoc, retBlock);
return;
}
}
@@ -324,8 +334,10 @@ void CIRGenFunction::LexicalScope::cleanup() {
bool entryBlock = builder.getInsertionBlock()->isEntryBlock();
if (!entryBlock && curBlock->empty()) {
curBlock->erase();
- if (returnBlock != nullptr && returnBlock->getUses().empty())
- returnBlock->erase();
+ for (mlir::Block *retBlock : retBlocks) {
+ if (retBlock->getUses().empty())
+ retBlock->erase();
+ }
return;
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index c3fcd1a6..e5cecaa5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1103,44 +1103,69 @@ public:
// ---
private:
- // `returnBlock`, `returnLoc`, and all the functions that deal with them
- // will change and become more complicated when `switch` statements are
- // upstreamed. `case` statements within the `switch` are in the same scope
- // but have their own regions. Therefore the LexicalScope will need to
- // keep track of multiple return blocks.
- mlir::Block *returnBlock = nullptr;
- std::optional<mlir::Location> returnLoc;
-
- // See the comment on `getOrCreateRetBlock`.
+ // On switches we need one return block per region, since cases don't
+ // have their own scopes but are distinct regions nonetheless.
+
+ // TODO: This implementation should change once we have support for early
+ // exits in MLIR structured control flow (llvm-project#161575)
+ llvm::SmallVector<mlir::Block *> retBlocks;
+ llvm::DenseMap<mlir::Block *, mlir::Location> retLocs;
+ llvm::DenseMap<cir::CaseOp, unsigned> retBlockInCaseIndex;
+ std::optional<unsigned> normalRetBlockIndex;
+
+ // There's usually only one ret block per scope, but this needs to be
+ // get or create because of potential unreachable return statements, note
+ // that for those, all source location maps to the first one found.
mlir::Block *createRetBlock(CIRGenFunction &cgf, mlir::Location loc) {
- assert(returnBlock == nullptr && "only one return block per scope");
- // Create the cleanup block but don't hook it up just yet.
+ assert((isa_and_nonnull<cir::CaseOp>(
+ cgf.builder.getBlock()->getParentOp()) ||
+ retBlocks.size() == 0) &&
+ "only switches can hold more than one ret block");
+
+ // Create the return block but don't hook it up just yet.
mlir::OpBuilder::InsertionGuard guard(cgf.builder);
- returnBlock =
- cgf.builder.createBlock(cgf.builder.getBlock()->getParent());
- updateRetLoc(returnBlock, loc);
- return returnBlock;
+ auto *b = cgf.builder.createBlock(cgf.builder.getBlock()->getParent());
+ retBlocks.push_back(b);
+ updateRetLoc(b, loc);
+ return b;
}
cir::ReturnOp emitReturn(mlir::Location loc);
void emitImplicitReturn();
public:
- mlir::Block *getRetBlock() { return returnBlock; }
- mlir::Location getRetLoc(mlir::Block *b) { return *returnLoc; }
- void updateRetLoc(mlir::Block *b, mlir::Location loc) { returnLoc = loc; }
-
- // Create the return block for this scope, or return the existing one.
- // This get-or-create logic is necessary to handle multiple return
- // statements within the same scope, which can happen if some of them are
- // dead code or if there is a `goto` into the middle of the scope.
+ llvm::ArrayRef<mlir::Block *> getRetBlocks() { return retBlocks; }
+ mlir::Location getRetLoc(mlir::Block *b) { return retLocs.at(b); }
+ void updateRetLoc(mlir::Block *b, mlir::Location loc) {
+ retLocs.insert_or_assign(b, loc);
+ }
+
mlir::Block *getOrCreateRetBlock(CIRGenFunction &cgf, mlir::Location loc) {
- if (returnBlock == nullptr) {
- returnBlock = createRetBlock(cgf, loc);
- return returnBlock;
+ // Check if we're inside a case region
+ if (auto caseOp = mlir::dyn_cast_if_present<cir::CaseOp>(
+ cgf.builder.getBlock()->getParentOp())) {
+ auto iter = retBlockInCaseIndex.find(caseOp);
+ if (iter != retBlockInCaseIndex.end()) {
+ // Reuse existing return block
+ mlir::Block *ret = retBlocks[iter->second];
+ updateRetLoc(ret, loc);
+ return ret;
+ }
+ // Create new return block
+ mlir::Block *ret = createRetBlock(cgf, loc);
+ retBlockInCaseIndex[caseOp] = retBlocks.size() - 1;
+ return ret;
}
- updateRetLoc(returnBlock, loc);
- return returnBlock;
+
+ if (normalRetBlockIndex) {
+ mlir::Block *ret = retBlocks[*normalRetBlockIndex];
+ updateRetLoc(ret, loc);
+ return ret;
+ }
+
+ mlir::Block *ret = createRetBlock(cgf, loc);
+ normalRetBlockIndex = retBlocks.size() - 1;
+ return ret;
}
mlir::Block *getEntryBlock() { return entryBlock; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index 5010137..527dfd2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -126,7 +126,7 @@ class OpenACCClauseCIREmitter final
.CaseLower("default", mlir::acc::DeviceType::Default)
.CaseLower("host", mlir::acc::DeviceType::Host)
.CaseLower("multicore", mlir::acc::DeviceType::Multicore)
- .CasesLower("nvidia", "acc_device_nvidia",
+ .CasesLower({"nvidia", "acc_device_nvidia"},
mlir::acc::DeviceType::Nvidia)
.CaseLower("radeon", mlir::acc::DeviceType::Radeon);
}
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 3c31314..b967a26 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -313,7 +313,7 @@ getCodeModel(const CodeGenOptions &CodeGenOpts) {
.Case("kernel", llvm::CodeModel::Kernel)
.Case("medium", llvm::CodeModel::Medium)
.Case("large", llvm::CodeModel::Large)
- .Cases("default", "", ~1u)
+ .Cases({"default", ""}, ~1u)
.Default(~0u);
assert(CodeModel != ~0u && "invalid code model!");
if (CodeModel == ~1u)
diff --git a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
index 6da65b6..8a1cab3 100644
--- a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
@@ -375,28 +375,28 @@ static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
CGF.EmitScalarExpr(E->getArg(1))});
}
-static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
- const CallExpr *E, CodeGenFunction &CGF) {
+static bool EnsureNativeHalfSupport(unsigned BuiltinID, const CallExpr *E,
+ CodeGenFunction &CGF) {
auto &C = CGF.CGM.getContext();
- if (!(C.getLangOpts().NativeHalfType ||
- !C.getTargetInfo().useFP16ConversionIntrinsics())) {
+ if (!C.getLangOpts().NativeHalfType &&
+ C.getTargetInfo().useFP16ConversionIntrinsics()) {
CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getQuotedName(BuiltinID) +
" requires native half type support.");
- return nullptr;
+ return false;
}
+ return true;
+}
- if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
- return MakeLdg(CGF, E);
-
- if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
- return MakeLdu(IntrinsicID, CGF, E);
+static Value *MakeHalfType(Function *Intrinsic, unsigned BuiltinID,
+ const CallExpr *E, CodeGenFunction &CGF) {
+ if (!EnsureNativeHalfSupport(BuiltinID, E, CGF))
+ return nullptr;
SmallVector<Value *, 16> Args;
- auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
- auto *FTy = F->getFunctionType();
+ auto *FTy = Intrinsic->getFunctionType();
unsigned ICEArguments = 0;
ASTContext::GetBuiltinTypeError Error;
- C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
+ CGF.CGM.getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
assert(Error == ASTContext::GE_None && "Should not codegen an error");
for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
assert((ICEArguments & (1 << i)) == 0);
@@ -407,8 +407,14 @@ static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
Args.push_back(ArgValue);
}
- return CGF.Builder.CreateCall(F, Args);
+ return CGF.Builder.CreateCall(Intrinsic, Args);
}
+
+static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
+ const CallExpr *E, CodeGenFunction &CGF) {
+ return MakeHalfType(CGF.CGM.getIntrinsic(IntrinsicID), BuiltinID, E, CGF);
+}
+
} // namespace
Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
@@ -913,9 +919,14 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
}
// The following builtins require half type support
case NVPTX::BI__nvvm_ex2_approx_f16:
- return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
+ return MakeHalfType(
+ CGM.getIntrinsic(Intrinsic::nvvm_ex2_approx, Builder.getHalfTy()),
+ BuiltinID, E, *this);
case NVPTX::BI__nvvm_ex2_approx_f16x2:
- return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
+ return MakeHalfType(
+ CGM.getIntrinsic(Intrinsic::nvvm_ex2_approx,
+ FixedVectorType::get(Builder.getHalfTy(), 2)),
+ BuiltinID, E, *this);
case NVPTX::BI__nvvm_ff2f16x2_rn:
return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
@@ -1049,12 +1060,22 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
case NVPTX::BI__nvvm_fabs_d:
return Builder.CreateUnaryIntrinsic(Intrinsic::fabs,
EmitScalarExpr(E->getArg(0)));
+ case NVPTX::BI__nvvm_ex2_approx_d:
+ case NVPTX::BI__nvvm_ex2_approx_f:
+ return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_ex2_approx,
+ EmitScalarExpr(E->getArg(0)));
+ case NVPTX::BI__nvvm_ex2_approx_ftz_f:
+ return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_ex2_approx_ftz,
+ EmitScalarExpr(E->getArg(0)));
case NVPTX::BI__nvvm_ldg_h:
case NVPTX::BI__nvvm_ldg_h2:
- return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
+ return EnsureNativeHalfSupport(BuiltinID, E, *this) ? MakeLdg(*this, E)
+ : nullptr;
case NVPTX::BI__nvvm_ldu_h:
case NVPTX::BI__nvvm_ldu_h2:
- return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
+ return EnsureNativeHalfSupport(BuiltinID, E, *this)
+ ? MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E)
+ : nullptr;
case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
index b924407..2381b2e 100644
--- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
@@ -2931,74 +2931,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// instruction, but it will create a memset that won't be optimized away.
return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
}
- // Corresponding to intrisics which will return 2 tiles (tile0_tile1).
- case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
- case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
- Intrinsic::ID IID;
- switch (BuiltinID) {
- default:
- llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
- IID = Intrinsic::x86_t2rpntlvwz0_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
- IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
- IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
- IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
- IID = Intrinsic::x86_t2rpntlvwz1_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
- IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
- IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
- break;
- case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
- IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
- break;
- }
-
- // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
- {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
-
- auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
- assert(PtrTy && "arg3 must be of pointer type");
- QualType PtreeTy = PtrTy->getPointeeType();
- llvm::Type *TyPtee = ConvertType(PtreeTy);
-
- // Bitcast amx type (x86_amx) to vector type (256 x i32)
- // Then store tile0 into DstPtr0
- Value *T0 = Builder.CreateExtractValue(Call, 0);
- Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
- {TyPtee}, {T0});
- Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
-
- // Then store tile1 into DstPtr1
- Value *T1 = Builder.CreateExtractValue(Call, 1);
- Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
- {TyPtee}, {T1});
- Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
-
- // Note: Here we escape directly use x86_tilestored64_internal to store
- // the results due to it can't make sure the Mem written scope. This may
- // cause shapes reloads after first amx intrinsic, which current amx reg-
- // ister allocation has no ability to handle it.
-
- return Store;
- }
case X86::BI__ud2:
// llvm.trap makes a ud2a instruction on x86.
return EmitTrapCall(Intrinsic::trap);
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 15d0b35..abd049a 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -260,7 +260,8 @@ CommonSPIRTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
LangAS AS = QT->getUnqualifiedDesugaredType()->isNullPtrType()
? LangAS::Default
: QT->getPointeeType().getAddressSpace();
- if (AS == LangAS::Default || AS == LangAS::opencl_generic)
+ if (AS == LangAS::Default || AS == LangAS::opencl_generic ||
+ AS == LangAS::opencl_constant)
return llvm::ConstantPointerNull::get(PT);
auto &Ctx = CGM.getContext();
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 71c5280..51618d1 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2540,10 +2540,14 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
}
if (C.getArgs().hasArg(options::OPT_print_runtime_dir)) {
- if (std::optional<std::string> RuntimePath = TC.getRuntimePath())
- llvm::outs() << *RuntimePath << '\n';
- else
- llvm::outs() << TC.getCompilerRTPath() << '\n';
+ for (auto RuntimePath :
+ {TC.getRuntimePath(), std::make_optional(TC.getCompilerRTPath())}) {
+ if (RuntimePath && getVFS().exists(*RuntimePath)) {
+ llvm::outs() << *RuntimePath << '\n';
+ return false;
+ }
+ }
+ llvm::outs() << "(runtime dir is not present)" << '\n';
return false;
}
diff --git a/clang/lib/Driver/ToolChains/Arch/M68k.cpp b/clang/lib/Driver/ToolChains/Arch/M68k.cpp
index 1037c0e..708ec84 100644
--- a/clang/lib/Driver/ToolChains/Arch/M68k.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/M68k.cpp
@@ -36,12 +36,12 @@ std::string m68k::getM68kTargetCPU(const ArgList &Args) {
return "generic";
return llvm::StringSwitch<std::string>(CPUName)
- .Cases("m68000", "68000", "M68000")
- .Cases("m68010", "68010", "M68010")
- .Cases("m68020", "68020", "M68020")
- .Cases("m68030", "68030", "M68030")
- .Cases("m68040", "68040", "M68040")
- .Cases("m68060", "68060", "M68060")
+ .Cases({"m68000", "68000"}, "M68000")
+ .Cases({"m68010", "68010"}, "M68010")
+ .Cases({"m68020", "68020"}, "M68020")
+ .Cases({"m68030", "68030"}, "M68030")
+ .Cases({"m68040", "68040"}, "M68040")
+ .Cases({"m68060", "68060"}, "M68060")
.Default(CPUName.str());
}
// FIXME: Throw error when multiple sub-architecture flag exist
diff --git a/clang/lib/Driver/ToolChains/Arch/Mips.cpp b/clang/lib/Driver/ToolChains/Arch/Mips.cpp
index 6a6a4ee..8d7b85d 100644
--- a/clang/lib/Driver/ToolChains/Arch/Mips.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/Mips.cpp
@@ -117,7 +117,7 @@ void mips::getMipsCPUAndABI(const ArgList &Args, const llvm::Triple &Triple,
// Deduce CPU name from ABI name.
CPUName = llvm::StringSwitch<const char *>(ABIName)
.Case("o32", DefMips32CPU)
- .Cases("n32", "n64", DefMips64CPU)
+ .Cases({"n32", "n64"}, DefMips64CPU)
.Default("");
}
@@ -467,7 +467,7 @@ bool mips::isNaN2008(const Driver &D, const ArgList &Args,
// NaN2008 is the default for MIPS32r6/MIPS64r6.
return llvm::StringSwitch<bool>(getCPUName(D, Args, Triple))
- .Cases("mips32r6", "mips64r6", true)
+ .Cases({"mips32r6", "mips64r6"}, true)
.Default(false);
}
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 4e8f63e..d3ab6f1 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3708,6 +3708,7 @@ static void RenderHLSLOptions(const ArgList &Args, ArgStringList &CmdArgs,
options::OPT_emit_obj,
options::OPT_disable_llvm_passes,
options::OPT_fnative_half_type,
+ options::OPT_fnative_int16_type,
options::OPT_hlsl_entrypoint,
options::OPT_fdx_rootsignature_define,
options::OPT_fdx_rootsignature_version,
diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index cc5bcd1..2fb7652 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -1035,12 +1035,12 @@ static const char *ArmMachOArchName(StringRef Arch) {
.Case("xscale", "xscale")
.Case("armv4t", "armv4t")
.Case("armv7", "armv7")
- .Cases("armv7a", "armv7-a", "armv7")
- .Cases("armv7r", "armv7-r", "armv7")
- .Cases("armv7em", "armv7e-m", "armv7em")
- .Cases("armv7k", "armv7-k", "armv7k")
- .Cases("armv7m", "armv7-m", "armv7m")
- .Cases("armv7s", "armv7-s", "armv7s")
+ .Cases({"armv7a", "armv7-a"}, "armv7")
+ .Cases({"armv7r", "armv7-r"}, "armv7")
+ .Cases({"armv7em", "armv7e-m"}, "armv7em")
+ .Cases({"armv7k", "armv7-k"}, "armv7k")
+ .Cases({"armv7m", "armv7-m"}, "armv7m")
+ .Cases({"armv7s", "armv7-s"}, "armv7s")
.Default(nullptr);
}
diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp b/clang/lib/Driver/ToolChains/HLSL.cpp
index 20a320e..8d3fba7 100644
--- a/clang/lib/Driver/ToolChains/HLSL.cpp
+++ b/clang/lib/Driver/ToolChains/HLSL.cpp
@@ -498,6 +498,15 @@ HLSLToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
continue;
}
+ if (A->getOption().getID() == options::OPT_enable_16bit_types) {
+ // Translate -enable-16bit-types into -fnative-half-type and
+ // -fnative-int16-type
+ DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_fnative_half_type));
+ DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_fnative_int16_type));
+ A->claim();
+ continue;
+ }
+
DAL->append(A);
}
diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp
index 02aa598..64c7d1c 100644
--- a/clang/lib/Driver/ToolChains/Solaris.cpp
+++ b/clang/lib/Driver/ToolChains/Solaris.cpp
@@ -346,7 +346,7 @@ SanitizerMask Solaris::getSupportedSanitizers() const {
const char *Solaris::getDefaultLinker() const {
// FIXME: Only handle Solaris ld and GNU ld here.
return llvm::StringSwitch<const char *>(getDriver().getPreferredLinker())
- .Cases("bfd", "gld", "/usr/gnu/bin/ld")
+ .Cases({"bfd", "gld"}, "/usr/gnu/bin/ld")
.Default("/usr/bin/ld");
}
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index e5abf83..9ab024a 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -356,9 +356,11 @@ bool ContinuationIndenter::canBreak(const LineState &State) {
return CurrentState.BreakBeforeClosingBrace;
}
- // Allow breaking before the right parens with block indentation if there was
- // a break after the left parens, which is tracked by BreakBeforeClosingParen.
- if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent &&
+ // Check need to break before the right parens if there was a break after
+ // the left parens, which is tracked by BreakBeforeClosingParen.
+ if ((Style.BreakBeforeCloseBracketFunction ||
+ Style.BreakBeforeCloseBracketIf || Style.BreakBeforeCloseBracketLoop ||
+ Style.BreakBeforeCloseBracketSwitch) &&
Current.is(tok::r_paren)) {
return CurrentState.BreakBeforeClosingParen;
}
@@ -837,32 +839,38 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
return Tok.is(tok::l_brace) && Tok.isNot(BK_Block) &&
Style.Cpp11BracedListStyle != FormatStyle::BLS_Block;
};
- if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) &&
- !IsStartOfBracedList()) {
+ if (IsStartOfBracedList())
+ return Style.BreakAfterOpenBracketBracedList;
+ if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square))
return false;
- }
if (!Tok.Previous)
return true;
if (Tok.Previous->isIf())
- return Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak;
- return Tok.Previous->isNoneOf(TT_CastRParen, tok::kw_for, tok::kw_while,
- tok::kw_switch) &&
- !(Style.isJavaScript() && Tok.Previous->is(Keywords.kw_await));
+ return Style.BreakAfterOpenBracketIf;
+ if (Tok.Previous->isLoop(Style))
+ return Style.BreakAfterOpenBracketLoop;
+ if (Tok.Previous->is(tok::kw_switch))
+ return Style.BreakAfterOpenBracketSwitch;
+ if (Style.BreakAfterOpenBracketFunction) {
+ return !Tok.Previous->is(TT_CastRParen) &&
+ !(Style.isJavaScript() && Tok.is(Keywords.kw_await));
+ }
+ return false;
};
auto IsFunctionCallParen = [](const FormatToken &Tok) {
return Tok.is(tok::l_paren) && Tok.ParameterCount > 0 && Tok.Previous &&
Tok.Previous->is(tok::identifier);
};
- auto IsInTemplateString = [this](const FormatToken &Tok) {
+ auto IsInTemplateString = [this](const FormatToken &Tok, bool NestBlocks) {
if (!Style.isJavaScript())
return false;
for (const auto *Prev = &Tok; Prev; Prev = Prev->Previous) {
if (Prev->is(TT_TemplateString) && Prev->opensScope())
return true;
- if (Prev->opensScope() ||
- (Prev->is(TT_TemplateString) && Prev->closesScope())) {
- break;
- }
+ if (Prev->opensScope() && !NestBlocks)
+ return false;
+ if (Prev->is(TT_TemplateString) && Prev->closesScope())
+ return false;
}
return false;
};
@@ -884,21 +892,25 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
Tok.isOneOf(tok::ellipsis, Keywords.kw_await))) {
return true;
}
- if (const auto *Previous = Tok.Previous;
- !Previous || (Previous->isNoneOf(TT_FunctionDeclarationLParen,
- TT_LambdaDefinitionLParen) &&
- !IsFunctionCallParen(*Previous))) {
+ const auto *Previous = TokAfterLParen.Previous;
+ assert(Previous); // IsOpeningBracket(Previous)
+ if (Previous->Previous &&
+ (Previous->Previous->isIf() || Previous->Previous->isLoop(Style) ||
+ Previous->Previous->is(tok::kw_switch))) {
+ return false;
+ }
+ if (Previous->isNoneOf(TT_FunctionDeclarationLParen,
+ TT_LambdaDefinitionLParen) &&
+ !IsFunctionCallParen(*Previous)) {
return true;
}
- if (IsOpeningBracket(Tok) || IsInTemplateString(Tok))
+ if (IsOpeningBracket(Tok) || IsInTemplateString(Tok, true))
return true;
const auto *Next = Tok.Next;
return !Next || Next->isMemberAccess() ||
Next->is(TT_FunctionDeclarationLParen) || IsFunctionCallParen(*Next);
};
- if ((Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak ||
- Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) &&
- IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) &&
+ if (IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) &&
// Don't do this for simple (no expressions) one-argument function calls
// as that feels like needlessly wasting whitespace, e.g.:
//
@@ -920,7 +932,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
// Note: This doesn't apply to macro expansion lines, which are MACRO( , , )
// with args as children of the '(' and ',' tokens. It does not make sense to
// align the commas with the opening paren.
- if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign &&
+ if (Style.AlignAfterOpenBracket &&
!CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() &&
Previous.isNoneOf(TT_ObjCMethodExpr, TT_RequiresClause,
TT_TableGenDAGArgOpener,
@@ -933,7 +945,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
Previous.Previous->isNoneOf(tok::identifier, tok::l_paren,
BK_BracedInit))) ||
Previous.is(TT_VerilogMultiLineListLParen)) &&
- !IsInTemplateString(Current)) {
+ !IsInTemplateString(Current, false)) {
CurrentState.Indent = State.Column + Spaces;
CurrentState.IsAligned = true;
}
@@ -1271,8 +1283,20 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
}
if (PreviousNonComment && PreviousNonComment->is(tok::l_paren)) {
- CurrentState.BreakBeforeClosingParen =
- Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent;
+ if (auto Previous = PreviousNonComment->Previous) {
+ if (Previous->isIf()) {
+ CurrentState.BreakBeforeClosingParen = Style.BreakBeforeCloseBracketIf;
+ } else if (Previous->isLoop(Style)) {
+ CurrentState.BreakBeforeClosingParen =
+ Style.BreakBeforeCloseBracketLoop;
+ } else if (Previous->is(tok::kw_switch)) {
+ CurrentState.BreakBeforeClosingParen =
+ Style.BreakBeforeCloseBracketSwitch;
+ } else {
+ CurrentState.BreakBeforeClosingParen =
+ Style.BreakBeforeCloseBracketFunction;
+ }
+ }
}
if (PreviousNonComment && PreviousNonComment->is(TT_TemplateOpener))
@@ -1416,13 +1440,17 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
State.Stack.size() > 1) {
return State.Stack[State.Stack.size() - 2].LastSpace;
}
- if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent &&
- (Current.is(tok::r_paren) ||
- (Current.is(tok::r_brace) && Current.MatchingParen &&
- Current.MatchingParen->is(BK_BracedInit))) &&
+ if (Style.BreakBeforeCloseBracketBracedList && Current.is(tok::r_brace) &&
+ Current.MatchingParen && Current.MatchingParen->is(BK_BracedInit) &&
State.Stack.size() > 1) {
return State.Stack[State.Stack.size() - 2].LastSpace;
}
+ if ((Style.BreakBeforeCloseBracketFunction ||
+ Style.BreakBeforeCloseBracketIf || Style.BreakBeforeCloseBracketLoop ||
+ Style.BreakBeforeCloseBracketSwitch) &&
+ Current.is(tok::r_paren) && State.Stack.size() > 1) {
+ return State.Stack[State.Stack.size() - 2].LastSpace;
+ }
if (Style.BreakBeforeTemplateCloser && Current.is(TT_TemplateCloser) &&
State.Stack.size() > 1) {
return State.Stack[State.Stack.size() - 2].LastSpace;
@@ -1844,8 +1872,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
PrecedenceLevel < prec::Assignment) &&
(!Previous || Previous->isNot(tok::kw_return) ||
(!Style.isJava() && PrecedenceLevel > 0)) &&
- (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign ||
- PrecedenceLevel > prec::Comma || Current.NestingLevel == 0) &&
+ (Style.AlignAfterOpenBracket || PrecedenceLevel > prec::Comma ||
+ Current.NestingLevel == 0) &&
(!Style.isTableGen() ||
(Previous && Previous->isOneOf(TT_TableGenDAGArgListComma,
TT_TableGenDAGArgListCommaToBreak)))) {
@@ -1885,8 +1913,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
if (PrecedenceLevel > prec::Unknown)
NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column);
if (PrecedenceLevel != prec::Conditional &&
- Current.isNot(TT_UnaryOperator) &&
- Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) {
+ Current.isNot(TT_UnaryOperator) && Style.AlignAfterOpenBracket) {
NewParenState.StartOfFunctionCall = State.Column;
}
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index edd126c..dd14fcd 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -32,6 +32,13 @@ using clang::format::FormatStyle;
LLVM_YAML_IS_SEQUENCE_VECTOR(FormatStyle::RawStringFormat)
+enum BracketAlignmentStyle : int8_t {
+ BAS_Align,
+ BAS_DontAlign,
+ BAS_AlwaysBreak,
+ BAS_BlockIndent
+};
+
namespace llvm {
namespace yaml {
template <>
@@ -204,16 +211,16 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {
}
};
-template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> {
- static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) {
- IO.enumCase(Value, "Align", FormatStyle::BAS_Align);
- IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign);
- IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak);
- IO.enumCase(Value, "BlockIndent", FormatStyle::BAS_BlockIndent);
+template <> struct ScalarEnumerationTraits<BracketAlignmentStyle> {
+ static void enumeration(IO &IO, BracketAlignmentStyle &Value) {
+ IO.enumCase(Value, "Align", BAS_Align);
+ IO.enumCase(Value, "DontAlign", BAS_DontAlign);
// For backward compatibility.
- IO.enumCase(Value, "true", FormatStyle::BAS_Align);
- IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign);
+ IO.enumCase(Value, "true", BAS_Align);
+ IO.enumCase(Value, "false", BAS_DontAlign);
+ IO.enumCase(Value, "AlwaysBreak", BAS_AlwaysBreak);
+ IO.enumCase(Value, "BlockIndent", BAS_BlockIndent);
}
};
@@ -979,6 +986,54 @@ template <> struct MappingTraits<FormatStyle> {
bool SpacesInCStyleCastParentheses = false;
bool SpacesInParentheses = false;
+ if (IO.outputting()) {
+ IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
+ } else {
+ // For backward compatibility.
+ BracketAlignmentStyle LocalBAS = BAS_Align;
+ if (IsGoogleOrChromium) {
+ FormatStyle::LanguageKind Language = Style.Language;
+ if (Language == FormatStyle::LK_None)
+ Language = ((FormatStyle *)IO.getContext())->Language;
+ if (Language == FormatStyle::LK_JavaScript)
+ LocalBAS = BAS_AlwaysBreak;
+ else if (Language == FormatStyle::LK_Java)
+ LocalBAS = BAS_DontAlign;
+ } else if (BasedOnStyle.equals_insensitive("webkit")) {
+ LocalBAS = BAS_DontAlign;
+ }
+ IO.mapOptional("AlignAfterOpenBracket", LocalBAS);
+ Style.BreakAfterOpenBracketBracedList = false;
+ Style.BreakAfterOpenBracketFunction = false;
+ Style.BreakAfterOpenBracketIf = false;
+ Style.BreakAfterOpenBracketLoop = false;
+ Style.BreakAfterOpenBracketSwitch = false;
+ Style.BreakBeforeCloseBracketBracedList = false;
+ Style.BreakBeforeCloseBracketFunction = false;
+ Style.BreakBeforeCloseBracketIf = false;
+ Style.BreakBeforeCloseBracketLoop = false;
+ Style.BreakBeforeCloseBracketSwitch = false;
+
+ switch (LocalBAS) {
+ case BAS_DontAlign:
+ Style.AlignAfterOpenBracket = false;
+ break;
+ case BAS_BlockIndent:
+ Style.BreakBeforeCloseBracketBracedList = true;
+ Style.BreakBeforeCloseBracketFunction = true;
+ Style.BreakBeforeCloseBracketIf = true;
+ [[fallthrough]];
+ case BAS_AlwaysBreak:
+ Style.BreakAfterOpenBracketBracedList = true;
+ Style.BreakAfterOpenBracketFunction = true;
+ Style.BreakAfterOpenBracketIf = true;
+ [[fallthrough]];
+ case BAS_Align:
+ Style.AlignAfterOpenBracket = true;
+ break;
+ }
+ }
+
// For backward compatibility.
if (!IO.outputting()) {
IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlines);
@@ -1014,7 +1069,6 @@ template <> struct MappingTraits<FormatStyle> {
}
IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
- IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
IO.mapOptional("AlignArrayOfStructures", Style.AlignArrayOfStructures);
IO.mapOptional("AlignConsecutiveAssignments",
Style.AlignConsecutiveAssignments);
@@ -1079,10 +1133,29 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("BreakAfterAttributes", Style.BreakAfterAttributes);
IO.mapOptional("BreakAfterJavaFieldAnnotations",
Style.BreakAfterJavaFieldAnnotations);
+ IO.mapOptional("BreakAfterOpenBracketBracedList",
+ Style.BreakAfterOpenBracketBracedList);
+ IO.mapOptional("BreakAfterOpenBracketFunction",
+ Style.BreakAfterOpenBracketFunction);
+ IO.mapOptional("BreakAfterOpenBracketIf", Style.BreakAfterOpenBracketIf);
+ IO.mapOptional("BreakAfterOpenBracketLoop",
+ Style.BreakAfterOpenBracketLoop);
+ IO.mapOptional("BreakAfterOpenBracketSwitch",
+ Style.BreakAfterOpenBracketSwitch);
IO.mapOptional("BreakAfterReturnType", Style.BreakAfterReturnType);
IO.mapOptional("BreakArrays", Style.BreakArrays);
IO.mapOptional("BreakBeforeBinaryOperators",
Style.BreakBeforeBinaryOperators);
+ IO.mapOptional("BreakBeforeCloseBracketBracedList",
+ Style.BreakBeforeCloseBracketBracedList);
+ IO.mapOptional("BreakBeforeCloseBracketFunction",
+ Style.BreakBeforeCloseBracketFunction);
+ IO.mapOptional("BreakBeforeCloseBracketIf",
+ Style.BreakBeforeCloseBracketIf);
+ IO.mapOptional("BreakBeforeCloseBracketLoop",
+ Style.BreakBeforeCloseBracketLoop);
+ IO.mapOptional("BreakBeforeCloseBracketSwitch",
+ Style.BreakBeforeCloseBracketSwitch);
IO.mapOptional("BreakBeforeConceptDeclarations",
Style.BreakBeforeConceptDeclarations);
IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
@@ -1561,7 +1634,7 @@ static void expandPresetsSpacesInParens(FormatStyle &Expanded) {
FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
FormatStyle LLVMStyle;
LLVMStyle.AccessModifierOffset = -2;
- LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align;
+ LLVMStyle.AlignAfterOpenBracket = true;
LLVMStyle.AlignArrayOfStructures = FormatStyle::AIAS_None;
LLVMStyle.AlignConsecutiveAssignments = {};
LLVMStyle.AlignConsecutiveAssignments.PadOperators = true;
@@ -1621,10 +1694,20 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
LLVMStyle.BreakAdjacentStringLiterals = true;
LLVMStyle.BreakAfterAttributes = FormatStyle::ABS_Leave;
LLVMStyle.BreakAfterJavaFieldAnnotations = false;
+ LLVMStyle.BreakAfterOpenBracketBracedList = false;
+ LLVMStyle.BreakAfterOpenBracketFunction = false;
+ LLVMStyle.BreakAfterOpenBracketIf = false;
+ LLVMStyle.BreakAfterOpenBracketLoop = false;
+ LLVMStyle.BreakAfterOpenBracketSwitch = false;
LLVMStyle.BreakAfterReturnType = FormatStyle::RTBS_None;
LLVMStyle.BreakArrays = true;
LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
+ LLVMStyle.BreakBeforeCloseBracketBracedList = false;
+ LLVMStyle.BreakBeforeCloseBracketFunction = false;
+ LLVMStyle.BreakBeforeCloseBracketIf = false;
+ LLVMStyle.BreakBeforeCloseBracketLoop = false;
+ LLVMStyle.BreakBeforeCloseBracketSwitch = false;
LLVMStyle.BreakBeforeConceptDeclarations = FormatStyle::BBCDS_Always;
LLVMStyle.BreakBeforeInlineASMColon = FormatStyle::BBIAS_OnlyMultiline;
LLVMStyle.BreakBeforeTemplateCloser = false;
@@ -1877,7 +1960,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
if (Language == FormatStyle::LK_Java) {
- GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
+ GoogleStyle.AlignAfterOpenBracket = false;
GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign;
GoogleStyle.AlignTrailingComments = {};
GoogleStyle.AlignTrailingComments.Kind = FormatStyle::TCAS_Never;
@@ -1889,7 +1972,9 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
GoogleStyle.SpaceAfterCStyleCast = true;
GoogleStyle.SpacesBeforeTrailingComments = 1;
} else if (Language == FormatStyle::LK_JavaScript) {
- GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak;
+ GoogleStyle.BreakAfterOpenBracketBracedList = true;
+ GoogleStyle.BreakAfterOpenBracketFunction = true;
+ GoogleStyle.BreakAfterOpenBracketIf = true;
GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign;
GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
// TODO: still under discussion whether to switch to SLS_All.
@@ -2026,7 +2111,7 @@ FormatStyle getMozillaStyle() {
FormatStyle getWebKitStyle() {
FormatStyle Style = getLLVMStyle();
Style.AccessModifierOffset = -4;
- Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
+ Style.AlignAfterOpenBracket = false;
Style.AlignOperands = FormatStyle::OAS_DontAlign;
Style.AlignTrailingComments = {};
Style.AlignTrailingComments.Kind = FormatStyle::TCAS_Never;
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index d1c6264..28fdbcb 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -68,7 +68,7 @@ bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const {
assert(MatchingParen);
assert(MatchingParen->is(tok::l_brace));
if (Style.Cpp11BracedListStyle == FormatStyle::BLS_Block ||
- Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) {
+ !Style.BreakBeforeCloseBracketBracedList) {
return false;
}
const auto *LBrace = MatchingParen;
@@ -198,7 +198,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
return;
// Column format doesn't really make sense if we don't align after brackets.
- if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
+ if (!Style.AlignAfterOpenBracket)
return;
FormatToken *ItemBegin = Token->Next;
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 6f3d24a..d833130 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -666,6 +666,12 @@ public:
(endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro);
}
+ bool isLoop(const FormatStyle &Style) const {
+ return isOneOf(tok::kw_for, tok::kw_while) ||
+ (Style.isJavaScript() && isNot(tok::l_paren) && Previous &&
+ Previous->is(tok::kw_for));
+ }
+
bool closesScopeAfterBlock() const {
if (getBlockKind() == BK_Block)
return true;
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 021d8c6..8e227da 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -4427,10 +4427,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0)
return Style.PenaltyBreakOpenParenthesis;
- if (Left.is(tok::l_paren) && InFunctionDecl &&
- Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) {
+ if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket)
return 100;
- }
if (Left.is(tok::l_paren) && Left.Previous &&
(Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) ||
Left.Previous->isIf())) {
@@ -4446,7 +4444,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
// If we aren't aligning after opening parens/braces we can always break
// here unless the style does not want us to place all arguments on the
// next line.
- if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign &&
+ if (!Style.AlignAfterOpenBracket &&
(Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) {
return 0;
}
@@ -6226,24 +6224,31 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
(Right.isBlockIndentedInitRBrace(Style)));
}
- // We only break before r_paren if we're in a block indented context.
+ // We can break before r_paren if we're in a block indented context or
+ // a control statement with an explicit style option.
if (Right.is(tok::r_paren)) {
- if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent ||
- !Right.MatchingParen) {
+ if (!Right.MatchingParen)
return false;
- }
auto Next = Right.Next;
if (Next && Next->is(tok::r_paren))
Next = Next->Next;
if (Next && Next->is(tok::l_paren))
return false;
const FormatToken *Previous = Right.MatchingParen->Previous;
- return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf()));
+ if (!Previous)
+ return false;
+ if (Previous->isIf())
+ return Style.BreakBeforeCloseBracketIf;
+ if (Previous->isLoop(Style))
+ return Style.BreakBeforeCloseBracketLoop;
+ if (Previous->is(tok::kw_switch))
+ return Style.BreakBeforeCloseBracketSwitch;
+ return Style.BreakBeforeCloseBracketFunction;
}
if (Left.isOneOf(tok::r_paren, TT_TrailingAnnotation) &&
Right.is(TT_TrailingAnnotation) &&
- Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) {
+ Style.BreakBeforeCloseBracketFunction) {
return false;
}
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index bd36eb4..be7c1d3 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4049,18 +4049,18 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
// -cl-std only applies for OpenCL language standards.
// Override the -std option in this case.
if (const Arg *A = Args.getLastArg(OPT_cl_std_EQ)) {
- LangStandard::Kind OpenCLLangStd
- = llvm::StringSwitch<LangStandard::Kind>(A->getValue())
- .Cases("cl", "CL", LangStandard::lang_opencl10)
- .Cases("cl1.0", "CL1.0", LangStandard::lang_opencl10)
- .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11)
- .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12)
- .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20)
- .Cases("cl3.0", "CL3.0", LangStandard::lang_opencl30)
- .Cases("clc++", "CLC++", LangStandard::lang_openclcpp10)
- .Cases("clc++1.0", "CLC++1.0", LangStandard::lang_openclcpp10)
- .Cases("clc++2021", "CLC++2021", LangStandard::lang_openclcpp2021)
- .Default(LangStandard::lang_unspecified);
+ LangStandard::Kind OpenCLLangStd =
+ llvm::StringSwitch<LangStandard::Kind>(A->getValue())
+ .Cases({"cl", "CL"}, LangStandard::lang_opencl10)
+ .Cases({"cl1.0", "CL1.0"}, LangStandard::lang_opencl10)
+ .Cases({"cl1.1", "CL1.1"}, LangStandard::lang_opencl11)
+ .Cases({"cl1.2", "CL1.2"}, LangStandard::lang_opencl12)
+ .Cases({"cl2.0", "CL2.0"}, LangStandard::lang_opencl20)
+ .Cases({"cl3.0", "CL3.0"}, LangStandard::lang_opencl30)
+ .Cases({"clc++", "CLC++"}, LangStandard::lang_openclcpp10)
+ .Cases({"clc++1.0", "CLC++1.0"}, LangStandard::lang_openclcpp10)
+ .Cases({"clc++2021", "CLC++2021"}, LangStandard::lang_openclcpp2021)
+ .Default(LangStandard::lang_unspecified);
if (OpenCLLangStd == LangStandard::lang_unspecified) {
Diags.Report(diag::err_drv_invalid_value)
@@ -4600,7 +4600,8 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
// Validate that if fnative-half-type is given, that
// the language standard is at least hlsl2018, and that
// the target shader model is at least 6.2.
- if (Args.getLastArg(OPT_fnative_half_type)) {
+ if (Args.getLastArg(OPT_fnative_half_type) ||
+ Args.getLastArg(OPT_fnative_int16_type)) {
const LangStandard &Std =
LangStandard::getLangStandardForKind(Opts.LangStd);
if (!(Opts.LangStd >= LangStandard::lang_hlsl2018 &&
@@ -4614,12 +4615,16 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
Diags.Report(diag::err_drv_hlsl_bad_shader_unsupported)
<< VulkanEnv << T.getOSName() << T.str();
}
- if (Args.getLastArg(OPT_fnative_half_type)) {
+ if (Args.getLastArg(OPT_fnative_half_type) ||
+ Args.getLastArg(OPT_fnative_int16_type)) {
+ const char *Str = Args.getLastArg(OPT_fnative_half_type)
+ ? "-fnative-half-type"
+ : "-fnative-int16-type";
const LangStandard &Std =
LangStandard::getLangStandardForKind(Opts.LangStd);
if (!(Opts.LangStd >= LangStandard::lang_hlsl2018))
Diags.Report(diag::err_drv_hlsl_16bit_types_unsupported)
- << "-fnative-half-type" << false << Std.getName();
+ << Str << false << Std.getName();
}
} else {
llvm_unreachable("expected DXIL or SPIR-V target");
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 47f1d5a..8602be1 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -399,7 +399,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__HLSL_202y",
Twine((unsigned)LangOptions::HLSLLangStd::HLSL_202y));
- if (LangOpts.NativeHalfType)
+ if (LangOpts.NativeHalfType && LangOpts.NativeInt16Type)
Builder.defineMacro("__HLSL_ENABLE_16_BIT", "1");
// Shader target information
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 1858912..33fff76 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -162,18 +162,12 @@ set(x86_files
adxintrin.h
ammintrin.h
amxavx512intrin.h
- amxbf16transposeintrin.h
amxcomplexintrin.h
- amxcomplextransposeintrin.h
amxfp16intrin.h
- amxfp16transposeintrin.h
amxfp8intrin.h
amxintrin.h
amxmovrsintrin.h
- amxmovrstransposeintrin.h
amxtf32intrin.h
- amxtf32transposeintrin.h
- amxtransposeintrin.h
avx10_2_512bf16intrin.h
avx10_2_512convertintrin.h
avx10_2_512minmaxintrin.h
diff --git a/clang/lib/Headers/amxbf16transposeintrin.h b/clang/lib/Headers/amxbf16transposeintrin.h
deleted file mode 100644
index 86f09f2..0000000
--- a/clang/lib/Headers/amxbf16transposeintrin.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*===----- amxbf16transposeintrin.h - AMX-BF16 and AMX-TRANSPOSE ------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error \
- "Never use <amxbf16transposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_BF16TRANSPOSEINTRIN_H
-#define __AMX_BF16TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("amx-bf16,amx-transpose")))
-
-/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
-/// tiles \a a and \a b, accumulating the intermediate single-precision
-/// (32-bit) floating-point elements with elements in \a dst, and store the
-/// 32-bit result back to tile \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_tdpbf16ps (__tile dst, __tile a, __tile b)
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-/// tmp := dst.row[m]
-/// FOR k := 0 TO (a.colsb / 4) - 1
-/// FOR n := 0 TO (dst.colsb / 4) - 1
-/// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+0]) *
-/// FP32(b.row[k].bf16[2*n+0])
-/// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+1]) *
-/// FP32(b.row[k].bf16[2*n+1])
-/// ENDFOR
-/// ENDFOR
-/// write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTDPBF16PS instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b))
-
-/// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_tdpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k,
- _tile1024i dst, _tile1024i src1, _tile1024i src2) {
- return __builtin_ia32_ttdpbf16ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
-/// tiles src0 and src1, accumulating the intermediate single-precision
-/// (32-bit) floating-point elements with elements in "dst", and store the
-/// 32-bit result back to tile "dst".
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTDPBF16PS </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static __inline__ void __tile_tdpbf16ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
- dst->tile = _tile_tdpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile,
- src0.tile, src1.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif /* __x86_64__ */
-#endif /* __AMX_BF16TRANSPOSEINTRIN_H */
diff --git a/clang/lib/Headers/amxcomplextransposeintrin.h b/clang/lib/Headers/amxcomplextransposeintrin.h
deleted file mode 100644
index 11abaf9..0000000
--- a/clang/lib/Headers/amxcomplextransposeintrin.h
+++ /dev/null
@@ -1,303 +0,0 @@
-/*===----- amxcomplextransposeintrin.h - AMX-COMPLEX and AMX-TRANSPOSE ------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error \
- "Never use <amxcomplextransposeintrin.h> directly; include <immintrin.h> instead."
-#endif // __IMMINTRIN_H
-
-#ifndef __AMX_COMPLEXTRANSPOSEINTRIN_H
-#define __AMX_COMPLEXTRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("amx-complex,amx-transpose")))
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-/// accumulate the results into a packed single precision tile. Each dword
-/// element in input tiles \a a and \a b is interpreted as a complex number
-/// with FP16 real part and FP16 imaginary part.
-/// Calculates the imaginary part of the result. For each possible combination
-/// of (transposed column of \a a, column of \a b), it performs a set of
-/// multiplication and accumulations on all corresponding complex numbers
-/// (one from \a a and one from \a b). The imaginary part of the \a a element
-/// is multiplied with the real part of the corresponding \a b element, and
-/// the real part of the \a a element is multiplied with the imaginary part
-/// of the corresponding \a b elements. The two accumulated results are
-/// added, and then accumulated into the corresponding row and column of
-/// \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_tcmmimfp16ps(__tile dst, __tile a, __tile b);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-/// tmp := dst.row[m]
-/// FOR k := 0 TO a.rows - 1
-/// FOR n := 0 TO (dst.colsb / 4) - 1
-/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1])
-/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0])
-/// ENDFOR
-/// ENDFOR
-/// write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tcmmimfp16ps(dst, a, b) \
- __builtin_ia32_ttcmmimfp16ps((dst), (a), (b))
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-/// accumulate the results into a packed single precision tile. Each dword
-/// element in input tiles \a a and \a b is interpreted as a complex number
-/// with FP16 real part and FP16 imaginary part.
-/// Calculates the real part of the result. For each possible combination
-/// of (rtransposed colum of \a a, column of \a b), it performs a set of
-/// multiplication and accumulations on all corresponding complex numbers
-/// (one from \a a and one from \a b). The real part of the \a a element is
-/// multiplied with the real part of the corresponding \a b element, and the
-/// negated imaginary part of the \a a element is multiplied with the
-/// imaginary part of the corresponding \a b elements. The two accumulated
-/// results are added, and then accumulated into the corresponding row and
-/// column of \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_tcmmrlfp16ps(__tile dst, __tile a, __tile b);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-/// tmp := dst.row[m]
-/// FOR k := 0 TO a.rows - 1
-/// FOR n := 0 TO (dst.colsb / 4) - 1
-/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0])
-/// tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1])
-/// ENDFOR
-/// ENDFOR
-/// write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tcmmrlfp16ps(dst, a, b) \
- __builtin_ia32_ttcmmrlfp16ps((dst), (a), (b))
-
-/// Perform matrix conjugate transpose and multiplication of two tiles
-/// containing complex elements and accumulate the results into a packed
-/// single precision tile. Each dword element in input tiles \a a and \a b
-/// is interpreted as a complex number with FP16 real part and FP16 imaginary
-/// part.
-/// Calculates the imaginary part of the result. For each possible combination
-/// of (transposed column of \a a, column of \a b), it performs a set of
-/// multiplication and accumulations on all corresponding complex numbers
-/// (one from \a a and one from \a b). The negated imaginary part of the \a a
-/// element is multiplied with the real part of the corresponding \a b
-/// element, and the real part of the \a a element is multiplied with the
-/// imaginary part of the corresponding \a b elements. The two accumulated
-/// results are added, and then accumulated into the corresponding row and
-/// column of \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_conjtcmmimfp16ps(__tile dst, __tile a, __tile b);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-/// tmp := dst.row[m]
-/// FOR k := 0 TO a.rows - 1
-/// FOR n := 0 TO (dst.colsb / 4) - 1
-/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1])
-/// tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0])
-/// ENDFOR
-/// ENDFOR
-/// write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCONJTCMMIMFP16PS instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_conjtcmmimfp16ps(dst, a, b) \
- __builtin_ia32_tconjtcmmimfp16ps((dst), (a), (b))
-
-/// Perform conjugate transpose of an FP16-pair of complex elements from \a a
-/// and writes the result to \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_conjtfp16(__tile dst, __tile a);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR i := 0 TO dst.rows - 1
-/// FOR j := 0 TO (dst.colsb / 4) - 1
-/// tmp.fp16[2*j+0] := a.row[j].fp16[2*i+0]
-/// tmp.fp16[2*j+1] := -a.row[j].fp16[2*i+1]
-/// ENDFOR
-/// write_row_and_zero(dst, i, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCONJTFP16 instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The source tile. Max size is 1024 Bytes.
-#define _tile_conjtfp16(dst, a) __builtin_ia32_tconjtfp16((dst), (a))
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmimfp16ps_internal(
- unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
- _tile1024i src1, _tile1024i src2) {
- return __builtin_ia32_ttcmmimfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmrlfp16ps_internal(
- unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
- _tile1024i src1, _tile1024i src2) {
- return __builtin_ia32_ttcmmrlfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_conjtcmmimfp16ps_internal(
- unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
- _tile1024i src1, _tile1024i src2) {
- return __builtin_ia32_tconjtcmmimfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_conjtfp16_internal(unsigned short m, unsigned short n, _tile1024i src) {
- return __builtin_ia32_tconjtfp16_internal(m, n, src);
-}
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-/// accumulate the results into a packed single precision tile. Each dword
-/// element in input tiles src0 and src1 is interpreted as a complex number
-/// with FP16 real part and FP16 imaginary part.
-/// This function calculates the imaginary part of the result.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTCMMIMFP16PS </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_tcmmimfp16ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
- dst->tile = _tile_tcmmimfp16ps_internal(src0.row, src1.col, src0.col,
- dst->tile, src0.tile, src1.tile);
-}
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-/// accumulate the results into a packed single precision tile. Each dword
-/// element in input tiles src0 and src1 is interpreted as a complex number
-/// with FP16 real part and FP16 imaginary part.
-/// This function calculates the real part of the result.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTCMMRLFP16PS </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_tcmmrlfp16ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
- dst->tile = _tile_tcmmrlfp16ps_internal(src0.row, src1.col, src0.col,
- dst->tile, src0.tile, src1.tile);
-}
-
-/// Perform matrix conjugate transpose and multiplication of two tiles
-/// containing complex elements and accumulate the results into a packed
-/// single precision tile. Each dword element in input tiles src0 and src1
-/// is interpreted as a complex number with FP16 real part and FP16 imaginary
-/// part.
-/// This function calculates the imaginary part of the result.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCONJTCMMIMFP16PS </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_conjtcmmimfp16ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
- dst->tile = _tile_conjtcmmimfp16ps_internal(src0.row, src1.col, src0.col,
- dst->tile, src0.tile, src1.tile);
-}
-
-/// Perform conjugate transpose of an FP16-pair of complex elements from src and
-/// writes the result to dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCONJTFP16 </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src
-/// The source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_conjtfp16(__tile1024i *dst, __tile1024i src) {
- dst->tile = _tile_conjtfp16_internal(src.row, src.col, src.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif // __x86_64__
-#endif // __AMX_COMPLEXTRANSPOSEINTRIN_H
diff --git a/clang/lib/Headers/amxfp16transposeintrin.h b/clang/lib/Headers/amxfp16transposeintrin.h
deleted file mode 100644
index 191f8c6..0000000
--- a/clang/lib/Headers/amxfp16transposeintrin.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*===----- amxfp16transposeintrin.h - AMX-FP16 and AMX-TRANSPOSE ------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error \
- "Never use <amxfp16transposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_FP16TRANSPOSEINTRIN_H
-#define __AMX_FP16TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("amx-fp16,amx-transpose")))
-
-/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in
-/// tiles \a a and \a b, accumulating the intermediate single-precision
-/// (32-bit) floating-point elements with elements in \a dst, and store the
-/// 32-bit result back to tile \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_tdpfp16ps (__tile dst, __tile a, __tile b)
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-/// tmp := dst.row[m]
-/// FOR k := 0 TO (a.colsb / 4) - 1
-/// FOR n := 0 TO (dst.colsb / 4) - 1
-/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) *
-/// FP32(b.row[k].fp16[2*n+0])
-/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) *
-/// FP32(b.row[k].fp16[2*n+1])
-/// ENDFOR
-/// ENDFOR
-/// write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTDPFP16PS instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tdpfp16ps(dst, a, b) __builtin_ia32_ttdpfp16ps((dst), (a), (b))
-
-/// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_tdpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k,
- _tile1024i dst, _tile1024i src1, _tile1024i src2) {
- return __builtin_ia32_ttdpfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in
-/// tiles src0 and src1, accumulating the intermediate single-precision
-/// (32-bit) floating-point elements with elements in "dst", and store the
-/// 32-bit result back to tile "dst".
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTDPFP16PS </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static __inline__ void __tile_tdpfp16ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
- dst->tile = _tile_tdpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile,
- src0.tile, src1.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif /* __x86_64__ */
-#endif /* __AMX_FP16TRANSPOSEINTRIN_H */
diff --git a/clang/lib/Headers/amxintrin.h b/clang/lib/Headers/amxintrin.h
index a7da10d..208aa358 100644
--- a/clang/lib/Headers/amxintrin.h
+++ b/clang/lib/Headers/amxintrin.h
@@ -230,8 +230,6 @@ static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_release(void) {
/// bytes. Since there is no 2D type in llvm IR, we use vector type to
/// represent 2D tile and the fixed size is maximum amx tile register size.
typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64)));
-typedef int _tile1024i_1024a
- __attribute__((__vector_size__(1024), __aligned__(1024)));
/// This is internal intrinsic. C/C++ user should avoid calling it directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE
diff --git a/clang/lib/Headers/amxmovrstransposeintrin.h b/clang/lib/Headers/amxmovrstransposeintrin.h
deleted file mode 100644
index 5f48cba..0000000
--- a/clang/lib/Headers/amxmovrstransposeintrin.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/* ===--- amxmovrstransposeintrin.h - AMX_MOVRS_TRANSPOSE intrinsics --------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- * ===-----------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error \
- "Never use <amxmovrstransposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_MOVRS_TRANSPOSEINTRIN_H
-#define __AMX_MOVRS_TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("amx-transpose,amx-movrs")))
-
-#define _tile_2rpntlvwz0rs(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz0rs(tdst, base, stride)
-#define _tile_2rpntlvwz0rst1(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz0rst1(tdst, base, stride)
-#define _tile_2rpntlvwz1rs(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz1rs(tdst, base, stride)
-#define _tile_2rpntlvwz1rst1(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz1rst1(tdst, base, stride)
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rs_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- // Use __tile1024i_1024a* to escape the alignment check in
- // clang/test/Headers/x86-intrinsics-headers-clean.cpp
- __builtin_ia32_t2rpntlvwz0rs_internal(
- row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rst1_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- __builtin_ia32_t2rpntlvwz0rst1_internal(
- row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rs_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- __builtin_ia32_t2rpntlvwz1rs_internal(
- row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rst1_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- __builtin_ia32_t2rpntlvwz1rst1_internal(
- row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-/// Provides a hint to the implementation that the data will likely become
-/// read shared in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0RS </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz0rs(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz0rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1RS </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz0rst1(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz0rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will be not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely become
-/// read shared in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz1rs(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz1rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will be not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely become
-/// read shared in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1RS </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz1rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-#undef __DEFAULT_FN_ATTRS
-#endif /* __x86_64__ */
-#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */
diff --git a/clang/lib/Headers/amxtf32transposeintrin.h b/clang/lib/Headers/amxtf32transposeintrin.h
deleted file mode 100644
index e1b90c1..0000000
--- a/clang/lib/Headers/amxtf32transposeintrin.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*===--------- amxtf32transposeintrin.h - AMX-TF32 and AMX-TRANSPOSE --------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-#ifndef __IMMINTRIN_H
-#error \
- "Never use <amxtf32transposeintrin.h> directly; include <immintrin.h> instead."
-#endif // __IMMINTRIN_H
-
-#ifndef __AMX_TF32TRANSPOSEINTRIN_H
-#define __AMX_TF32TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS_TF32_TRANSPOSE \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("amx-tf32,amx-transpose")))
-
-/// \code
-/// void _tile_tmmultf32ps(constexpr int srcdst, constexpr int a, \
-/// constexpr int b);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction.
-///
-/// \param srcdst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source tile. Max size is 1024 Bytes.
-///
-/// \code{.operation}
-/// DEFINE zero_lower_mantissa_bits_fp32(x[31:0]) {
-/// dword[12:0] := 0
-/// dword[31:13] := x[31:13]
-/// return dword
-/// }
-///
-/// DEFINE silence_snan_fp32(x[31:0]) {
-/// IF (x.exponent == 255 and x.fraction != 0 and x.fraction[22] == 0)
-/// x.fraction[22] := 1
-/// return x
-/// }
-///
-/// elements_dest:= srcdst.colsb/4
-///
-/// FOR m := 0 TO (srcdst.rows-1)
-/// tmp[511:0] := 0
-/// FOR k := 0 TO (a.rows-1)
-/// FOR n := 0 TO (elements_dest-1)
-/// a1e := silence_snan_fp32(a.row[k].fp32[m])
-/// a2e := silence_snan_fp32(b.row[k].fp32[n])
-/// s1e := zero_lower_mantissa_bits_fp32(a1e)
-/// s2e := zero_lower_mantissa_bits_fp32(a2e)
-/// tmp.fp32[n] += s1e * s2e
-/// ENDFOR
-/// ENDFOR
-///
-/// FOR n := 0 TO (elements_dest-1)
-/// tmp.fp32[n] += srcdst.row[m].fp32[n]
-/// ENDFOR
-/// write_row_and_zero(srcdst, m, tmp, srcdst.colsb)
-///
-/// ENDFOR
-///
-/// zero_upper_rows(srcdst, srcdst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-#define _tile_tmmultf32ps(srcdst, a, b) \
- __builtin_ia32_ttmmultf32ps((srcdst), (a), (b))
-
-// dst = m x n (srcdest), src1 = k x m, src2 = k x n
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TF32_TRANSPOSE
-_tile_tmmultf32ps_internal(unsigned short m, unsigned short n, unsigned short k,
- _tile1024i dst, _tile1024i src1, _tile1024i src2) {
- return __builtin_ia32_ttmmultf32ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute transpose and do Matrix Multiplication of src0 and src1, and then do
-/// Matrix Plus with dst. All the calculation is base on float32 but with the
-/// lower 13-bit set to 0.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS_TF32_TRANSPOSE
-static void __tile_tmmultf32ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
- dst->tile = _tile_tmmultf32ps_internal(src0.row, src1.col, src0.col,
- dst->tile, src0.tile, src1.tile);
-}
-
-#endif // __x86_64__
-#endif // __AMX_TF32TRANSPOSEINTRIN_H
diff --git a/clang/lib/Headers/amxtransposeintrin.h b/clang/lib/Headers/amxtransposeintrin.h
deleted file mode 100644
index b3fa37d..0000000
--- a/clang/lib/Headers/amxtransposeintrin.h
+++ /dev/null
@@ -1,248 +0,0 @@
-/* ===--- amxtransposeintrin.h - AMX_TRANSPOSE intrinsics -*- C++ -*---------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- * ===-----------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error "Never use <amxtransposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_TRANSPOSEINTRIN_H
-#define __AMX_TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS_TRANSPOSE \
- __attribute__((__always_inline__, __nodebug__, __target__("amx-transpose")))
-
-#define _tile_2rpntlvwz0(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz0(tdst, base, stride)
-#define _tile_2rpntlvwz0t1(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz0t1(tdst, base, stride)
-#define _tile_2rpntlvwz1(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz1(tdst, base, stride)
-#define _tile_2rpntlvwz1t1(tdst, base, stride) \
- __builtin_ia32_t2rpntlvwz1t1(tdst, base, stride)
-
-/// Transpose 32-bit elements from \a src and write the result to \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_transposed(__tile dst, __tile src);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src
-/// The source tile. Max size is 1024 Bytes.
-///
-/// \code{.operation}
-///
-/// FOR i := 0 TO (dst.rows-1)
-/// tmp[511:0] := 0
-/// FOR j := 0 TO (dst.colsb/4-1)
-/// tmp.dword[j] := src.row[j].dword[i]
-/// ENDFOR
-/// dst.row[i] := tmp
-/// ENDFOR
-///
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-#define _tile_transposed(dst, src) __builtin_ia32_ttransposed(dst, src)
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- // Use __tile1024i_1024a* to escape the alignment check in
- // clang/test/Headers/x86-intrinsics-headers-clean.cpp
- __builtin_ia32_t2rpntlvwz0_internal(row, col0, col1, (_tile1024i_1024a *)dst0,
- (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0t1_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- __builtin_ia32_t2rpntlvwz0t1_internal(
- row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- __builtin_ia32_t2rpntlvwz1_internal(row, col0, col1, (_tile1024i_1024a *)dst0,
- (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1t1_internal(
- unsigned short row, unsigned short col0, unsigned short col1,
- _tile1024i *dst0, _tile1024i *dst1, const void *base,
- __SIZE_TYPE__ stride) {
- __builtin_ia32_t2rpntlvwz1t1_internal(
- row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
- (__SIZE_TYPE__)(stride));
-}
-
-// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TRANSPOSE
-_tile_transposed_internal(unsigned short m, unsigned short n, _tile1024i src) {
- return __builtin_ia32_ttransposed_internal(m, n, src);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-/// Provides a hint to the implementation that the data will likely not be
-/// reused in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0 </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz0(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz0_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1 </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz0t1(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz0t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will be not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely not be
-/// reused in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz1(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will be not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely not be
-/// reused in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1 </c> instruction.
-///
-/// \param dst0
-/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-/// A pointer to base address.
-/// \param stride
-/// The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz1t1(__tile1024i *dst0, __tile1024i *dst1,
- const void *base, __SIZE_TYPE__ stride) {
- _tile_2rpntlvwz1t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
- &dst1->tile, base, stride);
-}
-
-/// Transpose 32-bit elements from src and write the result to dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction.
-///
-/// \param dst
-/// The destination tile. Max size is 1024 Bytes.
-/// \param src
-/// The source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_transposed(__tile1024i *dst, __tile1024i src) {
- dst->tile = _tile_transposed_internal(dst->row, dst->col, src.tile);
-}
-
-#endif /* __x86_64__ */
-#endif /* __AMX_TRANSPOSEINTRIN_H */
diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
index fe4277e..ee243ab 100644
--- a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
+++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#ifndef _HLSL_COMPAT_OVERLOADS_H_
-#define _HLSl_COMPAT_OVERLOADS_H_
+#define _HLSL_COMPAT_OVERLOADS_H_
namespace hlsl {
diff --git a/clang/lib/Headers/hvx_hexagon_protos.h b/clang/lib/Headers/hvx_hexagon_protos.h
index fd120a5..19309a4 100644
--- a/clang/lib/Headers/hvx_hexagon_protos.h
+++ b/clang/lib/Headers/hvx_hexagon_protos.h
@@ -5605,6 +5605,399 @@
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_f8)(Vu, Vv)
#endif /* __HEXAGON_ARCH___ >= 79 */
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf16=vabs(Vu32.hf)
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vabs_Vhf(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf16_vabs_Vhf(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf16_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf16=vabs(Vu32.qf16)
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vabs_Vqf16(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf16_vabs_Vqf16(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf16_qf16)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf32=vabs(Vu32.qf32)
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vabs_Vqf32(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf32_vabs_Vqf32(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf32_qf32)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf32=vabs(Vu32.sf)
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vabs_Vsf(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf32_vabs_Vsf(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf32_sf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32=valign4(Vu32,Vv32,Rt8)
+ C Intrinsic Prototype: HVX_Vector Q6_V_valign4_VVR(HVX_Vector Vu, HVX_Vector
+ Vv, Word32 Rt) Instruction Type: CVI_VA Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_V_valign4_VVR(Vu, Vv, Rt) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valign4)(Vu, Vv, Rt)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.bf=Vuu32.qf32
+ C Intrinsic Prototype: HVX_Vector Q6_Vbf_equals_Wqf32(HVX_VectorPair Vuu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vbf_equals_Wqf32(Vuu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_bf_qf32)(Vuu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.f8=Vu32.qf16
+ C Intrinsic Prototype: HVX_Vector Q6_V_equals_Vqf16(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_V_equals_Vqf16(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_f8_qf16)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.h=Vu32.hf:rnd
+ C Intrinsic Prototype: HVX_Vector Q6_Vh_equals_Vhf_rnd(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vh_equals_Vhf_rnd(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_h_hf_rnd)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vdd32.qf16=Vu32.f8
+ C Intrinsic Prototype: HVX_VectorPair Q6_Wqf16_equals_V(HVX_Vector Vu)
+ Instruction Type: CVI_VP_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Wqf16_equals_V(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf16_f8)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf16=Vu32.hf
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf16_equals_Vhf(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf16_equals_Vhf(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf16_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf16=Vu32.qf16
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf16_equals_Vqf16(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf16_equals_Vqf16(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf16_qf16)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf32=Vu32.qf32
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf32_equals_Vqf32(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf32_equals_Vqf32(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf32_qf32)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf32=Vu32.sf
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf32_equals_Vsf(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf32_equals_Vsf(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf32_sf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Qd4=vcmp.eq(Vu32.hf,Vv32.hf)
+ C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VhfVhf(HVX_Vector Vu,
+ HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Q_vcmp_eq_VhfVhf(Vu, Vv) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \
+ (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf)(Vu, Vv)), -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Qx4&=vcmp.eq(Vu32.hf,Vv32.hf)
+ C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVhfVhf(HVX_VectorPred
+ Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution
+ Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Q_vcmp_eqand_QVhfVhf(Qx, Vu, Vv) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \
+ (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf_and)( \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \
+ Vv)), \
+ -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Qx4|=vcmp.eq(Vu32.hf,Vv32.hf)
+ C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVhfVhf(HVX_VectorPred
+ Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution
+ Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Q_vcmp_eqor_QVhfVhf(Qx, Vu, Vv) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \
+ (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf_or)( \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \
+ Vv)), \
+ -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Qx4^=vcmp.eq(Vu32.hf,Vv32.hf)
+ C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVhfVhf(HVX_VectorPred
+ Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution
+ Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Q_vcmp_eqxacc_QVhfVhf(Qx, Vu, Vv) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \
+ (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf_xor)( \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \
+ Vv)), \
+ -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Qd4=vcmp.eq(Vu32.sf,Vv32.sf)
+ C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VsfVsf(HVX_Vector Vu,
+ HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Q_vcmp_eq_VsfVsf(Vu, Vv) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \
+ (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf)(Vu, Vv)), -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Qx4&=vcmp.eq(Vu32.sf,Vv32.sf)
+ C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVsfVsf(HVX_VectorPred
+ Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution
+ Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Q_vcmp_eqand_QVsfVsf(Qx, Vu, Vv) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \
+ (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf_and)( \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \
+ Vv)), \
+ -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Qx4|=vcmp.eq(Vu32.sf,Vv32.sf)
+ C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVsfVsf(HVX_VectorPred
+ Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution
+ Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Q_vcmp_eqor_QVsfVsf(Qx, Vu, Vv) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \
+ (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf_or)( \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \
+ Vv)), \
+ -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Qx4^=vcmp.eq(Vu32.sf,Vv32.sf)
+ C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVsfVsf(HVX_VectorPred
+ Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution
+ Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Q_vcmp_eqxacc_QVsfVsf(Qx, Vu, Vv) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \
+ (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf_xor)( \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \
+ Vv)), \
+ -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.w=vilog2(Vu32.hf)
+ C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vhf(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vw_vilog2_Vhf(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.w=vilog2(Vu32.qf16)
+ C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vqf16(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vw_vilog2_Vqf16(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_qf16)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.w=vilog2(Vu32.qf32)
+ C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vqf32(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vw_vilog2_Vqf32(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_qf32)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.w=vilog2(Vu32.sf)
+ C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vsf(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vw_vilog2_Vsf(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_sf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf16=vneg(Vu32.hf)
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vneg_Vhf(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf16_vneg_Vhf(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf16_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf16=vneg(Vu32.qf16)
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vneg_Vqf16(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf16_vneg_Vqf16(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf16_qf16)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf32=vneg(Vu32.qf32)
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vneg_Vqf32(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf32_vneg_Vqf32(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf32_qf32)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf32=vneg(Vu32.sf)
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vneg_Vsf(HVX_Vector Vu)
+ Instruction Type: CVI_VS
+ Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf32_vneg_Vsf(Vu) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf32_sf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf16=vsub(Vu32.hf,Vv32.qf16)
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_VhfVqf16(HVX_Vector Vu,
+ HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf16_vsub_VhfVqf16(Vu, Vv) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_mix)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+ Assembly Syntax: Vd32.qf32=vsub(Vu32.sf,Vv32.qf32)
+ C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_VsfVqf32(HVX_Vector Vu,
+ HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123
+ ========================================================================== */
+
+#define Q6_Vqf32_vsub_VsfVqf32(Vu, Vv) \
+ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_mix)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
#endif /* __HVX__ */
#endif
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 35f012c..19064a4 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -475,24 +475,12 @@ _storebe_i64(void * __P, long long __D) {
#include <amxfp8intrin.h>
-#include <amxtransposeintrin.h>
-
#include <amxmovrsintrin.h>
-#include <amxmovrstransposeintrin.h>
-
#include <amxavx512intrin.h>
#include <amxtf32intrin.h>
-#include <amxtf32transposeintrin.h>
-
-#include <amxbf16transposeintrin.h>
-
-#include <amxfp16transposeintrin.h>
-
-#include <amxcomplextransposeintrin.h>
-
#include <avx512vp2intersectintrin.h>
#include <avx512vlvp2intersectintrin.h>
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index e4b158e..7e4a164 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -4248,6 +4248,13 @@ void Parser::ParseDeclarationSpecifiers(
// type-specifier
case tok::kw_short:
+ if (!getLangOpts().NativeInt16Type) {
+ Diag(Tok, diag::err_unknown_typename) << Tok.getName();
+ DS.SetTypeSpecError();
+ DS.SetRangeEnd(Tok.getLocation());
+ ConsumeToken();
+ goto DoneWithDeclSpec;
+ }
isInvalid = DS.SetTypeSpecWidth(TypeSpecifierWidth::Short, Loc, PrevSpec,
DiagID, Policy);
break;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f451787..ad2c2e4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3542,9 +3542,7 @@ bool Sema::ValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum) {
bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx,
unsigned FirstArg, FormatStringInfo *FSI) {
- bool IsCXXMember = false;
- if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
- IsCXXMember = MD->isInstance();
+ bool HasImplicitThisParam = hasImplicitObjectParameter(D);
bool IsVariadic = false;
if (const FunctionType *FnTy = D->getFunctionType())
IsVariadic = cast<FunctionProtoType>(FnTy)->isVariadic();
@@ -3553,11 +3551,12 @@ bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx,
else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D))
IsVariadic = OMD->isVariadic();
- return getFormatStringInfo(FormatIdx, FirstArg, IsCXXMember, IsVariadic, FSI);
+ return getFormatStringInfo(FormatIdx, FirstArg, HasImplicitThisParam,
+ IsVariadic, FSI);
}
bool Sema::getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
- bool IsCXXMember, bool IsVariadic,
+ bool HasImplicitThisParam, bool IsVariadic,
FormatStringInfo *FSI) {
if (FirstArg == 0)
FSI->ArgPassingKind = FAPK_VAList;
@@ -3571,7 +3570,7 @@ bool Sema::getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
// The way the format attribute works in GCC, the implicit this argument
// of member functions is counted. However, it doesn't appear in our own
// lists, so decrement format_idx in that case.
- if (IsCXXMember) {
+ if (HasImplicitThisParam) {
if(FSI->FormatIdx == 0)
return false;
--FSI->FormatIdx;
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 0514d10..aa93507 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -10208,6 +10208,24 @@ void SemaCodeCompletion::CodeCompletePreprocessorDirective(bool InConditional) {
Builder.AddPlaceholderChunk("message");
Results.AddResult(Builder.TakeString());
+ if (getLangOpts().C23) {
+ // #embed "file"
+ Builder.AddTypedTextChunk("embed");
+ Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
+ Builder.AddTextChunk("\"");
+ Builder.AddPlaceholderChunk("file");
+ Builder.AddTextChunk("\"");
+ Results.AddResult(Builder.TakeString());
+
+ // #embed <file>
+ Builder.AddTypedTextChunk("embed");
+ Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
+ Builder.AddTextChunk("<");
+ Builder.AddPlaceholderChunk("file");
+ Builder.AddTextChunk(">");
+ Results.AddResult(Builder.TakeString());
+ }
+
// Note: #ident and #sccs are such crazy anachronisms that we don't provide
// completions for them. And __include_macros is a Clang-internal extension
// that we don't want to encourage anyone to use.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 964a2a7..a9e7b44 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3785,7 +3785,7 @@ static bool handleFormatAttrCommon(Sema &S, Decl *D, const ParsedAttr &AL,
// In C++ the implicit 'this' function parameter also counts, and they are
// counted from one.
- bool HasImplicitThisParam = isInstanceMethod(D);
+ bool HasImplicitThisParam = hasImplicitObjectParameter(D);
Info->NumArgs = getFunctionOrMethodNumParams(D) + HasImplicitThisParam;
Info->Identifier = AL.getArgAsIdent(0)->getIdentifierInfo();
@@ -3926,7 +3926,7 @@ static void handleCallbackAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
return;
}
- bool HasImplicitThisParam = isInstanceMethod(D);
+ bool HasImplicitThisParam = hasImplicitObjectParameter(D);
int32_t NumArgs = getFunctionOrMethodNumParams(D);
FunctionDecl *FD = D->getAsFunction();
@@ -4110,7 +4110,7 @@ static void handleLifetimeCaptureByAttr(Sema &S, Decl *D,
}
void Sema::LazyProcessLifetimeCaptureByParams(FunctionDecl *FD) {
- bool HasImplicitThisParam = isInstanceMethod(FD);
+ bool HasImplicitThisParam = hasImplicitObjectParameter(FD);
SmallVector<LifetimeCaptureByAttr *, 1> Attrs;
for (ParmVarDecl *PVD : FD->parameters())
if (auto *A = PVD->getAttr<LifetimeCaptureByAttr>())
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index 850bcb1..2f61bdd 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -489,14 +489,6 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_tileloaddrst164:
case X86::BI__builtin_ia32_tilestored64:
case X86::BI__builtin_ia32_tilezero:
- case X86::BI__builtin_ia32_t2rpntlvwz0:
- case X86::BI__builtin_ia32_t2rpntlvwz0t1:
- case X86::BI__builtin_ia32_t2rpntlvwz1:
- case X86::BI__builtin_ia32_t2rpntlvwz1t1:
- case X86::BI__builtin_ia32_t2rpntlvwz0rst1:
- case X86::BI__builtin_ia32_t2rpntlvwz1rs:
- case X86::BI__builtin_ia32_t2rpntlvwz1rst1:
- case X86::BI__builtin_ia32_t2rpntlvwz0rs:
case X86::BI__builtin_ia32_tcvtrowps2bf16h:
case X86::BI__builtin_ia32_tcvtrowps2bf16l:
case X86::BI__builtin_ia32_tcvtrowps2phh:
@@ -516,17 +508,8 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_tdpbhf8ps:
case X86::BI__builtin_ia32_tdphbf8ps:
case X86::BI__builtin_ia32_tdphf8ps:
- case X86::BI__builtin_ia32_ttdpbf16ps:
- case X86::BI__builtin_ia32_ttdpfp16ps:
- case X86::BI__builtin_ia32_ttcmmimfp16ps:
- case X86::BI__builtin_ia32_ttcmmrlfp16ps:
- case X86::BI__builtin_ia32_tconjtcmmimfp16ps:
case X86::BI__builtin_ia32_tmmultf32ps:
- case X86::BI__builtin_ia32_ttmmultf32ps:
return CheckBuiltinTileRangeAndDuplicate(TheCall, {0, 1, 2});
- case X86::BI__builtin_ia32_ttransposed:
- case X86::BI__builtin_ia32_tconjtfp16:
- return CheckBuiltinTileArgumentsRange(TheCall, {0, 1});
}
}
static bool isX86_32Builtin(unsigned BuiltinID) {
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
index 42f52d0..eebecdb 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
@@ -350,7 +350,7 @@ void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
// See `test/ClangScanDeps/diagnostic-pragmas.c` for an example.
llvm::erase_if(DiagOpts.Warnings, [](StringRef Warning) {
return llvm::StringSwitch<bool>(Warning)
- .Cases("pch-vfs-diff", "error=pch-vfs-diff", false)
+ .Cases({"pch-vfs-diff", "error=pch-vfs-diff"}, false)
.StartsWith("no-error=", false)
.Default(true);
});
diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp
index 171c786..b4bdec1 100644
--- a/clang/lib/Tooling/Transformer/RangeSelector.cpp
+++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp
@@ -205,8 +205,12 @@ RangeSelector transformer::name(std::string ID) {
// `foo<int>` for which this range will be too short. Doing so will
// require subcasing `NamedDecl`, because it doesn't provide virtual
// access to the \c DeclarationNameInfo.
- if (tooling::getText(R, *Result.Context) != D->getName())
- return CharSourceRange();
+ StringRef Text = tooling::getText(R, *Result.Context);
+ if (Text != D->getName())
+ return llvm::make_error<StringError>(
+ llvm::errc::not_supported,
+ "range selected by name(node id=" + ID + "): '" + Text +
+ "' is different from decl name '" + D->getName() + "'");
return R;
}
if (const auto *E = Node.get<DeclRefExpr>()) {