aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm/Analysis/DXILResource.h1
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h6
-rw-r--r--llvm/lib/Analysis/DXILResource.cpp19
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFContext.cpp19
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp22
-rw-r--r--llvm/lib/Passes/StandardInstrumentations.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp3
-rw-r--r--llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp2
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp23
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp2
-rw-r--r--llvm/test/Analysis/DXILResource/buffer-frombinding.ll19
-rw-r--r--llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll19
-rw-r--r--llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll17
-rw-r--r--llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll32
-rw-r--r--llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll264
-rw-r--r--llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll162
-rw-r--r--llvm/test/MC/AMDGPU/buffer-op-swz-operand.s43
-rw-r--r--llvm/test/Other/print-on-crash.ll2
-rw-r--r--llvm/test/Transforms/InstCombine/fold-selective-shift.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/trunc-sext.ll16
-rw-r--r--llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s245
-rw-r--r--llvm/test/tools/llvm-mca/AMDGPU/buffer-op-swz-operand.s45
23 files changed, 954 insertions, 42 deletions
diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h
index c7aff16..2b0dcb9 100644
--- a/llvm/include/llvm/Analysis/DXILResource.h
+++ b/llvm/include/llvm/Analysis/DXILResource.h
@@ -293,6 +293,7 @@ public:
struct TypedInfo {
dxil::ElementType ElementTy;
+ dxil::ElementType DXILStorageTy;
uint32_t ElementCount;
bool operator==(const TypedInfo &RHS) const {
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 7f9bf12..be78647 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -143,7 +143,11 @@ public:
decltype(make_filter_range(std::declval<iterator_range>(), isCompileUnit));
LLVM_ABI DWARFUnit *getUnitForOffset(uint64_t Offset) const;
- LLVM_ABI DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E);
+ /// Returns the Unit from the .debug_info or .debug_types section by the index
+ /// entry.
+ LLVM_ABI DWARFUnit *
+ getUnitForIndexEntry(const DWARFUnitIndex::Entry &E, DWARFSectionKind Sec,
+ const DWARFSection *Section = nullptr);
/// Read units from a .debug_info or .debug_types section. Calls made
/// before finishedInfoUnits() are assumed to be for .debug_info sections,
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index 6f19a68..27114e0 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -206,6 +206,14 @@ static dxil::ElementType toDXILElementType(Type *Ty, bool IsSigned) {
return ElementType::Invalid;
}
+static dxil::ElementType toDXILStorageType(dxil::ElementType ET) {
+ if (ET == dxil::ElementType::U64 || ET == dxil::ElementType::F64 ||
+ ET == dxil::ElementType::I64 || ET == dxil::ElementType::SNormF64 ||
+ ET == dxil::ElementType::UNormF64)
+ return dxil::ElementType::U32;
+ return ET;
+}
+
ResourceTypeInfo::ResourceTypeInfo(TargetExtType *HandleTy,
const dxil::ResourceClass RC_,
const dxil::ResourceKind Kind_)
@@ -569,10 +577,11 @@ ResourceTypeInfo::TypedInfo ResourceTypeInfo::getTyped() const {
auto [ElTy, IsSigned] = getTypedElementType(Kind, HandleTy);
dxil::ElementType ET = toDXILElementType(ElTy, IsSigned);
+ dxil::ElementType DXILStorageTy = toDXILStorageType(ET);
uint32_t Count = 1;
if (auto *VTy = dyn_cast<FixedVectorType>(ElTy))
Count = VTy->getNumElements();
- return {ET, Count};
+ return {ET, DXILStorageTy, Count};
}
dxil::SamplerFeedbackType ResourceTypeInfo::getFeedbackType() const {
@@ -636,7 +645,10 @@ void ResourceTypeInfo::print(raw_ostream &OS, const DataLayout &DL) const {
OS << " Alignment: " << Struct.AlignLog2 << "\n";
} else if (isTyped()) {
TypedInfo Typed = getTyped();
- OS << " Element Type: " << getElementTypeName(Typed.ElementTy) << "\n"
+ OS << " Element Type: " << getElementTypeName(Typed.ElementTy);
+ if (Typed.ElementTy != Typed.DXILStorageTy)
+ OS << " (stored as " << getElementTypeName(Typed.DXILStorageTy) << ")";
+ OS << "\n"
<< " Element Count: " << Typed.ElementCount << "\n";
} else if (isFeedback())
OS << " Feedback Type: " << getSamplerFeedbackTypeName(getFeedbackType())
@@ -714,7 +726,8 @@ MDTuple *ResourceInfo::getAsMetadata(Module &M,
Tags.push_back(getIntMD(RTI.getStruct(DL).Stride));
} else if (RTI.isTyped()) {
Tags.push_back(getIntMD(llvm::to_underlying(ExtPropTags::ElementType)));
- Tags.push_back(getIntMD(llvm::to_underlying(RTI.getTyped().ElementTy)));
+ Tags.push_back(
+ getIntMD(llvm::to_underlying(RTI.getTyped().DXILStorageTy)));
} else if (RTI.isFeedback()) {
Tags.push_back(
getIntMD(llvm::to_underlying(ExtPropTags::SamplerFeedbackKind)));
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 73df62a..41cea45 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1344,9 +1344,20 @@ void DWARFContext::dump(
DWARFTypeUnit *DWARFContext::getTypeUnitForHash(uint64_t Hash, bool IsDWO) {
DWARFUnitVector &DWOUnits = State->getDWOUnits();
if (const auto &TUI = getTUIndex()) {
- if (const auto *R = TUI.getFromHash(Hash))
- return dyn_cast_or_null<DWARFTypeUnit>(
- DWOUnits.getUnitForIndexEntry(*R));
+ if (const auto *R = TUI.getFromHash(Hash)) {
+ if (TUI.getVersion() >= 5) {
+ return dyn_cast_or_null<DWARFTypeUnit>(
+ DWOUnits.getUnitForIndexEntry(*R, DW_SECT_INFO));
+ } else {
+ DWARFUnit *TypesUnit = nullptr;
+ getDWARFObj().forEachTypesDWOSections([&](const DWARFSection &S) {
+ if (!TypesUnit)
+ TypesUnit =
+ DWOUnits.getUnitForIndexEntry(*R, DW_SECT_EXT_TYPES, &S);
+ });
+ return dyn_cast_or_null<DWARFTypeUnit>(TypesUnit);
+ }
+ }
return nullptr;
}
return State->getTypeUnitMap(IsDWO).lookup(Hash);
@@ -1358,7 +1369,7 @@ DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
if (const auto &CUI = getCUIndex()) {
if (const auto *R = CUI.getFromHash(Hash))
return dyn_cast_or_null<DWARFCompileUnit>(
- DWOUnits.getUnitForIndexEntry(*R));
+ DWOUnits.getUnitForIndexEntry(*R, DW_SECT_INFO));
return nullptr;
}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index ef59c82..da0bf03 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -161,17 +161,24 @@ DWARFUnit *DWARFUnitVector::getUnitForOffset(uint64_t Offset) const {
return nullptr;
}
-DWARFUnit *
-DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
- const auto *CUOff = E.getContribution(DW_SECT_INFO);
+DWARFUnit *DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E,
+ DWARFSectionKind Sec,
+ const DWARFSection *Section) {
+ const auto *CUOff = E.getContribution(Sec);
if (!CUOff)
return nullptr;
uint64_t Offset = CUOff->getOffset();
- auto end = begin() + getNumInfoUnits();
+ auto begin = this->begin();
+ auto end = begin + getNumInfoUnits();
+
+ if (Sec == DW_SECT_EXT_TYPES) {
+ begin = end;
+ end = this->end();
+ }
auto *CU =
- std::upper_bound(begin(), end, CUOff->getOffset(),
+ std::upper_bound(begin, end, CUOff->getOffset(),
[](uint64_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
return LHS < RHS->getNextUnitOffset();
});
@@ -181,13 +188,14 @@ DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
if (!Parser)
return nullptr;
- auto U = Parser(Offset, DW_SECT_INFO, nullptr, &E);
+ auto U = Parser(Offset, Sec, Section, &E);
if (!U)
return nullptr;
auto *NewCU = U.get();
this->insert(CU, std::move(U));
- ++NumInfoUnits;
+ if (Sec == DW_SECT_INFO)
+ ++NumInfoUnits;
return NewCU;
}
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index de29330..7290a86 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -2499,7 +2499,7 @@ void PrintCrashIRInstrumentation::registerCallbacks(
[&PIC, this](StringRef PassID, Any IR) {
SavedIR.clear();
raw_string_ostream OS(SavedIR);
- OS << formatv("*** Dump of {0}IR Before Last Pass {1}",
+ OS << formatv("; *** Dump of {0}IR Before Last Pass {1}",
llvm::forcePrintModuleIR() ? "Module " : "", PassID);
if (!isInteresting(IR, PassID, PIC.getPassNameForClassName(PassID))) {
OS << " Filtered Out ***\n";
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 5580e4c..09338c5 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -9028,6 +9028,9 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
+ // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
+ // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
+ Inst.addOperand(MCOperand::createImm(0));
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
index dc84ae4..9da3bdb 100644
--- a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
@@ -49,7 +49,7 @@ static StringRef getRCPrefix(dxil::ResourceClass RC) {
static StringRef getFormatName(const dxil::ResourceTypeInfo &RI) {
if (RI.isTyped()) {
- switch (RI.getTyped().ElementTy) {
+ switch (RI.getTyped().DXILStorageTy) {
case dxil::ElementType::I1:
return "i1";
case dxil::ElementType::I16:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 9b9fe26..f939e7a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1525,7 +1525,15 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
}
// Try to extend the entire expression tree to the wide destination type.
- if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
+ bool ShouldExtendExpression = true;
+ Value *TruncSrc = nullptr;
+ // It is not desirable to extend expression in the trunc + sext pattern when
+ // destination type is narrower than original (pre-trunc) type.
+ if (match(Src, m_Trunc(m_Value(TruncSrc))))
+ if (TruncSrc->getType()->getScalarSizeInBits() > DestBitSize)
+ ShouldExtendExpression = false;
+ if (ShouldExtendExpression && shouldChangeType(SrcTy, DestTy) &&
+ canEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
LLVM_DEBUG(
dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -1545,13 +1553,18 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
ShAmt);
}
- Value *X;
- if (match(Src, m_Trunc(m_Value(X)))) {
+ Value *X = TruncSrc;
+ if (X) {
// If the input has more sign bits than bits truncated, then convert
// directly to final type.
unsigned XBitSize = X->getType()->getScalarSizeInBits();
- if (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)
- return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
+ bool HasNSW = cast<TruncInst>(Src)->hasNoSignedWrap();
+ if (HasNSW || (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)) {
+ auto *Res = CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
+ if (auto *ResTrunc = dyn_cast<TruncInst>(Res); ResTrunc && HasNSW)
+ ResTrunc->setHasNoSignedWrap(true);
+ return Res;
+ }
// If input is a trunc from the destination type, then convert into shifts.
if (Src->hasOneUse() && X->getType() == DestTy) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 651e305..550dfc5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -105,6 +105,8 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I,
if (~KnownShrBits.Zero != ShlAmt)
return nullptr;
+ IRBuilderBase::InsertPointGuard Guard(IC.Builder);
+ IC.Builder.SetInsertPoint(I);
Value *ShrAmtZ =
IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()),
ShrAmt->getName() + ".z");
diff --git a/llvm/test/Analysis/DXILResource/buffer-frombinding.ll b/llvm/test/Analysis/DXILResource/buffer-frombinding.ll
index ab1945d..d92010e 100644
--- a/llvm/test/Analysis/DXILResource/buffer-frombinding.ll
+++ b/llvm/test/Analysis/DXILResource/buffer-frombinding.ll
@@ -9,6 +9,7 @@
@Four.str = private unnamed_addr constant [5 x i8] c"Four\00", align 1
@Array.str = private unnamed_addr constant [6 x i8] c"Array\00", align 1
@Five.str = private unnamed_addr constant [5 x i8] c"Five\00", align 1
+@Six.str = private unnamed_addr constant [4 x i8] c"Six\00", align 1
@CB.str = private unnamed_addr constant [3 x i8] c"CB\00", align 1
@Constants.str = private unnamed_addr constant [10 x i8] c"Constants\00", align 1
@@ -137,6 +138,23 @@ define void @test_typedbuffer() {
; CHECK: Element Type: f32
; CHECK: Element Count: 4
+ %uav4 = call target("dx.TypedBuffer", double, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 5, i32 0, i32 1, i32 0, ptr @Six.str)
+ ; CHECK: Resource [[UAV4:[0-9]+]]:
+ ; CHECK: Name: Six
+ ; CHECK: Binding:
+ ; CHECK: Record ID: 4
+ ; CHECK: Space: 5
+ ; CHECK: Lower Bound: 0
+ ; CHECK: Size: 1
+ ; CHECK: Globally Coherent: 0
+ ; CHECK: Counter Direction: Unknown
+ ; CHECK: Class: UAV
+ ; CHECK: Kind: Buffer
+ ; CHECK: IsROV: 0
+ ; CHECK: Element Type: f64 (stored as u32)
+ ; CHECK: Element Count: 1
+
%cb0 = call target("dx.CBuffer", {float})
@llvm.dx.resource.handlefrombinding(i32 1, i32 0, i32 1, i32 0, ptr @CB.str)
; CHECK: Resource [[CB0:[0-9]+]]:
@@ -175,6 +193,7 @@ define void @test_typedbuffer() {
; CHECK-DAG: Call bound to [[UAV1]]: %uav1 =
; CHECK-DAG: Call bound to [[UAV2]]: %uav2_1 =
; CHECK-DAG: Call bound to [[UAV2]]: %uav2_2 =
+; CHECK-DAG: Call bound to [[UAV4]]: %uav4 =
; CHECK-DAG: Call bound to [[CB0]]: %cb0 =
; CHECK-DAG: Call bound to [[CB1]]: %cb1 =
diff --git a/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll b/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll
index a2059be..0062f90 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/srv_metadata.ll
@@ -22,14 +22,14 @@ target triple = "dxil-pc-shadermodel6.6-compute"
; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- ---------
; PRINT-NEXT:; Zero texture f16 buf T0 t0 1
; PRINT-NEXT:; One texture f32 buf T1 t1 1
-; PRINT-NEXT:; Two texture f64 buf T2 t2 1
+; PRINT-NEXT:; Two texture u32 buf T2 t2 1
; PRINT-NEXT:; Three texture i32 buf T3 t3 1
; PRINT-NEXT:; Four texture byte r/o T4 t5 1
; PRINT-NEXT:; Five texture struct r/o T5 t6 1
-; PRINT-NEXT:; Six texture u64 buf T6 t10,space2 1
+; PRINT-NEXT:; Six texture u32 buf T6 t10,space2 1
; PRINT-NEXT:; Array texture f32 buf T7 t4,space3 100
-; PRINT-NEXT:; Array2 texture f64 buf T8 t2,space4 unbounded
-; PRINT-NEXT:; Seven texture u64 buf T9 t20,space5 1
+; PRINT-NEXT:; Array2 texture u32 buf T8 t2,space4 unbounded
+; PRINT-NEXT:; Seven texture u32 buf T9 t20,space5 1
;
define void @test() #0 {
@@ -120,15 +120,14 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
; CHECK: ![[Half]] = !{i32 0, i32 8}
; CHECK: ![[One]] = !{i32 1, ptr @One, !"One", i32 0, i32 1, i32 1, i32 10, i32 0, ![[Float:[0-9]+]]}
; CHECK: ![[Float]] = !{i32 0, i32 9}
-; CHECK: ![[Two]] = !{i32 2, ptr @Two, !"Two", i32 0, i32 2, i32 1, i32 10, i32 0, ![[Double:[0-9]+]]}
-; CHECK: ![[Double]] = !{i32 0, i32 10}
+; CHECK: ![[Two]] = !{i32 2, ptr @Two, !"Two", i32 0, i32 2, i32 1, i32 10, i32 0, ![[U32:[0-9]+]]}
+; CHECK: ![[U32]] = !{i32 0, i32 5}
; CHECK: ![[Three]] = !{i32 3, ptr @Three, !"Three", i32 0, i32 3, i32 1, i32 10, i32 0, ![[I32:[0-9]+]]}
; CHECK: ![[I32]] = !{i32 0, i32 4}
; CHECK: ![[Four]] = !{i32 4, ptr @Four, !"Four", i32 0, i32 5, i32 1, i32 11, i32 0, null}
; CHECK: ![[Five]] = !{i32 5, ptr @Five, !"Five", i32 0, i32 6, i32 1, i32 12, i32 0, ![[FiveStride:[0-9]+]]}
; CHECK: ![[FiveStride]] = !{i32 1, i32 2}
-; CHECK: ![[Six]] = !{i32 6, ptr @Six, !"Six", i32 2, i32 10, i32 1, i32 10, i32 0, ![[U64:[0-9]+]]}
-; CHECK: ![[U64]] = !{i32 0, i32 7}
+; CHECK: ![[Six]] = !{i32 6, ptr @Six, !"Six", i32 2, i32 10, i32 1, i32 10, i32 0, ![[U32:[0-9]+]]}
; CHECK: ![[Array]] = !{i32 7, ptr @Array, !"Array", i32 3, i32 4, i32 100, i32 10, i32 0, ![[Float]]}
-; CHECK: ![[Array2]] = !{i32 8, ptr @Array2, !"Array2", i32 4, i32 2, i32 -1, i32 10, i32 0, ![[Double]]}
-; CHECK: ![[Seven]] = !{i32 9, ptr @Seven, !"Seven", i32 5, i32 20, i32 1, i32 10, i32 0, ![[U64]]}
+; CHECK: ![[Array2]] = !{i32 8, ptr @Array2, !"Array2", i32 4, i32 2, i32 -1, i32 10, i32 0, ![[U32]]}
+; CHECK: ![[Seven]] = !{i32 9, ptr @Seven, !"Seven", i32 5, i32 20, i32 1, i32 10, i32 0, ![[U32]]}
diff --git a/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll b/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll
index 5b2b3ef..d377a52 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/uav_metadata.ll
@@ -25,17 +25,17 @@ target triple = "dxil-pc-shadermodel6.6-compute"
; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- ---------
; PRINT-NEXT:; Zero UAV f16 buf U0 u0 1
; PRINT-NEXT:; One UAV f32 buf U1 u1 1
-; PRINT-NEXT:; Two UAV f64 buf U2 u2 1
+; PRINT-NEXT:; Two UAV u32 buf U2 u2 1
; PRINT-NEXT:; Three UAV i32 buf U3 u3 1
; PRINT-NEXT:; Four UAV byte r/w U4 u5 1
; PRINT-NEXT:; Five UAV struct r/w U5 u6 1
; PRINT-NEXT:; Six UAV i32 buf U6 u7 1
; PRINT-NEXT:; Seven UAV struct r/w U7 u8 1
; PRINT-NEXT:; Eight UAV byte r/w U8 u9 1
-; PRINT-NEXT:; Nine UAV u64 buf U9 u10,space2 1
+; PRINT-NEXT:; Nine UAV u32 buf U9 u10,space2 1
; PRINT-NEXT:; Array UAV f32 buf U10 u4,space3 100
-; PRINT-NEXT:; Array2 UAV f64 buf U11 u2,space4 unbounded
-; PRINT-NEXT:; Ten UAV u64 buf U12 u22,space5 1
+; PRINT-NEXT:; Array2 UAV u32 buf U11 u2,space4 unbounded
+; PRINT-NEXT:; Ten UAV u32 buf U12 u22,space5 1
define void @test() #0 {
; RWBuffer<half4> Zero : register(u0)
@@ -144,8 +144,8 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
; CHECK: ![[Half]] = !{i32 0, i32 8}
; CHECK: ![[One]] = !{i32 1, ptr @One, !"One", i32 0, i32 1, i32 1, i32 10, i1 false, i1 false, i1 false, ![[Float:[0-9]+]]}
; CHECK: ![[Float]] = !{i32 0, i32 9}
-; CHECK: ![[Two]] = !{i32 2, ptr @Two, !"Two", i32 0, i32 2, i32 1, i32 10, i1 false, i1 false, i1 false, ![[Double:[0-9]+]]}
-; CHECK: ![[Double]] = !{i32 0, i32 10}
+; CHECK: ![[Two]] = !{i32 2, ptr @Two, !"Two", i32 0, i32 2, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U32:[0-9]+]]}
+; CHECK: ![[U32]] = !{i32 0, i32 5}
; CHECK: ![[Three]] = !{i32 3, ptr @Three, !"Three", i32 0, i32 3, i32 1, i32 10, i1 false, i1 false, i1 false, ![[I32:[0-9]+]]}
; CHECK: ![[I32]] = !{i32 0, i32 4}
; CHECK: ![[Four]] = !{i32 4, ptr @Four, !"Four", i32 0, i32 5, i32 1, i32 11, i1 false, i1 false, i1 false, null}
@@ -155,8 +155,7 @@ attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
; CHECK: ![[Seven]] = !{i32 7, ptr @Seven, !"Seven", i32 0, i32 8, i32 1, i32 12, i1 false, i1 false, i1 true, ![[SevenStride:[0-9]+]]}
; CHECK: ![[SevenStride]] = !{i32 1, i32 16}
; CHECK: ![[Eight]] = !{i32 8, ptr @Eight, !"Eight", i32 0, i32 9, i32 1, i32 11, i1 false, i1 false, i1 true, null}
-; CHECK: ![[Nine]] = !{i32 9, ptr @Nine, !"Nine", i32 2, i32 10, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U64:[0-9]+]]}
-; CHECK: ![[U64]] = !{i32 0, i32 7}
+; CHECK: ![[Nine]] = !{i32 9, ptr @Nine, !"Nine", i32 2, i32 10, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U32]]}
; CHECK: ![[Array]] = !{i32 10, ptr @Array, !"Array", i32 3, i32 4, i32 100, i32 10, i1 false, i1 false, i1 false, ![[Float]]}
-; CHECK: ![[Array2]] = !{i32 11, ptr @Array2, !"Array2", i32 4, i32 2, i32 -1, i32 10, i1 false, i1 false, i1 false, ![[Double]]}
+; CHECK: ![[Array2]] = !{i32 11, ptr @Array2, !"Array2", i32 4, i32 2, i32 -1, i32 10, i1 false, i1 false, i1 false, ![[U32]]}
; CHECK: ![[Ten]] = !{i32 12, ptr @Ten, !"Ten", i32 5, i32 22, i32 1, i32 10, i1 false, i1 false, i1 false, ![[U64:[0-9]+]]}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll
new file mode 100644
index 0000000..93fcd42
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
+
+declare float @llvm.log2.f32(float)
+declare double @llvm.log2.f64(double)
+
+define float @flog2_s(float %x) nounwind {
+; LA32-LABEL: flog2_s:
+; LA32: # %bb.0:
+; LA32-NEXT: b log2f
+;
+; LA64-LABEL: flog2_s:
+; LA64: # %bb.0:
+; LA64-NEXT: pcaddu18i $t8, %call36(log2f)
+; LA64-NEXT: jr $t8
+ %y = call float @llvm.log2.f32(float %x)
+ ret float %y
+}
+
+define double @flog2_d(double %x) nounwind {
+; LA32-LABEL: flog2_d:
+; LA32: # %bb.0:
+; LA32-NEXT: b log2
+;
+; LA64-LABEL: flog2_d:
+; LA64: # %bb.0:
+; LA64-NEXT: pcaddu18i $t8, %call36(log2)
+; LA64-NEXT: jr $t8
+ %y = call double @llvm.log2.f64(double %x)
+ ret double %y
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
new file mode 100644
index 0000000..68f2e3a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64
+
+declare <8 x float> @llvm.log2.v8f32(<8 x float>)
+declare <4 x double> @llvm.log2.v4f64(<4 x double>)
+
+define void @flog2_v8f32(ptr %res, ptr %a) nounwind {
+; LA32-LABEL: flog2_v8f32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -128
+; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 5
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: bl log2f
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 4
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: bl log2f
+; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA32-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr0, $vr1, 16
+; LA32-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 6
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: bl log2f
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr1, $vr0, 32
+; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 7
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: bl log2f
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr1, $vr0, 48
+; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 1
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: bl log2f
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 0
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: bl log2f
+; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr0, $vr1, 16
+; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 2
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: bl log2f
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr1, $vr0, 32
+; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 3
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: bl log2f
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr1, $vr0, 48
+; LA32-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 2
+; LA32-NEXT: xvst $xr1, $fp, 0
+; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 128
+; LA32-NEXT: ret
+;
+; LA64-LABEL: flog2_v8f32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -128
+; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 5
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 4
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA64-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr0, $vr1, 16
+; LA64-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 6
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr1, $vr0, 32
+; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 7
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr1, $vr0, 48
+; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 1
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr0, $vr1, 16
+; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 2
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr1, $vr0, 32
+; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 3
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr1, $vr0, 48
+; LA64-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
+; LA64-NEXT: xvst $xr1, $fp, 0
+; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 128
+; LA64-NEXT: ret
+entry:
+ %v = load <8 x float>, ptr %a
+ %r = call <8 x float> @llvm.log2.v8f32(<8 x float> %v)
+ store <8 x float> %r, ptr %res
+ ret void
+}
+
+define void @flog2_v4f64(ptr %res, ptr %a) nounwind {
+; LA32-LABEL: flog2_v4f64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -112
+; LA32-NEXT: st.w $ra, $sp, 108 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 104 # 4-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: xvpickve.d $xr0, $xr0, 3
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA32-NEXT: bl log2
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA32-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.d $xr0, $xr0, 2
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA32-NEXT: bl log2
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
+; LA32-NEXT: vextrins.d $vr0, $vr1, 16
+; LA32-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.d $xr0, $xr0, 1
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA32-NEXT: bl log2
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.d $xr0, $xr0, 0
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA32-NEXT: bl log2
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT: vextrins.d $vr0, $vr1, 16
+; LA32-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
+; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
+; LA32-NEXT: xvst $xr0, $fp, 0
+; LA32-NEXT: ld.w $fp, $sp, 104 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 108 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 112
+; LA32-NEXT: ret
+;
+; LA64-LABEL: flog2_v4f64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -112
+; LA64-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: xvpickve.d $xr0, $xr0, 3
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.d $xr0, $xr0, 2
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.d $vr0, $vr1, 16
+; LA64-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.d $xr0, $xr0, 1
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.d $xr0, $xr0, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.d $vr0, $vr1, 16
+; LA64-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
+; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
+; LA64-NEXT: xvst $xr0, $fp, 0
+; LA64-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 112
+; LA64-NEXT: ret
+entry:
+ %v = load <4 x double>, ptr %a
+ %r = call <4 x double> @llvm.log2.v4f64(<4 x double> %v)
+ store <4 x double> %r, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll
new file mode 100644
index 0000000..e5e75ec
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefix=LA64
+
+declare <4 x float> @llvm.log2.v4f32(<4 x float>)
+declare <2 x double> @llvm.log2.v2f64(<2 x double>)
+
+define void @flog2_v4f32(ptr %res, ptr %a) nounwind {
+; LA32-LABEL: flog2_v4f32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -48
+; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: vreplvei.w $vr0, $vr0, 1
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA32-NEXT: bl log2f
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT: vreplvei.w $vr0, $vr0, 0
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA32-NEXT: bl log2f
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr0, $vr1, 16
+; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT: vreplvei.w $vr0, $vr0, 2
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA32-NEXT: bl log2f
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr1, $vr0, 32
+; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT: vreplvei.w $vr0, $vr0, 3
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA32-NEXT: bl log2f
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr1, $vr0, 48
+; LA32-NEXT: vst $vr1, $fp, 0
+; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 48
+; LA32-NEXT: ret
+;
+; LA64-LABEL: flog2_v4f32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -48
+; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 1
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr0, $vr1, 16
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 2
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr1, $vr0, 32
+; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 3
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr1, $vr0, 48
+; LA64-NEXT: vst $vr1, $fp, 0
+; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 48
+; LA64-NEXT: ret
+entry:
+ %v = load <4 x float>, ptr %a
+ %r = call <4 x float> @llvm.log2.v4f32(<4 x float> %v)
+ store <4 x float> %r, ptr %res
+ ret void
+}
+
+define void @flog2_v2f64(ptr %res, ptr %a) nounwind {
+; LA32-LABEL: flog2_v2f64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -48
+; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: vreplvei.d $vr0, $vr0, 1
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
+; LA32-NEXT: bl log2
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT: vreplvei.d $vr0, $vr0, 0
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
+; LA32-NEXT: bl log2
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT: vextrins.d $vr0, $vr1, 16
+; LA32-NEXT: vst $vr0, $fp, 0
+; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 48
+; LA32-NEXT: ret
+;
+; LA64-LABEL: flog2_v2f64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -48
+; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: vreplvei.d $vr0, $vr0, 1
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: vreplvei.d $vr0, $vr0, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(log2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.d $vr0, $vr1, 16
+; LA64-NEXT: vst $vr0, $fp, 0
+; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 48
+; LA64-NEXT: ret
+entry:
+ %v = load <2 x double>, ptr %a
+ %r = call <2 x double> @llvm.log2.v2f64(<2 x double> %v)
+ store <2 x double> %r, ptr %res
+ ret void
+}
diff --git a/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s b/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s
new file mode 100644
index 0000000..8bd9148
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1100 --show-inst < %s | FileCheck %s
+
+// CHECK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100"
+buffer_load_dwordx4 v[0:3], v0, s[0:3], 0, offen offset:4092 slc
+// CHECK: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092 slc ; <MCInst #13135 BUFFER_LOAD_DWORDX4_OFFEN_gfx11
+// CHECK-NEXT: ; <MCOperand Reg:10104>
+// CHECK-NEXT: ; <MCOperand Reg:486>
+// CHECK-NEXT: ; <MCOperand Reg:7754>
+// CHECK-NEXT: ; <MCOperand Imm:0>
+// CHECK-NEXT: ; <MCOperand Imm:4092>
+// CHECK-NEXT: ; <MCOperand Imm:2>
+// CHECK-NEXT: ; <MCOperand Imm:0>>
+buffer_store_dword v0, v1, s[0:3], 0 offen slc
+// CHECK: buffer_store_b32 v0, v1, s[0:3], 0 offen slc ; <MCInst #14553 BUFFER_STORE_DWORD_OFFEN_gfx11
+// CHECK-NEXT: ; <MCOperand Reg:486>
+// CHECK-NEXT: ; <MCOperand Reg:487>
+// CHECK-NEXT: ; <MCOperand Reg:7754>
+// CHECK-NEXT: ; <MCOperand Imm:0>
+// CHECK-NEXT: ; <MCOperand Imm:0>
+// CHECK-NEXT: ; <MCOperand Imm:2>
+// CHECK-NEXT: ; <MCOperand Imm:0>>
+
+; tbuffer ops use autogenerate asm parsers
+tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092 slc
+// CHECK: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092 slc ; <MCInst #34095 TBUFFER_LOAD_FORMAT_XYZW_OFFEN_gfx11
+// CHECK-NEXT: ; <MCOperand Reg:10104>
+// CHECK-NEXT: ; <MCOperand Reg:486>
+// CHECK-NEXT: ; <MCOperand Reg:7754>
+// CHECK-NEXT: ; <MCOperand Imm:0>
+// CHECK-NEXT: ; <MCOperand Imm:4092>
+// CHECK-NEXT: ; <MCOperand Imm:49>
+// CHECK-NEXT: ; <MCOperand Imm:2>
+// CHECK-NEXT: ; <MCOperand Imm:0>>
+tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] offen slc
+// CHECK: tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] offen slc ; <MCInst #34264 TBUFFER_STORE_FORMAT_D16_X_OFFEN_gfx11
+// CHECK-NEXT: ; <MCOperand Reg:486>
+// CHECK-NEXT: ; <MCOperand Reg:487>
+// CHECK-NEXT: ; <MCOperand Reg:7754>
+// CHECK-NEXT: ; <MCOperand Imm:0>
+// CHECK-NEXT: ; <MCOperand Imm:0>
+// CHECK-NEXT: ; <MCOperand Imm:33>
+// CHECK-NEXT: ; <MCOperand Imm:2>
+// CHECK-NEXT: ; <MCOperand Imm:0>>
diff --git a/llvm/test/Other/print-on-crash.ll b/llvm/test/Other/print-on-crash.ll
index 565da0c..f1e3414 100644
--- a/llvm/test/Other/print-on-crash.ll
+++ b/llvm/test/Other/print-on-crash.ll
@@ -17,7 +17,7 @@
; RUN: not --crash opt -print-on-crash -print-module-scope -passes=trigger-crash-module -filter-passes=blah < %s 2>&1 | FileCheck %s --check-prefix=CHECK_FILTERED
-; CHECK_SIMPLE: *** Dump of IR Before Last Pass {{.*}} Started ***
+; CHECK_SIMPLE: ; *** Dump of IR Before Last Pass {{.*}} Started ***
; CHECK_SIMPLE: @main
; CHECK_SIMPLE: entry:
; CHECK_NO_CRASH-NOT: *** Dump of IR
diff --git a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll
index 2b22965..dcfd933 100644
--- a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll
+++ b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll
@@ -21,6 +21,28 @@ define i16 @selective_shift_16(i32 %mask, i16 %upper, i16 %lower) {
ret i16 %trunc
}
+; Will assert if InsertPoint is not set before creating an instruction
+; with IRBuilder
+define i16 @selective_shift_16_insertpt(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16_insertpt(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: [[ADD_ONE:%.*]] = add i16 [[SEL_V]], 1
+; CHECK-NEXT: ret i16 [[ADD_ONE]]
+;
+ %mask.bit = and i32 %mask, 16
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ %sel = lshr i32 %pack, %mask.bit
+ %add.one = add i32 %sel, 1
+ %trunc = trunc i32 %add.one to i16
+ ret i16 %trunc
+}
+
define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) {
; CHECK-LABEL: define i16 @selective_shift_16.commute(
; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
diff --git a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll
index 516f1a2..4128a15 100644
--- a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll
+++ b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll
@@ -144,6 +144,17 @@ define i24 @wide_source_matching_signbits(i32 %x) {
ret i24 %c
}
+define i32 @wide_source_matching_signbits_has_nsw_flag(i64 %i) {
+; CHECK-LABEL: define i32 @wide_source_matching_signbits_has_nsw_flag(
+; CHECK-SAME: i64 [[I:%.*]]) {
+; CHECK-NEXT: [[A:%.*]] = trunc nsw i64 [[I]] to i32
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %a = trunc nsw i64 %i to i16
+ %b = sext i16 %a to i32
+ ret i32 %b
+}
+
; negative test - not enough sign-bits
define i24 @wide_source_not_matching_signbits(i32 %x) {
diff --git a/llvm/test/Transforms/InstCombine/trunc-sext.ll b/llvm/test/Transforms/InstCombine/trunc-sext.ll
new file mode 100644
index 0000000..ac143840
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/trunc-sext.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+target datalayout = "i16:16:16-i32:32:32-i64:64:64-n16:32:64"
+
+define i32 @test(i64 %i) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i64 [[I:%.*]]) {
+; CHECK-NEXT: [[A:%.*]] = trunc i64 [[I]] to i16
+; CHECK-NEXT: [[B:%.*]] = sext i16 [[A]] to i32
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %a = trunc i64 %i to i16
+ %b = sext i16 %a to i32
+ ret i32 %b
+}
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
new file mode 100644
index 0000000..becd9d1
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
@@ -0,0 +1,245 @@
+## This test uses TU index for type parsing in dwp and makes sure the DWARF4 type is
+## successfully retrieved.
+
+# RUN: llvm-mc %s --split-dwarf-file=test.dwo -filetype obj -triple x86_64 -o test.o
+# RUN: llvm-dwp -e test.o -o test.dwp
+# RUN: llvm-dwarfdump test.dwp | FileCheck %s
+
+# Generated from:
+#
+# struct t1 { };
+# t1 v1;
+#
+# $ clang++ -S -g -fdebug-types-section -gsplit-dwarf -o test.4.split.dwp.s -gdwarf-4
+
+# CHECK: DW_TAG_variable
+# CHECK: DW_AT_type ({{.*}} "t1")
+ .file "test.cpp"
+ .section .debug_types.dwo,"e",@progbits
+ .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
+.Ldebug_info_dwo_start0:
+ .short 4 # DWARF version number
+ .long 0 # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .quad -4149699470930386446 # Type Signature
+ .long 30 # Type DIE Offset
+ .byte 1 # Abbrev [1] 0x17:0xe DW_TAG_type_unit
+ .short 33 # DW_AT_language
+ .long 0 # DW_AT_stmt_list
+ .byte 2 # Abbrev [2] 0x1e:0x6 DW_TAG_structure_type
+ .byte 5 # DW_AT_calling_convention
+ .byte 1 # DW_AT_name
+ .byte 1 # DW_AT_byte_size
+ .byte 1 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .byte 0 # End Of Children Mark
+.Ldebug_info_dwo_end0:
+ .file 1 "." "test.cpp"
+ .type v1,@object # @v1
+ .bss
+ .globl v1
+v1:
+ .zero 1
+ .size v1, 1
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 0 # DW_CHILDREN_no
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 14 # DW_FORM_strp
+ .ascii "\264B" # DW_AT_GNU_pubnames
+ .byte 25 # DW_FORM_flag_present
+ .ascii "\260B" # DW_AT_GNU_dwo_name
+ .byte 14 # DW_FORM_strp
+ .ascii "\261B" # DW_AT_GNU_dwo_id
+ .byte 7 # DW_FORM_data8
+ .ascii "\263B" # DW_AT_GNU_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] 0xb:0x19 DW_TAG_compile_unit
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Lskel_string0 # DW_AT_comp_dir
+ # DW_AT_GNU_pubnames
+ .long .Lskel_string1 # DW_AT_GNU_dwo_name
+ .quad 1388839634901268525 # DW_AT_GNU_dwo_id
+ .long .Laddr_table_base0 # DW_AT_GNU_addr_base
+.Ldebug_info_end0:
+ .section .debug_str,"MS",@progbits,1
+.Lskel_string0:
+ .asciz "." # string offset=0
+.Lskel_string1:
+ .asciz "test.dwo" # string offset=2
+ .section .debug_str.dwo,"eMS",@progbits,1
+.Linfo_string0:
+ .asciz "v1" # string offset=0
+.Linfo_string1:
+ .asciz "t1" # string offset=3
+.Linfo_string2:
+ .asciz "clang version 22.0.0" # string offset=6
+.Linfo_string3:
+ .asciz "test.cpp" # string offset=27
+.Linfo_string4:
+ .asciz "test.dwo" # string offset=36
+ .section .debug_str_offsets.dwo,"e",@progbits
+ .long 0
+ .long 3
+ .long 6
+ .long 27
+ .long 36
+ .section .debug_info.dwo,"e",@progbits
+ .long .Ldebug_info_dwo_end1-.Ldebug_info_dwo_start1 # Length of Unit
+.Ldebug_info_dwo_start1:
+ .short 4 # DWARF version number
+ .long 0 # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 3 # Abbrev [3] 0xb:0x23 DW_TAG_compile_unit
+ .byte 2 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 3 # DW_AT_name
+ .byte 4 # DW_AT_GNU_dwo_name
+ .quad 1388839634901268525 # DW_AT_GNU_dwo_id
+ .byte 4 # Abbrev [4] 0x19:0xb DW_TAG_variable
+ .byte 0 # DW_AT_name
+ .long 36 # DW_AT_type
+ # DW_AT_external
+ .byte 1 # DW_AT_decl_file
+ .byte 2 # DW_AT_decl_line
+ .byte 2 # DW_AT_location
+ .byte 251
+ .byte 0
+ .byte 5 # Abbrev [5] 0x24:0x9 DW_TAG_structure_type
+ # DW_AT_declaration
+ .quad -4149699470930386446 # DW_AT_signature
+ .byte 0 # End Of Children Mark
+.Ldebug_info_dwo_end1:
+ .section .debug_abbrev.dwo,"e",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 65 # DW_TAG_type_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 19 # DW_TAG_structure_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 54 # DW_AT_calling_convention
+ .byte 11 # DW_FORM_data1
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .ascii "\260B" # DW_AT_GNU_dwo_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .ascii "\261B" # DW_AT_GNU_dwo_id
+ .byte 7 # DW_FORM_data8
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 4 # Abbreviation Code
+ .byte 52 # DW_TAG_variable
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 2 # DW_AT_location
+ .byte 24 # DW_FORM_exprloc
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 5 # Abbreviation Code
+ .byte 19 # DW_TAG_structure_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 60 # DW_AT_declaration
+ .byte 25 # DW_FORM_flag_present
+ .byte 105 # DW_AT_signature
+ .byte 32 # DW_FORM_ref_sig8
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_line.dwo,"e",@progbits
+.Ltmp0:
+ .long .Ldebug_line_end0-.Ldebug_line_start0 # unit length
+.Ldebug_line_start0:
+ .short 4
+ .long .Lprologue_end0-.Lprologue_start0
+.Lprologue_start0:
+ .byte 1
+ .byte 1
+ .byte 1
+ .byte -5
+ .byte 14
+ .byte 1
+ .byte 0
+ .ascii "test.cpp"
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+.Lprologue_end0:
+.Ldebug_line_end0:
+ .section .debug_addr,"",@progbits
+.Laddr_table_base0:
+ .quad v1
+ .section .debug_gnu_pubnames,"",@progbits
+ .long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info
+.LpubNames_start0:
+ .short 2 # DWARF Version
+ .long .Lcu_begin0 # Offset of Compilation Unit Info
+ .long 36 # Compilation Unit Length
+ .long 25 # DIE offset
+ .byte 32 # Attributes: VARIABLE, EXTERNAL
+ .asciz "v1" # External Name
+ .long 0 # End Mark
+.LpubNames_end0:
+ .section .debug_gnu_pubtypes,"",@progbits
+ .long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info
+.LpubTypes_start0:
+ .short 2 # DWARF Version
+ .long .Lcu_begin0 # Offset of Compilation Unit Info
+ .long 36 # Compilation Unit Length
+ .long 36 # DIE offset
+ .byte 16 # Attributes: TYPE, EXTERNAL
+ .asciz "t1" # External Name
+ .long 0 # End Mark
+.LpubTypes_end0:
+ .ident "clang version 22.0.0"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/llvm/test/tools/llvm-mca/AMDGPU/buffer-op-swz-operand.s b/llvm/test/tools/llvm-mca/AMDGPU/buffer-op-swz-operand.s
new file mode 100644
index 0000000..932d9d1
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AMDGPU/buffer-op-swz-operand.s
@@ -0,0 +1,45 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s
+
+buffer_load_dwordx4 v[30:33], v4, s[0:3], 0, offen offset:4092
+buffer_store_dword v0, v1, s[0:3], 0 offen
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 280
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 1
+# CHECK-NEXT: uOps Per Cycle: 0.71
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 80 1.00 * U buffer_load_dwordx4 v[30:33], v4, s[0:3], 0 offen offset:4092
+# CHECK-NEXT: 1 80 1.00 * U buffer_store_dword v0, v1, s[0:3], 0 offen
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - HWBranch
+# CHECK-NEXT: [1] - HWExport
+# CHECK-NEXT: [2] - HWLGKM
+# CHECK-NEXT: [3] - HWSALU
+# CHECK-NEXT: [4] - HWVALU
+# CHECK-NEXT: [5] - HWVMEM
+# CHECK-NEXT: [6] - HWXDL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6]
+# CHECK-NEXT: - - - - - 2.00 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: - - - - - 1.00 - buffer_load_dwordx4 v[30:33], v4, s[0:3], 0 offen offset:4092
+# CHECK-NEXT: - - - - - 1.00 - buffer_store_dword v0, v1, s[0:3], 0 offen