aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFangrui Song <i@maskray.me>2025-07-23 23:33:42 -0700
committerFangrui Song <i@maskray.me>2025-07-23 23:33:42 -0700
commitb56bb19e66f399729379b9a1a7ee03ee8060592b (patch)
tree86190d8af525eab673a3f6b91f8b8dbf69c91087
parent8c36d94a1ff479dd8b31a3ca5737fc7c43127a5b (diff)
parent5bbf01f7cbd98f9bf79eeaeecd43c28e38cb94fa (diff)
downloadllvm-users/MaskRay/spr/main.goff-only-register-sections-within-mcobjectstreamerchangesection.zip
llvm-users/MaskRay/spr/main.goff-only-register-sections-within-mcobjectstreamerchangesection.tar.gz
llvm-users/MaskRay/spr/main.goff-only-register-sections-within-mcobjectstreamerchangesection.tar.bz2
Created using spr 1.3.5-bogner [skip ci]
-rw-r--r--libc/include/CMakeLists.txt1
-rw-r--r--libc/src/stdio/printf_core/CMakeLists.txt2
-rw-r--r--lldb/tools/lldb-rpc-gen/RPCCommon.cpp1
-rw-r--r--lldb/tools/lldb-rpc-gen/lldb-rpc-gen.cpp2
-rw-r--r--llvm/lib/CodeGen/InterleavedAccessPass.cpp96
-rw-r--r--llvm/lib/Target/RISCV/RISCVFeatures.td6
-rw-r--r--llvm/test/CodeGen/LoongArch/lasx/build-vector.ll220
-rw-r--r--llvm/test/CodeGen/LoongArch/lsx/build-vector.ll125
-rw-r--r--llvm/test/CodeGen/RISCV/attributes.ll4
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll31
-rw-r--r--llvm/test/MC/ELF/section-sym-err.s7
-rw-r--r--llvm/test/MC/ELF/section-sym-err2.s6
-rw-r--r--llvm/test/MC/ELF/section-sym2.s39
-rw-r--r--llvm/test/MC/RISCV/attribute-arch.s2
-rw-r--r--mlir/lib/Dialect/Affine/Analysis/Utils.cpp7
-rw-r--r--mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp6
-rw-r--r--mlir/test/Dialect/Affine/loop-fusion-4.mlir28
17 files changed, 494 insertions, 89 deletions
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index 18fd3f8..fafc137 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -187,6 +187,7 @@ add_header_macro(
arpa/inet.h
DEPENDS
.llvm_libc_common_h
+ .inttypes
)
add_header_macro(
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index c22f985..76eb0a2 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -44,6 +44,7 @@ add_header_library(
HDRS
core_structs.h
DEPENDS
+ libc.include.inttypes
libc.src.__support.CPP.string_view
libc.src.__support.FPUtil.fp_bits
)
@@ -97,6 +98,7 @@ add_header_library(
.core_structs
.printf_config
.writer
+ libc.include.inttypes
libc.src.__support.big_int
libc.src.__support.common
libc.src.__support.CPP.limits
diff --git a/lldb/tools/lldb-rpc-gen/RPCCommon.cpp b/lldb/tools/lldb-rpc-gen/RPCCommon.cpp
index 34791fa..6f0abe4 100644
--- a/lldb/tools/lldb-rpc-gen/RPCCommon.cpp
+++ b/lldb/tools/lldb-rpc-gen/RPCCommon.cpp
@@ -194,7 +194,6 @@ std::string lldb_rpc_gen::GetMangledName(ASTContext &Context,
return Mangled;
}
-static auto CheckTypeForLLDBPrivate = [](const Type *Ty) {};
bool lldb_rpc_gen::TypeIsFromLLDBPrivate(QualType T) {
auto CheckTypeForLLDBPrivate = [](const Type *Ty) {
if (!Ty)
diff --git a/lldb/tools/lldb-rpc-gen/lldb-rpc-gen.cpp b/lldb/tools/lldb-rpc-gen/lldb-rpc-gen.cpp
index fdcfee9..9b48796 100644
--- a/lldb/tools/lldb-rpc-gen/lldb-rpc-gen.cpp
+++ b/lldb/tools/lldb-rpc-gen/lldb-rpc-gen.cpp
@@ -102,8 +102,6 @@ public:
lldb_rpc_gen::GetMangledName(Context, MDecl);
const bool IsDisallowed =
lldb_rpc_gen::MethodIsDisallowed(Context, MDecl);
- const bool HasCallbackParameter =
- lldb_rpc_gen::HasCallbackParameter(MDecl);
SupportLevel MethodSupportLevel = GetMethodSupportLevel(MDecl);
if (MethodSupportLevel == eImplemented && !IsDisallowed) {
const lldb_rpc_gen::Method Method(MDecl, Policy, Context);
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 1b69188..65565b9 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -253,6 +253,21 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
return false;
}
+static Value *getMaskOperand(IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unexpected intrinsic");
+ case Intrinsic::vp_load:
+ return II->getOperand(1);
+ case Intrinsic::masked_load:
+ return II->getOperand(2);
+ case Intrinsic::vp_store:
+ return II->getOperand(2);
+ case Intrinsic::masked_store:
+ return II->getOperand(3);
+ }
+}
+
// Return the corresponded deinterleaved mask, or nullptr if there is no valid
// mask.
static Value *getMask(Value *WideMask, unsigned Factor,
@@ -268,8 +283,12 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
if (isa<ScalableVectorType>(Load->getType()))
return false;
- if (auto *LI = dyn_cast<LoadInst>(Load);
- LI && !LI->isSimple())
+ auto *LI = dyn_cast<LoadInst>(Load);
+ auto *II = dyn_cast<IntrinsicInst>(Load);
+ if (!LI && !II)
+ return false;
+
+ if (LI && !LI->isSimple())
return false;
// Check if all users of this load are shufflevectors. If we encounter any
@@ -322,7 +341,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
// Holds the corresponding index for each DE-interleave shuffle.
SmallVector<unsigned, 4> Indices;
- Type *VecTy = FirstSVI->getType();
+ VectorType *VecTy = cast<VectorType>(FirstSVI->getType());
// Check if other shufflevectors are also DE-interleaved of the same type
// and factor as the first shufflevector.
@@ -360,13 +379,16 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load);
Value *Mask = nullptr;
- if (auto *VPLoad = dyn_cast<VPIntrinsic>(Load)) {
- Mask = getMask(VPLoad->getMaskParam(), Factor, cast<VectorType>(VecTy));
+ if (LI) {
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n");
+ } else {
+ // Check mask operand. Handle both all-true/false and interleaved mask.
+ Mask = getMask(getMaskOperand(II), Factor, VecTy);
if (!Mask)
return false;
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load: " << *Load << "\n");
- } else {
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n");
+
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load or masked.load: "
+ << *Load << "\n");
}
// Try to create target specific intrinsics to replace the load and
@@ -483,15 +505,16 @@ bool InterleavedAccessImpl::tryReplaceExtracts(
bool InterleavedAccessImpl::lowerInterleavedStore(
Instruction *Store, SmallSetVector<Instruction *, 32> &DeadInsts) {
Value *StoredValue;
- if (auto *SI = dyn_cast<StoreInst>(Store)) {
+ auto *SI = dyn_cast<StoreInst>(Store);
+ auto *II = dyn_cast<IntrinsicInst>(Store);
+ if (SI) {
if (!SI->isSimple())
return false;
StoredValue = SI->getValueOperand();
- } else if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) {
- assert(VPStore->getIntrinsicID() == Intrinsic::vp_store);
- StoredValue = VPStore->getArgOperand(0);
} else {
- llvm_unreachable("unsupported store operation");
+ assert(II->getIntrinsicID() == Intrinsic::vp_store ||
+ II->getIntrinsicID() == Intrinsic::masked_store);
+ StoredValue = II->getArgOperand(0);
}
auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue);
@@ -508,18 +531,18 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
"number of stored element should be a multiple of Factor");
Value *Mask = nullptr;
- if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) {
+ if (SI) {
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
+ } else {
+ // Check mask operand. Handle both all-true/false and interleaved mask.
unsigned LaneMaskLen = NumStoredElements / Factor;
- Mask = getMask(VPStore->getMaskParam(), Factor,
+ Mask = getMask(getMaskOperand(II), Factor,
ElementCount::getFixed(LaneMaskLen));
if (!Mask)
return false;
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store: " << *Store
- << "\n");
-
- } else {
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store or masked.store: "
+ << *Store << "\n");
}
// Try to create target specific intrinsics to replace the store and
@@ -592,19 +615,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
assert(II);
// Check mask operand. Handle both all-true/false and interleaved mask.
- Value *WideMask;
- switch (II->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::vp_load:
- WideMask = II->getOperand(1);
- break;
- case Intrinsic::masked_load:
- WideMask = II->getOperand(2);
- break;
- }
-
- Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI));
+ Mask = getMask(getMaskOperand(II), Factor, getDeinterleavedVectorType(DI));
if (!Mask)
return false;
@@ -642,18 +653,7 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
Value *Mask = nullptr;
if (II) {
// Check mask operand. Handle both all-true/false and interleaved mask.
- Value *WideMask;
- switch (II->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::vp_store:
- WideMask = II->getOperand(2);
- break;
- case Intrinsic::masked_store:
- WideMask = II->getOperand(3);
- break;
- }
- Mask = getMask(WideMask, Factor,
+ Mask = getMask(getMaskOperand(II), Factor,
cast<VectorType>(InterleaveValues[0]->getType()));
if (!Mask)
return false;
@@ -687,11 +687,13 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
using namespace PatternMatch;
for (auto &I : instructions(F)) {
if (match(&I, m_CombineOr(m_Load(m_Value()),
- m_Intrinsic<Intrinsic::vp_load>())))
+ m_Intrinsic<Intrinsic::vp_load>())) ||
+ match(&I, m_Intrinsic<Intrinsic::masked_load>()))
Changed |= lowerInterleavedLoad(&I, DeadInsts);
if (match(&I, m_CombineOr(m_Store(m_Value(), m_Value()),
- m_Intrinsic<Intrinsic::vp_store>())))
+ m_Intrinsic<Intrinsic::vp_store>())) ||
+ match(&I, m_Intrinsic<Intrinsic::masked_store>()))
Changed |= lowerInterleavedStore(&I, DeadInsts);
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index f9c0b54..171940e 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1272,7 +1272,7 @@ def FeatureVendorXSfmm128t
def FeatureVendorXSfvqmaccdod
: RISCVExtension<1, 0,
"SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2)",
- [FeatureStdExtZve32x]>;
+ [FeatureStdExtZve32x, FeatureStdExtZvl128b]>;
def HasVendorXSfvqmaccdod
: Predicate<"Subtarget->hasVendorXSfvqmaccdod()">,
AssemblerPredicate<(all_of FeatureVendorXSfvqmaccdod),
@@ -1281,7 +1281,7 @@ def HasVendorXSfvqmaccdod
def FeatureVendorXSfvqmaccqoq
: RISCVExtension<1, 0,
"SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4)",
- [FeatureStdExtZve32x]>;
+ [FeatureStdExtZve32x, FeatureStdExtZvl256b]>;
def HasVendorXSfvqmaccqoq
: Predicate<"Subtarget->hasVendorXSfvqmaccqoq()">,
AssemblerPredicate<(all_of FeatureVendorXSfvqmaccqoq),
@@ -1290,7 +1290,7 @@ def HasVendorXSfvqmaccqoq
def FeatureVendorXSfvfwmaccqqq
: RISCVExtension<1, 0,
"SiFive Matrix Multiply Accumulate Instruction (4-by-4)",
- [FeatureStdExtZvfbfmin]>;
+ [FeatureStdExtZvfbfmin, FeatureStdExtZvl128b]>;
def HasVendorXSfvfwmaccqqq
: Predicate<"Subtarget->hasVendorXSfvfwmaccqqq()">,
AssemblerPredicate<(all_of FeatureVendorXSfvfwmaccqqq),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index 61a915a..086ef54 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -352,6 +352,81 @@ entry:
ret void
}
+define void @buildvector_v32i8_partial(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a5, i8 %a7, i8 %a8, i8 %a15, i8 %a17, i8 %a18, i8 %a20, i8 %a22, i8 %a23, i8 %a27, i8 %a28, i8 %a31) nounwind {
+; CHECK-LABEL: buildvector_v32i8_partial:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -96
+; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 96
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: ld.b $t0, $fp, 0
+; CHECK-NEXT: ld.b $t1, $fp, 8
+; CHECK-NEXT: ld.b $t2, $fp, 16
+; CHECK-NEXT: ld.b $t3, $fp, 24
+; CHECK-NEXT: ld.b $t4, $fp, 56
+; CHECK-NEXT: ld.b $t5, $fp, 48
+; CHECK-NEXT: ld.b $t6, $fp, 40
+; CHECK-NEXT: ld.b $t7, $fp, 32
+; CHECK-NEXT: st.b $t4, $sp, 63
+; CHECK-NEXT: st.b $t5, $sp, 60
+; CHECK-NEXT: st.b $t6, $sp, 59
+; CHECK-NEXT: st.b $t7, $sp, 55
+; CHECK-NEXT: st.b $t3, $sp, 54
+; CHECK-NEXT: st.b $t2, $sp, 52
+; CHECK-NEXT: st.b $t1, $sp, 50
+; CHECK-NEXT: st.b $t0, $sp, 49
+; CHECK-NEXT: st.b $a7, $sp, 47
+; CHECK-NEXT: st.b $a6, $sp, 40
+; CHECK-NEXT: st.b $a5, $sp, 39
+; CHECK-NEXT: st.b $a4, $sp, 37
+; CHECK-NEXT: st.b $a3, $sp, 34
+; CHECK-NEXT: st.b $a2, $sp, 33
+; CHECK-NEXT: st.b $a1, $sp, 32
+; CHECK-NEXT: xvld $xr0, $sp, 32
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: addi.d $sp, $fp, -96
+; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0
+ %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1
+ %ins2 = insertelement <32 x i8> %ins1, i8 %a2, i32 2
+ %ins3 = insertelement <32 x i8> %ins2, i8 undef, i32 3
+ %ins4 = insertelement <32 x i8> %ins3, i8 undef, i32 4
+ %ins5 = insertelement <32 x i8> %ins4, i8 %a5, i32 5
+ %ins6 = insertelement <32 x i8> %ins5, i8 undef, i32 6
+ %ins7 = insertelement <32 x i8> %ins6, i8 %a7, i32 7
+ %ins8 = insertelement <32 x i8> %ins7, i8 %a8, i32 8
+ %ins9 = insertelement <32 x i8> %ins8, i8 undef, i32 9
+ %ins10 = insertelement <32 x i8> %ins9, i8 undef, i32 10
+ %ins11 = insertelement <32 x i8> %ins10, i8 undef, i32 11
+ %ins12 = insertelement <32 x i8> %ins11, i8 undef, i32 12
+ %ins13 = insertelement <32 x i8> %ins12, i8 undef, i32 13
+ %ins14 = insertelement <32 x i8> %ins13, i8 undef, i32 14
+ %ins15 = insertelement <32 x i8> %ins14, i8 %a15, i32 15
+ %ins16 = insertelement <32 x i8> %ins15, i8 undef, i32 16
+ %ins17 = insertelement <32 x i8> %ins16, i8 %a17, i32 17
+ %ins18 = insertelement <32 x i8> %ins17, i8 %a18, i32 18
+ %ins19 = insertelement <32 x i8> %ins18, i8 undef, i32 19
+ %ins20 = insertelement <32 x i8> %ins19, i8 %a20, i32 20
+ %ins21 = insertelement <32 x i8> %ins20, i8 undef, i32 21
+ %ins22 = insertelement <32 x i8> %ins21, i8 %a22, i32 22
+ %ins23 = insertelement <32 x i8> %ins22, i8 %a23, i32 23
+ %ins24 = insertelement <32 x i8> %ins23, i8 undef, i32 24
+ %ins25 = insertelement <32 x i8> %ins24, i8 undef, i32 25
+ %ins26 = insertelement <32 x i8> %ins25, i8 undef, i32 26
+ %ins27 = insertelement <32 x i8> %ins26, i8 %a27, i32 27
+ %ins28 = insertelement <32 x i8> %ins27, i8 %a28, i32 28
+ %ins29 = insertelement <32 x i8> %ins28, i8 undef, i32 29
+ %ins30 = insertelement <32 x i8> %ins29, i8 undef, i32 30
+ %ins31 = insertelement <32 x i8> %ins30, i8 %a31, i32 31
+ store <32 x i8> %ins31, ptr %dst
+ ret void
+}
+
define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind {
; CHECK-LABEL: buildvector_v16i16:
; CHECK: # %bb.0: # %entry
@@ -419,6 +494,49 @@ entry:
ret void
}
+define void @buildvector_v16i16_partial(ptr %dst, i16 %a0, i16 %a2, i16 %a5, i16 %a6, i16 %a7, i16 %a12, i16 %a13) nounwind {
+; CHECK-LABEL: buildvector_v16i16_partial:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -96
+; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 96
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: st.h $a7, $sp, 58
+; CHECK-NEXT: st.h $a6, $sp, 56
+; CHECK-NEXT: st.h $a5, $sp, 46
+; CHECK-NEXT: st.h $a4, $sp, 44
+; CHECK-NEXT: st.h $a3, $sp, 42
+; CHECK-NEXT: st.h $a2, $sp, 36
+; CHECK-NEXT: st.h $a1, $sp, 32
+; CHECK-NEXT: xvld $xr0, $sp, 32
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: addi.d $sp, $fp, -96
+; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0
+ %ins1 = insertelement <16 x i16> %ins0, i16 undef, i32 1
+ %ins2 = insertelement <16 x i16> %ins1, i16 %a2, i32 2
+ %ins3 = insertelement <16 x i16> %ins2, i16 undef, i32 3
+ %ins4 = insertelement <16 x i16> %ins3, i16 undef, i32 4
+ %ins5 = insertelement <16 x i16> %ins4, i16 %a5, i32 5
+ %ins6 = insertelement <16 x i16> %ins5, i16 %a6, i32 6
+ %ins7 = insertelement <16 x i16> %ins6, i16 %a7, i32 7
+ %ins8 = insertelement <16 x i16> %ins7, i16 undef, i32 8
+ %ins9 = insertelement <16 x i16> %ins8, i16 undef, i32 9
+ %ins10 = insertelement <16 x i16> %ins9, i16 undef, i32 10
+ %ins11 = insertelement <16 x i16> %ins10, i16 undef, i32 11
+ %ins12 = insertelement <16 x i16> %ins11, i16 %a12, i32 12
+ %ins13 = insertelement <16 x i16> %ins12, i16 %a13, i32 13
+ %ins14 = insertelement <16 x i16> %ins13, i16 undef, i32 14
+ %ins15 = insertelement <16 x i16> %ins14, i16 undef, i32 15
+ store <16 x i16> %ins15, ptr %dst
+ ret void
+}
+
define void @buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind {
; CHECK-LABEL: buildvector_v8i32:
; CHECK: # %bb.0: # %entry
@@ -446,6 +564,38 @@ entry:
ret void
}
+define void @buildvector_v8i32_partial(ptr %dst, i32 %a2, i32 %a4, i32 %a5, i32 %a6) nounwind {
+; CHECK-LABEL: buildvector_v8i32_partial:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -96
+; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 96
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: st.w $a4, $sp, 56
+; CHECK-NEXT: st.w $a3, $sp, 52
+; CHECK-NEXT: st.w $a2, $sp, 48
+; CHECK-NEXT: st.w $a1, $sp, 40
+; CHECK-NEXT: xvld $xr0, $sp, 32
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: addi.d $sp, $fp, -96
+; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <8 x i32> undef, i32 undef, i32 0
+ %ins1 = insertelement <8 x i32> %ins0, i32 undef, i32 1
+ %ins2 = insertelement <8 x i32> %ins1, i32 %a2, i32 2
+ %ins3 = insertelement <8 x i32> %ins2, i32 undef, i32 3
+ %ins4 = insertelement <8 x i32> %ins3, i32 %a4, i32 4
+ %ins5 = insertelement <8 x i32> %ins4, i32 %a5, i32 5
+ %ins6 = insertelement <8 x i32> %ins5, i32 %a6, i32 6
+ %ins7 = insertelement <8 x i32> %ins6, i32 undef, i32 7
+ store <8 x i32> %ins7, ptr %dst
+ ret void
+}
+
define void @buildvector_v4i64(ptr %dst, i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind {
; CHECK-LABEL: buildvector_v4i64:
; CHECK: # %bb.0: # %entry
@@ -464,6 +614,25 @@ entry:
ret void
}
+define void @buildvector_v4i64_partial(ptr %dst, i64 %a1, i64 %a2) nounwind {
+; CHECK-LABEL: buildvector_v4i64_partial:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
+; CHECK-NEXT: xvinsgr2vr.d $xr1, $a1, 0
+; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68
+; CHECK-NEXT: xvpackev.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <4 x i64> undef, i64 undef, i32 0
+ %ins1 = insertelement <4 x i64> %ins0, i64 %a1, i32 1
+ %ins2 = insertelement <4 x i64> %ins1, i64 %a2, i32 2
+ %ins3 = insertelement <4 x i64> %ins2, i64 undef, i32 3
+ store <4 x i64> %ins3, ptr %dst
+ ret void
+}
+
define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind {
; CHECK-LABEL: buildvector_v8f32:
; CHECK: # %bb.0: # %entry
@@ -497,6 +666,38 @@ entry:
ret void
}
+define void @buildvector_v8f32_partial(ptr %dst, float %a1, float %a2, float %a5, float %a7) nounwind {
+; CHECK-LABEL: buildvector_v8f32_partial:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -96
+; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 96
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: fst.s $fa3, $sp, 60
+; CHECK-NEXT: fst.s $fa2, $sp, 52
+; CHECK-NEXT: fst.s $fa1, $sp, 40
+; CHECK-NEXT: fst.s $fa0, $sp, 36
+; CHECK-NEXT: xvld $xr0, $sp, 32
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: addi.d $sp, $fp, -96
+; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <8 x float> undef, float undef, i32 0
+ %ins1 = insertelement <8 x float> %ins0, float %a1, i32 1
+ %ins2 = insertelement <8 x float> %ins1, float %a2, i32 2
+ %ins3 = insertelement <8 x float> %ins2, float undef, i32 3
+ %ins4 = insertelement <8 x float> %ins3, float undef, i32 4
+ %ins5 = insertelement <8 x float> %ins4, float %a5, i32 5
+ %ins6 = insertelement <8 x float> %ins5, float undef, i32 6
+ %ins7 = insertelement <8 x float> %ins6, float %a7, i32 7
+ store <8 x float> %ins7, ptr %dst
+ ret void
+}
+
define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, double %a3) nounwind {
; CHECK-LABEL: buildvector_v4f64:
; CHECK: # %bb.0: # %entry
@@ -517,3 +718,22 @@ entry:
store <4 x double> %ins3, ptr %dst
ret void
}
+
+define void @buildvector_v4f64_partial(ptr %dst, double %a0, double %a3) nounwind {
+; CHECK-LABEL: buildvector_v4f64_partial:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $xr1
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
+; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68
+; CHECK-NEXT: xvpackev.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <4 x double> undef, double %a0, i32 0
+ %ins1 = insertelement <4 x double> %ins0, double undef, i32 1
+ %ins2 = insertelement <4 x double> %ins1, double undef, i32 2
+ %ins3 = insertelement <4 x double> %ins2, double %a3, i32 3
+ store <4 x double> %ins3, ptr %dst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
index afc87d1..4dda012 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -272,6 +272,41 @@ entry:
ret void
}
+define void @buildvector_v16i8_partial(ptr %dst, i8 %a2, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) nounwind {
+; CHECK-LABEL: buildvector_v16i8_partial:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: st.b $a6, $sp, 15
+; CHECK-NEXT: st.b $a5, $sp, 12
+; CHECK-NEXT: st.b $a4, $sp, 11
+; CHECK-NEXT: st.b $a3, $sp, 8
+; CHECK-NEXT: st.b $a2, $sp, 6
+; CHECK-NEXT: st.b $a1, $sp, 2
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <16 x i8> undef, i8 undef, i32 0
+ %ins1 = insertelement <16 x i8> %ins0, i8 undef, i32 1
+ %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2
+ %ins3 = insertelement <16 x i8> %ins2, i8 undef, i32 3
+ %ins4 = insertelement <16 x i8> %ins3, i8 undef, i32 4
+ %ins5 = insertelement <16 x i8> %ins4, i8 undef, i32 5
+ %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6
+ %ins7 = insertelement <16 x i8> %ins6, i8 undef, i32 7
+ %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8
+ %ins9 = insertelement <16 x i8> %ins8, i8 undef, i32 9
+ %ins10 = insertelement <16 x i8> %ins9, i8 undef, i32 10
+ %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11
+ %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12
+ %ins13 = insertelement <16 x i8> %ins12, i8 undef, i32 13
+ %ins14 = insertelement <16 x i8> %ins13, i8 undef, i32 14
+ %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15
+ store <16 x i8> %ins15, ptr %dst
+ ret void
+}
+
define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
; CHECK-LABEL: buildvector_v8i16:
; CHECK: # %bb.0: # %entry
@@ -299,6 +334,31 @@ entry:
ret void
}
+define void @buildvector_v8i16_partial(ptr %dst, i16 %a1, i16 %a3, i16 %a4, i16 %a5) nounwind {
+; CHECK-LABEL: buildvector_v8i16_partial:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: st.h $a4, $sp, 10
+; CHECK-NEXT: st.h $a3, $sp, 8
+; CHECK-NEXT: st.h $a2, $sp, 6
+; CHECK-NEXT: st.h $a1, $sp, 2
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <8 x i16> undef, i16 undef, i32 0
+ %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1
+ %ins2 = insertelement <8 x i16> %ins1, i16 undef, i32 2
+ %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3
+ %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4
+ %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5
+ %ins6 = insertelement <8 x i16> %ins5, i16 undef, i32 6
+ %ins7 = insertelement <8 x i16> %ins6, i16 undef, i32 7
+ store <8 x i16> %ins7, ptr %dst
+ ret void
+}
+
define void @buildvector_v4i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
; CHECK-LABEL: buildvector_v4i32:
; CHECK: # %bb.0: # %entry
@@ -317,6 +377,25 @@ entry:
ret void
}
+define void @buildvector_v4i32_partial(ptr %dst, i32 %a0, i32 %a3) nounwind {
+; CHECK-LABEL: buildvector_v4i32_partial:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI23_0)
+; CHECK-NEXT: vld $vr0, $a3, %pc_lo12(.LCPI23_0)
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a2, 0
+; CHECK-NEXT: vshuf.w $vr0, $vr2, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
+ %ins1 = insertelement <4 x i32> %ins0, i32 undef, i32 1
+ %ins2 = insertelement <4 x i32> %ins1, i32 undef, i32 2
+ %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
+ store <4 x i32> %ins3, ptr %dst
+ ret void
+}
+
define void @buildvector_v2i64(ptr %dst, i64 %a0, i64 %a1) nounwind {
; CHECK-LABEL: buildvector_v2i64:
; CHECK: # %bb.0: # %entry
@@ -331,6 +410,19 @@ entry:
ret void
}
+define void @buildvector_v2i64_partial(ptr %dst, i64 %a0) nounwind {
+; CHECK-LABEL: buildvector_v2i64_partial:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0
+ %ins1 = insertelement <2 x i64> %ins0, i64 undef, i32 1
+ store <2 x i64> %ins1, ptr %dst
+ ret void
+}
+
define void @buildvector_v4f32(ptr %dst, float %a0, float %a1, float %a2, float %a3) nounwind {
; CHECK-LABEL: buildvector_v4f32:
; CHECK: # %bb.0: # %entry
@@ -352,6 +444,25 @@ entry:
ret void
}
+define void @buildvector_v4f32_partial(ptr %dst, float %a0, float %a3) nounwind {
+; CHECK-LABEL: buildvector_v4f32_partial:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI27_0)
+; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI27_0)
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
+; CHECK-NEXT: vst $vr2, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <4 x float> undef, float %a0, i32 0
+ %ins1 = insertelement <4 x float> %ins0, float undef, i32 1
+ %ins2 = insertelement <4 x float> %ins1, float undef, i32 2
+ %ins3 = insertelement <4 x float> %ins2, float %a3, i32 3
+ store <4 x float> %ins3, ptr %dst
+ ret void
+}
+
define void @buildvector_v2f64(ptr %dst, double %a0, double %a1) nounwind {
; CHECK-LABEL: buildvector_v2f64:
; CHECK: # %bb.0: # %entry
@@ -367,6 +478,20 @@ entry:
ret void
}
+define void @buildvector_v2f64_partial(ptr %dst, double %a1) nounwind {
+; CHECK-LABEL: buildvector_v2f64_partial:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <2 x double> undef, double undef, i32 0
+ %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1
+ store <2 x double> %ins1, ptr %dst
+ ret void
+}
+
;; If `isShuffleMaskLegal` returns true, it will lead to an infinite loop.
define void @extract1_i32_zext_insert0_i64_undef(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef:
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index d566069..a28b818 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -435,7 +435,7 @@
; RV32XCVMEM: .attribute 5, "rv32i2p1_xcvmem1p0"
; RV32XCVSIMD: .attribute 5, "rv32i2p1_xcvsimd1p0"
; RV32XCVBI: .attribute 5, "rv32i2p1_xcvbi1p0"
-; RV32XSFVFWMACCQQQ: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl32b1p0_xsfvfwmaccqqq1p0"
+; RV32XSFVFWMACCQQQ: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xsfvfwmaccqqq1p0"
; RV32XTHEADCMO: .attribute 5, "rv32i2p1_xtheadcmo1p0"
; RV32XTHEADCONDMOV: .attribute 5, "rv32i2p1_xtheadcondmov1p0"
; RV32XTHEADFMEMIDX: .attribute 5, "rv32i2p1_xtheadfmemidx1p0"
@@ -610,7 +610,7 @@
; RV64SVVPTC: .attribute 5, "rv64i2p1_svvptc1p0"
; RV64SVINVAL: .attribute 5, "rv64i2p1_svinval1p0"
; RV64XVENTANACONDOPS: .attribute 5, "rv64i2p1_xventanacondops1p0"
-; RV64XSFVFWMACCQQQ: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl32b1p0_xsfvfwmaccqqq1p0"
+; RV64XSFVFWMACCQQQ: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xsfvfwmaccqqq1p0"
; RV64XTHEADBA: .attribute 5, "rv64i2p1_xtheadba1p0"
; RV64XTHEADBB: .attribute 5, "rv64i2p1_xtheadbb1p0"
; RV64XTHEADBS: .attribute 5, "rv64i2p1_xtheadbs1p0"
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 7274e1b..26e324c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -2002,3 +2002,34 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
%res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
}
+
+define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor5(ptr %ptr) {
+; CHECK-LABEL: maskedload_factor5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <20 x i32> @llvm.masked.load(ptr %ptr, i32 4, <20 x i1> splat (i1 true), <20 x i32> poison)
+ %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
+ %v1 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 1, i32 6, i32 11, i32 16>
+ %v2 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 2, i32 7, i32 12, i32 17>
+ %v3 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 3, i32 8, i32 13, i32 18>
+ %v4 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 4, i32 9, i32 14, i32 19>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
+ %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
+ %res4 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3, <4 x i32> %v4, 4
+ ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res4
+}
+
+define void @maskedstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: maskedstore_factor2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsseg2e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+ tail call void @llvm.masked.store(<8 x i32> %interleaved.vec, ptr %ptr, i32 4, <8 x i1> splat (i1 true))
+ ret void
+}
diff --git a/llvm/test/MC/ELF/section-sym-err.s b/llvm/test/MC/ELF/section-sym-err.s
index afed21d..2f7ab69 100644
--- a/llvm/test/MC/ELF/section-sym-err.s
+++ b/llvm/test/MC/ELF/section-sym-err.s
@@ -1,6 +1,9 @@
-// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t.o 2>&1 | FileCheck %s
+# RUN: not llvm-mc -filetype=obj -triple x86_64 %s -o %t 2>&1 | FileCheck %s
.section foo
foo:
+# CHECK: [[#@LINE-1]]:1: error: symbol 'foo' is already defined
-// CHECK: error: symbol 'foo' is already defined
+x1:
+.section x1
+# CHECK: <unknown>:0: error: invalid symbol redefinition
diff --git a/llvm/test/MC/ELF/section-sym-err2.s b/llvm/test/MC/ELF/section-sym-err2.s
deleted file mode 100644
index 27d8e9a..0000000
--- a/llvm/test/MC/ELF/section-sym-err2.s
+++ /dev/null
@@ -1,6 +0,0 @@
-// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t.o 2>&1 | FileCheck %s
-
-foo:
-.section foo
-
-// CHECK: error: invalid symbol redefinition
diff --git a/llvm/test/MC/ELF/section-sym2.s b/llvm/test/MC/ELF/section-sym2.s
index b404ef7..167fc8c 100644
--- a/llvm/test/MC/ELF/section-sym2.s
+++ b/llvm/test/MC/ELF/section-sym2.s
@@ -1,24 +1,27 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj --symbols -r --expand-relocs - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple x86_64 %s -o %t
+# RUN: llvm-readelf -Srs %t | FileCheck %s
-// Test that we can forward reference a section.
+## Test that we can forward reference a section.
mov .rodata, %rsi
-.section .rodata
+mov .debug_info, %rsi
-// CHECK:Relocations [
-// CHECK: Section {{.*}} .rela.text {
-// CHECK: Relocation {
-// CHECK: Offset: 0x4
-// CHECK: Type: R_X86_64_32S (11)
-// CHECK: Symbol: .rodata
-// CHECK: Addend: 0x0
-// CHECK: }
-// CHECK: }
-// CHECK:]
+.section .rodata,"a"
+.section .debug_info,"G",@progbits,11,comdat; .long x1
+.section .debug_info,"G",@progbits,22,comdat; .long x2
+.section .debug_info,"",@progbits; .long x0
-// There is only one .rodata symbol
+# CHECK: Relocation section '.rela.debug_info' at offset {{.*}} contains 1
+# CHECK: Relocation section '.rela.debug_info' at offset {{.*}} contains 1
+# CHECK: Relocation section '.rela.debug_info' at offset {{.*}} contains 1
-// CHECK:Symbols [
-// CHECK: Type: Section (0x3)
-// CHECK: Section: .rodata
-// CHECK-NOT: Section: .rodata
+# CHECK: Symbol table '.symtab' contains 8 entries:
+# CHECK-NEXT: Num: Value Size Type Bind Vis Ndx Name
+# CHECK-NEXT: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
+# CHECK-NEXT: 0000000000000000 0 SECTION LOCAL DEFAULT 4 .rodata
+# CHECK-NEXT: 0000000000000000 0 SECTION LOCAL DEFAULT 11 .debug_info
+# CHECK-NEXT: 0000000000000000 0 NOTYPE LOCAL DEFAULT 5 11
+# CHECK-NEXT: 0000000000000000 0 NOTYPE LOCAL DEFAULT 8 22
+# CHECK-NEXT: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND x1
+# CHECK-NEXT: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND x2
+# CHECK-NEXT: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND x0
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index b7cd712..19cc4d5 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -448,7 +448,7 @@
# CHECK: .attribute 5, "rv32i2p1_zilsd1p0"
.attribute arch, "rv64i_xsfvfwmaccqqq"
-# CHECK: attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl32b1p0_xsfvfwmaccqqq1p0"
+# CHECK: attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xsfvfwmaccqqq1p0"
.attribute arch, "rv32i_ssnpm1p0"
# CHECK: attribute 5, "rv32i2p1_ssnpm1p0"
diff --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
index 4739290..a89c1ae 100644
--- a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
@@ -710,7 +710,7 @@ void MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id) {
void MemRefDependenceGraph::forEachMemRefInputEdge(
unsigned id, const std::function<void(Edge)> &callback) {
if (inEdges.count(id) > 0)
- forEachMemRefEdge(inEdges[id], callback);
+ forEachMemRefEdge(inEdges.at(id), callback);
}
// Calls 'callback' for each output edge from node 'id' which carries a
@@ -718,7 +718,7 @@ void MemRefDependenceGraph::forEachMemRefInputEdge(
void MemRefDependenceGraph::forEachMemRefOutputEdge(
unsigned id, const std::function<void(Edge)> &callback) {
if (outEdges.count(id) > 0)
- forEachMemRefEdge(outEdges[id], callback);
+ forEachMemRefEdge(outEdges.at(id), callback);
}
// Calls 'callback' for each edge in 'edges' which carries a memref
@@ -730,9 +730,6 @@ void MemRefDependenceGraph::forEachMemRefEdge(
if (!isa<MemRefType>(edge.value.getType()))
continue;
assert(nodes.count(edge.id) > 0);
- // Skip if 'edge.id' is not a loop nest.
- if (!isa<AffineForOp>(getNode(edge.id)->op))
- continue;
// Visit current input edge 'edge'.
callback(edge);
}
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
index 95848d0..1d5a665 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
@@ -1473,9 +1473,11 @@ public:
SmallVector<MemRefDependenceGraph::Edge, 2> inEdges;
mdg->forEachMemRefInputEdge(
dstNode->id, [&](MemRefDependenceGraph::Edge inEdge) {
- // Add 'inEdge' if it is a read-after-write dependence.
+ // Add 'inEdge' if it is a read-after-write dependence or an edge
+ // from a memref defining op (e.g. view-like op or alloc op).
if (dstNode->getLoadOpCount(inEdge.value) > 0 &&
- mdg->getNode(inEdge.id)->getStoreOpCount(inEdge.value) > 0)
+ (mdg->getNode(inEdge.id)->getStoreOpCount(inEdge.value) > 0 ||
+ inEdge.value.getDefiningOp() == mdg->getNode(inEdge.id)->op))
inEdges.push_back(inEdge);
});
diff --git a/mlir/test/Dialect/Affine/loop-fusion-4.mlir b/mlir/test/Dialect/Affine/loop-fusion-4.mlir
index b059b5a..04c8c3e 100644
--- a/mlir/test/Dialect/Affine/loop-fusion-4.mlir
+++ b/mlir/test/Dialect/Affine/loop-fusion-4.mlir
@@ -743,3 +743,31 @@ module {
return
}
}
+
+// SIBLING-MAXIMAL-LABEL: memref_cast_reused
+func.func @memref_cast_reused(%arg: memref<*xf32>) {
+ %alloc = memref.cast %arg : memref<*xf32> to memref<10xf32>
+ %alloc_0 = memref.alloc() : memref<10xf32>
+ %alloc_1 = memref.alloc() : memref<10xf32>
+ %cst = arith.constant 0.000000e+00 : f32
+ %cst_2 = arith.constant 1.000000e+00 : f32
+ affine.for %arg0 = 0 to 10 {
+ %0 = affine.load %alloc[%arg0] : memref<10xf32>
+ %1 = arith.addf %0, %cst_2 : f32
+ affine.store %1, %alloc_0[%arg0] : memref<10xf32>
+ }
+ affine.for %arg0 = 0 to 10 {
+ %0 = affine.load %alloc[%arg0] : memref<10xf32>
+ %1 = affine.load %alloc_1[0] : memref<10xf32>
+ %2 = arith.addf %0, %1 : f32
+ affine.store %2, %alloc_1[0] : memref<10xf32>
+ }
+ // SIBLING-MAXIMAL: affine.for %{{.*}} = 0 to 10
+ // SIBLING-MAXIMAL: addf
+ // SIBLING-MAXIMAL-NEXT: affine.store
+ // SIBLING-MAXIMAL-NEXT: affine.load
+ // SIBLING-MAXIMAL-NEXT: affine.load
+ // SIBLING-MAXIMAL-NEXT: addf
+ // SIBLING-MAXIMAL-NEXT: affine.store
+ return
+}