diff options
-rw-r--r-- | libc/include/CMakeLists.txt | 1 | ||||
-rw-r--r-- | libc/src/stdio/printf_core/CMakeLists.txt | 2 | ||||
-rw-r--r-- | lldb/tools/lldb-rpc-gen/RPCCommon.cpp | 1 | ||||
-rw-r--r-- | lldb/tools/lldb-rpc-gen/lldb-rpc-gen.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/InterleavedAccessPass.cpp | 96 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVFeatures.td | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/LoongArch/lasx/build-vector.ll | 220 | ||||
-rw-r--r-- | llvm/test/CodeGen/LoongArch/lsx/build-vector.ll | 125 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/attributes.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll | 31 | ||||
-rw-r--r-- | llvm/test/MC/ELF/section-sym-err.s | 7 | ||||
-rw-r--r-- | llvm/test/MC/ELF/section-sym-err2.s | 6 | ||||
-rw-r--r-- | llvm/test/MC/ELF/section-sym2.s | 39 | ||||
-rw-r--r-- | llvm/test/MC/RISCV/attribute-arch.s | 2 | ||||
-rw-r--r-- | mlir/lib/Dialect/Affine/Analysis/Utils.cpp | 7 | ||||
-rw-r--r-- | mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp | 6 | ||||
-rw-r--r-- | mlir/test/Dialect/Affine/loop-fusion-4.mlir | 28 |
17 files changed, 494 insertions, 89 deletions
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 18fd3f8..fafc137 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -187,6 +187,7 @@ add_header_macro( arpa/inet.h DEPENDS .llvm_libc_common_h + .inttypes ) add_header_macro( diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt index c22f985..76eb0a2 100644 --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -44,6 +44,7 @@ add_header_library( HDRS core_structs.h DEPENDS + libc.include.inttypes libc.src.__support.CPP.string_view libc.src.__support.FPUtil.fp_bits ) @@ -97,6 +98,7 @@ add_header_library( .core_structs .printf_config .writer + libc.include.inttypes libc.src.__support.big_int libc.src.__support.common libc.src.__support.CPP.limits diff --git a/lldb/tools/lldb-rpc-gen/RPCCommon.cpp b/lldb/tools/lldb-rpc-gen/RPCCommon.cpp index 34791fa..6f0abe4 100644 --- a/lldb/tools/lldb-rpc-gen/RPCCommon.cpp +++ b/lldb/tools/lldb-rpc-gen/RPCCommon.cpp @@ -194,7 +194,6 @@ std::string lldb_rpc_gen::GetMangledName(ASTContext &Context, return Mangled; } -static auto CheckTypeForLLDBPrivate = [](const Type *Ty) {}; bool lldb_rpc_gen::TypeIsFromLLDBPrivate(QualType T) { auto CheckTypeForLLDBPrivate = [](const Type *Ty) { if (!Ty) diff --git a/lldb/tools/lldb-rpc-gen/lldb-rpc-gen.cpp b/lldb/tools/lldb-rpc-gen/lldb-rpc-gen.cpp index fdcfee9..9b48796 100644 --- a/lldb/tools/lldb-rpc-gen/lldb-rpc-gen.cpp +++ b/lldb/tools/lldb-rpc-gen/lldb-rpc-gen.cpp @@ -102,8 +102,6 @@ public: lldb_rpc_gen::GetMangledName(Context, MDecl); const bool IsDisallowed = lldb_rpc_gen::MethodIsDisallowed(Context, MDecl); - const bool HasCallbackParameter = - lldb_rpc_gen::HasCallbackParameter(MDecl); SupportLevel MethodSupportLevel = GetMethodSupportLevel(MDecl); if (MethodSupportLevel == eImplemented && !IsDisallowed) { const lldb_rpc_gen::Method Method(MDecl, Policy, Context); diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 1b69188..65565b9 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -253,6 +253,21 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, return false; } +static Value *getMaskOperand(IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: + llvm_unreachable("Unexpected intrinsic"); + case Intrinsic::vp_load: + return II->getOperand(1); + case Intrinsic::masked_load: + return II->getOperand(2); + case Intrinsic::vp_store: + return II->getOperand(2); + case Intrinsic::masked_store: + return II->getOperand(3); + } +} + // Return the corresponded deinterleaved mask, or nullptr if there is no valid // mask. static Value *getMask(Value *WideMask, unsigned Factor, @@ -268,8 +283,12 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( if (isa<ScalableVectorType>(Load->getType())) return false; - if (auto *LI = dyn_cast<LoadInst>(Load); - LI && !LI->isSimple()) + auto *LI = dyn_cast<LoadInst>(Load); + auto *II = dyn_cast<IntrinsicInst>(Load); + if (!LI && !II) + return false; + + if (LI && !LI->isSimple()) return false; // Check if all users of this load are shufflevectors. If we encounter any @@ -322,7 +341,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( // Holds the corresponding index for each DE-interleave shuffle. SmallVector<unsigned, 4> Indices; - Type *VecTy = FirstSVI->getType(); + VectorType *VecTy = cast<VectorType>(FirstSVI->getType()); // Check if other shufflevectors are also DE-interleaved of the same type // and factor as the first shufflevector. @@ -360,13 +379,16 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load); Value *Mask = nullptr; - if (auto *VPLoad = dyn_cast<VPIntrinsic>(Load)) { - Mask = getMask(VPLoad->getMaskParam(), Factor, cast<VectorType>(VecTy)); + if (LI) { + LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n"); + } else { + // Check mask operand. Handle both all-true/false and interleaved mask. + Mask = getMask(getMaskOperand(II), Factor, VecTy); if (!Mask) return false; - LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load: " << *Load << "\n"); - } else { - LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n"); + + LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load or masked.load: " + << *Load << "\n"); } // Try to create target specific intrinsics to replace the load and @@ -483,15 +505,16 @@ bool InterleavedAccessImpl::tryReplaceExtracts( bool InterleavedAccessImpl::lowerInterleavedStore( Instruction *Store, SmallSetVector<Instruction *, 32> &DeadInsts) { Value *StoredValue; - if (auto *SI = dyn_cast<StoreInst>(Store)) { + auto *SI = dyn_cast<StoreInst>(Store); + auto *II = dyn_cast<IntrinsicInst>(Store); + if (SI) { if (!SI->isSimple()) return false; StoredValue = SI->getValueOperand(); - } else if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) { - assert(VPStore->getIntrinsicID() == Intrinsic::vp_store); - StoredValue = VPStore->getArgOperand(0); } else { - llvm_unreachable("unsupported store operation"); + assert(II->getIntrinsicID() == Intrinsic::vp_store || + II->getIntrinsicID() == Intrinsic::masked_store); + StoredValue = II->getArgOperand(0); } auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue); @@ -508,18 +531,18 @@ bool InterleavedAccessImpl::lowerInterleavedStore( "number of stored element should be a multiple of Factor"); Value *Mask = nullptr; - if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) { + if (SI) { + LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n"); + } else { + // Check mask operand. Handle both all-true/false and interleaved mask. unsigned LaneMaskLen = NumStoredElements / Factor; - Mask = getMask(VPStore->getMaskParam(), Factor, + Mask = getMask(getMaskOperand(II), Factor, ElementCount::getFixed(LaneMaskLen)); if (!Mask) return false; - LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store: " << *Store - << "\n"); - - } else { - LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n"); + LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store or masked.store: " + << *Store << "\n"); } // Try to create target specific intrinsics to replace the store and @@ -592,19 +615,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( assert(II); // Check mask operand. Handle both all-true/false and interleaved mask. - Value *WideMask; - switch (II->getIntrinsicID()) { - default: - return false; - case Intrinsic::vp_load: - WideMask = II->getOperand(1); - break; - case Intrinsic::masked_load: - WideMask = II->getOperand(2); - break; - } - - Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI)); + Mask = getMask(getMaskOperand(II), Factor, getDeinterleavedVectorType(DI)); if (!Mask) return false; @@ -642,18 +653,7 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic( Value *Mask = nullptr; if (II) { // Check mask operand. Handle both all-true/false and interleaved mask. - Value *WideMask; - switch (II->getIntrinsicID()) { - default: - return false; - case Intrinsic::vp_store: - WideMask = II->getOperand(2); - break; - case Intrinsic::masked_store: - WideMask = II->getOperand(3); - break; - } - Mask = getMask(WideMask, Factor, + Mask = getMask(getMaskOperand(II), Factor, cast<VectorType>(InterleaveValues[0]->getType())); if (!Mask) return false; @@ -687,11 +687,13 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) { using namespace PatternMatch; for (auto &I : instructions(F)) { if (match(&I, m_CombineOr(m_Load(m_Value()), - m_Intrinsic<Intrinsic::vp_load>()))) + m_Intrinsic<Intrinsic::vp_load>())) || + match(&I, m_Intrinsic<Intrinsic::masked_load>())) Changed |= lowerInterleavedLoad(&I, DeadInsts); if (match(&I, m_CombineOr(m_Store(m_Value(), m_Value()), - m_Intrinsic<Intrinsic::vp_store>()))) + m_Intrinsic<Intrinsic::vp_store>())) || + match(&I, m_Intrinsic<Intrinsic::masked_store>())) Changed |= lowerInterleavedStore(&I, DeadInsts); if (auto *II = dyn_cast<IntrinsicInst>(&I)) { diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index f9c0b54..171940e 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1272,7 +1272,7 @@ def FeatureVendorXSfmm128t def FeatureVendorXSfvqmaccdod : RISCVExtension<1, 0, "SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2)", - [FeatureStdExtZve32x]>; + [FeatureStdExtZve32x, FeatureStdExtZvl128b]>; def HasVendorXSfvqmaccdod : Predicate<"Subtarget->hasVendorXSfvqmaccdod()">, AssemblerPredicate<(all_of FeatureVendorXSfvqmaccdod), @@ -1281,7 +1281,7 @@ def HasVendorXSfvqmaccdod def FeatureVendorXSfvqmaccqoq : RISCVExtension<1, 0, "SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4)", - [FeatureStdExtZve32x]>; + [FeatureStdExtZve32x, FeatureStdExtZvl256b]>; def HasVendorXSfvqmaccqoq : Predicate<"Subtarget->hasVendorXSfvqmaccqoq()">, AssemblerPredicate<(all_of FeatureVendorXSfvqmaccqoq), @@ -1290,7 +1290,7 @@ def HasVendorXSfvqmaccqoq def FeatureVendorXSfvfwmaccqqq : RISCVExtension<1, 0, "SiFive Matrix Multiply Accumulate Instruction (4-by-4)", - [FeatureStdExtZvfbfmin]>; + [FeatureStdExtZvfbfmin, FeatureStdExtZvl128b]>; def HasVendorXSfvfwmaccqqq : Predicate<"Subtarget->hasVendorXSfvfwmaccqqq()">, AssemblerPredicate<(all_of FeatureVendorXSfvfwmaccqqq), diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll index 61a915a..086ef54 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll @@ -352,6 +352,81 @@ entry: ret void } +define void @buildvector_v32i8_partial(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a5, i8 %a7, i8 %a8, i8 %a15, i8 %a17, i8 %a18, i8 %a20, i8 %a22, i8 %a23, i8 %a27, i8 %a28, i8 %a31) nounwind { +; CHECK-LABEL: buildvector_v32i8_partial: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -96 +; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 96 +; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 +; CHECK-NEXT: ld.b $t0, $fp, 0 +; CHECK-NEXT: ld.b $t1, $fp, 8 +; CHECK-NEXT: ld.b $t2, $fp, 16 +; CHECK-NEXT: ld.b $t3, $fp, 24 +; CHECK-NEXT: ld.b $t4, $fp, 56 +; CHECK-NEXT: ld.b $t5, $fp, 48 +; CHECK-NEXT: ld.b $t6, $fp, 40 +; CHECK-NEXT: ld.b $t7, $fp, 32 +; CHECK-NEXT: st.b $t4, $sp, 63 +; CHECK-NEXT: st.b $t5, $sp, 60 +; CHECK-NEXT: st.b $t6, $sp, 59 +; CHECK-NEXT: st.b $t7, $sp, 55 +; CHECK-NEXT: st.b $t3, $sp, 54 +; CHECK-NEXT: st.b $t2, $sp, 52 +; CHECK-NEXT: st.b $t1, $sp, 50 +; CHECK-NEXT: st.b $t0, $sp, 49 +; CHECK-NEXT: st.b $a7, $sp, 47 +; CHECK-NEXT: st.b $a6, $sp, 40 +; CHECK-NEXT: st.b $a5, $sp, 39 +; CHECK-NEXT: st.b $a4, $sp, 37 +; CHECK-NEXT: st.b $a3, $sp, 34 +; CHECK-NEXT: st.b $a2, $sp, 33 +; CHECK-NEXT: st.b $a1, $sp, 32 +; CHECK-NEXT: xvld $xr0, $sp, 32 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: addi.d $sp, $fp, -96 +; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 + %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 + %ins2 = insertelement <32 x i8> %ins1, i8 %a2, i32 2 + %ins3 = insertelement <32 x i8> %ins2, i8 undef, i32 3 + %ins4 = insertelement <32 x i8> %ins3, i8 undef, i32 4 + %ins5 = insertelement <32 x i8> %ins4, i8 %a5, i32 5 + %ins6 = insertelement <32 x i8> %ins5, i8 undef, i32 6 + %ins7 = insertelement <32 x i8> %ins6, i8 %a7, i32 7 + %ins8 = insertelement <32 x i8> %ins7, i8 %a8, i32 8 + %ins9 = insertelement <32 x i8> %ins8, i8 undef, i32 9 + %ins10 = insertelement <32 x i8> %ins9, i8 undef, i32 10 + %ins11 = insertelement <32 x i8> %ins10, i8 undef, i32 11 + %ins12 = insertelement <32 x i8> %ins11, i8 undef, i32 12 + %ins13 = insertelement <32 x i8> %ins12, i8 undef, i32 13 + %ins14 = insertelement <32 x i8> %ins13, i8 undef, i32 14 + %ins15 = insertelement <32 x i8> %ins14, i8 %a15, i32 15 + %ins16 = insertelement <32 x i8> %ins15, i8 undef, i32 16 + %ins17 = insertelement <32 x i8> %ins16, i8 %a17, i32 17 + %ins18 = insertelement <32 x i8> %ins17, i8 %a18, i32 18 + %ins19 = insertelement <32 x i8> %ins18, i8 undef, i32 19 + %ins20 = insertelement <32 x i8> %ins19, i8 %a20, i32 20 + %ins21 = insertelement <32 x i8> %ins20, i8 undef, i32 21 + %ins22 = insertelement <32 x i8> %ins21, i8 %a22, i32 22 + %ins23 = insertelement <32 x i8> %ins22, i8 %a23, i32 23 + %ins24 = insertelement <32 x i8> %ins23, i8 undef, i32 24 + %ins25 = insertelement <32 x i8> %ins24, i8 undef, i32 25 + %ins26 = insertelement <32 x i8> %ins25, i8 undef, i32 26 + %ins27 = insertelement <32 x i8> %ins26, i8 %a27, i32 27 + %ins28 = insertelement <32 x i8> %ins27, i8 %a28, i32 28 + %ins29 = insertelement <32 x i8> %ins28, i8 undef, i32 29 + %ins30 = insertelement <32 x i8> %ins29, i8 undef, i32 30 + %ins31 = insertelement <32 x i8> %ins30, i8 %a31, i32 31 + store <32 x i8> %ins31, ptr %dst + ret void +} + define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind { ; CHECK-LABEL: buildvector_v16i16: ; CHECK: # %bb.0: # %entry @@ -419,6 +494,49 @@ entry: ret void } +define void @buildvector_v16i16_partial(ptr %dst, i16 %a0, i16 %a2, i16 %a5, i16 %a6, i16 %a7, i16 %a12, i16 %a13) nounwind { +; CHECK-LABEL: buildvector_v16i16_partial: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -96 +; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 96 +; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 +; CHECK-NEXT: st.h $a7, $sp, 58 +; CHECK-NEXT: st.h $a6, $sp, 56 +; CHECK-NEXT: st.h $a5, $sp, 46 +; CHECK-NEXT: st.h $a4, $sp, 44 +; CHECK-NEXT: st.h $a3, $sp, 42 +; CHECK-NEXT: st.h $a2, $sp, 36 +; CHECK-NEXT: st.h $a1, $sp, 32 +; CHECK-NEXT: xvld $xr0, $sp, 32 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: addi.d $sp, $fp, -96 +; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0 + %ins1 = insertelement <16 x i16> %ins0, i16 undef, i32 1 + %ins2 = insertelement <16 x i16> %ins1, i16 %a2, i32 2 + %ins3 = insertelement <16 x i16> %ins2, i16 undef, i32 3 + %ins4 = insertelement <16 x i16> %ins3, i16 undef, i32 4 + %ins5 = insertelement <16 x i16> %ins4, i16 %a5, i32 5 + %ins6 = insertelement <16 x i16> %ins5, i16 %a6, i32 6 + %ins7 = insertelement <16 x i16> %ins6, i16 %a7, i32 7 + %ins8 = insertelement <16 x i16> %ins7, i16 undef, i32 8 + %ins9 = insertelement <16 x i16> %ins8, i16 undef, i32 9 + %ins10 = insertelement <16 x i16> %ins9, i16 undef, i32 10 + %ins11 = insertelement <16 x i16> %ins10, i16 undef, i32 11 + %ins12 = insertelement <16 x i16> %ins11, i16 %a12, i32 12 + %ins13 = insertelement <16 x i16> %ins12, i16 %a13, i32 13 + %ins14 = insertelement <16 x i16> %ins13, i16 undef, i32 14 + %ins15 = insertelement <16 x i16> %ins14, i16 undef, i32 15 + store <16 x i16> %ins15, ptr %dst + ret void +} + define void @buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind { ; CHECK-LABEL: buildvector_v8i32: ; CHECK: # %bb.0: # %entry @@ -446,6 +564,38 @@ entry: ret void } +define void @buildvector_v8i32_partial(ptr %dst, i32 %a2, i32 %a4, i32 %a5, i32 %a6) nounwind { +; CHECK-LABEL: buildvector_v8i32_partial: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -96 +; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 96 +; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 +; CHECK-NEXT: st.w $a4, $sp, 56 +; CHECK-NEXT: st.w $a3, $sp, 52 +; CHECK-NEXT: st.w $a2, $sp, 48 +; CHECK-NEXT: st.w $a1, $sp, 40 +; CHECK-NEXT: xvld $xr0, $sp, 32 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: addi.d $sp, $fp, -96 +; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x i32> undef, i32 undef, i32 0 + %ins1 = insertelement <8 x i32> %ins0, i32 undef, i32 1 + %ins2 = insertelement <8 x i32> %ins1, i32 %a2, i32 2 + %ins3 = insertelement <8 x i32> %ins2, i32 undef, i32 3 + %ins4 = insertelement <8 x i32> %ins3, i32 %a4, i32 4 + %ins5 = insertelement <8 x i32> %ins4, i32 %a5, i32 5 + %ins6 = insertelement <8 x i32> %ins5, i32 %a6, i32 6 + %ins7 = insertelement <8 x i32> %ins6, i32 undef, i32 7 + store <8 x i32> %ins7, ptr %dst + ret void +} + define void @buildvector_v4i64(ptr %dst, i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind { ; CHECK-LABEL: buildvector_v4i64: ; CHECK: # %bb.0: # %entry @@ -464,6 +614,25 @@ entry: ret void } +define void @buildvector_v4i64_partial(ptr %dst, i64 %a1, i64 %a2) nounwind { +; CHECK-LABEL: buildvector_v4i64_partial: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 0 +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a1, 0 +; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68 +; CHECK-NEXT: xvpackev.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x i64> undef, i64 undef, i32 0 + %ins1 = insertelement <4 x i64> %ins0, i64 %a1, i32 1 + %ins2 = insertelement <4 x i64> %ins1, i64 %a2, i32 2 + %ins3 = insertelement <4 x i64> %ins2, i64 undef, i32 3 + store <4 x i64> %ins3, ptr %dst + ret void +} + define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind { ; CHECK-LABEL: buildvector_v8f32: ; CHECK: # %bb.0: # %entry @@ -497,6 +666,38 @@ entry: ret void } +define void @buildvector_v8f32_partial(ptr %dst, float %a1, float %a2, float %a5, float %a7) nounwind { +; CHECK-LABEL: buildvector_v8f32_partial: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -96 +; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 96 +; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 +; CHECK-NEXT: fst.s $fa3, $sp, 60 +; CHECK-NEXT: fst.s $fa2, $sp, 52 +; CHECK-NEXT: fst.s $fa1, $sp, 40 +; CHECK-NEXT: fst.s $fa0, $sp, 36 +; CHECK-NEXT: xvld $xr0, $sp, 32 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: addi.d $sp, $fp, -96 +; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x float> undef, float undef, i32 0 + %ins1 = insertelement <8 x float> %ins0, float %a1, i32 1 + %ins2 = insertelement <8 x float> %ins1, float %a2, i32 2 + %ins3 = insertelement <8 x float> %ins2, float undef, i32 3 + %ins4 = insertelement <8 x float> %ins3, float undef, i32 4 + %ins5 = insertelement <8 x float> %ins4, float %a5, i32 5 + %ins6 = insertelement <8 x float> %ins5, float undef, i32 6 + %ins7 = insertelement <8 x float> %ins6, float %a7, i32 7 + store <8 x float> %ins7, ptr %dst + ret void +} + define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, double %a3) nounwind { ; CHECK-LABEL: buildvector_v4f64: ; CHECK: # %bb.0: # %entry @@ -517,3 +718,22 @@ entry: store <4 x double> %ins3, ptr %dst ret void } + +define void @buildvector_v4f64_partial(ptr %dst, double %a0, double %a3) nounwind { +; CHECK-LABEL: buildvector_v4f64_partial: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $xr1 +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68 +; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68 +; CHECK-NEXT: xvpackev.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x double> undef, double %a0, i32 0 + %ins1 = insertelement <4 x double> %ins0, double undef, i32 1 + %ins2 = insertelement <4 x double> %ins1, double undef, i32 2 + %ins3 = insertelement <4 x double> %ins2, double %a3, i32 3 + store <4 x double> %ins3, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll index afc87d1..4dda012 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll @@ -272,6 +272,41 @@ entry: ret void } +define void @buildvector_v16i8_partial(ptr %dst, i8 %a2, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) nounwind { +; CHECK-LABEL: buildvector_v16i8_partial: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.b $a6, $sp, 15 +; CHECK-NEXT: st.b $a5, $sp, 12 +; CHECK-NEXT: st.b $a4, $sp, 11 +; CHECK-NEXT: st.b $a3, $sp, 8 +; CHECK-NEXT: st.b $a2, $sp, 6 +; CHECK-NEXT: st.b $a1, $sp, 2 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <16 x i8> undef, i8 undef, i32 0 + %ins1 = insertelement <16 x i8> %ins0, i8 undef, i32 1 + %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2 + %ins3 = insertelement <16 x i8> %ins2, i8 undef, i32 3 + %ins4 = insertelement <16 x i8> %ins3, i8 undef, i32 4 + %ins5 = insertelement <16 x i8> %ins4, i8 undef, i32 5 + %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6 + %ins7 = insertelement <16 x i8> %ins6, i8 undef, i32 7 + %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8 + %ins9 = insertelement <16 x i8> %ins8, i8 undef, i32 9 + %ins10 = insertelement <16 x i8> %ins9, i8 undef, i32 10 + %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11 + %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12 + %ins13 = insertelement <16 x i8> %ins12, i8 undef, i32 13 + %ins14 = insertelement <16 x i8> %ins13, i8 undef, i32 14 + %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15 + store <16 x i8> %ins15, ptr %dst + ret void +} + define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { ; CHECK-LABEL: buildvector_v8i16: ; CHECK: # %bb.0: # %entry @@ -299,6 +334,31 @@ entry: ret void } +define void @buildvector_v8i16_partial(ptr %dst, i16 %a1, i16 %a3, i16 %a4, i16 %a5) nounwind { +; CHECK-LABEL: buildvector_v8i16_partial: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.h $a4, $sp, 10 +; CHECK-NEXT: st.h $a3, $sp, 8 +; CHECK-NEXT: st.h $a2, $sp, 6 +; CHECK-NEXT: st.h $a1, $sp, 2 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x i16> undef, i16 undef, i32 0 + %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1 + %ins2 = insertelement <8 x i16> %ins1, i16 undef, i32 2 + %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3 + %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4 + %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5 + %ins6 = insertelement <8 x i16> %ins5, i16 undef, i32 6 + %ins7 = insertelement <8 x i16> %ins6, i16 undef, i32 7 + store <8 x i16> %ins7, ptr %dst + ret void +} + define void @buildvector_v4i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { ; CHECK-LABEL: buildvector_v4i32: ; CHECK: # %bb.0: # %entry @@ -317,6 +377,25 @@ entry: ret void } +define void @buildvector_v4i32_partial(ptr %dst, i32 %a0, i32 %a3) nounwind { +; CHECK-LABEL: buildvector_v4i32_partial: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI23_0) +; CHECK-NEXT: vld $vr0, $a3, %pc_lo12(.LCPI23_0) +; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 0 +; CHECK-NEXT: vinsgr2vr.w $vr2, $a2, 0 +; CHECK-NEXT: vshuf.w $vr0, $vr2, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0 + %ins1 = insertelement <4 x i32> %ins0, i32 undef, i32 1 + %ins2 = insertelement <4 x i32> %ins1, i32 undef, i32 2 + %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3 + store <4 x i32> %ins3, ptr %dst + ret void +} + define void @buildvector_v2i64(ptr %dst, i64 %a0, i64 %a1) nounwind { ; CHECK-LABEL: buildvector_v2i64: ; CHECK: # %bb.0: # %entry @@ -331,6 +410,19 @@ entry: ret void } +define void @buildvector_v2i64_partial(ptr %dst, i64 %a0) nounwind { +; CHECK-LABEL: buildvector_v2i64_partial: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0 + %ins1 = insertelement <2 x i64> %ins0, i64 undef, i32 1 + store <2 x i64> %ins1, ptr %dst + ret void +} + define void @buildvector_v4f32(ptr %dst, float %a0, float %a1, float %a2, float %a3) nounwind { ; CHECK-LABEL: buildvector_v4f32: ; CHECK: # %bb.0: # %entry @@ -352,6 +444,25 @@ entry: ret void } +define void @buildvector_v4f32_partial(ptr %dst, float %a0, float %a3) nounwind { +; CHECK-LABEL: buildvector_v4f32_partial: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI27_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI27_0) +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1 +; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 +; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 +; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x float> undef, float %a0, i32 0 + %ins1 = insertelement <4 x float> %ins0, float undef, i32 1 + %ins2 = insertelement <4 x float> %ins1, float undef, i32 2 + %ins3 = insertelement <4 x float> %ins2, float %a3, i32 3 + store <4 x float> %ins3, ptr %dst + ret void +} + define void @buildvector_v2f64(ptr %dst, double %a0, double %a1) nounwind { ; CHECK-LABEL: buildvector_v2f64: ; CHECK: # %bb.0: # %entry @@ -367,6 +478,20 @@ entry: ret void } +define void @buildvector_v2f64_partial(ptr %dst, double %a1) nounwind { +; CHECK-LABEL: buildvector_v2f64_partial: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <2 x double> undef, double undef, i32 0 + %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1 + store <2 x double> %ins1, ptr %dst + ret void +} + ;; If `isShuffleMaskLegal` returns true, it will lead to an infinite loop. define void @extract1_i32_zext_insert0_i64_undef(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef: diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index d566069..a28b818 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -435,7 +435,7 @@ ; RV32XCVMEM: .attribute 5, "rv32i2p1_xcvmem1p0" ; RV32XCVSIMD: .attribute 5, "rv32i2p1_xcvsimd1p0" ; RV32XCVBI: .attribute 5, "rv32i2p1_xcvbi1p0" -; RV32XSFVFWMACCQQQ: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl32b1p0_xsfvfwmaccqqq1p0" +; RV32XSFVFWMACCQQQ: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xsfvfwmaccqqq1p0" ; RV32XTHEADCMO: .attribute 5, "rv32i2p1_xtheadcmo1p0" ; RV32XTHEADCONDMOV: .attribute 5, "rv32i2p1_xtheadcondmov1p0" ; RV32XTHEADFMEMIDX: .attribute 5, "rv32i2p1_xtheadfmemidx1p0" @@ -610,7 +610,7 @@ ; RV64SVVPTC: .attribute 5, "rv64i2p1_svvptc1p0" ; RV64SVINVAL: .attribute 5, "rv64i2p1_svinval1p0" ; RV64XVENTANACONDOPS: .attribute 5, "rv64i2p1_xventanacondops1p0" -; RV64XSFVFWMACCQQQ: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl32b1p0_xsfvfwmaccqqq1p0" +; RV64XSFVFWMACCQQQ: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xsfvfwmaccqqq1p0" ; RV64XTHEADBA: .attribute 5, "rv64i2p1_xtheadba1p0" ; RV64XTHEADBB: .attribute 5, "rv64i2p1_xtheadbb1p0" ; RV64XTHEADBS: .attribute 5, "rv64i2p1_xtheadbs1p0" diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 7274e1b..26e324c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -2002,3 +2002,34 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) { %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2 ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2 } + +define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor5(ptr %ptr) { +; CHECK-LABEL: maskedload_factor5: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vlseg5e32.v v8, (a0) +; CHECK-NEXT: ret + %interleaved.vec = tail call <20 x i32> @llvm.masked.load(ptr %ptr, i32 4, <20 x i1> splat (i1 true), <20 x i32> poison) + %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15> + %v1 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 1, i32 6, i32 11, i32 16> + %v2 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 2, i32 7, i32 12, i32 17> + %v3 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 3, i32 8, i32 13, i32 18> + %v4 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 4, i32 9, i32 14, i32 19> + %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 + %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 + %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2 + %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3 + %res4 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3, <4 x i32> %v4, 4 + ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res4 +} + +define void @maskedstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) { +; CHECK-LABEL: maskedstore_factor2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsseg2e32.v v8, (a0) +; CHECK-NEXT: ret + %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> + tail call void @llvm.masked.store(<8 x i32> %interleaved.vec, ptr %ptr, i32 4, <8 x i1> splat (i1 true)) + ret void +} diff --git a/llvm/test/MC/ELF/section-sym-err.s b/llvm/test/MC/ELF/section-sym-err.s index afed21d..2f7ab69 100644 --- a/llvm/test/MC/ELF/section-sym-err.s +++ b/llvm/test/MC/ELF/section-sym-err.s @@ -1,6 +1,9 @@ -// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t.o 2>&1 | FileCheck %s +# RUN: not llvm-mc -filetype=obj -triple x86_64 %s -o %t 2>&1 | FileCheck %s .section foo foo: +# CHECK: [[#@LINE-1]]:1: error: symbol 'foo' is already defined -// CHECK: error: symbol 'foo' is already defined +x1: +.section x1 +# CHECK: <unknown>:0: error: invalid symbol redefinition diff --git a/llvm/test/MC/ELF/section-sym-err2.s b/llvm/test/MC/ELF/section-sym-err2.s deleted file mode 100644 index 27d8e9a..0000000 --- a/llvm/test/MC/ELF/section-sym-err2.s +++ /dev/null @@ -1,6 +0,0 @@ -// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t.o 2>&1 | FileCheck %s - -foo: -.section foo - -// CHECK: error: invalid symbol redefinition diff --git a/llvm/test/MC/ELF/section-sym2.s b/llvm/test/MC/ELF/section-sym2.s index b404ef7..167fc8c 100644 --- a/llvm/test/MC/ELF/section-sym2.s +++ b/llvm/test/MC/ELF/section-sym2.s @@ -1,24 +1,27 @@ -// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj --symbols -r --expand-relocs - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple x86_64 %s -o %t +# RUN: llvm-readelf -Srs %t | FileCheck %s -// Test that we can forward reference a section. +## Test that we can forward reference a section. mov .rodata, %rsi -.section .rodata +mov .debug_info, %rsi -// CHECK:Relocations [ -// CHECK: Section {{.*}} .rela.text { -// CHECK: Relocation { -// CHECK: Offset: 0x4 -// CHECK: Type: R_X86_64_32S (11) -// CHECK: Symbol: .rodata -// CHECK: Addend: 0x0 -// CHECK: } -// CHECK: } -// CHECK:] +.section .rodata,"a" +.section .debug_info,"G",@progbits,11,comdat; .long x1 +.section .debug_info,"G",@progbits,22,comdat; .long x2 +.section .debug_info,"",@progbits; .long x0 -// There is only one .rodata symbol +# CHECK: Relocation section '.rela.debug_info' at offset {{.*}} contains 1 +# CHECK: Relocation section '.rela.debug_info' at offset {{.*}} contains 1 +# CHECK: Relocation section '.rela.debug_info' at offset {{.*}} contains 1 -// CHECK:Symbols [ -// CHECK: Type: Section (0x3) -// CHECK: Section: .rodata -// CHECK-NOT: Section: .rodata +# CHECK: Symbol table '.symtab' contains 8 entries: +# CHECK-NEXT: Num: Value Size Type Bind Vis Ndx Name +# CHECK-NEXT: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND +# CHECK-NEXT: 0000000000000000 0 SECTION LOCAL DEFAULT 4 .rodata +# CHECK-NEXT: 0000000000000000 0 SECTION LOCAL DEFAULT 11 .debug_info +# CHECK-NEXT: 0000000000000000 0 NOTYPE LOCAL DEFAULT 5 11 +# CHECK-NEXT: 0000000000000000 0 NOTYPE LOCAL DEFAULT 8 22 +# CHECK-NEXT: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND x1 +# CHECK-NEXT: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND x2 +# CHECK-NEXT: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND x0 diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index b7cd712..19cc4d5 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -448,7 +448,7 @@ # CHECK: .attribute 5, "rv32i2p1_zilsd1p0" .attribute arch, "rv64i_xsfvfwmaccqqq" -# CHECK: attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl32b1p0_xsfvfwmaccqqq1p0" +# CHECK: attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xsfvfwmaccqqq1p0" .attribute arch, "rv32i_ssnpm1p0" # CHECK: attribute 5, "rv32i2p1_ssnpm1p0" diff --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp index 4739290..a89c1ae 100644 --- a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp @@ -710,7 +710,7 @@ void MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id) { void MemRefDependenceGraph::forEachMemRefInputEdge( unsigned id, const std::function<void(Edge)> &callback) { if (inEdges.count(id) > 0) - forEachMemRefEdge(inEdges[id], callback); + forEachMemRefEdge(inEdges.at(id), callback); } // Calls 'callback' for each output edge from node 'id' which carries a @@ -718,7 +718,7 @@ void MemRefDependenceGraph::forEachMemRefInputEdge( void MemRefDependenceGraph::forEachMemRefOutputEdge( unsigned id, const std::function<void(Edge)> &callback) { if (outEdges.count(id) > 0) - forEachMemRefEdge(outEdges[id], callback); + forEachMemRefEdge(outEdges.at(id), callback); } // Calls 'callback' for each edge in 'edges' which carries a memref @@ -730,9 +730,6 @@ void MemRefDependenceGraph::forEachMemRefEdge( if (!isa<MemRefType>(edge.value.getType())) continue; assert(nodes.count(edge.id) > 0); - // Skip if 'edge.id' is not a loop nest. - if (!isa<AffineForOp>(getNode(edge.id)->op)) - continue; // Visit current input edge 'edge'. callback(edge); } diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp index 95848d0..1d5a665 100644 --- a/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp @@ -1473,9 +1473,11 @@ public: SmallVector<MemRefDependenceGraph::Edge, 2> inEdges; mdg->forEachMemRefInputEdge( dstNode->id, [&](MemRefDependenceGraph::Edge inEdge) { - // Add 'inEdge' if it is a read-after-write dependence. + // Add 'inEdge' if it is a read-after-write dependence or an edge + // from a memref defining op (e.g. view-like op or alloc op). if (dstNode->getLoadOpCount(inEdge.value) > 0 && - mdg->getNode(inEdge.id)->getStoreOpCount(inEdge.value) > 0) + (mdg->getNode(inEdge.id)->getStoreOpCount(inEdge.value) > 0 || + inEdge.value.getDefiningOp() == mdg->getNode(inEdge.id)->op)) inEdges.push_back(inEdge); }); diff --git a/mlir/test/Dialect/Affine/loop-fusion-4.mlir b/mlir/test/Dialect/Affine/loop-fusion-4.mlir index b059b5a..04c8c3e 100644 --- a/mlir/test/Dialect/Affine/loop-fusion-4.mlir +++ b/mlir/test/Dialect/Affine/loop-fusion-4.mlir @@ -743,3 +743,31 @@ module { return } } + +// SIBLING-MAXIMAL-LABEL: memref_cast_reused +func.func @memref_cast_reused(%arg: memref<*xf32>) { + %alloc = memref.cast %arg : memref<*xf32> to memref<10xf32> + %alloc_0 = memref.alloc() : memref<10xf32> + %alloc_1 = memref.alloc() : memref<10xf32> + %cst = arith.constant 0.000000e+00 : f32 + %cst_2 = arith.constant 1.000000e+00 : f32 + affine.for %arg0 = 0 to 10 { + %0 = affine.load %alloc[%arg0] : memref<10xf32> + %1 = arith.addf %0, %cst_2 : f32 + affine.store %1, %alloc_0[%arg0] : memref<10xf32> + } + affine.for %arg0 = 0 to 10 { + %0 = affine.load %alloc[%arg0] : memref<10xf32> + %1 = affine.load %alloc_1[0] : memref<10xf32> + %2 = arith.addf %0, %1 : f32 + affine.store %2, %alloc_1[0] : memref<10xf32> + } + // SIBLING-MAXIMAL: affine.for %{{.*}} = 0 to 10 + // SIBLING-MAXIMAL: addf + // SIBLING-MAXIMAL-NEXT: affine.store + // SIBLING-MAXIMAL-NEXT: affine.load + // SIBLING-MAXIMAL-NEXT: affine.load + // SIBLING-MAXIMAL-NEXT: addf + // SIBLING-MAXIMAL-NEXT: affine.store + return +} |