Diffstat (limited to 'llvm')
43 files changed, 631 insertions, 287 deletions
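Note on the first hunks below: they drop the local GetFirstType helper in PointerUnion.h in favour of TypeAtIndex (which, as the STLExtras.h context line shows, is an alias for std::tuple_element_t over a std::tuple of the pack), and they replace hand-written static_casts to underlying enum types with llvm::to_underlying from STLForwardCompat.h. A minimal standalone sketch of the two utilities, using only the standard library — the definitions here are illustrative stand-ins, not the LLVM code itself:

#include <cstddef>
#include <cstdint>
#include <tuple>
#include <type_traits>

// TypeAtIndex<I, Ts...>: the I-th type of a parameter pack, spelled with
// std::tuple_element_t exactly as in the STLExtras.h context above.
template <std::size_t I, typename... Ts>
using TypeAtIndex = std::tuple_element_t<I, std::tuple<Ts...>>;

// Stand-in for llvm::to_underlying (same shape as C++23 std::to_underlying):
// convert an enum value to its underlying integer type.
template <typename Enum>
constexpr std::underlying_type_t<Enum> to_underlying(Enum E) {
  return static_cast<std::underlying_type_t<Enum>>(E);
}

enum class Flag : std::uint8_t { A = 1, B = 2 };

static_assert(std::is_same_v<TypeAtIndex<0, int *, float *>, int *>);
static_assert(to_underlying(Flag::A) + to_underlying(Flag::B) == 3);

int main() { return 0; }

This is what lets addEnumValues in STLExtras.h shrink to a single enable_if plus two to_underlying calls, and lets DenseMapInfo<PointerUnion<...>> key off TypeAtIndex<0, PTs...> directly.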
diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h index 7b66177..d9087dd1 100644 --- a/llvm/include/llvm/ADT/PointerUnion.h +++ b/llvm/include/llvm/ADT/PointerUnion.h @@ -38,11 +38,6 @@ namespace pointer_union_detail { return std::min<int>({PointerLikeTypeTraits<Ts>::NumLowBitsAvailable...}); } - /// Find the first type in a list of types. - template <typename T, typename...> struct GetFirstType { - using type = T; - }; - /// Provide PointerLikeTypeTraits for void* that is used by PointerUnion /// for the template arguments. template <typename ...PTs> class PointerUnionUIntTraits { @@ -264,8 +259,7 @@ struct PointerLikeTypeTraits<PointerUnion<PTs...>> { // Teach DenseMap how to use PointerUnions as keys. template <typename ...PTs> struct DenseMapInfo<PointerUnion<PTs...>> { using Union = PointerUnion<PTs...>; - using FirstInfo = - DenseMapInfo<typename pointer_union_detail::GetFirstType<PTs...>::type>; + using FirstInfo = DenseMapInfo<TypeAtIndex<0, PTs...>>; static inline Union getEmptyKey() { return Union(FirstInfo::getEmptyKey()); } diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 5b20d6bd..658f262 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -161,12 +161,10 @@ using TypeAtIndex = std::tuple_element_t<I, std::tuple<Ts...>>; /// Helper which adds two underlying types of enumeration type. /// Implicit conversion to a common type is accepted. template <typename EnumTy1, typename EnumTy2, - typename UT1 = std::enable_if_t<std::is_enum<EnumTy1>::value, - std::underlying_type_t<EnumTy1>>, - typename UT2 = std::enable_if_t<std::is_enum<EnumTy2>::value, - std::underlying_type_t<EnumTy2>>> + typename = std::enable_if_t<std::is_enum_v<EnumTy1> && + std::is_enum_v<EnumTy2>>> constexpr auto addEnumValues(EnumTy1 LHS, EnumTy2 RHS) { - return static_cast<UT1>(LHS) + static_cast<UT2>(RHS); + return llvm::to_underlying(LHS) + llvm::to_underlying(RHS); } //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h index ce969ef..ae446df 100644 --- a/llvm/include/llvm/ADT/Sequence.h +++ b/llvm/include/llvm/ADT/Sequence.h @@ -86,6 +86,7 @@ #include <type_traits> // std::is_integral, std::is_enum, std::underlying_type, // std::enable_if +#include "llvm/ADT/STLForwardCompat.h" // llvm::to_underlying #include "llvm/Support/MathExtras.h" // AddOverflow / SubOverflow namespace llvm { @@ -139,8 +140,7 @@ struct CheckedInt { template <typename Enum, std::enable_if_t<std::is_enum<Enum>::value, bool> = 0> static CheckedInt from(Enum FromValue) { - using type = std::underlying_type_t<Enum>; - return from<type>(static_cast<type>(FromValue)); + return from(llvm::to_underlying(FromValue)); } // Equality diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h index 24d5c74..9f571b9 100644 --- a/llvm/include/llvm/BinaryFormat/XCOFF.h +++ b/llvm/include/llvm/BinaryFormat/XCOFF.h @@ -412,7 +412,7 @@ struct TracebackTable { static constexpr uint8_t LanguageIdShift = 16; // Byte 3 - static constexpr uint32_t IsGlobaLinkageMask = 0x0000'8000; + static constexpr uint32_t IsGlobalLinkageMask = 0x0000'8000; static constexpr uint32_t IsOutOfLineEpilogOrPrologueMask = 0x0000'4000; static constexpr uint32_t HasTraceBackTableOffsetMask = 0x0000'2000; static constexpr uint32_t IsInternalProcedureMask = 0x0000'1000; diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp index 30bcff7..b5b4cd9 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15633,47 +15633,34 @@ void ScalarEvolution::LoopGuards::collectFromBlock( return false; }; - // Checks whether Expr is a non-negative constant, and Divisor is a positive - // constant, and returns their APInt in ExprVal and in DivisorVal. - auto GetNonNegExprAndPosDivisor = [&](const SCEV *Expr, const SCEV *Divisor, - APInt &ExprVal, APInt &DivisorVal) { - auto *ConstExpr = dyn_cast<SCEVConstant>(Expr); - auto *ConstDivisor = dyn_cast<SCEVConstant>(Divisor); - if (!ConstExpr || !ConstDivisor) - return false; - ExprVal = ConstExpr->getAPInt(); - DivisorVal = ConstDivisor->getAPInt(); - return ExprVal.isNonNegative() && !DivisorVal.isNonPositive(); - }; - // Return a new SCEV that modifies \p Expr to the closest number divides by - // \p Divisor and greater or equal than Expr. - // For now, only handle constant Expr and Divisor. + // \p Divisor and greater or equal than Expr. For now, only handle constant + // Expr. auto GetNextSCEVDividesByDivisor = [&](const SCEV *Expr, - const SCEV *Divisor) { - APInt ExprVal; - APInt DivisorVal; - if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal)) + const APInt &DivisorVal) { + const APInt *ExprVal; + if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() || + DivisorVal.isNonPositive()) return Expr; - APInt Rem = ExprVal.urem(DivisorVal); - if (!Rem.isZero()) - // return the SCEV: Expr + Divisor - Expr % Divisor - return SE.getConstant(ExprVal + DivisorVal - Rem); - return Expr; + APInt Rem = ExprVal->urem(DivisorVal); + if (Rem.isZero()) + return Expr; + // return the SCEV: Expr + Divisor - Expr % Divisor + return SE.getConstant(*ExprVal + DivisorVal - Rem); }; // Return a new SCEV that modifies \p Expr to the closest number divides by - // \p Divisor and less or equal than Expr. - // For now, only handle constant Expr and Divisor. + // \p Divisor and less or equal than Expr. For now, only handle constant + // Expr. auto GetPreviousSCEVDividesByDivisor = [&](const SCEV *Expr, - const SCEV *Divisor) { - APInt ExprVal; - APInt DivisorVal; - if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal)) + const APInt &DivisorVal) { + const APInt *ExprVal; + if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() || + DivisorVal.isNonPositive()) return Expr; - APInt Rem = ExprVal.urem(DivisorVal); + APInt Rem = ExprVal->urem(DivisorVal); // return the SCEV: Expr - Expr % Divisor - return SE.getConstant(ExprVal - Rem); + return SE.getConstant(*ExprVal - Rem); }; // Apply divisibilty by \p Divisor on MinMaxExpr with constant values, @@ -15682,6 +15669,11 @@ void ScalarEvolution::LoopGuards::collectFromBlock( std::function<const SCEV *(const SCEV *, const SCEV *)> ApplyDivisibiltyOnMinMaxExpr = [&](const SCEV *MinMaxExpr, const SCEV *Divisor) { + auto *ConstDivisor = dyn_cast<SCEVConstant>(Divisor); + if (!ConstDivisor) + return MinMaxExpr; + const APInt &DivisorVal = ConstDivisor->getAPInt(); + const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr; SCEVTypes SCTy; if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS, @@ -15692,8 +15684,8 @@ void ScalarEvolution::LoopGuards::collectFromBlock( assert(SE.isKnownNonNegative(MinMaxLHS) && "Expected non-negative operand!"); auto *DivisibleExpr = - IsMin ? GetPreviousSCEVDividesByDivisor(MinMaxLHS, Divisor) - : GetNextSCEVDividesByDivisor(MinMaxLHS, Divisor); + IsMin ? 
GetPreviousSCEVDividesByDivisor(MinMaxLHS, DivisorVal) + : GetNextSCEVDividesByDivisor(MinMaxLHS, DivisorVal); SmallVector<const SCEV *> Ops = { ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr}; return SE.getMinMaxExpr(SCTy, Ops); @@ -15750,10 +15742,7 @@ void ScalarEvolution::LoopGuards::collectFromBlock( }; const SCEV *RewrittenLHS = GetMaybeRewritten(LHS); - const SCEV *DividesBy = nullptr; - const APInt &Multiple = SE.getConstantMultiple(RewrittenLHS); - if (!Multiple.isOne()) - DividesBy = SE.getConstant(Multiple); + const APInt &DividesBy = SE.getConstantMultiple(RewrittenLHS); // Collect rewrites for LHS and its transitive operands based on the // condition. @@ -15775,21 +15764,21 @@ void ScalarEvolution::LoopGuards::collectFromBlock( [[fallthrough]]; case CmpInst::ICMP_SLT: { RHS = SE.getMinusSCEV(RHS, One); - RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) : RHS; + RHS = GetPreviousSCEVDividesByDivisor(RHS, DividesBy); break; } case CmpInst::ICMP_UGT: case CmpInst::ICMP_SGT: RHS = SE.getAddExpr(RHS, One); - RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) : RHS; + RHS = GetNextSCEVDividesByDivisor(RHS, DividesBy); break; case CmpInst::ICMP_ULE: case CmpInst::ICMP_SLE: - RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) : RHS; + RHS = GetPreviousSCEVDividesByDivisor(RHS, DividesBy); break; case CmpInst::ICMP_UGE: case CmpInst::ICMP_SGE: - RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) : RHS; + RHS = GetNextSCEVDividesByDivisor(RHS, DividesBy); break; default: break; @@ -15843,7 +15832,7 @@ void ScalarEvolution::LoopGuards::collectFromBlock( case CmpInst::ICMP_NE: if (match(RHS, m_scev_Zero())) { const SCEV *OneAlignedUp = - DividesBy ? GetNextSCEVDividesByDivisor(One, DividesBy) : One; + GetNextSCEVDividesByDivisor(One, DividesBy); To = SE.getUMaxExpr(FromRewritten, OneAlignedUp); } break; diff --git a/llvm/lib/Object/XCOFFObjectFile.cpp b/llvm/lib/Object/XCOFFObjectFile.cpp index 7a8c8ad..ed1f750 100644 --- a/llvm/lib/Object/XCOFFObjectFile.cpp +++ b/llvm/lib/Object/XCOFFObjectFile.cpp @@ -1568,7 +1568,7 @@ uint8_t XCOFFTracebackTable::getLanguageID() const { } bool XCOFFTracebackTable::isGlobalLinkage() const { - return GETBITWITHMASK(0, IsGlobaLinkageMask); + return GETBITWITHMASK(0, IsGlobalLinkageMask); } bool XCOFFTracebackTable::isOutOfLineEpilogOrPrologue() const { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index b8761d97..30dfcf2b 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5064,17 +5064,15 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool RenamableSrc) const { if (AArch64::GPR32spRegClass.contains(DestReg) && (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { // If either operand is WSP, expand to ADD #0. if (Subtarget.hasZeroCycleRegMoveGPR64() && !Subtarget.hasZeroCycleRegMoveGPR32()) { // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 
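Aside on the ScalarEvolution::LoopGuards hunk above: the two lambdas round a constant bound to a multiple of a known divisor — up for lower bounds ("Expr + Divisor - Expr % Divisor") and down for upper bounds ("Expr - Expr % Divisor") — and the patch switches them to take the divisor as an APInt and to match the expression with m_scev_APInt. A plain unsigned-integer sketch of just the rounding arithmetic, independent of the SCEV classes (function names here are illustrative):

#include <cassert>
#include <cstdint>

// Round Expr up to the next multiple of Divisor (Divisor > 0), the
// "Expr + Divisor - Expr % Divisor" case used for lower bounds.
std::uint64_t roundUpToMultiple(std::uint64_t Expr, std::uint64_t Divisor) {
  std::uint64_t Rem = Expr % Divisor;
  return Rem == 0 ? Expr : Expr + Divisor - Rem;
}

// Round Expr down to the previous multiple of Divisor, the
// "Expr - Expr % Divisor" case used for upper bounds.
std::uint64_t roundDownToMultiple(std::uint64_t Expr, std::uint64_t Divisor) {
  return Expr - Expr % Divisor;
}

int main() {
  assert(roundUpToMultiple(13, 4) == 16);
  assert(roundDownToMultiple(13, 4) == 12);
  assert(roundUpToMultiple(12, 4) == 12); // already a multiple: unchanged
  return 0;
}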
- MCRegister DestRegX = TRI->getMatchingSuperReg( - DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); - MCRegister SrcRegX = TRI->getMatchingSuperReg( - SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); + MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); + MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); // This instruction is reading and writing X registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegX, but a proper @@ -5097,14 +5095,14 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else if (Subtarget.hasZeroCycleRegMoveGPR64() && !Subtarget.hasZeroCycleRegMoveGPR32()) { // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. - MCRegister DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, - &AArch64::GPR64spRegClass); + MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); assert(DestRegX.isValid() && "Destination super-reg not valid"); MCRegister SrcRegX = SrcReg == AArch64::WZR ? AArch64::XZR - : TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, - &AArch64::GPR64spRegClass); + : RI.getMatchingSuperReg(SrcReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); assert(SrcRegX.isValid() && "Source super-reg not valid"); // This instruction is reading and writing X registers. This may upset // the register scavenger and machine verifier, so we need to indicate @@ -5334,11 +5332,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Subtarget.hasZeroCycleRegMoveFPR128() && !Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::dsub, - &AArch64::FPR128RegClass); - MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::dsub, - &AArch64::FPR128RegClass); + MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, + &AArch64::FPR128RegClass); // This instruction is reading and writing Q registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegQ, but a proper @@ -5359,11 +5356,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Subtarget.hasZeroCycleRegMoveFPR128() && !Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::ssub, - &AArch64::FPR128RegClass); - MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub, - &AArch64::FPR128RegClass); + MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, + &AArch64::FPR128RegClass); // This instruction is reading and writing Q registers. 
This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegQ, but a proper @@ -5374,11 +5370,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else if (Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub, - &AArch64::FPR64RegClass); - MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub, - &AArch64::FPR64RegClass); + MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub, + &AArch64::FPR64RegClass); + MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, + &AArch64::FPR64RegClass); // This instruction is reading and writing D registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegD, but a proper @@ -5398,11 +5393,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Subtarget.hasZeroCycleRegMoveFPR128() && !Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::hsub, - &AArch64::FPR128RegClass); - MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub, - &AArch64::FPR128RegClass); + MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR128RegClass); // This instruction is reading and writing Q registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegQ, but a proper @@ -5413,11 +5407,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else if (Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub, - &AArch64::FPR64RegClass); - MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub, - &AArch64::FPR64RegClass); + MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR64RegClass); + MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR64RegClass); // This instruction is reading and writing D registers. 
This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegD, but a proper @@ -5441,11 +5434,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Subtarget.hasZeroCycleRegMoveFPR128() && !Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR64() && Subtarget.isNeonAvailable()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::bsub, - &AArch64::FPR128RegClass); - MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub, - &AArch64::FPR128RegClass); + MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR128RegClass); // This instruction is reading and writing Q registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegQ, but a proper @@ -5456,11 +5448,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else if (Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub, - &AArch64::FPR64RegClass); - MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub, - &AArch64::FPR64RegClass); + MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR64RegClass); + MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR64RegClass); // This instruction is reading and writing D registers. 
This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegD, but a proper @@ -5532,9 +5523,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } #ifndef NDEBUG - const TargetRegisterInfo &TRI = getRegisterInfo(); - errs() << TRI.getRegAsmName(DestReg) << " = COPY " - << TRI.getRegAsmName(SrcReg) << "\n"; + errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg) + << "\n"; #endif llvm_unreachable("unimplemented reg-to-reg copy"); } diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 023fd14..bcb3f50 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -2404,7 +2404,7 @@ void PPCAIXAsmPrinter::emitTracebackTable() { << static_cast<unsigned>(((V) & (TracebackTable::Field##Mask)) >> \ (TracebackTable::Field##Shift)) - GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsGlobaLinkage); + GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsGlobalLinkage); GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsOutOfLineEpilogOrPrologue); EmitComment(); diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 27fba34..100f1ec 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -1164,14 +1164,13 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, I.getOpcode() == TargetOpcode::G_USUBO) && "unexpected instruction"); - const Register DstReg = I.getOperand(0).getReg(); - const Register CarryOutReg = I.getOperand(1).getReg(); - const Register Op0Reg = I.getOperand(2).getReg(); - const Register Op1Reg = I.getOperand(3).getReg(); - bool IsSub = I.getOpcode() == TargetOpcode::G_USUBE || - I.getOpcode() == TargetOpcode::G_USUBO; - bool HasCarryIn = I.getOpcode() == TargetOpcode::G_UADDE || - I.getOpcode() == TargetOpcode::G_USUBE; + auto &CarryMI = cast<GAddSubCarryOut>(I); + + const Register DstReg = CarryMI.getDstReg(); + const Register CarryOutReg = CarryMI.getCarryOutReg(); + const Register Op0Reg = CarryMI.getLHSReg(); + const Register Op1Reg = CarryMI.getRHSReg(); + bool IsSub = CarryMI.isSub(); const LLT DstTy = MRI.getType(DstReg); assert(DstTy.isScalar() && "selectUAddSub only supported for scalar types"); @@ -1207,14 +1206,15 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, llvm_unreachable("selectUAddSub unsupported type."); } - const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); - const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); + const RegisterBank &CarryRB = *RBI.getRegBank(CarryOutReg, MRI, TRI); + const TargetRegisterClass *CarryRC = + getRegClass(MRI.getType(CarryOutReg), CarryRB); unsigned Opcode = IsSub ? OpSUB : OpADD; // G_UADDE/G_USUBE - find CarryIn def instruction. - if (HasCarryIn) { - Register CarryInReg = I.getOperand(4).getReg(); + if (auto CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) { + Register CarryInReg = CarryInMI->getCarryInReg(); MachineInstr *Def = MRI.getVRegDef(CarryInReg); while (Def->getOpcode() == TargetOpcode::G_TRUNC) { CarryInReg = Def->getOperand(1).getReg(); @@ -1227,11 +1227,12 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, Def->getOpcode() == TargetOpcode::G_USUBE || Def->getOpcode() == TargetOpcode::G_USUBO) { // carry set by prev ADD/SUB. 
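Aside on the X86 GlobalISel selectUAddSub change that follows: instead of COPYing EFLAGS through a virtual register, the selector now emits SETCCr with X86::COND_B to capture the carry-out as an 8-bit value and a CMP8ri of the carry byte against 1 ahead of the dependent ADC/SBB, constraining the carry register to the carry operand's own register class. The multi-limb shape this selects for can be sketched in portable C++ — this mirrors the semantics of G_UADDO/G_UADDE (a low add producing a 0/1 carry byte that feeds the high add), not the selector code itself:

#include <cassert>
#include <cstdint>

// Add two 128-bit values held as two 64-bit limbs each. The carry-out of the
// low add (a 0/1 byte, like the SETB result) becomes the carry-in of the high
// add, which is what ADC consumes at the machine level.
struct U128 { std::uint64_t Lo, Hi; };

U128 add128(U128 A, U128 B) {
  U128 R;
  R.Lo = A.Lo + B.Lo;                        // G_UADDO: low limb add
  std::uint8_t Carry = R.Lo < A.Lo ? 1 : 0;  // carry-out as a 0/1 byte
  R.Hi = A.Hi + B.Hi + Carry;                // G_UADDE: high limb add + carry-in
  return R;
}

int main() {
  U128 A{~0ull, 0}, B{1, 0};
  U128 R = add128(A, B);
  assert(R.Lo == 0 && R.Hi == 1); // carry propagated into the high limb
  return 0;
}

The matching X86LegalizerInfo and test updates further down make the carry operand legal as s8 rather than s1, so the SETCC-produced byte can be used directly.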
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), - X86::EFLAGS) - .addReg(CarryInReg); - if (!RBI.constrainGenericRegister(CarryInReg, *DstRC, MRI)) + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::CMP8ri)) + .addReg(CarryInReg) + .addImm(1); + + if (!RBI.constrainGenericRegister(CarryInReg, *CarryRC, MRI)) return false; Opcode = IsSub ? OpSBB : OpADC; @@ -1250,11 +1251,11 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, .addReg(Op0Reg) .addReg(Op1Reg); - BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg) - .addReg(X86::EFLAGS); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), CarryOutReg) + .addImm(X86::COND_B); if (!constrainSelectedInstRegOperands(Inst, TII, TRI, RBI) || - !RBI.constrainGenericRegister(CarryOutReg, *DstRC, MRI)) + !RBI.constrainGenericRegister(CarryOutReg, *CarryRC, MRI)) return false; I.eraseFromParent(); diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 11ef721..28fa2cd 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -194,11 +194,11 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .scalarize(0); getActionDefinitionsBuilder({G_UADDE, G_UADDO, G_USUBE, G_USUBO}) - .legalFor({{s8, s1}, {s16, s1}, {s32, s1}}) - .legalFor(Is64Bit, {{s64, s1}}) + .legalFor({{s8, s8}, {s16, s8}, {s32, s8}}) + .legalFor(Is64Bit, {{s64, s8}}) .widenScalarToNextPow2(0, /*Min=*/32) .clampScalar(0, s8, sMaxScalar) - .clampScalar(1, s1, s1) + .clampScalar(1, s8, s8) .scalarize(0); // integer multiply diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 7d376c3..fdfff16 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1642,6 +1642,19 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, return false; } + // The latch must be terminated by a BranchInst. 
+ BasicBlock *Latch = Lp->getLoopLatch(); + if (Latch && !isa<BranchInst>(Latch->getTerminator())) { + reportVectorizationFailure( + "The loop latch terminator is not a BranchInst", + "loop control flow is not understood by vectorizer", "CFGNotUnderstood", + ORE, TheLoop); + if (DoExtraAnalysis) + Result = false; + else + return false; + } + return Result; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 456fa4c..7651ba1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -322,7 +322,11 @@ public: VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) { - return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy)); + VPIRFlags Flags; + if (Opcode == Instruction::Trunc) + Flags = VPIRFlags::TruncFlagsTy(false, false); + return tryInsertInstruction( + new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags)); } VPScalarIVStepsRecipe * diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 50136a8..b96d29e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8774,13 +8774,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( assert(!RecurrenceDescriptor::isMinMaxRecurrenceKind(RecurrenceKind) && "Unexpected truncated min-max recurrence!"); Type *RdxTy = RdxDesc.getRecurrenceType(); - auto *Trunc = - new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy); + VPWidenCastRecipe *Trunc; Instruction::CastOps ExtendOpc = RdxDesc.isSigned() ? Instruction::SExt : Instruction::ZExt; - auto *Extnd = new VPWidenCastRecipe(ExtendOpc, Trunc, PhiTy); - Trunc->insertAfter(NewExitingVPV->getDefiningRecipe()); - Extnd->insertAfter(Trunc); + VPWidenCastRecipe *Extnd; + { + VPBuilder::InsertPointGuard Guard(Builder); + Builder.setInsertPoint( + NewExitingVPV->getDefiningRecipe()->getParent(), + std::next(NewExitingVPV->getDefiningRecipe()->getIterator())); + Trunc = + Builder.createWidenCast(Instruction::Trunc, NewExitingVPV, RdxTy); + Extnd = Builder.createWidenCast(ExtendOpc, Trunc, PhiTy); + } if (PhiR->getOperand(1) == NewExitingVPV) PhiR->setOperand(1, Extnd->getVPSingleValue()); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2388375..a6f4bec 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5291,19 +5291,19 @@ private: // data. for (TreeEntry *TE : Entries) { // Check if the user is commutative. - // The commutatives are handled later, as their oeprands can be + // The commutatives are handled later, as their operands can be // reordered. // Same applies even for non-commutative cmps, because we can invert // their predicate potentially and, thus, reorder the operands. bool IsCommutativeUser = ::isCommutative(User) || ::isCommutative(TE->getMatchingMainOpOrAltOp(User), User); - EdgeInfo EI(TE, U.getOperandNo()); if (!IsCommutativeUser && !isa<CmpInst>(User)) { unsigned &OpCnt = OrderedEntriesCount.try_emplace(TE, 0).first->getSecond(); + EdgeInfo EI(TE, U.getOperandNo()); if (!getScheduleCopyableData(EI, Op) && OpCnt < NumOps) - return false; + continue; // Found copyable operand - continue. ++OpCnt; continue; @@ -5312,33 +5312,38 @@ private: .first->getSecond(); } } - // Check the commutative/cmp entries. 
- if (!PotentiallyReorderedEntriesCount.empty()) { - for (auto &P : PotentiallyReorderedEntriesCount) { - auto *It = find(P.first->Scalars, User); - assert(It != P.first->Scalars.end() && - "User is not in the tree entry"); - int Lane = std::distance(P.first->Scalars.begin(), It); - assert(Lane >= 0 && "Lane is not found"); - if (isa<StoreInst>(User) && !P.first->ReorderIndices.empty()) - Lane = P.first->ReorderIndices[Lane]; - assert(Lane < static_cast<int>(P.first->Scalars.size()) && - "Couldn't find extract lane"); - SmallVector<unsigned> OpIndices; - for (unsigned OpIdx : - seq<unsigned>(::getNumberOfPotentiallyCommutativeOps( - P.first->getMainOp()))) { - if (P.first->getOperand(OpIdx)[Lane] == Op && - getScheduleCopyableData(EdgeInfo(P.first, OpIdx), Op)) - --P.getSecond(); - } - } - return all_of(PotentiallyReorderedEntriesCount, + if (PotentiallyReorderedEntriesCount.empty()) + return all_of(OrderedEntriesCount, [&](const std::pair<const TreeEntry *, unsigned> &P) { - return P.second == NumOps - 1; + return P.second == NumOps; }); - } - return true; + // Check the commutative/cmp entries. + for (auto &P : PotentiallyReorderedEntriesCount) { + auto *It = find(P.first->Scalars, User); + assert(It != P.first->Scalars.end() && "User is not in the tree entry"); + int Lane = std::distance(P.first->Scalars.begin(), It); + assert(Lane >= 0 && "Lane is not found"); + if (isa<StoreInst>(User) && !P.first->ReorderIndices.empty()) + Lane = P.first->ReorderIndices[Lane]; + assert(Lane < static_cast<int>(P.first->Scalars.size()) && + "Couldn't find extract lane"); + SmallVector<unsigned> OpIndices; + for (unsigned OpIdx : + seq<unsigned>(::getNumberOfPotentiallyCommutativeOps( + P.first->getMainOp()))) { + if (P.first->getOperand(OpIdx)[Lane] == Op && + getScheduleCopyableData(EdgeInfo(P.first, OpIdx), Op)) + --P.getSecond(); + } + } + return all_of(PotentiallyReorderedEntriesCount, + [&](const std::pair<const TreeEntry *, unsigned> &P) { + return P.second == NumOps - 1; + }) && + all_of(OrderedEntriesCount, + [&](const std::pair<const TreeEntry *, unsigned> &P) { + return P.second == NumOps; + }); } SmallVector<ScheduleCopyableData *> @@ -20071,7 +20076,9 @@ Value *BoUpSLP::vectorizeTree( // The is because source vector that supposed to feed this gather node was // inserted at the end of the block [after stab instruction]. So we need // to adjust insertion point again to the end of block. - if (isa<PHINode>(UserI)) { + if (isa<PHINode>(UserI) || + (TE->UserTreeIndex.UserTE->hasState() && + TE->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI)) { // Insert before all users. Instruction *InsertPt = PrevVec->getParent()->getTerminator(); for (User *U : PrevVec->users()) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 7563cd7..9bb8820 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1026,6 +1026,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { PredPHI->replaceAllUsesWith(Op); } + VPBuilder Builder(Def); VPValue *A; if (match(Def, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) { Type *TruncTy = TypeInfo.inferScalarType(Def); @@ -1041,18 +1042,16 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue())) ? 
Instruction::SExt : Instruction::ZExt; - auto *VPC = - new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy); + auto *Ext = Builder.createWidenCast(Instruction::CastOps(ExtOpcode), A, + TruncTy); if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) { // UnderlyingExt has distinct return type, used to retain legacy cost. - VPC->setUnderlyingValue(UnderlyingExt); + Ext->setUnderlyingValue(UnderlyingExt); } - VPC->insertBefore(&R); - Def->replaceAllUsesWith(VPC); + Def->replaceAllUsesWith(Ext); } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) { - auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy); - VPC->insertBefore(&R); - Def->replaceAllUsesWith(VPC); + auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy); + Def->replaceAllUsesWith(Trunc); } } #ifndef NDEBUG @@ -1098,7 +1097,6 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return Def->replaceAllUsesWith(Def->getOperand(1)); // (x && y) || (x && z) -> x && (y || z) - VPBuilder Builder(Def); if (match(Def, m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)), m_LogicalAnd(m_Deferred(X), m_VPValue(Z)))) && // Simplify only if one of the operands has one use to avoid creating an @@ -2206,20 +2204,20 @@ void VPlanTransforms::truncateToMinimalBitwidths( continue; assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate"); auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op); - VPWidenCastRecipe *NewOp = - IterIsEmpty - ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy, - VPIRFlags::TruncFlagsTy(false, false)) - : ProcessedIter->second; - R.setOperand(Idx, NewOp); - if (!IterIsEmpty) + if (!IterIsEmpty) { + R.setOperand(Idx, ProcessedIter->second); continue; - ProcessedIter->second = NewOp; - if (!Op->isLiveIn()) { - NewOp->insertBefore(&R); - } else { - PH->appendRecipe(NewOp); } + + VPBuilder Builder; + if (Op->isLiveIn()) + Builder.setInsertPoint(PH); + else + Builder.setInsertPoint(&R); + VPWidenCastRecipe *NewOp = + Builder.createWidenCast(Instruction::Trunc, Op, NewResTy); + ProcessedIter->second = NewOp; + R.setOperand(Idx, NewOp); } } diff --git a/llvm/test/CodeGen/PowerPC/aix-alloca-r31.ll b/llvm/test/CodeGen/PowerPC/aix-alloca-r31.ll index edfa0b9..2ee6e08 100644 --- a/llvm/test/CodeGen/PowerPC/aix-alloca-r31.ll +++ b/llvm/test/CodeGen/PowerPC/aix-alloca-r31.ll @@ -31,7 +31,7 @@ define i32 @varalloca() local_unnamed_addr { ; CHECK-ASM32-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; CHECK-ASM32-NEXT: .byte 0x00 # Version = 0 ; CHECK-ASM32-NEXT: .byte 0x09 # Language = CPlusPlus -; CHECK-ASM32-NEXT: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; CHECK-ASM32-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; CHECK-ASM32-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; CHECK-ASM32-NEXT: # -HasControlledStorage, -IsTOCless ; CHECK-ASM32-NEXT: # -IsFloatingPointPresent @@ -70,7 +70,7 @@ define i32 @varalloca() local_unnamed_addr { ; CHECK-ASM64-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; CHECK-ASM64-NEXT: .byte 0x00 # Version = 0 ; CHECK-ASM64-NEXT: .byte 0x09 # Language = CPlusPlus -; CHECK-ASM64-NEXT: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; CHECK-ASM64-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; CHECK-ASM64-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; CHECK-ASM64-NEXT: # -HasControlledStorage, -IsTOCless ; CHECK-ASM64-NEXT: # -IsFloatingPointPresent diff --git 
a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-clobber-register.ll b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-clobber-register.ll index 42bd478..8e4e0d3 100644 --- a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-clobber-register.ll +++ b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-clobber-register.ll @@ -49,7 +49,7 @@ entry: ; COMMON: .vbyte 4, 0x00000000 # Traceback table begin ; COMMON-NEXT: .byte 0x00 # Version = 0 ; COMMON-NEXT: .byte 0x09 # Language = CPlusPlus -; COMMON-NEXT: .byte 0x22 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; COMMON-NEXT: .byte 0x22 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; COMMON-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; COMMON-NEXT: # -HasControlledStorage, -IsTOCless ; COMMON-NEXT: # +IsFloatingPointPresent @@ -70,7 +70,7 @@ entry: ; COMMON-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; COMMON-NEXT: .byte 0x00 # Version = 0 ; COMMON-NEXT: .byte 0x09 # Language = CPlusPlus -; COMMON-NEXT: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; COMMON-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; COMMON-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; COMMON-NEXT: # -HasControlledStorage, -IsTOCless ; COMMON-NEXT: # -IsFloatingPointPresent diff --git a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-redzone-boundary.mir b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-redzone-boundary.mir index 3d4b5a7..7041315 100644 --- a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-redzone-boundary.mir +++ b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-redzone-boundary.mir @@ -25,7 +25,7 @@ body: | ; CHECK: .vbyte 4, 0x00000000 # Traceback table begin ; CHECK-NEXT: .byte 0x00 # Version = 0 ; CHECK-NEXT: .byte 0x09 # Language = CPlusPlus - ; CHECK-NEXT: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue + ; CHECK-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; CHECK-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; CHECK-NEXT: # -HasControlledStorage, -IsTOCless ; CHECK-NEXT: # -IsFloatingPointPresent @@ -43,7 +43,7 @@ body: | ; CHECK: .vbyte 4, 0x00000000 # Traceback table begin ; CHECK-NEXT: .byte 0x00 # Version = 0 ; CHECK-NEXT: .byte 0x09 # Language = CPlusPlus - ; CHECK-NEXT: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue + ; CHECK-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; CHECK-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; CHECK-NEXT: # -HasControlledStorage, -IsTOCless ; CHECK-NEXT: # -IsFloatingPointPresent diff --git a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-vectorinfo.ll b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-vectorinfo.ll index 83e413a..f03a6c0 100644 --- a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-vectorinfo.ll +++ b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-vectorinfo.ll @@ -82,7 +82,7 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1 ; COMMON-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; COMMON-NEXT: .byte 0x00 # Version = 0 ; COMMON-NEXT: .byte 0x09 # Language = CPlusPlus -; COMMON-NEXT: .byte 0x22 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; COMMON-NEXT: .byte 0x22 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; COMMON-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; COMMON-NEXT: # -HasControlledStorage, -IsTOCless ; COMMON-NEXT: # +IsFloatingPointPresent @@ -107,7 +107,7 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1 ; COMMON-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; COMMON-NEXT: 
.byte 0x00 # Version = 0 ; COMMON-NEXT: .byte 0x09 # Language = CPlusPlus -; COMMON-NEXT: .byte 0x22 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; COMMON-NEXT: .byte 0x22 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; COMMON-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; COMMON-NEXT: # -HasControlledStorage, -IsTOCless ; COMMON-NEXT: # +IsFloatingPointPresent diff --git a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-vectorinfo_hasvarg.ll b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-vectorinfo_hasvarg.ll index 8c0a589..26506f8 100644 --- a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-vectorinfo_hasvarg.ll +++ b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-vectorinfo_hasvarg.ll @@ -15,7 +15,7 @@ entry: ;CHECK-ASM: .vbyte 4, 0x00000000 # Traceback table begin ;CHECK-ASM-NEXT: .byte 0x00 # Version = 0 ;CHECK-ASM-NEXT: .byte 0x09 # Language = CPlusPlus -;CHECK-ASM-NEXT: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +;CHECK-ASM-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ;CHECK-ASM-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ;CHECK-ASM-NEXT: # -HasControlledStorage, -IsTOCless ;CHECK-ASM-NEXT: # -IsFloatingPointPresent diff --git a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll index ce97f37..2827155 100644 --- a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll +++ b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll @@ -138,7 +138,7 @@ entry: ; COMMON-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; COMMON-NEXT: .byte 0x00 # Version = 0 ; COMMON-NEXT: .byte 0x09 # Language = CPlusPlus -; COMMON-NEXT: .byte 0x22 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; COMMON-NEXT: .byte 0x22 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; COMMON-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; COMMON-NEXT: # -HasControlledStorage, -IsTOCless ; COMMON-NEXT: # +IsFloatingPointPresent @@ -167,7 +167,7 @@ entry: ; COMMON-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; COMMON-NEXT: .byte 0x00 # Version = 0 ; COMMON-NEXT: .byte 0x09 # Language = CPlusPlus -; COMMON-NEXT: .byte 0x22 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; COMMON-NEXT: .byte 0x22 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; COMMON-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; COMMON-NEXT: # -HasControlledStorage, -IsTOCless ; COMMON-NEXT: # +IsFloatingPointPresent @@ -190,7 +190,7 @@ entry: ; COMMON: .vbyte 4, 0x00000000 # Traceback table begin ; COMMON-NEXT: .byte 0x00 # Version = 0 ; COMMON-NEXT: .byte 0x09 # Language = CPlusPlus -; COMMON-NEXT: .byte 0x22 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; COMMON-NEXT: .byte 0x22 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; COMMON-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; COMMON-NEXT: # -HasControlledStorage, -IsTOCless ; COMMON-NEXT: # +IsFloatingPointPresent @@ -217,7 +217,7 @@ entry: ; COMMON-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; COMMON-NEXT: .byte 0x00 # Version = 0 ; COMMON-NEXT: .byte 0x09 # Language = CPlusPlus -; COMMON-NEXT: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; COMMON-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; COMMON-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; COMMON-NEXT: # -HasControlledStorage, -IsTOCless ; COMMON-NEXT: # -IsFloatingPointPresent diff --git a/llvm/test/CodeGen/PowerPC/aix-exception.ll b/llvm/test/CodeGen/PowerPC/aix-exception.ll index 5035d8e..5b364ef 100644 --- 
a/llvm/test/CodeGen/PowerPC/aix-exception.ll +++ b/llvm/test/CodeGen/PowerPC/aix-exception.ll @@ -113,7 +113,7 @@ eh.resume: ; preds = %catch.dispatch ; ASM: .vbyte 4, 0x00000000 # Traceback table begin ; ASM: .byte 0x00 # Version = 0 ; ASM: .byte 0x09 # Language = CPlusPlus -; ASM: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; ASM: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; ASM: # +HasTraceBackTableOffset, -IsInternalProcedure ; ASM: # -HasControlledStorage, -IsTOCless ; ASM: # -IsFloatingPointPresent diff --git a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll index 7bde1b7..7cdfd51 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll @@ -7,12 +7,15 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %rax ; X64-NEXT: addq %rdi, %rax +; X64-NEXT: setb %dl +; X64-NEXT: cmpb $1, %dl ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: retq ; ; X86-LABEL: test_add_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -21,8 +24,14 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %edx, 4(%eax) @@ -30,6 +39,7 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx ; X86-NEXT: retl %ret = add i128 %arg1, %arg2 ret i128 %ret @@ -46,6 +56,8 @@ define i64 @test_add_i64(i64 %arg1, i64 %arg2) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setb %cl +; X86-NEXT: cmpb $1, %cl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: retl %ret = add i64 %arg1, %arg2 diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir index ec9db78..dae2ad6 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir @@ -157,8 +157,8 @@ body: | ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -192,8 +192,8 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -219,8 +219,8 @@ body: | ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X64-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](s128) ; X64-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X64-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; X64-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; X64-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV2]] + ; X64-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; X64-NEXT: $rax = COPY [[UADDO]](s64) ; X64-NEXT: $rdx = COPY [[UADDE]](s64) ; X64-NEXT: RET 0 @@ -230,10 +230,10 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s128) ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV4]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] - ; X86-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] - ; X86-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV4]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] + ; X86-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s8) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] + ; X86-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s8) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; X86-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE2]](s32), [[UADDE4]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir index 19fe5b8..470a30fd 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir @@ -25,6 +25,7 @@ body: | ; X64-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CTLZ]], [[C1]] ; X64-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C]] ; X64-NEXT: RET 0, implicit [[AND1]](s64) + ; ; X86-LABEL: name: test_ctlz35 ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s35) = G_TRUNC [[COPY]](s64) @@ -46,12 +47,15 @@ body: | ; X86-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C]](s32) ; X86-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; X86-NEXT: 
[[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV6]], [[UV8]] + ; X86-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[USUBO1]](s8) + ; X86-NEXT: [[ZEXT2:%[0-9]+]]:_(s8) = G_ZEXT [[TRUNC1]](s1) + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV7]], [[UV9]], [[ZEXT2]] + ; X86-NEXT: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[USUBE1]](s8) ; X86-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; X86-NEXT: [[TRUNC1:%[0-9]+]]:_(s35) = G_TRUNC [[MV2]](s64) - ; X86-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s35) - ; X86-NEXT: RET 0, implicit [[ZEXT2]](s64) + ; X86-NEXT: [[TRUNC3:%[0-9]+]]:_(s35) = G_TRUNC [[MV2]](s64) + ; X86-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC3]](s35) + ; X86-NEXT: RET 0, implicit [[ZEXT3]](s64) %0(s64) = COPY $rdx %1:_(s35) = G_TRUNC %0(s64) %2:_(s35) = G_CTLZ %1 @@ -97,6 +101,7 @@ body: | ; X64-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[DEF]](s64) ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTLZ]](s64) ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; ; X86-LABEL: name: test_ctlz64 ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir index ee2b9ee..ac3bf33 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir @@ -157,8 +157,8 @@ body: | ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV2]] + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -192,8 +192,8 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV2]] + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -219,8 +219,8 @@ body: | ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X64-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](s128) ; X64-NEXT: [[UV2:%[0-9]+]]:_(s64), 
[[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X64-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; X64-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; X64-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV2]] + ; X64-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; X64-NEXT: $rax = COPY [[USUBO]](s64) ; X64-NEXT: $rdx = COPY [[USUBE]](s64) ; X64-NEXT: RET 0 @@ -230,10 +230,10 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s128) ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV4]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV5]], [[USUBO1]] - ; X86-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV6]], [[USUBE1]] - ; X86-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV7]], [[USUBE3]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV4]] + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV5]], [[USUBO1]] + ; X86-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s8) = G_USUBE [[UV2]], [[UV6]], [[USUBE1]] + ; X86-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s8) = G_USUBE [[UV3]], [[UV7]], [[USUBE3]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) ; X86-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBE2]](s32), [[USUBE4]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir index 9807d13..57e729f 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir @@ -32,8 +32,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[OR]](s32), [[C]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -97,8 +97,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[UV]](s32), [[C]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32) ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C1]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: 
[[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C1]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir index e2d10423..f5d8477 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir @@ -32,8 +32,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[OR]](s32), [[C]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -99,8 +99,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[UV]](s32), [[C]] ; X86-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[UV1]](s32) ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ]], [[C1]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ]], [[C1]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll b/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll new file mode 100644 index 0000000..41d890b --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -global-isel -global-isel-abort=1 < %s 2>&1 | FileCheck %s + +define i32 @test_01(ptr %p, i64 %len, i32 %x) { +; CHECK-LABEL: test_01: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subq %rax, %rsi +; CHECK-NEXT: setb %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: jne .LBB0_4 +; CHECK-NEXT: # %bb.2: # %backedge +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: imulq $4, %rsi, %rcx +; CHECK-NEXT: addq %rdi, %rcx +; CHECK-NEXT: cmpl %edx, (%rcx) +; CHECK-NEXT: sete %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.3: # %failure +; CHECK-NEXT: .LBB0_4: # %exit +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: retq + +entry: + %scevgep = getelementptr i32, ptr %p, i64 -1 + br label 
%loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ %iv.next, %backedge ], [ %len, %entry ] + %iv.next = add i64 %iv, -1 + %cond_1 = icmp eq i64 %iv, 0 + br i1 %cond_1, label %exit, label %backedge + +backedge: ; preds = %loop + %scevgep1 = getelementptr i32, ptr %scevgep, i64 %iv + %loaded = load atomic i32, ptr %scevgep1 unordered, align 4 + %cond_2 = icmp eq i32 %loaded, %x + br i1 %cond_2, label %failure, label %loop + +exit: ; preds = %loop + ret i32 -1 + +failure: + unreachable +} + diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir index 8eac3eaf..76680ac 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir @@ -29,8 +29,8 @@ body: | bb.0 (%ir-block.0): %0(s32) = IMPLICIT_DEF %1(s32) = IMPLICIT_DEF - %2(s1) = IMPLICIT_DEF - %3(s32), %4(s1) = G_UADDE %0, %1, %2 + %2(s8) = IMPLICIT_DEF + %3(s32), %4(s8) = G_UADDE %0, %1, %2 RET 0 ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir b/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir index 773813f..b85180f 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir @@ -27,25 +27,24 @@ body: | bb.0 (%ir-block.0): ; X32-LABEL: name: test_add_i64 ; X32: [[DEF:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[DEF1:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[DEF2:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[DEF3:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr [[DEF]], [[DEF2]], implicit-def $eflags - ; X32: [[COPY:%[0-9]+]]:gr32 = COPY $eflags - ; X32: $eflags = COPY [[COPY]] - ; X32: [[ADC32rr:%[0-9]+]]:gr32 = ADC32rr [[DEF1]], [[DEF3]], implicit-def $eflags, implicit $eflags - ; X32: [[COPY1:%[0-9]+]]:gr32 = COPY $eflags - ; X32: $eax = COPY [[ADD32rr]] - ; X32: $edx = COPY [[ADC32rr]] - ; X32: RET 0, implicit $eax, implicit $edx + ; X32-NEXT: [[DEF1:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X32-NEXT: [[DEF2:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X32-NEXT: [[DEF3:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X32-NEXT: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr [[DEF]], [[DEF2]], implicit-def $eflags + ; X32-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 2, implicit $eflags + ; X32-NEXT: CMP8ri [[SETCCr]], 1, implicit-def $eflags + ; X32-NEXT: [[ADC32rr:%[0-9]+]]:gr32 = ADC32rr [[DEF1]], [[DEF3]], implicit-def $eflags, implicit $eflags + ; X32-NEXT: [[SETCCr1:%[0-9]+]]:gr8 = SETCCr 2, implicit $eflags + ; X32-NEXT: $eax = COPY [[ADD32rr]] + ; X32-NEXT: $edx = COPY [[ADC32rr]] + ; X32-NEXT: RET 0, implicit $eax, implicit $edx %0(s32) = IMPLICIT_DEF %1(s32) = IMPLICIT_DEF %2(s32) = IMPLICIT_DEF %3(s32) = IMPLICIT_DEF %9(s8) = G_CONSTANT i8 0 - %4(s1) = G_TRUNC %9(s8) - %5(s32), %6(s1) = G_UADDE %0, %2, %4 - %7(s32), %8(s1) = G_UADDE %1, %3, %6 + %5(s32), %6(s8) = G_UADDE %0, %2, %9 + %7(s32), %8(s8) = G_UADDE %1, %3, %6 $eax = COPY %5(s32) $edx = COPY %7(s32) RET 0, implicit $eax, implicit $edx diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll b/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll new file mode 100644 index 0000000..0cf1372 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel=1 -global-isel-abort=1 | FileCheck %s + +; Issue #120029 +define i16 @use_carry_bit(i16 %2) { +; CHECK-LABEL: use_carry_bit: 
+; CHECK: # %bb.0: +; CHECK-NEXT: movw $1, %ax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: addw %di, %ax +; CHECK-NEXT: setb %cl +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: cmovnew %di, %ax +; CHECK-NEXT: retq + %uadd = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %2, i16 1) + %res = extractvalue { i16, i1 } %uadd, 0 + %carry = extractvalue { i16, i1 } %uadd, 1 + %ret = select i1 %carry, i16 %2, i16 %res + ret i16 %ret +} + diff --git a/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll index 7a035f5..be75d7c 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll @@ -7,12 +7,15 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: subq %rdx, %rax +; X64-NEXT: setb %dl +; X64-NEXT: cmpb $1, %dl ; X64-NEXT: sbbq %rcx, %rsi ; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: retq ; ; X86-LABEL: test_sub_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -21,8 +24,14 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %edx, 4(%eax) @@ -30,6 +39,7 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx ; X86-NEXT: retl %ret = sub i128 %arg1, %arg2 ret i128 %ret @@ -47,6 +57,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: subl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setb %cl +; X86-NEXT: cmpb $1, %cl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: retl %ret = sub i64 %arg1, %arg2 diff --git a/llvm/test/CodeGen/X86/pr49087.ll b/llvm/test/CodeGen/X86/pr49087.ll deleted file mode 100644 index 1a29222..0000000 --- a/llvm/test/CodeGen/X86/pr49087.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -global-isel < %s 2>&1 | FileCheck %s -; REQUIRES: asserts -; XFAIL: * - -define i32 @test_01(ptr %p, i64 %len, i32 %x) { -; CHECK-LABEL: test_01 - -entry: - %scevgep = getelementptr i32, ptr %p, i64 -1 - br label %loop - -loop: ; preds = %backedge, %entry - %iv = phi i64 [ %iv.next, %backedge ], [ %len, %entry ] - %iv.next = add i64 %iv, -1 - %cond_1 = icmp eq i64 %iv, 0 - br i1 %cond_1, label %exit, label %backedge - -backedge: ; preds = %loop - %scevgep1 = getelementptr i32, ptr %scevgep, i64 %iv - %loaded = load atomic i32, ptr %scevgep1 unordered, align 4 - %cond_2 = icmp eq i32 %loaded, %x - br i1 %cond_2, label %failure, label %loop - -exit: ; preds = %loop - ret i32 -1 - -failure: - unreachable -} - diff --git a/llvm/test/DebugInfo/XCOFF/empty.ll b/llvm/test/DebugInfo/XCOFF/empty.ll index af2f74f..24655e5 100644 --- a/llvm/test/DebugInfo/XCOFF/empty.ll +++ b/llvm/test/DebugInfo/XCOFF/empty.ll @@ -61,7 +61,7 @@ entry: ; ASM32-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; ASM32-NEXT: .byte 0x00 # Version = 0 ; ASM32-NEXT: .byte 0x09 # Language = CPlusPlus -; ASM32-NEXT: .byte 0x20 
# -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; ASM32-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; ASM32-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; ASM32-NEXT: # -HasControlledStorage, -IsTOCless ; ASM32-NEXT: # -IsFloatingPointPresent @@ -264,7 +264,7 @@ entry: ; ASM64-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; ASM64-NEXT: .byte 0x00 # Version = 0 ; ASM64-NEXT: .byte 0x09 # Language = CPlusPlus -; ASM64-NEXT: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; ASM64-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; ASM64-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; ASM64-NEXT: # -HasControlledStorage, -IsTOCless ; ASM64-NEXT: # -IsFloatingPointPresent diff --git a/llvm/test/DebugInfo/XCOFF/explicit-section.ll b/llvm/test/DebugInfo/XCOFF/explicit-section.ll index 0ae9289..3bcc0f9 100644 --- a/llvm/test/DebugInfo/XCOFF/explicit-section.ll +++ b/llvm/test/DebugInfo/XCOFF/explicit-section.ll @@ -65,7 +65,7 @@ entry: ; CHECK-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; CHECK-NEXT: .byte 0x00 # Version = 0 ; CHECK-NEXT: .byte 0x09 # Language = CPlusPlus -; CHECK-NEXT: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; CHECK-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; CHECK-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; CHECK-NEXT: # -HasControlledStorage, -IsTOCless ; CHECK-NEXT: # -IsFloatingPointPresent @@ -113,7 +113,7 @@ entry: ; CHECK-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; CHECK-NEXT: .byte 0x00 # Version = 0 ; CHECK-NEXT: .byte 0x09 # Language = CPlusPlus -; CHECK-NEXT: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; CHECK-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; CHECK-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; CHECK-NEXT: # -HasControlledStorage, -IsTOCless ; CHECK-NEXT: # -IsFloatingPointPresent diff --git a/llvm/test/DebugInfo/XCOFF/function-sections.ll b/llvm/test/DebugInfo/XCOFF/function-sections.ll index 6a86ae6..0b7a03b 100644 --- a/llvm/test/DebugInfo/XCOFF/function-sections.ll +++ b/llvm/test/DebugInfo/XCOFF/function-sections.ll @@ -60,7 +60,7 @@ entry: ; CHECK-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; CHECK-NEXT: .byte 0x00 # Version = 0 ; CHECK-NEXT: .byte 0x09 # Language = CPlusPlus -; CHECK-NEXT: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; CHECK-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; CHECK-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; CHECK-NEXT: # -HasControlledStorage, -IsTOCless ; CHECK-NEXT: # -IsFloatingPointPresent @@ -95,7 +95,7 @@ entry: ; CHECK-NEXT: .vbyte 4, 0x00000000 # Traceback table begin ; CHECK-NEXT: .byte 0x00 # Version = 0 ; CHECK-NEXT: .byte 0x09 # Language = CPlusPlus -; CHECK-NEXT: .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; CHECK-NEXT: .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue ; CHECK-NEXT: # +HasTraceBackTableOffset, -IsInternalProcedure ; CHECK-NEXT: # -HasControlledStorage, -IsTOCless ; CHECK-NEXT: # -IsFloatingPointPresent diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll index 000dc4a..232c354 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll @@ -44,8 +44,86 @@ loop: ; preds = 
%loop, %entry exit: ; preds = %loop ret void } + +; Test case for https://github.com/llvm/llvm-project/issues/162374. +define void @truncate_i16_to_i8_cse(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @truncate_i16_to_i8_cse( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4294967296, [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4294967296, [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4294967296, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[SRC]], align 2 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[TMP5]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = trunc <vscale x 8 x i16> [[BROADCAST_SPLAT]] to <vscale x 8 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 8 +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 8 x i8> [[TMP6]], i32 [[TMP9]] +; CHECK-NEXT: store i8 [[TMP10]], ptr null, align 1 +; CHECK-NEXT: store i8 [[TMP10]], ptr [[DST]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4294967296, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[COUNT_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[SRC]], align 2 +; CHECK-NEXT: [[VAL_ZEXT:%.*]] = zext i16 [[VAL]] to i64 +; CHECK-NEXT: [[VAL_TRUNC_ZEXT:%.*]] = trunc i64 [[VAL_ZEXT]] to i8 +; CHECK-NEXT: store i8 [[VAL_TRUNC_ZEXT]], ptr null, align 1 +; CHECK-NEXT: [[VAL_TRUNC:%.*]] = trunc i16 [[VAL]] to i8 +; CHECK-NEXT: store i8 [[VAL_TRUNC]], ptr [[DST]], align 1 +; CHECK-NEXT: [[COUNT_NEXT]] = add i32 [[COUNT]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[COUNT_NEXT]], 0 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %loop, 
%entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %count = phi i32 [ 0, %entry ], [ %count.next, %loop ] + %val = load i16, ptr %src, align 2 + %val.zext = zext i16 %val to i64 + %val.trunc.zext = trunc i64 %val.zext to i8 + store i8 %val.trunc.zext, ptr null, align 1 + %val.trunc = trunc i16 %val to i8 + store i8 %val.trunc, ptr %dst, align 1 + %count.next = add i32 %count, 1 + %exitcond = icmp eq i32 %count.next, 0 + %iv.next = add i64 %iv, 1 + br i1 %exitcond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + ;. ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index aed1e29..4db3d1e 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -1374,3 +1374,49 @@ exit.1: exit.2: ret i16 1 } + +; Loop with a switch terminator in the latch block. Cannot be vectorized +; currently. +; Test case for https://github.com/llvm/llvm-project/issues/156894. +define void @switch_in_latch(ptr %a) { +; CHECK-LABEL: @switch_in_latch( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[A:%.*]], i32 [[IV]] +; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: switch i32 [[IV_NEXT]], label [[LOOP]] [ +; CHECK-NEXT: i32 100, label [[EXIT:%.*]] +; CHECK-NEXT: ] +; CHECK: exit: +; CHECK-NEXT: ret void +; +; TAILFOLD-LABEL: @switch_in_latch( +; TAILFOLD-NEXT: entry: +; TAILFOLD-NEXT: br label [[LOOP:%.*]] +; TAILFOLD: loop: +; TAILFOLD-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[A:%.*]], i32 [[IV]] +; TAILFOLD-NEXT: store i32 1, ptr [[GEP]], align 4 +; TAILFOLD-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; TAILFOLD-NEXT: switch i32 [[IV_NEXT]], label [[LOOP]] [ +; TAILFOLD-NEXT: i32 100, label [[EXIT:%.*]] +; TAILFOLD-NEXT: ] +; TAILFOLD: exit: +; TAILFOLD-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr i32, ptr %a, i32 %iv + store i32 1, ptr %gep, align 4 + %iv.next = add i32 %iv, 1 + switch i32 %iv.next, label %loop [i32 100, label %exit] + +exit: + ret void +} diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/non-commutative-second-arg-only-copyable.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/non-commutative-second-arg-only-copyable.ll new file mode 100644 index 0000000..0561466 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/non-commutative-second-arg-only-copyable.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999 -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s + +define i32 @main(ptr %q, ptr %a, i8 %.pre) { +; CHECK-LABEL: define i32 @main( +; CHECK-SAME: ptr [[Q:%.*]], ptr [[A:%.*]], i8 [[DOTPRE:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[DOTPRE1:%.*]] = load i8, ptr 
[[Q]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[DOTPRE]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[DOTPRE1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], <i32 0, i32 1> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> <i32 poison, i32 1>, <2 x i32> <i32 0, i32 3> +; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i32> [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16> +; CHECK-NEXT: store <2 x i16> [[TMP6]], ptr [[A]], align 2 +; CHECK-NEXT: ret i32 0 +; +entry: + %.pre1 = load i8, ptr %q, align 1 + %conv11.i = sext i8 %.pre to i32 + %shl18.i = shl i32 %conv11.i, %conv11.i + %conv19.i = trunc i32 %shl18.i to i16 + store i16 %conv19.i, ptr %a, align 2 + %0 = sext i8 %.pre1 to i32 + %1 = add i32 %0, 1 + %shl18.i.1 = shl i32 1, %1 + %conv19.i.1 = trunc i32 %shl18.i.1 to i16 + %arrayidx21.i.1 = getelementptr i8, ptr %a, i64 2 + store i16 %conv19.i.1, ptr %arrayidx21.i.1, align 2 + ret i32 0 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-phi-node-reordered.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-phi-node-reordered.ll new file mode 100644 index 0000000..d01c35f --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-phi-node-reordered.ll @@ -0,0 +1,118 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define void @test(i32 %arg, i32 %arg1) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: i32 [[ARG:%.*]], i32 [[ARG1:%.*]]) { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG1]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[ARG]], i32 0 +; CHECK-NEXT: br label %[[BB6:.*]] +; CHECK: [[BB2:.*]]: +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ [[TMP14:%.*]], %[[BB19:.*]] ] +; CHECK-NEXT: ret void +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP17:%.*]], %[[BB26:.*]] ], [ [[TMP16:%.*]], %[[BB27:.*]] ], [ zeroinitializer, %[[BB25:.*]] ] +; CHECK-NEXT: switch i8 0, label %[[BB11:.*]] [ +; CHECK-NEXT: i8 0, label %[[BB28:.*]] +; CHECK-NEXT: ] +; CHECK: [[BB11]]: +; CHECK-NEXT: [[PHI12:%.*]] = phi i32 [ 0, %[[BB28]] ], [ 0, %[[BB6]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i32> [ [[TMP3]], %[[BB28]] ], [ zeroinitializer, %[[BB6]] ] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>, <4 x i32> <i32 0, i32 5, i32 2, i32 3> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[ARG]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[PHI12]], i32 3 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, <4 x i32> <i32 poison, i32 5, i32 2, i32 7> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP1]], <4 x i32> <i32 4, i32 1, i32 2, i32 3> +; CHECK-NEXT: switch i8 0, label %[[BB19]] [ +; CHECK-NEXT: i8 1, label %[[BB17:.*]] +; CHECK-NEXT: i8 0, label %[[BB18:.*]] +; CHECK-NEXT: ] +; CHECK: [[BB17]]: +; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP10]], <4 x i32> <i32 0, i32 3, i32 6, i32 poison> +; 
CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 0, i32 1, i32 2, i32 7> +; CHECK-NEXT: br label %[[BB19]] +; CHECK: [[BB18]]: +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 2, i32 0> +; CHECK-NEXT: br label %[[BB19]] +; CHECK: [[BB19]]: +; CHECK-NEXT: [[TMP14]] = phi <4 x i32> [ [[TMP10]], %[[BB17]] ], [ [[TMP7]], %[[BB18]] ], [ [[TMP9]], %[[BB11]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP12]], %[[BB17]] ], [ [[TMP13]], %[[BB18]] ], [ [[TMP7]], %[[BB11]] ] +; CHECK-NEXT: [[TMP16]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 2, i32 1> +; CHECK-NEXT: br i1 false, label %[[BB2]], label %[[BB25]] +; CHECK: [[BB25]]: +; CHECK-NEXT: switch i8 0, label %[[BB6]] [ +; CHECK-NEXT: i8 0, label %[[BB26]] +; CHECK-NEXT: i8 1, label %[[BB27]] +; CHECK-NEXT: i8 6, label %[[BB27]] +; CHECK-NEXT: ] +; CHECK: [[BB26]]: +; CHECK-NEXT: [[TMP17]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP0]], <4 x i32> <i32 4, i32 1, i32 2, i32 3> +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB27]]: +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB28]]: +; CHECK-NEXT: br label %[[BB11]] +; +bb: + br label %bb6 + +bb2: + %phi = phi i32 [ %phi21, %bb19 ] + %phi3 = phi i32 [ %phi22, %bb19 ] + %phi4 = phi i32 [ %phi23, %bb19 ] + %phi5 = phi i32 [ %phi24, %bb19 ] + ret void + +bb6: + %phi7 = phi i32 [ 0, %bb ], [ %phi24, %bb26 ], [ %phi24, %bb27 ], [ 0, %bb25 ] + %phi8 = phi i32 [ 0, %bb ], [ %arg1, %bb26 ], [ %phi23, %bb27 ], [ 0, %bb25 ] + %phi9 = phi i32 [ 0, %bb ], [ %phi22, %bb26 ], [ %phi20, %bb27 ], [ 0, %bb25 ] + %phi10 = phi i32 [ 0, %bb ], [ %phi21, %bb26 ], [ %phi21, %bb27 ], [ 0, %bb25 ] + switch i8 0, label %bb11 [ + i8 0, label %bb28 + ] + +bb11: + %phi12 = phi i32 [ 0, %bb28 ], [ 0, %bb6 ] + %phi13 = phi i32 [ %phi10, %bb28 ], [ 0, %bb6 ] + %phi14 = phi i32 [ %phi9, %bb28 ], [ 0, %bb6 ] + %phi15 = phi i32 [ %phi8, %bb28 ], [ 0, %bb6 ] + %phi16 = phi i32 [ %phi7, %bb28 ], [ 0, %bb6 ] + switch i8 0, label %bb19 [ + i8 1, label %bb17 + i8 0, label %bb18 + ] + +bb17: + %add = add i32 %phi16, 0 + br label %bb19 + +bb18: + br label %bb19 + +bb19: + %phi20 = phi i32 [ 0, %bb17 ], [ %arg, %bb18 ], [ %phi12, %bb11 ] + %phi21 = phi i32 [ %phi13, %bb17 ], [ %phi12, %bb18 ], [ 0, %bb11 ] + %phi22 = phi i32 [ %phi14, %bb17 ], [ 0, %bb18 ], [ 0, %bb11 ] + %phi23 = phi i32 [ %phi15, %bb17 ], [ %arg, %bb18 ], [ %arg, %bb11 ] + %phi24 = phi i32 [ %add, %bb17 ], [ %phi16, %bb18 ], [ %phi16, %bb11 ] + br i1 false, label %bb2, label %bb25 + +bb25: + switch i8 0, label %bb6 [ + i8 0, label %bb26 + i8 1, label %bb27 + i8 6, label %bb27 + ] + +bb26: + br label %bb6 + +bb27: + br label %bb6 + +bb28: + br label %bb11 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll index d626230..5253f9f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll @@ -6,7 +6,7 @@ define void @test(ptr %0, i1 %1, i1 %2) { ; CHECK-SAME: ptr [[TMP0:%.*]], i1 [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: br label %[[BB4:.*]] ; CHECK: [[BB4]]: -; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP12:%.*]], %[[TMP7:.*]] ], [ zeroinitializer, [[TMP3:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP15:%.*]], 
%[[TMP7:.*]] ], [ zeroinitializer, [[TMP3:%.*]] ] ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1> ; CHECK-NEXT: br i1 [[TMP1]], label %[[TMP7]], label %[[BB15:.*]] ; CHECK: [[TMP7]]: @@ -14,9 +14,9 @@ define void @test(ptr %0, i1 %1, i1 %2) { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16 ; CHECK-NEXT: [[TMP10:%.*]] = load <2 x i32>, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = or <2 x i32> [[TMP10]], splat (i32 1) -; CHECK-NEXT: [[TMP12]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> <i32 1, i32 poison>, <2 x i32> <i32 2, i32 1> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> +; CHECK-NEXT: [[TMP15]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> <i32 1, i32 poison>, <2 x i32> <i32 2, i32 1> ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16:.*]], label %[[BB4]] ; CHECK: [[BB15]]: ; CHECK-NEXT: br label %[[BB16]] diff --git a/llvm/unittests/Support/JobserverTest.cpp b/llvm/unittests/Support/JobserverTest.cpp index ddee023..d274458 100644 --- a/llvm/unittests/Support/JobserverTest.cpp +++ b/llvm/unittests/Support/JobserverTest.cpp @@ -355,6 +355,7 @@ TEST_F(JobserverStrategyTest, ThreadPoolConcurrencyIsLimited) { int CurrentActive = ++ActiveTasks; LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive << "\n"); + (void)i; int OldMax = MaxActiveTasks.load(); while (CurrentActive > OldMax) MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive); |