Diffstat (limited to 'llvm/lib/Target')
31 files changed, 511 insertions, 95 deletions
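A note on the first hunk below: the AArch64 changes rename AArch64ISD::SRAD_MERGE_OP1 to ASRD_MERGE_OP1, matching the SVE ASRD instruction the node selects. For readers unfamiliar with ASRD, it is an arithmetic shift right that divides by a power of two with rounding toward zero, which is what lets LowerDIV map an sdiv by splat(2^k) onto it, and handle a -2^k divisor by negating the result. A minimal scalar model of one lane, my sketch rather than code from the patch:

```cpp
// Scalar model of one SVE ASRD lane: signed divide by 2^Shift with rounding
// toward zero. A bare arithmetic shift rounds toward negative infinity, so
// negative inputs first get a bias of 2^Shift - 1.
#include <cstdint>

int64_t asrdLane(int64_t X, unsigned Shift) {
  int64_t Bias = (X >> 63) & ((int64_t{1} << Shift) - 1); // nonzero only if X < 0
  return (X + Bias) >> Shift;
}
// asrdLane(-7, 1) == -3 (not -4), matching C++'s -7 / 2. For a divisor of
// -2^k, LowerDIV emits the same node and then subtracts the result from zero
// (the `Negated` path visible in the hunk).
```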
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9926a4d..be2f2e4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16254,7 +16254,7 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const { SplatVal > 1) { SDValue Pg = getPredicateForScalableVector(DAG, DL, VT); SDValue Res = - DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0), + DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0), DAG.getTargetConstant(Log2_64(SplatVal), DL, MVT::i32)); if (Negated) Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res); @@ -22942,7 +22942,7 @@ static SDValue performIntrinsicCombine(SDNode *N, return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_sve_asrd: - return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0), + return DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_cmphs: if (!N->getOperand(2).getValueType().isFloatingPoint()) @@ -30047,7 +30047,7 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE( SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, VT); SDValue Res = - DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2); + DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2); if (Negated) Res = DAG.getNode(ISD::SUB, DL, ContainerVT, DAG.getConstant(0, DL, ContainerVT), Res); diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 7322212..fe84193 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -233,6 +233,12 @@ def G_SDOT : AArch64GenericInstruction { let hasSideEffects = 0; } +def G_USDOT : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3); + let hasSideEffects = 0; +} + // Generic instruction for the BSP pseudo. It is expanded into BSP, which // expands into BSL/BIT/BIF after register allocation. def G_BSP : AArch64GenericInstruction { @@ -278,6 +284,7 @@ def : GINodeEquiv<G_UADDLV, AArch64uaddlv>; def : GINodeEquiv<G_UDOT, AArch64udot>; def : GINodeEquiv<G_SDOT, AArch64sdot>; +def : GINodeEquiv<G_USDOT, AArch64usdot>; def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>; diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp index b3c9656..343fd81 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -40,7 +40,11 @@ yaml::AArch64FunctionInfo::AArch64FunctionInfo( getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizePPR)), HasStackFrame(MFI.hasStackFrame() ? std::optional<bool>(MFI.hasStackFrame()) - : std::nullopt) {} + : std::nullopt), + HasStreamingModeChanges( + MFI.hasStreamingModeChanges() + ? 
std::optional<bool>(MFI.hasStreamingModeChanges()) + : std::nullopt) {} void yaml::AArch64FunctionInfo::mappingImpl(yaml::IO &YamlIO) { MappingTraits<AArch64FunctionInfo>::mapping(YamlIO, *this); @@ -55,6 +59,8 @@ void AArch64FunctionInfo::initializeBaseYamlFields( YamlMFI.StackSizePPR.value_or(0)); if (YamlMFI.HasStackFrame) setHasStackFrame(*YamlMFI.HasStackFrame); + if (YamlMFI.HasStreamingModeChanges) + setHasStreamingModeChanges(*YamlMFI.HasStreamingModeChanges); } static std::pair<bool, bool> GetSignReturnAddress(const Function &F) { diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index bd0a17d..d1832f4 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -645,6 +645,7 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo { std::optional<uint64_t> StackSizeZPR; std::optional<uint64_t> StackSizePPR; std::optional<bool> HasStackFrame; + std::optional<bool> HasStreamingModeChanges; AArch64FunctionInfo() = default; AArch64FunctionInfo(const llvm::AArch64FunctionInfo &MFI); @@ -659,6 +660,7 @@ template <> struct MappingTraits<AArch64FunctionInfo> { YamlIO.mapOptional("stackSizeZPR", MFI.StackSizeZPR); YamlIO.mapOptional("stackSizePPR", MFI.StackSizePPR); YamlIO.mapOptional("hasStackFrame", MFI.HasStackFrame); + YamlIO.mapOptional("hasStreamingModeChanges", MFI.HasStreamingModeChanges); } }; diff --git a/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp index cdf2822..a90950d 100644 --- a/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp +++ b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp @@ -75,6 +75,10 @@ bool AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) { if (Src != Dst) MRI->replaceRegWith(Dst, Src); + if (MI.getOperand(1).isUndef()) + for (MachineOperand &MO : MRI->use_operands(Dst)) + MO.setIsUndef(); + // MI must be erased from the basic block before recalculating the live // interval. 
LIS->RemoveMachineInstrFromMaps(MI); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index bc6b931..98a128e 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -265,7 +265,7 @@ def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [ SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2> ]>; -def AArch64asrd_m1 : SDNode<"AArch64ISD::SRAD_MERGE_OP1", SDT_AArch64Arith_Imm>; +def AArch64asrd_m1 : SDNode<"AArch64ISD::ASRD_MERGE_OP1", SDT_AArch64Arith_Imm>; def AArch64urshri_p_node : SDNode<"AArch64ISD::URSHR_I_PRED", SDT_AArch64Arith_Imm>; def AArch64urshri_p : PatFrags<(ops node:$op1, node:$op2, node:$op3), diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 9e2d698..05a4313 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1855,6 +1855,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return LowerTriOp(AArch64::G_UDOT); case Intrinsic::aarch64_neon_sdot: return LowerTriOp(AArch64::G_SDOT); + case Intrinsic::aarch64_neon_usdot: + return LowerTriOp(AArch64::G_USDOT); case Intrinsic::aarch64_neon_sqxtn: return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S); case Intrinsic::aarch64_neon_sqxtun: diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index 4749748..434ea67 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -294,6 +294,12 @@ struct MachineSMEABI : public MachineFunctionPass { MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs); + /// Attempts to find an insertion point before \p Inst where the status flags + /// are not live. If \p Inst is `Block.Insts.end()` a point before the end of + /// the block is found. + std::pair<MachineBasicBlock::iterator, LiveRegs> + findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block, + SmallVectorImpl<InstInfo>::const_iterator Inst); void emitStateChange(EmitContext &, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ZAState From, ZAState To, LiveRegs PhysLiveRegs); @@ -337,6 +343,28 @@ private: MachineRegisterInfo *MRI = nullptr; }; +static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) { + LiveRegs PhysLiveRegs = LiveRegs::None; + if (!LiveUnits.available(AArch64::NZCV)) + PhysLiveRegs |= LiveRegs::NZCV; + // We have to track W0 and X0 separately as otherwise things can get + // confused if we attempt to preserve X0 but only W0 was defined. 
+ if (!LiveUnits.available(AArch64::W0)) + PhysLiveRegs |= LiveRegs::W0; + if (!LiveUnits.available(AArch64::W0_HI)) + PhysLiveRegs |= LiveRegs::W0_HI; + return PhysLiveRegs; +} + +static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) { + if (PhysLiveRegs & LiveRegs::NZCV) + LiveUnits.addReg(AArch64::NZCV); + if (PhysLiveRegs & LiveRegs::W0) + LiveUnits.addReg(AArch64::W0); + if (PhysLiveRegs & LiveRegs::W0_HI) + LiveUnits.addReg(AArch64::W0_HI); +} + FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() || SMEFnAttrs.hasZAState()) && @@ -362,26 +390,13 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { LiveRegUnits LiveUnits(*TRI); LiveUnits.addLiveOuts(MBB); - auto GetPhysLiveRegs = [&] { - LiveRegs PhysLiveRegs = LiveRegs::None; - if (!LiveUnits.available(AArch64::NZCV)) - PhysLiveRegs |= LiveRegs::NZCV; - // We have to track W0 and X0 separately as otherwise things can get - // confused if we attempt to preserve X0 but only W0 was defined. - if (!LiveUnits.available(AArch64::W0)) - PhysLiveRegs |= LiveRegs::W0; - if (!LiveUnits.available(AArch64::W0_HI)) - PhysLiveRegs |= LiveRegs::W0_HI; - return PhysLiveRegs; - }; - - Block.PhysLiveRegsAtExit = GetPhysLiveRegs(); + Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits); auto FirstTerminatorInsertPt = MBB.getFirstTerminator(); auto FirstNonPhiInsertPt = MBB.getFirstNonPHI(); for (MachineInstr &MI : reverse(MBB)) { MachineBasicBlock::iterator MBBI(MI); LiveUnits.stepBackward(MI); - LiveRegs PhysLiveRegs = GetPhysLiveRegs(); + LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits); // The SMEStateAllocPseudo marker is added to a function if the save // buffer was allocated in SelectionDAG. It marks the end of the // allocation -- which is a safe point for this pass to insert any TPIDR2 @@ -476,6 +491,49 @@ MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles, return BundleStates; } +std::pair<MachineBasicBlock::iterator, LiveRegs> +MachineSMEABI::findStateChangeInsertionPoint( + MachineBasicBlock &MBB, const BlockInfo &Block, + SmallVectorImpl<InstInfo>::const_iterator Inst) { + LiveRegs PhysLiveRegs; + MachineBasicBlock::iterator InsertPt; + if (Inst != Block.Insts.end()) { + InsertPt = Inst->InsertPt; + PhysLiveRegs = Inst->PhysLiveRegs; + } else { + InsertPt = MBB.getFirstTerminator(); + PhysLiveRegs = Block.PhysLiveRegsAtExit; + } + + if (!(PhysLiveRegs & LiveRegs::NZCV)) + return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags). + + // Find the previous state change. We can not move before this point. + MachineBasicBlock::iterator PrevStateChangeI; + if (Inst == Block.Insts.begin()) { + PrevStateChangeI = MBB.begin(); + } else { + // Note: `std::prev(Inst)` is the previous InstInfo. We only create an + // InstInfo object for instructions that require a specific ZA state, so the + // InstInfo is the site of the previous state change in the block (which can + // be several MIs earlier). + PrevStateChangeI = std::prev(Inst)->InsertPt; + } + + // Note: LiveUnits will only accurately track X0 and NZCV. + LiveRegUnits LiveUnits(*TRI); + setPhysLiveRegs(LiveUnits, PhysLiveRegs); + for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) { + // Don't move before/into a call (which may have a state change before it). 
+ if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall()) + break; + LiveUnits.stepBackward(*I); + if (LiveUnits.available(AArch64::NZCV)) + return {I, getPhysLiveRegs(LiveUnits)}; + } + return {InsertPt, PhysLiveRegs}; +} + void MachineSMEABI::insertStateChanges(EmitContext &Context, const FunctionInfo &FnInfo, const EdgeBundles &Bundles, @@ -490,10 +548,13 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context, CurrentState = InState; for (auto &Inst : Block.Insts) { - if (CurrentState != Inst.NeededState) - emitStateChange(Context, MBB, Inst.InsertPt, CurrentState, - Inst.NeededState, Inst.PhysLiveRegs); - CurrentState = Inst.NeededState; + if (CurrentState != Inst.NeededState) { + auto [InsertPt, PhysLiveRegs] = + findStateChangeInsertionPoint(MBB, Block, &Inst); + emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState, + PhysLiveRegs); + CurrentState = Inst.NeededState; + } } if (MBB.succ_empty()) @@ -501,9 +562,12 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context, ZAState OutState = BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)]; - if (CurrentState != OutState) - emitStateChange(Context, MBB, MBB.getFirstTerminator(), CurrentState, - OutState, Block.PhysLiveRegsAtExit); + if (CurrentState != OutState) { + auto [InsertPt, PhysLiveRegs] = + findStateChangeInsertionPoint(MBB, Block, Block.Insts.end()); + emitStateChange(Context, MBB, InsertPt, CurrentState, OutState, + PhysLiveRegs); + } } } diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index c684f9e..01a40c1 100644 --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -654,7 +654,6 @@ void SIPreEmitPeephole::collectUnpackingCandidates( if (TotalCyclesBetweenCandidates < NumMFMACycles - 1) InstrsToUnpack.insert(&Instr); } - return; } void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) { @@ -681,7 +680,6 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) { HiDstOp.setIsRenamable(DstOp.isRenamable()); I.eraseFromParent(); - return; } MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I, diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 9945ecc..0d7b6d1 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -161,8 +161,8 @@ namespace { friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) { return TE.PseudoOpc < PseudoOpc; } - friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc, - const NEONLdStTableEntry &TE) { + [[maybe_unused]] friend bool operator<(unsigned PseudoOpc, + const NEONLdStTableEntry &TE) { return PseudoOpc < TE.PseudoOpc; } }; diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index 82c43ff..26a8728 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -1165,12 +1165,15 @@ void DXILBitcodeWriter::writeValueSymbolTableForwardDecl() {} /// Returns the bit offset to backpatch with the location of the real VST. void DXILBitcodeWriter::writeModuleInfo() { // Emit various pieces of data attached to a module. 
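The MachineSMEABI hunks above are the functional core of this set: instead of emitting a ZA state change exactly where an instruction needs the new state (potentially clobbering live NZCV flags), findStateChangeInsertionPoint scans backwards toward the previous state change for a point where the flags are dead. The following standalone model illustrates that scan; the Inst type, helper names, and flag handling are simplifications of mine, not the pass's actual interfaces:

```cpp
// Standalone sketch (not LLVM code) of the backward liveness scan: walk from
// a preferred insertion point back toward the previous state change and stop
// at the first gap where the flags register is dead.
#include <cstdio>
#include <vector>

struct Inst { bool ReadsFlags, WritesFlags; };

// Flag liveness just before instruction I, given liveness just after it.
static bool liveBefore(const Inst &I, bool LiveAfter) {
  if (I.ReadsFlags) return true;    // a reader keeps the flags live
  if (I.WritesFlags) return false;  // a (full) writer kills prior liveness
  return LiveAfter;
}

// Returns a gap index in [Lo, Hi] where flag-clobbering code can be inserted,
// or Hi if the flags are live across the whole range.
static size_t findInsertionPoint(const std::vector<Inst> &Block, size_t Lo,
                                 size_t Hi, bool LiveAtHi) {
  bool Live = LiveAtHi;
  for (size_t I = Hi;; --I) {
    if (!Live) return I;            // flags dead at gap I: safe spot
    if (I == Lo) break;
    Live = liveBefore(Block[I - 1], Live);
  }
  return Hi;                        // fall back to the original point
}

int main() {
  // cmp (writes flags), add (neither), b.eq (reads). With the flags live out
  // of the range, the only safe spot is gap 0, before the defining cmp.
  std::vector<Inst> B = {{false, true}, {false, false}, {true, false}};
  std::printf("insert at gap %zu\n",
              findInsertionPoint(B, 0, 3, /*LiveAtHi=*/true)); // prints 0
}
```

The real pass additionally refuses to move across calls and call-frame pseudos, since a call boundary may itself carry a state change.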
- if (!M.getTargetTriple().empty()) - writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, - M.getTargetTriple().str(), 0 /*TODO*/); - const std::string &DL = M.getDataLayoutStr(); - if (!DL.empty()) - writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); + + // We need to hardcode a triple and datalayout that's compatible with the + // historical DXIL triple and datalayout from DXC. + StringRef Triple = "dxil-ms-dx"; + StringRef DL = "e-m:e-p:32:32-i1:8-i8:8-i16:32-i32:32-i64:64-" + "f16:32-f32:32-f64:64-n8:16:32:64"; + writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, Triple, 0 /*TODO*/); + writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); + if (!M.getModuleInlineAsm().empty()) writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(), 0 /*TODO*/); diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp index 1eb03bf..725f2b1 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp @@ -149,11 +149,6 @@ public: std::string Data; llvm::raw_string_ostream OS(Data); - Triple OriginalTriple = M.getTargetTriple(); - // Set to DXIL triple when write to bitcode. - // Only the output bitcode need to be DXIL triple. - M.setTargetTriple(Triple("dxil-ms-dx")); - // Perform late legalization of lifetime intrinsics that would otherwise // fail the Module Verifier if performed in an earlier pass legalizeLifetimeIntrinsics(M); @@ -165,9 +160,6 @@ public: // not-so-legal legalizations removeLifetimeIntrinsics(M); - // Recover triple. - M.setTargetTriple(OriginalTriple); - Constant *ModuleConstant = ConstantDataArray::get(M.getContext(), arrayRefFromStringRef(Data)); auto *GV = new llvm::GlobalVariable(M, ModuleConstant->getType(), true, diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 52e6b0b..68f5312 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -174,8 +174,8 @@ namespace { const TargetRegisterInfo *TRI; }; - raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) - LLVM_ATTRIBUTE_UNUSED; + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const PrintRegSet &P); raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { OS << '{'; for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp index 14b6bb3..9087f9d 100644 --- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -272,15 +272,14 @@ namespace { OS << *I << ' ' << **I << '\n'; } - raw_ostream &operator<< (raw_ostream &OS, - const NodeVect &S) LLVM_ATTRIBUTE_UNUSED; + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const NodeVect &S); raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) { dump_node_container(OS, S); return OS; } - raw_ostream &operator<< (raw_ostream &OS, - const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED; + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const NodeToUsesMap &M); raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){ for (const auto &I : M) { const UseSet &Us = I.second; @@ -914,9 +913,8 @@ namespace { const NodeToValueMap ⤅ }; - raw_ostream &operator<< (raw_ostream &OS, - const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ; - raw_ostream &operator<< (raw_ostream &OS, 
const LocationAsBlock &Loc) { + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const LocationAsBlock &Loc) { for (const auto &I : Loc.Map) { OS << I.first << " -> "; if (BasicBlock *B = cast_or_null<BasicBlock>(I.second)) diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 14a7ae7..3900aac 100644 --- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -132,8 +132,7 @@ namespace { const TargetRegisterInfo &TRI; friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P); }; - raw_ostream &operator<<(raw_ostream &OS, - const PrintFP &P) LLVM_ATTRIBUTE_UNUSED; + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P); raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) { OS << "{ SplitB:" << PrintMB(P.FP.SplitB) << ", PredR:" << printReg(P.FP.PredR, &P.TRI) diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp index f9fdab4..9c81e96 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -51,11 +51,11 @@ private: const TargetRegisterInfo &TRI; }; - raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) - LLVM_ATTRIBUTE_UNUSED; - raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) { - return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg); - } +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const PrintRegister &PR); +raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) { + return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg); +} class HexagonGenPredicate : public MachineFunctionPass { public: diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index bfea50e..6b48a21 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -422,12 +422,12 @@ static MCTargetStreamer *createHexagonNullTargetStreamer(MCStreamer &S) { return new HexagonTargetStreamer(S); } -static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo* STI, uint64_t F) { +[[maybe_unused]] static void clearFeature(MCSubtargetInfo *STI, uint64_t F) { if (STI->hasFeature(F)) STI->ToggleFeature(F); } -static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) { +[[maybe_unused]] static bool checkFeature(MCSubtargetInfo *STI, uint64_t F) { return STI->hasFeature(F); } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 944a1e2..8bf0d11 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9702,6 +9702,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } return SDV; } + // Recognize build vector patterns to emit VSX vector instructions + // instead of loading value from memory. + if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG)) + return VecPat; } // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; @@ -15696,6 +15700,142 @@ combineElementTruncationToVectorTruncation(SDNode *N, return SDValue(); } +// LXVKQ instruction load VSX vector with a special quadword value +// based on an immediate value. 
This helper method returns the details of the +// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount} +// to help generate the LXVKQ instruction and the subsequent shift instruction +// required to match the original build vector pattern. + +// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount} +using LXVKQPattern = std::tuple<uint32_t, uint8_t>; + +static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) { + + // LXVKQ instruction loads the Quadword value: + // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000 + static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64; + static const uint32_t Uim = 16; + + // Check for direct LXVKQ match (no shift needed) + if (FullVal == BasePattern) + return std::make_tuple(Uim, uint8_t{0}); + + // Check if FullValue is 1 (the result of the base pattern >> 127) + if (FullVal == APInt(128, 1)) + return std::make_tuple(Uim, uint8_t{127}); + + return std::nullopt; +} + +/// Combine vector loads to a single load (using lxvkq) or splat with shift of a +/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector. +/// LXVKQ instruction load VSX vector with a special quadword value based on an +/// immediate value. if UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value +/// 0x8000_0000_0000_0000_0000_0000_0000_0000. +/// This can be used to inline the build vector constants that have the +/// following patterns: +/// +/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern) +/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern) +/// MSB pattern can directly loaded using LXVKQ while LSB is loaded using a +/// combination of splatting and right shift instructions. + +SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op, + SelectionDAG &DAG) const { + + assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) && + "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue"); + + // This transformation is only supported if we are loading either a byte, + // halfword, word, or doubleword. + EVT VT = Op.getValueType(); + if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 || + VT == MVT::v2i64)) + return SDValue(); + + LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector (" + << VT.getEVTString() << "): "; + Op->dump()); + + unsigned NumElems = VT.getVectorNumElements(); + unsigned ElemBits = VT.getScalarSizeInBits(); + + bool IsLittleEndian = DAG.getDataLayout().isLittleEndian(); + + // Check for Non-constant operand in the build vector. + for (const SDValue &Operand : Op.getNode()->op_values()) { + if (!isa<ConstantSDNode>(Operand)) + return SDValue(); + } + + // Assemble build vector operands as a 128-bit register value + // We need to reconstruct what the 128-bit register pattern would be + // that produces this vector when interpreted with the current endianness + APInt FullVal = APInt::getZero(128); + + for (unsigned Index = 0; Index < NumElems; ++Index) { + auto *C = cast<ConstantSDNode>(Op.getOperand(Index)); + + // Get element value as raw bits (zero-extended) + uint64_t ElemValue = C->getZExtValue(); + + // Mask to element size to ensure we only get the relevant bits + if (ElemBits < 64) + ElemValue &= ((1ULL << ElemBits) - 1); + + // Calculate bit position for this element in the 128-bit register + unsigned BitPos = + (IsLittleEndian) ? 
(Index * ElemBits) : (128 - (Index + 1) * ElemBits); + + // Create APInt for the element value and shift it to correct position + APInt ElemAPInt(128, ElemValue); + ElemAPInt <<= BitPos; + + // Place the element value at the correct bit position + FullVal |= ElemAPInt; + } + + if (FullVal.isZero() || FullVal.isAllOnes()) + return SDValue(); + + if (auto UIMOpt = getPatternInfo(FullVal)) { + const auto &[Uim, ShiftAmount] = *UIMOpt; + SDLoc Dl(Op); + + // Generate LXVKQ instruction if the shift amount is zero. + if (ShiftAmount == 0) { + SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32); + SDValue LxvkqInstr = + SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0); + LLVM_DEBUG(llvm::dbgs() + << "combineBVLoadsSpecialValue: Instruction Emitted "; + LxvkqInstr.dump()); + return LxvkqInstr; + } + + assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value"); + + // The right shifted pattern can be constructed using a combination of + // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower + // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate + // value 255. + SDValue ShiftAmountVec = + SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32, + DAG.getTargetConstant(255, Dl, MVT::i32)), + 0); + // Generate appropriate right shift instruction + SDValue ShiftVec = SDValue( + DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec), + 0); + LLVM_DEBUG(llvm::dbgs() + << "\n combineBVLoadsSpecialValue: Instruction Emitted "; + ShiftVec.dump()); + return ShiftVec; + } + // No patterns matched for build vectors. + return SDValue(); +} + /// Reduce the number of loads when building a vector. /// /// Building a vector out of multiple loads can be converted to a load diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 59f3387..880aca7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1472,6 +1472,9 @@ namespace llvm { combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineBVLoadsSpecialValue(SDValue Operand, + SelectionDAG &DAG) const; + /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be /// handled by the VINSERTH instruction introduced in ISA 3.0. This is /// essentially any shuffle of v8i16 vectors that just inserts one element diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 2384959..2d8c633 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -2404,6 +2404,190 @@ multiclass XXEvalTernarySelectOr<ValueType Vt> { 126>; } +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNor +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOR(B,C) +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {B, C, AND(B,C), XOR(B,C), NOT(C), +// NOT(B), NAND(B,C)} +// - C is the "false" case op NOR(B,C) +// ============================================================================= +multiclass XXEvalTernarySelectNor<ValueType Vt>{ + // Pattern: (A ? 
AND(B,C) : NOR(B,C)) XXEVAL immediate value: 129 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)), + 129>; + + // Pattern: (A ? B : NOR(B,C)) XXEVAL immediate value: 131 + def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNor Vt:$vB, Vt:$vC)),131>; + + // Pattern: (A ? C : NOR(B,C)) XXEVAL immediate value: 133 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, Vt:$vC, (VNor Vt:$vB, Vt:$vC)), + 133>; + + // Pattern: (A ? XOR(B,C) : NOR(B,C)) XXEVAL immediate value: 134 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)), + 134>; + + // Pattern: (A ? NOT(C) : NOR(B,C)) XXEVAL immediate value: 138 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNor Vt:$vB, Vt:$vC)), + 138>; + + // Pattern: (A ? NOT(B) : NOR(B,C)) XXEVAL immediate value: 140 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNor Vt:$vB, Vt:$vC)), + 140>; + + // Pattern: (A ? NAND(B,C) : NOR(B,C)) XXEVAL immediate value: 142 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)), + 142>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectEqv +// This class matches the equivalent Ternary Operation: A ? f(B,C) : EQV(B,C) +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {OR(B,C), NOR(B,C), NAND(B,C), NOT(B), +// NOT(C)} +// - C is the "false" case op EQV(B,C) +// ============================================================================= +multiclass XXEvalTernarySelectEqv<ValueType Vt>{ + // Pattern: (A ? OR(B,C) : EQV(B,C)) XXEVAL immediate value: 151 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)), + 151>; + + // Pattern: (A ? NOR(B,C) : EQV(B,C)) XXEVAL immediate value: 152 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)), + 152>; + + // Pattern: (A ? NOT(C) : EQV(B,C)) XXEVAL immediate value: 154 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VEqv Vt:$vB, Vt:$vC)), + 154>; + + // Pattern: (A ? NAND(B,C) : EQV(B,C)) XXEVAL immediate value: 158 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)), + 158>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotC +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOT(C) +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {AND(B,C), OR(B,C), XOR(B,C), NAND(B,C), +// B, NOT(B)} +// - C is the "false" case op NOT(C) +// ============================================================================= +multiclass XXEvalTernarySelectNotC<ValueType Vt>{ + // Pattern: (A ? AND(B,C) : NOT(C)) XXEVAL immediate value: 161 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 161>; + + // Pattern: (A ? B : NOT(C)) XXEVAL immediate value: 163 + def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNot Vt:$vC)), 163>; + + // Pattern: (A ? 
XOR(B,C) : NOT(C)) XXEVAL immediate value: 166 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 166>; + + // Pattern: (A ? OR(B,C) : NOT(C)) XXEVAL immediate value: 167 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 167>; + + // Pattern: (A ? NOT(B) : NOT(C)) XXEVAL immediate value: 172 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNot Vt:$vC)), 172>; + + // Pattern: (A ? NAND(B,C) : NOT(C)) XXEVAL immediate value: 174 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 174>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotB +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOT(B) +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {AND(B,C), OR(B,C), XOR(B,C), NAND(B,C), +// C, NOT(B)} +// - C is the "false" case op NOT(B) +// ============================================================================= +multiclass XXEvalTernarySelectNotB<ValueType Vt>{ + // Pattern: (A ? AND(B,C) : NOT(B)) XXEVAL immediate value: 193 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 193>; + + // Pattern: (A ? C : NOT(B)) XXEVAL immediate value: 197 + def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vC, (VNot Vt:$vB)), 197>; + + // Pattern: (A ? XOR(B,C) : NOT(B)) XXEVAL immediate value: 198 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 198>; + + // Pattern: (A ? OR(B,C) : NOT(B)) XXEVAL immediate value: 199 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 199>; + + // Pattern: (A ? NOT(C) : NOT(B)) XXEVAL immediate value: 202 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNot Vt:$vB)), 202>; + + // Pattern: (A ? NAND(B,C) : NOT(B)) XXEVAL immediate value: 206 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 206>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNand +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NAND(B,C) +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {B, C, XOR(B,C), OR(B,C), EQV(B,C)} +// - C is the "false" case op NAND(B,C) +// ============================================================================= +multiclass XXEvalTernarySelectNand<ValueType Vt>{ + // Pattern: (A ? B : NAND(B,C)) XXEVAL immediate value: 227 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, Vt:$vB, (VNand Vt:$vB, Vt:$vC)), 227>; + + // Pattern: (A ? C : NAND(B,C)) XXEVAL immediate value: 229 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, Vt:$vC, (VNand Vt:$vB, Vt:$vC)), 229>; + + // Pattern: (A ? XOR(B,C) : NAND(B,C)) XXEVAL immediate value: 230 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)), + 230>; + + // Pattern: (A ? OR(B,C) : NAND(B,C)) XXEVAL immediate value: 231 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)), + 231>; + + // Pattern: (A ? 
EQV(B,C) : NAND(B,C)) XXEVAL immediate value: 233 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)), + 233>; +} + let Predicates = [PrefixInstrs, HasP10Vector] in { let AddedComplexity = 400 in { def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A, @@ -2519,6 +2703,11 @@ let Predicates = [PrefixInstrs, HasP10Vector] in { defm : XXEvalTernarySelectC<Ty>; defm : XXEvalTernarySelectXor<Ty>; defm : XXEvalTernarySelectOr<Ty>; + defm : XXEvalTernarySelectNor<Ty>; + defm : XXEvalTernarySelectEqv<Ty>; + defm : XXEvalTernarySelectNotC<Ty>; + defm : XXEvalTernarySelectNotB<Ty>; + defm : XXEvalTernarySelectNand<Ty>; } // Anonymous patterns to select prefixed VSX loads and stores. diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 8851a0f..e857b2d 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -3356,10 +3356,10 @@ bool RISCVAsmParser::parseDirectiveAttribute() { bool isValidInsnFormat(StringRef Format, const MCSubtargetInfo &STI) { return StringSwitch<bool>(Format) - .Cases("r", "r4", "i", "b", "sb", "u", "j", "uj", "s", true) - .Cases("cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj", + .Cases({"r", "r4", "i", "b", "sb", "u", "j", "uj", "s"}, true) + .Cases({"cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj"}, STI.hasFeature(RISCV::FeatureStdExtZca)) - .Cases("qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es", + .Cases({"qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es"}, !STI.hasFeature(RISCV::Feature64Bit)) .Default(false); } diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp index 50730c6..ab93bba 100644 --- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp +++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp @@ -43,7 +43,7 @@ const llvm::StringRef RISCVLMULInstrument::DESC_NAME = "RISCV-LMUL"; bool RISCVLMULInstrument::isDataValid(llvm::StringRef Data) { // Return true if not one of the valid LMUL strings return StringSwitch<bool>(Data) - .Cases("M1", "M2", "M4", "M8", "MF2", "MF4", "MF8", true) + .Cases({"M1", "M2", "M4", "M8", "MF2", "MF4", "MF8"}, true) .Default(false); } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index f863392a..637d61fe 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -270,7 +270,7 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2> // and floating point computation. // The V pipeline is modeled by the VCQ, VA, VL, and VS resources. There can // be one or two VA (Vector Arithmetic). -multiclass SiFive7ProcResources<bit extraVALU = false> { +multiclass SiFive7ProcResources<bit dualVALU = false> { let BufferSize = 0 in { def PipeA : ProcResource<1>; def PipeB : ProcResource<1>; @@ -279,7 +279,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> { def FDiv : ProcResource<1>; // FP Division/Sqrt // Arithmetic sequencer(s) - if extraVALU then { + if dualVALU then { // VA1 can handle any vector airthmetic instruction. def VA1 : ProcResource<1>; // VA2 generally can only handle simple vector arithmetic. 
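Back to the PPCInstrP10.td hunks above: each XXEvalPattern immediate is simply the 8-bit truth table of the selected ternary function, with the row for (A,B,C) = (0,0,0) in the most significant bit — a convention inferred from, and checked against, the immediates listed in the multiclasses. A small standalone check of mine that reproduces two of the XXEvalTernarySelectNor values:

```cpp
// Sketch (not from the patch): derive an xxeval immediate by enumerating the
// 8 input rows of the ternary function, MSB = row (A,B,C) = (0,0,0).
#include <cstdio>

static unsigned xxevalImm(bool (*F)(bool, bool, bool)) {
  unsigned Imm = 0;
  for (unsigned Row = 0; Row < 8; ++Row) {
    bool A = Row & 4, B = Row & 2, C = Row & 1;
    Imm |= unsigned(F(A, B, C)) << (7 - Row);
  }
  return Imm;
}

int main() {
  // A ? AND(B,C) : NOR(B,C) -> 129
  std::printf("%u\n", xxevalImm([](bool A, bool B, bool C) {
    return A ? (B && C) : !(B || C);
  }));
  // A ? B : NOR(B,C)        -> 131
  std::printf("%u\n", xxevalImm([](bool A, bool B, bool C) {
    return A ? B : !(B || C);
  }));
}
```

The remaining immediates in the new multiclasses (133, 134, 138, 140, 142, up through 233 for A ? EQV(B,C) : NAND(B,C)) fall out of the same enumeration.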
@@ -305,7 +305,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> { def PipeAB : ProcResGroup<[!cast<ProcResource>(NAME#"PipeA"), !cast<ProcResource>(NAME#"PipeB")]>; - if extraVALU then + if dualVALU then def VA1OrVA2 : ProcResGroup<[!cast<ProcResource>(NAME#"VA1"), !cast<ProcResource>(NAME#"VA2")]>; } @@ -1550,10 +1550,10 @@ multiclass SiFive7ReadAdvance { /// This multiclass is a "bundle" of (1) processor resources (i.e. pipes) and /// (2) WriteRes entries. It's parameterized by config values that will /// eventually be supplied by different SchedMachineModels. -multiclass SiFive7SchedResources<int vlen, bit extraVALU, +multiclass SiFive7SchedResources<int vlen, bit dualVALU, SiFive7FPLatencies fpLatencies, bit hasFastGather> { - defm SiFive7 : SiFive7ProcResources<extraVALU>; + defm SiFive7 : SiFive7ProcResources<dualVALU>; // Pull out defs from SiFive7ProcResources so we can refer to them by name. defvar SiFive7PipeA = !cast<ProcResource>(NAME # SiFive7PipeA); @@ -1562,10 +1562,10 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU, defvar SiFive7IDiv = !cast<ProcResource>(NAME # SiFive7IDiv); defvar SiFive7FDiv = !cast<ProcResource>(NAME # SiFive7FDiv); // Pass SiFive7VA for VA1 and VA1OrVA2 if there is only 1 VALU. - defvar SiFive7VA1 = !if (extraVALU, + defvar SiFive7VA1 = !if (dualVALU, !cast<ProcResource>(NAME # SiFive7VA1), !cast<ProcResource>(NAME # SiFive7VA)); - defvar SiFive7VA1OrVA2 = !if (extraVALU, + defvar SiFive7VA1OrVA2 = !if (dualVALU, !cast<ProcResGroup>(NAME # SiFive7VA1OrVA2), !cast<ProcResource>(NAME # SiFive7VA)); defvar SiFive7VA = !cast<ProcResource>(NAME # SiFive7VA); @@ -1608,7 +1608,7 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel { HasStdExtZknh, HasStdExtZksed, HasStdExtZksh, HasStdExtZkr]; int VLEN = vlen; - bit HasExtraVALU = false; + bit HasDualVALU = false; SiFive7FPLatencies FPLatencies; bit HasFastGather = false; @@ -1635,7 +1635,7 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> { } def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> { - let HasExtraVALU = true; + let HasDualVALU = true; let FPLatencies = SiFive7LowFPLatencies; let HasFastGather = true; } @@ -1643,7 +1643,7 @@ def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> { /// Binding models to their scheduling resources. 
foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in { let SchedModel = model in - defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU, + defm model.Name : SiFive7SchedResources<model.VLEN, model.HasDualVALU, model.FPLatencies, model.HasFastGather>; } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index a466ab2..a0cff4d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3765,7 +3765,6 @@ void SPIRVInstructionSelector::decorateUsesAsNonUniform( SPIRV::Decoration::NonUniformEXT, {}); } } - return; } bool SPIRVInstructionSelector::extractSubvector( diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp index 2934c88..fa08d44 100644 --- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp @@ -246,8 +246,7 @@ SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, } } -static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI) -{ +[[maybe_unused]] static bool verifyLeafProcRegUse(MachineRegisterInfo *MRI) { for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) if (MRI->isPhysRegUsed(reg)) diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp index d9c8e22..6e99fc3 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp @@ -23,7 +23,7 @@ std::optional<wasm::ValType> WebAssembly::parseType(StringRef Type) { .Case("i64", wasm::ValType::I64) .Case("f32", wasm::ValType::F32) .Case("f64", wasm::ValType::F64) - .Cases("v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2", + .Cases({"v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2"}, wasm::ValType::V128) .Case("funcref", wasm::ValType::FUNCREF) .Case("externref", wasm::ValType::EXTERNREF) diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index a8908d4..ac251fd 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -3514,15 +3514,16 @@ bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, // xacquire <insn> ; xacquire must be accompanied by 'lock' bool IsPrefix = StringSwitch<bool>(Name) - .Cases("cs", "ds", "es", "fs", "gs", "ss", true) - .Cases("rex64", "data32", "data16", "addr32", "addr16", true) - .Cases("xacquire", "xrelease", true) - .Cases("acquire", "release", isParsingIntelSyntax()) + .Cases({"cs", "ds", "es", "fs", "gs", "ss"}, true) + .Cases({"rex64", "data32", "data16", "addr32", "addr16"}, true) + .Cases({"xacquire", "xrelease"}, true) + .Cases({"acquire", "release"}, isParsingIntelSyntax()) .Default(false); auto isLockRepeatNtPrefix = [](StringRef N) { return StringSwitch<bool>(N) - .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true) + .Cases({"lock", "rep", "repe", "repz", "repne", "repnz", "notrack"}, + true) .Default(false); }; diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 2bf016a..6db780f 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1338,7 +1338,6 @@ def ProcessorFeatures { list<SubtargetFeature> PTLFeatures = !listremove(ARLSFeatures, [FeatureWIDEKL]); - // Clearwaterforest 
list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI, FeatureAVXVNNIINT16, @@ -1880,8 +1879,10 @@ def : ProcModel<P, AlderlakePModel, } def : ProcModel<"lunarlake", LunarlakePModel, ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>; -def : ProcModel<"pantherlake", AlderlakePModel, +foreach P = ["pantherlake", "wildcatlake"] in { +def : ProcModel<P, AlderlakePModel, ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>; +} def : ProcModel<"clearwaterforest", AlderlakePModel, ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>; def : ProcModel<"emeraldrapids", SapphireRapidsModel, diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp index e0991aa..9f88fda 100644 --- a/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -602,8 +602,7 @@ namespace { friend bool operator<(const TableEntry &TE, unsigned V) { return TE.from < V; } - friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V, - const TableEntry &TE) { + [[maybe_unused]] friend bool operator<(unsigned V, const TableEntry &TE) { return V < TE.from; } }; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c32b1a6..a0b64ff 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58342,11 +58342,12 @@ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) { } else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) { SDValue Src = Op1; SDValue Op10 = Op1.getOperand(0); - if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1))) { - // res, flags2 = sub 0, (and (xor X, -1), Y) + if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1)) && + llvm::isOneConstant(Op1.getOperand(1))) { + // res, flags2 = sub 0, (and (xor X, -1), 1) // cload/cstore ..., cond_ne, flag2 // -> - // res, flags2 = sub 0, (and X, Y) + // res, flags2 = sub 0, (and X, 1) // cload/cstore ..., cond_e, flag2 Src = DAG.getNode(ISD::AND, DL, Op1.getValueType(), Op10.getOperand(0), Op1.getOperand(1)); diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 0fd44b7..ec31675 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1256,8 +1256,17 @@ def : Pat<(i64 (X86Wrapper tconstpool :$dst)), (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tjumptable :$dst)), (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>; -def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; + +// If the globaladdr is an absolute_symbol, don't bother using the sign extending +// instruction since there's no benefit to using it with absolute symbols. +def globalAddrNoAbsSym : PatLeaf<(tglobaladdr:$dst), [{ + auto *GA = cast<GlobalAddressSDNode>(N); + return !GA->getGlobal()->getAbsoluteSymbolRange(); +}]>; +def : Pat<(i64 (X86Wrapper globalAddrNoAbsSym:$dst)), + (MOV64ri32 tglobaladdr:$dst)>, + Requires<[KernelCode]>; + def : Pat<(i64 (X86Wrapper texternalsym:$dst)), (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper mcsym:$dst)), |
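Finally, the X86ISelLowering change is a miscompile fix rather than a cleanup: the old combine rewrote sub 0, ((xor X, -1) & Y) under cond_ne into sub 0, (X & Y) under cond_e for arbitrary Y, but the two conditions only agree when Y is 1, hence the added isOneConstant guard. A brute-force demonstration (standalone, not from the patch):

```cpp
// Checks that "(~X & Y) != 0" (old cond_ne form) and "(X & Y) == 0" (new
// cond_e form) agree when Y == 1 but not for general Y.
#include <cstdio>

int main() {
  for (unsigned X = 0; X < 16; ++X)
    for (unsigned Y = 0; Y < 16; ++Y) {
      bool Orig = (~X & Y & 0xFu) != 0; // condition before the rewrite
      bool New = (X & Y) == 0;          // condition after the rewrite
      if (Y == 1 && Orig != New)
        std::printf("Y==1 mismatch at X=%u\n", X);            // never fires
      if (X == 1 && Y == 3 && Orig != New)
        std::printf("X=1, Y=3: old=%d new=%d\n", Orig, New);  // fires
    }
}
```

With Y restricted to 1, ~X & 1 is exactly the logical negation of X & 1, so flipping cond_ne to cond_e preserves the semantics; for a multi-bit Y such as 3 it does not, as the counterexample shows.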