Diffstat (limited to 'llvm/lib/Target')
34 files changed, 1065 insertions, 160 deletions
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 2c3870c..636d4f8a 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -8217,6 +8217,8 @@ bool AArch64AsmParser::parseDataExpr(const MCExpr *&Res) { Spec = AArch64::S_GOTPCREL; else if (Identifier == "plt") Spec = AArch64::S_PLT; + else if (Identifier == "funcinit") + Spec = AArch64::S_FUNCINIT; } if (Spec == AArch64::S_None) return Error(Loc, "invalid relocation specifier"); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index a388216..892b8da 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -232,6 +232,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(const MCFixup &Fixup, } if (RefKind == AArch64::S_AUTH || RefKind == AArch64::S_AUTHADDR) return ELF::R_AARCH64_AUTH_ABS64; + if (RefKind == AArch64::S_FUNCINIT) + return ELF::R_AARCH64_FUNCINIT64; return ELF::R_AARCH64_ABS64; } case AArch64::fixup_aarch64_add_imm12: diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 2b5cf34..bc090c6 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -40,6 +40,7 @@ const MCAsmInfo::AtSpecifier ELFAtSpecifiers[] = { {AArch64::S_GOT, "GOT"}, {AArch64::S_GOTPCREL, "GOTPCREL"}, {AArch64::S_PLT, "PLT"}, + {AArch64::S_FUNCINIT, "FUNCINIT"}, }; const MCAsmInfo::AtSpecifier MachOAtSpecifiers[] = { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 0dfa61b..f2acff5 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -164,6 +164,7 @@ enum { // ELF relocation specifiers in data directives: S_PLT = 0x400, S_GOTPCREL, + S_FUNCINIT, // Mach-O @ relocation specifiers: S_MACHO_GOT, diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp index dd6fa16..d71f728 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -130,6 +130,12 @@ SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI) if (auto *CalledFunction = CB.getCalledFunction()) CalledFn = SMEAttrs(*CalledFunction, TLI); + // An `invoke` of an agnostic ZA function may not return normally (it may + // resume in an exception block). In this case, it acts like a private ZA + // callee and may require a ZA save to be set up before it is called. + if (isa<InvokeInst>(CB)) + CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false); + // FIXME: We probably should not allow SME attributes on direct calls but // clang duplicates streaming mode attributes at each callsite. 
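An illustrative sketch of the invoke rule above (not part of the patch; it reuses only the SMEAttrs API already visible in this hunk, and the helper name is hypothetical):

// Sketch: an invoke of an agnostic-ZA callee is treated as a private-ZA
// callee, since the unwind path offers no agnostic-ZA guarantee on return.
SMEAttrs calleeAttrsForCall(const CallBase &CB, SMEAttrs CalledFn) {
  if (isa<InvokeInst>(CB))
    CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false);
  return CalledFn; // the caller may now need to set up a ZA save
}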
assert((IsIndirect || diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 80e985d..a2841c11 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -18168,7 +18168,7 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { return CacheLineAlign; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static bool isCopyFromRegOfInlineAsm(const SDNode *N) { assert(N->getOpcode() == ISD::CopyFromReg); do { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index a44a247..d516330 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -865,22 +865,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (DestReg == AMDGPU::VCC_LO) { - if (AMDGPU::SReg_32RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { + if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { + if (DestReg == AMDGPU::VCC_LO) { // FIXME: Hack until VReg_1 removed. assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) - .addImm(0) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addImm(0) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return; - } - - if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } @@ -898,22 +892,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (DestReg == AMDGPU::VCC) { - if (AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { + if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { + if (DestReg == AMDGPU::VCC) { // FIXME: Hack until VReg_1 removed. 
 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
 BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
-        .addImm(0)
-        .addReg(SrcReg, getKillRegState(KillSrc));
+          .addImm(0)
+          .addReg(SrcReg, getKillRegState(KillSrc));
+      return;
     }
-    return;
-  }
-
-  if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
     reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
     return;
   }
diff --git a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
index 39e651d..8945ec3 100644
--- a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
+++ b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
@@ -166,7 +166,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
 }

 // TODO
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 static DecodeStatus DecodesFPR128RegisterClass(MCInst &Inst, uint64_t RegNo,
                                                uint64_t Address,
                                                const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
index ca81d30..8ace2d2 100644
--- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
+++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Support/MD5.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <cstdint>
 #include <optional>

 using namespace llvm;
@@ -193,7 +194,12 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) {
     dxbc::PSV::v2::ResourceBindInfo BindInfo;
     BindInfo.Type = Type;
     BindInfo.LowerBound = Binding.LowerBound;
-    BindInfo.UpperBound = Binding.LowerBound + Binding.Size - 1;
+    assert((Binding.Size == UINT32_MAX ||
+            (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX) &&
+           "Resource range is too large");
+    BindInfo.UpperBound = (Binding.Size == UINT32_MAX)
+                              ? UINT32_MAX
+                              : Binding.LowerBound + Binding.Size - 1;
     BindInfo.Space = Binding.Space;
     BindInfo.Kind = static_cast<dxbc::PSV::ResourceKind>(Kind);
     BindInfo.Flags = Flags;
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 974f653..5f180d6 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -667,11 +667,10 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
   return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable);
 }

-LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily.
-    static DecodeStatus
-    DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo,
-                              uint64_t /*Address*/,
-                              const MCDisassembler *Decoder) {
+[[maybe_unused]] // Suppress warning temporarily.
+static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const MCDisassembler *Decoder) { static const MCPhysReg HvxVQRDecoderTable[] = { Hexagon::VQ0, Hexagon::VQ1, Hexagon::VQ2, Hexagon::VQ3, Hexagon::VQ4, Hexagon::VQ5, Hexagon::VQ6, Hexagon::VQ7}; diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp index 5dde47a..a3296e0 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -419,8 +419,8 @@ namespace { using HCE = HexagonConstExtenders; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const OffsetRange &OR) { if (OR.Min > OR.Max) OS << '!'; OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align) @@ -435,8 +435,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &P) { if (P.Rs.Reg != 0) OS << printReg(P.Rs.Reg, &P.HRI, P.Rs.Sub); else @@ -451,8 +451,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintExpr &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintExpr &P) { OS << "## " << (P.Ex.Neg ? "- " : "+ "); if (P.Ex.Rs.Reg != 0) OS << printReg(P.Ex.Rs.Reg, &P.HRI, P.Ex.Rs.Sub); @@ -469,15 +469,15 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintInit &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintInit &P) { OS << '[' << P.ExtI.first << ", " << PrintExpr(P.ExtI.second, P.HRI) << ']'; return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtDesc &ED) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtDesc &ED) { assert(ED.OpNum != -1u); const MachineBasicBlock &MBB = *ED.getOp().getParent()->getParent(); const MachineFunction &MF = *MBB.getParent(); @@ -493,8 +493,8 @@ namespace { return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtRoot &ER) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtRoot &ER) { switch (ER.Kind) { case MachineOperand::MO_Immediate: OS << "imm:" << ER.V.ImmVal; @@ -527,8 +527,8 @@ namespace { return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtValue &EV) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtValue &EV) { OS << HCE::ExtRoot(EV) << " off:" << EV.Offset; return OS; } @@ -540,8 +540,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintIMap &P) { OS << "{\n"; for (const std::pair<const HCE::ExtenderInit, HCE::IndexList> &Q : P.IMap) { OS << " " << PrintInit(Q.first, P.HRI) << " -> {"; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 4d96cfa..c7a4f68 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -789,7 +789,7 @@ struct ShuffleMask { } }; -LLVM_ATTRIBUTE_UNUSED 
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) { SM.print(OS); return OS; diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 87d052b..e4c0a16 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -364,7 +364,7 @@ private: const HexagonVectorCombine &HVC; }; -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n'; OS << "Addr: " << *AI.Addr << '\n'; @@ -375,7 +375,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no"); OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n'; @@ -394,7 +394,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan::Block &B) { OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "; @@ -408,7 +408,7 @@ raw_ostream &operator<<(raw_ostream &OS, return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) { OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n'; for (const AlignVectors::ByteSpan::Block &B : BS) diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp index fa8ae60..2ff5843 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -111,7 +111,7 @@ namespace { friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) { const ChainOfDependences &CD = D.Chain; int ChainSize = CD.size(); @@ -144,7 +144,7 @@ namespace { bool isDefined() { return Inst2Replace != nullptr; } }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { OS << "** ReuseValue ***\n"; OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index ca98269..e3094b4 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -275,7 +275,7 @@ namespace HexagonII { INST_ICLASS_ALU32_3 = 0xf0000000 }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] static unsigned getMemAccessSizeInBytes(MemAccessSize S) { switch (S) { case ByteAccess: return 1; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 944a1e2..8bf0d11 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9702,6 +9702,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } return SDV; } + // Recognize build vector patterns to emit VSX vector instructions + // instead of loading value from memory. + if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG)) + return VecPat; } // Check if this is a splat of a constant value. 
 APInt APSplatBits, APSplatUndef;
@@ -15696,6 +15700,142 @@ combineElementTruncationToVectorTruncation(SDNode *N,
   return SDValue();
 }

+// The LXVKQ instruction loads a VSX vector with a special quadword value
+// based on an immediate value. This helper method returns the details of the
+// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
+// to help generate the LXVKQ instruction and the subsequent shift instruction
+// required to match the original build vector pattern.
+
+// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
+using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
+
+static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
+
+  // The LXVKQ instruction loads the quadword value:
+  // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
+  static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
+  static const uint32_t Uim = 16;
+
+  // Check for a direct LXVKQ match (no shift needed).
+  if (FullVal == BasePattern)
+    return std::make_tuple(Uim, uint8_t{0});
+
+  // Check if FullVal is 1 (the result of the base pattern >> 127).
+  if (FullVal == APInt(128, 1))
+    return std::make_tuple(Uim, uint8_t{127});
+
+  return std::nullopt;
+}
+
+/// Lower a constant build vector to a single LXVKQ instruction, or to a splat
+/// with shift of a constant (xxspltib + vsrq), by recognising special
+/// patterns in the build vector.
+/// The LXVKQ instruction loads a VSX vector with a special quadword value
+/// based on an immediate value. If UIM=0b10000, LXVKQ loads VSR[32×TX+T] with
+/// the value 0x8000_0000_0000_0000_0000_0000_0000_0000.
+/// This can be used to inline the build vector constants that have the
+/// following patterns:
+///
+/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
+/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
+/// The MSB pattern can be loaded directly using LXVKQ, while the LSB pattern
+/// is loaded using a combination of splatting and right shift instructions.
+
+SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+
+  assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
+         "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
+
+  // This transformation is only supported if we are loading either a byte,
+  // halfword, word, or doubleword.
+  EVT VT = Op.getValueType();
+  if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
+        VT == MVT::v2i64))
+    return SDValue();
+
+  LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
+                          << VT.getEVTString() << "): ";
+             Op->dump());
+
+  unsigned NumElems = VT.getVectorNumElements();
+  unsigned ElemBits = VT.getScalarSizeInBits();
+
+  bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
+
+  // Check for non-constant operands in the build vector.
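A worked illustration of the endianness logic in the loop that follows (an editorial sketch, not part of the patch; the 128-bit APInt is modeled here with two uint64_t halves, and the helper names are hypothetical):

#include <cassert>
#include <cstdint>

struct U128 { uint64_t Hi = 0, Lo = 0; };

// Place a 64-bit element at bit position Pos (0 = least significant bit);
// this sketch only handles the 64-bit-aligned positions used below.
static void placeElem(U128 &V, uint64_t Elem, unsigned Pos) {
  if (Pos < 64)
    V.Lo |= Elem << Pos;
  else
    V.Hi |= Elem << (Pos - 64);
}

int main() {
  // v2i64 {1, 0} on little-endian: BitPos = Index * 64 = {0, 64}, giving
  // FullVal == 1, the LSB-set pattern (XXSPLTIB 255 + VSRQ).
  U128 LE;
  placeElem(LE, 1, 0);
  placeElem(LE, 0, 64);
  assert(LE.Hi == 0 && LE.Lo == 1);

  // v2i64 {0x8000000000000000, 0} on big-endian:
  // BitPos = 128 - (Index + 1) * 64 = {64, 0}, giving the MSB-set pattern
  // that LXVKQ loads directly with UIM = 16.
  U128 BE;
  placeElem(BE, 0x8000000000000000ULL, 64);
  placeElem(BE, 0, 0);
  assert(BE.Hi == 0x8000000000000000ULL && BE.Lo == 0);
  return 0;
}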
+ for (const SDValue &Operand : Op.getNode()->op_values()) { + if (!isa<ConstantSDNode>(Operand)) + return SDValue(); + } + + // Assemble build vector operands as a 128-bit register value + // We need to reconstruct what the 128-bit register pattern would be + // that produces this vector when interpreted with the current endianness + APInt FullVal = APInt::getZero(128); + + for (unsigned Index = 0; Index < NumElems; ++Index) { + auto *C = cast<ConstantSDNode>(Op.getOperand(Index)); + + // Get element value as raw bits (zero-extended) + uint64_t ElemValue = C->getZExtValue(); + + // Mask to element size to ensure we only get the relevant bits + if (ElemBits < 64) + ElemValue &= ((1ULL << ElemBits) - 1); + + // Calculate bit position for this element in the 128-bit register + unsigned BitPos = + (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits); + + // Create APInt for the element value and shift it to correct position + APInt ElemAPInt(128, ElemValue); + ElemAPInt <<= BitPos; + + // Place the element value at the correct bit position + FullVal |= ElemAPInt; + } + + if (FullVal.isZero() || FullVal.isAllOnes()) + return SDValue(); + + if (auto UIMOpt = getPatternInfo(FullVal)) { + const auto &[Uim, ShiftAmount] = *UIMOpt; + SDLoc Dl(Op); + + // Generate LXVKQ instruction if the shift amount is zero. + if (ShiftAmount == 0) { + SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32); + SDValue LxvkqInstr = + SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0); + LLVM_DEBUG(llvm::dbgs() + << "combineBVLoadsSpecialValue: Instruction Emitted "; + LxvkqInstr.dump()); + return LxvkqInstr; + } + + assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value"); + + // The right shifted pattern can be constructed using a combination of + // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower + // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate + // value 255. + SDValue ShiftAmountVec = + SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32, + DAG.getTargetConstant(255, Dl, MVT::i32)), + 0); + // Generate appropriate right shift instruction + SDValue ShiftVec = SDValue( + DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec), + 0); + LLVM_DEBUG(llvm::dbgs() + << "\n combineBVLoadsSpecialValue: Instruction Emitted "; + ShiftVec.dump()); + return ShiftVec; + } + // No patterns matched for build vectors. + return SDValue(); +} + /// Reduce the number of loads when building a vector. /// /// Building a vector out of multiple loads can be converted to a load diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 59f3387..880aca7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1472,6 +1472,9 @@ namespace llvm { combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineBVLoadsSpecialValue(SDValue Operand, + SelectionDAG &DAG) const; + /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be /// handled by the VINSERTH instruction introduced in ISA 3.0. 
This is /// essentially any shuffle of v8i16 vectors that just inserts one element
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index 2384959..2d8c633 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2404,6 +2404,190 @@ multiclass XXEvalTernarySelectOr<ValueType Vt> {
                      126>;
 }

+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNor
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOR(B,C)
+// and emits the corresponding xxeval instruction with the imm value. The imm
+// value is the 8-bit truth table of the ternary function, MSB-first from
+// (A,B,C) = (0,0,0); e.g. A ? AND(B,C) : NOR(B,C) evaluates to
+// 0b10000001 = 129.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {B, C, AND(B,C), XOR(B,C), NOT(C),
+//   NOT(B), NAND(B,C)}
+// - NOR(B,C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNor<ValueType Vt> {
+  // Pattern: (A ? AND(B,C) : NOR(B,C)) XXEVAL immediate value: 129
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+      129>;
+
+  // Pattern: (A ? B : NOR(B,C)) XXEVAL immediate value: 131
+  def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNor Vt:$vB, Vt:$vC)), 131>;
+
+  // Pattern: (A ? C : NOR(B,C)) XXEVAL immediate value: 133
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, Vt:$vC, (VNor Vt:$vB, Vt:$vC)),
+      133>;
+
+  // Pattern: (A ? XOR(B,C) : NOR(B,C)) XXEVAL immediate value: 134
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+      134>;
+
+  // Pattern: (A ? NOT(C) : NOR(B,C)) XXEVAL immediate value: 138
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+      138>;
+
+  // Pattern: (A ? NOT(B) : NOR(B,C)) XXEVAL immediate value: 140
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNor Vt:$vB, Vt:$vC)),
+      140>;
+
+  // Pattern: (A ? NAND(B,C) : NOR(B,C)) XXEVAL immediate value: 142
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+      142>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectEqv
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : EQV(B,C)
+// and emits the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {OR(B,C), NOR(B,C), NOT(C),
+//   NAND(B,C)}
+// - EQV(B,C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectEqv<ValueType Vt> {
+  // Pattern: (A ? OR(B,C) : EQV(B,C)) XXEVAL immediate value: 151
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+      151>;
+
+  // Pattern: (A ? NOR(B,C) : EQV(B,C)) XXEVAL immediate value: 152
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+      152>;
+
+  // Pattern: (A ? NOT(C) : EQV(B,C)) XXEVAL immediate value: 154
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+      154>;
+
+  // Pattern: (A ? NAND(B,C) : EQV(B,C)) XXEVAL immediate value: 158
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+      158>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotC
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOT(C)
+// and emits the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {AND(B,C), OR(B,C), XOR(B,C), NAND(B,C),
+//   B, NOT(B)}
+// - NOT(C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNotC<ValueType Vt> {
+  // Pattern: (A ? AND(B,C) : NOT(C)) XXEVAL immediate value: 161
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 161>;
+
+  // Pattern: (A ? B : NOT(C)) XXEVAL immediate value: 163
+  def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNot Vt:$vC)), 163>;
+
+  // Pattern: (A ? XOR(B,C) : NOT(C)) XXEVAL immediate value: 166
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 166>;
+
+  // Pattern: (A ? OR(B,C) : NOT(C)) XXEVAL immediate value: 167
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 167>;
+
+  // Pattern: (A ? NOT(B) : NOT(C)) XXEVAL immediate value: 172
+  def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNot Vt:$vC)), 172>;
+
+  // Pattern: (A ? NAND(B,C) : NOT(C)) XXEVAL immediate value: 174
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 174>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotB
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOT(B)
+// and emits the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {AND(B,C), OR(B,C), XOR(B,C), NAND(B,C),
+//   C, NOT(C)}
+// - NOT(B) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNotB<ValueType Vt> {
+  // Pattern: (A ? AND(B,C) : NOT(B)) XXEVAL immediate value: 193
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 193>;
+
+  // Pattern: (A ? C : NOT(B)) XXEVAL immediate value: 197
+  def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vC, (VNot Vt:$vB)), 197>;
+
+  // Pattern: (A ? XOR(B,C) : NOT(B)) XXEVAL immediate value: 198
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 198>;
+
+  // Pattern: (A ? OR(B,C) : NOT(B)) XXEVAL immediate value: 199
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 199>;
+
+  // Pattern: (A ? NOT(C) : NOT(B)) XXEVAL immediate value: 202
+  def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNot Vt:$vB)), 202>;
+
+  // Pattern: (A ? NAND(B,C) : NOT(B)) XXEVAL immediate value: 206
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 206>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNand
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : NAND(B,C)
+// and emits the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {B, C, XOR(B,C), OR(B,C), EQV(B,C)}
+// - NAND(B,C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNand<ValueType Vt> {
+  // Pattern: (A ? B : NAND(B,C)) XXEVAL immediate value: 227
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, Vt:$vB, (VNand Vt:$vB, Vt:$vC)), 227>;
+
+  // Pattern: (A ? C : NAND(B,C)) XXEVAL immediate value: 229
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, Vt:$vC, (VNand Vt:$vB, Vt:$vC)), 229>;
+
+  // Pattern: (A ? XOR(B,C) : NAND(B,C)) XXEVAL immediate value: 230
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+      230>;
+
+  // Pattern: (A ? OR(B,C) : NAND(B,C)) XXEVAL immediate value: 231
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+      231>;
+
+  // Pattern: (A ? EQV(B,C) : NAND(B,C)) XXEVAL immediate value: 233
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+      233>;
+}
+
 let Predicates = [PrefixInstrs, HasP10Vector] in {
   let AddedComplexity = 400 in {
     def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
@@ -2519,6 +2703,11 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
       defm : XXEvalTernarySelectC<Ty>;
       defm : XXEvalTernarySelectXor<Ty>;
       defm : XXEvalTernarySelectOr<Ty>;
+      defm : XXEvalTernarySelectNor<Ty>;
+      defm : XXEvalTernarySelectEqv<Ty>;
+      defm : XXEvalTernarySelectNotC<Ty>;
+      defm : XXEvalTernarySelectNotB<Ty>;
+      defm : XXEvalTernarySelectNand<Ty>;
     }

     // Anonymous patterns to select prefixed VSX loads and stores.
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 66ca436..de433e4 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -1100,6 +1100,12 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
     --NumOps;
   if (RISCVII::hasRoundModeOp(TSFlags))
     --NumOps;
+  if (RISCVII::hasTWidenOp(TSFlags))
+    --NumOps;
+  if (RISCVII::hasTMOp(TSFlags))
+    --NumOps;
+  if (RISCVII::hasTKOp(TSFlags))
+    --NumOps;

   bool hasVLOutput = RISCVInstrInfo::isFaultOnlyFirstLoad(*MI);
   for (unsigned OpNo = 0; OpNo != NumOps; ++OpNo) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 974252a..9a6afa1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -516,6 +516,44 @@ void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
       CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
 }

+void RISCVDAGToDAGISel::selectXSfmmVSET(SDNode *Node) {
+  if (!Subtarget->hasVendorXSfmmbase())
+    return;
+
+  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
+
+  SDLoc DL(Node);
+  MVT XLenVT = Subtarget->getXLenVT();
+
+  unsigned IntNo = Node->getConstantOperandVal(0);
+
+  assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
+          IntNo == Intrinsic::riscv_sf_vsettm ||
+          IntNo == Intrinsic::riscv_sf_vsettk) &&
+         "Unexpected XSfmm vset intrinsic");
+
+  unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
+  unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
+  unsigned PseudoOpCode =
+      IntNo == Intrinsic::riscv_sf_vsettnt ?
RISCV::PseudoSF_VSETTNT + : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM + : RISCV::PseudoSF_VSETTK; + + if (IntNo == Intrinsic::riscv_sf_vsettnt) { + unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0); + SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); + + ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT, + Node->getOperand(1), VTypeIOp)); + } else { + SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT); + SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT); + ReplaceNode(Node, + CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT, + Node->getOperand(1), Log2SEW, TWiden)); + } +} + bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) { MVT VT = Node->getSimpleValueType(0); unsigned Opcode = Node->getOpcode(); @@ -847,6 +885,11 @@ bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) { return true; } +static Register getTileReg(uint64_t TileNum) { + assert(TileNum <= 15 && "Invalid tile number"); + return RISCV::T0 + TileNum; +} + void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) { if (!Subtarget->hasVInstructions()) return; @@ -2035,6 +2078,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { case Intrinsic::riscv_vsetvli: case Intrinsic::riscv_vsetvlimax: return selectVSETVLI(Node); + case Intrinsic::riscv_sf_vsettnt: + case Intrinsic::riscv_sf_vsettm: + case Intrinsic::riscv_sf_vsettk: + return selectXSfmmVSET(Node); } break; } @@ -2458,6 +2505,142 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { case Intrinsic::riscv_sf_vc_i_se: selectSF_VC_X_SE(Node); return; + case Intrinsic::riscv_sf_vlte8: + case Intrinsic::riscv_sf_vlte16: + case Intrinsic::riscv_sf_vlte32: + case Intrinsic::riscv_sf_vlte64: { + unsigned Log2SEW; + unsigned PseudoInst; + switch (IntNo) { + case Intrinsic::riscv_sf_vlte8: + PseudoInst = RISCV::PseudoSF_VLTE8; + Log2SEW = 3; + break; + case Intrinsic::riscv_sf_vlte16: + PseudoInst = RISCV::PseudoSF_VLTE16; + Log2SEW = 4; + break; + case Intrinsic::riscv_sf_vlte32: + PseudoInst = RISCV::PseudoSF_VLTE32; + Log2SEW = 5; + break; + case Intrinsic::riscv_sf_vlte64: + PseudoInst = RISCV::PseudoSF_VLTE64; + Log2SEW = 6; + break; + } + + SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); + SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT); + SDValue Operands[] = {Node->getOperand(2), + Node->getOperand(3), + Node->getOperand(4), + SEWOp, + TWidenOp, + Node->getOperand(0)}; + + MachineSDNode *TileLoad = + CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands); + if (auto *MemOp = dyn_cast<MemSDNode>(Node)) + CurDAG->setNodeMemRefs(TileLoad, {MemOp->getMemOperand()}); + + ReplaceNode(Node, TileLoad); + return; + } + case Intrinsic::riscv_sf_mm_s_s: + case Intrinsic::riscv_sf_mm_s_u: + case Intrinsic::riscv_sf_mm_u_s: + case Intrinsic::riscv_sf_mm_u_u: + case Intrinsic::riscv_sf_mm_e5m2_e5m2: + case Intrinsic::riscv_sf_mm_e5m2_e4m3: + case Intrinsic::riscv_sf_mm_e4m3_e5m2: + case Intrinsic::riscv_sf_mm_e4m3_e4m3: + case Intrinsic::riscv_sf_mm_f_f: { + bool HasFRM = false; + unsigned PseudoInst; + switch (IntNo) { + case Intrinsic::riscv_sf_mm_s_s: + PseudoInst = RISCV::PseudoSF_MM_S_S; + break; + case Intrinsic::riscv_sf_mm_s_u: + PseudoInst = RISCV::PseudoSF_MM_S_U; + break; + case Intrinsic::riscv_sf_mm_u_s: + PseudoInst = RISCV::PseudoSF_MM_U_S; + break; + case Intrinsic::riscv_sf_mm_u_u: + PseudoInst = RISCV::PseudoSF_MM_U_U; + break; + case Intrinsic::riscv_sf_mm_e5m2_e5m2: + PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2; + HasFRM = true; + 
break; + case Intrinsic::riscv_sf_mm_e5m2_e4m3: + PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3; + HasFRM = true; + break; + case Intrinsic::riscv_sf_mm_e4m3_e5m2: + PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2; + HasFRM = true; + break; + case Intrinsic::riscv_sf_mm_e4m3_e4m3: + PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3; + HasFRM = true; + break; + case Intrinsic::riscv_sf_mm_f_f: + if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16) + PseudoInst = RISCV::PseudoSF_MM_F_F_ALT; + else + PseudoInst = RISCV::PseudoSF_MM_F_F; + HasFRM = true; + break; + } + uint64_t TileNum = Node->getConstantOperandVal(2); + SDValue Op1 = Node->getOperand(3); + SDValue Op2 = Node->getOperand(4); + MVT VT = Op1->getSimpleValueType(0); + unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); + SDValue TmOp = Node->getOperand(5); + SDValue TnOp = Node->getOperand(6); + SDValue TkOp = Node->getOperand(7); + SDValue TWidenOp = Node->getOperand(8); + SDValue Chain = Node->getOperand(0); + + // sf.mm.f.f with sew=32, twiden=2 is invalid + if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 && + TWidenOp->getAsZExtVal() == 2) + reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)"); + + SmallVector<SDValue, 10> Operands( + {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2}); + if (HasFRM) + Operands.push_back( + CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT)); + Operands.append({TmOp, TnOp, TkOp, + CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp, + Chain}); + + auto *NewNode = + CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands); + + ReplaceNode(Node, NewNode); + return; + } + case Intrinsic::riscv_sf_vtzero_t: { + uint64_t TileNum = Node->getConstantOperandVal(2); + SDValue Tm = Node->getOperand(3); + SDValue Tn = Node->getOperand(4); + SDValue Log2SEW = Node->getOperand(5); + SDValue TWiden = Node->getOperand(6); + SDValue Chain = Node->getOperand(0); + auto *NewNode = CurDAG->getMachineNode( + RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(), + {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW, + TWiden, Chain}); + + ReplaceNode(Node, NewNode); + return; + } } break; } diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index f03b44c..19ee103 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -165,6 +165,7 @@ public: void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered); void selectVSETVLI(SDNode *Node); + void selectXSfmmVSET(SDNode *Node); void selectSF_VC_X_SE(SDNode *Node); diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 5b06303..fee1d15 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -177,7 +177,7 @@ def EltDepsVL : EltDeps<vl=1, mask=0>; def EltDepsMask : EltDeps<vl=0, mask=1>; def EltDepsVLMask : EltDeps<vl=1, mask=1>; -class EEW <bits<2> val> { +class EEW<bits<2> val> { bits<2> Value = val; } def EEW1 : EEW<0>; @@ -185,6 +185,13 @@ def EEWSEWx1 : EEW<1>; def EEWSEWx2 : EEW<2>; def EEWSEWx4 : EEW<3>; +class AltFmtType<bits<2> val> { + bits<2> Value = val; +} +def DONT_CARE_ALTFMT : AltFmtType<0>; +def IS_NOT_ALTFMT : AltFmtType<1>; +def IS_ALTFMT : AltFmtType<2>; + class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr, list<dag> pattern, InstFormat format> : Instruction { let Namespace = "RISCV"; @@ -271,8 +278,8 @@ class RVInstCommon<dag outs, dag ins, string 
opcodestr, string argstr, // 0 -> Don't care about altfmt bit in VTYPE. // 1 -> Is not altfmt. // 2 -> Is altfmt(BF16). - bits<2> AltFmtType = 0; - let TSFlags{28-27} = AltFmtType; + AltFmtType AltFmtType = DONT_CARE_ALTFMT; + let TSFlags{28-27} = AltFmtType.Value; // XSfmmbase bit HasTWidenOp = 0; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index c1b23af..65865ce 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -128,9 +128,6 @@ defvar TAIL_AGNOSTIC = 1; defvar TU_MU = 0; defvar TA_MU = 1; defvar TA_MA = 3; -defvar DONT_CARE_ALTFMT = 0; -defvar IS_NOT_ALTFMT = 1; -defvar IS_ALTFMT = 2; //===----------------------------------------------------------------------===// // Utilities. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td index 5ad22e6b..d77a44a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td @@ -418,3 +418,54 @@ let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { ixlenimm:$twiden)>; def PseudoSF_VTDISCARD : RISCVVPseudo<(outs), (ins), []>; } + +class VPatXSfmmTileStore<string intrinsic_name, + string inst_name, + int log2sew> : + Pat<(!cast<Intrinsic>(intrinsic_name) + (XLenVT GPR:$rs2), + (XLenVT GPR:$rs1), + (XLenVT AVL:$tn)), + (!cast<Instruction>(inst_name) + (XLenVT GPR:$rs2), + (XLenVT GPR:$rs1), + GPR:$tn, log2sew, 1)>; + +class VPatXSfmmTileMove_T_V<string intrinsic_name, + string inst_name, + ValueType reg_type, + int log2sew> : + Pat<(!cast<Intrinsic>(intrinsic_name) + (XLenVT GPR:$rs1), + (reg_type VRM8:$vs2), + (XLenVT AVL:$atn)), + (!cast<Instruction>(inst_name) + (XLenVT GPR:$rs1), + (reg_type VRM8:$vs2), + GPR:$atn, log2sew, 1)>; + +class VPatXSfmmTileMove_V_T<string intrinsic_name, + string inst_name, + ValueType result_type, + int log2sew> : + Pat<(result_type (!cast<Intrinsic>(intrinsic_name) + (XLenVT GPR:$rs1), + (XLenVT AVL:$atn))), + (!cast<Instruction>(inst_name) + (XLenVT GPR:$rs1), + GPR:$atn, log2sew, 1)>; + +class VPatXSfmmVTDiscard<string intrinsic_name, + string inst_name> : + Pat<(!cast<Intrinsic>(intrinsic_name)), + (!cast<Instruction>(inst_name))>; + +foreach eew = [8, 16, 32, 64] in + def : VPatXSfmmTileStore<"int_riscv_sf_vste" # eew, "PseudoSF_VSTE" # eew, !logtwo(eew)>; + +foreach vti = [VI8M8, VI16M8, VI32M8, VI64M8, VF16M8, VF32M8, VF64M8, VBF16M8] in { + def : VPatXSfmmTileMove_T_V<"int_riscv_sf_vtmv_t_v", "PseudoSF_VTMV_T_V", vti.Vector, vti.Log2SEW>; + def : VPatXSfmmTileMove_V_T<"int_riscv_sf_vtmv_v_t", "PseudoSF_VTMV_V_T", vti.Vector, vti.Log2SEW>; +} + +def : VPatXSfmmVTDiscard<"int_riscv_sf_vtdiscard", "PseudoSF_VTDISCARD">; diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 96ad5c6..0a8838c 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -156,13 +156,13 @@ FunctionPass *llvm::createRISCVVLOptimizerPass() { return new RISCVVLOptimizer(); } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) { OI.print(OS); return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static raw_ostream &operator<<(raw_ostream &OS, const std::optional<OperandInfo> &OI) { if (OI) diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp index b765fec..56a6168 100644 --- 
a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp @@ -139,8 +139,8 @@ void SPIRVAsmPrinter::emitEndOfAsmFile(Module &M) { // anymore. void SPIRVAsmPrinter::cleanUp(Module &M) { // Verifier disallows uses of intrinsic global variables. - for (StringRef GVName : {"llvm.global_ctors", "llvm.global_dtors", - "llvm.used", "llvm.compiler.used"}) { + for (StringRef GVName : + {"llvm.global_ctors", "llvm.global_dtors", "llvm.used"}) { if (GlobalVariable *GV = M.getNamedGlobal(GVName)) GV->setName(""); } diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index e16c8f0..c6c6182 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -17,6 +17,7 @@ #include "SPIRVTargetMachine.h" #include "SPIRVUtils.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstVisitor.h" @@ -2028,9 +2029,13 @@ Instruction *SPIRVEmitIntrinsics::visitUnreachableInst(UnreachableInst &I) { void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV, IRBuilder<> &B) { - // Skip special artifical variable llvm.global.annotations. - if (GV.getName() == "llvm.global.annotations") + // Skip special artificial variables. + static const StringSet<> ArtificialGlobals{"llvm.global.annotations", + "llvm.compiler.used"}; + + if (ArtificialGlobals.contains(GV.getName())) return; + Constant *Init = nullptr; if (hasInitializer(&GV)) { // Deduce element type and store results in Global Registry. diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h index a0cf881..5a06ea3 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.h +++ b/llvm/lib/Target/SystemZ/SystemZ.h @@ -24,6 +24,7 @@ class SystemZTargetMachine; namespace SystemZ { // Condition-code mask values. 
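As orientation for the SystemZ changes below, where the expression 1 << (3 - CC) recurs: condition-code masks are 4-bit sets with one bit per CC value, CC 0 occupying the most significant bit, and the new CCMASK_NONE names the empty set (used later as a "no known CC" result). A minimal sketch of the convention (an illustration, not part of the patch):

// Assumes only the constants defined in this hunk.
constexpr unsigned maskForCC(unsigned CC) { return 1u << (3 - CC); }
static_assert(maskForCC(0) == SystemZ::CCMASK_0, "CC 0 is the mask MSB");
static_assert(maskForCC(3) == SystemZ::CCMASK_3, "CC 3 is the mask LSB");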
+const unsigned CCMASK_NONE = 0; const unsigned CCMASK_0 = 1 << 3; const unsigned CCMASK_1 = 1 << 2; const unsigned CCMASK_2 = 1 << 1; diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3b7d11a..de28faf 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -15,6 +15,7 @@ #include "SystemZConstantPoolValue.h" #include "SystemZMachineFunctionInfo.h" #include "SystemZTargetMachine.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -24,6 +25,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" @@ -1514,6 +1516,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const { default: break; } + } else if (Constraint.size() == 5 && Constraint.starts_with("{")) { + if (StringRef("{@cc}").compare(Constraint) == 0) + return C_Other; } return TargetLowering::getConstraintType(Constraint); } @@ -1707,6 +1712,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, SystemZMC::VR128Regs, 32); } + if (Constraint[1] == '@') { + if (StringRef("{@cc}").compare(Constraint) == 0) + return std::make_pair(0u, &SystemZ::GR32BitRegClass); + } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } @@ -1737,6 +1746,38 @@ Register SystemZTargetLowering::getExceptionSelectorRegister( return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D; } +// Convert condition code in CCReg to an i32 value. +static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { + SDLoc DL(CCReg); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); + return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); +} + +// Lower @cc targets via setcc. +SDValue SystemZTargetLowering::LowerAsmOutputForConstraint( + SDValue &Chain, SDValue &Glue, const SDLoc &DL, + const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const { + if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0) + return SDValue(); + + // Check that return type is valid. + if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() || + OpInfo.ConstraintVT.getSizeInBits() < 8) + report_fatal_error("Glue output operand is of invalid type"); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.addLiveIn(SystemZ::CC); + + if (Glue.getNode()) { + Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue); + Chain = Glue.getValue(1); + } else + Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32); + return getCCResult(DAG, Glue); +} + void SystemZTargetLowering::LowerAsmOperandForConstraint( SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { @@ -5300,14 +5341,6 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, Node->getMemoryVT(), Node->getMemOperand()); } -// Convert condition code in CCReg to an i32 value. 
-static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { - SDLoc DL(CCReg); - SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); - return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, - DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); -} - SDValue SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { @@ -8723,95 +8756,247 @@ SDValue SystemZTargetLowering::combineSETCC( return SDValue(); } -static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { +static std::pair<SDValue, int> findCCUse(const SDValue &Val) { + switch (Val.getOpcode()) { + default: + return std::make_pair(SDValue(), SystemZ::CCMASK_NONE); + case SystemZISD::IPM: + if (Val.getOperand(0).getOpcode() == SystemZISD::CLC || + Val.getOperand(0).getOpcode() == SystemZISD::STRCMP) + return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP); + return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY); + case SystemZISD::SELECT_CCMASK: { + SDValue Op4CCReg = Val.getOperand(4); + if (Op4CCReg.getOpcode() == SystemZISD::ICMP || + Op4CCReg.getOpcode() == SystemZISD::TM) { + auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0)); + if (OpCC != SDValue()) + return std::make_pair(OpCC, OpCCValid); + } + auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2)); + if (!CCValid) + return std::make_pair(SDValue(), SystemZ::CCMASK_NONE); + int CCValidVal = CCValid->getZExtValue(); + return std::make_pair(Op4CCReg, CCValidVal); + } + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0)); + if (Op0CC != SDValue()) + return std::make_pair(Op0CC, Op0CCValid); + return findCCUse(Val.getOperand(1)); + } +} + +static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, + SelectionDAG &DAG); + +SmallVector<SDValue, 4> static simplifyAssumingCCVal(SDValue &Val, SDValue &CC, + SelectionDAG &DAG) { + SDLoc DL(Val); + auto Opcode = Val.getOpcode(); + switch (Opcode) { + default: + return {}; + case ISD::Constant: + return {Val, Val, Val, Val}; + case SystemZISD::IPM: { + SDValue IPMOp0 = Val.getOperand(0); + if (IPMOp0 != CC) + return {}; + SmallVector<SDValue, 4> ShiftedCCVals; + for (auto CC : {0, 1, 2, 3}) + ShiftedCCVals.emplace_back( + DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32)); + return ShiftedCCVals; + } + case SystemZISD::SELECT_CCMASK: { + SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1); + auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2)); + auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3)); + if (!CCValid || !CCMask) + return {}; + + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG); + const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG); + if (TrueSDVals.empty() || FalseSDVals.empty()) + return {}; + SDValue Op4CCReg = Val.getOperand(4); + if (Op4CCReg != CC) + combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG); + if (Op4CCReg != CC) + return {}; + SmallVector<SDValue, 4> MergedSDVals; + for (auto &CCVal : {0, 1, 2, 3}) + MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0) + ? TrueSDVals[CCVal] + : FalseSDVals[CCVal]); + return MergedSDVals; + } + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SRA: + // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA + // would clobber CC). 
+ if (!Val.hasOneUse()) + return {}; + [[fallthrough]]; + case ISD::SHL: + case ISD::SRL: + SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1); + const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) + return {}; + SmallVector<SDValue, 4> BinaryOpSDVals; + for (auto CCVal : {0, 1, 2, 3}) + BinaryOpSDVals.emplace_back(DAG.getNode( + Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal])); + return BinaryOpSDVals; + } +} + +static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, + SelectionDAG &DAG) { // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code // set by the CCReg instruction using the CCValid / CCMask masks, - // If the CCReg instruction is itself a ICMP testing the condition + // If the CCReg instruction is itself a ICMP / TM testing the condition // code set by some other instruction, see whether we can directly // use that condition code. - - // Verify that we have an ICMP against some constant. - if (CCValid != SystemZ::CCMASK_ICMP) - return false; - auto *ICmp = CCReg.getNode(); - if (ICmp->getOpcode() != SystemZISD::ICMP) - return false; - auto *CompareLHS = ICmp->getOperand(0).getNode(); - auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1)); - if (!CompareRHS) + auto *CCNode = CCReg.getNode(); + if (!CCNode) return false; - // Optimize the case where CompareLHS is a SELECT_CCMASK. - if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) { - // Verify that we have an appropriate mask for a EQ or NE comparison. - bool Invert = false; - if (CCMask == SystemZ::CCMASK_CMP_NE) - Invert = !Invert; - else if (CCMask != SystemZ::CCMASK_CMP_EQ) + if (CCNode->getOpcode() == SystemZISD::TM) { + if (CCValid != SystemZ::CCMASK_TM) return false; - - // Verify that the ICMP compares against one of select values. - auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0)); - if (!TrueVal) - return false; - auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); - if (!FalseVal) + auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) { + auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode()); + auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode()); + if (!Op0Node || !Op1Node) + return -1; + auto Op0APVal = Op0Node->getAPIntValue(); + auto Op1APVal = Op1Node->getAPIntValue(); + auto Result = Op0APVal & Op1APVal; + bool AllOnes = Result == Op1APVal; + bool AllZeros = Result == 0; + bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0; + return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1; + }; + SDValue Op0 = CCNode->getOperand(0); + SDValue Op1 = CCNode->getOperand(1); + auto [Op0CC, Op0CCValid] = findCCUse(Op0); + if (Op0CC == SDValue()) return false; - if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue()) - Invert = !Invert; - else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue()) + const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) return false; - - // Compute the effective CC mask for the new branch or select. 
-  auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
-  auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
-  if (!NewCCValid || !NewCCMask)
-    return false;
-  CCValid = NewCCValid->getZExtValue();
-  CCMask = NewCCMask->getZExtValue();
-  if (Invert)
-    CCMask ^= CCValid;
-
-  // Return the updated CCReg link.
-  CCReg = CompareLHS->getOperand(4);
+    int NewCCMask = 0;
+    for (auto CC : {0, 1, 2, 3}) {
+      auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
+      if (CCVal < 0)
+        return false;
+      NewCCMask <<= 1;
+      NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
+    }
+    NewCCMask &= Op0CCValid;
+    CCReg = Op0CC;
+    CCMask = NewCCMask;
+    CCValid = Op0CCValid;
     return true;
   }
+  if (CCNode->getOpcode() != SystemZISD::ICMP ||
+      CCValid != SystemZ::CCMASK_ICMP)
+    return false;

-  // Optimize the case where CompareRHS is (SRA (SHL (IPM))).
-  if (CompareLHS->getOpcode() == ISD::SRA) {
-    auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
-    if (!SRACount || SRACount->getZExtValue() != 30)
-      return false;
-    auto *SHL = CompareLHS->getOperand(0).getNode();
-    if (SHL->getOpcode() != ISD::SHL)
-      return false;
-    auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
-    if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
-      return false;
-    auto *IPM = SHL->getOperand(0).getNode();
-    if (IPM->getOpcode() != SystemZISD::IPM)
-      return false;
-
-    // Avoid introducing CC spills (because SRA would clobber CC).
-    if (!CompareLHS->hasOneUse())
-      return false;
-    // Verify that the ICMP compares against zero.
-    if (CompareRHS->getZExtValue() != 0)
       return false;
-    // Compute the effective CC mask for the new branch or select.
-    CCMask = SystemZ::reverseCCMask(CCMask);
-
-    // Return the updated CCReg link.
-    CCReg = IPM->getOperand(0);
+  SDValue CmpOp0 = CCNode->getOperand(0);
+  SDValue CmpOp1 = CCNode->getOperand(1);
+  SDValue CmpOp2 = CCNode->getOperand(2);
+  auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
+  if (Op0CC != SDValue()) {
+    const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
+    const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
+    if (Op0SDVals.empty() || Op1SDVals.empty())
       return false;
+    auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
+    auto CmpTypeVal = CmpType->getZExtValue();
+    const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
+                                               const SDValue &Op1Val) {
+      auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
+      auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
+      if (!Op0Node || !Op1Node)
+        return -1;
+      auto Op0APVal = Op0Node->getAPIntValue();
+      auto Op1APVal = Op1Node->getAPIntValue();
+      if (CmpTypeVal == SystemZICMP::SignedOnly)
+        return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
+      return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
+    };
+    int NewCCMask = 0;
+    for (auto CC : {0, 1, 2, 3}) {
+      auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
+      if (CCVal < 0)
+        return false;
+      NewCCMask <<= 1;
+      NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
+    }
+    NewCCMask &= Op0CCValid;
+    CCMask = NewCCMask;
+    CCReg = Op0CC;
+    CCValid = Op0CCValid;
     return true;
   }
   return false;
 }

-SDValue SystemZTargetLowering::combineBR_CCMASK(
-    SDNode *N, DAGCombinerInfo &DCI) const {
+// Decide, based on cost, whether two branch conditions should be merged into
+// a single branch or split into multiple branches.
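A sketch of the user-level shape this cost model targets (hypothetical example; the exact flag-output constraint spelling belongs to the clang side of this feature, which surfaces in this file only as the "{@cc}" constraint string):

// Each asm yields the 2-bit condition code (0..3) as a flag output; the
// comparisons below reach the DAG as (ICmp %asm) nodes joined by 'and'.
extern int x, y;
extern void bothZero();
void demo() {
  int cc0, cc1;
  asm("tmll %1,1" : "=@cc"(cc0) : "r"(x)); // hypothetical asm and constraint
  asm("tmll %1,3" : "=@cc"(cc1) : "r"(y)); // hypothetical asm and constraint
  if (cc0 == 0 && cc1 == 0) // cheap enough to merge into a single branch
    bothZero();
}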
 
-SDValue SystemZTargetLowering::combineBR_CCMASK(
-    SDNode *N, DAGCombinerInfo &DCI) const {
+// Cost heuristic for merging versus splitting into multiple branches.
+TargetLoweringBase::CondMergingParams
+SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
+                                                     const Value *Lhs,
+                                                     const Value *Rhs) const {
+  const auto isFlagOutOpCC = [](const Value *V) {
+    using namespace llvm::PatternMatch;
+    const Value *RHSVal;
+    const APInt *RHSC;
+    if (const auto *I = dyn_cast<Instruction>(V)) {
+      // PatternMatch.h provides concise tree-based pattern matching of
+      // LLVM IR.
+      if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
+          match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
+        if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
+          if (CB->isInlineAsm()) {
+            const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
+            return IA &&
+                   IA->getConstraintString().find("{@cc}") != std::string::npos;
+          }
+        }
+      }
+    }
+    return false;
+  };
+  // Pattern: (ICmp %asm) or (ICmp (And %asm)).
+  // The cost of the longest dependency chain (ICmp, And) is 2, so
+  // CostThreshold or BaseCost can be set >= 2. If the cost of an
+  // instruction is <= CostThreshold, the conditionals are merged;
+  // otherwise they are split.
+  if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
+    return {3, 0, -1};
+  // Default.
+  return {-1, -1, -1};
+}
+
+SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
+                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
 
   // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
@@ -8824,8 +9009,7 @@ SDValue SystemZTargetLowering::combineBR_CCMASK(
   int CCMaskVal = CCMask->getZExtValue();
   SDValue Chain = N->getOperand(0);
   SDValue CCReg = N->getOperand(4);
-
-  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
+  if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG))
     return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
                        Chain,
                        DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
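For context, the "{@cc}" constraint that isFlagOutOpCC looks for originates from inline assembly with a flag output operand. A hypothetical source-level example, assuming the GCC/Clang flag-output spelling "=@cc" on SystemZ (the IR-level constraint string then contains "{@cc}"); the instruction choice is illustrative only:

// Each condition below is an (icmp eq %asm, 0) of an inline-asm CC output,
// so getJumpConditionMergingParams reports the pair as cheap to merge into
// a single branch instead of splitting it.
static bool bothZeroCC(unsigned long A, unsigned long B, unsigned long C) {
  int CC0, CC1;
  asm("algr %1,%2" : "=@cc"(CC0), "+d"(A) : "d"(B)); // sets CC, read via %0
  asm("algr %1,%2" : "=@cc"(CC1), "+d"(A) : "d"(C)); // sets CC, read via %0
  return CC0 == 0 && CC1 == 0;
}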
@@ -8848,16 +9032,80 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK(
   int CCMaskVal = CCMask->getZExtValue();
   SDValue CCReg = N->getOperand(4);
 
-  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
-    return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
-                       N->getOperand(0), N->getOperand(1),
-                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
-                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
-                       CCReg);
+  bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
+
+  // Populate the SDVals vector for each condition code CCVal of the given
+  // Val, which may itself be another nested select_ccmask on the same CC.
+  const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
+    if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
+      SmallVector<SDValue, 4> Res;
+      if (Val.getOperand(4) != CCReg)
+        return SmallVector<SDValue, 4>{};
+      SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
+      auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
+      if (!CCMask)
+        return SmallVector<SDValue, 4>{};
+
+      int CCMaskVal = CCMask->getZExtValue();
+      for (auto &CC : {0, 1, 2, 3})
+        Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
+                                                              : FalseVal);
+      return Res;
+    }
+    return SmallVector<SDValue, 4>{Val, Val, Val, Val};
+  };
+  // Attempt to optimize TrueVal/FalseVal in the outermost select_ccmask,
+  // either with the CCReg found by combineCCMask or with the original CCReg.
+  SDValue TrueVal = N->getOperand(0);
+  SDValue FalseVal = N->getOperand(1);
+  auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
+  auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
+  // TrueSDVals/FalseSDVals might be empty in case of a non-constant
+  // TrueVal/FalseVal for select_ccmask, which cannot be optimized further.
+  if (TrueSDVals.empty())
+    TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
+  if (FalseSDVals.empty())
+    FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
+  if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
+    SmallSet<SDValue, 4> MergedSDValsSet;
+    // Ignore CC values outside CCValid.
+    for (auto CC : {0, 1, 2, 3}) {
+      if ((CCValidVal & ((1 << (3 - CC)))) != 0)
+        MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
+                                   ? TrueSDVals[CC]
+                                   : FalseSDVals[CC]);
+    }
+    if (MergedSDValsSet.size() == 1)
+      return *MergedSDValsSet.begin();
+    if (MergedSDValsSet.size() == 2) {
+      auto BeginIt = MergedSDValsSet.begin();
+      SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(BeginIt);
+      if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
+        std::swap(NewTrueVal, NewFalseVal);
+      int NewCCMask = 0;
+      for (auto CC : {0, 1, 2, 3}) {
+        NewCCMask <<= 1;
+        NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
+                         ? (TrueSDVals[CC] == NewTrueVal)
+                         : (FalseSDVals[CC] == NewTrueVal);
+      }
+      CCMaskVal = NewCCMask;
+      CCMaskVal &= CCValidVal;
+      TrueVal = NewTrueVal;
+      FalseVal = NewFalseVal;
+      IsCombinedCCReg = true;
+    }
+  }
+
+  if (IsCombinedCCReg)
+    return DAG.getNode(
+        SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
+        FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
+        DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
+
+  return SDValue();
 }
-
 SDValue SystemZTargetLowering::combineGET_CCMASK(
     SDNode *N, DAGCombinerInfo &DCI) const {
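The merging step above amounts to a small set computation over the four per-CC values. A standalone sketch, with a hypothetical rebuildSelect helper and plain ints standing in for SDValues:

#include <iterator>
#include <optional>
#include <set>
#include <tuple>

// ValFor[CC] is the value the nested selects produce for each CC. When
// only two distinct values survive inside CCValid, a single outer select
// with a rebuilt mask is enough; returns (TrueVal, FalseVal, CCMask).
static std::optional<std::tuple<int, int, int>>
rebuildSelect(const int ValFor[4], int CCValid) {
  std::set<int> Distinct;
  for (int CC = 0; CC < 4; ++CC)
    if (CCValid & (1 << (3 - CC)))
      Distinct.insert(ValFor[CC]);
  if (Distinct.size() != 2)
    return std::nullopt;
  int TrueV = *Distinct.begin(), FalseV = *std::next(Distinct.begin());
  int Mask = 0;
  for (int CC = 0; CC < 4; ++CC) {
    Mask <<= 1;
    Mask |= ValFor[CC] == TrueV; // select TrueV exactly for these CCs
  }
  return std::make_tuple(TrueV, FalseV, Mask & CCValid);
}

One surviving value folds the select away entirely (the size() == 1 early return above); more than two distinct values leaves the nested selects untouched.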
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index f8706b7..d5b7603 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -533,6 +533,18 @@ public:
   }
 
   const char *getTargetNodeName(unsigned Opcode) const override;
+
+  // This function currently returns the cost of the srl/ipm/cc sequence used for merging.
+  CondMergingParams
+  getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
+                                const Value *Rhs) const override;
+
+  // Handle lowering of flag assembly outputs.
+  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+                                      const SDLoc &DL,
+                                      const AsmOperandInfo &Constraint,
+                                      SelectionDAG &DAG) const override;
+
   std::pair<unsigned, const TargetRegisterClass *>
   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                StringRef Constraint, MVT VT) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 47c24fc..f973949 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -601,6 +601,29 @@ static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
   MachineOperand Src = MI.getOperand(3);
   MachineOperand Len = MI.getOperand(4);
 
+  // If the length is a constant, we don't actually need the check.
+  if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
+    if (Def->getOpcode() == WebAssembly::CONST_I32 ||
+        Def->getOpcode() == WebAssembly::CONST_I64) {
+      if (Def->getOperand(1).getImm() == 0) {
+        // A zero-length memcpy is a no-op.
+        MI.eraseFromParent();
+        return BB;
+      }
+      // A non-zero-length memcpy doesn't need a zero check.
+      unsigned MemoryCopy =
+          Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
+      BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
+          .add(DstMem)
+          .add(SrcMem)
+          .add(Dst)
+          .add(Src)
+          .add(Len);
+      MI.eraseFromParent();
+      return BB;
+    }
+  }
+
   // We're going to add an extra use to `Len` to test if it's zero; that
   // use shouldn't be a kill, even if the original use is.
   MachineOperand NoKillLen = Len;
@@ -669,6 +692,28 @@ static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL,
   MachineOperand Val = MI.getOperand(2);
   MachineOperand Len = MI.getOperand(3);
 
+  // If the length is a constant, we don't actually need the check.
+  if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
+    if (Def->getOpcode() == WebAssembly::CONST_I32 ||
+        Def->getOpcode() == WebAssembly::CONST_I64) {
+      if (Def->getOperand(1).getImm() == 0) {
+        // A zero-length memset is a no-op.
+        MI.eraseFromParent();
+        return BB;
+      }
+      // A non-zero-length memset doesn't need a zero check.
+      unsigned MemoryFill =
+          Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
+      BuildMI(*BB, MI, DL, TII.get(MemoryFill))
+          .add(Mem)
+          .add(Dst)
+          .add(Val)
+          .add(Len);
+      MI.eraseFromParent();
+      return BB;
+    }
+  }
+
   // We're going to add an extra use to `Len` to test if it's zero; that
   // use shouldn't be a kill, even if the original use is.
   MachineOperand NoKillLen = Len;
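At the source level, the new constant-length fast path behaves roughly as follows (hypothetical example functions; with an unknown length the zero-length guard branch around memory.copy/memory.fill is still emitted as before):

#include <cstring>

// With a constant non-zero length the guard is omitted and a bare
// memory.copy/memory.fill is emitted; a constant zero length deletes the
// operation outright.
void copyFixed(char *Dst, const char *Src) {
  std::memcpy(Dst, Src, 16); // constant n != 0: straight memory.copy
}
void fillNothing(char *Dst) {
  std::memset(Dst, 0, 0);    // constant n == 0: folds to a no-op
}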
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 28fa2cd..b81641f 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -414,6 +414,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
 
   getActionDefinitionsBuilder(G_SEXT_INREG).lower();
 
+  getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
+
   // fp constants
   getActionDefinitionsBuilder(G_FCONSTANT)
       .legalFor({s32, s64})
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 3af8b3e..6db780f 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1335,10 +1335,8 @@ def ProcessorFeatures {
                 !listconcat(ARLFeatures, ARLSAdditionalFeatures);
 
   // Pantherlake
-  list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
   list<SubtargetFeature> PTLFeatures =
-      !listremove(!listconcat(ARLSFeatures, PTLAdditionalFeatures), [FeatureWIDEKL]);
-
+      !listremove(ARLSFeatures, [FeatureWIDEKL]);
 
   // Clearwaterforest
   list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
@@ -1881,8 +1879,10 @@ def : ProcModel<P, AlderlakePModel,
 }
 def : ProcModel<"lunarlake", LunarlakePModel, ProcessorFeatures.ARLSFeatures,
                 ProcessorFeatures.ADLTuning>;
-def : ProcModel<"pantherlake", AlderlakePModel,
+foreach P = ["pantherlake", "wildcatlake"] in {
+def : ProcModel<P, AlderlakePModel,
                 ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
+}
 def : ProcModel<"clearwaterforest", AlderlakePModel,
                 ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
 def : ProcModel<"emeraldrapids", SapphireRapidsModel,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index eea84a2..c32b1a6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3624,6 +3624,16 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
       match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) &&
       match(Rhs,
             m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())))
     BaseCost += 1;
+
+  // For OR conditions with EQ comparisons, prefer splitting into branches
+  // (unless CCMP is available). OR+EQ cannot be optimized via bitwise ops,
+  // unlike OR+NE which becomes (P|Q)!=0. Similarly, don't split signed
+  // comparisons (SLT, SGT) that can be optimized.
+  if (BaseCost >= 0 && !Subtarget.hasCCMP() && Opc == Instruction::Or &&
+      match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) &&
+      match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())))
+    return {-1, -1, -1};
+
   return {BaseCost, BrMergingLikelyBias.getValue(),
           BrMergingUnlikelyBias.getValue()};
 }
@@ -3787,7 +3797,7 @@ static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
 
 /// Return true if every element in Mask, is an in-place blend/select mask or is
 /// undef.
-LLVM_ATTRIBUTE_UNUSED static bool isBlendOrUndef(ArrayRef<int> Mask) {
+[[maybe_unused]] static bool isBlendOrUndef(ArrayRef<int> Mask) {
   unsigned NumElts = Mask.size();
   for (auto [I, M] : enumerate(Mask))
     if (!isUndefOrEqual(M, I) && !isUndefOrEqual(M, I + NumElts))
@@ -8096,7 +8106,7 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl,
   return DstVec;
 }
 
-LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) {
+[[maybe_unused]] static bool isHorizOp(unsigned Opcode) {
   switch (Opcode) {
   case X86ISD::PACKSS:
   case X86ISD::PACKUS:
@@ -20813,7 +20823,7 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
   // for DAG type consistency we have to match the FP operand type.
 
   APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000));
-  LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK;
+  [[maybe_unused]] APFloat::opStatus Status = APFloat::opOK;
   bool LosesInfo = false;
   if (TheVT == MVT::f64)
     // The rounding mode is irrelevant as the conversion should be exact.
@@ -22856,7 +22866,7 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
   // be generated by the memcmp expansion pass with oversized integer compares
   // (see PR33325).
   bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
-  if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
+  if (isNullConstant(Y) && OpSize == 128 && !IsOrXorXorTreeCCZero)
     return SDValue();
 
   // Don't perform this combine if constructing the vector will be expensive.
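Returning to the X86 getJumpConditionMergingParams change above, the asymmetry it encodes is visible directly in source form (hypothetical functions):

// OR of two EQ tests has no single bitwise equivalent, so without CCMP it
// is now left as two branches; the NE form still merges into one test.
bool eitherIsZero(long A, long B) { return A == 0 || B == 0; } // split
bool anyBitSet(long A, long B) { return A != 0 || B != 0; }    // (A | B) != 0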