Diffstat (limited to 'llvm/lib/Target')
42 files changed, 512 insertions, 174 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index be2f2e4..662d84b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1561,6 +1561,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); setOperationAction(ISD::VECREDUCE_AND, VT, Custom); setOperationAction(ISD::VECREDUCE_OR, VT, Custom); + setOperationAction(ISD::VECREDUCE_MUL, VT, Custom); setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); @@ -1717,6 +1718,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom); setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom); setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); @@ -7775,6 +7777,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::VECREDUCE_FMAXIMUM: case ISD::VECREDUCE_FMINIMUM: return LowerVECREDUCE(Op, DAG); + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_FMUL: + return LowerVECREDUCE_MUL(Op, DAG); case ISD::ATOMIC_LOAD_AND: return LowerATOMIC_LOAD_AND(Op, DAG); case ISD::DYNAMIC_STACKALLOC: @@ -16794,6 +16799,33 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, } } +SDValue AArch64TargetLowering::LowerVECREDUCE_MUL(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + assert(SrcVT.isScalableVector() && "Unexpected operand type!"); + + SDVTList SrcVTs = DAG.getVTList(SrcVT, SrcVT); + unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); + SDValue Identity = DAG.getNeutralElement(BaseOpc, DL, SrcVT, Op->getFlags()); + + // Whilst we don't know the size of the vector we do know its maximum size, + // so we can perform a tree reduction with an identity vector, which means + // once we arrive at the result the remaining stages (when the vector is + // smaller than the maximum) have no effect. 
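// A scalar model of the identity-padding trick above (an editorial sketch,
// not part of the patch): MaxElts stands in for Segments * MinNumElements
// and is assumed to be a power of two, with In.size() <= MaxElts. Padding
// with the multiplicative identity means the surplus tree stages multiply
// by 1 and leave the result unchanged.
#include <cstdint>
#include <vector>

uint64_t reduceMulPadded(std::vector<uint64_t> In, unsigned MaxElts) {
  In.resize(MaxElts, 1); // identity padding
  for (unsigned Width = MaxElts; Width > 1; Width /= 2)
    for (unsigned I = 0; I != Width / 2; ++I)
      In[I] = In[2 * I] * In[2 * I + 1]; // pair even/odd lanes, as the
                                         // deinterleave + multiply does
  return In[0]; // product of the original elements, whatever In.size() was
}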
+ + unsigned Segments = AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock; + unsigned Stages = llvm::Log2_32(Segments * SrcVT.getVectorMinNumElements()); + + for (unsigned I = 0; I < Stages; ++I) { + Src = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, SrcVTs, Src, Identity); + Src = DAG.getNode(BaseOpc, DL, SrcVT, Src.getValue(0), Src.getValue(1)); + } + + return DAG.getExtractVectorElt(DL, Op.getValueType(), Src, 0); +} + SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const { auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>(); @@ -18144,8 +18176,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store, bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad( Instruction *Load, Value *Mask, IntrinsicInst *DI) const { const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); - if (Factor != 2 && Factor != 4) { - LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n"); + if (Factor != 2 && Factor != 3 && Factor != 4) { + LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n"); return false; } auto *LI = dyn_cast<LoadInst>(Load); @@ -18223,8 +18255,8 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore( Instruction *Store, Value *Mask, ArrayRef<Value *> InterleavedValues) const { unsigned Factor = InterleavedValues.size(); - if (Factor != 2 && Factor != 4) { - LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n"); + if (Factor != 2 && Factor != 3 && Factor != 4) { + LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n"); return false; } StoreInst *SI = dyn_cast<StoreInst>(Store); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 00956fd..9495c9f 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -752,6 +752,7 @@ private: SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECREDUCE_MUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 7322212..fe84193 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -233,6 +233,12 @@ def G_SDOT : AArch64GenericInstruction { let hasSideEffects = 0; } +def G_USDOT : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3); + let hasSideEffects = 0; +} + // Generic instruction for the BSP pseudo. It is expanded into BSP, which // expands into BSL/BIT/BIF after register allocation. 
def G_BSP : AArch64GenericInstruction { @@ -278,6 +284,7 @@ def : GINodeEquiv<G_UADDLV, AArch64uaddlv>; def : GINodeEquiv<G_UDOT, AArch64udot>; def : GINodeEquiv<G_SDOT, AArch64sdot>; +def : GINodeEquiv<G_USDOT, AArch64usdot>; def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 30dfcf2b..12c600f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -10600,6 +10600,9 @@ describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, Register DestReg = DestSrc->Destination->getReg(); Register SrcReg = DestSrc->Source->getReg(); + if (!DestReg.isValid() || !SrcReg.isValid()) + return std::nullopt; + auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {}); // If the described register is the destination, just return the source. diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 9e2d698..05a4313 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1855,6 +1855,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return LowerTriOp(AArch64::G_UDOT); case Intrinsic::aarch64_neon_sdot: return LowerTriOp(AArch64::G_SDOT); + case Intrinsic::aarch64_neon_usdot: + return LowerTriOp(AArch64::G_USDOT); case Intrinsic::aarch64_neon_sqxtn: return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S); case Intrinsic::aarch64_neon_sqxtun: diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index bfe2c80..a67b12a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -901,6 +901,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}}); + addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}}); + bool hasSALUFloat = ST->hasSALUFloatInsts(); addRulesForGOpcs({G_FADD}, Standard) diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index be62395..e0375ea 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -513,8 +513,7 @@ void AMDGPUDisassembler::decodeImmOperands(MCInst &MI, } if (Imm == AMDGPU::EncValues::LITERAL_CONST) { - Op = decodeLiteralConstant( - Desc, OpDesc, OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64); + Op = decodeLiteralConstant(Desc, OpDesc); continue; } @@ -1545,21 +1544,21 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { MCOperand AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const { if (HasLiteral) { - if (Literal64 != Val) + if (Literal != Val) return errOperand(Val, "More than one unique literal is illegal"); } HasLiteral = true; - Literal = Literal64 = Val; + Literal = Val; - bool UseLit64 = Hi_32(Literal64) == 0; + bool UseLit64 = Hi_32(Literal) == 0; return UseLit64 ? 
MCOperand::createExpr(AMDGPUMCExpr::createLit( - LitModifier::Lit64, Literal64, getContext())) - : MCOperand::createImm(Literal64); + LitModifier::Lit64, Literal, getContext())) + : MCOperand::createImm(Literal); } -MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc, - const MCOperandInfo &OpDesc, - bool ExtendFP64) const { +MCOperand +AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc, + const MCOperandInfo &OpDesc) const { // For now all literal constants are supposed to be unsigned integer // ToDo: deal with signed/unsigned 64-bit integer constants // ToDo: deal with float/double constants @@ -1569,35 +1568,79 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc, Twine(Bytes.size())); } HasLiteral = true; - Literal = Literal64 = eatBytes<uint32_t>(Bytes); - if (ExtendFP64) - Literal64 <<= 32; + Literal = eatBytes<uint32_t>(Bytes); } - int64_t Val = ExtendFP64 ? Literal64 : Literal; + // For disassembling always assume all inline constants are available. + bool HasInv2Pi = true; - bool CanUse64BitLiterals = - STI.hasFeature(AMDGPU::Feature64BitLiterals) && - !(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)); - - bool UseLit64 = false; - if (CanUse64BitLiterals) { - if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) - UseLit64 = false; - else if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64) - UseLit64 = Hi_32(Literal64) == 0; + // Invalid instruction codes may contain literals for inline-only + // operands, so we support them here as well. + int64_t Val = Literal; + bool UseLit = false; + switch (OpDesc.OperandType) { + default: + llvm_unreachable("Unexpected operand type!"); + case AMDGPU::OPERAND_REG_IMM_BF16: + case AMDGPU::OPERAND_REG_INLINE_C_BF16: + case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: + UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi); + break; + case AMDGPU::OPERAND_REG_IMM_V2BF16: + UseLit = AMDGPU::isInlinableLiteralV2BF16(Val); + break; + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi); + break; + case AMDGPU::OPERAND_REG_IMM_V2FP16: + UseLit = AMDGPU::isInlinableLiteralV2F16(Val); + break; + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: + break; + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi); + break; + case AMDGPU::OPERAND_REG_IMM_V2INT16: + UseLit = AMDGPU::isInlinableLiteralV2I16(Val); + break; + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_AC_INT32: + case AMDGPU::OPERAND_REG_IMM_V2FP32: + case AMDGPU::OPERAND_REG_IMM_V2INT32: + case AMDGPU::OPERAND_KIMM32: + UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi); + break; + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: + case AMDGPU::OPERAND_REG_INLINE_AC_FP64: + Val <<= 32; + break; + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi); + break; + case MCOI::OPERAND_REGISTER: + // 
TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits + // decoding a literal in a position of a register operand. Give + // it special handling in the caller, decodeImmOperands(), instead + // of quietly allowing it here. + break; } - return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit( - LitModifier::Lit64, Val, getContext())) - : MCOperand::createImm(Val); + return UseLit ? MCOperand::createExpr(AMDGPUMCExpr::createLit( + LitModifier::Lit, Val, getContext())) + : MCOperand::createImm(Val); } -MCOperand -AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const { +MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const { assert(STI.hasFeature(AMDGPU::Feature64BitLiterals)); if (!HasLiteral) { @@ -1606,25 +1649,13 @@ AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const { Twine(Bytes.size())); } HasLiteral = true; - Literal64 = eatBytes<uint64_t>(Bytes); - } - - bool UseLit64 = false; - const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); - const MCOperandInfo &OpDesc = Desc.operands()[Inst.getNumOperands()]; - if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) { - UseLit64 = false; - } else { - assert(OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64); - UseLit64 = Hi_32(Literal64) == 0; + Literal = eatBytes<uint64_t>(Bytes); } + bool UseLit64 = Hi_32(Literal) == 0; return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit( - LitModifier::Lit64, Literal64, getContext())) - : MCOperand::createImm(Literal64); + LitModifier::Lit64, Literal, getContext())) + : MCOperand::createImm(Literal); } MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) { @@ -1913,7 +1944,7 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst, return MCOperand::createImm(Val); if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) { - return decodeLiteral64Constant(Inst); + return decodeLiteral64Constant(); } switch (Width) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 2751857..d103d79 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -44,8 +44,7 @@ private: const unsigned HwModeRegClass; const unsigned TargetMaxInstBytes; mutable ArrayRef<uint8_t> Bytes; - mutable uint32_t Literal; - mutable uint64_t Literal64; + mutable uint64_t Literal; mutable bool HasLiteral; mutable std::optional<bool> EnableWavefrontSize32; unsigned CodeObjectVersion; @@ -144,9 +143,8 @@ public: MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const; MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const; MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, - const MCOperandInfo &OpDesc, - bool ExtendFP64) const; - MCOperand decodeLiteral64Constant(const MCInst &Inst) const; + const MCOperandInfo &OpDesc) const; + MCOperand decodeLiteral64Constant() const; MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 58482ea..9fbf9e5 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -69,6 +69,12 @@ static cl::opt<bool> GCNTrackers( cl::desc("Use the AMDGPU specific RPTrackers 
during scheduling"), cl::init(false)); +static cl::opt<unsigned> PendingQueueLimit( + "amdgpu-scheduler-pending-queue-limit", cl::Hidden, + cl::desc( + "Max (Available+Pending) size to inspect pending queue (0 disables)"), + cl::init(256)); + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) #define DUMP_MAX_REG_PRESSURE static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler( @@ -335,17 +341,52 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, } } +static bool shouldCheckPending(SchedBoundary &Zone, + const TargetSchedModel *SchedModel) { + bool HasBufferedModel = + SchedModel->hasInstrSchedModel() && SchedModel->getMicroOpBufferSize(); + unsigned Combined = Zone.Available.size() + Zone.Pending.size(); + return Combined <= PendingQueueLimit && HasBufferedModel; +} + +static SUnit *pickOnlyChoice(SchedBoundary &Zone, + const TargetSchedModel *SchedModel) { + // pickOnlyChoice() releases pending instructions and checks for new hazards. + SUnit *OnlyChoice = Zone.pickOnlyChoice(); + if (!shouldCheckPending(Zone, SchedModel) || Zone.Pending.empty()) + return OnlyChoice; + + return nullptr; +} + +void GCNSchedStrategy::printCandidateDecision(const SchedCandidate &Current, + const SchedCandidate &Preferred) { + LLVM_DEBUG({ + dbgs() << "Prefer:\t\t"; + DAG->dumpNode(*Preferred.SU); + + if (Current.SU) { + dbgs() << "Not:\t"; + DAG->dumpNode(*Current.SU); + } + + dbgs() << "Reason:\t\t"; + traceCandidate(Preferred); + }); +} + // This function is mostly cut and pasted from // GenericScheduler::pickNodeFromQueue() void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, - SchedCandidate &Cand, + SchedCandidate &Cand, bool &IsPending, bool IsBottomUp) { const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI); ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos(); unsigned SGPRPressure = 0; unsigned VGPRPressure = 0; + IsPending = false; if (DAG->isTrackingPressure()) { if (!GCNTrackers) { SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; @@ -358,8 +399,9 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, VGPRPressure = T->getPressure().getArchVGPRNum(); } } - ReadyQueue &Q = Zone.Available; - for (SUnit *SU : Q) { + LLVM_DEBUG(dbgs() << "Available Q:\n"); + ReadyQueue &AQ = Zone.Available; + for (SUnit *SU : AQ) { SchedCandidate TryCand(ZonePolicy); initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure, @@ -371,27 +413,55 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, // Initialize resource delta if needed in case future heuristics query it. if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(Zone.DAG, SchedModel); + LLVM_DEBUG(printCandidateDecision(Cand, TryCand)); Cand.setBest(TryCand); - LLVM_DEBUG(traceCandidate(Cand)); + } else { + printCandidateDecision(TryCand, Cand); + } + } + + if (!shouldCheckPending(Zone, SchedModel)) + return; + + LLVM_DEBUG(dbgs() << "Pending Q:\n"); + ReadyQueue &PQ = Zone.Pending; + for (SUnit *SU : PQ) { + + SchedCandidate TryCand(ZonePolicy); + initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure, + VGPRPressure, IsBottomUp); + // Pass SchedBoundary only when comparing nodes from the same boundary. + SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; + tryPendingCandidate(Cand, TryCand, ZoneArg); + if (TryCand.Reason != NoCand) { + // Initialize resource delta if needed in case future heuristics query it. 
+ if (TryCand.ResDelta == SchedResourceDelta()) + TryCand.initResourceDelta(Zone.DAG, SchedModel); + LLVM_DEBUG(printCandidateDecision(Cand, TryCand)); + IsPending = true; + Cand.setBest(TryCand); + } else { + printCandidateDecision(TryCand, Cand); } } } // This function is mostly cut and pasted from // GenericScheduler::pickNodeBidirectional() -SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { +SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode, + bool &PickedPending) { // Schedule as far as possible in the direction of no choice. This is most // efficient, but also provides the best heuristics for CriticalPSets. - if (SUnit *SU = Bot.pickOnlyChoice()) { + if (SUnit *SU = pickOnlyChoice(Bot, SchedModel)) { IsTopNode = false; return SU; } - if (SUnit *SU = Top.pickOnlyChoice()) { + if (SUnit *SU = pickOnlyChoice(Top, SchedModel)) { IsTopNode = true; return SU; } - // Set the bottom-up policy based on the state of the current bottom zone and - // the instructions outside the zone, including the top zone. + // Set the bottom-up policy based on the state of the current bottom zone + // and the instructions outside the zone, including the top zone. CandPolicy BotPolicy; setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); // Set the top-down policy based on the state of the current top zone and @@ -399,12 +469,14 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { CandPolicy TopPolicy; setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); + bool BotPending = false; // See if BotCand is still valid (because we previously scheduled from Top). LLVM_DEBUG(dbgs() << "Picking from Bot:\n"); if (!BotCand.isValid() || BotCand.SU->isScheduled || BotCand.Policy != BotPolicy) { BotCand.reset(CandPolicy()); pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand, + BotPending, /*IsBottomUp=*/true); assert(BotCand.Reason != NoCand && "failed to find the first candidate"); } else { @@ -414,6 +486,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { SchedCandidate TCand; TCand.reset(CandPolicy()); pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand, + BotPending, /*IsBottomUp=*/true); assert(TCand.SU == BotCand.SU && "Last pick result should correspond to re-picking right now"); @@ -421,12 +494,14 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { #endif } + bool TopPending = false; // Check if the top Q has a better candidate. LLVM_DEBUG(dbgs() << "Picking from Top:\n"); if (!TopCand.isValid() || TopCand.SU->isScheduled || TopCand.Policy != TopPolicy) { TopCand.reset(CandPolicy()); pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand, + TopPending, /*IsBottomUp=*/false); assert(TopCand.Reason != NoCand && "failed to find the first candidate"); } else { @@ -436,6 +511,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { SchedCandidate TCand; TCand.reset(CandPolicy()); pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand, + TopPending, /*IsBottomUp=*/false); assert(TCand.SU == TopCand.SU && "Last pick result should correspond to re-picking right now"); @@ -446,12 +522,21 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { // Pick best from BotCand and TopCand. 
LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand); dbgs() << "Bot Cand: "; traceCandidate(BotCand);); - SchedCandidate Cand = BotCand; - TopCand.Reason = NoCand; - tryCandidate(Cand, TopCand, nullptr); - if (TopCand.Reason != NoCand) { - Cand.setBest(TopCand); + SchedCandidate Cand = BotPending ? TopCand : BotCand; + SchedCandidate TryCand = BotPending ? BotCand : TopCand; + PickedPending = BotPending && TopPending; + + TryCand.Reason = NoCand; + if (BotPending || TopPending) { + PickedPending |= tryPendingCandidate(Cand, TryCand, nullptr); + } else { + tryCandidate(Cand, TryCand, nullptr); + } + + if (TryCand.Reason != NoCand) { + Cand.setBest(TryCand); } + LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand);); IsTopNode = Cand.AtTop; @@ -466,35 +551,55 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) { Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); return nullptr; } + bool PickedPending; SUnit *SU; do { + PickedPending = false; if (RegionPolicy.OnlyTopDown) { - SU = Top.pickOnlyChoice(); + SU = pickOnlyChoice(Top, SchedModel); if (!SU) { CandPolicy NoPolicy; TopCand.reset(NoPolicy); pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand, + PickedPending, /*IsBottomUp=*/false); assert(TopCand.Reason != NoCand && "failed to find a candidate"); SU = TopCand.SU; } IsTopNode = true; } else if (RegionPolicy.OnlyBottomUp) { - SU = Bot.pickOnlyChoice(); + SU = pickOnlyChoice(Bot, SchedModel); if (!SU) { CandPolicy NoPolicy; BotCand.reset(NoPolicy); pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand, + PickedPending, /*IsBottomUp=*/true); assert(BotCand.Reason != NoCand && "failed to find a candidate"); SU = BotCand.SU; } IsTopNode = false; } else { - SU = pickNodeBidirectional(IsTopNode); + SU = pickNodeBidirectional(IsTopNode, PickedPending); } } while (SU->isScheduled); + if (PickedPending) { + unsigned ReadyCycle = IsTopNode ? SU->TopReadyCycle : SU->BotReadyCycle; + SchedBoundary &Zone = IsTopNode ? Top : Bot; + unsigned CurrentCycle = Zone.getCurrCycle(); + if (ReadyCycle > CurrentCycle) + Zone.bumpCycle(ReadyCycle); + + // FIXME: checkHazard() doesn't give information about which cycle the + // hazard will resolve so just keep bumping the cycle by 1. This could be + // made more efficient if checkHazard() returned more details. + while (Zone.checkHazard(SU)) + Zone.bumpCycle(Zone.getCurrCycle() + 1); + + Zone.releasePending(); + } + if (SU->isTopReady()) Top.removeReady(SU); if (SU->isBottomReady()) Bot.removeReady(SU); @@ -540,6 +645,47 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const { return *std::next(CurrentStage); } +bool GCNSchedStrategy::tryPendingCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary *Zone) const { + // Initialize the candidate if needed. + if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; + return true; + } + + // Bias PhysReg Defs and copies to their uses and defined respectively. + if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), + biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) + return TryCand.Reason != NoCand; + + // Avoid exceeding the target's limit. + if (DAG->isTrackingPressure() && + tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, + RegExcess, TRI, DAG->MF)) + return TryCand.Reason != NoCand; + + // Avoid increasing the max critical pressure in the scheduled region. 
+ if (DAG->isTrackingPressure() && + tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, + TryCand, Cand, RegCritical, TRI, DAG->MF)) + return TryCand.Reason != NoCand; + + bool SameBoundary = Zone != nullptr; + if (SameBoundary) { + TryCand.initResourceDelta(DAG, SchedModel); + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return TryCand.Reason != NoCand; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, TryCand, Cand, + ResourceDemand)) + return TryCand.Reason != NoCand; + } + + return false; +} + GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( const MachineSchedContext *C, bool IsLegacyScheduler) : GCNSchedStrategy(C) { diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 8ea4267..975781f 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -44,17 +44,32 @@ raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID); /// heuristics to determine excess/critical pressure sets. class GCNSchedStrategy : public GenericScheduler { protected: - SUnit *pickNodeBidirectional(bool &IsTopNode); + SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending); void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, - SchedCandidate &Cand, bool IsBottomUp); + SchedCandidate &Cand, bool &IsPending, + bool IsBottomUp); void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp); + /// Evaluates instructions in the pending queue using a subset of scheduling + /// heuristics. + /// + /// Instructions that cannot be issued due to hardware constraints are placed + /// in the pending queue rather than the available queue, making them normally + /// invisible to scheduling heuristics. However, in certain scenarios (such as + /// avoiding register spilling), it may be beneficial to consider scheduling + /// these not-yet-ready instructions. 
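// A toy model of why the pending queue is worth a look (editorial sketch
// with invented types; the real heuristics are in tryPendingCandidate,
// declared next): if the only available candidate would push register
// pressure past the limit while a stalled one would not, taking the stall
// and advancing the cycle (cf. bumpCycle/releasePending in pickNode) can
// be the better trade.
#include <algorithm>

struct ToyCand {
  unsigned ReadyCycle; // cycle at which the instruction can issue
  int PressureDelta;   // signed register-pressure change if scheduled
};

ToyCand pickWithPending(ToyCand Avail, ToyCand Pend, int Pressure, int Limit,
                        unsigned &CurrCycle) {
  if (Pressure + Avail.PressureDelta > Limit &&
      Pressure + Pend.PressureDelta <= Limit) {
    CurrCycle = std::max(CurrCycle, Pend.ReadyCycle); // pay the stall
    return Pend;
  }
  return Avail; // otherwise the ready instruction wins as usual
}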
+ bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) const; + + void printCandidateDecision(const SchedCandidate &Current, + const SchedCandidate &Preferred); + std::vector<unsigned> Pressure; std::vector<unsigned> MaxPressure; diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index c684f9e..01a40c1 100644 --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -654,7 +654,6 @@ void SIPreEmitPeephole::collectUnpackingCandidates( if (TotalCyclesBetweenCandidates < NumMFMACycles - 1) InstrsToUnpack.insert(&Instr); } - return; } void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) { @@ -681,7 +680,6 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) { HiDstOp.setIsRenamable(DstOp.isRenamable()); I.eraseFromParent(); - return; } MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index a01a5fd..5e3195b 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1697,9 +1697,6 @@ LLVM_READNONE bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi); LLVM_READNONE -bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi); - -LLVM_READNONE bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi); LLVM_READNONE diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 9945ecc..0d7b6d1 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -161,8 +161,8 @@ namespace { friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) { return TE.PseudoOpc < PseudoOpc; } - friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc, - const NEONLdStTableEntry &TE) { + [[maybe_unused]] friend bool operator<(unsigned PseudoOpc, + const NEONLdStTableEntry &TE) { return PseudoOpc < TE.PseudoOpc; } }; diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 228114c..44c4830 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -57,6 +57,7 @@ def ResBindTy : DXILOpParamType; def ResPropsTy : DXILOpParamType; def SplitDoubleTy : DXILOpParamType; def BinaryWithCarryTy : DXILOpParamType; +def DimensionsTy : DXILOpParamType; class DXILOpClass; @@ -901,6 +902,13 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> { let attributes = [Attributes<DXIL1_0, [ReadOnly]>]; } +def GetDimensions : DXILOp<72, getDimensions> { + let Doc = "gets the dimensions of a buffer or texture"; + let arguments = [HandleTy, Int32Ty]; + let result = DimensionsTy; + let stages = [Stages<DXIL1_0, [all_stages]>]; +} + def Barrier : DXILOp<80, barrier> { let Doc = "inserts a memory barrier in the shader"; let intrinsics = [ diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp index 1aed8f9..944b2e6 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp @@ -261,6 +261,12 @@ static StructType *getBinaryWithCarryType(LLVMContext &Context) { return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c"); } +static StructType *getDimensionsType(LLVMContext &Ctx) { + Type *Int32Ty = Type::getInt32Ty(Ctx); + return getOrCreateStructType("dx.types.Dimensions", + {Int32Ty, Int32Ty, Int32Ty, Int32Ty}, Ctx); +} 
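// For orientation, a C++ mirror of the dx.types.Dimensions return type
// (editorial sketch, not part of the patch; the field meanings are an
// assumption based on the DXIL documentation for GetDimensions, and only
// the first field is consumed by the x-dimension lowering below):
#include <cstdint>

struct DxDimensions {
  int32_t Width;            // element count for buffers, width for textures
  int32_t Height;
  int32_t DepthOrArraySize;
  int32_t MipLevels;
};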
+ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, Type *OverloadTy) { switch (Kind) { @@ -318,6 +324,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, return getSplitDoubleType(Ctx); case OpParamType::BinaryWithCarryTy: return getBinaryWithCarryType(Ctx); + case OpParamType::DimensionsTy: + return getDimensionsType(Ctx); } llvm_unreachable("Invalid parameter kind"); return nullptr; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 610d8b6..e46a393 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -627,6 +627,28 @@ public: }); } + [[nodiscard]] bool lowerGetDimensionsX(Function &F) { + IRBuilder<> &IRB = OpBuilder.getIRB(); + Type *Int32Ty = IRB.getInt32Ty(); + + return replaceFunction(F, [&](CallInst *CI) -> Error { + IRB.SetInsertPoint(CI); + Value *Handle = + createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); + Value *Undef = UndefValue::get(Int32Ty); + + Expected<CallInst *> OpCall = OpBuilder.tryCreateOp( + OpCode::GetDimensions, {Handle, Undef}, CI->getName(), Int32Ty); + if (Error E = OpCall.takeError()) + return E; + Value *Dim = IRB.CreateExtractValue(*OpCall, 0); + + CI->replaceAllUsesWith(Dim); + CI->eraseFromParent(); + return Error::success(); + }); + } + [[nodiscard]] bool lowerGetPointer(Function &F) { // These should have already been handled in DXILResourceAccess, so we can // just clean up the dead prototype. @@ -934,6 +956,9 @@ public: case Intrinsic::dx_resource_updatecounter: HasErrors |= lowerUpdateCounter(F); break; + case Intrinsic::dx_resource_getdimensions_x: + HasErrors |= lowerGetDimensionsX(F); + break; case Intrinsic::ctpop: HasErrors |= lowerCtpopToCountBits(F); break; diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index 82c43ff..26a8728 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -1165,12 +1165,15 @@ void DXILBitcodeWriter::writeValueSymbolTableForwardDecl() {} /// Returns the bit offset to backpatch with the location of the real VST. void DXILBitcodeWriter::writeModuleInfo() { // Emit various pieces of data attached to a module. - if (!M.getTargetTriple().empty()) - writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, - M.getTargetTriple().str(), 0 /*TODO*/); - const std::string &DL = M.getDataLayoutStr(); - if (!DL.empty()) - writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); + + // We need to hardcode a triple and datalayout that's compatible with the + // historical DXIL triple and datalayout from DXC. 
+ StringRef Triple = "dxil-ms-dx"; + StringRef DL = "e-m:e-p:32:32-i1:8-i8:8-i16:32-i32:32-i64:64-" + "f16:32-f32:32-f64:64-n8:16:32:64"; + writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, Triple, 0 /*TODO*/); + writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); + if (!M.getModuleInlineAsm().empty()) writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(), 0 /*TODO*/); diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp index 1eb03bf..725f2b1 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp @@ -149,11 +149,6 @@ public: std::string Data; llvm::raw_string_ostream OS(Data); - Triple OriginalTriple = M.getTargetTriple(); - // Set to DXIL triple when write to bitcode. - // Only the output bitcode need to be DXIL triple. - M.setTargetTriple(Triple("dxil-ms-dx")); - // Perform late legalization of lifetime intrinsics that would otherwise // fail the Module Verifier if performed in an earlier pass legalizeLifetimeIntrinsics(M); @@ -165,9 +160,6 @@ public: // not-so-legal legalizations removeLifetimeIntrinsics(M); - // Recover triple. - M.setTargetTriple(OriginalTriple); - Constant *ModuleConstant = ConstantDataArray::get(M.getContext(), arrayRefFromStringRef(Data)); auto *GV = new llvm::GlobalVariable(M, ModuleConstant->getType(), true, diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 5f180d6..3bd6ed4 100644 --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -66,6 +66,10 @@ public: void remapInstruction(MCInst &Instr) const; + Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address) const override; + private: bool makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address, uint64_t &BytesToSkip, raw_ostream &CS) const; @@ -567,6 +571,18 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB, return Result; } +Expected<bool> HexagonDisassembler::onSymbolStart(SymbolInfoTy &Symbol, + uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address) const { + // At the start of a symbol, force a fresh packet by resetting any + // in-progress bundle state. This prevents packets from straddling label + // boundaries when data (e.g. jump tables) appears in between. 
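// Aside on the hook's contract (my reading of MCDisassembler::onSymbolStart,
// not part of the patch): returning true with Size == 0 reports success
// without consuming any bytes, so normal instruction decoding resumes at the
// symbol; only the disassembler's private bundle state is cleared here.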
+ Size = 0; + resetBundle(); + return true; +} + static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, ArrayRef<MCPhysReg> Table) { if (RegNo < Table.size()) { diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 52e6b0b..68f5312 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -174,8 +174,8 @@ namespace { const TargetRegisterInfo *TRI; }; - raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) - LLVM_ATTRIBUTE_UNUSED; + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const PrintRegSet &P); raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { OS << '{'; for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp index 14b6bb3..9087f9d 100644 --- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -272,15 +272,14 @@ namespace { OS << *I << ' ' << **I << '\n'; } - raw_ostream &operator<< (raw_ostream &OS, - const NodeVect &S) LLVM_ATTRIBUTE_UNUSED; + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const NodeVect &S); raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) { dump_node_container(OS, S); return OS; } - raw_ostream &operator<< (raw_ostream &OS, - const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED; + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const NodeToUsesMap &M); raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){ for (const auto &I : M) { const UseSet &Us = I.second; @@ -914,9 +913,8 @@ namespace { const NodeToValueMap ⤅ }; - raw_ostream &operator<< (raw_ostream &OS, - const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ; - raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) { + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const LocationAsBlock &Loc) { for (const auto &I : Loc.Map) { OS << I.first << " -> "; if (BasicBlock *B = cast_or_null<BasicBlock>(I.second)) diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 14a7ae7..3900aac 100644 --- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -132,8 +132,7 @@ namespace { const TargetRegisterInfo &TRI; friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P); }; - raw_ostream &operator<<(raw_ostream &OS, - const PrintFP &P) LLVM_ATTRIBUTE_UNUSED; + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P); raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) { OS << "{ SplitB:" << PrintMB(P.FP.SplitB) << ", PredR:" << printReg(P.FP.PredR, &P.TRI) diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp index f9fdab4..9c81e96 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -51,11 +51,11 @@ private: const TargetRegisterInfo &TRI; }; - raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) - LLVM_ATTRIBUTE_UNUSED; - raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) { - return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg); - } +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const PrintRegister &PR); +raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) { + return OS << printReg(PR.Reg.Reg, &PR.TRI, 
PR.Reg.SubReg); +} class HexagonGenPredicate : public MachineFunctionPass { public: diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index bfea50e..6b48a21 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -422,12 +422,12 @@ static MCTargetStreamer *createHexagonNullTargetStreamer(MCStreamer &S) { return new HexagonTargetStreamer(S); } -static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo* STI, uint64_t F) { +[[maybe_unused]] static void clearFeature(MCSubtargetInfo *STI, uint64_t F) { if (STI->hasFeature(F)) STI->ToggleFeature(F); } -static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) { +[[maybe_unused]] static bool checkFeature(MCSubtargetInfo *STI, uint64_t F) { return STI->hasFeature(F); } diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp index 1ce8d7e3..df0c8c1 100644 --- a/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -264,9 +264,10 @@ public: } // end anonymous namespace -static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - Type *OrigTy, CCState &State) LLVM_ATTRIBUTE_UNUSED; +[[maybe_unused]] static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, Type *OrigTy, + CCState &State); static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 7f1ff45..2fd7327 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -3176,9 +3176,10 @@ static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT, F64Regs); } -static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - Type *OrigTy, CCState &State) LLVM_ATTRIBUTE_UNUSED; +[[maybe_unused]] static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, Type *OrigTy, + CCState &State); #include "MipsGenCallingConv.inc" diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 8851a0f..e857b2d 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -3356,10 +3356,10 @@ bool RISCVAsmParser::parseDirectiveAttribute() { bool isValidInsnFormat(StringRef Format, const MCSubtargetInfo &STI) { return StringSwitch<bool>(Format) - .Cases("r", "r4", "i", "b", "sb", "u", "j", "uj", "s", true) - .Cases("cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj", + .Cases({"r", "r4", "i", "b", "sb", "u", "j", "uj", "s"}, true) + .Cases({"cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj"}, STI.hasFeature(RISCV::FeatureStdExtZca)) - .Cases("qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es", + .Cases({"qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es"}, !STI.hasFeature(RISCV::Feature64Bit)) .Default(false); } diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 662d3f6..b1794b7 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -717,6 +717,18 @@ 
RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .clampScalar(0, sXLen, sXLen) .lower(); + LegalityPredicate InsertVectorEltPred = [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[0]; + LLT EltTy = Query.Types[1]; + return VecTy.getElementType() == EltTy; + }; + + getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) + .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), + InsertVectorEltPred, typeIs(2, sXLen))) + .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), InsertVectorEltPred, + typeIs(2, sXLen))); + getLegacyLegalizerInfo().computeTables(); verify(*ST.getInstrInfo()); } diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp index 50730c6..ab93bba 100644 --- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp +++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp @@ -43,7 +43,7 @@ const llvm::StringRef RISCVLMULInstrument::DESC_NAME = "RISCV-LMUL"; bool RISCVLMULInstrument::isDataValid(llvm::StringRef Data) { // Return true if Data is one of the valid LMUL strings return StringSwitch<bool>(Data) - .Cases("M1", "M2", "M4", "M8", "MF2", "MF4", "MF8", true) + .Cases({"M1", "M2", "M4", "M8", "MF2", "MF4", "MF8"}, true) .Default(false); } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7123a2d..eb87558 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1672,6 +1672,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.useRVVForFixedLengthVectors()) setTargetDAGCombine(ISD::BITCAST); + setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64); + // Disable strict node mutation. IsStrictFPEnabled = true; EnableExtLdPromotion = true; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index f863392a..637d61fe 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -270,7 +270,7 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2> // and floating point computation. // The V pipeline is modeled by the VCQ, VA, VL, and VS resources. There can // be one or two VA (Vector Arithmetic). -multiclass SiFive7ProcResources<bit extraVALU = false> { +multiclass SiFive7ProcResources<bit dualVALU = false> { let BufferSize = 0 in { def PipeA : ProcResource<1>; def PipeB : ProcResource<1>; @@ -279,7 +279,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> { def FDiv : ProcResource<1>; // FP Division/Sqrt // Arithmetic sequencer(s) - if extraVALU then { + if dualVALU then { // VA1 can handle any vector arithmetic instruction. def VA1 : ProcResource<1>; // VA2 generally can only handle simple vector arithmetic. @@ -305,7 +305,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> { def PipeAB : ProcResGroup<[!cast<ProcResource>(NAME#"PipeA"), !cast<ProcResource>(NAME#"PipeB")]>; - if extraVALU then + if dualVALU then def VA1OrVA2 : ProcResGroup<[!cast<ProcResource>(NAME#"VA1"), !cast<ProcResource>(NAME#"VA2")]>; } @@ -1550,10 +1550,10 @@ multiclass SiFive7ReadAdvance { /// This multiclass is a "bundle" of (1) processor resources (i.e. pipes) and /// (2) WriteRes entries. It's parameterized by config values that will /// eventually be supplied by different SchedMachineModels. 
-multiclass SiFive7SchedResources<int vlen, bit extraVALU, +multiclass SiFive7SchedResources<int vlen, bit dualVALU, SiFive7FPLatencies fpLatencies, bit hasFastGather> { - defm SiFive7 : SiFive7ProcResources<extraVALU>; + defm SiFive7 : SiFive7ProcResources<dualVALU>; // Pull out defs from SiFive7ProcResources so we can refer to them by name. defvar SiFive7PipeA = !cast<ProcResource>(NAME # SiFive7PipeA); @@ -1562,10 +1562,10 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU, defvar SiFive7IDiv = !cast<ProcResource>(NAME # SiFive7IDiv); defvar SiFive7FDiv = !cast<ProcResource>(NAME # SiFive7FDiv); // Pass SiFive7VA for VA1 and VA1OrVA2 if there is only 1 VALU. - defvar SiFive7VA1 = !if (extraVALU, + defvar SiFive7VA1 = !if (dualVALU, !cast<ProcResource>(NAME # SiFive7VA1), !cast<ProcResource>(NAME # SiFive7VA)); - defvar SiFive7VA1OrVA2 = !if (extraVALU, + defvar SiFive7VA1OrVA2 = !if (dualVALU, !cast<ProcResGroup>(NAME # SiFive7VA1OrVA2), !cast<ProcResource>(NAME # SiFive7VA)); defvar SiFive7VA = !cast<ProcResource>(NAME # SiFive7VA); @@ -1608,7 +1608,7 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel { HasStdExtZknh, HasStdExtZksed, HasStdExtZksh, HasStdExtZkr]; int VLEN = vlen; - bit HasExtraVALU = false; + bit HasDualVALU = false; SiFive7FPLatencies FPLatencies; bit HasFastGather = false; @@ -1635,7 +1635,7 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> { } def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> { - let HasExtraVALU = true; + let HasDualVALU = true; let FPLatencies = SiFive7LowFPLatencies; let HasFastGather = true; } @@ -1643,7 +1643,7 @@ def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> { /// Binding models to their scheduling resources. foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in { let SchedModel = model in - defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU, + defm model.Name : SiFive7SchedResources<model.VLEN, model.HasDualVALU, model.FPLatencies, model.HasFastGather>; } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index a466ab2..a0cff4d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3765,7 +3765,6 @@ void SPIRVInstructionSelector::decorateUsesAsNonUniform( SPIRV::Decoration::NonUniformEXT, {}); } } - return; } bool SPIRVInstructionSelector::extractSubvector( diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp index 2934c88..fa08d44 100644 --- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp @@ -246,8 +246,7 @@ SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, } } -static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI) -{ +[[maybe_unused]] static bool verifyLeafProcRegUse(MachineRegisterInfo *MRI) { for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) if (MRI->isPhysRegUsed(reg)) diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp index d9c8e22..6e99fc3 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp @@ -23,7 +23,7 @@ std::optional<wasm::ValType> WebAssembly::parseType(StringRef Type) { .Case("i64", wasm::ValType::I64) .Case("f32", wasm::ValType::F32) 
.Case("f64", wasm::ValType::F64) - .Cases("v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2", + .Cases({"v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2"}, wasm::ValType::V128) .Case("funcref", wasm::ValType::FUNCREF) .Case("externref", wasm::ValType::EXTERNREF) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 0f6e1ca..ed54404d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1763,6 +1763,26 @@ defm RELAXED_DOT : "i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs", "i16x8.relaxed_dot_i8x16_i7x16_s", 0x112>; +def : Pat< + (v8i16 (add + (wasm_shuffle + (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)), + (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)), + (i32 0), (i32 1), (i32 4), (i32 5), + (i32 8), (i32 9), (i32 12), (i32 13), + (i32 16), (i32 17), (i32 20), (i32 21), + (i32 24), (i32 25), (i32 28), (i32 29)), + (wasm_shuffle + (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)), + (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)), + (i32 2), (i32 3), (i32 6), (i32 7), + (i32 10), (i32 11), (i32 14), (i32 15), + (i32 18), (i32 19), (i32 22), (i32 23), + (i32 26), (i32 27), (i32 30), (i32 31))) + ), + (v8i16 (RELAXED_DOT v16i8:$lhs, v16i8:$rhs)) +>; + defm RELAXED_DOT_ADD : RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc), (outs), (ins), @@ -1771,6 +1791,18 @@ defm RELAXED_DOT_ADD : "i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc", "i32x4.relaxed_dot_i8x16_i7x16_add_s", 0x113>; +def : Pat< + (v4i32 (add + (v4i32 (int_wasm_extadd_pairwise_signed + (v8i16 (int_wasm_relaxed_dot_i8x16_i7x16_signed v16i8:$lhs, v16i8:$rhs)))), + (v4i32 V128:$acc))), + (v4i32 (RELAXED_DOT_ADD v16i8:$lhs, v16i8:$rhs, (v4i32 V128:$acc))) + >; + +def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v16i8 V128:$lhs), + (v16i8 V128:$rhs))), + (RELAXED_DOT_ADD $lhs, $rhs, $acc)>, Requires<[HasRelaxedSIMD]>; + //===----------------------------------------------------------------------===// // Relaxed BFloat16 dot product //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index a8908d4..ac251fd 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -3514,15 +3514,16 @@ bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, // xacquire <insn> ; xacquire must be accompanied by 'lock' bool IsPrefix = StringSwitch<bool>(Name) - .Cases("cs", "ds", "es", "fs", "gs", "ss", true) - .Cases("rex64", "data32", "data16", "addr32", "addr16", true) - .Cases("xacquire", "xrelease", true) - .Cases("acquire", "release", isParsingIntelSyntax()) + .Cases({"cs", "ds", "es", "fs", "gs", "ss"}, true) + .Cases({"rex64", "data32", "data16", "addr32", "addr16"}, true) + .Cases({"xacquire", "xrelease"}, true) + .Cases({"acquire", "release"}, isParsingIntelSyntax()) .Default(false); auto isLockRepeatNtPrefix = [](StringRef N) { return StringSwitch<bool>(N) - .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true) + .Cases({"lock", "rep", "repe", "repz", "repne", "repnz", "notrack"}, + true) .Default(false); }; diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index b81641f..28fa2cd 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ 
b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -414,8 +414,6 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, getActionDefinitionsBuilder(G_SEXT_INREG).lower(); - getActionDefinitionsBuilder(G_IS_FPCLASS).lower(); - // fp constants getActionDefinitionsBuilder(G_FCONSTANT) .legalFor({s32, s64}) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 6db780f..8e08d16 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1338,6 +1338,10 @@ def ProcessorFeatures { list<SubtargetFeature> PTLFeatures = !listremove(ARLSFeatures, [FeatureWIDEKL]); + // Novalake + list<SubtargetFeature> NVLFeatures = + !listconcat(PTLFeatures, [FeaturePREFETCHI]); + // Clearwaterforest list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI, FeatureAVXVNNIINT16, @@ -1883,6 +1887,9 @@ foreach P = ["pantherlake", "wildcatlake"] in { def : ProcModel<P, AlderlakePModel, ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>; } +def : ProcModel<"novalake", AlderlakePModel, ProcessorFeatures.NVLFeatures, + ProcessorFeatures.ADLTuning>; + def : ProcModel<"clearwaterforest", AlderlakePModel, ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>; def : ProcModel<"emeraldrapids", SapphireRapidsModel, diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp index e0991aa..9f88fda 100644 --- a/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -602,8 +602,7 @@ namespace { friend bool operator<(const TableEntry &TE, unsigned V) { return TE.from < V; } - friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V, - const TableEntry &TE) { + [[maybe_unused]] friend bool operator<(unsigned V, const TableEntry &TE) { return V < TE.from; } }; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c32b1a6..a0b64ff 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58342,11 +58342,12 @@ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) { } else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) { SDValue Src = Op1; SDValue Op10 = Op1.getOperand(0); - if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1))) { - // res, flags2 = sub 0, (and (xor X, -1), Y) + if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1)) && + llvm::isOneConstant(Op1.getOperand(1))) { + // res, flags2 = sub 0, (and (xor X, -1), 1) // cload/cstore ..., cond_ne, flag2 // -> - // res, flags2 = sub 0, (and X, Y) + // res, flags2 = sub 0, (and X, 1) // cload/cstore ..., cond_e, flag2 Src = DAG.getNode(ISD::AND, DL, Op1.getValueType(), Op10.getOperand(0), Op1.getOperand(1)); diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 0fd44b7..ec31675 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1256,8 +1256,17 @@ def : Pat<(i64 (X86Wrapper tconstpool :$dst)), (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tjumptable :$dst)), (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>; -def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; + +// If the globaladdr is an absolute_symbol, don't bother using the sign extending +// instruction since there's no benefit to using it with absolute symbols. 
+def globalAddrNoAbsSym : PatLeaf<(tglobaladdr:$dst), [{ + auto *GA = cast<GlobalAddressSDNode>(N); + return !GA->getGlobal()->getAbsoluteSymbolRange(); +}]>; +def : Pat<(i64 (X86Wrapper globalAddrNoAbsSym:$dst)), + (MOV64ri32 tglobaladdr:$dst)>, + Requires<[KernelCode]>; + def : Pat<(i64 (X86Wrapper texternalsym:$dst)), (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper mcsym:$dst)), diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 481a9be..1fca466f 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1946,16 +1946,14 @@ static void addConstantComments(const MachineInstr *MI, CASE_ARITH_RM(PMADDUBSW) { unsigned SrcIdx = getSrcIdx(MI, 1); if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { - if (C->getType()->getScalarSizeInBits() == 8) { - std::string Comment; - raw_string_ostream CS(Comment); - unsigned VectorWidth = - X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); - CS << "["; - printConstant(C, VectorWidth, CS); - CS << "]"; - OutStreamer.AddComment(CS.str()); - } + std::string Comment; + raw_string_ostream CS(Comment); + unsigned VectorWidth = + X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); + CS << "["; + printConstant(C, VectorWidth, CS); + CS << "]"; + OutStreamer.AddComment(CS.str()); } break; } @@ -1967,16 +1965,14 @@ static void addConstantComments(const MachineInstr *MI, CASE_ARITH_RM(PMULHRSW) { unsigned SrcIdx = getSrcIdx(MI, 1); if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { - if (C->getType()->getScalarSizeInBits() == 16) { - std::string Comment; - raw_string_ostream CS(Comment); - unsigned VectorWidth = - X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); - CS << "["; - printConstant(C, VectorWidth, CS); - CS << "]"; - OutStreamer.AddComment(CS.str()); - } + std::string Comment; + raw_string_ostream CS(Comment); + unsigned VectorWidth = + X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); + CS << "["; + printConstant(C, VectorWidth, CS); + CS << "]"; + OutStreamer.AddComment(CS.str()); } break; } |
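The combineX86CloadCstore change above is easy to sanity-check with a standalone program (an editorial sketch, not part of the patch): with a mask of 1, rewriting cond_ne on (~X & 1) into cond_e on (X & 1) is exact, while for a general mask Y the two predicates disagree, which is why the combine now requires the AND constant to be 1.

#include <cassert>
#include <cstdint>

int main() {
  // Mask == 1: testing the complemented low bit equals the inverted test
  // of the low bit, so flipping cond_ne to cond_e is sound.
  for (uint32_t X = 0; X < 256; ++X)
    assert(((~X & 1u) != 0) == ((X & 1u) == 0));

  // General mask: X = 1, Y = 3 makes both (~X & Y) and (X & Y) non-zero,
  // so the unrestricted rewrite would change behavior.
  uint32_t X = 1, Y = 3;
  assert((~X & Y) != 0 && (X & Y) != 0);
  return 0;
}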