Diffstat (limited to 'llvm/lib/Target')
19 files changed, 233 insertions, 64 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 38718c4..7504f1a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -150,7 +150,10 @@ public:
       if (!CVisited.insert(CII).second)
         continue;

-      if (CII->getParent() == II->getParent() && !IsLookThru(II))
+      // Same-BB filter must look at the *user*; and allow non-lookthrough
+      // users when the def is a PHI (loop-header pattern).
+      if (CII->getParent() == II->getParent() && !IsLookThru(CII) &&
+          !isa<PHINode>(II))
         continue;

       if (isOpLegal(CII))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index d9bfeae..0a59132 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -2562,7 +2562,9 @@ bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
   for (Function *F : NeedsPostProcess)
     Splitter.processFunction(*F);
   for (Function *F : Intrinsics) {
-    if (isRemovablePointerIntrinsic(F->getIntrinsicID())) {
+    // use_empty() can also occur with cases like masked load, which will
+    // have been rewritten out of the module by now but not erased.
+    if (F->use_empty() || isRemovablePointerIntrinsic(F->getIntrinsicID())) {
       F->eraseFromParent();
     } else {
       std::optional<Function *> NewF = Intrinsic::remangleIntrinsicFunction(F);
diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index 5be4713..9b11201 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -957,8 +957,10 @@ void LoongArchAsmParser::emitLoadAddressAbs(MCInst &Inst, SMLoc IDLoc,
                             : Inst.getOperand(2).getExpr();

   InstSeq Insts;
+  // To distinguish between la.abs and %abs_hi20, la.abs will generate
+  // R_LARCH_MARK_LA and R_LARCH_ABS_HI20 relocations.
   Insts.push_back(
-      LoongArchAsmParser::Inst(LoongArch::LU12I_W, ELF::R_LARCH_ABS_HI20));
+      LoongArchAsmParser::Inst(LoongArch::LU12I_W, ELF::R_LARCH_MARK_LA));
   Insts.push_back(
       LoongArchAsmParser::Inst(LoongArch::ORI, ELF::R_LARCH_ABS_LO12));
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 098bcfa..4cfbfca 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2319,6 +2319,53 @@ static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
   return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
 }

+/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
+static SDValue
+lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                             SDValue V1, SDValue V2, SelectionDAG &DAG,
+                             const LoongArchSubtarget &Subtarget) {
+  // LoongArch LASX only supports xvinsve0.{w/d}.
+  if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
+      VT != MVT::v4f64)
+    return SDValue();
+
+  MVT GRLenVT = Subtarget.getGRLenVT();
+  int MaskSize = Mask.size();
+  assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+  // Check if exactly one element of the Mask is replaced by 'Replaced', while
+  // all other elements are either 'Base + i' or undef (-1). On success, return
+  // the index of the replaced element. Otherwise, just return -1.
+  auto checkReplaceOne = [&](int Base, int Replaced) -> int {
+    int Idx = -1;
+    for (int i = 0; i < MaskSize; ++i) {
+      if (Mask[i] == Base + i || Mask[i] == -1)
+        continue;
+      if (Mask[i] != Replaced)
+        return -1;
+      if (Idx == -1)
+        Idx = i;
+      else
+        return -1;
+    }
+    return Idx;
+  };
+
+  // Case 1: the lowest element of V2 replaces one element in V1.
+  int Idx = checkReplaceOne(0, MaskSize);
+  if (Idx != -1)
+    return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
+                       DAG.getConstant(Idx, DL, GRLenVT));
+
+  // Case 2: the lowest element of V1 replaces one element in V2.
+  Idx = checkReplaceOne(MaskSize, 0);
+  if (Idx != -1)
+    return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
+                       DAG.getConstant(Idx, DL, GRLenVT));
+
+  return SDValue();
+}
+
 /// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
@@ -2595,6 +2642,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG,
                                            Subtarget, Zeroable)))
     return Result;
+  if ((Result =
+           lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+    return Result;
   if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
                                                 Subtarget)))
     return Result;
@@ -7453,6 +7503,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
     NODE_NAME_CASE(XVPERM)
     NODE_NAME_CASE(XVREPLVE0)
     NODE_NAME_CASE(XVREPLVE0Q)
+    NODE_NAME_CASE(XVINSVE0)
     NODE_NAME_CASE(VPICK_SEXT_ELT)
     NODE_NAME_CASE(VPICK_ZEXT_ELT)
     NODE_NAME_CASE(VREPLVE)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 9b60a9f..8a4d774 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -151,6 +151,7 @@ enum NodeType : unsigned {
   XVPERM,
   XVREPLVE0,
   XVREPLVE0Q,
+  XVINSVE0,

   // Extended vector element extraction
   VPICK_SEXT_ELT,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index bbc0489..5143d53 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -20,6 +20,7 @@ def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;
 def loongarch_xvperm: SDNode<"LoongArchISD::XVPERM", SDT_LoongArchXVPERM>;
 def loongarch_xvreplve0: SDNode<"LoongArchISD::XVREPLVE0", SDT_LoongArchXVREPLVE0>;
 def loongarch_xvreplve0q: SDNode<"LoongArchISD::XVREPLVE0Q", SDT_LoongArchXVREPLVE0>;
+def loongarch_xvinsve0 : SDNode<"LoongArchISD::XVINSVE0", SDT_LoongArchV2RUimm>;
 def loongarch_xvmskltz: SDNode<"LoongArchISD::XVMSKLTZ", SDT_LoongArchVMSKCOND>;
 def loongarch_xvmskgez: SDNode<"LoongArchISD::XVMSKGEZ", SDT_LoongArchVMSKCOND>;
 def loongarch_xvmskeqz: SDNode<"LoongArchISD::XVMSKEQZ", SDT_LoongArchVMSKCOND>;
@@ -1708,6 +1709,14 @@ def : Pat<(vector_insert v4f64:$xd, (f64(bitconvert i64:$rj)), uimm2:$imm),
           (XVINSGR2VR_D v4f64:$xd, GPR:$rj, uimm2:$imm)>;

 // XVINSVE0_{W/D}
+def : Pat<(loongarch_xvinsve0 v8i32:$xd, v8i32:$xj, uimm3:$imm),
+          (XVINSVE0_W v8i32:$xd, v8i32:$xj, uimm3:$imm)>;
+def : Pat<(loongarch_xvinsve0 v4i64:$xd, v4i64:$xj, uimm2:$imm),
+          (XVINSVE0_D v4i64:$xd, v4i64:$xj, uimm2:$imm)>;
+def : Pat<(loongarch_xvinsve0 v8f32:$xd, v8f32:$xj, uimm3:$imm),
+          (XVINSVE0_W v8f32:$xd, v8f32:$xj, uimm3:$imm)>;
+def : Pat<(loongarch_xvinsve0 v4f64:$xd, v4f64:$xj, uimm2:$imm),
+          (XVINSVE0_D v4f64:$xd, v4f64:$xj, uimm2:$imm)>;
 def : Pat<(vector_insert v8f32:$xd, FPR32:$fj, uimm3:$imm),
           (XVINSVE0_W v8f32:$xd, (SUBREG_TO_REG(i64 0), FPR32:$fj, sub_32),
            uimm3:$imm)>;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
index 0d77617..8ecb62d 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
@@ -32,6 +32,7 @@ static StringRef getLoongArchSpecifierName(uint16_t S) {
     return "b16";
   case ELF::R_LARCH_B21:
     return "b21";
+  case ELF::R_LARCH_MARK_LA:
   case ELF::R_LARCH_ABS_HI20:
     return "abs_hi20";
   case ELF::R_LARCH_ABS_LO12:
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index b7ead5e..f0e2bc4 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -161,6 +161,13 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
   case ELF::R_LARCH_B26:
     FixupKind = LoongArch::fixup_loongarch_b26;
     break;
+  case ELF::R_LARCH_MARK_LA:
+    // Match gas behavior: generate `R_LARCH_MARK_LA` relocation when using
+    // `la.abs`.
+    Fixups.push_back(
+        MCFixup::create(0, MCConstantExpr::create(0, Ctx),
+                        FirstLiteralRelocationKind + ELF::R_LARCH_MARK_LA));
+    [[fallthrough]];
   case ELF::R_LARCH_ABS_HI20:
     FixupKind = LoongArch::fixup_loongarch_abs_hi20;
     break;
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index cb57c43..d4d9e54 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -193,7 +193,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
   // we need to invert the branch condition to jump over TrueBB when the
   // condition is false.
   auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
-  CC = RISCVCC::getOppositeBranchCondition(CC);
+  CC = RISCVCC::getInverseBranchCondition(CC);

   // Insert branch instruction.
   BuildMI(MBB, MBBI, DL, TII->get(RISCVCC::getBrCond(CC)))
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 56db09a..70b6c7e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1023,6 +1023,37 @@ static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
   Cond.push_back(LastInst.getOperand(1));
 }

+static unsigned getInverseXqcicmOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Unexpected Opcode");
+  case RISCV::QC_MVEQ:
+    return RISCV::QC_MVNE;
+  case RISCV::QC_MVNE:
+    return RISCV::QC_MVEQ;
+  case RISCV::QC_MVLT:
+    return RISCV::QC_MVGE;
+  case RISCV::QC_MVGE:
+    return RISCV::QC_MVLT;
+  case RISCV::QC_MVLTU:
+    return RISCV::QC_MVGEU;
+  case RISCV::QC_MVGEU:
+    return RISCV::QC_MVLTU;
+  case RISCV::QC_MVEQI:
+    return RISCV::QC_MVNEI;
+  case RISCV::QC_MVNEI:
+    return RISCV::QC_MVEQI;
+  case RISCV::QC_MVLTI:
+    return RISCV::QC_MVGEI;
+  case RISCV::QC_MVGEI:
+    return RISCV::QC_MVLTI;
+  case RISCV::QC_MVLTUI:
+    return RISCV::QC_MVGEUI;
+  case RISCV::QC_MVGEUI:
+    return RISCV::QC_MVLTUI;
+  }
+}
+
 unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, unsigned SelectOpc) {
   switch (SelectOpc) {
   default:
@@ -1134,7 +1165,7 @@ unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, unsigned SelectOpc) {
   }
 }

-RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
+RISCVCC::CondCode RISCVCC::getInverseBranchCondition(RISCVCC::CondCode CC) {
   switch (CC) {
   default:
     llvm_unreachable("Unrecognized conditional branch");
@@ -1554,7 +1585,7 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
     return Register();
   };

-  unsigned NewOpc = RISCVCC::getBrCond(getOppositeBranchCondition(CC));
+  unsigned NewOpc = RISCVCC::getBrCond(getInverseBranchCondition(CC));

   // Might be case 1.
   // Don't change 0 to 1 since we can use x0.
@@ -1801,7 +1832,7 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
   // Add condition code, inverting if necessary.
   auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
   if (Invert)
-    CC = RISCVCC::getOppositeBranchCondition(CC);
+    CC = RISCVCC::getInverseBranchCondition(CC);
   NewMI.addImm(CC);

   // Copy the false register.
@@ -3762,6 +3793,19 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
      return false;

    // Operands 1 and 2 are commutable, if we switch the opcode.
    return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
+  case RISCV::QC_MVEQ:
+  case RISCV::QC_MVNE:
+  case RISCV::QC_MVLT:
+  case RISCV::QC_MVGE:
+  case RISCV::QC_MVLTU:
+  case RISCV::QC_MVGEU:
+  case RISCV::QC_MVEQI:
+  case RISCV::QC_MVNEI:
+  case RISCV::QC_MVLTI:
+  case RISCV::QC_MVGEI:
+  case RISCV::QC_MVLTUI:
+  case RISCV::QC_MVGEUI:
+    return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 4);
   case RISCV::TH_MULA:
   case RISCV::TH_MULAW:
   case RISCV::TH_MULAH:
@@ -3974,11 +4018,28 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
                                                    OpIdx2);
   }
+  case RISCV::QC_MVEQ:
+  case RISCV::QC_MVNE:
+  case RISCV::QC_MVLT:
+  case RISCV::QC_MVGE:
+  case RISCV::QC_MVLTU:
+  case RISCV::QC_MVGEU:
+  case RISCV::QC_MVEQI:
+  case RISCV::QC_MVNEI:
+  case RISCV::QC_MVLTI:
+  case RISCV::QC_MVGEI:
+  case RISCV::QC_MVLTUI:
+  case RISCV::QC_MVGEUI: {
+    auto &WorkingMI = cloneIfNew(MI);
+    WorkingMI.setDesc(get(getInverseXqcicmOpcode(MI.getOpcode())));
+    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
+                                                   OpIdx2);
+  }
   case RISCV::PseudoCCMOVGPRNoX0:
   case RISCV::PseudoCCMOVGPR: {
     // CCMOV can be commuted by inverting the condition.
     auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
-    CC = RISCVCC::getOppositeBranchCondition(CC);
+    CC = RISCVCC::getInverseBranchCondition(CC);
     auto &WorkingMI = cloneIfNew(MI);
     WorkingMI.getOperand(3).setImm(CC);
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 2bc499b..42a0c4c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -44,7 +44,7 @@ enum CondCode {
   COND_INVALID
 };

-CondCode getOppositeBranchCondition(CondCode);
+CondCode getInverseBranchCondition(CondCode);
 unsigned getBrCond(CondCode CC, unsigned SelectOpc = 0);
 } // end of namespace RISCVCC
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 13b02d1..ff4a040 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -604,7 +604,7 @@ class QCILICC<bits<3> funct3, bits<2> funct2, DAGOperand InTyRs2, string opcodes
   let Inst{31-25} = {simm, funct2};
 }

-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in
 class QCIMVCC<bits<3> funct3, string opcodestr>
     : RVInstR4<0b00, funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd_wb),
                (ins GPRNoX0:$rd, GPRNoX0:$rs1, GPRNoX0:$rs2, GPRNoX0:$rs3),
@@ -612,7 +612,7 @@ class QCIMVCC<bits<3> funct3, string opcodestr>
   let Constraints = "$rd = $rd_wb";
 }

-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in
 class QCIMVCCI<bits<3> funct3, string opcodestr, DAGOperand immType>
     : RVInstR4<0b10, funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd_wb),
                (ins GPRNoX0:$rd, GPRNoX0:$rs1, immType:$imm, GPRNoX0:$rs3),
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 563f3bb..d4124ae 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -167,6 +167,42 @@ static bool canUseShiftPair(Instruction *Inst, const APInt &Imm) {
   return false;
 }

+// If this is i64 AND is part of (X & -(1 << C1) & 0xffffffff) == C2 << C1),
+// DAGCombiner can convert this to (sraiw X, C1) == sext(C2) for RV64. On RV32,
+// the type will be split so only the lower 32 bits need to be compared using
+// (srai/srli X, C) == C2.
+static bool canUseShiftCmp(Instruction *Inst, const APInt &Imm) {
+  if (!Inst->hasOneUse())
+    return false;
+
+  // Look for equality comparison.
+  auto *Cmp = dyn_cast<ICmpInst>(*Inst->user_begin());
+  if (!Cmp || !Cmp->isEquality())
+    return false;
+
+  // Right hand side of comparison should be a constant.
+  auto *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
+  if (!C)
+    return false;
+
+  uint64_t Mask = Imm.getZExtValue();
+
+  // Mask should be of the form -(1 << C) in the lower 32 bits.
+  if (!isUInt<32>(Mask) || !isPowerOf2_32(-uint32_t(Mask)))
+    return false;
+
+  // Comparison constant should be a subset of Mask.
+  uint64_t CmpC = C->getZExtValue();
+  if ((CmpC & Mask) != CmpC)
+    return false;
+
+  // We'll need to sign extend the comparison constant and shift it right. Make
+  // sure the new constant can use addi/xori+seqz/snez.
+  unsigned ShiftBits = llvm::countr_zero(Mask);
+  int64_t NewCmpC = SignExtend64<32>(CmpC) >> ShiftBits;
+  return NewCmpC >= -2048 && NewCmpC <= 2048;
+}
+
 InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                                 const APInt &Imm, Type *Ty,
                                                 TTI::TargetCostKind CostKind,
@@ -224,6 +260,9 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
     if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
         canUseShiftPair(Inst, Imm))
       return TTI::TCC_Free;
+    if (Inst && Idx == 1 && Imm.getBitWidth() == 64 &&
+        canUseShiftCmp(Inst, Imm))
+      return TTI::TCC_Free;
     Takes12BitImm = true;
     break;
   case Instruction::Add:
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index b4fc8da..db85e33 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -587,7 +587,8 @@ bool SPIRVLegalizerInfo::legalizeIsFPClass(
   }

   if (FPClassTest PartialCheck = Mask & fcNan) {
-    auto InfWithQnanBitC = buildSPIRVConstant(IntTy, Inf | QNaNBitMask);
+    auto InfWithQnanBitC =
+        buildSPIRVConstant(IntTy, std::move(Inf) | QNaNBitMask);
     if (PartialCheck == fcNan) {
       // isnan(V) ==> abs(V) u> int(inf)
       appendToRes(
@@ -613,7 +614,7 @@ bool SPIRVLegalizerInfo::legalizeIsFPClass(
     APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
     auto ExpMinusOne = assignSPIRVTy(
         MIRBuilder.buildSub(IntTy, Abs, buildSPIRVConstant(IntTy, ExpLSB)));
-    APInt MaxExpMinusOne = ExpMask - ExpLSB;
+    APInt MaxExpMinusOne = std::move(ExpMask) - ExpLSB;
     auto NormalRes = assignSPIRVTy(
         MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
                              buildSPIRVConstant(IntTy, MaxExpMinusOne)));
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e7eb67a..cd04ff5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31215,16 +31215,16 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
   unsigned NumElts = VT.getVectorNumElements();

   if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
-    if (IsFSHR)
-      std::swap(Op0, Op1);
     if (IsCstSplat) {
+      if (IsFSHR)
+        std::swap(Op0, Op1);
       uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
       SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
       return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
                            {Op0, Op1, Imm}, DAG, Subtarget);
     }

-    return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
+    return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT,
                          {Op0, Op1, Amt}, DAG, Subtarget);
   }

   assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
@@ -35139,8 +35139,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(VALIGN)
   NODE_NAME_CASE(VSHLD)
   NODE_NAME_CASE(VSHRD)
-  NODE_NAME_CASE(VSHLDV)
-  NODE_NAME_CASE(VSHRDV)
   NODE_NAME_CASE(PSHUFD)
   NODE_NAME_CASE(PSHUFHW)
   NODE_NAME_CASE(PSHUFLW)
@@ -45171,6 +45169,7 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
   case X86ISD::Wrapper:
   case X86ISD::WrapperRIP:
     return true;
+  case X86ISD::INSERTPS:
   case X86ISD::BLENDI:
   case X86ISD::PSHUFB:
   case X86ISD::PSHUFD:
@@ -45241,6 +45240,7 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
   case X86ISD::BLENDV:
     return false;
   // SSE target shuffles.
+  case X86ISD::INSERTPS:
   case X86ISD::PSHUFB:
   case X86ISD::PSHUFD:
   case X86ISD::UNPCKL:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 8ab8c66..b55556a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -471,8 +471,7 @@ namespace llvm {
     // VBMI2 Concat & Shift.
     VSHLD,
     VSHRD,
-    VSHLDV,
-    VSHRDV,
+
     // Shuffle Packed Values at 128-bit granularity.
     SHUF128,
     MOVDDUP,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2371ed4..564810c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12300,72 +12300,76 @@ defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
 // VBMI2
 //===----------------------------------------------------------------------===//

-multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
   let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in {
     defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                  (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                  "$src3, $src2", "$src2, $src3",
-                                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
+                                 !if(SwapLR,
+                                     (VTI.VT (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src3))),
+                                     (VTI.VT (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src3))))>,
                                  T8, PD, EVEX, VVVV, Sched<[sched]>;
     defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                  (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                  "$src3, $src2", "$src2, $src3",
-                                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
-                                          (VTI.VT (VTI.LdFrag addr:$src3))))>,
+                                 !if(SwapLR,
+                                     (VTI.VT (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.LdFrag addr:$src3)))),
+                                     (VTI.VT (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT (VTI.LdFrag addr:$src3)))))>,
                                  T8, PD, EVEX, VVVV,
                                  Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
 }

-multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
                                X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
-         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
+         : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched, VTI> {
   let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in
     defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
                                   "$src2, ${src3}"#VTI.BroadcastStr,
-                                  (OpNode VTI.RC:$src1, VTI.RC:$src2,
-                                   (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
+                                  !if(SwapLR,
+                                      (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.BroadcastLdFrag addr:$src3))),
+                                      (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT (VTI.BroadcastLdFrag addr:$src3))))>,
                                   T8, PD, EVEX, VVVV, EVEX_B,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
 }

-multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
                                      X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo VTI> {
   let Predicates = [HasVBMI2] in
-    defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
+    defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.ZMM, VTI.info512>,
              EVEX_V512;
   let Predicates = [HasVBMI2, HasVLX] in {
-    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
+    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.YMM, VTI.info256>,
                 EVEX_V256;
-    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
+    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.XMM, VTI.info128>,
                 EVEX_V128;
   }
 }

-multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
                                       X86SchedWriteWidths sched,
                                       AVX512VLVectorVTInfo VTI> {
   let Predicates = [HasVBMI2] in
-    defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
+    defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.ZMM, VTI.info512>,
              EVEX_V512;
   let Predicates = [HasVBMI2, HasVLX] in {
-    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
+    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.YMM, VTI.info256>,
                 EVEX_V256;
-    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
+    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.XMM, VTI.info128>,
                 EVEX_V128;
   }
 }

 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
-                           SDNode OpNode, X86SchedWriteWidths sched> {
-  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
+                           SDNode OpNode, bit SwapLR, X86SchedWriteWidths sched> {
+  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, SwapLR, sched,
                                      avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
-  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
+  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, SwapLR, sched,
                                       avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
-  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
+  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, SwapLR, sched,
                                       avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
 }
@@ -12381,8 +12385,8 @@ multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
 }

 // Concat & Shift
-defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
-defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
+defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", fshl, 0, SchedWriteVecIMul>;
+defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", fshr, 1, SchedWriteVecIMul>;
 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index b4768590..031fdc1 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -25,18 +25,12 @@ let SchedRW = [WriteLEA] in {
                    [(set GR32:$dst, lea32addr:$src)]>,
                    OpSize32, Requires<[Not64BitMode]>;

-  let Predicates = [HasNDD], isCodeGenOnly = 1 in {
-    def LEA64_8r : I<0x8D, MRMSrcMem, (outs GR8:$dst), (ins lea64_8mem:$src),
-                     "lea{b}\t{$src|$dst}, {$dst|$src}",
-                     [(set GR8:$dst, lea64_iaddr:$src)]>,
-                     OpSize16,
-                     Requires<[In64BitMode]>;
-
-    def LEA64_16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins lea64_16mem:$src),
-                      "lea{w}\t{$src|$dst}, {$dst|$src}",
-                      [(set GR16:$dst, lea64_iaddr:$src)]>,
-                      OpSize16,
-                      Requires<[In64BitMode]>;
+  let isCodeGenOnly = 1 in {
+    def LEA64_8r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_8mem:$src),
+                     "lea{l}\t{$src|$dst}, {$dst|$src}", []>, OpSize32;
+
+    def LEA64_16r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_16mem:$src),
+                      "lea{l}\t{$src|$dst}, {$dst|$src}", []>, OpSize32;
   }

   def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src),
@@ -51,6 +45,11 @@ let SchedRW = [WriteLEA] in {
                   [(set GR64:$dst, lea64addr:$src)]>;
 } // SchedRW

+let Predicates = [HasNDD] in {
+  def : Pat<(i8 lea64_iaddr:$src), (EXTRACT_SUBREG (LEA64_8r lea64_8mem:$src), sub_8bit)>;
+  def : Pat<(i16 lea64_iaddr:$src), (EXTRACT_SUBREG (LEA64_16r lea64_16mem:$src), sub_16bit)>;
+}
+
 // Pseudo instruction for lea that prevent optimizer from eliminating
 // the instruction.
 let SchedRW = [WriteLEA], isPseudo = true, hasSideEffects = 1 in {
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 0c20ffe..5321ecf 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -406,16 +406,6 @@ def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
 def X86VShld : SDNode<"X86ISD::VSHLD", SDTShuff3OpI>;
 def X86VShrd : SDNode<"X86ISD::VSHRD", SDTShuff3OpI>;

-def X86VShldv : SDNode<"X86ISD::VSHLDV",
-                       SDTypeProfile<1, 3, [SDTCisVec<0>,
-                                            SDTCisSameAs<0,1>,
-                                            SDTCisSameAs<0,2>,
-                                            SDTCisSameAs<0,3>]>>;
-def X86VShrdv : SDNode<"X86ISD::VSHRDV",
-                       SDTypeProfile<1, 3, [SDTCisVec<0>,
-                                            SDTCisSameAs<0,1>,
-                                            SDTCisSameAs<0,2>,
-                                            SDTCisSameAs<0,3>]>>;
 def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
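
Note (not part of the commit): a minimal LLVM IR sketch of the shuffle shape the new lowerVECTOR_SHUFFLE_XVINSVE0 hook matches. The function name and the llc invocation are assumptions made for illustration only. Lane 2 of %a is replaced by element 0 of %b (mask value 8 == MaskSize), every other lane is Base + i, so checkReplaceOne(0, MaskSize) returns 2 and the shuffle can lower to xvinsve0.w with immediate 2.

; sketch only; e.g. llc -mtriple=loongarch64 -mattr=+lasx
define <8 x i32> @xvinsve0_w_sketch(<8 x i32> %a, <8 x i32> %b) {
  ; lanes 0-7 keep %a, except lane 2, which takes element 0 of %b (index 8)
  %r = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %r
}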