Diffstat (limited to 'llvm/lib/Target/RISCV')

 llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp        |   2
 llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp | 148
 llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp   |  24
 llvm/lib/Target/RISCV/RISCVFeatures.td                   |   2
 llvm/lib/Target/RISCV/RISCVGISel.td                      | 108
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp              | 139
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp                 |  29
 llvm/lib/Target/RISCV/RISCVInstrInfo.h                   |  19
 llvm/lib/Target/RISCV/RISCVInstrInfo.td                  |   4
 llvm/lib/Target/RISCV/RISCVInstrInfoA.td                 |  74
 llvm/lib/Target/RISCV/RISCVInstrInfoZa.td                |  56
 llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td            |  16
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td                |  34
 llvm/lib/Target/RISCV/RISCVRegisterInfo.td               |   2
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td               |  36
 llvm/lib/Target/RISCV/RISCVScheduleV.td                  |  55
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp       | 191
17 files changed, 611 insertions, 328 deletions
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp index ecfb5fe..eb41588 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp @@ -334,7 +334,7 @@ static bool isLegalElementTypeForRVV(Type *EltTy, if (EltTy->isIntegerTy(64)) return Subtarget.hasVInstructionsI64(); if (EltTy->isHalfTy()) - return Subtarget.hasVInstructionsF16(); + return Subtarget.hasVInstructionsF16Minimal(); if (EltTy->isBFloatTy()) return Subtarget.hasVInstructionsBF16Minimal(); if (EltTy->isFloatTy()) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 71c21e4..53633ea 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -675,6 +675,45 @@ static void getOperandsForBranch(Register CondReg, RISCVCC::CondCode &CC, CC = getRISCVCCFromICmp(Pred); } +/// Select the RISC-V Zalasr opcode for the G_LOAD or G_STORE operation +/// \p GenericOpc, appropriate for the GPR register bank and of memory access +/// size \p OpSize. +static unsigned selectZalasrLoadStoreOp(unsigned GenericOpc, unsigned OpSize) { + const bool IsStore = GenericOpc == TargetOpcode::G_STORE; + switch (OpSize) { + default: + llvm_unreachable("Unexpected memory size"); + case 8: + return IsStore ? RISCV::SB_RL : RISCV::LB_AQ; + case 16: + return IsStore ? RISCV::SH_RL : RISCV::LH_AQ; + case 32: + return IsStore ? RISCV::SW_RL : RISCV::LW_AQ; + case 64: + return IsStore ? RISCV::SD_RL : RISCV::LD_AQ; + } +} + +/// Select the RISC-V regimm opcode for the G_LOAD or G_STORE operation +/// \p GenericOpc, appropriate for the GPR register bank and of memory access +/// size \p OpSize. \returns \p GenericOpc if the combination is unsupported. +static unsigned selectRegImmLoadStoreOp(unsigned GenericOpc, unsigned OpSize) { + const bool IsStore = GenericOpc == TargetOpcode::G_STORE; + switch (OpSize) { + case 8: + // Prefer unsigned due to no c.lb in Zcb. + return IsStore ? RISCV::SB : RISCV::LBU; + case 16: + return IsStore ? RISCV::SH : RISCV::LH; + case 32: + return IsStore ? RISCV::SW : RISCV::LW; + case 64: + return IsStore ? RISCV::SD : RISCV::LD; + } + + return GenericOpc; +} + bool RISCVInstructionSelector::select(MachineInstr &MI) { MachineIRBuilder MIB(MI); @@ -736,6 +775,62 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) { MI.eraseFromParent(); return true; } + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_SEXT: { + bool IsSigned = Opc != TargetOpcode::G_ZEXT; + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI->getType(SrcReg); + unsigned SrcSize = SrcTy.getSizeInBits(); + + if (SrcTy.isVector()) + return false; // Should be handled by imported patterns. + + assert((*RBI.getRegBank(DstReg, *MRI, TRI)).getID() == + RISCV::GPRBRegBankID && + "Unexpected ext regbank"); + + // Use addiw SrcReg, 0 (sext.w) for i32. + if (IsSigned && SrcSize == 32) { + MI.setDesc(TII.get(RISCV::ADDIW)); + MI.addOperand(MachineOperand::CreateImm(0)); + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); + } + + // Use add.uw SrcReg, X0 (zext.w) for i32 with Zba. 
+ if (!IsSigned && SrcSize == 32 && STI.hasStdExtZba()) { + MI.setDesc(TII.get(RISCV::ADD_UW)); + MI.addOperand(MachineOperand::CreateReg(RISCV::X0, /*isDef=*/false)); + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); + } + + // Use sext.h/zext.h for i16 with Zbb. + if (SrcSize == 16 && STI.hasStdExtZbb()) { + MI.setDesc(TII.get(IsSigned ? RISCV::SEXT_H + : STI.isRV64() ? RISCV::ZEXT_H_RV64 + : RISCV::ZEXT_H_RV32)); + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); + } + + // Use pack(w) SrcReg, X0 for i16 zext with Zbkb. + if (!IsSigned && SrcSize == 16 && STI.hasStdExtZbkb()) { + MI.setDesc(TII.get(STI.is64Bit() ? RISCV::PACKW : RISCV::PACK)); + MI.addOperand(MachineOperand::CreateReg(RISCV::X0, /*isDef=*/false)); + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); + } + + // Fall back to shift pair. + auto ShiftLeft = + MIB.buildInstr(RISCV::SLLI, {&RISCV::GPRRegClass}, {SrcReg}) + .addImm(STI.getXLen() - SrcSize); + constrainSelectedInstRegOperands(*ShiftLeft, TII, TRI, RBI); + auto ShiftRight = MIB.buildInstr(IsSigned ? RISCV::SRAI : RISCV::SRLI, + {DstReg}, {ShiftLeft}) + .addImm(STI.getXLen() - SrcSize); + constrainSelectedInstRegOperands(*ShiftRight, TII, TRI, RBI); + MI.eraseFromParent(); + return true; + } case TargetOpcode::G_FCONSTANT: { // TODO: Use constant pool for complex constants. Register DstReg = MI.getOperand(0).getReg(); @@ -836,6 +931,59 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) { return selectImplicitDef(MI, MIB); case TargetOpcode::G_UNMERGE_VALUES: return selectUnmergeValues(MI, MIB); + case TargetOpcode::G_LOAD: + case TargetOpcode::G_STORE: { + GLoadStore &LdSt = cast<GLoadStore>(MI); + const Register ValReg = LdSt.getReg(0); + const Register PtrReg = LdSt.getPointerReg(); + LLT PtrTy = MRI->getType(PtrReg); + + const RegisterBank &RB = *RBI.getRegBank(ValReg, *MRI, TRI); + if (RB.getID() != RISCV::GPRBRegBankID) + return false; + +#ifndef NDEBUG + const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, *MRI, TRI); + // Check that the pointer register is valid. + assert(PtrRB.getID() == RISCV::GPRBRegBankID && + "Load/Store pointer operand isn't a GPR"); + assert(PtrTy.isPointer() && "Load/Store pointer operand isn't a pointer"); +#endif + + // Can only handle AddressSpace 0. + if (PtrTy.getAddressSpace() != 0) + return false; + + unsigned MemSize = LdSt.getMemSizeInBits().getValue(); + AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); + + if (isStrongerThanMonotonic(Order)) { + MI.setDesc(TII.get(selectZalasrLoadStoreOp(Opc, MemSize))); + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); + } + + const unsigned NewOpc = selectRegImmLoadStoreOp(MI.getOpcode(), MemSize); + if (NewOpc == MI.getOpcode()) + return false; + + // Check if we can fold anything into the addressing mode. + auto AddrModeFns = selectAddrRegImm(MI.getOperand(1)); + if (!AddrModeFns) + return false; + + // Folded something. Create a new instruction and return it. 
+ auto NewInst = MIB.buildInstr(NewOpc, {}, {}, MI.getFlags()); + if (isa<GStore>(MI)) + NewInst.addUse(ValReg); + else + NewInst.addDef(ValReg); + NewInst.cloneMemRefs(MI); + for (auto &Fn : *AddrModeFns) + Fn(NewInst); + MI.eraseFromParent(); + + return constrainSelectedInstRegOperands(*NewInst, TII, TRI, RBI); + } default: return false; } diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp index a537904..5dd4bf4 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -166,7 +166,7 @@ static unsigned getLRForRMW32(AtomicOrdering Ordering, return RISCV::LR_W; return RISCV::LR_W_AQ; case AtomicOrdering::SequentiallyConsistent: - return RISCV::LR_W_AQ_RL; + return RISCV::LR_W_AQRL; } } @@ -210,7 +210,7 @@ static unsigned getLRForRMW64(AtomicOrdering Ordering, return RISCV::LR_D; return RISCV::LR_D_AQ; case AtomicOrdering::SequentiallyConsistent: - return RISCV::LR_D_AQ_RL; + return RISCV::LR_D_AQRL; } } @@ -287,8 +287,8 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, break; } BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg) - .addReg(AddrReg) - .addReg(ScratchReg); + .addReg(ScratchReg) + .addReg(AddrReg); BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) .addReg(ScratchReg) .addReg(RISCV::X0) @@ -375,8 +375,8 @@ static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII, ScratchReg); BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg) - .addReg(AddrReg) - .addReg(ScratchReg); + .addReg(ScratchReg) + .addReg(AddrReg); BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) .addReg(ScratchReg) .addReg(RISCV::X0) @@ -535,8 +535,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp( // sc.w scratch1, scratch1, (addr) // bnez scratch1, loop BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg) - .addReg(AddrReg) - .addReg(Scratch1Reg); + .addReg(Scratch1Reg) + .addReg(AddrReg); BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) .addReg(Scratch1Reg) .addReg(RISCV::X0) @@ -674,8 +674,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( // bnez scratch, loophead BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg) - .addReg(AddrReg) - .addReg(NewValReg); + .addReg(NewValReg) + .addReg(AddrReg); BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) .addReg(ScratchReg) .addReg(RISCV::X0) @@ -707,8 +707,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( MaskReg, ScratchReg); BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg) - .addReg(AddrReg) - .addReg(ScratchReg); + .addReg(ScratchReg) + .addReg(AddrReg); BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) .addReg(ScratchReg) .addReg(RISCV::X0) diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 27cf057..40c05e8 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -265,7 +265,7 @@ def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">, def NoStdExtZacas : Predicate<"!Subtarget->hasStdExtZacas()">; def FeatureStdExtZalasr - : RISCVExperimentalExtension<0, 1, "Load-Acquire and Store-Release Instructions">; + : RISCVExperimentalExtension<0, 9, "Load-Acquire and Store-Release Instructions">; def HasStdExtZalasr : Predicate<"Subtarget->hasStdExtZalasr()">, AssemblerPredicate<(all_of FeatureStdExtZalasr), "'Zalasr' (Load-Acquire and Store-Release 
Instructions)">; diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 6d01250..eba35ef 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -100,119 +100,11 @@ def : LdPat<load, LD, PtrVT>; def : StPat<store, SD, GPR, PtrVT>; } -// Load and store patterns for i16, needed because Zfh makes s16 load/store -// legal and regbank select may not constrain registers to FP. -def : LdPat<load, LH, i16>; -def : StPat<store, SH, GPR, i16>; - -def : LdPat<extloadi8, LBU, i16>; // Prefer unsigned due to no c.lb in Zcb. -def : StPat<truncstorei8, SB, GPR, i16>; - -let Predicates = [HasAtomicLdSt] in { - // Prefer unsigned due to no c.lb in Zcb. - def : LdPat<relaxed_load<atomic_load_aext_8>, LBU, i16>; - def : LdPat<relaxed_load<atomic_load_nonext_16>, LH, i16>; - - def : StPat<relaxed_store<atomic_store_8>, SB, GPR, i16>; - def : StPat<relaxed_store<atomic_store_16>, SH, GPR, i16>; -} - -let Predicates = [HasAtomicLdSt, IsRV64] in { - // Load pattern is in RISCVInstrInfoA.td and shared with RV32. - def : StPat<relaxed_store<atomic_store_32>, SW, GPR, i32>; -} - //===----------------------------------------------------------------------===// // RV64 i32 patterns not used by SelectionDAG //===----------------------------------------------------------------------===// let Predicates = [IsRV64] in { -def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb. -def : LdPat<extloadi16, LH, i32>; - -def : StPat<truncstorei8, SB, GPR, i32>; -def : StPat<truncstorei16, SH, GPR, i32>; - -def : Pat<(sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>; - def : Pat<(sext_inreg (i64 (add GPR:$rs1, simm12_lo:$imm)), i32), (ADDIW GPR:$rs1, simm12_lo:$imm)>; } - -let Predicates = [IsRV64, NoStdExtZba] in -def : Pat<(zext (i32 GPR:$src)), (SRLI (i64 (SLLI GPR:$src, 32)), 32)>; - -let Predicates = [IsRV32, NoStdExtZbb, NoStdExtZbkb] in -def : Pat<(XLenVT (zext (i16 GPR:$src))), - (SRLI (XLenVT (SLLI GPR:$src, 16)), 16)>; - -let Predicates = [IsRV64, NoStdExtZbb, NoStdExtZbkb] in { -def : Pat<(i64 (zext (i16 GPR:$src))), - (SRLI (XLenVT (SLLI GPR:$src, 48)), 48)>; -def : Pat<(i32 (zext (i16 GPR:$src))), - (SRLI (XLenVT (SLLI GPR:$src, 48)), 48)>; -} - -let Predicates = [IsRV32, NoStdExtZbb] in -def : Pat<(XLenVT (sext (i16 GPR:$src))), - (SRAI (XLenVT (SLLI GPR:$src, 16)), 16)>; - -let Predicates = [IsRV64, NoStdExtZbb] in { -def : Pat<(i64 (sext (i16 GPR:$src))), - (SRAI (XLenVT (SLLI GPR:$src, 48)), 48)>; -def : Pat<(i32 (sext (i16 GPR:$src))), - (SRAI (XLenVT (SLLI GPR:$src, 48)), 48)>; -} - -//===----------------------------------------------------------------------===// -// Zb* RV64 patterns not used by SelectionDAG. 
-//===----------------------------------------------------------------------===// - -let Predicates = [HasStdExtZba, IsRV64] in { -def : Pat<(zext (i32 GPR:$src)), (ADD_UW GPR:$src, (XLenVT X0))>; -} - -let Predicates = [HasStdExtZbb] in -def : Pat<(i32 (sext (i16 GPR:$rs))), (SEXT_H GPR:$rs)>; -let Predicates = [HasStdExtZbb, IsRV64] in -def : Pat<(i64 (sext (i16 GPR:$rs))), (SEXT_H GPR:$rs)>; - -let Predicates = [HasStdExtZbb, IsRV32] in -def : Pat<(i32 (zext (i16 GPR:$rs))), (ZEXT_H_RV32 GPR:$rs)>; -let Predicates = [HasStdExtZbb, IsRV64] in { -def : Pat<(i64 (zext (i16 GPR:$rs))), (ZEXT_H_RV64 GPR:$rs)>; -def : Pat<(i32 (zext (i16 GPR:$rs))), (ZEXT_H_RV64 GPR:$rs)>; -} - -let Predicates = [HasStdExtZbkb, NoStdExtZbb, IsRV32] in -def : Pat<(i32 (zext (i16 GPR:$rs))), (PACK GPR:$rs, (XLenVT X0))>; -let Predicates = [HasStdExtZbkb, NoStdExtZbb, IsRV64] in { -def : Pat<(i64 (zext (i16 GPR:$rs))), (PACKW GPR:$rs, (XLenVT X0))>; -def : Pat<(i32 (zext (i16 GPR:$rs))), (PACKW GPR:$rs, (XLenVT X0))>; -} - -//===----------------------------------------------------------------------===// -// Zalasr patterns not used by SelectionDAG -//===----------------------------------------------------------------------===// - -let Predicates = [HasStdExtZalasr] in { - // the sequentially consistent loads use - // .aq instead of .aqrl to match the psABI/A.7 - def : PatLAQ<acquiring_load<atomic_load_aext_8>, LB_AQ, i16>; - def : PatLAQ<seq_cst_load<atomic_load_aext_8>, LB_AQ, i16>; - - def : PatLAQ<acquiring_load<atomic_load_nonext_16>, LH_AQ, i16>; - def : PatLAQ<seq_cst_load<atomic_load_nonext_16>, LH_AQ, i16>; - - def : PatSRL<releasing_store<atomic_store_8>, SB_RL, i16>; - def : PatSRL<seq_cst_store<atomic_store_8>, SB_RL, i16>; - - def : PatSRL<releasing_store<atomic_store_16>, SH_RL, i16>; - def : PatSRL<seq_cst_store<atomic_store_16>, SH_RL, i16>; -} - -let Predicates = [HasStdExtZalasr, IsRV64] in { - // Load pattern is in RISCVInstrInfoZalasr.td and shared with RV32. - def : PatSRL<releasing_store<atomic_store_32>, SW_RL, i32>; - def : PatSRL<seq_cst_store<atomic_store_32>, SW_RL, i32>; -} diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6234714..7123a2d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15721,8 +15721,7 @@ static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) { return SDValue(); // Emit a negate of the setcc. - return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), - N0.getOperand(0)); + return DAG.getNegative(N0.getOperand(0), DL, VT); } static SDValue performADDCombine(SDNode *N, @@ -16498,43 +16497,60 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, SDValue X = N->getOperand(0); if (Subtarget.hasShlAdd(3)) { - for (uint64_t Divisor : {3, 5, 9}) { - if (MulAmt % Divisor != 0) - continue; - uint64_t MulAmt2 = MulAmt / Divisor; - // 3/5/9 * 2^N -> shl (shXadd X, X), N - if (isPowerOf2_64(MulAmt2)) { - SDLoc DL(N); - SDValue X = N->getOperand(0); - // Put the shift first if we can fold a zext into the - // shift forming a slli.uw. 
- if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) && - X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) { - SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X, - DAG.getConstant(Log2_64(MulAmt2), DL, VT)); - return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl, - DAG.getConstant(Log2_64(Divisor - 1), DL, VT), - Shl); - } - // Otherwise, put rhe shl second so that it can fold with following - // instructions (e.g. sext or add). - SDValue Mul359 = - DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, - DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); - return DAG.getNode(ISD::SHL, DL, VT, Mul359, - DAG.getConstant(Log2_64(MulAmt2), DL, VT)); - } - - // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X) - if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) { - SDLoc DL(N); - SDValue Mul359 = - DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, - DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); - return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359, - DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT), - Mul359); + int Shift; + if (int ShXAmount = isShifted359(MulAmt, Shift)) { + // 3/5/9 * 2^N -> shl (shXadd X, X), N + SDLoc DL(N); + SDValue X = N->getOperand(0); + // Put the shift first if we can fold a zext into the shift forming + // a slli.uw. + if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) && + X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) { + SDValue Shl = + DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT)); + return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl, + DAG.getConstant(ShXAmount, DL, VT), Shl); } + // Otherwise, put the shl second so that it can fold with following + // instructions (e.g. sext or add). + SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, + DAG.getConstant(ShXAmount, DL, VT), X); + return DAG.getNode(ISD::SHL, DL, VT, Mul359, + DAG.getConstant(Shift, DL, VT)); + } + + // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X) + int ShX; + int ShY; + switch (MulAmt) { + case 3 * 5: + ShY = 1; + ShX = 2; + break; + case 3 * 9: + ShY = 1; + ShX = 3; + break; + case 5 * 5: + ShX = ShY = 2; + break; + case 5 * 9: + ShY = 2; + ShX = 3; + break; + case 9 * 9: + ShX = ShY = 3; + break; + default: + ShX = ShY = 0; + break; + } + if (ShX) { + SDLoc DL(N); + SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, + DAG.getConstant(ShY, DL, VT), X); + return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359, + DAG.getConstant(ShX, DL, VT), Mul359); } // If this is a power 2 + 2/4/8, we can use a shift followed by a single @@ -16557,18 +16573,14 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, // variants we could implement. e.g. 
// (2^(1,2,3) * 3,5,9 + 1) << C2 // 2^(C1>3) * 3,5,9 +/- 1 - for (uint64_t Divisor : {3, 5, 9}) { - uint64_t C = MulAmt - 1; - if (C <= Divisor) - continue; - unsigned TZ = llvm::countr_zero(C); - if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) { + if (int ShXAmount = isShifted359(MulAmt - 1, Shift)) { + assert(Shift != 0 && "MulAmt=4,6,10 handled before"); + if (Shift <= 3) { SDLoc DL(N); - SDValue Mul359 = - DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, - DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); + SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, + DAG.getConstant(ShXAmount, DL, VT), X); return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359, - DAG.getConstant(TZ, DL, VT), X); + DAG.getConstant(Shift, DL, VT), X); } } @@ -16576,7 +16588,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) { unsigned ScaleShift = llvm::countr_zero(MulAmt - 1); if (ScaleShift >= 1 && ScaleShift < 4) { - unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2))); + unsigned ShiftAmt = llvm::countr_zero((MulAmt - 1) & (MulAmt - 2)); SDLoc DL(N); SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT)); @@ -16589,7 +16601,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x)) for (uint64_t Offset : {3, 5, 9}) { if (isPowerOf2_64(MulAmt + Offset)) { - unsigned ShAmt = Log2_64(MulAmt + Offset); + unsigned ShAmt = llvm::countr_zero(MulAmt + Offset); if (ShAmt >= VT.getSizeInBits()) continue; SDLoc DL(N); @@ -16608,21 +16620,16 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt2 = MulAmt / Divisor; // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples // of 25 which happen to be quite common. 
- for (uint64_t Divisor2 : {3, 5, 9}) { - if (MulAmt2 % Divisor2 != 0) - continue; - uint64_t MulAmt3 = MulAmt2 / Divisor2; - if (isPowerOf2_64(MulAmt3)) { - SDLoc DL(N); - SDValue Mul359A = - DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, - DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); - SDValue Mul359B = DAG.getNode( - RISCVISD::SHL_ADD, DL, VT, Mul359A, - DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A); - return DAG.getNode(ISD::SHL, DL, VT, Mul359B, - DAG.getConstant(Log2_64(MulAmt3), DL, VT)); - } + if (int ShBAmount = isShifted359(MulAmt2, Shift)) { + SDLoc DL(N); + SDValue Mul359A = + DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, + DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); + SDValue Mul359B = + DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359A, + DAG.getConstant(ShBAmount, DL, VT), Mul359A); + return DAG.getNode(ISD::SHL, DL, VT, Mul359B, + DAG.getConstant(Shift, DL, VT)); } } } @@ -16966,7 +16973,7 @@ performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc)) if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG()) - return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src); + return DAG.getNegative(Src, DL, VT); // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1) if (Opc == ISD::XOR && SrcVT == MVT::i1 && diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 7db4832..96e1078 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -4586,24 +4586,23 @@ void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB, .addReg(DestReg, RegState::Kill) .addImm(ShiftAmount) .setMIFlag(Flag); - } else if (STI.hasShlAdd(3) && - ((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) || - (Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) || - (Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) { + } else if (int ShXAmount, ShiftAmount; + STI.hasShlAdd(3) && + (ShXAmount = isShifted359(Amount, ShiftAmount)) != 0) { // We can use Zba SHXADD+SLLI instructions for multiply in some cases. unsigned Opc; - uint32_t ShiftAmount; - if (Amount % 9 == 0) { - Opc = RISCV::SH3ADD; - ShiftAmount = Log2_64(Amount / 9); - } else if (Amount % 5 == 0) { - Opc = RISCV::SH2ADD; - ShiftAmount = Log2_64(Amount / 5); - } else if (Amount % 3 == 0) { + switch (ShXAmount) { + case 1: Opc = RISCV::SH1ADD; - ShiftAmount = Log2_64(Amount / 3); - } else { - llvm_unreachable("implied by if-clause"); + break; + case 2: + Opc = RISCV::SH2ADD; + break; + case 3: + Opc = RISCV::SH3ADD; + break; + default: + llvm_unreachable("unexpected result of isShifted359"); } if (ShiftAmount) BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 42a0c4c..c5eddb9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -25,6 +25,25 @@ namespace llvm { +// If Value is of the form C1<<C2, where C1 = 3, 5 or 9, +// returns log2(C1 - 1) and assigns Shift = C2. +// Otherwise, returns 0. 
+template <typename T> int isShifted359(T Value, int &Shift) { + if (Value == 0) + return 0; + Shift = llvm::countr_zero(Value); + switch (Value >> Shift) { + case 3: + return 1; + case 5: + return 2; + case 9: + return 3; + default: + return 0; + } +} + class RISCVSubtarget; static const MachineMemOperand::Flags MONontemporalBit0 = diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 9855c47..7a14929 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1980,7 +1980,7 @@ def : LdPat<sextloadi8, LB>; def : LdPat<extloadi8, LBU>; // Prefer unsigned due to no c.lb in Zcb. def : LdPat<sextloadi16, LH>; def : LdPat<extloadi16, LH>; -def : LdPat<load, LW, i32>; +def : LdPat<load, LW, i32>, Requires<[IsRV32]>; def : LdPat<zextloadi8, LBU>; def : LdPat<zextloadi16, LHU>; @@ -1994,7 +1994,7 @@ class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy, def : StPat<truncstorei8, SB, GPR, XLenVT>; def : StPat<truncstorei16, SH, GPR, XLenVT>; -def : StPat<store, SW, GPR, i32>; +def : StPat<store, SW, GPR, i32>, Requires<[IsRV32]>; /// Fences diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 25accd9..571d72f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -24,36 +24,36 @@ class LR_r<bit aq, bit rl, bits<3> funct3, string opcodestr> } multiclass LR_r_aq_rl<bits<3> funct3, string opcodestr> { - def "" : LR_r<0, 0, funct3, opcodestr>; - def _AQ : LR_r<1, 0, funct3, opcodestr # ".aq">; - def _RL : LR_r<0, 1, funct3, opcodestr # ".rl">; - def _AQ_RL : LR_r<1, 1, funct3, opcodestr # ".aqrl">; + def "" : LR_r<0, 0, funct3, opcodestr>; + def _AQ : LR_r<1, 0, funct3, opcodestr # ".aq">; + def _RL : LR_r<0, 1, funct3, opcodestr # ".rl">; + def _AQRL : LR_r<1, 1, funct3, opcodestr # ".aqrl">; } let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in class SC_r<bit aq, bit rl, bits<3> funct3, string opcodestr> : RVInstRAtomic<0b00011, aq, rl, funct3, OPC_AMO, - (outs GPR:$rd), (ins GPRMemZeroOffset:$rs1, GPR:$rs2), + (outs GPR:$rd), (ins GPR:$rs2, GPRMemZeroOffset:$rs1), opcodestr, "$rd, $rs2, $rs1">; multiclass SC_r_aq_rl<bits<3> funct3, string opcodestr> { - def "" : SC_r<0, 0, funct3, opcodestr>; - def _AQ : SC_r<1, 0, funct3, opcodestr # ".aq">; - def _RL : SC_r<0, 1, funct3, opcodestr # ".rl">; - def _AQ_RL : SC_r<1, 1, funct3, opcodestr # ".aqrl">; + def "" : SC_r<0, 0, funct3, opcodestr>; + def _AQ : SC_r<1, 0, funct3, opcodestr # ".aq">; + def _RL : SC_r<0, 1, funct3, opcodestr # ".rl">; + def _AQRL : SC_r<1, 1, funct3, opcodestr # ".aqrl">; } let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in class AMO_rr<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr> : RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO, - (outs GPR:$rd), (ins GPRMemZeroOffset:$rs1, GPR:$rs2), + (outs GPR:$rd), (ins GPR:$rs2, GPRMemZeroOffset:$rs1), opcodestr, "$rd, $rs2, $rs1">; multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> { - def "" : AMO_rr<funct5, 0, 0, funct3, opcodestr>; - def _AQ : AMO_rr<funct5, 1, 0, funct3, opcodestr # ".aq">; - def _RL : AMO_rr<funct5, 0, 1, funct3, opcodestr # ".rl">; - def _AQ_RL : AMO_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl">; + def "" : AMO_rr<funct5, 0, 0, funct3, opcodestr>; + def _AQ : AMO_rr<funct5, 1, 0, funct3, opcodestr # ".aq">; + def _RL : AMO_rr<funct5, 0, 1, funct3, opcodestr # ".rl">; + def _AQRL : AMO_rr<funct5, 1, 1, funct3, opcodestr # 
".aqrl">; } //===----------------------------------------------------------------------===// @@ -174,8 +174,9 @@ let Predicates = [HasAtomicLdSt] in { def : StPat<relaxed_store<atomic_store_8>, SB, GPR, XLenVT>; def : StPat<relaxed_store<atomic_store_16>, SH, GPR, XLenVT>; def : StPat<relaxed_store<atomic_store_32>, SW, GPR, XLenVT>; +} - // Used by GISel for RV32 and RV64. +let Predicates = [HasAtomicLdSt, IsRV32] in { def : LdPat<relaxed_load<atomic_load_nonext_32>, LW, i32>; } @@ -188,31 +189,34 @@ let Predicates = [HasAtomicLdSt, IsRV64] in { /// AMOs +class PatAMO<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> + : Pat<(vt (OpNode (XLenVT GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs2, GPR:$rs1)>; + multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT, list<Predicate> ExtraPreds = []> { let Predicates = !listconcat([HasStdExtA, NoStdExtZtso], ExtraPreds) in { - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"), - !cast<RVInst>(BaseInst), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"), - !cast<RVInst>(BaseInst#"_AQ"), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"), - !cast<RVInst>(BaseInst#"_RL"), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"), - !cast<RVInst>(BaseInst#"_AQ_RL"), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"), - !cast<RVInst>(BaseInst#"_AQ_RL"), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_monotonic"), + !cast<RVInst>(BaseInst), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_acquire"), + !cast<RVInst>(BaseInst#"_AQ"), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_release"), + !cast<RVInst>(BaseInst#"_RL"), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_acq_rel"), + !cast<RVInst>(BaseInst#"_AQRL"), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_seq_cst"), + !cast<RVInst>(BaseInst#"_AQRL"), vt>; } let Predicates = !listconcat([HasStdExtA, HasStdExtZtso], ExtraPreds) in { - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"), - !cast<RVInst>(BaseInst), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"), - !cast<RVInst>(BaseInst), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"), - !cast<RVInst>(BaseInst), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"), - !cast<RVInst>(BaseInst), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"), - !cast<RVInst>(BaseInst), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_monotonic"), + !cast<RVInst>(BaseInst), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_acquire"), + !cast<RVInst>(BaseInst), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_release"), + !cast<RVInst>(BaseInst), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_acq_rel"), + !cast<RVInst>(BaseInst), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_seq_cst"), + !cast<RVInst>(BaseInst), vt>; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td index 7cf6d5f..20e2142 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td @@ -9,8 +9,8 @@ // This file describes the RISC-V instructions from the standard atomic 'Za*' // extensions: // - Zawrs (v1.0) : Wait-on-Reservation-Set. -// - Zacas (v1.0-rc1) : Atomic Compare-and-Swap. -// - Zabha (v1.0-rc1) : Byte and Halfword Atomic Memory Operations. +// - Zacas (v1.0) : Atomic Compare-and-Swap. +// - Zabha (v1.0) : Byte and Halfword Atomic Memory Operations. 
// //===----------------------------------------------------------------------===// @@ -44,15 +44,15 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "$rd = $rd_wb" class AMO_cas<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr, DAGOperand RC> : RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO, - (outs RC:$rd_wb), (ins RC:$rd, GPRMemZeroOffset:$rs1, RC:$rs2), + (outs RC:$rd_wb), (ins RC:$rd, RC:$rs2, GPRMemZeroOffset:$rs1), opcodestr, "$rd, $rs2, $rs1">; multiclass AMO_cas_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr, DAGOperand RC> { - def "" : AMO_cas<funct5, 0, 0, funct3, opcodestr, RC>; - def _AQ : AMO_cas<funct5, 1, 0, funct3, opcodestr # ".aq", RC>; - def _RL : AMO_cas<funct5, 0, 1, funct3, opcodestr # ".rl", RC>; - def _AQ_RL : AMO_cas<funct5, 1, 1, funct3, opcodestr # ".aqrl", RC>; + def "" : AMO_cas<funct5, 0, 0, funct3, opcodestr, RC>; + def _AQ : AMO_cas<funct5, 1, 0, funct3, opcodestr # ".aq", RC>; + def _RL : AMO_cas<funct5, 0, 1, funct3, opcodestr # ".rl", RC>; + def _AQRL : AMO_cas<funct5, 1, 1, funct3, opcodestr # ".aqrl", RC>; } let Predicates = [HasStdExtZacas], IsSignExtendingOpW = 1 in { @@ -71,48 +71,48 @@ defm AMOCAS_Q : AMO_cas_aq_rl<0b00101, 0b100, "amocas.q", GPRPairRV64>; multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT, list<Predicate> ExtraPreds = []> { let Predicates = !listconcat([HasStdExtZacas, NoStdExtZtso], ExtraPreds) in { - def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr), + def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr), + (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr), + (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; + (!cast<RVInst>(BaseInst#"_AQRL") GPR:$cmp, GPR:$new, GPR:$addr)>; def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; + (!cast<RVInst>(BaseInst#"_AQRL") GPR:$cmp, GPR:$new, GPR:$addr)>; } // Predicates = !listconcat([HasStdExtZacas, NoStdExtZtso], ExtraPreds) let Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) in { - def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr), + def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; - def : 
Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>; } // Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) } @@ -140,7 +140,7 @@ def WRS_STO : WRSInst<0b000000011101, "wrs.sto">, Sched<[]>; // Zabha (Byte and Halfword Atomic Memory Operations) //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtZabha] in { +let Predicates = [HasStdExtZabha], IsSignExtendingOpW = 1 in { defm AMOSWAP_B : AMO_rr_aq_rl<0b00001, 0b000, "amoswap.b">, Sched<[WriteAtomicB, ReadAtomicBA, ReadAtomicBD]>; defm AMOADD_B : AMO_rr_aq_rl<0b00000, 0b000, "amoadd.b">, @@ -181,7 +181,7 @@ defm AMOMAXU_H : AMO_rr_aq_rl<0b11100, 0b001, "amomaxu.h">, } // If Zacas extension is also implemented, Zabha further provides AMOCAS.[B|H]. -let Predicates = [HasStdExtZabha, HasStdExtZacas] in { +let Predicates = [HasStdExtZabha, HasStdExtZacas], IsSignExtendingOpW = 1 in { defm AMOCAS_B : AMO_cas_aq_rl<0b00101, 0b000, "amocas.b", GPR>; defm AMOCAS_H : AMO_cas_aq_rl<0b00101, 0b001, "amocas.h", GPR>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td index 1deecd2..5f944034 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td @@ -30,21 +30,22 @@ class SRL_r<bit aq, bit rl, bits<3> funct3, string opcodestr> opcodestr, "$rs2, $rs1"> { let rd = 0; } + multiclass LAQ_r_aq_rl<bits<3> funct3, string opcodestr> { - def _AQ : LAQ_r<1, 0, funct3, opcodestr # ".aq">; - def _AQ_RL : LAQ_r<1, 1, funct3, opcodestr # ".aqrl">; + def _AQ : LAQ_r<1, 0, funct3, opcodestr # ".aq">; + def _AQRL : LAQ_r<1, 1, funct3, opcodestr # ".aqrl">; } multiclass SRL_r_aq_rl<bits<3> funct3, string opcodestr> { - def _RL : SRL_r<0, 1, funct3, opcodestr # ".rl">; - def _AQ_RL : SRL_r<1, 1, funct3, opcodestr # ".aqrl">; + def _RL : SRL_r<0, 1, funct3, opcodestr # ".rl">; + def _AQRL : SRL_r<1, 1, funct3, opcodestr # ".aqrl">; } //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtZalasr] in { +let Predicates = [HasStdExtZalasr], IsSignExtendingOpW = 1 in { defm LB : LAQ_r_aq_rl<0b000, "lb">; defm LH : LAQ_r_aq_rl<0b001, "lh">; defm LW : LAQ_r_aq_rl<0b010, "lw">; @@ -93,11 +94,12 @@ let Predicates = [HasStdExtZalasr] in { def : PatSRL<releasing_store<atomic_store_32>, SW_RL>; def : PatSRL<seq_cst_store<atomic_store_32>, SW_RL>; +} - // Used by GISel for RV32 and RV64. 
+let Predicates = [HasStdExtZalasr, IsRV32] in { def : PatLAQ<acquiring_load<atomic_load_nonext_32>, LW_AQ, i32>; def : PatLAQ<seq_cst_load<atomic_load_nonext_32>, LW_AQ, i32>; -} // Predicates = [HasStdExtZalasr] +} // Predicates = [HasStdExtZalasr, IsRV32] let Predicates = [HasStdExtZalasr, IsRV64] in { def : PatLAQ<acquiring_load<atomic_load_asext_32>, LW_AQ, i64>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index ce21d83..e519b72 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -702,13 +702,13 @@ def : Pat<(binop_allwusers<or> (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; def : Pat<(binop_allwusers<or> (or (zexti16 (XLenVT GPR:$rs1)), - (shl GPR:$op1rs1, (XLenVT 24))), - (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))), + (shl GPR:$op1rs2, (XLenVT 24))), + (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; def : Pat<(i64 (or (or (zexti16 (XLenVT GPR:$rs1)), - (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))), - (sext_inreg (shl GPR:$op1rs1, (XLenVT 24)), i32))), + (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), + (sext_inreg (shl GPR:$op1rs2, (XLenVT 24)), i32))), (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; // Match a pattern of 2 halfwords being inserted into bits [63:32], with bits @@ -788,32 +788,32 @@ multiclass ShxAdd_UWPat<int i, Instruction shxadd_uw> { } multiclass Sh1Add_UWPat<Instruction sh1add_uw> { - def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), (i64 0x1FFFFFFFF)), + (XLenVT GPR:$rs2)), (sh1add_uw GPR:$rs1, GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. - def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x1FFFFFFFE)), + (XLenVT GPR:$rs2)), (sh1add_uw (XLenVT (SRLI GPR:$rs1, 1)), GPR:$rs2)>; } multiclass Sh2Add_UWPat<Instruction sh2add_uw> { - def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), (i64 0x3FFFFFFFF)), + (XLenVT GPR:$rs2)), (sh2add_uw GPR:$rs1, GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. - def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x3FFFFFFFC)), + (XLenVT GPR:$rs2)), (sh2add_uw (XLenVT (SRLI GPR:$rs1, 2)), GPR:$rs2)>; } multiclass Sh3Add_UWPat<Instruction sh3add_uw> { - def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFF8), - (XLenVT GPR:$rs2))), - (sh3add_uw (XLenVT (SRLIW GPR:$rs1, 3)), GPR:$rs2)>; + def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 3)), (i64 0x7FFFFFFFF)), + (XLenVT GPR:$rs2)), + (sh3add_uw GPR:$rs1, GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. 
- def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x7FFFFFFF8)), + (XLenVT GPR:$rs2)), (sh3add_uw (XLenVT (SRLI GPR:$rs1, 3)), GPR:$rs2)>; } diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 82e768d..6605a5c 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -238,7 +238,7 @@ class RISCVRegisterClass<list<ValueType> regTypes, int align, dag regList> } class GPRRegisterClass<dag regList> - : RISCVRegisterClass<[XLenVT, XLenFVT, i32, i16], 32, regList> { + : RISCVRegisterClass<[XLenVT, XLenFVT], 32, regList> { let RegInfos = XLenRI; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 3f2e7db..3e07eff 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -567,9 +567,12 @@ multiclass SiFive7WriteResBase<int VLEN, defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, VLEN>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - defm : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [VCQ, VL], - 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), - [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; + defm : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, + // Predicated + [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)], + // Not Predicated + [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)], + mx, IsWorstCase>; let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { defm : LMULWriteResMX<"WriteVLDUX8", [VCQ, VL], mx, IsWorstCase>; defm : LMULWriteResMX<"WriteVLDOX8", [VCQ, VL], mx, IsWorstCase>; @@ -587,9 +590,12 @@ multiclass SiFive7WriteResBase<int VLEN, defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, VLEN>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - defm : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [VCQ, VL], - 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), - [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; + defm : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, + // Predicated + [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)], + // Not Predicated + [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)], + mx, IsWorstCase>; let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { defm : LMULWriteResMX<"WriteVLDUX16", [VCQ, VL], mx, IsWorstCase>; defm : LMULWriteResMX<"WriteVLDOX16", [VCQ, VL], mx, IsWorstCase>; @@ -604,9 +610,12 @@ multiclass SiFive7WriteResBase<int VLEN, defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, VLEN>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - defm : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [VCQ, VL], - 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), - [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; + defm : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, + // Predicated + [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)], + // Not Predicated + [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)], + mx, IsWorstCase>; let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { defm : LMULWriteResMX<"WriteVLDUX32", [VCQ, VL], mx, IsWorstCase>; defm : 
LMULWriteResMX<"WriteVLDOX32", [VCQ, VL], mx, IsWorstCase>; @@ -621,9 +630,12 @@ multiclass SiFive7WriteResBase<int VLEN, defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, VLEN>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - defm : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [VCQ, VL], - 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), - [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; + defm : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, + // Predicated + [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)], + // Not Predicated + [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)], + mx, IsWorstCase>; let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { defm : LMULWriteResMX<"WriteVLDUX64", [VCQ, VL], mx, IsWorstCase>; defm : LMULWriteResMX<"WriteVLDOX64", [VCQ, VL], mx, IsWorstCase>; diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td index 6c7658c..01a4308 100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleV.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -67,42 +67,41 @@ multiclass LMULSEWWriteResMXSEW<string name, list<ProcResourceKind> resources, // ReleaseAtCycles predCycles if the SchedPredicate Pred is true, otherwise has // Latency noPredLat and ReleaseAtCycles noPredCycles. The WorstCase SchedWrite // is created similarly if IsWorstCase is true. -multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred, - list<ProcResourceKind> resources, - int predLat, list<int> predAcquireCycles, - list<int> predReleaseCycles, int noPredLat, - list<int> noPredAcquireCycles, - list<int> noPredReleaseCycles, - string mx, bit IsWorstCase> { - defvar nameMX = name # "_" # mx; - +multiclass LMULWriteResVariantImpl<string name, string writeResName, SchedPredicateBase Pred, + list<ProcResourceKind> predResources, + int predLat, list<int> predAcquireCycles, + list<int> predReleaseCycles, + list<ProcResourceKind> noPredResources, + int noPredLat, list<int> noPredAcquireCycles, + list<int> noPredReleaseCycles, + bit IsWorstCase> { // Define the different behaviors - def nameMX # "_Pred" : SchedWriteRes<resources>{ + def writeResName # "_Pred" : SchedWriteRes<predResources>{ let Latency = predLat; let AcquireAtCycles = predAcquireCycles; let ReleaseAtCycles = predReleaseCycles; } - def nameMX # "_NoPred" : SchedWriteRes<resources> { + def writeResName # "_NoPred" : SchedWriteRes<noPredResources> { let Latency = noPredLat; let AcquireAtCycles = noPredAcquireCycles; let ReleaseAtCycles = noPredReleaseCycles; } // Define SchedVars - def nameMX # PredSchedVar - : SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>; - def nameMX # NoPredSchedVar - : SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX #"_NoPred")]>; + def writeResName # PredSchedVar + : SchedVar<Pred, [!cast<SchedWriteRes>(NAME # writeResName # "_Pred")]>; + def writeResName # NoPredSchedVar + : SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # writeResName #"_NoPred")]>; // Allow multiclass to refer to SchedVars -- need to have NAME prefix. 
- defvar PredSchedVar = !cast<SchedVar>(NAME # nameMX # PredSchedVar); - defvar NoPredSchedVar = !cast<SchedVar>(NAME # nameMX # NoPredSchedVar); + defvar PredSchedVar = !cast<SchedVar>(NAME # writeResName # PredSchedVar); + defvar NoPredSchedVar = !cast<SchedVar>(NAME # writeResName # NoPredSchedVar); // Tie behavior to predicate - def NAME # nameMX # "_Variant" + def NAME # writeResName # "_Variant" : SchedWriteVariant<[PredSchedVar, NoPredSchedVar]>; def : SchedAlias< - !cast<SchedReadWrite>(nameMX), - !cast<SchedReadWrite>(NAME # nameMX # "_Variant")>; + !cast<SchedReadWrite>(writeResName), + !cast<SchedReadWrite>(NAME # writeResName # "_Variant")>; if IsWorstCase then { def NAME # name # "_WorstCase_Variant" @@ -113,6 +112,22 @@ multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred, } } +multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred, + list<ProcResourceKind> predResources, + int predLat, list<int> predAcquireCycles, + list<int> predReleaseCycles, + list<ProcResourceKind> noPredResources, + int noPredLat, list<int> noPredAcquireCycles, + list<int> noPredReleaseCycles, + string mx, bit IsWorstCase> { + defm "" : LMULWriteResVariantImpl<name, name # "_" # mx, Pred, predResources, + predLat, predAcquireCycles, + predReleaseCycles, noPredResources, + noPredLat, noPredAcquireCycles, + noPredReleaseCycles, + IsWorstCase>; +} + // Define multiclasses to define SchedWrite, SchedRead, WriteRes, and // ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the // SchedMxList variants above. Each multiclass is responsible for defining diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index ee25f69..7bc0b5b 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2747,20 +2747,72 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, Intrinsic::ID IID = Inst->getIntrinsicID(); LLVMContext &C = Inst->getContext(); bool HasMask = false; + + auto getSegNum = [](const IntrinsicInst *II, unsigned PtrOperandNo, + bool IsWrite) -> int64_t { + if (auto *TarExtTy = + dyn_cast<TargetExtType>(II->getArgOperand(0)->getType())) + return TarExtTy->getIntParameter(0); + + return 1; + }; + switch (IID) { case Intrinsic::riscv_vle_mask: case Intrinsic::riscv_vse_mask: + case Intrinsic::riscv_vlseg2_mask: + case Intrinsic::riscv_vlseg3_mask: + case Intrinsic::riscv_vlseg4_mask: + case Intrinsic::riscv_vlseg5_mask: + case Intrinsic::riscv_vlseg6_mask: + case Intrinsic::riscv_vlseg7_mask: + case Intrinsic::riscv_vlseg8_mask: + case Intrinsic::riscv_vsseg2_mask: + case Intrinsic::riscv_vsseg3_mask: + case Intrinsic::riscv_vsseg4_mask: + case Intrinsic::riscv_vsseg5_mask: + case Intrinsic::riscv_vsseg6_mask: + case Intrinsic::riscv_vsseg7_mask: + case Intrinsic::riscv_vsseg8_mask: HasMask = true; [[fallthrough]]; case Intrinsic::riscv_vle: - case Intrinsic::riscv_vse: { + case Intrinsic::riscv_vse: + case Intrinsic::riscv_vlseg2: + case Intrinsic::riscv_vlseg3: + case Intrinsic::riscv_vlseg4: + case Intrinsic::riscv_vlseg5: + case Intrinsic::riscv_vlseg6: + case Intrinsic::riscv_vlseg7: + case Intrinsic::riscv_vlseg8: + case Intrinsic::riscv_vsseg2: + case Intrinsic::riscv_vsseg3: + case Intrinsic::riscv_vsseg4: + case Intrinsic::riscv_vsseg5: + case Intrinsic::riscv_vsseg6: + case Intrinsic::riscv_vsseg7: + case Intrinsic::riscv_vsseg8: { // Intrinsic interface: // riscv_vle(merge, ptr, vl) // riscv_vle_mask(merge, ptr, mask, 
vl, policy) // riscv_vse(val, ptr, vl) // riscv_vse_mask(val, ptr, mask, vl, policy) + // riscv_vlseg#(merge, ptr, vl, sew) + // riscv_vlseg#_mask(merge, ptr, mask, vl, policy, sew) + // riscv_vsseg#(val, ptr, vl, sew) + // riscv_vsseg#_mask(val, ptr, mask, vl, sew) bool IsWrite = Inst->getType()->isVoidTy(); Type *Ty = IsWrite ? Inst->getArgOperand(0)->getType() : Inst->getType(); + // The results of segment loads are TargetExtType. + if (auto *TarExtTy = dyn_cast<TargetExtType>(Ty)) { + unsigned SEW = + 1 << cast<ConstantInt>(Inst->getArgOperand(Inst->arg_size() - 1)) + ->getZExtValue(); + Ty = TarExtTy->getTypeParameter(0U); + Ty = ScalableVectorType::get( + IntegerType::get(C, SEW), + cast<ScalableVectorType>(Ty)->getMinNumElements() * 8 / SEW); + } const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID); unsigned VLIndex = RVVIInfo->VLOperand; unsigned PtrOperandNo = VLIndex - 1 - HasMask; @@ -2771,23 +2823,72 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, if (HasMask) Mask = Inst->getArgOperand(VLIndex - 1); Value *EVL = Inst->getArgOperand(VLIndex); + unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite); + // RVV uses contiguous elements as a segment. + if (SegNum > 1) { + unsigned ElemSize = Ty->getScalarSizeInBits(); + auto *SegTy = IntegerType::get(C, ElemSize * SegNum); + Ty = VectorType::get(SegTy, cast<VectorType>(Ty)); + } Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty, Alignment, Mask, EVL); return true; } case Intrinsic::riscv_vlse_mask: case Intrinsic::riscv_vsse_mask: + case Intrinsic::riscv_vlsseg2_mask: + case Intrinsic::riscv_vlsseg3_mask: + case Intrinsic::riscv_vlsseg4_mask: + case Intrinsic::riscv_vlsseg5_mask: + case Intrinsic::riscv_vlsseg6_mask: + case Intrinsic::riscv_vlsseg7_mask: + case Intrinsic::riscv_vlsseg8_mask: + case Intrinsic::riscv_vssseg2_mask: + case Intrinsic::riscv_vssseg3_mask: + case Intrinsic::riscv_vssseg4_mask: + case Intrinsic::riscv_vssseg5_mask: + case Intrinsic::riscv_vssseg6_mask: + case Intrinsic::riscv_vssseg7_mask: + case Intrinsic::riscv_vssseg8_mask: HasMask = true; [[fallthrough]]; case Intrinsic::riscv_vlse: - case Intrinsic::riscv_vsse: { + case Intrinsic::riscv_vsse: + case Intrinsic::riscv_vlsseg2: + case Intrinsic::riscv_vlsseg3: + case Intrinsic::riscv_vlsseg4: + case Intrinsic::riscv_vlsseg5: + case Intrinsic::riscv_vlsseg6: + case Intrinsic::riscv_vlsseg7: + case Intrinsic::riscv_vlsseg8: + case Intrinsic::riscv_vssseg2: + case Intrinsic::riscv_vssseg3: + case Intrinsic::riscv_vssseg4: + case Intrinsic::riscv_vssseg5: + case Intrinsic::riscv_vssseg6: + case Intrinsic::riscv_vssseg7: + case Intrinsic::riscv_vssseg8: { // Intrinsic interface: // riscv_vlse(merge, ptr, stride, vl) // riscv_vlse_mask(merge, ptr, stride, mask, vl, policy) // riscv_vsse(val, ptr, stride, vl) // riscv_vsse_mask(val, ptr, stride, mask, vl, policy) + // riscv_vlsseg#(merge, ptr, offset, vl, sew) + // riscv_vlsseg#_mask(merge, ptr, offset, mask, vl, policy, sew) + // riscv_vssseg#(val, ptr, offset, vl, sew) + // riscv_vssseg#_mask(val, ptr, offset, mask, vl, sew) bool IsWrite = Inst->getType()->isVoidTy(); Type *Ty = IsWrite ? Inst->getArgOperand(0)->getType() : Inst->getType(); + // The results of segment loads are TargetExtType. 
+ if (auto *TarExtTy = dyn_cast<TargetExtType>(Ty)) { + unsigned SEW = + 1 << cast<ConstantInt>(Inst->getArgOperand(Inst->arg_size() - 1)) + ->getZExtValue(); + Ty = TarExtTy->getTypeParameter(0U); + Ty = ScalableVectorType::get( + IntegerType::get(C, SEW), + cast<ScalableVectorType>(Ty)->getMinNumElements() * 8 / SEW); + } const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID); unsigned VLIndex = RVVIInfo->VLOperand; unsigned PtrOperandNo = VLIndex - 2 - HasMask; @@ -2809,6 +2910,13 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, if (HasMask) Mask = Inst->getArgOperand(VLIndex - 1); Value *EVL = Inst->getArgOperand(VLIndex); + unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite); + // RVV uses contiguous elements as a segment. + if (SegNum > 1) { + unsigned ElemSize = Ty->getScalarSizeInBits(); + auto *SegTy = IntegerType::get(C, ElemSize * SegNum); + Ty = VectorType::get(SegTy, cast<VectorType>(Ty)); + } Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty, Alignment, Mask, EVL, Stride); return true; @@ -2817,19 +2925,89 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::riscv_vluxei_mask: case Intrinsic::riscv_vsoxei_mask: case Intrinsic::riscv_vsuxei_mask: + case Intrinsic::riscv_vloxseg2_mask: + case Intrinsic::riscv_vloxseg3_mask: + case Intrinsic::riscv_vloxseg4_mask: + case Intrinsic::riscv_vloxseg5_mask: + case Intrinsic::riscv_vloxseg6_mask: + case Intrinsic::riscv_vloxseg7_mask: + case Intrinsic::riscv_vloxseg8_mask: + case Intrinsic::riscv_vluxseg2_mask: + case Intrinsic::riscv_vluxseg3_mask: + case Intrinsic::riscv_vluxseg4_mask: + case Intrinsic::riscv_vluxseg5_mask: + case Intrinsic::riscv_vluxseg6_mask: + case Intrinsic::riscv_vluxseg7_mask: + case Intrinsic::riscv_vluxseg8_mask: + case Intrinsic::riscv_vsoxseg2_mask: + case Intrinsic::riscv_vsoxseg3_mask: + case Intrinsic::riscv_vsoxseg4_mask: + case Intrinsic::riscv_vsoxseg5_mask: + case Intrinsic::riscv_vsoxseg6_mask: + case Intrinsic::riscv_vsoxseg7_mask: + case Intrinsic::riscv_vsoxseg8_mask: + case Intrinsic::riscv_vsuxseg2_mask: + case Intrinsic::riscv_vsuxseg3_mask: + case Intrinsic::riscv_vsuxseg4_mask: + case Intrinsic::riscv_vsuxseg5_mask: + case Intrinsic::riscv_vsuxseg6_mask: + case Intrinsic::riscv_vsuxseg7_mask: + case Intrinsic::riscv_vsuxseg8_mask: HasMask = true; [[fallthrough]]; case Intrinsic::riscv_vloxei: case Intrinsic::riscv_vluxei: case Intrinsic::riscv_vsoxei: - case Intrinsic::riscv_vsuxei: { + case Intrinsic::riscv_vsuxei: + case Intrinsic::riscv_vloxseg2: + case Intrinsic::riscv_vloxseg3: + case Intrinsic::riscv_vloxseg4: + case Intrinsic::riscv_vloxseg5: + case Intrinsic::riscv_vloxseg6: + case Intrinsic::riscv_vloxseg7: + case Intrinsic::riscv_vloxseg8: + case Intrinsic::riscv_vluxseg2: + case Intrinsic::riscv_vluxseg3: + case Intrinsic::riscv_vluxseg4: + case Intrinsic::riscv_vluxseg5: + case Intrinsic::riscv_vluxseg6: + case Intrinsic::riscv_vluxseg7: + case Intrinsic::riscv_vluxseg8: + case Intrinsic::riscv_vsoxseg2: + case Intrinsic::riscv_vsoxseg3: + case Intrinsic::riscv_vsoxseg4: + case Intrinsic::riscv_vsoxseg5: + case Intrinsic::riscv_vsoxseg6: + case Intrinsic::riscv_vsoxseg7: + case Intrinsic::riscv_vsoxseg8: + case Intrinsic::riscv_vsuxseg2: + case Intrinsic::riscv_vsuxseg3: + case Intrinsic::riscv_vsuxseg4: + case Intrinsic::riscv_vsuxseg5: + case Intrinsic::riscv_vsuxseg6: + case Intrinsic::riscv_vsuxseg7: + case Intrinsic::riscv_vsuxseg8: { // Intrinsic interface (only listed ordered version): // 
riscv_vloxei(merge, ptr, index, vl) // riscv_vloxei_mask(merge, ptr, index, mask, vl, policy) // riscv_vsoxei(val, ptr, index, vl) // riscv_vsoxei_mask(val, ptr, index, mask, vl, policy) + // riscv_vloxseg#(merge, ptr, index, vl, sew) + // riscv_vloxseg#_mask(merge, ptr, index, mask, vl, policy, sew) + // riscv_vsoxseg#(val, ptr, index, vl, sew) + // riscv_vsoxseg#_mask(val, ptr, index, mask, vl, sew) bool IsWrite = Inst->getType()->isVoidTy(); Type *Ty = IsWrite ? Inst->getArgOperand(0)->getType() : Inst->getType(); + // The results of segment loads are TargetExtType. + if (auto *TarExtTy = dyn_cast<TargetExtType>(Ty)) { + unsigned SEW = + 1 << cast<ConstantInt>(Inst->getArgOperand(Inst->arg_size() - 1)) + ->getZExtValue(); + Ty = TarExtTy->getTypeParameter(0U); + Ty = ScalableVectorType::get( + IntegerType::get(C, SEW), + cast<ScalableVectorType>(Ty)->getMinNumElements() * 8 / SEW); + } const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID); unsigned VLIndex = RVVIInfo->VLOperand; unsigned PtrOperandNo = VLIndex - 2 - HasMask; @@ -2845,6 +3023,13 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, Mask = ConstantInt::getTrue(MaskType); } Value *EVL = Inst->getArgOperand(VLIndex); + unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite); + // RVV uses contiguous elements as a segment. + if (SegNum > 1) { + unsigned ElemSize = Ty->getScalarSizeInBits(); + auto *SegTy = IntegerType::get(C, ElemSize * SegNum); + Ty = VectorType::get(SegTy, cast<VectorType>(Ty)); + } Value *OffsetOp = Inst->getArgOperand(PtrOperandNo + 1); Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty, Align(1), Mask, EVL, |
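
As an illustration of the isShifted359 helper this patch adds to RISCVInstrInfo.h, below is a minimal standalone sketch (not part of the patch; it assumes C++20 and substitutes std::countr_zero for llvm::countr_zero so it compiles on its own) of the decomposition now shared by expandMul and mulImm: a multiplier of the form (3|5|9) << N maps to one shNadd plus one slli.

// Minimal standalone sketch of the isShifted359 decomposition added to
// RISCVInstrInfo.h; std::countr_zero stands in for llvm::countr_zero.
#include <bit>
#include <cstdint>
#include <cstdio>

// Returns log2(C1 - 1) if Value == C1 << C2 with C1 in {3, 5, 9} and sets
// Shift = C2; returns 0 otherwise. The return value is the shNadd amount
// (1 -> sh1add, 2 -> sh2add, 3 -> sh3add); Shift is the trailing slli amount.
static int isShifted359(uint64_t Value, int &Shift) {
  if (Value == 0)
    return 0;
  Shift = std::countr_zero(Value);
  switch (Value >> Shift) {
  case 3:
    return 1;
  case 5:
    return 2;
  case 9:
    return 3;
  default:
    return 0;
  }
}

int main() {
  const uint64_t Tests[] = {12, 20, 40, 72, 7};
  for (uint64_t MulAmt : Tests) {
    int Shift = 0;
    if (int ShX = isShifted359(MulAmt, Shift))
      // e.g. 40 = 5 << 3: sh2add t, x, x ; slli t, t, 3
      std::printf("%llu -> sh%dadd + slli %d\n",
                  (unsigned long long)MulAmt, ShX, Shift);
    else
      std::printf("%llu -> not of the form (3|5|9) << N\n",
                  (unsigned long long)MulAmt);
  }
  return 0;
}

For example, 40 = 5 << 3 reports sh2add plus slli by 3, matching the "3/5/9 * 2^N -> shl (shXadd X, X), N" comment in expandMul above.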