Diffstat (limited to 'llvm/lib/Target/RISCV')
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp           2
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp  148
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp     24
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td                      2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVGISel.td                        108
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp                139
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp                    29
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.h                      19
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td                      4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoA.td                    74
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZa.td                   56
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td               16
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZb.td                   34
-rw-r--r--  llvm/lib/Target/RISCV/RISCVRegisterInfo.td                   2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFive7.td                  36
-rw-r--r--  llvm/lib/Target/RISCV/RISCVScheduleV.td                     55
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp         191
17 files changed, 611 insertions(+), 328 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index ecfb5fe..eb41588 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -334,7 +334,7 @@ static bool isLegalElementTypeForRVV(Type *EltTy,
if (EltTy->isIntegerTy(64))
return Subtarget.hasVInstructionsI64();
if (EltTy->isHalfTy())
- return Subtarget.hasVInstructionsF16();
+ return Subtarget.hasVInstructionsF16Minimal();
if (EltTy->isBFloatTy())
return Subtarget.hasVInstructionsBF16Minimal();
if (EltTy->isFloatTy())
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 71c21e4..53633ea 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -675,6 +675,45 @@ static void getOperandsForBranch(Register CondReg, RISCVCC::CondCode &CC,
CC = getRISCVCCFromICmp(Pred);
}
+/// Select the RISC-V Zalasr opcode for the G_LOAD or G_STORE operation
+/// \p GenericOpc, appropriate for the GPR register bank and of memory access
+/// size \p OpSize.
+static unsigned selectZalasrLoadStoreOp(unsigned GenericOpc, unsigned OpSize) {
+ const bool IsStore = GenericOpc == TargetOpcode::G_STORE;
+ switch (OpSize) {
+ default:
+ llvm_unreachable("Unexpected memory size");
+ case 8:
+ return IsStore ? RISCV::SB_RL : RISCV::LB_AQ;
+ case 16:
+ return IsStore ? RISCV::SH_RL : RISCV::LH_AQ;
+ case 32:
+ return IsStore ? RISCV::SW_RL : RISCV::LW_AQ;
+ case 64:
+ return IsStore ? RISCV::SD_RL : RISCV::LD_AQ;
+ }
+}
+
+/// Select the RISC-V regimm opcode for the G_LOAD or G_STORE operation
+/// \p GenericOpc, appropriate for the GPR register bank and of memory access
+/// size \p OpSize. \returns \p GenericOpc if the combination is unsupported.
+static unsigned selectRegImmLoadStoreOp(unsigned GenericOpc, unsigned OpSize) {
+ const bool IsStore = GenericOpc == TargetOpcode::G_STORE;
+ switch (OpSize) {
+ case 8:
+ // Prefer unsigned due to no c.lb in Zcb.
+ return IsStore ? RISCV::SB : RISCV::LBU;
+ case 16:
+ return IsStore ? RISCV::SH : RISCV::LH;
+ case 32:
+ return IsStore ? RISCV::SW : RISCV::LW;
+ case 64:
+ return IsStore ? RISCV::SD : RISCV::LD;
+ }
+
+ return GenericOpc;
+}
+
bool RISCVInstructionSelector::select(MachineInstr &MI) {
MachineIRBuilder MIB(MI);
@@ -736,6 +775,62 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
MI.eraseFromParent();
return true;
}
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT: {
+ bool IsSigned = Opc != TargetOpcode::G_ZEXT;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI->getType(SrcReg);
+ unsigned SrcSize = SrcTy.getSizeInBits();
+
+ if (SrcTy.isVector())
+ return false; // Should be handled by imported patterns.
+
+ assert((*RBI.getRegBank(DstReg, *MRI, TRI)).getID() ==
+ RISCV::GPRBRegBankID &&
+ "Unexpected ext regbank");
+
+ // Use addiw SrcReg, 0 (sext.w) for i32.
+ if (IsSigned && SrcSize == 32) {
+ MI.setDesc(TII.get(RISCV::ADDIW));
+ MI.addOperand(MachineOperand::CreateImm(0));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ }
+
+ // Use add.uw SrcReg, X0 (zext.w) for i32 with Zba.
+ if (!IsSigned && SrcSize == 32 && STI.hasStdExtZba()) {
+ MI.setDesc(TII.get(RISCV::ADD_UW));
+ MI.addOperand(MachineOperand::CreateReg(RISCV::X0, /*isDef=*/false));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ }
+
+ // Use sext.h/zext.h for i16 with Zbb.
+ if (SrcSize == 16 && STI.hasStdExtZbb()) {
+ MI.setDesc(TII.get(IsSigned ? RISCV::SEXT_H
+ : STI.isRV64() ? RISCV::ZEXT_H_RV64
+ : RISCV::ZEXT_H_RV32));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ }
+
+ // Use pack(w) SrcReg, X0 for i16 zext with Zbkb.
+ if (!IsSigned && SrcSize == 16 && STI.hasStdExtZbkb()) {
+ MI.setDesc(TII.get(STI.is64Bit() ? RISCV::PACKW : RISCV::PACK));
+ MI.addOperand(MachineOperand::CreateReg(RISCV::X0, /*isDef=*/false));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ }
+
+ // Fall back to shift pair.
+ auto ShiftLeft =
+ MIB.buildInstr(RISCV::SLLI, {&RISCV::GPRRegClass}, {SrcReg})
+ .addImm(STI.getXLen() - SrcSize);
+ constrainSelectedInstRegOperands(*ShiftLeft, TII, TRI, RBI);
+ auto ShiftRight = MIB.buildInstr(IsSigned ? RISCV::SRAI : RISCV::SRLI,
+ {DstReg}, {ShiftLeft})
+ .addImm(STI.getXLen() - SrcSize);
+ constrainSelectedInstRegOperands(*ShiftRight, TII, TRI, RBI);
+ MI.eraseFromParent();
+ return true;
+ }
case TargetOpcode::G_FCONSTANT: {
// TODO: Use constant pool for complex constants.
Register DstReg = MI.getOperand(0).getReg();
@@ -836,6 +931,59 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
return selectImplicitDef(MI, MIB);
case TargetOpcode::G_UNMERGE_VALUES:
return selectUnmergeValues(MI, MIB);
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE: {
+ GLoadStore &LdSt = cast<GLoadStore>(MI);
+ const Register ValReg = LdSt.getReg(0);
+ const Register PtrReg = LdSt.getPointerReg();
+ LLT PtrTy = MRI->getType(PtrReg);
+
+ const RegisterBank &RB = *RBI.getRegBank(ValReg, *MRI, TRI);
+ if (RB.getID() != RISCV::GPRBRegBankID)
+ return false;
+
+#ifndef NDEBUG
+ const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, *MRI, TRI);
+ // Check that the pointer register is valid.
+ assert(PtrRB.getID() == RISCV::GPRBRegBankID &&
+ "Load/Store pointer operand isn't a GPR");
+ assert(PtrTy.isPointer() && "Load/Store pointer operand isn't a pointer");
+#endif
+
+ // Can only handle AddressSpace 0.
+ if (PtrTy.getAddressSpace() != 0)
+ return false;
+
+ unsigned MemSize = LdSt.getMemSizeInBits().getValue();
+ AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
+
+ if (isStrongerThanMonotonic(Order)) {
+ MI.setDesc(TII.get(selectZalasrLoadStoreOp(Opc, MemSize)));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ }
+
+ const unsigned NewOpc = selectRegImmLoadStoreOp(MI.getOpcode(), MemSize);
+ if (NewOpc == MI.getOpcode())
+ return false;
+
+ // Check if we can fold anything into the addressing mode.
+ auto AddrModeFns = selectAddrRegImm(MI.getOperand(1));
+ if (!AddrModeFns)
+ return false;
+
+ // Folded something. Create a new instruction and return it.
+ auto NewInst = MIB.buildInstr(NewOpc, {}, {}, MI.getFlags());
+ if (isa<GStore>(MI))
+ NewInst.addUse(ValReg);
+ else
+ NewInst.addDef(ValReg);
+ NewInst.cloneMemRefs(MI);
+ for (auto &Fn : *AddrModeFns)
+ Fn(NewInst);
+ MI.eraseFromParent();
+
+ return constrainSelectedInstRegOperands(*NewInst, TII, TRI, RBI);
+ }
default:
return false;
}
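
A minimal standalone sketch (plain C++, not LLVM code; XLEN == 64 and the helper name are assumptions) of the shift-pair arithmetic the new G_ZEXT/G_SEXT fallback above emits when no Zba/Zbb/Zbkb form applies: both shifts use the same amount, XLen - SrcSize, with SRLI giving zero-extension and SRAI giving sign-extension.

#include <cassert>
#include <cstdint>

// Models SLLI Tmp, Src, (XLen-SrcSize) followed by SRLI/SRAI by the same amount.
uint64_t extendViaShiftPair(uint64_t Reg, unsigned SrcSize, bool IsSigned) {
  const unsigned XLen = 64;
  const unsigned ShAmt = XLen - SrcSize;
  uint64_t Shifted = Reg << ShAmt;                 // SLLI
  if (IsSigned)                                    // SRAI (arithmetic shift)
    return static_cast<uint64_t>(static_cast<int64_t>(Shifted) >> ShAmt);
  return Shifted >> ShAmt;                         // SRLI (logical shift)
}

int main() {
  assert(extendViaShiftPair(0xFFFF, 16, /*IsSigned=*/false) == 0xFFFFu);
  assert(extendViaShiftPair(0x8000, 16, /*IsSigned=*/true) ==
         0xFFFFFFFFFFFF8000ULL);
  return 0;
}
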
diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index a537904..5dd4bf4 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -166,7 +166,7 @@ static unsigned getLRForRMW32(AtomicOrdering Ordering,
return RISCV::LR_W;
return RISCV::LR_W_AQ;
case AtomicOrdering::SequentiallyConsistent:
- return RISCV::LR_W_AQ_RL;
+ return RISCV::LR_W_AQRL;
}
}
@@ -210,7 +210,7 @@ static unsigned getLRForRMW64(AtomicOrdering Ordering,
return RISCV::LR_D;
return RISCV::LR_D_AQ;
case AtomicOrdering::SequentiallyConsistent:
- return RISCV::LR_D_AQ_RL;
+ return RISCV::LR_D_AQRL;
}
}
@@ -287,8 +287,8 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
break;
}
BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
- .addReg(AddrReg)
- .addReg(ScratchReg);
+ .addReg(ScratchReg)
+ .addReg(AddrReg);
BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
.addReg(ScratchReg)
.addReg(RISCV::X0)
@@ -375,8 +375,8 @@ static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
ScratchReg);
BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
- .addReg(AddrReg)
- .addReg(ScratchReg);
+ .addReg(ScratchReg)
+ .addReg(AddrReg);
BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
.addReg(ScratchReg)
.addReg(RISCV::X0)
@@ -535,8 +535,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
// sc.w scratch1, scratch1, (addr)
// bnez scratch1, loop
BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
- .addReg(AddrReg)
- .addReg(Scratch1Reg);
+ .addReg(Scratch1Reg)
+ .addReg(AddrReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
.addReg(Scratch1Reg)
.addReg(RISCV::X0)
@@ -674,8 +674,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
// bnez scratch, loophead
BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
ScratchReg)
- .addReg(AddrReg)
- .addReg(NewValReg);
+ .addReg(NewValReg)
+ .addReg(AddrReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
.addReg(ScratchReg)
.addReg(RISCV::X0)
@@ -707,8 +707,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
MaskReg, ScratchReg);
BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
ScratchReg)
- .addReg(AddrReg)
- .addReg(ScratchReg);
+ .addReg(ScratchReg)
+ .addReg(AddrReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
.addReg(ScratchReg)
.addReg(RISCV::X0)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 27cf057..40c05e8 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -265,7 +265,7 @@ def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">,
def NoStdExtZacas : Predicate<"!Subtarget->hasStdExtZacas()">;
def FeatureStdExtZalasr
- : RISCVExperimentalExtension<0, 1, "Load-Acquire and Store-Release Instructions">;
+ : RISCVExperimentalExtension<0, 9, "Load-Acquire and Store-Release Instructions">;
def HasStdExtZalasr : Predicate<"Subtarget->hasStdExtZalasr()">,
AssemblerPredicate<(all_of FeatureStdExtZalasr),
"'Zalasr' (Load-Acquire and Store-Release Instructions)">;
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index 6d01250..eba35ef 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -100,119 +100,11 @@ def : LdPat<load, LD, PtrVT>;
def : StPat<store, SD, GPR, PtrVT>;
}
-// Load and store patterns for i16, needed because Zfh makes s16 load/store
-// legal and regbank select may not constrain registers to FP.
-def : LdPat<load, LH, i16>;
-def : StPat<store, SH, GPR, i16>;
-
-def : LdPat<extloadi8, LBU, i16>; // Prefer unsigned due to no c.lb in Zcb.
-def : StPat<truncstorei8, SB, GPR, i16>;
-
-let Predicates = [HasAtomicLdSt] in {
- // Prefer unsigned due to no c.lb in Zcb.
- def : LdPat<relaxed_load<atomic_load_aext_8>, LBU, i16>;
- def : LdPat<relaxed_load<atomic_load_nonext_16>, LH, i16>;
-
- def : StPat<relaxed_store<atomic_store_8>, SB, GPR, i16>;
- def : StPat<relaxed_store<atomic_store_16>, SH, GPR, i16>;
-}
-
-let Predicates = [HasAtomicLdSt, IsRV64] in {
- // Load pattern is in RISCVInstrInfoA.td and shared with RV32.
- def : StPat<relaxed_store<atomic_store_32>, SW, GPR, i32>;
-}
-
//===----------------------------------------------------------------------===//
// RV64 i32 patterns not used by SelectionDAG
//===----------------------------------------------------------------------===//
let Predicates = [IsRV64] in {
-def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb.
-def : LdPat<extloadi16, LH, i32>;
-
-def : StPat<truncstorei8, SB, GPR, i32>;
-def : StPat<truncstorei16, SH, GPR, i32>;
-
-def : Pat<(sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>;
-
def : Pat<(sext_inreg (i64 (add GPR:$rs1, simm12_lo:$imm)), i32),
(ADDIW GPR:$rs1, simm12_lo:$imm)>;
}
-
-let Predicates = [IsRV64, NoStdExtZba] in
-def : Pat<(zext (i32 GPR:$src)), (SRLI (i64 (SLLI GPR:$src, 32)), 32)>;
-
-let Predicates = [IsRV32, NoStdExtZbb, NoStdExtZbkb] in
-def : Pat<(XLenVT (zext (i16 GPR:$src))),
- (SRLI (XLenVT (SLLI GPR:$src, 16)), 16)>;
-
-let Predicates = [IsRV64, NoStdExtZbb, NoStdExtZbkb] in {
-def : Pat<(i64 (zext (i16 GPR:$src))),
- (SRLI (XLenVT (SLLI GPR:$src, 48)), 48)>;
-def : Pat<(i32 (zext (i16 GPR:$src))),
- (SRLI (XLenVT (SLLI GPR:$src, 48)), 48)>;
-}
-
-let Predicates = [IsRV32, NoStdExtZbb] in
-def : Pat<(XLenVT (sext (i16 GPR:$src))),
- (SRAI (XLenVT (SLLI GPR:$src, 16)), 16)>;
-
-let Predicates = [IsRV64, NoStdExtZbb] in {
-def : Pat<(i64 (sext (i16 GPR:$src))),
- (SRAI (XLenVT (SLLI GPR:$src, 48)), 48)>;
-def : Pat<(i32 (sext (i16 GPR:$src))),
- (SRAI (XLenVT (SLLI GPR:$src, 48)), 48)>;
-}
-
-//===----------------------------------------------------------------------===//
-// Zb* RV64 patterns not used by SelectionDAG.
-//===----------------------------------------------------------------------===//
-
-let Predicates = [HasStdExtZba, IsRV64] in {
-def : Pat<(zext (i32 GPR:$src)), (ADD_UW GPR:$src, (XLenVT X0))>;
-}
-
-let Predicates = [HasStdExtZbb] in
-def : Pat<(i32 (sext (i16 GPR:$rs))), (SEXT_H GPR:$rs)>;
-let Predicates = [HasStdExtZbb, IsRV64] in
-def : Pat<(i64 (sext (i16 GPR:$rs))), (SEXT_H GPR:$rs)>;
-
-let Predicates = [HasStdExtZbb, IsRV32] in
-def : Pat<(i32 (zext (i16 GPR:$rs))), (ZEXT_H_RV32 GPR:$rs)>;
-let Predicates = [HasStdExtZbb, IsRV64] in {
-def : Pat<(i64 (zext (i16 GPR:$rs))), (ZEXT_H_RV64 GPR:$rs)>;
-def : Pat<(i32 (zext (i16 GPR:$rs))), (ZEXT_H_RV64 GPR:$rs)>;
-}
-
-let Predicates = [HasStdExtZbkb, NoStdExtZbb, IsRV32] in
-def : Pat<(i32 (zext (i16 GPR:$rs))), (PACK GPR:$rs, (XLenVT X0))>;
-let Predicates = [HasStdExtZbkb, NoStdExtZbb, IsRV64] in {
-def : Pat<(i64 (zext (i16 GPR:$rs))), (PACKW GPR:$rs, (XLenVT X0))>;
-def : Pat<(i32 (zext (i16 GPR:$rs))), (PACKW GPR:$rs, (XLenVT X0))>;
-}
-
-//===----------------------------------------------------------------------===//
-// Zalasr patterns not used by SelectionDAG
-//===----------------------------------------------------------------------===//
-
-let Predicates = [HasStdExtZalasr] in {
- // the sequentially consistent loads use
- // .aq instead of .aqrl to match the psABI/A.7
- def : PatLAQ<acquiring_load<atomic_load_aext_8>, LB_AQ, i16>;
- def : PatLAQ<seq_cst_load<atomic_load_aext_8>, LB_AQ, i16>;
-
- def : PatLAQ<acquiring_load<atomic_load_nonext_16>, LH_AQ, i16>;
- def : PatLAQ<seq_cst_load<atomic_load_nonext_16>, LH_AQ, i16>;
-
- def : PatSRL<releasing_store<atomic_store_8>, SB_RL, i16>;
- def : PatSRL<seq_cst_store<atomic_store_8>, SB_RL, i16>;
-
- def : PatSRL<releasing_store<atomic_store_16>, SH_RL, i16>;
- def : PatSRL<seq_cst_store<atomic_store_16>, SH_RL, i16>;
-}
-
-let Predicates = [HasStdExtZalasr, IsRV64] in {
- // Load pattern is in RISCVInstrInfoZalasr.td and shared with RV32.
- def : PatSRL<releasing_store<atomic_store_32>, SW_RL, i32>;
- def : PatSRL<seq_cst_store<atomic_store_32>, SW_RL, i32>;
-}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6234714..7123a2d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15721,8 +15721,7 @@ static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
return SDValue();
// Emit a negate of the setcc.
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- N0.getOperand(0));
+ return DAG.getNegative(N0.getOperand(0), DL, VT);
}
static SDValue performADDCombine(SDNode *N,
@@ -16498,43 +16497,60 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
SDValue X = N->getOperand(0);
if (Subtarget.hasShlAdd(3)) {
- for (uint64_t Divisor : {3, 5, 9}) {
- if (MulAmt % Divisor != 0)
- continue;
- uint64_t MulAmt2 = MulAmt / Divisor;
- // 3/5/9 * 2^N -> shl (shXadd X, X), N
- if (isPowerOf2_64(MulAmt2)) {
- SDLoc DL(N);
- SDValue X = N->getOperand(0);
- // Put the shift first if we can fold a zext into the
- // shift forming a slli.uw.
- if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
- X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
- SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
- DAG.getConstant(Log2_64(MulAmt2), DL, VT));
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
- DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
- Shl);
- }
- // Otherwise, put rhe shl second so that it can fold with following
- // instructions (e.g. sext or add).
- SDValue Mul359 =
- DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
- return DAG.getNode(ISD::SHL, DL, VT, Mul359,
- DAG.getConstant(Log2_64(MulAmt2), DL, VT));
- }
-
- // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
- if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
- SDLoc DL(N);
- SDValue Mul359 =
- DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
- Mul359);
+ int Shift;
+ if (int ShXAmount = isShifted359(MulAmt, Shift)) {
+ // 3/5/9 * 2^N -> shl (shXadd X, X), N
+ SDLoc DL(N);
+ SDValue X = N->getOperand(0);
+ // Put the shift first if we can fold a zext into the shift forming
+ // a slli.uw.
+ if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
+ X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
+ SDValue Shl =
+ DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
+ return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
+ DAG.getConstant(ShXAmount, DL, VT), Shl);
}
+ // Otherwise, put the shl second so that it can fold with following
+ // instructions (e.g. sext or add).
+ SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ DAG.getConstant(ShXAmount, DL, VT), X);
+ return DAG.getNode(ISD::SHL, DL, VT, Mul359,
+ DAG.getConstant(Shift, DL, VT));
+ }
+
+ // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
+ int ShX;
+ int ShY;
+ switch (MulAmt) {
+ case 3 * 5:
+ ShY = 1;
+ ShX = 2;
+ break;
+ case 3 * 9:
+ ShY = 1;
+ ShX = 3;
+ break;
+ case 5 * 5:
+ ShX = ShY = 2;
+ break;
+ case 5 * 9:
+ ShY = 2;
+ ShX = 3;
+ break;
+ case 9 * 9:
+ ShX = ShY = 3;
+ break;
+ default:
+ ShX = ShY = 0;
+ break;
+ }
+ if (ShX) {
+ SDLoc DL(N);
+ SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ DAG.getConstant(ShY, DL, VT), X);
+ return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
+ DAG.getConstant(ShX, DL, VT), Mul359);
}
// If this is a power 2 + 2/4/8, we can use a shift followed by a single
@@ -16557,18 +16573,14 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
// variants we could implement. e.g.
// (2^(1,2,3) * 3,5,9 + 1) << C2
// 2^(C1>3) * 3,5,9 +/- 1
- for (uint64_t Divisor : {3, 5, 9}) {
- uint64_t C = MulAmt - 1;
- if (C <= Divisor)
- continue;
- unsigned TZ = llvm::countr_zero(C);
- if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
+ if (int ShXAmount = isShifted359(MulAmt - 1, Shift)) {
+ assert(Shift != 0 && "MulAmt=4,6,10 handled before");
+ if (Shift <= 3) {
SDLoc DL(N);
- SDValue Mul359 =
- DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
+ SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ DAG.getConstant(ShXAmount, DL, VT), X);
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getConstant(TZ, DL, VT), X);
+ DAG.getConstant(Shift, DL, VT), X);
}
}
@@ -16576,7 +16588,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
if (ScaleShift >= 1 && ScaleShift < 4) {
- unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
+ unsigned ShiftAmt = llvm::countr_zero((MulAmt - 1) & (MulAmt - 2));
SDLoc DL(N);
SDValue Shift1 =
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
@@ -16589,7 +16601,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
// 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
for (uint64_t Offset : {3, 5, 9}) {
if (isPowerOf2_64(MulAmt + Offset)) {
- unsigned ShAmt = Log2_64(MulAmt + Offset);
+ unsigned ShAmt = llvm::countr_zero(MulAmt + Offset);
if (ShAmt >= VT.getSizeInBits())
continue;
SDLoc DL(N);
@@ -16608,21 +16620,16 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
uint64_t MulAmt2 = MulAmt / Divisor;
// 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
// of 25 which happen to be quite common.
- for (uint64_t Divisor2 : {3, 5, 9}) {
- if (MulAmt2 % Divisor2 != 0)
- continue;
- uint64_t MulAmt3 = MulAmt2 / Divisor2;
- if (isPowerOf2_64(MulAmt3)) {
- SDLoc DL(N);
- SDValue Mul359A =
- DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
- SDValue Mul359B = DAG.getNode(
- RISCVISD::SHL_ADD, DL, VT, Mul359A,
- DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
- return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
- DAG.getConstant(Log2_64(MulAmt3), DL, VT));
- }
+ if (int ShBAmount = isShifted359(MulAmt2, Shift)) {
+ SDLoc DL(N);
+ SDValue Mul359A =
+ DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
+ SDValue Mul359B =
+ DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359A,
+ DAG.getConstant(ShBAmount, DL, VT), Mul359A);
+ return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
+ DAG.getConstant(Shift, DL, VT));
}
}
}
@@ -16966,7 +16973,7 @@ performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
+ return DAG.getNegative(Src, DL, VT);
// Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
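
An arithmetic sketch (standalone C++; the helper mirrors RISCVISD::SHL_ADD, which computes (a << sh) + b, and the sample multipliers are assumptions) of the shXadd decompositions selected in expandMul above:

#include <cassert>
#include <cstdint>

uint64_t shlAdd(uint64_t A, unsigned Sh, uint64_t B) { return (A << Sh) + B; }

int main() {
  const uint64_t X = 7;
  // MulAmt = 48 = 3 << 4: shl (sh1add X, X), 4
  assert((shlAdd(X, 1, X) << 4) == 48 * X);
  // MulAmt = 45 = 5 * 9: sh3add (sh2add X, X), (sh2add X, X)
  uint64_t Mul5 = shlAdd(X, 2, X);
  assert(shlAdd(Mul5, 3, Mul5) == 45 * X);
  // MulAmt = 11 = (5 << 1) + 1: sh1add (sh2add X, X), X
  assert(shlAdd(shlAdd(X, 2, X), 1, X) == 11 * X);
  return 0;
}
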
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 7db4832..96e1078 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -4586,24 +4586,23 @@ void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
.addReg(DestReg, RegState::Kill)
.addImm(ShiftAmount)
.setMIFlag(Flag);
- } else if (STI.hasShlAdd(3) &&
- ((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) ||
- (Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) ||
- (Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) {
+ } else if (int ShXAmount, ShiftAmount;
+ STI.hasShlAdd(3) &&
+ (ShXAmount = isShifted359(Amount, ShiftAmount)) != 0) {
// We can use Zba SHXADD+SLLI instructions for multiply in some cases.
unsigned Opc;
- uint32_t ShiftAmount;
- if (Amount % 9 == 0) {
- Opc = RISCV::SH3ADD;
- ShiftAmount = Log2_64(Amount / 9);
- } else if (Amount % 5 == 0) {
- Opc = RISCV::SH2ADD;
- ShiftAmount = Log2_64(Amount / 5);
- } else if (Amount % 3 == 0) {
+ switch (ShXAmount) {
+ case 1:
Opc = RISCV::SH1ADD;
- ShiftAmount = Log2_64(Amount / 3);
- } else {
- llvm_unreachable("implied by if-clause");
+ break;
+ case 2:
+ Opc = RISCV::SH2ADD;
+ break;
+ case 3:
+ Opc = RISCV::SH3ADD;
+ break;
+ default:
+ llvm_unreachable("unexpected result of isShifted359");
}
if (ShiftAmount)
BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 42a0c4c..c5eddb9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -25,6 +25,25 @@
namespace llvm {
+// If Value is of the form C1<<C2, where C1 = 3, 5 or 9,
+// returns log2(C1 - 1) and assigns Shift = C2.
+// Otherwise, returns 0.
+template <typename T> int isShifted359(T Value, int &Shift) {
+ if (Value == 0)
+ return 0;
+ Shift = llvm::countr_zero(Value);
+ switch (Value >> Shift) {
+ case 3:
+ return 1;
+ case 5:
+ return 2;
+ case 9:
+ return 3;
+ default:
+ return 0;
+ }
+}
+
class RISCVSubtarget;
static const MachineMemOperand::Flags MONontemporalBit0 =
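
A worked example of the isShifted359 contract (a standalone model assumed equivalent to the template above; requires C++20 for std::countr_zero):

#include <bit>
#include <cassert>
#include <cstdint>

int modelIsShifted359(uint64_t Value, int &Shift) {
  if (Value == 0)
    return 0;
  Shift = std::countr_zero(Value);
  switch (Value >> Shift) {
  case 3: return 1;   // log2(3 - 1)
  case 5: return 2;   // log2(5 - 1)
  case 9: return 3;   // log2(9 - 1)
  default: return 0;
  }
}

int main() {
  int Shift = -1;
  assert(modelIsShifted359(40, Shift) == 2 && Shift == 3); // 40 == 5 << 3
  assert(modelIsShifted359(12, Shift) == 1 && Shift == 2); // 12 == 3 << 2
  assert(modelIsShifted359(9, Shift) == 3 && Shift == 0);  //  9 == 9 << 0
  assert(modelIsShifted359(7, Shift) == 0);                // not 3/5/9 << N
  return 0;
}
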
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 9855c47..7a14929 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1980,7 +1980,7 @@ def : LdPat<sextloadi8, LB>;
def : LdPat<extloadi8, LBU>; // Prefer unsigned due to no c.lb in Zcb.
def : LdPat<sextloadi16, LH>;
def : LdPat<extloadi16, LH>;
-def : LdPat<load, LW, i32>;
+def : LdPat<load, LW, i32>, Requires<[IsRV32]>;
def : LdPat<zextloadi8, LBU>;
def : LdPat<zextloadi16, LHU>;
@@ -1994,7 +1994,7 @@ class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
def : StPat<truncstorei8, SB, GPR, XLenVT>;
def : StPat<truncstorei16, SH, GPR, XLenVT>;
-def : StPat<store, SW, GPR, i32>;
+def : StPat<store, SW, GPR, i32>, Requires<[IsRV32]>;
/// Fences
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 25accd9..571d72f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -24,36 +24,36 @@ class LR_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
}
multiclass LR_r_aq_rl<bits<3> funct3, string opcodestr> {
- def "" : LR_r<0, 0, funct3, opcodestr>;
- def _AQ : LR_r<1, 0, funct3, opcodestr # ".aq">;
- def _RL : LR_r<0, 1, funct3, opcodestr # ".rl">;
- def _AQ_RL : LR_r<1, 1, funct3, opcodestr # ".aqrl">;
+ def "" : LR_r<0, 0, funct3, opcodestr>;
+ def _AQ : LR_r<1, 0, funct3, opcodestr # ".aq">;
+ def _RL : LR_r<0, 1, funct3, opcodestr # ".rl">;
+ def _AQRL : LR_r<1, 1, funct3, opcodestr # ".aqrl">;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
class SC_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
: RVInstRAtomic<0b00011, aq, rl, funct3, OPC_AMO,
- (outs GPR:$rd), (ins GPRMemZeroOffset:$rs1, GPR:$rs2),
+ (outs GPR:$rd), (ins GPR:$rs2, GPRMemZeroOffset:$rs1),
opcodestr, "$rd, $rs2, $rs1">;
multiclass SC_r_aq_rl<bits<3> funct3, string opcodestr> {
- def "" : SC_r<0, 0, funct3, opcodestr>;
- def _AQ : SC_r<1, 0, funct3, opcodestr # ".aq">;
- def _RL : SC_r<0, 1, funct3, opcodestr # ".rl">;
- def _AQ_RL : SC_r<1, 1, funct3, opcodestr # ".aqrl">;
+ def "" : SC_r<0, 0, funct3, opcodestr>;
+ def _AQ : SC_r<1, 0, funct3, opcodestr # ".aq">;
+ def _RL : SC_r<0, 1, funct3, opcodestr # ".rl">;
+ def _AQRL : SC_r<1, 1, funct3, opcodestr # ".aqrl">;
}
let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in
class AMO_rr<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr>
: RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO,
- (outs GPR:$rd), (ins GPRMemZeroOffset:$rs1, GPR:$rs2),
+ (outs GPR:$rd), (ins GPR:$rs2, GPRMemZeroOffset:$rs1),
opcodestr, "$rd, $rs2, $rs1">;
multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> {
- def "" : AMO_rr<funct5, 0, 0, funct3, opcodestr>;
- def _AQ : AMO_rr<funct5, 1, 0, funct3, opcodestr # ".aq">;
- def _RL : AMO_rr<funct5, 0, 1, funct3, opcodestr # ".rl">;
- def _AQ_RL : AMO_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl">;
+ def "" : AMO_rr<funct5, 0, 0, funct3, opcodestr>;
+ def _AQ : AMO_rr<funct5, 1, 0, funct3, opcodestr # ".aq">;
+ def _RL : AMO_rr<funct5, 0, 1, funct3, opcodestr # ".rl">;
+ def _AQRL : AMO_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl">;
}
//===----------------------------------------------------------------------===//
@@ -174,8 +174,9 @@ let Predicates = [HasAtomicLdSt] in {
def : StPat<relaxed_store<atomic_store_8>, SB, GPR, XLenVT>;
def : StPat<relaxed_store<atomic_store_16>, SH, GPR, XLenVT>;
def : StPat<relaxed_store<atomic_store_32>, SW, GPR, XLenVT>;
+}
- // Used by GISel for RV32 and RV64.
+let Predicates = [HasAtomicLdSt, IsRV32] in {
def : LdPat<relaxed_load<atomic_load_nonext_32>, LW, i32>;
}
@@ -188,31 +189,34 @@ let Predicates = [HasAtomicLdSt, IsRV64] in {
/// AMOs
+class PatAMO<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
+ : Pat<(vt (OpNode (XLenVT GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs2, GPR:$rs1)>;
+
multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
list<Predicate> ExtraPreds = []> {
let Predicates = !listconcat([HasStdExtA, NoStdExtZtso], ExtraPreds) in {
- def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
- !cast<RVInst>(BaseInst), vt>;
- def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
- !cast<RVInst>(BaseInst#"_AQ"), vt>;
- def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
- !cast<RVInst>(BaseInst#"_RL"), vt>;
- def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
- !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
- def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
- !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
+ def : PatAMO<!cast<PatFrag>(AtomicOp#"_monotonic"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatAMO<!cast<PatFrag>(AtomicOp#"_acquire"),
+ !cast<RVInst>(BaseInst#"_AQ"), vt>;
+ def : PatAMO<!cast<PatFrag>(AtomicOp#"_release"),
+ !cast<RVInst>(BaseInst#"_RL"), vt>;
+ def : PatAMO<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+ !cast<RVInst>(BaseInst#"_AQRL"), vt>;
+ def : PatAMO<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+ !cast<RVInst>(BaseInst#"_AQRL"), vt>;
}
let Predicates = !listconcat([HasStdExtA, HasStdExtZtso], ExtraPreds) in {
- def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
- !cast<RVInst>(BaseInst), vt>;
- def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
- !cast<RVInst>(BaseInst), vt>;
- def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
- !cast<RVInst>(BaseInst), vt>;
- def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
- !cast<RVInst>(BaseInst), vt>;
- def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
- !cast<RVInst>(BaseInst), vt>;
+ def : PatAMO<!cast<PatFrag>(AtomicOp#"_monotonic"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatAMO<!cast<PatFrag>(AtomicOp#"_acquire"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatAMO<!cast<PatFrag>(AtomicOp#"_release"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatAMO<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatAMO<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+ !cast<RVInst>(BaseInst), vt>;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
index 7cf6d5f..20e2142 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
@@ -9,8 +9,8 @@
// This file describes the RISC-V instructions from the standard atomic 'Za*'
// extensions:
// - Zawrs (v1.0) : Wait-on-Reservation-Set.
-// - Zacas (v1.0-rc1) : Atomic Compare-and-Swap.
-// - Zabha (v1.0-rc1) : Byte and Halfword Atomic Memory Operations.
+// - Zacas (v1.0) : Atomic Compare-and-Swap.
+// - Zabha (v1.0) : Byte and Halfword Atomic Memory Operations.
//
//===----------------------------------------------------------------------===//
@@ -44,15 +44,15 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "$rd = $rd_wb"
class AMO_cas<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr,
DAGOperand RC>
: RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO,
- (outs RC:$rd_wb), (ins RC:$rd, GPRMemZeroOffset:$rs1, RC:$rs2),
+ (outs RC:$rd_wb), (ins RC:$rd, RC:$rs2, GPRMemZeroOffset:$rs1),
opcodestr, "$rd, $rs2, $rs1">;
multiclass AMO_cas_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr,
DAGOperand RC> {
- def "" : AMO_cas<funct5, 0, 0, funct3, opcodestr, RC>;
- def _AQ : AMO_cas<funct5, 1, 0, funct3, opcodestr # ".aq", RC>;
- def _RL : AMO_cas<funct5, 0, 1, funct3, opcodestr # ".rl", RC>;
- def _AQ_RL : AMO_cas<funct5, 1, 1, funct3, opcodestr # ".aqrl", RC>;
+ def "" : AMO_cas<funct5, 0, 0, funct3, opcodestr, RC>;
+ def _AQ : AMO_cas<funct5, 1, 0, funct3, opcodestr # ".aq", RC>;
+ def _RL : AMO_cas<funct5, 0, 1, funct3, opcodestr # ".rl", RC>;
+ def _AQRL : AMO_cas<funct5, 1, 1, funct3, opcodestr # ".aqrl", RC>;
}
let Predicates = [HasStdExtZacas], IsSignExtendingOpW = 1 in {
@@ -71,48 +71,48 @@ defm AMOCAS_Q : AMO_cas_aq_rl<0b00101, 0b100, "amocas.q", GPRPairRV64>;
multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
list<Predicate> ExtraPreds = []> {
let Predicates = !listconcat([HasStdExtZacas, NoStdExtZtso], ExtraPreds) in {
- def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr),
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (XLenVT GPR:$addr),
(vt GPR:$cmp),
(vt GPR:$new)),
- (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
- def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (XLenVT GPR:$addr),
(vt GPR:$cmp),
(vt GPR:$new)),
- (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$addr, GPR:$new)>;
- def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr),
+ (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$new, GPR:$addr)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (XLenVT GPR:$addr),
(vt GPR:$cmp),
(vt GPR:$new)),
- (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
- def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr),
+ (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$new, GPR:$addr)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (XLenVT GPR:$addr),
(vt GPR:$cmp),
(vt GPR:$new)),
- (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
+ (!cast<RVInst>(BaseInst#"_AQRL") GPR:$cmp, GPR:$new, GPR:$addr)>;
def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr),
(vt GPR:$cmp),
(vt GPR:$new)),
- (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
+ (!cast<RVInst>(BaseInst#"_AQRL") GPR:$cmp, GPR:$new, GPR:$addr)>;
} // Predicates = !listconcat([HasStdExtZacas, NoStdExtZtso], ExtraPreds)
let Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) in {
- def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr),
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (XLenVT GPR:$addr),
(vt GPR:$cmp),
(vt GPR:$new)),
- (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
- def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (XLenVT GPR:$addr),
(vt GPR:$cmp),
(vt GPR:$new)),
- (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
- def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (XLenVT GPR:$addr),
(vt GPR:$cmp),
(vt GPR:$new)),
- (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
- def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (XLenVT GPR:$addr),
(vt GPR:$cmp),
(vt GPR:$new)),
- (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
- def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (XLenVT GPR:$addr),
(vt GPR:$cmp),
(vt GPR:$new)),
- (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>;
} // Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds)
}
@@ -140,7 +140,7 @@ def WRS_STO : WRSInst<0b000000011101, "wrs.sto">, Sched<[]>;
// Zabha (Byte and Halfword Atomic Memory Operations)
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZabha] in {
+let Predicates = [HasStdExtZabha], IsSignExtendingOpW = 1 in {
defm AMOSWAP_B : AMO_rr_aq_rl<0b00001, 0b000, "amoswap.b">,
Sched<[WriteAtomicB, ReadAtomicBA, ReadAtomicBD]>;
defm AMOADD_B : AMO_rr_aq_rl<0b00000, 0b000, "amoadd.b">,
@@ -181,7 +181,7 @@ defm AMOMAXU_H : AMO_rr_aq_rl<0b11100, 0b001, "amomaxu.h">,
}
// If Zacas extension is also implemented, Zabha further provides AMOCAS.[B|H].
-let Predicates = [HasStdExtZabha, HasStdExtZacas] in {
+let Predicates = [HasStdExtZabha, HasStdExtZacas], IsSignExtendingOpW = 1 in {
defm AMOCAS_B : AMO_cas_aq_rl<0b00101, 0b000, "amocas.b", GPR>;
defm AMOCAS_H : AMO_cas_aq_rl<0b00101, 0b001, "amocas.h", GPR>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
index 1deecd2..5f944034 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
@@ -30,21 +30,22 @@ class SRL_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
opcodestr, "$rs2, $rs1"> {
let rd = 0;
}
+
multiclass LAQ_r_aq_rl<bits<3> funct3, string opcodestr> {
- def _AQ : LAQ_r<1, 0, funct3, opcodestr # ".aq">;
- def _AQ_RL : LAQ_r<1, 1, funct3, opcodestr # ".aqrl">;
+ def _AQ : LAQ_r<1, 0, funct3, opcodestr # ".aq">;
+ def _AQRL : LAQ_r<1, 1, funct3, opcodestr # ".aqrl">;
}
multiclass SRL_r_aq_rl<bits<3> funct3, string opcodestr> {
- def _RL : SRL_r<0, 1, funct3, opcodestr # ".rl">;
- def _AQ_RL : SRL_r<1, 1, funct3, opcodestr # ".aqrl">;
+ def _RL : SRL_r<0, 1, funct3, opcodestr # ".rl">;
+ def _AQRL : SRL_r<1, 1, funct3, opcodestr # ".aqrl">;
}
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZalasr] in {
+let Predicates = [HasStdExtZalasr], IsSignExtendingOpW = 1 in {
defm LB : LAQ_r_aq_rl<0b000, "lb">;
defm LH : LAQ_r_aq_rl<0b001, "lh">;
defm LW : LAQ_r_aq_rl<0b010, "lw">;
@@ -93,11 +94,12 @@ let Predicates = [HasStdExtZalasr] in {
def : PatSRL<releasing_store<atomic_store_32>, SW_RL>;
def : PatSRL<seq_cst_store<atomic_store_32>, SW_RL>;
+}
- // Used by GISel for RV32 and RV64.
+let Predicates = [HasStdExtZalasr, IsRV32] in {
def : PatLAQ<acquiring_load<atomic_load_nonext_32>, LW_AQ, i32>;
def : PatLAQ<seq_cst_load<atomic_load_nonext_32>, LW_AQ, i32>;
-} // Predicates = [HasStdExtZalasr]
+} // Predicates = [HasStdExtZalasr, IsRV32]
let Predicates = [HasStdExtZalasr, IsRV64] in {
def : PatLAQ<acquiring_load<atomic_load_asext_32>, LW_AQ, i64>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index ce21d83..e519b72 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -702,13 +702,13 @@ def : Pat<(binop_allwusers<or>
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
def : Pat<(binop_allwusers<or>
(or (zexti16 (XLenVT GPR:$rs1)),
- (shl GPR:$op1rs1, (XLenVT 24))),
- (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
+ (shl GPR:$op1rs2, (XLenVT 24))),
+ (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
def : Pat<(i64 (or (or (zexti16 (XLenVT GPR:$rs1)),
- (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
- (sext_inreg (shl GPR:$op1rs1, (XLenVT 24)), i32))),
+ (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
+ (sext_inreg (shl GPR:$op1rs2, (XLenVT 24)), i32))),
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
// Match a pattern of 2 halfwords being inserted into bits [63:32], with bits
@@ -788,32 +788,32 @@ multiclass ShxAdd_UWPat<int i, Instruction shxadd_uw> {
}
multiclass Sh1Add_UWPat<Instruction sh1add_uw> {
- def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF),
- (XLenVT GPR:$rs2))),
+ def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), (i64 0x1FFFFFFFF)),
+ (XLenVT GPR:$rs2)),
(sh1add_uw GPR:$rs1, GPR:$rs2)>;
// Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
- def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE),
- (XLenVT GPR:$rs2))),
+ def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x1FFFFFFFE)),
+ (XLenVT GPR:$rs2)),
(sh1add_uw (XLenVT (SRLI GPR:$rs1, 1)), GPR:$rs2)>;
}
multiclass Sh2Add_UWPat<Instruction sh2add_uw> {
- def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF),
- (XLenVT GPR:$rs2))),
+ def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), (i64 0x3FFFFFFFF)),
+ (XLenVT GPR:$rs2)),
(sh2add_uw GPR:$rs1, GPR:$rs2)>;
// Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
- def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC),
- (XLenVT GPR:$rs2))),
+ def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x3FFFFFFFC)),
+ (XLenVT GPR:$rs2)),
(sh2add_uw (XLenVT (SRLI GPR:$rs1, 2)), GPR:$rs2)>;
}
multiclass Sh3Add_UWPat<Instruction sh3add_uw> {
- def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFF8),
- (XLenVT GPR:$rs2))),
- (sh3add_uw (XLenVT (SRLIW GPR:$rs1, 3)), GPR:$rs2)>;
+ def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 3)), (i64 0x7FFFFFFFF)),
+ (XLenVT GPR:$rs2)),
+ (sh3add_uw GPR:$rs1, GPR:$rs2)>;
// Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
- def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8),
- (XLenVT GPR:$rs2))),
+ def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x7FFFFFFF8)),
+ (XLenVT GPR:$rs2)),
(sh3add_uw (XLenVT (SRLI GPR:$rs1, 3)), GPR:$rs2)>;
}
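
A small standalone check (plain C++, not TableGen; the test values are assumptions) that sh3add.uw, i.e. rd = (zext32(rs1) << 3) + rs2, is exactly what the rewritten first Sh3Add_UWPat pattern (add (and (shl rs1, 3), 0x7FFFFFFFF), rs2) describes:

#include <cassert>
#include <cstdint>

uint64_t sh3addUW(uint64_t Rs1, uint64_t Rs2) {
  return ((Rs1 & 0xFFFFFFFFULL) << 3) + Rs2;        // instruction semantics
}

uint64_t dagPattern(uint64_t Rs1, uint64_t Rs2) {
  return ((Rs1 << 3) & 0x7FFFFFFFFULL) + Rs2;       // pattern being matched
}

int main() {
  const uint64_t Vals[] = {0x0, 0x1, 0xFFFFFFFF, 0x1234567890ABCDEFULL};
  for (uint64_t Rs1 : Vals)
    assert(sh3addUW(Rs1, 0x1000) == dagPattern(Rs1, 0x1000));
  return 0;
}
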
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 82e768d..6605a5c 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -238,7 +238,7 @@ class RISCVRegisterClass<list<ValueType> regTypes, int align, dag regList>
}
class GPRRegisterClass<dag regList>
- : RISCVRegisterClass<[XLenVT, XLenFVT, i32, i16], 32, regList> {
+ : RISCVRegisterClass<[XLenVT, XLenFVT], 32, regList> {
let RegInfos = XLenRI;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 3f2e7db..3e07eff 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -567,9 +567,12 @@ multiclass SiFive7WriteResBase<int VLEN,
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- defm : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [VCQ, VL],
- 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
- [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+ defm : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred,
+ // Predicated
+ [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)],
+ // Not Predicated
+ [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)],
+ mx, IsWorstCase>;
let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
defm : LMULWriteResMX<"WriteVLDUX8", [VCQ, VL], mx, IsWorstCase>;
defm : LMULWriteResMX<"WriteVLDOX8", [VCQ, VL], mx, IsWorstCase>;
@@ -587,9 +590,12 @@ multiclass SiFive7WriteResBase<int VLEN,
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- defm : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [VCQ, VL],
- 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
- [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+ defm : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred,
+ // Predicated
+ [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)],
+ // Not Predicated
+ [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)],
+ mx, IsWorstCase>;
let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
defm : LMULWriteResMX<"WriteVLDUX16", [VCQ, VL], mx, IsWorstCase>;
defm : LMULWriteResMX<"WriteVLDOX16", [VCQ, VL], mx, IsWorstCase>;
@@ -604,9 +610,12 @@ multiclass SiFive7WriteResBase<int VLEN,
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- defm : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [VCQ, VL],
- 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
- [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+ defm : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred,
+ // Predicated
+ [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)],
+ // Not Predicated
+ [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)],
+ mx, IsWorstCase>;
let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
defm : LMULWriteResMX<"WriteVLDUX32", [VCQ, VL], mx, IsWorstCase>;
defm : LMULWriteResMX<"WriteVLDOX32", [VCQ, VL], mx, IsWorstCase>;
@@ -621,9 +630,12 @@ multiclass SiFive7WriteResBase<int VLEN,
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- defm : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [VCQ, VL],
- 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
- [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+ defm : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred,
+ // Predicated
+ [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)],
+ // Not Predicated
+ [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)],
+ mx, IsWorstCase>;
let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
defm : LMULWriteResMX<"WriteVLDUX64", [VCQ, VL], mx, IsWorstCase>;
defm : LMULWriteResMX<"WriteVLDOX64", [VCQ, VL], mx, IsWorstCase>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 6c7658c..01a4308 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -67,42 +67,41 @@ multiclass LMULSEWWriteResMXSEW<string name, list<ProcResourceKind> resources,
// ReleaseAtCycles predCycles if the SchedPredicate Pred is true, otherwise has
// Latency noPredLat and ReleaseAtCycles noPredCycles. The WorstCase SchedWrite
// is created similarly if IsWorstCase is true.
-multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
- list<ProcResourceKind> resources,
- int predLat, list<int> predAcquireCycles,
- list<int> predReleaseCycles, int noPredLat,
- list<int> noPredAcquireCycles,
- list<int> noPredReleaseCycles,
- string mx, bit IsWorstCase> {
- defvar nameMX = name # "_" # mx;
-
+multiclass LMULWriteResVariantImpl<string name, string writeResName, SchedPredicateBase Pred,
+ list<ProcResourceKind> predResources,
+ int predLat, list<int> predAcquireCycles,
+ list<int> predReleaseCycles,
+ list<ProcResourceKind> noPredResources,
+ int noPredLat, list<int> noPredAcquireCycles,
+ list<int> noPredReleaseCycles,
+ bit IsWorstCase> {
// Define the different behaviors
- def nameMX # "_Pred" : SchedWriteRes<resources>{
+ def writeResName # "_Pred" : SchedWriteRes<predResources>{
let Latency = predLat;
let AcquireAtCycles = predAcquireCycles;
let ReleaseAtCycles = predReleaseCycles;
}
- def nameMX # "_NoPred" : SchedWriteRes<resources> {
+ def writeResName # "_NoPred" : SchedWriteRes<noPredResources> {
let Latency = noPredLat;
let AcquireAtCycles = noPredAcquireCycles;
let ReleaseAtCycles = noPredReleaseCycles;
}
// Define SchedVars
- def nameMX # PredSchedVar
- : SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>;
- def nameMX # NoPredSchedVar
- : SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX #"_NoPred")]>;
+ def writeResName # PredSchedVar
+ : SchedVar<Pred, [!cast<SchedWriteRes>(NAME # writeResName # "_Pred")]>;
+ def writeResName # NoPredSchedVar
+ : SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # writeResName #"_NoPred")]>;
// Allow multiclass to refer to SchedVars -- need to have NAME prefix.
- defvar PredSchedVar = !cast<SchedVar>(NAME # nameMX # PredSchedVar);
- defvar NoPredSchedVar = !cast<SchedVar>(NAME # nameMX # NoPredSchedVar);
+ defvar PredSchedVar = !cast<SchedVar>(NAME # writeResName # PredSchedVar);
+ defvar NoPredSchedVar = !cast<SchedVar>(NAME # writeResName # NoPredSchedVar);
// Tie behavior to predicate
- def NAME # nameMX # "_Variant"
+ def NAME # writeResName # "_Variant"
: SchedWriteVariant<[PredSchedVar, NoPredSchedVar]>;
def : SchedAlias<
- !cast<SchedReadWrite>(nameMX),
- !cast<SchedReadWrite>(NAME # nameMX # "_Variant")>;
+ !cast<SchedReadWrite>(writeResName),
+ !cast<SchedReadWrite>(NAME # writeResName # "_Variant")>;
if IsWorstCase then {
def NAME # name # "_WorstCase_Variant"
@@ -113,6 +112,22 @@ multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
}
}
+multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
+ list<ProcResourceKind> predResources,
+ int predLat, list<int> predAcquireCycles,
+ list<int> predReleaseCycles,
+ list<ProcResourceKind> noPredResources,
+ int noPredLat, list<int> noPredAcquireCycles,
+ list<int> noPredReleaseCycles,
+ string mx, bit IsWorstCase> {
+ defm "" : LMULWriteResVariantImpl<name, name # "_" # mx, Pred, predResources,
+ predLat, predAcquireCycles,
+ predReleaseCycles, noPredResources,
+ noPredLat, noPredAcquireCycles,
+ noPredReleaseCycles,
+ IsWorstCase>;
+}
+
// Define multiclasses to define SchedWrite, SchedRead, WriteRes, and
// ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the
// SchedMxList variants above. Each multiclass is responsible for defining
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index ee25f69..7bc0b5b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2747,20 +2747,72 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
Intrinsic::ID IID = Inst->getIntrinsicID();
LLVMContext &C = Inst->getContext();
bool HasMask = false;
+
+ auto getSegNum = [](const IntrinsicInst *II, unsigned PtrOperandNo,
+ bool IsWrite) -> int64_t {
+ if (auto *TarExtTy =
+ dyn_cast<TargetExtType>(II->getArgOperand(0)->getType()))
+ return TarExtTy->getIntParameter(0);
+
+ return 1;
+ };
+
switch (IID) {
case Intrinsic::riscv_vle_mask:
case Intrinsic::riscv_vse_mask:
+ case Intrinsic::riscv_vlseg2_mask:
+ case Intrinsic::riscv_vlseg3_mask:
+ case Intrinsic::riscv_vlseg4_mask:
+ case Intrinsic::riscv_vlseg5_mask:
+ case Intrinsic::riscv_vlseg6_mask:
+ case Intrinsic::riscv_vlseg7_mask:
+ case Intrinsic::riscv_vlseg8_mask:
+ case Intrinsic::riscv_vsseg2_mask:
+ case Intrinsic::riscv_vsseg3_mask:
+ case Intrinsic::riscv_vsseg4_mask:
+ case Intrinsic::riscv_vsseg5_mask:
+ case Intrinsic::riscv_vsseg6_mask:
+ case Intrinsic::riscv_vsseg7_mask:
+ case Intrinsic::riscv_vsseg8_mask:
HasMask = true;
[[fallthrough]];
case Intrinsic::riscv_vle:
- case Intrinsic::riscv_vse: {
+ case Intrinsic::riscv_vse:
+ case Intrinsic::riscv_vlseg2:
+ case Intrinsic::riscv_vlseg3:
+ case Intrinsic::riscv_vlseg4:
+ case Intrinsic::riscv_vlseg5:
+ case Intrinsic::riscv_vlseg6:
+ case Intrinsic::riscv_vlseg7:
+ case Intrinsic::riscv_vlseg8:
+ case Intrinsic::riscv_vsseg2:
+ case Intrinsic::riscv_vsseg3:
+ case Intrinsic::riscv_vsseg4:
+ case Intrinsic::riscv_vsseg5:
+ case Intrinsic::riscv_vsseg6:
+ case Intrinsic::riscv_vsseg7:
+ case Intrinsic::riscv_vsseg8: {
// Intrinsic interface:
// riscv_vle(merge, ptr, vl)
// riscv_vle_mask(merge, ptr, mask, vl, policy)
// riscv_vse(val, ptr, vl)
// riscv_vse_mask(val, ptr, mask, vl, policy)
+ // riscv_vlseg#(merge, ptr, vl, sew)
+ // riscv_vlseg#_mask(merge, ptr, mask, vl, policy, sew)
+ // riscv_vsseg#(val, ptr, vl, sew)
+ // riscv_vsseg#_mask(val, ptr, mask, vl, sew)
bool IsWrite = Inst->getType()->isVoidTy();
Type *Ty = IsWrite ? Inst->getArgOperand(0)->getType() : Inst->getType();
+ // The results of segment loads are TargetExtType.
+ if (auto *TarExtTy = dyn_cast<TargetExtType>(Ty)) {
+ unsigned SEW =
+ 1 << cast<ConstantInt>(Inst->getArgOperand(Inst->arg_size() - 1))
+ ->getZExtValue();
+ Ty = TarExtTy->getTypeParameter(0U);
+ Ty = ScalableVectorType::get(
+ IntegerType::get(C, SEW),
+ cast<ScalableVectorType>(Ty)->getMinNumElements() * 8 / SEW);
+ }
const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
unsigned VLIndex = RVVIInfo->VLOperand;
unsigned PtrOperandNo = VLIndex - 1 - HasMask;
@@ -2771,23 +2823,72 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
if (HasMask)
Mask = Inst->getArgOperand(VLIndex - 1);
Value *EVL = Inst->getArgOperand(VLIndex);
+ unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
+ // RVV uses contiguous elements as a segment.
+ if (SegNum > 1) {
+ unsigned ElemSize = Ty->getScalarSizeInBits();
+ auto *SegTy = IntegerType::get(C, ElemSize * SegNum);
+ Ty = VectorType::get(SegTy, cast<VectorType>(Ty));
+ }
Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
Alignment, Mask, EVL);
return true;
}
case Intrinsic::riscv_vlse_mask:
case Intrinsic::riscv_vsse_mask:
+ case Intrinsic::riscv_vlsseg2_mask:
+ case Intrinsic::riscv_vlsseg3_mask:
+ case Intrinsic::riscv_vlsseg4_mask:
+ case Intrinsic::riscv_vlsseg5_mask:
+ case Intrinsic::riscv_vlsseg6_mask:
+ case Intrinsic::riscv_vlsseg7_mask:
+ case Intrinsic::riscv_vlsseg8_mask:
+ case Intrinsic::riscv_vssseg2_mask:
+ case Intrinsic::riscv_vssseg3_mask:
+ case Intrinsic::riscv_vssseg4_mask:
+ case Intrinsic::riscv_vssseg5_mask:
+ case Intrinsic::riscv_vssseg6_mask:
+ case Intrinsic::riscv_vssseg7_mask:
+ case Intrinsic::riscv_vssseg8_mask:
HasMask = true;
[[fallthrough]];
case Intrinsic::riscv_vlse:
- case Intrinsic::riscv_vsse: {
+ case Intrinsic::riscv_vsse:
+ case Intrinsic::riscv_vlsseg2:
+ case Intrinsic::riscv_vlsseg3:
+ case Intrinsic::riscv_vlsseg4:
+ case Intrinsic::riscv_vlsseg5:
+ case Intrinsic::riscv_vlsseg6:
+ case Intrinsic::riscv_vlsseg7:
+ case Intrinsic::riscv_vlsseg8:
+ case Intrinsic::riscv_vssseg2:
+ case Intrinsic::riscv_vssseg3:
+ case Intrinsic::riscv_vssseg4:
+ case Intrinsic::riscv_vssseg5:
+ case Intrinsic::riscv_vssseg6:
+ case Intrinsic::riscv_vssseg7:
+ case Intrinsic::riscv_vssseg8: {
// Intrinsic interface:
// riscv_vlse(merge, ptr, stride, vl)
// riscv_vlse_mask(merge, ptr, stride, mask, vl, policy)
// riscv_vsse(val, ptr, stride, vl)
// riscv_vsse_mask(val, ptr, stride, mask, vl, policy)
+ // riscv_vlsseg#(merge, ptr, offset, vl, sew)
+ // riscv_vlsseg#_mask(merge, ptr, offset, mask, vl, policy, sew)
+ // riscv_vssseg#(val, ptr, offset, vl, sew)
+ // riscv_vssseg#_mask(val, ptr, offset, mask, vl, sew)
bool IsWrite = Inst->getType()->isVoidTy();
Type *Ty = IsWrite ? Inst->getArgOperand(0)->getType() : Inst->getType();
+ // The results of segment loads are TargetExtType.
+ if (auto *TarExtTy = dyn_cast<TargetExtType>(Ty)) {
+ unsigned SEW =
+ 1 << cast<ConstantInt>(Inst->getArgOperand(Inst->arg_size() - 1))
+ ->getZExtValue();
+ Ty = TarExtTy->getTypeParameter(0U);
+ Ty = ScalableVectorType::get(
+ IntegerType::get(C, SEW),
+ cast<ScalableVectorType>(Ty)->getMinNumElements() * 8 / SEW);
+ }
const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
unsigned VLIndex = RVVIInfo->VLOperand;
unsigned PtrOperandNo = VLIndex - 2 - HasMask;
@@ -2809,6 +2910,13 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
if (HasMask)
Mask = Inst->getArgOperand(VLIndex - 1);
Value *EVL = Inst->getArgOperand(VLIndex);
+ unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
+ // RVV uses contiguous elements as a segment.
+ if (SegNum > 1) {
+ unsigned ElemSize = Ty->getScalarSizeInBits();
+ auto *SegTy = IntegerType::get(C, ElemSize * SegNum);
+ Ty = VectorType::get(SegTy, cast<VectorType>(Ty));
+ }
Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
Alignment, Mask, EVL, Stride);
return true;
@@ -2817,19 +2925,89 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::riscv_vluxei_mask:
case Intrinsic::riscv_vsoxei_mask:
case Intrinsic::riscv_vsuxei_mask:
+ case Intrinsic::riscv_vloxseg2_mask:
+ case Intrinsic::riscv_vloxseg3_mask:
+ case Intrinsic::riscv_vloxseg4_mask:
+ case Intrinsic::riscv_vloxseg5_mask:
+ case Intrinsic::riscv_vloxseg6_mask:
+ case Intrinsic::riscv_vloxseg7_mask:
+ case Intrinsic::riscv_vloxseg8_mask:
+ case Intrinsic::riscv_vluxseg2_mask:
+ case Intrinsic::riscv_vluxseg3_mask:
+ case Intrinsic::riscv_vluxseg4_mask:
+ case Intrinsic::riscv_vluxseg5_mask:
+ case Intrinsic::riscv_vluxseg6_mask:
+ case Intrinsic::riscv_vluxseg7_mask:
+ case Intrinsic::riscv_vluxseg8_mask:
+ case Intrinsic::riscv_vsoxseg2_mask:
+ case Intrinsic::riscv_vsoxseg3_mask:
+ case Intrinsic::riscv_vsoxseg4_mask:
+ case Intrinsic::riscv_vsoxseg5_mask:
+ case Intrinsic::riscv_vsoxseg6_mask:
+ case Intrinsic::riscv_vsoxseg7_mask:
+ case Intrinsic::riscv_vsoxseg8_mask:
+ case Intrinsic::riscv_vsuxseg2_mask:
+ case Intrinsic::riscv_vsuxseg3_mask:
+ case Intrinsic::riscv_vsuxseg4_mask:
+ case Intrinsic::riscv_vsuxseg5_mask:
+ case Intrinsic::riscv_vsuxseg6_mask:
+ case Intrinsic::riscv_vsuxseg7_mask:
+ case Intrinsic::riscv_vsuxseg8_mask:
HasMask = true;
[[fallthrough]];
case Intrinsic::riscv_vloxei:
case Intrinsic::riscv_vluxei:
case Intrinsic::riscv_vsoxei:
- case Intrinsic::riscv_vsuxei: {
+ case Intrinsic::riscv_vsuxei:
+ case Intrinsic::riscv_vloxseg2:
+ case Intrinsic::riscv_vloxseg3:
+ case Intrinsic::riscv_vloxseg4:
+ case Intrinsic::riscv_vloxseg5:
+ case Intrinsic::riscv_vloxseg6:
+ case Intrinsic::riscv_vloxseg7:
+ case Intrinsic::riscv_vloxseg8:
+ case Intrinsic::riscv_vluxseg2:
+ case Intrinsic::riscv_vluxseg3:
+ case Intrinsic::riscv_vluxseg4:
+ case Intrinsic::riscv_vluxseg5:
+ case Intrinsic::riscv_vluxseg6:
+ case Intrinsic::riscv_vluxseg7:
+ case Intrinsic::riscv_vluxseg8:
+ case Intrinsic::riscv_vsoxseg2:
+ case Intrinsic::riscv_vsoxseg3:
+ case Intrinsic::riscv_vsoxseg4:
+ case Intrinsic::riscv_vsoxseg5:
+ case Intrinsic::riscv_vsoxseg6:
+ case Intrinsic::riscv_vsoxseg7:
+ case Intrinsic::riscv_vsoxseg8:
+ case Intrinsic::riscv_vsuxseg2:
+ case Intrinsic::riscv_vsuxseg3:
+ case Intrinsic::riscv_vsuxseg4:
+ case Intrinsic::riscv_vsuxseg5:
+ case Intrinsic::riscv_vsuxseg6:
+ case Intrinsic::riscv_vsuxseg7:
+ case Intrinsic::riscv_vsuxseg8: {
// Intrinsic interface (only listed ordered version):
// riscv_vloxei(merge, ptr, index, vl)
// riscv_vloxei_mask(merge, ptr, index, mask, vl, policy)
// riscv_vsoxei(val, ptr, index, vl)
// riscv_vsoxei_mask(val, ptr, index, mask, vl, policy)
+ // riscv_vloxseg#(merge, ptr, index, vl, sew)
+ // riscv_vloxseg#_mask(merge, ptr, index, mask, vl, policy, sew)
+ // riscv_vsoxseg#(val, ptr, index, vl, sew)
+ // riscv_vsoxseg#_mask(val, ptr, index, mask, vl, sew)
bool IsWrite = Inst->getType()->isVoidTy();
Type *Ty = IsWrite ? Inst->getArgOperand(0)->getType() : Inst->getType();
+ // The results of segment loads are TargetExtType.
+ if (auto *TarExtTy = dyn_cast<TargetExtType>(Ty)) {
+ unsigned SEW =
+ 1 << cast<ConstantInt>(Inst->getArgOperand(Inst->arg_size() - 1))
+ ->getZExtValue();
+ Ty = TarExtTy->getTypeParameter(0U);
+ Ty = ScalableVectorType::get(
+ IntegerType::get(C, SEW),
+ cast<ScalableVectorType>(Ty)->getMinNumElements() * 8 / SEW);
+ }
const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
unsigned VLIndex = RVVIInfo->VLOperand;
unsigned PtrOperandNo = VLIndex - 2 - HasMask;
@@ -2845,6 +3023,13 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
Mask = ConstantInt::getTrue(MaskType);
}
Value *EVL = Inst->getArgOperand(VLIndex);
+ unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
+ // RVV uses contiguous elements as a segment.
+ if (SegNum > 1) {
+ unsigned ElemSize = Ty->getScalarSizeInBits();
+ auto *SegTy = IntegerType::get(C, ElemSize * SegNum);
+ Ty = VectorType::get(SegTy, cast<VectorType>(Ty));
+ }
Value *OffsetOp = Inst->getArgOperand(PtrOperandNo + 1);
Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
Align(1), Mask, EVL,
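
A worked sketch (standalone C++; the concrete intrinsic and type parameters are assumptions) of how the access type is rebuilt above for a segment access: SEW comes from the trailing log2(SEW) operand, the element count from the tuple's <vscale x N x i8> payload, and for SegNum > 1 the elements are widened to one integer per segment. For riscv_vlseg4 over <vscale x 16 x i8> with sew operand 5:

#include <cassert>

int main() {
  unsigned SegNum = 4;         // TargetExtType integer parameter
  unsigned SewLog2 = 5;        // trailing constant operand of the intrinsic
  unsigned SEW = 1u << SewLog2;                // 32-bit elements
  unsigned MinNumElems = 16;                   // from <vscale x 16 x i8>
  unsigned NumElems = MinNumElems * 8 / SEW;   // 16 * 8 / 32 == 4
  unsigned SegElemBits = SEW * SegNum;         // 4 * 32 == 128 bits per segment
  assert(NumElems == 4 && SegElemBits == 128);
  // The recorded access type is therefore <vscale x 4 x i128>.
  return 0;
}
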