Diffstat (limited to 'llvm/lib/Target/RISCV')
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp      |   2
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp |   4
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp     |   5
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp  |  19
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td                      |  12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp        |   5
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp                 | 198
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.h                   |   6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td                     |   7
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td                 |  55
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZb.td                   |  35
-rw-r--r--  llvm/lib/Target/RISCV/RISCVMacroFusion.td                   |  56
-rw-r--r--  llvm/lib/Target/RISCV/RISCVProcessors.td                    |   7
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp             |  93
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h               |   6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetMachine.cpp                |  31
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp          |  10
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h            |   4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp                  |  25
19 files changed, 387 insertions, 193 deletions
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index eb7460e..95ec42f 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -743,7 +743,7 @@ std::optional<bool> RISCVAsmBackend::evaluateFixup(const MCFragment &,
if (!AUIPCTarget.getAddSym())
return false;
- const MCSymbolELF &SA = cast<MCSymbolELF>(*AUIPCTarget.getAddSym());
+ auto &SA = static_cast<const MCSymbolELF &>(*AUIPCTarget.getAddSym());
if (SA.isUndefined())
return false;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index 9bf7896..2885e3c 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -55,8 +55,8 @@ unsigned RISCVELFObjectWriter::getRelocType(const MCFixup &Fixup,
case ELF::R_RISCV_TLS_GOT_HI20:
case ELF::R_RISCV_TLS_GD_HI20:
case ELF::R_RISCV_TLSDESC_HI20:
- if (auto *SA = Target.getAddSym())
- cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS);
+ if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym()))
+ static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS);
break;
case ELF::R_RISCV_PLT32:
case ELF::R_RISCV_GOT32_PCREL:
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index c654fd2b..543c4c5 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -117,7 +117,7 @@ void RISCVTargetELFStreamer::reset() {
void RISCVTargetELFStreamer::emitDirectiveVariantCC(MCSymbol &Symbol) {
getStreamer().getAssembler().registerSymbol(Symbol);
- cast<MCSymbolELF>(Symbol).setOther(ELF::STO_RISCV_VARIANT_CC);
+ static_cast<MCSymbolELF &>(Symbol).setOther(ELF::STO_RISCV_VARIANT_CC);
}
void RISCVELFStreamer::reset() {
@@ -142,7 +142,8 @@ void RISCVELFStreamer::emitInstructionsMappingSymbol() {
}
void RISCVELFStreamer::emitMappingSymbol(StringRef Name) {
- auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name));
+ auto *Symbol =
+ static_cast<MCSymbolELF *>(getContext().createLocalSymbol(Name));
emitLabel(Symbol);
Symbol->setType(ELF::STT_NOTYPE);
Symbol->setBinding(ELF::STB_LOCAL);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 3655861..f70837e 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -68,36 +68,30 @@ void RISCVTargetStreamer::emitNoteGnuPropertySection(
const Triple &Triple = Ctx.getTargetTriple();
Align NoteAlign;
+ uint64_t DescSize;
if (Triple.isArch64Bit()) {
NoteAlign = Align(8);
+ DescSize = 16;
} else {
assert(Triple.isArch32Bit());
NoteAlign = Align(4);
+ DescSize = 12;
}
assert(Ctx.getObjectFileType() == MCContext::Environment::IsELF);
MCSection *const NoteSection =
Ctx.getELFSection(".note.gnu.property", ELF::SHT_NOTE, ELF::SHF_ALLOC);
- NoteSection->setAlignment(NoteAlign);
OutStreamer.pushSection();
OutStreamer.switchSection(NoteSection);
// Emit the note header
- OutStreamer.emitIntValue(4, 4); // n_namsz
-
- MCSymbol *const NDescBeginSym = Ctx.createTempSymbol();
- MCSymbol *const NDescEndSym = Ctx.createTempSymbol();
- const MCExpr *const NDescSzExpr =
- MCBinaryExpr::createSub(MCSymbolRefExpr::create(NDescEndSym, Ctx),
- MCSymbolRefExpr::create(NDescBeginSym, Ctx), Ctx);
-
- OutStreamer.emitValue(NDescSzExpr, 4); // n_descsz
+ OutStreamer.emitValueToAlignment(NoteAlign);
+ OutStreamer.emitIntValue(4, 4); // n_namsz
+ OutStreamer.emitIntValue(DescSize, 4); // n_descsz
OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4); // n_type
OutStreamer.emitBytes(StringRef("GNU", 4)); // n_name
// Emit n_desc field
- OutStreamer.emitLabel(NDescBeginSym);
- OutStreamer.emitValueToAlignment(NoteAlign);
// Emit the feature_1_and property
OutStreamer.emitIntValue(ELF::GNU_PROPERTY_RISCV_FEATURE_1_AND, 4); // pr_type
@@ -105,7 +99,6 @@ void RISCVTargetStreamer::emitNoteGnuPropertySection(
OutStreamer.emitIntValue(Feature1And, 4); // pr_data
OutStreamer.emitValueToAlignment(NoteAlign); // pr_padding
- OutStreamer.emitLabel(NDescEndSym);
OutStreamer.popSection();
}
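
With a single feature_1_and property, the note's desc size is a compile-time
constant, so the begin/end temp-symbol pair is no longer needed. A byte-layout
sketch of the emitted note (field names from the ELF note format; sizes
inferred from the stream above):

  uint32_t n_namsz;   // = 4 ("GNU" plus NUL)
  uint32_t n_descsz;  // = 12 on RV32, 16 on RV64
  uint32_t n_type;    // = NT_GNU_PROPERTY_TYPE_0
  char     n_name[4]; // "GNU"
  uint32_t pr_type;   // = GNU_PROPERTY_RISCV_FEATURE_1_AND
  uint32_t pr_datasz; // = 4
  uint32_t pr_data;   // = Feature1And
  // RV64 only: 4 bytes of pr_padding to reach the 8-byte note alignment,
  // hence DescSize = 16; on RV32 the desc is already 4-byte aligned at 12.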
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 171940e..a7329d2 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1700,6 +1700,18 @@ def TuneNLogNVRGather
def TunePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;
+def TuneDisableMISchedLoadClustering : SubtargetFeature<"disable-misched-load-clustering",
+ "EnableMISchedLoadClustering", "false", "Disable load clustering in the machine scheduler">;
+
+def TuneDisableMISchedStoreClustering : SubtargetFeature<"disable-misched-store-clustering",
+ "EnableMISchedStoreClustering", "false", "Disable store clustering in the machine scheduler">;
+
+def TuneDisablePostMISchedLoadClustering : SubtargetFeature<"disable-postmisched-load-clustering",
+ "EnablePostMISchedLoadClustering", "false", "Disable PostRA load clustering in the machine scheduler">;
+
+def TuneDisablePostMISchedStoreClustering : SubtargetFeature<"disable-postmisched-store-clustering",
+ "EnablePostMISchedStoreClustering", "false", "Disable PostRA store clustering in the machine scheduler">;
+
def TuneDisableLatencySchedHeuristic
: SubtargetFeature<"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index 82c0d8d..80a48c5 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -167,9 +167,8 @@ static std::pair<Value *, Value *> matchStridedStart(Value *Start,
default:
llvm_unreachable("Unexpected opcode");
case Instruction::Or:
- // TODO: We'd be better off creating disjoint or here, but we don't yet
- // have an IRBuilder API for that.
- [[fallthrough]];
+ Start = Builder.CreateOr(Start, Splat, "", /*IsDisjoint=*/true);
+ break;
case Instruction::Add:
Start = Builder.CreateAdd(Start, Splat);
break;
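
The disjoint flag records that the two values have no set bits in common, so
the or computes the same result as an add. A minimal standalone sketch of that
invariant (illustrative C++, not LLVM code):

  #include <cassert>
  #include <cstdint>

  // If (a & b) == 0 then (a | b) == (a + b). `or disjoint` asserts exactly
  // this, which lets later passes treat the strided start as an add.
  uint64_t disjoint_or(uint64_t a, uint64_t b) {
    assert((a & b) == 0 && "operands must share no set bits");
    return a | b; // equal to a + b under the assertion
  }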
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index adbfbeb..03e54b3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -927,6 +927,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
{ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
@@ -1105,6 +1106,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
{ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -1181,6 +1183,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
ISD::VP_SCATTER},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FABS, VT, Expand);
@@ -1352,6 +1355,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
ISD::VP_SCATTER},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
@@ -1442,6 +1446,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
@@ -7012,6 +7017,7 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(FDIV)
OP_CASE(FNEG)
OP_CASE(FABS)
+ OP_CASE(FCOPYSIGN)
OP_CASE(FSQRT)
OP_CASE(SMIN)
OP_CASE(SMAX)
@@ -7079,6 +7085,15 @@ static unsigned getRISCVVLOp(SDValue Op) {
if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
return RISCVISD::VMXOR_VL;
return RISCVISD::XOR_VL;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return RISCVISD::VZEXT_VL;
+ case ISD::SIGN_EXTEND:
+ return RISCVISD::VSEXT_VL;
+ case ISD::SETCC:
+ return RISCVISD::SETCC_VL;
+ case ISD::VSELECT:
+ return RISCVISD::VMERGE_VL;
case ISD::VP_SELECT:
case ISD::VP_MERGE:
return RISCVISD::VMERGE_VL;
@@ -7419,12 +7434,16 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (Op.getOperand(0).getValueType().isVector() &&
Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
- return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
+ if (Op.getValueType().isScalableVector())
+ return Op;
+ return lowerToScalableOp(Op, DAG);
case ISD::SIGN_EXTEND:
if (Op.getOperand(0).getValueType().isVector() &&
Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
- return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
+ if (Op.getValueType().isScalableVector())
+ return Op;
+ return lowerToScalableOp(Op, DAG);
case ISD::SPLAT_VECTOR_PARTS:
return lowerSPLAT_VECTOR_PARTS(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
@@ -8103,6 +8122,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::MLOAD:
case ISD::VP_LOAD:
return lowerMaskedLoad(Op, DAG);
+ case ISD::VP_LOAD_FF:
+ return lowerLoadFF(Op, DAG);
case ISD::MSTORE:
case ISD::VP_STORE:
return lowerMaskedStore(Op, DAG);
@@ -8166,7 +8187,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
return SplitVectorOp(Op, DAG);
- return lowerFixedLengthVectorSetccToRVV(Op, DAG);
+ return lowerToScalableOp(Op, DAG);
}
case ISD::ADD:
case ISD::SUB:
@@ -8182,6 +8203,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::UREM:
case ISD::BSWAP:
case ISD::CTPOP:
+ case ISD::VSELECT:
return lowerToScalableOp(Op, DAG);
case ISD::SHL:
case ISD::SRA:
@@ -8250,14 +8272,12 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerToScalableOp(Op, DAG);
assert(Op.getOpcode() != ISD::CTTZ);
return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
- case ISD::VSELECT:
- return lowerFixedLengthVectorSelectToRVV(Op, DAG);
case ISD::FCOPYSIGN:
if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
return lowerFCOPYSIGN(Op, DAG, Subtarget);
if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVectorOp(Op, DAG);
- return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
+ return lowerToScalableOp(Op, DAG);
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
@@ -9694,33 +9714,6 @@ SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}
-SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
- SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
- MVT ExtVT = Op.getSimpleValueType();
- // Only custom-lower extensions from fixed-length vector types.
- if (!ExtVT.isFixedLengthVector())
- return Op;
- MVT VT = Op.getOperand(0).getSimpleValueType();
- // Grab the canonical container type for the extended type. Infer the smaller
- // type from that to ensure the same number of vector elements, as we know
- // the LMUL will be sufficient to hold the smaller type.
- MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
- // Get the extended container type manually to ensure the same number of
- // vector elements between source and dest.
- MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
- ContainerExtVT.getVectorElementCount());
-
- SDValue Op1 =
- convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
-
- SDLoc DL(Op);
- auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-
- SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
-
- return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
-}
-
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
@@ -12739,6 +12732,51 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
return DAG.getMergeValues({Result, Chain}, DL);
}
+SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VT = Op->getSimpleValueType(0);
+
+ const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
+ EVT MemVT = VPLoadFF->getMemoryVT();
+ MachineMemOperand *MMO = VPLoadFF->getMemOperand();
+ SDValue Chain = VPLoadFF->getChain();
+ SDValue BasePtr = VPLoadFF->getBasePtr();
+
+ SDValue Mask = VPLoadFF->getMask();
+ SDValue VL = VPLoadFF->getVectorLength();
+
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+
+ unsigned IntID = Intrinsic::riscv_vleff_mask;
+ SDValue Ops[] = {
+ Chain,
+ DAG.getTargetConstant(IntID, DL, XLenVT),
+ DAG.getUNDEF(ContainerVT),
+ BasePtr,
+ Mask,
+ VL,
+ DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
+
+ SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
+
+ SDValue Result =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
+ SDValue OutVL = Result.getValue(1);
+ Chain = Result.getValue(2);
+
+ if (VT.isFixedLengthVector())
+ Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
+
+ return DAG.getMergeValues({Result, OutVL, Chain}, DL);
+}
+
SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -12834,31 +12872,6 @@ SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
return Res;
}
-SDValue
-RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
- SelectionDAG &DAG) const {
- MVT InVT = Op.getOperand(0).getSimpleValueType();
- MVT ContainerVT = getContainerForFixedLengthVector(InVT);
-
- MVT VT = Op.getSimpleValueType();
-
- SDValue Op1 =
- convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
- SDValue Op2 =
- convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
-
- SDLoc DL(Op);
- auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
- DAG, Subtarget);
- MVT MaskVT = getMaskTypeFor(ContainerVT);
-
- SDValue Cmp =
- DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
- {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
-
- return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
-}
-
SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
@@ -12985,51 +12998,6 @@ SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
return Max;
}
-SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
- SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
- SDValue Mag = Op.getOperand(0);
- SDValue Sign = Op.getOperand(1);
- assert(Mag.getValueType() == Sign.getValueType() &&
- "Can only handle COPYSIGN with matching types.");
-
- MVT ContainerVT = getContainerForFixedLengthVector(VT);
- Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
- Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
-
- auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-
- SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
- Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
-
- return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
-}
-
-SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
- SDValue Op, SelectionDAG &DAG) const {
- MVT VT = Op.getSimpleValueType();
- MVT ContainerVT = getContainerForFixedLengthVector(VT);
-
- MVT I1ContainerVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
-
- SDValue CC =
- convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
- SDValue Op1 =
- convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
- SDValue Op2 =
- convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
-
- SDLoc DL(Op);
- SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
-
- SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
- Op2, DAG.getUNDEF(ContainerVT), VL);
-
- return convertFromScalableVector(VT, Select, DAG, Subtarget);
-}
-
SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
SelectionDAG &DAG) const {
const auto &TSInfo =
@@ -13056,7 +13024,9 @@ SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
// "cast" fixed length vector to a scalable vector.
assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
"Only fixed length vectors are supported!");
- Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
+ MVT VContainerVT = ContainerVT.changeVectorElementType(
+ V.getSimpleValueType().getVectorElementType());
+ Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
}
SDLoc DL(Op);
@@ -21478,11 +21448,10 @@ bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
// TODO: Add more target nodes.
switch (Op.getOpcode()) {
case RISCVISD::SELECT_CC:
- // Integer select_cc cannot create poison.
- // TODO: What are the FP poison semantics?
- // TODO: This instruction blocks poison from the unselected operand, can
- // we do anything with that?
- return !Op.getValueType().isInteger();
+ // Integer comparisons cannot create poison.
+ assert(Op.getOperand(0).getValueType().isInteger() &&
+ "RISCVISD::SELECT_CC only compares integers");
+ return false;
}
return TargetLowering::canCreateUndefOrPoisonForTargetNode(
Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
@@ -22550,6 +22519,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
constexpr StringLiteral SupportedInterruptKinds[] = {
"machine",
"supervisor",
+ "rnmi",
"qci-nest",
"qci-nonest",
"SiFive-CLIC-preemptible",
@@ -22567,6 +22537,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
reportFatalUsageError(
"'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
+ if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
+    reportFatalUsageError("'rnmi' interrupt kind requires Smrnmi extension");
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
@@ -23212,7 +23184,11 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (Kind == "supervisor")
RetOpc = RISCVISD::SRET_GLUE;
- else if (Kind == "qci-nest" || Kind == "qci-nonest") {
+ else if (Kind == "rnmi") {
+ assert(STI.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
+ "Need Smrnmi extension for rnmi");
+ RetOpc = RISCVISD::MNRET_GLUE;
+ } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
assert(STI.hasFeature(RISCV::FeatureVendorXqciint) &&
"Need Xqciint for qci-(no)nest");
RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
@@ -24715,7 +24691,7 @@ SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
- DAG.getSignedConstant(-(uint64_t)Align->value(), dl, VT));
+ DAG.getSignedConstant(-Align->value(), dl, VT));
// Set the real SP to the new value with a probing loop.
Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
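
Most of the deleted fixed-length helpers above are now subsumed by
lowerToScalableOp, which has to derive a per-operand container when an
operand's element type differs from the result's. A worked instance, with
container sizes assumed for a 128-bit minimum VLEN:

  // v4i32 = vselect v4i1:$cc, v4i32:$a, v4i32:$b
  //
  // ContainerVT is derived from the i32 result, say nxv2i32. Converting the
  // i1 mask operand into nxv2i32 would be wrong, so each operand gets a
  // container with its own element type but the same element count:
  MVT VContainerVT = ContainerVT.changeVectorElementType(
      V.getSimpleValueType().getVectorElementType()); // nxv2i1 for $cc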
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index ca70c46..433b8be 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -526,6 +526,7 @@ private:
SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLoadFF(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
@@ -534,9 +535,6 @@ private:
SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
- SelectionDAG &DAG) const;
SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
@@ -551,8 +549,6 @@ private:
SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPCttzElements(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
- unsigned ExtendOpc) const;
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 6536078..8bd3830 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -75,6 +75,8 @@ def riscv_sret_glue : RVSDNode<"SRET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
def riscv_mret_glue : RVSDNode<"MRET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
+def riscv_mnret_glue : RVSDNode<"MNRET_GLUE", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
def riscv_mileaveret_glue : RVSDNode<"QC_C_MILEAVERET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
@@ -935,7 +937,6 @@ def MRET : Priv<"mret", 0b0011000>, Sched<[]> {
let rs1 = 0;
let rs2 = 0b00010;
}
-} // isBarrier = 1, isReturn = 1, isTerminator = 1
let Predicates = [HasStdExtSmrnmi] in {
def MNRET : Priv<"mnret", 0b0111000>, Sched<[]> {
@@ -944,6 +945,8 @@ def MNRET : Priv<"mnret", 0b0111000>, Sched<[]> {
let rs2 = 0b00010;
}
}// Predicates = [HasStdExtSmrnmi]
+} // isBarrier = 1, isReturn = 1, isTerminator = 1
+
def WFI : Priv<"wfi", 0b0001000>, Sched<[]> {
let rd = 0;
@@ -1801,6 +1804,8 @@ def : Pat<(riscv_call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
def : Pat<(riscv_sret_glue), (SRET)>;
def : Pat<(riscv_mret_glue), (MRET)>;
+let Predicates = [HasStdExtSmrnmi] in
+def : Pat<(riscv_mnret_glue), (MNRET)>;
let isCall = 1, Defs = [X1] in {
let Predicates = [NoStdExtZicfilp] in
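
A minimal IR sketch that exercises the new return path (the attribute
spelling follows the existing interrupt kinds; assumes Smrnmi is enabled):

  define void @nmi_handler() "interrupt"="rnmi" {
    ret void   ; selected as MNRET rather than MRET
  }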
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 5265613..2c64b0c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -14,6 +14,14 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
+def SDT_SetMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 3>,
+ SDTCisPtrTy<2>,
+ SDTCisVT<3, XLenVT>]>;
+
+def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_SetMultiple,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
def uimm5nonzero : RISCVOp<XLenVT>,
ImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<5, "NonZero">;
@@ -27,6 +35,8 @@ def uimm5nonzero : RISCVOp<XLenVT>,
}];
}
+def tuimm5nonzero : TImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]>;
+
def uimm5gt3 : RISCVOp<XLenVT>, ImmLeaf<XLenVT,
[{return (Imm > 3) && isUInt<5>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<5, "GT3">;
@@ -92,6 +102,8 @@ def uimm5slist : RISCVOp<XLenVT>, ImmLeaf<XLenVT,
}];
}
def tuimm7_lsb00 : TImmLeaf<XLenVT, [{return isShiftedUInt<5, 2>(Imm);}]>;
+
def uimm10 : RISCVUImmLeafOp<10>;
def uimm11 : RISCVUImmLeafOp<11>;
@@ -457,6 +469,13 @@ class QCIRVInstRR<bits<5> funct5, DAGOperand InTyRs1, string opcodestr>
: RVInstR<{0b00, funct5}, 0b011, OPC_CUSTOM_0, (outs GPRNoX0:$rd),
(ins InTyRs1:$rs1, GPRNoX0:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+class QCIRVInstRRTied<bits<5> funct5, DAGOperand InTyRs1, string opcodestr>
+ : RVInstR<{0b00, funct5}, 0b011, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb),
+ (ins GPRNoX0:$rd, InTyRs1:$rs1, GPRNoX0:$rs2), opcodestr,
+ "$rd, $rs1, $rs2"> {
+ let Constraints = "$rd = $rd_wb";
+}
+
class QCIBitManipRII<bits<3> funct3, bits<2> funct2,
DAGOperand InTyRs1, string opcodestr>
: RVInstIBase<funct3, OPC_CUSTOM_0, (outs GPRNoX0:$rd),
@@ -470,11 +489,26 @@ class QCIBitManipRII<bits<3> funct3, bits<2> funct2,
let Inst{24-20} = shamt;
}
+class QCIBitManipRIITied<bits<3> funct3, bits<2> funct2,
+ DAGOperand InTyRs1, string opcodestr>
+ : RVInstIBase<funct3, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd,
+ InTyRs1:$rs1, uimm5_plus1:$width, uimm5:$shamt),
+ opcodestr, "$rd, $rs1, $width, $shamt"> {
+ let Constraints = "$rd = $rd_wb";
+ bits<5> shamt;
+ bits<5> width;
+
+ let Inst{31-30} = funct2;
+ let Inst{29-25} = width;
+ let Inst{24-20} = shamt;
+}
+
class QCIRVInstRI<bits<1> funct1, DAGOperand InTyImm11,
string opcodestr>
- : RVInstIBase<0b000, OPC_CUSTOM_0, (outs GPRNoX0:$rd),
- (ins GPRNoX0:$rs1, InTyImm11:$imm11), opcodestr,
+ : RVInstIBase<0b000, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb),
+ (ins GPRNoX0:$rd, GPRNoX0:$rs1, InTyImm11:$imm11), opcodestr,
"$rd, $rs1, $imm11"> {
+ let Constraints = "$rd = $rd_wb";
bits<11> imm11;
let Inst{31-31} = funct1;
@@ -858,12 +892,12 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
let Inst{29-25} = width;
let Inst{24-20} = shamt;
}
- def QC_INSB : QCIBitManipRII<0b001, 0b01, GPR, "qc.insb">;
- def QC_INSBH : QCIBitManipRII<0b001, 0b10, GPR, "qc.insbh">;
- def QC_INSBR : QCIRVInstRR<0b00000, GPR, "qc.insbr">;
- def QC_INSBHR : QCIRVInstRR<0b00001, GPR, "qc.insbhr">;
- def QC_INSBPR : QCIRVInstRR<0b00010, GPR, "qc.insbpr">;
- def QC_INSBPRH : QCIRVInstRR<0b00011, GPR, "qc.insbprh">;
+ def QC_INSB : QCIBitManipRIITied<0b001, 0b01, GPR, "qc.insb">;
+ def QC_INSBH : QCIBitManipRIITied<0b001, 0b10, GPR, "qc.insbh">;
+ def QC_INSBR : QCIRVInstRRTied<0b00000, GPR, "qc.insbr">;
+ def QC_INSBHR : QCIRVInstRRTied<0b00001, GPR, "qc.insbhr">;
+ def QC_INSBPR : QCIRVInstRRTied<0b00010, GPR, "qc.insbpr">;
+ def QC_INSBPRH : QCIRVInstRRTied<0b00011, GPR, "qc.insbprh">;
def QC_EXTU : QCIBitManipRII<0b010, 0b00, GPRNoX0, "qc.extu">;
def QC_EXTDU : QCIBitManipRII<0b010, 0b10, GPRNoX31, "qc.extdu">;
def QC_EXTDUR : QCIRVInstRR<0b00100, GPRNoX31, "qc.extdur">;
@@ -1566,6 +1600,11 @@ def : QCISELECTIICCPat <SETEQ, QC_SELECTIIEQ>;
def : QCISELECTIICCPat <SETNE, QC_SELECTIINE>;
} // Predicates = [HasVendorXqcics, IsRV32]
+let Predicates = [HasVendorXqcilsm, IsRV32] in {
+def : Pat<(qc_setwmi GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7),
+ (QC_SETWMI GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>;
+} // Predicates = [HasVendorXqcilsm, IsRV32]
+
//===----------------------------------------------------------------------===//
// Compress Instruction tablegen backend.
//===----------------------------------------------------------------------===//
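
The new Tied classes make the old destination an explicit input because the
insert instructions merge a bitfield into rd. A standalone model of the
assumed qc.insb semantics (illustrative only):

  #include <cstdint>

  // Insert the low `width` bits of rs1 into rd at bit position `shamt`;
  // width is 1..32 (uimm5_plus1). The old rd is read, hence $rd = $rd_wb.
  uint32_t qc_insb(uint32_t rd, uint32_t rs1, unsigned width, unsigned shamt) {
    uint32_t field = (width == 32) ? ~0u : ((1u << width) - 1u);
    uint32_t mask = field << shamt;
    return (rd & ~mask) | ((rs1 << shamt) & mask);
  }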
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index d2a6514..27ad10a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -641,13 +641,15 @@ def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
let Predicates = [HasStdExtZbkb, IsRV32] in {
def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))),
(PACK GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (or
- (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
+
+// Match a pattern of 2 bytes being inserted into bits [31:16], with
+// bits [15:0] coming from a zero-extended value. We can use pack with packh for
+// bits [31:16]. If bits [15:0] can also be a packh, it can be matched
+// separately.
+def : Pat<(or (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
- (or
- (shl (zexti8 (XLenVT GPR:$op0rs2)), (XLenVT 8)),
- (zexti8 (XLenVT GPR:$op0rs1)))),
- (PACK (XLenVT (PACKH GPR:$op0rs1, GPR:$op0rs2)),
+ (zexti16 (XLenVT GPR:$rs1))),
+ (PACK (XLenVT GPR:$rs1),
(XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
}
@@ -661,6 +663,27 @@ def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (i64 16)),
def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
(zexti16 (i64 GPR:$rs1)))),
(PACKW GPR:$rs1, GPR:$rs2)>;
+
+// Match a pattern of 2 bytes being inserted into bits [31:16], with
+// bits [15:0] coming from a zero-extended value, and bits [63:32] being
+// ignored. We can use packw with packh for bits [31:16]. If bits [15:0] can
+// also be a packh, it can be matched separately.
+def : Pat<(binop_allwusers<or>
+ (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
+ (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
+ (zexti16 (XLenVT GPR:$rs1))),
+ (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+// We need to manually reassociate the patterns because of the binop_allwusers.
+def : Pat<(binop_allwusers<or>
+ (or (zexti16 (XLenVT GPR:$rs1)),
+ (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
+ (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24))),
+ (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+def : Pat<(binop_allwusers<or>
+ (or (zexti16 (XLenVT GPR:$rs1)),
+ (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 24))),
+ (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
+ (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
} // Predicates = [HasStdExtZbkb, IsRV64]
let Predicates = [HasStdExtZbb, IsRV32] in
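
A standalone model of what the new patterns assemble (assumed packh/packw
semantics, ignoring RV64 sign extension of packw's 32-bit result):

  #include <cstdint>

  uint32_t packh(uint8_t lo, uint8_t hi) { return lo | (uint32_t)hi << 8; }
  uint32_t packw(uint16_t lo, uint16_t hi) { return lo | (uint32_t)hi << 16; }

  // (b3 << 24) | (b2 << 16) | zext16(h) == packw(h, packh(b2, b3)),
  // which is the shape all three reassociated patterns reduce to.
  uint32_t combine(uint16_t h, uint8_t b2, uint8_t b3) {
    return packw(h, packh(b2, b3));
  }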
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
index 875a93d..39e099b 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -91,3 +91,59 @@ def TuneLDADDFusion
CheckIsImmOperand<2>,
CheckImmOperand<2, 0>
]>>;
+
+defvar Load = [LB, LH, LW, LD, LBU, LHU, LWU];
+
+// Fuse add(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu):
+// add(.uw) rd, rs1, rs2
+// load rd, imm12(rd)
+def TuneADDLoadFusion
+ : SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD(.UW) + load macrofusion",
+ CheckOpcode<[ADD, ADD_UW]>,
+ CheckOpcode<Load>>;
+
+// Fuse AUIPC followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
+// auipc rd, imm20
+// load rd, imm12(rd)
+def TuneAUIPCLoadFusion
+ : SimpleFusion<"auipc-load-fusion", "HasAUIPCLoadFusion",
+ "Enable AUIPC + load macrofusion",
+ CheckOpcode<[AUIPC]>,
+ CheckOpcode<Load>>;
+
+// Fuse LUI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
+// lui rd, imm[31:12]
+// load rd, imm12(rd)
+def TuneLUILoadFusion
+ : SimpleFusion<"lui-load-fusion", "HasLUILoadFusion",
+ "Enable LUI + load macrofusion",
+ CheckOpcode<[LUI]>,
+ CheckOpcode<Load>>;
+
+// Bitfield extract fusion: similar to TuneShiftedZExtWFusion,
+// but without the restriction on the shift amounts:
+// slli rd, rs1, shamt
+// srli rd, rd, shamt
+def TuneBFExtFusion
+ : SimpleFusion<"bfext-fusion", "HasBFExtFusion",
+ "Enable SLLI+SRLI (bitfield extract) macrofusion",
+ CheckOpcode<[SLLI]>,
+ CheckOpcode<[SRLI]>>;
+
+// Fuse ADDI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
+// addi rd, rs1, imm12
+// load rd, imm12(rd)
+def TuneADDILoadFusion
+ : SimpleFusion<"addi-load-fusion", "HasADDILoadFusion",
+ "Enable ADDI + load macrofusion",
+ CheckOpcode<[ADDI]>,
+ CheckOpcode<Load>>;
+
+// Fuse shXadd(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
+// shXadd(.uw) rd, rs1, rs2
+// load rd, imm12(rd)
+def TuneSHXADDLoadFusion
+ : SimpleFusion<"shxadd-load-fusion", "HasSHXADDLoadFusion",
+ "Enable SH(1|2|3)ADD(.UW) + load macrofusion",
+ CheckOpcode<[SH1ADD, SH2ADD, SH3ADD, SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
+ CheckOpcode<Load>>;
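
A worked instance (assumed RV64) of the SLLI+SRLI pair TuneBFExtFusion fuses,
extracting the 8-bit field at bits [15:8]:

  #include <cstdint>

  uint64_t bfext_15_8(uint64_t rs1) {
    uint64_t t = rs1 << 48; // slli rd, rs1, 48
    return t >> 56;         // srli rd, rd, 56
  }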
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 838edf6..31d2b3a 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -590,12 +590,17 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
FeatureStdExtZicboz,
FeatureVendorXVentanaCondOps],
[TuneVentanaVeyron,
+ TuneDisableMISchedLoadClustering,
+ TuneDisablePostMISchedLoadClustering,
+ TuneDisablePostMISchedStoreClustering,
TuneLUIADDIFusion,
TuneAUIPCADDIFusion,
TuneZExtHFusion,
TuneZExtWFusion,
TuneShiftedZExtWFusion,
- TuneLDADDFusion]> {
+ TuneADDLoadFusion,
+ TuneAUIPCLoadFusion,
+ TuneLUILoadFusion]> {
let MVendorID = 0x61f;
let MArchID = 0x8000000000010000;
let MImpID = 0x111;
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
index 6ecddad..041dd07 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "RISCVSelectionDAGInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#define GET_SDNODE_DESC
#include "RISCVGenSDNodeInfo.inc"
@@ -62,3 +64,94 @@ void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
}
#endif
}
+
+SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
+ SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo) const {
+ const auto &Subtarget = DAG.getSubtarget<RISCVSubtarget>();
+ // We currently do this only for Xqcilsm
+ if (!Subtarget.hasVendorXqcilsm())
+ return SDValue();
+
+ // Do this only if we know the size at compile time.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+
+ uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue();
+
+ // Do this only if it is word aligned and we write a multiple of 4 bytes.
+  if (Alignment < 4 || (NumberOfBytesToWrite & 3) != 0)
+ return SDValue();
+
+ SmallVector<SDValue, 8> OutChains;
+ SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+ int NumberOfWords = NumberOfBytesToWrite / 4;
+ MachineFunction &MF = DAG.getMachineFunction();
+ auto Volatile =
+ isVolatile ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+
+ // Helper for constructing the QC_SETWMI instruction
+ auto getSetwmiNode = [&](uint8_t SizeWords, uint8_t OffsetSetwmi) -> SDValue {
+ SDValue Ops[] = {Chain, SrcValueReplicated, Dst,
+ DAG.getTargetConstant(SizeWords, dl, MVT::i32),
+ DAG.getTargetConstant(OffsetSetwmi, dl, MVT::i32)};
+ MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
+ DstPtrInfo.getWithOffset(OffsetSetwmi),
+ MachineMemOperand::MOStore | Volatile, SizeWords * 4, Align(4));
+ return DAG.getMemIntrinsicNode(RISCVISD::QC_SETWMI, dl,
+ DAG.getVTList(MVT::Other), Ops, MVT::i32,
+ BaseMemOperand);
+ };
+
+  // If the value has i8 type and is not a constant zero.
+ if ((Src.getValueType() == MVT::i8) && !isNullConstant(Src))
+ // Replicate byte to word by multiplication with 0x01010101.
+ SrcValueReplicated =
+ DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated,
+ DAG.getConstant(0x01010101ul, dl, MVT::i32));
+
+ // We limit a QC_SETWMI to 16 words or less to improve interruptibility.
+ // So for 1-16 words we use a single QC_SETWMI:
+ //
+ // QC_SETWMI reg1, N, 0(reg2)
+ //
+ // For 17-32 words we use two QC_SETWMI's with the first as 16 words and the
+ // second for the remainder:
+ //
+ // QC_SETWMI reg1, 16, 0(reg2)
+  // For 17-32 words we use two QC_SETWMIs, with the first covering 16 words and the
+ //
+  // For 33-47 words, we would like to use (16, 16, N), but that means the last
+ // QC_SETWMI needs an offset of 128 which the instruction doesn't support.
+ // So in this case we use a length of 15 for the second instruction and we do
+ // the rest with the third instruction.
+ // This means the maximum inlined number of words is 47 (for now):
+ //
+  //   QC_SETWMI reg1, 16, 0(reg2)
+  //   QC_SETWMI reg1, 15, 64(reg2)
+  //   QC_SETWMI reg1, N, 124(reg2)
+ //
+  // For 48 words or more, fall back to the target-independent memset.
+ if (NumberOfWords >= 48)
+ return SDValue();
+
+ if (NumberOfWords <= 16) {
+ // 1 - 16 words
+ return getSetwmiNode(NumberOfWords, 0);
+ }
+
+ if (NumberOfWords <= 32) {
+ // 17 - 32 words
+ OutChains.push_back(getSetwmiNode(NumberOfWords - 16, 64));
+ OutChains.push_back(getSetwmiNode(16, 0));
+ } else {
+ // 33 - 47 words
+ OutChains.push_back(getSetwmiNode(NumberOfWords - 31, 124));
+ OutChains.push_back(getSetwmiNode(15, 64));
+ OutChains.push_back(getSetwmiNode(16, 0));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
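
A standalone model of the word-count split implemented above (illustrative
only; each store covers at most 16 words and the offset field tops out at
124):

  #include <cassert>

  // Maps a word count (1..47) to up to three (words, byte offset) stores.
  int split(int n, int words[3], int offs[3]) {
    assert(n >= 1 && n < 48 && "48+ words fall back to generic memset");
    if (n <= 16) { words[0] = n; offs[0] = 0; return 1; }
    words[0] = 16; offs[0] = 0;                      // bytes 0..63
    if (n <= 32) { words[1] = n - 16; offs[1] = 64; return 2; }
    words[1] = 15; offs[1] = 64;                     // bytes 64..123
    words[2] = n - 31; offs[2] = 124; return 3;      // bytes 124..187
  }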
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h
index 641189f..08c8d11 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h
@@ -34,6 +34,12 @@ public:
void verifyTargetNode(const SelectionDAG &DAG,
const SDNode *N) const override;
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, Align Alignment,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo) const override;
+
bool hasPassthruOp(unsigned Opcode) const {
return GenNodeInfo.getDesc(Opcode).TSFlags & RISCVISD::HasPassthruOpMask;
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index da6ac2f..66ce134 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -94,16 +94,6 @@ static cl::opt<bool>
cl::desc("Enable the loop data prefetch pass"),
cl::init(true));
-static cl::opt<bool> EnableMISchedLoadStoreClustering(
- "riscv-misched-load-store-clustering", cl::Hidden,
- cl::desc("Enable load and store clustering in the machine scheduler"),
- cl::init(true));
-
-static cl::opt<bool> EnablePostMISchedLoadStoreClustering(
- "riscv-postmisched-load-store-clustering", cl::Hidden,
- cl::desc("Enable PostRA load and store clustering in the machine scheduler"),
- cl::init(true));
-
static cl::opt<bool> DisableVectorMaskMutation(
"riscv-disable-vector-mask-mutation",
cl::desc("Disable the vector mask scheduling mutation"), cl::init(false),
@@ -294,15 +284,17 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
ScheduleDAGInstrs *
RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
+ const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
ScheduleDAGMILive *DAG = createSchedLive(C);
- if (EnableMISchedLoadStoreClustering) {
+
+ if (ST.enableMISchedLoadClustering())
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+
+ if (ST.enableMISchedStoreClustering())
DAG->addMutation(createStoreClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
- }
- const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
if (!DisableVectorMaskMutation && ST.hasVInstructions())
DAG->addMutation(createRISCVVectorMaskDAGMutation(DAG->TRI));
@@ -311,13 +303,16 @@ RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
ScheduleDAGInstrs *
RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
+ const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
ScheduleDAGMI *DAG = createSchedPostRA(C);
- if (EnablePostMISchedLoadStoreClustering) {
+
+ if (ST.enablePostMISchedLoadClustering())
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+
+ if (ST.enablePostMISchedStoreClustering())
DAG->addMutation(createStoreClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
- }
return DAG;
}
@@ -642,12 +637,6 @@ void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
OptimizationLevel Level) {
LPM.addPass(LoopIdiomVectorizePass(LoopIdiomVectorizeStyle::Predicated));
});
-
- PB.registerVectorizerEndEPCallback(
- [](FunctionPassManager &FPM, OptimizationLevel Level) {
- if (Level.isOptimizingForSpeed())
- FPM.addPass(createFunctionToLoopPassAdaptor(EVLIndVarSimplifyPass()));
- });
}
yaml::MachineFunctionInfo *
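
The two cl::opt escape hatches are replaced by the per-processor tune
features added in RISCVFeatures.td, so the same behaviour can still be forced
from the command line via the usual subtarget-feature spelling (a plausible
invocation, not taken from the patch):

  llc -mtriple=riscv64 \
      -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
      foo.ll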
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 0d5eb86..67f924a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -979,11 +979,11 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) const {
- // The interleaved memory access pass will lower interleaved memory ops (i.e
- // a load and store followed by a specific shuffle) to vlseg/vsseg
- // intrinsics.
- if (!UseMaskForCond && !UseMaskForGaps &&
- Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+  // The interleaved memory access pass will lower (de)interleave ops combined
+  // with an adjacent memory operation to vlseg/vsseg intrinsics. vlseg/vsseg
+  // only support masking per-iteration (i.e. condition), not per-segment
+  // (i.e. gap).
+ if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
auto *VTy = cast<VectorType>(VecTy);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VTy);
// Need to make sure type hasn't been scalarized
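
The distinction the new comment draws, sketched as loops (illustrative C,
factor-2 interleaving):

  // UseMaskForCond: every member of a segment is guarded by the same lane
  // predicate, which a masked vlseg2 can express.
  void cond_masked(int *p, int *a, int *b, const char *m, int n) {
    for (int i = 0; i < n; i++)
      if (m[i]) { a[i] = p[2 * i]; b[i] = p[2 * i + 1]; }
  }

  // UseMaskForGaps: one member of each segment is never accessed; vlseg
  // cannot skip segment members, so this case is still rejected.
  void gap(int *p, int *a, int n) {
    for (int i = 0; i < n; i++)
      a[i] = p[2 * i]; // p[2*i + 1] is a gap
  }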
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index d62d99c..05d504c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -398,6 +398,10 @@ public:
bool enableInterleavedAccessVectorization() const override { return true; }
+ bool enableMaskedInterleavedAccessVectorization() const override {
+ return ST->hasVInstructions();
+ }
+
unsigned getMinTripCountTailFoldingThreshold() const override;
enum RISCVRegisterClass { GPRRC, FPRRC, VRRC };
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index c946451..37a71e8 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -69,6 +69,7 @@ struct OperandInfo {
// Represent as 1,2,4,8, ... and fractional indicator. This is because
// EMUL can take on values that don't map to RISCVVType::VLMUL values exactly.
// For example, a mask operand can have an EMUL less than MF8.
+ // If nullopt, then EMUL isn't used (i.e. only a single scalar is read).
std::optional<std::pair<unsigned, bool>> EMUL;
unsigned Log2EEW;
@@ -83,12 +84,14 @@ struct OperandInfo {
OperandInfo() = delete;
- static bool EMULAndEEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
- return A.Log2EEW == B.Log2EEW && A.EMUL == B.EMUL;
- }
-
- static bool EEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
- return A.Log2EEW == B.Log2EEW;
+  /// Return true if the EMUL and EEW produced by \p Def are compatible with
+  /// the EMUL and EEW used by \p User.
+ static bool areCompatible(const OperandInfo &Def, const OperandInfo &User) {
+ if (Def.Log2EEW != User.Log2EEW)
+ return false;
+ if (User.EMUL && Def.EMUL != User.EMUL)
+ return false;
+ return true;
}
void print(raw_ostream &OS) const {
@@ -98,7 +101,7 @@ struct OperandInfo {
OS << "f";
OS << EMUL->first;
} else
- OS << "EMUL: unknown\n";
+ OS << "EMUL: none\n";
OS << ", EEW: " << (1 << Log2EEW);
}
};
@@ -1399,13 +1402,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
return std::nullopt;
}
- // If the operand is used as a scalar operand, then the EEW must be
- // compatible. Otherwise, the EMUL *and* EEW must be compatible.
- bool IsVectorOpUsedAsScalarOp = isVectorOpUsedAsScalarOp(UserOp);
- if ((IsVectorOpUsedAsScalarOp &&
- !OperandInfo::EEWAreEqual(*ConsumerInfo, *ProducerInfo)) ||
- (!IsVectorOpUsedAsScalarOp &&
- !OperandInfo::EMULAndEEWAreEqual(*ConsumerInfo, *ProducerInfo))) {
+ if (!OperandInfo::areCompatible(*ProducerInfo, *ConsumerInfo)) {
LLVM_DEBUG(
dbgs()
<< " Abort due to incompatible information for EMUL or EEW.\n");
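
A minimal standalone model of the new rule (not the LLVM types themselves):
EEW must always match, while EMUL only constrains users that actually read
the operand as a vector.

  #include <cassert>
  #include <optional>
  #include <utility>

  struct Info {
    std::optional<std::pair<unsigned, bool>> EMUL; // nullopt => scalar use
    unsigned Log2EEW;
  };

  bool areCompatible(const Info &Def, const Info &User) {
    if (Def.Log2EEW != User.Log2EEW)
      return false;
    if (User.EMUL && Def.EMUL != User.EMUL)
      return false;
    return true;
  }

  int main() {
    Info Def{{{1, false}}, 5};       // EMUL = 1, EEW = 32
    Info ScalarUse{std::nullopt, 5}; // vector op read as a scalar, EEW = 32
    assert(areCompatible(Def, ScalarUse)); // EMUL ignored for scalar uses
  }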