aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/LoongArch
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/LoongArch')
-rw-r--r--llvm/lib/Target/LoongArch/CMakeLists.txt2
-rw-r--r--llvm/lib/Target/LoongArch/LoongArch.td2
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp3
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td5
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td1
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp2
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h1
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp364
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.h179
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp214
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.h19
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.td37
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td76
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td54
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp19
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h28
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp11
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchSubtarget.h13
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp9
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp23
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h3
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp2
23 files changed, 690 insertions, 379 deletions
diff --git a/llvm/lib/Target/LoongArch/CMakeLists.txt b/llvm/lib/Target/LoongArch/CMakeLists.txt
index 0f674b1..8689d09 100644
--- a/llvm/lib/Target/LoongArch/CMakeLists.txt
+++ b/llvm/lib/Target/LoongArch/CMakeLists.txt
@@ -10,6 +10,7 @@ tablegen(LLVM LoongArchGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM LoongArchGenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM LoongArchGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM LoongArchGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM LoongArchGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM LoongArchGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(LoongArchCommonTableGen)
@@ -27,6 +28,7 @@ add_llvm_target(LoongArchCodeGen
LoongArchMergeBaseOffset.cpp
LoongArchOptWInstrs.cpp
LoongArchRegisterInfo.cpp
+ LoongArchSelectionDAGInfo.cpp
LoongArchSubtarget.cpp
LoongArchTargetMachine.cpp
LoongArchTargetTransformInfo.cpp
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index 6497ff9..67f07f0 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -202,6 +202,8 @@ def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit,
// Define the LoongArch target.
//===----------------------------------------------------------------------===//
+defm : RemapAllTargetPseudoPointerOperands<GPR>;
+
def LoongArchInstrInfo : InstrInfo {
let guessInstructionProperties = 0;
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp
index 0ccebeb3..6358e348 100644
--- a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp
@@ -60,7 +60,6 @@ bool LoongArchDeadRegisterDefinitions::runOnMachineFunction(
return false;
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
LLVM_DEBUG(dbgs() << "***** LoongArchDeadRegisterDefinitions *****\n");
@@ -86,7 +85,7 @@ bool LoongArchDeadRegisterDefinitions::runOnMachineFunction(
continue;
LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
MI.print(dbgs()));
- const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, I);
if (!(RC && RC->contains(LoongArch::R0))) {
LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
continue;
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index e86b21c..32954b6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -30,13 +30,18 @@ def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
// ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point
// comparisons to prevent recursive lowering.
def loongarch_brcond : SDNode<"LoongArchISD::BRCOND", SDTBrcond, [SDNPHasChain]>;
+
+// FPR<->GPR transfer operations
def loongarch_movgr2fr_w
: SDNode<"LoongArchISD::MOVGR2FR_W", SDT_LoongArchMOVGR2FR_W>;
def loongarch_movgr2fr_w_la64
: SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>;
def loongarch_movfr2gr_s_la64
: SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>;
+
def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
+
+// Floating point approximate reciprocal operation
def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>;
def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index 2e88254..e6cad1b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -20,6 +20,7 @@ def SDT_LoongArchMOVGR2FR_D_LO_HI
: SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
+// FPR<->GPR transfer operations
def loongarch_movgr2fr_d
: SDNode<"LoongArchISD::MOVGR2FR_D", SDT_LoongArchMOVGR2FR_D>;
def loongarch_movgr2fr_d_lo_hi
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index 1493bf4..690b063 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -449,7 +449,7 @@ bool LoongArchFrameLowering::spillCalleeSavedRegisters(
bool IsKill =
!(Reg == LoongArch::R1 && MF->getFrameInfo().isReturnAddressTaken());
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, CS.getFrameIdx(), RC, TRI,
+ TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, CS.getFrameIdx(), RC,
Register());
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 1eed877..4c8dcb8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H
#include "LoongArch.h"
+#include "LoongArchSelectionDAGInfo.h"
#include "LoongArchTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index a6de839..32ea219 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -15,6 +15,7 @@
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
+#include "LoongArchSelectionDAGInfo.h"
#include "LoongArchSubtarget.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
@@ -76,7 +77,7 @@ static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
const LoongArchSubtarget &STI)
- : TargetLowering(TM), Subtarget(STI) {
+ : TargetLowering(TM, STI), Subtarget(STI) {
MVT GRLenVT = Subtarget.getGRLenVT();
@@ -351,6 +352,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
}
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -371,6 +374,12 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FMINNUM, VT, Legal);
+ setOperationAction(ISD::FMAXNUM, VT, Legal);
}
setOperationAction(ISD::CTPOP, GRLenVT, Legal);
setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
@@ -433,6 +442,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
}
for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -453,6 +464,12 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FMINNUM, VT, Legal);
+ setOperationAction(ISD::FMAXNUM, VT, Legal);
}
}
@@ -588,6 +605,9 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerBF16_TO_FP(Op, DAG);
case ISD::VECREDUCE_ADD:
return lowerVECREDUCE_ADD(Op, DAG);
+ case ISD::ROTL:
+ case ISD::ROTR:
+ return lowerRotate(Op, DAG);
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
@@ -602,6 +622,59 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return SDValue();
}
+// Try to return a cheaper, bit-inverted version of \p V; empty SDValue if none.
+static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
+ // TODO: don't always ignore one-use constraints.
+ V = peekThroughBitcasts(V);
+ EVT VT = V.getValueType();
+
+ // Match not(xor(X, -1)) -> X.
+ if (V.getOpcode() == ISD::XOR &&
+ (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
+ isAllOnesConstant(V.getOperand(1))))
+ return V.getOperand(0);
+
+ // Match not(extract_subvector(not(X))) -> extract_subvector(X).
+ if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
+ if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
+ Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
+ V.getOperand(1));
+ }
+ }
+
+ // Match not(SplatVector(not(X))) -> SplatVector(X).
+ if (V.getOpcode() == ISD::BUILD_VECTOR) {
+ if (SDValue SplatValue =
+ cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
+ if (!V->isOnlyUserOf(SplatValue.getNode()))
+ return SDValue();
+
+ if (SDValue Not = isNOT(SplatValue, DAG)) {
+ Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+ return DAG.getSplat(VT, SDLoc(Not), Not);
+ }
+ }
+ }
+
+ // Match not(or(not(X), not(Y))) -> and(X, Y).
+ if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
+ // TODO: Handle cases with a single NOT operand -> VANDN.
+ if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
+ if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
+ return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
+ DAG.getBitcast(VT, Op1));
+ }
+
+ // TODO: Add more matching patterns, such as:
+ // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
+ // not(slt(C, X)) -> slt(X - 1, C).
+
+ return SDValue();
+}
+
SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -761,6 +834,58 @@ SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
return Op;
}
+SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
+ SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.isVector() && "Unexpected type");
+
+ SDLoc DL(Op);
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+ unsigned Opcode = Op.getOpcode();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+ auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
+ if (V.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ if (SDValue SplatValue =
+ cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
+ if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
+ CstSplatValue = C->getAPIntValue();
+ return true;
+ }
+ }
+ return false;
+ };
+
+ // Check for a constant splat rotation amount.
+ APInt CstSplatValue;
+ bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
+ bool isROTL = Opcode == ISD::ROTL;
+
+ // A splat amount that is 0 modulo the element width is a no-op: return R.
+ if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
+ return R;
+
+ // LoongArch prefers ISD::ROTR: rewrite ROTL(R, Amt) as ROTR(R, 0 - Amt).
+ if (isROTL) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::ROTR, DL, VT, R,
+ DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
+ }
+
+ // Rotate by an immediate.
+ if (IsCstSplat) {
+ // ISD::ROTR: Attempt to rotate by (Amt urem EltSizeInBits) if it folds.
+ SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
+ if (SDValue Urem =
+ DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
+ return DAG.getNode(Opcode, DL, VT, R, Urem);
+ }
+
+ return Op;
+}
+
// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
@@ -1700,7 +1825,7 @@ lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
// Return vshuf4i.d
if (VT == MVT::v2f64 || VT == MVT::v2i64)
- return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
+ return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
DAG.getConstant(Imm, DL, GRLenVT));
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
@@ -2873,11 +2998,13 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
- SDVTList Tys =
- LN->isIndexed()
- ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
- : DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
+ // Indexed loads and stores are not supported on LoongArch.
+ assert(LN->isUnindexed() && "Unexpected indexed load.");
+
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ // The offset operand of unindexed load is always undefined, so there is
+ // no need to pass it to VLDREPL.
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
return BCast;
@@ -4447,7 +4574,7 @@ SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
-static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
+static unsigned getLoongArchWOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("Unexpected opcode");
@@ -4483,7 +4610,7 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
unsigned ExtOpc = ISD::ANY_EXTEND) {
SDLoc DL(N);
- LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
+ unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
SDValue NewOp0, NewRes;
switch (NumOp) {
@@ -5042,6 +5169,33 @@ void LoongArchTargetLowering::ReplaceNodeResults(
}
}
+/// Try to fold (and (xor X, -1), Y) -> (vandn X, Y) on 128/256-bit vectors.
+static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
+ SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
+
+ MVT VT = N->getSimpleValueType(0);
+ if (!VT.is128BitVector() && !VT.is256BitVector())
+ return SDValue();
+
+ SDValue X, Y;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (SDValue Not = isNOT(N0, DAG)) {
+ X = Not;
+ Y = N1;
+ } else if (SDValue Not = isNOT(N1, DAG)) {
+ X = Not;
+ Y = N0;
+ } else
+ return SDValue();
+
+ X = DAG.getBitcast(VT, X);
+ Y = DAG.getBitcast(VT, Y);
+ return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
+}
+
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const LoongArchSubtarget &Subtarget) {
@@ -5059,6 +5213,9 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
SDValue NewOperand;
MVT GRLenVT = Subtarget.getGRLenVT();
+ if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
+ return R;
+
// BSTRPICK requires the 32S feature.
if (!Subtarget.has32S())
return SDValue();
@@ -6618,6 +6775,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
N->getOperand(1));
break;
+ case Intrinsic::loongarch_lasx_concat_128_s:
+ case Intrinsic::loongarch_lasx_concat_128_d:
+ case Intrinsic::loongarch_lasx_concat_128:
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
}
return SDValue();
}
@@ -6731,6 +6893,69 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Do target-specific DAG combines on LoongArchISD::VANDN nodes.
+static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ MVT VT = N->getSimpleValueType(0);
+ SDLoc DL(N);
+
+ // VANDN(undef, x) -> 0
+ // VANDN(x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // VANDN(0, x) -> x
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+
+ // VANDN(x, 0) -> 0
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return DAG.getConstant(0, DL, VT);
+
+ // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return DAG.getNOT(DL, N0, VT);
+
+ // Turn VANDN back to AND if the first operand is inverted.
+ if (SDValue Not = isNOT(N0, DAG))
+ return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
+
+ // Folds for better commutativity:
+ if (N1->hasOneUse()) {
+ // VANDN(x, NOT(y)) -> AND(NOT(x), NOT(y)) -> NOT(OR(x, y)).
+ if (SDValue Not = isNOT(N1, DAG))
+ return DAG.getNOT(
+ DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
+
+ // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
+ // -> NOT(OR(x, SplatVector(~Imm)))
+ // This fold is performed only when VT is v16i8/v32i8, using `vnori.b` to
+ // gain benefits.
+ if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
+ N1.getOpcode() == ISD::BUILD_VECTOR) {
+ if (SDValue SplatValue =
+ cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
+ if (!N1->isOnlyUserOf(SplatValue.getNode()))
+ return SDValue();
+
+ if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
+ uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
+ SDValue Not =
+ DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
+ return DAG.getNOT(
+ DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
+ VT);
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -6766,6 +6991,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
case ISD::EXTRACT_VECTOR_ELT:
return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
+ case LoongArchISD::VANDN:
+ return performVANDNCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
}
@@ -7466,123 +7693,6 @@ bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
return true;
}
-const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch ((LoongArchISD::NodeType)Opcode) {
- case LoongArchISD::FIRST_NUMBER:
- break;
-
-#define NODE_NAME_CASE(node) \
- case LoongArchISD::node: \
- return "LoongArchISD::" #node;
-
- // TODO: Add more target-dependent nodes later.
- NODE_NAME_CASE(CALL)
- NODE_NAME_CASE(CALL_MEDIUM)
- NODE_NAME_CASE(CALL_LARGE)
- NODE_NAME_CASE(RET)
- NODE_NAME_CASE(TAIL)
- NODE_NAME_CASE(TAIL_MEDIUM)
- NODE_NAME_CASE(TAIL_LARGE)
- NODE_NAME_CASE(SELECT_CC)
- NODE_NAME_CASE(BR_CC)
- NODE_NAME_CASE(BRCOND)
- NODE_NAME_CASE(SLL_W)
- NODE_NAME_CASE(SRA_W)
- NODE_NAME_CASE(SRL_W)
- NODE_NAME_CASE(BSTRINS)
- NODE_NAME_CASE(BSTRPICK)
- NODE_NAME_CASE(MOVGR2FR_W)
- NODE_NAME_CASE(MOVGR2FR_W_LA64)
- NODE_NAME_CASE(MOVGR2FR_D)
- NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
- NODE_NAME_CASE(MOVFR2GR_S_LA64)
- NODE_NAME_CASE(FTINT)
- NODE_NAME_CASE(BUILD_PAIR_F64)
- NODE_NAME_CASE(SPLIT_PAIR_F64)
- NODE_NAME_CASE(REVB_2H)
- NODE_NAME_CASE(REVB_2W)
- NODE_NAME_CASE(BITREV_4B)
- NODE_NAME_CASE(BITREV_8B)
- NODE_NAME_CASE(BITREV_W)
- NODE_NAME_CASE(ROTR_W)
- NODE_NAME_CASE(ROTL_W)
- NODE_NAME_CASE(DIV_W)
- NODE_NAME_CASE(DIV_WU)
- NODE_NAME_CASE(MOD_W)
- NODE_NAME_CASE(MOD_WU)
- NODE_NAME_CASE(CLZ_W)
- NODE_NAME_CASE(CTZ_W)
- NODE_NAME_CASE(DBAR)
- NODE_NAME_CASE(IBAR)
- NODE_NAME_CASE(BREAK)
- NODE_NAME_CASE(SYSCALL)
- NODE_NAME_CASE(CRC_W_B_W)
- NODE_NAME_CASE(CRC_W_H_W)
- NODE_NAME_CASE(CRC_W_W_W)
- NODE_NAME_CASE(CRC_W_D_W)
- NODE_NAME_CASE(CRCC_W_B_W)
- NODE_NAME_CASE(CRCC_W_H_W)
- NODE_NAME_CASE(CRCC_W_W_W)
- NODE_NAME_CASE(CRCC_W_D_W)
- NODE_NAME_CASE(CSRRD)
- NODE_NAME_CASE(CSRWR)
- NODE_NAME_CASE(CSRXCHG)
- NODE_NAME_CASE(IOCSRRD_B)
- NODE_NAME_CASE(IOCSRRD_H)
- NODE_NAME_CASE(IOCSRRD_W)
- NODE_NAME_CASE(IOCSRRD_D)
- NODE_NAME_CASE(IOCSRWR_B)
- NODE_NAME_CASE(IOCSRWR_H)
- NODE_NAME_CASE(IOCSRWR_W)
- NODE_NAME_CASE(IOCSRWR_D)
- NODE_NAME_CASE(CPUCFG)
- NODE_NAME_CASE(MOVGR2FCSR)
- NODE_NAME_CASE(MOVFCSR2GR)
- NODE_NAME_CASE(CACOP_D)
- NODE_NAME_CASE(CACOP_W)
- NODE_NAME_CASE(VSHUF)
- NODE_NAME_CASE(VPICKEV)
- NODE_NAME_CASE(VPICKOD)
- NODE_NAME_CASE(VPACKEV)
- NODE_NAME_CASE(VPACKOD)
- NODE_NAME_CASE(VILVL)
- NODE_NAME_CASE(VILVH)
- NODE_NAME_CASE(VSHUF4I)
- NODE_NAME_CASE(VREPLVEI)
- NODE_NAME_CASE(VREPLGR2VR)
- NODE_NAME_CASE(XVPERMI)
- NODE_NAME_CASE(XVPERM)
- NODE_NAME_CASE(XVREPLVE0)
- NODE_NAME_CASE(XVREPLVE0Q)
- NODE_NAME_CASE(XVINSVE0)
- NODE_NAME_CASE(VPICK_SEXT_ELT)
- NODE_NAME_CASE(VPICK_ZEXT_ELT)
- NODE_NAME_CASE(VREPLVE)
- NODE_NAME_CASE(VALL_ZERO)
- NODE_NAME_CASE(VANY_ZERO)
- NODE_NAME_CASE(VALL_NONZERO)
- NODE_NAME_CASE(VANY_NONZERO)
- NODE_NAME_CASE(FRECIPE)
- NODE_NAME_CASE(FRSQRTE)
- NODE_NAME_CASE(VSLLI)
- NODE_NAME_CASE(VSRLI)
- NODE_NAME_CASE(VBSLL)
- NODE_NAME_CASE(VBSRL)
- NODE_NAME_CASE(VLDREPL)
- NODE_NAME_CASE(VMSKLTZ)
- NODE_NAME_CASE(VMSKGEZ)
- NODE_NAME_CASE(VMSKEQZ)
- NODE_NAME_CASE(VMSKNEZ)
- NODE_NAME_CASE(XVMSKLTZ)
- NODE_NAME_CASE(XVMSKGEZ)
- NODE_NAME_CASE(XVMSKEQZ)
- NODE_NAME_CASE(XVMSKNEZ)
- NODE_NAME_CASE(VHADDW)
- }
-#undef NODE_NAME_CASE
- return nullptr;
-}
-
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -8802,7 +8912,7 @@ bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
}
bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I,
+ const CallBase &I,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 8a4d774..5277e7e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -21,179 +21,6 @@
namespace llvm {
class LoongArchSubtarget;
-namespace LoongArchISD {
-enum NodeType : unsigned {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // TODO: add more LoongArchISDs
- CALL,
- CALL_MEDIUM,
- CALL_LARGE,
- RET,
- TAIL,
- TAIL_MEDIUM,
- TAIL_LARGE,
-
- // Select
- SELECT_CC,
-
- // Branch
- BR_CC,
- BRCOND,
-
- // 32-bit shifts, directly matching the semantics of the named LoongArch
- // instructions.
- SLL_W,
- SRA_W,
- SRL_W,
-
- ROTL_W,
- ROTR_W,
-
- // unsigned 32-bit integer division
- DIV_W,
- MOD_W,
- DIV_WU,
- MOD_WU,
-
- // FPR<->GPR transfer operations
- MOVGR2FR_W,
- MOVGR2FR_W_LA64,
- MOVGR2FR_D,
- MOVGR2FR_D_LO_HI,
- MOVFR2GR_S_LA64,
- MOVFCSR2GR,
- MOVGR2FCSR,
-
- FTINT,
-
- // Build and split F64 pair
- BUILD_PAIR_F64,
- SPLIT_PAIR_F64,
-
- // Bit counting operations
- CLZ_W,
- CTZ_W,
-
- BSTRINS,
- BSTRPICK,
-
- // Byte-swapping and bit-reversal
- REVB_2H,
- REVB_2W,
- BITREV_4B,
- BITREV_8B,
- BITREV_W,
-
- // Intrinsic operations start ============================================
- BREAK,
- CACOP_D,
- CACOP_W,
- DBAR,
- IBAR,
- SYSCALL,
-
- // CRC check operations
- CRC_W_B_W,
- CRC_W_H_W,
- CRC_W_W_W,
- CRC_W_D_W,
- CRCC_W_B_W,
- CRCC_W_H_W,
- CRCC_W_W_W,
- CRCC_W_D_W,
-
- CSRRD,
-
- // Write new value to CSR and return old value.
- // Operand 0: A chain pointer.
- // Operand 1: The new value to write.
- // Operand 2: The address of the required CSR.
- // Result 0: The old value of the CSR.
- // Result 1: The new chain pointer.
- CSRWR,
-
- // Similar to CSRWR but with a write mask.
- // Operand 0: A chain pointer.
- // Operand 1: The new value to write.
- // Operand 2: The write mask.
- // Operand 3: The address of the required CSR.
- // Result 0: The old value of the CSR.
- // Result 1: The new chain pointer.
- CSRXCHG,
-
- // IOCSR access operations
- IOCSRRD_B,
- IOCSRRD_W,
- IOCSRRD_H,
- IOCSRRD_D,
- IOCSRWR_B,
- IOCSRWR_H,
- IOCSRWR_W,
- IOCSRWR_D,
-
- // Read CPU configuration information operation
- CPUCFG,
-
- // Vector Shuffle
- VREPLVE,
- VSHUF,
- VPICKEV,
- VPICKOD,
- VPACKEV,
- VPACKOD,
- VILVL,
- VILVH,
- VSHUF4I,
- VREPLVEI,
- VREPLGR2VR,
- XVPERMI,
- XVPERM,
- XVREPLVE0,
- XVREPLVE0Q,
- XVINSVE0,
-
- // Extended vector element extraction
- VPICK_SEXT_ELT,
- VPICK_ZEXT_ELT,
-
- // Vector comparisons
- VALL_ZERO,
- VANY_ZERO,
- VALL_NONZERO,
- VANY_NONZERO,
-
- // Floating point approximate reciprocal operation
- FRECIPE,
- FRSQRTE,
-
- // Vector logicial left / right shift by immediate
- VSLLI,
- VSRLI,
-
- // Vector byte logicial left / right shift
- VBSLL,
- VBSRL,
-
- // Scalar load broadcast to vector
- VLDREPL,
-
- // Vector mask set by condition
- VMSKLTZ,
- VMSKGEZ,
- VMSKEQZ,
- VMSKNEZ,
- XVMSKLTZ,
- XVMSKGEZ,
- XVMSKEQZ,
- XVMSKNEZ,
-
- // Vector Horizontal Addition with Widening‌
- VHADDW
-
- // Intrinsic operations end =============================================
-};
-} // end namespace LoongArchISD
class LoongArchTargetLowering : public TargetLowering {
const LoongArchSubtarget &Subtarget;
@@ -213,9 +40,6 @@ public:
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- // This method returns the name of a target specific DAG node.
- const char *getTargetNodeName(unsigned Opcode) const override;
-
// Lower incoming arguments, copy physregs into vregs.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
@@ -254,7 +78,7 @@ public:
Value *NewVal, Value *Mask,
AtomicOrdering Ord) const override;
- bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
@@ -415,6 +239,7 @@ private:
SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index c89212d..9fc862a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -26,9 +26,9 @@ using namespace llvm;
#include "LoongArchGenInstrInfo.inc"
LoongArchInstrInfo::LoongArchInstrInfo(const LoongArchSubtarget &STI)
- : LoongArchGenInstrInfo(STI, LoongArch::ADJCALLSTACKDOWN,
+ : LoongArchGenInstrInfo(STI, RegInfo, LoongArch::ADJCALLSTACKDOWN,
LoongArch::ADJCALLSTACKUP),
- STI(STI) {}
+ RegInfo(STI.getHwMode()), STI(STI) {}
MCInst LoongArchInstrInfo::getNop() const {
return MCInstBuilder(LoongArch::ANDI)
@@ -113,14 +113,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
void LoongArchInstrInfo::storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg,
bool IsKill, int FI, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
- MachineInstr::MIFlag Flags) const {
+
+ Register VReg, MachineInstr::MIFlag Flags) const {
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
unsigned Opcode;
if (LoongArch::GPRRegClass.hasSubClassEq(RC))
- Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32
+ Opcode = TRI.getRegSizeInBits(LoongArch::GPRRegClass) == 32
? LoongArch::ST_W
: LoongArch::ST_D;
else if (LoongArch::FPR32RegClass.hasSubClassEq(RC))
@@ -149,8 +149,8 @@ void LoongArchInstrInfo::storeRegToStackSlot(
void LoongArchInstrInfo::loadRegFromStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg,
- int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
- Register VReg, MachineInstr::MIFlag Flags) const {
+ int FI, const TargetRegisterClass *RC, Register VReg,
+ MachineInstr::MIFlag Flags) const {
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
DebugLoc DL;
@@ -159,7 +159,7 @@ void LoongArchInstrInfo::loadRegFromStackSlot(
unsigned Opcode;
if (LoongArch::GPRRegClass.hasSubClassEq(RC))
- Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32
+ Opcode = RegInfo.getRegSizeInBits(LoongArch::GPRRegClass) == 32
? LoongArch::LD_W
: LoongArch::LD_D;
else if (LoongArch::FPR32RegClass.hasSubClassEq(RC))
@@ -378,12 +378,9 @@ bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
}
}
-bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
- const MachineBasicBlock *MBB,
- const MachineFunction &MF) const {
- if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
- return true;
-
+bool LoongArchInstrInfo::isSafeToMove(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
auto MII = MI.getIterator();
auto MIE = MBB->end();
@@ -429,25 +426,25 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
auto MO2 = Lu32I->getOperand(2).getTargetFlags();
if (MO0 == LoongArchII::MO_PCREL_HI && MO1 == LoongArchII::MO_PCREL_LO &&
MO2 == LoongArchII::MO_PCREL64_LO)
- return true;
+ return false;
if ((MO0 == LoongArchII::MO_GOT_PC_HI || MO0 == LoongArchII::MO_LD_PC_HI ||
MO0 == LoongArchII::MO_GD_PC_HI) &&
MO1 == LoongArchII::MO_GOT_PC_LO && MO2 == LoongArchII::MO_GOT_PC64_LO)
- return true;
+ return false;
if (MO0 == LoongArchII::MO_IE_PC_HI && MO1 == LoongArchII::MO_IE_PC_LO &&
MO2 == LoongArchII::MO_IE_PC64_LO)
- return true;
+ return false;
if (MO0 == LoongArchII::MO_DESC_PC_HI &&
MO1 == LoongArchII::MO_DESC_PC_LO &&
MO2 == LoongArchII::MO_DESC64_PC_LO)
- return true;
+ return false;
break;
}
case LoongArch::LU52I_D: {
auto MO = MI.getOperand(2).getTargetFlags();
if (MO == LoongArchII::MO_PCREL64_HI || MO == LoongArchII::MO_GOT_PC64_HI ||
MO == LoongArchII::MO_IE_PC64_HI || MO == LoongArchII::MO_DESC64_PC_HI)
- return true;
+ return false;
break;
}
default:
@@ -487,7 +484,7 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
auto MO1 = LoongArchII::getDirectFlags(SecondOp->getOperand(2));
auto MO2 = LoongArchII::getDirectFlags(Ld->getOperand(2));
if (MO1 == LoongArchII::MO_DESC_PC_LO && MO2 == LoongArchII::MO_DESC_LD)
- return true;
+ return false;
break;
}
if (SecondOp == MIE ||
@@ -496,34 +493,34 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
auto MO1 = LoongArchII::getDirectFlags(SecondOp->getOperand(2));
if (MO0 == LoongArchII::MO_PCREL_HI && SecondOp->getOpcode() == AddiOp &&
MO1 == LoongArchII::MO_PCREL_LO)
- return true;
+ return false;
if (MO0 == LoongArchII::MO_GOT_PC_HI && SecondOp->getOpcode() == LdOp &&
MO1 == LoongArchII::MO_GOT_PC_LO)
- return true;
+ return false;
if ((MO0 == LoongArchII::MO_LD_PC_HI ||
MO0 == LoongArchII::MO_GD_PC_HI) &&
SecondOp->getOpcode() == AddiOp && MO1 == LoongArchII::MO_GOT_PC_LO)
- return true;
+ return false;
break;
}
case LoongArch::ADDI_W:
case LoongArch::ADDI_D: {
auto MO = LoongArchII::getDirectFlags(MI.getOperand(2));
if (MO == LoongArchII::MO_PCREL_LO || MO == LoongArchII::MO_GOT_PC_LO)
- return true;
+ return false;
break;
}
case LoongArch::LD_W:
case LoongArch::LD_D: {
auto MO = LoongArchII::getDirectFlags(MI.getOperand(2));
if (MO == LoongArchII::MO_GOT_PC_LO)
- return true;
+ return false;
break;
}
case LoongArch::PseudoDESC_CALL: {
auto MO = LoongArchII::getDirectFlags(MI.getOperand(2));
if (MO == LoongArchII::MO_DESC_CALL)
- return true;
+ return false;
break;
}
default:
@@ -531,6 +528,18 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
}
}
+ return true;
+}
+
+bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const { // Returns true when MI must be treated as a scheduling boundary.
+ if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) // Keep every boundary the base-class hook reports.
+ return true;
+
+ if (!isSafeToMove(MI, MBB, MF)) // Anything isSafeToMove() rejects is a boundary as well.
+ return true;
+
return false;
}
@@ -656,13 +665,13 @@ void LoongArchInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
if (FrameIndex == -1)
report_fatal_error("The function size is incorrectly estimated.");
storeRegToStackSlot(MBB, PCALAU12I, Scav, /*IsKill=*/true, FrameIndex,
- &LoongArch::GPRRegClass, TRI, Register());
+ &LoongArch::GPRRegClass, Register());
TRI->eliminateFrameIndex(std::prev(PCALAU12I.getIterator()),
/*SpAdj=*/0, /*FIOperandNum=*/1);
PCALAU12I.getOperand(1).setMBB(&RestoreBB);
ADDI.getOperand(2).setMBB(&RestoreBB);
loadRegFromStackSlot(RestoreBB, RestoreBB.end(), Scav, FrameIndex,
- &LoongArch::GPRRegClass, TRI, Register());
+ &LoongArch::GPRRegClass, Register());
TRI->eliminateFrameIndex(RestoreBB.back(),
/*SpAdj=*/0, /*FIOperandNum=*/1);
}
@@ -756,6 +765,155 @@ LoongArchInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
return ArrayRef(TargetFlags);
}
+bool LoongArchInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
+ Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const { // Checks whether AddrI's immediate can be merged into MemI's offset; fills AM and returns true if so.
+ enum MemIOffsetType { // Offset kind of MemI; selects which isInt/isShiftedInt range check is applied further down.
+ Imm14Shift2,
+ Imm12,
+ Imm11Shift1,
+ Imm10Shift2,
+ Imm9Shift3,
+ Imm8,
+ Imm8Shift1,
+ Imm8Shift2,
+ Imm8Shift3
+ };
+
+ MemIOffsetType OT;
+ switch (MemI.getOpcode()) { // Classify MemI by opcode.
+ default: // Opcodes not listed below are never folded.
+ return false;
+ case LoongArch::LDPTR_W:
+ case LoongArch::LDPTR_D:
+ case LoongArch::STPTR_W:
+ case LoongArch::STPTR_D:
+ OT = Imm14Shift2;
+ break;
+ case LoongArch::LD_B:
+ case LoongArch::LD_H:
+ case LoongArch::LD_W:
+ case LoongArch::LD_D:
+ case LoongArch::LD_BU:
+ case LoongArch::LD_HU:
+ case LoongArch::LD_WU:
+ case LoongArch::ST_B:
+ case LoongArch::ST_H:
+ case LoongArch::ST_W:
+ case LoongArch::ST_D:
+ case LoongArch::FLD_S:
+ case LoongArch::FLD_D:
+ case LoongArch::FST_S:
+ case LoongArch::FST_D:
+ case LoongArch::VLD:
+ case LoongArch::VST:
+ case LoongArch::XVLD:
+ case LoongArch::XVST:
+ case LoongArch::VLDREPL_B:
+ case LoongArch::XVLDREPL_B:
+ OT = Imm12;
+ break;
+ case LoongArch::VLDREPL_H:
+ case LoongArch::XVLDREPL_H:
+ OT = Imm11Shift1;
+ break;
+ case LoongArch::VLDREPL_W:
+ case LoongArch::XVLDREPL_W:
+ OT = Imm10Shift2;
+ break;
+ case LoongArch::VLDREPL_D:
+ case LoongArch::XVLDREPL_D:
+ OT = Imm9Shift3;
+ break;
+ case LoongArch::VSTELM_B:
+ case LoongArch::XVSTELM_B:
+ OT = Imm8;
+ break;
+ case LoongArch::VSTELM_H:
+ case LoongArch::XVSTELM_H:
+ OT = Imm8Shift1;
+ break;
+ case LoongArch::VSTELM_W:
+ case LoongArch::XVSTELM_W:
+ OT = Imm8Shift2;
+ break;
+ case LoongArch::VSTELM_D:
+ case LoongArch::XVSTELM_D:
+ OT = Imm8Shift3;
+ break;
+ }
+
+ if (MemI.getOperand(0).getReg() == Reg) // Do not fold when Reg is also operand 0 of MemI.
+ return false;
+
+ if ((AddrI.getOpcode() != LoongArch::ADDI_W && // AddrI must be ADDI_W or ADDI_D ...
+ AddrI.getOpcode() != LoongArch::ADDI_D) ||
+ !AddrI.getOperand(1).isReg() || !AddrI.getOperand(2).isImm()) // ... with a register operand 1 and an immediate operand 2.
+ return false;
+
+ int64_t OldOffset = MemI.getOperand(2).getImm(); // Reads MemI operand 2 as an immediate without an isImm() check.
+ int64_t Disp = AddrI.getOperand(2).getImm();
+ int64_t NewOffset = OldOffset + Disp; // Displacement MemI would carry after folding.
+ if (!STI.is64Bit()) // Not 64-bit: sign-extend the sum from its low 32 bits.
+ NewOffset = SignExtend64<32>(NewOffset);
+
+ if (!(OT == Imm14Shift2 && isShiftedInt<14, 2>(NewOffset) && STI.hasUAL()) && // Imm14Shift2 is accepted only when STI.hasUAL() also holds.
+ !(OT == Imm12 && isInt<12>(NewOffset)) &&
+ !(OT == Imm11Shift1 && isShiftedInt<11, 1>(NewOffset)) &&
+ !(OT == Imm10Shift2 && isShiftedInt<10, 2>(NewOffset)) &&
+ !(OT == Imm9Shift3 && isShiftedInt<9, 3>(NewOffset)) &&
+ !(OT == Imm8 && isInt<8>(NewOffset)) &&
+ !(OT == Imm8Shift1 && isShiftedInt<8, 1>(NewOffset)) &&
+ !(OT == Imm8Shift2 && isShiftedInt<8, 2>(NewOffset)) &&
+ !(OT == Imm8Shift3 && isShiftedInt<8, 3>(NewOffset))) // Reject when NewOffset fails the check for MemI's offset kind.
+ return false;
+
+ AM.BaseReg = AddrI.getOperand(1).getReg(); // New base register is AddrI's operand 1.
+ AM.ScaledReg = 0; // No scaled-register component is produced.
+ AM.Scale = 0;
+ AM.Displacement = NewOffset;
+ AM.Form = ExtAddrMode::Formula::Basic;
+ return true;
+}
+
+MachineInstr *
+LoongArchInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const { // Builds a new instruction with MemI's opcode addressing AM.BaseReg + AM.Displacement and returns it.
+ const DebugLoc &DL = MemI.getDebugLoc();
+ MachineBasicBlock &MBB = *MemI.getParent();
+
+ assert(AM.ScaledReg == 0 && AM.Scale == 0 && // Only the base + displacement form is handled here.
+ "Addressing mode not supported for folding");
+
+ unsigned MemIOp = MemI.getOpcode();
+ switch (MemIOp) {
+ default: // Generic layout: operand 0, base register, displacement.
+ return BuildMI(MBB, MemI, DL, get(MemIOp)) // MemI is the insertion point; it is not erased in this function.
+ .addReg(MemI.getOperand(0).getReg(),
+ MemI.mayLoad() ? RegState::Define : 0) // Operand 0 is added as a def when MemI may load.
+ .addReg(AM.BaseReg)
+ .addImm(AM.Displacement)
+ .setMemRefs(MemI.memoperands()) // Memory operands and MI flags are carried over from MemI.
+ .setMIFlags(MemI.getFlags());
+ case LoongArch::VSTELM_B:
+ case LoongArch::VSTELM_H:
+ case LoongArch::VSTELM_W:
+ case LoongArch::VSTELM_D:
+ case LoongArch::XVSTELM_B:
+ case LoongArch::XVSTELM_H:
+ case LoongArch::XVSTELM_W:
+ case LoongArch::XVSTELM_D:
+ return BuildMI(MBB, MemI, DL, get(MemIOp))
+ .addReg(MemI.getOperand(0).getReg(), 0) // Operand 0 is added with no register-state flags.
+ .addReg(AM.BaseReg)
+ .addImm(AM.Displacement)
+ .addImm(MemI.getOperand(3).getImm()) // These opcodes carry an extra immediate operand 3, copied unchanged.
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+ }
+}
+
// Returns true if this is the sext.w pattern, addi.w rd, rs, 0.
bool LoongArch::isSEXT_W(const MachineInstr &MI) {
return MI.getOpcode() == LoongArch::ADDI_W && MI.getOperand(1).isReg() &&
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index f25958a..9f7a0a2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -24,9 +24,13 @@ namespace llvm {
class LoongArchSubtarget;
class LoongArchInstrInfo : public LoongArchGenInstrInfo {
+ const LoongArchRegisterInfo RegInfo;
+
public:
explicit LoongArchInstrInfo(const LoongArchSubtarget &STI);
+ const LoongArchRegisterInfo &getRegisterInfo() const { return RegInfo; }
+
MCInst getNop() const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
@@ -36,13 +40,11 @@ public:
void storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
- bool IsKill, int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ bool IsKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
void loadRegFromStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DstReg,
- int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ int FrameIndex, const TargetRegisterClass *RC, Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
// Materializes the given integer Val into DstReg.
@@ -64,6 +66,9 @@ public:
bool isBranchOffsetInRange(unsigned BranchOpc,
int64_t BrOffset) const override;
+ bool isSafeToMove(const MachineInstr &MI, const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
bool isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const override;
@@ -93,6 +98,12 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableBitmaskMachineOperandTargetFlags() const override;
+ bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const override;
+ MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const override;
+
protected:
const LoongArchSubtarget &STI;
};
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 9565a55..2e6653e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -100,14 +100,22 @@ def loongarch_tail_large : SDNode<"LoongArchISD::TAIL_LARGE", SDT_LoongArchCall,
def loongarch_selectcc : SDNode<"LoongArchISD::SELECT_CC", SDT_LoongArchSelectCC>;
def loongarch_brcc : SDNode<"LoongArchISD::BR_CC", SDT_LoongArchBrCC,
[SDNPHasChain]>;
+
+// 32-bit shifts, directly matching the semantics of the named LoongArch
+// instructions.
def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
+
def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>;
+
+// 32-bit signed/unsigned integer division and remainder
def loongarch_div_w : SDNode<"LoongArchISD::DIV_W", SDT_LoongArchIntBinOpW>;
def loongarch_div_wu : SDNode<"LoongArchISD::DIV_WU", SDT_LoongArchIntBinOpW>;
def loongarch_mod_w : SDNode<"LoongArchISD::MOD_W", SDT_LoongArchIntBinOpW>;
def loongarch_mod_wu : SDNode<"LoongArchISD::MOD_WU", SDT_LoongArchIntBinOpW>;
+
+// CRC check operations
def loongarch_crc_w_b_w
: SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
def loongarch_crc_w_h_w
@@ -124,37 +132,63 @@ def loongarch_crcc_w_w_w : SDNode<"LoongArchISD::CRCC_W_W_W",
SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
def loongarch_crcc_w_d_w : SDNode<"LoongArchISD::CRCC_W_D_W",
SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
+
def loongarch_bstrins
: SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>;
def loongarch_bstrpick
: SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>;
+
+// Byte-swapping and bit-reversal
def loongarch_revb_2h : SDNode<"LoongArchISD::REVB_2H", SDTUnaryOp>;
def loongarch_revb_2w : SDNode<"LoongArchISD::REVB_2W", SDTUnaryOp>;
def loongarch_bitrev_4b : SDNode<"LoongArchISD::BITREV_4B", SDTUnaryOp>;
def loongarch_bitrev_8b : SDNode<"LoongArchISD::BITREV_8B", SDTUnaryOp>;
def loongarch_bitrev_w : SDNode<"LoongArchISD::BITREV_W", SDTUnaryOp>;
+
+// Bit counting operations
def loongarch_clzw : SDNode<"LoongArchISD::CLZ_W", SDTIntBitCountUnaryOp>;
def loongarch_ctzw : SDNode<"LoongArchISD::CTZ_W", SDTIntBitCountUnaryOp>;
+
def loongarch_dbar : SDNode<"LoongArchISD::DBAR", SDT_LoongArchVI,
[SDNPHasChain, SDNPSideEffect]>;
def loongarch_ibar : SDNode<"LoongArchISD::IBAR", SDT_LoongArchVI,
[SDNPHasChain, SDNPSideEffect]>;
def loongarch_break : SDNode<"LoongArchISD::BREAK", SDT_LoongArchVI,
[SDNPHasChain, SDNPSideEffect]>;
+
+// FCSR<->GPR transfer operations
def loongarch_movfcsr2gr : SDNode<"LoongArchISD::MOVFCSR2GR",
SDT_LoongArchMovfcsr2gr, [SDNPHasChain]>;
def loongarch_movgr2fcsr : SDNode<"LoongArchISD::MOVGR2FCSR",
SDT_LoongArchMovgr2fcsr,
[SDNPHasChain, SDNPSideEffect]>;
+
def loongarch_syscall : SDNode<"LoongArchISD::SYSCALL", SDT_LoongArchVI,
[SDNPHasChain, SDNPSideEffect]>;
def loongarch_csrrd : SDNode<"LoongArchISD::CSRRD", SDT_LoongArchCsrrd,
[SDNPHasChain, SDNPSideEffect]>;
+
+// Write new value to CSR and return old value.
+// Operand 0: A chain pointer.
+// Operand 1: The new value to write.
+// Operand 2: The address of the required CSR.
+// Result 0: The old value of the CSR.
+// Result 1: The new chain pointer.
def loongarch_csrwr : SDNode<"LoongArchISD::CSRWR", SDT_LoongArchCsrwr,
[SDNPHasChain, SDNPSideEffect]>;
+
+// Similar to CSRWR but with a write mask.
+// Operand 0: A chain pointer.
+// Operand 1: The new value to write.
+// Operand 2: The write mask.
+// Operand 3: The address of the required CSR.
+// Result 0: The old value of the CSR.
+// Result 1: The new chain pointer.
def loongarch_csrxchg : SDNode<"LoongArchISD::CSRXCHG",
SDT_LoongArchCsrxchg,
[SDNPHasChain, SDNPSideEffect]>;
+
+// IOCSR access operations
def loongarch_iocsrrd_b : SDNode<"LoongArchISD::IOCSRRD_B", SDTUnaryOp,
[SDNPHasChain, SDNPSideEffect]>;
def loongarch_iocsrrd_h : SDNode<"LoongArchISD::IOCSRRD_H", SDTUnaryOp,
@@ -175,9 +209,12 @@ def loongarch_iocsrwr_w : SDNode<"LoongArchISD::IOCSRWR_W",
def loongarch_iocsrwr_d : SDNode<"LoongArchISD::IOCSRWR_D",
SDT_LoongArchIocsrwr,
[SDNPHasChain, SDNPSideEffect]>;
+
+// Read CPU configuration information operation
def loongarch_cpucfg : SDNode<"LoongArchISD::CPUCFG", SDTUnaryOp,
[SDNPHasChain]>;
+// Build and split F64 pair
def loongarch_build_pair_f64 : SDNode<"LoongArchISD::BUILD_PAIR_F64",
SDT_LoongArchBuildPairF64>;
def loongarch_split_pair_f64 : SDNode<"LoongArchISD::SPLIT_PAIR_F64",
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index ca4ee5f..d6af093 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -16,11 +16,15 @@ def SDT_LoongArchXVREPLVE0 : SDTypeProfile<1, 1, [SDTCisVec<0>,
SDTCisSameAs<0, 1>]>;
// Target nodes.
+
+// Vector Shuffle
def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;
def loongarch_xvperm: SDNode<"LoongArchISD::XVPERM", SDT_LoongArchXVPERM>;
def loongarch_xvreplve0: SDNode<"LoongArchISD::XVREPLVE0", SDT_LoongArchXVREPLVE0>;
def loongarch_xvreplve0q: SDNode<"LoongArchISD::XVREPLVE0Q", SDT_LoongArchXVREPLVE0>;
def loongarch_xvinsve0 : SDNode<"LoongArchISD::XVINSVE0", SDT_LoongArchV2RUimm>;
+
+// Vector mask set by condition
def loongarch_xvmskltz: SDNode<"LoongArchISD::XVMSKLTZ", SDT_LoongArchVMSKCOND>;
def loongarch_xvmskgez: SDNode<"LoongArchISD::XVMSKGEZ", SDT_LoongArchVMSKCOND>;
def loongarch_xvmskeqz: SDNode<"LoongArchISD::XVMSKEQZ", SDT_LoongArchVMSKCOND>;
@@ -1396,7 +1400,7 @@ def : Pat<(vnot (or (vt LASX256:$xj), (vt LASX256:$xk))),
(XVNOR_V LASX256:$xj, LASX256:$xk)>;
// XVANDN_V
foreach vt = [v32i8, v16i16, v8i32, v4i64] in
-def : Pat<(and (vt (vnot LASX256:$xj)), (vt LASX256:$xk)),
+def : Pat<(loongarch_vandn (vt LASX256:$xj), (vt LASX256:$xk)),
(XVANDN_V LASX256:$xj, LASX256:$xk)>;
// XVORN_V
foreach vt = [v32i8, v16i16, v8i32, v4i64] in
@@ -1443,6 +1447,11 @@ defm : PatXrXr<sra, "XVSRA">;
defm : PatShiftXrXr<sra, "XVSRA">;
defm : PatShiftXrSplatUimm<sra, "XVSRAI">;
+// XVROTR[I]_{B/H/W/D}
+defm : PatXrXr<rotr, "XVROTR">;
+defm : PatShiftXrXr<rotr, "XVROTR">;
+defm : PatShiftXrSplatUimm<rotr, "XVROTRI">;
+
// XVCLZ_{B/H/W/D}
defm : PatXr<ctlz, "XVCLZ">;
@@ -1450,25 +1459,25 @@ defm : PatXr<ctlz, "XVCLZ">;
defm : PatXr<ctpop, "XVPCNT">;
// XVBITCLR_{B/H/W/D}
-def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, v32i8:$xk))),
+def : Pat<(loongarch_vandn (v32i8 (shl vsplat_imm_eq_1, v32i8:$xk)), v32i8:$xj),
(v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>;
-def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, v16i16:$xk))),
+def : Pat<(loongarch_vandn (v16i16 (shl vsplat_imm_eq_1, v16i16:$xk)), v16i16:$xj),
(v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>;
-def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, v8i32:$xk))),
+def : Pat<(loongarch_vandn (v8i32 (shl vsplat_imm_eq_1, v8i32:$xk)), v8i32:$xj),
(v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>;
-def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, v4i64:$xk))),
+def : Pat<(loongarch_vandn (v4i64 (shl vsplat_imm_eq_1, v4i64:$xk)), v4i64:$xj),
(v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>;
-def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1,
- (vsplati8imm7 v32i8:$xk)))),
+def : Pat<(loongarch_vandn (v32i8 (shl vsplat_imm_eq_1,
+ (vsplati8imm7 v32i8:$xk))), v32i8:$xj),
(v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>;
-def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1,
- (vsplati16imm15 v16i16:$xk)))),
+def : Pat<(loongarch_vandn (v16i16 (shl vsplat_imm_eq_1,
+ (vsplati16imm15 v16i16:$xk))), v16i16:$xj),
(v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>;
-def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1,
- (vsplati32imm31 v8i32:$xk)))),
+def : Pat<(loongarch_vandn (v8i32 (shl vsplat_imm_eq_1,
+ (vsplati32imm31 v8i32:$xk))), v8i32:$xj),
(v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>;
-def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1,
- (vsplati64imm63 v4i64:$xk)))),
+def : Pat<(loongarch_vandn (v4i64 (shl vsplat_imm_eq_1,
+ (vsplati64imm63 v4i64:$xk))), v4i64:$xj),
(v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>;
// XVBITCLRI_{B/H/W/D}
@@ -1558,6 +1567,10 @@ defm : PatXrXrF<fmul, "XVFMUL">;
// XVFDIV_{S/D}
defm : PatXrXrF<fdiv, "XVFDIV">;
+// XVFMAX_{S/D}, XVFMIN_{S/D}
+defm : PatXrXrF<fmaxnum, "XVFMAX">;
+defm : PatXrXrF<fminnum, "XVFMIN">;
+
// XVFMADD_{S/D}
def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),
(XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
@@ -2109,6 +2122,37 @@ defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64, 2, sub_128>;
defm : subvector_subreg_lowering<LSX128, v8i16, LASX256, v16i16, 8, sub_128>;
defm : subvector_subreg_lowering<LSX128, v16i8, LASX256, v32i8, 16, sub_128>;
+// LASX and LSX conversion
+def : Pat<(int_loongarch_lasx_cast_128_s (v4f32 LSX128:$src)),
+ (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_cast_128_d (v2f64 LSX128:$src)),
+ (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_cast_128 (v2i64 LSX128:$src)),
+ (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo_s (v8f32 LASX256:$src)),
+ (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo_d (v4f64 LASX256:$src)),
+ (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo (v4i64 LASX256:$src)),
+ (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi_s (v8f32 LASX256:$src)),
+ (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi_d (v4f64 LASX256:$src)),
+ (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi (v4i64 LASX256:$src)),
+ (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)),
+ (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)),
+ (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo (v4i64 LASX256:$src), (v2i64 LSX128:$lo)),
+ (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)),
+ (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)),
+ (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi (v4i64 LASX256:$src), (v2i64 LSX128:$lo)),
+ (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
} // Predicates = [HasExtLASX]
/// Intrinsic pattern
@@ -2424,6 +2468,12 @@ def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),
def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm),
(XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>;
+// Vector floating-point conversion
+defm : PatXrF<fceil, "XVFRINTRP">;
+defm : PatXrF<ffloor, "XVFRINTRM">;
+defm : PatXrF<ftrunc, "XVFRINTRZ">;
+defm : PatXrF<froundeven, "XVFRINTRNE">;
+
// load
def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm),
(XVLD GPR:$rj, (to_valid_timm timm:$imm))>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 92402ba..43ad381 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -34,7 +34,11 @@ def SDT_LoongArchVLDREPL : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisPtrTy<1>]>;
def SDT_LoongArchVMSKCOND : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
// Target nodes.
+
+// Vector Shuffle
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
+
+// Vector comparisons
def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO",
SDT_LoongArchVecCond>;
def loongarch_vany_nonzero : SDNode<"LoongArchISD::VANY_NONZERO",
@@ -44,11 +48,13 @@ def loongarch_vall_zero : SDNode<"LoongArchISD::VALL_ZERO",
def loongarch_vany_zero : SDNode<"LoongArchISD::VANY_ZERO",
SDT_LoongArchVecCond>;
+// Extended vector element extraction
def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT",
SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;
def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT",
SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;
+// Vector Shuffle
def loongarch_vshuf: SDNode<"LoongArchISD::VSHUF", SDT_LoongArchVShuf>;
def loongarch_vpickev: SDNode<"LoongArchISD::VPICKEV", SDT_LoongArchV2R>;
def loongarch_vpickod: SDNode<"LoongArchISD::VPICKOD", SDT_LoongArchV2R>;
@@ -56,27 +62,33 @@ def loongarch_vpackev: SDNode<"LoongArchISD::VPACKEV", SDT_LoongArchV2R>;
def loongarch_vpackod: SDNode<"LoongArchISD::VPACKOD", SDT_LoongArchV2R>;
def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
+def loongarch_vandn: SDNode<"LoongArchISD::VANDN", SDT_LoongArchV2R>;
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
-def loongarch_vshuf4i_d : SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV2RUimm>;
+def loongarch_vshuf4i_d : SDNode<"LoongArchISD::VSHUF4I_D", SDT_LoongArchV2RUimm>;
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
+// Vector logical left / right shift by immediate
def loongarch_vslli : SDNode<"LoongArchISD::VSLLI", SDT_LoongArchV1RUimm>;
def loongarch_vsrli : SDNode<"LoongArchISD::VSRLI", SDT_LoongArchV1RUimm>;
+// Vector byte logical left / right shift
def loongarch_vbsll : SDNode<"LoongArchISD::VBSLL", SDT_LoongArchV1RUimm>;
def loongarch_vbsrl : SDNode<"LoongArchISD::VBSRL", SDT_LoongArchV1RUimm>;
+// Vector Horizontal Addition with Widening
def loongarch_vhaddw : SDNode<"LoongArchISD::VHADDW", SDT_LoongArchV2R>;
+// Scalar load broadcast to vector
def loongarch_vldrepl
: SDNode<"LoongArchISD::VLDREPL",
SDT_LoongArchVLDREPL, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+// Vector mask set by condition
def loongarch_vmskltz: SDNode<"LoongArchISD::VMSKLTZ", SDT_LoongArchVMSKCOND>;
def loongarch_vmskgez: SDNode<"LoongArchISD::VMSKGEZ", SDT_LoongArchVMSKCOND>;
def loongarch_vmskeqz: SDNode<"LoongArchISD::VMSKEQZ", SDT_LoongArchVMSKCOND>;
@@ -1598,7 +1610,7 @@ def : Pat<(vnot (or (vt LSX128:$vj), (vt LSX128:$vk))),
(VNOR_V LSX128:$vj, LSX128:$vk)>;
// VANDN_V
foreach vt = [v16i8, v8i16, v4i32, v2i64] in
-def : Pat<(and (vt (vnot LSX128:$vj)), (vt LSX128:$vk)),
+def : Pat<(loongarch_vandn (vt LSX128:$vj), (vt LSX128:$vk)),
(VANDN_V LSX128:$vj, LSX128:$vk)>;
// VORN_V
foreach vt = [v16i8, v8i16, v4i32, v2i64] in
@@ -1645,6 +1657,11 @@ defm : PatVrVr<sra, "VSRA">;
defm : PatShiftVrVr<sra, "VSRA">;
defm : PatShiftVrSplatUimm<sra, "VSRAI">;
+// VROTR[I]_{B/H/W/D}
+defm : PatVrVr<rotr, "VROTR">;
+defm : PatShiftVrVr<rotr, "VROTR">;
+defm : PatShiftVrSplatUimm<rotr, "VROTRI">;
+
// VCLZ_{B/H/W/D}
defm : PatVr<ctlz, "VCLZ">;
@@ -1652,25 +1669,25 @@ defm : PatVr<ctlz, "VCLZ">;
defm : PatVr<ctpop, "VPCNT">;
// VBITCLR_{B/H/W/D}
-def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, v16i8:$vk))),
+def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1, v16i8:$vk)), v16i8:$vj),
(v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>;
-def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, v8i16:$vk))),
+def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1, v8i16:$vk)), v8i16:$vj),
(v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>;
-def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, v4i32:$vk))),
+def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1, v4i32:$vk)), v4i32:$vj),
(v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>;
-def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, v2i64:$vk))),
+def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1, v2i64:$vk)), v2i64:$vj),
(v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>;
-def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1,
- (vsplati8imm7 v16i8:$vk)))),
+def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1,
+ (vsplati8imm7 v16i8:$vk))), v16i8:$vj),
(v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>;
-def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1,
- (vsplati16imm15 v8i16:$vk)))),
+def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1,
+ (vsplati16imm15 v8i16:$vk))), v8i16:$vj),
(v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>;
-def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1,
- (vsplati32imm31 v4i32:$vk)))),
+def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1,
+ (vsplati32imm31 v4i32:$vk))), v4i32:$vj),
(v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>;
-def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1,
- (vsplati64imm63 v2i64:$vk)))),
+def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1,
+ (vsplati64imm63 v2i64:$vk))), v2i64:$vj),
(v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>;
// VBITCLRI_{B/H/W/D}
@@ -1760,6 +1777,10 @@ defm : PatVrVrF<fmul, "VFMUL">;
// VFDIV_{S/D}
defm : PatVrVrF<fdiv, "VFDIV">;
+// VFMAX_{S/D}, VFMIN_{S/D}
+defm : PatVrVrF<fmaxnum, "VFMAX">;
+defm : PatVrVrF<fminnum, "VFMIN">;
+
// VFMADD_{S/D}
def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va),
(VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
@@ -2552,6 +2573,11 @@ def : Pat<(f64 (froundeven FPR64:$fj)),
(f64 (EXTRACT_SUBREG (VFRINTRNE_D (VREPLVEI_D
(SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
+defm : PatVrF<fceil, "VFRINTRP">;
+defm : PatVrF<ffloor, "VFRINTRM">;
+defm : PatVrF<ftrunc, "VFRINTRZ">;
+defm : PatVrF<froundeven, "VFRINTRNE">;
+
// load
def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm),
(VLD GPR:$rj, (to_valid_timm timm:$imm))>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp
new file mode 100644
index 0000000..c07adfc
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchSelectionDAGInfo.h"
+
+#define GET_SDNODE_DESC
+#include "LoongArchGenSDNodeInfo.inc"
+
+using namespace llvm;
+
+LoongArchSelectionDAGInfo::LoongArchSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(LoongArchGenSDNodeInfo) {} // Passes LoongArchGenSDNodeInfo (presumably defined by the LoongArchGenSDNodeInfo.inc include above) to the base class.
+
+LoongArchSelectionDAGInfo::~LoongArchSelectionDAGInfo() = default; // Defaulted, but defined out of line in this file.
diff --git a/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h
new file mode 100644
index 0000000..7210a15
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSELECTIONDAGINFO_H
+
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
+#define GET_SDNODE_ENUM
+#include "LoongArchGenSDNodeInfo.inc"
+
+namespace llvm {
+
+class LoongArchSelectionDAGInfo : public SelectionDAGGenTargetInfo { // LoongArch subclass; declares only a constructor and a destructor.
+public:
+ LoongArchSelectionDAGInfo(); // Defined in LoongArchSelectionDAGInfo.cpp.
+
+ ~LoongArchSelectionDAGInfo() override; // Defined (defaulted) in LoongArchSelectionDAGInfo.cpp.
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSELECTIONDAGINFO_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
index 3acbe49..6293cbe 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
@@ -12,6 +12,7 @@
#include "LoongArchSubtarget.h"
#include "LoongArchFrameLowering.h"
+#include "LoongArchSelectionDAGInfo.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
using namespace llvm;
@@ -95,4 +96,12 @@ LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU,
: LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS),
FrameLowering(
initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
- InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {}
+ InstrInfo(*this), TLInfo(TM, *this) {
+ TSInfo = std::make_unique<LoongArchSelectionDAGInfo>();
+}
+
+LoongArchSubtarget::~LoongArchSubtarget() = default; // Defaulted out of line; NOTE(review): presumably so the unique_ptr member TSInfo is destroyed where its pointee type is complete - confirm.
+
+const SelectionDAGTargetInfo *LoongArchSubtarget::getSelectionDAGInfo() const {
+ return TSInfo.get(); // Non-owning pointer; ownership stays with the TSInfo unique_ptr member.
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index 5e12baf..b90542c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -18,7 +18,6 @@
#include "LoongArchInstrInfo.h"
#include "LoongArchRegisterInfo.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
-#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
@@ -45,9 +44,8 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
LoongArchFrameLowering FrameLowering;
LoongArchInstrInfo InstrInfo;
- LoongArchRegisterInfo RegInfo;
LoongArchTargetLowering TLInfo;
- SelectionDAGTargetInfo TSInfo;
+ std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
Align PrefFunctionAlignment;
Align PrefLoopAlignment;
@@ -69,6 +67,8 @@ public:
LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
StringRef FS, StringRef ABIName, const TargetMachine &TM);
+ ~LoongArchSubtarget() override;
+
// Parses features string setting specified subtarget options. The
// definition of this function is auto-generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
@@ -78,14 +78,13 @@ public:
}
const LoongArchInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const LoongArchRegisterInfo *getRegisterInfo() const override {
- return &RegInfo;
+ return &InstrInfo.getRegisterInfo();
}
const LoongArchTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
- const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
- }
+
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
bool GETTER() const { return ATTRIBUTE; }
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 9de4c9d..92a9388 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -62,6 +62,11 @@ static cl::opt<bool>
cl::desc("Enable the merge base offset pass"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ EnableSinkFold("loongarch-enable-sink-fold",
+ cl::desc("Enable sinking and folding of instruction copies"),
+ cl::init(true), cl::Hidden);
+
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
return RM.value_or(Reloc::Static);
}
@@ -146,7 +151,9 @@ namespace {
class LoongArchPassConfig : public TargetPassConfig {
public:
LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ setEnableSinkAndFold(EnableSinkFold);
+ }
LoongArchTargetMachine &getLoongArchTargetMachine() const {
return getTM<LoongArchTargetMachine>();
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index f548a8d..5107c8d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -111,4 +111,25 @@ bool LoongArchTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
}
}
-// TODO: Implement more hooks to provide TTI machinery for LoongArch.
+LoongArchTTIImpl::TTI::MemCmpExpansionOptions
+LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ TTI::MemCmpExpansionOptions Options;
+
+ if (!ST->hasUAL())
+ return Options;
+
+ Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+ Options.NumLoadsPerBlock = Options.MaxNumLoads;
+ Options.AllowOverlappingLoads = true;
+
+ // TODO: Support for vectors.
+ if (ST->is64Bit()) {
+ Options.LoadSizes = {8, 4, 2, 1};
+ Options.AllowedTailExpansions = {3, 5, 6};
+ } else {
+ Options.LoadSizes = {4, 2, 1};
+ Options.AllowedTailExpansions = {3};
+ }
+
+ return Options;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index e3f16c7..9b479f9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -55,7 +55,8 @@ public:
bool shouldExpandReduction(const IntrinsicInst *II) const override;
- // TODO: Implement more hooks to provide TTI machinery for LoongArch.
+ TTI::MemCmpExpansionOptions
+ enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
index 7d54565..6d69af5 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -39,7 +39,7 @@ LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)
: MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,
/*HasRelocationAddend=*/true) {}
-LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {}
+LoongArchELFObjectWriter::~LoongArchELFObjectWriter() = default;
unsigned LoongArchELFObjectWriter::getRelocType(const MCFixup &Fixup,
const MCValue &Target,
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index f0e2bc4..08fa51d 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -38,7 +38,7 @@ public:
LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
: Ctx(ctx), MCII(MCII) {}
- ~LoongArchMCCodeEmitter() override {}
+ ~LoongArchMCCodeEmitter() override = default;
void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,