diff options
Diffstat (limited to 'llvm/lib/Target/RISCV')
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 134 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 29 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVInstrInfo.h | 19 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVInstrInfoZa.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 36 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVScheduleV.td | 55 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 191 |
7 files changed, 353 insertions, 115 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6234714..a3a4cf2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16498,43 +16498,60 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, SDValue X = N->getOperand(0); if (Subtarget.hasShlAdd(3)) { - for (uint64_t Divisor : {3, 5, 9}) { - if (MulAmt % Divisor != 0) - continue; - uint64_t MulAmt2 = MulAmt / Divisor; - // 3/5/9 * 2^N -> shl (shXadd X, X), N - if (isPowerOf2_64(MulAmt2)) { - SDLoc DL(N); - SDValue X = N->getOperand(0); - // Put the shift first if we can fold a zext into the - // shift forming a slli.uw. - if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) && - X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) { - SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X, - DAG.getConstant(Log2_64(MulAmt2), DL, VT)); - return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl, - DAG.getConstant(Log2_64(Divisor - 1), DL, VT), - Shl); - } - // Otherwise, put rhe shl second so that it can fold with following - // instructions (e.g. sext or add). - SDValue Mul359 = - DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, - DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); - return DAG.getNode(ISD::SHL, DL, VT, Mul359, - DAG.getConstant(Log2_64(MulAmt2), DL, VT)); - } - - // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X) - if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) { - SDLoc DL(N); - SDValue Mul359 = - DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, - DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); - return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359, - DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT), - Mul359); + int Shift; + if (int ShXAmount = isShifted359(MulAmt, Shift)) { + // 3/5/9 * 2^N -> shl (shXadd X, X), N + SDLoc DL(N); + SDValue X = N->getOperand(0); + // Put the shift first if we can fold a zext into the shift forming + // a slli.uw. + if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) && + X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) { + SDValue Shl = + DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT)); + return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl, + DAG.getConstant(ShXAmount, DL, VT), Shl); } + // Otherwise, put the shl second so that it can fold with following + // instructions (e.g. sext or add). + SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, + DAG.getConstant(ShXAmount, DL, VT), X); + return DAG.getNode(ISD::SHL, DL, VT, Mul359, + DAG.getConstant(Shift, DL, VT)); + } + + // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X) + int ShX; + int ShY; + switch (MulAmt) { + case 3 * 5: + ShY = 1; + ShX = 2; + break; + case 3 * 9: + ShY = 1; + ShX = 3; + break; + case 5 * 5: + ShX = ShY = 2; + break; + case 5 * 9: + ShY = 2; + ShX = 3; + break; + case 9 * 9: + ShX = ShY = 3; + break; + default: + ShX = ShY = 0; + break; + } + if (ShX) { + SDLoc DL(N); + SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, + DAG.getConstant(ShY, DL, VT), X); + return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359, + DAG.getConstant(ShX, DL, VT), Mul359); } // If this is a power 2 + 2/4/8, we can use a shift followed by a single @@ -16557,18 +16574,14 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, // variants we could implement. e.g. // (2^(1,2,3) * 3,5,9 + 1) << C2 // 2^(C1>3) * 3,5,9 +/- 1 - for (uint64_t Divisor : {3, 5, 9}) { - uint64_t C = MulAmt - 1; - if (C <= Divisor) - continue; - unsigned TZ = llvm::countr_zero(C); - if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) { + if (int ShXAmount = isShifted359(MulAmt - 1, Shift)) { + assert(Shift != 0 && "MulAmt=4,6,10 handled before"); + if (Shift <= 3) { SDLoc DL(N); - SDValue Mul359 = - DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, - DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); + SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, + DAG.getConstant(ShXAmount, DL, VT), X); return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359, - DAG.getConstant(TZ, DL, VT), X); + DAG.getConstant(Shift, DL, VT), X); } } @@ -16576,7 +16589,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) { unsigned ScaleShift = llvm::countr_zero(MulAmt - 1); if (ScaleShift >= 1 && ScaleShift < 4) { - unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2))); + unsigned ShiftAmt = llvm::countr_zero((MulAmt - 1) & (MulAmt - 2)); SDLoc DL(N); SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT)); @@ -16589,7 +16602,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x)) for (uint64_t Offset : {3, 5, 9}) { if (isPowerOf2_64(MulAmt + Offset)) { - unsigned ShAmt = Log2_64(MulAmt + Offset); + unsigned ShAmt = llvm::countr_zero(MulAmt + Offset); if (ShAmt >= VT.getSizeInBits()) continue; SDLoc DL(N); @@ -16608,21 +16621,16 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt2 = MulAmt / Divisor; // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples // of 25 which happen to be quite common. - for (uint64_t Divisor2 : {3, 5, 9}) { - if (MulAmt2 % Divisor2 != 0) - continue; - uint64_t MulAmt3 = MulAmt2 / Divisor2; - if (isPowerOf2_64(MulAmt3)) { - SDLoc DL(N); - SDValue Mul359A = - DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, - DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); - SDValue Mul359B = DAG.getNode( - RISCVISD::SHL_ADD, DL, VT, Mul359A, - DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A); - return DAG.getNode(ISD::SHL, DL, VT, Mul359B, - DAG.getConstant(Log2_64(MulAmt3), DL, VT)); - } + if (int ShBAmount = isShifted359(MulAmt2, Shift)) { + SDLoc DL(N); + SDValue Mul359A = + DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, + DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); + SDValue Mul359B = + DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359A, + DAG.getConstant(ShBAmount, DL, VT), Mul359A); + return DAG.getNode(ISD::SHL, DL, VT, Mul359B, + DAG.getConstant(Shift, DL, VT)); } } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 7db4832..96e1078 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -4586,24 +4586,23 @@ void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB, .addReg(DestReg, RegState::Kill) .addImm(ShiftAmount) .setMIFlag(Flag); - } else if (STI.hasShlAdd(3) && - ((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) || - (Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) || - (Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) { + } else if (int ShXAmount, ShiftAmount; + STI.hasShlAdd(3) && + (ShXAmount = isShifted359(Amount, ShiftAmount)) != 0) { // We can use Zba SHXADD+SLLI instructions for multiply in some cases. unsigned Opc; - uint32_t ShiftAmount; - if (Amount % 9 == 0) { - Opc = RISCV::SH3ADD; - ShiftAmount = Log2_64(Amount / 9); - } else if (Amount % 5 == 0) { - Opc = RISCV::SH2ADD; - ShiftAmount = Log2_64(Amount / 5); - } else if (Amount % 3 == 0) { + switch (ShXAmount) { + case 1: Opc = RISCV::SH1ADD; - ShiftAmount = Log2_64(Amount / 3); - } else { - llvm_unreachable("implied by if-clause"); + break; + case 2: + Opc = RISCV::SH2ADD; + break; + case 3: + Opc = RISCV::SH3ADD; + break; + default: + llvm_unreachable("unexpected result of isShifted359"); } if (ShiftAmount) BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 42a0c4c..c5eddb9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -25,6 +25,25 @@ namespace llvm { +// If Value is of the form C1<<C2, where C1 = 3, 5 or 9, +// returns log2(C1 - 1) and assigns Shift = C2. +// Otherwise, returns 0. +template <typename T> int isShifted359(T Value, int &Shift) { + if (Value == 0) + return 0; + Shift = llvm::countr_zero(Value); + switch (Value >> Shift) { + case 3: + return 1; + case 5: + return 2; + case 9: + return 3; + default: + return 0; + } +} + class RISCVSubtarget; static const MachineMemOperand::Flags MONontemporalBit0 = diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td index 7cf6d5f..87b9f45 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td @@ -9,8 +9,8 @@ // This file describes the RISC-V instructions from the standard atomic 'Za*' // extensions: // - Zawrs (v1.0) : Wait-on-Reservation-Set. -// - Zacas (v1.0-rc1) : Atomic Compare-and-Swap. -// - Zabha (v1.0-rc1) : Byte and Halfword Atomic Memory Operations. +// - Zacas (v1.0) : Atomic Compare-and-Swap. +// - Zabha (v1.0) : Byte and Halfword Atomic Memory Operations. // //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 3f2e7db..3e07eff 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -567,9 +567,12 @@ multiclass SiFive7WriteResBase<int VLEN, defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, VLEN>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - defm : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [VCQ, VL], - 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), - [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; + defm : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, + // Predicated + [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)], + // Not Predicated + [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)], + mx, IsWorstCase>; let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { defm : LMULWriteResMX<"WriteVLDUX8", [VCQ, VL], mx, IsWorstCase>; defm : LMULWriteResMX<"WriteVLDOX8", [VCQ, VL], mx, IsWorstCase>; @@ -587,9 +590,12 @@ multiclass SiFive7WriteResBase<int VLEN, defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, VLEN>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - defm : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [VCQ, VL], - 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), - [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; + defm : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, + // Predicated + [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)], + // Not Predicated + [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)], + mx, IsWorstCase>; let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { defm : LMULWriteResMX<"WriteVLDUX16", [VCQ, VL], mx, IsWorstCase>; defm : LMULWriteResMX<"WriteVLDOX16", [VCQ, VL], mx, IsWorstCase>; @@ -604,9 +610,12 @@ multiclass SiFive7WriteResBase<int VLEN, defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, VLEN>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - defm : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [VCQ, VL], - 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), - [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; + defm : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, + // Predicated + [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)], + // Not Predicated + [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)], + mx, IsWorstCase>; let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { defm : LMULWriteResMX<"WriteVLDUX32", [VCQ, VL], mx, IsWorstCase>; defm : LMULWriteResMX<"WriteVLDOX32", [VCQ, VL], mx, IsWorstCase>; @@ -621,9 +630,12 @@ multiclass SiFive7WriteResBase<int VLEN, defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, VLEN>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - defm : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [VCQ, VL], - 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), - [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; + defm : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, + // Predicated + [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)], + // Not Predicated + [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)], + mx, IsWorstCase>; let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { defm : LMULWriteResMX<"WriteVLDUX64", [VCQ, VL], mx, IsWorstCase>; defm : LMULWriteResMX<"WriteVLDOX64", [VCQ, VL], mx, IsWorstCase>; diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td index 6c7658c..01a4308 100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleV.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -67,42 +67,41 @@ multiclass LMULSEWWriteResMXSEW<string name, list<ProcResourceKind> resources, // ReleaseAtCycles predCycles if the SchedPredicate Pred is true, otherwise has // Latency noPredLat and ReleaseAtCycles noPredCycles. The WorstCase SchedWrite // is created similarly if IsWorstCase is true. -multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred, - list<ProcResourceKind> resources, - int predLat, list<int> predAcquireCycles, - list<int> predReleaseCycles, int noPredLat, - list<int> noPredAcquireCycles, - list<int> noPredReleaseCycles, - string mx, bit IsWorstCase> { - defvar nameMX = name # "_" # mx; - +multiclass LMULWriteResVariantImpl<string name, string writeResName, SchedPredicateBase Pred, + list<ProcResourceKind> predResources, + int predLat, list<int> predAcquireCycles, + list<int> predReleaseCycles, + list<ProcResourceKind> noPredResources, + int noPredLat, list<int> noPredAcquireCycles, + list<int> noPredReleaseCycles, + bit IsWorstCase> { // Define the different behaviors - def nameMX # "_Pred" : SchedWriteRes<resources>{ + def writeResName # "_Pred" : SchedWriteRes<predResources>{ let Latency = predLat; let AcquireAtCycles = predAcquireCycles; let ReleaseAtCycles = predReleaseCycles; } - def nameMX # "_NoPred" : SchedWriteRes<resources> { + def writeResName # "_NoPred" : SchedWriteRes<noPredResources> { let Latency = noPredLat; let AcquireAtCycles = noPredAcquireCycles; let ReleaseAtCycles = noPredReleaseCycles; } // Define SchedVars - def nameMX # PredSchedVar - : SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>; - def nameMX # NoPredSchedVar - : SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX #"_NoPred")]>; + def writeResName # PredSchedVar + : SchedVar<Pred, [!cast<SchedWriteRes>(NAME # writeResName # "_Pred")]>; + def writeResName # NoPredSchedVar + : SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # writeResName #"_NoPred")]>; // Allow multiclass to refer to SchedVars -- need to have NAME prefix. - defvar PredSchedVar = !cast<SchedVar>(NAME # nameMX # PredSchedVar); - defvar NoPredSchedVar = !cast<SchedVar>(NAME # nameMX # NoPredSchedVar); + defvar PredSchedVar = !cast<SchedVar>(NAME # writeResName # PredSchedVar); + defvar NoPredSchedVar = !cast<SchedVar>(NAME # writeResName # NoPredSchedVar); // Tie behavior to predicate - def NAME # nameMX # "_Variant" + def NAME # writeResName # "_Variant" : SchedWriteVariant<[PredSchedVar, NoPredSchedVar]>; def : SchedAlias< - !cast<SchedReadWrite>(nameMX), - !cast<SchedReadWrite>(NAME # nameMX # "_Variant")>; + !cast<SchedReadWrite>(writeResName), + !cast<SchedReadWrite>(NAME # writeResName # "_Variant")>; if IsWorstCase then { def NAME # name # "_WorstCase_Variant" @@ -113,6 +112,22 @@ multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred, } } +multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred, + list<ProcResourceKind> predResources, + int predLat, list<int> predAcquireCycles, + list<int> predReleaseCycles, + list<ProcResourceKind> noPredResources, + int noPredLat, list<int> noPredAcquireCycles, + list<int> noPredReleaseCycles, + string mx, bit IsWorstCase> { + defm "" : LMULWriteResVariantImpl<name, name # "_" # mx, Pred, predResources, + predLat, predAcquireCycles, + predReleaseCycles, noPredResources, + noPredLat, noPredAcquireCycles, + noPredReleaseCycles, + IsWorstCase>; +} + // Define multiclasses to define SchedWrite, SchedRead, WriteRes, and // ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the // SchedMxList variants above. Each multiclass is responsible for defining diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index ee25f69..7bc0b5b 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2747,20 +2747,72 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, Intrinsic::ID IID = Inst->getIntrinsicID(); LLVMContext &C = Inst->getContext(); bool HasMask = false; + + auto getSegNum = [](const IntrinsicInst *II, unsigned PtrOperandNo, + bool IsWrite) -> int64_t { + if (auto *TarExtTy = + dyn_cast<TargetExtType>(II->getArgOperand(0)->getType())) + return TarExtTy->getIntParameter(0); + + return 1; + }; + switch (IID) { case Intrinsic::riscv_vle_mask: case Intrinsic::riscv_vse_mask: + case Intrinsic::riscv_vlseg2_mask: + case Intrinsic::riscv_vlseg3_mask: + case Intrinsic::riscv_vlseg4_mask: + case Intrinsic::riscv_vlseg5_mask: + case Intrinsic::riscv_vlseg6_mask: + case Intrinsic::riscv_vlseg7_mask: + case Intrinsic::riscv_vlseg8_mask: + case Intrinsic::riscv_vsseg2_mask: + case Intrinsic::riscv_vsseg3_mask: + case Intrinsic::riscv_vsseg4_mask: + case Intrinsic::riscv_vsseg5_mask: + case Intrinsic::riscv_vsseg6_mask: + case Intrinsic::riscv_vsseg7_mask: + case Intrinsic::riscv_vsseg8_mask: HasMask = true; [[fallthrough]]; case Intrinsic::riscv_vle: - case Intrinsic::riscv_vse: { + case Intrinsic::riscv_vse: + case Intrinsic::riscv_vlseg2: + case Intrinsic::riscv_vlseg3: + case Intrinsic::riscv_vlseg4: + case Intrinsic::riscv_vlseg5: + case Intrinsic::riscv_vlseg6: + case Intrinsic::riscv_vlseg7: + case Intrinsic::riscv_vlseg8: + case Intrinsic::riscv_vsseg2: + case Intrinsic::riscv_vsseg3: + case Intrinsic::riscv_vsseg4: + case Intrinsic::riscv_vsseg5: + case Intrinsic::riscv_vsseg6: + case Intrinsic::riscv_vsseg7: + case Intrinsic::riscv_vsseg8: { // Intrinsic interface: // riscv_vle(merge, ptr, vl) // riscv_vle_mask(merge, ptr, mask, vl, policy) // riscv_vse(val, ptr, vl) // riscv_vse_mask(val, ptr, mask, vl, policy) + // riscv_vlseg#(merge, ptr, vl, sew) + // riscv_vlseg#_mask(merge, ptr, mask, vl, policy, sew) + // riscv_vsseg#(val, ptr, vl, sew) + // riscv_vsseg#_mask(val, ptr, mask, vl, sew) bool IsWrite = Inst->getType()->isVoidTy(); Type *Ty = IsWrite ? Inst->getArgOperand(0)->getType() : Inst->getType(); + // The results of segment loads are TargetExtType. + if (auto *TarExtTy = dyn_cast<TargetExtType>(Ty)) { + unsigned SEW = + 1 << cast<ConstantInt>(Inst->getArgOperand(Inst->arg_size() - 1)) + ->getZExtValue(); + Ty = TarExtTy->getTypeParameter(0U); + Ty = ScalableVectorType::get( + IntegerType::get(C, SEW), + cast<ScalableVectorType>(Ty)->getMinNumElements() * 8 / SEW); + } const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID); unsigned VLIndex = RVVIInfo->VLOperand; unsigned PtrOperandNo = VLIndex - 1 - HasMask; @@ -2771,23 +2823,72 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, if (HasMask) Mask = Inst->getArgOperand(VLIndex - 1); Value *EVL = Inst->getArgOperand(VLIndex); + unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite); + // RVV uses contiguous elements as a segment. + if (SegNum > 1) { + unsigned ElemSize = Ty->getScalarSizeInBits(); + auto *SegTy = IntegerType::get(C, ElemSize * SegNum); + Ty = VectorType::get(SegTy, cast<VectorType>(Ty)); + } Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty, Alignment, Mask, EVL); return true; } case Intrinsic::riscv_vlse_mask: case Intrinsic::riscv_vsse_mask: + case Intrinsic::riscv_vlsseg2_mask: + case Intrinsic::riscv_vlsseg3_mask: + case Intrinsic::riscv_vlsseg4_mask: + case Intrinsic::riscv_vlsseg5_mask: + case Intrinsic::riscv_vlsseg6_mask: + case Intrinsic::riscv_vlsseg7_mask: + case Intrinsic::riscv_vlsseg8_mask: + case Intrinsic::riscv_vssseg2_mask: + case Intrinsic::riscv_vssseg3_mask: + case Intrinsic::riscv_vssseg4_mask: + case Intrinsic::riscv_vssseg5_mask: + case Intrinsic::riscv_vssseg6_mask: + case Intrinsic::riscv_vssseg7_mask: + case Intrinsic::riscv_vssseg8_mask: HasMask = true; [[fallthrough]]; case Intrinsic::riscv_vlse: - case Intrinsic::riscv_vsse: { + case Intrinsic::riscv_vsse: + case Intrinsic::riscv_vlsseg2: + case Intrinsic::riscv_vlsseg3: + case Intrinsic::riscv_vlsseg4: + case Intrinsic::riscv_vlsseg5: + case Intrinsic::riscv_vlsseg6: + case Intrinsic::riscv_vlsseg7: + case Intrinsic::riscv_vlsseg8: + case Intrinsic::riscv_vssseg2: + case Intrinsic::riscv_vssseg3: + case Intrinsic::riscv_vssseg4: + case Intrinsic::riscv_vssseg5: + case Intrinsic::riscv_vssseg6: + case Intrinsic::riscv_vssseg7: + case Intrinsic::riscv_vssseg8: { // Intrinsic interface: // riscv_vlse(merge, ptr, stride, vl) // riscv_vlse_mask(merge, ptr, stride, mask, vl, policy) // riscv_vsse(val, ptr, stride, vl) // riscv_vsse_mask(val, ptr, stride, mask, vl, policy) + // riscv_vlsseg#(merge, ptr, offset, vl, sew) + // riscv_vlsseg#_mask(merge, ptr, offset, mask, vl, policy, sew) + // riscv_vssseg#(val, ptr, offset, vl, sew) + // riscv_vssseg#_mask(val, ptr, offset, mask, vl, sew) bool IsWrite = Inst->getType()->isVoidTy(); Type *Ty = IsWrite ? Inst->getArgOperand(0)->getType() : Inst->getType(); + // The results of segment loads are TargetExtType. + if (auto *TarExtTy = dyn_cast<TargetExtType>(Ty)) { + unsigned SEW = + 1 << cast<ConstantInt>(Inst->getArgOperand(Inst->arg_size() - 1)) + ->getZExtValue(); + Ty = TarExtTy->getTypeParameter(0U); + Ty = ScalableVectorType::get( + IntegerType::get(C, SEW), + cast<ScalableVectorType>(Ty)->getMinNumElements() * 8 / SEW); + } const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID); unsigned VLIndex = RVVIInfo->VLOperand; unsigned PtrOperandNo = VLIndex - 2 - HasMask; @@ -2809,6 +2910,13 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, if (HasMask) Mask = Inst->getArgOperand(VLIndex - 1); Value *EVL = Inst->getArgOperand(VLIndex); + unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite); + // RVV uses contiguous elements as a segment. + if (SegNum > 1) { + unsigned ElemSize = Ty->getScalarSizeInBits(); + auto *SegTy = IntegerType::get(C, ElemSize * SegNum); + Ty = VectorType::get(SegTy, cast<VectorType>(Ty)); + } Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty, Alignment, Mask, EVL, Stride); return true; @@ -2817,19 +2925,89 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::riscv_vluxei_mask: case Intrinsic::riscv_vsoxei_mask: case Intrinsic::riscv_vsuxei_mask: + case Intrinsic::riscv_vloxseg2_mask: + case Intrinsic::riscv_vloxseg3_mask: + case Intrinsic::riscv_vloxseg4_mask: + case Intrinsic::riscv_vloxseg5_mask: + case Intrinsic::riscv_vloxseg6_mask: + case Intrinsic::riscv_vloxseg7_mask: + case Intrinsic::riscv_vloxseg8_mask: + case Intrinsic::riscv_vluxseg2_mask: + case Intrinsic::riscv_vluxseg3_mask: + case Intrinsic::riscv_vluxseg4_mask: + case Intrinsic::riscv_vluxseg5_mask: + case Intrinsic::riscv_vluxseg6_mask: + case Intrinsic::riscv_vluxseg7_mask: + case Intrinsic::riscv_vluxseg8_mask: + case Intrinsic::riscv_vsoxseg2_mask: + case Intrinsic::riscv_vsoxseg3_mask: + case Intrinsic::riscv_vsoxseg4_mask: + case Intrinsic::riscv_vsoxseg5_mask: + case Intrinsic::riscv_vsoxseg6_mask: + case Intrinsic::riscv_vsoxseg7_mask: + case Intrinsic::riscv_vsoxseg8_mask: + case Intrinsic::riscv_vsuxseg2_mask: + case Intrinsic::riscv_vsuxseg3_mask: + case Intrinsic::riscv_vsuxseg4_mask: + case Intrinsic::riscv_vsuxseg5_mask: + case Intrinsic::riscv_vsuxseg6_mask: + case Intrinsic::riscv_vsuxseg7_mask: + case Intrinsic::riscv_vsuxseg8_mask: HasMask = true; [[fallthrough]]; case Intrinsic::riscv_vloxei: case Intrinsic::riscv_vluxei: case Intrinsic::riscv_vsoxei: - case Intrinsic::riscv_vsuxei: { + case Intrinsic::riscv_vsuxei: + case Intrinsic::riscv_vloxseg2: + case Intrinsic::riscv_vloxseg3: + case Intrinsic::riscv_vloxseg4: + case Intrinsic::riscv_vloxseg5: + case Intrinsic::riscv_vloxseg6: + case Intrinsic::riscv_vloxseg7: + case Intrinsic::riscv_vloxseg8: + case Intrinsic::riscv_vluxseg2: + case Intrinsic::riscv_vluxseg3: + case Intrinsic::riscv_vluxseg4: + case Intrinsic::riscv_vluxseg5: + case Intrinsic::riscv_vluxseg6: + case Intrinsic::riscv_vluxseg7: + case Intrinsic::riscv_vluxseg8: + case Intrinsic::riscv_vsoxseg2: + case Intrinsic::riscv_vsoxseg3: + case Intrinsic::riscv_vsoxseg4: + case Intrinsic::riscv_vsoxseg5: + case Intrinsic::riscv_vsoxseg6: + case Intrinsic::riscv_vsoxseg7: + case Intrinsic::riscv_vsoxseg8: + case Intrinsic::riscv_vsuxseg2: + case Intrinsic::riscv_vsuxseg3: + case Intrinsic::riscv_vsuxseg4: + case Intrinsic::riscv_vsuxseg5: + case Intrinsic::riscv_vsuxseg6: + case Intrinsic::riscv_vsuxseg7: + case Intrinsic::riscv_vsuxseg8: { // Intrinsic interface (only listed ordered version): // riscv_vloxei(merge, ptr, index, vl) // riscv_vloxei_mask(merge, ptr, index, mask, vl, policy) // riscv_vsoxei(val, ptr, index, vl) // riscv_vsoxei_mask(val, ptr, index, mask, vl, policy) + // riscv_vloxseg#(merge, ptr, index, vl, sew) + // riscv_vloxseg#_mask(merge, ptr, index, mask, vl, policy, sew) + // riscv_vsoxseg#(val, ptr, index, vl, sew) + // riscv_vsoxseg#_mask(val, ptr, index, mask, vl, sew) bool IsWrite = Inst->getType()->isVoidTy(); Type *Ty = IsWrite ? Inst->getArgOperand(0)->getType() : Inst->getType(); + // The results of segment loads are TargetExtType. + if (auto *TarExtTy = dyn_cast<TargetExtType>(Ty)) { + unsigned SEW = + 1 << cast<ConstantInt>(Inst->getArgOperand(Inst->arg_size() - 1)) + ->getZExtValue(); + Ty = TarExtTy->getTypeParameter(0U); + Ty = ScalableVectorType::get( + IntegerType::get(C, SEW), + cast<ScalableVectorType>(Ty)->getMinNumElements() * 8 / SEW); + } const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID); unsigned VLIndex = RVVIInfo->VLOperand; unsigned PtrOperandNo = VLIndex - 2 - HasMask; @@ -2845,6 +3023,13 @@ bool RISCVTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, Mask = ConstantInt::getTrue(MaskType); } Value *EVL = Inst->getArgOperand(VLIndex); + unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite); + // RVV uses contiguous elements as a segment. + if (SegNum > 1) { + unsigned ElemSize = Ty->getScalarSizeInBits(); + auto *SegTy = IntegerType::get(C, ElemSize * SegNum); + Ty = VectorType::get(SegTy, cast<VectorType>(Ty)); + } Value *OffsetOp = Inst->getArgOperand(PtrOperandNo + 1); Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty, Align(1), Mask, EVL, |