diff options
author | Min-Yih Hsu <min.hsu@sifive.com> | 2024-11-12 15:29:40 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-12 15:29:40 -0800 |
commit | 84e95beae980466ffcc555297e0e34d23fca8a76 (patch) | |
tree | d6e46e1ab3381834212623b35eb07647ae2d2dbc | |
parent | 7b5e285d16090c2ddf4ee539c410d24bde52cbea (diff) | |
download | llvm-84e95beae980466ffcc555297e0e34d23fca8a76.zip llvm-84e95beae980466ffcc555297e0e34d23fca8a76.tar.gz llvm-84e95beae980466ffcc555297e0e34d23fca8a76.tar.bz2 |
[RISCV] Update SiFive P600's scheduling model on RVV instructions (#115243)
The biggest change is assigning vector crypto instructions to the
correct processor resource.
The majority of these changes are guided by our RVV-capable
llvm-exegesis.
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 578 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/RISCV/SiFiveP600/mask.s | 129 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s | 816 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s | 338 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s | 46 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s | 54 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s | 118 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s | 70 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s | 32 |
9 files changed, 1735 insertions, 446 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td index 0543b99..c2d93d4 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td @@ -50,6 +50,240 @@ class SiFiveP600GetCyclesSegmented<string mx, int sew, int nf> { int c = !mul(VLUpperBound, nf); } +class SiFiveP600VSM3CCycles<string mx> { + // c = ceil(LMUL / 2) + int c = !cond(!eq(mx, "M2") : 1, + !eq(mx, "M4") : 2, + !eq(mx, "M8") : 4, + true : 1); +} + +class SiFiveP600RVVMultiplier<string mx> { + int c = !if(!eq(mx, "M8"), 2, 1); +} + +// ====================================================================== +// The latency and occupancy data in this section are primarily evaluated +// from llvm-exegesis. +// ====================================================================== + +class SiFiveP600VCryptoLatency<string mx> { + int c = !cond( + !eq(mx, "M4"): 4, + !eq(mx, "M8"): 8, + true: 2 + ); +} + +class SiFiveP600VFMinMaxReduction<string mx, int sew> { + defvar E64Lat = !cond( + !eq(mx, "M1") : 4, + !eq(mx, "M2") : 6, + !eq(mx, "M4") : 8, + !eq(mx, "M8") : 10, + true: 2 + ); + + defvar E64Cycles = !cond( + !eq(mx, "M1") : 3, + !eq(mx, "M2") : 4, + !eq(mx, "M4") : 5, + !eq(mx, "M8") : 6, + true: 2 + ); + + int latency = !if(!eq(sew, 64), E64Lat, !add(E64Lat, 2)); + int cycles = !if(!eq(sew, 64), E64Cycles, !add(E64Cycles, 1)); +} + +class SiFiveP600VFUnorderedReduction<string mx, int sew> { + defvar E64Lat = !cond( + !eq(mx, "M1") : 6, + !eq(mx, "M2") : 12, + !eq(mx, "M4") : 18, + !eq(mx, "M8") : 24, + true: 2 + ); + + defvar E32Cycles = !cond( + !eq(mx, "M1") : 10, + !eq(mx, "M2") : 10, + !eq(mx, "M4") : 11, + !eq(mx, "M8") : 11, + true: 6 + ); + + int latency = !if(!eq(sew, 64), E64Lat, !add(E64Lat, 4)); + int cycles = !if(!eq(sew, 64), 6, E32Cycles); +} + +class SiFiveP600VFWidenUnorderedReduction<string mx> { + int latency = !cond( + !eq(mx, "M1") : 10, + !eq(mx, "M2") : 18, + !eq(mx, "M4") : 24, + !eq(mx, "M8") : 30, + true: 6 + ); +} + +class SiFiveP600VFOrderedReduction<string mx, int sew> { + defvar Base = !if(!eq(sew, 64), 6, 10); + int c = !cond( + !eq(mx, "M1") : Base, + !eq(mx, "M2") : !mul(Base, 2), + !eq(mx, "M4") : !mul(Base, 4), + !eq(mx, "M8") : !mul(Base, 8), + true: 6 + ); +} + +class SiFiveP600VIReductionLatency<string mx> { + int c = !cond( + !eq(mx, "M2") : 4, + !eq(mx, "M4") : 8, + !eq(mx, "M8") : 16, + // M1 and lower + true: 2 + ); +} + +class SiFiveP600VIMinMaxReductionLatency<string mx, int sew> { + // +-----+-----+-----+-----+----+ + // | | E64 | E32 | E16 | E8 | + // +-----+-----+-----+-----+----+ + // | MF8 | X | X | X | 4 | + // +-----+-----+-----+-----+----+ + // | MF4 | X | X | 4 | 6 | + // +-----+-----+-----+-----+----+ + // | MF2 | X | 4 | 6 | 8 | + // +-----+-----+-----+-----+----+ + // | M1 | 4 | 6 | 8 | 10 | + // +-----+-----+-----+-----+----+ + // | M2 | 6 | 8 | 10 | 12 | + // +-----+-----+-----+-----+----+ + // | M4 | 8 | 10 | 12 | 14 | + // +-----+-----+-----+-----+----+ + // | M8 | 10 | 12 | 14 | 16 | + // +-----+-----+-----+-----+----+ + defvar BaseIndex = !cond( + !eq(sew, 64): 0, + !eq(sew, 32): 1, + !eq(sew, 16): 2, + !eq(sew, 8): 3 + ); + + defvar Latencies = [4, 6, 8, 10, 12, 14, 16]; + + int c = !cond( + !eq(mx, "M1") : Latencies[BaseIndex], + !eq(mx, "M2") : Latencies[!add(BaseIndex, 1)], + !eq(mx, "M4") : Latencies[!add(BaseIndex, 2)], + !eq(mx, "M8") : Latencies[!add(BaseIndex, 3)], + // Fractional + !eq(mx, "MF2"): Latencies[!sub(BaseIndex, 1)], + !eq(mx, "MF4"): Latencies[!sub(BaseIndex, 2)], + !eq(mx, "MF8"): Latencies[!sub(BaseIndex, 3)], + ); +} + +class SiFiveP600VIMinMaxReductionCycles<string mx, int sew> { + // +-----+-----+-----+-----+----+ + // | | E64 | E32 | E16 | E8 | + // +-----+-----+-----+-----+----+ + // | MF8 | X | X | X | 3 | + // +-----+-----+-----+-----+----+ + // | MF4 | X | X | 3 | 5 | + // +-----+-----+-----+-----+----+ + // | MF2 | X | 3 | 5 | 6 | + // +-----+-----+-----+-----+----+ + // | M1 | 3 | 4 | 6 | 8 | + // +-----+-----+-----+-----+----+ + // | M2 | 4 | 5 | 8 | 9 | + // +-----+-----+-----+-----+----+ + // | M4 | 5 | 6 | 10 | 11 | + // +-----+-----+-----+-----+----+ + // | M8 | 7 | 8 | 9 | 11 | + // +-----+-----+-----+-----+----+ + defvar Index = !cond( + !eq(sew, 64): 0, + !eq(sew, 32): 1, + !eq(sew, 16): 2, + !eq(sew, 8): 3 + ); + + defvar Cycles = [ + [0, 0, 0, 3], + [0, 0, 3, 5], + [0, 3, 5, 6], + [3, 4, 6, 8], + [4, 5, 8, 9], + [5, 6, 10, 11], + [7, 8, 9, 11] + ]; + + int c = !cond( + !eq(mx, "MF8"): Cycles[0][Index], + !eq(mx, "MF4"): Cycles[1][Index], + !eq(mx, "MF2"): Cycles[2][Index], + !eq(mx, "M1"): Cycles[3][Index], + !eq(mx, "M2"): Cycles[4][Index], + !eq(mx, "M4"): Cycles[5][Index], + !eq(mx, "M8"): Cycles[6][Index], + ); +} + +class SiFiveP600VSlide1<string mx> { + int c = !cond( + !eq(mx, "M2") : 3, + !eq(mx, "M4") : 4, + !eq(mx, "M8") : 8, + // M1 and lower + true: 2 + ); +} + +class SiFiveP600VSlideI<string mx> { + int c = !cond( + !eq(mx, "M2") : 4, + !eq(mx, "M4") : 6, + !eq(mx, "M8") : 8, + // M1 and lower + true: 2 + ); +} + +class SiFiveP600VSlideXComplex<string mx, bit isUp = false> { + int latency = !cond( + !eq(mx, "M2") : 11, + !eq(mx, "M4") : 14, + !eq(mx, "M8") : 20 + ); + + int cycles = !cond( + !eq(mx, "M2") : !if(isUp, 10, 11), + !eq(mx, "M4") : !if(isUp, 12, 14), + !eq(mx, "M8") : !if(isUp, 16, 20) + ); +} + +class SiFiveP600VPermutationComplex<string mx> { + int c = !cond( + !eq(mx, "M2") : 12, + !eq(mx, "M4") : 16, + !eq(mx, "M8") : 24 + ); +} + +class SiFiveP600VSHA2MSCycles<string mx, int sew> { + int c = !cond( + !eq(mx, "M2") : !if(!eq(sew, 32), 2, 3), + !eq(mx, "M4") : !if(!eq(sew, 32), 4, 6), + !eq(mx, "M8") : !if(!eq(sew, 32), 8, 12), + true: 1 + ); +} + // SiFiveP600 machine model for scheduling and other instruction cost heuristics. def SiFiveP600Model : SchedMachineModel { let IssueWidth = 4; // 4 micro-ops are dispatched per cycle. @@ -95,6 +329,12 @@ def SiFiveP600FloatDiv : ProcResource<1>; def SiFiveP600VEXQ0 : ProcResource<1>; def SiFiveP600VEXQ1 : ProcResource<1>; def SiFiveP600VectorArith : ProcResGroup<[SiFiveP600VEXQ0, SiFiveP600VEXQ1]>; + +// Only VEXQ0 has mask unit. +defvar SiFiveP600VectorMask = SiFiveP600VEXQ0; +// Only VEXQ0 has vector crypto. +defvar SiFiveP600VectorCrypto = SiFiveP600VEXQ0; + def SiFiveP600VLD : ProcResource<1>; def SiFiveP600VST : ProcResource<1>; def SiFiveP600VDiv : ProcResource<1>; @@ -386,7 +626,7 @@ foreach LMul = [1, 2, 4, 8] in { def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [SiFiveP600VLD]>; def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [SiFiveP600VST]>; } - let Latency = LMul, ReleaseAtCycles = [LMul] in { + let Latency = 2, ReleaseAtCycles = [LMul] in { def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [SiFiveP600VectorArith]>; } } @@ -395,37 +635,42 @@ foreach LMul = [1, 2, 4, 8] in { foreach mx = SchedMxList in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 1, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVIALUV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIALUX", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIALUI", [SiFiveP600VectorArith], mx, IsWorstCase>; + let Latency = 2, ReleaseAtCycles = [LMulLat] in { defm "" : LMULWriteResMX<"WriteVExtV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUX", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUI", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICmpV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICmpX", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICmpI", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMovV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMovX", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMovI", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpV", [SiFiveP600VectorMask], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpX", [SiFiveP600VectorMask], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpI", [SiFiveP600VectorMask], mx, IsWorstCase>; } - let Latency = 6, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVShiftV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVShiftX", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVShiftI", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMulV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMulX", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFiveP600VectorArith], mx, IsWorstCase>; + let ReleaseAtCycles = [LMulLat] in { + let Latency = 6 in { + defm "" : LMULWriteResMX<"WriteVIMulV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulX", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFiveP600VectorArith], mx, IsWorstCase>; + } + + let Latency = !mul(2, SiFiveP600RVVMultiplier<mx>.c) in { + defm "" : LMULWriteResMX<"WriteVIALUV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIALUX", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIALUI", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUX", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUI", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFiveP600VectorMask], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFiveP600VectorMask], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFiveP600VectorMask], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovX", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovI", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftI", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftX", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovV", [SiFiveP600VectorArith], mx, IsWorstCase>; + } } } // Widening @@ -440,7 +685,26 @@ foreach mx = SchedMxListW in { defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFiveP600VectorArith], mx, IsWorstCase>; + + // Special case for variants with widen operands. + let ReleaseAtCycles = [!mul(LMulLat, 2)] in + def P600WriteVIWALUWidenOp_ # mx : SchedWriteRes<[SiFiveP600VectorArith]>; } + + defvar P600VIWALUBaseSchedRW = [!cast<SchedWrite>("P600WriteVIWALUWidenOp_" # mx), + !cast<SchedRead>("ReadVPassthru_" # mx), + !cast<SchedRead>("ReadVIALUV_" # mx), + !cast<SchedRead>("ReadVIALUV_" # mx)]; + + def : InstRW<P600VIWALUBaseSchedRW, + (instregex "^PseudoVW(ADD|SUB)[U]?_W(V|X)_" # mx # "$")>; + def : InstRW<P600VIWALUBaseSchedRW[0,2,3], + (instregex "^PseudoVW(ADD|SUB)[U]?_WV_" # mx # "_TIED$")>; + + def : InstRW<!listconcat(P600VIWALUBaseSchedRW, [!cast<SchedRead>("ReadVMask")]), + (instregex "^PseudoVW(ADD|SUB)[U]?_W(V|X)_" # mx # "_MASK$")>; + def : InstRW<!listconcat(P600VIWALUBaseSchedRW[0,1,3], [!cast<SchedRead>("ReadVMask")]), + (instregex "^PseudoVW(ADD|SUB)[U]?_WV_" # mx # "_MASK_TIED$")>; } // Worst case needs 51/45/42/72 * lmul cycles for i8/16/32/64. @@ -504,34 +768,34 @@ foreach mx = SchedMxListF in { defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; } - let Latency = 2, ReleaseAtCycles = [LMulLat] in - defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; - let Latency = 3, ReleaseAtCycles = [LMulLat] in - defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; } } foreach mx = SchedMxListF in { foreach sew = SchedSEWSet<mx, isF=1>.val in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; - defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList, isF=1>.c; - let Latency = 1, ReleaseAtCycles = [LMulLat] in { + defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c; + let Latency = !mul(2, SiFiveP600RVVMultiplier<mx>.c), ReleaseAtCycles = [LMulLat] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; } + let Latency = !if(!eq(mx, "M8"), 4, 3), ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; } } foreach mx = SchedMxList in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 3, ReleaseAtCycles = [LMulLat] in + let Latency = !if(!eq(mx, "M8"), 4, 3), ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>; let Latency = 2, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFiveP600VectorMask], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFiveP600VectorMask], mx, IsWorstCase>; } - let Latency = 1, ReleaseAtCycles = [LMulLat] in { + let Latency = !mul(2, SiFiveP600RVVMultiplier<mx>.c), + ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in { defm "" : LMULWriteResMX<"WriteVFClassV", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVFMovV", [SiFiveP600VectorArith], mx, IsWorstCase>; @@ -565,7 +829,31 @@ foreach mx = SchedMxListFW in { defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; + + // Special case for variants with widen operands. + let ReleaseAtCycles = [!mul(LMulLat, 2)] in + def P600WriteVFWALUWidenOp_ # mx # _E # sew : SchedWriteRes<[SiFiveP600VectorArith]>; } + + defvar P600VFWALUBaseSchedRW = [!cast<SchedWrite>("P600WriteVFWALUWidenOp_" # mx # "_E" # sew), + !cast<SchedRead>("ReadVPassthru_" # mx # "_E" # sew), + !cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew)]; + + def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew)]), + (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "$")>; + def : InstRW<[P600VFWALUBaseSchedRW[0], P600VFWALUBaseSchedRW[2], !cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew)], + (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "_TIED$")>; + + def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUF_" # mx # "_E" # sew)]), + (instregex "^PseudoVFW(ADD|SUB)_WFPR" # sew # "_" # mx # "_E" # sew # "$")>; + + def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew), !cast<SchedRead>("ReadVMask")]), + (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "_MASK$")>; + def : InstRW<[P600VFWALUBaseSchedRW[0], P600VFWALUBaseSchedRW[1], !cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew), !cast<SchedRead>("ReadVMask")], + (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "_MASK_TIED$")>; + + def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUF_" # mx # "_E" # sew), !cast<SchedRead>("ReadVMask")]), + (instregex "^PseudoVFW(ADD|SUB)_WFPR" # sew # "_" # mx # "_E" # sew # "_MASK$")>; } } // Narrowing @@ -580,7 +868,7 @@ foreach mx = SchedMxListFW in { foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c; - let Latency = 3, ReleaseAtCycles = [LMulLat] in { + let Latency = 3, ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in { defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; } @@ -608,9 +896,14 @@ foreach mx = SchedMxList in { foreach sew = SchedSEWSet<mx>.val in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList>.c; - let Latency = !add(2, !mul(2, LMulLat)), ReleaseAtCycles = [LMulLat] in { + + let ReleaseAtCycles = [LMulLat] in { + let Latency = SiFiveP600VIReductionLatency<mx>.c in defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>; + + let Latency = SiFiveP600VIMinMaxReductionLatency<mx, sew>.c, + ReleaseAtCycles = [SiFiveP600VIMinMaxReductionCycles<mx, sew>.c] in defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>; } @@ -621,7 +914,7 @@ foreach mx = SchedMxListWRed in { foreach sew = SchedSEWSet<mx, 0, 1>.val in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c; - let Latency = !add(2, !mul(2, LMulLat)), ReleaseAtCycles = [LMulLat] in { + let Latency = SiFiveP600VIReductionLatency<mx>.c, ReleaseAtCycles = [LMulLat] in { defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>; } @@ -632,14 +925,20 @@ foreach mx = SchedMxListF in { foreach sew = SchedSEWSet<mx, 1>.val in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c; - let Latency = !add(6, !mul(6, LMulLat)), ReleaseAtCycles = [LMulLat] in { - defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFiveP600VEXQ1], - mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", - [SiFiveP600VEXQ1], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFiveP600VEXQ1], - mx, sew, IsWorstCase>; - } + let Latency = SiFiveP600VFMinMaxReduction<mx, sew>.latency, + ReleaseAtCycles = [SiFiveP600VFMinMaxReduction<mx, sew>.cycles] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", + [SiFiveP600VEXQ1], mx, sew, IsWorstCase>; + + let Latency = SiFiveP600VFUnorderedReduction<mx, sew>.latency, + ReleaseAtCycles = [SiFiveP600VFUnorderedReduction<mx, sew>.cycles] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFiveP600VEXQ1], + mx, sew, IsWorstCase>; + + let Latency = SiFiveP600VFOrderedReduction<mx, sew>.c, + ReleaseAtCycles = [SiFiveP600VFOrderedReduction<mx, sew>.c] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFiveP600VEXQ1], + mx, sew, IsWorstCase>; } } @@ -647,28 +946,38 @@ foreach mx = SchedMxListFWRed in { foreach sew = SchedSEWSet<mx, 1, 1>.val in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c; - let Latency = !add(6, !mul(6, LMulLat)), ReleaseAtCycles = [LMulLat] in { - defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFiveP600VEXQ1], - mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFiveP600VEXQ1], - mx, sew, IsWorstCase>; - } + let Latency = SiFiveP600VFWidenUnorderedReduction<mx>.latency, + ReleaseAtCycles = [6] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFiveP600VEXQ1], + mx, sew, IsWorstCase>; + + let Latency = SiFiveP600VFOrderedReduction<mx, sew>.c, + ReleaseAtCycles = [SiFiveP600VFOrderedReduction<mx, sew>.c] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFiveP600VEXQ1], + mx, sew, IsWorstCase>; } } // 15. Vector Mask Instructions foreach mx = SchedMxList in { defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 1, ReleaseAtCycles = [1] in { - defm "" : LMULWriteResMX<"WriteVMALUV", [SiFiveP600VEXQ0], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVMPopV", [SiFiveP600VEXQ0], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFiveP600VEXQ0], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFiveP600VEXQ0], mx, IsWorstCase>; + let Latency = 2 in { + defm "" : LMULWriteResMX<"WriteVMALUV", [SiFiveP600VectorMask], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFiveP600VectorMask], mx, IsWorstCase>; + + let ReleaseAtCycles = [2] in { + defm "" : LMULWriteResMX<"WriteVMPopV", [SiFiveP600VectorMask], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFiveP600VectorMask], mx, IsWorstCase>; + } } defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; - let Latency = 1, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVIotaV", [SiFiveP600VEXQ0], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIdxV", [SiFiveP600VEXQ0], mx, IsWorstCase>; + let ReleaseAtCycles = [LMulLat] in { + let Latency = 2 in + defm "" : LMULWriteResMX<"WriteVIotaV", [SiFiveP600VectorMask], mx, IsWorstCase>; + + // vid.v isn't executed by the mask unit. + let Latency = !if(!eq(mx, "M8"), 4, !if(!eq(mx, "M4"), 2, 1)) in + defm "" : LMULWriteResMX<"WriteVIdxV", [SiFiveP600VectorArith], mx, IsWorstCase>; } } @@ -677,12 +986,14 @@ foreach mx = SchedMxList in { foreach mx = SchedMxList in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 2, ReleaseAtCycles = [LMulLat] in { + let ReleaseAtCycles = [LMulLat] in { + let Latency = SiFiveP600VSlideI<mx>.c in defm "" : LMULWriteResMX<"WriteVSlideI", [SiFiveP600VEXQ0], mx, IsWorstCase>; - } - let Latency = 1, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFiveP600VEXQ0], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFiveP600VEXQ0], mx, IsWorstCase>; + + let Latency = SiFiveP600VSlide1<mx>.c in { + defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFiveP600VEXQ0], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFiveP600VEXQ0], mx, IsWorstCase>; + } } } foreach mx = ["MF8", "MF4", "MF2", "M1"] in { @@ -697,19 +1008,19 @@ foreach mx = ["MF8", "MF4", "MF2", "M1"] in { foreach mx = ["M8", "M4", "M2"] in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = !add(4, LMulLat), ReleaseAtCycles = [LMulLat] in { + let Latency = SiFiveP600VSlideXComplex<mx>.latency in { + let ReleaseAtCycles = [SiFiveP600VSlideXComplex<mx, /*isUp=*/true>.cycles] in defm "" : LMULWriteResMX<"WriteVSlideUpX", [SiFiveP600VEXQ1], mx, IsWorstCase>; + let ReleaseAtCycles = [SiFiveP600VSlideXComplex<mx, /*isUp=*/false>.cycles] in defm "" : LMULWriteResMX<"WriteVSlideDownX", [SiFiveP600VEXQ1], mx, IsWorstCase>; } } -let Latency = 2, ReleaseAtCycles = [1] in { - def : WriteRes<WriteVMovSX, [SiFiveP600VectorArith]>; +let Latency = 2, ReleaseAtCycles = [2] in { def : WriteRes<WriteVMovXS, [SiFiveP600VectorArith]>; -} -let Latency = 6, ReleaseAtCycles = [1] in { - def : WriteRes<WriteVMovSF, [SiFiveP600VectorArith]>; + def : WriteRes<WriteVMovSX, [SiFiveP600VectorArith]>; def : WriteRes<WriteVMovFS, [SiFiveP600VectorArith]>; + def : WriteRes<WriteVMovSF, [SiFiveP600VectorArith]>; } // Simple Gather and Compress @@ -735,7 +1046,7 @@ foreach mx = ["MF8", "MF4", "MF2", "M1"] in { foreach mx = ["M2", "M4", "M8"] in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 6, ReleaseAtCycles = [LMulLat] in { + let Latency = 6, ReleaseAtCycles = [SiFiveP600VPermutationComplex<mx>.c] in { defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFiveP600VEXQ1], mx, IsWorstCase>; } } @@ -744,9 +1055,13 @@ foreach mx = ["M2", "M4", "M8"] in { foreach sew = SchedSEWSet<mx>.val in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList>.c; - let Latency = 6, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>; + let Latency = 6 in { + let ReleaseAtCycles = [SiFiveP600VPermutationComplex<mx>.c] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>; + } + + let ReleaseAtCycles = [!add(SiFiveP600VPermutationComplex<mx>.c, 1)] in defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>; } } @@ -766,55 +1081,84 @@ foreach mx = SchedMxList in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c; // Zvbb - let Latency = 2, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVBREVV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVCLZV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVCPOPV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVCTZV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVWSLLV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVWSLLX", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVWSLLI", [SiFiveP600VectorArith], mx, IsWorstCase>; + let ReleaseAtCycles = [LMulLat] in { + let Latency = 2 in { + // FIXME: Exegesis was not able to measure the latency of these instructions. + // We probably should update them at some point. + defm "" : LMULWriteResMX<"WriteVCPOPV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVWSLLV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVWSLLX", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVWSLLI", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + } + + let Latency = SiFiveP600VCryptoLatency<mx>.c in { + defm "" : LMULWriteResMX<"WriteVBREVV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVCLZV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVCTZV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + + def P600WriteVANDN_ # mx : SchedWriteRes<[SiFiveP600VectorCrypto]>; + } } + + // Special case for VANDN -- we execute it on vector crypto unit. + defvar P600VANDNBaseSchedRW = [!cast<SchedWrite>("P600WriteVANDN_" # mx), + // VANDN always merge read operand. + !cast<SchedRead>("ReadVPassthru_" # mx), + !cast<SchedRead>("ReadVIALUV_" # mx), + !cast<SchedRead>("ReadVIALUV_" # mx)]; + def : InstRW<P600VANDNBaseSchedRW, + (instregex "^PseudoVANDN_V(V|X)_" # mx # "$")>; + def : InstRW<!listconcat(P600VANDNBaseSchedRW, [!cast<SchedRead>("ReadVMask")]), + (instregex "^PseudoVANDN_V(V|X)_" # mx # "_MASK$")>; + // Zvbc - let Latency = 2, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVCLMULV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVCLMULX", [SiFiveP600VectorArith], mx, IsWorstCase>; + let Latency = SiFiveP600VCryptoLatency<mx>.c, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVCLMULV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVCLMULX", [SiFiveP600VectorCrypto], mx, IsWorstCase>; } // Zvkb // VANDN uses WriteVIALU[V|X|I] - let Latency = 2, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVBREV8V", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVREV8V", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVRotV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVRotX", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVRotI", [SiFiveP600VectorArith], mx, IsWorstCase>; + let Latency = SiFiveP600VCryptoLatency<mx>.c, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVBREV8V", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVREV8V", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRotV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRotX", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRotI", [SiFiveP600VectorCrypto], mx, IsWorstCase>; } // Zvkg - let Latency = 2, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVGHSHV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVGMULV", [SiFiveP600VectorArith], mx, IsWorstCase>; + let Latency = SiFiveP600VCryptoLatency<mx>.c, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVGHSHV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVGMULV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; } // ZvknhaOrZvknhb + // FIXME: The latency is probably wrong. let Latency = 3, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorArith], mx, IsWorstCase>; - foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in - defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in { + let ReleaseAtCycles = [SiFiveP600VSHA2MSCycles<mx, sew>.c] in + defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorCrypto], mx, sew, IsWorstCase>; + } } // Zvkned - let Latency = 2, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVAESMVV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVAESKF1V", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVAESKF2V", [SiFiveP600VectorArith], mx, IsWorstCase>; + let Latency = 2 in { + let ReleaseAtCycles = [LMulLat] in { + defm "" : LMULWriteResMX<"WriteVAESMVV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAESKF1V", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAESKF2V", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + } + + let ReleaseAtCycles = [!if(!lt(LMulLat, 2), LMulLat, !div(LMulLat, 2))] in + defm "" : LMULWriteResMX<"WriteVAESZV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; } - let Latency = 1, ReleaseAtCycles = [LMulLat] in - defm "" : LMULWriteResMX<"WriteVAESZV", [SiFiveP600VectorArith], mx, IsWorstCase>; // Zvksed + let Latency = 3, ReleaseAtCycles = [SiFiveP600VSM3CCycles<mx>.c] in + defm "" : LMULWriteResMX<"WriteVSM3CV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + let Latency = 6, ReleaseAtCycles = [LMulLat] in + defm "" : LMULWriteResMX<"WriteVSM3MEV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; let Latency = 3, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVSM4KV", [SiFiveP600VEXQ0], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSM4RV", [SiFiveP600VEXQ0], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSM3CV", [SiFiveP600VEXQ0], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSM3MEV", [SiFiveP600VEXQ0], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSM4KV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSM4RV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; } } @@ -1029,7 +1373,7 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>; defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>; defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>; defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>; -defm "" : LMULSEWReadAdvance<"ReadVFRecpV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>; defm "" : LMULReadAdvance<"ReadVFCmpV", 0>; defm "" : LMULReadAdvance<"ReadVFCmpF", 0>; defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>; diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/mask.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/mask.s new file mode 100644 index 0000000..adb45fc --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/mask.s @@ -0,0 +1,129 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=100 < %s | FileCheck %s + +vsetvli zero, zero, e32, m1, ta, ma + +vmslt.vv v0, v4, v20 +vmsle.vv v8, v4, v20 +vmsgt.vv v8, v20, v4 +vmsge.vv v8, v20, v4 +vmseq.vv v8, v4, v20 +vmsne.vv v8, v4, v20 +vmsltu.vv v8, v4, v20 +vmsleu.vv v8, v4, v20 +vmsgtu.vv v8, v20, v4 +vmsgeu.vv v8, v20, v4 + +vmflt.vv v0, v4, v20 +vmfle.vv v8, v4, v20 +vmfgt.vv v8, v20, v4 +vmfge.vv v8, v20, v4 +vmfeq.vv v8, v4, v20 +vmfne.vv v8, v4, v20 + +vmadc.vv v8, v4, v20 +vmsbc.vv v8, v4, v20 + +vfirst.m a2, v4 +vpopc.m a2, v4 + +viota.m v8, v4 + +vmsbf.m v8, v4 +vmsif.m v8, v4 +vmsof.m v8, v4 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 2500 +# CHECK-NEXT: Total Cycles: 2605 +# CHECK-NEXT: Total uOps: 2500 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 0.96 +# CHECK-NEXT: IPC: 0.96 +# CHECK-NEXT: Block RThroughput: 26.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, ta, ma +# CHECK-NEXT: 1 2 1.00 vmslt.vv v0, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmsle.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmslt.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmsle.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmseq.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmsne.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmsltu.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmsleu.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmsltu.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmsleu.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmflt.vv v0, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmfle.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmflt.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmfle.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmfeq.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmfne.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmadc.vv v8, v4, v20 +# CHECK-NEXT: 1 2 1.00 vmsbc.vv v8, v4, v20 +# CHECK-NEXT: 1 2 2.00 vfirst.m a2, v4 +# CHECK-NEXT: 1 2 2.00 vcpop.m a2, v4 +# CHECK-NEXT: 1 2 1.00 viota.m v8, v4 +# CHECK-NEXT: 1 2 1.00 vmsbf.m v8, v4 +# CHECK-NEXT: 1 2 1.00 vmsif.m v8, v4 +# CHECK-NEXT: 1 2 1.00 vmsof.m v8, v4 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SiFiveP600Div +# CHECK-NEXT: [1] - SiFiveP600FEXQ0 +# CHECK-NEXT: [2] - SiFiveP600FEXQ1 +# CHECK-NEXT: [3] - SiFiveP600FloatDiv +# CHECK-NEXT: [4] - SiFiveP600IEXQ0 +# CHECK-NEXT: [5] - SiFiveP600IEXQ1 +# CHECK-NEXT: [6] - SiFiveP600IEXQ2 +# CHECK-NEXT: [7] - SiFiveP600IEXQ3 +# CHECK-NEXT: [8.0] - SiFiveP600LDST +# CHECK-NEXT: [8.1] - SiFiveP600LDST +# CHECK-NEXT: [9] - SiFiveP600VDiv +# CHECK-NEXT: [10] - SiFiveP600VEXQ0 +# CHECK-NEXT: [11] - SiFiveP600VEXQ1 +# CHECK-NEXT: [12] - SiFiveP600VFloatDiv +# CHECK-NEXT: [13] - SiFiveP600VLD +# CHECK-NEXT: [14] - SiFiveP600VST + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] +# CHECK-NEXT: - - - - 1.00 - - - - - - 26.00 - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, ta, ma +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmslt.vv v0, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsle.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmslt.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsle.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmseq.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsne.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsltu.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsleu.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsltu.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsleu.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmflt.vv v0, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmfle.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmflt.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmfle.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmfeq.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmfne.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmadc.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsbc.vv v8, v4, v20 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vfirst.m a2, v4 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vcpop.m a2, v4 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - viota.m v8, v4 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsbf.m v8, v4 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsif.m v8, v4 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmsof.m v8, v4 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s new file mode 100644 index 0000000..3e9dcff --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s @@ -0,0 +1,816 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=1 < %s | FileCheck %s + +vsetvli zero, zero, e8, mf8, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e8, mf4, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e8, mf2, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e8, m1, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e8, m1, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e8, m2, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e8, m4, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e8, m8, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e16, mf8, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e16, mf4, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e16, mf2, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e16, m1, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e16, m1, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e16, m2, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e16, m4, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e16, m8, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e32, mf8, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e32, mf4, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e32, mf2, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e32, m1, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e32, m1, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e32, m2, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e32, m4, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e32, m8, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e64, mf8, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e64, mf4, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e64, mf2, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e64, m1, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e64, m1, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e64, m2, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e64, m4, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e64, m8, tu, mu +vmv1r.v v8, v16 +vsetvli zero, zero, e8, mf8, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e8, mf4, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e8, mf2, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e8, m1, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e8, m1, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e8, m2, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e8, m4, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e8, m8, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e16, mf8, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e16, mf4, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e16, mf2, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e16, m1, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e16, m1, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e16, m2, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e16, m4, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e16, m8, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e32, mf8, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e32, mf4, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e32, mf2, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e32, m1, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e32, m1, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e32, m2, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e32, m4, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e32, m8, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e64, mf8, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e64, mf4, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e64, mf2, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e64, m1, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e64, m1, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e64, m2, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e64, m4, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e64, m8, tu, mu +vmv2r.v v8, v16 +vsetvli zero, zero, e8, mf8, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e8, mf4, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e8, mf2, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e8, m1, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e8, m1, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e8, m2, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e8, m4, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e8, m8, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e16, mf8, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e16, mf4, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e16, mf2, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e16, m1, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e16, m1, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e16, m2, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e16, m4, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e16, m8, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e32, mf8, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e32, mf4, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e32, mf2, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e32, m1, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e32, m1, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e32, m2, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e32, m4, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e32, m8, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e64, mf8, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e64, mf4, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e64, mf2, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e64, m1, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e64, m1, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e64, m2, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e64, m4, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e64, m8, tu, mu +vmv4r.v v8, v16 +vsetvli zero, zero, e8, mf8, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e8, mf4, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e8, mf2, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e8, m1, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e8, m1, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e8, m2, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e8, m4, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e8, m8, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e16, mf8, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e16, mf4, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e16, mf2, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e16, m1, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e16, m1, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e16, m2, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e16, m4, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e16, m8, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e32, mf8, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e32, mf4, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e32, mf2, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e32, m1, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e32, m1, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e32, m2, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e32, m4, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e32, m8, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e64, mf8, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e64, mf4, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e64, mf2, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e64, m1, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e64, m1, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e64, m2, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e64, m4, tu, mu +vmv8r.v v8, v16 +vsetvli zero, zero, e64, m8, tu, mu +vmv8r.v v8, v16 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 256 +# CHECK-NEXT: Total Cycles: 237 +# CHECK-NEXT: Total uOps: 256 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 1.08 +# CHECK-NEXT: IPC: 1.08 +# CHECK-NEXT: Block RThroughput: 240.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf4, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf2, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m2, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m4, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m2, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu +# CHECK-NEXT: 1 2 0.50 vmv1r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf4, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf2, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m2, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m4, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m2, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu +# CHECK-NEXT: 1 2 1.00 vmv2r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf4, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf2, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m2, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m4, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m2, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu +# CHECK-NEXT: 1 2 2.00 vmv4r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf4, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf2, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m2, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m4, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m2, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu +# CHECK-NEXT: 1 2 4.00 vmv8r.v v8, v16 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SiFiveP600Div +# CHECK-NEXT: [1] - SiFiveP600FEXQ0 +# CHECK-NEXT: [2] - SiFiveP600FEXQ1 +# CHECK-NEXT: [3] - SiFiveP600FloatDiv +# CHECK-NEXT: [4] - SiFiveP600IEXQ0 +# CHECK-NEXT: [5] - SiFiveP600IEXQ1 +# CHECK-NEXT: [6] - SiFiveP600IEXQ2 +# CHECK-NEXT: [7] - SiFiveP600IEXQ3 +# CHECK-NEXT: [8.0] - SiFiveP600LDST +# CHECK-NEXT: [8.1] - SiFiveP600LDST +# CHECK-NEXT: [9] - SiFiveP600VDiv +# CHECK-NEXT: [10] - SiFiveP600VEXQ0 +# CHECK-NEXT: [11] - SiFiveP600VEXQ1 +# CHECK-NEXT: [12] - SiFiveP600VFloatDiv +# CHECK-NEXT: [13] - SiFiveP600VLD +# CHECK-NEXT: [14] - SiFiveP600VST + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] +# CHECK-NEXT: - - - - 128.00 - - - - - - 240.00 240.00 - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vmv1r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vmv2r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vmv4r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu +# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vmv8r.v v8, v16 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vmv8r.v v8, v16 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s index 6191537..3726d82 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s @@ -151,13 +151,13 @@ vwsll.vi v8, v4, 8 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 133 -# CHECK-NEXT: Total Cycles: 166 +# CHECK-NEXT: Total Cycles: 330 # CHECK-NEXT: Total uOps: 133 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.80 -# CHECK-NEXT: IPC: 0.80 -# CHECK-NEXT: Block RThroughput: 164.0 +# CHECK-NEXT: uOps Per Cycle: 0.40 +# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: Block RThroughput: 328.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -169,76 +169,59 @@ vwsll.vi v8, v4, 8 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vror.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vror.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 2 0.50 vwsll.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vandn.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vandn.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vclz.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vctz.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vror.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vror.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vror.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vwsll.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vwsll.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vwsll.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vror.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vror.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 2 0.50 vwsll.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vandn.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vandn.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vclz.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vctz.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vror.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vror.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vror.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vwsll.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vwsll.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vwsll.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vror.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vror.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 2 0.50 vwsll.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vandn.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vandn.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vbrev.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vbrev8.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vrev8.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vclz.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vctz.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vcpop.v v4, v8 +# CHECK-NEXT: 1 2 1.00 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vror.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vror.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vror.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vwsll.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vwsll.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vwsll.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 0.50 vandn.vv v4, v8, v12 -# CHECK-NEXT: 1 1 0.50 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vbrev.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vbrev8.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vrev8.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vclz.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vctz.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vcpop.v v4, v8 -# CHECK-NEXT: 1 2 0.50 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vrol.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vror.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vror.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 2 0.50 vwsll.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vwsll.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vwsll.vi v4, v8, 8 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 vandn.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 vandn.vx v4, v8, a0 +# CHECK-NEXT: 1 2 1.00 vandn.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vandn.vx v4, v8, a0 # CHECK-NEXT: 1 2 1.00 vbrev.v v4, v8 # CHECK-NEXT: 1 2 1.00 vbrev8.v v4, v8 # CHECK-NEXT: 1 2 1.00 vrev8.v v4, v8 @@ -253,9 +236,9 @@ vwsll.vi v8, v4, 8 # CHECK-NEXT: 1 2 1.00 vwsll.vv v4, v8, v12 # CHECK-NEXT: 1 2 1.00 vwsll.vx v4, v8, a0 # CHECK-NEXT: 1 2 1.00 vwsll.vi v4, v8, 8 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 2.00 vandn.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vandn.vx v4, v8, a0 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu +# CHECK-NEXT: 1 2 2.00 vandn.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vandn.vx v4, v8, a0 # CHECK-NEXT: 1 2 2.00 vbrev.v v4, v8 # CHECK-NEXT: 1 2 2.00 vbrev8.v v4, v8 # CHECK-NEXT: 1 2 2.00 vrev8.v v4, v8 @@ -267,40 +250,57 @@ vwsll.vi v8, v4, 8 # CHECK-NEXT: 1 2 2.00 vror.vv v4, v8, v12 # CHECK-NEXT: 1 2 2.00 vror.vx v4, v8, a0 # CHECK-NEXT: 1 2 2.00 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 2 2.00 vwsll.vv v8, v4, v12 -# CHECK-NEXT: 1 2 2.00 vwsll.vx v8, v4, a0 -# CHECK-NEXT: 1 2 2.00 vwsll.vi v8, v4, 8 +# CHECK-NEXT: 1 2 2.00 vwsll.vv v4, v8, v12 +# CHECK-NEXT: 1 2 2.00 vwsll.vx v4, v8, a0 +# CHECK-NEXT: 1 2 2.00 vwsll.vi v4, v8, 8 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: 1 4 4.00 vandn.vv v4, v8, v12 +# CHECK-NEXT: 1 4 4.00 vandn.vx v4, v8, a0 +# CHECK-NEXT: 1 4 4.00 vbrev.v v4, v8 +# CHECK-NEXT: 1 4 4.00 vbrev8.v v4, v8 +# CHECK-NEXT: 1 4 4.00 vrev8.v v4, v8 +# CHECK-NEXT: 1 4 4.00 vclz.v v4, v8 +# CHECK-NEXT: 1 4 4.00 vctz.v v4, v8 +# CHECK-NEXT: 1 2 4.00 vcpop.v v4, v8 +# CHECK-NEXT: 1 4 4.00 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 4 4.00 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 4 4.00 vror.vv v4, v8, v12 +# CHECK-NEXT: 1 4 4.00 vror.vx v4, v8, a0 +# CHECK-NEXT: 1 4 4.00 vror.vi v4, v8, 8 +# CHECK-NEXT: 1 2 4.00 vwsll.vv v8, v4, v12 +# CHECK-NEXT: 1 2 4.00 vwsll.vx v8, v4, a0 +# CHECK-NEXT: 1 2 4.00 vwsll.vi v8, v4, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 1 4.00 vandn.vv v8, v16, v24 -# CHECK-NEXT: 1 1 4.00 vandn.vx v8, v16, a0 -# CHECK-NEXT: 1 2 4.00 vbrev.v v8, v16 -# CHECK-NEXT: 1 2 4.00 vbrev8.v v8, v16 -# CHECK-NEXT: 1 2 4.00 vrev8.v v8, v16 -# CHECK-NEXT: 1 2 4.00 vclz.v v8, v16 -# CHECK-NEXT: 1 2 4.00 vctz.v v8, v16 -# CHECK-NEXT: 1 2 4.00 vcpop.v v8, v16 -# CHECK-NEXT: 1 2 4.00 vrol.vv v8, v16, v24 -# CHECK-NEXT: 1 2 4.00 vrol.vx v8, v16, a0 -# CHECK-NEXT: 1 2 4.00 vror.vv v8, v16, v24 -# CHECK-NEXT: 1 2 4.00 vror.vx v8, v16, a0 -# CHECK-NEXT: 1 2 4.00 vror.vi v8, v16, 8 +# CHECK-NEXT: 1 8 8.00 vandn.vv v8, v16, v24 +# CHECK-NEXT: 1 8 8.00 vandn.vx v8, v16, a0 +# CHECK-NEXT: 1 8 8.00 vbrev.v v8, v16 +# CHECK-NEXT: 1 8 8.00 vbrev8.v v8, v16 +# CHECK-NEXT: 1 8 8.00 vrev8.v v8, v16 +# CHECK-NEXT: 1 8 8.00 vclz.v v8, v16 +# CHECK-NEXT: 1 8 8.00 vctz.v v8, v16 +# CHECK-NEXT: 1 2 8.00 vcpop.v v8, v16 +# CHECK-NEXT: 1 8 8.00 vrol.vv v8, v16, v24 +# CHECK-NEXT: 1 8 8.00 vrol.vx v8, v16, a0 +# CHECK-NEXT: 1 8 8.00 vror.vv v8, v16, v24 +# CHECK-NEXT: 1 8 8.00 vror.vx v8, v16, a0 +# CHECK-NEXT: 1 8 8.00 vror.vi v8, v16, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: 1 1 2.00 vandn.vv v4, v8, v12 -# CHECK-NEXT: 1 1 2.00 vandn.vx v4, v8, a0 -# CHECK-NEXT: 1 2 2.00 vbrev.v v4, v8 -# CHECK-NEXT: 1 2 2.00 vbrev8.v v4, v8 -# CHECK-NEXT: 1 2 2.00 vrev8.v v4, v8 -# CHECK-NEXT: 1 2 2.00 vclz.v v4, v8 -# CHECK-NEXT: 1 2 2.00 vctz.v v4, v8 -# CHECK-NEXT: 1 2 2.00 vcpop.v v4, v8 -# CHECK-NEXT: 1 2 2.00 vrol.vv v4, v8, v12 -# CHECK-NEXT: 1 2 2.00 vrol.vx v4, v8, a0 -# CHECK-NEXT: 1 2 2.00 vror.vv v4, v8, v12 -# CHECK-NEXT: 1 2 2.00 vror.vx v4, v8, a0 -# CHECK-NEXT: 1 2 2.00 vror.vi v4, v8, 8 -# CHECK-NEXT: 1 2 2.00 vwsll.vv v8, v4, v12 -# CHECK-NEXT: 1 2 2.00 vwsll.vx v8, v4, a0 -# CHECK-NEXT: 1 2 2.00 vwsll.vi v8, v4, 8 +# CHECK-NEXT: 1 4 4.00 vandn.vv v4, v8, v12 +# CHECK-NEXT: 1 4 4.00 vandn.vx v4, v8, a0 +# CHECK-NEXT: 1 4 4.00 vbrev.v v4, v8 +# CHECK-NEXT: 1 4 4.00 vbrev8.v v4, v8 +# CHECK-NEXT: 1 4 4.00 vrev8.v v4, v8 +# CHECK-NEXT: 1 4 4.00 vclz.v v4, v8 +# CHECK-NEXT: 1 4 4.00 vctz.v v4, v8 +# CHECK-NEXT: 1 2 4.00 vcpop.v v4, v8 +# CHECK-NEXT: 1 4 4.00 vrol.vv v4, v8, v12 +# CHECK-NEXT: 1 4 4.00 vrol.vx v4, v8, a0 +# CHECK-NEXT: 1 4 4.00 vror.vv v4, v8, v12 +# CHECK-NEXT: 1 4 4.00 vror.vx v4, v8, a0 +# CHECK-NEXT: 1 4 4.00 vror.vi v4, v8, 8 +# CHECK-NEXT: 1 2 4.00 vwsll.vv v8, v4, v12 +# CHECK-NEXT: 1 2 4.00 vwsll.vx v8, v4, a0 +# CHECK-NEXT: 1 2 4.00 vwsll.vi v8, v4, 8 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div @@ -322,140 +322,140 @@ vwsll.vi v8, v4, 8 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 8.00 - - - - - - 164.00 164.00 - - - +# CHECK-NEXT: - - - - 8.00 - - - - - - 328.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vandn.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vbrev.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev8.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrev8.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrev8.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclz.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vctz.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vctz.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vcpop.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrol.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vwsll.vx v4, v8, a0 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vx v4, v8, a0 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vi v4, v8, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vandn.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vbrev.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev8.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrev8.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrev8.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclz.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vctz.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vctz.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vcpop.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrol.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vwsll.vx v4, v8, a0 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vx v4, v8, a0 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vi v4, v8, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vandn.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vbrev.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev8.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrev8.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrev8.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclz.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vctz.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vctz.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vcpop.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrol.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vwsll.vx v4, v8, a0 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vx v4, v8, a0 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vi v4, v8, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vandn.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vandn.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vbrev.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vbrev8.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrev8.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrev8.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclz.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vctz.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vctz.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vcpop.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vrol.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vrol.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vror.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vror.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vwsll.vx v4, v8, a0 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vx v4, v8, a0 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vwsll.vi v4, v8, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vandn.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vandn.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vandn.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vbrev.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vbrev.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vbrev8.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vrev8.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vrev8.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vclz.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vctz.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vctz.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vcpop.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vrol.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vrol.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vrol.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vror.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vror.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vror.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vror.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vror.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vwsll.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vwsll.vx v4, v8, a0 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vwsll.vx v4, v8, a0 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vwsll.vi v4, v8, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vandn.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vandn.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vandn.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vbrev.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vbrev.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vbrev8.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vrev8.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vrev8.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vclz.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vctz.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vctz.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vcpop.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vrol.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vrol.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vrol.vx v4, v8, a0 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vror.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vror.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vror.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vror.vx v4, v8, a0 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vror.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vwsll.vv v8, v4, v12 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vwsll.vx v8, v4, a0 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vwsll.vx v8, v4, a0 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vwsll.vi v8, v4, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vandn.vv v8, v16, v24 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vandn.vx v8, v16, a0 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vandn.vx v8, v16, a0 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vbrev.v v8, v16 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vbrev8.v v8, v16 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vbrev8.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vrev8.v v8, v16 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vclz.v v8, v16 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vclz.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vctz.v v8, v16 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vcpop.v v8, v16 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vcpop.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vrol.vv v8, v16, v24 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vrol.vx v8, v16, a0 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vrol.vx v8, v16, a0 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vror.vv v8, v16, v24 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vror.vx v8, v16, a0 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vror.vi v8, v16, 8 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vror.vx v8, v16, a0 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vror.vi v8, v16, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m4, tu, mu # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vandn.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vandn.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vbrev.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vbrev.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vbrev8.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vrev8.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vrev8.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vclz.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vctz.v v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vctz.v v4, v8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vcpop.v v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vrol.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vrol.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vrol.vx v4, v8, a0 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vror.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vror.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vror.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vror.vx v4, v8, a0 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vror.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vwsll.vv v8, v4, v12 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vwsll.vx v8, v4, a0 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vwsll.vx v8, v4, a0 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vwsll.vi v8, v4, 8 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s index faf7523..d8f926d 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s @@ -29,13 +29,13 @@ vclmulh.vx v8, v12, a0 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 20 -# CHECK-NEXT: Total Cycles: 28 +# CHECK-NEXT: Total Cycles: 64 # CHECK-NEXT: Total uOps: 20 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.71 -# CHECK-NEXT: IPC: 0.71 -# CHECK-NEXT: Block RThroughput: 30.0 +# CHECK-NEXT: uOps Per Cycle: 0.31 +# CHECK-NEXT: IPC: 0.31 +# CHECK-NEXT: Block RThroughput: 60.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -47,25 +47,25 @@ vclmulh.vx v8, v12, a0 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 2 0.50 vclmul.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vclmul.vx v4, v8, a0 -# CHECK-NEXT: 1 2 0.50 vclmulh.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vclmulh.vx v4, v8, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu # CHECK-NEXT: 1 2 1.00 vclmul.vv v4, v8, v12 # CHECK-NEXT: 1 2 1.00 vclmul.vx v4, v8, a0 # CHECK-NEXT: 1 2 1.00 vclmulh.vv v4, v8, v12 # CHECK-NEXT: 1 2 1.00 vclmulh.vx v4, v8, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu # CHECK-NEXT: 1 2 2.00 vclmul.vv v4, v8, v12 # CHECK-NEXT: 1 2 2.00 vclmul.vx v4, v8, a0 # CHECK-NEXT: 1 2 2.00 vclmulh.vv v4, v8, v12 # CHECK-NEXT: 1 2 2.00 vclmulh.vx v4, v8, a0 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu +# CHECK-NEXT: 1 4 4.00 vclmul.vv v4, v8, v12 +# CHECK-NEXT: 1 4 4.00 vclmul.vx v4, v8, a0 +# CHECK-NEXT: 1 4 4.00 vclmulh.vv v4, v8, v12 +# CHECK-NEXT: 1 4 4.00 vclmulh.vx v4, v8, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu -# CHECK-NEXT: 1 2 4.00 vclmul.vv v8, v12, v24 -# CHECK-NEXT: 1 2 4.00 vclmul.vx v8, v12, a0 -# CHECK-NEXT: 1 2 4.00 vclmulh.vv v8, v12, v24 -# CHECK-NEXT: 1 2 4.00 vclmulh.vx v8, v12, a0 +# CHECK-NEXT: 1 8 8.00 vclmul.vv v8, v12, v24 +# CHECK-NEXT: 1 8 8.00 vclmul.vx v8, v12, a0 +# CHECK-NEXT: 1 8 8.00 vclmulh.vv v8, v12, v24 +# CHECK-NEXT: 1 8 8.00 vclmulh.vx v8, v12, a0 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div @@ -87,27 +87,27 @@ vclmulh.vx v8, v12, a0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 4.00 - - - - - - 30.00 30.00 - - - +# CHECK-NEXT: - - - - 4.00 - - - - - - 60.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vclmul.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclmul.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclmul.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vclmulh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclmulh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vclmulh.vx v4, v8, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vclmul.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vclmul.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vclmul.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vclmulh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vclmulh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vclmulh.vx v4, v8, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vclmul.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vclmul.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vclmul.vx v4, v8, a0 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vclmulh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vclmulh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vclmulh.vx v4, v8, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vclmul.vv v8, v12, v24 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vclmul.vv v8, v12, v24 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vclmul.vx v8, v12, a0 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vclmulh.vv v8, v12, v24 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vclmulh.vv v8, v12, v24 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vclmulh.vx v8, v12, a0 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s index d974a07..302b5fb 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s @@ -36,13 +36,13 @@ vgmul.vv v4, v8 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 24 -# CHECK-NEXT: Total Cycles: 45 +# CHECK-NEXT: Total Cycles: 78 # CHECK-NEXT: Total uOps: 24 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.53 -# CHECK-NEXT: IPC: 0.53 -# CHECK-NEXT: Block RThroughput: 36.0 +# CHECK-NEXT: uOps Per Cycle: 0.31 +# CHECK-NEXT: IPC: 0.31 +# CHECK-NEXT: Block RThroughput: 72.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -54,29 +54,29 @@ vgmul.vv v4, v8 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 2 4.00 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 2 4.00 vgmul.vv v4, v8 +# CHECK-NEXT: 1 8 8.00 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 8 8.00 vgmul.vv v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 2 4.00 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 2 4.00 vgmul.vv v4, v8 +# CHECK-NEXT: 1 8 8.00 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 8 8.00 vgmul.vv v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 2 0.50 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vgmul.vv v4, v8 +# CHECK-NEXT: 1 2 1.00 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 2 1.00 vgmul.vv v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 2 0.50 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 2 0.50 vgmul.vv v4, v8 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu # CHECK-NEXT: 1 2 1.00 vghsh.vv v4, v8, v12 # CHECK-NEXT: 1 2 1.00 vgmul.vv v4, v8 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu # CHECK-NEXT: 1 2 2.00 vghsh.vv v4, v8, v12 # CHECK-NEXT: 1 2 2.00 vgmul.vv v4, v8 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: 1 4 4.00 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 4 4.00 vgmul.vv v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 2 4.00 vghsh.vv v8, v16, v24 -# CHECK-NEXT: 1 2 4.00 vgmul.vv v8, v16 +# CHECK-NEXT: 1 8 8.00 vghsh.vv v8, v16, v24 +# CHECK-NEXT: 1 8 8.00 vgmul.vv v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 2 2.00 vghsh.vv v4, v8, v12 -# CHECK-NEXT: 1 2 2.00 vgmul.vv v4, v8 +# CHECK-NEXT: 1 4 4.00 vghsh.vv v4, v8, v12 +# CHECK-NEXT: 1 4 4.00 vgmul.vv v4, v8 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div @@ -98,31 +98,31 @@ vgmul.vv v4, v8 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 8.00 - - - - - - 36.00 36.00 - - - +# CHECK-NEXT: - - - - 8.00 - - - - - - 72.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vghsh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vghsh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vgmul.vv v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vghsh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vghsh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vgmul.vv v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vghsh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vghsh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vgmul.vv v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vghsh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vghsh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vgmul.vv v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vghsh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vghsh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vgmul.vv v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vghsh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vghsh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vgmul.vv v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vghsh.vv v8, v16, v24 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vghsh.vv v8, v16, v24 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vgmul.vv v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vghsh.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vghsh.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vgmul.vv v4, v8 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s index a5c226e..13578ec 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s @@ -60,13 +60,13 @@ vaesz.vs v8, v16 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 50 -# CHECK-NEXT: Total Cycles: 73 +# CHECK-NEXT: Total Cycles: 139 # CHECK-NEXT: Total uOps: 50 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.68 -# CHECK-NEXT: IPC: 0.68 -# CHECK-NEXT: Block RThroughput: 72.0 +# CHECK-NEXT: uOps Per Cycle: 0.36 +# CHECK-NEXT: IPC: 0.36 +# CHECK-NEXT: Block RThroughput: 137.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -78,26 +78,16 @@ vaesz.vs v8, v16 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 2 0.50 vaesef.vv v4, v8 -# CHECK-NEXT: 1 2 0.50 vaesef.vs v4, v8 -# CHECK-NEXT: 1 2 0.50 vaesem.vv v4, v8 -# CHECK-NEXT: 1 2 0.50 vaesem.vs v4, v8 -# CHECK-NEXT: 1 2 0.50 vaesdm.vv v4, v8 -# CHECK-NEXT: 1 2 0.50 vaesdm.vs v4, v8 -# CHECK-NEXT: 1 2 0.50 vaeskf1.vi v4, v8, 8 -# CHECK-NEXT: 1 2 0.50 vaeskf2.vi v4, v8, 8 -# CHECK-NEXT: 1 1 0.50 vaesz.vs v4, v8 +# CHECK-NEXT: 1 2 1.00 vaesef.vv v4, v8 +# CHECK-NEXT: 1 2 1.00 vaesef.vs v4, v8 +# CHECK-NEXT: 1 2 1.00 vaesem.vv v4, v8 +# CHECK-NEXT: 1 2 1.00 vaesem.vs v4, v8 +# CHECK-NEXT: 1 2 1.00 vaesdm.vv v4, v8 +# CHECK-NEXT: 1 2 1.00 vaesdm.vs v4, v8 +# CHECK-NEXT: 1 2 1.00 vaeskf1.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vaeskf2.vi v4, v8, 8 +# CHECK-NEXT: 1 2 1.00 vaesz.vs v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 2 0.50 vaesef.vv v4, v8 -# CHECK-NEXT: 1 2 0.50 vaesef.vs v4, v8 -# CHECK-NEXT: 1 2 0.50 vaesem.vv v4, v8 -# CHECK-NEXT: 1 2 0.50 vaesem.vs v4, v8 -# CHECK-NEXT: 1 2 0.50 vaesdm.vv v4, v8 -# CHECK-NEXT: 1 2 0.50 vaesdm.vs v4, v8 -# CHECK-NEXT: 1 2 0.50 vaeskf1.vi v4, v8, 8 -# CHECK-NEXT: 1 2 0.50 vaeskf2.vi v4, v8, 8 -# CHECK-NEXT: 1 1 0.50 vaesz.vs v4, v8 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu # CHECK-NEXT: 1 2 1.00 vaesef.vv v4, v8 # CHECK-NEXT: 1 2 1.00 vaesef.vs v4, v8 # CHECK-NEXT: 1 2 1.00 vaesem.vv v4, v8 @@ -106,8 +96,8 @@ vaesz.vs v8, v16 # CHECK-NEXT: 1 2 1.00 vaesdm.vs v4, v8 # CHECK-NEXT: 1 2 1.00 vaeskf1.vi v4, v8, 8 # CHECK-NEXT: 1 2 1.00 vaeskf2.vi v4, v8, 8 -# CHECK-NEXT: 1 1 1.00 vaesz.vs v4, v8 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: 1 2 1.00 vaesz.vs v4, v8 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu # CHECK-NEXT: 1 2 2.00 vaesef.vv v4, v8 # CHECK-NEXT: 1 2 2.00 vaesef.vs v4, v8 # CHECK-NEXT: 1 2 2.00 vaesem.vv v4, v8 @@ -116,17 +106,27 @@ vaesz.vs v8, v16 # CHECK-NEXT: 1 2 2.00 vaesdm.vs v4, v8 # CHECK-NEXT: 1 2 2.00 vaeskf1.vi v4, v8, 8 # CHECK-NEXT: 1 2 2.00 vaeskf2.vi v4, v8, 8 -# CHECK-NEXT: 1 1 2.00 vaesz.vs v4, v8 +# CHECK-NEXT: 1 2 1.00 vaesz.vs v4, v8 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: 1 2 4.00 vaesef.vv v4, v8 +# CHECK-NEXT: 1 2 4.00 vaesef.vs v4, v8 +# CHECK-NEXT: 1 2 4.00 vaesem.vv v4, v8 +# CHECK-NEXT: 1 2 4.00 vaesem.vs v4, v8 +# CHECK-NEXT: 1 2 4.00 vaesdm.vv v4, v8 +# CHECK-NEXT: 1 2 4.00 vaesdm.vs v4, v8 +# CHECK-NEXT: 1 2 4.00 vaeskf1.vi v4, v8, 8 +# CHECK-NEXT: 1 2 4.00 vaeskf2.vi v4, v8, 8 +# CHECK-NEXT: 1 2 2.00 vaesz.vs v4, v8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 2 4.00 vaesef.vv v8, v16 -# CHECK-NEXT: 1 2 4.00 vaesef.vs v8, v16 -# CHECK-NEXT: 1 2 4.00 vaesem.vv v8, v16 -# CHECK-NEXT: 1 2 4.00 vaesem.vs v8, v16 -# CHECK-NEXT: 1 2 4.00 vaesdm.vv v8, v16 -# CHECK-NEXT: 1 2 4.00 vaesdm.vs v8, v16 -# CHECK-NEXT: 1 2 4.00 vaeskf1.vi v8, v16, 8 -# CHECK-NEXT: 1 2 4.00 vaeskf2.vi v8, v16, 8 -# CHECK-NEXT: 1 1 4.00 vaesz.vs v8, v16 +# CHECK-NEXT: 1 2 8.00 vaesef.vv v8, v16 +# CHECK-NEXT: 1 2 8.00 vaesef.vs v8, v16 +# CHECK-NEXT: 1 2 8.00 vaesem.vv v8, v16 +# CHECK-NEXT: 1 2 8.00 vaesem.vs v8, v16 +# CHECK-NEXT: 1 2 8.00 vaesdm.vv v8, v16 +# CHECK-NEXT: 1 2 8.00 vaesdm.vs v8, v16 +# CHECK-NEXT: 1 2 8.00 vaeskf1.vi v8, v16, 8 +# CHECK-NEXT: 1 2 8.00 vaeskf2.vi v8, v16, 8 +# CHECK-NEXT: 1 2 4.00 vaesz.vs v8, v16 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div @@ -148,40 +148,40 @@ vaesz.vs v8, v16 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 5.00 - - - - - - 71.00 73.00 - - - +# CHECK-NEXT: - - - - 5.00 - - - - - - 137.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesef.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesef.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesef.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesef.vs v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesem.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesem.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesdm.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesdm.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesdm.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesdm.vs v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaeskf1.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaeskf2.vi v4, v8, 8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesz.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesz.vs v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesef.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesef.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesem.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesem.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesem.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaesdm.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesdm.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesdm.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaeskf1.vi v4, v8, 8 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vaeskf2.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaeskf1.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaeskf2.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesz.vs v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesef.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesef.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesef.vs v4, v8 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesem.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesem.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesdm.vv v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesdm.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaeskf1.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesem.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesdm.vv v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesdm.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaeskf1.vi v4, v8, 8 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaeskf2.vi v4, v8, 8 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vaesz.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vaesz.vs v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesef.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesef.vs v4, v8 @@ -189,16 +189,16 @@ vaesz.vs v8, v16 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesem.vs v4, v8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesdm.vv v4, v8 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesdm.vs v4, v8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vaeskf1.vi v4, v8, 8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vaeskf2.vi v4, v8, 8 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vaesz.vs v4, v8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaeskf1.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaeskf2.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vaesz.vs v4, v8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaesef.vv v8, v16 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesef.vv v8, v16 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesef.vs v8, v16 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesem.vv v8, v16 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesem.vs v8, v16 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesdm.vv v8, v16 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaesdm.vs v8, v16 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaeskf1.vi v8, v16, 8 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaeskf2.vi v8, v16, 8 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vaesz.vs v8, v16 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaesdm.vs v8, v16 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaeskf1.vi v8, v16, 8 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vaeskf2.vi v8, v16, 8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vaesz.vs v8, v16 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s index f1a2a18..20ac87a 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s @@ -45,13 +45,13 @@ vsha2cl.vv v8, v16, v24 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 32 -# CHECK-NEXT: Total Cycles: 68 +# CHECK-NEXT: Total Cycles: 108 # CHECK-NEXT: Total uOps: 32 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.47 -# CHECK-NEXT: IPC: 0.47 -# CHECK-NEXT: Block RThroughput: 45.0 +# CHECK-NEXT: uOps Per Cycle: 0.30 +# CHECK-NEXT: IPC: 0.30 +# CHECK-NEXT: Block RThroughput: 97.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -63,37 +63,37 @@ vsha2cl.vv v8, v16, v24 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 3 0.50 vsha2ms.vv v4, v8, v12 -# CHECK-NEXT: 1 3 0.50 vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: 1 3 0.50 vsha2cl.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu # CHECK-NEXT: 1 3 1.00 vsha2ms.vv v4, v8, v12 # CHECK-NEXT: 1 3 1.00 vsha2ch.vv v4, v8, v12 # CHECK-NEXT: 1 3 1.00 vsha2cl.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu # CHECK-NEXT: 1 3 2.00 vsha2ms.vv v4, v8, v12 # CHECK-NEXT: 1 3 2.00 vsha2ch.vv v4, v8, v12 # CHECK-NEXT: 1 3 2.00 vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu +# CHECK-NEXT: 1 3 4.00 vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: 1 3 4.00 vsha2ch.vv v4, v8, v12 +# CHECK-NEXT: 1 3 4.00 vsha2cl.vv v4, v8, v12 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 3 4.00 vsha2ms.vv v8, v16, v24 -# CHECK-NEXT: 1 3 4.00 vsha2ch.vv v8, v16, v24 -# CHECK-NEXT: 1 3 4.00 vsha2cl.vv v8, v16, v24 +# CHECK-NEXT: 1 3 8.00 vsha2ms.vv v8, v16, v24 +# CHECK-NEXT: 1 3 8.00 vsha2ch.vv v8, v16, v24 +# CHECK-NEXT: 1 3 8.00 vsha2cl.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 3 0.50 vsha2ms.vv v4, v8, v12 -# CHECK-NEXT: 1 3 0.50 vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: 1 3 0.50 vsha2cl.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu # CHECK-NEXT: 1 3 1.00 vsha2ms.vv v4, v8, v12 # CHECK-NEXT: 1 3 1.00 vsha2ch.vv v4, v8, v12 # CHECK-NEXT: 1 3 1.00 vsha2cl.vv v4, v8, v12 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 3 2.00 vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, tu, mu +# CHECK-NEXT: 1 3 3.00 vsha2ms.vv v4, v8, v12 # CHECK-NEXT: 1 3 2.00 vsha2ch.vv v4, v8, v12 # CHECK-NEXT: 1 3 2.00 vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, tu, mu +# CHECK-NEXT: 1 3 6.00 vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: 1 3 4.00 vsha2ch.vv v4, v8, v12 +# CHECK-NEXT: 1 3 4.00 vsha2cl.vv v4, v8, v12 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, tu, mu -# CHECK-NEXT: 1 3 4.00 vsha2ms.vv v8, v16, v24 -# CHECK-NEXT: 1 3 4.00 vsha2ch.vv v8, v16, v24 -# CHECK-NEXT: 1 3 4.00 vsha2cl.vv v8, v16, v24 +# CHECK-NEXT: 1 3 12.00 vsha2ms.vv v8, v16, v24 +# CHECK-NEXT: 1 3 8.00 vsha2ch.vv v8, v16, v24 +# CHECK-NEXT: 1 3 8.00 vsha2cl.vv v8, v16, v24 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div @@ -115,39 +115,39 @@ vsha2cl.vv v8, v16, v24 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 8.00 - - - - - - 44.00 46.00 - - - +# CHECK-NEXT: - - - - 8.00 - - - - - - 97.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ms.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ch.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2cl.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2ms.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2cl.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2ms.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2ch.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2cl.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2ms.vv v8, v16, v24 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2ms.vv v8, v16, v24 # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2ch.vv v8, v16, v24 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2cl.vv v8, v16, v24 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2cl.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ms.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - vsha2ch.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ch.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2cl.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - vsha2ms.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2ch.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - vsha2cl.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsha2cl.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2ms.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - - 4.00 - - - vsha2ch.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 6.00 - - - - vsha2ms.vv v4, v8, v12 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2ch.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsha2cl.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2ms.vv v8, v16, v24 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2ch.vv v8, v16, v24 -# CHECK-NEXT: - - - - - - - - - - - - 8.00 - - - vsha2cl.vv v8, v16, v24 +# CHECK-NEXT: - - - - - - - - - - - 12.00 - - - - vsha2ms.vv v8, v16, v24 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2ch.vv v8, v16, v24 +# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsha2cl.vv v8, v16, v24 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s index 574bbb3..7d11f46 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s @@ -25,13 +25,13 @@ vsm3c.vi v8, v16, 8 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 15 -# CHECK-NEXT: Total Cycles: 31 +# CHECK-NEXT: Total Cycles: 28 # CHECK-NEXT: Total uOps: 15 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.48 -# CHECK-NEXT: IPC: 0.48 -# CHECK-NEXT: Block RThroughput: 32.0 +# CHECK-NEXT: uOps Per Cycle: 0.54 +# CHECK-NEXT: IPC: 0.54 +# CHECK-NEXT: Block RThroughput: 25.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -43,20 +43,20 @@ vsm3c.vi v8, v16, 8 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 3 1.00 vsm3me.vv v4, v8, v12 +# CHECK-NEXT: 1 6 1.00 vsm3me.vv v4, v8, v12 # CHECK-NEXT: 1 3 1.00 vsm3c.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 3 1.00 vsm3me.vv v4, v8, v12 +# CHECK-NEXT: 1 6 1.00 vsm3me.vv v4, v8, v12 # CHECK-NEXT: 1 3 1.00 vsm3c.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 3 2.00 vsm3me.vv v4, v8, v12 -# CHECK-NEXT: 1 3 2.00 vsm3c.vi v4, v8, 8 +# CHECK-NEXT: 1 6 2.00 vsm3me.vv v4, v8, v12 +# CHECK-NEXT: 1 3 1.00 vsm3c.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 3 4.00 vsm3me.vv v4, v8, v12 -# CHECK-NEXT: 1 3 4.00 vsm3c.vi v4, v8, 8 +# CHECK-NEXT: 1 6 4.00 vsm3me.vv v4, v8, v12 +# CHECK-NEXT: 1 3 2.00 vsm3c.vi v4, v8, 8 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 3 8.00 vsm3me.vv v8, v16, v24 -# CHECK-NEXT: 1 3 8.00 vsm3c.vi v8, v16, 8 +# CHECK-NEXT: 1 6 8.00 vsm3me.vv v8, v16, v24 +# CHECK-NEXT: 1 3 4.00 vsm3c.vi v8, v16, 8 # CHECK: Resources: # CHECK-NEXT: [0] - SiFiveP600Div @@ -78,7 +78,7 @@ vsm3c.vi v8, v16, 8 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 5.00 - - - - - - 32.00 - - - - +# CHECK-NEXT: - - - - 5.00 - - - - - - 25.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: @@ -90,10 +90,10 @@ vsm3c.vi v8, v16, 8 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm3c.vi v4, v8, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m2, tu, mu # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsm3me.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsm3c.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsm3c.vi v4, v8, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m4, tu, mu # CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsm3me.vv v4, v8, v12 -# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsm3c.vi v4, v8, 8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - vsm3c.vi v4, v8, 8 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsm3me.vv v8, v16, v24 -# CHECK-NEXT: - - - - - - - - - - - 8.00 - - - - vsm3c.vi v8, v16, 8 +# CHECK-NEXT: - - - - - - - - - - - 4.00 - - - - vsm3c.vi v8, v16, 8 |