1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=riscv64 -mcpu=mips-p8700 -timeline -iterations=1 < %s | FileCheck %s
# A few instructions to test the pipeline:
# - Integer division (IDiv) exercises the p8700GpDiv resource.
# - Integer multiplication (IMul) uses p8700GpMul.
# - Floating-point multiplication uses the FPUL pipeline (p8700FpuLong + p8700FpuApu).
# - Load/Store instructions use the LSU pipeline.
# - Simple ALU instructions test p8700WriteEitherALU; in the resource tables they land on p8700IssueAL2 or p8700ALQ.
# - A jump instruction to test the CTI pipeline.
# Instruction sequence analyzed by llvm-mca. The autogenerated assertions that
# follow depend on the exact instructions and their order, so after any change
# other than to comments the assertions must be regenerated.
#
# Integer division: a0 = a1 / a2
# Exercises p8700GpDiv resource.
div a0, a1, a2
# Integer multiplication: a4 = a1 * a2
# Exercises p8700GpMul resource.
mul a4, a1, a2
# Floating-point multiply: f1 = f2 * f3 (single precision)
# Exercises p8700FpuLong + p8700FpuApu resources.
fmul.s f1, f2, f3
# Load/Store: load a word from the address in a0 into a3, then store a3 to the
# address in a1. The load's base register a0 is written by the div above, and
# the store's data register a3 is written by the load, so the three
# instructions form a dependency chain.
# Exercises p8700IssueLSU resource.
lw a3, 0(a0)
sw a3, 0(a1)
# Simple ALU operations: add two registers, then rotate the sum right by a2.
# ror reads a5, which the preceding add writes.
# Exercises p8700WriteEitherALU (presumably the choice between the
# p8700IssueAL2 and p8700ALQ resources that appear in the expected resource
# tables; confirm against the scheduling model).
add a5, a1, a2
ror a6, a5, a2
# A jump instruction: a simple forward jump over the next instruction.
# Exercises p8700IssueCTI resource.
jal x0, .Lend
add a7, a4, a0 # Jumped over at run time, but still listed and timed in the expected report
.Lend:
nop
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 17
# CHECK-NEXT: Total uOps: 10
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.59
# CHECK-NEXT: IPC: 0.59
# CHECK-NEXT: Block RThroughput: 7.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 7 7.00 div a0, a1, a2
# CHECK-NEXT: 1 4 1.00 mul a4, a1, a2
# CHECK-NEXT: 1 5 1.00 fmul.s ft1, ft2, ft3
# CHECK-NEXT: 1 4 1.00 * lw a3, 0(a0)
# CHECK-NEXT: 1 3 1.00 * sw a3, 0(a1)
# CHECK-NEXT: 1 1 0.50 add a5, a1, a2
# CHECK-NEXT: 1 1 0.50 ror a6, a5, a2
# CHECK-NEXT: 1 1 1.00 j .Lend
# CHECK-NEXT: 1 1 0.50 add a7, a4, a0
# CHECK-NEXT: 1 0 0.50 nop
# CHECK: Resources:
# CHECK-NEXT: [0.0] - p8700AGQ
# CHECK-NEXT: [0.1] - p8700AGQ
# CHECK-NEXT: [0.2] - p8700AGQ
# CHECK-NEXT: [1] - p8700ALQ
# CHECK-NEXT: [2.0] - p8700FPQ
# CHECK-NEXT: [2.1] - p8700FPQ
# CHECK-NEXT: [2.2] - p8700FPQ
# CHECK-NEXT: [3] - p8700FpuApu
# CHECK-NEXT: [4] - p8700FpuLong
# CHECK-NEXT: [5] - p8700GpDiv
# CHECK-NEXT: [6] - p8700GpMul
# CHECK-NEXT: [7] - p8700IssueAL2
# CHECK-NEXT: [8] - p8700IssueCTI
# CHECK-NEXT: [9] - p8700IssueFPUL
# CHECK-NEXT: [10] - p8700IssueFPUS
# CHECK-NEXT: [11] - p8700IssueLSU
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [0.2] [1] [2.0] [2.1] [2.2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: 1.00 1.00 1.00 2.00 - - - 1.00 1.00 7.00 1.00 2.00 1.00 - - 2.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [0.2] [1] [2.0] [2.1] [2.2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - - - - - - - - - 7.00 - - - - - - div a0, a1, a2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - mul a4, a1, a2
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - - - - - fmul.s ft1, ft2, ft3
# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - 1.00 lw a3, 0(a0)
# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - 1.00 sw a3, 0(a1)
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - add a5, a1, a2
# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - ror a6, a5, a2
# CHECK-NEXT: - - 1.00 - - - - - - - - - 1.00 - - - j .Lend
# CHECK-NEXT: - - - 1.00 - - - - - - - - - - - - add a7, a4, a0
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - nop
# CHECK: Timeline view:
# CHECK-NEXT: 0123456
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeeER. .. div a0, a1, a2
# CHECK-NEXT: [0,1] DeeeeE---R. .. mul a4, a1, a2
# CHECK-NEXT: [0,2] DeeeeeE--R. .. fmul.s ft1, ft2, ft3
# CHECK-NEXT: [0,3] D=======eeeeER .. lw a3, 0(a0)
# CHECK-NEXT: [0,4] .D==========eeeER sw a3, 0(a1)
# CHECK-NEXT: [0,5] .DeE------------R add a5, a1, a2
# CHECK-NEXT: [0,6] .D=eE-----------R ror a6, a5, a2
# CHECK-NEXT: [0,7] .DeE------------R j .Lend
# CHECK-NEXT: [0,8] . D=====eE------R add a7, a4, a0
# CHECK-NEXT: [0,9] . DE------------R nop
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 div a0, a1, a2
# CHECK-NEXT: 1. 1 1.0 1.0 3.0 mul a4, a1, a2
# CHECK-NEXT: 2. 1 1.0 1.0 2.0 fmul.s ft1, ft2, ft3
# CHECK-NEXT: 3. 1 8.0 0.0 0.0 lw a3, 0(a0)
# CHECK-NEXT: 4. 1 11.0 0.0 0.0 sw a3, 0(a1)
# CHECK-NEXT: 5. 1 1.0 1.0 12.0 add a5, a1, a2
# CHECK-NEXT: 6. 1 2.0 0.0 11.0 ror a6, a5, a2
# CHECK-NEXT: 7. 1 1.0 1.0 12.0 j .Lend
# CHECK-NEXT: 8. 1 6.0 0.0 6.0 add a7, a4, a0
# CHECK-NEXT: 9. 1 1.0 1.0 12.0 nop
# CHECK-NEXT: 1 3.3 0.6 5.8 <total>
|