aboutsummaryrefslogtreecommitdiff
path: root/gprofng/src/hwc_amd_zen3.h
blob: 6a2ee021e8b8a32d918007f7ce32906f563908b5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
/* Copyright (C) 2024 Free Software Foundation, Inc.
   Contributed by Oracle.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA.  */

#ifndef _HWC_AMD_ZEN3_H
#define _HWC_AMD_ZEN3_H

/* I (nm, event, umask, mtr): describe one raw AMD event by forwarding to
   INIT_HWC with a PERF_TYPE_RAW configuration word built from EVENT and
   UMASK.  NM and MTR are passed through unchanged.

   AMD event selects are 12 bits wide, but the raw encoding is not
   contiguous: EventSelect[7:0] occupies config bits 7:0, UnitMask bits
   15:8, and EventSelect[11:8] bits 35:32.  OR-ing the whole event number
   into the low bits lets event bits 11:8 collide with the unit mask (for
   example event 0x28a with umask 0x1 would yield 0x38a, i.e. event 0x8a
   with umask 0x3), so the upper nibble is moved up explicitly.  Events
   that fit in 8 bits produce the same value as before.

   NOTE(review): assumes the config field filled in by INIT_HWC is at
   least 36 bits wide -- confirm against the Hwcentry declaration.  */
#define I(nm, event, umask, mtr) \
  INIT_HWC (nm, mtr, \
            ((event) & 0xff) | ((umask) << 8) \
            | ((unsigned long long) ((event) & 0xf00) << 24), \
            PERF_TYPE_RAW)

static Hwcentry	amd_zen3_list[] = {
  HWC_GENERIC
/* branch: */
  { I("bp_de_redirect", 0x91, 0, STXT("Decode Redirects")) },
  { I("bp_dyn_ind_pred", 0x8e, 0, STXT("Dynamic Indirect Predictions")) },
  { I("bp_l1_btb_correct", 0x8a, 0,
      STXT("L1 Branch Prediction Overrides Existing Prediction (speculative)")) },
  { I("bp_l1_tlb_fetch_hit", 0x94, 0xff,
      STXT("The number of instruction fetches that hit in the L1 ITLB")) },
  { I("bp_l1_tlb_fetch_hit.if1g", 0x94, 0x4,
      STXT("The number of instruction fetches that hit in the L1 ITLB. L1"
      "Instruction TLB hit (1G page size)")) },
  { I("bp_l1_tlb_fetch_hit.if2m", 0x94, 0x2,
      STXT("The number of instruction fetches that hit in the L1 ITLB. L1"
      "Instruction TLB hit (2M page size)")) },
  { I("bp_l1_tlb_fetch_hit.if4k", 0x94, 0x1,
      STXT("The number of instruction fetches that hit in the L1 ITLB. L1"
      "Instrcution TLB hit (4K or 16K page size)")) },
  { I("bp_l2_btb_correct", 0x8b, 0,
      STXT("L2 Branch Prediction Overrides Existing Prediction (speculative)")) },
  { I("bp_tlb_rel", 0x99, 0, STXT("The number of ITLB reload requests")) },
/* cache: */
  { I("bp_l1_tlb_miss_l2_tlb_hit", 0x84, 0,
      STXT("L1 ITLB Miss, L2 ITLB Hit. The number of instruction fetches that miss"
      "in the L1 ITLB but hit in the L2 ITLB")) },
  { I("bp_l1_tlb_miss_l2_tlb_miss", 0x85, 0xff,
      STXT("The number of instruction fetches that miss in both the L1 and L2 TLBs")) },
  { I("bp_l1_tlb_miss_l2_tlb_miss.coalesced_4k", 0x85, 0x8,
      STXT("The number of valid fills into the ITLB originating from the LS"
      "Page-Table Walker. Tablewalk requests are issued for L1-ITLB and"
      "L2-ITLB misses. Walk for >4K Coalesced page")) },
  { I("bp_l1_tlb_miss_l2_tlb_miss.if1g", 0x85, 0x4,
      STXT("The number of valid fills into the ITLB originating from the LS"
      "Page-Table Walker. Tablewalk requests are issued for L1-ITLB and"
      "L2-ITLB misses. Walk for 1G page")) },
  { I("bp_l1_tlb_miss_l2_tlb_miss.if2m", 0x85, 0x2,
      STXT("The number of valid fills into the ITLB originating from the LS"
      "Page-Table Walker. Tablewalk requests are issued for L1-ITLB and"
      "L2-ITLB misses. Walk for 2M page")) },
  { I("bp_l1_tlb_miss_l2_tlb_miss.if4k", 0x85, 0x1,
      STXT("The number of valid fills into the ITLB originating from the LS"
      "Page-Table Walker. Tablewalk requests are issued for L1-ITLB and"
      "L2-ITLB misses. Walk to 4K page")) },
  { I("bp_snp_re_sync", 0x86, 0,
      STXT("The number of pipeline restarts caused by invalidating probes that hit"
      "on the instruction stream currently being executed. This would happen"
      "if the active instruction stream was being modified by another"
      "processor in an MP system - typically a highly unlikely event")) },
  { I("ic_cache_fill_l2", 0x82, 0,
      STXT("Instruction Cache Refills from L2. The number of 64 byte instruction"
      "cache line was fulfilled from the L2 cache")) },
  { I("ic_cache_fill_sys", 0x83, 0,
      STXT("Instruction Cache Refills from System. The number of 64 byte"
      "instruction cache line fulfilled from system memory or another cache")) },
  { I("ic_cache_inval.fill_invalidated", 0x8c, 0x1,
      STXT("IC line invalidated due to overwriting fill response. The number of"
      "instruction cache lines invalidated. A non-SMC event is CMC (cross"
      "modifying code), either from the other thread of the core or another"
      "core")) },
  { I("ic_cache_inval.l2_invalidating_probe", 0x8c, 0x2,
      STXT("IC line invalidated due to L2 invalidating probe (external or LS). The"
      "number of instruction cache lines invalidated. A non-SMC event is CMC"
      "(cross modifying code), either from the other thread of the core or"
      "another core")) },
  { I("ic_fetch_stall.ic_stall_any", 0x87, 0x4,
      STXT("Instruction Pipe Stall. IC pipe was stalled during this clock cycle"
      "for any reason (nothing valid in pipe ICM1)")) },
  { I("ic_fetch_stall.ic_stall_back_pressure", 0x87, 0x1,
      STXT("Instruction Pipe Stall. IC pipe was stalled during this clock cycle"
      "(including IC to OC fetches) due to back-pressure")) },
  { I("ic_fetch_stall.ic_stall_dq_empty", 0x87, 0x2,
      STXT("Instruction Pipe Stall. IC pipe was stalled during this clock cycle"
      "(including IC to OC fetches) due to DQ empty")) },
  { I("ic_fw32", 0x80, 0,
      STXT("The number of 32B fetch windows transferred from IC pipe to DE"
      "instruction decoder (includes non-cacheable and cacheable fill"
      "responses)")) },
  { I("ic_fw32_miss", 0x81, 0,
      STXT("The number of 32B fetch windows tried to read the L1 IC and missed in"
      "the full tag")) },
  { I("ic_oc_mode_switch.ic_oc_mode_switch", 0x28a, 0x1,
      STXT("OC Mode Switch. IC to OC mode switch")) },
  { I("ic_oc_mode_switch.oc_ic_mode_switch", 0x28a, 0x2,
      STXT("OC Mode Switch. OC to IC mode switch")) },
  { I("ic_tag_hit_miss.all_instruction_cache_accesses", 0x18e, 0x1f,
      STXT("All Instruction Cache Accesses. Counts various IC tag related hit and"
      "miss events")) },
  { I("ic_tag_hit_miss.instruction_cache_hit", 0x18e, 0x7,
      STXT("Instruction Cache Hit. Counts various IC tag related hit and miss"
      "events")) },
  { I("ic_tag_hit_miss.instruction_cache_miss", 0x18e, 0x18,
      STXT("Instruction Cache Miss. Counts various IC tag related hit and miss"
      "events")) },
  { I("l2_cache_req_stat.ic_access_in_l2", 0x64, 0x7,
      STXT("Core to L2 cacheable request access status (not including L2"
      "Prefetch). Instruction cache requests in L2")) },
  { I("l2_cache_req_stat.ic_dc_hit_in_l2", 0x64, 0xf6,
      STXT("Core to L2 cacheable request access status (not including L2"
      "Prefetch). Instruction cache request hit in L2 and Data cache request"
      "hit in L2 (all types)")) },
  { I("l2_cache_req_stat.ic_dc_miss_in_l2", 0x64, 0x9,
      STXT("Core to L2 cacheable request access status (not including L2"
      "Prefetch). Instruction cache request miss in L2 and Data cache request"
      "miss in L2 (all types)")) },
  { I("l2_cache_req_stat.ic_fill_hit_s", 0x64, 0x2,
      STXT("Core to L2 cacheable request access status (not including L2"
      "Prefetch). Instruction cache hit non-modifiable line in L2")) },
  { I("l2_cache_req_stat.ic_fill_hit_x", 0x64, 0x4,
      STXT("Core to L2 cacheable request access status (not including L2"
      "Prefetch). Instruction cache hit modifiable line in L2")) },
  { I("l2_cache_req_stat.ic_fill_miss", 0x64, 0x1,
      STXT("Core to L2 cacheable request access status (not including L2"
      "Prefetch). Instruction cache request miss in L2. Use"
      "l2_cache_misses_from_ic_miss instead")) },
  { I("l2_cache_req_stat.ls_rd_blk_c", 0x64, 0x8,
      STXT("Core to L2 cacheable request access status (not including L2"
      "Prefetch). Data cache request miss in L2 (all types). Use"
      "l2_cache_misses_from_dc_misses instead")) },
  { I("l2_cache_req_stat.ls_rd_blk_cs", 0x64, 0x80,
      STXT("Core to L2 cacheable request access status (not including L2"
      "Prefetch). Data cache shared read hit in L2")) },
  { I("l2_cache_req_stat.ls_rd_blk_l_hit_s", 0x64, 0x20,
      STXT("Core to L2 cacheable request access status (not including L2"
      "Prefetch). Data cache read hit non-modifiable line in L2")) },
  { I("l2_cache_req_stat.ls_rd_blk_l_hit_x", 0x64, 0x40,
      STXT("Core to L2 cacheable request access status (not including L2"
      "Prefetch). Data cache read hit in L2. Modifiable")) },
  { I("l2_cache_req_stat.ls_rd_blk_x", 0x64, 0x10,
      STXT("Core to L2 cacheable request access status (not including L2"
      "Prefetch). Data cache store or state change hit in L2")) },
  { I("l2_fill_pending.l2_fill_busy", 0x6d, 0x1,
      STXT("Cycles with fill pending from L2. Total cycles spent with one or more"
      "fill requests in flight from L2")) },
  { I("l2_latency.l2_cycles_waiting_on_fills", 0x62, 0x1,
      STXT("Total cycles spent waiting for L2 fills to complete from L3 or memory,"
      "divided by four. Event counts are for both threads. To calculate"
      "average latency, the number of fills from both threads must be used")) },
  { I("l2_pf_hit_l2", 0x70, 0xff,
      STXT("L2 prefetch hit in L2. Use l2_cache_hits_from_l2_hwpf instead")) },
  { I("l2_pf_miss_l2_hit_l3", 0x71, 0xff,
      STXT("L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2"
      "pipeline which miss the L2 cache and hit the L3")) },
  { I("l2_pf_miss_l2_l3", 0x72, 0xff,
      STXT("L2 prefetcher misses in L3. Counts all L2 prefetches accepted by the"
      "L2 pipeline which miss the L2 and the L3 caches")) },
  { I("l2_request_g1.all_no_prefetch", 0x60, 0xf9, STXT("(null)")) },
  { I("l2_request_g1.cacheable_ic_read", 0x60, 0x10,
      STXT("All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads")) },
  { I("l2_request_g1.change_to_x", 0x60, 0x8,
      STXT("All L2 Cache Requests (Breakdown 1 - Common). Data cache state change"
      "requests. Request change to writable, check L2 for current state")) },
  { I("l2_request_g1.group2", 0x60, 0x1,
      STXT("Miscellaneous events covered in more detail by l2_request_g2 (PMCx061)")) },
  { I("l2_request_g1.l2_hw_pf", 0x60, 0x2,
      STXT("All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All"
      "prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2"
      "hit/miss broken out in a separate perfmon event")) },
  { I("l2_request_g1.ls_rd_blk_c_s", 0x60, 0x20,
      STXT("All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads")) },
  { I("l2_request_g1.prefetch_l2_cmd", 0x60, 0x4,
      STXT("All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd")) },
  { I("l2_request_g1.rd_blk_l", 0x60, 0x80,
      STXT("All L2 Cache Requests (Breakdown 1 - Common). Data cache reads"
      "(including hardware and software prefetch)")) },
  { I("l2_request_g1.rd_blk_x", 0x60, 0x40,
      STXT("All L2 Cache Requests (Breakdown 1 - Common). Data cache stores")) },
  { I("l2_request_g2.bus_locks_originator", 0x61, 0x2,
      STXT("All L2 Cache Requests (Breakdown 2 - Rare). Bus locks")) },
  { I("l2_request_g2.bus_locks_responses", 0x61, 0x1,
      STXT("All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response")) },
  { I("l2_request_g2.group1", 0x61, 0x80,
      STXT("Miscellaneous events covered in more detail by l2_request_g1 (PMCx060)")) },
  { I("l2_request_g2.ic_rd_sized", 0x61, 0x10,
      STXT("All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read"
      "sized")) },
  { I("l2_request_g2.ic_rd_sized_nc", 0x61, 0x8,
      STXT("All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read"
      "sized non-cacheable")) },
  { I("l2_request_g2.ls_rd_sized", 0x61, 0x40,
      STXT("All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized")) },
  { I("l2_request_g2.ls_rd_sized_nc", 0x61, 0x20,
      STXT("All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized"
      "non-cacheable")) },
  { I("l2_request_g2.smc_inval", 0x61, 0x4,
      STXT("All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code"
      "invalidates")) },
  { I("l2_wcb_req.cl_zero", 0x63, 0x1,
      STXT("LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2"
      "WCB (Write Combining Buffer) cache line zeroing requests")) },
  { I("l2_wcb_req.wcb_close", 0x63, 0x20,
      STXT("LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write"
      "Combining Buffer) close requests")) },
  { I("l2_wcb_req.wcb_write", 0x63, 0x40,
      STXT("LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write"
      "Combining Buffer) write requests")) },
  { I("l2_wcb_req.zero_byte_store", 0x63, 0x4,
      STXT("LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB"
      "(Write Combining Buffer) zero byte store requests")) },
  { I("op_cache_hit_miss.all_op_cache_accesses", 0x28f, 0x7,
      STXT("All Op Cache accesses. Counts Op Cache micro-tag hit/miss events")) },
  { I("op_cache_hit_miss.op_cache_hit", 0x28f, 0x3,
      STXT("Op Cache Hit. Counts Op Cache micro-tag hit/miss events")) },
  { I("op_cache_hit_miss.op_cache_miss", 0x28f, 0x4,
      STXT("Op Cache Miss. Counts Op Cache micro-tag hit/miss events")) },
/* core: */
  { I("ex_div_busy", 0xd3, 0, STXT("Div Cycles Busy count")) },
  { I("ex_div_count", 0xd4, 0, STXT("Div Op Count")) },
  { I("ex_ret_brn", 0xc2, 0, STXT("Retired Branch Instructions")) },
  { I("ex_ret_brn_far", 0xc6, 0, STXT("Retired Far Control Transfers")) },
  { I("ex_ret_brn_ind_misp", 0xca, 0,
      STXT("Retired Indirect Branch Instructions Mispredicted")) },
  { I("ex_ret_brn_misp", 0xc3, 0,
      STXT("Retired Branch Instructions Mispredicted")) },
  { I("ex_ret_brn_resync", 0xc7, 0, STXT("Retired Branch Resyncs")) },
  { I("ex_ret_brn_tkn", 0xc4, 0, STXT("Retired Taken Branch Instructions")) },
  { I("ex_ret_brn_tkn_misp", 0xc5, 0,
      STXT("Retired Taken Branch Instructions Mispredicted")) },
  { I("ex_ret_cond", 0xd1, 0,
      STXT("Retired Conditional Branch Instructions")) },
  { I("ex_ret_fused_instr", 0x1d0, 0,
      STXT("Counts retired Fused Instructions")) },
  { I("ex_ret_ind_brch_instr", 0xcc, 0,
      STXT("Retired Indirect Branch Instructions. The number of indirect branches"
      "retired")) },
  { I("ex_ret_instr", 0xc0, 0, STXT("Retired Instructions")) },
  { I("ex_ret_mmx_fp_instr.mmx_instr", 0xcb, 0x2, STXT("MMX instructions")) },
  { I("ex_ret_mmx_fp_instr.sse_instr", 0xcb, 0x4,
      STXT("SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX)")) },
  { I("ex_ret_mmx_fp_instr.x87_instr", 0xcb, 0x1, STXT("x87 instructions")) },
  { I("ex_ret_msprd_brnch_instr_dir_msmtch", 0x1c7, 0,
      STXT("Retired Mispredicted Branch Instructions due to Direction Mismatch")) },
  { I("ex_ret_near_ret", 0xc8, 0, STXT("Retired Near Returns")) },
  { I("ex_ret_near_ret_mispred", 0xc9, 0,
      STXT("Retired Near Returns Mispredicted")) },
  { I("ex_ret_ops", 0xc1, 0,
      STXT("Retired Ops. Use macro_ops_retired instead")) },
  { I("ex_tagged_ibs_ops.ibs_count_rollover", 0x1cf, 0x4,
      STXT("Tagged IBS Ops. Number of times an op could not be tagged by IBS"
      "because of a previous tagged op that has not retired")) },
  { I("ex_tagged_ibs_ops.ibs_tagged_ops", 0x1cf, 0x1,
      STXT("Tagged IBS Ops. Number of Ops tagged by IBS")) },
  { I("ex_tagged_ibs_ops.ibs_tagged_ops_ret", 0x1cf, 0x2,
      STXT("Tagged IBS Ops. Number of Ops tagged by IBS that retired")) },
/* floating point: */
  { I("fp_disp_faults.x87_fill_fault", 0xe, 0x1,
      STXT("Floating Point Dispatch Faults. x87 fill fault")) },
  { I("fp_disp_faults.xmm_fill_fault", 0xe, 0x2,
      STXT("Floating Point Dispatch Faults. XMM fill fault")) },
  { I("fp_disp_faults.ymm_fill_fault", 0xe, 0x4,
      STXT("Floating Point Dispatch Faults. YMM fill fault")) },
  { I("fp_disp_faults.ymm_spill_fault", 0xe, 0x8,
      STXT("Floating Point Dispatch Faults. YMM spill fault")) },
  { I("fp_num_mov_elim_scal_op.opt_potential", 0x4, 0x4,
      STXT("Number of Ops that are candidates for optimization (have Z-bit either"
      "set or pass). This is a dispatch based speculative event, and is"
      "useful for measuring the effectiveness of the Move elimination and"
      "Scalar code optimization schemes")) },
  { I("fp_num_mov_elim_scal_op.optimized", 0x4, 0x8,
      STXT("Number of Scalar Ops optimized. This is a dispatch based speculative"
      "event, and is useful for measuring the effectiveness of the Move"
      "elimination and Scalar code optimization schemes")) },
  { I("fp_num_mov_elim_scal_op.sse_mov_ops", 0x4, 0x1,
      STXT("Number of SSE Move Ops. This is a dispatch based speculative event,"
      "and is useful for measuring the effectiveness of the Move elimination"
      "and Scalar code optimization schemes")) },
  { I("fp_num_mov_elim_scal_op.sse_mov_ops_elim", 0x4, 0x2,
      STXT("Number of SSE Move Ops eliminated. This is a dispatch based"
      "speculative event, and is useful for measuring the effectiveness of"
      "the Move elimination and Scalar code optimization schemes")) },
  { I("fp_ret_sse_avx_ops.add_sub_flops", 0x3, 0x1,
      STXT("Add/subtract FLOPs. This is a retire-based event. The number of"
      "retired SSE/AVX FLOPs. The number of events logged per cycle can vary"
      "from 0 to 64. This event requires the use of the MergeEvent since it"
      "can count above 15 events per cycle. See 2.1.17.3 [Large Increment per"
      "Cycle Events]. It does not provide a useful count without the use of"
      "the MergeEvent")) },
  { I("fp_ret_sse_avx_ops.all", 0x3, 0xff,
      STXT("All FLOPS. This is a retire-based event. The number of retired SSE/AVX"
      "FLOPS. The number of events logged per cycle can vary from 0 to 64."
      "This event can count above 15")) },
  { I("fp_ret_sse_avx_ops.div_flops", 0x3, 0x4,
      STXT("Divide/square root FLOPs. This is a retire-based event. The number of"
      "retired SSE/AVX FLOPs. The number of events logged per cycle can vary"
      "from 0 to 64. This event requires the use of the MergeEvent since it"
      "can count above 15 events per cycle. See 2.1.17.3 [Large Increment per"
      "Cycle Events]. It does not provide a useful count without the use of"
      "the MergeEvent")) },
  { I("fp_ret_sse_avx_ops.mac_flops", 0x3, 0x8,
      STXT("Multiply-Accumulate FLOPs. Each MAC operation is counted as 2 FLOPS."
      "This is a retire-based event. The number of retired SSE/AVX FLOPs. The"
      "number of events logged per cycle can vary from 0 to 64. This event"
      "requires the use of the MergeEvent since it can count above 15 events"
      "per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does"
      "not provide a useful count without the use of the MergeEvent")) },
  { I("fp_ret_sse_avx_ops.mult_flops", 0x3, 0x2,
      STXT("Multiply FLOPs. This is a retire-based event. The number of retired"
      "SSE/AVX FLOPs. The number of events logged per cycle can vary from 0"
      "to 64. This event requires the use of the MergeEvent since it can"
      "count above 15 events per cycle. See 2.1.17.3 [Large Increment per"
      "Cycle Events]. It does not provide a useful count without the use of"
      "the MergeEvent")) },
  { I("fp_retired_ser_ops.sse_bot_ret", 0x5, 0x8,
      STXT("SSE/AVX bottom-executing ops retired. The number of serializing Ops"
      "retired")) },
  { I("fp_retired_ser_ops.sse_ctrl_ret", 0x5, 0x4,
      STXT("SSE/AVX control word mispredict traps. The number of serializing Ops"
      "retired")) },
  { I("fp_retired_ser_ops.x87_bot_ret", 0x5, 0x2,
      STXT("x87 bottom-executing ops retired. The number of serializing Ops"
      "retired")) },
  { I("fp_retired_ser_ops.x87_ctrl_ret", 0x5, 0x1,
      STXT("x87 control word mispredict traps due to mispredictions in RC or PC,"
      "or changes in mask bits. The number of serializing Ops retired")) },
  { I("fpu_pipe_assignment.total", 0, 0xf, STXT("Total number of fp uOps")) },
  { I("fpu_pipe_assignment.total0", 0, 0x1,
      STXT("Total number of fp uOps on pipe 0")) },
  { I("fpu_pipe_assignment.total1", 0, 0x2,
      STXT("Total number uOps assigned to pipe 1")) },
  { I("fpu_pipe_assignment.total2", 0, 0x4,
      STXT("Total number uOps assigned to pipe 2")) },
  { I("fpu_pipe_assignment.total3", 0, 0x8,
      STXT("Total number uOps assigned to pipe 3")) },
/* memory: */
  { I("ls_alloc_mab_count", 0x5f, 0, STXT("Count of Allocated Mabs")) },
  { I("ls_any_fills_from_sys.ext_cache_local", 0x44, 0x4,
      STXT("Any Data Cache Fills by Data Source. From cache of different CCX in"
      "same node")) },
  { I("ls_any_fills_from_sys.ext_cache_remote", 0x44, 0x10,
      STXT("Any Data Cache Fills by Data Source. From CCX Cache in different Node")) },
  { I("ls_any_fills_from_sys.int_cache", 0x44, 0x2,
      STXT("Any Data Cache Fills by Data Source. From L3 or different L2 in same"
      "CCX")) },
  { I("ls_any_fills_from_sys.lcl_l2", 0x44, 0x1,
      STXT("Any Data Cache Fills by Data Source. From Local L2 to the core")) },
  { I("ls_any_fills_from_sys.mem_io_local", 0x44, 0x8,
      STXT("Any Data Cache Fills by Data Source. From DRAM or IO connected in same"
      "node")) },
  { I("ls_any_fills_from_sys.mem_io_remote", 0x44, 0x40,
      STXT("Any Data Cache Fills by Data Source. From DRAM or IO connected in"
      "different Node")) },
  { I("ls_bad_status2.stli_other", 0x24, 0x2,
      STXT("Non-forwardable conflict; used to reduce STLI's via software. All"
      "reasons. Store To Load Interlock (STLI) are loads that were unable to"
      "complete because of a possible match with an older store, and the"
      "older store could not do STLF for some reason")) },
  { I("ls_dc_accesses", 0x40, 0,
      STXT("Number of accesses to the dcache for load/store references")) },
  { I("ls_dispatch.ld_dispatch", 0x29, 0x1,
      STXT("Dispatch of a single op that performs a memory load. Counts the number"
      "of operations dispatched to the LS unit. Unit Masks ADDed")) },
  { I("ls_dispatch.ld_st_dispatch", 0x29, 0x4,
      STXT("Load-op-Store Dispatch. Dispatch of a single op that performs a load"
      "from and store to the same memory address. Counts the number of"
      "operations dispatched to the LS unit. Unit Masks ADDed")) },
  { I("ls_dispatch.store_dispatch", 0x29, 0x2,
      STXT("Dispatch of a single op that performs a memory store. Counts the"
      "number of operations dispatched to the LS unit. Unit Masks ADDed")) },
  { I("ls_dmnd_fills_from_sys.ext_cache_local", 0x43, 0x4,
      STXT("Demand Data Cache Fills by Data Source. From cache of different CCX in"
      "same node")) },
  { I("ls_dmnd_fills_from_sys.ext_cache_remote", 0x43, 0x10,
      STXT("Demand Data Cache Fills by Data Source. From CCX Cache in different"
      "Node")) },
  { I("ls_dmnd_fills_from_sys.int_cache", 0x43, 0x2,
      STXT("Demand Data Cache Fills by Data Source. From L3 or different L2 in"
      "same CCX")) },
  { I("ls_dmnd_fills_from_sys.lcl_l2", 0x43, 0x1,
      STXT("Demand Data Cache Fills by Data Source. From Local L2 to the core")) },
  { I("ls_dmnd_fills_from_sys.mem_io_local", 0x43, 0x8,
      STXT("Demand Data Cache Fills by Data Source. From DRAM or IO connected in"
      "same node")) },
  { I("ls_dmnd_fills_from_sys.mem_io_remote", 0x43, 0x40,
      STXT("Demand Data Cache Fills by Data Source. From DRAM or IO connected in"
      "different Node")) },
  { I("ls_hw_pf_dc_fills.ext_cache_local", 0x5a, 0x4,
      STXT("Hardware Prefetch Data Cache Fills by Data Source. From cache of"
      "different CCX in same node")) },
  { I("ls_hw_pf_dc_fills.ext_cache_remote", 0x5a, 0x10,
      STXT("Hardware Prefetch Data Cache Fills by Data Source. From CCX Cache in"
      "different Node")) },
  { I("ls_hw_pf_dc_fills.int_cache", 0x5a, 0x2,
      STXT("Hardware Prefetch Data Cache Fills by Data Source. From L3 or"
      "different L2 in same CCX")) },
  { I("ls_hw_pf_dc_fills.lcl_l2", 0x5a, 0x1,
      STXT("Hardware Prefetch Data Cache Fills by Data Source. From Local L2 to"
      "the core")) },
  { I("ls_hw_pf_dc_fills.mem_io_local", 0x5a, 0x8,
      STXT("Hardware Prefetch Data Cache Fills by Data Source. From DRAM or IO"
      "connected in same node")) },
  { I("ls_hw_pf_dc_fills.mem_io_remote", 0x5a, 0x40,
      STXT("Hardware Prefetch Data Cache Fills by Data Source. From DRAM or IO"
      "connected in different Node")) },
  { I("ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", 0x52, 0x1,
      STXT("The number of software prefetches that did not fetch data outside of"
      "the processor core. Software PREFETCH instruction saw a DC hit")) },
  { I("ls_inef_sw_pref.mab_mch_cnt", 0x52, 0x2,
      STXT("The number of software prefetches that did not fetch data outside of"
      "the processor core. Software PREFETCH instruction saw a match on an"
      "already-allocated miss request buffer")) },
  { I("ls_int_taken", 0x2c, 0,
      STXT("Counts the number of interrupts taken")) },
  { I("ls_l1_d_tlb_miss.all", 0x45, 0xff,
      STXT("All L1 DTLB Misses or Reloads. Use l1_dtlb_misses instead")) },
  { I("ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", 0x45, 0x8,
      STXT("L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB")) },
  { I("ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", 0x45, 0x80,
      STXT("L1 DTLB Miss. DTLB reload to a 1G page that also missed in the L2 TLB")) },
  { I("ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", 0x45, 0x4,
      STXT("L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB")) },
  { I("ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", 0x45, 0x40,
      STXT("L1 DTLB Miss. DTLB reload to a 2M page that also missed in the L2 TLB")) },
  { I("ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", 0x45, 0x1,
      STXT("L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB")) },
  { I("ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", 0x45, 0x10,
      STXT("L1 DTLB Miss. DTLB reload to a 4K page that missed the L2 TLB")) },
  { I("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", 0x45, 0x2,
      STXT("L1 DTLB Miss. DTLB reload to a coalesced page that hit in the L2 TLB")) },
  { I("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss", 0x45, 0x20,
      STXT("L1 DTLB Miss. DTLB reload coalesced page that also missed in the L2"
      "TLB")) },
  { I("ls_locks.bus_lock", 0x25, 0x1,
      STXT("Retired lock instructions. Comparable to legacy bus lock")) },
  { I("ls_locks.non_spec_lock", 0x25, 0x2,
      STXT("Retired lock instructions. Non-speculative lock succeeded")) },
  { I("ls_locks.spec_lock_hi_spec", 0x25, 0x8,
      STXT("Retired lock instructions. High speculative cacheable lock speculation"
      "succeeded")) },
  { I("ls_locks.spec_lock_lo_spec", 0x25, 0x4,
      STXT("Retired lock instructions. Low speculative cacheable lock speculation"
      "succeeded")) },
  { I("ls_mab_alloc.all_allocations", 0x41, 0x7f,
      STXT("All Allocations. Counts when a LS pipe allocates a MAB entry")) },
  { I("ls_mab_alloc.dc_prefetcher", 0x41, 0x8,
      STXT("LS MAB Allocates by Type. DC prefetcher")) },
  { I("ls_mab_alloc.hardware_prefetcher_allocations", 0x41, 0x40,
      STXT("Hardware Prefetcher Allocations. Counts when a LS pipe allocates a MAB"
      "entry")) },
  { I("ls_mab_alloc.load_store_allocations", 0x41, 0x3f,
      STXT("Load Store Allocations. Counts when a LS pipe allocates a MAB entry")) },
  { I("ls_mab_alloc.loads", 0x41, 0x1,
      STXT("LS MAB Allocates by Type. Loads")) },
  { I("ls_mab_alloc.stores", 0x41, 0x2,
      STXT("LS MAB Allocates by Type. Stores")) },
  { I("ls_misal_loads.ma4k", 0x47, 0x2,
      STXT("The number of 4KB misaligned (i.e., page crossing) loads")) },
  { I("ls_misal_loads.ma64", 0x47, 0x1,
      STXT("The number of 64B misaligned (i.e., cacheline crossing) loads")) },
  { I("ls_not_halted_cyc", 0x76, 0, STXT("Cycles not in Halt")) },
  { I("ls_pref_instr_disp", 0x4b, 0xff,
      STXT("Software Prefetch Instructions Dispatched (Speculative)")) },
  { I("ls_pref_instr_disp.prefetch", 0x4b, 0x1,
      STXT("Software Prefetch Instructions Dispatched (Speculative). PrefetchT0,"
      "T1 and T2 instructions. See docAPM3 PREFETCHlevel")) },
  { I("ls_pref_instr_disp.prefetch_nta", 0x4b, 0x4,
      STXT("Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA"
      "instruction. See docAPM3 PREFETCHlevel")) },
  { I("ls_pref_instr_disp.prefetch_w", 0x4b, 0x2,
      STXT("Software Prefetch Instructions Dispatched (Speculative). PrefetchW"
      "instruction. See docAPM3 PREFETCHW")) },
  { I("ls_rdtsc", 0x2d, 0,
      STXT("Number of reads of the TSC (RDTSC instructions). The count is"
      "speculative")) },
  { I("ls_ret_cl_flush", 0x26, 0,
      STXT("The number of retired CLFLUSH instructions. This is a non-speculative"
      "event")) },
  { I("ls_ret_cpuid", 0x27, 0,
      STXT("The number of CPUID instructions retired")) },
  { I("ls_smi_rx", 0x2b, 0, STXT("Counts the number of SMIs received")) },
  { I("ls_st_commit_cancel2.st_commit_cancel_wcb_full", 0x37, 0x1,
      STXT("A non-cacheable store and the non-cacheable commit buffer is full")) },
  { I("ls_stlf", 0x35, 0, STXT("Number of STLF hits")) },
  /* ls_sw_pf_dc_fills.*: one entry per data source.  Each description is
     split across two string literals; the second literal starts with a
     space because literal concatenation does not insert one.  */
  { I("ls_sw_pf_dc_fills.ext_cache_local", 0x59, 0x4,
      STXT("Software Prefetch Data Cache Fills by Data Source. From cache of"
      " different CCX in same node")) },
  { I("ls_sw_pf_dc_fills.ext_cache_remote", 0x59, 0x10,
      STXT("Software Prefetch Data Cache Fills by Data Source. From CCX Cache in"
      " different Node")) },
  { I("ls_sw_pf_dc_fills.int_cache", 0x59, 0x2,
      STXT("Software Prefetch Data Cache Fills by Data Source. From L3 or"
      " different L2 in same CCX")) },
  { I("ls_sw_pf_dc_fills.lcl_l2", 0x59, 0x1,
      STXT("Software Prefetch Data Cache Fills by Data Source. From Local L2 to"
      " the core")) },
  { I("ls_sw_pf_dc_fills.mem_io_local", 0x59, 0x8,
      STXT("Software Prefetch Data Cache Fills by Data Source. From DRAM or IO"
      " connected in same node")) },
  { I("ls_sw_pf_dc_fills.mem_io_remote", 0x59, 0x40,
      STXT("Software Prefetch Data Cache Fills by Data Source. From DRAM or IO"
      " connected in different Node")) },
  /* ls_tablewalker.* and ls_tlb_flush.* entries.  The .dside and .iside
     masks (0x3, 0xc) are the bitwise OR of the corresponding type0/type1
     masks listed next to them.  */
  { I("ls_tablewalker.dc_type0", 0x46, 0x1,
      STXT("Total Page Table Walks DC Type 0")) },
  { I("ls_tablewalker.dc_type1", 0x46, 0x2,
      STXT("Total Page Table Walks DC Type 1")) },
  { I("ls_tablewalker.dside", 0x46, 0x3,
      STXT("Total Page Table Walks on D-side")) },
  { I("ls_tablewalker.ic_type0", 0x46, 0x4,
      STXT("Total Page Table Walks IC Type 0")) },
  { I("ls_tablewalker.ic_type1", 0x46, 0x8,
      STXT("Total Page Table Walks IC Type 1")) },
  { I("ls_tablewalker.iside", 0x46, 0xc,
      STXT("Total Page Table Walks on I-side")) },
  /* The continuation literal starts with a space so the two sentences are
     not run together after string-literal concatenation.  */
  { I("ls_tlb_flush.all_tlb_flushes", 0x78, 0xff,
      STXT("All TLB Flushes. Requires unit mask 0xFF to engage event for counting."
      " Use all_tlbs_flushed instead")) },
/* other: */
  /* NOTE(review): "Oops" in the two descriptions below is kept verbatim;
     it may be a typo for "Ops" in the upstream event description --
     confirm against the vendor documentation before changing it.  */
  { I("de_dis_cops_from_decoder.disp_op_type.any_fp_dispatch", 0xab, 0x4,
      STXT("Any FP dispatch. Types of Oops Dispatched from Decoder")) },
  { I("de_dis_cops_from_decoder.disp_op_type.any_integer_dispatch", 0xab, 0x8,
      STXT("Any Integer dispatch. Types of Oops Dispatched from Decoder")) },
  /* de_dis_dispatch_token_stalls1.*: the long descriptions are assembled
     from adjacent string literals, which the compiler joins without any
     separator; every continuation literal therefore begins with a space.  */
  { I("de_dis_dispatch_token_stalls1.fp_flush_recovery_stall", 0xae, 0x80,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a Token Stall. Also counts cycles when the thread is not selected"
      " to dispatch but would have been stalled due to a Token Stall. FP Flush"
      " recovery stall")) },
  { I("de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall", 0xae, 0x20,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a Token Stall. Also counts cycles when the thread is not selected"
      " to dispatch but would have been stalled due to a Token Stall. Floating"
      " point register file resource stall. Applies to all FP ops that have a"
      " destination register")) },
  { I("de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall", 0xae, 0x40,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a Token Stall. Also counts cycles when the thread is not selected"
      " to dispatch but would have been stalled due to a Token Stall. FP"
      " scheduler resource stall. Applies to ops that use the FP scheduler")) },
  { I("de_dis_dispatch_token_stalls1.int_phy_reg_file_rsrc_stall", 0xae, 0x1,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a Token Stall. Also counts cycles when the thread is not selected"
      " to dispatch but would have been stalled due to a Token Stall. Integer"
      " Physical Register File resource stall. Integer Physical Register File,"
      " applies to all ops that have an integer destination register")) },
  { I("de_dis_dispatch_token_stalls1.int_sched_misc_token_stall", 0xae, 0x8,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a token stall. Integer Scheduler miscellaneous resource stall")) },
  { I("de_dis_dispatch_token_stalls1.load_queue_rsrc_stall", 0xae, 0x2,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a Token Stall. Also counts cycles when the thread is not selected"
      " to dispatch but would have been stalled due to a Token Stall. Load"
      " Queue resource stall. Applies to all ops with load semantics")) },
  { I("de_dis_dispatch_token_stalls1.store_queue_rsrc_stall", 0xae, 0x4,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a Token Stall. Also counts cycles when the thread is not selected"
      " to dispatch but would have been stalled due to a Token Stall. Store"
      " Queue resource stall. Applies to all ops with store semantics")) },
  { I("de_dis_dispatch_token_stalls1.taken_brnch_buffer_rsrc", 0xae, 0x10,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a Token Stall. Also counts cycles when the thread is not selected"
      " to dispatch but would have been stalled due to a Token Stall. Taken"
      " branch buffer resource stall")) },
  /* de_dis_dispatch_token_stalls2.*: each description is two adjacent
     string literals; the second starts with a space so the joined text
     reads "due to" rather than "dueto".  */
  { I("de_dis_dispatch_token_stalls2.agsq_token_stall", 0xaf, 0x10,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a token stall. AGSQ Tokens unavailable")) },
  { I("de_dis_dispatch_token_stalls2.int_sch0_token_stall", 0xaf, 0x1,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a token stall. No tokens for Integer Scheduler Queue 0 available")) },
  { I("de_dis_dispatch_token_stalls2.int_sch1_token_stall", 0xaf, 0x2,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a token stall. No tokens for Integer Scheduler Queue 1 available")) },
  { I("de_dis_dispatch_token_stalls2.int_sch2_token_stall", 0xaf, 0x4,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a token stall. No tokens for Integer Scheduler Queue 2 available")) },
  { I("de_dis_dispatch_token_stalls2.int_sch3_token_stall", 0xaf, 0x8,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a token stall. No tokens for Integer Scheduler Queue 3 available")) },
  { I("de_dis_dispatch_token_stalls2.retire_token_stall", 0xaf, 0x20,
      STXT("Cycles where a dispatch group is valid but does not get dispatched due"
      " to a token stall. Insufficient Retire Queue tokens available")) },
  { I("de_dis_uop_queue_empty_di0", 0xa9, 0,
      STXT("Cycles where the Micro-Op Queue is empty")) },
/* recommended: */
  { I("all_data_cache_accesses", 0x29, 0x7,
      STXT("All L1 Data Cache Accesses")) },
  { I("all_tlbs_flushed", 0x78, 0xff, STXT("All TLBs Flushed")) },
  { I("l1_data_cache_fills_all", 0x44, 0xff,
      STXT("L1 Data Cache Fills: All")) },
  { I("l1_data_cache_fills_from_external_ccx_cache", 0x44, 0x14,
      STXT("L1 Data Cache Fills: From External CCX Cache")) },
  { I("l1_data_cache_fills_from_memory", 0x44, 0x48,
      STXT("L1 Data Cache Fills: From Memory")) },
  { I("l1_data_cache_fills_from_remote_node", 0x44, 0x50,
      STXT("L1 Data Cache Fills: From Remote Node")) },
  { I("l1_data_cache_fills_from_within_same_ccx", 0x44, 0x3,
      STXT("L1 Data Cache Fills: From within same CCX")) },
  { I("l1_dtlb_misses", 0x45, 0xff, STXT("L1 DTLB Misses")) },
  { I("l2_cache_accesses_from_dc_misses", 0x60, 0xe8,
      STXT("L2 Cache Accesses from L1 Data Cache Misses (including prefetch)")) },
  /* The description is split across two string literals; the second one
     starts with a space so the result reads "(including prefetch)".  */
  { I("l2_cache_accesses_from_ic_misses", 0x60, 0x10,
      STXT("L2 Cache Accesses from L1 Instruction Cache Misses (including"
      " prefetch)")) },
  { I("l2_cache_hits_from_dc_misses", 0x64, 0xf0,
      STXT("L2 Cache Hits from L1 Data Cache Misses")) },
  { I("l2_cache_hits_from_ic_misses", 0x64, 0x6,
      STXT("L2 Cache Hits from L1 Instruction Cache Misses")) },
  { I("l2_cache_hits_from_l2_hwpf", 0x70, 0xff,
      STXT("L2 Cache Hits from L2 Cache HWPF")) },
  { I("l2_cache_misses_from_dc_misses", 0x64, 0x8,
      STXT("L2 Cache Misses from L1 Data Cache Misses")) },
  { I("l2_cache_misses_from_ic_miss", 0x64, 0x1,
      STXT("L2 Cache Misses from L1 Instruction Cache Misses")) },
  { I("l2_dtlb_misses", 0x45, 0xf0,
      STXT("L2 DTLB Misses & Data page walks")) },
  { I("l2_itlb_misses", 0x85, 0x7,
      STXT("L2 ITLB Misses & Instruction page walks")) },
  { I("macro_ops_retired", 0xc1, 0, STXT("Macro-ops Retired")) },
  { I("sse_avx_stalls", 0xe, 0xe, STXT("Mixed SSE/AVX Stalls")) },
  { NULL, NULL, 0, NULL }
};

#undef I
#endif