/* Copyright (C) 2024 Free Software Foundation, Inc. Contributed by Oracle. This file is part of GNU Binutils. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _HWC_AMD_ZEN3_H #define _HWC_AMD_ZEN3_H #define I(nm, event, umask, mtr) INIT_HWC(nm, mtr, (event) | ((umask) << 8), PERF_TYPE_RAW) static Hwcentry amd_zen3_list[] = { HWC_GENERIC /* branch: */ { I("bp_de_redirect", 0x91, 0, STXT("Decode Redirects")) }, { I("bp_dyn_ind_pred", 0x8e, 0, STXT("Dynamic Indirect Predictions")) }, { I("bp_l1_btb_correct", 0x8a, 0, STXT("L1 Branch Prediction Overrides Existing Prediction (speculative)")) }, { I("bp_l1_tlb_fetch_hit", 0x94, 0xff, STXT("The number of instruction fetches that hit in the L1 ITLB")) }, { I("bp_l1_tlb_fetch_hit.if1g", 0x94, 0x4, STXT("The number of instruction fetches that hit in the L1 ITLB. L1" "Instruction TLB hit (1G page size)")) }, { I("bp_l1_tlb_fetch_hit.if2m", 0x94, 0x2, STXT("The number of instruction fetches that hit in the L1 ITLB. L1" "Instruction TLB hit (2M page size)")) }, { I("bp_l1_tlb_fetch_hit.if4k", 0x94, 0x1, STXT("The number of instruction fetches that hit in the L1 ITLB. L1" "Instrcution TLB hit (4K or 16K page size)")) }, { I("bp_l2_btb_correct", 0x8b, 0, STXT("L2 Branch Prediction Overrides Existing Prediction (speculative)")) }, { I("bp_tlb_rel", 0x99, 0, STXT("The number of ITLB reload requests")) }, /* cache: */ { I("bp_l1_tlb_miss_l2_tlb_hit", 0x84, 0, STXT("L1 ITLB Miss, L2 ITLB Hit. The number of instruction fetches that miss" "in the L1 ITLB but hit in the L2 ITLB")) }, { I("bp_l1_tlb_miss_l2_tlb_miss", 0x85, 0xff, STXT("The number of instruction fetches that miss in both the L1 and L2 TLBs")) }, { I("bp_l1_tlb_miss_l2_tlb_miss.coalesced_4k", 0x85, 0x8, STXT("The number of valid fills into the ITLB originating from the LS" "Page-Table Walker. Tablewalk requests are issued for L1-ITLB and" "L2-ITLB misses. Walk for >4K Coalesced page")) }, { I("bp_l1_tlb_miss_l2_tlb_miss.if1g", 0x85, 0x4, STXT("The number of valid fills into the ITLB originating from the LS" "Page-Table Walker. Tablewalk requests are issued for L1-ITLB and" "L2-ITLB misses. Walk for 1G page")) }, { I("bp_l1_tlb_miss_l2_tlb_miss.if2m", 0x85, 0x2, STXT("The number of valid fills into the ITLB originating from the LS" "Page-Table Walker. Tablewalk requests are issued for L1-ITLB and" "L2-ITLB misses. Walk for 2M page")) }, { I("bp_l1_tlb_miss_l2_tlb_miss.if4k", 0x85, 0x1, STXT("The number of valid fills into the ITLB originating from the LS" "Page-Table Walker. Tablewalk requests are issued for L1-ITLB and" "L2-ITLB misses. Walk to 4K page")) }, { I("bp_snp_re_sync", 0x86, 0, STXT("The number of pipeline restarts caused by invalidating probes that hit" "on the instruction stream currently being executed. This would happen" "if the active instruction stream was being modified by another" "processor in an MP system - typically a highly unlikely event")) }, { I("ic_cache_fill_l2", 0x82, 0, STXT("Instruction Cache Refills from L2. The number of 64 byte instruction" "cache line was fulfilled from the L2 cache")) }, { I("ic_cache_fill_sys", 0x83, 0, STXT("Instruction Cache Refills from System. The number of 64 byte" "instruction cache line fulfilled from system memory or another cache")) }, { I("ic_cache_inval.fill_invalidated", 0x8c, 0x1, STXT("IC line invalidated due to overwriting fill response. The number of" "instruction cache lines invalidated. A non-SMC event is CMC (cross" "modifying code), either from the other thread of the core or another" "core")) }, { I("ic_cache_inval.l2_invalidating_probe", 0x8c, 0x2, STXT("IC line invalidated due to L2 invalidating probe (external or LS). The" "number of instruction cache lines invalidated. A non-SMC event is CMC" "(cross modifying code), either from the other thread of the core or" "another core")) }, { I("ic_fetch_stall.ic_stall_any", 0x87, 0x4, STXT("Instruction Pipe Stall. IC pipe was stalled during this clock cycle" "for any reason (nothing valid in pipe ICM1)")) }, { I("ic_fetch_stall.ic_stall_back_pressure", 0x87, 0x1, STXT("Instruction Pipe Stall. IC pipe was stalled during this clock cycle" "(including IC to OC fetches) due to back-pressure")) }, { I("ic_fetch_stall.ic_stall_dq_empty", 0x87, 0x2, STXT("Instruction Pipe Stall. IC pipe was stalled during this clock cycle" "(including IC to OC fetches) due to DQ empty")) }, { I("ic_fw32", 0x80, 0, STXT("The number of 32B fetch windows transferred from IC pipe to DE" "instruction decoder (includes non-cacheable and cacheable fill" "responses)")) }, { I("ic_fw32_miss", 0x81, 0, STXT("The number of 32B fetch windows tried to read the L1 IC and missed in" "the full tag")) }, { I("ic_oc_mode_switch.ic_oc_mode_switch", 0x28a, 0x1, STXT("OC Mode Switch. IC to OC mode switch")) }, { I("ic_oc_mode_switch.oc_ic_mode_switch", 0x28a, 0x2, STXT("OC Mode Switch. OC to IC mode switch")) }, { I("ic_tag_hit_miss.all_instruction_cache_accesses", 0x18e, 0x1f, STXT("All Instruction Cache Accesses. Counts various IC tag related hit and" "miss events")) }, { I("ic_tag_hit_miss.instruction_cache_hit", 0x18e, 0x7, STXT("Instruction Cache Hit. Counts various IC tag related hit and miss" "events")) }, { I("ic_tag_hit_miss.instruction_cache_miss", 0x18e, 0x18, STXT("Instruction Cache Miss. Counts various IC tag related hit and miss" "events")) }, { I("l2_cache_req_stat.ic_access_in_l2", 0x64, 0x7, STXT("Core to L2 cacheable request access status (not including L2" "Prefetch). Instruction cache requests in L2")) }, { I("l2_cache_req_stat.ic_dc_hit_in_l2", 0x64, 0xf6, STXT("Core to L2 cacheable request access status (not including L2" "Prefetch). Instruction cache request hit in L2 and Data cache request" "hit in L2 (all types)")) }, { I("l2_cache_req_stat.ic_dc_miss_in_l2", 0x64, 0x9, STXT("Core to L2 cacheable request access status (not including L2" "Prefetch). Instruction cache request miss in L2 and Data cache request" "miss in L2 (all types)")) }, { I("l2_cache_req_stat.ic_fill_hit_s", 0x64, 0x2, STXT("Core to L2 cacheable request access status (not including L2" "Prefetch). Instruction cache hit non-modifiable line in L2")) }, { I("l2_cache_req_stat.ic_fill_hit_x", 0x64, 0x4, STXT("Core to L2 cacheable request access status (not including L2" "Prefetch). Instruction cache hit modifiable line in L2")) }, { I("l2_cache_req_stat.ic_fill_miss", 0x64, 0x1, STXT("Core to L2 cacheable request access status (not including L2" "Prefetch). Instruction cache request miss in L2. Use" "l2_cache_misses_from_ic_miss instead")) }, { I("l2_cache_req_stat.ls_rd_blk_c", 0x64, 0x8, STXT("Core to L2 cacheable request access status (not including L2" "Prefetch). Data cache request miss in L2 (all types). Use" "l2_cache_misses_from_dc_misses instead")) }, { I("l2_cache_req_stat.ls_rd_blk_cs", 0x64, 0x80, STXT("Core to L2 cacheable request access status (not including L2" "Prefetch). Data cache shared read hit in L2")) }, { I("l2_cache_req_stat.ls_rd_blk_l_hit_s", 0x64, 0x20, STXT("Core to L2 cacheable request access status (not including L2" "Prefetch). Data cache read hit non-modifiable line in L2")) }, { I("l2_cache_req_stat.ls_rd_blk_l_hit_x", 0x64, 0x40, STXT("Core to L2 cacheable request access status (not including L2" "Prefetch). Data cache read hit in L2. Modifiable")) }, { I("l2_cache_req_stat.ls_rd_blk_x", 0x64, 0x10, STXT("Core to L2 cacheable request access status (not including L2" "Prefetch). Data cache store or state change hit in L2")) }, { I("l2_fill_pending.l2_fill_busy", 0x6d, 0x1, STXT("Cycles with fill pending from L2. Total cycles spent with one or more" "fill requests in flight from L2")) }, { I("l2_latency.l2_cycles_waiting_on_fills", 0x62, 0x1, STXT("Total cycles spent waiting for L2 fills to complete from L3 or memory," "divided by four. Event counts are for both threads. To calculate" "average latency, the number of fills from both threads must be used")) }, { I("l2_pf_hit_l2", 0x70, 0xff, STXT("L2 prefetch hit in L2. Use l2_cache_hits_from_l2_hwpf instead")) }, { I("l2_pf_miss_l2_hit_l3", 0x71, 0xff, STXT("L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2" "pipeline which miss the L2 cache and hit the L3")) }, { I("l2_pf_miss_l2_l3", 0x72, 0xff, STXT("L2 prefetcher misses in L3. Counts all L2 prefetches accepted by the" "L2 pipeline which miss the L2 and the L3 caches")) }, { I("l2_request_g1.all_no_prefetch", 0x60, 0xf9, STXT("(null)")) }, { I("l2_request_g1.cacheable_ic_read", 0x60, 0x10, STXT("All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads")) }, { I("l2_request_g1.change_to_x", 0x60, 0x8, STXT("All L2 Cache Requests (Breakdown 1 - Common). Data cache state change" "requests. Request change to writable, check L2 for current state")) }, { I("l2_request_g1.group2", 0x60, 0x1, STXT("Miscellaneous events covered in more detail by l2_request_g2 (PMCx061)")) }, { I("l2_request_g1.l2_hw_pf", 0x60, 0x2, STXT("All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All" "prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2" "hit/miss broken out in a separate perfmon event")) }, { I("l2_request_g1.ls_rd_blk_c_s", 0x60, 0x20, STXT("All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads")) }, { I("l2_request_g1.prefetch_l2_cmd", 0x60, 0x4, STXT("All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd")) }, { I("l2_request_g1.rd_blk_l", 0x60, 0x80, STXT("All L2 Cache Requests (Breakdown 1 - Common). Data cache reads" "(including hardware and software prefetch)")) }, { I("l2_request_g1.rd_blk_x", 0x60, 0x40, STXT("All L2 Cache Requests (Breakdown 1 - Common). Data cache stores")) }, { I("l2_request_g2.bus_locks_originator", 0x61, 0x2, STXT("All L2 Cache Requests (Breakdown 2 - Rare). Bus locks")) }, { I("l2_request_g2.bus_locks_responses", 0x61, 0x1, STXT("All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response")) }, { I("l2_request_g2.group1", 0x61, 0x80, STXT("Miscellaneous events covered in more detail by l2_request_g1 (PMCx060)")) }, { I("l2_request_g2.ic_rd_sized", 0x61, 0x10, STXT("All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read" "sized")) }, { I("l2_request_g2.ic_rd_sized_nc", 0x61, 0x8, STXT("All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read" "sized non-cacheable")) }, { I("l2_request_g2.ls_rd_sized", 0x61, 0x40, STXT("All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized")) }, { I("l2_request_g2.ls_rd_sized_nc", 0x61, 0x20, STXT("All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized" "non-cacheable")) }, { I("l2_request_g2.smc_inval", 0x61, 0x4, STXT("All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code" "invalidates")) }, { I("l2_wcb_req.cl_zero", 0x63, 0x1, STXT("LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2" "WCB (Write Combining Buffer) cache line zeroing requests")) }, { I("l2_wcb_req.wcb_close", 0x63, 0x20, STXT("LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write" "Combining Buffer) close requests")) }, { I("l2_wcb_req.wcb_write", 0x63, 0x40, STXT("LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write" "Combining Buffer) write requests")) }, { I("l2_wcb_req.zero_byte_store", 0x63, 0x4, STXT("LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB" "(Write Combining Buffer) zero byte store requests")) }, { I("op_cache_hit_miss.all_op_cache_accesses", 0x28f, 0x7, STXT("All Op Cache accesses. Counts Op Cache micro-tag hit/miss events")) }, { I("op_cache_hit_miss.op_cache_hit", 0x28f, 0x3, STXT("Op Cache Hit. Counts Op Cache micro-tag hit/miss events")) }, { I("op_cache_hit_miss.op_cache_miss", 0x28f, 0x4, STXT("Op Cache Miss. Counts Op Cache micro-tag hit/miss events")) }, /* core: */ { I("ex_div_busy", 0xd3, 0, STXT("Div Cycles Busy count")) }, { I("ex_div_count", 0xd4, 0, STXT("Div Op Count")) }, { I("ex_ret_brn", 0xc2, 0, STXT("Retired Branch Instructions")) }, { I("ex_ret_brn_far", 0xc6, 0, STXT("Retired Far Control Transfers")) }, { I("ex_ret_brn_ind_misp", 0xca, 0, STXT("Retired Indirect Branch Instructions Mispredicted")) }, { I("ex_ret_brn_misp", 0xc3, 0, STXT("Retired Branch Instructions Mispredicted")) }, { I("ex_ret_brn_resync", 0xc7, 0, STXT("Retired Branch Resyncs")) }, { I("ex_ret_brn_tkn", 0xc4, 0, STXT("Retired Taken Branch Instructions")) }, { I("ex_ret_brn_tkn_misp", 0xc5, 0, STXT("Retired Taken Branch Instructions Mispredicted")) }, { I("ex_ret_cond", 0xd1, 0, STXT("Retired Conditional Branch Instructions")) }, { I("ex_ret_fused_instr", 0x1d0, 0, STXT("Counts retired Fused Instructions")) }, { I("ex_ret_ind_brch_instr", 0xcc, 0, STXT("Retired Indirect Branch Instructions. The number of indirect branches" "retired")) }, { I("ex_ret_instr", 0xc0, 0, STXT("Retired Instructions")) }, { I("ex_ret_mmx_fp_instr.mmx_instr", 0xcb, 0x2, STXT("MMX instructions")) }, { I("ex_ret_mmx_fp_instr.sse_instr", 0xcb, 0x4, STXT("SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX)")) }, { I("ex_ret_mmx_fp_instr.x87_instr", 0xcb, 0x1, STXT("x87 instructions")) }, { I("ex_ret_msprd_brnch_instr_dir_msmtch", 0x1c7, 0, STXT("Retired Mispredicted Branch Instructions due to Direction Mismatch")) }, { I("ex_ret_near_ret", 0xc8, 0, STXT("Retired Near Returns")) }, { I("ex_ret_near_ret_mispred", 0xc9, 0, STXT("Retired Near Returns Mispredicted")) }, { I("ex_ret_ops", 0xc1, 0, STXT("Retired Ops. Use macro_ops_retired instead")) }, { I("ex_tagged_ibs_ops.ibs_count_rollover", 0x1cf, 0x4, STXT("Tagged IBS Ops. Number of times an op could not be tagged by IBS" "because of a previous tagged op that has not retired")) }, { I("ex_tagged_ibs_ops.ibs_tagged_ops", 0x1cf, 0x1, STXT("Tagged IBS Ops. Number of Ops tagged by IBS")) }, { I("ex_tagged_ibs_ops.ibs_tagged_ops_ret", 0x1cf, 0x2, STXT("Tagged IBS Ops. Number of Ops tagged by IBS that retired")) }, /* floating point: */ { I("fp_disp_faults.x87_fill_fault", 0xe, 0x1, STXT("Floating Point Dispatch Faults. x87 fill fault")) }, { I("fp_disp_faults.xmm_fill_fault", 0xe, 0x2, STXT("Floating Point Dispatch Faults. XMM fill fault")) }, { I("fp_disp_faults.ymm_fill_fault", 0xe, 0x4, STXT("Floating Point Dispatch Faults. YMM fill fault")) }, { I("fp_disp_faults.ymm_spill_fault", 0xe, 0x8, STXT("Floating Point Dispatch Faults. YMM spill fault")) }, { I("fp_num_mov_elim_scal_op.opt_potential", 0x4, 0x4, STXT("Number of Ops that are candidates for optimization (have Z-bit either" "set or pass). This is a dispatch based speculative event, and is" "useful for measuring the effectiveness of the Move elimination and" "Scalar code optimization schemes")) }, { I("fp_num_mov_elim_scal_op.optimized", 0x4, 0x8, STXT("Number of Scalar Ops optimized. This is a dispatch based speculative" "event, and is useful for measuring the effectiveness of the Move" "elimination and Scalar code optimization schemes")) }, { I("fp_num_mov_elim_scal_op.sse_mov_ops", 0x4, 0x1, STXT("Number of SSE Move Ops. This is a dispatch based speculative event," "and is useful for measuring the effectiveness of the Move elimination" "and Scalar code optimization schemes")) }, { I("fp_num_mov_elim_scal_op.sse_mov_ops_elim", 0x4, 0x2, STXT("Number of SSE Move Ops eliminated. This is a dispatch based" "speculative event, and is useful for measuring the effectiveness of" "the Move elimination and Scalar code optimization schemes")) }, { I("fp_ret_sse_avx_ops.add_sub_flops", 0x3, 0x1, STXT("Add/subtract FLOPs. This is a retire-based event. The number of" "retired SSE/AVX FLOPs. The number of events logged per cycle can vary" "from 0 to 64. This event requires the use of the MergeEvent since it" "can count above 15 events per cycle. See 2.1.17.3 [Large Increment per" "Cycle Events]. It does not provide a useful count without the use of" "the MergeEvent")) }, { I("fp_ret_sse_avx_ops.all", 0x3, 0xff, STXT("All FLOPS. This is a retire-based event. The number of retired SSE/AVX" "FLOPS. The number of events logged per cycle can vary from 0 to 64." "This event can count above 15")) }, { I("fp_ret_sse_avx_ops.div_flops", 0x3, 0x4, STXT("Divide/square root FLOPs. This is a retire-based event. The number of" "retired SSE/AVX FLOPs. The number of events logged per cycle can vary" "from 0 to 64. This event requires the use of the MergeEvent since it" "can count above 15 events per cycle. See 2.1.17.3 [Large Increment per" "Cycle Events]. It does not provide a useful count without the use of" "the MergeEvent")) }, { I("fp_ret_sse_avx_ops.mac_flops", 0x3, 0x8, STXT("Multiply-Accumulate FLOPs. Each MAC operation is counted as 2 FLOPS." "This is a retire-based event. The number of retired SSE/AVX FLOPs. The" "number of events logged per cycle can vary from 0 to 64. This event" "requires the use of the MergeEvent since it can count above 15 events" "per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does" "not provide a useful count without the use of the MergeEvent")) }, { I("fp_ret_sse_avx_ops.mult_flops", 0x3, 0x2, STXT("Multiply FLOPs. This is a retire-based event. The number of retired" "SSE/AVX FLOPs. The number of events logged per cycle can vary from 0" "to 64. This event requires the use of the MergeEvent since it can" "count above 15 events per cycle. See 2.1.17.3 [Large Increment per" "Cycle Events]. It does not provide a useful count without the use of" "the MergeEvent")) }, { I("fp_retired_ser_ops.sse_bot_ret", 0x5, 0x8, STXT("SSE/AVX bottom-executing ops retired. The number of serializing Ops" "retired")) }, { I("fp_retired_ser_ops.sse_ctrl_ret", 0x5, 0x4, STXT("SSE/AVX control word mispredict traps. The number of serializing Ops" "retired")) }, { I("fp_retired_ser_ops.x87_bot_ret", 0x5, 0x2, STXT("x87 bottom-executing ops retired. The number of serializing Ops" "retired")) }, { I("fp_retired_ser_ops.x87_ctrl_ret", 0x5, 0x1, STXT("x87 control word mispredict traps due to mispredictions in RC or PC," "or changes in mask bits. The number of serializing Ops retired")) }, { I("fpu_pipe_assignment.total", 0, 0xf, STXT("Total number of fp uOps")) }, { I("fpu_pipe_assignment.total0", 0, 0x1, STXT("Total number of fp uOps on pipe 0")) }, { I("fpu_pipe_assignment.total1", 0, 0x2, STXT("Total number uOps assigned to pipe 1")) }, { I("fpu_pipe_assignment.total2", 0, 0x4, STXT("Total number uOps assigned to pipe 2")) }, { I("fpu_pipe_assignment.total3", 0, 0x8, STXT("Total number uOps assigned to pipe 3")) }, /* memory: */ { I("ls_alloc_mab_count", 0x5f, 0, STXT("Count of Allocated Mabs")) }, { I("ls_any_fills_from_sys.ext_cache_local", 0x44, 0x4, STXT("Any Data Cache Fills by Data Source. From cache of different CCX in" "same node")) }, { I("ls_any_fills_from_sys.ext_cache_remote", 0x44, 0x10, STXT("Any Data Cache Fills by Data Source. From CCX Cache in different Node")) }, { I("ls_any_fills_from_sys.int_cache", 0x44, 0x2, STXT("Any Data Cache Fills by Data Source. From L3 or different L2 in same" "CCX")) }, { I("ls_any_fills_from_sys.lcl_l2", 0x44, 0x1, STXT("Any Data Cache Fills by Data Source. From Local L2 to the core")) }, { I("ls_any_fills_from_sys.mem_io_local", 0x44, 0x8, STXT("Any Data Cache Fills by Data Source. From DRAM or IO connected in same" "node")) }, { I("ls_any_fills_from_sys.mem_io_remote", 0x44, 0x40, STXT("Any Data Cache Fills by Data Source. From DRAM or IO connected in" "different Node")) }, { I("ls_bad_status2.stli_other", 0x24, 0x2, STXT("Non-forwardable conflict; used to reduce STLI's via software. All" "reasons. Store To Load Interlock (STLI) are loads that were unable to" "complete because of a possible match with an older store, and the" "older store could not do STLF for some reason")) }, { I("ls_dc_accesses", 0x40, 0, STXT("Number of accesses to the dcache for load/store references")) }, { I("ls_dispatch.ld_dispatch", 0x29, 0x1, STXT("Dispatch of a single op that performs a memory load. Counts the number" "of operations dispatched to the LS unit. Unit Masks ADDed")) }, { I("ls_dispatch.ld_st_dispatch", 0x29, 0x4, STXT("Load-op-Store Dispatch. Dispatch of a single op that performs a load" "from and store to the same memory address. Counts the number of" "operations dispatched to the LS unit. Unit Masks ADDed")) }, { I("ls_dispatch.store_dispatch", 0x29, 0x2, STXT("Dispatch of a single op that performs a memory store. Counts the" "number of operations dispatched to the LS unit. Unit Masks ADDed")) }, { I("ls_dmnd_fills_from_sys.ext_cache_local", 0x43, 0x4, STXT("Demand Data Cache Fills by Data Source. From cache of different CCX in" "same node")) }, { I("ls_dmnd_fills_from_sys.ext_cache_remote", 0x43, 0x10, STXT("Demand Data Cache Fills by Data Source. From CCX Cache in different" "Node")) }, { I("ls_dmnd_fills_from_sys.int_cache", 0x43, 0x2, STXT("Demand Data Cache Fills by Data Source. From L3 or different L2 in" "same CCX")) }, { I("ls_dmnd_fills_from_sys.lcl_l2", 0x43, 0x1, STXT("Demand Data Cache Fills by Data Source. From Local L2 to the core")) }, { I("ls_dmnd_fills_from_sys.mem_io_local", 0x43, 0x8, STXT("Demand Data Cache Fills by Data Source. From DRAM or IO connected in" "same node")) }, { I("ls_dmnd_fills_from_sys.mem_io_remote", 0x43, 0x40, STXT("Demand Data Cache Fills by Data Source. From DRAM or IO connected in" "different Node")) }, { I("ls_hw_pf_dc_fills.ext_cache_local", 0x5a, 0x4, STXT("Hardware Prefetch Data Cache Fills by Data Source. From cache of" "different CCX in same node")) }, { I("ls_hw_pf_dc_fills.ext_cache_remote", 0x5a, 0x10, STXT("Hardware Prefetch Data Cache Fills by Data Source. From CCX Cache in" "different Node")) }, { I("ls_hw_pf_dc_fills.int_cache", 0x5a, 0x2, STXT("Hardware Prefetch Data Cache Fills by Data Source. From L3 or" "different L2 in same CCX")) }, { I("ls_hw_pf_dc_fills.lcl_l2", 0x5a, 0x1, STXT("Hardware Prefetch Data Cache Fills by Data Source. From Local L2 to" "the core")) }, { I("ls_hw_pf_dc_fills.mem_io_local", 0x5a, 0x8, STXT("Hardware Prefetch Data Cache Fills by Data Source. From DRAM or IO" "connected in same node")) }, { I("ls_hw_pf_dc_fills.mem_io_remote", 0x5a, 0x40, STXT("Hardware Prefetch Data Cache Fills by Data Source. From DRAM or IO" "connected in different Node")) }, { I("ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", 0x52, 0x1, STXT("The number of software prefetches that did not fetch data outside of" "the processor core. Software PREFETCH instruction saw a DC hit")) }, { I("ls_inef_sw_pref.mab_mch_cnt", 0x52, 0x2, STXT("The number of software prefetches that did not fetch data outside of" "the processor core. Software PREFETCH instruction saw a match on an" "already-allocated miss request buffer")) }, { I("ls_int_taken", 0x2c, 0, STXT("Counts the number of interrupts taken")) }, { I("ls_l1_d_tlb_miss.all", 0x45, 0xff, STXT("All L1 DTLB Misses or Reloads. Use l1_dtlb_misses instead")) }, { I("ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", 0x45, 0x8, STXT("L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB")) }, { I("ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", 0x45, 0x80, STXT("L1 DTLB Miss. DTLB reload to a 1G page that also missed in the L2 TLB")) }, { I("ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", 0x45, 0x4, STXT("L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB")) }, { I("ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", 0x45, 0x40, STXT("L1 DTLB Miss. DTLB reload to a 2M page that also missed in the L2 TLB")) }, { I("ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", 0x45, 0x1, STXT("L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB")) }, { I("ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", 0x45, 0x10, STXT("L1 DTLB Miss. DTLB reload to a 4K page that missed the L2 TLB")) }, { I("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", 0x45, 0x2, STXT("L1 DTLB Miss. DTLB reload to a coalesced page that hit in the L2 TLB")) }, { I("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss", 0x45, 0x20, STXT("L1 DTLB Miss. DTLB reload coalesced page that also missed in the L2" "TLB")) }, { I("ls_locks.bus_lock", 0x25, 0x1, STXT("Retired lock instructions. Comparable to legacy bus lock")) }, { I("ls_locks.non_spec_lock", 0x25, 0x2, STXT("Retired lock instructions. Non-speculative lock succeeded")) }, { I("ls_locks.spec_lock_hi_spec", 0x25, 0x8, STXT("Retired lock instructions. High speculative cacheable lock speculation" "succeeded")) }, { I("ls_locks.spec_lock_lo_spec", 0x25, 0x4, STXT("Retired lock instructions. Low speculative cacheable lock speculation" "succeeded")) }, { I("ls_mab_alloc.all_allocations", 0x41, 0x7f, STXT("All Allocations. Counts when a LS pipe allocates a MAB entry")) }, { I("ls_mab_alloc.dc_prefetcher", 0x41, 0x8, STXT("LS MAB Allocates by Type. DC prefetcher")) }, { I("ls_mab_alloc.hardware_prefetcher_allocations", 0x41, 0x40, STXT("Hardware Prefetcher Allocations. Counts when a LS pipe allocates a MAB" "entry")) }, { I("ls_mab_alloc.load_store_allocations", 0x41, 0x3f, STXT("Load Store Allocations. Counts when a LS pipe allocates a MAB entry")) }, { I("ls_mab_alloc.loads", 0x41, 0x1, STXT("LS MAB Allocates by Type. Loads")) }, { I("ls_mab_alloc.stores", 0x41, 0x2, STXT("LS MAB Allocates by Type. Stores")) }, { I("ls_misal_loads.ma4k", 0x47, 0x2, STXT("The number of 4KB misaligned (i.e., page crossing) loads")) }, { I("ls_misal_loads.ma64", 0x47, 0x1, STXT("The number of 64B misaligned (i.e., cacheline crossing) loads")) }, { I("ls_not_halted_cyc", 0x76, 0, STXT("Cycles not in Halt")) }, { I("ls_pref_instr_disp", 0x4b, 0xff, STXT("Software Prefetch Instructions Dispatched (Speculative)")) }, { I("ls_pref_instr_disp.prefetch", 0x4b, 0x1, STXT("Software Prefetch Instructions Dispatched (Speculative). PrefetchT0," "T1 and T2 instructions. See docAPM3 PREFETCHlevel")) }, { I("ls_pref_instr_disp.prefetch_nta", 0x4b, 0x4, STXT("Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA" "instruction. See docAPM3 PREFETCHlevel")) }, { I("ls_pref_instr_disp.prefetch_w", 0x4b, 0x2, STXT("Software Prefetch Instructions Dispatched (Speculative). PrefetchW" "instruction. See docAPM3 PREFETCHW")) }, { I("ls_rdtsc", 0x2d, 0, STXT("Number of reads of the TSC (RDTSC instructions). The count is" "speculative")) }, { I("ls_ret_cl_flush", 0x26, 0, STXT("The number of retired CLFLUSH instructions. This is a non-speculative" "event")) }, { I("ls_ret_cpuid", 0x27, 0, STXT("The number of CPUID instructions retired")) }, { I("ls_smi_rx", 0x2b, 0, STXT("Counts the number of SMIs received")) }, { I("ls_st_commit_cancel2.st_commit_cancel_wcb_full", 0x37, 0x1, STXT("A non-cacheable store and the non-cacheable commit buffer is full")) }, { I("ls_stlf", 0x35, 0, STXT("Number of STLF hits")) }, { I("ls_sw_pf_dc_fills.ext_cache_local", 0x59, 0x4, STXT("Software Prefetch Data Cache Fills by Data Source. From cache of" "different CCX in same node")) }, { I("ls_sw_pf_dc_fills.ext_cache_remote", 0x59, 0x10, STXT("Software Prefetch Data Cache Fills by Data Source. From CCX Cache in" "different Node")) }, { I("ls_sw_pf_dc_fills.int_cache", 0x59, 0x2, STXT("Software Prefetch Data Cache Fills by Data Source. From L3 or" "different L2 in same CCX")) }, { I("ls_sw_pf_dc_fills.lcl_l2", 0x59, 0x1, STXT("Software Prefetch Data Cache Fills by Data Source. From Local L2 to" "the core")) }, { I("ls_sw_pf_dc_fills.mem_io_local", 0x59, 0x8, STXT("Software Prefetch Data Cache Fills by Data Source. From DRAM or IO" "connected in same node")) }, { I("ls_sw_pf_dc_fills.mem_io_remote", 0x59, 0x40, STXT("Software Prefetch Data Cache Fills by Data Source. From DRAM or IO" "connected in different Node")) }, { I("ls_tablewalker.dc_type0", 0x46, 0x1, STXT("Total Page Table Walks DC Type 0")) }, { I("ls_tablewalker.dc_type1", 0x46, 0x2, STXT("Total Page Table Walks DC Type 1")) }, { I("ls_tablewalker.dside", 0x46, 0x3, STXT("Total Page Table Walks on D-side")) }, { I("ls_tablewalker.ic_type0", 0x46, 0x4, STXT("Total Page Table Walks IC Type 0")) }, { I("ls_tablewalker.ic_type1", 0x46, 0x8, STXT("Total Page Table Walks IC Type 1")) }, { I("ls_tablewalker.iside", 0x46, 0xc, STXT("Total Page Table Walks on I-side")) }, { I("ls_tlb_flush.all_tlb_flushes", 0x78, 0xff, STXT("All TLB Flushes. Requires unit mask 0xFF to engage event for counting." "Use all_tlbs_flushed instead")) }, /* other: */ { I("de_dis_cops_from_decoder.disp_op_type.any_fp_dispatch", 0xab, 0x4, STXT("Any FP dispatch. Types of Oops Dispatched from Decoder")) }, { I("de_dis_cops_from_decoder.disp_op_type.any_integer_dispatch", 0xab, 0x8, STXT("Any Integer dispatch. Types of Oops Dispatched from Decoder")) }, { I("de_dis_dispatch_token_stalls1.fp_flush_recovery_stall", 0xae, 0x80, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a Token Stall. Also counts cycles when the thread is not selected" "to dispatch but would have been stalled due to a Token Stall. FP Flush" "recovery stall")) }, { I("de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall", 0xae, 0x20, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a Token Stall. Also counts cycles when the thread is not selected" "to dispatch but would have been stalled due to a Token Stall. Floating" "point register file resource stall. Applies to all FP ops that have a" "destination register")) }, { I("de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall", 0xae, 0x40, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a Token Stall. Also counts cycles when the thread is not selected" "to dispatch but would have been stalled due to a Token Stall. FP" "scheduler resource stall. Applies to ops that use the FP scheduler")) }, { I("de_dis_dispatch_token_stalls1.int_phy_reg_file_rsrc_stall", 0xae, 0x1, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a Token Stall. Also counts cycles when the thread is not selected" "to dispatch but would have been stalled due to a Token Stall. Integer" "Physical Register File resource stall. Integer Physical Register File," "applies to all ops that have an integer destination register")) }, { I("de_dis_dispatch_token_stalls1.int_sched_misc_token_stall", 0xae, 0x8, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a token stall. Integer Scheduler miscellaneous resource stall")) }, { I("de_dis_dispatch_token_stalls1.load_queue_rsrc_stall", 0xae, 0x2, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a Token Stall. Also counts cycles when the thread is not selected" "to dispatch but would have been stalled due to a Token Stall. Load" "Queue resource stall. Applies to all ops with load semantics")) }, { I("de_dis_dispatch_token_stalls1.store_queue_rsrc_stall", 0xae, 0x4, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a Token Stall. Also counts cycles when the thread is not selected" "to dispatch but would have been stalled due to a Token Stall. Store" "Queue resource stall. Applies to all ops with store semantics")) }, { I("de_dis_dispatch_token_stalls1.taken_brnch_buffer_rsrc", 0xae, 0x10, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a Token Stall. Also counts cycles when the thread is not selected" "to dispatch but would have been stalled due to a Token Stall. Taken" "branch buffer resource stall")) }, { I("de_dis_dispatch_token_stalls2.agsq_token_stall", 0xaf, 0x10, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a token stall. AGSQ Tokens unavailable")) }, { I("de_dis_dispatch_token_stalls2.int_sch0_token_stall", 0xaf, 0x1, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a token stall. No tokens for Integer Scheduler Queue 0 available")) }, { I("de_dis_dispatch_token_stalls2.int_sch1_token_stall", 0xaf, 0x2, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a token stall. No tokens for Integer Scheduler Queue 1 available")) }, { I("de_dis_dispatch_token_stalls2.int_sch2_token_stall", 0xaf, 0x4, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a token stall. No tokens for Integer Scheduler Queue 2 available")) }, { I("de_dis_dispatch_token_stalls2.int_sch3_token_stall", 0xaf, 0x8, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a token stall. No tokens for Integer Scheduler Queue 3 available")) }, { I("de_dis_dispatch_token_stalls2.retire_token_stall", 0xaf, 0x20, STXT("Cycles where a dispatch group is valid but does not get dispatched due" "to a token stall. Insufficient Retire Queue tokens available")) }, { I("de_dis_uop_queue_empty_di0", 0xa9, 0, STXT("Cycles where the Micro-Op Queue is empty")) }, /* recommended: */ { I("all_data_cache_accesses", 0x29, 0x7, STXT("All L1 Data Cache Accesses")) }, { I("all_tlbs_flushed", 0x78, 0xff, STXT("All TLBs Flushed")) }, { I("l1_data_cache_fills_all", 0x44, 0xff, STXT("L1 Data Cache Fills: All")) }, { I("l1_data_cache_fills_from_external_ccx_cache", 0x44, 0x14, STXT("L1 Data Cache Fills: From External CCX Cache")) }, { I("l1_data_cache_fills_from_memory", 0x44, 0x48, STXT("L1 Data Cache Fills: From Memory")) }, { I("l1_data_cache_fills_from_remote_node", 0x44, 0x50, STXT("L1 Data Cache Fills: From Remote Node")) }, { I("l1_data_cache_fills_from_within_same_ccx", 0x44, 0x3, STXT("L1 Data Cache Fills: From within same CCX")) }, { I("l1_dtlb_misses", 0x45, 0xff, STXT("L1 DTLB Misses")) }, { I("l2_cache_accesses_from_dc_misses", 0x60, 0xe8, STXT("L2 Cache Accesses from L1 Data Cache Misses (including prefetch)")) }, { I("l2_cache_accesses_from_ic_misses", 0x60, 0x10, STXT("L2 Cache Accesses from L1 Instruction Cache Misses (including" "prefetch)")) }, { I("l2_cache_hits_from_dc_misses", 0x64, 0xf0, STXT("L2 Cache Hits from L1 Data Cache Misses")) }, { I("l2_cache_hits_from_ic_misses", 0x64, 0x6, STXT("L2 Cache Hits from L1 Instruction Cache Misses")) }, { I("l2_cache_hits_from_l2_hwpf", 0x70, 0xff, STXT("L2 Cache Hits from L2 Cache HWPF")) }, { I("l2_cache_misses_from_dc_misses", 0x64, 0x8, STXT("L2 Cache Misses from L1 Data Cache Misses")) }, { I("l2_cache_misses_from_ic_miss", 0x64, 0x1, STXT("L2 Cache Misses from L1 Instruction Cache Misses")) }, { I("l2_dtlb_misses", 0x45, 0xf0, STXT("L2 DTLB Misses & Data page walks")) }, { I("l2_itlb_misses", 0x85, 0x7, STXT("L2 ITLB Misses & Instruction page walks")) }, { I("macro_ops_retired", 0xc1, 0, STXT("Macro-ops Retired")) }, { I("sse_avx_stalls", 0xe, 0xe, STXT("Mixed SSE/AVX Stalls")) }, { NULL, NULL, 0, NULL } }; #undef I #endif