author:    Vladimir Mezentsev <vladimir.mezentsev@oracle.com>  2024-06-01 20:36:06 -0700
committer: Vladimir Mezentsev <vladimir.mezentsev@oracle.com>  2024-06-03 11:38:58 -0700
commit:    9af067bfcdc9a006a7d3517c8c8f805a22b36a35
tree:      684b0319c5b135ec8050c81877aef092ccd4b829
parent:    7c493aa7e73e6718790f6b4a01a39ff4146cba4a
gprofng: add hardware counters for AMD Zen4
ChangeLog
2024-06-01  Vladimir Mezentsev  <vladimir.mezentsev@oracle.com>

	* common/hwctable.c: Add the hwc table for AMD Zen4.
	* src/hwc_amd_zen4.h: New file.
	* src/hwc_amd_zen3.h: Define _HWC_AMD_ZEN3_H.
Diffstat (limited to 'gprofng')
-rw-r--r--  gprofng/common/hwctable.c    22
-rw-r--r--  gprofng/src/hwc_amd_zen3.h    5
-rw-r--r--  gprofng/src/hwc_amd_zen4.h  863
3 files changed, 889 insertions(+), 1 deletion(-)
diff --git a/gprofng/common/hwctable.c b/gprofng/common/hwctable.c
index 0b4800e..b3ccb36 100644
--- a/gprofng/common/hwctable.c
+++ b/gprofng/common/hwctable.c
@@ -1303,6 +1303,7 @@ static Hwcentry generic_list[] = {
};
#include "hwc_amd_zen3.h"
+#include "hwc_amd_zen4.h"
/* structure defining the counters for a CPU type */
typedef struct
@@ -1353,6 +1354,7 @@ static cpu_list_t cputabs[] = {
{ARM_CPU_IMP_APM, generic_list, {"insts,,cycles", 0}},
{CPC_AMD_Authentic, generic_list, {"insts,,cycles", 0}},
{CPC_AMD_FAM_19H_ZEN3, amd_zen3_list, {"insts,,cycles", 0}},
+ {CPC_AMD_FAM_19H_ZEN4, amd_zen4_list, {"insts,,cycles", 0}},
{0, generic_list, {"insts,,cycles", 0}},
};
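
The cputabs table above is a simple dispatch: gprofng scans it for the detected cpcx_cpuver and falls back to the terminating row (cpuver 0, generic_list) when nothing matches. A minimal self-contained sketch of that lookup follows; the types and the CPC_AMD_FAM_19H_ZEN4 value here are simplified stand-ins, not gprofng's real Hwcentry, cpu_list_t, or version id.

/* Minimal sketch of the cputabs dispatch, with stand-in types.  */
#include <stdio.h>

typedef struct { const char *name; } Hwcentry;      /* stand-in only */

static Hwcentry generic_list[]  = { { "insts" }, { NULL } };
static Hwcentry amd_zen4_list[] = { { "ex_ret_instr" }, { NULL } };

enum { CPC_AMD_FAM_19H_ZEN4 = 1 };  /* placeholder, not the real id */

typedef struct
{
  int cpuver;               /* 0 terminates the table (default row) */
  Hwcentry *stdlist;        /* counter table for that CPU */
  const char *dflt;         /* default metrics, e.g. "insts,,cycles" */
} cpu_list_t;

static cpu_list_t cputabs[] = {
  { CPC_AMD_FAM_19H_ZEN4, amd_zen4_list, "insts,,cycles" },
  { 0,                    generic_list,  "insts,,cycles" },
};

static Hwcentry *
lookup_cpu_table (int cpuver)
{
  int i;
  for (i = 0; ; i++)        /* last row (cpuver 0) always matches */
    if (cputabs[i].cpuver == cpuver || cputabs[i].cpuver == 0)
      return cputabs[i].stdlist;
}

int
main (void)
{
  printf ("%s\n", lookup_cpu_table (CPC_AMD_FAM_19H_ZEN4)[0].name);
  return 0;
}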
@@ -1825,6 +1827,26 @@ setup_cpc_general (int skip_hwc_test)
hwcdrv->hwcdrv_get_info (&cpcx_cpuver, &cpcx_cciname, &cpcx_npics,
&cpcx_docref, &cpcx_support_bitmask);
+ /* Fix cpcx_cpuver for new Zen machines */
+ cpu_info_t *cpu_p = read_cpuinfo ();
+ if (strcmp (cpu_p->cpu_vendorstr, "AuthenticAMD") == 0)
+ {
+ if (cpu_p->cpu_family == AMD_ZEN3_FAMILY)
+ switch (cpu_p->cpu_model)
+ {
+ case AMD_ZEN3_RYZEN:
+ case AMD_ZEN3_RYZEN2:
+ case AMD_ZEN3_RYZEN3:
+ case AMD_ZEN3_EPYC_TRENTO:
+ cpcx_cpuver = CPC_AMD_FAM_19H_ZEN3;
+ break;
+ case AMD_ZEN4_RYZEN:
+ case AMD_ZEN4_EPYC:
+ cpcx_cpuver = CPC_AMD_FAM_19H_ZEN4;
+ break;
+ }
+ }
+
#ifdef DISALLOW_PENTIUM_PRO_MMX_7007575
if (cpcx_cpuver == CPC_PENTIUM_PRO_MMX)
{
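
The hunk above corrects cpcx_cpuver because Zen3 and Zen4 parts share CPU family 19h and differ only by model number, so the driver-reported version alone is ambiguous. A rough sketch of the same vendor/family/model probe done directly against /proc/cpuinfo; gprofng uses its own read_cpuinfo(), and the AMD_ZEN3 and AMD_ZEN4 model constants are defined in its headers, so the parsing below is illustrative only.

/* Illustrative vendor/family/model detection (Linux only).  */
#include <stdio.h>
#include <string.h>

int
main (void)
{
  FILE *f = fopen ("/proc/cpuinfo", "r");
  char line[256], vendor[64] = "";
  int family = -1, model = -1;
  if (f == NULL)
    return 1;
  while (fgets (line, sizeof line, f) != NULL)
    {
      /* Whitespace in the formats matches the tabs in /proc/cpuinfo;
         "model name" lines fail the "model :" match and are skipped.  */
      sscanf (line, "vendor_id : %63s", vendor);
      sscanf (line, "cpu family : %d", &family);
      sscanf (line, "model : %d", &model);
    }
  fclose (f);
  printf ("vendor=%s family=%d model=%d\n", vendor, family, model);
  if (strcmp (vendor, "AuthenticAMD") == 0 && family == 0x19)
    printf ("family 19h: the model number picks Zen3 vs Zen4 tables\n");
  return 0;
}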
diff --git a/gprofng/src/hwc_amd_zen3.h b/gprofng/src/hwc_amd_zen3.h
index a6400f5..6a2ee02 100644
--- a/gprofng/src/hwc_amd_zen3.h
+++ b/gprofng/src/hwc_amd_zen3.h
@@ -18,6 +18,9 @@
Foundation, 51 Franklin Street - Fifth Floor, Boston,
MA 02110-1301, USA. */
+#ifndef _HWC_AMD_ZEN3_H
+#define _HWC_AMD_ZEN3_H
+
#define I(nm, event, umask, mtr) INIT_HWC(nm, mtr, (event) | ((umask) << 8), PERF_TYPE_RAW)
static Hwcentry amd_zen3_list[] = {
@@ -629,4 +632,4 @@ static Hwcentry amd_zen3_list[] = {
};
#undef I
-
+#endif
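
Both the Zen3 and Zen4 headers build their tables with the same I() macro, which packs the event select and unit mask into a raw perf config value, (event) | ((umask) << 8), consumed with PERF_TYPE_RAW. A standalone sketch of what that encoding ultimately drives, counting one such event via perf_event_open(2); Linux only, event 0xc0 is ex_ret_instr from the table below, and error handling is kept minimal.

/* Drive one raw Zen event through perf_event_open(2).  */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int
main (void)
{
  struct perf_event_attr pe;
  long long count;
  volatile int i;
  int fd;

  memset (&pe, 0, sizeof pe);
  pe.size = sizeof pe;
  pe.type = PERF_TYPE_RAW;
  pe.config = 0xc0 | (0x0 << 8);  /* ex_ret_instr: event 0xc0, umask 0 */
  pe.disabled = 1;
  pe.exclude_kernel = 1;

  fd = syscall (SYS_perf_event_open, &pe, 0 /* self */, -1 /* any cpu */,
                -1 /* no group */, 0);
  if (fd < 0)
    {
      perror ("perf_event_open");
      return 1;
    }
  ioctl (fd, PERF_EVENT_IOC_ENABLE, 0);
  for (i = 0; i < 1000000; i++)   /* some work to count */
    ;
  ioctl (fd, PERF_EVENT_IOC_DISABLE, 0);
  if (read (fd, &count, sizeof count) == sizeof count)
    printf ("retired instructions: %lld\n", count);
  close (fd);
  return 0;
}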
diff --git a/gprofng/src/hwc_amd_zen4.h b/gprofng/src/hwc_amd_zen4.h
new file mode 100644
index 0000000..1750194
--- /dev/null
+++ b/gprofng/src/hwc_amd_zen4.h
@@ -0,0 +1,863 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#ifndef _HWC_AMD_ZEN4_H
+#define _HWC_AMD_ZEN4_H
+
+#define I(nm, event, umask, mtr) INIT_HWC(nm, mtr, (event) | ((umask) << 8), PERF_TYPE_RAW)
+
+static Hwcentry amd_zen4_list[] = {
+ HWC_GENERIC
+/* branch: */
+ { I("bp_de_redirect", 0x91, 0,
+ STXT("Instruction decoder corrects the predicted target and resteers the"
+ "branch predictor")) },
+ { I("bp_dyn_ind_pred", 0x8e, 0,
+ STXT("Dynamic indirect predictions (branch used the indirect predictor to"
+ "make a prediction)")) },
+ { I("bp_l2_btb_correct", 0x8b, 0,
+ STXT("L2 branch prediction overrides existing prediction (speculative)")) },
+ { I("ex_ret_brn", 0xc2, 0,
+ STXT("Retired branch instructions (all types of architectural control flow"
+ "changes, including exceptions and interrupts)")) },
+ { I("ex_ret_brn_far", 0xc6, 0,
+ STXT("Retired far control transfers (far call/jump/return, IRET, SYSCALL and"
+ "SYSRET, plus exceptions and interrupts). Far control transfers are not"
+ "subject to branch prediction")) },
+ { I("ex_ret_brn_ind_misp", 0xca, 0,
+ STXT("Retired indirect branch instructions mispredicted (only EX"
+ "mispredicts). Each misprediction incurs the same penalty as a"
+ "mispredicted conditional branch instruction")) },
+ { I("ex_ret_brn_misp", 0xc3, 0,
+ STXT("Retired branch instructions mispredicted")) },
+ { I("ex_ret_brn_tkn", 0xc4, 0,
+ STXT("Retired taken branch instructions (all types of architectural control"
+ "flow changes, including exceptions and interrupts)")) },
+ { I("ex_ret_brn_tkn_misp", 0xc5, 0,
+ STXT("Retired taken branch instructions mispredicted")) },
+ { I("ex_ret_cond", 0xd1, 0,
+ STXT("Retired conditional branch instructions")) },
+ { I("ex_ret_ind_brch_instr", 0xcc, 0,
+ STXT("Retired indirect branch instructions")) },
+ { I("ex_ret_msprd_brnch_instr_dir_msmtch", 0x1c7, 0,
+ STXT("Retired branch instructions mispredicted due to direction mismatch")) },
+ { I("ex_ret_near_ret", 0xc8, 0,
+ STXT("Retired near returns (RET or RET Iw)")) },
+ { I("ex_ret_near_ret_mispred", 0xc9, 0,
+ STXT("Retired near returns mispredicted. Each misprediction incurs the same"
+ "penalty as a mispredicted conditional branch instruction")) },
+ { I("ex_ret_uncond_brnch_instr", 0x1c9, 0,
+ STXT("Retired unconditional branch instructions")) },
+ { I("ex_ret_uncond_brnch_instr_mispred", 0x1c8, 0,
+ STXT("Retired unconditional indirect branch instructions mispredicted")) },
+/* cache: */
+ { I("ic_cache_fill_l2", 0x82, 0,
+ STXT("Instruction cache lines (64 bytes) fulfilled from the L2 cache")) },
+ { I("ic_cache_fill_sys", 0x83, 0,
+ STXT("Instruction cache lines (64 bytes) fulfilled from system memory or"
+ "another cache")) },
+ { I("ic_tag_hit_miss.all_instruction_cache_accesses", 0x18e, 0x1f,
+ STXT("Instruction cache accesses of all types")) },
+ { I("ic_tag_hit_miss.instruction_cache_hit", 0x18e, 0x7,
+ STXT("Instruction cache hits")) },
+ { I("ic_tag_hit_miss.instruction_cache_miss", 0x18e, 0x18,
+ STXT("Instruction cache misses")) },
+ { I("l2_cache_req_stat.all", 0x64, 0xff,
+ STXT("Core to L2 cache requests (not including L2 prefetch) for data and"
+ "instruction cache access")) },
+ { I("l2_cache_req_stat.dc_access_in_l2", 0x64, 0xf8,
+ STXT("Core to L2 cache requests (not including L2 prefetch) for data cache"
+ "access")) },
+ { I("l2_cache_req_stat.dc_hit_in_l2", 0x64, 0xf0,
+ STXT("Core to L2 cache requests (not including L2 prefetch) for data cache"
+ "hits")) },
+ { I("l2_cache_req_stat.ic_access_in_l2", 0x64, 0x7,
+ STXT("Core to L2 cache requests (not including L2 prefetch) for instruction"
+ "cache access")) },
+ { I("l2_cache_req_stat.ic_dc_hit_in_l2", 0x64, 0xf6,
+ STXT("Core to L2 cache requests (not including L2 prefetch) for data and"
+ "instruction cache hits")) },
+ { I("l2_cache_req_stat.ic_dc_miss_in_l2", 0x64, 0x9,
+ STXT("Core to L2 cache requests (not including L2 prefetch) for data and"
+ "instruction cache misses")) },
+ { I("l2_cache_req_stat.ic_fill_hit_s", 0x64, 0x2,
+ STXT("Core to L2 cache requests (not including L2 prefetch) with status:"
+ "instruction cache hit non-modifiable line in L2")) },
+ { I("l2_cache_req_stat.ic_fill_hit_x", 0x64, 0x4,
+ STXT("Core to L2 cache requests (not including L2 prefetch) with status:"
+ "instruction cache hit modifiable line in L2")) },
+ { I("l2_cache_req_stat.ic_fill_miss", 0x64, 0x1,
+ STXT("Core to L2 cache requests (not including L2 prefetch) with status:"
+ "instruction cache request miss in L2")) },
+ { I("l2_cache_req_stat.ic_hit_in_l2", 0x64, 0x6,
+ STXT("Core to L2 cache requests (not including L2 prefetch) for instruction"
+ "cache hits")) },
+ { I("l2_cache_req_stat.ls_rd_blk_c", 0x64, 0x8,
+ STXT("Core to L2 cache requests (not including L2 prefetch) with status:"
+ "data cache request miss in L2")) },
+ { I("l2_cache_req_stat.ls_rd_blk_cs", 0x64, 0x80,
+ STXT("Core to L2 cache requests (not including L2 prefetch) with status:"
+ "data cache shared read hit in L2")) },
+ { I("l2_cache_req_stat.ls_rd_blk_l_hit_s", 0x64, 0x20,
+ STXT("Core to L2 cache requests (not including L2 prefetch) with status:"
+ "data cache read hit non-modifiable line in L2")) },
+ { I("l2_cache_req_stat.ls_rd_blk_l_hit_x", 0x64, 0x40,
+ STXT("Core to L2 cache requests (not including L2 prefetch) with status:"
+ "data cache read hit modifiable line in L2")) },
+ { I("l2_cache_req_stat.ls_rd_blk_x", 0x64, 0x10,
+ STXT("Core to L2 cache requests (not including L2 prefetch) with status:"
+ "data cache store or state change hit in L2")) },
+ { I("l2_pf_hit_l2.all", 0x70, 0xff,
+ STXT("L2 prefetches accepted by the L2 pipeline which hit in the L2 cache of"
+ "all types")) },
+ { I("l2_pf_hit_l2.l1_region", 0x70, 0x80,
+ STXT("L2 prefetches accepted by the L2 pipeline which hit in the L2 cache of"
+ "type L1Region (fetch additional lines into L1 cache when the data"
+ "access for a given instruction tends to be followed by a consistent"
+ "pattern of other accesses within a localized region)")) },
+ { I("l2_pf_hit_l2.l1_stream", 0x70, 0x20,
+ STXT("L2 prefetches accepted by the L2 pipeline which hit in the L2 cache of"
+ "type L1Stream (fetch additional sequential lines into L1 cache)")) },
+ { I("l2_pf_hit_l2.l1_stride", 0x70, 0x40,
+ STXT("L2 prefetches accepted by the L2 pipeline which hit in the L2 cache of"
+ "type L1Stride (fetch additional lines into L1 cache when each access"
+ "is a constant distance from the previous)")) },
+ { I("l2_pf_hit_l2.l2_burst", 0x70, 0x8,
+ STXT("L2 prefetches accepted by the L2 pipeline which hit in the L2 cache of"
+ "type L2Burst (aggressively fetch additional sequential lines into L2"
+ "cache)")) },
+ { I("l2_pf_hit_l2.l2_next_line", 0x70, 0x2,
+ STXT("L2 prefetches accepted by the L2 pipeline which hit in the L2 cache of"
+ "type L2NextLine (fetch the next line into L2 cache)")) },
+ { I("l2_pf_hit_l2.l2_stream", 0x70, 0x1,
+ STXT("L2 prefetches accepted by the L2 pipeline which hit in the L2 cache of"
+ "type L2Stream (fetch additional sequential lines into L2 cache)")) },
+ { I("l2_pf_hit_l2.l2_stride", 0x70, 0x10,
+ STXT("L2 prefetches accepted by the L2 pipeline which hit in the L2 cache of"
+ "type L2Stride (fetch additional lines into L2 cache when each access"
+ "is at a constant distance from the previous)")) },
+ { I("l2_pf_hit_l2.l2_up_down", 0x70, 0x4,
+ STXT("L2 prefetches accepted by the L2 pipeline which hit in the L2 cache of"
+ "type L2UpDown (fetch the next or previous line into L2 cache for all"
+ "memory accesses)")) },
+ { I("l2_pf_miss_l2_hit_l3.all", 0x71, 0xff,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 cache and"
+ "hit in the L3 cache cache of all types")) },
+ { I("l2_pf_miss_l2_hit_l3.l1_region", 0x71, 0x80,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 cache and"
+ "hit in the L3 cache of type L1Region (fetch additional lines into L1"
+ "cache when the data access for a given instruction tends to be"
+ "followed by a consistent pattern of other accesses within a localized"
+ "region)")) },
+ { I("l2_pf_miss_l2_hit_l3.l1_stream", 0x71, 0x20,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 cache and"
+ "hit in the L3 cache of type L1Stream (fetch additional sequential"
+ "lines into L1 cache)")) },
+ { I("l2_pf_miss_l2_hit_l3.l1_stride", 0x71, 0x40,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 cache and"
+ "hit in the L3 cache of type L1Stride (fetch additional lines into L1"
+ "cache when each access is a constant distance from the previous)")) },
+ { I("l2_pf_miss_l2_hit_l3.l2_burst", 0x71, 0x8,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 cache and"
+ "hit in the L3 cache of type L2Burst (aggressively fetch additional"
+ "sequential lines into L2 cache)")) },
+ { I("l2_pf_miss_l2_hit_l3.l2_next_line", 0x71, 0x2,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 cache and"
+ "hit in the L3 cache of type L2NextLine (fetch the next line into L2"
+ "cache)")) },
+ { I("l2_pf_miss_l2_hit_l3.l2_stream", 0x71, 0x1,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 cache and"
+ "hit in the L3 cache of type L2Stream (fetch additional sequential"
+ "lines into L2 cache)")) },
+ { I("l2_pf_miss_l2_hit_l3.l2_stride", 0x71, 0x10,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 cache and"
+ "hit in the L3 cache of type L2Stride (fetch additional lines into L2"
+ "cache when each access is a constant distance from the previous)")) },
+ { I("l2_pf_miss_l2_hit_l3.l2_up_down", 0x71, 0x4,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 cache and"
+ "hit in the L3 cache of type L2UpDown (fetch the next or previous line"
+ "into L2 cache for all memory accesses)")) },
+ { I("l2_pf_miss_l2_l3.all", 0x72, 0xff,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3"
+ "caches of all types")) },
+ { I("l2_pf_miss_l2_l3.l1_region", 0x72, 0x80,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3"
+ "caches of type L1Region (fetch additional lines into L1 cache when the"
+ "data access for a given instruction tends to be followed by a"
+ "consistent pattern of other accesses within a localized region)")) },
+ { I("l2_pf_miss_l2_l3.l1_stream", 0x72, 0x20,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3"
+ "caches of type L1Stream (fetch additional sequential lines into L1"
+ "cache)")) },
+ { I("l2_pf_miss_l2_l3.l1_stride", 0x72, 0x40,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3"
+ "caches of type L1Stride (fetch additional lines into L1 cache when"
+ "each access is a constant distance from the previous)")) },
+ { I("l2_pf_miss_l2_l3.l2_burst", 0x72, 0x8,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3"
+ "caches of type L2Burst (aggressively fetch additional sequential lines"
+ "into L2 cache)")) },
+ { I("l2_pf_miss_l2_l3.l2_next_line", 0x72, 0x2,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3"
+ "caches of type L2NextLine (fetch the next line into L2 cache)")) },
+ { I("l2_pf_miss_l2_l3.l2_stream", 0x72, 0x1,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3"
+ "caches of type L2Stream (fetch additional sequential lines into L2"
+ "cache)")) },
+ { I("l2_pf_miss_l2_l3.l2_stride", 0x72, 0x10,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3"
+ "caches of type L2Stride (fetch additional lines into L2 cache when"
+ "each access is a constant distance from the previous)")) },
+ { I("l2_pf_miss_l2_l3.l2_up_down", 0x72, 0x4,
+ STXT("L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3"
+ "caches of type L2UpDown (fetch the next or previous line into L2 cache"
+ "for all memory accesses)")) },
+ { I("l2_request_g1.all", 0x60, 0xff,
+ STXT("L2 cache requests of all types")) },
+ { I("l2_request_g1.all_dc", 0x60, 0xe8,
+ STXT("L2 cache requests of common types from L1 data cache (including"
+ "prefetches)")) },
+ { I("l2_request_g1.all_no_prefetch", 0x60, 0xf9,
+ STXT("L2 cache requests of common types not including prefetches")) },
+ { I("l2_request_g1.cacheable_ic_read", 0x60, 0x10,
+ STXT("L2 cache requests: instruction cache reads")) },
+ { I("l2_request_g1.change_to_x", 0x60, 0x8,
+ STXT("L2 cache requests: data cache state change to writable, check L2 for"
+ "current state")) },
+ { I("l2_request_g1.group2", 0x60, 0x1,
+ STXT("L2 cache requests of non-cacheable type (non-cached data and"
+ "instructions reads, self-modifying code checks)")) },
+ { I("l2_request_g1.l2_hw_pf", 0x60, 0x2,
+ STXT("L2 cache requests: from hardware prefetchers to prefetch directly into"
+ "L2 (hit or miss)")) },
+ { I("l2_request_g1.ls_rd_blk_c_s", 0x60, 0x20,
+ STXT("L2 cache requests: data cache shared reads")) },
+ { I("l2_request_g1.prefetch_l2_cmd", 0x60, 0x4,
+ STXT("L2 cache requests: prefetch directly into L2")) },
+ { I("l2_request_g1.rd_blk_l", 0x60, 0x80,
+ STXT("L2 cache requests: data cache reads including hardware and software"
+ "prefetch")) },
+ { I("l2_request_g1.rd_blk_x", 0x60, 0x40,
+ STXT("L2 cache requests: data cache stores")) },
+ { I("ls_alloc_mab_count", 0x5f, 0,
+ STXT("In-flight L1 data cache misses i.e. Miss Address Buffer (MAB)"
+ "allocations each cycle")) },
+ { I("ls_any_fills_from_sys.all", 0x44, 0xff,
+ STXT("Any data cache fills from all types of data sources")) },
+ { I("ls_any_fills_from_sys.all_dram_io", 0x44, 0x48,
+ STXT("Any data cache fills from either DRAM or MMIO in any NUMA node (same"
+ "or different socket)")) },
+ { I("ls_any_fills_from_sys.alternate_memories", 0x44, 0x80,
+ STXT("Any data cache fills from extension memory")) },
+ { I("ls_any_fills_from_sys.dram_io_all", 0x44, 0x48,
+ STXT("Any data cache fills from either DRAM or MMIO in any NUMA node (same"
+ "or different socket)")) },
+ { I("ls_any_fills_from_sys.dram_io_far", 0x44, 0x40,
+ STXT("Any data cache fills from either DRAM or MMIO in a different NUMA node"
+ "(same or different socket)")) },
+ { I("ls_any_fills_from_sys.dram_io_near", 0x44, 0x8,
+ STXT("Any data cache fills from either DRAM or MMIO in the same NUMA node")) },
+ { I("ls_any_fills_from_sys.far_all", 0x44, 0x50,
+ STXT("Any data cache fills from either cache of another CCX, DRAM or MMIO"
+ "when the address was in a different NUMA node (same or different"
+ "socket)")) },
+ { I("ls_any_fills_from_sys.far_cache", 0x44, 0x10,
+ STXT("Any data cache fills from cache of another CCX when the address was in"
+ "a different NUMA node")) },
+ { I("ls_any_fills_from_sys.local_all", 0x44, 0x3,
+ STXT("Any data cache fills from local L2 cache or L3 cache or different L2"
+ "cache in the same CCX")) },
+ { I("ls_any_fills_from_sys.local_ccx", 0x44, 0x2,
+ STXT("Any data cache fills from L3 cache or different L2 cache in the same"
+ "CCX")) },
+ { I("ls_any_fills_from_sys.local_l2", 0x44, 0x1,
+ STXT("Any data cache fills from local L2 cache")) },
+ { I("ls_any_fills_from_sys.near_cache", 0x44, 0x4,
+ STXT("Any data cache fills from cache of another CCX when the address was in"
+ "the same NUMA node")) },
+ { I("ls_any_fills_from_sys.remote_cache", 0x44, 0x14,
+ STXT("Any data cache fills from cache of another CCX when the address was in"
+ "the same or a different NUMA node")) },
+ { I("ls_dmnd_fills_from_sys.all", 0x43, 0xff,
+ STXT("Demand data cache fills from all types of data sources")) },
+ { I("ls_dmnd_fills_from_sys.alternate_memories", 0x43, 0x80,
+ STXT("Demand data cache fills from extension memory")) },
+ { I("ls_dmnd_fills_from_sys.dram_io_far", 0x43, 0x40,
+ STXT("Demand data cache fills from either DRAM or MMIO in a different NUMA"
+ "node (same or different socket)")) },
+ { I("ls_dmnd_fills_from_sys.dram_io_near", 0x43, 0x8,
+ STXT("Demand data cache fills from either DRAM or MMIO in the same NUMA node")) },
+ { I("ls_dmnd_fills_from_sys.far_cache", 0x43, 0x10,
+ STXT("Demand data cache fills from cache of another CCX when the address was"
+ "in a different NUMA node")) },
+ { I("ls_dmnd_fills_from_sys.local_ccx", 0x43, 0x2,
+ STXT("Demand data cache fills from L3 cache or different L2 cache in the"
+ "same CCX")) },
+ { I("ls_dmnd_fills_from_sys.local_l2", 0x43, 0x1,
+ STXT("Demand data cache fills from local L2 cache")) },
+ { I("ls_dmnd_fills_from_sys.near_cache", 0x43, 0x4,
+ STXT("Demand data cache fills from cache of another CCX when the address was"
+ "in the same NUMA node")) },
+ { I("ls_hw_pf_dc_fills.all", 0x5a, 0xdf,
+ STXT("Hardware prefetch data cache fills from all types of data sources")) },
+ { I("ls_hw_pf_dc_fills.alternate_memories", 0x5a, 0x80,
+ STXT("Hardware prefetch data cache fills from extension memory")) },
+ { I("ls_hw_pf_dc_fills.dram_io_far", 0x5a, 0x40,
+ STXT("Hardware prefetch data cache fills from either DRAM or MMIO in a"
+ "different NUMA node (same or different socket)")) },
+ { I("ls_hw_pf_dc_fills.dram_io_near", 0x5a, 0x8,
+ STXT("Hardware prefetch data cache fills from either DRAM or MMIO in the"
+ "same NUMA node")) },
+ { I("ls_hw_pf_dc_fills.far_cache", 0x5a, 0x10,
+ STXT("Hardware prefetch data cache fills from cache of another CCX when the"
+ "address was in a different NUMA node")) },
+ { I("ls_hw_pf_dc_fills.local_ccx", 0x5a, 0x2,
+ STXT("Hardware prefetch data cache fills from L3 cache or different L2 cache"
+ "in the same CCX")) },
+ { I("ls_hw_pf_dc_fills.local_l2", 0x5a, 0x1,
+ STXT("Hardware prefetch data cache fills from local L2 cache")) },
+ { I("ls_hw_pf_dc_fills.near_cache", 0x5a, 0x4,
+ STXT("Hardware prefetch data cache fills from cache of another CCX when the"
+ "address was in the same NUMA node")) },
+ { I("ls_inef_sw_pref.all", 0x52, 0x3, STXT("(null)")) },
+ { I("ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", 0x52, 0x1,
+ STXT("Software prefetches that did not fetch data outside of the processor"
+ "core as the PREFETCH instruction saw a data cache hit")) },
+ { I("ls_inef_sw_pref.mab_mch_cnt", 0x52, 0x2,
+ STXT("Software prefetches that did not fetch data outside of the processor"
+ "core as the PREFETCH instruction saw a match on an already allocated"
+ "Miss Address Buffer (MAB)")) },
+ { I("ls_mab_alloc.all_allocations", 0x41, 0x7f,
+ STXT("Miss Address Buffer (MAB) entries allocated by a Load-Store (LS) pipe"
+ "for all types of allocations")) },
+ { I("ls_mab_alloc.hardware_prefetcher_allocations", 0x41, 0x40,
+ STXT("Miss Address Buffer (MAB) entries allocated by a Load-Store (LS) pipe"
+ "for hardware prefetcher allocations")) },
+ { I("ls_mab_alloc.load_store_allocations", 0x41, 0x3f,
+ STXT("Miss Address Buffer (MAB) entries allocated by a Load-Store (LS) pipe"
+ "for load-store allocations")) },
+ { I("ls_pref_instr_disp.all", 0x4b, 0x7,
+ STXT("Software prefetch instructions dispatched (speculative) of all types")) },
+ { I("ls_pref_instr_disp.prefetch", 0x4b, 0x1,
+ STXT("Software prefetch instructions dispatched (speculative) of type"
+ "PrefetchT0 (move data to all cache levels), T1 (move data to all cache"
+ "levels except L1) and T2 (move data to all cache levels except L1 and"
+ "L2)")) },
+ { I("ls_pref_instr_disp.prefetch_nta", 0x4b, 0x4,
+ STXT("Software prefetch instructions dispatched (speculative) of type"
+ "PrefetchNTA (move data with minimum cache pollution i.e. non-temporal"
+ "access)")) },
+ { I("ls_pref_instr_disp.prefetch_w", 0x4b, 0x2,
+ STXT("Software prefetch instructions dispatched (speculative) of type"
+ "PrefetchW (move data to L1 cache and mark it modifiable)")) },
+ { I("ls_sw_pf_dc_fills.all", 0x59, 0xdf,
+ STXT("Software prefetch data cache fills from all types of data sources")) },
+ { I("ls_sw_pf_dc_fills.alternate_memories", 0x59, 0x80,
+ STXT("Software prefetch data cache fills from extension memory")) },
+ { I("ls_sw_pf_dc_fills.dram_io_far", 0x59, 0x40,
+ STXT("Software prefetch data cache fills from either DRAM or MMIO in a"
+ "different NUMA node (same or different socket)")) },
+ { I("ls_sw_pf_dc_fills.dram_io_near", 0x59, 0x8,
+ STXT("Software prefetch data cache fills from either DRAM or MMIO in the"
+ "same NUMA node")) },
+ { I("ls_sw_pf_dc_fills.far_cache", 0x59, 0x10,
+ STXT("Software prefetch data cache fills from cache of another CCX in a"
+ "different NUMA node")) },
+ { I("ls_sw_pf_dc_fills.local_ccx", 0x59, 0x2,
+ STXT("Software prefetch data cache fills from L3 cache or different L2 cache"
+ "in the same CCX")) },
+ { I("ls_sw_pf_dc_fills.local_l2", 0x59, 0x1,
+ STXT("Software prefetch data cache fills from local L2 cache")) },
+ { I("ls_sw_pf_dc_fills.near_cache", 0x59, 0x4,
+ STXT("Software prefetch data cache fills from cache of another CCX in the"
+ "same NUMA node")) },
+ { I("op_cache_hit_miss.all_op_cache_accesses", 0x28f, 0x7,
+ STXT("Op cache accesses of all types")) },
+ { I("op_cache_hit_miss.op_cache_hit", 0x28f, 0x3, STXT("Op cache hits")) },
+ { I("op_cache_hit_miss.op_cache_miss", 0x28f, 0x4,
+ STXT("Op cache misses")) },
+/* core: */
+ { I("ex_div_busy", 0xd3, 0, STXT("Number of cycles the divider is busy")) },
+ { I("ex_div_count", 0xd4, 0, STXT("Divide ops executed")) },
+ { I("ex_no_retire.all", 0xd6, 0x1b,
+ STXT("Cycles with no retire for any reason")) },
+ { I("ex_no_retire.empty", 0xd6, 0x1,
+ STXT("Cycles with no retire due to the lack of valid ops in the retire queue"
+ "(may be caused by front-end bottlenecks or pipeline redirects)")) },
+ { I("ex_no_retire.load_not_complete", 0xd6, 0xa2,
+ STXT("Cycles with no retire while the oldest op is waiting for load data")) },
+ { I("ex_no_retire.not_complete", 0xd6, 0x2,
+ STXT("Cycles with no retire while the oldest op is waiting to be executed")) },
+ { I("ex_no_retire.other", 0xd6, 0x8,
+ STXT("Cycles with no retire caused by other reasons (retire breaks, traps,"
+ "faults, etc.)")) },
+ { I("ex_no_retire.thread_not_selected", 0xd6, 0x10,
+ STXT("Cycles with no retire because thread arbitration did not select the"
+ "thread")) },
+ { I("ex_ret_fused_instr", 0x1d0, 0, STXT("Retired fused instructions")) },
+ { I("ex_ret_instr", 0xc0, 0, STXT("Retired instructions")) },
+ { I("ex_ret_ops", 0xc1, 0, STXT("Retired macro-ops")) },
+ { I("ex_ret_ucode_instr", 0x1c1, 0,
+ STXT("Retired microcoded instructions")) },
+ { I("ex_ret_ucode_ops", 0x1c2, 0, STXT("Retired microcode ops")) },
+ { I("ex_tagged_ibs_ops.ibs_tagged_ops", 0x1cf, 0x1,
+ STXT("Ops tagged by IBS")) },
+ { I("ex_tagged_ibs_ops.ibs_tagged_ops_ret", 0x1cf, 0x2,
+ STXT("Ops tagged by IBS that retired")) },
+ { I("ls_int_taken", 0x2c, 0, STXT("Interrupts taken")) },
+ { I("ls_locks.bus_lock", 0x25, 0x1,
+ STXT("Retired Lock instructions which caused a bus lock")) },
+ { I("ls_not_halted_cyc", 0x76, 0, STXT("Core cycles not in halt")) },
+ { I("ls_not_halted_p0_cyc.p0_freq_cyc", 0x120, 0x1,
+ STXT("Reference cycles (P0 frequency) not in halt")) },
+ { I("ls_ret_cl_flush", 0x26, 0, STXT("Retired CLFLUSH instructions")) },
+ { I("ls_ret_cpuid", 0x27, 0, STXT("Retired CPUID instructions")) },
+ { I("ls_smi_rx", 0x2b, 0, STXT("SMIs received")) },
+/* floating point: */
+ { I("fp_disp_faults.all", 0xe, 0xf,
+ STXT("Floating-point dispatch faults of all types")) },
+ { I("fp_disp_faults.sse_avx_all", 0xe, 0xe,
+ STXT("Floating-point dispatch faults of all types for SSE and AVX ops")) },
+ { I("fp_disp_faults.x87_fill_fault", 0xe, 0x1,
+ STXT("Floating-point dispatch faults for x87 fills")) },
+ { I("fp_disp_faults.xmm_fill_fault", 0xe, 0x2,
+ STXT("Floating-point dispatch faults for XMM fills")) },
+ { I("fp_disp_faults.ymm_fill_fault", 0xe, 0x4,
+ STXT("Floating-point dispatch faults for YMM fills")) },
+ { I("fp_disp_faults.ymm_spill_fault", 0xe, 0x8,
+ STXT("Floating-point dispatch faults for YMM spills")) },
+ { I("fp_ops_retired_by_type.all", 0xa, 0xff,
+ STXT("Retired floating-point ops of all types")) },
+ { I("fp_ops_retired_by_type.scalar_add", 0xa, 0x1,
+ STXT("Retired scalar floating-point add ops")) },
+ { I("fp_ops_retired_by_type.scalar_all", 0xa, 0xf,
+ STXT("Retired scalar floating-point ops of all types")) },
+ { I("fp_ops_retired_by_type.scalar_blend", 0xa, 0x9,
+ STXT("Retired scalar floating-point blend ops")) },
+ { I("fp_ops_retired_by_type.scalar_cmp", 0xa, 0x7,
+ STXT("Retired scalar floating-point compare ops")) },
+ { I("fp_ops_retired_by_type.scalar_cvt", 0xa, 0x8,
+ STXT("Retired scalar floating-point convert ops")) },
+ { I("fp_ops_retired_by_type.scalar_div", 0xa, 0x5,
+ STXT("Retired scalar floating-point divide ops")) },
+ { I("fp_ops_retired_by_type.scalar_mac", 0xa, 0x4,
+ STXT("Retired scalar floating-point multiply-accumulate ops")) },
+ { I("fp_ops_retired_by_type.scalar_mul", 0xa, 0x3,
+ STXT("Retired scalar floating-point multiply ops")) },
+ { I("fp_ops_retired_by_type.scalar_other", 0xa, 0xe,
+ STXT("Retired scalar floating-point ops of other types")) },
+ { I("fp_ops_retired_by_type.scalar_sqrt", 0xa, 0x6,
+ STXT("Retired scalar floating-point square root ops")) },
+ { I("fp_ops_retired_by_type.scalar_sub", 0xa, 0x2,
+ STXT("Retired scalar floating-point subtract ops")) },
+ { I("fp_ops_retired_by_type.vector_add", 0xa, 0x10,
+ STXT("Retired vector floating-point add ops")) },
+ { I("fp_ops_retired_by_type.vector_all", 0xa, 0xf0,
+ STXT("Retired vector floating-point ops of all types")) },
+ { I("fp_ops_retired_by_type.vector_blend", 0xa, 0x90,
+ STXT("Retired vector floating-point blend ops")) },
+ { I("fp_ops_retired_by_type.vector_cmp", 0xa, 0x70,
+ STXT("Retired vector floating-point compare ops")) },
+ { I("fp_ops_retired_by_type.vector_cvt", 0xa, 0x80,
+ STXT("Retired vector floating-point convert ops")) },
+ { I("fp_ops_retired_by_type.vector_div", 0xa, 0x50,
+ STXT("Retired vector floating-point divide ops")) },
+ { I("fp_ops_retired_by_type.vector_logical", 0xa, 0xd0,
+ STXT("Retired vector floating-point logical ops")) },
+ { I("fp_ops_retired_by_type.vector_mac", 0xa, 0x40,
+ STXT("Retired vector floating-point multiply-accumulate ops")) },
+ { I("fp_ops_retired_by_type.vector_mul", 0xa, 0x30,
+ STXT("Retired vector floating-point multiply ops")) },
+ { I("fp_ops_retired_by_type.vector_other", 0xa, 0xe0,
+ STXT("Retired vector floating-point ops of other types")) },
+ { I("fp_ops_retired_by_type.vector_shuffle", 0xa, 0xb0,
+ STXT("Retired vector floating-point shuffle ops (may include instructions"
+ "not necessarily thought of as including shuffles e.g. horizontal add,"
+ "dot product, and certain MOV instructions)")) },
+ { I("fp_ops_retired_by_type.vector_sqrt", 0xa, 0x60,
+ STXT("Retired vector floating-point square root ops")) },
+ { I("fp_ops_retired_by_type.vector_sub", 0xa, 0x20,
+ STXT("Retired vector floating-point subtract ops")) },
+ { I("fp_ops_retired_by_width.all", 0x8, 0x3f,
+ STXT("Retired floating-point ops of all widths")) },
+ { I("fp_ops_retired_by_width.mmx_uops_retired", 0x8, 0x2,
+ STXT("Retired MMX floating-point ops")) },
+ { I("fp_ops_retired_by_width.pack_128_uops_retired", 0x8, 0x8,
+ STXT("Retired packed 128-bit floating-point ops")) },
+ { I("fp_ops_retired_by_width.pack_256_uops_retired", 0x8, 0x10,
+ STXT("Retired packed 256-bit floating-point ops")) },
+ { I("fp_ops_retired_by_width.pack_512_uops_retired", 0x8, 0x20,
+ STXT("Retired packed 512-bit floating-point ops")) },
+ { I("fp_ops_retired_by_width.scalar_uops_retired", 0x8, 0x4,
+ STXT("Retired scalar floating-point ops")) },
+ { I("fp_ops_retired_by_width.x87_uops_retired", 0x8, 0x1,
+ STXT("Retired x87 floating-point ops")) },
+ { I("fp_pack_ops_retired.all", 0xc, 0xff,
+ STXT("Retired packed floating-point ops of all types")) },
+ { I("fp_pack_ops_retired.fp128_add", 0xc, 0x1,
+ STXT("Retired 128-bit packed floating-point add ops")) },
+ { I("fp_pack_ops_retired.fp128_all", 0xc, 0xf,
+ STXT("Retired 128-bit packed floating-point ops of all types")) },
+ { I("fp_pack_ops_retired.fp128_blend", 0xc, 0x9,
+ STXT("Retired 128-bit packed floating-point blend ops")) },
+ { I("fp_pack_ops_retired.fp128_cmp", 0xc, 0x7,
+ STXT("Retired 128-bit packed floating-point compare ops")) },
+ { I("fp_pack_ops_retired.fp128_cvt", 0xc, 0x8,
+ STXT("Retired 128-bit packed floating-point convert ops")) },
+ { I("fp_pack_ops_retired.fp128_div", 0xc, 0x5,
+ STXT("Retired 128-bit packed floating-point divide ops")) },
+ { I("fp_pack_ops_retired.fp128_logical", 0xc, 0xd,
+ STXT("Retired 128-bit packed floating-point logical ops")) },
+ { I("fp_pack_ops_retired.fp128_mac", 0xc, 0x4,
+ STXT("Retired 128-bit packed floating-point multiply-accumulate ops")) },
+ { I("fp_pack_ops_retired.fp128_mul", 0xc, 0x3,
+ STXT("Retired 128-bit packed floating-point multiply ops")) },
+ { I("fp_pack_ops_retired.fp128_other", 0xc, 0xe,
+ STXT("Retired 128-bit packed floating-point ops of other types")) },
+ { I("fp_pack_ops_retired.fp128_shuffle", 0xc, 0xb,
+ STXT("Retired 128-bit packed floating-point shuffle ops (may include"
+ "instructions not necessarily thought of as including shuffles e.g."
+ "horizontal add, dot product, and certain MOV instructions)")) },
+ { I("fp_pack_ops_retired.fp128_sqrt", 0xc, 0x6,
+ STXT("Retired 128-bit packed floating-point square root ops")) },
+ { I("fp_pack_ops_retired.fp128_sub", 0xc, 0x2,
+ STXT("Retired 128-bit packed floating-point subtract ops")) },
+ { I("fp_pack_ops_retired.fp256_add", 0xc, 0x10,
+ STXT("Retired 256-bit packed floating-point add ops")) },
+ { I("fp_pack_ops_retired.fp256_all", 0xc, 0xf0,
+ STXT("Retired 256-bit packed floating-point ops of all types")) },
+ { I("fp_pack_ops_retired.fp256_blend", 0xc, 0x90,
+ STXT("Retired 256-bit packed floating-point blend ops")) },
+ { I("fp_pack_ops_retired.fp256_cmp", 0xc, 0x70,
+ STXT("Retired 256-bit packed floating-point compare ops")) },
+ { I("fp_pack_ops_retired.fp256_cvt", 0xc, 0x80,
+ STXT("Retired 256-bit packed floating-point convert ops")) },
+ { I("fp_pack_ops_retired.fp256_div", 0xc, 0x50,
+ STXT("Retired 256-bit packed floating-point divide ops")) },
+ { I("fp_pack_ops_retired.fp256_logical", 0xc, 0xd0,
+ STXT("Retired 256-bit packed floating-point logical ops")) },
+ { I("fp_pack_ops_retired.fp256_mac", 0xc, 0x40,
+ STXT("Retired 256-bit packed floating-point multiply-accumulate ops")) },
+ { I("fp_pack_ops_retired.fp256_mul", 0xc, 0x30,
+ STXT("Retired 256-bit packed floating-point multiply ops")) },
+ { I("fp_pack_ops_retired.fp256_other", 0xc, 0xe0,
+ STXT("Retired 256-bit packed floating-point ops of other types")) },
+ { I("fp_pack_ops_retired.fp256_shuffle", 0xc, 0xb0,
+ STXT("Retired 256-bit packed floating-point shuffle ops (may include"
+ "instructions not necessarily thought of as including shuffles e.g."
+ "horizontal add, dot product, and certain MOV instructions)")) },
+ { I("fp_pack_ops_retired.fp256_sqrt", 0xc, 0x60,
+ STXT("Retired 256-bit packed floating-point square root ops")) },
+ { I("fp_pack_ops_retired.fp256_sub", 0xc, 0x20,
+ STXT("Retired 256-bit packed floating-point subtract ops")) },
+ { I("fp_ret_sse_avx_ops.add_sub_flops", 0x3, 0x1,
+ STXT("Retired SSE and AVX floating-point add and subtract ops")) },
+ { I("fp_ret_sse_avx_ops.all", 0x3, 0x1f,
+ STXT("Retired SSE and AVX floating-point ops of all types")) },
+ { I("fp_ret_sse_avx_ops.bfloat_mac_flops", 0x3, 0x10,
+ STXT("Retired SSE and AVX floating-point bfloat multiply-accumulate ops"
+ "(each operation is counted as 2 ops)")) },
+ { I("fp_ret_sse_avx_ops.div_flops", 0x3, 0x4,
+ STXT("Retired SSE and AVX floating-point divide and square root ops")) },
+ { I("fp_ret_sse_avx_ops.mac_flops", 0x3, 0x8,
+ STXT("Retired SSE and AVX floating-point multiply-accumulate ops (each"
+ "operation is counted as 2 ops)")) },
+ { I("fp_ret_sse_avx_ops.mult_flops", 0x3, 0x2,
+ STXT("Retired SSE and AVX floating-point multiply ops")) },
+ { I("fp_ret_x87_fp_ops.add_sub_ops", 0x2, 0x1,
+ STXT("Retired x87 floating-point add and subtract ops")) },
+ { I("fp_ret_x87_fp_ops.all", 0x2, 0x7,
+ STXT("Retired x87 floating-point ops of all types")) },
+ { I("fp_ret_x87_fp_ops.div_sqrt_ops", 0x2, 0x4,
+ STXT("Retired x87 floating-point divide and square root ops")) },
+ { I("fp_ret_x87_fp_ops.mul_ops", 0x2, 0x2,
+ STXT("Retired x87 floating-point multiply ops")) },
+ { I("fp_retired_ser_ops.all", 0x5, 0xf,
+ STXT("Retired SSE and AVX serializing ops of all types")) },
+ { I("fp_retired_ser_ops.sse_bot_ret", 0x5, 0x8,
+ STXT("Retired SSE and AVX bottom-executing ops. Bottom-executing ops wait"
+ "for all older ops to retire before executing")) },
+ { I("fp_retired_ser_ops.sse_ctrl_ret", 0x5, 0x4,
+ STXT("Retired SSE and AVX control word mispredict traps")) },
+ { I("fp_retired_ser_ops.x87_bot_ret", 0x5, 0x2,
+ STXT("Retired x87 bottom-executing ops. Bottom-executing ops wait for all"
+ "older ops to retire before executing")) },
+ { I("fp_retired_ser_ops.x87_ctrl_ret", 0x5, 0x1,
+ STXT("Retired x87 control word mispredict traps due to mispredictions in RC"
+ "or PC, or changes in exception mask bits")) },
+ { I("packed_int_op_type.all", 0xd, 0xff,
+ STXT("Retired packed integer ops of all types")) },
+ { I("packed_int_op_type.int128_add", 0xd, 0x1,
+ STXT("Retired 128-bit packed integer add ops")) },
+ { I("packed_int_op_type.int128_aes", 0xd, 0x5,
+ STXT("Retired 128-bit packed integer AES ops")) },
+ { I("packed_int_op_type.int128_all", 0xd, 0xf,
+ STXT("Retired 128-bit packed integer ops of all types")) },
+ { I("packed_int_op_type.int128_clm", 0xd, 0x8,
+ STXT("Retired 128-bit packed integer CLM ops")) },
+ { I("packed_int_op_type.int128_cmp", 0xd, 0x7,
+ STXT("Retired 128-bit packed integer compare ops")) },
+ { I("packed_int_op_type.int128_logical", 0xd, 0xd,
+ STXT("Retired 128-bit packed integer logical ops")) },
+ { I("packed_int_op_type.int128_mac", 0xd, 0x4,
+ STXT("Retired 128-bit packed integer multiply-accumulate ops")) },
+ { I("packed_int_op_type.int128_mov", 0xd, 0xa,
+ STXT("Retired 128-bit packed integer MOV ops")) },
+ { I("packed_int_op_type.int128_mul", 0xd, 0x3,
+ STXT("Retired 128-bit packed integer multiply ops")) },
+ { I("packed_int_op_type.int128_other", 0xd, 0xe,
+ STXT("Retired 128-bit packed integer ops of other types")) },
+ { I("packed_int_op_type.int128_pack", 0xd, 0xc,
+ STXT("Retired 128-bit packed integer pack ops")) },
+ { I("packed_int_op_type.int128_sha", 0xd, 0x6,
+ STXT("Retired 128-bit packed integer SHA ops")) },
+ { I("packed_int_op_type.int128_shift", 0xd, 0x9,
+ STXT("Retired 128-bit packed integer shift ops")) },
+ { I("packed_int_op_type.int128_shuffle", 0xd, 0xb,
+ STXT("Retired 128-bit packed integer shuffle ops (may include instructions"
+ "not necessarily thought of as including shuffles e.g. horizontal add,"
+ "dot product, and certain MOV instructions)")) },
+ { I("packed_int_op_type.int128_sub", 0xd, 0x2,
+ STXT("Retired 128-bit packed integer subtract ops")) },
+ { I("packed_int_op_type.int256_add", 0xd, 0x10,
+ STXT("Retired 256-bit packed integer add ops")) },
+ { I("packed_int_op_type.int256_all", 0xd, 0xf0,
+ STXT("Retired 256-bit packed integer ops of all types")) },
+ { I("packed_int_op_type.int256_cmp", 0xd, 0x70,
+ STXT("Retired 256-bit packed integer compare ops")) },
+ { I("packed_int_op_type.int256_logical", 0xd, 0xd0,
+ STXT("Retired 256-bit packed integer logical ops")) },
+ { I("packed_int_op_type.int256_mac", 0xd, 0x40,
+ STXT("Retired 256-bit packed integer multiply-accumulate ops")) },
+ { I("packed_int_op_type.int256_mov", 0xd, 0xa0,
+ STXT("Retired 256-bit packed integer MOV ops")) },
+ { I("packed_int_op_type.int256_mul", 0xd, 0x30,
+ STXT("Retired 256-bit packed integer multiply ops")) },
+ { I("packed_int_op_type.int256_other", 0xd, 0xe0,
+ STXT("Retired 256-bit packed integer ops of other types")) },
+ { I("packed_int_op_type.int256_pack", 0xd, 0xc0,
+ STXT("Retired 256-bit packed integer pack ops")) },
+ { I("packed_int_op_type.int256_shift", 0xd, 0x90,
+ STXT("Retired 256-bit packed integer shift ops")) },
+ { I("packed_int_op_type.int256_shuffle", 0xd, 0xb0,
+ STXT("Retired 256-bit packed integer shuffle ops (may include instructions"
+ "not necessarily thought of as including shuffles e.g. horizontal add,"
+ "dot product, and certain MOV instructions)")) },
+ { I("packed_int_op_type.int256_sub", 0xd, 0x20,
+ STXT("Retired 256-bit packed integer subtract ops")) },
+ { I("sse_avx_ops_retired.all", 0xb, 0xff,
+ STXT("Retired SSE, AVX and MMX integer ops of all types")) },
+ { I("sse_avx_ops_retired.mmx_add", 0xb, 0x1,
+ STXT("Retired MMX integer add")) },
+ { I("sse_avx_ops_retired.mmx_all", 0xb, 0xf,
+ STXT("Retired MMX integer ops of all types")) },
+ { I("sse_avx_ops_retired.mmx_cmp", 0xb, 0x7,
+ STXT("Retired MMX integer compare ops")) },
+ { I("sse_avx_ops_retired.mmx_logical", 0xb, 0xd,
+ STXT("Retired MMX integer logical ops")) },
+ { I("sse_avx_ops_retired.mmx_mac", 0xb, 0x4,
+ STXT("Retired MMX integer multiply-accumulate ops")) },
+ { I("sse_avx_ops_retired.mmx_mov", 0xb, 0xa,
+ STXT("Retired MMX integer MOV ops")) },
+ { I("sse_avx_ops_retired.mmx_mul", 0xb, 0x3,
+ STXT("Retired MMX integer multiply ops")) },
+ { I("sse_avx_ops_retired.mmx_other", 0xb, 0xe,
+ STXT("Retired MMX integer multiply ops of other types")) },
+ { I("sse_avx_ops_retired.mmx_pack", 0xb, 0xc,
+ STXT("Retired MMX integer pack ops")) },
+ { I("sse_avx_ops_retired.mmx_shift", 0xb, 0x9,
+ STXT("Retired MMX integer shift ops")) },
+ { I("sse_avx_ops_retired.mmx_shuffle", 0xb, 0xb,
+ STXT("Retired MMX integer shuffle ops (may include instructions not"
+ "necessarily thought of as including shuffles e.g. horizontal add, dot"
+ "product, and certain MOV instructions)")) },
+ { I("sse_avx_ops_retired.mmx_sub", 0xb, 0x2,
+ STXT("Retired MMX integer subtract ops")) },
+ { I("sse_avx_ops_retired.sse_avx_add", 0xb, 0x10,
+ STXT("Retired SSE and AVX integer add ops")) },
+ { I("sse_avx_ops_retired.sse_avx_aes", 0xb, 0x50,
+ STXT("Retired SSE and AVX integer AES ops")) },
+ { I("sse_avx_ops_retired.sse_avx_all", 0xb, 0xf0,
+ STXT("Retired SSE and AVX integer ops of all types")) },
+ { I("sse_avx_ops_retired.sse_avx_clm", 0xb, 0x80,
+ STXT("Retired SSE and AVX integer CLM ops")) },
+ { I("sse_avx_ops_retired.sse_avx_cmp", 0xb, 0x70,
+ STXT("Retired SSE and AVX integer compare ops")) },
+ { I("sse_avx_ops_retired.sse_avx_logical", 0xb, 0xd0,
+ STXT("Retired SSE and AVX integer logical ops")) },
+ { I("sse_avx_ops_retired.sse_avx_mac", 0xb, 0x40,
+ STXT("Retired SSE and AVX integer multiply-accumulate ops")) },
+ { I("sse_avx_ops_retired.sse_avx_mov", 0xb, 0xa0,
+ STXT("Retired SSE and AVX integer MOV ops")) },
+ { I("sse_avx_ops_retired.sse_avx_mul", 0xb, 0x30,
+ STXT("Retired SSE and AVX integer multiply ops")) },
+ { I("sse_avx_ops_retired.sse_avx_other", 0xb, 0xe0,
+ STXT("Retired SSE and AVX integer ops of other types")) },
+ { I("sse_avx_ops_retired.sse_avx_pack", 0xb, 0xc0,
+ STXT("Retired SSE and AVX integer pack ops")) },
+ { I("sse_avx_ops_retired.sse_avx_sha", 0xb, 0x60,
+ STXT("Retired SSE and AVX integer SHA ops")) },
+ { I("sse_avx_ops_retired.sse_avx_shift", 0xb, 0x90,
+ STXT("Retired SSE and AVX integer shift ops")) },
+ { I("sse_avx_ops_retired.sse_avx_shuffle", 0xb, 0xb0,
+ STXT("Retired SSE and AVX integer shuffle ops (may include instructions not"
+ "necessarily thought of as including shuffles e.g. horizontal add, dot"
+ "product, and certain MOV instructions)")) },
+ { I("sse_avx_ops_retired.sse_avx_sub", 0xb, 0x20,
+ STXT("Retired SSE and AVX integer subtract ops")) },
+/* memory: */
+ { I("bp_l1_tlb_fetch_hit.all", 0x94, 0x7,
+ STXT("Instruction fetches that hit in the L1 ITLB for all page sizes")) },
+ { I("bp_l1_tlb_fetch_hit.if1g", 0x94, 0x4,
+ STXT("Instruction fetches that hit in the L1 ITLB for 1G pages")) },
+ { I("bp_l1_tlb_fetch_hit.if2m", 0x94, 0x2,
+ STXT("Instruction fetches that hit in the L1 ITLB for 2M pages")) },
+ { I("bp_l1_tlb_fetch_hit.if4k", 0x94, 0x1,
+ STXT("Instruction fetches that hit in the L1 ITLB for 4k or coalesced pages."
+ "A coalesced page is a 16k page created from four adjacent 4k pages")) },
+ { I("bp_l1_tlb_miss_l2_tlb_hit", 0x84, 0,
+ STXT("Instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB")) },
+ { I("bp_l1_tlb_miss_l2_tlb_miss.all", 0x85, 0xf,
+ STXT("Instruction fetches that miss in both the L1 and L2 ITLBs (page-table"
+ "walks are requested) for all page sizes")) },
+ { I("bp_l1_tlb_miss_l2_tlb_miss.coalesced_4k", 0x85, 0x8,
+ STXT("Instruction fetches that miss in both the L1 and L2 ITLBs (page-table"
+ "walks are requested) for coalesced pages. A coalesced page is a 16k"
+ "page created from four adjacent 4k pages")) },
+ { I("bp_l1_tlb_miss_l2_tlb_miss.if1g", 0x85, 0x4,
+ STXT("Instruction fetches that miss in both the L1 and L2 ITLBs (page-table"
+ "walks are requested) for 1G pages")) },
+ { I("bp_l1_tlb_miss_l2_tlb_miss.if2m", 0x85, 0x2,
+ STXT("Instruction fetches that miss in both the L1 and L2 ITLBs (page-table"
+ "walks are requested) for 2M pages")) },
+ { I("bp_l1_tlb_miss_l2_tlb_miss.if4k", 0x85, 0x1,
+ STXT("Instruction fetches that miss in both the L1 and L2 ITLBs (page-table"
+ "walks are requested) for 4k pages")) },
+ { I("ls_bad_status2.stli_other", 0x24, 0x2,
+ STXT("Store-to-load conflicts (load unable to complete due to a"
+ "non-forwardable conflict with an older store)")) },
+ { I("ls_dispatch.ld_dispatch", 0x29, 0x1,
+ STXT("Number of memory load operations dispatched to the load-store unit")) },
+ { I("ls_dispatch.ld_st_dispatch", 0x29, 0x4,
+ STXT("Number of memory load-store operations dispatched to the load-store"
+ "unit")) },
+ { I("ls_dispatch.store_dispatch", 0x29, 0x2,
+ STXT("Number of memory store operations dispatched to the load-store unit")) },
+ { I("ls_l1_d_tlb_miss.all", 0x45, 0xff,
+ STXT("L1 DTLB misses for all page sizes")) },
+ { I("ls_l1_d_tlb_miss.all_l2_miss", 0x45, 0xf0,
+ STXT("L1 DTLB misses with L2 DTLB misses (page-table walks are requested)"
+ "for all page sizes")) },
+ { I("ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", 0x45, 0x8,
+ STXT("L1 DTLB misses with L2 DTLB hits for 1G pages")) },
+ { I("ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", 0x45, 0x80,
+ STXT("L1 DTLB misses with L2 DTLB misses (page-table walks are requested)"
+ "for 1G pages")) },
+ { I("ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", 0x45, 0x4,
+ STXT("L1 DTLB misses with L2 DTLB hits for 2M pages")) },
+ { I("ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", 0x45, 0x40,
+ STXT("L1 DTLB misses with L2 DTLB misses (page-table walks are requested)"
+ "for 2M pages")) },
+ { I("ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", 0x45, 0x1,
+ STXT("L1 DTLB misses with L2 DTLB hits for 4k pages")) },
+ { I("ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", 0x45, 0x10,
+ STXT("L1 DTLB misses with L2 DTLB misses (page-table walks are requested)"
+ "for 4k pages")) },
+ { I("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", 0x45, 0x2,
+ STXT("L1 DTLB misses with L2 DTLB hits for coalesced pages. A coalesced page"
+ "is a 16k page created from four adjacent 4k pages")) },
+ { I("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss", 0x45, 0x20,
+ STXT("L1 DTLB misses with L2 DTLB misses (page-table walks are requested)"
+ "for coalesced pages. A coalesced page is a 16k page created from four"
+ "adjacent 4k pages")) },
+ { I("ls_misal_loads.ma4k", 0x47, 0x2,
+ STXT("4kB misaligned (page crossing) loads")) },
+ { I("ls_misal_loads.ma64", 0x47, 0x1,
+ STXT("64B misaligned (cacheline crossing) loads")) },
+ { I("ls_st_commit_cancel2.st_commit_cancel_wcb_full", 0x37, 0x1,
+ STXT("Non-cacheable store commits cancelled due to the non-cacheable commit"
+ "buffer being full")) },
+ { I("ls_stlf", 0x35, 0, STXT("Store-to-load-forward (STLF) hits")) },
+ { I("ls_tlb_flush.all", 0x78, 0xff, STXT("All TLB Flushes")) },
+/* other: */
+ { I("de_dis_dispatch_token_stalls1.fp_flush_recovery_stall", 0xae, 0x80,
+ STXT("Number of cycles dispatch is stalled for floating-point flush recovery")) },
+ { I("de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall", 0xae, 0x20,
+ STXT("Number of cycles dispatch is stalled for floating-point register file"
+ "tokens")) },
+ { I("de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall", 0xae, 0x40,
+ STXT("Number of cycles dispatch is stalled for floating-point scheduler"
+ "tokens")) },
+ { I("de_dis_dispatch_token_stalls1.int_phy_reg_file_rsrc_stall", 0xae, 0x1,
+ STXT("Number of cycles dispatch is stalled for integer physical register"
+ "file tokens")) },
+ { I("de_dis_dispatch_token_stalls1.load_queue_rsrc_stall", 0xae, 0x2,
+ STXT("Number of cycles dispatch is stalled for Load queue token")) },
+ { I("de_dis_dispatch_token_stalls1.store_queue_rsrc_stall", 0xae, 0x4,
+ STXT("Number of cycles dispatch is stalled for store queue tokens")) },
+ { I("de_dis_dispatch_token_stalls1.taken_brnch_buffer_rsrc", 0xae, 0x10,
+ STXT("Number of cycles dispatch is stalled for taken branch buffer tokens")) },
+ { I("de_dis_dispatch_token_stalls2.int_sch0_token_stall", 0xaf, 0x1,
+ STXT("Number of cycles dispatch is stalled for integer scheduler queue 0"
+ "tokens")) },
+ { I("de_dis_dispatch_token_stalls2.int_sch1_token_stall", 0xaf, 0x2,
+ STXT("Number of cycles dispatch is stalled for integer scheduler queue 1"
+ "tokens")) },
+ { I("de_dis_dispatch_token_stalls2.int_sch2_token_stall", 0xaf, 0x4,
+ STXT("Number of cycles dispatch is stalled for integer scheduler queue 2"
+ "tokens")) },
+ { I("de_dis_dispatch_token_stalls2.int_sch3_token_stall", 0xaf, 0x8,
+ STXT("Number of cycles dispatch is stalled for integer scheduler queue 3"
+ "tokens")) },
+ { I("de_dis_dispatch_token_stalls2.retire_token_stall", 0xaf, 0x20,
+ STXT("Number of cycles dispatch is stalled for retire queue tokens")) },
+ { I("de_dis_ops_from_decoder.any_fp_dispatch", 0xab, 0x4,
+ STXT("Number of ops dispatched to the floating-point unit")) },
+ { I("de_dis_ops_from_decoder.disp_op_type.any_integer_dispatch", 0xab, 0x8,
+ STXT("Number of ops dispatched to the integer execution unit")) },
+ { I("de_no_dispatch_per_slot.backend_stalls", 0x1a0, 0x1e,
+ STXT("In each cycle counts ops unable to dispatch because of back-end stalls")) },
+ { I("de_no_dispatch_per_slot.no_ops_from_frontend", 0x1a0, 0x1,
+ STXT("In each cycle counts dispatch slots left empty because the front-end"
+ "did not supply ops")) },
+ { I("de_no_dispatch_per_slot.smt_contention", 0x1a0, 0x60,
+ STXT("In each cycle counts ops unable to dispatch because the dispatch cycle"
+ "was granted to the other SMT thread")) },
+ { I("de_op_queue_empty", 0xa9, 0,
+ STXT("Cycles when the op queue is empty. Such cycles indicate that the"
+ "front-end is not delivering instructions fast enough")) },
+ { I("de_src_op_disp.all", 0xaa, 0x7,
+ STXT("Ops dispatched from any source")) },
+ { I("de_src_op_disp.decoder", 0xaa, 0x1,
+ STXT("Ops fetched from instruction cache and dispatched")) },
+ { I("de_src_op_disp.loop_buffer", 0xaa, 0x4,
+ STXT("Ops dispatched from loop buffer")) },
+ { I("de_src_op_disp.op_cache", 0xaa, 0x2,
+ STXT("Ops fetched from op cache and dispatched")) },
+ { I("resyncs_or_nc_redirects", 0x96, 0,
+ STXT("Pipeline restarts not caused by branch mispredicts")) },
+/* recommended: */
+ { I("all_data_cache_accesses", 0x29, 0x7, STXT("All data cache accesses")) },
+ { NULL, NULL, 0, NULL }
+};
+
+#undef I
+#endif
+
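
For reference, entries in amd_zen4_list are resolved by counter name, and the table is terminated by the NULL entry seen at the end above. A simplified stand-in (not gprofng's real Hwcentry layout) showing that resolution:

/* Name lookup in a NULL-terminated counter table (sketch).  */
#include <stdio.h>
#include <string.h>

typedef struct
{
  const char *name;   /* e.g. "ex_ret_brn_misp" */
  unsigned config;    /* event | (umask << 8), as built by I() */
} entry_t;

static entry_t amd_zen4_sketch[] = {
  { "ex_ret_instr",    0x0c0 },
  { "ex_ret_brn_misp", 0x0c3 },
  { NULL, 0 }                   /* terminator, as in the table above */
};

static const entry_t *
find_counter (const entry_t *tab, const char *nm)
{
  for (; tab->name != NULL; tab++)
    if (strcmp (tab->name, nm) == 0)
      return tab;
  return NULL;
}

int
main (void)
{
  const entry_t *e = find_counter (amd_zen4_sketch, "ex_ret_brn_misp");
  if (e != NULL)
    printf ("%s -> raw config 0x%x\n", e->name, e->config);
  return 0;
}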