diff options
author | Vladimir Mezentsev <vladimir.mezentsev@oracle.com> | 2022-03-11 08:58:31 +0000 |
---|---|---|
committer | Nick Clifton <nickc@redhat.com> | 2022-03-11 08:58:31 +0000 |
commit | bb368aad297fe3ad40cf397e6fc85aa471429a28 (patch) | |
tree | 0ab25909b8fe789d676bbdb00d501d4d485e4afe /gprofng/common | |
parent | a655f19af95eb685ba64f48ee8fc2b3b7a3d886a (diff) | |
download | gdb-bb368aad297fe3ad40cf397e6fc85aa471429a28.zip gdb-bb368aad297fe3ad40cf397e6fc85aa471429a28.tar.gz gdb-bb368aad297fe3ad40cf397e6fc85aa471429a28.tar.bz2 |
gprofng: a new GNU profiler
top-level
* Makefile.def: Add gprofng module.
* configure.ac: Add --enable-gprofng option.
* src-release.sh: Add gprofng.
* Makefile.in: Regenerate.
* configure: Regenerate.
* gprofng: New directory.
binutils
* MAINTAINERS: Add gprofng maintainer.
* README-how-to-make-a-release: Add gprofng.
include.
* collectorAPI.h: New file.
* libcollector.h: New file.
* libfcollector.h: New file.
Diffstat (limited to 'gprofng/common')
-rw-r--r-- | gprofng/common/cc_libcollector.h | 44 | ||||
-rw-r--r-- | gprofng/common/config.h.in | 117 | ||||
-rw-r--r-- | gprofng/common/core_pcbe.c | 3023 | ||||
-rw-r--r-- | gprofng/common/cpu_frequency.h | 303 | ||||
-rw-r--r-- | gprofng/common/cpuid.c | 203 | ||||
-rw-r--r-- | gprofng/common/gp-defs.h | 58 | ||||
-rw-r--r-- | gprofng/common/gp-experiment.h | 186 | ||||
-rw-r--r-- | gprofng/common/gp-time.h | 46 | ||||
-rw-r--r-- | gprofng/common/hwc_cpus.h | 198 | ||||
-rw-r--r-- | gprofng/common/hwcdrv.c | 1454 | ||||
-rw-r--r-- | gprofng/common/hwcdrv.h | 330 | ||||
-rw-r--r-- | gprofng/common/hwcentry.h | 417 | ||||
-rw-r--r-- | gprofng/common/hwcfuncs.c | 704 | ||||
-rw-r--r-- | gprofng/common/hwcfuncs.h | 269 | ||||
-rw-r--r-- | gprofng/common/hwctable.c | 5410 | ||||
-rw-r--r-- | gprofng/common/opteron_pcbe.c | 448 |
16 files changed, 13210 insertions, 0 deletions
diff --git a/gprofng/common/cc_libcollector.h b/gprofng/common/cc_libcollector.h new file mode 100644 index 0000000..e078541 --- /dev/null +++ b/gprofng/common/cc_libcollector.h @@ -0,0 +1,44 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/* + * This file describes the enums, etc. shared by the collector control + * class and libcollector and its modules. It is #included in collctrl.h + * so any changes to it should follow the procedure described there. + */ + +#ifndef _CC_LIBCOLLECTOR_H +#define _CC_LIBCOLLECTOR_H + +/* definitions for synchronization tracing scope -- a bit mask */ +#define SYNCSCOPE_NATIVE 0x1 +#define SYNCSCOPE_JAVA 0x2 + +typedef enum +{ + FOLLOW_NONE = 0x0, + FOLLOW_EXEC = 0x1, + FOLLOW_FORK = 0x2, + FOLLOW_ON = 0x3, /* same value as FOLLOW_EXEC | FOLLOW_FORK */ + FOLLOW_COMBO = 0x4, + FOLLOW_ALL = 0x7 /* same value as FOLLOW_EXEC | FOLLOW_FORK | FOLLOW_COMBO */ +} Follow_type; + +#endif /* !_CC_LIBCOLLECTOR_H */ diff --git a/gprofng/common/config.h.in b/gprofng/common/config.h.in new file mode 100644 index 0000000..e46e64f --- /dev/null +++ b/gprofng/common/config.h.in @@ -0,0 +1,117 @@ +/* common/config.h.in. Generated from configure.ac by autoheader. */ + +/* Enable debugging output.
*/ +#undef DEBUG + +/* Enable java profiling */ +#undef GPROFNG_JAVA_PROFILING + +/* Define to 1 if you have the declaration of `basename', and to 0 if you + don't. */ +#undef HAVE_DECL_BASENAME + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* Have PTHREAD_PRIO_INHERIT. */ +#undef HAVE_PTHREAD_PRIO_INHERIT + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the `strsignal' function. */ +#undef HAVE_STRSIGNAL + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#undef LT_OBJDIR + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. 
*/ +#undef PACKAGE_VERSION + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# undef _ALL_SOURCE +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# undef _GNU_SOURCE +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# undef _POSIX_PTHREAD_SEMANTICS +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# undef _TANDEM_SOURCE +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# undef __EXTENSIONS__ +#endif + + +/* Version number of package */ +#undef VERSION + +/* Define to 1 if on MINIX. */ +#undef _MINIX + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +#undef _POSIX_1_SOURCE + +/* Define to 1 if you need to in order for `stat' and other things to work. */ +#undef _POSIX_SOURCE diff --git a/gprofng/common/core_pcbe.c b/gprofng/common/core_pcbe.c new file mode 100644 index 0000000..6f746d8 --- /dev/null +++ b/gprofng/common/core_pcbe.c @@ -0,0 +1,3023 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/* + * Performance Counter Back-End for Intel Family 6 + * Models 15(06_0FH) 23(06_17H) (Core 2) + * Models 28(06_1CH) (Atom) + * Models 37(06_25H) 44(06_2CH) (Westmere) + * Models 26(06_1AH) 30(06_1EH) 31(06_1FH) 46(06_2EH) (Nehalem) + * Models 42(06_2AH) 45(06_2DH) (Sandy Bridge) + * Models 58(06_3AH) 62(06_3EH) (Ivy Bridge) + * Models 60(06_3CH) 63(06_3FH) 69(06_45H) 70(06_46H) (Haswell) + * Models 61(06_3DH) 71(06_47H) 79(06_4FH) 86(06_56H) (Broadwell) (79 not listed in Intel SDM as of June 2015) + * Models 78(06_4EH) 85(06_55H) 94(06_5EH) (Skylake) (Note Skylake and later: versionID==4) + * To add another model number: + * - add appropriate table data in the form + * #define EVENTS_FAM6_MODXX + * - add appropriate table definitions in the form + * const struct events_table_t events_fam6_modXX[] = + * - set events_table to the appropriate table + * using the "switch ( cpuid_getmodel(CPU) )" statement + * in core_pcbe_init() + * - check the date in core_pcbe_cpuref() + * Table data can be derived from: + * - the Intel SDM + * also https://download.01.org/perfmon/ + * - libcpc source code in usr/src/uts/intel/pcbe/ + * - libpfm4 + * but there are typically inconsistencies among these + * sources of data. So, judgment is required. + * Other things to do to add a new processor: + * x file hwc_cpus.h + * add a cpuver enumerator + * add lookup entry + * x file hwctable.c + * add a table (aliases, etc.) + * add a cputabs entry, including default metrics + * look for other places where the most-recently-added CPU is mentioned + * x file cpu_frequency.h + * function get_max_turbo_freq() + * go to "switch (model)", and add turbo boosts + */ + +#include <sys/types.h> +#include "hwcdrv.h" + +static uint64_t num_gpc; /* number of general purpose counters (e.g.
2-4) */ +static uint64_t num_ffc; /* number of fixed function counters (e.g. 3) */ +static uint_t total_pmc; /* num_gpc + num_ffc */ + +/* + * Only the lower 32 bits can be written to in the general-purpose + * counters. The higher bits are extended from bit 31; all ones if + * bit 31 is one and all zeros otherwise. + * + * The fixed-function counters do not have this restriction. + */ + +static const char *ffc_names[] = { +/* + * While modern Intel processors have fixed-function counters (FFCs), + * on Linux we access HWCs through the perf_event_open() kernel interface, + * which does not allow us direct access to the FFCs. + * Rather, the Linux kernel manages registers opaquely. + * At best, it allows us extra HW events by off-loading + * HWCs to FFCs as available. Often, however, the FFCs + * are commandeered by other activities like the NMI watchdog. + * We will omit any explicit reference to them. + * https://lists.eecs.utk.edu/pipermail/perfapi-devel/2015-February/006895.html + * See also bug 21315497. + */ +#if 0 + "instr_retired.any", + "cpu_clk_unhalted.core", + "cpu_clk_unhalted.ref", +#endif + NULL +}; + +#define IMPL_NAME_LEN 100 +static char core_impl_name[IMPL_NAME_LEN]; + +/* + * Most events require only an event code and a umask. + * Some also require attributes, cmasks, or MSR programming. + * Until Sandy Bridge, the number of these other events + * was small and libcpc just ignored them. + * With Sandy Bridge, libcpc added support for these + * additional events. + * + * We use an expanded events_table_t here -- patterned + * after snb_pcbe_events_table_t in libcpc's + * usr/src/uts/intel/pcbe/snb_pcbe.h -- for all processors. + * + * Correspondingly, we also define ATTR_* macros, but we + * define them to set bits as they will appear + * in bits 16-23 of the final eventsel. Definitions of those + * bits can be found in "struct ia32_perfevtsel" in libcpc's + * usr/src/uts/intel/pcbe/intel_pcbe_utils.h .
+ * + * For now, I don't know how to handle msr_offset. + * So, let's not include events that call for it. + * + * For now, don't do anything with ATTR_PEBS other than + * to note it in tables (starting with Haswell). + * + * Solaris tables also have ATTR_PEBS_ONLY. We cannot + * use these counters from "collect -h" and so do not + * include them. + */ +#define ATTR_NONE 0 +#define ATTR_EDGE (1 << 2) /* bit 18 - offset 16 */ +#define ATTR_ANY (1 << 5) /* bit 21 - offset 16 */ +#define ATTR_INV (1 << 7) /* bit 23 - offset 16 */ +#define ATTR_PEBS ATTR_NONE // PEBS not supported +#define ATTR_TSX ATTR_NONE // TSX MSRs not supported +#undef ATTR_PEBS_ONLY // PEBS-only event, not supported +#undef ATTR_PEBS_ONLY_LD_LAT // not supported + +struct events_table_t +{ + uint32_t eventselect; + uint32_t unitmask; + uint64_t supported_counters; + const char *name; + uint8_t cmask; + uint8_t attrs; + uint16_t msr_offset; +}; + +/* Used to describe which counters support an event: C(x) sets bit x for counter x. The mask is built from an unsigned 64-bit constant so it matches the width of supported_counters and C_ALL, and shifting by 31 or more stays well defined. */ +#define C(x) (1ULL << (x)) +#define C0 C(0) +#define C1 C(1) +#define C2 C(2) +#define C3 C(3) +#define C_ALL 0xFFFFFFFFFFFFFFFF +#define CDEAD 0 /* Counter that is broken */ + +/* note that regular events use the original spelling like "inst_retired.any_p" */ +#define ARCH_EVENTS /* NOTE: Order specified in PRM must be maintained!
*/ \ +{ 0x3C, 0x00, C_ALL, "unhalted-core-cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x01, C_ALL, "unhalted-reference-cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC0, 0x00, C_ALL, "instruction-retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2E, 0x4F, C_ALL, "llc-reference" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2E, 0x41, C_ALL, "llc-misses" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x00, C_ALL, "branch-instruction-retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x00, C_ALL, "branch-misses-retired" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define */ + +/* + * FAM6/MOD15: + * Xeon 3000, 3200, 5100, 5300, 7300 + * Core 2 Quad, Extreme, and Duo + * Pentium dual-core processors + * FAM6/MOD23: + * Xeon 5200, 5400 series, Intel + * Core 2 Quad Q9650. + */ +#define EVENTS_FAM6_MOD23 \ +{ 0x03, 0x00, C0|C1, "load_block" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x03, 0x02, C0|C1, "load_block.sta" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x03, 0x04, C0|C1, "load_block.std" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x03, 0x08, C0|C1, "load_block.overlap_store" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x03, 0x10, C0|C1, "load_block.until_retire" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x03, 0x20, C0|C1, "load_block.l1d" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x04, 0x00, C0|C1, "store_block" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x04, 0x01, C0|C1, "store_block.drain_cycles" /*spell-diff*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x04, 0x02, C0|C1, "store_block.order" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x04, 0x08, C0|C1, "store_block.snoop" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x05, 0x00, C0|C1, "misalign_mem_ref" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x06, 0x00, C0|C1, "segment_reg_loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x00, C0|C1, "sse_pre_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x00, C0|C1, "sse_pre_exec.nta" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x01, C0|C1, "sse_pre_exec.l1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x02, C0|C1, "sse_pre_exec.l2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x03, C0|C1, "sse_pre_exec.stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x00, C0|C1, "dtlb_misses" , 0x0, ATTR_NONE, 0x0 }, \ 
+{ 0x08, 0x01, C0|C1, "dtlb_misses.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x02, C0|C1, "dtlb_misses.miss_ld" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x04, C0|C1, "dtlb_misses.l0_miss_ld" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x08, C0|C1, "dtlb_misses.miss_st" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x09, 0x00, C0|C1, "memory_disambiguation" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x09, 0x01, C0|C1, "memory_disambiguation.reset" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x09, 0x02, C0|C1, "memory_disambiguation.success" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0c, 0x00, C0|C1, "page_walks" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0c, 0x01, C0|C1, "page_walks.count" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0c, 0x02, C0|C1, "page_walks.cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x00, C0 , "fp_comp_ops_exe" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x11, 0x00, C1, "fp_assist" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x00, C1, "mul" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x13, 0x00, C1, "div" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x14, 0x00, C0 , "cycles_div_busy" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x18, 0x00, C0 , "idle_during_div" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x19, 0x00, C1, "delayed_bypass" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x19, 0x00, C1, "delayed_bypass.fp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x19, 0x01, C1, "delayed_bypass.simd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x19, 0x02, C1, "delayed_bypass.load" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x21, 0x00, C0|C1, "l2_ads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x23, 0x00, C0|C1, "l2_dbus_busy_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x00, C0|C1, "l2_lines_in" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x25, 0x00, C0|C1, "l2_m_lines_in" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x00, C0|C1, "l2_lines_out" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x00, C0|C1, "l2_m_lines_out" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x00, C0|C1, "l2_ifetch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x29, 0x00, C0|C1, "l2_ld" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2a, 0x00, C0|C1, "l2_st" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2b, 0x00, C0|C1, "l2_lock" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2e, 0x00, C0|C1, "l2_rqsts" , 0x0, 
ATTR_NONE, 0x0 }, \ +{ 0x2e, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2e, 0x4f, C0|C1, "l2_rqsts.self.demand.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x30, 0x00, C0|C1, "l2_reject_busq" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x32, 0x00, C0|C1, "l2_no_req" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3a, 0x00, C0|C1, "eist_trans" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3b, 0xc0, C0|C1, "thermal_trip" /*non-zero umask*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3c, 0x00, C0|C1, "cpu_clk_unhalted" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3c, 0x00, C0|C1, "cpu_clk_unhalted.core_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3c, 0x01, C0|C1, "cpu_clk_unhalted.bus" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3c, 0x02, C0|C1, "cpu_clk_unhalted.no_other" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x40, 0x00, C0|C1, "l1d_cache_ld" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x41, 0x00, C0|C1, "l1d_cache_st" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x42, 0x00, C0|C1, "l1d_cache_lock" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x42, 0x10, C0|C1, "l1d_cache_lock.duration" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x43, 0x00, C0|C1, "l1d_all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x43, 0x00, C0|C1, "l1d_all_ref" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x43, 0x01, C0|C1, "l1d_all.ref" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x43, 0x02, C0|C1, "l1d_all.cache_ref" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x45, 0x0f, C0|C1, "l1d_repl" /*non-zero umask*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x46, 0x00, C0|C1, "l1d_m_repl" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x47, 0x00, C0|C1, "l1d_m_evict" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x00, C0|C1, "l1d_pend_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x00, C0|C1, "l1d_split" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x01, C0|C1, "l1d_split.loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x02, C0|C1, "l1d_split.stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4b, 0x00, C0|C1, "sse_pre_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4b, 0x00, C0|C1, "sse_pre_miss.nta" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4b, 0x01, C0|C1, "sse_pre_miss.l1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4b, 0x02, C0|C1, 
"sse_pre_miss.l2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4c, 0x00, C0|C1, "load_hit_pre" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4e, 0x00, C0|C1, "l1d_prefetch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4e, 0x10, C0|C1, "l1d_prefetch.requests" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x00, C0|C1, "bus_request_outstanding" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x61, 0x00, C0|C1, "bus_bnr_drv" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x62, 0x00, C0|C1, "bus_drdy_clocks" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x00, C0|C1, "bus_lock_clocks" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x64, 0x00, C0|C1, "bus_data_rcv" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x65, 0x00, C0|C1, "bus_trans_brd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x66, 0x00, C0|C1, "bus_trans_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x67, 0x00, C0|C1, "bus_trans_wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x68, 0x00, C0|C1, "bus_trans_ifetch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x69, 0x00, C0|C1, "bus_trans_inval" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6a, 0x00, C0|C1, "bus_trans_pwr" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6b, 0x00, C0|C1, "bus_trans_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6c, 0x00, C0|C1, "bus_trans_io" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6d, 0x00, C0|C1, "bus_trans_def" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6e, 0x00, C0|C1, "bus_trans_burst" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6f, 0x00, C0|C1, "bus_trans_mem" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x70, 0x00, C0|C1, "bus_trans_any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x77, 0x00, C0|C1, "ext_snoop" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x78, 0x00, C0|C1, "cmp_snoop" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x7a, 0x00, C0|C1, "bus_hit_drv" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x7b, 0x00, C0|C1, "bus_hitm_drv" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x7d, 0x00, C0|C1, "busq_empty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x7e, 0x00, C0|C1, "snoop_stall_drv" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x7f, 0x00, C0|C1, "bus_io_wait" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x00, C0|C1, "l1i_reads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x81, 0x00, C0|C1, "l1i_misses" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x82, 0x00, C0|C1, "itlb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x82, 0x02, 
C0|C1, "itlb.small_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x82, 0x10, C0|C1, "itlb.large_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x82, 0x12, C0|C1, "itlb.misses" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x82, 0x40, C0|C1, "itlb.flush" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x83, 0x00, C0|C1, "inst_queue" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x83, 0x02, C0|C1, "inst_queue.full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x86, 0x00, C0|C1, "cycles_l1i_mem_stalled" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x00, C0|C1, "ild_stall" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x00, C0|C1, "br_inst_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x00, C0|C1, "br_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x8a, 0x00, C0|C1, "br_bac_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x8b, 0x00, C0|C1, "br_cnd_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x8c, 0x00, C0|C1, "br_cnd_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x8d, 0x00, C0|C1, "br_ind_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x8e, 0x00, C0|C1, "br_ind_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x8f, 0x00, C0|C1, "br_ret_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x90, 0x00, C0|C1, "br_ret_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x91, 0x00, C0|C1, "br_ret_bac_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x92, 0x00, C0|C1, "br_call_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x93, 0x00, C0|C1, "br_call_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x94, 0x00, C0|C1, "br_ind_call_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x97, 0x00, C0|C1, "br_tkn_bubble_1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x98, 0x00, C0|C1, "br_tkn_bubble_2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xa0, 0x00, C0|C1, "rs_uops_dispatched" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xa1, 0x00, C0 , "rs_uops_dispatched_port" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xa1, 0x01, C0 , "rs_uops_dispatched_port.0" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xa1, 0x02, C0 , "rs_uops_dispatched_port.1" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xa1, 0x04, C0 , "rs_uops_dispatched_port.2" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xa1, 0x08, C0 , "rs_uops_dispatched_port.3" /*spelling*/ , 0x0, ATTR_NONE, 
0x0 }, \ +{ 0xa1, 0x10, C0 , "rs_uops_dispatched_port.4" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xa1, 0x20, C0 , "rs_uops_dispatched_port.5" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xaa, 0x00, C0|C1, "macro_insts" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xaa, 0x01, C0|C1, "macro_insts.decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xaa, 0x08, C0|C1, "macro_insts.cisc_decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xab, 0x00, C0|C1, "esp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xab, 0x01, C0|C1, "esp.synch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xab, 0x02, C0|C1, "esp.additions" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb0, 0x00, C0|C1, "simd_uops_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb1, 0x00, C0|C1, "simd_sat_uop_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x00, C0|C1, "simd_uop_type_exec" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x01, C0|C1, "simd_uop_type_exec.mul" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x02, C0|C1, "simd_uop_type_exec.shift" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x04, C0|C1, "simd_uop_type_exec.pack" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x08, C0|C1, "simd_uop_type_exec.unpack" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x10, C0|C1, "simd_uop_type_exec.logical" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc0, 0x00, C0|C1, "inst_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc0, 0x00, C0|C1, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc0, 0x01, C0|C1, "inst_retired.loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc0, 0x02, C0|C1, "inst_retired.stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc0, 0x04, C0|C1, "inst_retired.other" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc0, 0x08, C0|C1, "inst_retired.vm_h" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc1, 0x00, C0|C1, "x87_ops_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc1, 0x01, C0|C1, "x87_ops_retired.fxch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc1, 0xfe, C0|C1, "x87_ops_retired.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc2, 0x00, C0|C1, "uops_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc2, 0x01, C0|C1, "uops_retired.ld_ind_br" , 0x0, ATTR_NONE, 0x0 
}, \ +{ 0xc2, 0x02, C0|C1, "uops_retired.std_sta" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc2, 0x04, C0|C1, "uops_retired.macro_fusion" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc2, 0x07, C0|C1, "uops_retired.fused" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc2, 0x08, C0|C1, "uops_retired.non_fused" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc2, 0x0f, C0|C1, "uops_retired.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc3, 0x00, C0|C1, "machine_nukes" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc3, 0x01, C0|C1, "machine_nukes.smc" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc3, 0x04, C0|C1, "machine_nukes.mem_order" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x00, C0|C1, "br_inst_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x00, C0|C1, "br_inst_retired.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x01, C0|C1, "br_inst_retired.pred_not_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x02, C0|C1, "br_inst_retired.mispred_not_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x04, C0|C1, "br_inst_retired.pred_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x08, C0|C1, "br_inst_retired.mispred_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x0c, C0|C1, "br_inst_retired.taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc5, 0x00, C0|C1, "br_inst_retired_mispred" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc5, 0x00, C0|C1, "br_inst_retired.mispred" /*alt-spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc6, 0x00, C0|C1, "cycles_int" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc6, 0x01, C0|C1, "cycles_int.masked" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc6, 0x02, C0|C1, "cycles_int.pending_and_masked" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc7, 0x00, C0|C1, "simd_inst_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc7, 0x01, C0|C1, "simd_inst_retired.packed_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc7, 0x02, C0|C1, "simd_inst_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc7, 0x04, C0|C1, "simd_inst_retired.packed_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc7, 0x08, C0|C1, "simd_inst_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc7, 0x10, C0|C1, "simd_inst_retired.vector" , 0x0, ATTR_NONE, 0x0 }, \ +{ 
0xc7, 0x1f, C0|C1, "simd_inst_retired.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc8, 0x00, C0|C1, "hw_int_rcv" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc9, 0x00, C0|C1, "itlb_miss_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xca, 0x00, C0|C1, "simd_comp_inst_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xca, 0x01, C0|C1, "simd_comp_inst_retired.packed_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xca, 0x02, C0|C1, "simd_comp_inst_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xca, 0x04, C0|C1, "simd_comp_inst_retired.packed_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xca, 0x08, C0|C1, "simd_comp_inst_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcb, 0x00, C0 , "mem_load_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcb, 0x01, C0 , "mem_load_retired.l1d_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcb, 0x02, C0 , "mem_load_retired.l1d_line_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcb, 0x04, C0 , "mem_load_retired.l2_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcb, 0x08, C0 , "mem_load_retired.l2_line_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcb, 0x10, C0 , "mem_load_retired.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcc, 0x00, C0|C1, "fp_mmx_trans" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcc, 0x01, C0|C1, "fp_mmx_trans.to_mmx" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcc, 0x02, C0|C1, "fp_mmx_trans.to_fp" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcd, 0x00, C0|C1, "simd_assist" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xce, 0x00, C0|C1, "simd_instr_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcf, 0x00, C0|C1, "simd_sat_instr_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd2, 0x00, C0|C1, "rat_stalls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd2, 0x01, C0|C1, "rat_stalls.rob_read_port" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd2, 0x02, C0|C1, "rat_stalls.partial_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd2, 0x04, C0|C1, "rat_stalls.flags" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd2, 0x08, C0|C1, "rat_stalls.fpsw" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd2, 0x0f, C0|C1, "rat_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd2, 0x10, C0|C1, "rat_stalls.other_serialization_stalls", 0x0, 
ATTR_NONE, 0x0 }, \ +{ 0xd4, 0x00, C0|C1, "seg_rename_stalls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd4, 0x01, C0|C1, "seg_rename_stalls.es" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd4, 0x02, C0|C1, "seg_rename_stalls.ds" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd4, 0x04, C0|C1, "seg_rename_stalls.fs" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd4, 0x08, C0|C1, "seg_rename_stalls.gs" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd4, 0x0f, C0|C1, "seg_rename_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd5, 0x00, C0|C1, "seg_reg_renames" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd5, 0x01, C0|C1, "seg_reg_renames.es" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd5, 0x02, C0|C1, "seg_reg_renames.ds" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd5, 0x04, C0|C1, "seg_reg_renames.fs" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd5, 0x08, C0|C1, "seg_reg_renames.gs" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xd5, 0x0f, C0|C1, "seg_reg_renames.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xdc, 0x00, C0|C1, "resource_stalls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xdc, 0x01, C0|C1, "resource_stalls.rob_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xdc, 0x02, C0|C1, "resource_stalls.rs_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xdc, 0x04, C0|C1, "resource_stalls.ld_st" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xdc, 0x08, C0|C1, "resource_stalls.fpcw" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xdc, 0x10, C0|C1, "resource_stalls.br_miss_clear" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xdc, 0x1f, C0|C1, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xe0, 0x00, C0|C1, "br_inst_decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xe4, 0x00, C0|C1, "bogus_br" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xe6, 0x00, C0|C1, "baclears" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xf0, 0x00, C0|C1, "pref_rqsts_up" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xf8, 0x00, C0|C1, "pref_rqsts_dn" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define */ + +/* FAM6 MOD28: Intel Atom processor */ +#define EVENTS_FAM6_MOD28 \ +{ 0x02, 0x81, C0|C1, "store_forwards.good" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x06, 0x00, C0|C1, "segment_reg_loads.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x01, C0|C1, "prefetch.prefetcht0" , 0x0, ATTR_NONE, 
0x0 }, \ +{ 0x07, 0x06, C0|C1, "prefetch.sw_l2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x08, C0|C1, "prefetch.prefetchnta" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x05, C0|C1, "data_tlb_misses.dtlb_miss_ld" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x06, C0|C1, "data_tlb_misses.dtlb_miss_st" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x07, C0|C1, "data_tlb_misses.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x09, C0|C1, "data_tlb_misses.l0_dtlb_miss_ld" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0c, 0x03, C0|C1, "page_walks.cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x01, C0|C1, "x87_comp_ops_exe.any.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x81, C0|C1, "x87_comp_ops_exe.any.ar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x11, 0x01, C0|C1, "fp_assist" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x11, 0x81, C0|C1, "fp_assist.ar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x01, C0|C1, "mul.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x81, C0|C1, "mul.ar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x13, 0x01, C0|C1, "div.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x13, 0x81, C0|C1, "div.ar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x14, 0x01, C0|C1, "cycles_div_busy" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x21, 0x00, C0|C1, "l2_ads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x22, 0x00, C0|C1, "l2_dbus_busy" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x00, C0|C1, "l2_lines_in" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x25, 0x00, C0|C1, "l2_m_lines_in" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x00, C0|C1, "l2_lines_out" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x00, C0|C1, "l2_m_lines_out" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x00, C0|C1, "l2_ifetch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x29, 0x00, C0|C1, "l2_ld" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2a, 0x00, C0|C1, "l2_st" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2b, 0x00, C0|C1, "l2_lock" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2e, 0x00, C0|C1, "l2_rqsts" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2e, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2e, 0x4f, C0|C1, "l2_rqsts.self.demand.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x30, 0x00, C0|C1, "l2_reject_busq" , 0x0, ATTR_NONE, 0x0 
}, \ +{ 0x32, 0x00, C0|C1, "l2_no_req" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3a, 0x00, C0|C1, "eist_trans" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3b, 0xc0, C0|C1, "thermal_trip" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3c, 0x00, C0|C1, "cpu_clk_unhalted.core_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3c, 0x01, C0|C1, "cpu_clk_unhalted.bus" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3c, 0x02, C0|C1, "cpu_clk_unhalted.no_other" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x40, 0x21, C0|C1, "l1d_cache.ld" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x40, 0x22, C0|C1, "l1d_cache.st" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x00, C0|C1, "bus_request_outstanding" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x61, 0x00, C0|C1, "bus_bnr_drv" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x62, 0x00, C0|C1, "bus_drdy_clocks" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x00, C0|C1, "bus_lock_clocks" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x64, 0x00, C0|C1, "bus_data_rcv" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x65, 0x00, C0|C1, "bus_trans_brd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x66, 0x00, C0|C1, "bus_trans_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x67, 0x00, C0|C1, "bus_trans_wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x68, 0x00, C0|C1, "bus_trans_ifetch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x69, 0x00, C0|C1, "bus_trans_inval" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6a, 0x00, C0|C1, "bus_trans_pwr" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6b, 0x00, C0|C1, "bus_trans_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6c, 0x00, C0|C1, "bus_trans_io" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6d, 0x00, C0|C1, "bus_trans_def" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6e, 0x00, C0|C1, "bus_trans_burst" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6f, 0x00, C0|C1, "bus_trans_mem" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x70, 0x00, C0|C1, "bus_trans_any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x77, 0x00, C0|C1, "ext_snoop" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x7a, 0x00, C0|C1, "bus_hit_drv" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x7b, 0x00, C0|C1, "bus_hitm_drv" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x7d, 0x00, C0|C1, "busq_empty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x7e, 0x00, C0|C1, "snoop_stall_drv" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x7f, 0x00, 
C0|C1, "bus_io_wait" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x02, C0|C1, "icache.misses" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x03, C0|C1, "icache.accesses" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x82, 0x02, C0|C1, "itlb.misses" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x82, 0x04, C0|C1, "itlb.flush" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xaa, 0x02, C0|C1, "macro_insts.cisc_decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xaa, 0x03, C0|C1, "macro_insts.all_decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb0, 0x00, C0|C1, "simd_uops_exec.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb0, 0x80, C0|C1, "simd_uops_exec.ar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb1, 0x00, C0|C1, "simd_sat_uop_exec.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb1, 0x80, C0|C1, "simd_sat_uop_exec.ar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x01, C0|C1, "simd_uop_type_exec.mul.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x02, C0|C1, "simd_uop_type_exec.shift.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x04, C0|C1, "simd_uop_type_exec.pack.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x08, C0|C1, "simd_uop_type_exec.unpack.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x10, C0|C1, "simd_uop_type_exec.logical.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x81, C0|C1, "simd_uop_type_exec.mul.ar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x82, C0|C1, "simd_uop_type_exec.shift.ar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x84, C0|C1, "simd_uop_type_exec.pack.ar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x88, C0|C1, "simd_uop_type_exec.unpack.ar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0x90, C0|C1, "simd_uop_type_exec.logical.ar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xb3, 0xa0, C0|C1, "simd_uop_type_exec.arithmetic.ar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc0, 0x00, C0|C1, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc2, 0x10, C0|C1, "uops_retired.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc3, 0x01, C0|C1, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x00, C0|C1, "br_inst_retired.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x01, C0|C1, 
"br_inst_retired.pred_not_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x02, C0|C1, "br_inst_retired.mispred_not_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x04, C0|C1, "br_inst_retired.pred_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x08, C0|C1, "br_inst_retired.mispred_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x0a, C0|C1, "br_inst_retired.mispred" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x0c, C0|C1, "br_inst_retired.taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x0f, C0|C1, "br_inst_retired.any1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc5, 0x00, C0|C1, "br_inst_retired.mispred" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc6, 0x01, C0|C1, "cycles_int_masked.cycles_int_masked" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc6, 0x02, C0|C1, "cycles_int_masked.cycles_int_pending_and_masked" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc7, 0x01, C0|C1, "simd_inst_retired.packed_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc7, 0x02, C0|C1, "simd_inst_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc7, 0x04, C0|C1, "simd_inst_retired.packed_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc7, 0x08, C0|C1, "simd_inst_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc7, 0x10, C0|C1, "simd_inst_retired.vector" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc7, 0x1f, C0|C1, "simd_inst_retired.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc8, 0x00, C0|C1, "hw_int_rcv" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xca, 0x01, C0|C1, "simd_comp_inst_retired.packed_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xca, 0x02, C0|C1, "simd_comp_inst_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xca, 0x04, C0|C1, "simd_comp_inst_retired.packed_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xca, 0x08, C0|C1, "simd_comp_inst_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcb, 0x01, C0|C1, "mem_load_retired.l2_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcb, 0x02, C0|C1, "mem_load_retired.l2_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcb, 0x04, C0|C1, "mem_load_retired.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcd, 0x00, C0|C1, "simd_assist" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xce, 0x00, C0|C1, 
"simd_instr_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xcf, 0x00, C0|C1, "simd_sat_instr_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xe0, 0x01, C0|C1, "br_inst_decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xe4, 0x01, C0|C1, "bogus_br" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xe6, 0x01, C0|C1, "baclears.any" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define */ + +/* Intel Core i7 (Nehalem) Processor */ +/* + * The Nehalem tables are basically from Bug 16457009 + * libcpc counter names should be based on public Intel documentation -- Nehalem + * and those tables are basically from the + * Intel SDM, January 2013, Section 19.5, Table 19-11. + * We omit the Table 19-12 uncore events. + * + * Note that the table below includes some events from + * the Intel SDM that require cmask or attr settings. + * These events are not in libcpc, which did not include + * events requiring cmask or attr until Sandy Bridge. + */ + +#define EVENTS_FAM6_MOD26 \ +{ 0x04, 0x07, C0|C1|C2|C3, "sb_drain.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x01, C0|C1|C2|C3, "partial_address_alias" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x10, C0|C1|C2|C3, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x80, C0|C1|C2|C3, "dtlb_load_misses.large_walk_completed", 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0B, 0x10, C0|C1|C2|C3, "mem_inst_retired.latency_above_threshold" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" , 0x0, ATTR_NONE, 0x0 
}, \ +{ 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.stalled_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm", 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit", 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" , 0x0, 
ATTR_NONE, 0x0 }, \ +{ 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x14, 0x01, C0|C1|C2|C3, "arith.fpu_div" , 0x1, ATTR_EDGE | ATTR_INV, 0x0 }, \ +{ 0x14, 0x02, C0|C1|C2|C3, "arith.mul" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" , 0x0, ATTR_NONE, 0x0 
}, \ +{ 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x80, C0|C1|C2|C3, "l2_write.lock.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0xE0, C0|C1|C2|C3, "l2_write.lock.hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0xF0, C0|C1|C2|C3, "l2_write.lock.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x02, C0|C1|C2|C3, "l1d_wb_l2.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x0F, C0|C1|C2|C3, "l1d_wb_l2.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 
0x2E, 0x41, C0|C1|C2|C3, "l3_lat_cache.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2E, 0x4F, C0|C1|C2|C3, "l3_lat_cache.reference" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x00, C0|C1|C2|C3, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x01, C0|C1|C2|C3, "cpu_clk_unhalted.ref_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x40, 0x01, C0|C1 , "l1d_cache_ld.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x40, 0x02, C0|C1 , "l1d_cache_ld.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x40, 0x04, C0|C1 , "l1d_cache_ld.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x40, 0x08, C0|C1 , "l1d_cache_ld.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x40, 0x0F, C0|C1 , "l1d_cache_ld.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x41, 0x02, C0|C1 , "l1d_cache_st.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x41, 0x04, C0|C1 , "l1d_cache_st.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x41, 0x08, C0|C1 , "l1d_cache_st.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x42, 0x01, C0|C1 , "l1d_cache_lock.hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x42, 0x02, C0|C1 , "l1d_cache_lock.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x42, 0x04, C0|C1 , "l1d_cache_lock.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x42, 0x08, C0|C1 , "l1d_cache_lock.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x43, 0x01, C0|C1 , "l1d_all_ref.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x43, 0x02, C0|C1 , "l1d_all_ref.cacheable" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x20, C0|C1|C2|C3, "dtlb_misses.pde_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x80, C0|C1|C2|C3, "dtlb_misses.large_walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4C, 0x01, C0|C1|C2|C3, "load_hit_pre" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4E, 0x01, C0|C1|C2|C3, "l1d_prefetch.requests" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4E, 0x02, C0|C1|C2|C3, "l1d_prefetch.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4E, 0x04, C0|C1|C2|C3, 
"l1d_prefetch.triggers" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x01, C0|C1 , "l1d.repl" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x02, C0|C1 , "l1d.m_repl" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x04, C0|C1 , "l1d.m_evict" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x08, C0|C1 , "l1d.m_snoop_evict" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x52, 0x01, C0|C1|C2|C3, "l1d_cache_prefetch_lock_fb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x53, 0x01, C0|C1|C2|C3, "l1d_cache_lock_fb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x01, C0|C1 , "cache_lock_cycles.l1d_l2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x02, C0|C1 , "cache_lock_cycles.l1d" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x10, C0|C1|C2|C3, 
"br_inst_exec.direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x10, C0|C1|C2|C3, "resource_stalls.rob_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x80, C0|C1|C2|C3, "resource_stalls.other" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA8, 0x01, 
C0|C1|C2|C3, "lsd.uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA8, 0x01, C0|C1|C2|C3, "lsd.cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0xAE, 0x01, C0|C1|C2|C3, "itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x04, C0|C1|C2|C3, "uops_executed.port2_core" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x08, C0|C1|C2|C3, "uops_executed.port3_core" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x10, C0|C1|C2|C3, "uops_executed.port4_core" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x1F, C0|C1|C2|C3, "uops_executed.core_active_cycles_no_port5" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x20, C0|C1|C2|C3, "uops_executed.port5" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x3F, C0|C1|C2|C3, "uops_executed.core_active_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x40, C0|C1|C2|C3, "uops_executed.port015" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x40, C0|C1|C2|C3, "uops_executed.port015_stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0xB1, 0x80, C0|C1|C2|C3, "uops_executed.port234" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB2, 0x01, C0|C1|C2|C3, "offcore_requests_sq_full" , 0x0, ATTR_NONE, 0x0 }, \ +/* { 0xB7, 0x01, C0|C1|C2|C3, "off_core_response_0" , 0x0, ATTR_NONE, 0x1A6 }, ignore events that require msr_offset */ \ +{ 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" , 0x0, ATTR_NONE, 0x0 }, \ +/* { 0xBB, 0x01, C0|C1|C2|C3, "off_core_response_1" , 0x0, ATTR_NONE, 0x1A7 }, ignore events that require msr_offset */ \ +{ 0xC0, 0x00, C0|C1|C2|C3, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x01, 
C0|C1|C2|C3, "uops_retired.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.active_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0xC2, 0x02, C0|C1|C2|C3, "uops_retired.retire_slots" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x01, C0|C1|C2|C3, "machine_clears.cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x02, C0|C1|C2|C3, "machine_clears.mem_order" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x00, C0|C1|C2|C3, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x00, C0|C1|C2|C3, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" , 0x0, ATTR_NONE, 
0x0 }, \ +{ 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 
0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x08, C0|C1|C2|C3, "l2_lines_out.prefetch_dirty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF6, 0x01, C0|C1|C2|C3, "sq_full_stall_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define */ + +#define 
EVENTS_FAM6_MOD46_ONLY \ +{ 0x0F, 0x01, C0|C1|C2|C3, "mem_uncore_retired.l3_data_miss_unknown" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0F, 0x80, C0|C1|C2|C3, "mem_uncore_retired.uncacheable" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define */ + +/* Intel Westmere Processor */ +/* + * The Westmere tables are basically from Bug 16173963 + * libcpc counter names should be based on public Intel documentation -- Westmere + * and those tables are basically from the + * Intel SDM, January 2013, Section 19.6, Table 19-13. + * We omit the Table 19-14 uncore events. + * + * Note that the table below includes some events from + * the Intel SDM that require cmask or attr settings. + * These events are not in libcpc, which did not include + * events requiring cmask or attr until Sandy Bridge. + */ + +#define EVENTS_FAM6_MOD37 \ +{ 0x03, 0x02, C0|C1|C2|C3, "load_block.overlap_store" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x04, 0x07, C0|C1|C2|C3, "sb_drain.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x05, 0x02, C0|C1|C2|C3, "misalign_mem_ref.store" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x01, C0|C1|C2|C3, "partial_address_alias" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x04, C0|C1|C2|C3, "dtlb_load_misses.walk_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x10, C0|C1|C2|C3, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0B, 0x10, C0|C1|C2|C3, "mem_inst_retired.latency_above_threshold" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0C, 0x01, C0|C1|C2|C3, 
"mem_store_retired.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0F, 0x01, C0|C1|C2|C3, "mem_uncore_retired.unknown_source" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0F, 0x80, C0|C1|C2|C3, "mem_uncore_retired.uncacheable" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" , 0x0, 
ATTR_NONE, 0x0 }, \ +{ 0x14, 0x02, C0|C1|C2|C3, "arith.mul" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x10, C0|C1|C2|C3, 
"l2_data_rqsts.prefetch.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x80, C0|C1|C2|C3, "l2_write.lock.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0xE0, C0|C1|C2|C3, "l2_write.lock.hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0xF0, C0|C1|C2|C3, "l2_write.lock.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x02, C0|C1|C2|C3, "l1d_wb_l2.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x0F, C0|C1|C2|C3, "l1d_wb_l2.mesi" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2E, 0x41, C0|C1|C2|C3, "l3_lat_cache.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2E, 0x4F, C0|C1|C2|C3, "l3_lat_cache.reference" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x00, C0|C1|C2|C3, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x01, C0|C1|C2|C3, "cpu_clk_unhalted.ref_p" , 
0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x04, C0|C1|C2|C3, "dtlb_misses.walk_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x20, C0|C1|C2|C3, "dtlb_misses.pde_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x80, C0|C1|C2|C3, "dtlb_misses.large_walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4C, 0x01, C0|C1 , "load_hit_pre" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4E, 0x01, C0|C1 , "l1d_prefetch.requests" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4E, 0x02, C0|C1 , "l1d_prefetch.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4E, 0x04, C0|C1 , "l1d_prefetch.triggers" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4F, 0x10, C0|C1|C2|C3, "ept.walk_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x01, C0|C1 , "l1d.repl" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x02, C0|C1 , "l1d.m_repl" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x04, C0|C1 , "l1d.m_evict" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x08, C0|C1 , "l1d.m_snoop_evict" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x52, 0x01, C0|C1|C2|C3, "l1d_cache_prefetch_lock_fb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x01, C0 , "offcore_requests_outstanding.demand.read_data", 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x02, C0 , "offcore_requests_outstanding.demand.read_code", 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x04, C0 , "offcore_requests_outstanding.demand.rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x08, C0 , "offcore_requests_outstanding.any_read", 0x0, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x01, C0|C1 , "cache_lock_cycles.l1d_l2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x02, C0|C1 , "cache_lock_cycles.l1d" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" , 0x0, ATTR_NONE, 0x0 }, \ 
+{ 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x04, C0|C1|C2|C3, "itlb_misses.walk_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x10, C0|C1|C2|C3, "itlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x80, C0|C1|C2|C3, "itlb_misses.large_walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x07, C0|C1|C2|C3, 
"br_misp_exec.non_calls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x10, C0|C1|C2|C3, "resource_stalls.rob_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x80, C0|C1|C2|C3, "resource_stalls.other" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA8, 0x01, C0|C1|C2|C3, "lsd.uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xAE, 0x01, C0|C1|C2|C3, "itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x01, C0|C1|C2|C3, "offcore_requests.demand.read_data" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x02, C0|C1|C2|C3, "offcore_requests.demand.read_code" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x04, C0|C1|C2|C3, "offcore_requests.demand.rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x08, C0|C1|C2|C3, "offcore_requests.any.read" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 
0x80, C0|C1|C2|C3, "offcore_requests.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x04, C0|C1|C2|C3, "uops_executed.port2_core" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x08, C0|C1|C2|C3, "uops_executed.port3_core" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x10, C0|C1|C2|C3, "uops_executed.port4_core" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x1F, C0|C1|C2|C3, "uops_executed.core_active_cycles_no_port5" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x20, C0|C1|C2|C3, "uops_executed.port5" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x3F, C0|C1|C2|C3, "uops_executed.core_active_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x40, C0|C1|C2|C3, "uops_executed.port015" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x80, C0|C1|C2|C3, "uops_executed.port234" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB2, 0x01, C0|C1|C2|C3, "offcore_requests_sq_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB3, 0x01, C0, "snoopq_requests_outstanding.data" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB3, 0x02, C0, "snoopq_requests_outstanding.invalidate" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB3, 0x04, C0, "snoopq_requests_outstanding.code" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB4, 0x01, C0|C1|C2|C3, "snoopq_requests.code" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB4, 0x02, C0|C1|C2|C3, "snoopq_requests.data" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB4, 0x04, C0|C1|C2|C3, "snoopq_requests.invalidate" , 0x0, ATTR_NONE, 0x0 }, \ +/* { 0xB7, 0x01, C0|C1|C2|C3, "off_core_response_0" , 0x0, ATTR_NONE, 0x1A6 }, ignore events that require msr_offset */ \ +{ 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" , 0x0, ATTR_NONE, 0x0 }, \ +/* { 0xBB, 0x01, C0|C1|C2|C3, "off_core_response_1" , 0x0, ATTR_NONE, 0x1A7 }, ignore events that require msr_offset */ \ +{ 0xC0, 0x00, C0|C1|C2|C3, "inst_retired.any_p" , 0x0, 
ATTR_NONE, 0x0 }, \ +{ 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x02, C0|C1|C2|C3, "uops_retired.retire_slots" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x01, C0|C1|C2|C3, "machine_clears.cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x02, C0|C1|C2|C3, "machine_clears.mem_order" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x00, C0|C1|C2|C3, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x00, C0|C1|C2|C3, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x01, C0|C1|C2|C3, "br_misp_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x04, C0|C1|C2|C3, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" , 0x0, 
ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x01, C0|C1|C2|C3, "uops_decoded.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xEC, 0x01, C0|C1|C2|C3, 
"thread_active" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x08, C0|C1|C2|C3, "l2_lines_out.prefetch_dirty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF4, 0x04, C0|C1|C2|C3, "sq_misc.lru_hints" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF6, 0x01, C0|C1|C2|C3, "sq_full_stall_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 
0x08, C0|C1|C2|C3, "simd_int_64.unpack" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define */ + +/* + * EVENTS_FAM6_MOD37_ALSO lists five mem_uncore_retired.* entries in a + * macro of their own. Every entry has 0x0F as its first field, a + * single-bit value from 0x02 through 0x20 as its second field, and + * C0|C1|C2|C3 as its counter set (presumably event code and unit mask -- + * confirm against the entry struct definition). + * + * This special omission of the following events from Model 47 + * is due to usr/src/uts/intel/pcbe/wm_pcbe.h. There seems + * to be no substantiation for this treatment in the Intel SDM. + * + * NOTE(review): the _ALSO suffix suggests these entries are added for + * Model 37 on top of another table -- confirm where the macro is expanded. + * The last entry's trailing backslash continues the macro onto the + * end-of-#define comment line, which is what terminates the macro body. + */ +#define EVENTS_FAM6_MOD37_ALSO \ +{ 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0F, 0x04, C0|C1|C2|C3, "mem_uncore_retired.remote_hitm" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.local_dram_remote_cache_hit", 0x0, ATTR_NONE, 0x0 },\ +{ 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.other_llc_miss" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define */ + +/* Intel Sandy Bridge Processor */ +/* + * The Sandy Bridge tables are basically from Bug 16457080 + * libcpc counter names should be based on public Intel documentation -- Sandy Bridge + * and those tables are basically from the + * Intel SDM, January 2013, Section 19.4, Table 19-7. + * Additionally, there are + * Table 19-8. Model 42 only. + * Table 19-9. Model 45 only. + * We omit the Table 19-10 uncore events. 
+ */ + +#define EVENTS_FAM6_MOD42 \ +{ 0x03, 0x01, C_ALL, "ld_blocks.data_unknown" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x03, 0x02, C_ALL, "ld_blocks.store_forward" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x03, 0x08, C_ALL, "ld_blocks.no_sr" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x03, 0x10, C_ALL, "ld_blocks.all_block" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x05, 0x01, C_ALL, "misalign_mem_ref.loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x05, 0x02, C_ALL, "misalign_mem_ref.stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x01, C_ALL, "ld_blocks_partial.address_alias" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x08, C_ALL, "ld_blocks_partial.all_sta_block" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x01, C_ALL, "dtlb_load_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x02, C_ALL, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x04, C_ALL, "dtlb_load_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x10, C_ALL, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0D, 0x03, C_ALL, "int_misc.recovery_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x0D, 0x03, C_ALL, "int_misc.recovery_stalls_count" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x0D, 0x40, C_ALL, "int_misc.rat_stall_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.core_stall_cycles" , 0x1, ATTR_INV | ATTR_ANY, 0x0 }, \ +{ 0x10, 0x01, C_ALL, "fp_comp_ops_exe.x87" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x10, C_ALL, "fp_comp_ops_exe.sse_fp_packed_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x20, C_ALL, "fp_comp_ops_exe.sse_fp_scalar_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x40, C_ALL, "fp_comp_ops_exe.sse_packed_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x80, C_ALL, "fp_comp_ops_exe.sse_scalar_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x11, 0x01, C_ALL, "simd_fp_256.packed_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x11, 0x02, C_ALL, "simd_fp_256.packed_double" , 0x0, ATTR_NONE, 
0x0 }, \ +{ 0x14, 0x01, C_ALL, "arith.fpu_div_active" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x14, 0x01, C_ALL, "arith.fpu_div" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x17, 0x01, C_ALL, "insts_written_to_iq.insts" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x01, C_ALL, "l2_rqsts.demand_data_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x03, C_ALL, "l2_rqsts.all_demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x04, C_ALL, "l2_rqsts.rfo_hits" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x08, C_ALL, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x0C, C_ALL, "l2_rqsts.all_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x10, C_ALL, "l2_rqsts.code_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x20, C_ALL, "l2_rqsts.code_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x30, C_ALL, "l2_rqsts.all_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x40, C_ALL, "l2_rqsts.pf_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x80, C_ALL, "l2_rqsts.pf_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xC0, C_ALL, "l2_rqsts.all_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x01, C_ALL, "l2_store_lock_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x04, C_ALL, "l2_store_lock_rqsts.hit_e" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x08, C_ALL, "l2_store_lock_rqsts.hit_m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x0F, C_ALL, "l2_store_lock_rqsts.all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x01, C_ALL, "l2_l1d_wb_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x02, C_ALL, "l2_l1d_wb_rqsts.hit_s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x04, C_ALL, "l2_l1d_wb_rqsts.hit_e" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x08, C_ALL, "l2_l1d_wb_rqsts.hit_m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x0F, C_ALL, "l2_l1d_wb_rqsts.all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2E, 0x41, C_ALL, "longest_lat_cache.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2E, 0x4F, C_ALL, "longest_lat_cache.reference" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x48, 
0x01, C2 , "l1d_pend_miss.pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C2 , "l1d_pend_miss.pending_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C2 , "l1d_pend_miss.occurrences" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x49, 0x01, C_ALL, "dtlb_store_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x02, C_ALL, "dtlb_store_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x04, C_ALL, "dtlb_store_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x10, C_ALL, "dtlb_store_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4C, 0x01, C_ALL, "load_hit_pre.sw_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4C, 0x02, C_ALL, "load_hit_pre.hw_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4E, 0x02, C_ALL, "hw_pre_req.dl1_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x01, C_ALL, "l1d.replacement" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x02, C_ALL, "l1d.allocated_in_m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x04, C_ALL, "l1d.eviction" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x08, C_ALL, "l1d.all_m_replacement" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x59, 0x20, C_ALL, "partial_rat_stalls.flags_merge_uop" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x59, 0x20, C_ALL, "partial_rat_stalls.flags_merge_uop_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x59, 0x40, C_ALL, "partial_rat_stalls.slow_lea_window" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x59, 0x80, C_ALL, "partial_rat_stalls.mul_single_uop" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5B, 0x0C, C0|C1|C2|C3, "resource_stalls2.all_fl_empty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5B, 0x0F, C_ALL, "resource_stalls2.all_prf_control" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5B, 0x40, C_ALL, "resource_stalls2.bob_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5B, 0x4F, C_ALL, "resource_stalls2.ooo_rsrc" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0_transition" , 0x0, ATTR_EDGE, 0x0 }, \ +{ 0x5C, 0x02, C_ALL, "cpl_cycles.ring123" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5E, 0x01, C_ALL, "rs_events.empty_cycles" , 0x0, ATTR_NONE, 0x0 }, \ 
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd_cycles", 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x01, C_ALL, "lock_cycles.split_lock_uc_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x02, C_ALL, "lock_cycles.cache_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x02, C_ALL, "idq.empty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x04, C_ALL, "idq.mite_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x04, C_ALL, "idq.mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x08, C_ALL, "idq.dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x08, C_ALL, "idq.dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_activations" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x79, 0x20, C_ALL, "idq.ms_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x20, C_ALL, "idq.ms_mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x30, C_ALL, "idq.ms_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x30, C_ALL, "idq.ms_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x18, C_ALL, "idq.all_dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x24, C_ALL, "idq.all_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x3C, C_ALL, "idq.mite_all_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x3C, C_ALL, "idq.mite_all_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x02, C_ALL, 
"icache.misses" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x01, C_ALL, "itlb_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x02, C_ALL, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x04, C_ALL, "itlb_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x10, C_ALL, "itlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x01, C_ALL, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x04, C_ALL, "ild_stall.iq_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x41, C_ALL, "br_inst_exec.nontaken_cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x81, C_ALL, "br_inst_exec.taken_cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x82, C_ALL, "br_inst_exec.taken_direct_jmp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x84, C_ALL, "br_inst_exec.taken_indirect_jmp_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x88, C_ALL, "br_inst_exec.taken_return_near" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x90, C_ALL, "br_inst_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xA0, C_ALL, "br_inst_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xC1, C_ALL, "br_inst_exec.all_cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xFF, C_ALL, "br_inst_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x41, C_ALL, "br_misp_exec.nontaken_cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x81, C_ALL, "br_misp_exec.taken_cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x84, C_ALL, "br_misp_exec.taken_indirect_jmp_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x88, C_ALL, "br_misp_exec.taken_return_near" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x90, C_ALL, "br_misp_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0xA0, C_ALL, "br_misp_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0xC1, C_ALL, "br_misp_exec.all_cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0xFF, C_ALL, "br_misp_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.core" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x01, C_ALL, 
"uops_dispatched_port.port_0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x02, C_ALL, "uops_dispatched_port.port_1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x04, C_ALL, "uops_dispatched_port.port_2_ld" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x08, C_ALL, "uops_dispatched_port.port_2_sta" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x0C, C_ALL, "uops_dispatched_port.port_2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x10, C_ALL, "uops_dispatched_port.port_3_ld" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x20, C_ALL, "uops_dispatched_port.port_3_sta" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x30, C_ALL, "uops_dispatched_port.port_3" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x40, C_ALL, "uops_dispatched_port.port_4" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x80, C_ALL, "uops_dispatched_port.port_5" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x01, C_ALL, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x02, C_ALL, "resource_stalls.lb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x04, C_ALL, "resource_stalls.rs" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x08, C_ALL, "resource_stalls.sb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x10, C_ALL, "resource_stalls.rob" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x20, C_ALL, "resource_stalls.fcsw" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x40, C_ALL, "resource_stalls.mxcsr" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x80, C_ALL, "resource_stalls.other" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x02, C2 , "cycle_activity.cycles_l1d_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x04, C0|C1|C2|C3, "cycle_activity.cycles_no_dispatch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xAB, 0x01, C_ALL, "dsb2mite_switches.count" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xAB, 0x02, C_ALL, "dsb2mite_switches.penalty_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xAC, 0x02, C_ALL, "dsb_fill.other_cancel" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xAC, 0x08, C_ALL, "dsb_fill.exceed_dsb_lines" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xAC, 0x0A, C_ALL, "dsb_fill.all_cancel" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xAE, 
0x01, C_ALL, "itlb.itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x01, C_ALL, "offcore_requests.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x04, C_ALL, "offcore_requests.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x08, C_ALL, "offcore_requests.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C0|C1|C2|C3, "uops_dispatched.thread" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C0|C1|C2|C3, "uops_dispatched.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_dispatched.core" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB2, 0x01, C_ALL, "offcore_requests_buffer.sq_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB6, 0x01, C_ALL, "agu_bypass_cancel.count" , 0x0, ATTR_NONE, 0x0 }, \ +/* { 0xB7, 0x01, C_ALL, "off_core_response_0" , 0x0, ATTR_NONE, 0x1A6 }, ignore events that require msr_offset */ \ +/* { 0xBB, 0x01, C_ALL, "off_core_response_1" , 0x0, ATTR_NONE, 0x1A7 }, ignore events that require msr_offset */ \ +{ 0xBD, 0x01, C_ALL, "tlb_flush.dtlb_thread" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBD, 0x20, C_ALL, "tlb_flush.stlb_any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBF, 0x05, C_ALL, "l1d_blocks.bank_conflict_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xC0, 0x00, C_ALL, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC0, 0x01, C1, "inst_retired.prec_dist" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC1, 0x02, C_ALL, "other_assists.itlb_miss_retired" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC1, 0x08, C_ALL, "other_assists.avx_store" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC1, 0x10, C_ALL, "other_assists.avx_to_sse" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC1, 0x20, C_ALL, "other_assists.sse_to_avx" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.active_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0xC2, 0x02, C_ALL, "uops_retired.retire_slots" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x02, C_ALL, "machine_clears.memory_ordering" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 
0x04, C_ALL, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x20, C_ALL, "machine_clears.maskmov" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x00, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x01, C_ALL, "br_inst_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x02, C_ALL, "br_inst_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x04, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x08, C_ALL, "br_inst_retired.near_return" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x10, C_ALL, "br_inst_retired.not_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x20, C_ALL, "br_inst_retired.near_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x40, C_ALL, "br_inst_retired.far_branch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x00, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x00, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x01, C_ALL, "br_misp_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x02, C_ALL, "br_misp_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x04, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x10, C_ALL, "br_misp_retired.not_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x20, C_ALL, "br_misp_retired.taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x02, C_ALL, "fp_assist.x87_output" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x04, C_ALL, "fp_assist.x87_input" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x08, C_ALL, "fp_assist.simd_output" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x10, C_ALL, "fp_assist.simd_input" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x1E, C_ALL, "fp_assist.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCC, 0x20, C_ALL, "rob_misc_events.lbr_inserts" , 0x0, ATTR_NONE, 0x0 }, \ +/* { 0xCD, 0x01, C3, "mem_trans_retired.load_latency" , 0x0, ATTR_NONE, 0x3F6 }, ignore events that require msr_offset */ /* See Section "MSR_PEBS_LD_LAT_THRESHOLD" */ \ +{ 0xCD, 0x02, C3, "mem_trans_retired.precise_store" , 0x0, ATTR_NONE, 0x0 }, /* See 
Section "Precise Store Facility" */ \ +{ 0xD0, 0x11, C_ALL, "mem_uops_retired.stlb_miss_loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x12, C_ALL, "mem_uops_retired.stlb_miss_stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x21, C_ALL, "mem_uops_retired.lock_loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x22, C_ALL, "mem_uops_retired.lock_stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x41, C_ALL, "mem_uops_retired.split_loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x42, C_ALL, "mem_uops_retired.split_stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x81, C_ALL, "mem_uops_retired.all_loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x82, C_ALL, "mem_uops_retired.all_stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x01, C0|C1|C2|C3, "mem_load_uops_retired.l1_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x02, C_ALL, "mem_load_uops_retired.l2_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x04, C_ALL, "mem_load_uops_retired.llc_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x20, C_ALL, "mem_load_uops_retired.llc_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x40, C_ALL, "mem_load_uops_retired.hit_lfb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x01, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x02, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x04, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_hitm" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x08, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_none" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE6, 0x01, C_ALL, "baclears.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x01, C_ALL, "l2_trans.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x02, C_ALL, "l2_trans.rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x04, C_ALL, "l2_trans.code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x08, C_ALL, "l2_trans.all_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x10, C_ALL, "l2_trans.l1d_wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x20, C_ALL, "l2_trans.l2_fill" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x40, C_ALL, "l2_trans.l2_wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 
0xF0, 0x80, C_ALL, "l2_trans.all_requests" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x01, C_ALL, "l2_lines_in.i" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x02, C_ALL, "l2_lines_in.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x04, C_ALL, "l2_lines_in.e" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x07, C_ALL, "l2_lines_in.all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x01, C_ALL, "l2_lines_out.demand_clean" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x02, C_ALL, "l2_lines_out.demand_dirty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x04, C_ALL, "l2_lines_out.pf_clean" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x08, C_ALL, "l2_lines_out.pf_dirty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x0A, C_ALL, "l2_lines_out.dirty_all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF4, 0x10, C_ALL, "sq_misc.split_lock" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define */ +/* EVENTS_FAM6_MOD42_ONLY: a single extra table row (first two columns 0xD4, 0x02) restricted to C0|C1|C2|C3. NOTE(review): the macro name suggests it is meant for family 6 model 42 only -- confirm where the macro is expanded. */ +#define EVENTS_FAM6_MOD42_ONLY \ +{ 0xD4, 0x02, C0|C1|C2|C3, "mem_load_uops_misc_retired.llc_miss" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define EVENTS_FAM6_MOD42_ONLY */ +/* EVENTS_FAM6_MOD45_ONLY: currently expands to nothing, because both candidate rows below are commented out as events that require msr_offset. NOTE(review): the macro name suggests family 6 model 45 -- confirm where the macro is expanded. */ +#define EVENTS_FAM6_MOD45_ONLY \ +/* { 0xD3, 0x01, C_ALL, "mem_load_uops_llc_miss_retired.local_dram" , 0x0, ATTR_NONE, 0x3C9 }, ignore events that require msr_offset */ \ +/* { 0xD3, 0x04, C_ALL, "mem_load_uops_llc_miss_retired.remote_dram" , 0x0, ATTR_NONE, 0x3C9 }, ignore events that require msr_offset */ \ +/* end of #define EVENTS_FAM6_MOD45_ONLY */ + +/* Intel Ivy Bridge Processor */ +/* + * The Ivy Bridge tables are basically from Bug 16457100 + * libcpc counter names should be based on public Intel documentation -- Ivy Bridge + * and those tables are basically from the + * Intel SDM, January 2013, Section 19.3, Table 19-5. + * Additionally, there is + * Table 19-6. Model 62 only.
+ */ + +#define EVENTS_FAM6_MOD58 \ +{ 0x03, 0x02, C_ALL, "ld_blocks.store_forward" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x05, 0x01, C_ALL, "misalign_mem_ref.loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x05, 0x02, C_ALL, "misalign_mem_ref.stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x01, C_ALL, "ld_blocks_partial.address_alias" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x81, C_ALL, "dtlb_load_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x82, C_ALL, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x84, C_ALL, "dtlb_load_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.core_stall_cycles" , 0x1, ATTR_INV | ATTR_ANY, 0x0 }, \ +{ 0x0E, 0x10, C_ALL, "uops_issued.flags_merge" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x20, C_ALL, "uops_issued.slow_lea" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x40, C_ALL, "uops_issued.sIngle_mul" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x01, C_ALL, "fp_comp_ops_exe.x87" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x10, C_ALL, "fp_comp_ops_exe.sse_fp_packed_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x20, C_ALL, "fp_comp_ops_exe.sse_fp_scalar_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x40, C_ALL, "fp_comp_ops_exe.sse_packed_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x10, 0x80, C_ALL, "fp_comp_ops_exe.sse_scalar_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x11, 0x01, C_ALL, "simd_fp_256.packed_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x11, 0x02, C_ALL, "simd_fp_256.packed_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x14, 0x01, C_ALL, "arith.fpu_div_active" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x14, 0x01, C_ALL, "arith.fpu_div" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x24, 0x01, C_ALL, "l2_rqsts.demand_data_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x03, C_ALL, "l2_rqsts.all_demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x04, C_ALL, "l2_rqsts.rfo_hits" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x08, 
C_ALL, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x0C, C_ALL, "l2_rqsts.all_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x10, C_ALL, "l2_rqsts.code_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x20, C_ALL, "l2_rqsts.code_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x30, C_ALL, "l2_rqsts.all_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x40, C_ALL, "l2_rqsts.pf_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x80, C_ALL, "l2_rqsts.pf_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xC0, C_ALL, "l2_rqsts.all_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x01, C_ALL, "l2_store_lock_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x08, C_ALL, "l2_store_lock_rqsts.hit_m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x0F, C_ALL, "l2_store_lock_rqsts.all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x01, C_ALL, "l2_l1d_wb_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x04, C_ALL, "l2_l1d_wb_rqsts.hit_e" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x08, C_ALL, "l2_l1d_wb_rqsts.hit_m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x28, 0x0F, C_ALL, "l2_l1d_wb_rqsts.all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2E, 0x41, C_ALL, "longest_lat_cache.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2E, 0x4F, C_ALL, "longest_lat_cache.reference" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C(2), "l1d_pend_miss.pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C(2), "l1d_pend_miss.pending_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C(2), "l1d_pend_miss.occurrences" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x49, 0x01, C_ALL, "dtlb_store_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x02, C_ALL, "dtlb_store_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x04, C_ALL, "dtlb_store_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x10, C_ALL, "dtlb_store_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4C, 0x01, C_ALL, "load_hit_pre.sw_pf" , 0x0, ATTR_NONE, 0x0 }, 
\ +{ 0x4C, 0x02, C_ALL, "load_hit_pre.hw_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x01, C_ALL, "l1d.replacement" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x58, 0x04, C_ALL, "move_elimination.int_not_eliminated" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x58, 0x08, C_ALL, "move_elimination.simd_not_eliminated" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x58, 0x01, C_ALL, "move_elimination.int_eliminated" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x58, 0x02, C_ALL, "move_elimination.simd_eliminated" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0_trans" , 0x0, ATTR_EDGE, 0x0 }, \ +{ 0x5C, 0x02, C_ALL, "cpl_cycles.ring123" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5E, 0x01, C_ALL, "rs_events.empty_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5F, 0x04, C_ALL, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd_cycles", 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd_cycles", 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x01, C_ALL, "lock_cycles.split_lock_uc_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x02, C_ALL, "lock_cycles.cache_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x02, C_ALL, "idq.empty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x04, C_ALL, "idq.mite_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x04, C_ALL, "idq.mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x08, 
C_ALL, "idq.dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x08, C_ALL, "idq.dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_activations" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x79, 0x18, C_ALL, "idq.all_dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_any_uops" /* synonym, from Intel SDM */ , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x20, C_ALL, "idq.ms_mite_uops" /* 0x0 in the fifth column goes with the _uops name; the 0x1 row below is the _cycles variant, same pattern as idq.ms_dsb_uops / idq.ms_dsb_cycles */ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x20, C_ALL, "idq.ms_mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x24, C_ALL, "idq.all_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_any_uops" /* synonym, from Intel SDM */ , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x30, C_ALL, "idq.ms_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x30, C_ALL, "idq.ms_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x3C, C_ALL, "idq.mite_all_uops" /* weird name suggested by Intel docs */ , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x3C, C_ALL, "idq.mite_all_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x02, C_ALL, "icache.misses" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x01, C_ALL, "itlb_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x02, C_ALL, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x04, C_ALL, "itlb_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x10, C_ALL, "itlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x01, C_ALL, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x04, C_ALL, "ild_stall.iq_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x41, C_ALL, "br_inst_exec.nontaken_cond" , 0x0, 
ATTR_NONE, 0x0 }, \ +{ 0x88, 0x81, C_ALL, "br_inst_exec.taken_cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x82, C_ALL, "br_inst_exec.taken_direct_jmp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x84, C_ALL, "br_inst_exec.taken_indirect_jmp_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x88, C_ALL, "br_inst_exec.taken_return_near" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x90, C_ALL, "br_inst_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xA0, C_ALL, "br_inst_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xFF, C_ALL, "br_inst_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x41, C_ALL, "br_misp_exec.nontaken_cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x81, C_ALL, "br_misp_exec.taken_cond" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x84, C_ALL, "br_misp_exec.taken_indirect_jmp_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x88, C_ALL, "br_misp_exec.taken_return_near" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x90, C_ALL, "br_misp_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0xA0, C_ALL, "br_misp_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0xFF, C_ALL, "br_misp_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.core" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x01, C_ALL, "uops_dispatched_port.port_0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x02, C_ALL, "uops_dispatched_port.port_1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x04, C_ALL, "uops_dispatched_port.port_2_ld" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x08, C_ALL, "uops_dispatched_port.port_2_sta" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x0C, C_ALL, "uops_dispatched_port.port_2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x10, C_ALL, "uops_dispatched_port.port_3_ld" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x20, C_ALL, "uops_dispatched_port.port_3_sta" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x30, C_ALL, "uops_dispatched_port.port_3" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x40, C_ALL, "uops_dispatched_port.port_4" , 0x0, ATTR_NONE, 0x0 }, \ +{ 
0xA1, 0x80, C_ALL, "uops_dispatched_port.port_5" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x01, C_ALL, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x04, C_ALL, "resource_stalls.rs" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x08, C_ALL, "resource_stalls.sb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x10, C_ALL, "resource_stalls.rob" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_pending_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA3, 0x02, C0|C1|C2|C3, "cycle_activity.cycles_ldm_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x02, C0|C1|C2|C3, "cycle_activity.cycles_ldm_pending_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA3, 0x08, C(2), "cycle_activity.cycles_l1d_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x08, C(2), "cycle_activity.cycles_l1d_pending_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA3, 0x04, C_ALL, "cycle_activity.cycles_no_execute" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x04, C_ALL, "cycle_activity.cycles_no_execute_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xAB, 0x01, C_ALL, "dsb2mite_switches.count" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xAB, 0x02, C_ALL, "dsb2mite_switches.penalty_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xAC, 0x08, C_ALL, "dsb_fill.exceed_dsb_lines" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xAE, 0x01, C_ALL, "itlb.itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x01, C_ALL, "offcore_requests.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x02, C_ALL, "offcore_requests.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x04, C_ALL, "offcore_requests.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x08, C_ALL, "offcore_requests.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.thread" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core" , 0x0, ATTR_NONE, 0x0 }, \ +/* { 0xB7, 0x01, C_ALL, "offcore_response_0" , 0x0, ATTR_NONE, 0x1A6 }, ignore events 
that require msr_offset */ \ +/* { 0xBB, 0x01, C_ALL, "offcore_response_1" , 0x0, ATTR_NONE, 0x1A7 }, ignore events that require msr_offset */ \ +{ 0xBD, 0x01, C_ALL, "tlb_flush.dtlb_thread" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBD, 0x20, C_ALL, "tlb_flush.stlb_any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC0, 0x00, C_ALL, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC0, 0x01, C(1), "inst_retired.prec_dist" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC1, 0x08, C_ALL, "other_assists.avx_store" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC1, 0x10, C_ALL, "other_assists.avx_to_sse" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC1, 0x20, C_ALL, "other_assists.sse_to_avx" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.active_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0xC2, 0x02, C_ALL, "uops_retired.retire_slots" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x02, C_ALL, "machine_clears.memory_ordering" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x04, C_ALL, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x20, C_ALL, "machine_clears.maskmov" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x00, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x01, C_ALL, "br_inst_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x02, C_ALL, "br_inst_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x04, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x08, C_ALL, "br_inst_retired.near_return" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x10, C_ALL, "br_inst_retired.not_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x20, C_ALL, "br_inst_retired.near_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x40, C_ALL, "br_inst_retired.far_branch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x00, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x01, C_ALL, "br_misp_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x02, C_ALL, 
"br_misp_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x04, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x10, C_ALL, "br_misp_retired.not_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x20, C_ALL, "br_misp_retired.taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x02, C_ALL, "fp_assist.x87_output" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x04, C_ALL, "fp_assist.x87_input" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x08, C_ALL, "fp_assist.simd_output" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x10, C_ALL, "fp_assist.simd_input" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x1E, C_ALL, "fp_assist.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCC, 0x20, C_ALL, "rob_misc_events.lbr_inserts" , 0x0, ATTR_NONE, 0x0 }, \ +/* { 0xCD, 0x01, C3 , "mem_trans_retired.load_latency" , 0x0, ATTR_NONE, 0x3F6 }, ignore events that require msr_offset */ /* See Section "MSR_PEBS_LD_LAT_THRESHOLD" */ \ +{ 0xCD, 0x02, C3 , "mem_trans_retired.precise_store" , 0x0, ATTR_NONE, 0x0 }, /* See Section "Precise Store Facility" */ \ +{ 0xD0, 0x11, C_ALL, "mem_uops_retired.stlb_miss_loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x12, C_ALL, "mem_uops_retired.stlb_miss_stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x21, C_ALL, "mem_uops_retired.lock_loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x22, C_ALL, "mem_uops_retired.lock_stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x41, C_ALL, "mem_uops_retired.split_loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x42, C_ALL, "mem_uops_retired.split_stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x81, C_ALL, "mem_uops_retired.all_loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD0, 0x82, C_ALL, "mem_uops_retired.all_stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x01, C_ALL, "mem_load_uops_retired.l1_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x02, C_ALL, "mem_load_uops_retired.l2_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x04, C_ALL, "mem_load_uops_retired.llc_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x08, C_ALL, "mem_load_uops_retired.l1_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x10, 
C_ALL, "mem_load_uops_retired.l2_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x20, C_ALL, "mem_load_uops_retired.llc_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD1, 0x40, C_ALL, "mem_load_uops_retired.hit_lfb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x01, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x02, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x04, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_hitm" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x08, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_none" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD3, 0x01, C_ALL, "mem_load_uops_llc_miss_retired.local_dram" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xE6, 0x1F, C_ALL, "baclears.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x01, C_ALL, "l2_trans.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x02, C_ALL, "l2_trans.rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x04, C_ALL, "l2_trans.code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x08, C_ALL, "l2_trans.all_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x10, C_ALL, "l2_trans.l1d_wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x20, C_ALL, "l2_trans.l2_fill" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x40, C_ALL, "l2_trans.l2_wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x80, C_ALL, "l2_trans.all_requests" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x01, C_ALL, "l2_lines_in.i" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x02, C_ALL, "l2_lines_in.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x04, C_ALL, "l2_lines_in.e" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x07, C_ALL, "l2_lines_in.all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x01, C_ALL, "l2_lines_out.demand_clean" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x02, C_ALL, "l2_lines_out.demand_dirty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x04, C_ALL, "l2_lines_out.pf_clean" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x08, C_ALL, "l2_lines_out.pf_dirty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x0A, C_ALL, "l2_lines_out.dirty_all" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define EVENTS_FAM6_MOD58 */ +/* EVENTS_FAM6_MOD62_ONLY: four rows, all with 0xD3 in the first column (second column 0x01, 0x04, 0x10, 0x20). NOTE(review): presumably the "Table 19-6. Model 62 only" rows mentioned in the Ivy Bridge header comment -- confirm. The local_dram row is identical to one already listed in EVENTS_FAM6_MOD58, so it appears twice if both macros are expanded into the same table -- verify at the point of use. */ +#define EVENTS_FAM6_MOD62_ONLY \ +{ 0xD3, 
0x01, C_ALL, "mem_load_uops_llc_miss_retired.local_dram" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD3, 0x04, C_ALL, "mem_load_uops_llc_miss_retired.remote_dram" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD3, 0x10, C_ALL, "mem_load_uops_llc_miss_retired.remote_hitm" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD3, 0x20, C_ALL, "mem_load_uops_llc_miss_retired.remote_fwd" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define EVENTS_FAM6_MOD62_ONLY */ + +/* Intel Haswell Processor */ +/* + * The Haswell tables take into account Bug 17006019 + * libcpc counter names should be based on public Intel documentation -- Haswell + * and are basically from the + * Intel SDM, June 2013, Section 19.3, Table 19-2 and Table 19-3. + * We omit the Table 19-4 uncore events. + */ + +#define EVENTS_FAM6_MOD60 \ +{ 0x03, 0x02, C_ALL, "ld_blocks.store_forward" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x03, 0x08, C_ALL, "ld_blocks.no_sr" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x05, 0x01, C_ALL, "misalign_mem_ref.loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x05, 0x02, C_ALL, "misalign_mem_ref.stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x01, C_ALL, "ld_blocks_partial.address_alias" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x01, C_ALL, "dtlb_load_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x02, C_ALL, "dtlb_load_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x04, C_ALL, "dtlb_load_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x0E, C_ALL, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x10, C_ALL, "dtlb_load_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x20, C_ALL, "dtlb_load_misses.stlb_hit_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x40, C_ALL, "dtlb_load_misses.stlb_hit_2m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x60, C_ALL, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x80, C_ALL, "dtlb_load_misses.pde_cache_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0D, 0x03, C_ALL, "int_misc.recovery_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x0D, 0x03, C_ALL, "int_misc.recovery_cycles_occurrences" , 
0x1, ATTR_EDGE, 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.core_stall_cycles" , 0x1, ATTR_INV | ATTR_ANY, 0x0 }, \ +{ 0x0E, 0x10, C_ALL, "uops_issued.flags_merge" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x20, C_ALL, "uops_issued.slow_lea" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x40, C_ALL, "uops_issued.single_mul" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x21, C_ALL, "l2_rqsts.demand_data_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x22, C_ALL, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x24, C_ALL, "l2_rqsts.code_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x27, C_ALL, "l2_rqsts.all_demand_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x30, C_ALL, "l2_rqsts.l2_pf_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x3F, C_ALL, "l2_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x41, C_ALL, "l2_rqsts.demand_data_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x42, C_ALL, "l2_rqsts.rfo_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x44, C_ALL, "l2_rqsts.code_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x50, C_ALL, "l2_rqsts.l2_pf_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xE1, C_ALL, "l2_rqsts.all_demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xE2, C_ALL, "l2_rqsts.all_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xE4, C_ALL, "l2_rqsts.all_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xE7, C_ALL, "l2_rqsts.all_demand_references" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xF8, C_ALL, "l2_rqsts.all_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xFF, C_ALL, "l2_rqsts.references" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x27, 0x50, C_ALL, "l2_demand_rqsts.wb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2E, 0x4F, C_ALL, "longest_lat_cache.reference" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2E, 0x41, C_ALL, "longest_lat_cache.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x01, C_ALL, 
"cpu_clk_thread_unhalted.ref_xclk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C(2) , "l1d_pend_miss.pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C(2) , "l1d_pend_miss.pending_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C(2) , "l1d_pend_miss.occurences" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x49, 0x01, C_ALL, "dtlb_store_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x02, C_ALL, "dtlb_store_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x04, C_ALL, "dtlb_store_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x0E, C_ALL, "dtlb_store_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x10, C_ALL, "dtlb_store_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x20, C_ALL, "dtlb_store_misses.stlb_hit_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x40, C_ALL, "dtlb_store_misses.stlb_hit_2m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x60, C_ALL, "dtlb_store_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x80, C_ALL, "dtlb_store_misses.pde_cache_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4C, 0x01, C_ALL, "load_hit_pre.sw_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4C, 0x02, C_ALL, "load_hit_pre.hw_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x01, C_ALL, "l1d.replacement" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x54, 0x01, C_ALL, "tx_mem.abort_conflict" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x54, 0x02, C_ALL, "tx_mem.abort_capacity" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x54, 0x04, C_ALL, "tx_mem.abort_hle_store_to_elided_lock" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x54, 0x08, C_ALL, "tx_mem.abort_hle_elision_buffer_not_empty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x54, 0x10, C_ALL, "tx_mem.abort_hle_elision_buffer_mismatch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x54, 0x20, C_ALL, "tx_mem.abort_hle_elision_buffer_unsupported_alignment" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x54, 0x40, C_ALL, "tx_mem.abort_hle_elision_buffer_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x58, 0x01, C_ALL, "move_elimination.int_eliminated" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x58, 0x02, C_ALL, 
"move_elimination.simd_eliminated" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x58, 0x04, C_ALL, "move_elimination.int_not_eliminated" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x58, 0x08, C_ALL, "move_elimination.simd_not_eliminated" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0_trans" , 0x0, ATTR_EDGE, 0x0 }, \ +{ 0x5C, 0x02, C_ALL, "cpl_cycles.ring123" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5D, 0x01, C_ALL, "tx_exec.misc1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5D, 0x02, C_ALL, "tx_exec.misc2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5D, 0x04, C_ALL, "tx_exec.misc3" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5D, 0x08, C_ALL, "tx_exec.misc4" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5D, 0x10, C_ALL, "tx_exec.misc5" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5E, 0x01, C_ALL, "rs_events.empty_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.cycles_with_demand_data_rd", 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd_cycles", 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.cycles_with_data_rd" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x01, C_ALL, "lock_cycles.split_lock_uc_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x02, C_ALL, "lock_cycles.cache_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x02, C_ALL, "idq.empty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x04, C_ALL, "idq.mite_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x04, C_ALL, "idq.mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 
0x79, 0x20, C_ALL, "idq.ms_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x20, C_ALL, "idq.ms_mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_any_uops" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x08, C_ALL, "idq.dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x08, C_ALL, "idq.dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_occur" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_any_uops" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x30, C_ALL, "idq.ms_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x30, C_ALL, "idq.ms_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x3C, C_ALL, "idq.mite_all_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x02, C_ALL, "icache.misses" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x01, C_ALL, "itlb_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x02, C_ALL, "itlb_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x04, C_ALL, "itlb_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x0E, C_ALL, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x10, C_ALL, "itlb_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x20, C_ALL, "itlb_misses.stlb_hit_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x40, C_ALL, "itlb_misses.stlb_hit_2m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x60, C_ALL, "itlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x01, C_ALL, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x04, C_ALL, "ild_stall.iq_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x41, C_ALL, "br_inst_exec.nontaken_conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x81, C_ALL, "br_inst_exec.taken_conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x82, C_ALL, 
"br_inst_exec.taken_direct_jump" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x84, C_ALL, "br_inst_exec.taken_indirect_jump_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x88, C_ALL, "br_inst_exec.taken_indirect_near_return" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x90, C_ALL, "br_inst_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xA0, C_ALL, "br_inst_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xFF, C_ALL, "br_inst_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x41, C_ALL, "br_misp_exec.nontaken_conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x81, C_ALL, "br_misp_exec.taken_conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x84, C_ALL, "br_misp_exec.taken_indirect_jump_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x88, C_ALL, "br_misp_exec.taken_return_near" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x90, C_ALL, "br_misp_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0xA0, C_ALL, "br_misp_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0xFF, C_ALL, "br_misp_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.core" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_0_uops_deliv.core" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_1_uop_deliv.core" , 0x3, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_2_uop_deliv.core" , 0x2, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_3_uop_deliv.core" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_fe_was_ok" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x01, C_ALL, "uops_executed_port.port_0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x02, C_ALL, "uops_executed_port.port_1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x04, C_ALL, "uops_executed_port.port_2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x08, C_ALL, "uops_executed_port.port_3" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x10, 
C_ALL, "uops_executed_port.port_4" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x20, C_ALL, "uops_executed_port.port_5" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x40, C_ALL, "uops_executed_port.port_6" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x80, C_ALL, "uops_executed_port.port_7" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x01, C_ALL, "uops_executed_port.port_0_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x02, C_ALL, "uops_executed_port.port_1_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x04, C_ALL, "uops_executed_port.port_2_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x08, C_ALL, "uops_executed_port.port_3_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x10, C_ALL, "uops_executed_port.port_4_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x20, C_ALL, "uops_executed_port.port_5_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x40, C_ALL, "uops_executed_port.port_6_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x80, C_ALL, "uops_executed_port.port_7_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA2, 0x01, C_ALL, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x04, C_ALL, "resource_stalls.rs" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x08, C_ALL, "resource_stalls.sb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x10, C_ALL, "resource_stalls.rob" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_pending_cycles" , 0x2, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x02, C_ALL, "cycle_activity.cycles_ldm_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x02, C_ALL, "cycle_activity.cycles_ldm_pending_cycles" , 0x2, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x05, C_ALL, "cycle_activity.stalls_l2_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x08, C(2) , "cycle_activity.cycles_l1d_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x08, C(2) , "cycle_activity.cycles_l1d_pending_cycles" , 0x8, ATTR_NONE, 0x0 }, \ +{ 0xA8, 0x01, C_ALL, "lsd.uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xAE, 0x01, C_ALL, "itlb.itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x01, C_ALL, 
"offcore_requests.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x02, C_ALL, "offcore_requests.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x04, C_ALL, "offcore_requests.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x08, C_ALL, "offcore_requests.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core" , 0x0, ATTR_NONE, 0x0 }, \ +/* { 0xB7, 0x01, C_ALL, "off_core_response_0" , 0x0, ATTR_NONE, 0x1A6 }, omit events requiring MSR programming */ \ +/* { 0xBB, 0x01, C_ALL, "off_core_response_1" , 0x0, ATTR_NONE, 0x1A7 }, omit events requiring MSR programming */ \ +{ 0xBC, 0x11, C_ALL, "page_walker_loads.dtlb_l1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x21, C_ALL, "page_walker_loads.itlb_l1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x12, C_ALL, "page_walker_loads.dtlb_l2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x22, C_ALL, "page_walker_loads.itlb_l2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x14, C_ALL, "page_walker_loads.dtlb_l3" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x24, C_ALL, "page_walker_loads.itlb_l3" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x18, C_ALL, "page_walker_loads.dtlb_memory" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x28, C_ALL, "page_walker_loads.itlb_memory" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBD, 0x01, C_ALL, "tlb_flush.dtlb_thread" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBD, 0x20, C_ALL, "tlb_flush.stlb_any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC0, 0x00, C_ALL, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC0, 0x01, C(1) , "inst_retired.prec_dist" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC1, 0x08, C_ALL, "other_assists.avx_to_sse" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC1, 0x10, C_ALL, "other_assists.sse_to_avx" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC1, 0x40, C_ALL, "other_assists.any_wb_assist" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.all" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0xC2, 0x02, C_ALL, "uops_retired.retire_slots" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC3, 0x02, C_ALL, 
"machine_clears.memory_ordering" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x04, C_ALL, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x20, C_ALL, "machine_clears.maskmov" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x00, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x01, C_ALL, "br_inst_retired.conditional" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x02, C_ALL, "br_inst_retired.near_call" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x04, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x08, C_ALL, "br_inst_retired.near_return" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x10, C_ALL, "br_inst_retired.not_taken" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x20, C_ALL, "br_inst_retired.near_taken" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x40, C_ALL, "br_inst_retired.far_branch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x00, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x01, C_ALL, "br_misp_retired.conditional" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC5, 0x04, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC5, 0x20, C_ALL, "br_misp_retired.near_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC8, 0x01, C_ALL, "hle_retired.start" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC8, 0x02, C_ALL, "hle_retired.commit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC8, 0x04, C_ALL, "hle_retired.aborted" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC8, 0x08, C_ALL, "hle_retired.aborted_misc1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC8, 0x10, C_ALL, "hle_retired.aborted_misc2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC8, 0x20, C_ALL, "hle_retired.aborted_misc3" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC8, 0x40, C_ALL, "hle_retired.aborted_misc4" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC8, 0x80, C_ALL, "hle_retired.aborted_misc5" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC9, 0x01, C_ALL, "rtm_retired.start" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC9, 0x02, C_ALL, "rtm_retired.commit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC9, 0x04, C_ALL, "rtm_retired.aborted" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC9, 0x08, C_ALL, 
"rtm_retired.aborted_misc1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC9, 0x10, C_ALL, "rtm_retired.aborted_misc2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC9, 0x20, C_ALL, "rtm_retired.aborted_misc3" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC9, 0x40, C_ALL, "rtm_retired.aborted_misc4" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC9, 0x80, C_ALL, "rtm_retired.aborted_misc5" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x02, C_ALL, "fp_assist.x87_output" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x04, C_ALL, "fp_assist.x87_input" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x08, C_ALL, "fp_assist.simd_output" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x10, C_ALL, "fp_assist.simd_input" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x1E, C_ALL, "fp_assist.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCC, 0x20, C_ALL, "rob_misc_events.lbr_inserts" , 0x0, ATTR_NONE, 0x0 }, \ +/* { 0xCD, 0x01, C_ALL, "mem_trans_retired.load_latency" , 0x0, ATTR_NONE, 0x3F6 }, omit events requiring MSR programming */ \ +{ 0xD0, 0x11, C_ALL, "mem_uops_retired.stlb_miss_loads" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x12, C_ALL, "mem_uops_retired.stlb_miss_stores" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x21, C_ALL, "mem_uops_retired.lock_loads" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x22, C_ALL, "mem_uops_retired.lock_stores" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x41, C_ALL, "mem_uops_retired.split_loads" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x42, C_ALL, "mem_uops_retired.split_stores" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x81, C_ALL, "mem_uops_retired.all_loads" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x82, C_ALL, "mem_uops_retired.all_stores" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x01, C_ALL, "mem_load_uops_retired.l1_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x02, C_ALL, "mem_load_uops_retired.l2_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x04, C_ALL, "mem_load_uops_retired.l3_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x08, C_ALL, "mem_load_uops_retired.l1_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x10, C_ALL, "mem_load_uops_retired.l2_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x20, C_ALL, 
"mem_load_uops_retired.l3_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x40, C_ALL, "mem_load_uops_retired.hit_lfb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xD2, 0x01, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD2, 0x02, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD2, 0x04, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_hitm" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD2, 0x08, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_none" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD3, 0x01, C_ALL, "mem_load_uops_l3_miss_retired.local_dram" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xE6, 0x1F, C_ALL, "baclears.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x01, C_ALL, "l2_trans.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x02, C_ALL, "l2_trans.rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x04, C_ALL, "l2_trans.code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x08, C_ALL, "l2_trans.all_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x10, C_ALL, "l2_trans.l1d_wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x20, C_ALL, "l2_trans.l2_fill" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x40, C_ALL, "l2_trans.l2_wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x80, C_ALL, "l2_trans.all_requests" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x01, C_ALL, "l2_lines_in.i" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x02, C_ALL, "l2_lines_in.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x04, C_ALL, "l2_lines_in.e" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x07, C_ALL, "l2_lines_in.all" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x05, C_ALL, "l2_lines_out.demand_clean" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF2, 0x06, C_ALL, "l2_lines_out.demand_dirty" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define */ + +/* Intel Broadwell Processor */ +/* + * This table is essentially taken from: + * https://grok.cz.oracle.com/source/xref/on12-clone/usr/src/uts/intel/pcbe/bdw_pcbe_tbl.c + */ + +#define EVENTS_FAM6_MOD61 \ +{ 0x03, 0x02, C_ALL, "ld_blocks.store_forward" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x03, 0x08, C_ALL, "ld_blocks.no_sr" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 
0x05, 0x01, C_ALL, "misalign_mem_ref.loads" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x05, 0x02, C_ALL, "misalign_mem_ref.stores" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x07, 0x01, C_ALL, "ld_blocks_partial.address_alias" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x08, 0x01, C_ALL, "dtlb_load_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x02, C_ALL, "dtlb_load_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x04, C_ALL, "dtlb_load_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x0E, C_ALL, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x10, C_ALL, "dtlb_load_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x20, C_ALL, "dtlb_load_misses.stlb_hit_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x40, C_ALL, "dtlb_load_misses.stlb_hit_2m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x60, C_ALL, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x80, C_ALL, "dtlb_load_misses.pde_cache_miss" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x0D, 0x03, C_ALL, "int_misc.recovery_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x0D, 0x03, C_ALL, "int_misc.recovery_cycles_any" , 0x1, ATTR_ANY , 0x0 }, \ +/* Private event, not public by Intel */ \ +{ 0x0D, 0x03, C_ALL, "int_misc.recovery_cycles_occurrences" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x0D, 0x08, C_ALL, "int_misc.rat_stall_cycles" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x0E, 0x01, C_ALL, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x10, C_ALL, "uops_issued.flags_merge" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x20, C_ALL, "uops_issued.slow_lea" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x40, C_ALL, "uops_issued.single_mul" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.core_stall_cycles" , 0x1,(ATTR_INV | ATTR_ANY), 0x0 }, \ + \ +{ 0x14, 0x01, C_ALL, "arith.fpu_div_active" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x24, 0x21, C_ALL, "l2_rqsts.demand_data_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x41, C_ALL, 
"l2_rqsts.demand_data_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x30, C_ALL, "l2_rqsts.l2_pf_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x50, C_ALL, "l2_rqsts.l2_pf_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xE1, C_ALL, "l2_rqsts.all_demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xE2, C_ALL, "l2_rqsts.all_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xE4, C_ALL, "l2_rqsts.all_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xF8, C_ALL, "l2_rqsts.all_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x42, C_ALL, "l2_rqsts.rfo_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x22, C_ALL, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x44, C_ALL, "l2_rqsts.code_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x24, C_ALL, "l2_rqsts.code_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x27, C_ALL, "l2_rqsts.all_demand_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xE7, C_ALL, "l2_rqsts.all_demand_references" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x3F, C_ALL, "l2_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xFF, C_ALL, "l2_rqsts.references" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x27, 0x50, C_ALL, "l2_demand_rqsts.wb_hit" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.thread_p_any" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk_any" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0x3C, 0x02, C_ALL, "cpu_clk_thread_unhalted.one_thread_active" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x48, 0x01, C(2) , "l1d_pend_miss.pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C(2) , "l1d_pend_miss.pending_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C(2) , "l1d_pend_miss.pending_cycles_any" , 0x1, ATTR_ANY , 0x0 }, \ +/* Private event, not public by Intel */ \ +{ 0x48, 0x01, C(2) , "l1d_pend_miss.occurences" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x48, 0x02, C_ALL, "l1d_pend_miss.fb_full" , 0x1, ATTR_NONE, 0x0 }, \ 
+ \ +{ 0x49, 0x01, C_ALL, "dtlb_store_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x02, C_ALL, "dtlb_store_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x04, C_ALL, "dtlb_store_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x0E, C_ALL, "dtlb_store_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x10, C_ALL, "dtlb_store_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x20, C_ALL, "dtlb_store_misses.stlb_hit_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x40, C_ALL, "dtlb_store_misses.stlb_hit_2m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x60, C_ALL, "dtlb_store_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x80, C_ALL, "dtlb_store_misses.pde_cache_miss" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x4C, 0x01, C_ALL, "load_hit_pre.sw_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4C, 0x02, C_ALL, "load_hit_pre.hw_pf" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x4F, 0x10, C_ALL, "ept.walk_cycles" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x51, 0x01, C_ALL, "l1d.replacement" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x54, 0x01, C_ALL, "tx_mem.abort_conflict" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0x54, 0x02, C_ALL, "tx_mem.abort_capacity_write" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0x54, 0x04, C_ALL, "tx_mem.abort_hle_store_to_elided_lock" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0x54, 0x08, C_ALL, "tx_mem.abort_hle_elision_buffer_not_empty" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0x54, 0x10, C_ALL, "tx_mem.abort_hle_elision_buffer_mismatch" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0x54, 0x20, C_ALL, "tx_mem.abort_hle_elision_buffer_unsupported_alignment" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0x54, 0x40, C_ALL, "tx_mem.hle_elision_buffer_full" , 0x0, ATTR_TSX , 0x0 }, \ + \ +{ 0x58, 0x01, C_ALL, "move_elimination.int_eliminated" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x58, 0x02, C_ALL, "move_elimination.simd_eliminated" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x58, 0x04, C_ALL, "move_elimination.int_not_eliminated" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x58, 0x08, C_ALL, "move_elimination.simd_not_eliminated" , 0x0, ATTR_NONE, 
0x0 }, \ + \ +{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0_trans" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x5C, 0x02, C_ALL, "cpl_cycles.ring123" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x5D, 0x01, C_ALL, "tx_exec.misc1" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0x5D, 0x02, C_ALL, "tx_exec.misc2" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0x5D, 0x04, C_ALL, "tx_exec.misc3" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0x5D, 0x08, C_ALL, "tx_exec.misc4" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0x5D, 0x10, C_ALL, "tx_exec.misc5" , 0x0, ATTR_TSX , 0x0 }, \ + \ +{ 0x5E, 0x01, C_ALL, "rs_events.empty_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5E, 0x01, C_ALL, "rs_events.empty_end" , 0x1, (ATTR_INV | ATTR_EDGE), 0x0 }, \ + \ +{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.cycles_with_demand_data_rd", 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd_ge_6 " , 0x6, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +/* Private event, not public by Intel */ \ +{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd_cycles", 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +/* Private event, not public by Intel */ \ +{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.cycles_with_data_rd" , 0x1, ATTR_NONE, 0x0 }, \ + \ +{ 0x63, 0x01, C_ALL, "lock_cycles.split_lock_uc_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x63, 0x02, C_ALL, "lock_cycles.cache_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x79, 0x02, C_ALL, "idq.empty" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x04, C_ALL, "idq.mite_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 
0x79, 0x04, C_ALL, "idq.mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x08, C_ALL, "idq.dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x08, C_ALL, "idq.dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_occur" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_any_uops" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x20, C_ALL, "idq.ms_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_any_uops" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x30, C_ALL, "idq.ms_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x30, C_ALL, "idq.ms_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x30, C_ALL, "idq.ms_switches" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x79, 0x3C, C_ALL, "idq.mite_all_uops" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x80, 0x01, C_ALL, "icache.hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x02, C_ALL, "icache.misses" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x80, 0x04, C_ALL, "icache.ifdata_stall" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x85, 0x01, C_ALL, "itlb_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x02, C_ALL, "itlb_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x04, C_ALL, "itlb_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x0E, C_ALL, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x10, C_ALL, "itlb_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x20, C_ALL, "itlb_misses.stlb_hit_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x40, C_ALL, "itlb_misses.stlb_hit_2m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x60, C_ALL, "itlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x87, 0x01, C_ALL, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x04, C_ALL, "ild_stall.iq_full" , 0x0, 
ATTR_NONE, 0x0 }, \ + \ +{ 0x88, 0x41, C_ALL, "br_inst_exec.nontaken_conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x81, C_ALL, "br_inst_exec.taken_conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x82, C_ALL, "br_inst_exec.taken_direct_jump" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x84, C_ALL, "br_inst_exec.taken_indirect_jump_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x88, C_ALL, "br_inst_exec.taken_indirect_near_return" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0x90, C_ALL, "br_inst_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xA0, C_ALL, "br_inst_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xC1, C_ALL, "br_inst_exec.all_conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xC2, C_ALL, "br_inst_exec.all_direct_jmp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xC4, C_ALL, "br_inst_exec.all_indirect_jump_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xC8, C_ALL, "br_inst_exec.all_indirect_near_return" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xD0, C_ALL, "br_inst_exec.all_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x88, 0xFF, C_ALL, "br_inst_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0x89, 0x41, C_ALL, "br_misp_exec.nontaken_conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x81, C_ALL, "br_misp_exec.taken_conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0x84, C_ALL, "br_misp_exec.taken_indirect_jump_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \ +/* Private event, not public by Intel */ \ +{ 0x89, 0x88, C_ALL, "br_misp_exec.taken_return_near" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0xC1, C_ALL, "br_misp_exec.all_conditional" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0xC4, C_ALL, "br_misp_exec.all_indirect_jump_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0xA0, C_ALL, "br_misp_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x89, 0xFF, C_ALL, "br_misp_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ + \ +/* Use Cmask to qualify uop b/w */ \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.core" , 0x0, ATTR_NONE, 0x0 }, \ 
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_0_uops_deliv.core" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_1_uop_deliv.core" , 0x3, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_2_uop_deliv.core" , 0x2, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_3_uop_deliv.core" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_fe_was_ok" , 0x1, ATTR_INV , 0x0 }, \ + \ +{ 0xA0, 0x03, C_ALL, "uop_dispatches_cancelled.simd_prf" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xA1, 0x01, C_ALL, "uops_executed_port.port_0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x02, C_ALL, "uops_executed_port.port_1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x04, C_ALL, "uops_executed_port.port_2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x08, C_ALL, "uops_executed_port.port_3" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x10, C_ALL, "uops_executed_port.port_4" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x20, C_ALL, "uops_executed_port.port_5" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x40, C_ALL, "uops_executed_port.port_6" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x80, C_ALL, "uops_executed_port.port_7" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x01, C_ALL, "uops_executed_port.port_0_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x02, C_ALL, "uops_executed_port.port_1_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x04, C_ALL, "uops_executed_port.port_2_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x08, C_ALL, "uops_executed_port.port_3_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x10, C_ALL, "uops_executed_port.port_4_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x20, C_ALL, "uops_executed_port.port_5_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x40, C_ALL, "uops_executed_port.port_6_core" , 0x0, ATTR_ANY , 0x0 }, \ +{ 0xA1, 0x80, C_ALL, "uops_executed_port.port_7_core" , 0x0, ATTR_ANY , 0x0 }, \ + \ +{ 0xA2, 0x01, C_ALL, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x04, C_ALL, "resource_stalls.rs" , 0x0, ATTR_NONE, 0x0 
}, \ +{ 0xA2, 0x08, C_ALL, "resource_stalls.sb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x10, C_ALL, "resource_stalls.rob" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_pending" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x02, C_ALL, "cycle_activity.cycles_ldm_pending" , 0x2, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x04, C_ALL, "cycle_activity.cycles_no_execute" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x05, C_ALL, "cycle_activity.stalls_l2_pending" , 0x5, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x06, C_ALL, "cycle_activity.stalls_ldm_pending" , 0x6, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x08, C(2) , "cycle_activity.cycles_l1d_pending" , 0x8, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x0C, C(2) , "cycle_activity.stalls_l1d_pending" , 0xC, ATTR_NONE, 0x0 }, \ + \ +{ 0xA8, 0x01, C_ALL, "lsd.uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA8, 0x01, C_ALL, "lsd.cycles_active" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xA8, 0x01, C_ALL, "lsd.cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \ + \ +{ 0xAB, 0x02, C_ALL, "dsb2mite_switches.penalty_cycles" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xAE, 0x01, C_ALL, "itlb.itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xB0, 0x01, C_ALL, "offcore_requests.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x02, C_ALL, "offcore_requests.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x04, C_ALL, "offcore_requests.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x08, C_ALL, "offcore_requests.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xB1, 0x01, C_ALL, "uops_executed.thread" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_1_uop_exec" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_2_uops_exec" , 0x2, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_3_uops_exec" , 0x3, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_4_uops_exec" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core" , 0x0, ATTR_NONE, 0x0 
}, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_none" , 0x0, ATTR_INV , 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_1" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_2" , 0x2, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_3" , 0x3, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_4" , 0x4, ATTR_NONE, 0x0 }, \ + \ +{ 0xB2, 0x01, C_ALL, "offcore_requests_buffer.sq_full" , 0x0, ATTR_NONE, 0x0 }, \ + \ +/* \ + * See Section "Off-core Response Performance Monitoring" \ + * \ + * Though these two off_core events support all counters, only 1 of \ + * them can be used at any given time. This is due to the extra MSR \ + * programming required. \ + */ \ +/* { 0xB7, 0x01, C_ALL, "offcore_response_0" , 0x0, ATTR_NONE, OFFCORE_RSP_0 }, omit events requiring MSR programming */ \ +/* { 0xBB, 0x01, C_ALL, "offcore_response_1" , 0x0, ATTR_NONE, OFFCORE_RSP_1 }, omit events requiring MSR programming */ \ + \ +{ 0xBC, 0x11, C_ALL, "page_walker_loads.dtlb_l1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x21, C_ALL, "page_walker_loads.itlb_l1" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x12, C_ALL, "page_walker_loads.dtlb_l2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x22, C_ALL, "page_walker_loads.itlb_l2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x14, C_ALL, "page_walker_loads.dtlb_l3" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x24, C_ALL, "page_walker_loads.itlb_l3" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBC, 0x18, C_ALL, "page_walker_loads.dtlb_memory" , 0x0, ATTR_NONE, 0x0 }, \ +/* itlb_memory is not in the Intel SDM or spreadsheet for Broadwell; "cputrack -h" does have it though */ \ +{ 0xBC, 0x28, C_ALL, "page_walker_loads.itlb_memory" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xBD, 0x01, C_ALL, "tlb_flush.dtlb_thread" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBD, 0x20, C_ALL, "tlb_flush.stlb_any" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xC0, 0x00, C_ALL, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC0, 0x02, C_ALL, 
"inst_retired.x87" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xC1, 0x08, C_ALL, "other_assists.avx_to_sse" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC1, 0x10, C_ALL, "other_assists.sse_to_avx" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC1, 0x40, C_ALL, "other_assists.any_wb_assist" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xC2, 0x01, C_ALL, "uops_retired.all" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.total_cycles" , 0xA, ATTR_INV , 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.core_stall_cycles" , 0x1, (ATTR_INV | ATTR_ANY), 0x0 }, \ +{ 0xC2, 0x02, C_ALL, "uops_retired.retire_slots" , 0x0, ATTR_PEBS, 0x0 }, \ + \ +{ 0xC3, 0x01, C_ALL, "machine_clears.cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x01, C_ALL, "machine_clears.count" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0xC3, 0x02, C_ALL, "machine_clears.memory_ordering" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x04, C_ALL, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x20, C_ALL, "machine_clears.maskmov" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xC4, 0x01, C_ALL, "br_inst_retired.conditional" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x02, C_ALL, "br_inst_retired.near_call" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x08, C_ALL, "br_inst_retired.near_return" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x10, C_ALL, "br_inst_retired.not_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x20, C_ALL, "br_inst_retired.near_taken" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x40, C_ALL, "br_inst_retired.far_branch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x02, C_ALL, "br_inst_retired.near_call_r3" , 0x0, ATTR_PEBS, 0x0 }, \ + \ +{ 0xC5, 0x01, C_ALL, "br_misp_retired.conditional" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC5, 0x20, C_ALL, "br_misp_retired.near_taken" , 0x0, ATTR_PEBS, 0x0 }, \ + \ +{ 0xC7, 0x01, C_ALL, "fp_arith_inst_retired.scalar_double" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC7, 0x02, C_ALL, "fp_arith_inst_retired.scalar_single" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC7, 0x03, C_ALL, "fp_arith_inst_retired.scalar" , 
0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC7, 0x04, C_ALL, "fp_arith_inst_retired.128b_packed_double" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC7, 0x08, C_ALL, "fp_arith_inst_retired.128b_packed_single" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC7, 0x10, C_ALL, "fp_arith_inst_retired.256b_packed_double" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC7, 0x15, C_ALL, "fp_arith_inst_retired.double" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC7, 0x20, C_ALL, "fp_arith_inst_retired.256b_packed_single" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC7, 0x2A, C_ALL, "fp_arith_inst_retired.single" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC7, 0x3C, C_ALL, "fp_arith_inst_retired.packed" , 0x0, ATTR_PEBS, 0x0 }, \ + \ +{ 0xC8, 0x01, C_ALL, "hle_retired.start" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0xC8, 0x02, C_ALL, "hle_retired.commit" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0xC8, 0x04, C_ALL, "hle_retired.aborted" , 0x0, ATTR_PEBS | ATTR_TSX, 0x0 }, \ +{ 0xC8, 0x08, C_ALL, "hle_retired.aborted_misc1" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0xC8, 0x10, C_ALL, "hle_retired.aborted_misc2" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0xC8, 0x20, C_ALL, "hle_retired.aborted_misc3" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0xC8, 0x40, C_ALL, "hle_retired.aborted_misc4" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0xC8, 0x80, C_ALL, "hle_retired.aborted_misc5" , 0x0, ATTR_TSX , 0x0 }, \ + \ +{ 0xC9, 0x01, C_ALL, "rtm_retired.start" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0xC9, 0x02, C_ALL, "rtm_retired.commit" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0xC9, 0x04, C_ALL, "rtm_retired.aborted" , 0x0, ATTR_PEBS | ATTR_TSX, 0x0 }, \ +{ 0xC9, 0x08, C_ALL, "rtm_retired.aborted_misc1" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0xC9, 0x10, C_ALL, "rtm_retired.aborted_misc2" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0xC9, 0x20, C_ALL, "rtm_retired.aborted_misc3" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0xC9, 0x40, C_ALL, "rtm_retired.aborted_misc4" , 0x0, ATTR_TSX , 0x0 }, \ +{ 0xC9, 0x80, C_ALL, "rtm_retired.aborted_misc5" , 0x0, ATTR_TSX , 0x0 }, \ + \ +{ 0xCA, 0x02, C_ALL, "fp_assist.x87_output" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x04, C_ALL, "fp_assist.x87_input" , 0x0, ATTR_NONE, 0x0 }, \ 
+{ 0xCA, 0x08, C_ALL, "fp_assist.simd_output" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x10, C_ALL, "fp_assist.simd_input" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCA, 0x1E, C_ALL, "fp_assist.any" , 0x1, ATTR_NONE, 0x0 }, \ + \ +{ 0xCC, 0x20, C_ALL, "rob_misc_events.lbr_inserts" , 0x0, ATTR_NONE, 0x0 }, \ + \ +/* See Section "MSR_PEBS_LD_LAT_THRESHOLD" */ \ +/* { 0xCD, 0x01, C(3) , "mem_trans_retired.load_latency" , 0x0, ATTR_PEBS_ONLY_LD_LAT, PEBS_LD_LAT_THRESHOLD }, omit events requiring MSR programming */ \ + \ +/* \ + * Event 0xD0 must be combined with umasks 0x1(loads) or 0x2(stores) \ + */ \ +{ 0xD0, 0x11, C_ALL, "mem_uops_retired.stlb_miss_loads" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x12, C_ALL, "mem_uops_retired.stlb_miss_stores" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x21, C_ALL, "mem_uops_retired.lock_loads" , 0x0, ATTR_PEBS, 0x0 }, \ +/* Private event, not public by Intel */ \ +{ 0xD0, 0x22, C_ALL, "mem_uops_retired.lock_stores" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x41, C_ALL, "mem_uops_retired.split_loads" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x42, C_ALL, "mem_uops_retired.split_stores" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x81, C_ALL, "mem_uops_retired.all_loads" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x82, C_ALL, "mem_uops_retired.all_stores" , 0x0, ATTR_PEBS, 0x0 }, \ + \ +{ 0xD1, 0x01, C_ALL, "mem_load_uops_retired.l1_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x02, C_ALL, "mem_load_uops_retired.l2_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x04, C_ALL, "mem_load_uops_retired.l3_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x08, C_ALL, "mem_load_uops_retired.l1_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x10, C_ALL, "mem_load_uops_retired.l2_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x20, C_ALL, "mem_load_uops_retired.l3_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x40, C_ALL, "mem_load_uops_retired.hit_lfb" , 0x0, ATTR_PEBS, 0x0 }, \ + \ +{ 0xD2, 0x01, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD2, 0x02, C_ALL, 
"mem_load_uops_l3_hit_retired.xsnp_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD2, 0x04, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_hitm" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD2, 0x08, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_none" , 0x0, ATTR_PEBS, 0x0 }, \ + \ +{ 0xD3, 0x01, C_ALL, "mem_load_uops_l3_miss_retired.local_dram" , 0x0, ATTR_PEBS, 0x0 }, \ + \ +/* The mem_load_l4_miss_retired events are not in "cputrack -h" output nor in the Intel spreadsheet. */ \ +/* { 0xD5, 0x01, C_ALL, "mem_load_l4_miss_retired.local_hit" , 0x0, ATTR_NONE, 0x0 }, */ \ +/* { 0xD5, 0x04, C_ALL, "mem_load_l4_miss_retired.local_miss" , 0x0, ATTR_NONE, 0x0 }, */ \ + \ +{ 0xE6, 0x1F, C_ALL, "baclears.any" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xF0, 0x01, C_ALL, "l2_trans.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x02, C_ALL, "l2_trans.rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x04, C_ALL, "l2_trans.code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x08, C_ALL, "l2_trans.all_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x10, C_ALL, "l2_trans.l1d_wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x20, C_ALL, "l2_trans.l2_fill" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x40, C_ALL, "l2_trans.l2_wb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF0, 0x80, C_ALL, "l2_trans.all_requests" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xF1, 0x01, C_ALL, "l2_lines_in.i" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x02, C_ALL, "l2_lines_in.s" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x04, C_ALL, "l2_lines_in.e" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xF1, 0x07, C_ALL, "l2_lines_in.all" , 0x0, ATTR_NONE, 0x0 }, \ + \ +{ 0xF2, 0x05, C_ALL, "l2_lines_out.demand_clean" , 0x0, ATTR_NONE, 0x0 }, \ +/* end of #define */ + + +/* Intel Skylake Processor */ +/* + * This table is essentially taken from: + * https://grok.cz.oracle.com/source/xref/on12-clone/usr/src/uts/intel/pcbe/skl_pcbe_tbl.c + * Also: + * https://grok.cz.oracle.com/source/xref/on12-clone/usr/src/uts/intel/pcbe/fam6_pcbe.h + * { 0xc0, 0x00, C_ALL, "inst_retired.any_p" }, \ + * { 0x3c, 0x01, C_ALL, 
"cpu_clk_unhalted.ref_p" }, \ + * { 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" }, \ + * { 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" }, \ + * { 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" }, \ + * { 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" } + * And: + * https://grok.cz.oracle.com/source/xref/on12-clone/usr/src/uts/intel/pcbe/core_pcbe.c + * { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" }, + * { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" }, + */ +#define EVENTS_FAM6_MOD78 \ +{ 0x03, 0x02, C_ALL, "ld_blocks.store_forward" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x03, 0x08, C_ALL, "ld_blocks.no_sr" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x07, 0x01, C_ALL, "ld_blocks_partial.address_alias" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x01, C_ALL, "dtlb_load_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x02, C_ALL, "dtlb_load_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x04, C_ALL, "dtlb_load_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x08, C_ALL, "dtlb_load_misses.walk_completed_1g" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x0E, C_ALL, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x10, C_ALL, "dtlb_load_misses.walk_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x10, C_ALL, "dtlb_load_misses.walk_active" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x08, 0x20, C_ALL, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0D, 0x01, C_ALL, "int_misc.recovery_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0D, 0x01, C_ALL, "int_misc.recovery_cycles_any" , 0x0, ATTR_ANY, 0x0 }, \ +{ 0x0D, 0x80, C_ALL, "int_misc.clear_resteer_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x01, C_ALL, "uops_issued.stall_cycles" , 0x1, ATTR_INV, 0x0 }, \ +{ 0x0E, 0x02, C_ALL, "uops_issued.vector_width_mismatch" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x0E, 0x20, C_ALL, "uops_issued.slow_lea" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x14, 0x01, C_ALL, "arith.divider_active" , 0x1, ATTR_NONE, 0x0 
}, \ +{ 0x24, 0x21, C_ALL, "l2_rqsts.demand_data_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x22, C_ALL, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x24, C_ALL, "l2_rqsts.code_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x27, C_ALL, "l2_rqsts.all_demand_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x38, C_ALL, "l2_rqsts.pf_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x3F, C_ALL, "l2_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x41, C_ALL, "l2_rqsts.demand_data_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x42, C_ALL, "l2_rqsts.rfo_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0x44, C_ALL, "l2_rqsts.code_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xD8, C_ALL, "l2_rqsts.pf_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xE1, C_ALL, "l2_rqsts.all_demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xE2, C_ALL, "l2_rqsts.all_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xE4, C_ALL, "l2_rqsts.all_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xE7, C_ALL, "l2_rqsts.all_demand_references" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xF8, C_ALL, "l2_rqsts.all_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x24, 0xFF, C_ALL, "l2_rqsts.references" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.thread_p_any" , 0x0, ATTR_ANY, 0x0 }, \ +{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.ring0_trans" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x3C, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk_any" , 0x0, ATTR_ANY, 0x0 }, \ +{ 0x3C, 0x02, C_ALL, "cpu_clk_thread_unhalted.one_thread_active" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C_ALL, "l1d_pend_miss.pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C_ALL, 
"l1d_pend_miss.pending_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x48, 0x01, C_ALL, "l1d_pend_miss.pending_cycles_any" , 0x1, ATTR_ANY, 0x0 }, \ +{ 0x48, 0x02, C_ALL, "l1d_pend_miss.fb_full" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x01, C_ALL, "dtlb_store_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x02, C_ALL, "dtlb_store_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x04, C_ALL, "dtlb_store_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x08, C_ALL, "dtlb_store_misses.walk_completed_1g" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x0E, C_ALL, "dtlb_store_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x10, C_ALL, "dtlb_store_misses.walk_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x10, C_ALL, "dtlb_store_misses.walk_active" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x49, 0x20, C_ALL, "dtlb_store_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4C, 0x01, C_ALL, "load_hit_pre.sw_pf" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x4F, 0x10, C_ALL, "ept.walk_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x51, 0x01, C_ALL, "l1d.replacement" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x54, 0x01, C_ALL, "tx_mem.abort_conflict" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0x54, 0x02, C_ALL, "tx_mem.abort_capacity" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0x54, 0x04, C_ALL, "tx_mem.abort_hle_store_to_elided_lock" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0x54, 0x08, C_ALL, "tx_mem.abort_hle_elision_buffer_not_empty" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0x54, 0x10, C_ALL, "tx_mem.abort_hle_elision_buffer_mismatch" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0x54, 0x20, C_ALL, "tx_mem.abort_hle_elision_buffer_unsupported_alignment", 0x0, ATTR_TSX, 0x0 }, \ +{ 0x54, 0x40, C_ALL, "tx_mem.hle_elision_buffer_full" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0x5D, 0x01, C_ALL, "tx_exec.misc1" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0x5D, 0x02, C_ALL, "tx_exec.misc2" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0x5D, 0x04, C_ALL, "tx_exec.misc3" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0x5D, 0x08, C_ALL, "tx_exec.misc4" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0x5D, 0x10, C_ALL, "tx_exec.misc5" , 0x0, 
ATTR_TSX, 0x0 }, \ +{ 0x5E, 0x01, C_ALL, "rs_events.empty_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x5E, 0x01, C_ALL, "rs_events.empty_end" , 0x1, (ATTR_INV | ATTR_EDGE), 0x0 }, \ +{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.cycles_with_demand_data_rd", 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd_ge_6" , 0x6, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.cycles_with_demand_code_rd", 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.cycles_with_demand_rfo",0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.cycles_with_data_rd" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x10, C_ALL, "offcore_requests_outstanding.l3_miss_demand_data_rd",0x0, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x10, C_ALL, "offcore_requests_outstanding.cycles_with_l3_miss_demand_data_rd", 0x1, ATTR_NONE, 0x0 }, \ +{ 0x60, 0x10, C_ALL, "offcore_requests_outstanding.l3_miss_demand_data_rd_ge_6",0x6, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x04, C_ALL, "idq.mite_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x04, C_ALL, "idq.mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x08, C_ALL, "idq.dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x08, C_ALL, "idq.dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x10, C_ALL, "idq.ms_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_any_uops" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x20, C_ALL, "idq.ms_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_4_uops" , 0x4, 
ATTR_NONE, 0x0 }, \ +{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_any_uops" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x30, C_ALL, "idq.ms_uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x30, C_ALL, "idq.ms_cycles" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x79, 0x30, C_ALL, "idq.ms_switches" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0x80, 0x04, C_ALL, "icache_16b.ifdata_stall" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x83, 0x01, C_ALL, "icache_64b.iftag_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x83, 0x02, C_ALL, "icache_64b.iftag_miss" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x83, 0x04, C_ALL, "icache_64b.iftag_stall" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x01, C_ALL, "itlb_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x02, C_ALL, "itlb_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x04, C_ALL, "itlb_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x08, C_ALL, "itlb_misses.walk_completed_1g" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x0E, C_ALL, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x10, C_ALL, "itlb_misses.walk_pending" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x10, C_ALL, "itlb_misses.walk_active" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x85, 0x20, C_ALL, "itlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x87, 0x01, C_ALL, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.core" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_0_uops_deliv.core" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_1_uop_deliv.core" , 0x3, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_2_uop_deliv.core" , 0x2, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_3_uop_deliv.core" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_fe_was_ok" , 0x1, ATTR_INV, 0x0 }, \ +{ 0xA1, 0x01, C_ALL, "uops_dispatched_port.port_0" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x02, C_ALL, "uops_dispatched_port.port_1" , 0x0, 
ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x04, C_ALL, "uops_dispatched_port.port_2" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x08, C_ALL, "uops_dispatched_port.port_3" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x10, C_ALL, "uops_dispatched_port.port_4" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x20, C_ALL, "uops_dispatched_port.port_5" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x40, C_ALL, "uops_dispatched_port.port_6" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA1, 0x80, C_ALL, "uops_dispatched_port.port_7" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x01, C_ALL, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA2, 0x08, C_ALL, "resource_stalls.sb" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_miss" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x02, C_ALL, "cycle_activity.cycles_l3_miss" , 0x2, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x04, C_ALL, "cycle_activity.stalls_total" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x05, C_ALL, "cycle_activity.stalls_l2_miss" , 0x5, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x06, C_ALL, "cycle_activity.stalls_l3_miss" , 0x6, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x08, C_ALL, "cycle_activity.cycles_l1d_miss" , 0x8, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x0C, C_ALL, "cycle_activity.stalls_l1d_miss" , 0xC, ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x10, C_ALL, "cycle_activity.cycles_mem_any" , 0x10,ATTR_NONE, 0x0 }, \ +{ 0xA3, 0x14, C_ALL, "cycle_activity.stalls_mem_any" , 0x14,ATTR_NONE, 0x0 }, \ +{ 0xA6, 0x01, C_ALL, "exe_activity.exe_bound_0_ports" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA6, 0x02, C_ALL, "exe_activity.1_ports_util" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA6, 0x04, C_ALL, "exe_activity.2_ports_util" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA6, 0x08, C_ALL, "exe_activity.3_ports_util" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA6, 0x10, C_ALL, "exe_activity.4_ports_util" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA6, 0x40, C_ALL, "exe_activity.bound_on_stores" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA8, 0x01, C_ALL, "lsd.uops" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xA8, 0x01, C_ALL, "lsd.cycles_active" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xA8, 0x01, C_ALL, 
"lsd.cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0xAB, 0x02, C_ALL, "dsb2mite_switches.penalty_cycles" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xAE, 0x01, C_ALL, "itlb.itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x01, C_ALL, "offcore_requests.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x02, C_ALL, "offcore_requests.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x04, C_ALL, "offcore_requests.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x08, C_ALL, "offcore_requests.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x10, C_ALL, "offcore_requests.l3_miss_demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB0, 0x80, C_ALL, "offcore_requests.all_requests" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.thread" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_1_uop_exec" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_2_uops_exec" , 0x2, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_3_uops_exec" , 0x3, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_4_uops_exec" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x01, C_ALL, "uops_executed.stall_cycles" , 0x1, ATTR_INV, 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_none" , 0x1, ATTR_INV, 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_1" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_2" , 0x2, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_3" , 0x3, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_4" , 0x4, ATTR_NONE, 0x0 }, \ +{ 0xB1, 0x10, C_ALL, "uops_executed.x87" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xB2, 0x01, C_ALL, "offcore_requests_buffer.sq_full" , 0x0, ATTR_NONE, 0x0 }, \ +\ + /* \ + * See Section "Off-core Response Performance Monitoring" \ + * \ + * Though these two off_core events support all counters, only 1 of \ + * them can be used at any 
given time. This is due to the extra MSR \ + * programming required. \ + */ \ +/* { 0xB7, 0x01, C_ALL, "offcore_response_0" , 0x0, ATTR_NONE, OFFCORE_RSP_0 }, omit events requiring MSR programming */ \ +/* { 0xBB, 0x01, C_ALL, "offcore_response_1" , 0x0, ATTR_NONE, OFFCORE_RSP_1 }, omit events requiring MSR programming */ \ +{ 0xBD, 0x01, C_ALL, "tlb_flush.dtlb_thread" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xBD, 0x20, C_ALL, "tlb_flush.stlb_any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc0, 0x00, C_ALL, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \ +/* { 0xC0, 0x1, C(1), "inst_retired.prec_dist" , 0x0, ATTR_PEBS_ONLY, 0x0 }, omit PEBS-only events */ \ +/* { 0xC0, 0x1, (C(0) | C(2) | C(3)), "inst_retired.total_cycles_ps" , 0x0A, (ATTR_PEBS_ONLY | ATTR_INV), 0x0 }, omit PEBS-only events */ \ +{ 0xC1, 0x3F, C_ALL, "other_assists.any" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.stall_cycles" , 0x1, ATTR_INV, 0x0 }, \ +{ 0xC2, 0x01, C_ALL, "uops_retired.total_cycles" , 0x0A, ATTR_INV, 0x0 }, \ +{ 0xC2, 0x02, C_ALL, "uops_retired.retire_slots" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x01, C_ALL, "machine_clears.count" , 0x1, ATTR_EDGE, 0x0 }, \ +{ 0xC3, 0x02, C_ALL, "machine_clears.memory_ordering" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC3, 0x04, C_ALL, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x01, C_ALL, "br_inst_retired.conditional" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x02, C_ALL, "br_inst_retired.near_call" , 0x0, ATTR_PEBS, 0x0 }, \ +/* { 0xC4, 0x04, C_ALL, "br_inst_retired.all_branches_pebs" , 0x0, ATTR_PEBS_ONLY, 0x0 }, omit PEBS-only events */ \ +{ 0xC4, 0x08, C_ALL, "br_inst_retired.near_return" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x10, C_ALL, "br_inst_retired.not_taken" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC4, 0x20, C_ALL, "br_inst_retired.near_taken" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC4, 0x40, C_ALL, "br_inst_retired.far_branch" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xc5, 0x00, C_ALL, 
"br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC5, 0x01, C_ALL, "br_misp_retired.conditional" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xC5, 0x02, C_ALL, "br_misp_retired.near_call" , 0x0, ATTR_PEBS, 0x0 }, \ +/* { 0xC5, 0x04, C_ALL, "br_misp_retired.all_branches_pebs" , 0x0, ATTR_PEBS_ONLY, 0x0 }, omit PEBS-only events */ \ +{ 0xC5, 0x20, C_ALL, "br_misp_retired.near_taken" , 0x0, ATTR_PEBS, 0x0 }, \ +/* { 0xC6, 0x01, C_ALL, "frontend_retired" , 0x0, ATTR_PEBS, MSR_PEBS_FRONTEND}, omit events requiring MSR programming */ \ +{ 0xC7, 0x01, C_ALL, "fp_arith_inst_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x02, C_ALL, "fp_arith_inst_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x04, C_ALL, "fp_arith_inst_retired.128b_packed_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x08, C_ALL, "fp_arith_inst_retired.128b_packed_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x10, C_ALL, "fp_arith_inst_retired.256b_packed_double" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC7, 0x20, C_ALL, "fp_arith_inst_retired.256b_packed_single" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xC8, 0x01, C_ALL, "hle_retired.start" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC8, 0x02, C_ALL, "hle_retired.commit" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC8, 0x04, C_ALL, "hle_retired.aborted" , 0x0, ATTR_PEBS | ATTR_TSX, 0x0 }, \ +{ 0xC8, 0x08, C_ALL, "hle_retired.aborted_mem" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC8, 0x10, C_ALL, "hle_retired.aborted_timer" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC8, 0x20, C_ALL, "hle_retired.aborted_unfriendly" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC8, 0x40, C_ALL, "hle_retired.aborted_memtype" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC8, 0x80, C_ALL, "hle_retired.aborted_events" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC9, 0x01, C_ALL, "rtm_retired.start" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC9, 0x02, C_ALL, "rtm_retired.commit" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC9, 0x04, C_ALL, "rtm_retired.aborted" , 0x0, ATTR_PEBS | ATTR_TSX, 0x0 }, \ +{ 0xC9, 0x08, C_ALL, "rtm_retired.aborted_mem" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC9, 0x10, C_ALL, 
"rtm_retired.aborted_timer" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC9, 0x20, C_ALL, "rtm_retired.aborted_unfriendly" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC9, 0x40, C_ALL, "rtm_retired.aborted_memtype" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xC9, 0x80, C_ALL, "rtm_retired.aborted_events" , 0x0, ATTR_TSX, 0x0 }, \ +{ 0xCA, 0x1E, C_ALL, "fp_assist.any" , 0x1, ATTR_NONE, 0x0 }, \ +{ 0xCB, 0x01, C_ALL, "hw_interrupts.received" , 0x0, ATTR_NONE, 0x0 }, \ +{ 0xCC, 0x20, C_ALL, "rob_misc_events.lbr_inserts" , 0x0, ATTR_NONE, 0x0 }, \ +/* { 0xCD, 0x01, C_ALL, "mem_trans_retired.load_latency" , 0x0, ATTR_PEBS_ONLY_LD_LAT, PEBS_LD_LAT_THRESHOLD }, omit events requiring MSR programming */ \ +{ 0xD0, 0x11, C_ALL, "mem_inst_retired.stlb_miss_loads" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x12, C_ALL, "mem_inst_retired.stlb_miss_stores" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x21, C_ALL, "mem_inst_retired.lock_loads" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x41, C_ALL, "mem_inst_retired.split_loads" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x42, C_ALL, "mem_inst_retired.split_stores" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x81, C_ALL, "mem_inst_retired.all_loads" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD0, 0x82, C_ALL, "mem_inst_retired.all_stores" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x01, C_ALL, "mem_load_retired.l1_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x02, C_ALL, "mem_load_retired.l2_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x04, C_ALL, "mem_load_retired.l3_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x08, C_ALL, "mem_load_retired.l1_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x10, C_ALL, "mem_load_retired.l2_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x20, C_ALL, "mem_load_retired.l3_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD1, 0x40, C_ALL, "mem_load_retired.fb_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD2, 0x01, C_ALL, "mem_load_l3_hit_retired.xsnp_miss" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD2, 0x02, C_ALL, "mem_load_l3_hit_retired.xsnp_hit" , 0x0, ATTR_PEBS, 0x0 }, \ +{ 0xD2, 0x04, C_ALL, "mem_load_l3_hit_retired.xsnp_hitm" , 0x0, 
ATTR_PEBS, 0x0 }, \
+{ 0xD2, 0x08, C_ALL, "mem_load_l3_hit_retired.xsnp_none" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD4, 0x04, C_ALL, "mem_load_misc_retired.uc" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xE6, 0x01, C_ALL, "baclears.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x40, C_ALL, "l2_trans.l2_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x1F, C_ALL, "l2_lines_in.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x01, C_ALL, "l2_lines_out.silent" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x02, C_ALL, "l2_lines_out.non_silent" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x04, C_ALL, "l2_lines_out.useless_hwpf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF4, 0x10, C_ALL, "sq_misc.split_lock" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define */
+
+#define NT_END {0, 0, 0, NULL, 0x0, ATTR_NONE, 0x0 } /* end-of-table: the NULL name is what the table walks in this file stop on */
+
+static const struct events_table_t *events_table = NULL; /* table in use; stays NULL until core_pcbe_init () picks one */
+
+const struct events_table_t events_fam6_mod23[] = { /* Core 2: selected for models 15 and 23 */
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD23
+  NT_END
+};
+
+const struct events_table_t events_fam6_mod28[] = { /* Atom: selected for model 28 */
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD28
+  NT_END
+};
+
+const struct events_table_t events_fam6_mod26[] = { /* Nehalem: selected for models 26, 30 and 31 */
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD26
+  NT_END
+};
+
+const struct events_table_t events_fam6_mod46[] = { /* selected for model 46: the model-26 events plus model-46-only ones */
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD26
+  EVENTS_FAM6_MOD46_ONLY
+  NT_END
+};
+
+const struct events_table_t events_fam6_mod37[] = { /* Westmere: selected for models 37 and 44 */
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD37
+  EVENTS_FAM6_MOD37_ALSO
+  NT_END
+};
+
+const struct events_table_t events_fam6_mod47[] = { /* selected for model 47: model-37 events without the _ALSO group */
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD37
+  NT_END
+};
+
+const struct events_table_t events_fam6_mod42[] = { /* Sandy Bridge: selected for model 42 */
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD42
+  EVENTS_FAM6_MOD42_ONLY
+  NT_END
+};
+
+const struct events_table_t events_fam6_mod45[] = { /* selected for model 45: model-42 events plus model-45-only ones */
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD42
+  EVENTS_FAM6_MOD45_ONLY
+  NT_END
+};
+
+const struct events_table_t events_fam6_mod58[] = { /* Ivy Bridge: selected for model 58 */
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD58
+  NT_END
+};
+
+const struct events_table_t events_fam6_mod62[] = { /* selected for model 62: model-58 events plus model-62-only ones */
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD58
+  EVENTS_FAM6_MOD62_ONLY
+  NT_END
+};
+
+const struct
events_table_t events_fam6_mod60[] = { /* Haswell: models 60, 63, 69, 70 */
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD60
+  NT_END
+};
+
+/* Broadwell: models 61, 71, 79, 86.  */
+const struct events_table_t events_fam6_mod61[] = {
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD61
+  NT_END
+};
+
+/* Skylake: models 78, 85, 94.  */
+const struct events_table_t events_fam6_mod78[] = {
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD78
+  NT_END
+};
+
+/* Fallback for any other model: architectural events only.  */
+const struct events_table_t events_fam6_unknown[] = {
+  ARCH_EVENTS
+  NT_END
+};
+
+/*
+ * Event table used for the ARM vendors handled in core_pcbe_init ().
+ * Entries are expressed with the Linux perf_event constants rather than
+ * raw event-select/umask pairs: the first slot holds the perf "config"
+ * value and the fifth slot (cmask in the field-order note below) holds
+ * the perf event type.
+ */
+const struct events_table_t events_fam_arm[] = {
+//  ARCH_EVENTS
+//  *eventnum = pevent->eventselect;
+//  *eventnum |= (pevent->unitmask << PERFCTR_UMASK_SHIFT);
+//  *eventnum |= (pevent->attrs << 16);
+//  *eventnum |= (pevent->cmask << 24);
+//  eventselect, unitmask, supported_counters, name, cmask, attrs, msr_offset
+
+// Hardware event
+#define HWE(nm, id) { id, 0, C_ALL, nm, PERF_TYPE_HARDWARE, 0, 0 },
+  HWE("branch-instructions", PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
+  HWE("branch-misses", PERF_COUNT_HW_BRANCH_MISSES)
+  HWE("bus-cycles", PERF_COUNT_HW_BUS_CYCLES)
+  HWE("cache-misses", PERF_COUNT_HW_CACHE_MISSES)
+  HWE("cache-references", PERF_COUNT_HW_CACHE_REFERENCES)
+  HWE("cycles", PERF_COUNT_HW_CPU_CYCLES)
+  HWE("instructions", PERF_COUNT_HW_INSTRUCTIONS)
+  HWE("ref-cycles", PERF_COUNT_HW_REF_CPU_CYCLES)
+  HWE("stalled-cycles-backend", PERF_COUNT_HW_STALLED_CYCLES_BACKEND)
+  HWE("stalled-cycles-frontend", PERF_COUNT_HW_STALLED_CYCLES_FRONTEND)
+
+// Software event
+#define SWE(nm, id) { id, 0, C_ALL, nm, PERF_TYPE_SOFTWARE, 0, 0 },
+  SWE("alignment-faults", PERF_COUNT_SW_ALIGNMENT_FAULTS)
+  SWE("context-switches", PERF_COUNT_SW_CONTEXT_SWITCHES)
+  SWE("cpu-clock", PERF_COUNT_SW_CPU_CLOCK)
+  SWE("cpu-migrations", PERF_COUNT_SW_CPU_MIGRATIONS)
+  SWE("emulation-faults", PERF_COUNT_SW_EMULATION_FAULTS)
+  SWE("major-faults", PERF_COUNT_SW_PAGE_FAULTS_MAJ)
+  SWE("minor-faults", PERF_COUNT_SW_PAGE_FAULTS_MIN)
+  SWE("page-faults", PERF_COUNT_SW_PAGE_FAULTS)
+  SWE("task-clock", PERF_COUNT_SW_TASK_CLOCK)
+
+// Hardware cache event: config = cache id | (operation << 8) | (result << 16).
+// Arguments are parenthesized so that any expression can be passed safely.
+#define HWCE(nm, id, op, res) { (id) | ((op) << 8) | ((res) << 16), 0, C_ALL, nm, PERF_TYPE_HW_CACHE, 0, 0 },
+  HWCE("L1-dcache-load-misses", PERF_COUNT_HW_CACHE_L1D, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS)
+  HWCE("L1-dcache-loads", PERF_COUNT_HW_CACHE_L1D, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS)
+  // Store misses are a WRITE operation with a MISS result.  The previous
+  // arguments (RESULT_MISS as the op, RESULT_ACCESS as the result) encoded
+  // the same event as "L1-dcache-stores".
+  HWCE("L1-dcache-store-misses",PERF_COUNT_HW_CACHE_L1D, PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_MISS)
+  HWCE("L1-dcache-stores", PERF_COUNT_HW_CACHE_L1D, PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS)
+  HWCE("L1-icache-load-misses", PERF_COUNT_HW_CACHE_L1I, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS)
+  HWCE("L1-icache-loads", PERF_COUNT_HW_CACHE_L1I, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS)
+//  HWCE("branch-load-misses",)
+//  HWCE("branch-loads",)
+  HWCE("dTLB-load-misses", PERF_COUNT_HW_CACHE_DTLB, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS)
+  HWCE("dTLB-loads", PERF_COUNT_HW_CACHE_DTLB, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS)
+  HWCE("iTLB-load-misses", PERF_COUNT_HW_CACHE_ITLB, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS)
+  HWCE("iTLB-loads", PERF_COUNT_HW_CACHE_ITLB, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS)
+
+  NT_END
+};
+
+/*
+ * Initialize the backend for the running CPU.
+ *
+ * Sets core_impl_name, events_table, num_gpc, num_ffc and total_pmc.
+ * For the ARM vendors listed below the counts are hard-coded and the
+ * perf-based table is used.  For Intel the counts come from CPUID leaf
+ * 0xA and the event table is chosen from the CPU model number.
+ *
+ * Returns 0 on success.  Returns -1 when the vendor is not handled, when
+ * CPUID does not provide leaf 0xA, when that leaf reports version 0, or
+ * when the reported counter counts are too large.
+ */
+static int
+core_pcbe_init (void)
+{
+  switch (cpuid_getvendor ())
+    {
+    case ARM_CPU_IMP_ARM:
+    case ARM_CPU_IMP_BRCM:
+    case ARM_CPU_IMP_CAVIUM:
+    case ARM_CPU_IMP_APM:
+    case ARM_CPU_IMP_QCOM:
+      snprintf (core_impl_name, sizeof (core_impl_name), "%s", AARCH64_VENDORSTR_ARM);
+      events_table = events_fam_arm;
+      num_gpc = 4;  // MEZ: a real implementation is needed
+      num_ffc = 0;
+      total_pmc = num_gpc + num_ffc;
+      return 0;
+    case X86_VENDOR_Intel:
+      break;
+    default:
+      return -1;
+    }
+
+#if defined(__i386__) || defined(__x86_64)
+  /* No Architectural Performance Monitoring Leaf returned by CPUID */
+  if (get_cpuid_info ()->cpi_maxeax < 0xa)
+    return (-1);
+
+  /* Obtain the Architectural Performance Monitoring Leaf */
+  cpuid_regs_t cp;
+  my_cpuid (0xa, &cp);
+  uint32_t versionid = cp.eax & 0xFF;
+
+  /*
+   * Fixed-Function Counters (FFC)
+   *
+   * All Family 6 Model 15 and Model 23 processors have fixed-function
+   * counters.  These counters were made Architectural with
+   * Family 6 Model 15 Stepping 9.
+   */
+  switch (versionid)
+    {
+    case 0:
+      return -1;
+    case 2:
+      num_ffc = cp.edx & 0x1F;
+      /*
+       * Some processors have an errata (AW34) where
+       * versionid is reported as 2 when actually 1.
+       * In this case, fixed-function counters are
+       * model-specific as in Version 1.
+       */
+      if (num_ffc != 0)
+        break;
+      /* FALLTHROUGH */
+    case 1:
+      num_ffc = 3;
+      versionid = 1;
+      break;
+    default:
+      num_ffc = cp.edx & 0x1F;
+      break;
+    }
+  /* Defensive only: every path above leaves num_ffc at 3 or at most 0x1F.  */
+  if (num_ffc >= 64)
+    return (-1);
+  uint64_t known_ffc_num = sizeof (ffc_names) / sizeof (char *) - 1; /* -1 for EOT */
+  if (num_ffc > known_ffc_num)
+    /*
+     * The system seems to have more fixed-function counters than
+     * what this PCBE is able to handle correctly.  Default to the
+     * maximum number of fixed-function counters that this driver
+     * is aware of.
+     */
+    num_ffc = known_ffc_num;
+
+  /*
+   * General Purpose Counters (GPC)
+   */
+  num_gpc = (cp.eax >> 8) & 0xFF;
+  if (num_gpc >= 64)
+    return (-1);
+  total_pmc = num_gpc + num_ffc;
+  if (total_pmc > 64) /* Too wide for the overflow bitmap */
+    return (-1);
+
+  uint_t cpuid_model = cpuid_getmodel ();
+
+  /* GPC events for Family 6 Models 15 & 23 only */
+  if ((cpuid_getfamily () == 6) &&
+      ((cpuid_model == 15) || (cpuid_model == 23)))
+    (void) snprintf (core_impl_name, IMPL_NAME_LEN, "Core Microarchitecture");
+  else
+    (void) snprintf (core_impl_name, IMPL_NAME_LEN,
+                     "Intel Arch PerfMon v%d on Family %d Model %d",
+                     versionid, cpuid_getfamily (), cpuid_model);
+  /*
+   * Process architectural and non-architectural events using GPC
+   */
+  if (num_gpc > 0)
+    {
+      /* NOTE(review): only the model number is examined here; the
+         family is not checked before a fam6 table is chosen.  */
+      switch (cpuid_model)
+        {
+        case 15: /* Core 2 */
+        case 23:
+          events_table = events_fam6_mod23;
+          break;
+        case 28: /* Atom */
+          events_table = events_fam6_mod28;
+          break;
+        case 37: /* Westmere */
+        case 44:
+          events_table = events_fam6_mod37;
+          break;
+        case 47:
+          events_table = events_fam6_mod47;
+          break;
+        case 26: /* Nehalem */
+        case 30:
+        case 31:
+          events_table = events_fam6_mod26;
+          break;
+        case 46:
+          events_table = events_fam6_mod46;
+          break;
+        case 42: /* Sandy Bridge */
+          events_table = events_fam6_mod42;
+          break;
+        case 45:
+          events_table = events_fam6_mod45;
+          break;
+        case 58: /* Ivy Bridge */
+          events_table = events_fam6_mod58;
+          break;
+        case 62:
+          events_table = events_fam6_mod62;
+          break;
+        case 60: /* Haswell */
+        case 63:
+        case 69:
+        case 70:
+          events_table = events_fam6_mod60;
+          break;
+        case 61: /* Broadwell */
+        case 71:
+        case 79:
+        case 86:
+          events_table = events_fam6_mod61;
+          break;
+        case 78: /* Skylake */
+        case 85:
+        case 94:
+          events_table = events_fam6_mod78;
+          break;
+        default: /* unknown */
+          events_table = events_fam6_unknown;
+          break;
+        }
+    }
+  /*
+   * Fixed-function Counters (FFC) are already listed individually in
+   * ffc_names[]
+   */
+#endif
+  return 0;
+}
+
+static uint_t
+core_pcbe_ncounters (void)
+{
+  /* Total number of counters (general purpose plus fixed function)
+     as computed by core_pcbe_init ().  */
+  return total_pmc;
+}
+
+/* Return the implementation name string filled in by core_pcbe_init ().  */
+static const char *
+core_pcbe_impl_name (void)
+{
+  return core_impl_name;
+}
+
+/*
+ * Return a pointer to documentation for the events of the running CPU.
+ * On x86 the text depends on the CPU model; on every other architecture
+ * (including aarch64) an empty string is returned, so that the function
+ * never falls off its end without a value.
+ */
+static const char *
+core_pcbe_cpuref (void)
+{
+#if defined(__i386__) || defined(__x86_64)
+  switch (cpuid_getmodel ())
+    {
+    case 60: /* Haswell */
+    case 63:
+    case 69:
+    case 70:
+      return GTXT ("See Chapter 19 of the \"Intel 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide, Part 2\"\nOrder Number: 253669-047US, June 2013");
+    case 61: /* Broadwell */
+    case 71:
+    case 79:
+    case 86:
+    case 78: /* Skylake */
+    case 85:
+    case 94:
+      return GTXT ("See Chapter 19 of the \"Intel 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide\"");
+    default:
+      return
+        GTXT ("See Chapter 19 of the \"Intel 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide, Part 2\"\nOrder Number: 253669-045US, January 2013");
+    }
+#else
+  return "";
+#endif
+}
+
+/*
+ * Report every (counter, event name) pair to HWC_CB.
+ *
+ * Each entry of the active events table is reported once for every
+ * general purpose counter whose bit is set in its supported_counters
+ * mask.  The fixed-function names in ffc_names[] follow, numbered
+ * after the general purpose counters.  Returns the number of callbacks
+ * made.
+ */
+static int
+core_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb)
+{
+  int count = 0;
+  const struct events_table_t *pevent;
+  /* events_table is NULL when no table was selected; the loop then
+     does nothing.  */
+  for (pevent = events_table; pevent && pevent->name; pevent++)
+    for (uint_t jj = 0; jj < num_gpc; jj++)
+      if (C (jj) & pevent->supported_counters)
+        {
+          hwc_cb (jj, pevent->name);
+          count++;
+        }
+
+  /* Unsigned index: it is compared against a sizeof expression.  */
+  for (uint_t ii = 0; ii < sizeof (ffc_names) / sizeof (*ffc_names) && ffc_names[ii]; ii++)
+    {
+      hwc_cb (ii + num_gpc, ffc_names[ii]);
+      count++;
+    }
+  /* add generic events here */
+  return count;
+}
+
+/*
+ * Translate EVENTNAME into an event-select value.
+ *
+ * On success returns 0 with:
+ *   *eventnum     the encoded event (0 for a fixed-function counter),
+ *   *valid_umask  0xff when the table entry has no unit mask of its own
+ *                 (a user-supplied umask is then allowed), else 0,
+ *   *pmc_sel      PMC, or the fixed-counter index or'ed with
+ *                 PERFCTR_FIXED_MAGIC for a fixed-function counter.
+ * Returns -1 and sets *eventnum to (eventsel_t) -1 when the name is not
+ * found in either table.
+ */
+static int
+core_pcbe_get_eventnum (const char *eventname, uint_t pmc, eventsel_t *eventnum,
+                        eventsel_t *valid_umask, uint_t *pmc_sel)
+{
+  const struct events_table_t* pevent;
+  *valid_umask = 0x0; /* by default, don't allow user umask */
+  *pmc_sel = pmc;     /* by default, use the requested pmc */
+
+  /* search non-ffc table */
+  for (pevent = events_table; pevent && pevent->name; pevent++)
+    {
+      if (strcmp (eventname, pevent->name) == 0)
+        {
+          *eventnum = pevent->eventselect;
+          *eventnum |= (pevent->unitmask << PERFCTR_UMASK_SHIFT);
+          *eventnum |= (pevent->attrs << 16);
+          *eventnum |= (pevent->cmask << 24);
+
+          if (pevent->msr_offset)
+            {
+              /*
+               * Should also handle any pevent->msr_offset.
+               * Can check libcpc's usr/src/uts/intel/pcbe/snb_pcbe.h,
+               * function snb_gpc_configure().
+               *
+               * Actually, we should probably error out here
+               * until the appropriate support has been added.
+               * Also, we can comment out events that require
+               * msr_offset so that they aren't even listed.
+               */
+            }
+          if (!pevent->unitmask)
+            *valid_umask = 0xff; /* allow umask if nothing set */
+          return 0;
+        }
+    }
+
+  /* search ffc table */
+  for (uint_t ii = 0; ii < sizeof (ffc_names) / sizeof (*ffc_names) && ffc_names[ii]; ii++)
+    {
+      if (strcmp (eventname, ffc_names[ii]) == 0)
+        {
+          *eventnum = 0;
+          *pmc_sel = ii | PERFCTR_FIXED_MAGIC;
+          return 0;
+        }
+    }
+  *eventnum = (eventsel_t) -1;
+  return -1;
+}
+
+/* Entry points of this backend, in the order of the initializer below.  */
+static hdrv_pcbe_api_t hdrv_pcbe_core_api = {
+  core_pcbe_init,
+  core_pcbe_ncounters,
+  core_pcbe_impl_name,
+  core_pcbe_cpuref,
+  core_pcbe_get_events,
+  core_pcbe_get_eventnum
+};
diff --git a/gprofng/common/cpu_frequency.h b/gprofng/common/cpu_frequency.h
new file mode 100644
index 0000000..b46b54d
--- /dev/null
+++ b/gprofng/common/cpu_frequency.h
@@ -0,0 +1,303 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+   Contributed by Oracle.
+
+   This file is part of GNU Binutils.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#ifndef _CPU_FREQUENCY_H +#define _CPU_FREQUENCY_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include <alloca.h> +#include <unistd.h> /* processor_info_t */ +#include <fcntl.h> + + typedef unsigned char uint8_t; + +#define MAXSTRLEN 1024 + /* + * This file provide the api to detect Intel CPU frequency variation features + */ + +#define COL_CPUFREQ_NONE 0x0000 +#define COL_CPUFREQ_SCALING 0x0001 +#define COL_CPUFREQ_TURBO 0x0002 + +#if defined(__i386__) || defined(__x86_64) + // XXXX This is a rough table to estimate frequency increment due to intel turbo boost. + // CPU with different stepping and different core number have different turbo increment. + // It is used internally here, and is not implemented on SPARC + + // YLM: one can use cputrack to estimate max turbo frequency + // example: for a cpu-bound app that runs for > 10 seconds, count cycles for 10 seconds: + // cputrack -T 10 -v -c cpu_clk_unhalted.thread_p a.out + + static int + get_max_turbo_freq (int model) + { + switch (model) + { + // Nehalem + case 30:// Core i7-870: 2/2/4/5 + return 2 * 133333; + case 26:// Xeon L5520: 1/1/1/2 + return 2 * 133333; + case 46:// Xeon E7540: 2 + return 2 * 133333; + // Westmere + case 37:// Core i5-520M: 2/4 + return 2 * 133333; + case 44:// Xeon E5620: 1/1/2/2 + return 2 * 133333; + case 47:// Xeon E7-2820: 1/1/1/2 + return 1 * 133333; + // Sandy Bridge + case 42:// Core i5-2500: 1/2/3/4 + return 3 * 100000; + // http://ark.intel.com/products/64584/Intel-Xeon-Processor-E5-2660-20M-Cache-2_20-GHz-8_00-GTs-Intel-QPI + case 45:// Xeon E5-2660 GenuineIntel 206D7 family 6 model 45 step 7 clock 2200 MHz + return 8 * 100000; + // Ivy Bridge + case 58:// Core i7-3770: 3/4/5/5 + return 4 * 100000; + case 62:// Xeon E5-2697: 3/3/3/3/3/3/3/4/5/6/7/8 + return 
7 * 100000; + // Haswell + case 60: + return 789000; // empirically we see 3189 MHz - 2400 MHz + case 63: + return 1280000; // empirically we see 3580 MHz - 2300 MHz for single-threaded + // return 500000; // empirically we see 2800 MHz - 2300 MHz for large throughput + // Broadwell + // where are these values listed? + // maybe try https://en.wikipedia.org/wiki/Broadwell_%28microarchitecture%29#Server_processors + case 61: + return 400000; + case 71: + return 400000; + case 79: + return 950000; // empirically we see (3550-2600) MHz for single-threaded on x6-2a + case 85: + return 1600000; // X7: empirically see ~3.7GHz with single thread, baseline is 2.1Ghz Return 3,700,000-2,100,000 + case 31: // Nehalem? + case 28: // Atom + case 69: // Haswell + case 70: // Haswell + case 78: // Skylake + case 94: // Skylake + default: + return 0; + } + } +#endif + + /* + * parameter: mode, pointer to a 8bit mode indicator + * return: max cpu frequency in MHz + */ + //YXXX Updating this function? Check similar cut/paste code in: + // collctrl.cc::Coll_Ctrl() + // collector.c::log_header_write() + // cpu_frequency.h::get_cpu_frequency() + + static int + get_cpu_frequency (uint8_t *mode) + { + int ret_freq = 0; + if (mode != NULL) + *mode = COL_CPUFREQ_NONE; + FILE *procf = fopen ("/proc/cpuinfo", "r"); + if (procf != NULL) + { + char temp[1024]; + int cpu = -1; +#if defined(__i386__) || defined(__x86_64) + int model = -1; + int family = -1; +#endif + while (fgets (temp, 1024, procf) != NULL) + { + if (strncmp (temp, "processor", strlen ("processor")) == 0) + { + char *val = strchr (temp, ':'); + cpu = val ? atoi (val + 1) : -1; + } +#if defined(__i386__) || defined(__x86_64) + else if (strncmp (temp, "model", strlen ("model")) == 0 + && strstr (temp, "name") == 0) + { + char *val = strchr (temp, ':'); + model = val ? atoi (val + 1) : -1; + } + else if (strncmp (temp, "cpu family", strlen ("cpu family")) == 0) + { + char *val = strchr (temp, ':'); + family = val ? 
atoi (val + 1) : -1; + } +#endif + else if (strncmp (temp, "cpu MHz", strlen ("cpu MHz")) == 0) + { + char *val = strchr (temp, ':'); + int mhz = val ? atoi (val + 1) : 0; /* reading it as int is fine */ + char scaling_freq_file[MAXSTRLEN + 1]; + snprintf (scaling_freq_file, sizeof (scaling_freq_file), + "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", cpu); + int intel_pstate = 0; + int no_turbo = 0; + if (access (scaling_freq_file, R_OK) == 0) + { + FILE *cpufreqd = fopen (scaling_freq_file, "r"); + if (cpufreqd != NULL) + { + if (fgets (temp, 1024, cpufreqd) != NULL + && strncmp (temp, "intel_pstate", sizeof ("intel_pstate") - 1) == 0) + intel_pstate = 1; + fclose (cpufreqd); + } + } + snprintf (scaling_freq_file, sizeof (scaling_freq_file), + "/sys/devices/system/cpu/intel_pstate/no_turbo"); + if (access (scaling_freq_file, R_OK) == 0) + { + FILE *pstatent = fopen (scaling_freq_file, "r"); + if (pstatent != NULL) + { + if (fgets (temp, 1024, pstatent) != NULL) + if (strncmp (temp, "1", sizeof ("1") - 1) == 0) + no_turbo = 1; + fclose (pstatent); + } + } + + snprintf (scaling_freq_file, sizeof (scaling_freq_file), + "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", cpu); + int frequency_scaling = 0; + int turbo_mode = 0; + if (access (scaling_freq_file, R_OK) == 0) + { + FILE *cpufreqf = fopen (scaling_freq_file, "r"); + if (cpufreqf != NULL) + { + if (fgets (temp, 1024, cpufreqf) != NULL) + { + int ondemand = 0; + if (strncmp (temp, "ondemand", sizeof ("ondemand") - 1) == 0) + ondemand = 1; + int performance = 0; + if (strncmp (temp, "performance", sizeof ("performance") - 1) == 0) + performance = 1; + int powersave = 0; + if (strncmp (temp, "powersave", sizeof ("powersave") - 1) == 0) + powersave = 1; + if (intel_pstate || ondemand || performance) + { + snprintf (scaling_freq_file, sizeof (scaling_freq_file), + "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu); + if (access (scaling_freq_file, R_OK) == 0) + { + FILE * cpufreqf_max; 
+ if ((cpufreqf_max = fopen (scaling_freq_file, "r")) != NULL) + { + if (fgets (temp, 1024, cpufreqf_max) != NULL) + { + int tmpmhz = atoi (temp); + snprintf (scaling_freq_file, sizeof (scaling_freq_file), + "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_available_frequencies", cpu); + if (intel_pstate) + { + frequency_scaling = 1; + turbo_mode = !no_turbo; + if (powersave) + // the system might have been relatively cold + // so we might do better with scaling_max_freq + mhz = (int) (((double) tmpmhz / 1000.0) + 0.5); + } + else if (access (scaling_freq_file, R_OK) == 0) + { + FILE * cpufreqf_ava; + if ((cpufreqf_ava = fopen (scaling_freq_file, "r")) != NULL) + { + if (fgets (temp, 1024, cpufreqf_ava) != NULL) + { + if (strchr (temp, ' ') != strrchr (temp, ' ') && ondemand) + frequency_scaling = 1; + if (tmpmhz > 1000) + { +#if defined(__i386__) || defined(__x86_64) + if (family == 6) + { + // test turbo mode + char non_turbo_max_freq[1024]; + snprintf (non_turbo_max_freq, sizeof (non_turbo_max_freq), + "%d", tmpmhz - 1000); + if (strstr (temp, non_turbo_max_freq)) + { + turbo_mode = 1; + tmpmhz = (tmpmhz - 1000) + get_max_turbo_freq (model); + } + } +#endif + } + } + fclose (cpufreqf_ava); + } + mhz = (int) (((double) tmpmhz / 1000.0) + 0.5); + } + } + fclose (cpufreqf_max); + } + } + } + } + fclose (cpufreqf); + } + } + if (mhz > ret_freq) + ret_freq = mhz; + if (frequency_scaling && mode != NULL) + *mode |= COL_CPUFREQ_SCALING; + if (turbo_mode && mode != NULL) + *mode |= COL_CPUFREQ_TURBO; + } + else if (strncmp (temp, "Cpu", 3) == 0 && temp[3] != '\0' && + strncmp (strchr (temp + 1, 'C') ? 
strchr (temp + 1, 'C') : (temp + 4), "ClkTck", 6) == 0) + { // sparc-Linux + char *val = strchr (temp, ':'); + if (val) + { + unsigned long long freq; + sscanf (val + 2, "%llx", &freq); + int mhz = (unsigned int) (((double) freq) / 1000000.0 + 0.5); + if (mhz > ret_freq) + ret_freq = mhz; + } + } + } + fclose (procf); + } + return ret_freq; + } + +#ifdef __cplusplus +} +#endif + +#endif /*_CPU_FREQUENCY_H*/ diff --git a/gprofng/common/cpuid.c b/gprofng/common/cpuid.c new file mode 100644 index 0000000..211e09a --- /dev/null +++ b/gprofng/common/cpuid.c @@ -0,0 +1,203 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#if defined(__i386__) || defined(__x86_64) +#include <cpuid.h> /* GCC-provided */ +#elif defined(__aarch64__) +#define ATTRIBUTE_UNUSED __attribute__((unused)) + +static inline uint_t __attribute_const__ +__get_cpuid (unsigned int op ATTRIBUTE_UNUSED, unsigned int *eax, + unsigned int *ebx ATTRIBUTE_UNUSED, + unsigned int *ecx ATTRIBUTE_UNUSED, unsigned int *edx ATTRIBUTE_UNUSED) +{ + // CPUID bit assignments: + // [31:24] IMPLEMENTER (0x50 - ARM_CPU_IMP_APM) + // [23:20] VARIANT indicates processor revision (0x2 = Revision 2) + // [19:16] Constant (Reads as 0xF) + // [15:04] PARTNO indicates part number (0xC23 = Cortex-M3) + // [03:00] REVISION indicates patch release (0x0 = Patch 0) + // unsigned long v = 0; + // __asm volatile ("MRS %[result], MPIDR_EL1" : [result] "=r" (v)); + // Tprintf(DBG_LT0, "cpuid.c:%d read_cpuid_id() MPIDR_EL1=0x%016lx\n", __LINE__, v); + uint_t res = 0; + __asm volatile ("MRS %[result], MIDR_EL1" : [result] "=r" (*eax)); + Tprintf (DBG_LT0, "cpuid.c:%d read_cpuid_id() MIDR_EL1=0x%016x\n", __LINE__, *eax); + return res; +} +#endif + +/* + * Various routines to handle identification + * and classification of x86 processors. 
+ */ + +#define IS_GLOBAL /* externally visible */ +#define X86_VENDOR_Intel 0 +#define X86_VENDORSTR_Intel "GenuineIntel" +#define X86_VENDOR_IntelClone 1 +#define X86_VENDOR_AMD 2 +#define X86_VENDORSTR_AMD "AuthenticAMD" + +#define BITX(u, h, l) (((u) >> (l)) & ((1LU << ((h) - (l) + 1LU)) - 1LU)) +#define CPI_FAMILY_XTD(reg) BITX(reg, 27, 20) +#define CPI_MODEL_XTD(reg) BITX(reg, 19, 16) +#define CPI_TYPE(reg) BITX(reg, 13, 12) +#define CPI_FAMILY(reg) BITX(reg, 11, 8) +#define CPI_STEP(reg) BITX(reg, 3, 0) +#define CPI_MODEL(reg) BITX(reg, 7, 4) +#define IS_EXTENDED_MODEL_INTEL(model) ((model) == 0x6 || (model) >= 0xf) + + +typedef struct +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; +} cpuid_regs_t; + +typedef struct +{ + unsigned int cpi_model; + unsigned int cpi_family; + unsigned int cpi_vendor; /* enum of cpi_vendorstr */ + unsigned int cpi_maxeax; /* fn 0: %eax */ + char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ +} cpuid_info_t; + + +#if defined(__i386__) || defined(__x86_64) +static uint_t +cpuid_vendorstr_to_vendorcode (char *vendorstr) +{ + if (strcmp (vendorstr, X86_VENDORSTR_Intel) == 0) + return X86_VENDOR_Intel; + else if (strcmp (vendorstr, X86_VENDORSTR_AMD) == 0) + return X86_VENDOR_AMD; + else + return X86_VENDOR_IntelClone; +} + +static int +my_cpuid (unsigned int op, cpuid_regs_t *regs) +{ + regs->eax = regs->ebx = regs->ecx = regs->edx = 0; + int ret = __get_cpuid (op, ®s->eax, ®s->ebx, ®s->ecx, ®s->edx); + TprintfT (DBG_LT1, "my_cpuid: __get_cpuid(0x%x, 0x%x, 0x%x, 0x%x, 0x%x) returns %d\n", + op, regs->eax, regs->ebx, regs->ecx, regs->edx, ret); + return ret; +} +#endif + +static cpuid_info_t * +get_cpuid_info () +{ + static int cpuid_inited = 0; + static cpuid_info_t cpuid_info; + cpuid_info_t *cpi = &cpuid_info; + if (cpuid_inited) + return cpi; + cpuid_inited = 1; + +#if defined(__aarch64__) + // CPUID bit assignments: + // [31:24] IMPLEMENTER (0x50 - ARM_CPU_IMP_APM) + // [23:20] VARIANT 
indicates processor revision (0x2 = Revision 2) + // [19:16] Constant (Reads as 0xF) + // [15:04] PARTNO indicates part number (0xC23 = Cortex-M3) + // [03:00] REVISION indicates patch release (0x0 = Patch 0) + uint_t reg = 0; + __asm volatile ("MRS %[result], MIDR_EL1" : [result] "=r" (reg)); + cpi->cpi_vendor = reg >> 24; + cpi->cpi_model = (reg >> 4) & 0xfff; + switch (cpi->cpi_vendor) + { + case ARM_CPU_IMP_APM: + case ARM_CPU_IMP_ARM: + case ARM_CPU_IMP_CAVIUM: + case ARM_CPU_IMP_BRCM: + case ARM_CPU_IMP_QCOM: + strncpy (cpi->cpi_vendorstr, AARCH64_VENDORSTR_ARM, sizeof (cpi->cpi_vendorstr)); + break; + default: + strncpy (cpi->cpi_vendorstr, "UNKNOWN ARM", sizeof (cpi->cpi_vendorstr)); + break; + } + Tprintf (DBG_LT0, "cpuid.c:%d read_cpuid_id() MIDR_EL1==0x%016x cpi_vendor=%d cpi_model=%d\n", + __LINE__, (unsigned int) reg, cpi->cpi_vendor, cpi->cpi_model); + +#elif defined(__i386__) || defined(__x86_64) + cpuid_regs_t regs; + my_cpuid (0, ®s); + cpi->cpi_maxeax = regs.eax; + ((uint32_t *) cpi->cpi_vendorstr)[0] = regs.ebx; + ((uint32_t *) cpi->cpi_vendorstr)[1] = regs.edx; + ((uint32_t *) cpi->cpi_vendorstr)[2] = regs.ecx; + cpi->cpi_vendorstr[12] = 0; + cpi->cpi_vendor = cpuid_vendorstr_to_vendorcode (cpi->cpi_vendorstr); + + my_cpuid (1, ®s); + cpi->cpi_model = CPI_MODEL (regs.eax); + cpi->cpi_family = CPI_FAMILY (regs.eax); + if (cpi->cpi_family == 0xf) + cpi->cpi_family += CPI_FAMILY_XTD (regs.eax); + + /* + * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. + * Intel, and presumably everyone else, uses model == 0xf, as + * one would expect (max value means possible overflow). Sigh. 
+ */ + switch (cpi->cpi_vendor) + { + case X86_VENDOR_Intel: + if (IS_EXTENDED_MODEL_INTEL (cpi->cpi_family)) + cpi->cpi_model += CPI_MODEL_XTD (regs.eax) << 4; + break; + case X86_VENDOR_AMD: + if (CPI_FAMILY (cpi->cpi_family) == 0xf) + cpi->cpi_model += CPI_MODEL_XTD (regs.eax) << 4; + break; + default: + if (cpi->cpi_model == 0xf) + cpi->cpi_model += CPI_MODEL_XTD (regs.eax) << 4; + break; + } +#endif + return cpi; +} + +static inline uint_t +cpuid_getvendor () +{ + return get_cpuid_info ()->cpi_vendor; +} + +static inline uint_t +cpuid_getfamily () +{ + return get_cpuid_info ()->cpi_family; +} + +static inline uint_t +cpuid_getmodel () +{ + return get_cpuid_info ()->cpi_model; +} diff --git a/gprofng/common/gp-defs.h b/gprofng/common/gp-defs.h new file mode 100644 index 0000000..440bfb1 --- /dev/null +++ b/gprofng/common/gp-defs.h @@ -0,0 +1,58 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#ifndef _GP_DEFS_H_ +#define _GP_DEFS_H_ + +/* Define the ARCH and WSIZE predicates */ +/* + * The way we define and use predicates is similar to the + * standard #assert with one important exception: + * if an argument of a predicate is not known the result + * is 'false' and we want a compile time error to avoid + * silent results from typos like ARCH(INTEL), COMPILER(gnu), + * etc. + */ +#define ARCH(x) TOK_A_##x(ARCH) +#define TOK_A_Aarch64(x) x##_Aarch64 +#define TOK_A_SPARC(x) x##_SPARC +#define TOK_A_Intel(x) x##_Intel + +#define WSIZE(x) TOK_W_##x(WSIZE) +#define TOK_W_32(x) x##_32 +#define TOK_W_64(x) x##_64 + +#if defined(sparc) || defined(__sparcv9) +#define ARCH_SPARC 1 +#elif defined(__i386__) || defined(__x86_64) +#define ARCH_Intel 1 +#elif defined(__aarch64__) +#define ARCH_Aarch64 1 +#else +#error "Undefined platform" +#endif + +#if defined(__sparcv9) || defined(__x86_64) || defined(__aarch64__) +#define WSIZE_64 1 +#else +#define WSIZE_32 1 +#endif + +#endif diff --git a/gprofng/common/gp-experiment.h b/gprofng/common/gp-experiment.h new file mode 100644 index 0000000..040c2d1 --- /dev/null +++ b/gprofng/common/gp-experiment.h @@ -0,0 +1,186 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. 
*/

#ifndef _EXPERIMENT_H
#define _EXPERIMENT_H

/* version numbers define experiment format */
#define SUNPERF_VERNUM          12
#define SUNPERF_VERNUM_MINOR    4

/* backward compatibility down to: */
#define SUNPERF_VERNUM_LEAST    12

#include "Emsgnum.h"    /* for COL_ERROR_*, etc. symbols */

#define SP_REMOTE_PROTOCOL_VERSION "12.4.1"

#define SP_GROUP_HEADER         "#analyzer experiment group"

/* Experiment name macro definitions */

/* for descendant experiments */
/* An experiment name containing DESCENDANT_EXPT_KEY is a descendant
   experiment; one without it is a founder experiment.  */
#define DESCENDANT_EXPT_KEY     ".er/_"
#define IS_DESC_EXPT(exptname)  (strstr(exptname,DESCENDANT_EXPT_KEY) != NULL)
#define IS_FNDR_EXPT(exptname)  (strstr(exptname,DESCENDANT_EXPT_KEY) == NULL)

/* File name definitions */
#define SP_ARCHIVES_DIR         "archives"
#define SP_ARCHIVE_LOG_FILE     "archive.log"
#define SP_LOG_FILE             "log.xml"
#define SP_NOTES_FILE           "notes"
#define SP_IFREQ_FILE           "ifreq"
#define SP_MAP_FILE             "map.xml"
#define SP_LABELS_FILE          "labels.xml"
#define SP_DYNTEXT_FILE         "dyntext"
#define SP_OVERVIEW_FILE        "overview"
#define SP_PROFILE_FILE         "profile"
#define SP_SYNCTRACE_FILE       "synctrace"
#define SP_IOTRACE_FILE         "iotrace"
#define SP_OMPTRACE_FILE        "omptrace"
#define SP_MPVIEW_FILE          "mpview.dat3"
#define SP_HWCNTR_FILE          "hwcounters"
#define SP_HEAPTRACE_FILE       "heaptrace"
#define SP_JCLASSES_FILE        "jclasses"
#define SP_DYNAMIC_CLASSES      "jdynclasses"
#define SP_RACETRACE_FILE       "dataraces"
#define SP_DEADLOCK_FILE        "deadlocks"
#define SP_FRINFO_FILE          "frameinfo"
#define SP_WARN_FILE            "warnings.xml"

#define SP_LIBCOLLECTOR_NAME    "libgp-collector.so"
#define SP_LIBAUDIT_NAME        "libcollect-ng.so"

/* XML tags */
#define SP_TAG_COLLECTOR        "collector"
#define SP_TAG_CPU              "cpu"
#define SP_TAG_DATAPTR          "dataptr"
#define SP_TAG_EVENT            "event"
#define SP_TAG_EXPERIMENT       "experiment"
#define SP_TAG_FIELD            "field"
#define SP_TAG_PROCESS          "process"
#define SP_TAG_PROFILE          "profile"
#define SP_TAG_PROFDATA         "profdata"
#define SP_TAG_PROFPCKT         "profpckt"
#define SP_TAG_SETTING          "setting"
#define SP_TAG_STATE            "state"
#define SP_TAG_SYSTEM           "system"
#define SP_TAG_POWERM           "powerm"
#define SP_TAG_FREQUENCY        "frequency"
#define SP_TAG_DTRACEFATAL      "dtracefatal"

/* records for log and loadobjects files */
/* note that these are in roughly (not strictly) alphabetical order */
#define SP_JCMD_ARCH            "architecture"
#define SP_JCMD_ARCHIVE         "archive_run"
#define SP_JCMD_ARGLIST         "arglist"
#define SP_JCMD_BLKSZ           "blksz"
#define SP_JCMD_CERROR          "cerror"
#define SP_JCMD_CLASS_LOAD      "class_load"
#define SP_JCMD_CLASS_UNLOAD    "class_unload"
#define SP_JCMD_COLLENV         "collenv"
#define SP_JCMD_COMMENT         "comment"
#define SP_JCMD_CPUID           "cpuid"
#define SP_JCMD_CWARN           "cwarn"
#define SP_JCMD_CWD             "cwd"
#define SP_JCMD_CVERSION        "cversion"
#define SP_JCMD_DATARACE        "datarace"
#define SP_JCMD_DEADLOCK        "deadlock"
#define SP_JCMD_DELAYSTART      "delay_start"
#define SP_JCMD_DESC_START      "desc_start"
#define SP_JCMD_DESC_STARTED    "desc_started"
#define SP_JCMD_DVERSION        "dversion"
#define SP_JCMD_EXEC_START      "exec_start"
#define SP_JCMD_EXEC_ERROR      "exec_error"
#define SP_JCMD_EXIT            "exit"
#define SP_JCMD_EXPT_DURATION   "exp_duration"
#define SP_JCMD_FAKETIME        "faketime"
#define SP_JCMD_FN_LOAD         "fn_load"
#define SP_JCMD_FN_UNLOAD       "fn_unload"
#define SP_JCMD_FUN_MAP         "fun_map"
#define SP_JCMD_FUN_UNMAP       "fun_unmap"
#define SP_JCMD_HEAPTRACE       "heaptrace"
#define SP_JCMD_HOSTNAME        "hostname"
#define SP_JCMD_HWC_DEFAULT     "hwc_default"
#define SP_JCMD_HW_COUNTER      "hwcounter"
#define SP_JCMD_HW_SIM_CTR      "hwsimctr"
#define SP_JCMD_IOTRACE         "iotrace"
#define SP_JCMD_JCM_LOAD        "jcm_load"
#define SP_JCMD_JCM_UNLOAD      "jcm_unload"
#define SP_JCMD_JCM_MAP         "jcm_map"
#define SP_JCMD_JCM_UNMAP       "jcm_unmap"
#define SP_JCMD_JTHREND         "jthread_end"
#define SP_JCMD_JTHRSTART       "jthread_start"
#define SP_JCMD_GCEND           "gc_end"
#define SP_JCMD_GCSTART         "gc_start"
#define SP_JCMD_JVERSION        "jversion"
//#define SP_JCMD_KPROFILE        "kprofile"  /* TBR */
#define SP_JCMD_LIMIT           "limit"
#define SP_JCMD_LINETRACE       "linetrace"
#define SP_JCMD_LO_OPEN         "lo_open"
#define SP_JCMD_LO_CLOSE        "lo_close"
#define SP_JCMD_MOD_OPEN        "mod_open"
#define SP_JCMD_MPIEXP          "MPIexperiment"
#define SP_JCMD_MPI_NO_TRACE    "MPI_no_trace"
#define SP_JCMD_MPIOMPVER       "mpi_openmpi_version"
#define SP_JCMD_MPITRACEVER     "mpi_trace_version"
#define SP_JCMD_MPIPP           "mpipp"
#define SP_JCMD_MPIPPERR        "mpipp_err"
#define SP_JCMD_MPIPPWARN       "mpipp_warn"
#define SP_JCMD_MPISTATE        "mpistate"
#define SP_JCMD_MPITRACE        "mpitrace" /* backwards compat only */
#define SP_JCMD_MPVIEW          "mpview"
#define SP_JCMD_MSGTRACE        "msgtrace"
#define SP_JCMD_NOIDLE          "noidle"
#define SP_JCMD_OMPTRACE        "omptrace"
#define SP_JCMD_OS              "os"
#define SP_JCMD_PAGESIZE        "pagesize"
#define SP_JCMD_PAUSE           "pause"
#define SP_JCMD_PAUSE_SIG       "pause_signal"
#define SP_JCMD_PROFILE         "profile"
#define SP_JCMD_RESUME          "resume"
#define SP_JCMD_RUN             "run"
#define SP_JCMD_SAMPLE          "sample"
#define SP_JCMD_SAMPLE_PERIOD   "sample_period"
#define SP_JCMD_SAMPLE_SIG      "sample_signal"
#define SP_JCMD_SEGMENT_MAP     "seg_map"
#define SP_JCMD_SEGMENT_UNMAP   "seg_unmap"
#define SP_JCMD_SRCHPATH        "search_path"
#define SP_JCMD_STACKBASE       "stackbase"
#define SP_JCMD_SUNPERF         "sunperf"
#define SP_JCMD_SYNCTRACE       "synctrace"
#define SP_JCMD_TERMINATE       "terminate"
#define SP_JCMD_THREAD_PAUSE    "thread_pause"
#define SP_JCMD_THREAD_RESUME   "thread_resume"
#define SP_JCMD_USERNAME        "username"
#define SP_JCMD_VERSION         "version"
#define SP_JCMD_WSIZE           "wsize"

/* strings naming memory-segments */
#define SP_MAP_ANON             "Anon"
#define SP_MAP_HEAP             "Heap"
#define SP_MAP_STACK            "Stack"
#define SP_MAP_SHMEM            "SHMid"
#define SP_MAP_UNRESOLVABLE     "Unresolvable"

#define SP_UNKNOWN_NAME         "(unknown)"

#define MAX_STACKDEPTH          2048
#endif /* _EXPERIMENT_H */
diff --git a/gprofng/common/gp-time.h b/gprofng/common/gp-time.h
new file mode 100644
index 0000000..7755370
--- /dev/null
+++ b/gprofng/common/gp-time.h
@@ -0,0 +1,46 @@
/* Copyright (C) 2021 Free Software Foundation, Inc.
   Contributed by Oracle.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA.  */

#ifndef _GP_TIME_H_
#define _GP_TIME_H_

#include <sys/time.h>

/* Time types used by gprofng.  NOTE(review): the names match the Solaris
   hrtime_t / timestruc_t types; presumably hrtime_t counts nanoseconds
   (see NANOSEC below) -- confirm against the gethrtime implementation.  */
typedef long long hrtime_t;
typedef struct timespec timestruc_t;

#define ITIMER_REALPROF ITIMER_PROF     /* alias for the profiling timer */
#define NANOSEC         1000000000
#define MICROSEC        1000000

#ifdef __cplusplus
extern "C"
{
#endif

  /* Declared here, defined elsewhere.  */
  hrtime_t gethrtime (void);
  hrtime_t gethrvtime (void);

#ifdef __cplusplus
}
#endif

#endif

diff --git a/gprofng/common/hwc_cpus.h b/gprofng/common/hwc_cpus.h
new file mode 100644
index 0000000..ff7b303
--- /dev/null
+++ b/gprofng/common/hwc_cpus.h
@@ -0,0 +1,198 @@
/* Copyright (C) 2021 Free Software Foundation, Inc.
   Contributed by Oracle.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA.  */

/* Hardware counter profiling: cpu types */

#ifndef __HWC_CPUS_H
#define __HWC_CPUS_H

#define MAX_PICS    20 /* Max # of HW ctrs that can be enabled simultaneously */

  /* type for specifying CPU register number */
  typedef int regno_t;
#define REGNO_ANY       ((regno_t)-1)
#define REGNO_INVALID   ((regno_t)-2)

  /* --- Utilities for use with regno_t and reg_list[] --- */
  /* A reg_list is an array of regno_t terminated by REGNO_ANY.  */
#define REG_LIST_IS_EMPTY(reg_list) (!(reg_list) || (reg_list)[0] == REGNO_ANY)
#define REG_LIST_EOL(regno)         ((regno)==REGNO_ANY)
  /* The single entry of a one-element list, otherwise REGNO_ANY.  */
#define REG_LIST_SINGLE_VALID_ENTRY(reg_list) \
	  (((reg_list) && (reg_list)[1] == REGNO_ANY && \
	    (reg_list)[0] != REGNO_ANY ) ? (reg_list)[0] : REGNO_ANY)

  /* enum for specifying unknown or uninitialized CPU */
  enum
  {
    CPUVER_GENERIC = 0,
    CPUVER_UNDEFINED = -1
  };

  // Note: changing any values below may make older HWC experiments unreadable.
  // --- Sun/Oracle SPARC ---
#define CPC_ULTRA1              1000
#define CPC_ULTRA2              1001
#define CPC_ULTRA3              1002
#define CPC_ULTRA3_PLUS         1003
#define CPC_ULTRA3_I            1004
#define CPC_ULTRA4_PLUS         1005 /* Panther */
#define CPC_ULTRA4              1017 /* Jaguar */
#define CPC_ULTRA_T1            1100 /* Niagara1 */
#define CPC_ULTRA_T2            1101 /* Niagara2 */
#define CPC_ULTRA_T2P           1102
#define CPC_ULTRA_T3            1103
#define CPC_SPARC_T4            1104
#define CPC_SPARC_T5            1110
#define CPC_SPARC_T6            1120
// #define CPC_SPARC_T7         1130 // use CPC_SPARC_M7
#define CPC_SPARC_M4            1204 /* Obsolete */
#define CPC_SPARC_M5            1210
#define CPC_SPARC_M6            1220
#define CPC_SPARC_M7            1230
#define CPC_SPARC_M8            1240

  // --- Intel ---
  // Pentium
#define CPC_PENTIUM             2000
#define CPC_PENTIUM_MMX         2001
#define CPC_PENTIUM_PRO         2002
#define CPC_PENTIUM_PRO_MMX     2003
#define CPC_PENTIUM_4           2017
#define CPC_PENTIUM_4_HT        2027

  // Core Microarchitecture (Merom/Penryn)
#define CPC_INTEL_CORE2         2028
#define CPC_INTEL_NEHALEM       2040
#define CPC_INTEL_WESTMERE      2042
#define CPC_INTEL_SANDYBRIDGE   2045
#define CPC_INTEL_IVYBRIDGE     2047
#define CPC_INTEL_ATOM          2050 /* Atom*/
#define CPC_INTEL_HASWELL       2060
#define CPC_INTEL_BROADWELL     2070
#define CPC_INTEL_SKYLAKE       2080
#define CPC_INTEL_UNKNOWN       2499
#define CPC_AMD_K8C             2500 /* Opteron, Athlon... */
#define CPC_AMD_FAM_10H         2501 /* Barcelona, Shanghai... */
#define CPC_AMD_FAM_11H         2502 /* Griffin... */
#define CPC_AMD_FAM_15H         2503
#define CPC_KPROF               3003 // OBSOLETE (To support 12.3 and earlier)
#define CPC_FOX                 3004 /* pseudo-chip */

  // --- Fujitsu ---
#define CPC_SPARC64_III         3000
#define CPC_SPARC64_V           3002
#define CPC_SPARC64_VI          4003 /* OPL-C */
#define CPC_SPARC64_VII         4004 /* Jupiter */
#define CPC_SPARC64_X           4006 /* Athena */
#define CPC_SPARC64_XII         4010 /* Athena++ */

// aarch64. Constants from arch/arm64/include/asm/cputype.h
enum {
    ARM_CPU_IMP_ARM     = 0x41,
    ARM_CPU_IMP_BRCM    = 0x42,
    ARM_CPU_IMP_CAVIUM  = 0x43,
    ARM_CPU_IMP_APM     = 0x50,
    ARM_CPU_IMP_QCOM    = 0x51
};

#define	AARCH64_VENDORSTR_ARM	"ARM"

  /* strings below must match those returned by cpc_getcpuver() */
  /* One table row: a CPC_* (or ARM_CPU_IMP_*) code and its name string.
     The list ends with a {0, NULL} row.  */
  typedef struct
  {
    int cpc2_cpuver;
    const char * cpc2_cciname;
  } libcpc2_cpu_lookup_t;
#define LIBCPC2_CPU_LOOKUP_LIST \
  {CPC_AMD_K8C           , "AMD Opteron & Athlon64"}, \
  {CPC_AMD_FAM_10H       , "AMD Family 10h"}, \
  {CPC_AMD_FAM_11H       , "AMD Family 11h"}, \
  {CPC_AMD_FAM_15H       , "AMD Family 15h Model 01h"}, \
  {CPC_AMD_FAM_15H       , "AMD Family 15h Model 02h"},/*future*/ \
  {CPC_AMD_FAM_15H       , "AMD Family 15h Model 03h"},/*future*/ \
  {CPC_PENTIUM_4_HT      , "Pentium 4 with HyperThreading"}, \
  {CPC_PENTIUM_4         , "Pentium 4"}, \
  {CPC_PENTIUM_PRO_MMX   , "Pentium Pro with MMX, Pentium II"}, \
  {CPC_PENTIUM_PRO       , "Pentium Pro, Pentium II"}, \
  {CPC_PENTIUM_MMX       , "Pentium with MMX"}, \
  {CPC_PENTIUM           , "Pentium"}, \
  {CPC_INTEL_CORE2       , "Core Microarchitecture"}, \
  /* Merom:  F6M15: Clovertown, Kentsfield, Conroe, Merom, Woodcrest */ \
  /* Merom:  F6M22: Merom Conroe */ \
  /* Penryn: F6M23: Yorkfield, Wolfdale, Penryn, Harpertown */ \
  /* Penryn: F6M29: Dunnington */ \
  {CPC_INTEL_NEHALEM     , "Intel Arch PerfMon v3 on Family 6 Model 26"},/*Bloomfield, Nehalem EP*/ \
  {CPC_INTEL_NEHALEM     , "Intel Arch PerfMon v3 on Family 6 Model 30"},/*Clarksfield, Lynnfield, Jasper Forest*/ \
  {CPC_INTEL_NEHALEM     , "Intel Arch PerfMon v3 on Family 6 Model 31"},/*(TBD)*/ \
  {CPC_INTEL_NEHALEM     , "Intel Arch PerfMon v3 on Family 6 Model 46"},/*Nehalem EX*/ \
  {CPC_INTEL_WESTMERE    , "Intel Arch PerfMon v3 on Family 6 Model 37"},/*Arrandale, Clarkdale*/ \
  {CPC_INTEL_WESTMERE    , "Intel Arch PerfMon v3 on Family 6 Model 44"},/*Gulftown, Westmere EP*/ \
  {CPC_INTEL_WESTMERE    , "Intel Arch PerfMon v3 on Family 6 Model 47"},/*Westmere EX*/ \
  {CPC_INTEL_SANDYBRIDGE , "Intel Arch PerfMon v3 on Family 6 Model 42"},/*Sandy Bridge*/ \
  {CPC_INTEL_SANDYBRIDGE , "Intel Arch PerfMon v3 on Family 6 Model 45"},/*Sandy Bridge E, SandyBridge-EN, SandyBridge EP*/ \
  {CPC_INTEL_IVYBRIDGE   , "Intel Arch PerfMon v3 on Family 6 Model 58"},/*Ivy Bridge*/ \
  {CPC_INTEL_IVYBRIDGE   , "Intel Arch PerfMon v3 on Family 6 Model 62"},/*(TBD)*/ \
  {CPC_INTEL_ATOM        , "Intel Arch PerfMon v3 on Family 6 Model 28"},/*Atom*/ \
  {CPC_INTEL_HASWELL     , "Intel Arch PerfMon v3 on Family 6 Model 60"},/*Haswell*/ \
  {CPC_INTEL_HASWELL     , "Intel Arch PerfMon v3 on Family 6 Model 63"},/*Haswell*/ \
  {CPC_INTEL_HASWELL     , "Intel Arch PerfMon v3 on Family 6 Model 69"},/*Haswell*/ \
  {CPC_INTEL_HASWELL     , "Intel Arch PerfMon v3 on Family 6 Model 70"},/*Haswell*/ \
  {CPC_INTEL_BROADWELL   , "Intel Arch PerfMon v3 on Family 6 Model 61"},/*Broadwell*/ \
  {CPC_INTEL_BROADWELL   , "Intel Arch PerfMon v3 on Family 6 Model 71"},/*Broadwell*/ \
  {CPC_INTEL_BROADWELL   , "Intel Arch PerfMon v3 on Family 6 Model 79"},/*Broadwell*/ \
  {CPC_INTEL_BROADWELL   , "Intel Arch PerfMon v3 on Family 6 Model 86"},/*Broadwell*/ \
  {CPC_INTEL_SKYLAKE     , "Intel Arch PerfMon v4 on Family 6 Model 78"},/*Skylake*/ \
  {CPC_INTEL_SKYLAKE     , "Intel Arch PerfMon v4 on Family 6 Model 85"},/*Skylake*/ \
  {CPC_INTEL_SKYLAKE     , "Intel Arch PerfMon v4 on Family 6 Model 94"},/*Skylake*/ \
  {CPC_INTEL_UNKNOWN     , "Intel Arch PerfMon"},/*Not yet in table*/ \
  {CPC_SPARC64_III       , "SPARC64 III"/*?*/}, \
  {CPC_SPARC64_V         , "SPARC64 V"/*?*/}, \
  {CPC_SPARC64_VI        , "SPARC64 VI"}, \
  {CPC_SPARC64_VII       , "SPARC64 VI & VII"}, \
  {CPC_SPARC64_X         , "SPARC64 X"}, \
  {CPC_SPARC64_XII       , "SPARC64 XII"}, \
  {CPC_ULTRA_T1          , "UltraSPARC T1"}, \
  {CPC_ULTRA_T2          , "UltraSPARC T2"}, \
  {CPC_ULTRA_T2P         , "UltraSPARC T2+"}, \
  {CPC_ULTRA_T3          , "SPARC T3"}, \
  {CPC_SPARC_T4          , "SPARC T4"}, \
  {CPC_SPARC_M4          , "SPARC M4"}, \
  {CPC_SPARC_T5          , "SPARC T5"}, \
  {CPC_SPARC_M5          , "SPARC M5"}, \
  {CPC_SPARC_T6          , "SPARC T6"}, \
  {CPC_SPARC_M6          , "SPARC M6"}, \
  {CPC_SPARC_M7          , "SPARC T7"}, \
  {CPC_SPARC_M7          , "SPARC 3e40"}, \
  {CPC_SPARC_M7          , "SPARC M7"}, \
  {CPC_SPARC_M8          , "SPARC 3e50"}, \
  {CPC_ULTRA4_PLUS       , "UltraSPARC IV+"}, \
  {CPC_ULTRA4            , "UltraSPARC IV"}, \
  {CPC_ULTRA3_I          , "UltraSPARC IIIi"}, \
  {CPC_ULTRA3_I          , "UltraSPARC IIIi & IIIi+"}, \
  {CPC_ULTRA3_PLUS       , "UltraSPARC III+"}, \
  {CPC_ULTRA3_PLUS       , "UltraSPARC III+ & IV"}, \
  {CPC_ULTRA3            , "UltraSPARC III"}, \
  {CPC_ULTRA2            , "UltraSPARC I&II"}, \
  {CPC_ULTRA1            , "UltraSPARC I&II"}, \
  {ARM_CPU_IMP_APM       , AARCH64_VENDORSTR_ARM}, \
  {0, NULL}
  /* init like this:
     static libcpc2_cpu_lookup_t cpu_table[]={LIBCPC2_CPU_LOOKUP_LIST};
   */
  /* NOTE(review): CPC_ULTRA2 and CPC_ULTRA1 share the name
     "UltraSPARC I&II" -- presumably a first-match lookup by name can only
     ever yield CPC_ULTRA2; confirm against the lookup code.  */
#endif
diff --git a/gprofng/common/hwcdrv.c b/gprofng/common/hwcdrv.c
new file mode 100644
index 0000000..caab983
--- /dev/null
+++ b/gprofng/common/hwcdrv.c
@@ -0,0 +1,1454 @@
/* Copyright (C) 2021 Free Software Foundation, Inc.
   Contributed by Oracle.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA.
*/ + +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <linux/perf_event.h> + +#include "hwcdrv.h" + +/*---------------------------------------------------------------------------*/ +/* macros */ +#define IS_GLOBAL /* Mark global symbols */ + +#include "cpuid.c" /* ftns for identifying a chip */ + +static hdrv_pcbe_api_t hdrv_pcbe_core_api; +static hdrv_pcbe_api_t hdrv_pcbe_opteron_api; +static hdrv_pcbe_api_t *hdrv_pcbe_drivers[] = { + &hdrv_pcbe_core_api, + &hdrv_pcbe_opteron_api, + NULL +}; +#include "opteron_pcbe.c" /* CPU-specific code */ +#include "core_pcbe.c" /* CPU-specific code */ + +extern hwcdrv_api_t hwcdrv_pcl_api; +IS_GLOBAL hwcdrv_api_t *hwcdrv_drivers[] = { + &hwcdrv_pcl_api, + NULL +}; + +/*---------------------------------------------------------------------------*/ + +/* utils for drivers */ +IS_GLOBAL int +hwcdrv_assign_all_regnos (Hwcentry* entries[], unsigned numctrs) +{ + unsigned int pmc_assigned[MAX_PICS]; + unsigned idx; + for (int ii = 0; ii < MAX_PICS; ii++) + pmc_assigned[ii] = 0; + + /* assign the HWCs that we already know about */ + for (idx = 0; idx < numctrs; idx++) + { + regno_t regno = entries[idx]->reg_num; + if (regno == REGNO_ANY) + { + /* check to see if list of possible registers only contains one entry */ + regno = REG_LIST_SINGLE_VALID_ENTRY (entries[idx]->reg_list); + } + if (regno != REGNO_ANY) + { + if (regno < 0 || regno >= MAX_PICS || !regno_is_valid (entries[idx], regno)) + { + logerr (GTXT ("For counter #%d, register %d is out of range\n"), idx + 1, regno); /*!*/ + return HWCFUNCS_ERROR_HWCARGS; + } + TprintfT (DBG_LT2, "hwcfuncs_assign_regnos(): preselected: idx=%d, regno=%d\n", idx, regno); + entries[idx]->reg_num = regno; /* assigning back to entries */ + pmc_assigned[regno] = 1; + } + } + + /* assign HWCs that are currently REGNO_ANY */ + for (idx = 0; idx < numctrs; idx++) + { + if (entries[idx]->reg_num == REGNO_ANY) 
+ { + int assigned = 0; + regno_t *reg_list = entries[idx]->reg_list; + for (; reg_list && *reg_list != REGNO_ANY; reg_list++) + { + regno_t regno = *reg_list; + if (regno < 0 || regno >= MAX_PICS) + { + logerr (GTXT ("For counter #%d, register %d is out of range\n"), idx + 1, regno); /*!*/ + return HWCFUNCS_ERROR_HWCARGS; + } + if (pmc_assigned[regno] == 0) + { + TprintfT (DBG_LT2, "hwcfuncs_assign_regnos(): assigned: idx=%d, regno=%d\n", idx, regno); + entries[idx]->reg_num = regno; /* assigning back to entries */ + pmc_assigned[regno] = 1; + assigned = 1; + break; + } + } + if (!assigned) + { + logerr (GTXT ("Counter '%s' could not be bound to a register\n"), + entries[idx]->name ? entries[idx]->name : "<NULL>"); + return HWCFUNCS_ERROR_HWCARGS; + } + } + } + return 0; +} + +IS_GLOBAL int +hwcdrv_lookup_cpuver (const char * cpcN_cciname) +{ + libcpc2_cpu_lookup_t *plookup; + static libcpc2_cpu_lookup_t cpu_table[] = { + LIBCPC2_CPU_LOOKUP_LIST + }; + if (cpcN_cciname == NULL) + return CPUVER_UNDEFINED; + + /* search table for name */ + for (plookup = cpu_table; plookup->cpc2_cciname; plookup++) + { + int n = strlen (plookup->cpc2_cciname); + if (!strncmp (plookup->cpc2_cciname, cpcN_cciname, n)) + return plookup->cpc2_cpuver; + } + /* unknown, but does have a descriptive string */ + TprintfT (DBG_LT0, "hwcfuncs: CPC2: WARNING: Id of processor '%s' " + "could not be determined\n", + cpcN_cciname); + return CPUVER_GENERIC; +} + +/*---------------------------------------------------------------------------*/ +/* utils to generate x86 register definitions on Linux */ + +/* + * This code is structured as though we're going to initialize the + * HWC by writing the Intel MSR register directly. That is, we + * assume the lowest 16 bits of the event number will have the event + * and that higher bits will set attributes. + * + * While SPARC is different, we can nonetheless use basically the + * same "x86"-named functions: + * + * - The event code will still be 16 bits. 
It will still + * be in the lowest 16 bits of the event number. Though + * perf_event_code() on SPARC will expect those bits to + * shifted, hwcdrv_pcl.c can easily perform that shift. + * + * - On SPARC we support only two attributes, "user" and "system", + * which hwcdrv_pcl.c already converts to the "exclude_user" + * and "exclude_kernel" fields expected by perf_event_open(). + * "user" and "system" are stored in event bits 16 and 17. + * For M8, a 4-bit mask of supported PICs is stored in bits [23:20]. + */ + +IS_GLOBAL hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum = 0; + +static const attr_info_t perfctr_sparc_attrs[] = { + {NTXT ("user"), 0, 0x01, 16}, //usr + {NTXT ("system"), 0, 0x01, 17}, //os + {NULL, 0, 0x00, 0}, +}; +static const attr_info_t perfctr_x64_attrs[] = {/* ok for Core2 & later */ + {NTXT ("umask"), 0, 0xff, 8}, + {NTXT ("user"), 0, 0x01, 16}, //usr + //{NTXT("nouser"), 1, 0x01, 16}, //usr (inverted) + {NTXT ("system"), 0, 0x01, 17}, //os + {NTXT ("edge"), 0, 0x01, 18}, + {NTXT ("pc"), 0, 0x01, 19}, + {NTXT ("inv"), 0, 0x01, 23}, + {NTXT ("cmask"), 0, 0xff, 24}, + {NULL, 0, 0x00, 0}, +}; +const attr_info_t *perfctr_attrs_table = perfctr_x64_attrs; + +static const eventsel_t perfctr_evntsel_enable_bits = (0x01 << 16) | /* usr */ + // (0xff << 0) | /* event*/ + // (0xff << 8) | /* umask */ + // (0x01 << 17) | /* os */ + // (0x01 << 18) | /* edge */ + // (0x01 << 19) | /* pc */ + (0x01 << 20) | /* int */ + // (0x01 << 21) | /* reserved */ + (0x01 << 22) | /* enable */ + // (0x01 << 23) | /* inv */ + // (0xff << 24) | /* cmask */ + 0; + +static int +myperfctr_get_x86_eventnum (const char *eventname, uint_t pmc, + eventsel_t *eventsel, eventsel_t *valid_umask, + uint_t *pmc_sel) +{ + if (hwcdrv_get_x86_eventnum && + !hwcdrv_get_x86_eventnum (eventname, pmc, eventsel, valid_umask, pmc_sel)) + return 0; + + /* check for numerically-specified counters */ + char * endptr; + uint64_t num = strtoull (eventname, &endptr, 0); + if (*eventname && 
!*endptr) + { + *eventsel = EXTENDED_EVNUM_2_EVSEL (num); + *valid_umask = 0xff; /* allow any umask (unused for SPARC?) */ + *pmc_sel = pmc; + return 0; + } + + /* name does not specify a numeric value */ + *eventsel = (eventsel_t) - 1; + *valid_umask = 0x0; + *pmc_sel = pmc; + return -1; +} + +static int +mask_shift_set (eventsel_t *presult, eventsel_t invalue, + eventsel_t mask, eventsel_t shift) +{ + if (invalue & ~mask) + return -1; /* invalue attempts to set bits outside of mask */ + *presult &= ~(mask << shift); /* clear all the mask bits */ + *presult |= (invalue << shift); /* set bits according to invalue */ + return 0; +} + +static int +set_x86_attr_bits (eventsel_t *result_mask, eventsel_t evnt_valid_umask, + hwcfuncs_attr_t attrs[], int nattrs, const char*nameOnly) +{ + eventsel_t evntsel = *result_mask; + for (int ii = 0; ii < (int) nattrs; ii++) + { + const char *attrname = attrs[ii].ca_name; + eventsel_t attrval = (eventsel_t) attrs[ii].ca_val; + const char *tmpname; + int attr_found = 0; + for (int jj = 0; (tmpname = perfctr_attrs_table[jj].attrname); jj++) + { + if (strcmp (attrname, tmpname) == 0) + { + if (strcmp (attrname, "umask") == 0) + { + if (attrval & ~evnt_valid_umask) + { + logerr (GTXT ("for `%s', allowable umask bits are: 0x%llx\n"), + nameOnly, (long long) evnt_valid_umask); + return -1; + } + } + if (mask_shift_set (&evntsel, + perfctr_attrs_table[jj].is_inverted ? 
(attrval^1) : attrval, + perfctr_attrs_table[jj].mask, + perfctr_attrs_table[jj].shift)) + { + logerr (GTXT ("`%s' attribute `%s' could not be set to 0x%llx\n"), + nameOnly, attrname, (long long) attrval); + return -1; + } + TprintfT (DBG_LT2, "hwcfuncs: Counter %s, attribute %s set to 0x%llx\n", + nameOnly, attrname, (long long) attrval); + attr_found = 1; + break; + } + } + if (!attr_found) + { + logerr (GTXT ("attribute `%s' is invalid\n"), attrname); + return -1; + } + } + *result_mask = evntsel; + return 0; +} + +IS_GLOBAL int +hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name, + eventsel_t *return_event, uint_t *return_pmc_sel) +{ + hwcfuncs_attr_t attrs[HWCFUNCS_MAX_ATTRS + 1]; + unsigned nattrs = 0; + char *nameOnly = NULL; + eventsel_t evntsel = 0; // event number + eventsel_t evnt_valid_umask = 0; + uint_t pmc_sel = 0; + int rc = -1; + *return_event = 0; + *return_pmc_sel = 0; + void *attr_mem = hwcfuncs_parse_attrs (int_name, attrs, HWCFUNCS_MAX_ATTRS, + &nattrs, NULL); + if (!attr_mem) + { + logerr (GTXT ("out of memory, could not parse attributes\n")); + return -1; + } + hwcfuncs_parse_ctr (int_name, NULL, &nameOnly, NULL, NULL, NULL); + if (regno == REGNO_ANY) + { + logerr (GTXT ("reg# could not be determined for `%s'\n"), nameOnly); + goto attr_wrapup; + } + + /* look up evntsel */ + if (myperfctr_get_x86_eventnum (nameOnly, regno, + &evntsel, &evnt_valid_umask, &pmc_sel)) + { + logerr (GTXT ("counter `%s' is not valid\n"), nameOnly); + goto attr_wrapup; + } + TprintfT (DBG_LT1, "hwcfuncs: event=0x%llx pmc=0x%x '%s' nattrs = %u\n", + (long long) evntsel, pmc_sel, nameOnly, nattrs); + + /* determine event attributes */ + eventsel_t evnt_attrs = perfctr_evntsel_enable_bits; + if (set_x86_attr_bits (&evnt_attrs, evnt_valid_umask, attrs, nattrs, nameOnly)) + goto attr_wrapup; + if (evntsel & evnt_attrs) + TprintfT (DBG_LT0, "hwcfuncs: ERROR - evntsel & enable bits overlap: 0x%llx 0x%llx 0x%llx\n", + (long long) evntsel, (long long) 
evnt_attrs, + (long long) (evntsel & evnt_attrs)); + *return_event = evntsel | evnt_attrs; + *return_pmc_sel = pmc_sel; + rc = 0; + +attr_wrapup: + free (attr_mem); + free (nameOnly); + return rc; +} + +#ifdef __x86_64__ +#define syscall_instr "syscall" +#define syscall_clobber "rcx", "r11", "memory" +#endif +#ifdef __i386__ +#define syscall_instr "int $0x80" +#define syscall_clobber "memory" +#endif + +static inline int +perf_event_open (struct perf_event_attr *hw_event_uptr, pid_t pid, + int cpu, int group_fd, unsigned long flags) +{ + /* It seems that perf_event_open() sometimes fails spuriously, + * even while an immediate retry succeeds. + * So, let's try a few retries if the call fails just to be sure. + */ + int rc; + for (int retry = 0; retry < 5; retry++) + { + rc = syscall (__NR_perf_event_open, hw_event_uptr, pid, cpu, group_fd, flags); + if (rc != -1) + return rc; + } + return rc; +} + +/*---------------------------------------------------------------------------*/ +/* macros & fwd prototypes */ + +#define HWCDRV_API static /* Mark functions used by hwcdrv API */ + +HWCDRV_API int hwcdrv_start (void); +HWCDRV_API int hwcdrv_free_counters (); + +static pid_t +hwcdrv_gettid (void) +{ +#ifndef LIBCOLLECTOR_SRC + return syscall (__NR_gettid); +#elif defined(intel) + pid_t r; + __asm__ __volatile__(syscall_instr + : "=a" (r) : "0" (__NR_gettid) + : syscall_clobber); + return r; +#else + return syscall (__NR_gettid); // FIXUP_XXX_SPARC_LINUX // write gettid in asm +#endif +} + +/*---------------------------------------------------------------------------*/ +/* types */ + +#define NPAGES_PER_BUF 1 // number of pages to be used for perf_event samples +// must be a power of 2 + +/*---------------------------------------------------------------------------*/ + +/* typedefs */ + +typedef struct +{ // event (hwc) definition + unsigned int reg_num; // PMC assignment, potentially for detecting conflicts + eventsel_t eventsel; // raw event bits (Intel/AMD) + uint64_t 
counter_preload; // number of HWC events before signal + struct perf_event_attr hw; // perf_event definition + hrtime_t min_time; // minimum time we're targeting between events + char *name; +} perf_event_def_t; + +typedef struct +{ // runtime state of perf_event buffer + void *buf; // pointer to mmapped buffer + size_t pagesz; // size of pages +} buffer_state_t; + +typedef struct +{ // runtime state of counter values + uint64_t prev_ena_ts; // previous perf_event "enabled" time + uint64_t prev_run_ts; // previous perf_event "running" time + uint64_t prev_value; // previous HWC value +} counter_value_state_t; + +typedef struct +{ // per-counter information + perf_event_def_t *ev_def; // global HWC definition for one counter + int fd; // perf_event fd + buffer_state_t buf_state; // perf_event buffer's state + counter_value_state_t value_state; // counter state + int needs_restart; // workaround for dbx failure to preserve si_fd + uint64_t last_overflow_period; + hrtime_t last_overflow_time; +} counter_state_t; + +typedef struct +{ // per-thread context + counter_state_t *ctr_list; + int signal_fd; // fd that caused the most recent signal + pthread_t tid; // for debugging signal delivery problems +} hdrv_pcl_ctx_t; + +/*---------------------------------------------------------------------------*/ + +/* static variables */ +static struct +{ + int library_ok; + int internal_open_called; + hwcfuncs_tsd_get_fn_t find_vpc_ctx; + unsigned hwcdef_cnt; /* number of *active* hardware counters */ + hwcdrv_get_events_fn_t *get_events; +} hdrv_pcl_state; + +static hwcdrv_about_t hdrv_pcl_about = {.cpcN_cpuver = CPUVER_UNDEFINED}; +static perf_event_def_t global_perf_event_def[MAX_PICS]; + +#define COUNTERS_ENABLED() (hdrv_pcl_state.hwcdef_cnt) + + +/* perf_event buffer formatting and handling */ +static void +reset_buf (buffer_state_t *bufstate) +{ + TprintfT (0, "hwcdrv: ERROR: perf_event reset_buf() called!\n"); + struct perf_event_mmap_page *metadata = bufstate->buf; + if 
(metadata) + metadata->data_tail = metadata->data_head; +} + +static int +skip_buf (buffer_state_t *bufstate, size_t sz) +{ + TprintfT (DBG_LT1, "hwcdrv: WARNING: perf_event skip_buf called!\n"); + struct perf_event_mmap_page *metadata = bufstate->buf; + if (metadata == NULL) + return -1; + size_t pgsz = bufstate->pagesz; + size_t bufsz = NPAGES_PER_BUF*pgsz; + uint64_t d_tail = metadata->data_tail; + uint64_t d_head = metadata->data_head; + + // validate request size + if (sz > d_head - d_tail || sz >= bufsz) + { + reset_buf (bufstate); + return -1; + } + metadata->data_tail = d_tail + sz; // advance tail + return 0; +} + +static int +read_buf (buffer_state_t *bufstate, void *buf, size_t sz) +{ + struct perf_event_mmap_page *metadata = bufstate->buf; + if (metadata == NULL) + return -1; + size_t pgsz = bufstate->pagesz; + size_t bufsz = NPAGES_PER_BUF*pgsz; + uint64_t d_tail = metadata->data_tail; + uint64_t d_head = metadata->data_head; + + // validate request size + if (sz > d_head - d_tail || sz >= bufsz) + { + reset_buf (bufstate); + return -1; + } + char *buf_base = ((char *) metadata) + pgsz; // start of data buffer + uint64_t start_pos = d_tail & (bufsz - 1); // char offset into data buffer + size_t nbytes = sz; + if (start_pos + sz > bufsz) + { + // will wrap past end of buffer + nbytes = bufsz - start_pos; + memcpy (buf, buf_base + start_pos, nbytes); + start_pos = 0; // wrap to start + buf = (void *) (((char *) buf) + nbytes); + nbytes = sz - nbytes; + } + memcpy (buf, buf_base + start_pos, nbytes); + metadata->data_tail += sz; + return 0; +} + +static int +read_u64 (buffer_state_t *bufstate, uint64_t *value) +{ + return read_buf (bufstate, value, sizeof (uint64_t)); +} + +static int +read_sample (counter_state_t *ctr_state, int msgsz, uint64_t *rvalue, + uint64_t *rlost) +{ + // returns count of bytes read + buffer_state_t *bufstate = &ctr_state->buf_state; + counter_value_state_t *cntstate = &ctr_state->value_state; + int readsz = 0; + + // 
PERF_SAMPLE_IP + uint64_t ipc = 0; + int rc = read_u64 (bufstate, &ipc); + if (rc) + return -1; + readsz += sizeof (uint64_t); + + // PERF_SAMPLE_READ: value + uint64_t value = 0; + rc = read_u64 (bufstate, &value); + if (rc) + return -2; + readsz += sizeof (uint64_t); + + /* Bug 20806896 + * Old Linux kernels (e.g. 2.6.32) on certain systems return enabled and + * running times in the sample data that correspond to the metadata times + * metadata->time_enabled + * metadata->time_running + * from the PREVIOUS (not current) sample. Probably just ignore this bug + * since it's on old kernels and we only use the enabled and running times + * to construct loss_estimate. + */ + // PERF_SAMPLE_READ: PERF_FORMAT_ENABLED + uint64_t enabled_time = 0; + rc = read_u64 (bufstate, &enabled_time); + if (rc) + return -3; + readsz += sizeof (uint64_t); + + // PERF_SAMPLE_READ: PERF_FORMAT_RUNNING + uint64_t running_time = 0; + rc = read_u64 (bufstate, &running_time); + if (rc) + return -4; + readsz += sizeof (uint64_t); + + uint64_t value_delta = value - cntstate->prev_value; + uint64_t enabled_delta = enabled_time - cntstate->prev_ena_ts; + uint64_t running_delta = running_time - cntstate->prev_run_ts; + cntstate->prev_value = value; + cntstate->prev_ena_ts = enabled_time; + cntstate->prev_run_ts = running_time; + + // 24830461 need workaround for Linux anomalous HWC skid overrun + int set_error_flag = 0; + if (value_delta > 2 * ctr_state->last_overflow_period + 2000 /* HWC_SKID_TOLERANCE */) + set_error_flag = 1; + + uint64_t loss_estimate = 0; // estimate loss of events caused by multiplexing + if (running_delta == enabled_delta) + { + // counter was running 100% of time, no multiplexing + } + else if (running_delta == 0) + loss_estimate = 1; // token amount to aid in debugging perfctr oddities + else if ((running_delta > enabled_delta) || (enabled_delta & 0x1000000000000000ll)) + { + // running should be smaller than enabled, can't estimate + /* + * 21418391 HWC can have a 
negative count + * + * We've also seen enabled not only be smaller than running + * but in fact go negative. Guard against this. + */ + loss_estimate = 2; // token amount to aid in debugging perfctr oddities + } + else + { + // counter was running less than 100% of time + // Example: ena=7772268 run=6775669 raw_value=316004 scaled_value=362483 loss_est=46479 + uint64_t scaled_delta = (double) value_delta * enabled_delta / running_delta; + value_delta = scaled_delta; +#if 0 + // We should perhaps warn the user that multiplexing is going on, + // but hwcdrv_pcl.c doesn't know about the collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_* values. + // For now we simply don't report. + // Perhaps we should address the issue not here but in the caller collector_sigemt_handler(), + // but at that level "lost" has a meaning that's considerably broader than just multiplexing. + collector_interface->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n", + SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name, + ctr_list[idx].last_overflow_period, new_period); +#endif + } + TprintfT ((loss_estimate || set_error_flag) ? DBG_LT1 : DBG_LT3, + "hwcdrv: '%s' ipc=0x%llx ena=%llu run=%llu " + "value_delta=%lld(0x%llx) loss_est=%llu %s error_flag='%s'\n", + ctr_state->ev_def->name, (long long) ipc, + (long long) enabled_delta, (long long) running_delta, + (long long) value_delta, (long long) value_delta, + (unsigned long long) loss_estimate, + loss_estimate ? ", WARNING - SCALED" : "", + set_error_flag ? 
", ERRORFLAG" : ""); + if (set_error_flag == 1) + value_delta |= (1ULL << 63) /* HWCVAL_ERR_FLAG */; + *rvalue = value_delta; + *rlost = loss_estimate; + if (readsz != msgsz) + { + TprintfT (0, "hwcdrv: ERROR: perf_event sample not fully parsed\n"); + return -5; + } + return 0; +} + +static void +dump_perf_event_attr (struct perf_event_attr *at) +{ + TprintfT (DBG_LT2, "dump_perf_event_attr: size=%d type=%d sample_period=%lld\n" + " config=0x%llx config1=0x%llx config2=0x%llx wakeup_events=%lld __reserved_1=%lld\n", + (int) at->size, (int) at->type, (unsigned long long) at->sample_period, + (unsigned long long) at->config, (unsigned long long) at->config1, + (unsigned long long) at->config2, (unsigned long long) at->wakeup_events, + (unsigned long long) at->__reserved_1); +#define DUMP_F(fld) if (at->fld) TprintfT(DBG_LT2, " %-10s : %lld\n", #fld, (long long) at->fld) + DUMP_F (disabled); + DUMP_F (inherit); + DUMP_F (pinned); + DUMP_F (exclusive); + DUMP_F (exclude_user); + DUMP_F (exclude_kernel); + DUMP_F (exclude_hv); + DUMP_F (exclude_idle); + // DUMP_F(xmmap); + DUMP_F (comm); + DUMP_F (freq); + DUMP_F (inherit_stat); + DUMP_F (enable_on_exec); + DUMP_F (task); + DUMP_F (watermark); +} + +static void +init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period) +{ + memset (hw, 0, sizeof (struct perf_event_attr)); + hw->size = sizeof (struct perf_event_attr); // fwd/bwd compat + +#if defined(__i386__) || defined(__x86_64) + //note: Nehalem/Westmere OFFCORE_RESPONSE in upper 32 bits + hw->config = event; + hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw... 
+#elif defined(__aarch64__) + hw->type = (event >> 24) & 7; + hw->config = event & 0xff; +#elif defined(sparc) + //SPARC needs to be shifted up 16 bits + hw->config = (event & 0xFFFF) << 16; // uint64_t event + uint64_t regs = (event >> 20) & 0xf; // see sparc_pcbe.c + hw->config |= regs << 4; // for M8, supported PICs need to be placed at bits [7:4] + hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw... +#endif + + hw->sample_period = period; + hw->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ | + // PERF_SAMPLE_TID | + // PERF_SAMPLE_TIME | // possibly interesting + // PERF_SAMPLE_ADDR | + PERF_SAMPLE_READ | // HWC value + // PERF_SAMPLE_CALLCHAIN | // interesting + // PERF_SAMPLE_ID | + // PERF_SAMPLE_CPU | // possibly interesting + // PERF_SAMPLE_PERIOD | + // PERF_SAMPLE_STREAM_ID | + // PERF_SAMPLE_RAW | + 0; + hw->read_format = + PERF_FORMAT_TOTAL_TIME_ENABLED | // detect when hwc not scheduled + PERF_FORMAT_TOTAL_TIME_RUNNING | // detect when hwc not scheduled + // PERF_FORMAT_ID | + // PERF_FORMAT_GROUP | + 0; + hw->disabled = 1; /* off by default */ + + // Note: the following override config.priv bits! 
+ hw->exclude_user = (event & (1 << 16)) == 0; /* don't count user */ + hw->exclude_kernel = (event & (1 << 17)) == 0; /* ditto kernel */ + hw->exclude_hv = 1; /* ditto hypervisor */ + hw->wakeup_events = 1; /* wakeup every n events */ + dump_perf_event_attr (hw); +} + +static int +start_one_ctr (int ii, size_t pgsz, hdrv_pcl_ctx_t * pctx, char *error_string) +{ + // pe_attr should have been initialized in hwcdrv_create_counters() + struct perf_event_attr pe_attr; + memcpy (&pe_attr, &global_perf_event_def[ii].hw, sizeof (pe_attr)); + + // but we adjust the period, so make sure that pctx->ctr_list[ii].last_overflow_period has been set + pe_attr.sample_period = pctx->ctr_list[ii].last_overflow_period; + + int hwc_fd = perf_event_open (&pe_attr, pctx->tid, -1, -1, 0); + if (hwc_fd == -1) + { + TprintfT (DBG_LT1, "%s idx=%d perf_event_open failed, errno=%d\n", + error_string, ii, errno); + return 1; + } + + size_t buffer_area_sz = (NPAGES_PER_BUF + 1) * pgsz; // add a page for metadata + void * buf = mmap (NULL, buffer_area_sz, //YXXX is this a safe call? + PROT_READ | PROT_WRITE, MAP_SHARED, hwc_fd, 0); + if (buf == MAP_FAILED) + { + TprintfT (0, "sz = %ld, pgsz = %ld\n err=%s idx=%d mmap failed: %s\n", + (long) buffer_area_sz, (long) pgsz, error_string, ii, strerror (errno)); + return 1; + } + pctx->ctr_list[ii].ev_def = &global_perf_event_def[ii]; // why do we set ev_def? 
we never seem to use it + pctx->ctr_list[ii].fd = hwc_fd; + pctx->ctr_list[ii].buf_state.buf = buf; + pctx->ctr_list[ii].buf_state.pagesz = pgsz; + pctx->ctr_list[ii].value_state.prev_ena_ts = 0; + pctx->ctr_list[ii].value_state.prev_run_ts = 0; + pctx->ctr_list[ii].value_state.prev_value = 0; + pctx->ctr_list[ii].last_overflow_time = gethrtime (); + + /* set async mode */ + long flags = fcntl (hwc_fd, F_GETFL, 0) | O_ASYNC; + int rc = fcntl (hwc_fd, F_SETFL, flags); + if (rc == -1) + { + TprintfT (0, "%s idx=%d O_ASYNC failed\n", error_string, ii); + return 1; + } + + /* + * set lwp ownership of the fd + * See BUGS section of "man perf_event_open": + * The F_SETOWN_EX option to fcntl(2) is needed to properly get + * overflow signals in threads. This was introduced in Linux 2.6.32. + * Legacy references: + * see http://lkml.org/lkml/2009/8/4/128 + * google man fcntl F_SETOWN_EX -conflict + * "From Linux 2.6.32 onward, use F_SETOWN_EX to target + * SIGIO and SIGURG signals at a particular thread." + * http://icl.cs.utk.edu/papi/docs/da/d2a/examples__v2_8x_2self__smpl__multi_8c.html + * See 2010 CSCADS presentation by Eranian + */ + struct f_owner_ex fowner_ex; + fowner_ex.type = F_OWNER_TID; + fowner_ex.pid = pctx->tid; + rc = fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex); + if (rc == -1) + { + TprintfT (0, "%s idx=%d F_SETOWN failed\n", error_string, ii); + return 1; + } + + /* Use sigio so handler can determine FD via siginfo->si_fd. 
*/ + rc = fcntl (hwc_fd, F_SETSIG, SIGIO); + if (rc == -1) + { + TprintfT (0, "%s idx=%d F_SETSIG failed\n", error_string, ii); + return 1; + } + return 0; +} + +static int +stop_one_ctr (int ii, counter_state_t *ctr_list) +{ + int hwc_rc = 0; + if (-1 == ioctl (ctr_list[ii].fd, PERF_EVENT_IOC_DISABLE, 1)) + { + TprintfT (0, "hwcdrv: ERROR: PERF_EVENT_IOC_DISABLE #%d failed: errno=%d\n", ii, errno); + hwc_rc = HWCFUNCS_ERROR_GENERIC; + } + void *buf = ctr_list[ii].buf_state.buf; + if (buf) + { + size_t bufsz = (NPAGES_PER_BUF + 1) * ctr_list[ii].buf_state.pagesz; + ctr_list[ii].buf_state.buf = NULL; + int tmprc = munmap (buf, bufsz); + if (tmprc) + { + TprintfT (0, "hwcdrv: ERROR: munmap() #%d failed: errno=%d\n", ii, errno); + hwc_rc = HWCFUNCS_ERROR_GENERIC; + } + } + if (-1 == close (ctr_list[ii].fd)) + { + TprintfT (0, "hwcdrv: ERROR: close(fd) #%d failed: errno=%d\n", ii, errno); + hwc_rc = HWCFUNCS_ERROR_GENERIC; + } + return hwc_rc; +} + +/* HWCDRV_API for thread-specific actions */ +HWCDRV_API int +hwcdrv_lwp_init (void) +{ + return hwcdrv_start (); +} + +HWCDRV_API void +hwcdrv_lwp_fini (void) +{ + hwcdrv_free_counters (); /* also sets pctx->ctr_list=NULL; */ +} + +/* open */ +static int +hdrv_pcl_internal_open () +{ + if (hdrv_pcl_state.internal_open_called) + { + TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open: already called\n"); + return HWCFUNCS_ERROR_ALREADY_CALLED; + } + + // determine if PCL is available + perf_event_def_t tmp_event_def; + memset (&tmp_event_def, 0, sizeof (tmp_event_def)); + struct perf_event_attr *pe_attr = &tmp_event_def.hw; + init_perf_event (pe_attr, 0, 0); + pe_attr->type = PERF_TYPE_HARDWARE; // specify abstracted HW event + pe_attr->config = PERF_COUNT_HW_INSTRUCTIONS; // specify abstracted insts + int hwc_fd = perf_event_open (pe_attr, + 0, // pid/tid, 0 is self + -1, // cpu, -1 is per-thread mode + -1, // group_fd, -1 is root + 0); // flags + if (hwc_fd == -1) + { + TprintfT (DBG_LT1, "hwcdrv: WARNING: 
hdrv_pcl_internal_open:" + " perf_event_open() failed, errno=%d\n", errno); + goto internal_open_error; + } + + /* see if the PCL is new enough to know about F_SETOWN_EX */ + struct f_owner_ex fowner_ex; + fowner_ex.type = F_OWNER_TID; + fowner_ex.pid = hwcdrv_gettid (); // "pid=tid" is correct w/F_OWNER_TID + if (fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex) == -1) + { + TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open: " + "F_SETOWN failed, errno=%d\n", errno); + close (hwc_fd); + goto internal_open_error; + } + close (hwc_fd); + + hdrv_pcl_state.internal_open_called = 1; + hdrv_pcl_state.library_ok = 1; // set to non-zero to show it's initted + hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED; + TprintfT (DBG_LT2, "hwcdrv: hdrv_pcl_internal_open()\n"); + for (int ii = 0; hdrv_pcbe_drivers[ii]; ii++) + { + hdrv_pcbe_api_t *ppcbe = hdrv_pcbe_drivers[ii]; + if (!ppcbe->hdrv_pcbe_init ()) + { + hdrv_pcl_about.cpcN_cciname = ppcbe->hdrv_pcbe_impl_name (); + hdrv_pcl_about.cpcN_cpuver = hwcdrv_lookup_cpuver (hdrv_pcl_about.cpcN_cciname); + if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED) + goto internal_open_error; + hdrv_pcl_about.cpcN_npics = ppcbe->hdrv_pcbe_ncounters (); + hdrv_pcl_about.cpcN_docref = ppcbe->hdrv_pcbe_cpuref (); + hdrv_pcl_state.get_events = ppcbe->hdrv_pcbe_get_events; + hwcdrv_get_x86_eventnum = ppcbe->hdrv_pcbe_get_eventnum; + break; + } + } + if (hdrv_pcl_about.cpcN_npics > MAX_PICS) + { + TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open:" + " reducing number of HWCs from %u to %u on processor '%s'\n", + hdrv_pcl_about.cpcN_npics, MAX_PICS, hdrv_pcl_about.cpcN_cciname); + hdrv_pcl_about.cpcN_npics = MAX_PICS; + } + TprintfT (DBG_LT1, "hwcdrv: hdrv_pcl_internal_open:" + " perf_event cpuver=%d, name='%s'\n", + hdrv_pcl_about.cpcN_cpuver, hdrv_pcl_about.cpcN_cciname); + return 0; + +internal_open_error: + hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED; + hdrv_pcl_about.cpcN_npics = 0; + hdrv_pcl_about.cpcN_docref = NULL; 
+ hdrv_pcl_about.cpcN_cciname = NULL; + return HWCFUNCS_ERROR_NOT_SUPPORTED; +} +/* Default context accessor installed by hwcdrv_init(): every caller gets the same static context until hwcdrv_enable_mt() installs another accessor. */ +static void * +single_thread_tsd_ftn () +{ + static hdrv_pcl_ctx_t tsd_context; + return &tsd_context; +} + +/* HWCDRV_API + * hwcdrv_init: installs single_thread_tsd_ftn as the context accessor, reports sizeof (hdrv_pcl_ctx_t) + * through tsd_sz when it is not NULL, and then opens the driver. + * Returns HWCFUNCS_ERROR_ALREADY_CALLED if internal_open_called is already set, + * otherwise the result of hdrv_pcl_internal_open(). abort_ftn is not used here. + */ +HWCDRV_API int +hwcdrv_init (hwcfuncs_abort_fn_t abort_ftn, int *tsd_sz) +{ + hdrv_pcl_state.find_vpc_ctx = single_thread_tsd_ftn; + if (tsd_sz) + *tsd_sz = sizeof (hdrv_pcl_ctx_t); + + if (hdrv_pcl_state.internal_open_called) + return HWCFUNCS_ERROR_ALREADY_CALLED; + return hdrv_pcl_internal_open (); +} +/* Copies the cached driver description (hdrv_pcl_about) into each output argument that is not NULL. */ +HWCDRV_API void +hwcdrv_get_info (int *cpuver, const char **cciname, uint_t *npics, + const char **docref, uint64_t *support) +{ + if (cpuver) + *cpuver = hdrv_pcl_about.cpcN_cpuver; + if (cciname) + *cciname = hdrv_pcl_about.cpcN_cciname; + if (npics) + *npics = hdrv_pcl_about.cpcN_npics; + if (docref) + *docref = hdrv_pcl_about.cpcN_docref; + if (support) + *support = HWCFUNCS_SUPPORT_OVERFLOW_PROFILING | HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID; +} +/* Installs tsd_ftn as the per-thread context accessor; returns 0, or HWCFUNCS_ERROR_UNAVAIL when tsd_ftn is NULL. */ +HWCDRV_API int +hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn) +{ + if (tsd_ftn) + hdrv_pcl_state.find_vpc_ctx = tsd_ftn; + else + { + TprintfT (0, "hwcdrv: ERROR: enable_mt(): tsd_ftn==NULL\n"); + return HWCFUNCS_ERROR_UNAVAIL; + } + return 0; +} +/* Reports HWCs through hwc_cb (via the installed get_events) and attribute names through attr_cb. + * Returns -1 when the get_events count is 0, which includes hwc_cb==NULL or no get_events installed; 0 otherwise. + */ +HWCDRV_API int +hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb) +{ + int count = 0; + if (hwc_cb && hdrv_pcl_state.get_events) + count = hdrv_pcl_state.get_events (hwc_cb); + if (attr_cb) + for (int ii = 0; perfctr_attrs_table && perfctr_attrs_table[ii].attrname; ii++) + attr_cb (perfctr_attrs_table[ii].attrname); + if (!count) + return -1; + return 0; +} +/* Thin wrapper: forwards to hwcdrv_assign_all_regnos(). */ +HWCDRV_API int +hwcdrv_assign_regnos (Hwcentry* entries[], unsigned numctrs) +{ + return hwcdrv_assign_all_regnos (entries, numctrs); +} +/* Issues PERF_EVENT_IOC_REFRESH with a count of 1 on fd; returns 0, or HWCFUNCS_ERROR_UNAVAIL if the ioctl fails. */ +static int +internal_hwc_start (int fd) +{ + int rc = ioctl (fd, PERF_EVENT_IOC_REFRESH, 1); + if (rc == -1) + { + TprintfT (DBG_LT0, "hwcdrv: ERROR: internal_hwc_start:" + " PERF_EVENT_IOC_REFRESH(fd=%d) failed: errno=%d\n", fd, errno); + return 
HWCFUNCS_ERROR_UNAVAIL; + } + TprintfT (DBG_LT3, "hwcdrv: internal_hwc_start(fd=%d)\n", fd); + return 0; +} +/* Overflow-signal entry point: clears eventp/lost_events, takes the signalling fd from si when si_code is POLL_HUP, + * then reads the records pending in every counter's buffer. Counters that produced records are re-armed + * with internal_hwc_start() before returning. + * lost_events->ce_pic[] may also carry marker values: 1 = signalling counter not identified (slot 0), + * 2 = non-sample record skipped, 3 = read_sample() failed, 4 = skip_buf() failed. + */ +HWCDRV_API int +hwcdrv_overflow (siginfo_t *si, hwc_event_t *eventp, hwc_event_t *lost_events) +{ + /* set expired counters to overflow value and all others to 0 */ + /* return 0: OK, counters should be restarted */ + /* return non-zero: eventp not set, counters should not be restarted */ + /* clear return values */ + int ii; + for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) + { + eventp->ce_pic[ii] = 0; + lost_events->ce_pic[ii] = 0; + } + hrtime_t sig_ts = gethrtime (); //YXXX get this from HWC event? + eventp->ce_hrt = sig_ts; + lost_events->ce_hrt = sig_ts; + + /* determine source signal */ + int signal_fd = -1; + switch (si->si_code) + { + case POLL_HUP: /* expected value from pcl */ + /* According to Stephane Eranian: + * "expect POLL_HUP instead of POLL_IN because we are + * in one-shot mode (IOC_REFRESH)" + */ + signal_fd = si->si_fd; + break; + case SI_TKILL: /* event forwarded by tkill */ + /* DBX can only forward SI_TKILL when it detects POLL_HUP + * unfortunately, this means that si->si_fd has been lost... + * We need to process the buffers, but we don't know the fd! + */ + TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" + " SI_TKILL detected\n", sig_ts); + break; + default: + // "sometimes we see a POLL_IN (1) with very high event rates," + // according to eranian(?) 
+ TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" + " unexpected si_code 0x%x\n", sig_ts, si->si_code); + return HWCFUNCS_ERROR_GENERIC; + } + + hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx (); + if (!pctx) + { + TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" + " tsd context is NULL\n", sig_ts); + return HWCFUNCS_ERROR_UNEXPECTED; + } + counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list; + if (!ctr_list) + { + TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" + " ctr_list is NULL\n", sig_ts); + return HWCFUNCS_ERROR_UNEXPECTED; + } + + /* clear needs_restart flag */ + for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) + ctr_list[ii].needs_restart = 0; + + /* attempt to identify the counter to read */ + int signal_idx = -1; + pctx->signal_fd = signal_fd; // save the signal provided by siginfo_t + if (signal_fd != -1) + { + for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) + { + if (ctr_list[ii].fd == signal_fd) + { + signal_idx = ii; + break; + } + } + } + + if (signal_idx < 0) + { + TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" + " pmc not determined!\n", sig_ts); + lost_events->ce_pic[0] = 1; /* record a bogus value into experiment */ + // note: bogus value may get overwritten in loop below + } + + /* capture sample(s). In addition to signal_idx, check other counters. 
*/ + struct perf_event_header sheader; + int idx; + for (idx = 0; idx < hdrv_pcl_state.hwcdef_cnt; idx++) + { + int num_recs = 0; + while (1) + { + /* check for samples */ + struct perf_event_mmap_page *metadata = ctr_list[idx].buf_state.buf; + if (metadata == NULL) + break; // empty + if (metadata->data_tail == metadata->data_head) + break; // empty + + /* read header */ + if (read_buf (&ctr_list[idx].buf_state, &sheader, sizeof (sheader))) + break; + num_recs++; + + /* check for PERF_RECORD_SAMPLE */ + size_t datasz = sheader.size - sizeof (struct perf_event_header); /* NOTE(review): unsigned arithmetic; wraps if sheader.size is smaller than the header, which is not checked here */ + if (sheader.type != PERF_RECORD_SAMPLE) + { + TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" + " unexpected recd type=%d\n", + sig_ts, sheader.type); + if (skip_buf (&ctr_list[idx].buf_state, datasz)) + { + TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" + " skip recd type=%d failed\n", sig_ts, sheader.type); + lost_events->ce_pic[idx] = 4; /* record a bogus value */ + break; // failed to skip buffer?? + } + lost_events->ce_pic[idx] = 2; /* record a bogus value */ + continue; // advance to next record + } + + /* type is PERF_RECORD_SAMPLE */ + uint64_t value, lostv; + if (read_sample (&ctr_list[idx], datasz, &value, &lostv)) + { + TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" + " read_sample() failed\n", sig_ts); + lost_events->ce_pic[idx] = 3; // record a bogus value + break; // failed to read sample data?? 
+ } + TprintfT (DBG_LT3, "hwcdrv: sig_ts=%llu: hwcdrv_overflow:" + " idx=%d value=%llu lost=%llu\n", (unsigned long long) sig_ts, + idx, (unsigned long long) value, (unsigned long long) lostv); + if (eventp->ce_pic[idx]) + { + TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" + " idx=%d previous sample recorded as lost_event\n", sig_ts, idx); + lost_events->ce_pic[idx] += eventp->ce_pic[idx]; + } + eventp->ce_pic[idx] = value; + lost_events->ce_pic[idx] += lostv; + } + + /* debug output for unexpected (but common) cases */ + if (idx == signal_idx) + { + if (num_recs != 1) + TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" + " %d records for signal_idx=%d\n", sig_ts, num_recs, signal_idx); + } + else if (num_recs) + TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" + " %d unexpected record(s) for idx=%d (signal_idx=%d)\n", + sig_ts, num_recs, idx, signal_idx); + + /* trigger counter restart whenever records were found */ + if (num_recs) + { + /* check whether to adapt the overflow interval */ + /* This is the Linux version. + * The Solaris version is in hwprofile.c collector_update_overflow_counters(). + */ + hrtime_t min_time = global_perf_event_def[idx].min_time; + if (min_time > 0 // overflow interval is adaptive + && sig_ts - ctr_list[idx].last_overflow_time < min_time) // last interval below min + { + /* pick a new overflow interval */ + /* roughly doubled, but add funny numbers */ + /* hopefully the result is prime or not a multiple of some # of ops/loop */ + uint64_t new_period = 2 * ctr_list[idx].last_overflow_period + 37; +#if 0 + // On Solaris, we report the adjustment to the log file. + // On Linux it's hard for us to do so since hwcdrv_pcl.c doesn't know about collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_HWCADJ. + // For now we simply don't report. 
+ collector_interface->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n", + SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name, + ctr_list[idx].last_overflow_period, new_period); +#endif + /* There are a variety of ways of resetting the period on Linux. + * The most elegant is + * ioctl(fd,PERF_EVENT_IOC_PERIOD,&period) + * but check the perf_event_open man page for PERF_EVENT_IOC_PERIOD: + * > Prior to Linux 2.6.36 this ioctl always failed due to a bug in the kernel. + * > Prior to Linux 3.14 (or 3.7 on ARM), the new period did not take effect + * until after the next overflow. + * So we're kind of stuck shutting the fd down and restarting it with the new period. + */ + if (stop_one_ctr (idx, ctr_list)) + { + // EUGENE figure out what to do on error + } + ctr_list[idx].last_overflow_period = new_period; + if (start_one_ctr (idx, ctr_list[idx].buf_state.pagesz, pctx, "hwcdrv: ERROR: hwcdrv_overflow (readjust overflow):")) + { + // EUGENE figure out what to do on error + } + } + ctr_list[idx].last_overflow_time = sig_ts; +#if 0 + ctr_list[idx].needs_restart = 1; +#else // seems to be more reliable to restart here instead of hwcdrv_sighlr_restart() + internal_hwc_start (ctr_list[idx].fd); +#endif + } + } + return 0; // OK to restart counters +} +/* With the "#if 0" branch disabled this function does nothing and returns 0; counters are restarted in hwcdrv_overflow() instead. */ +HWCDRV_API int +hwcdrv_sighlr_restart (const hwc_event_t *pp) +{ +#if 0 // restarting here doesn't seem to work as well as restarting in hwcdrv_overflow() + hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx (); + if (!pctx) + { + TprintfT (DBG_LT0, "hwcdrv: ERROR: hwcdrv_sighlr_restart: find_vpc_ctx()==NULL\n"); + return -1; + } + counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list; + if (!ctr_list) + { + TprintfT (DBG_LT0, "hwcdrv: WARNING: hwcdrv_sighlr_restart: ctr_list is NULL\n"); + return -1; + } + int errors = 0; + for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) + { + if (ctr_list[ii].needs_restart) + errors |= internal_hwc_start (ctr_list[ii].fd); + 
ctr_list[ii].needs_restart = 0; + } + return errors; +#else + return 0; +#endif +} + +/* create counters based on hwcdef[] + * Returns HWCFUNCS_ERROR_HWCARGS when hwcdef_cnt exceeds cpcN_npics, when the CPU version is + * CPUVER_UNDEFINED, or when hwcfuncs_get_x86_eventsel() fails for a counter. + * Otherwise each global_perf_event_def[idx] is zeroed and filled from hwcdef[idx]. + */ +HWCDRV_API int +hwcdrv_create_counters (unsigned hwcdef_cnt, Hwcentry *hwcdef) +{ + if (hwcdef_cnt > hdrv_pcl_about.cpcN_npics) + { + logerr (GTXT ("More than %d counters were specified\n"), hdrv_pcl_about.cpcN_npics); /*!*/ + return HWCFUNCS_ERROR_HWCARGS; + } + if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED) + { + logerr (GTXT ("Processor not supported\n")); + return HWCFUNCS_ERROR_HWCARGS; + } + + /* add counters */ + for (unsigned idx = 0; idx < hwcdef_cnt; idx++) + { + perf_event_def_t *glb_event_def = &global_perf_event_def[idx]; + memset (glb_event_def, 0, sizeof (perf_event_def_t)); + unsigned int pmc_sel; + eventsel_t evntsel; + if (hwcfuncs_get_x86_eventsel (hwcdef[idx].reg_num, + hwcdef[idx].int_name, &evntsel, &pmc_sel)) + { + TprintfT (0, "hwcdrv: ERROR: hwcfuncs_get_x86_eventsel() failed\n"); + return HWCFUNCS_ERROR_HWCARGS; + } + glb_event_def->reg_num = pmc_sel; + glb_event_def->eventsel = evntsel; + glb_event_def->counter_preload = hwcdef[idx].val; + glb_event_def->min_time = hwcdef[idx].min_time; + glb_event_def->name = strdup (hwcdef[idx].name); // memory leak??? 
very minor + init_perf_event (&glb_event_def->hw, glb_event_def->eventsel, + glb_event_def->counter_preload); + TprintfT (DBG_LT1, "hwcdrv: create_counters: pic=%u name='%s' interval=%lld" + "(min_time=%lld): reg_num=0x%x eventsel=0x%llx ireset=%lld usr=%lld sys=%lld\n", + idx, hwcdef[idx].int_name, (long long) glb_event_def->counter_preload, + (long long) glb_event_def->min_time, (int) glb_event_def->reg_num, + (long long) glb_event_def->eventsel, + (long long) HW_INTERVAL_PRESET (hwcdef[idx].val), + (long long) glb_event_def->hw.exclude_user, + (long long) glb_event_def->hw.exclude_kernel); /* NOTE(review): the usr=/sys= fields print the exclude_user/exclude_kernel flags */ + } + + hdrv_pcl_state.hwcdef_cnt = hwcdef_cnt; + return 0; +} +/* Stops every counter in the calling thread's ctr_list and detaches the list from the context. + * Returns 0 when counters are not enabled or the list is already NULL, HWCFUNCS_ERROR_GENERIC when the + * context is NULL or any stop_one_ctr() call fails. + */ +HWCDRV_API int +hwcdrv_free_counters () // note: only performs shutdown for this thread +{ + hdrv_pcl_ctx_t * pctx; + if (!COUNTERS_ENABLED ()) + return 0; + pctx = hdrv_pcl_state.find_vpc_ctx (); + if (!pctx) + { + TprintfT (0, "hwcdrv: WARNING: hwcdrv_free_counters: tsd context is NULL\n"); + return HWCFUNCS_ERROR_GENERIC; + } + counter_state_t *ctr_list = pctx->ctr_list; + if (!ctr_list) + { + // fork child: prolog suspends hwcs, then epilog frees them + TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_free_counters: ctr_list is already NULL\n"); + return 0; + } + int hwc_rc = 0; + for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) + if (stop_one_ctr (ii, ctr_list)) + hwc_rc = HWCFUNCS_ERROR_GENERIC; + TprintfT (DBG_LT1, "hwcdrv: hwcdrv_free_counters(tid=0x%lx).\n", pctx->tid); + pctx->ctr_list = NULL; /* NOTE(review): the list is only detached here; no free() call is made on it */ + return hwc_rc; +} + +HWCDRV_API int +hwcdrv_start (void) /* must be called from each thread ? 
*/ +{ + hdrv_pcl_ctx_t *pctx = NULL; + if (!COUNTERS_ENABLED ()) + { + TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_start: no counters to start \n"); + return 0; + } + if (!hdrv_pcl_state.library_ok) + { + TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: library is not open\n"); + return HWCFUNCS_ERROR_NOT_SUPPORTED; + } + + /* + * set up per-thread context + */ + pctx = hdrv_pcl_state.find_vpc_ctx (); + if (!pctx) + { + TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: tsd context is NULL\n"); + return HWCFUNCS_ERROR_UNEXPECTED; + } + pctx->tid = hwcdrv_gettid (); + TprintfT (DBG_LT1, "hwcdrv: hwcdrv_start(tid=0x%lx)\n", pctx->tid); + + /* + * create per-thread counter list + * (it is stored in pctx->ctr_list before binding; the cleanup path, + * hwcdrv_free_counters(), reads the list from there) + */ + counter_state_t *ctr_list = (counter_state_t *) calloc (hdrv_pcl_state.hwcdef_cnt, + sizeof (counter_state_t)); + if (!ctr_list) + { + TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: calloc(ctr_list) failed\n"); + return HWCFUNCS_ERROR_MEMORY; + } + int ii; + for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) + ctr_list[ii].fd = -1; // invalidate fds in case we have to close prematurely + pctx->ctr_list = ctr_list; + + /* + * bind the counters + */ + size_t pgsz = sysconf (_SC_PAGESIZE); + for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) + { + ctr_list[ii].last_overflow_period = global_perf_event_def[ii].hw.sample_period; + if (start_one_ctr (ii, pgsz, pctx, "hwcdrv: ERROR: hwcdrv_start:")) goto hwcdrv_start_cleanup; + } + + /* + * start the counters + * NOTE(review): the rc < 0 test below relies on HWCFUNCS_ERROR_UNAVAIL being negative -- confirm in hwcfuncs.h + */ + for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) + { + int rc = internal_hwc_start (ctr_list[ii].fd); + if (rc < 0) + goto hwcdrv_start_cleanup; + } + return 0; + +hwcdrv_start_cleanup: + hwcdrv_free_counters (); // PERF_EVENT_IOC_DISABLE and close() for all fds + return HWCFUNCS_ERROR_UNAVAIL; +} +/* Suspends this thread's counters by calling hwcdrv_free_counters(); returns 0 immediately when no counters are enabled. */ +HWCDRV_API int +hwcdrv_lwp_suspend (void) /* must be called from each thread */ +{ + if (!COUNTERS_ENABLED ()) + { + TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_suspend: no counters\n"); + return 0; + } + TprintfT (DBG_LT1, "hwcdrv: 
hwcdrv_lwp_suspend()\n"); + return hwcdrv_free_counters (); +} +/* Resumes this thread's counters by calling hwcdrv_start(); returns 0 immediately when no counters are enabled. */ +HWCDRV_API int +hwcdrv_lwp_resume (void) /* must be called from each thread */ +{ + if (!COUNTERS_ENABLED ()) + { + TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_resume: no counters\n"); + return 0; + } + TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_resume()\n"); + return hwcdrv_start (); +} +/* Reads nothing: zeroes overflow_data (timestamp and all MAX_PICS slots), resets each sampled_data slot when + * sampled_data is not NULL, and returns 0. overflow_data must not be NULL. + */ +HWCDRV_API int +hwcdrv_read_events (hwc_event_t *overflow_data, hwc_event_samples_t *sampled_data) +{ + overflow_data->ce_hrt = 0; + for (int i = 0; i < MAX_PICS; i++) + { + overflow_data->ce_pic[i] = 0; + if (sampled_data) + HWCFUNCS_SAMPLE_RESET (&sampled_data->sample[i]); + } + return 0; +} + +/*---------------------------------------------------------------------------*/ +/* HWCDRV_API: driver table. The initializer is positional, so the entries must stay in the member order of hwcdrv_api_t. */ + +hwcdrv_api_t hwcdrv_pcl_api = { + hwcdrv_init, + hwcdrv_get_info, + hwcdrv_enable_mt, + hwcdrv_get_descriptions, + hwcdrv_assign_regnos, + hwcdrv_create_counters, + hwcdrv_start, + hwcdrv_overflow, + hwcdrv_read_events, + hwcdrv_sighlr_restart, + hwcdrv_lwp_suspend, + hwcdrv_lwp_resume, + hwcdrv_free_counters, + hwcdrv_lwp_init, + hwcdrv_lwp_fini, + -1 // hwcdrv_init_status +}; diff --git a/gprofng/common/hwcdrv.h b/gprofng/common/hwcdrv.h new file mode 100644 index 0000000..14c55cf --- /dev/null +++ b/gprofng/common/hwcdrv.h @@ -0,0 +1,330 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/* Hardware counter profiling driver's header */ + +#ifndef __HWCDRV_H +#define __HWCDRV_H + +#include "hwcfuncs.h" +/* Signal used for HWC overflow events: SIGIO when `linux` is defined, SIGEMT otherwise. */ +#ifdef linux +#define HWCFUNCS_SIGNAL SIGIO +#define HWCFUNCS_SIGNAL_STRING "SIGIO" +#else +#define HWCFUNCS_SIGNAL SIGEMT +#define HWCFUNCS_SIGNAL_STRING "SIGEMT" +#endif + +#ifndef LIBCOLLECTOR_SRC /* not running in libcollector */ +#include <string.h> + +#else /* running in libcollector */ +#include "collector_module.h" +#include "libcol_util.h" +/* Inside libcollector every driver symbol is renamed with a __collector_ prefix. */ +#define get_hwcdrv __collector_get_hwcdrv +#define hwcdrv_drivers __collector_hwcdrv_drivers +#define hwcdrv_cpc1_api __collector_hwcdrv_cpc1_api +#define hwcdrv_cpc2_api __collector_hwcdrv_cpc2_api +#define hwcdrv_default __collector_hwcdrv_default +#define hwcdrv_driver __collector_hwcdrv_driver +#define hwcdrv_init __collector_hwcdrv_init +#define hwcdrv_get_info __collector_hwcdrv_get_info +#define hwcdrv_enable_mt __collector_hwcdrv_enable_mt +#define hwcdrv_get_descriptions __collector_hwcdrv_get_descriptions +#define hwcdrv_assign_regnos __collector_hwcdrv_assign_regnos +#define hwcdrv_create_counters __collector_hwcdrv_create_counters +#define hwcdrv_start __collector_hwcdrv_start +#define hwcdrv_overflow __collector_hwcdrv_overflow +#define hwcdrv_read_events __collector_hwcdrv_read_events +#define hwcdrv_sighlr_restart __collector_hwcdrv_sighlr_restart +#define hwcdrv_lwp_suspend __collector_hwcdrv_lwp_suspend +#define hwcdrv_lwp_resume __collector_hwcdrv_lwp_resume +#define hwcdrv_free_counters __collector_hwcdrv_free_counters +#define hwcdrv_lwp_init __collector_hwcdrv_lwp_init +#define hwcdrv_lwp_fini __collector_hwcdrv_lwp_fini +#define hwcdrv_assign_all_regnos __collector_hwcdrv_assign_all_regnos +#define hwcdrv_lookup_cpuver __collector_hwcdrv_lookup_cpuver +#define 
hwcfuncs_int_capture_errmsg __collector_hwcfuncs_int_capture_errmsg + +#define GTXT(x) x + +/* Implemented by libcollector. + NOTE: free() is defined below to expand to nothing, so free(p) calls compile to no-ops in this build. */ +#define calloc __collector_calloc +#define close CALL_UTIL(close) +#define fcntl CALL_UTIL(fcntl) +#define fprintf CALL_UTIL(fprintf) +//#define free __collector_free +#define free(...) +#define gethrtime __collector_gethrtime +#define ioctl CALL_UTIL(ioctl) +#define malloc __collector_malloc +#define memcpy __collector_memcpy +#define memset CALL_UTIL(memset) +#define mmap CALL_UTIL(mmap) +#define snprintf CALL_UTIL(snprintf) +#define strchr CALL_UTIL(strchr) +#define strcmp CALL_UTIL(strcmp) +#define strncmp CALL_UTIL(strncmp) +#define strcpy CALL_UTIL(strcpy) +#define strdup __collector_strdup +#define strncpy CALL_UTIL(strncpy) +#define strerror CALL_UTIL(strerror) +#define strlen CALL_UTIL(strlen) +#define strstr CALL_UTIL(strstr) +#define strtol CALL_UTIL(strtol) +#define strtoll CALL_UTIL(strtoll) +#define strtoul CALL_UTIL(strtoul) +#define strtoull CALL_UTIL(strtoull) +#define syscall CALL_UTIL(syscall) +#define sysconf CALL_UTIL(sysconf) +#define vsnprintf CALL_UTIL(vsnprintf) + +#endif /* --- LIBCOLLECTOR_SRC --- */ + +/* TprintfT(<level>,...) definitions. Adjust per module as needed */ +#define DBG_LT0 0 // for high-level configuration, unexpected errors/warnings +#define DBG_LT1 1 // for configuration details, warnings +#define DBG_LT2 2 +#define DBG_LT3 3 +#define DBG_LT4 4 + +#ifdef __cplusplus +extern "C" +{ +#endif + + /* hwcdrv api */ + typedef struct + { + int (*hwcdrv_init)(hwcfuncs_abort_fn_t abort_ftn, int * tsd_sz); + /* Initialize hwc counter library (do not call again after fork) + Must be called before other functions. 
+ Input: + <abort_ftn>: NULL or callback function to be used for fatal errors + <tsd_sz>: If not NULL, returns size in bytes required for thread-specific storage + Return: 0 if successful + */ + + void (*hwcdrv_get_info)(int *cpuver, const char **cciname, uint_t *npics, + const char **docref, uint64_t *support); + /* get info about session + Input: + <cpuver>: if not NULL, returns value of CPC cpu version + <cciname>: if not NULL, returns name of CPU + <npics>: if not NULL, returns maximum # of HWCs + <docref>: if not NULL, returns documentation reference + <support>: if not NULL, returns bitmask (see hwcfuncs.h) of hwc support + Return: none (the function is void) + */ + + int (*hwcdrv_enable_mt)(hwcfuncs_tsd_get_fn_t tsd_ftn); + /* Enables multi-threaded mode (do not need to call again after fork) + Input: + <tsd_ftn>: If <tsd_sz>==0, this parameter is ignored. + Otherwise: + tsd_ftn() must be able to return a pointer to thread-specific + memory of <tsd_sz> bytes. + For a given thread, tsd_ftn() must + always return the same pointer. + Return: 0 if successful, nonzero otherwise + */ + + int (*hwcdrv_get_descriptions)(hwcf_hwc_cb_t *hwc_find_action, + hwcf_attr_cb_t *attr_find_action); + /* Initiate callbacks with all available HWC names and HWC attributes. + Input: + <hwc_find_action>: if not NULL, will be called once for each HWC + <attr_find_action>: if not NULL, will be called once for each attribute + Return: 0 if successful + or a cpc return code upon error + */ + + int (*hwcdrv_assign_regnos)(Hwcentry* entries[], unsigned numctrs); + /* Assign entries[]->reg_num values as needed by platform + Input: + <entries>: array of counters + <numctrs>: number of items in <entries> + Return: 0 if successful + HWCFUNCS_ERROR_HWCINIT if resources unavailable + HWCFUNCS_ERROR_HWCARGS if counters were not specified correctly + */ + + int (*hwcdrv_create_counters)(unsigned hwcdef_cnt, Hwcentry *hwcdef); + /* Create the counters, but don't start them. 
+ call this once in main thread to create counters. + Input: + <hwcdef_cnt>: number of counter definitions. + <hwcdef>: counter definitions. + Return: 0 if successful + or a cpc return code upon error + */ + + int (*hwcdrv_start)(void); + /* Start the counters. + call this once in main thread to start counters. + Return: 0 if successful + or a cpc return code upon error + */ + + int (*hwcdrv_overflow)(siginfo_t *si, hwc_event_t *sample, + hwc_event_t *lost_samples); + /* Linux only. Capture current counter values. + This is intended to be called from the HWCFUNCS_SIGNAL handler (SIGIO on Linux); + Input: + <si>: signal handler context information + <sample>: returns non-zero values for counters that overflowed + <lost_samples>: returns non-zero values for counters that "lost" counts + Return: 0 if successful + or a cpc return code upon error. + */ + + int (*hwcdrv_read_events)(hwc_event_t *overflow_data, + hwc_event_samples_t *sampled_data); + /* Read current counter values and samples. Read of samples is destructive. + Note: hwcdrv_read_events is not supported on Linux. + <overflow_data>: returns snapshot of counter values + <sampled_data>: returns sampled data + Return: 0 if successful + HWCFUNCS_ERROR_UNAVAIL if resource unavailable (e.g. called before initted) + (other values may be possible) + */ + + int (*hwcdrv_sighlr_restart)(const hwc_event_t* startVals); + /* Restarts the counters at the given value. + This is intended to be called from the HWCFUNCS_SIGNAL handler (SIGEMT, or SIGIO on Linux); + Input: + <startVals>: Solaris: new start values. + Linux: pointer may be NULL; startVals is ignored. + Return: 0 if successful + or a cpc return code upon error. + */ + + int (*hwcdrv_lwp_suspend)(void); + /* Attempt to stop counters on this lwp only. + hwcdrv_lwp_resume() should be used to restart counters. + Return: 0 if successful + or a cpc return code upon error. + */ + + int (*hwcdrv_lwp_resume)(void); + /* Attempt to restart counters on this lwp when counters were + stopped with hwcdrv_lwp_suspend(). 
+ Return: 0 if successful + or a cpc return code upon error. + */ + + int (*hwcdrv_free_counters)(void); + /* Stops counters on this lwp only and frees resources. + This will fail w/ unpredictable results if other lwps are + still running. After this call returns, + hwcdrv_create_counters() may be called with new values. + Return: 0 if successful + or a cpc return code upon error. + */ + + int (*hwcdrv_lwp_init)(void); + /* per-thread counter init. + Solaris: nop. + Linux: just after thread creation call this from inside thread + to create context and start counters. + Return: 0 if successful + or a perfctr return code upon error + */ + + void (*hwcdrv_lwp_fini)(void); + /* per-thread counter cleanup. + Solaris: nop. + Linux: call in each thread upon thread destruction. + */ + + int hwcdrv_init_status; /* last member of the driver table; hwcdrv_pcl_api initializes it to -1 */ + } hwcdrv_api_t; + + extern hwcdrv_api_t *get_hwcdrv (); + extern hwcdrv_api_t *__collector_get_hwcdrv (); + extern int __collector_hwcfuncs_bind_descriptor (const char *defstring); + extern Hwcentry **__collector_hwcfuncs_get_ctrs (unsigned *defcnt); + extern hwcdrv_api_t *hwcdrv_drivers[]; // array of available drivers + + /* prototypes for internal use by hwcdrv drivers */ + typedef struct + { // see hwcdrv_get_info() for field definitions + int cpcN_cpuver; + uint_t cpcN_npics; + const char *cpcN_docref; + const char *cpcN_cciname; + } hwcdrv_about_t; + + extern int hwcdrv_assign_all_regnos (Hwcentry* entries[], unsigned numctrs); + /* assign user's counters to specific CPU registers */ + + extern int hwcdrv_lookup_cpuver (const char * cpcN_cciname); + /* returns hwc_cpus.h ID for a given string. 
*/ + + extern void hwcfuncs_int_capture_errmsg (const char *fn, int subcode, + const char *fmt, va_list ap); +#define logerr hwcfuncs_int_logerr + + /*---------------------------------------------------------------------------*/ + /* prototypes for internal use by linux hwcdrv drivers */ +#define PERFCTR_FIXED_MAGIC 0x40000000 /* tells perfctr to use intel fixed pmcs */ +#define PERFCTR_UMASK_SHIFT 8 +#define EXTENDED_EVNUM_2_EVSEL(evnum) \ + ( (((eventsel_t)(evnum) & 0x0f00ULL) << 24) | ((eventsel_t)(evnum) & ~0x0f00ULL) ) +/* EXTENDED_EVNUM_2_EVSEL ORs a copy of evnum bits 8-11, shifted up to bits 32-35, onto evnum with bits 8-11 cleared. */ + typedef uint64_t eventsel_t; + extern int hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name, + eventsel_t *return_event, uint_t *return_pmc_sel); + + typedef int (hwcdrv_get_events_fn_t) (hwcf_hwc_cb_t *hwc_cb); + typedef int (hwcdrv_get_eventnum_fn_t) (const char *eventname, uint_t pmc, + eventsel_t *eventnum, + eventsel_t *valid_umask, uint_t *pmc_sel); + extern hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum; + + typedef struct + { + const char * attrname; // user-visible name of attribute + int is_inverted; // nonzero means boolean attribute is inverted + eventsel_t mask; // which attribute bits can be set? 
+ eventsel_t shift; // how far to shift bits for use in x86 register + } attr_info_t; + extern const attr_info_t *perfctr_attrs_table; + + /* hdrv_pcbe api: cpu-specific drivers for Linux. + * hdrv_pcl_internal_open() walks hdrv_pcbe_drivers[] and adopts the first driver whose hdrv_pcbe_init() returns 0. */ + typedef struct + { + int (*hdrv_pcbe_init)(void); + uint_t (*hdrv_pcbe_ncounters)(void); + const char *(*hdrv_pcbe_impl_name)(void); + const char *(*hdrv_pcbe_cpuref)(void); + int (*hdrv_pcbe_get_events)(hwcf_hwc_cb_t *hwc_cb); + int (*hdrv_pcbe_get_eventnum)(const char * eventname, uint_t pmc, + eventsel_t *eventnum, eventsel_t *valid_umask, + uint_t *pmc_sel); + } hdrv_pcbe_api_t; + +#ifdef __cplusplus +} +#endif + +#endif /* __HWCDRV_H */ diff --git a/gprofng/common/hwcentry.h b/gprofng/common/hwcentry.h new file mode 100644 index 0000000..8611ab7 --- /dev/null +++ b/gprofng/common/hwcentry.h @@ -0,0 +1,417 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#ifndef _HWCENTRY_H +#define _HWCENTRY_H + +#ifndef LIBCOLLECTOR_SRC /* not running in libcollector */ +#include <stdio.h> /* FILE */ +#endif /* --- LIBCOLLECTOR_SRC --- */ +#include <stdlib.h> /* size_t */ +#include "hwc_cpus.h" +#include "gp-time.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + + /* ABS backtrack types */ + typedef enum + { + /* !! 
Lowest 2 bits are used to indicate load and store, respectively !! */ + /* Example: On SPARC, backtrack.c did this: if (ABS_memop & inst_type) ... */ + ABST_NONE = 0x0, + ABST_LOAD = 0x1, + ABST_STORE = 0x2, + ABST_LDST = 0x3, + ABST_COUNT = 0x4, + ABST_US_DTLBM = 0xF, + ABST_NOPC = 0x100, + ABST_CLKDS = 0x103, // Obsolete + ABST_EXACT = 0x203, + ABST_LDST_SPARC64 = 0x303, + ABST_EXACT_PEBS_PLUS1 = 0x403 + /* full description below... */ + } ABST_type; +/* Predicates on ABST_type values. ABST_MEMSPACE_ENABLED is ABST_BACKTRACK_ENABLED with ABST_COUNT excluded as well. */ +#define ABST_PLUS_BY_DEFAULT(n) ((n)==ABST_EXACT || (n)==ABST_EXACT_PEBS_PLUS1) +#define ABST_BACKTRACK_ENABLED(n) ((n)!=ABST_NONE && (n)!=ABST_NOPC) +#define ABST_MEMSPACE_ENABLED(n) ((n)!=ABST_NONE && (n)!=ABST_NOPC && (n)!=ABST_COUNT) + + /* ABS determines the type of backtracking available for a particular metric. + * Backtracking is enabled with the "+" in "-h +<countername>...". + * + * When Backtracking is not possible: + * + * ABST_NONE=0: Either the user did not specify "+", or backtracking + * is not applicable to the metric, for example: + * clk cycles, + * instruct counts (dispatch + branch + prefetch), + * i$, + * FP ops + * ABST_NOPC=0x100 Used for non-program-related external events, for example: + * system interface events, + * memory controller counters + * Of all ABST_type options, only ABST_NOPC prevents hwprofile.c + * from recording PC/stack information. + * + * When backtracking is allowed: + * + * ABST_LOAD=1: data read events, used with metrics like: + * D$, E$, P$ read misses and hits. + * [DC+EC+PC]_rd*, Re_*_miss*, + * EC_snoop_cb(?) + * ABST_STORE=2: data write events, used with metrics like: + * D$ writes and write related misses + * DC_wr/wr-miss, EC_wb, WC=writecache, Rstall_storeQ + * [EC+PC=pcache]_snoop_inv(?), WC_snoop_cb(?), + * ABST_LDST=3: data reads/writes, used with metrics like: + * E$ references, misses. + * ABST_COUNT=4: dedicated assembly instruction: '%hi(0xfc000)' + * See SW_count_n metric on sparc. 
+ * ABST_US_DTLBM=0xF: for load-store on Sparc -- seems to be used only + * for "unskidded DTLB_miss" with DTLB_miss metric. + * Checks two adjacent instructions for Data access. + * ABST_CLKDS=0x103: data reads/writes, used with Clock-based Dataspace + * profiling. Ultrasparc T2 and earlier. + * ABST_EXACT=0x203: data reads/writes, precise trap with no skid + * ABST_LDST_SPARC64=0x303: Fujitsu SPARC64 load/store + * ABST_EXACT_PEBS_PLUS1=0x403: data reads/writes, precise sampling with 1 instr. skid + */ + + /* Hwcentry - structure for defining a counter. + * Some fields have different usage when returned from + * hwc_lookup(), hwc_post_lookup(), or hwc_scan_*(). + * Each function will describe its return values in more detail. + */ + typedef struct + { + char *name; /* user HWC specification */ + char *int_name; /* internal HWC specification */ + regno_t reg_num; /* register in CPU, aka picnum, or REGNO_ANY */ + char *metric; /* descriptive name, for well-known counters only */ + volatile int val; /* default or actual overflow value */ + int timecvt; /* multiplier to convert metric to time, 0 if N/A */ + ABST_type memop; /* type of backtracking allowed */ + char *short_desc; /* optional one-liner description, or NULL */ + int type; /* Type of perf_event_attr */ + long long config; /* perf_event_type -specific configuration */ + /* the fields above this line are expected, in order, by the tables in hwctable.c */ + /* ================================================== */ + /* the fields below this line are more flexible */ + int sort_order; /* "tag" to associate experiment record with HWC def */ + regno_t *reg_list; /* if not NULL, legal values for <reg_num> field above */ + /* Note: reg_list will be terminated by REGNO_ANY */ + /* Max size of array is MAX_PICS */ + hrtime_t min_time; /* target minimum time between overflow events. 0 is off. See HWCTIME_* macros */ + hrtime_t min_time_default; /* if min_time==HWCTIME_AUTO (NOTE(review): not among the HWCTIME_* macros defined just below; the hwc_lookup() comment uses HWCTIME_TBD for this case -- confirm), use this value instead. 0 is off. 
*/ + int ref_val; /* if min_time==HWCTIME_AUTO, use this time. 0 is off. */ + int lval, hval; /* temporary to allow DBX to build until dbx glue.cc fixed */ + } Hwcentry; + + // Hwcentry.min_time canned values +#define HWCTIME_TBD ((hrtime_t)( -1LL)) /* self-adjusting enabled but nsecs not yet selected */ +#define HWCTIME_HI ( 1 * 1000 * 1000LL ) /* 1 msec represented in nsecs */ +#define HWCTIME_ON ( 10 * 1000 * 1000LL ) /* 10 msec represented in nsecs */ +#define HWCTIME_LO ( 100 * 1000 * 1000LL ) /* 100 msec represented in nsecs */ + +#define HWC_VAL_HI(refVal) (((refVal)/10) + 1) +#define HWC_VAL_ON(refVal) (refVal) +#define HWC_VAL_LO(refVal) (((refVal)*10)/100*100 + 1) // zero's out lower digits, add 1 +#define HWC_VAL_CUSTOM(refVal, targetNanoSec) ((double)(refVal)*(targetNanoSec)/HWCTIME_ON) + +#define HWCENTRY_USES_SAMPLING(h) ((h)->memop==ABST_EXACT_PEBS_PLUS1) + + extern int hwc_lookup (int forKernel, hrtime_t min_time_default, + const char *uname, Hwcentry *list[], unsigned listsz, + char **emsg, char **wmsg); + /* Parses counter cmdline string. Returns counter definitions. + * Input: + * <forKernel> lookup using which table: 0-collect or 1-er_kernel + * <min_time_default> minimum nseconds between events if Hwcentry.min_time == HWCTIME_TBD. 0 to disable. + * <uname> command line HWC definition of format: + * <ctr_def>...[{','|(whitespace)}<ctr_n_def>] where + * <ctr_def> == [+]<ctr>[/<reg#>][,<interval>] + * <list> array of pointers to store counter definitions + * <listsz> number of elements in <list> + * Returns: + * Success: + * Returns number of valid counters in <list> and <list>'s elements + * will be initialized as follows: + * + * <list[]->name>: + * Copy of the <uname> with the following modification: + * if backtracking is not supported, the + will be removed. + * <list[]->int_name>: + * For well-known and convenience ctrs, the internal HWC specification, + * e.g. BSQ_cache_reference~emask=0x0100. 
+ * For raw ctrs, this will be a copy of <name>. + * <list[]->reg_num>: + * Register number if specified by user or table, REGNO_ANY otherwise. + * <list[]->metric>: + * For well-known counters, descriptive name, e.g. "D$ Read Misses". + * NULL otherwise. + * <list[]->val>: + * Overflow value selected by user, default value otherwise. + * <list[]->timecvt>: + * Value from tables. + * <list[]->memop>: + * If + is selected and backtracking is allowed, value from table. + * ABST_NONE or ABST_NOPC otherwise. + * + * It is the responsibility of the caller to free 'name' and 'int_name'. + * 'metric' is a static string and shouldn't be freed. + * 'emsg' will point to NULL + * + * Failure: + * Frees all allocated elements. + * emsg will point to a string with an error message to print + * returns -1 + */ + + extern char *hwc_validate_ctrs (int forKernel, Hwcentry *list[], unsigned listsz); + /* Validates that the vector of specified HW counters can be loaded (more-or-less) + * Some invalid combinations, especially on Linux will not be detected + */ + + extern int hwc_get_cpc_cpuver (); + /* Return the cpc_cpuver for this system. Other possible values: + * CPUVER_GENERIC=0, CPU could not be determined, but HWCs are ok. + * CPUVER_UNDEFINED=-1, HWCs are not available. + */ + + extern char *hwc_get_docref (char *buf, size_t buflen); + /* Return a CPU HWC document reference, or NULL. 
*/ + + // TBR + extern char *hwc_get_default_cntrs (); + /* Return a default HW counter string; may be NULL, or zero-length */ + /* NULL means none is defined in the table; or zero-length means string defined could not be loaded */ + + extern char *hwc_get_default_cntrs2 (int forKernel, int style); + /* like hwc_get_default_cntrs() for style==1 */ + /* but allows other styles of formatting as well */ + /* deprecate and eventually remove hwc_get_default_cntrs() */ + + extern char *hwc_get_orig_default_cntrs (); + /* Get the default HW counter string as set in the table */ + /* NULL means none is defined in the table */ + + extern void hwc_update_val (Hwcentry *ctr); + /* Check time-based intervals and update Hwcentry.val as needed */ + + extern char *hwc_get_cpuname (char *buf, size_t buflen); + /* Return the cpc cpu name for this system, or NULL. */ + + extern unsigned hwc_get_max_regs (); + /* Return number of counters registers for this system. */ + + extern unsigned hwc_get_max_concurrent (int forKernel); + /* Return the max number of simultaneous counters for this system. */ + + extern char **hwc_get_attrs (int forKernel); + /* Return: + * Array of attributes (strings) supported by this system. + * Last element in array is null. + * Array and its elements should NOT be freed by the caller. + */ + + extern unsigned hwc_scan_attrs (void (*action)(const char *attr, + const char *desc)); + /* Scan the HW counter attributes, and call function for each attribute. + * Input: + * <action>: + * If NULL, no action is performed, but count is still returned. + * Otherwise called for each type of attributes, or if none exist, + * called once with NULL parameter. + * Return: count of times <action> would have been called w/ non-NULL data. + */ + + extern Hwcentry *hwc_post_lookup (Hwcentry * pret_ctr, char *uname, + char * int_name, int cpc_cpuver); + /* When post-processing a run, look up a Hwcentry for given type of system. 
+ * Input: + * <pret_ctr>: storage for counter definition + * <uname>: well-known name, convenience name, or complete HWC definition. + * <int_name>: Hwcentry->int_name or NULL for don't care + * <cpc_cpuver>: version of cpu used for experiment. + * Return: + * <pret_ctr>'s elements set as follows: + * + * <pret_ctr->name>: + * Copy of <uname> with the following modifications: + * 1) + and /<regnum> will be stripped off + * 2) attributes will be sorted and values will be shown in hex. + * <pret_ctr->int_name>: + * For well-known/convenience counters, the internal HWC specification + * from the table, e.g. BSQ_cache_reference~emask=0x0100. + * Otherwise, a copy of <uname>. + * <pret_ctr->reg_num>: + * Register number if specified by user or table, + * REGNO_ANY otherwise. + * <pret_ctr->metric>: + * For well-known counters, descriptive name, e.g. "D$ Read Misses". + * NULL otherwise. + * <pret_ctr->timecvt>: + * For well-known/convenience/hidden counters, value from table. + * 0 otherwise. + * <pret_ctr->memop>: + * For well-known/convenience/hidden counters, value from table. + * ABST_NONE otherwise. + * <pret_ctr->sort_order>: + * Set to 0. + * + * It is the responsibility of the caller to free 'name' and 'int_name'. + * 'metric' is a static string and shouldn't be freed. + */ + + extern Hwcentry **hwc_get_std_ctrs (int forKernel); + /* Return: + * Array of well-known counters supported by this system. + * Last element in array will be NULL. + * Array and its elements should NOT be freed by the caller. + */ + + extern unsigned hwc_scan_std_ctrs (void (*action)(const Hwcentry *)); + /* Call <action> for each well-known counter. + * Input: + * <action>: + * If NULL, no action is performed, but count is still returned. + * Otherwise called for each type of attributes, or if none exist, + * called once with NULL parameter. + * Return: + * Count of times <action> would have been called w/ non-NULL data. 
+ * If <action> is not NULL, Hwcentry fields will be set as follows: + * <ctr->name>: + * HWC alias name, e.g. dcrm. + * <ctr->int_name>: + * The internal HWC specification, e.g. BSQ_cache_reference~emask=0x0100. + * <ctr->reg_num>: + * Register number if specified by the table, REGNO_ANY otherwise. + * <ctr->metric>: + * Descriptive name, e.g. "D$ Read Misses". + * <ctr->lval>: + * Low-resolution overflow value. + * <ctr->val>: + * Default overflow value. + * <ctr->hval>: + * High-resolution overflow value. + * <ctr->timecvt>: + * multiplier to convert metric to time, 0 otherwise. + * <ctr->memop>: + * ABST_* type for this counter. + * <ctr->reg_list>: + * Array of legal <reg_num> values. Terminated by REGNO_ANY. + * + * Note: All fields point to static data, none should be freed. + */ + + extern Hwcentry **hwc_get_raw_ctrs (int forKernel); + /* Return: + * Table of raw (not well-known) counters supported by this system. + * Last element in array will be NULL. + * Table and its elements should NOT be freed by the caller. + */ + + extern unsigned hwc_scan_raw_ctrs (void (*action)(const Hwcentry *)); + /* Call <action> for each raw counter. + * Input: + * <action>: + * If NULL, no action is performed, but count is still returned. + * Otherwise called for each type of attributes, or if none exist, + * called once with NULL parameter. + * Return: + * Count of times <action> would have been called w/ non-NULL data. + * If <action> is not NULL, Hwcentry fields will be set as follows: + * <ctr->name>: + * HWC raw name without attributes, e.g. BSQ_cache_reference. + * <ctr->int_name>: + * NULL. + * <ctr->metric>: + * NULL. + * The remainder of the fields are the same as for + * hwc_scan_std_ctrs(). + * + * Note: All fields point to static data, none should be freed. + */ + + extern void + hwc_usage (int forKernel, const char *cmd, const char *dataspace_msg); + /* Print an i18n'd description of "-h" usage, used by collect and er_kernel. 
+ */ + + extern void hwc_usage_f (int forKernel, FILE *f, const char *cmd, + const char *dataspace_msg, int show_syntax, + int show_short_desc); + /* Print an i18n'd description of "-h" usage to a FILE. Used by GUI. */ + + extern char *hwc_rate_string (const Hwcentry *pctr, int force_numeric_format); + /* Returns {"on"|"hi"|"lo"|""|<value>}. Return value must be freed by caller. */ + + extern char *hwc_i18n_metric (const Hwcentry *ctr); + /* Get a basic label for a counter, properly i18n'd. + * Note: NOT MT SAFE. + * Examples: + * CPU Cycles + * DC_rd Events + * Pseudocode: + * if(ctr->metric != NULL) { + * sprintf(metricbuf, PTXT(ctr->metric) ); + * } else if (ctr->name != NULL) { + * sprintf(metricbuf, GTXT("%s Events"), ctr->name ); + * } else if (ctr->int_name != NULL) { + * sprintf(metricbuf, GTXT("%s Events"), ctr->int_name ); + * } + * Return: pointer to a buffer containing the above description. + */ + + extern char *hwc_hwcentry_string (char *buf, size_t buflen, const Hwcentry *ctr); + /* Get an i18n'd description of a HW counter's options. + * Examples of well-known counters: + * cycles[/{0|1}],9999991 ('CPU Cycles', alias for Cycle_cnt; CPU-cycles) + * dcr[/0],1000003 ('D$ Read Refs', alias for DC_rd; load events) + * Examples of raw counters: + * Cycle_cnt[/{0|1}],1000003 (CPU-cycles) + * DC_rd[/0],1000003 (load events) + * Return: <buf>, filled in. + */ + + extern char *hwc_hwcentry_specd_string (char *buf, size_t buflen, const Hwcentry *ctr); + /* Get an i18n'd description of a HW counter's specific configuration. + * Examples of well-known counters: + * cycles,9999991 ('CPU Cycles') + * +dcr/0,1000003 ('D$ Read Refs') + * Examples of raw counters: + * Cycle_cnt,1000003 + * +DC_rd/0,1000003 + * Return: <buf>, filled in. + */ + + extern const char *hwc_memop_string (ABST_type memop); + /* Get an i18n'd description of a variable of type ABST_type. + * Return: pointer to static string. 
+ */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/gprofng/common/hwcfuncs.c b/gprofng/common/hwcfuncs.c new file mode 100644 index 0000000..2f9764d --- /dev/null +++ b/gprofng/common/hwcfuncs.c @@ -0,0 +1,704 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +/* Hardware counter profiling */ +#include "hwcdrv.h" +#include "hwcfuncs.h" + +/*---------------------------------------------------------------------------*/ +/* macros */ + +#define IS_GLOBAL /* Mark global symbols */ +#define HWCDRV_API static /* Mark functions used by hwcdrv API */ + +/*---------------------------------------------------------------------------*/ +/* static variables */ +static uint_t cpcN_npics; +static char hwcfuncs_errmsg_buf[1024]; +static int hwcfuncs_errmsg_enabled = 1; +static int hwcfuncs_errmsg_valid; + +/* --- user counter selections and options */ +static unsigned hwcdef_cnt; /* number of *active* hardware counters */ +static Hwcentry hwcdef[MAX_PICS]; /* HWC definitions */ +static Hwcentry *hwctable[MAX_PICS]; /* HWC definitions */ + +/* --- drivers --- */ + +// default driver + +HWCDRV_API int +hwcdrv_init (hwcfuncs_abort_fn_t abort_ftn, int* tsd_sz) +{ + return -1; +} + +HWCDRV_API void +hwcdrv_get_info ( + int * cpuver, const char ** cciname, + uint_t * npics, const char ** docref, uint64_t* support) { } + +HWCDRV_API int +hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn) +{ + return -1; +} + +HWCDRV_API int +hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_find_action, + hwcf_attr_cb_t *attr_find_action) +{ + return 0; +} + +HWCDRV_API int +hwcdrv_assign_regnos (Hwcentry *entries[], unsigned numctrs) +{ + return -1; +} + +HWCDRV_API int +hwcdrv_create_counters (unsigned hwcdef_cnt, Hwcentry *hwcdef) +{ + return -1; +} + +HWCDRV_API int +hwcdrv_read_events (hwc_event_t *events, hwc_event_samples_t*samples) +{ + return -1; +} + +HWCDRV_API int +hwcdrv_start (void) +{ + return -1; +} + +HWCDRV_API int +hwcdrv_overflow (siginfo_t *si, hwc_event_t *s, hwc_event_t *t) +{ + return 0; +} + +HWCDRV_API int +hwcdrv_sighlr_restart (const hwc_event_t *sample) +{ + return -1; +} + +HWCDRV_API int +hwcdrv_lwp_suspend (void) +{ + return -1; +} + +HWCDRV_API int +hwcdrv_lwp_resume (void) +{ + return -1; +} + +HWCDRV_API int 
+hwcdrv_free_counters (void) +{ + return 0; +} + +HWCDRV_API int +hwcdrv_lwp_init (void) +{ + return 0; +} + +HWCDRV_API void +hwcdrv_lwp_fini (void) { } + +static hwcdrv_api_t hwcdrv_default = { + hwcdrv_init, + hwcdrv_get_info, + hwcdrv_enable_mt, + hwcdrv_get_descriptions, + hwcdrv_assign_regnos, + hwcdrv_create_counters, + hwcdrv_start, + hwcdrv_overflow, + hwcdrv_read_events, + hwcdrv_sighlr_restart, + hwcdrv_lwp_suspend, + hwcdrv_lwp_resume, + hwcdrv_free_counters, + hwcdrv_lwp_init, + hwcdrv_lwp_fini, + -1 // hwcdrv_init_status +}; + +static hwcdrv_api_t *hwcdrv_driver = &hwcdrv_default; + + +/*---------------------------------------------------------------------------*/ +/* misc */ + +/* print a counter definition (for debugging) */ +static void +ctrdefprint (int dbg_lvl, const char * hdr, Hwcentry*phwcdef) +{ + TprintfT (dbg_lvl, "%s: name='%s', int_name='%s'," + " reg_num=%d, timecvt=%d, memop=%d, " + "interval=%d, tag=%u, reg_list=%p\n", + hdr, phwcdef->name, phwcdef->int_name, phwcdef->reg_num, + phwcdef->timecvt, phwcdef->memop, phwcdef->val, + phwcdef->sort_order, phwcdef->reg_list); +} + +/*---------------------------------------------------------------------------*/ +/* errmsg buffering */ + +/* errmsg buffering is needed only because the most descriptive error + messages from CPC are delivered using a callback mechanism. + hwcfuncs_errmsg_get() should only be used during initialization, and + ideally, only to provide feedback to an end user when his counters can't + be bound to HW. 
+ */ +IS_GLOBAL char * +hwcfuncs_errmsg_get (char *buf, size_t bufsize, int enable) +{ + hwcfuncs_errmsg_enabled = 0; + if (buf && bufsize) + { + if (hwcfuncs_errmsg_valid) + { + strncpy (buf, hwcfuncs_errmsg_buf, bufsize); + buf[bufsize - 1] = 0; + } + else + *buf = 0; + } + hwcfuncs_errmsg_buf[0] = 0; + hwcfuncs_errmsg_valid = 0; + hwcfuncs_errmsg_enabled = enable; + return buf; +} + +/* used by cpc to log an error */ +IS_GLOBAL void +hwcfuncs_int_capture_errmsg (const char *fn, int subcode, + const char *fmt, va_list ap) +{ + if (hwcfuncs_errmsg_enabled && + !hwcfuncs_errmsg_valid) + { + vsnprintf (hwcfuncs_errmsg_buf, sizeof (hwcfuncs_errmsg_buf), fmt, ap); + TprintfT (DBG_LT0, "hwcfuncs: cpcN_capture_errmsg(): %s\n", + hwcfuncs_errmsg_buf); + hwcfuncs_errmsg_valid = 1; + } + return; +} + +/* Log an internal error to the CPC error buffer. + * Note: only call this during init functions. + * Note: when most cpc calls fail, they will call cpcN_capture_errmsg() + * directly, so only call logerr() when a non-cpc function fails. + */ +IS_GLOBAL void +hwcfuncs_int_logerr (const char *format, ...) +{ + va_list va; + va_start (va, format); + hwcfuncs_int_capture_errmsg ("logerr", 0, format, va); + va_end (va); +} + +/* utils to parse counter strings */ +static void +clear_hwcdefs () +{ + for (unsigned idx = 0; idx < MAX_PICS; idx++) + { + static Hwcentry empty; + hwcdef[idx] = empty; // leaks strings and reg_list array + hwcdef[idx].reg_num = REGNO_ANY; + hwcdef[idx].val = -1; + hwcdef[idx].sort_order = -1; + } +} + +/* initialize hwcdef[] based on user's counter definitions */ +static int +process_data_descriptor (const char *defstring) +{ + /* + * <defstring> format should be of format + * :%s:%s:0x%x:%d:%lld:%d:%d:0x%x[,%s...repeat for each ctr] + * where the counter fields are: + * :<userName>:<internalCtr>:<register>:<timeoutVal>[:m<min_time>]:<tag>:<timecvt>:<memop> + * See Coll_Ctrl::build_data_desc(). 
+ */ + int err = 0; + char *ds = NULL; + char *dsp = NULL; + unsigned idx; + + clear_hwcdefs (); + if (!defstring || !strlen (defstring)) + { + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + ds = strdup (defstring); + if (!ds) + { + err = HWCFUNCS_ERROR_HWCINIT; + goto ext_hw_install_end; + } + dsp = ds; + + for (idx = 0; idx < MAX_PICS && *dsp; idx++) + { + char *name = NULL; + char *int_name = NULL; + regno_t reg = REGNO_ANY; + ABST_type memop = ABST_NONE; + int interval = 0; + int timecvt = 0; + unsigned sort_order = (unsigned) - 1; + + /* name */ + name = dsp; + dsp = strchr (dsp, ':'); + if (dsp == NULL) + { + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + *dsp++ = (char) 0; + + /* int_name */ + int_name = dsp; + dsp = strchr (dsp, ':'); + if (dsp == NULL) + { + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + *dsp++ = (char) 0; + + /* reg_num */ + reg = (int) strtol (dsp, &dsp, 0); + if (*dsp++ != ':') + { + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + if (reg < 0 && reg != -1) + { + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + if (reg >= 0) + hwcdef[idx].reg_num = reg; + + /* val */ + interval = (int) strtol (dsp, &dsp, 0); + if (*dsp++ != ':') + { + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + if (interval < 0) + { + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + hwcdef[idx].val = interval; + + /* min_time */ + /* + * This is a new field. + * An old launcher (dbx, etc.) would not include it. + * Detect the presence of the field by the char 'm'. 
+ */ + if (*dsp == 'm') + { + long long tmp_ll = 0; + dsp++; + tmp_ll = strtoll (dsp, &dsp, 0); + if (*dsp++ != ':') + { + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + if (tmp_ll < 0) + { + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + hwcdef[idx].min_time = tmp_ll; + } + else + hwcdef[idx].min_time = 0; + + /* sort_order */ + sort_order = (int) strtoul (dsp, &dsp, 0); + if (*dsp++ != ':') + { + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + hwcdef[idx].sort_order = sort_order; + + /* timecvt */ + timecvt = (int) strtol (dsp, &dsp, 0); + if (*dsp++ != ':') + { + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + hwcdef[idx].timecvt = timecvt; + + /* memop */ + memop = (ABST_type) strtol (dsp, &dsp, 0); + if (*dsp != 0 && *dsp++ != ',') + { + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + hwcdef[idx].memop = memop; + if (*name) + hwcdef[idx].name = strdup (name); + else + hwcdef[idx].name = strdup (int_name); + if (*int_name) + hwcdef[idx].int_name = strdup (int_name); + else + hwcdef[idx].int_name = strdup (name); + ctrdefprint (DBG_LT1, "hwcfuncs: process_data_descriptor", &hwcdef[idx]); + } + + if (*dsp) + { + TprintfT (DBG_LT0, "hwcfuncs: ERROR: process_data_descriptor(): " + "ctr string had some trailing garbage:" + " '%s'\n", dsp); + err = HWCFUNCS_ERROR_HWCARGS; + goto ext_hw_install_end; + } + free (ds); + hwcdef_cnt = idx; + return 0; + +ext_hw_install_end: + if (dsp && *dsp) + { + TprintfT (DBG_LT0, "hwcfuncs: ERROR: process_data_descriptor(): " + " syntax error just before:" + " '%s;\n", dsp); + logerr (GTXT ("Data descriptor syntax error near `%s'\n"), dsp); + } + else + logerr (GTXT ("Data descriptor syntax error\n")); + free (ds); + return err; +} + +/* initialize hwcdef[] based on user's counter definitions */ +static int +process_hwcentrylist (const Hwcentry* entries[], unsigned numctrs) +{ + int err = 0; + clear_hwcdefs (); + if (numctrs > cpcN_npics) + { + logerr (GTXT 
("More than %d counters were specified\n"), cpcN_npics); /*!*/ + return HWCFUNCS_ERROR_HWCARGS; + } + for (unsigned idx = 0; idx < numctrs; idx++) + { + Hwcentry *phwcdef = &hwcdef[idx]; + *phwcdef = *entries[idx]; + if (phwcdef->name) + phwcdef->name = strdup (phwcdef->name); + else + phwcdef->name = "NULL"; + if (phwcdef->int_name) + phwcdef->int_name = strdup (phwcdef->int_name); + else + phwcdef->int_name = "NULL"; + if (phwcdef->val < 0) + { + logerr (GTXT ("Negative interval specified for HW counter `%s'\n"), /*!*/ + phwcdef->name); + err = HWCFUNCS_ERROR_HWCARGS; + break; + } + ctrdefprint (DBG_LT1, "hwcfuncs: process_hwcentrylist", phwcdef); + } + if (!err) + hwcdef_cnt = numctrs; + return err; +} + +/* see hwcfuncs.h */ +IS_GLOBAL void * +hwcfuncs_parse_attrs (const char *countername, hwcfuncs_attr_t attrs[], + unsigned max_attrs, uint_t *pnum_attrs, char**errstring) +{ + char *head = NULL; + char *tail = NULL; + uint_t nattrs = 0; + char *counter_copy; + int success = 0; + char errbuf[512]; + errbuf[0] = 0; + counter_copy = strdup (countername); + + /* advance pointer to first attribute */ + tail = strchr (counter_copy, HWCFUNCS_PARSE_ATTR); + if (tail) + *tail = 0; + + /* remove regno and value, if supplied */ + { + char *tmp = strchr (counter_copy, HWCFUNCS_PARSE_REGNUM); + if (tmp) + *tmp = 0; + tmp = strchr (counter_copy, HWCFUNCS_PARSE_VALUE); + if (tmp) + *tmp = 0; + } + + while (tail) + { + char *pch; + if (nattrs >= max_attrs) + { + snprintf (errbuf, sizeof (errbuf), + GTXT ("Too many attributes defined in `%s'"), + countername); + goto mycpc2_parse_attrs_end; + } + /* get attribute name */ + head = tail + 1; + tail = strchr (head, HWCFUNCS_PARSE_EQUAL); + if (!tail) + { + snprintf (errbuf, sizeof (errbuf), + GTXT ("Missing value for attribute `%s' in `%s'"), + head, countername); + goto mycpc2_parse_attrs_end; + } + *tail = 0; /* null terminate current component */ + attrs[nattrs].ca_name = head; + + /* get attribute value */ + head = tail + 1; + 
tail = strchr (head, HWCFUNCS_PARSE_ATTR); + if (tail) + *tail = 0; /* null terminate current component */ + attrs[nattrs].ca_val = strtoull (head, &pch, 0); + if (pch == head) + { + snprintf (errbuf, sizeof (errbuf), + GTXT ("Illegal value for attribute `%s' in `%s'"), + attrs[nattrs].ca_name, countername); + goto mycpc2_parse_attrs_end; + } + TprintfT (DBG_LT0, "hwcfuncs: pic_: '%s', attribute[%u]" + " '%s' = 0x%llx\n", + counter_copy, nattrs, attrs[nattrs].ca_name, + (long long unsigned int) attrs[nattrs].ca_val); + + nattrs++; + } + success = 1; + +mycpc2_parse_attrs_end: + /* <pnum_attrs> is an optional out-parameter: store the count only when the caller supplied it */ + if (pnum_attrs) + *pnum_attrs = nattrs; + if (success) + { + if (errstring) + *errstring = NULL; + } + else + { + if (errstring) + *errstring = strdup (errbuf); + free (counter_copy); + counter_copy = NULL; + } + return counter_copy; +} + +IS_GLOBAL void +hwcfuncs_parse_ctr (const char *counter_def, int *pplus, char **pnameOnly, + char **pattrs, char **pregstr, regno_t *pregno) +{ + char *nameptr, *copy, *slash, *attr_delim; + int plus; + regno_t regno; + nameptr = copy = strdup (counter_def); + + /* plus */ + plus = 0; + if (nameptr[0] == HWCFUNCS_PARSE_BACKTRACK) + { + plus = 1; + nameptr++; + } + else if (nameptr[0] == HWCFUNCS_PARSE_BACKTRACK_OFF) + { + plus = -1; + nameptr++; + } + if (pplus) + *pplus = plus; + + /* regno */ + regno = REGNO_ANY; + if (pregstr) + *pregstr = NULL; + slash = strchr (nameptr, HWCFUNCS_PARSE_REGNUM); + if (slash != NULL) + { + /* the remaining string should be a number >= 0 (a leading '-' or trailing junk yields -2) */ + if (pregstr) + *pregstr = strdup (slash); + char *endchar = NULL; + regno = (regno_t) strtol (slash + 1, &endchar, 0); + if (*endchar != 0) + regno = -2; + if (*(slash + 1) == '-') + regno = -2; + /* terminate previous element up to slash */ + *slash = 0; + } + if (pregno) + *pregno = regno; + + /* attrs */ + if (pattrs) + *pattrs = NULL; + attr_delim = strchr (nameptr, HWCFUNCS_PARSE_ATTR); + if (attr_delim != NULL) + { + if (pattrs) + *pattrs = strdup (attr_delim); + /* terminate previous element up 
to attr_delim */ + *attr_delim++ = 0; + } + if (pnameOnly) + *pnameOnly = strdup (nameptr); + free (copy); +} + +/* create counters */ +IS_GLOBAL int +hwcfuncs_bind_descriptor (const char *defstring) +{ + int err = process_data_descriptor (defstring); + if (err) + { + TprintfT (DBG_LT0, "hwcfuncs: ERROR: hwcfuncs_bind_descriptor failed\n"); + return err; + } + err = hwcdrv_driver->hwcdrv_create_counters (hwcdef_cnt, hwcdef); + return err; +} + +/* see hwcfuncs.h */ +IS_GLOBAL int +hwcfuncs_bind_hwcentry (const Hwcentry* entries[], unsigned numctrs) +{ + int err = -1; + err = process_hwcentrylist (entries, numctrs); + if (err) + { + TprintfT (DBG_LT0, "hwcfuncs: ERROR: hwcfuncs_bind_hwcentry\n"); + return err; + } + err = hwcdrv_driver->hwcdrv_create_counters (hwcdef_cnt, hwcdef); + return err; +} + +/* see hwcfuncs.h */ +IS_GLOBAL Hwcentry ** +hwcfuncs_get_ctrs (unsigned *defcnt) +{ + if (defcnt) + *defcnt = hwcdef_cnt; + return hwctable; +} + +/* return 1 if <regno> is in Hwcentry's list */ +IS_GLOBAL int +regno_is_valid (const Hwcentry * pctr, regno_t regno) +{ + regno_t *reg_list = pctr->reg_list; + if (REG_LIST_IS_EMPTY (reg_list)) + return 0; + if (regno == REGNO_ANY) /* wildcard */ + return 1; + for (int ii = 0; ii < MAX_PICS; ii++) + { + regno_t tmp = reg_list[ii]; + if (REG_LIST_EOL (tmp)) /* end of list */ + break; + if (tmp == regno) /* is in list */ + return 1; + } + return 0; +} + +/* supplied by hwcdrv_api drivers */ +IS_GLOBAL int +hwcfuncs_assign_regnos (Hwcentry* entries[], + unsigned numctrs) +{ + if (numctrs > cpcN_npics) + { + logerr (GTXT ("More than %d counters were specified\n"), cpcN_npics); /*!*/ + return HWCFUNCS_ERROR_HWCARGS; + } + return hwcdrv_driver->hwcdrv_assign_regnos (entries, numctrs); +} + +extern hwcdrv_api_t hwcdrv_pcl_api; +static int hwcdrv_driver_inited = 0; + +hwcdrv_api_t * +get_hwcdrv () +{ + if (hwcdrv_driver_inited) + return hwcdrv_driver; + hwcdrv_driver_inited = 1; + cpcN_npics = 0; + for (int i = 0; i < MAX_PICS; 
i++) + hwctable[i] = &hwcdef[i]; + hwcdrv_driver = &hwcdrv_pcl_api; + hwcdrv_driver->hwcdrv_init_status = hwcdrv_driver->hwcdrv_init (NULL, NULL); + if (hwcdrv_driver->hwcdrv_init_status == 0) + { + hwcdrv_driver->hwcdrv_get_info (NULL, NULL, &cpcN_npics, NULL, NULL); + return hwcdrv_driver; + } + hwcdrv_driver = &hwcdrv_default; + return hwcdrv_driver; +} diff --git a/gprofng/common/hwcfuncs.h b/gprofng/common/hwcfuncs.h new file mode 100644 index 0000000..ef0360b --- /dev/null +++ b/gprofng/common/hwcfuncs.h @@ -0,0 +1,269 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +/* Hardware counter profiling */ + +#ifndef __HWCFUNCS_H +#define __HWCFUNCS_H + +#ifdef LIBCOLLECTOR_SRC /* running in libcollector */ +#define hwcfuncs_int_logerr __collector_hwcfuncs_int_logerr +#define hwcfuncs_parse_ctr __collector_hwcfuncs_parse_ctr +#define hwcfuncs_parse_attrs __collector_hwcfuncs_parse_attrs +#define hwcfuncs_bind_descriptor __collector_hwcfuncs_bind_descriptor +#define hwcfuncs_bind_hwcentry __collector_hwcfuncs_bind_hwcentry +#define hwcfuncs_assign_regnos __collector_hwcfuncs_assign_regnos +#define regno_is_valid __collector_regno_is_valid +#define hwcfuncs_get_ctrs __collector_hwcfuncs_get_ctrs +#define hwcfuncs_errmsg_get __collector_hwcfuncs_errmsg_get +#endif /* --- LIBCOLLECTOR_SRC --- */ + +#include <signal.h> /* siginfo_t */ +#include <limits.h> /* UINT64_t */ +#include <sys/types.h> +#include <stdint.h> + +#include "hwcentry.h" /* for Hwcentry type */ +#include "gp-time.h" + +typedef unsigned int uint_t; + +#ifdef __cplusplus +extern "C" { +#endif + +/*---------------------------------------------------------------------------*/ +/* compile options */ + +#define HWC_DEBUG 0 /* 0/1 to enable extra HWC debug */ + +/*---------------------------------------------------------------------------*/ +/* typedefs */ +/* generic hw event */ + typedef struct _hwc_event_t + { /* generalized counter event */ + hrtime_t ce_hrt; /* gethrtime() */ + uint64_t ce_pic[MAX_PICS]; /* counter samples or start values */ + } hwc_event_t; + + /* supplementary data that accompanies some hw events */ + typedef struct + { /* supplementary data fields */ + uint64_t smpl_pc; /* pc related to event */ + uint64_t smpl_data_source; /* chip-specific data source encoding */ + uint64_t smpl_latency; /* latency related to event */ + uint64_t smpl_mem_addr; /* memory address related to event */ + } hwc_sample_t; +#define HWCFUNCS_INVALID_U64 0xFEEDBEEFDEADBEEFllu /* identifies fields as unused */ + +typedef struct { /* supplementary data fields */ + 
hwc_sample_t sample[MAX_PICS]; /* counter samples or start values */ +} hwc_event_samples_t; + +#define HWCFUNCS_SAMPLE_RESET(sample) \ + do { \ + (sample)->smpl_pc =HWCFUNCS_INVALID_U64; \ + (sample)->smpl_data_source =HWCFUNCS_INVALID_U64; \ + (sample)->smpl_latency =HWCFUNCS_INVALID_U64; \ + (sample)->smpl_mem_addr =HWCFUNCS_INVALID_U64; \ + } while(0) + +#define HWCFUNCS_SAMPLE_IS_RESET(sample) \ + ( \ + (sample)->smpl_pc ==HWCFUNCS_INVALID_U64 && \ + (sample)->smpl_data_source==HWCFUNCS_INVALID_U64 && \ + (sample)->smpl_latency ==HWCFUNCS_INVALID_U64 && \ + (sample)->smpl_mem_addr ==HWCFUNCS_INVALID_U64 \ + ) + +/*---------------------------------------------------------------------------*/ +/* macros */ + +#define HW_INTERVAL_MAX UINT64_MAX +#define HW_INTERVAL_PRESET(x) (HW_INTERVAL_MAX - ((uint64_t)(x) - 1)) +#define HW_INTERVAL_TYPE(x) ((uint64_t) (x) + +/* parsing */ +#define HWCFUNCS_MAX_ATTRS 20 +#define HWCFUNCS_PARSE_ATTR '~' +#define HWCFUNCS_PARSE_EQUAL '=' +#define HWCFUNCS_PARSE_BACKTRACK '+' +#define HWCFUNCS_PARSE_BACKTRACK_OFF '-' +#define HWCFUNCS_PARSE_REGNUM '/' +#define HWCFUNCS_PARSE_VALUE ',' + +/* error codes */ +#define HWCFUNCS_ERROR_GENERIC (-1) +#define HWCFUNCS_ERROR_NOT_SUPPORTED (-2) +#define HWCFUNCS_ERROR_ALREADY_CALLED (-3) +#define HWCFUNCS_ERROR_HWCINIT (-4) +#define HWCFUNCS_ERROR_HWCARGS (-5) +#define HWCFUNCS_ERROR_MEMORY (-6) +#define HWCFUNCS_ERROR_UNAVAIL (-7) +#define HWCFUNCS_ERROR_ERRNO_ZERO (-8) +#define HWCFUNCS_ERROR_UNEXPECTED (-99) + +/*---------------------------------------------------------------------------*/ +/* prototypes */ + +typedef void (*hwcfuncs_abort_fn_t) (int errnum, const char *msg); + +extern void hwcfuncs_int_logerr(const char *format,...); +/* Log an error to the internal error buffer. See hwcfuncs_errmsg_get(). + Note: Not MT-safe; don't even enable logging in an MT environment. + Recommend using this call only during init. 
+ Note: when a libcpc call fails, it may automatically call + cpcN_capture_errmsg() to log the error message in the same internal buffer. + Recommend using this call only for non-cpc failures. + */ + +#define HWCFUNCS_SUPPORT_OVERFLOW_PROFILING 0x01llu +#define HWCFUNCS_SUPPORT_PEBS_SAMPLING 0x02llu +#define HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID 0x04llu // OS identifies which counter overflowed + /* get info about session + Input: + <cpuver>: if not NULL, returns value of CPC cpu version + <cciname>: if not NULL, returns name of CPU + <npics>: if not NULL, returns maximum # of HWCs + <docref>: if not NULL, returns documentation reference + <support>: if not NULL, returns bitmask (see above) of hwc support + Return: none + */ + + typedef void* (*hwcfuncs_tsd_get_fn_t) (void); + typedef void (hwcf_hwc_cb_t) (uint_t cpcregno, const char *name); + typedef void (hwcf_attr_cb_t) (const char *attr); + + extern void + hwcfuncs_parse_ctr (const char *counter_def, int *pplus, char **pnameOnly, + char **pattrs, char **pregstr, regno_t *pregno); +/* Parse a counter definition string (value must already be stripped off). + Input: + <counter_def>: input whose format is + [+|-]<countername>[~attrs...][/<regno>] + pointers to return values: Any can be NULL. + Return: + <pplus>: 1 if [+] is found, -1 if [-] is found, 0 otherwise + <pnameOnly>: strdup(<countername>) + <pattrs>: strdup([~attrs...]) if specified, NULL otherwise. + <pregstr>: strdup(/<regno>) if specified, NULL otherwise. + <pregno>: <regno> if readable, REGNO_ANY if not specified, or -2 otherwise. + */ + + typedef struct + { + char *ca_name; + uint64_t ca_val; + } hwcfuncs_attr_t; /* matches cpc_attr_t */ + + void * hwcfuncs_parse_attrs (const char *countername, + hwcfuncs_attr_t attrs[], unsigned max_attrs, + uint_t *pnum_attrs, char **errstring); + /* Extract the attribute fields from <countername>. + Input: + <countername>: string whose format is + [+]<ctrname>[~attributes...][/<regno>][,...]
+ <attrs>: array of attributes to be returned + <max_attrs>: number of elements in <attrs> + <pnum_attrs>: if not NULL, will return how many attrs were found. + <errstring>: pointer to a buffer for storing error info, or NULL. + Return: upon success, a pointer to an allocated copy of <countername>, or + NULL if there's a failure. (A copy is made in order to provide storage + for the ca_name fields in the <attrs> array.) + + The pointer should be freed when <attrs> is no longer in use. + <attrs> will be filled in with data from <countername>. + <pnum_attrs> will have the number of attributes found. May be + non-zero even if return value indicates an error. + <errstring> NULL if no error, otherwise, a malloc'd GTXT string. + */ + + extern int hwcfuncs_bind_descriptor (const char *defstring); + /* Bind counters to resources. + Input: + <defstring>: string whose format is + :%s:%s:0x%x:%d:%d,0x%x[:%s...repeat for each ctr] + where the fields are: + :<userName>:<internalCtr>:<register>:<timeoutVal>:<tag>:<memop> + Return: 0 if successful + HWCFUNCS_ERROR_HWCINIT if resources unavailable + HWCFUNCS_ERROR_HWCARGS if counters were not specified correctly + */ + + extern int hwcfuncs_bind_hwcentry (const Hwcentry *entries[], + unsigned numctrs); + /* Bind counters to resources.
+ Input: + <entries>: array of counters + <numctrs>: number of items in <entries> + Return: 0 if successful + HWCFUNCS_ERROR_HWCINIT if resources unavailable + HWCFUNCS_ERROR_HWCARGS if counters were not specified correctly + */ + + extern int hwcfuncs_assign_regnos (Hwcentry *entries[], unsigned numctrs); + /* Assign entries[]->reg_num values as needed by platform + Note: modifies <entries> by supplying a regno to each counter + Input: + <entries>: array of counters + <numctrs>: number of items in <entries> + Output: + <entries>: array of counters is modified + Return: 0 if successful + HWCFUNCS_ERROR_HWCINIT if resources unavailable + HWCFUNCS_ERROR_HWCARGS if counters were not specified correctly + */ + + extern int regno_is_valid (const Hwcentry *pctr, regno_t regno); + /* return 1 if <regno> is in Hwcentry's list + Input: + <pctr>: counter definition, reg_list[] should be initialized + <regno>: register to check + Return: 1 if <regno> is in Hwcentry's list, 0 otherwise + */ + + extern Hwcentry **hwcfuncs_get_ctrs (unsigned *defcnt); + /* Get descriptions of the currently bound counters. + Input: + <defcnt>: if not NULL, returns number of counter definitions. + Return: + table of counter definition pointers + */ + + extern char *hwcfuncs_errmsg_get (char * buf, size_t bufsize, + int enable_capture); + /* Gets a recent HWC error message. + To clear previous error messages and ensure error-message capture is enabled, + call hwcfuncs_errmsg_get(NULL,0,1). + Once enabled, one error is stored in an internal buffer. A call to this + function will clear the buffer and allow a new message to be captured. + Note: Not MT-safe - don't enable this feature in an MT environment. + Input: + <buf>: pointer to buffer or NULL. + <bufsize>: size of <buf> + <enable_capture>: 0 - disable buffering, 1 - enable buffering. + Return: error string or an empty string. + */ + +#ifdef __cplusplus +} +#endif + +#endif /* !
__HWCFUNCS_H */ diff --git a/gprofng/common/hwctable.c b/gprofng/common/hwctable.c new file mode 100644 index 0000000..bc441e1 --- /dev/null +++ b/gprofng/common/hwctable.c @@ -0,0 +1,5410 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <limits.h> + +#include "hwcdrv.h" +#include "hwcfuncs.h" + +/* TprintfT(<level>,...) definitions. Adjust per module as needed */ +#define DBG_LT0 0 // for high-level configuration, unexpected errors/warnings +#define DBG_LT1 1 // for configuration details, warnings +#define DBG_LT2 2 +#define DBG_LT3 3 + +/*---------------------------------------------------------------------------*/ +/* compile options */ + +#define DISALLOW_USI_USII_6357446 +/* Solaris 9/libcpc1 allows cpc_bind() to work on US-IIe processors, even + though this processor cannot generate profiling interrupts. */ + +#define DISALLOW_PENTIUM_PRO_MMX_7007575 +/* Solaris/libcpc2 defaults to "Pentium Pro with MMX, Pentium II" + when it doesn't recognize an Intel processor. As a result, + when collect attempts to start Pentium Pro counters on a + new machine (e.g. Westmere as of 1/2011), the OS may hang. 
*/ + +/* Register 0 counter doesn't work on Niagara T1 version (?) */ +#define WORKAROUND_6231196_NIAGARA1_NO_CTR_0 + +/*---------------------------------------------------------------------------*/ +/* consts, macros */ + +/* 10^N rates */ +#define PRELOADS_9 1001000001 +#define PRELOADS_85 320100001 +#define PRELOADS_8 100100001 +#define PRELOADS_75 32010001 +#define PRELOADS_7 10010001 +#define PRELOADS_65 3201001 +#define PRELOADS_6 1001001 +#define PRELOADS_55 320101 +#define PRELOADS_5 100101 +#define PRELOADS_45 32001 +#define PRELOADS_4 10001 +#define PRELOADS_35 3201 +#define PRELOADS_3 1001 +#define PRELOADS_25 301 + +#define ABST_TBD ABST_NONE /* to be determined */ + +/*---------------------------------------------------------------------------*/ +/* prototypes */ +static void hwc_cb (uint_t cpc_regno, const char *name); +static void attrs_cb (const char *attr); +static int attr_is_valid (int forKernel, const char *attr); + +/*---------------------------------------------------------------------------*/ +/* HWC definition tables */ + +/* + comments on hwcentry tables + --------------------------- + name: this field should not contain '~'. + int_name: actual name of register, may contain ~ attribute specifications. + regnum: assigned register. + metric: if non-NULL, is a 'standard' counter that will show up in help. + timecvt: >0: can convert to time, 'timecvt' CPU cycles per event + =0: counts events + <0: can convert to time, count reference-clock cycles at '-timecvt' MHz + memop: see description for ABST_type enum + */ + +// PRELOAD(): generates an interval based on the cycles/event and CPU GHZ. +// Note: the macro tweaks the interval so that it ends in decimal 001. +#define CYC_PER_SAMPLE (1000ULL*1000*1000/100) // cycles per signal at 1ghz, 100 samples/second +#define PRELOAD(min_cycles_per_event,ghz) (((ghz)*CYC_PER_SAMPLE/(min_cycles_per_event))/100*100+1) + +// PRELOAD_DEF: initial value for uncalibrated events. 
+// This value should be based on a rate that will work for the slowest changing +// HWCs, HWCs where there are many CPU cycles between events. +// +// The interval needs to target the slowest HWCs so that +// automatic adjustment of HWC overflow intervals can adapt. +#define PRELOAD_DEF PRELOAD(1000,3) // default interval targets 1000 cycles/event at 3ghz +// For er_kernel, whose HWC intervals cannot be adjusted automatically for ON/HI/LO, +// the interval should target some safe interval for fast events +#define PRELOAD_DEF_ERKERNEL PRELOAD(4,4) // default interval targets 4 cycles/event at 4ghz + +static const Hwcentry empty_ctr = {NULL, NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, 0}; + + +// --- use cycles counter to expose "system_time" on Linux --- +#define SYSTIME_REGNOS REGNO_ANY // Linux: make sys_time/usr_time available for data collection +// Note: For x86, Linux and Solaris use different ref-clock names +#define USE_INTEL_REF_CYCLES(MHZ) \ + {"usr_time","unhalted-reference-cycles", SYSTIME_REGNOS, STXT("User CPU"), PRELOAD(900,MHZ), -(MHZ), ABST_NONE}, \ + {"usr_time","cpu_clk_unhalted.ref_p", SYSTIME_REGNOS, STXT("User CPU"), PRELOAD(900,MHZ), -(MHZ), ABST_NONE}, \ + {"sys_time","unhalted-reference-cycles~system=1~user=0", SYSTIME_REGNOS, STXT("System CPU"), PRELOAD(900,MHZ), -(MHZ), ABST_NONE}, \ + {"sys_time","cpu_clk_unhalted.ref_p~system=1~user=0", SYSTIME_REGNOS, STXT("System CPU"), PRELOAD( 900,MHZ), -(MHZ), ABST_NONE}, \ + {"cycles0", "unhalted-reference-cycles", 0, NULL, PRELOAD( 900,MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \ + {"cycles0", "cpu_clk_unhalted.ref_p", 0, NULL, PRELOAD( 900,MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \ + {"cycles1", "unhalted-reference-cycles", 1, NULL, PRELOAD( 910,MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \ + {"cycles1", "cpu_clk_unhalted.ref_p", 1, NULL, PRELOAD( 910,MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \ + /* end of list */ + +#define SPARC_CYCLES \ + {"usr_time","Cycles_user", SYSTIME_REGNOS, STXT("User CPU"),
PRELOADS_75,1, ABST_NONE}, \ + {"sys_time","Cycles_user~system=1~user=0", SYSTIME_REGNOS, STXT("System CPU"), PRELOADS_75,1, ABST_NONE}, \ + /* end of list */ + + +/* --- PERF_EVENTS "software" definitions --- */ +#define PERF_EVENTS_SW_EVENT_ALIASES \ +// none supported for now +#if 0 + {"usr", "PERF_COUNT_SW_TASK_CLOCK", REGNO_ANY, STXT("User CPU"), PRELOADS_7, -(1000), ABST_NONE}, \ + {"sys", "PERF_COUNT_SW_TASK_CLOCK~system=1~user=0", REGNO_ANY, STXT("System CPU"), PRELOADS_7, -(1000), ABST_NONE}, \ + /* end of list */ +#endif + +#define PERF_EVENTS_SW_EVENT_DEFS \ +// none supported for now +#if 0 + {"PERF_COUNT_SW_TASK_CLOCK", NULL, REGNO_ANY, NULL, PRELOADS_7, -(1000),ABST_NONE}, \ + /* end of list */ +#endif + +/* + * The PAPI descriptive strings used to be wrapped with STXT(), + * a macro defined in perfan/include/i18n.h. For the time being, + * we want to demote the PAPI counters by omitting the + * descriptions. So we use a new macro PAPITXT() for this purpose. + */ +#define PAPITXT(x) NULL + +/* Solaris "Generic" Counters */ +static Hwcentry papi_generic_list[] = { + {"PAPI_l1_dcm", NULL, REGNO_ANY, PAPITXT ("L1 D-cache misses"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l1_icm", NULL, REGNO_ANY, PAPITXT ("L1 I-cache misses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l2_dcm", NULL, REGNO_ANY, PAPITXT ("L2 D-cache misses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l2_icm", NULL, REGNO_ANY, PAPITXT ("L2 I-cache misses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l3_dcm", NULL, REGNO_ANY, PAPITXT ("L3 D-cache misses"), PRELOADS_5, 0, ABST_NONE}, + {"PAPI_l3_icm", NULL, REGNO_ANY, PAPITXT ("L3 I-cache misses"), PRELOADS_5, 0, ABST_NONE}, + {"PAPI_l1_tcm", NULL, REGNO_ANY, PAPITXT ("L1 misses"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l2_tcm", NULL, REGNO_ANY, PAPITXT ("L2 misses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l3_tcm", NULL, REGNO_ANY, PAPITXT ("L3 misses"), PRELOADS_5, 0, ABST_NONE}, + {"PAPI_ca_snp", NULL, REGNO_ANY, PAPITXT ("Requests for a snoop"), PRELOADS_6, 0, 
ABST_NONE}, + {"PAPI_ca_shr", NULL, REGNO_ANY, PAPITXT ("Requests for exclusive access to shared cache line"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_ca_cln", NULL, REGNO_ANY, PAPITXT ("Requests for exclusive access to clean cache line"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_ca_inv", NULL, REGNO_ANY, PAPITXT ("Requests for cache line invalidation"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_ca_itv", NULL, REGNO_ANY, PAPITXT ("Requests for cache line intervention"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l3_ldm", NULL, REGNO_ANY, PAPITXT ("L3 load misses"), PRELOADS_5, 0, ABST_NONE}, + {"PAPI_l3_stm", NULL, REGNO_ANY, PAPITXT ("L3 store misses"), PRELOADS_5, 0, ABST_NONE}, + {"PAPI_bru_idl", NULL, REGNO_ANY, PAPITXT ("Cycles branch units are idle"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_fxu_idl", NULL, REGNO_ANY, PAPITXT ("Cycles integer units are idle"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_fpu_idl", NULL, REGNO_ANY, PAPITXT ("Cycles FP units are idle"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_lsu_idl", NULL, REGNO_ANY, PAPITXT ("Cycles load/store units are idle"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_tlb_dm", NULL, REGNO_ANY, PAPITXT ("DTLB misses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_tlb_im", NULL, REGNO_ANY, PAPITXT ("ITLB misses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_tlb_tl", NULL, REGNO_ANY, PAPITXT ("Total TLB misses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_tlb_tm", NULL, REGNO_ANY, PAPITXT ("Total TLB misses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l1_ldm", NULL, REGNO_ANY, PAPITXT ("L1 load misses"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l1_stm", NULL, REGNO_ANY, PAPITXT ("L1 store misses"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l2_ldm", NULL, REGNO_ANY, PAPITXT ("L2 load misses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l2_stm", NULL, REGNO_ANY, PAPITXT ("L2 store misses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_btac_m", NULL, REGNO_ANY, PAPITXT ("Branch target address cache misses"), PRELOADS_5, 0, ABST_NONE}, + {"PAPI_prf_dm", NULL, REGNO_ANY, PAPITXT ("Data prefetch cache misses"), 
PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l3_dch", NULL, REGNO_ANY, PAPITXT ("L3 D-cache hits"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_tlb_sd", NULL, REGNO_ANY, PAPITXT ("TLB shootdowns"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_csr_fal", NULL, REGNO_ANY, PAPITXT ("Failed store conditional instructions"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_csr_suc", NULL, REGNO_ANY, PAPITXT ("Successful store conditional instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_csr_tot", NULL, REGNO_ANY, PAPITXT ("Total store conditional instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_mem_scy", NULL, REGNO_ANY, PAPITXT ("Cycles Stalled Waiting for memory accesses"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_mem_rcy", NULL, REGNO_ANY, PAPITXT ("Cycles Stalled Waiting for memory reads"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_mem_wcy", NULL, REGNO_ANY, PAPITXT ("Cycles Stalled Waiting for memory writes"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_stl_icy", NULL, REGNO_ANY, PAPITXT ("Cycles with no instruction issue"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_ful_icy", NULL, REGNO_ANY, PAPITXT ("Cycles with maximum instruction issue"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_stl_ccy", NULL, REGNO_ANY, PAPITXT ("Cycles with no instructions completed"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_ful_ccy", NULL, REGNO_ANY, PAPITXT ("Cycles with maximum instructions completed"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_hw_int", NULL, REGNO_ANY, PAPITXT ("Hardware interrupts"), PRELOADS_5, 0, ABST_NONE}, + {"PAPI_br_ucn", NULL, REGNO_ANY, PAPITXT ("Unconditional branch instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_br_cn", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_br_tkn", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions taken"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_br_ntk", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions not taken"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_br_msp", NULL, REGNO_ANY, PAPITXT ("Cond. 
branch instructions mispredicted"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_br_prc", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions correctly predicted"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_fma_ins", NULL, REGNO_ANY, PAPITXT ("FMA instructions completed"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_tot_iis", NULL, REGNO_ANY, PAPITXT ("Instructions issued"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_tot_ins", NULL, REGNO_ANY, PAPITXT ("Instructions completed"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_int_ins", NULL, REGNO_ANY, PAPITXT ("Integer instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_fp_ins", NULL, REGNO_ANY, PAPITXT ("Floating-point instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_ld_ins", NULL, REGNO_ANY, PAPITXT ("Load instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_sr_ins", NULL, REGNO_ANY, PAPITXT ("Store instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_br_ins", NULL, REGNO_ANY, PAPITXT ("Branch instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_vec_ins", NULL, REGNO_ANY, PAPITXT ("Vector/SIMD instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_res_stl", NULL, REGNO_ANY, PAPITXT ("Cycles stalled on any resource"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_fp_stal", NULL, REGNO_ANY, PAPITXT ("Cycles the FP unit(s) are stalled"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_tot_cyc", NULL, REGNO_ANY, PAPITXT ("Total cycles"), PRELOADS_7, 1, ABST_NONE}, + {"PAPI_lst_ins", NULL, REGNO_ANY, PAPITXT ("Load/store instructions completed"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_syc_ins", NULL, REGNO_ANY, PAPITXT ("Sync instructions completed"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l1_dch", NULL, REGNO_ANY, PAPITXT ("L1 D-cache hits"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_l2_dch", NULL, REGNO_ANY, PAPITXT ("L2 D-cache hits"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l1_dca", NULL, REGNO_ANY, PAPITXT ("L1 D-cache accesses"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_l2_dca", NULL, REGNO_ANY, PAPITXT ("L2 D-cache accesses"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l3_dca", NULL, REGNO_ANY, 
PAPITXT ("L3 D-cache accesses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l1_dcr", NULL, REGNO_ANY, PAPITXT ("L1 D-cache reads"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_l2_dcr", NULL, REGNO_ANY, PAPITXT ("L2 D-cache reads"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l3_dcr", NULL, REGNO_ANY, PAPITXT ("L3 D-cache reads"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l1_dcw", NULL, REGNO_ANY, PAPITXT ("L1 D-cache writes"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_l2_dcw", NULL, REGNO_ANY, PAPITXT ("L2 D-cache writes"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l3_dcw", NULL, REGNO_ANY, PAPITXT ("L3 D-cache writes"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l1_ich", NULL, REGNO_ANY, PAPITXT ("L1 I-cache hits"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_l2_ich", NULL, REGNO_ANY, PAPITXT ("L2 I-cache hits"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l3_ich", NULL, REGNO_ANY, PAPITXT ("L3 I-cache hits"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l1_ica", NULL, REGNO_ANY, PAPITXT ("L1 I-cache accesses"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_l2_ica", NULL, REGNO_ANY, PAPITXT ("L2 I-cache accesses"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l3_ica", NULL, REGNO_ANY, PAPITXT ("L3 I-cache accesses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l1_icr", NULL, REGNO_ANY, PAPITXT ("L1 I-cache reads"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_l2_icr", NULL, REGNO_ANY, PAPITXT ("L2 I-cache reads"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l3_icr", NULL, REGNO_ANY, PAPITXT ("L3 I-cache reads"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l1_icw", NULL, REGNO_ANY, PAPITXT ("L1 I-cache writes"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_l2_icw", NULL, REGNO_ANY, PAPITXT ("L2 I-cache writes"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l3_icw", NULL, REGNO_ANY, PAPITXT ("L3 I-cache writes"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l1_tch", NULL, REGNO_ANY, PAPITXT ("L1 total hits"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_l2_tch", NULL, REGNO_ANY, PAPITXT ("L2 total hits"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l3_tch", NULL, REGNO_ANY, PAPITXT ("L3 total hits"), PRELOADS_6, 0, 
ABST_NONE}, + {"PAPI_l1_tca", NULL, REGNO_ANY, PAPITXT ("L1 total accesses"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_l2_tca", NULL, REGNO_ANY, PAPITXT ("L2 total accesses"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l3_tca", NULL, REGNO_ANY, PAPITXT ("L3 total accesses"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l1_tcr", NULL, REGNO_ANY, PAPITXT ("L1 total reads"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_l2_tcr", NULL, REGNO_ANY, PAPITXT ("L2 total reads"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l3_tcr", NULL, REGNO_ANY, PAPITXT ("L3 total reads"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_l1_tcw", NULL, REGNO_ANY, PAPITXT ("L1 total writes"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_l2_tcw", NULL, REGNO_ANY, PAPITXT ("L2 total writes"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_l3_tcw", NULL, REGNO_ANY, PAPITXT ("L3 total writes"), PRELOADS_6, 0, ABST_NONE}, + {"PAPI_fml_ins", NULL, REGNO_ANY, PAPITXT ("FP multiply instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_fad_ins", NULL, REGNO_ANY, PAPITXT ("FP add instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_fdv_ins", NULL, REGNO_ANY, PAPITXT ("FP divide instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_fsq_ins", NULL, REGNO_ANY, PAPITXT ("FP square root instructions"), PRELOADS_65, 0, ABST_NONE}, + {"PAPI_fnv_ins", NULL, REGNO_ANY, PAPITXT ("FP inverse instructions"), PRELOADS_7, 0, ABST_NONE}, + {"PAPI_fp_ops", NULL, REGNO_ANY, PAPITXT ("FP operations"), PRELOADS_7, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry usIlist[] = { + {"cycles", "Cycle_cnt", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"insts", "Instr_cnt", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry usIIIlist[] = /* III, IIIi, IIIp. 
Note that some counters are processor-specific */{ + {"cycles", "Cycle_cnt", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"insts", "Instr_cnt", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE}, + {"icm", "IC_miss", REGNO_ANY, STXT ("I$ Misses"), PRELOADS_5, 0, ABST_NONE}, + {"dcrm", "DC_rd_miss", REGNO_ANY, STXT ("D$ Read Misses"), PRELOADS_5, 0, ABST_LOAD}, + {"dcwm", "DC_wr_miss", REGNO_ANY, STXT ("D$ Write Misses"), PRELOADS_5, 0, ABST_STORE}, + {"dcr", "DC_rd", REGNO_ANY, STXT ("D$ Read Refs"), PRELOADS_6, 0, ABST_LOAD}, + {"dcw", "DC_wr", REGNO_ANY, STXT ("D$ Write Refs"), PRELOADS_6, 0, ABST_STORE}, + {"ecref", "EC_ref", REGNO_ANY, STXT ("E$ Refs"), PRELOADS_6, 0, ABST_LDST}, + {"itlbm", "ITLB_miss", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE}, + {"dtlbm", "DTLB_miss", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_5, 0, ABST_US_DTLBM}, + {"ecm", "EC_misses", REGNO_ANY, STXT ("E$ Misses"), PRELOADS_5, 0, ABST_LDST}, + {"ecrm", "EC_rd_miss", REGNO_ANY, STXT ("E$ Read Misses"), PRELOADS_5, 0, ABST_LOAD}, + {"ecml", "EC_miss_local", REGNO_ANY, STXT ("E$ Local Misses"), PRELOADS_5, 0, ABST_LDST}, + {"ecmr", "EC_miss_remote", REGNO_ANY, STXT ("E$ Remote Misses"), PRELOADS_5, 0, ABST_LDST}, + {"ecim", "EC_ic_miss", REGNO_ANY, STXT ("E$ Instr. 
Misses"), PRELOADS_5, 0, ABST_NONE}, + {"icstall", "Dispatch0_IC_miss", REGNO_ANY, STXT ("I$ Stall Cycles"), PRELOADS_6, 1, ABST_NONE}, + {"dcstall", "Re_DC_miss", REGNO_ANY, STXT ("D$ and E$ Stall Cycles"), PRELOADS_6, 1, ABST_LOAD}, + {"ecstall", "Re_EC_miss", REGNO_ANY, STXT ("E$ Stall Cycles"), PRELOADS_6, 1, ABST_LOAD}, + {"sqstall", "Rstall_storeQ", REGNO_ANY, STXT ("StoreQ Stall Cycles"), PRELOADS_6, 1, ABST_STORE}, + {"rawstall", "Re_RAW_miss", REGNO_ANY, STXT ("RAW Stall Cycles"), PRELOADS_6, 1, ABST_LOAD}, + {"dcmissov", "Re_DC_missovhd", REGNO_ANY, STXT ("DC Miss Ovhd"), PRELOADS_6, 1, ABST_LOAD}, + {"fpustall", "Re_FPU_bypass", REGNO_ANY, STXT ("FPU Stall Cycles"), PRELOADS_6, 1, ABST_NONE}, + {"fpusestall", "Rstall_FP_use", REGNO_ANY, STXT ("FPU Use Stall Cycles"), PRELOADS_6, 1, ABST_NONE}, + {"iustall", "Rstall_IU_use", REGNO_ANY, STXT ("IU Stall Cycles"), PRELOADS_6, 1, ABST_NONE}, + {"fpadd", "FA_pipe_completion", REGNO_ANY, STXT ("FP Adds"), PRELOADS_6, 0, ABST_NONE}, + {"fpmul", "FM_pipe_completion", REGNO_ANY, STXT ("FP Muls"), PRELOADS_6, 0, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {"Cycle_cnt", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE}, + {"EC_miss_mtag_remote", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"DC_rd_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"DC_wr_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"DC_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"DC_wr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"EC_ref", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"EC_snoop_inv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/}, + {"EC_wb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"EC_wb_remote", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"DTLB_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, 
ABST_US_DTLBM}, + {"EC_misses", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"EC_rd_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"PC_port0_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"EC_miss_local", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"EC_miss_remote", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"EC_snoop_cb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/}, + {"WC_snoop_cb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/}, + {"WC_scrubbed", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"WC_wb_wo_read", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"PC_MS_misses", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"PC_soft_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"PC_hard_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"PC_port1_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"PC_snoop_inv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE /*?*/}, + {"SW_count_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_COUNT}, + {"SW_count_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_COUNT}, + {"Dispatch0_IC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Dispatch0_mispred", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Dispatch0_br_target", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Dispatch0_2nd_br", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Dispatch_rs_mispred", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Rstall_storeQ", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_STORE}, + {"Rstall_FP_use", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Rstall_IU_use", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"EC_write_hit_RTO", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"Re_RAW_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD}, + {"Re_DC_missovhd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD}, + {"Re_endian_miss", NULL, REGNO_ANY, NULL, 
PRELOAD_DEF, 1, ABST_LOAD}, + {"Re_FPU_bypass", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Re_DC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD}, + {"Re_EC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD}, + {"Re_PC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD}, + {"SI_snoop", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"SI_ciq_flow", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"SI_owned", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_msl_busy_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC}, + {"MC_mdb_overflow_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC}, + {"MC_page_close_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC}, + {"MC_reads_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_reads_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_reads_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_reads_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_writes_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_writes_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_writes_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_writes_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_stalls_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC}, + {"MC_stalls_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC}, + {"MC_stalls_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC}, + {"MC_stalls_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC}, + + /* additional (hidden) aliases, for convenience */ + {"cycles0", "Cycle_cnt", 0, NULL, PRELOADS_75, 1, ABST_NONE}, + {"cycles1", "Cycle_cnt", 1, NULL, PRELOADS_75, 1, ABST_NONE}, + {"insts0", "Instr_cnt", 0, NULL, PRELOADS_75, 0, ABST_NONE}, + {"insts1", "Instr_cnt", 1, NULL, PRELOADS_75, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry usIVplist[] = { + {"cycles", "Cycle_cnt", REGNO_ANY, STXT 
("CPU Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"insts", "Instr_cnt", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE}, + {"icm", "IC_fill", REGNO_ANY, STXT ("I$ Misses"), PRELOADS_5, 0, ABST_NONE}, + {"dcrm", "DC_rd_miss", REGNO_ANY, STXT ("D$ Read Misses"), PRELOADS_5, 0, ABST_LOAD}, + {"dcwm", "DC_wr_miss", REGNO_ANY, STXT ("D$ Write Misses"), PRELOADS_5, 0, ABST_STORE}, + {"dcr", "DC_rd", REGNO_ANY, STXT ("D$ Read Refs"), PRELOADS_6, 0, ABST_LOAD}, + {"dcw", "DC_wr", REGNO_ANY, STXT ("D$ Write Refs"), PRELOADS_6, 0, ABST_STORE}, + {"itlbm", "ITLB_miss", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE}, + {"dtlbm", "DTLB_miss", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_5, 0, ABST_US_DTLBM}, + {"l2ref", "L2_ref", REGNO_ANY, STXT ("L2$ Refs"), PRELOADS_5, 0, ABST_LDST}, + {"l2m", "L2_miss", REGNO_ANY, STXT ("L2$ Misses"), PRELOADS_5, 0, ABST_LDST}, + {"l2rm", "L2_rd_miss", REGNO_ANY, STXT ("L2$ Read Misses"), PRELOADS_5, 0, ABST_LOAD}, + {"l2im", "L2_IC_miss", REGNO_ANY, STXT ("L2$ Instr. Misses"), PRELOADS_5, 0, ABST_NONE}, + {"ecm", "L3_miss", REGNO_ANY, STXT ("E$ Misses"), PRELOADS_5, 0, ABST_LDST}, + {"ecrm", "L3_rd_miss", REGNO_ANY, STXT ("E$ Read Misses"), PRELOADS_5, 0, ABST_LOAD}, + {"ecml", "SSM_L3_miss_local", REGNO_ANY, STXT ("E$ Local Misses"), PRELOADS_5, 0, ABST_LDST}, + {"ecmr", "SSM_L3_miss_remote", REGNO_ANY, STXT ("E$ Remote Misses"), PRELOADS_5, 0, ABST_LDST}, + {"ecim", "L3_IC_miss", REGNO_ANY, STXT ("E$ Instr. 
Misses"), PRELOADS_5, 0, ABST_NONE}, + {"icstall", "Dispatch0_IC_miss", REGNO_ANY, STXT ("I$ Stall Cycles"), PRELOADS_6, 1, ABST_NONE}, + {"dcstall", "Re_DC_miss", REGNO_ANY, STXT ("D$ and E$ Stall Cycles"), PRELOADS_6, 1, ABST_LOAD}, + {"ecstall", "Re_L3_miss", REGNO_ANY, STXT ("E$ Stall Cycles"), PRELOADS_6, 1, ABST_LOAD}, + {"sqstall", "Rstall_storeQ", REGNO_ANY, STXT ("StoreQ Stall Cycles"), PRELOADS_6, 1, ABST_STORE}, + {"rawstall", "Re_RAW_miss", REGNO_ANY, STXT ("RAW Stall Cycles"), PRELOADS_6, 1, ABST_LOAD}, + {"dcmissov", "Re_DC_missovhd", REGNO_ANY, STXT ("DC Miss Ovhd"), PRELOADS_6, 1, ABST_LOAD}, + {"fpustall", "Re_FPU_bypass", REGNO_ANY, STXT ("FPU Stall Cycles"), PRELOADS_6, 1, ABST_NONE}, + {"fpusestall", "Rstall_FP_use", REGNO_ANY, STXT ("FPU Use Stall Cycles"), PRELOADS_6, 1, ABST_NONE}, + {"iustall", "Rstall_IU_use", REGNO_ANY, STXT ("IU Stall Cycles"), PRELOADS_6, 1, ABST_NONE}, + {"fpadd", "FA_pipe_completion", REGNO_ANY, STXT ("FP Adds"), PRELOADS_6, 0, ABST_NONE}, + {"fpmul", "FM_pipe_completion", REGNO_ANY, STXT ("FP Muls"), PRELOADS_6, 0, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {"Cycle_cnt", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE}, + {"DC_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"DC_rd_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"DC_wr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"DC_wr_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"DTLB_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_US_DTLBM}, + {"Dispatch0_2nd_br", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Dispatch0_IC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Dispatch0_other", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2L3_snoop_cb_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/}, + {"L2L3_snoop_inv_sh", NULL, REGNO_ANY, 
NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/}, + {"L2_hit_I_state_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST /*?*/}, + {"L2_hit_other_half", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"L2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"L2_rd_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"L2_ref", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"L2_snoop_cb_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/}, + {"L2_snoop_inv_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/}, + {"L2_wb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"L2_wb_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"L2_write_hit_RTO", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"L2_write_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"L3_hit_I_state_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"L3_hit_other_half", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"L3_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"L3_rd_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"L3_wb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"L3_wb_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"L3_write_hit_RTO", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"L3_write_miss_RTO", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE}, + {"MC_reads_0_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_reads_1_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_reads_2_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_reads_3_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_stalls_0_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC}, + {"MC_stalls_1_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC}, + {"MC_stalls_2_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC}, + {"MC_stalls_3_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC}, + {"MC_writes_0_sh", NULL, 
REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_writes_1_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_writes_2_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"MC_writes_3_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + /*? {"PC_MS_misses", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, */ + {"PC_hard_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"PC_inv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE /*?*/}, + {"PC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"PC_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"PC_soft_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, + {"Re_DC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD}, + {"Re_DC_missovhd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD}, + {"Re_FPU_bypass", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Re_L2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD}, + {"Re_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD}, + {"Re_PFQ_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Re_RAW_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD}, + {"Rstall_FP_use", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Rstall_IU_use", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Rstall_storeQ", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_STORE}, + {"SI_RTO_src_data", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"SI_RTS_src_data", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"SI_ciq_flow_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC}, + {"SI_owned_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"SI_snoop_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC}, + {"ecml", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"ecmr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, + {"SSM_L3_miss_local", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST /*?*/}, + {"SSM_L3_miss_mtag_remote", NULL, REGNO_ANY, NULL, 
PRELOAD_DEF, 0, ABST_LDST /*?*/}, + {"SSM_L3_miss_remote", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST /*?*/}, + {"SSM_L3_wb_remote", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE /*?*/}, + {"SSM_new_transaction_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_TBD /*?*/}, + + /* additional (hidden) aliases, for convenience */ + {"cycles0", "Cycle_cnt", 0, NULL, PRELOADS_75, 1, ABST_NONE}, + {"cycles1", "Cycle_cnt", 1, NULL, PRELOADS_75, 1, ABST_NONE}, + {"insts0", "Instr_cnt", 0, NULL, PRELOADS_75, 0, ABST_NONE}, + {"insts1", "Instr_cnt", 1, NULL, PRELOADS_75, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry niagara1[] = + /* CPC_ULTRA_T1 , "UltraSPARC T1" */{ + {"insts", "Instr_cnt", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE}, +#ifndef WORKAROUND_6231196_NIAGARA1_NO_CTR_0 /* since register 0 counter don't work XXX */ + {"icm", "IC_miss", REGNO_ANY, STXT ("I$ Misses"), PRELOADS_5, 0, ABST_NONE}, + {"itlbm", "ITLB_miss", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE}, + {"ecim", "L2_imiss", REGNO_ANY, STXT ("E$ Instr. 
Misses"), PRELOADS_4, 0, ABST_NONE}, + {"dcm", "DC_miss", REGNO_ANY, STXT ("D$ Misses"), PRELOADS_5, 0, ABST_EXACT}, + {"dtlbm", "DTLB_miss", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_5, 0, ABST_EXACT}, + {"ecdm", "L2_dmiss_ld", REGNO_ANY, STXT ("E$ Data Misses"), PRELOADS_4, 0, ABST_EXACT}, + {"flops", "FP_instr_cnt", REGNO_ANY, STXT ("Floating-point Ops"), PRELOADS_6, 0, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {"SB_full", NULL, REGNO_ANY, NULL, PRELOADS_6, 1, ABST_NONE}, + {"DC_miss", NULL, REGNO_ANY, NULL, PRELOADS_6, 0, ABST_EXACT}, + {"DTLB_miss", NULL, REGNO_ANY, NULL, PRELOADS_6, 0, ABST_EXACT}, + {"L2_dmiss_ld", NULL, REGNO_ANY, NULL, PRELOADS_6, 0, ABST_EXACT}, +#endif + + /* additional (hidden) aliases, for convenience */ + {"insts1", "Instr_cnt", 1, NULL, PRELOADS_75, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry niagara2[] = { + /* CPC_ULTRA_T2 , "UltraSPARC T2" */ + /* CPC_ULTRA_T2 , "UltraSPARC T2+" */ + {"insts", "Instr_cnt", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE}, + {"loads", "Instr_ld", REGNO_ANY, STXT ("Load Instructions"), PRELOADS_7, 0, ABST_EXACT}, + {"stores", "Instr_st", REGNO_ANY, STXT ("Store Instructions"), PRELOADS_6, 0, ABST_EXACT}, + {"dcm", "DC_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_6, 0, ABST_EXACT}, + {"dtlbm", "DTLB_miss", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE}, + {"l2drm", "L2_dmiss_ld", REGNO_ANY, STXT ("L2 D-cache Read Misses (See Bug 15664448)"), PRELOADS_5, 0, ABST_EXACT}, + {"icm", "IC_miss", REGNO_ANY, STXT ("L1 I-cache Misses"), PRELOADS_5, 0, ABST_NONE}, + {"itlbm", "ITLB_miss", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE}, + {"l2im", "L2_imiss", REGNO_ANY, STXT ("L2 I-cache Misses"), PRELOADS_4, 0, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper 
counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {"Instr_ld", NULL, REGNO_ANY, NULL, PRELOADS_7, 0, ABST_EXACT}, + {"Instr_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT}, + {"Atomics", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT}, + {"DC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT}, + {"L2_dmiss_ld", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT}, + {"DTLB_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + {"DES_3DES_busy_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"AES_busy_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Kasumi_busy_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"MD5_SHA-1_SHA-256_busy_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"MA_busy_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + /* additional (hidden) aliases, for convenience */ + {"insts1", "Instr_cnt", 1, NULL, PRELOADS_75, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry sparc_t4[] = { + // Identical to sparc_t5_m6 except for: l3m_spec + // when updating this table, also update sparc_t5_m6[] + // obsolete aliases marked with REGNO_INVALID (allows reading of older experiments) + {"l2l3dh", "DC_miss_L2_L3_hit_nospec", REGNO_INVALID, STXT ("L2 or L3 D-cache Hits"), PRELOADS_6, 0, ABST_EXACT}, // undercounts due to thread-hog issue + {"l3m", "DC_miss_remote_L3_hit_nospec~emask=0x6", REGNO_INVALID, STXT ("L3 D-cache Misses"), PRELOADS_5, 0, ABST_EXACT}, // undercounts due to thread-hog issue + {"lmh", "DC_miss_local_hit_nospec", REGNO_INVALID, STXT ("Local Mem. Hits"), PRELOADS_5, 0, ABST_EXACT}, // undercounts due to thread-hog issue + {"rmh", "DC_miss_remote_L3_hit_nospec", REGNO_INVALID, STXT ("Remote Mem. 
Hits"), PRELOADS_5, 0, ABST_EXACT}, // undercounts due to thread-hog issue + {"pqs", "PQ_tag_wait", REGNO_INVALID, STXT ("Pick Queue Stalls"), PRELOADS_7, 1, ABST_NONE}, // old alias name + {"raw_stb", "RAW_hit_st_buf", REGNO_INVALID, STXT ("RAW Hazard in Store Buffer"), PRELOADS_55, 0, ABST_NONE}, // 11@full hit, 60@partial hit (in future, combine w/st_q) + {"raw_stq", "RAW_hit_st_q", REGNO_INVALID, STXT ("RAW Hazard in Store Queue"), PRELOADS_55, 0, ABST_NONE}, // 11@full hit, 60@partial hit (in future, combine w/st_buf) + {"sel_stalls", "Sel_0_ready", REGNO_INVALID, STXT ("Stalls Another Thread Selected"), PRELOADS_7, 1, ABST_NONE}, + {"icm", "IC_miss", REGNO_INVALID, STXT ("L1 I-Cache Misses"), PRELOADS_55, 0, ABST_NONE}, // 20@ l2/l3 hit (guess) + {"icm_stalls", "IC_miss", REGNO_INVALID, STXT ("L1 I-Cache Miss Est Stalls"), PRELOADS_55, 25, ABST_NONE}, // 25@ l2-20/l3-50 + + // current aliases + SPARC_CYCLES + {"cycles", "Cycles_user", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "Instr_all", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + {"c_stalls", "Commit_0", REGNO_ANY, STXT ("Stall Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"loads", "Instr_ld", REGNO_ANY, STXT ("Load Instructions"), PRELOADS_7, 0, ABST_EXACT}, + {"stores", "Instr_st", REGNO_ANY, STXT ("Store Instructions"), PRELOADS_7, 0, ABST_EXACT}, + {"dcm", "DC_miss_nospec", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_EXACT}, + {"l3m_spec", "DC_miss_local_hit~emask=0x6", REGNO_ANY, STXT ("L3 D-cache Speculative Misses"), PRELOADS_5, 0, ABST_NONE, STXT ("Loads that speculatively missed local L3")}, // T4 encoding (430 lm, 690 rm) ~5 misses overlap on t5/pico_ile + // {"l3m_spec", "DC_miss_local_hit~emask=0x30", REGNO_ANY, STXT("L3 D-cache Speculative Misses"),PRELOADS_5,0, ABST_NONE, STXT("Loads that speculatively missed local L3")}, // T5/M6 encoding (430 lm, 690 rm) ~5 misses overlap on t5/pico_ile + {"lmh_spec", "DC_miss_local_hit", 
REGNO_ANY, STXT ("Local Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE}, + {"rmh_spec", "DC_miss_remote_L3_hit", REGNO_ANY, STXT ("Remote Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE}, + // + {"dtlbm", "DTLB_miss_asynch", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_55, 0, ABST_NONE}, // 10@l1 hit, 24@l2 hit, 60@l3 hit, 500@l3 miss, 5000@trap 0.001 events/cycle + {"dtlb_hwtw_stalls", "DTLB_HWTW_all", REGNO_ANY, STXT ("DTLB HWTW Est Stalls"), PRELOADS_55, 25, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss requiring a HW tablewalk")}, // l2-20, l3-50 + {"dtlb_trap_stalls", "DTLB_fill_trap", REGNO_ANY, STXT ("DTLB Trap Est Stalls"), PRELOADS_35, 5000, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss with HW tablewalk unsuccessful")}, // 5000@trap + {"rawhaz", "RAW_hit_st_q~emask=0xf", REGNO_ANY, STXT ("Read-after-write Hazards"), PRELOADS_55, 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write hazards)")}, + {"br_msp_stalls", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict Stalls"), PRELOADS_6, 24, ABST_NONE, STXT ("Estimated time stalled on Branch mispredictions")}, // 24@miss, %5 of branches is bad + {"br_msp", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict"), PRELOADS_6, 0, ABST_NONE}, // 24@miss, %5 of branches is bad + {"br_tkn", "Br_taken", REGNO_ANY, STXT ("Branch Taken"), PRELOADS_7, 0, ABST_NONE}, // 2 cycles minimum + {"br_ins", "Branches", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE}, // 24@miss, %5 of branches is bad + {"fgu", "Instr_FGU_crypto", REGNO_ANY, STXT ("FP/VIS/Crypto Instructions"), PRELOADS_7, 0, ABST_NONE}, // 1 cycle/event + + /* explicit definitions of (hidden) entries for proper counters */ + /* Counters that can be time converted, support memspace, or have a short_desc need to be in this table */ + + {"Sel_pipe_drain_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting with correct instructions when pipeline has 
to drain after branch misprediction")}, + {"Sel_0_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting for various conditions to be resolved")}, + {"Sel_0_ready", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread was ready to have its instructions selected but another hardware thread was selected instead")}, + {"Sel_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles that only 1 instruction or uop was selected")}, + {"Sel_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles that 2 instructions or uops were selected")}, + + {"Pick_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Pick_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Pick_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Pick_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Pick_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"Branches", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Control transfer instructions completed, excluding trap-related transfers")}, + {"Instr_FGU_crypto", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("FP and VIS instructions completed by the Floating Point and Graphics Unit")}, + {"Instr_ld", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Load instructions completed")}, + {"Instr_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Store instructions completed")}, + {"SPR_ring_ops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Specialized instructions that require internal use of SPR ring completed")}, + {"Instr_other", NULL, REGNO_ANY, NULL, PRELOAD (2, 4), 0, ABST_NONE, STXT ("Basic arithmetic and logical instructions completed")}, + {"Instr_all", NULL, REGNO_ANY, NULL, PRELOAD (1, 4), 0, ABST_NONE, STXT ("Total instructions completed")}, + + {"Br_taken", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branch instructions taken and 
completed")}, + {"Sw_count_intr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("SW Count instructions completed")}, + {"Atomics", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_EXACT, STXT ("Atomic instructions, including CASA/XA, completed")}, + {"SW_prefetch", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("PREFETCH and PREFETCHA instructions completed")}, + {"Block_ld_st", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_EXACT, STXT ("Block load/store instructions completed")}, + + {"BTC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branches delayed a few extra cycles because branch target not found in Branch Target Cache")}, + + {"ITLB_fill_8KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 8K page")}, + {"ITLB_fill_64KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 64K page")}, + {"ITLB_fill_4MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 4M page")}, + {"ITLB_fill_256MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 256M page")}, + {"ITLB_fill_2GB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 2G or 16G page")}, + {"ITLB_fill_trap", NULL, REGNO_ANY, NULL, PRELOAD (1000, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk unsuccessful")}, + {"ITLB_miss_asynch", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk search done")}, + + {"Fetch_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_0_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"Instr_buffer_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"PQ_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, 
ABST_NONE}, + {"ROB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"ROB_LB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"ROB_SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LB_SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"ROB_LB_SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"DTLB_miss_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"ITLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L2D")}, + {"ITLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L3 or neighbor L2D")}, + {"ITLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk missed all local caches")}, + {"DTLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L2D")}, + {"DTLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L3 or neighbor L2D")}, + {"DTLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk missed all local caches")}, + {"DTLB_HWTW_all", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss requiring HW tablewalk")}, + + {"DC_miss_L2_L3_hit_nospec", NULL, REGNO_ANY, NULL, PRELOAD (25, 4), 0, ABST_EXACT}, + {"DC_miss_local_hit_nospec", NULL, REGNO_ANY, NULL, PRELOAD (500, 4), 0, ABST_EXACT}, + {"DC_miss_remote_L3_hit_nospec", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_EXACT}, + {"DC_miss_nospec", NULL, REGNO_ANY, NULL, PRELOAD (25, 4), 0, ABST_EXACT, STXT ("Loads that missed local L1D")}, + + {"DTLB_fill_8KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, 
STXT ("DTLB miss and HW tablewalk successfully loaded translation for 8K page")}, + {"DTLB_fill_64KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 64K page")}, + {"DTLB_fill_4MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 4M page")}, + {"DTLB_fill_256MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 256M page")}, + {"DTLB_fill_2GB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 2G or 16G page")}, + {"DTLB_fill_trap", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk unsuccessful")}, + {"DTLB_miss_asynch", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk search done")}, + {"RAW_hit_st_buf", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) still in store buffer not yet committed")}, + {"RAW_hit_st_q", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) committed but in store queue not yet written to L2D")}, + + {"St_q_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"St_hit_L2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L2D")}, + {"St_hit_L3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L3")}, + + {"DC_miss_L2_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local L2D or L3")}, + {"DC_miss_local_hit", NULL, REGNO_ANY, NULL, PRELOAD (500, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local memory")}, + {"DC_miss_remote_L3_hit", NULL, REGNO_ANY, NULL, 
PRELOAD (800, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit remote cache or remote memory")}, + {"DC_miss", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_NONE, STXT ("Loads that speculatively missed L1D")}, + + {"L2_pipe_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"Br_dir_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose direction was mispredicted")}, + {"Br_trg_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose target was mispredicted")}, + {"Br_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose direction or target was mispredicted")}, + + {"Cycles_user", NULL, REGNO_ANY, NULL, PRELOAD (1, 4), 1, ABST_NONE, STXT ("Cycles hardware thread is active in specified mode(s)")}, + // + {"Commit_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from this hardware thread")}, + {"Commit_0_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from any hardware thread on this core")}, + {"Commit_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 1 uop commits from this hardware thread")}, + {"Commit_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 2 uops commit from this hardware thread")}, + {"Commit_1_or_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 1 or 2 uops commit from this hardware thread")}, + + /* additional (hidden) aliases, for convenience */ + {"cycles0", "Cycles_user", 0, NULL, PRELOADS_8, 1, ABST_NONE}, + {"cycles1", "Cycles_user", 1, NULL, PRELOADS_8, 1, ABST_NONE}, + {"insts0", "Instr_all", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "Instr_all", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry sparc_t5_m6[] = { + // Identical to sparc_t4 except for: l3m_spec + // when updating this 
table, also update sparc_t4[] + // obsolete aliases marked with REGNO_INVALID (allows reading of older experiments) + {"l2l3dh", "DC_miss_L2_L3_hit_nospec", REGNO_INVALID, STXT ("L2 or L3 D-cache Hits"), PRELOADS_6, 0, ABST_EXACT}, // undercounts due to thread-hog issue + {"l3m", "DC_miss_remote_L3_hit_nospec~emask=0x6", REGNO_INVALID, STXT ("L3 D-cache Misses"), PRELOADS_5, 0, ABST_EXACT}, // undercounts due to thread-hog issue + {"lmh", "DC_miss_local_hit_nospec", REGNO_INVALID, STXT ("Local Mem. Hits"), PRELOADS_5, 0, ABST_EXACT}, // undercounts due to thread-hog issue + {"rmh", "DC_miss_remote_L3_hit_nospec", REGNO_INVALID, STXT ("Remote Mem. Hits"), PRELOADS_5, 0, ABST_EXACT}, // undercounts due to thread-hog issue + {"pqs", "PQ_tag_wait", REGNO_INVALID, STXT ("Pick Queue Stalls"), PRELOADS_7, 1, ABST_NONE}, // old alias name + {"raw_stb", "RAW_hit_st_buf", REGNO_INVALID, STXT ("RAW Hazard in Store Buffer"), PRELOADS_55, 0, ABST_NONE}, // 11@full hit, 60@partial hit (in future, combine w/st_q) + {"raw_stq", "RAW_hit_st_q", REGNO_INVALID, STXT ("RAW Hazard in Store Queue"), PRELOADS_55, 0, ABST_NONE}, // 11@full hit, 60@partial hit (in future, combine w/st_buf) + {"sel_stalls", "Sel_0_ready", REGNO_INVALID, STXT ("Stalls Another Thread Selected"), PRELOADS_7, 1, ABST_NONE}, + {"icm", "IC_miss", REGNO_INVALID, STXT ("L1 I-Cache Misses"), PRELOADS_55, 0, ABST_NONE}, // 20@ l2/l3 hit (guess) + {"icm_stalls", "IC_miss", REGNO_INVALID, STXT ("L1 I-Cache Miss Est Stalls"), PRELOADS_55, 25, ABST_NONE}, // 25@ l2-20/l3-50 + + // current aliases + SPARC_CYCLES + {"cycles", "Cycles_user", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "Instr_all", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + {"c_stalls", "Commit_0", REGNO_ANY, STXT ("Stall Cycles"), PRELOADS_7, 1, ABST_NONE}, + + {"loads", "Instr_ld", REGNO_ANY, STXT ("Load Instructions"), PRELOADS_7, 0, ABST_EXACT}, + {"stores", "Instr_st", REGNO_ANY, STXT ("Store 
Instructions"), PRELOADS_7, 0, ABST_EXACT}, + {"dcm", "DC_miss_nospec", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_EXACT}, + // {"l3m_spec", "DC_miss_local_hit~emask=0x6", REGNO_ANY, STXT("L3 D-cache Speculative Misses"),PRELOADS_5,0, ABST_NONE, STXT("Loads that speculatively missed local L3")}, // T4 encoding (430 lm, 690 rm) ~5 misses overlap on t5/pico_ile + {"l3m_spec", "DC_miss_local_hit~emask=0x30", REGNO_ANY, STXT ("L3 D-cache Speculative Misses"), PRELOADS_5, 0, ABST_NONE, STXT ("Loads that speculatively missed local L3")}, // T5/M6 encoding (430 lm, 690 rm) ~5 misses overlap on t5/pico_ile + {"lmh_spec", "DC_miss_local_hit", REGNO_ANY, STXT ("Local Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE}, + {"rmh_spec", "DC_miss_remote_L3_hit", REGNO_ANY, STXT ("Remote Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE}, + // + {"dtlbm", "DTLB_miss_asynch", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_55, 0, ABST_NONE}, // 10@l1 hit, 24@l2 hit, 60@l3 hit, 500@l3 miss, 5000@trap 0.001 events/cycle + {"dtlb_hwtw_stalls", "DTLB_HWTW_all", REGNO_ANY, STXT ("DTLB HWTW Est Stalls"), PRELOADS_55, 25, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss requiring a HW tablewalk")}, // l2-20, l3-50 + {"dtlb_trap_stalls", "DTLB_fill_trap", REGNO_ANY, STXT ("DTLB Trap Est Stalls"), PRELOADS_35, 5000, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss with HW tablewalk unsuccessful")}, // 5000@trap + {"rawhaz", "RAW_hit_st_q~emask=0xf", REGNO_ANY, STXT ("Read-after-write Hazards"), PRELOADS_55, 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write hazards)")}, + {"br_msp_stalls", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict Stalls"), PRELOADS_6, 24, ABST_NONE, STXT ("Estimated time stalled on Branch mispredictions")}, // 24@miss, %5 of branches is bad + {"br_msp", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict"), PRELOADS_6, 0, ABST_NONE}, // 24@miss, %5 of branches is bad + {"br_tkn", "Br_taken", REGNO_ANY, STXT ("Branch 
Taken"), PRELOADS_7, 0, ABST_NONE}, // 2 cycles minimum + {"br_ins", "Branches", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE}, // 24@miss, %5 of branches is bad + {"fgu", "Instr_FGU_crypto", REGNO_ANY, STXT ("FP/VIS/Crypto Instructions"), PRELOADS_7, 0, ABST_NONE}, // 1 cycle/event + + /* explicit definitions of (hidden) entries for proper counters */ + /* Counters that can be time converted, support memspace, or have a short_desc need to be in this table */ + + {"Sel_pipe_drain_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting with correct instructions when pipeline has to drain after branch misprediction")}, + {"Sel_0_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting for various conditions to be resolved")}, + {"Sel_0_ready", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread was ready to have its instructions selected but another hardware thread was selected instead")}, + {"Sel_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles that only 1 instruction or uop was selected")}, + {"Sel_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles that 2 instructions or uops were selected")}, + + {"Pick_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Pick_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Pick_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Pick_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Pick_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"Branches", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Control transfer instructions completed, excluding trap-related transfers")}, + {"Instr_FGU_crypto", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("FP and VIS instructions completed by the Floating Point and Graphics Unit")}, + {"Instr_ld", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 
0, ABST_EXACT, STXT ("Load instructions completed")}, + {"Instr_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Store instructions completed")}, + {"SPR_ring_ops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Specialized instructions that require internal use of SPR ring completed")}, + {"Instr_other", NULL, REGNO_ANY, NULL, PRELOAD (2, 4), 0, ABST_NONE, STXT ("Basic arithmetic and logical instructions completed")}, + {"Instr_all", NULL, REGNO_ANY, NULL, PRELOAD (1, 4), 0, ABST_NONE, STXT ("Total instructions completed")}, + + {"Br_taken", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branch instructions taken and completed")}, + {"Sw_count_intr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("SW Count instructions completed")}, + {"Atomics", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_EXACT, STXT ("Atomic instructions, including CASA/XA, completed")}, + {"SW_prefetch", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("PREFETCH and PREFETCHA instructions completed")}, + {"Block_ld_st", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_EXACT, STXT ("Block load/store instructions completed")}, + + {"BTC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branches delayed a few extra cycles because branch target not found in Branch Target Cache")}, + + {"ITLB_fill_8KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 8K page")}, + {"ITLB_fill_64KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 64K page")}, + {"ITLB_fill_4MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 4M page")}, + {"ITLB_fill_256MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 256M page")}, + {"ITLB_fill_2GB", NULL, 
REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 2G or 16G page")}, + {"ITLB_fill_trap", NULL, REGNO_ANY, NULL, PRELOAD (1000, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk unsuccessful")}, + {"ITLB_miss_asynch", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk search done")}, + + {"Fetch_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_0_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"Instr_buffer_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"PQ_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"ROB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"ROB_LB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"ROB_SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LB_SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"ROB_LB_SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"DTLB_miss_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"ITLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L2D")}, + {"ITLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L3 or neighbor L2D")}, + {"ITLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk missed all local caches")}, + {"DTLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L2D")}, + {"DTLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L3 or neighbor L2D")}, + {"DTLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD 
(800, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk missed all local caches")}, + {"DTLB_HWTW_all", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss requiring HW tablewalk")}, + + {"DC_miss_L2_L3_hit_nospec", NULL, REGNO_ANY, NULL, PRELOAD (25, 4), 0, ABST_EXACT}, + {"DC_miss_local_hit_nospec", NULL, REGNO_ANY, NULL, PRELOAD (500, 4), 0, ABST_EXACT}, + {"DC_miss_remote_L3_hit_nospec", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_EXACT}, + {"DC_miss_nospec", NULL, REGNO_ANY, NULL, PRELOAD (25, 4), 0, ABST_EXACT, STXT ("Loads that missed local L1D")}, + + {"DTLB_fill_8KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 8K page")}, + {"DTLB_fill_64KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 64K page")}, + {"DTLB_fill_4MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 4M page")}, + {"DTLB_fill_256MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 256M page")}, + {"DTLB_fill_2GB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 2G or 16G page")}, + {"DTLB_fill_trap", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk unsuccessful")}, + {"DTLB_miss_asynch", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk search done")}, + {"RAW_hit_st_buf", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) still in store buffer not yet committed")}, + {"RAW_hit_st_q", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) committed but in store queue not yet written to 
L2D")}, + + {"St_q_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"St_hit_L2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L2D")}, + {"St_hit_L3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L3")}, + + {"DC_miss_L2_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local L2D or L3")}, + {"DC_miss_local_hit", NULL, REGNO_ANY, NULL, PRELOAD (500, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local memory")}, + {"DC_miss_remote_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit remote cache or remote memory")}, + {"DC_miss", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_NONE, STXT ("Loads that speculatively missed L1D")}, + + {"L2_pipe_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"Br_dir_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose direction was mispredicted")}, + {"Br_trg_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose target was mispredicted")}, + {"Br_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose direction or target was mispredicted")}, + + {"Cycles_user", NULL, REGNO_ANY, NULL, PRELOAD (1, 4), 1, ABST_NONE, STXT ("Cycles hardware thread is active in specified mode(s)")}, + // + {"Commit_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from this hardware thread")}, + {"Commit_0_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from any hardware thread on this core")}, + {"Commit_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 1 uop commits from this hardware thread")}, + {"Commit_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, 
STXT ("Cycles 2 uops commit from this hardware thread")}, + {"Commit_1_or_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 1 or 2 uops commit from this hardware thread")}, + + /* additional (hidden) aliases, for convenience */ + {"cycles0", "Cycles_user", 0, NULL, PRELOADS_8, 1, ABST_NONE}, + {"cycles1", "Cycles_user", 1, NULL, PRELOADS_8, 1, ABST_NONE}, + {"insts0", "Instr_all", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "Instr_all", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry sparc_m7[] = { + // obsolete aliases marked with REGNO_INVALID (allows reading of older experiments) + {"icm", "IC_miss_commit", REGNO_INVALID, STXT ("L1 I-Cache Misses"), PRELOADS_6, 0, ABST_EXACT}, + {"raw_stb", "RAW_hit_st_buf", REGNO_INVALID, STXT ("RAW Hazard in Store Buffer"), PRELOADS_55, 0, ABST_NONE}, + {"raw_stq", "RAW_hit_st_q", REGNO_INVALID, STXT ("RAW Hazard in Store Queue"), PRELOADS_55, 0, ABST_NONE}, + {"pqs", "PQ_tag_wait_cyc", REGNO_INVALID, STXT ("Pick Queue Stalls"), PRELOADS_7, 1, ABST_NONE}, + {"sel_stalls", "Sel_0_ready_cyc", REGNO_INVALID, STXT ("Stalls Another Thread Selected"), PRELOADS_7, 1, ABST_NONE}, + + // current aliases + SPARC_CYCLES + {"cycles", "Cycles_user", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "Instr_all", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + {"c_stalls", "Commit_0_cyc", REGNO_ANY, STXT ("Stall Cycles"), PRELOADS_7, 1, ABST_NONE}, + + {"loads", "Instr_ld", REGNO_ANY, STXT ("Load Instructions"), PRELOADS_7, 0, ABST_EXACT}, + {"stores", "Instr_st", REGNO_ANY, STXT ("Store Instructions"), PRELOADS_6, 0, ABST_EXACT}, + {"dcm", "DC_miss_commit", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_6, 0, ABST_EXACT}, + + {"l3m_spec", "DC_miss_L3_miss", REGNO_ANY, STXT ("L3 D-cache Speculative Misses"), PRELOADS_5, 0, ABST_NONE}, + {"lmh_spec", "DC_miss_local_mem_hit", REGNO_ANY, STXT ("Local Mem Speculative 
Hits"), PRELOADS_5, 0, ABST_NONE}, + {"rmh_spec", "DC_miss_remote_mem_hit", REGNO_ANY, STXT ("Remote Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE}, + // + {"dtlbm", "DTLB_HWTW_search", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_55, 0, ABST_NONE}, // 10@l1 hit, 24@l2 hit, 60@l3 hit, 500@l3 miss, 5000@trap 0.001 events/cycle + {"dtlb_hwtw_stalls", "DTLB_HWTW_ref", REGNO_ANY, STXT ("DTLB HWTW Est Stalls"), PRELOADS_55, 25, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss requiring a HW tablewalk")}, // l2-20, l3-50 + {"dtlb_trap_stalls", "DTLB_HWTW_miss_trap", REGNO_ANY, STXT ("DTLB Trap Est Stalls"), PRELOADS_35, 5000, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss with HW tablewalk unsuccessful")}, // 5000@trap + {"rawhaz", "RAW_hit_st_q~emask=0xf", REGNO_ANY, STXT ("Read-after-write Hazards"), PRELOADS_55, 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write hazards)")}, + {"br_msp_stalls", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict Stalls"), PRELOADS_6, 24, ABST_NONE, STXT ("Estimated time stalled on Branch mispredictions")}, // 24@miss, %5 of branches is bad + {"br_msp", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict"), PRELOADS_6, 0, ABST_NONE}, + {"br_tkn", "Br_taken", REGNO_ANY, STXT ("Branch Taken"), PRELOADS_7, 0, ABST_NONE}, + {"br_ins", "Branches", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE}, + {"fgu", "Instr_FGU_crypto", REGNO_ANY, STXT ("FP/VIS/Crypto Instructions"), PRELOADS_7, 0, ABST_NONE}, + {"spill_fill", "Flush_arch_exception", REGNO_ANY, STXT ("Reg Window Spill/Fill Est Stalls"), PRELOAD (100, 4), 80, ABST_NONE, STXT ("Estimated time stalled on flushing pipeline due to register window spill/fill")}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Counters that can be time converted, support memspace, or have a short_desc need to be in this table */ + {"Sel_pipe_drain_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware 
thread stalls at Select waiting with correct instructions when pipeline has to drain after branch misprediction")}, + {"Sel_0_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting for various conditions to be resolved")}, + {"Sel_0_ready_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread was ready to have its instructions selected but another hardware thread was selected instead")}, + {"Sel_1_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles that only 1 instruction or uop was selected")}, + {"Sel_2_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles that 2 instructions or uops were selected")}, + + {"Pick_0_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Pick_1_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Pick_2_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Pick_3_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Pick_any_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"Branches", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Control transfer instructions completed, excluding trap-related transfers")}, + {"Instr_FGU_crypto", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("FP and VIS instructions completed by the Floating Point and Graphics Unit")}, + {"Instr_ld", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Load instructions completed")}, + {"Instr_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Store instructions completed")}, + {"Instr_SPR_ring_ops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Specialized instructions that require internal use of SPR ring completed")}, + {"Instr_other", NULL, REGNO_ANY, NULL, PRELOAD (2, 4), 0, ABST_NONE, STXT ("Basic arithmetic and logical instructions completed")}, + {"Instr_all", NULL, REGNO_ANY, NULL, PRELOAD (1, 4), 0, ABST_NONE, STXT ("Total instructions 
completed")}, + + {"Br_taken", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branch instructions taken and completed")}, + {"Instr_SW_count", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("SW Count instructions completed")}, + {"Instr_atomic", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_EXACT, STXT ("Atomic instructions, including CASA/XA, completed")}, + {"Instr_SW_prefetch", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("PREFETCH and PREFETCHA instructions completed")}, + {"Instr_block_ld_st", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_EXACT, STXT ("Block load/store instructions completed")}, + + {"Br_BTC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branches delayed a few extra cycles because branch target not found in Branch Target Cache")}, + + {"ITLB_HWTW_hit_8K", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 8K page")}, + {"ITLB_HWTW_hit_64K", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 64K page")}, + {"ITLB_HWTW_hit_4M", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 4M page")}, + {"ITLB_HWTW_hit_256M", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 256M page")}, + {"ITLB_HWTW_hit_2G_16G", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 2G or 16G page")}, + {"ITLB_HWTW_miss_trap", NULL, REGNO_ANY, NULL, PRELOAD (1000, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk unsuccessful")}, + {"ITLB_HWTW_search", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk search done")}, + + {"Fetch_0_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_0_all_cyc", NULL, REGNO_ANY, 
NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"Instr_buffer_full_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"PQ_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"ROB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"SB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"ROB_LB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"ROB_SB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LB_SB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"ROB_LB_SB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"DTLB_miss_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"ITLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L2D")}, + {"ITLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L3 or neighbor L2D")}, + {"ITLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk missed all local caches")}, + {"DTLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L2D")}, + {"DTLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L3 or neighbor L2D")}, + {"DTLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk missed all local caches")}, + {"DTLB_HWTW_ref", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss requiring HW tablewalk")}, + + {"DC_miss_L2_L3_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD (25, 4), 0, ABST_EXACT}, + {"DC_miss_nbr_scc_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD (500, 4), 0, ABST_EXACT}, + {"DC_miss_nbr_scc_miss_commit", NULL, REGNO_ANY, NULL, PRELOAD (800, 
4), 0, ABST_EXACT}, + {"DC_miss_commit", NULL, REGNO_ANY, NULL, PRELOAD (25, 4), 0, ABST_EXACT, STXT ("Loads that missed local L1D")}, + + {"DTLB_HWTW_hit_8K", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 8K page")}, + {"DTLB_HWTW_hit_64K", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 64K page")}, + {"DTLB_HWTW_hit_4M", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 4M page")}, + {"DTLB_HWTW_hit_256M", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 256M page")}, + {"DTLB_HWTW_hit_2G_16G", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 2G or 16G page")}, + {"DTLB_HWTW_miss_trap", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk unsuccessful")}, + {"DTLB_HWTW_search", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk search done")}, + {"RAW_hit_st_buf", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) still in store buffer not yet committed")}, + {"RAW_hit_st_q", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) committed but in store queue not yet written to L2D")}, + + {"St_q_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"St_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L2D")}, + {"St_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L3")}, + + {"DC_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Loads that 
speculatively hit local L1D")}, + {"DC_miss_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local L2D")}, + {"DC_miss_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local L3")}, + {"DC_miss_nbr_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (100, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit neighbor L2D via local L3")}, + {"DC_miss_nbr_scc_hit", NULL, REGNO_ANY, NULL, PRELOAD (100, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit neighbor L3 on same socket")}, + {"DC_miss_nbr_scc_miss", NULL, REGNO_ANY, NULL, PRELOAD (400, 4), 0, ABST_NONE, STXT ("Loads that speculatively missed all caches on same socket")}, + {"DC_miss", NULL, REGNO_ANY, NULL, PRELOAD (10, 4), 0, ABST_NONE, STXT ("Loads that speculatively missed local L1D")}, + {"DC_miss_L2_miss", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads that speculatively missed local L2D")}, + {"DC_miss_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (200, 4), 0, ABST_NONE, STXT ("Loads that speculatively missed local L3")}, + + {"DC_miss_remote_scc_hit", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit remote cache on different socket")}, + {"DC_miss_local_mem_hit", NULL, REGNO_ANY, NULL, PRELOAD (500, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local memory")}, + {"DC_miss_remote_mem_hit", NULL, REGNO_ANY, NULL, PRELOAD (1000, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit remote memory")}, + {"Br_dir_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose direction was mispredicted")}, + {"Br_tgt_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose target was mispredicted")}, + {"Br_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose direction or target was mispredicted")}, + + 
{"Cycles_user", NULL, REGNO_ANY, NULL, PRELOAD (1, 4), 1, ABST_NONE, STXT ("Cycles hardware thread is active in specified mode(s)")}, + + {"Flush_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Pipeline flushes due to a load that misses L3 when more than 1 hardware thread is active on the core")}, + {"Flush_br_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Pipeline flushes due to a branch misprediction")}, + {"Flush_arch_exception", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Pipeline flushes due to SPARC architecture exceptions and trap entry/return")}, + {"Flush_other", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Pipeline flushes due to hardware thread state change to/from halted/paused state")}, + // + {"Commit_0_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from this hardware thread")}, + {"Commit_0_all_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from any hardware thread on this core")}, + {"Commit_1_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 1 uop commits from this hardware thread")}, + {"Commit_2_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 2 uops commit from this hardware thread")}, + {"Commit_1_or_2_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 1 or 2 uops commit from this hardware thread")}, + + + /* additional (hidden) aliases, for convenience */ + {"cycles0", "Cycles_user", 0, NULL, PRELOADS_8, 1, ABST_NONE}, + {"cycles1", "Cycles_user", 1, NULL, PRELOADS_8, 1, ABST_NONE}, + {"insts0", "Instr_all", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "Instr_all", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry sparc_m8[] = { + // current aliases + SPARC_CYCLES + {"cycles", "Cycles_user", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", 
"Instr_all", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + {"c_stalls", "Commit_0_cyc", 3, STXT ("Stall Cycles"), PRELOADS_7, 1, ABST_NONE}, // 22825776: limit to reg 3 + {"Sel_0_wait_cyc", "Sel_0_cyc~emask=0x3f", REGNO_ANY, STXT ("Select Stall Cycles"), PRELOADS_7, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting for various conditions to be resolved that prevent it being selected")}, + + {"loads", "Instr_ld", REGNO_ANY, STXT ("Load Instructions"), PRELOADS_7, 0, ABST_EXACT}, + {"stores", "Instr_st", REGNO_ANY, STXT ("Store Instructions"), PRELOADS_6, 0, ABST_EXACT}, + {"dcm", "DC_miss_commit", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_6, 0, ABST_EXACT}, + + {"lmh_spec", "DC_miss_local_mem_hit", REGNO_ANY, STXT ("Local Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE}, + {"rmh_spec", "DC_miss_remote_mem_hit", REGNO_ANY, STXT ("Remote Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE}, + + {"dtlbm", "DTLB_HWTW", REGNO_ANY, STXT ("DTLB Misses"), PRELOAD (40, 5), 0, ABST_NONE}, // 10@l1 hit, 24@l2 hit, 60@l3 hit, 500@l3 miss, 5000@trap 0.001 events/cycle + {"dtlb_hwtw_stalls", "DTLB_HWTW", REGNO_ANY, STXT ("DTLB HWTW Est Stalls"), PRELOAD (40, 5), 25, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss requiring a HW tablewalk")}, // l2-20, l3-50 + {"dtlb_trap_stalls", "DTLB_HWTW_miss_trap", REGNO_ANY, STXT ("DTLB Trap Est Stalls"), PRELOAD (800, 5), 5000, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss with HW tablewalk unsuccessful")}, // 5000@trap + {"rawhaz", "RAW_hit", REGNO_ANY, STXT ("Read-after-write Hazards"), PRELOAD (40, 5), 0, ABST_NONE}, + {"br_msp_stalls", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict Stalls"), PRELOAD (40, 5), 24, ABST_NONE, STXT ("Estimated time stalled on Branch mispredictions")}, // 24@miss, %5 of branches is bad + {"br_msp", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict"), PRELOAD (40, 5), 0, ABST_NONE}, + {"br_tkn", "Br_taken", REGNO_ANY, STXT ("Branch 
Taken"), PRELOADS_7, 0, ABST_NONE}, + {"br_ins", "Branches", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE}, + {"fgu", "Instr_FGU_crypto", REGNO_ANY, STXT ("FP/VIS/Crypto Instructions"), PRELOADS_7, 0, ABST_NONE}, + {"spill_fill", "Flush_spill_fill", REGNO_ANY, STXT ("Reg Window Spill/Fill Est Stalls"), PRELOAD (100, 5), 80, ABST_NONE, STXT ("Estimated time stalled on flushing pipeline due to register window spill/fill")}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Counters that can be time converted, support memspace, or have a short_desc need to be in this table */ + //0x01 + {"Fetch_stall_IFU_reset_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_stall_IC_miss_MB_full_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_stall_IC_miss_MB_avail_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_stall_IC_miss_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_stall_ITLB_miss_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_stall_SEL_buf_full_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_ready_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_0_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_0_all_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + //0x02 + {"Fetch_1_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_2_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_3_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_4_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_5_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_6_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_7_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_8_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + //0x07 
+ {"ITLB_HWTW_hit_8K", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 8K page")}, + {"ITLB_HWTW_hit_64K", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 64K page")}, + {"ITLB_HWTW_hit_4M", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 4M page")}, + {"ITLB_HWTW_hit_256M", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 256M page")}, + {"ITLB_HWTW_hit_16G", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 16G page")}, + {"ITLB_HWTW_hit_1T", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 1T page")}, + // { "ITLB_HWTW_miss_RA2PAC", 0x0740, 0xf07ff }, + // { "ITLB_HWTW_miss_not_RA2PAC", 0x0780, 0xf07ff }, + {"ITLB_HWTW_miss_trap", NULL, REGNO_ANY, NULL, PRELOAD (1000, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk unsuccessful")}, + {"ITLB_HWTW", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk search done")}, + //0x08 + {"Br_BTC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branches delayed a few extra cycles because branch target not found in Branch Target Cache")}, + //0x09 + {"Sel_0_no_instr_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select because no instructions are available")}, + {"Sel_0_pipe_drain_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting with correct instructions when pipeline has to drain after branch misprediction")}, + {"Sel_0_postsync_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread 
stalls at Select waiting for prior instructions to commit")}, + {"Sel_0_presync_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select with instruction that cannot decode until prior instructions have committed")}, + {"Sel_0_thread_hog_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select to prevent strand monopolizing resources")}, + {"Sel_0_tag_stall_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select because no required tags are available")}, + {"Sel_0_ready_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread was ready to have its instructions selected but another hardware thread was selected instead")}, + {"Sel_0_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread is not selected")}, + // No direct equivalent Sel_1/2_cyc. Nearest is Decode_uop, which increments by 0-4 each cycle according to how many uops were decoded. 
+ //0x13 + {"ITLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L2D")}, + {"ITLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L3 or neighbor L2D")}, + {"ITLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk missed all local caches")}, + {"DTLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L2D")}, + {"DTLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L3 or neighbor L2D")}, + {"DTLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk missed all local caches")}, + {"DTLB_HWTW_ref", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss requiring HW tablewalk")}, + //0x0E + {"Instr_FGU_crypto", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("FP and VIS instructions completed by the Floating Point and Graphics Unit")}, + {"Instr_ld", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Load instructions completed")}, + {"Instr_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Store instructions completed")}, + {"Instr_block_ld_st", NULL, REGNO_ANY, NULL, PRELOAD (20, 5), 0, ABST_EXACT, STXT ("Block load/store instructions completed")}, + {"Instr_SPR_ring_ops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Specialized instructions that require internal use of SPR ring completed")}, + {"Instr_atomic", NULL, REGNO_ANY, NULL, PRELOAD (20, 5), 0, ABST_EXACT, STXT ("Atomic instructions, including CASA/XA, completed")}, + {"Instr_SW_prefetch", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("PREFETCH and PREFETCHA instructions completed")}, + {"Instr_other", NULL, REGNO_ANY, NULL, PRELOAD (2, 5), 0, ABST_NONE, STXT ("Basic arithmetic and logical 
instructions completed")}, + {"Instr_all", NULL, REGNO_ANY, NULL, PRELOAD (1, 5), 0, ABST_NONE, STXT ("Total instructions completed")}, + //0x0F + {"Branches", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Control transfer instructions completed, excluding trap-related transfers")}, + //0x10 + {"Br_taken", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branch instructions taken and completed")}, + //0x11 + {"Rename_tag_wait_PQ_1_EXU_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Rename_tag_wait_PQ_0_LSU_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Rename_wait_crypto_diag_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Sel_0_wait_ROB_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Sel_0_wait_WRF_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Sel_0_wait_LB_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Sel_0_wait_SB_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + //0x12 + {"Fetch_stall_BDA_tag_unavail_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_stall_BTA_tag_unavail_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_stall_misc_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"Fetch_stall_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"MMU_TTE_buffer_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"MMU_PRQ_pool_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + //0x15 + {"L2I_request_block_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2I_thread_hog_stall_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2I_MB_full_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2I_snoop_eviction", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2I_stall_no_request_credit_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2I_stall_no_response_credit_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
//0x16 + {"Flush_thread_hog", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes to prevent thread from monopolizing resources")}, + {"Flush_br_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to a branch misprediction")}, + {"Flush_arch_exception", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to SPARC architecture exceptions and trap entry/return")}, + {"Flush_evil_twin", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to detecting floating point evil twin condition")}, + {"Flush_LSU_trap", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes to refetch Next-PC")}, + {"Flush_mode_change", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to strand mode change")}, + {"Flush_misalign", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to detecting misaligned load/store requiring transition to misaligned mitigation mode")}, + {"Flush_other", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to hardware thread state change to/from halted/paused state")}, + {"Flush_all", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to any reason")}, + //0x17 + {"Flush_spill_n_normal", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to spill_n_normal exception")}, + {"Flush_spill_n_other", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to spill_n_other exception")}, + {"Flush_fill_n_normal", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to fill_n_normal exception")}, + {"Flush_fill_n_other", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to fill_n_other exception")}, + {"Flush_spill_fill", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT 
("Pipeline flushes due to spill/fill exceptions")}, + {"Flush_lost_load", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to speculatively executed load violating memory order")}, + //0x21 + {"Br_dir_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Branch instructions completed whose direction was mispredicted")}, + {"Br_tgt_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Branch instructions completed whose target was mispredicted")}, + {"Br_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Branch instructions completed whose direction or target was mispredicted")}, + //0x23 + {"LSU_st_q_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LSU_st_q_tag_wait_all_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2D_stall_no_request_credit_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2D_stall_no_response_credit_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + //0x27 + {"DC_miss_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (20, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit local L2D")}, + {"DC_miss_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit local L3")}, + {"DC_miss_L3_dirty_copyback", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit local L3 but require copyback from L2D within same CPC")}, + {"DC_miss_nbr_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (100, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit neighbor L3 on same socket")}, + {"DC_miss_remote_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (400, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit remote cache on different socket")}, + {"DC_miss_local_mem_hit", NULL, REGNO_ANY, NULL, PRELOAD (500, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit local memory")}, + {"DC_miss_remote_mem_hit", NULL, REGNO_ANY, NULL, PRELOAD (1000, 5), 0, ABST_NONE, STXT ("Loads 
that speculatively hit remote memory")}, + {"DC_miss", NULL, REGNO_ANY, NULL, PRELOAD (10, 5), 0, ABST_NONE, STXT ("Loads that speculatively missed local L1D")}, + //0x28 + {"DC_sec_miss_L2_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT}, + {"DC_miss_L2_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT}, + {"DC_miss_L3_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT}, + {"DC_miss_L3_dirty_copyback_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT}, + {"DC_miss_nbr_L3_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT}, + {"DC_miss_remote_L3_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT}, + {"DC_miss_local_mem_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT}, + {"DC_miss_remote_mem_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT}, + {"DC_miss_commit", NULL, REGNO_ANY, NULL, PRELOAD (25, 5), 0, ABST_EXACT, STXT ("Loads that missed local L1D")}, + //0x29 + // {"Store_DC_sec_miss_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT("")}, + {"Store_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (20, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L2D")}, + {"Store_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L3")}, + {"Store_nbr_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (100, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in neighbor L2 on same socket")}, + {"Store_nbr_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (100, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in neighbor L3 on same socket")}, + {"Store_remote_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (400, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in remote cache on different socket")}, + {"Store_local_mem_hit", NULL, REGNO_ANY, NULL, PRELOAD (500, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local memory")}, + 
{"Store_remote_mem_hit", NULL, REGNO_ANY, NULL, PRELOAD (1000, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in remote memory")}, + {"Store_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was observed to be somewhere in the memory hierarchy")}, + //0x2d + {"RAW_hit_st_buf", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) still in store buffer not yet committed")}, + {"RAW_hit_st_q", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) committed but in store queue not yet written to L2D")}, + {"RAW_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write hazards)")}, + //0x2f + {"Cycles_user_non_MLA", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {"Cycles_user_MLA", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {"Cycles_user", NULL, REGNO_ANY, NULL, PRELOAD (1, 5), 1, ABST_NONE, STXT ("Cycles hardware thread is active in specified mode(s)")}, + //0x37 + {"DTLB_HWTW_hit_8K", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 8K page")}, + {"DTLB_HWTW_hit_64K", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 64K page")}, + {"DTLB_HWTW_hit_4M", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 4M page")}, + {"DTLB_HWTW_hit_256M", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 256M page")}, + {"DTLB_HWTW_hit_16G", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 16G page")}, + {"DTLB_HWTW_hit_1T", NULL, REGNO_ANY, NULL, PRELOAD (40, 
5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 1T page")}, + {"DTLB_HWTW_miss_trap", NULL, REGNO_ANY, NULL, PRELOAD (800, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk unsuccessful")}, + {"DTLB_HWTW", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk search done")}, + //0x3f + {"Commit_0_cyc", /*22825776*/ NULL, 3, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from this hardware thread")}, + {"Commit_0_all_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from any hardware thread on this core")}, + // Similar situation to Sel_1_cyc etc. No direct equivalent, nearest is Commit_uop, which increments by 0-4 each cycle according to how many uops were committed. + + /* additional (hidden) aliases, for convenience */ + {"cycles0", "Cycles_user", 0, NULL, PRELOADS_8, 1, ABST_NONE}, + {"cycles1", "Cycles_user", 1, NULL, PRELOADS_8, 1, ABST_NONE}, + {"insts0", "Instr_all", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "Instr_all", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry usfuji_V_list[] = { + {"cycles", "cycle_counts", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"insts", "instruction_counts", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE}, + {"flops", "floating_instructions", REGNO_ANY, STXT ("Floating-point Ops"), PRELOADS_6, 0, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {"cycle_counts", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE}, + {"load_store_instructions", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + + /* additional (hidden) aliases for convenience */ + {"cycles0", "cycle_counts", 0, NULL, PRELOADS_75, 1, ABST_NONE}, + {"cycles1", "cycle_counts", 1, NULL, PRELOADS_75, 1, ABST_NONE}, 
+ {"insts0", "instruction_counts", 0, NULL, PRELOADS_75, 0, ABST_NONE}, + {"insts1", "instruction_counts", 1, NULL, PRELOADS_75, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry usfuji_VI_VII_list[] = { + {"cycles", "cycle_counts", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "instruction_counts", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + {"dcm", "op_r_iu_req_mi_go", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_6, 0, ABST_NONE}, + {"dcstall", "op_wait_all", REGNO_ANY, STXT ("L1 D-cache Stall Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"dtlbm", "write_op_uTLB", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_5, 0, ABST_NONE}, + // l2m: mem_cache_load test shows undercount of 3x, however, we don't care too much about this chip, keeping the alias for now + {"l2m", "sx_miss_count_dm", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_5, 0, ABST_NONE}, /*YXXX undercounts?*/ + {"l2wm", "dvp_count_dm", REGNO_ANY, STXT ("L2 Cache Writeback Misses"), PRELOADS_5, 0, ABST_NONE}, + {"l2ref", "sx_read_count_dm", REGNO_ANY, STXT ("L2 Cache Refs"), PRELOADS_6, 0, ABST_NONE}, + {"l2stall", "sx_miss_wait_dm", REGNO_ANY, STXT ("L2 Cache Stall Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"icm", "if_r_iu_req_mi_go", REGNO_ANY, STXT ("L1 I-cache Misses"), PRELOADS_6, 0, ABST_NONE}, + {"icstall", "if_wait_all", REGNO_ANY, STXT ("L1 I-cache Stall Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"itlbm", "write_if_uTLB", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE}, + {"flops", "floating_instructions", REGNO_ANY, STXT ("Floating-point Ops"), PRELOADS_7, 0, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {"cycle_counts", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {"op_stv_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"load_store_instructions", NULL, 
REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + {"active_cycle_count", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_sxmiss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"branch_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"write_op_uTLB", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + {"sx_miss_wait_pf", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"sx_miss_wait_dm", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_nc_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_sxmiss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"eu_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"sx_miss_count_dm", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + {"fl_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_r_iu_req_mi_go", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + {"sx_miss_count_dm_if", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + {"op_stv_wait_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"swpf_lbs_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + {"sx_read_count_dm", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + {"trap_DMMU_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + {"op_wait_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"sx_miss_count_dm_opex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + {"if_wait_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"dvp_count_dm", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + {"sx_miss_count_dm_opsh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE}, + + /* additional (hidden) aliases for convenience */ + {"cycles0", "cycle_counts", 0, NULL, PRELOADS_8, 1, ABST_NONE}, + {"cycles1", "cycle_counts", 1, NULL, PRELOADS_8, 1, ABST_NONE}, + {"insts0", "instruction_counts", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "instruction_counts", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + 
{NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + + +static Hwcentry usfuji_X_list[] = { + {"cycles", "cycle_counts", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "instruction_counts", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + {"dcm", "L1D_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, + {"dcstall", "L1D_wait_all", REGNO_ANY, STXT ("L1 D-cache Stall Cycles"), PRELOADS_7, 1, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {"cycle_counts", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {"w_op_stv_wait_nc_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"eu_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_pf_bank0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_pfp_busy_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_dm_bank0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_branch_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_sxmiss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_nc_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_pf_bank2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_eu_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_sxmiss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_sxmiss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"branch_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_dm_bank2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"d_move_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, 
ABST_NONE}, + {"w_fl_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_pfp_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"fl_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_pf_bank1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_dm_bank1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_sxmiss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_swpf", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L1D_wait_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_pf_bank3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"cse_priority_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_pfp_busy_swpf", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L1I_wait_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_dm_bank3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"single_mode_cycle_counts", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"suspend_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"sleep_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + /* additional (hidden) aliases for convenience */ + {"cycles0", "cycle_counts", 0, NULL, PRELOADS_8, 1, ABST_NONE}, + {"cycles1", "cycle_counts", 1, NULL, PRELOADS_8, 1, ABST_NONE}, + {"insts0", "instruction_counts", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "instruction_counts", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry usfuji_XII_list[] = { + {"cycles", "cycle_counts", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "instruction_counts", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + {"dcm", 
"L1D_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, + {"dcstall", "L1D_wait_all", REGNO_ANY, STXT ("L1 D-cache Stall Cycles"), PRELOADS_7, 1, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {"cycle_counts", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {"L1D_wait_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L1I_wait_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_dm_bank0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_dm_bank1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_dm_bank2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_dm_bank3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_pf_bank0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_pf_bank1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_pf_bank2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"L2_miss_wait_pf_bank3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LL_miss_wait_dm_bank0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LL_miss_wait_dm_bank1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LL_miss_wait_dm_bank2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LL_miss_wait_dm_bank3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LL_miss_wait_pf_bank0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LL_miss_wait_pf_bank1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LL_miss_wait_pf_bank2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"LL_miss_wait_pf_bank3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"branch_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"cse_priority_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"d_move_wait", NULL, 
REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"eu_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"fl_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"l2_sy_miss_wait_dm_part1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"l2_sy_miss_wait_dm_part2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"msgr_reqp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"msgr_rtnp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"msgs_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_l1d_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_l1d_miss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_l2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_l2_miss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_ll_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_ll_miss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_nc_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_pfp_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_pfp_busy_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_pfp_busy_swpf", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_swpf", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_sxmiss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"op_stv_wait_sxmiss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_branch_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_eu_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_fl_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 
1, ABST_NONE}, + {"w_op_stv_wait_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_l1d_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_l1d_miss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_l2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_l2_miss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_ll_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_ll_miss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_nc_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_pfp_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_pfp_busy_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_pfp_busy_swpf", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_sxmiss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"w_op_stv_wait_sxmiss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {"single_mode_cycle_counts", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"suspend_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"sleep_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + /* additional (hidden) aliases for convenience */ + {"cycles0", "cycle_counts", 0, NULL, PRELOADS_8, 1, ABST_NONE}, + {"cycles1", "cycle_counts", 1, NULL, PRELOADS_8, 1, ABST_NONE}, + {"insts0", "instruction_counts", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "instruction_counts", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +/* Kernel profiling pseudo-chip, OBSOLETE (To support 12.3 and earlier, TBR) */ +static Hwcentry kproflist[] = { + {"kcycles", "kcycles", 0, STXT ("KCPU Cycles"), PRELOADS_5, 1, ABST_NONE}, + {"kucycles", "kucycles", 0, STXT ("KUCPU Cycles"), PRELOADS_5, 1, ABST_NONE}, + {"kthr", "kthr", 0, STXT ("KTHR Cycles"), PRELOADS_5, 1, 
ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry pentiumIIlist[] = { + /* note -- missing entries for dtlbm, ecm */ + {"cycles", "cpu_clk_unhalted", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"insts", "inst_retired", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE}, + {"icm", "ifu_ifetch_miss", REGNO_ANY, STXT ("I$ Misses"), PRELOADS_5, 0, ABST_NONE}, + {"dcrm", "dcu_m_lines_in", REGNO_ANY, STXT ("D$ Read Misses"), PRELOADS_5, 0, ABST_NONE}, + {"dcwm", "dcu_m_lines_out", REGNO_ANY, STXT ("D$ Write Misses"), PRELOADS_5, 0, ABST_NONE}, + {"flops", "flops", REGNO_ANY, STXT ("Floating-point Ops"), PRELOADS_7, 0, ABST_NONE}, + {"itlbm", "itlb_miss", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE}, + {"ecim", "l2_ifetch", REGNO_ANY, STXT ("E$ Instr. Misses"), PRELOADS_5, 0, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {"cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE}, + + /* additional (hidden) aliases for convenience */ + {"cycles0", "cpu_clk_unhalted", 0, NULL, PRELOADS_75, 1, ABST_NONE}, + {"cycles1", "cpu_clk_unhalted", 1, NULL, PRELOADS_75, 1, ABST_NONE}, + {"insts0", "inst_retired", 0, NULL, PRELOADS_75, 0, ABST_NONE}, + {"insts1", "inst_retired", 1, NULL, PRELOADS_75, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry pentiumIIIlist[] = { + /* note -- many missing entries; no reference machine to try */ + {"cycles", "cpu_clk_unhalted", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"insts", "inst_retired", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {"cpu_clk_unhalted", NULL, REGNO_ANY, NULL, 
PRELOADS_7, 1, ABST_NONE}, + + /* additional (hidden) aliases for convenience */ + {"cycles0", "cpu_clk_unhalted", 0, NULL, PRELOADS_75, 1, ABST_NONE}, + {"cycles1", "cpu_clk_unhalted", 1, NULL, PRELOADS_75, 1, ABST_NONE}, + {"insts0", "inst_retired", 0, NULL, PRELOADS_75, 0, ABST_NONE}, + {"insts1", "inst_retired", 1, NULL, PRELOADS_75, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry pentium4[] = { + {"cycles", "TC_deliver_mode~threshold=0xf~complement=1~compare=1", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"insts", "instr_retired~emask=0x3", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE}, + {"l1m", "BSQ_cache_reference~emask=0x0507", REGNO_ANY, STXT ("L1 Cache Misses"), PRELOADS_7, 0, ABST_NONE}, + {"l2h", "BSQ_cache_reference~emask=0x0007", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_7, 0, ABST_NONE}, + {"l2m", "BSQ_cache_reference~emask=0x0500", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {"TC_deliver_mode", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"machine_clear", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + /* additional (hidden) aliases, for convenience */ + {"cycles0", "TC_deliver_mode~threshold=0xf~complement=1~compare=1", 5, NULL, PRELOADS_75, 1, ABST_NONE}, + {"cycles1", "TC_deliver_mode~threshold=0xf~complement=1~compare=1", 6, NULL, PRELOADS_75, 1, ABST_NONE}, + {"insts0", "instr_retired~emask=0x3", 15, NULL, PRELOADS_75, 0, ABST_NONE}, + {"insts1", "instr_retired~emask=0x3", 16, NULL, PRELOADS_75, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry intelCore2list[] = { + // For post-processing, both Linux and Solaris definitions need to be "live". + // However, for data collection, OS-specific definitions may need to be hidden. 
+ // Use REGNO_INVALID for definitions that should be hidden for data collection. +#define LINUX_ONLY REGNO_ANY +#define SOLARIS_ONLY REGNO_INVALID /* hidden for Linux data collection */ + + {"cycles", "cpu_clk_unhalted.core", /*6759307*/ SOLARIS_ONLY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"cycles", "cpu_clk_unhalted.thread", /*6759307*/ SOLARIS_ONLY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + /* Linux Note: 7046312 Many HWC tests fail on system Core2 system with perf_events if above alias used */ + {"cycles", "cpu_clk_unhalted", LINUX_ONLY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + + {"insts", "instr_retired.any", SOLARIS_ONLY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + /* Linux Note: 7046312 Many HWC tests fail on system Core2 system with perf_events if above alias used */ + {"insts", "inst_retired", LINUX_ONLY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + + // The following counters were identified in "Cycle Accounting Analysis on Intel Core2 Processors" by David Levinthal + {"uops_stalled", "rs_uops_dispatched~cmask=1~inv=1", REGNO_ANY, STXT ("uOps Stalled"), PRELOADS_7, 1, ABST_NONE}, + {"l2m", "mem_load_retired~umask=0x08", REGNO_ANY, STXT ("L2 Line Misses"), PRELOADS_5, 0, ABST_NONE}, + {"dtlbm", "mem_load_retired~umask=0x10", REGNO_ANY, STXT ("L1 DTLB Misses"), PRELOADS_5, 0, ABST_NONE}, + {"l1m", "mem_load_retired~umask=0x02", REGNO_ANY, STXT ("L1 Line Misses"), PRELOADS_6, 0, ABST_NONE}, + // {"stalls_resources","resource_stalls~umask=0x1f", REGNO_ANY, STXT("Resource Stalls"), PRELOADS_6, 1, ABST_NONE}, + {"rs_full", "resource_stalls~umask=0x02", REGNO_ANY, STXT ("Reservation Station Full"), PRELOADS_6, 1, ABST_NONE}, + {"br_miss_flush", "resource_stalls~umask=0x10", REGNO_ANY, STXT ("Mispredicted Branch Flushes"), PRELOADS_6, 1, ABST_NONE}, + {"ld_st_full", "resource_stalls~umask=0x04", REGNO_ANY, STXT ("Load/Store Buffers Full"), PRELOADS_6, 1, ABST_NONE}, + {"rob_full", 
"resource_stalls~umask=0x01", REGNO_ANY, STXT ("Reorder Buffer Full"), PRELOADS_6, 1, ABST_NONE}, + {"slow_decode", "ild_stall", REGNO_ANY, STXT ("Slow Instruction Decode"), PRELOADS_6, 1, ABST_NONE}, + {"br_miss", "br_cnd_missp_exec", REGNO_ANY, STXT ("Mispredicted Branches"), PRELOADS_5, 0, ABST_NONE}, + {"ret_miss", "br_call_missp_exec", REGNO_ANY, STXT ("Mispredicted Return Calls"), PRELOADS_5, 0, ABST_NONE}, + {"div_busy", "idle_during_div", REGNO_ANY, STXT ("Divider Unit Busy"), PRELOADS_5, 1, ABST_NONE}, + {"fp_assists", "fp_assist", REGNO_ANY, STXT ("FP Microcode Assists"), PRELOADS_5, 0, ABST_NONE}, + {"bus_busy", "bus_drdy_clocks~umask=0x60", REGNO_ANY, STXT ("Busy Data Bus"), PRELOADS_5, 1, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {/*30a*/"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*30a*/"cpu_clk_unhalted.thread", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*03*/"store_block", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*03*/"store_block.drain_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*03*/"store_block.order", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*03*/"store_block.snoop", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*09*/"memory_disambiguation.reset", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0c*/"page_walks.cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*14*/"cycles_div_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*18*/"idle_during_div", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*19*/"delayed_bypass.load", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*21*/"l2_ads", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*23*/"l2_dbus_busy_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*32*/"l2_no_req", NULL, 
REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*3c*/"cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*3c*/"cpu_clk_unhalted.core_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*3c*/"cpu_clk_unhalted.bus", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*3c*/"cpu_clk_unhalted.no_other", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*42*/"l1d_cache_lock.duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*62*/"bus_drdy_clocks", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*63*/"bus_lock_clocks", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*64*/"bus_data_rcv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*7a*/"bus_hit_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*7b*/"bus_hitm_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*7d*/"busq_empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*7e*/"snoop_stall_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*7f*/"bus_io_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*83*/"inst_queue", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*83*/"inst_queue.full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*86*/"cycles_l1i_mem_stalled", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*87*/"ild_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1*/"rs_uops_dispatched", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1*/"rs_uops_dispatched_port", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1*/"rs_uops_dispatched_port.0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1*/"rs_uops_dispatched_port.1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1*/"rs_uops_dispatched_port.2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1*/"rs_uops_dispatched_port.3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1*/"rs_uops_dispatched_port.4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, 
ABST_NONE}, + {/*a1*/"rs_uops_dispatched_port.5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*6c*/"cycles_int", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*6c*/"cycles_int.masked", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*6c*/"cycles_int.pending_and_masked", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d2*/"rat_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d2*/"rat_stalls.rob_read_port", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d2*/"rat_stalls.partial_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d2*/"rat_stalls.flags", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d2*/"rat_stalls.fpsw", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d2*/"rat_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d2*/"rat_stalls.other_serialization_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d4*/"seg_rename_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d4*/"seg_rename_stalls.es", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d4*/"seg_rename_stalls.ds", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d4*/"seg_rename_stalls.fs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d4*/"seg_rename_stalls.gs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d4*/"seg_rename_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*dc*/"resource_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*dc*/"resource_stalls.rob_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*dc*/"resource_stalls.rs_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*dc*/"resource_stalls.ld_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*dc*/"resource_stalls.fpcw", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*dc*/"resource_stalls.br_miss_clear", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*dc*/"resource_stalls.any", NULL, 
REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + /* "Architectural" events: */ + {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + /* additional (hidden) aliases for convenience */ + {"cycles0", "cpu_clk_unhalted", 0, NULL, PRELOADS_8, 1, ABST_NONE}, + {"cycles1", "cpu_clk_unhalted", 1, NULL, PRELOADS_8, 1, ABST_NONE}, + {"insts0", "inst_retired", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "inst_retired", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + + +static Hwcentry intelNehalemList[] = { + /* 6832635: on Linux, we're not seeing consistent overflows on FFCs */ + /* 15634344==6940930: HWC overflow profiling can cause system hang on Solaris/core-i7 systems */ + /* 17578620: counter overflow for fixed-function counters hangs systems */ + /* same issues for intelSandyBridgeList and intelHaswellList */ + PERF_EVENTS_SW_EVENT_ALIASES + USE_INTEL_REF_CYCLES (133) + {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + // cpu_clk_unhalted.ref: at the ref requency of the cpu. Should not be affected by Speedstep or Turbo. + // cpu_clk_unhalted.thread_p: with HT & 2 threads, 2x cycles. Affected by Speedstep and Turbo. + + // PEBs (Sampling) + {"l2m_latency", "mem_inst_retired.latency_above_threshold", REGNO_ANY, STXT ("L2 Cache Miss Est. 
Latency"), PRELOADS_4, 33, ABST_EXACT_PEBS_PLUS1}, + + // See file hwctable.README.corei7 + {"dch", "mem_load_retired.l1d_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE}, + {"dcm", "0xCB~umask=0x1e", REGNO_ANY, STXT ("L1 D-Cache Misses"), PRELOADS_65, 0, ABST_NONE}, /*mem_load_retired*/ + {"lfbdh", "mem_load_retired.hit_lfb", REGNO_ANY, STXT ("LFB D-cache Hits"), PRELOADS_65, 0, ABST_NONE}, + {"l2h", "mem_load_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE}, + {"l2m", "0xCB~umask=0x1c", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, /*mem_load_retired*/ + {"l3h", "mem_load_retired.llc_unshared_hit", REGNO_ANY, STXT ("L3 Cache Hit w/o Snoop"), PRELOADS_6, 0, ABST_NONE}, + {"l3h_stall", "mem_load_retired.llc_unshared_hit", REGNO_ANY, STXT ("L3 Cache Hit w/o Snoop x 35: Est. Stalls"), PRELOADS_6, 35, ABST_NONE}, + {"l3hsnoop", "mem_load_retired.other_core_l2_hit_hitm", REGNO_ANY, STXT ("L3 Cache Hit w/Snoop"), PRELOADS_6, 0, ABST_NONE}, + {"l3hsnoop_stall", "mem_load_retired.other_core_l2_hit_hitm", REGNO_ANY, STXT ("L3 Cache Hit w/Snoop x 74: Est. Stalls"), PRELOADS_6, 74, ABST_NONE}, + {"l3m", "mem_load_retired.llc_miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE}, + {"l3m_stall", "mem_load_retired.llc_miss", REGNO_ANY, STXT ("L3 Cache Misses x 180: Estimated Stalls"), PRELOADS_5, 180, ABST_NONE}, + {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE}, + {"dtlbm_stall", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6, 30, ABST_NONE}, + {"addr_alias_stall", "partial_address_alias", REGNO_ANY, STXT ("Partial Address Aliases x 3: Est. 
Stalls"), PRELOADS_6, 3, ABST_NONE}, + {"uope_stall", "uops_executed.port234~cmask=1~inv=1", REGNO_ANY, STXT ("UOP Execute Stalls per Core"), PRELOADS_7, 1, ABST_NONE}, + {"uopr_stall", "uops_retired.any~cmask=1~inv=1", REGNO_ANY, STXT ("UOP Retired Stalls"), PRELOADS_7, 1, ABST_NONE}, + {"itlbm", "itlb_miss_retired", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_6, 0, ABST_NONE}, + {"l1i_stall", "l1i.cycles_stalled", REGNO_ANY, STXT ("L1 I-cache Stalls"), PRELOADS_6, 1, ABST_NONE}, + {"br_rets", "br_inst_retired.all_branches", REGNO_ANY, STXT ("Branch Instruction Retires"), PRELOADS_7, 0, ABST_NONE}, + {"br_misp", "br_misp_exec.any", REGNO_ANY, STXT ("Branch Mispredicts"), PRELOADS_6, 0, ABST_NONE}, + {"mach_clear", "machine_clears.cycles", REGNO_ANY, STXT ("Machine Clear Asserted"), PRELOADS_6, 1, ABST_NONE}, + {"fp_mmx", "fp_mmx_trans.any", REGNO_ANY, STXT ("FP-MMX Transistions"), PRELOADS_6, 0, ABST_NONE}, + {"div_busy", "arith.cycles_div_busy", REGNO_ANY, STXT ("Divider Busy Cycles"), PRELOADS_6, 1, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {/*30a*/"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*30a*/"cpu_clk_unhalted.thread", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*04*/"sb_drain.cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*08.04*/"dtlb_load_misses.walk_cycles", /*westmere*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + //{/*0e*/"uops_issued.stalled_cycles",/*future, multibit*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*09*/"memory_disambiguation.reset", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*09*/"memory_disambiguation.watch_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0b*/"mem_inst_retired.latency_above_threshold", /*PEBS*/ NULL, REGNO_ANY, NULL, 
PRELOADS_4, 33, ABST_EXACT_PEBS_PLUS1}, //non-standard overflow + {/*14*/"arith.cycles_div_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*17*/"inst_queue_write_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*1d*/"hw_int.cycles_masked", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*1d*/"hw_int.cycles_pending_and_masked", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*3c*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*48*/"l1d_pend_miss.load_buffers_full", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*49.04*/"dtlb_misses.walk_cycles", /*westmere*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*4e*/"sfence_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*4f.10*/"ept.walk_cycles", /*westmere*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60*/"offcore_requests_outstanding.demand.read_data", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60*/"offcore_requests_outstanding.demand.read_code", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60*/"offcore_requests_outstanding.demand.rfo", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60*/"offcore_requests_outstanding.any.read", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*63*/"cache_lock_cycles.l1d", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*63*/"cache_lock_cycles.l1d_l2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*80*/"l1i.cycles_stalled", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*85*/"itlb_misses.walk_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*85*/"itlb_misses.pmh_busy_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*87*/"ild_stall.lcp", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*87*/"ild_stall.mru", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
{/*87*/"ild_stall.iq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*87*/"ild_stall.regen", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*87*/"ild_stall.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2*/"resource_stalls.load", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2*/"resource_stalls.rs_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2*/"resource_stalls.store", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2*/"resource_stalls.rob_full", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2*/"resource_stalls.fpcw", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2*/"resource_stalls.mxcsr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2*/"resource_stalls.other", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b0*/"offcore_requests_sq_full", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b3*/"snoopq_requests_outstanding.data", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b3*/"snoopq_requests_outstanding.invalidate", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b3*/"snoopq_requests_outstanding.code", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + //{/*c2*/"uops_retired.stalled_cycles",/*future, multibit*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c3*/"machine_clears.cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d2*/"rat_stalls.flags", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d2*/"rat_stalls.registers", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d2*/"rat_stalls.rob_read_port", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d2*/"rat_stalls.scoreboard", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*d2*/"rat_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
{/*d4*/"seg_rename_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*f6*/"sq_full_stall_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + /* "Architectural" events: */ + {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + PERF_EVENTS_SW_EVENT_DEFS + + /* additional (hidden) aliases for convenience */ +#if 0 + USE_INTEL_REF_CYCLES (133), +#endif + {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + + +static Hwcentry intelSandyBridgeList[] = { + /* see comments for "cycles" and "insts" for intelNehalemList */ + PERF_EVENTS_SW_EVENT_ALIASES + USE_INTEL_REF_CYCLES (100) + {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + + // PEBS (sampling) + {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY, STXT ("L2 Cache Miss Est. 
Latency"), PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1}, + + // See file hwctable.README.sandybridge + {"dch", "mem_load_uops_retired.l1_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE}, + {"dcm", "mem_load_uops_retired.l1_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, /*mem_load_uops_retired*/ + {"l2h", "mem_load_uops_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE}, + {"l2m", "mem_load_uops_retired.l2_miss", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, /*mem_load_uops_retired*/ + // Intel errata: BT241 and BT243 says the mem_load_uops_retired.llc* counters may not be reliable on some CPU variants + {"l3h", "mem_load_uops_retired.llc_hit", REGNO_ANY, STXT ("L3 Cache Hit w/o Snoop"), PRELOADS_6, 0, ABST_NONE}, // may undercount + {"l3m", "longest_lat_cache.miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE}, + + /* dtlbm has not been confirmed via Intel white paper */ + {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE}, + {"dtlbm_stall", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6, 30, ABST_NONE}, + {"dtlbm", "dtlb_load_misses.demand_ld_walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE}, + {"dtlbm_stall", "dtlb_load_misses.demand_ld_walk_completed", REGNO_ANY, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6, 30, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {/* 30a */"cpu_clk_unhalted.thread", /*15634344==6940930*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + //{/* 30a */"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*08.04*/"dtlb_load_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
{/*08.84*/"dtlb_load_misses.demand_ld_walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d.03*/"int_misc.recovery_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d.40*/"int_misc.rat_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0e.01*/"uops_issued.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0e.01*/"uops_issued.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*14.01*/"arith.fpu_div_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*48.01*/"l1d_pend_miss.pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*49.04*/"dtlb_store_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*59.20*/"partial_rat_stalls.flags_merge_uop", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*59.20*/"partial_rat_stalls.flags_merge_uop_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*59.40*/"partial_rat_stalls.slow_lea_window", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + //{/*59.80*/"partial_rat_stalls.mul_single_uop", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5b.0c*/"resource_stalls2.all_fl_empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5b.0f*/"resource_stalls2.all_prf_control", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5b.40*/"resource_stalls2.bob_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5b.4f*/"resource_stalls2.ooo_rsrc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5c.01*/"cpl_cycles.ring0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5c.02*/"cpl_cycles.ring123", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5c.xx*/"cpl_cycles.ring0_trans", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5c.xx*/"cpl_cycles.ring0_transition", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
{/*5e.01*/"rs_events.empty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.01*/"offcore_requests_outstanding.demand_data_rd_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.04*/"offcore_requests_outstanding.cycles_with_demand_rfo", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.04*/"offcore_requests_outstanding.demand_rfo_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.08*/"offcore_requests_outstanding.all_data_rd_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.02*/"offcore_requests_outstanding.demand_code_rd_cycles", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*63.01*/"lock_cycles.split_lock_uc_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*63.02*/"lock_cycles.cache_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.00*/"idq.empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.04*/"idq.mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.08*/"idq.dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.10*/"idq.ms_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.20*/"idq.ms_mite_uops_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.20*/"idq.ms_mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.30*/"idq.ms_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.18*/"idq.all_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
{/*79.24*/"idq.all_mite_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.24*/"idq.all_mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.24*/"idq.all_mite_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.3c*/"idq.mite_all_cycles", /* Linux, but not in docs? */ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*80.04*/"icache.ifetch_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*85.04*/"itlb_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*87.01*/"ild_stall.lcp", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*87.04*/"ild_stall.iq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.xx*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.xx*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.xx*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.xx*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.01*/"idq_uops_not_delivered.cycles_ge_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.01*/"uops_executed_port.port_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.02*/"uops_executed_port.port_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.04*/"uops_executed_port.port_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.08*/"uops_executed_port.port_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.10*/"uops_executed_port.port_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.20*/"uops_executed_port.port_5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
{/*a2.01*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.02*/"resource_stalls.lb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.04*/"resource_stalls.rs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.08*/"resource_stalls.sb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.0a*/"resource_stalls.lb_sb", /*sb-ep*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.0e*/"resource_stalls.mem_rs", /*sb-ep*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.10*/"resource_stalls.rob", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.20*/"resource_stalls.fcsw", /*sb*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.40*/"resource_stalls.mxcsr", /*sb*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.80*/"resource_stalls.other", /*sb*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.F0*/"resource_stalls.ooo_rsrc", /*sb-ep*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {/*a3.01*/"cycle_activity.cycles_l2_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*??.??*/"cycle_activity.stalls_l2_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.02*/"cycle_activity.cycles_ldm_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*??.??*/"cycle_activity.stalls_ldm_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.04*/"cycle_activity.cycles_no_execute", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.04*/"cycle_activity.cycles_no_dispatch", /*sandybridge*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.08*/"cycle_activity.cycles_l1d_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*??.??*/"cycle_activity.stalls_l1d_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
{/*b1.??*/"uops_executed.stall_cycles", /*? not in PRM*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.01*/"uops_dispatched.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.01*/"uops_executed.stall_cycles", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.01*/"uops_executed.cycles_ge_1_uop_exec", /*F6M62,not doc'd*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.01*/"uops_executed.cycles_ge_2_uops_exec", /*F6M62,not doc'd*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.01*/"uops_executed.cycles_ge_3_uops_exec", /*F6M62,not doc'd*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.01*/"uops_executed.cycles_ge_4_uops_exec", /*F6M62,not doc'd*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {/*bf.05*/"l1d_blocks.bank_conflict_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c2.01*/"uops_retired.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c2.01*/"uops_retired.total_cycles", /*cmask==0x10*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c2.01*/"uops_retired.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c2.01*/"uops_retired.active_cycles", /*cmask==0x1*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, +#if 0 // need to see documentation on the following before marking them as cycles + uops_executed.cycles_ge_1_uop_exec[ / {0 | 1 | 2 | 3}], 1000003 (events) + uops_executed.cycles_ge_2_uops_exec[ / + {0 | 1 | 2 | 3} + ], 1000003 (events) + uops_executed.cycles_ge_3_uops_exec[ / + {0 | 1 | 2 | 3} + ], 1000003 (events) + uops_executed.cycles_ge_4_uops_exec[ / + {0 | 1 | 2 | 3} + ], 1000003 (events) +#endif + {/*cd.01*/"mem_trans_retired.load_latency", /*PEBS*/ NULL, REGNO_ANY, NULL, PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1}, //non-standard overflow + + /* "Architectural" events: */ + {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + PERF_EVENTS_SW_EVENT_DEFS + + 
/* additional (hidden) aliases for convenience */ +#if 0 + USE_INTEL_REF_CYCLES (100), +#endif + {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + + +static Hwcentry intelHaswellList[] = { + /* see comments for "cycles" and "insts" for intelNehalemList */ + PERF_EVENTS_SW_EVENT_ALIASES + USE_INTEL_REF_CYCLES (100) + {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + + // PEBS (sampling) + {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1}, + + {"dch", "mem_load_uops_retired.l1_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE}, + {"dcm", "mem_load_uops_retired.l1_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, //mem_load_uops_retired + {"dcm", "0xd1~umask=0x08", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, //mem_load_uops_retired + {"l2h", "mem_load_uops_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE}, + {"l2m", "mem_load_uops_retired.l2_miss", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, //mem_load_uops_retired + {"l2m", "0xd1~umask=0x10", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, //mem_load_uops_retired + {"l3h", "mem_load_uops_retired.l3_hit", REGNO_ANY, STXT ("L3 Cache Hit w/o Snoop"), PRELOADS_6, 0, ABST_NONE}, + {"l3m", "mem_load_uops_retired.l3_miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE}, //mem_load_uops_retired + {"l3m", "0xd1~umask=0x20", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE}, //mem_load_uops_retired + + /* dtlbm has not been confirmed via Intel white paper */ + {"dtlbm", 
"dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE}, + {"dtlbm_stall", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6, 30, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {/* 30a */"cpu_clk_unhalted.thread", /*15634344==6940930*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + //{/* 30a */"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*08.10*/"dtlb_load_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d.03*/"int_misc.recovery_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0e.01*/"uops_issued.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0e.01*/"uops_issued.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*48.01*/"l1d_pend_miss.pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*49.04*/"dtlb_store_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5c.01*/"cpl_cycles.ring0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5c.02*/"cpl_cycles.ring123", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5c.xx*/"cpl_cycles.ring0_trans", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5e.01*/"rs_events.empty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.02*/"offcore_requests_outstanding.demand_code_rd_cycles", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.04*/"offcore_requests_outstanding.demand_rfo_cycles", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
{/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*63.01*/"lock_cycles.split_lock_uc_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*63.02*/"lock_cycles.cache_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.00*/"idq.empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.04*/"idq.mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.08*/"idq.dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.10*/"idq.ms_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.20*/"idq.ms_mite_uops_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.20*/"idq.ms_mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.30*/"idq.ms_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.24*/"idq.all_mite_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.24*/"idq.all_mite_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*80.04*/"icache.ifetch_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*85.04*/"itlb_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*87.01*/"ild_stall.lcp", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, // Intel SDM says these are stalls, not cycles + {/*87.04*/"ild_stall.iq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.xx*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.xx*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.xx*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, 
ABST_NONE}, + {/*9c.xx*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + // {/*9c.01*/"idq_uops_not_delivered.cycles_ge_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {/*a1.01*/"uops_executed_port.port_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.02*/"uops_executed_port.port_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.04*/"uops_executed_port.port_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.08*/"uops_executed_port.port_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.10*/"uops_executed_port.port_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.20*/"uops_executed_port.port_5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.40*/"uops_executed_port.port_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.80*/"uops_executed_port.port_7", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.01*/"uops_executed_port.port_0_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.02*/"uops_executed_port.port_1_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.04*/"uops_executed_port.port_2_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.08*/"uops_executed_port.port_3_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.10*/"uops_executed_port.port_4_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.20*/"uops_executed_port.port_5_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.40*/"uops_executed_port.port_6_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.80*/"uops_executed_port.port_7_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {/*a2.01*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.04*/"resource_stalls.rs", NULL, 
REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.08*/"resource_stalls.sb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.10*/"resource_stalls.rob", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {/*a3.01*/"cycle_activity.cycles_l2_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + // {/*a3.01*/"cycle_activity.cycles_l2_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.02*/"cycle_activity.cycles_ldm_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + // {/*a3.05*/"cycle_activity.stalls_l2_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.08*/"cycle_activity.cycles_l1d_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + // {/*a3.??*/"cycle_activity.cycles_no_execute", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + // {/*a3.??*/"cycle_activity.stalls_ldm_pending",/*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {/*b1.??*/"uops_executed.stall_cycles", /*? 
not in PRM*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.??*/"uops_executed.cycles_ge_1_uop_exec", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.??*/"uops_executed.cycles_ge_2_uops_exec", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.??*/"uops_executed.cycles_ge_3_uops_exec", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.??*/"uops_executed.cycles_ge_4_uops_exec", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {/*c2.01*/"uops_retired.stall_cycles", /*cmask==1 + INV*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c2.01*/"uops_retired.total_cycles", /*cmask==0x1*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c2.01*/"uops_retired.core_stall_cycles", /*PEBS Any==1*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {/*c3.01*/"machine_clears.cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {/*ca.1e*/"fp_assist.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + {/*cd.01*/"mem_trans_retired.load_latency", /*PEBS*/ NULL, REGNO_ANY, NULL, PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1}, //non-standard overflow + + /* "Architectural" events: */ + {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + PERF_EVENTS_SW_EVENT_DEFS + + /* additional (hidden) aliases for convenience */ +#if 0 + USE_INTEL_REF_CYCLES (100), +#endif + {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + + +static Hwcentry intelBroadwellList[] = { + /* see comments for "cycles" and "insts" for intelNehalemList */ + PERF_EVENTS_SW_EVENT_ALIASES + USE_INTEL_REF_CYCLES (100) + {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + + // PEBS (sampling) + 
{"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1}, + {/*cd.01*/"mem_trans_retired.load_latency", NULL, REGNO_ANY, NULL, PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1}, + + // aliases (the first set are PEBS, but on Intel the only precise counter we support is l2m_latency) + {"dch", "mem_load_uops_retired.l1_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE}, + {"dcm", "mem_load_uops_retired.l1_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, + {"l2h", "mem_load_uops_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE}, + {"l2m", "mem_load_uops_retired.l2_miss", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, + {"l3h", "mem_load_uops_retired.l3_hit", REGNO_ANY, STXT ("L3 Cache Hits"), PRELOADS_6, 0, ABST_NONE}, + {"l3m", "mem_load_uops_retired.l3_miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE}, + {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE}, + + // counters that can be time converted (add FFCs if we decide to support them) + // counters that are load-store (did not include any... do we want to?) 
+ {/*08.10*/"dtlb_load_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d.03*/"int_misc.recovery_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0e.01*/"uops_issued.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0e.01*/"uops_issued.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*14.01*/"arith.fpu_div_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*3c.00*/"cpu_clk_unhalted.thread_p_any", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*3c.02*/"cpu_clk_thread_unhalted.one_thread_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*48.01*/"l1d_pend_miss.pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*48.01*/"l1d_pend_miss.pending_cycles_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*49.10*/"dtlb_store_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*4f.10*/"ept.walk_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5c.01*/"cpl_cycles.ring0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5c.01*/"cpl_cycles.ring0_trans", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5c.02*/"cpl_cycles.ring123", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5e.01*/"rs_events.empty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.02*/"offcore_requests_outstanding.demand_code_rd_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.04*/"offcore_requests_outstanding.demand_rfo_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*63.01*/"lock_cycles.split_lock_uc_lock_duration", 
NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*63.02*/"lock_cycles.cache_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.02*/"idq.empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.04*/"idq.mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.08*/"idq.dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.10*/"idq.ms_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.24*/"idq.all_mite_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.24*/"idq.all_mite_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.30*/"idq.ms_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*85.10*/"itlb_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.xx*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.xx*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.xx*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.xx*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.01*/"uops_executed_port.port_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.02*/"uops_executed_port.port_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.04*/"uops_executed_port.port_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.08*/"uops_executed_port.port_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.10*/"uops_executed_port.port_4", 
NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.20*/"uops_executed_port.port_5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.40*/"uops_executed_port.port_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.80*/"uops_executed_port.port_7", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.01*/"uops_executed_port.port_0_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.02*/"uops_executed_port.port_1_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.04*/"uops_executed_port.port_2_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.08*/"uops_executed_port.port_3_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.10*/"uops_executed_port.port_4_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.20*/"uops_executed_port.port_5_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.40*/"uops_executed_port.port_6_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.80*/"uops_executed_port.port_7_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.01*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.04*/"resource_stalls.rs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.08*/"resource_stalls.sb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.10*/"resource_stalls.rob", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.01*/"cycle_activity.cycles_l2_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.02*/"cycle_activity.cycles_ldm_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.04*/"cycle_activity.cycles_no_execute", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.08*/"cycle_activity.cycles_l1d_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a8.01*/"lsd.cycles_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a8.01*/"lsd.cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, 
ABST_NONE}, + {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.01*/"uops_executed.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c2.01*/"uops_retired.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c2.01*/"uops_retired.total_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c2.01*/"uops_retired.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c3.01*/"machine_clears.cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*ca.1e*/"fp_assist.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + /* "Architectural" events: */ + {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + PERF_EVENTS_SW_EVENT_DEFS + + /* additional (hidden) aliases for convenience */ +#if 0 + USE_INTEL_REF_CYCLES (100), +#endif + {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry intelSkylakeList[] = { + /* see comments for "cycles" and "insts" for intelNehalemList */ + PERF_EVENTS_SW_EVENT_ALIASES + USE_INTEL_REF_CYCLES (25) + {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + + // PEBS (sampling) + {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY, STXT ("L2 Cache Miss Est. 
Latency"), PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1}, + {/*cd.01*/"mem_trans_retired.load_latency", NULL, REGNO_ANY, NULL, PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1}, + + // aliases (the first set are PEBS, but on Intel the only precise counter we support is l2m_latency) + {"dch", "mem_load_retired.l1_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE}, + {"dcm", "mem_load_retired.l1_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, + {"l2h", "mem_load_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE}, + {"l2m", "mem_load_retired.l2_miss", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, + {"l2m_stall", "cycle_activity.stalls_l2_miss", REGNO_ANY, STXT ("L2 Cache Miss Stall"), PRELOADS_7, 1, ABST_NONE}, // needs validation + {"l3h", "mem_load_retired.l3_hit", REGNO_ANY, STXT ("L3 Cache Hits"), PRELOADS_6, 0, ABST_NONE}, + {"l3m", "mem_load_retired.l3_miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE}, + {"l3m_stall", "cycle_activity.stalls_l3_miss", REGNO_ANY, STXT ("L3 Cache Miss Stall"), PRELOADS_7, 1, ABST_NONE}, // needs validation + {"dtlbm_stall", "dtlb_load_misses.walk_active", REGNO_ANY, STXT ("DTLB Miss Est Stall"), PRELOADS_7, 1, ABST_NONE, STXT ("Estimated time stalled on DTLB misses requiring a tablewalk. 
Does not include time related to STLB hits.")}, // needs validation + // PEBS mem_inst_retired.stlb_miss_loads for finding location of DTLB issues + // what about: dtlb_load_misses.walk_completed, dtlb_load_misses.walk_pending, dtlb_load_misses.stlb_hit + + {"fp_scalar", "fp_arith_inst_retired.scalar_double~umask=0x3", REGNO_ANY, STXT ("FP Scalar uOps"), PRELOADS_7, 0, ABST_NONE, STXT ("Floating-point scalar micro-ops that retired")}, + {"fp_vector", "fp_arith_inst_retired.128b_packed_double~umask=0x3c", REGNO_ANY, STXT ("FP Vector uOps"), /*needs test*/ PRELOADS_7, 0, ABST_NONE, STXT ("Floating-point vector micro-ops that retired")}, + + // counters that can be time converted (add FFCs if we decide to support them) + // counters that are load-store (did not include any... do we want to?) + {/*08.10*/"dtlb_load_misses.walk_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*08.10*/"dtlb_load_misses.walk_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d.01*/"int_misc.recovery_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d.01*/"int_misc.recovery_cycles_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d.80*/"int_misc.clear_resteer_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0e.01*/"uops_issued.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*14.01*/"arith.divider_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*3c.00*/"cpu_clk_unhalted.ring0_trans", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*3c.00*/"cpu_clk_unhalted.thread_p_any", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*3c.00*/"cpu_clk_unhalted.core", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*48.01*/"l1d_pend_miss.pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*48.01*/"l1d_pend_miss.pending_cycles_any", NULL, REGNO_ANY, NULL, 
PRELOAD_DEF, 1, ABST_NONE}, + {/*49.10*/"dtlb_store_misses.walk_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*49.10*/"dtlb_store_misses.walk_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*4f.10*/"ept.walk_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*5e.01*/"rs_events.empty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.01*/"offcore_requests_outstanding.demand_data_rd_ge_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.02*/"offcore_requests_outstanding.cycles_with_demand_code_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.04*/"offcore_requests_outstanding.cycles_with_demand_rfo", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.10*/"offcore_requests_outstanding.cycles_with_l3_miss_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*60.10*/"offcore_requests_outstanding.l3_miss_demand_data_rd_ge_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*63.02*/"lock_cycles.cache_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.04*/"idq.mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.08*/"idq.dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.10*/"idq.ms_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.24*/"idq.all_mite_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.24*/"idq.all_mite_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*79.30*/"idq.ms_cycles", NULL, 
REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*80.04*/"icache_16b.ifdata_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*83.04*/"icache_64b.iftag_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*85.10*/"itlb_misses.walk_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*85.10*/"itlb_misses.walk_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*87.01*/"ild_stall.lcp", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.01*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.01*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.01*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.01*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.01*/"uops_dispatched_port.port_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.02*/"uops_dispatched_port.port_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.04*/"uops_dispatched_port.port_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.08*/"uops_dispatched_port.port_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.10*/"uops_dispatched_port.port_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.20*/"uops_dispatched_port.port_5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.40*/"uops_dispatched_port.port_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a1.80*/"uops_dispatched_port.port_7", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.01*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a2.08*/"resource_stalls.sb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
{/*a3.01*/"cycle_activity.cycles_l2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.02*/"cycle_activity.cycles_l3_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.04*/"cycle_activity.stalls_total", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.05*/"cycle_activity.stalls_l2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.06*/"cycle_activity.stalls_l3_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.08*/"cycle_activity.cycles_l1d_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.0c*/"cycle_activity.stalls_l1d_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.10*/"cycle_activity.cycles_mem_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a3.14*/"cycle_activity.stalls_mem_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a6.01*/"exe_activity.exe_bound_0_ports", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a6.02*/"exe_activity.1_ports_util", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a6.04*/"exe_activity.2_ports_util", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a6.08*/"exe_activity.3_ports_util", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a6.10*/"exe_activity.4_ports_util", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a6.40*/"exe_activity.bound_on_stores", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a8.01*/"lsd.cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*a8.01*/"lsd.cycles_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.01*/"uops_executed.cycles_ge_1_uop_exec", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.01*/"uops_executed.cycles_ge_2_uops_exec", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.01*/"uops_executed.cycles_ge_3_uops_exec", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
{/*b1.01*/"uops_executed.cycles_ge_4_uops_exec", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.01*/"uops_executed.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.02*/"uops_executed.core_cycles_ge_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.02*/"uops_executed.core_cycles_ge_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.02*/"uops_executed.core_cycles_ge_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.02*/"uops_executed.core_cycles_ge_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*b1.02*/"uops_executed.core_cycles_none", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c0.1*/"inst_retired.total_cycles_ps", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c2.01*/"uops_retired.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c2.01*/"uops_retired.total_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*ca.1e*/"fp_assist.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + /* "Architectural" events: */ + {/* FFC */"cpu_clk_unhalted.thread", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/* FFC */"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + PERF_EVENTS_SW_EVENT_DEFS + + /* additional (hidden) aliases for convenience */ +#if 0 + USE_INTEL_REF_CYCLES (25), +#endif + {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry intelLinuxUnknown[] = { + PERF_EVENTS_SW_EVENT_ALIASES + // USE_INTEL_REF_CYCLES(100) // freq is unknown + {"cycles", "unhalted-core-cycles", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"cycles", "PERF_COUNT_HW_CPU_CYCLES", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "instruction-retired", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + 
{"insts", "PERF_COUNT_HW_INSTRUCTIONS", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + + {"dcm", "PERF_COUNT_HW_CACHE_MISSES.L1D", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, + {"llm", "llc-misses", REGNO_ANY, STXT ("Last-Level Cache Misses"), PRELOADS_5, 0, ABST_NONE}, + {"llm", "PERF_COUNT_HW_CACHE_MISSES.LL", REGNO_ANY, STXT ("Last-Level Cache Misses"), PRELOADS_5, 0, ABST_NONE}, + + {"br_msp", "branch-misses-retired", REGNO_ANY, STXT ("Branch Mispredict"), PRELOADS_6, 0, ABST_NONE}, + {"br_msp", "PERF_COUNT_HW_BRANCH_MISSES", REGNO_ANY, STXT ("Branch Mispredict"), PRELOADS_6, 0, ABST_NONE}, + {"br_ins", "branch-instruction-retired", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE}, + {"br_ins", "PERF_COUNT_HW_BRANCH_INSTRUCTIONS", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE}, + + // counters that can be time converted (add FFCs if we decide to support them) + // counters that are load-store (did not include any... do we want to?) 
+ /* "Architectural" events: */ + {/* FFC */"cpu_clk_unhalted.thread", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/* FFC */"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + PERF_EVENTS_SW_EVENT_DEFS + + /* additional (hidden) aliases for convenience */ + {"cycles0", "unhalted-reference-cycles", 0, NULL, PRELOADS_6, -(25), ABST_NONE}, //YXXX -can't do with ref cycles # + {"cycles0", "PERF_COUNT_HW_BUS_CYCLES", 0, NULL, PRELOADS_6, -(25), ABST_NONE}, //YXXX -can't do with ref cycles # + {"cycles1", "unhalted-reference-cycles", 1, NULL, PRELOADS_65, -(25), ABST_NONE}, //YXXX - can't do with ref cycles # + {"cycles1", "PERF_COUNT_HW_BUS_CYCLES", 1, NULL, PRELOADS_65, -(25), ABST_NONE}, //YXXX - can't do with ref cycles # + {"insts0", "instruction-retired", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts0", "PERF_COUNT_HW_INSTRUCTIONS", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "instruction-retired", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "PERF_COUNT_HW_INSTRUCTIONS", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry intelAtomList[] = { + {"cycles", "cpu_clk_unhalted.core", /*6759307*/ REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"cycles", "cpu_clk_unhalted.thread", /*6759307*/ REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"insts", "instr_retired.any", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE}, + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + /* XXXX add core2-related entries if appropriate */ + {/*30A*/"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE}, + {/*30A*/"cpu_clk_unhalted.thread", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE}, + {/*0c*/"page_walks.cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
{/*14*/"cycles_div_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*21*/"l2_ads", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*22*/"l2_dbus_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*32*/"l2_no_req", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*3c*/"cpu_clk_unhalted.core_p", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE}, + {/*3c*/"cpu_clk_unhalted.bus", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*3c*/"cpu_clk_unhalted.no_other", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*62*/"bus_drdy_clocks", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*63*/"bus_lock_clocks", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*64*/"bus_data_rcv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*7a*/"bus_hit_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*7b*/"bus_hitm_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*7d*/"busq_empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*7e*/"snoop_stall_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*7f*/"bus_io_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c6*/"cycles_int_masked.cycles_int_masked", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*c6*/"cycles_int_masked.cycles_int_pending_and_masked", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + /* "Architectural" events: */ + {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + /* additional (hidden) aliases for convenience */ + {"cycles0", "cpu_clk_unhalted.core_p", 0, NULL, PRELOADS_75, 1, ABST_NONE}, + {"cycles1", "cpu_clk_unhalted.core_p", 1, NULL, PRELOADS_75, 1, ABST_NONE}, + {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_75, 0, ABST_NONE}, + {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_75, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry amd_opteron_10h_11h[] = { + {"cycles", "BU_cpu_clk_unhalted", REGNO_ANY, STXT 
("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "FR_retired_x86_instr_w_excp_intr", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + {"icr", "IC_fetch", REGNO_ANY, STXT ("L1 I-cache Refs"), PRELOADS_7, 0, ABST_NONE}, /* new */ + {"icm", "IC_miss", REGNO_ANY, STXT ("L1 I-cache Misses"), PRELOADS_6, 0, ABST_NONE}, + {"l2itlbh", "IC_itlb_L1_miss_L2_hit", REGNO_ANY, STXT ("L2 ITLB Hits"), PRELOADS_6, 0, ABST_NONE}, /* new */ + {"l2itlbm", "IC_itlb_L1_miss_L2_miss", REGNO_ANY, STXT ("L2 ITLB Misses"), PRELOADS_5, 0, ABST_NONE}, /* new */ + {"l2ir", "BU_internal_L2_req~umask=0x1", REGNO_ANY, STXT ("L2 I-cache Refs"), PRELOADS_6, 0, ABST_NONE}, + {"l2im", "BU_fill_req_missed_L2~umask=0x1", REGNO_ANY, STXT ("L2 I-cache Misses"), PRELOADS_4, 0, ABST_NONE}, + {"dcr", "DC_access", REGNO_ANY, STXT ("L1 D-cache Refs"), PRELOADS_7, 0, ABST_NONE}, /* new */ + {"dcm", "DC_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, /* new */ + {"l2dtlbh", "DC_dtlb_L1_miss_L2_hit", REGNO_ANY, STXT ("L2 DTLB Hits"), PRELOADS_6, 0, ABST_NONE}, /* new */ + {"l2dtlbm", "DC_dtlb_L1_miss_L2_miss", REGNO_ANY, STXT ("L2 DTLB Misses"), PRELOADS_5, 0, ABST_NONE}, /* new */ + {"l2dr", "BU_internal_L2_req~umask=0x2", REGNO_ANY, STXT ("L2 D-cache Refs"), PRELOADS_65, 0, ABST_NONE}, /* hwc_cache_load: 1.6x overcount on shanghai01 */ + {"l2dm", "BU_fill_req_missed_L2~umask=0x2", REGNO_ANY, STXT ("L2 D-cache Misses"), PRELOADS_6, 0, ABST_NONE}, /* new */ + {"fpadd", "FP_dispatched_fpu_ops~umask=0x1", REGNO_ANY, STXT ("FP Adds"), PRELOADS_7, 0, ABST_NONE}, + {"fpmul", "FP_dispatched_fpu_ops~umask=0x2", REGNO_ANY, STXT ("FP Muls"), PRELOADS_7, 0, ABST_NONE}, + {"fpustall", "FR_dispatch_stall_fpu_full", REGNO_ANY, STXT ("FPU Stall Cycles"), PRELOADS_7, 1, ABST_NONE}, + {"memstall", "FR_dispatch_stall_ls_full", REGNO_ANY, STXT ("Memory Unit Stall Cycles"), PRELOADS_7, 1, ABST_NONE}, + // For PAPI mappings, see hwctable.README.family10h + // For PAPI 
mappings, see hwctable.README.opteron + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {"BU_cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {"FP_cycles_no_fpu_ops_retired", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"FP_serialize_ops_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"FR_dispatch_stall_branch_abort_to_retire", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"FR_dispatch_stall_far_ctl_trsfr_resync_branch_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"FR_dispatch_stall_fpu_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"FR_dispatch_stall_ls_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"FR_dispatch_stall_reorder_buffer_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"FR_dispatch_stall_resv_stations_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"FR_dispatch_stall_segment_load", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"FR_dispatch_stall_serialization", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"FR_dispatch_stall_waiting_all_quiet", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"FR_dispatch_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"FR_intr_masked_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"FR_intr_masked_while_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"FR_nothing_to_dispatch", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"IC_instr_fetch_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"LS_buffer_2_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + {"NB_mem_ctrlr_dram_cmd_slots_missed", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {"NB_mem_ctrlr_turnaround", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD}, + + /* additional (hidden) aliases, for convenience */ + {"cycles0", 
"BU_cpu_clk_unhalted", 0, NULL, PRELOADS_8, 1, ABST_NONE}, + {"cycles1", "BU_cpu_clk_unhalted", 1, NULL, PRELOADS_8, 1, ABST_NONE}, + {"insts0", "FR_retired_x86_instr_w_excp_intr", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "FR_retired_x86_instr_w_excp_intr", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry amd_15h[] = { + {"cycles", "CU_cpu_clk_unhalted", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE}, + {"insts", "EX_retired_instr_w_excp_intr", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + {"icr", "IC_fetch", REGNO_ANY, STXT ("L1 I-cache Refs"), PRELOADS_7, 0, ABST_NONE}, /* new */ + {"icm", "IC_miss", REGNO_ANY, STXT ("L1 I-cache Misses"), PRELOADS_6, 0, ABST_NONE}, + {"l2im", "IC_refill_from_system", REGNO_ANY, STXT ("L2 I-cache Misses"), PRELOADS_6, 0, ABST_NONE}, + {"dcr", "DC_access", REGNO_ANY, STXT ("L1 D-cache Refs"), PRELOADS_7, 0, ABST_NONE}, /* new */ + {"dcm", "DC_miss~umask=0x3", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, /* new */ + {"l2dm", "DC_refill_from_system", REGNO_ANY, STXT ("L2 D-cache Misses"), PRELOADS_6, 0, ABST_NONE}, /* new */ + {"dtlbm", "DC_unified_tlb_miss~umask=0x7", REGNO_ANY, STXT ("L2 DTLB Misses"), PRELOADS_5, 0, ABST_NONE}, /* new */ + // For PAPI mappings, see hwctable.README.family15h + + /* explicit definitions of (hidden) entries for proper counters */ + /* Only counters that can be time converted, or are load-store need to be in this table */ + {/*001.xx*/"FP_scheduler_empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*006.xx*/"FP_bottom_execute_uops_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*023.xx*/"LS_ldq_stq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*024.xx*/"LS_locked_operation", /*umask!=0*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*069.xx*/"CU_mab_wait_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + 
{/*076.xx*/"CU_cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE}, + {/*087.xx*/"IC_instr_fetch_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0cd.xx*/"EX_intr_masked_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0ce.xx*/"EX_intr_masked_while_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d0.xx*/"DE_nothing_to_dispatch", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d1.xx*/"DE_dispatch_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d3.xx*/"DE_dispatch_stall_serialization", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d5.xx*/"DE_dispatch_stall_instr_retire_q_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d6.xx*/"DE_dispatch_stall_int_scheduler_q_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d7.xx*/"DE_dispatch_stall_fp_scheduler_q_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d8.xx*/"DE_dispatch_stall_ldq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*0d9.xx*/"DE_dispatch_stall_waiting_all_quiet", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + {/*1d8.xx*/"EX_dispatch_stall_stq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, + + /* additional (hidden) aliases, for convenience */ + {"cycles0", "CU_cpu_clk_unhalted", 0, NULL, PRELOADS_8, 1, ABST_NONE}, + {"cycles1", "CU_cpu_clk_unhalted", 1, NULL, PRELOADS_8, 1, ABST_NONE}, + {"insts0", "EX_retired_instr_w_excp_intr", 0, NULL, PRELOADS_8, 0, ABST_NONE}, + {"insts1", "EX_retired_instr_w_excp_intr", 1, NULL, PRELOADS_8, 0, ABST_NONE}, + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +#define USE_ARM_REF_CYCLES \ + {"usr_time","cycles", REGNO_ANY, STXT("User CPU"), PRELOADS_85, 1, ABST_NONE}, \ + {"sys_time","cycles~system=1~user=0", REGNO_ANY, STXT("System CPU"), PRELOADS_85, 1, ABST_NONE}, \ + +static Hwcentry armlist[] = { + USE_ARM_REF_CYCLES +// Hardware event: + {"branch-instructions", NULL, 
REGNO_ANY, STXT("Branch-instructions"), PRELOADS_35, 0, ABST_NONE}, + {"branch-misses", NULL, REGNO_ANY, STXT("Branch-misses"), PRELOADS_35, 0, ABST_NONE}, + {"bus-cycles", NULL, REGNO_ANY, STXT("Bus Cycles"), PRELOADS_35, 1, ABST_NONE}, + {"cache-misses", NULL, REGNO_ANY, STXT("Cache-misses"), PRELOADS_35, 0, ABST_NONE}, + {"cache-references", NULL, REGNO_ANY, STXT("Cache-references"), PRELOADS_35, 0, ABST_NONE}, + {"cycles", NULL, REGNO_ANY, STXT("CPU Cycles"), PRELOADS_85, 1, ABST_NONE}, + {"insts", "instructions", REGNO_ANY, STXT("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, + {"ref-cycles", NULL, REGNO_ANY, STXT("Total Cycles"), PRELOADS_85, 1, ABST_NONE}, + {"stalled-cycles-backend", NULL, REGNO_ANY, STXT("Stalled Cycles during issue."), PRELOADS_85, 1, ABST_NONE}, + {"stalled-cycles-frontend", NULL, REGNO_ANY, STXT("Stalled Cycles during retirement."), PRELOADS_85, 1, ABST_NONE}, + +// Software event: + {"alignment-faults", NULL, REGNO_ANY, STXT("Alignment Faults"), PRELOADS_85, 0, ABST_NONE}, + {"context-switches", NULL, REGNO_ANY, STXT("Context Switches"), PRELOADS_85, 0, ABST_NONE}, + {"cpu-clock", NULL, REGNO_ANY, STXT("CPU Clock"), PRELOADS_85, 1, ABST_NONE}, + {"cpu-migrations", NULL, REGNO_ANY, STXT("CPU Migrations"), PRELOADS_85, 0, ABST_NONE}, + {"emulation-faults", NULL, REGNO_ANY, STXT("Emulation Faults"), PRELOADS_85, 0, ABST_NONE}, + {"major-faults", NULL, REGNO_ANY, STXT("Major Page Faults"), PRELOADS_85, 0, ABST_NONE}, + {"minor-faults", NULL, REGNO_ANY, STXT("Minor Page Faults"), PRELOADS_85, 0, ABST_NONE}, + {"page-faults", NULL, REGNO_ANY, STXT("Page Faults"), PRELOADS_85, 0, ABST_NONE}, + {"task-clock", NULL, REGNO_ANY, STXT("Clock Count Specific"), PRELOADS_85, 1, ABST_NONE}, + +// Hardware cache event + {"L1-dcache-load-misses", NULL, REGNO_ANY, STXT("L1 D-cache Load Misses"), PRELOADS_35, 0, ABST_NONE}, + {"L1-dcache-loads", NULL, REGNO_ANY, STXT("L1 D-cache Loads"), PRELOADS_35, 0, ABST_NONE}, + {"L1-dcache-store-misses", NULL, 
REGNO_ANY, STXT("L1 D-cache Store Misses"), PRELOADS_35, 0, ABST_NONE}, + {"L1-dcache-stores", NULL, REGNO_ANY, STXT("L1 D-cache Store Stores"), PRELOADS_35, 0, ABST_NONE}, + {"L1-icache-load-misses", NULL, REGNO_ANY, STXT("L1 Instructions Load Misses"), PRELOADS_35, 0, ABST_NONE}, + {"L1-icache-load-misses", NULL, REGNO_ANY, STXT("L1 Instructions Loads"), PRELOADS_35, 0, ABST_NONE}, + {"dTLB-load-misses", NULL, REGNO_ANY, STXT("D-TLB Load Misses"), PRELOADS_35, 0, ABST_NONE}, + {"dTLB-loads", NULL, REGNO_ANY, STXT("D-TLB Loads"), PRELOADS_35, 0, ABST_NONE}, + {"iTLB-load-misses", NULL, REGNO_ANY, STXT("The Instruction TLB Load Misses"), PRELOADS_35, 0, ABST_NONE}, + {"iTLB-loads", NULL, REGNO_ANY, STXT("The Instruction TLB Loads"), PRELOADS_35, 0, ABST_NONE}, + + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +static Hwcentry unknownlist[] = + /* used for unrecognized CPU type */{ + {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} +}; + +/* structure defining the counters for a CPU type */ +typedef struct +{ + int cputag; + Hwcentry *stdlist_table; +#define MAX_DEFAULT_HWC_DEFS 4 // allows multiple defs to handle OS variations; extend as needed + char *default_exp_p[MAX_DEFAULT_HWC_DEFS + 1]; // end of list MUST be marked with NULL +} cpu_list_t; + +/* IMPORTANT NOTE: + * + * Any default HWC string must consist of counter names separated by -TWO- commas, + * with no trailing comma/value after the last counter name + * + * Only aliased counters should be specified; non-aliased counters will + * not get the right overflow values set.
+ * If the string is not formatted that way, -h hi and -h lo will fail + */ +static cpu_list_t cputabs[] = { + {CPC_ULTRA1, usIlist, {NULL}}, /* bind will fail */ + {CPC_ULTRA2, usIlist, {NULL}}, /* bind will fail */ + {CPC_ULTRA3, usIIIlist, {"insts,,ecstall", 0}}, + {CPC_ULTRA3_PLUS, usIIIlist, {"insts,,ecstall", 0}}, + {CPC_ULTRA3_I, usIIIlist, {"insts,,ecstall", 0}}, + {CPC_ULTRA4_PLUS, usIVplist, {"insts,,ecstall", 0}}, + {CPC_ULTRA_T1, niagara1, {"insts", 0}}, + {CPC_ULTRA_T2, niagara2, {"insts,,+l2drm", 0}}, + {CPC_ULTRA_T2P, niagara2, {"insts,,+l2drm", 0}}, + {CPC_ULTRA_T3, niagara2, {"insts,,+l2drm", 0}}, + {CPC_SPARC_T4, sparc_t4, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}}, + {CPC_SPARC_M4, sparc_t5_m6, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}}, // renamed to m5 + {CPC_SPARC_T5, sparc_t5_m6, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}}, + {CPC_SPARC_M5, sparc_t5_m6, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}}, + {CPC_SPARC_T6, sparc_t5_m6, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}}, // no such processor + {CPC_SPARC_M6, sparc_t5_m6, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}}, + {CPC_SPARC_M7, sparc_m7, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}}, // includes T7 + {CPC_SPARC_M8, sparc_m8, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}}, + {CPC_PENTIUM_PRO_MMX, pentiumIIlist, {"insts", 0}}, + {CPC_PENTIUM_PRO, pentiumIIIlist, {"insts", 0}}, + {CPC_PENTIUM_4, pentium4, {"insts", 0}}, + {CPC_PENTIUM_4_HT, pentium4, {"insts", 0}}, + {CPC_INTEL_CORE2, intelCore2list, {"insts,,cycles", 0}}, + {CPC_INTEL_NEHALEM, intelNehalemList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall", + "insts,,cycles,,l3m_stall,,dtlbm_stall", 0}}, + {CPC_INTEL_WESTMERE, intelNehalemList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall", + "insts,,cycles,,l3m_stall,,dtlbm_stall", 0}}, + {CPC_INTEL_SANDYBRIDGE, intelSandyBridgeList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall", + "insts,,cycles,,l3m,,dtlbm", 0}}, + {CPC_INTEL_IVYBRIDGE, 
intelSandyBridgeList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall", + "insts,,cycles,,l3m,,dtlbm", 0}}, + {CPC_INTEL_HASWELL, intelHaswellList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall", + "insts,,cycles,,l3m,,dtlbm", 0}}, + {CPC_INTEL_BROADWELL, intelBroadwellList, {"insts,,cycles,,+l2m_latency,,dtlbm", + "insts,,cycles,,l3m,,dtlbm", 0}}, + {CPC_INTEL_SKYLAKE, intelSkylakeList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall", + "insts,,cycles,,l2m_stall,,dtlbm_stall", 0}}, + {CPC_INTEL_UNKNOWN, intelLinuxUnknown, {"cycles,,insts,,llm", + "user_time,,system_time,,cycles,,insts,,llm", 0}}, + {CPC_INTEL_ATOM, intelAtomList, {"insts", 0}}, + {CPC_AMD_K8C, amd_opteron_10h_11h, {"insts,,cycles,,l2dm,,l2dtlbm", 0}}, + {CPC_AMD_FAM_10H, amd_opteron_10h_11h, {"insts,,cycles,,l2dm,,l2dtlbm", 0}}, + {CPC_AMD_FAM_11H, amd_opteron_10h_11h, {"insts,,cycles,,l2dm,,l2dtlbm", 0}}, + {CPC_AMD_FAM_15H, amd_15h, {"insts,,cycles", 0}}, + {CPC_SPARC64_V, usfuji_V_list, {"insts,,cycles", 0}}, + {CPC_SPARC64_VI, usfuji_VI_VII_list, {"insts,,cycles,,dcstall", 0}}, + {CPC_SPARC64_VII, usfuji_VI_VII_list, {"insts,,cycles,,dcstall", 0}}, + {CPC_SPARC64_X, usfuji_X_list, {"insts,,cycles,,dcstall", 0}}, + {CPC_SPARC64_XII, usfuji_XII_list, {"insts,,cycles,,dcstall", 0}}, + {CPC_KPROF, kproflist, {NULL}}, // OBSOLETE (To support 12.3 and earlier, TBR) + {ARM_CPU_IMP_APM, armlist, {"insts,,cycles", 0}}, + {0, unknownlist, {NULL}} /* processor is unknown, but experiment is allowed */ +}; + +/*---------------------------------------------------------------------------*/ +/* state variables */ +static int initialized; +static int signals_disabled; + +// Simple array list +typedef struct +{ + void** array; // array of ptrs, last item set to null + int sz; // num live elements in array + int max; // array allocation size +} ptr_list; + +static void +ptr_list_init (ptr_list *lst) +{ + lst->sz = 0; + lst->max = 0; + lst->array = 0; +} + +static void +ptr_list_add (ptr_list *lst, char* ptr) +{ // ptr must 
be freeable + if (lst->sz >= lst->max - 1) + { + void * * new; + int newmax = lst->max ? lst->max * 2 : 16; + new = (void**) realloc (lst->array, newmax * sizeof (void*)); + if (!new) return; // failed, discard add + lst->max = newmax; + lst->array = new; + } + lst->array[lst->sz++] = ptr; + lst->array[lst->sz] = NULL; // mark new end-of-list +} + +static void +ptr_list_free (ptr_list *lst) +{ // includes shallow free of all elements + if (lst->array) + { + for (int ii = 0; lst->array[ii]; ii++) + free (lst->array[ii]); + free (lst->array); + } + lst->sz = 0; + lst->max = 0; + lst->array = 0; +} + +// Capabilities of this machine (initialized by setup_cpc()) +static int cpcx_cpuver = CPUVER_UNDEFINED; +static uint_t cpcx_npics; +static const char *cpcx_cciname; +static const char *cpcx_docref; +static uint64_t cpcx_support_bitmask; + +// cpcx_*[0]: collect lists +// cpcx_*[1]: er_kernel lists +// Each cpcx_*[] list is an array of ptrs with null ptr marking end of list +static char **cpcx_attrs[2]; + +static Hwcentry **cpcx_std[2]; +static Hwcentry **cpcx_raw[2]; +static Hwcentry **cpcx_hidden[2]; + +static uint_t cpcx_max_concurrent[2]; +static char *cpcx_default_hwcs[2]; +static char *cpcx_orig_default_hwcs[2]; +static int cpcx_has_precise[2]; + +#define VALID_FOR_KERNEL(forKernel) ((forKernel)>=0 && (forKernel)<=1) +#define IS_KERNEL(forKernel) ((forKernel)==1) + +// used to build lists: +static ptr_list unfiltered_attrs; +static ptr_list unfiltered_raw; + +/*---------------------------------------------------------------------------*/ +/* misc internal utilities */ + +/* compare 2 strings to either \0 or <termchar> */ +#define IS_EOL(currchar, termchar) ((currchar)==(termchar) || (currchar)==0) + +static int +is_same (const char * regname, const char * int_name, char termchar) +{ + do + { + char a = *regname; + char b = *int_name; + if (IS_EOL (a, termchar)) + { + if (IS_EOL (b, termchar)) + return 1; /* strings are the same up to terminating char */ + else + 
break; /* strings differ */ + } + if (a != b) + break; /* strings differ */ + regname++; + int_name++; + } + while (1); + return 0; +} + +static int +is_numeric (const char *name, uint64_t *pval) +{ + char *endptr; + uint64_t val = strtoull (name, &endptr, 0); + if (!*name || *endptr) + return 0; /* name does not specify a numeric value */ + if (pval) + *pval = val; + return 1; +} + +static int +is_visible_alias (Hwcentry* pctr) +{ + if (!pctr) + return 0; + if (pctr->name && pctr->int_name && pctr->metric) + return 1; + return 0; +} + +static int +is_hidden_alias (Hwcentry* pctr) +{ + if (!pctr) + return 0; + if (pctr->name && pctr->int_name && pctr->metric == NULL) + return 1; + return 0; +} + +static int +is_numeric_alias (Hwcentry* pctr) +{ + int is_numeric_alias = 0; + regno_t regno; + char *nameOnly = NULL; + hwcfuncs_parse_ctr (pctr->int_name, NULL, &nameOnly, NULL, NULL, &regno); + if (is_numeric (nameOnly, NULL)) + is_numeric_alias = 1; + free (nameOnly); + return is_numeric_alias; +} + +/* print list of register to a buffer */ +/* + * style e x a m p l e s + * 0 NONE 2 {0|1|2|3} + * 1 NONE 2 : 0, 1, 2, or 3 + * 2 0 1 2 3 6 + */ +static char * +get_regnolist (char *buf, size_t sz, const regno_t *reg_list, int style) +{ + if (!buf || !sz) + return "INTERNAL ERROR"; + buf[0] = 0; + if (style == 2) + { + int ii; + // width should be consistent with that in format_columns() + // the format will accommodate cpcx_npics regs + if (cpcx_npics < 1) + return "INTERNAL ERROR"; + // clear out the buffer + for (ii = 0; ii < sz; ii++) + buf[ii] = '_'; + if (cpcx_npics <= 9) + { + // one char per reg, plus terminating null char + if (cpcx_npics + 1 > sz) + return "INTERNAL ERROR"; + buf[cpcx_npics] = '\0'; + + // fill buf with regnos + for (ii = 0; ii < MAX_PICS; ii++) + { + regno_t regno = reg_list[ii]; + if (REG_LIST_EOL (regno)) + break; + if (regno < 0 || regno >= cpcx_npics) + return "INTERNAL ERROR"; + buf[regno] = '0' + regno; + } + } + else + { + /* space between
regs, which may be 1 or 2 digits each + * 1 char for reg 0 + * 2 chars for regs 1-9 each + * 3 chars for regs 10- each + * 1 char for terminating null char + */ + int nchars = 17 + 3 * (cpcx_npics - 9); + if (nchars > sz) + return "INTERNAL ERROR"; + buf[nchars - 1] = '\0'; + + // fill buf with regnos + for (ii = 0; ii < MAX_PICS; ii++) + { + regno_t regno = reg_list[ii]; + if (REG_LIST_EOL (regno)) + break; + if (regno <= 9) + buf[2 * regno ] = '0' + regno; + else + { + buf[3 * (regno - 9) + 17] = '0' + (regno / 10); + buf[3 * (regno - 9) + 18] = '0' + (regno % 10); + } + } + } + return buf; + } + if (REG_LIST_IS_EMPTY (reg_list)) + { + snprintf (buf, sz, GTXT ("NONE")); + return buf; + } + else if (REG_LIST_EOL (reg_list[1])) + { + /* 1 item in list */ + snprintf (buf, sz, "%d", reg_list[0]); + return buf; + } + else + { + /* 2 more items in list */ + int ii, num_regs; + for (ii = 0; ii < MAX_PICS; ii++) + { + regno_t regno = reg_list[ii]; + if (REG_LIST_EOL (regno)) + break; + } + num_regs = ii; + buf[0] = 0; + for (ii = 0; ii < num_regs; ii++) + { + regno_t regno = reg_list[ii]; + if (style == 0) + snprintf (buf + strlen (buf), sz - strlen (buf), + "%c%d", ii ? '|' : '{', regno); + else + { + if (num_regs == 2) + snprintf (buf + strlen (buf), sz - strlen (buf), + "%d%s", regno, !ii ? 
" or " : ""); + else + { + /* 3 or more items in list */ + if (ii < num_regs - 2) + snprintf (buf + strlen (buf), sz - strlen (buf), + "%d, ", regno); + else if (ii == num_regs - 2) + snprintf (buf + strlen (buf), sz - strlen (buf), + "%d, or ", regno); + else + snprintf (buf + strlen (buf), sz - strlen (buf), + "%d", regno); + } + } + } + if (style == 0) + snprintf (buf + strlen (buf), sz - strlen (buf), "}"); + } + return buf; +} + +#if !HWC_DEBUG +#define hwcentry_print(lvl,x1,x2) +#else + +/* print a Hwcentry */ +static void +hwcentry_print (int lvl, const char * header, const Hwcentry *pentry) +{ + char buf[1024]; + Tprintf (lvl, "%s '%s', '%s', %d, '%s', %d, %d, %d, %d, %d, %d, /", + header, + pentry->name ? pentry->name : "NULL", + pentry->int_name ? pentry->int_name : "NULL", + pentry->reg_num, + pentry->metric ? pentry->metric : "NULL", + pentry->lval, /* low-resolution/long run */ + pentry->val, /* normal */ + pentry->hval, /* high-resolution/short run */ + pentry->timecvt, + pentry->memop, /* type of instruction that can trigger */ + pentry->sort_order); + get_regnolist (buf, sizeof (buf), pentry->reg_list, 0); + Tprintf (lvl, "%s\n", buf); +} +#endif + +/* add <regno> to a Hwcentry's list */ +static void +regno_add (Hwcentry * pctr, regno_t regno) +{ + int jj; + regno_t *reg_list; + if (!pctr) + { + Tprintf (0, "hwctable: regno_add(): ERROR: pctr==NULL\n"); + return; + } + reg_list = pctr->reg_list; + if (!reg_list) + { + /* create list */ + reg_list = (regno_t*) malloc (sizeof (regno_t*) * MAX_PICS); + if (!reg_list) + { + hwcentry_print (DBG_LT0, "hwctable: regno_add: ERROR:" + " Out of memory: ", pctr); + return; + } + /* initialize list */ + for (jj = 0; jj < MAX_PICS; jj++) + reg_list[jj] = REGNO_ANY; + pctr->reg_list = reg_list; + } + if (regno == REGNO_ANY) + { + /* add all counters up to cpcx_npics */ + for (jj = 0; jj < MAX_PICS && jj < cpcx_npics; jj++) + reg_list[jj] = jj; + } + else + { + /* add <regno> to list of registers */ + for (jj = 0; 
jj < MAX_PICS; jj++) + { + if (reg_list[jj] == regno) + { + hwcentry_print (DBG_LT0, "hwctable: regno_add: WARNING: " + "Duplicate regno: ", pctr); + break; + } + if (reg_list[jj] == REGNO_ANY) + { + reg_list[jj] = regno; + break; + } + } + } + if (jj == MAX_PICS) + hwcentry_print (DBG_LT0, "hwctable: regno_add: WARNING:" + " regno list is full:", pctr); +} + +/*---------------------------------------------------------------------------*/ +/* utilities for rawlist (list of raw counters with reglist[] filled in) */ + +/* search the 'raw' list of counters for <name> */ +static Hwcentry * +ptrarray_find_by_name (Hwcentry** array, const char * name) +{ + if (name == NULL) + return NULL; + Tprintf (DBG_LT3, "hwctable: array_find_by_name(%s):\n", name); + for (int ii = 0; array && array[ii]; ii++) + if (strcmp (array[ii]->name, name) == 0) + return array[ii]; + return NULL; /* not found */ +} + +/* allocate a shallow copy of a Hwcentry (only the name string is duplicated) */ +static Hwcentry * +alloc_shallow_copy (const Hwcentry *pctr) +{ + Hwcentry *node = (Hwcentry *) malloc (sizeof (Hwcentry)); + if (!node) + return NULL; // fail + *node = *pctr; /* shallow copy!
*/ + if (pctr->name) + node->name = strdup (pctr->name); + return node; +} + +/* add Hwcentry to the 'raw' list of counters */ +static Hwcentry * +list_append_shallow_copy (ptr_list *list, const Hwcentry *pctr) +{ + Hwcentry *node = alloc_shallow_copy (pctr); + if (!node) + return NULL; // fail + ptr_list_add (list, (void*) node); + return node; +} + +static Hwcentry * +list_add (ptr_list *list, uint_t regno, const char *name) +{ + Hwcentry *praw; + praw = ptrarray_find_by_name ((Hwcentry**) list->array, name); + if (!praw) + { + Hwcentry tmpctr = empty_ctr; + tmpctr.name = (char *) name; + praw = list_append_shallow_copy (list, &tmpctr); + } + if (praw) + regno_add (praw, regno); + return praw; +} + +/*---------------------------------------------------------------------------*/ +/* utilities for stdlist (table of aliased, hidden, & convenience, ctrs) */ + +/* find top level definition for <cpuid> */ +static cpu_list_t* +cputabs_find_entry (int cpuid) +{ + int i; + /* now search for the appropriate table */ + for (i = 0;; i++) + { + if (cputabs[i].cputag == 0) + break; + if (cpuid == cputabs[i].cputag) + return &cputabs[i]; + } + Tprintf (0, "hwctable: cputabs_find_entry: WARNING: " + "cpu_id = %d not defined. No 'standard' counters are available\n", + cpuid); + return &cputabs[i]; +} + +/* find Hwcentry table for <cpuid> */ +static Hwcentry* +stdlist_get_table (int cpuid) +{ + cpu_list_t* tmp = cputabs_find_entry (cpuid); + if (tmp) + return tmp->stdlist_table; + return NULL; +} + +/* search the 'standard' list of counters for <name>,<regno> */ +/* note: <regno>=REGNO_ANY is a wildcard that matches any value. 
*/ + +/* note: int_name==NULL is a wildcard */ +static const Hwcentry * +ptrarray_find (const Hwcentry **array, const char *name, const char *int_name, + int check_regno, regno_t regno) +{ + const Hwcentry *pctr; + if (!array) + return NULL; + for (int ii = 0; array[ii]; ii++) + { + pctr = array[ii]; + if (strcmp (pctr->name, name)) + continue; + if (int_name && int_name[0] != 0 && pctr->int_name) + { + if (NULL == strstr (int_name, pctr->int_name)) + continue; + } + if (!check_regno) + return pctr; + else + { + /* duplicate aliases are allowed in table because of 6759307 */ + if (REG_LIST_IS_EMPTY (pctr->reg_list)) + { + /* skip aliases that don't have a valid list of registers */ + hwcentry_print (1, "hwctable: stdlist_find_by_name:" + " WARNING: alias found, but event not supported by HW:", + pctr); + continue; + } + if (!regno_is_valid (pctr, regno)) + { + hwcentry_print (1, "hwctable: stdlist_find_by_name():" + " WARNING: alias found, but regno doesn't match:", + pctr); + continue; + } + return pctr; + } + } + return NULL; +} + +/* search the 'standard' list of counters for <name>,<regno> */ + +/* note: <regno>=REGNO_ANY is a wildcard that matches any value. */ +static const Hwcentry * +static_table_find (const Hwcentry *table, const char *name, const char *int_name, + int check_regno, regno_t regno) +{ + int sz; + for (sz = 0; table && table[sz].name; sz++) + ; + if (!sz) + return NULL; + const Hwcentry ** list = calloc (sz + 1, sizeof (void*)); + if (!list) + return NULL; + for (int ii = 0; ii < sz; ii++) + list[ii] = &table[ii]; + list[sz] = NULL; + const Hwcentry *pctr = ptrarray_find (list, name, int_name, check_regno, regno); + free (list); + return pctr; +} + +#if !HWC_DEBUG +#define stdlist_print(dbg_lvl,table) +#else + +/* print all Hwcentries in standard table.
Check for weird stuff */ +static void +stdlist_print (int dbg_lvl, const Hwcentry* table) +{ + const Hwcentry *pctr; + if (!table) + { + Tprintf (0, "hwctable: stdlist_print: ERROR: " + "table is invalid.\n"); + return; + } + for (pctr = table; pctr->name; pctr++) + { + int ii; + hwcentry_print (dbg_lvl, "hwctable: stdlist: ", pctr); + if (REG_LIST_IS_EMPTY (pctr->reg_list)) + { + if (pctr->int_name || !pctr->metric) + hwcentry_print (DBG_LT1, "hwctable: stdlist_print: WARNING: " + "no hardware event found for table entry", pctr); + continue; + } + /* check if incorrect reg_num used in table */ + if (!regno_is_valid (pctr, pctr->reg_num)) + { + hwcentry_print (DBG_LT0, "hwctable: stdlist_print: ERROR: " + "reg_num is not in table. ", pctr); + continue; + } + for (ii = 0; ii < MAX_PICS; ii++) + { + regno_t regno = pctr->reg_list[ii]; + if (REG_LIST_EOL (regno)) + break; + } + if (ii > 1 && pctr->reg_num != REGNO_ANY) + { + /* several regnos were valid, but only one can be specified */ + if (pctr->metric || !pctr->int_name) + { + /* pctr is standard or a raw definition */ + /* (pctr is not an alias like cycles0) */ + hwcentry_print (DBG_LT0, "hwctable: stdlist_print: ERROR: " + "regno in table should have been REGNO_ANY. ", + pctr); + } + } + } +} +#endif + +/*---------------------------------------------------------------------------*/ +/* utilities for init */ + +/* try to bind counters to hw. 
Return 0 on success, nonzero otherwise */ +static int +test_hwcs (const Hwcentry* entries[], unsigned numctrs) +{ + int rc = -1; + hwc_event_t sample; + int created = 0; + hwcdrv_api_t *hwcdrv = get_hwcdrv (); + Tprintf (DBG_LT2, "hwctable: test_hwcs()...\n"); + rc = hwcfuncs_bind_hwcentry (entries, numctrs); + if (rc) + { + Tprintf (0, "hwctable: WARNING: test " + "counters could not be created\n"); + goto end_test_hwcs; + } + created = 1; + if (!signals_disabled) + { + (void) signal (HWCFUNCS_SIGNAL, SIG_IGN); + signals_disabled = 1; + } + rc = hwcdrv->hwcdrv_start (); + if (rc) + { + Tprintf (0, "hwctable: WARNING: test " + "counters could not be started\n"); + goto end_test_hwcs; + } + rc = hwcdrv->hwcdrv_read_events (&sample, NULL); + if (rc) + Tprintf (0, "hwctable: WARNING: test sample failed\n"); + rc = 0; +#if HWC_DEBUG + { + unsigned ii; + Tprintf (DBG_LT1, "hwctable: test_hwcs("); + for (ii = 0; ii < numctrs; ii++) + Tprintf (DBG_LT1, "%s%s", ii ? "," : "", entries[ii]->name); + Tprintf (DBG_LT1, ") PASS\n"); + } +#endif + +end_test_hwcs: + if (created && hwcdrv->hwcdrv_free_counters ()) + Tprintf (0, "hwctable: WARNING: test counters could not be freed\n"); + return rc; +} + +#if !HWC_DEBUG +#define check_tables() +#else + +/* check for typos in tables */ +static void +check_tables () +{ + int i; + /* now search the known table of counters */ + for (i = 0;; i++) + { + Hwcentry * pentry; + int cputag = cputabs[i].cputag; + if (cputag == 0) + break; + if (cputag == CPC_KPROF) + continue; + pentry = cputabs[i].stdlist_table; + for (; pentry; pentry++) + { + if (!pentry->name) + break; + if (!pentry->int_name) + {/* internal, only to supply ABST and timecvt */ + if (pentry->metric) + Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:" + " internal && metric @%d, %s\n", cputag, pentry->name); + if (pentry->reg_num != REGNO_ANY) + Tprintf (DBG_LT1, "hwctable: check_tables: WARNING:" + " internal && reg_num!=REGNO_ANY @%d, %s\n", + cputag, pentry->name); + if 
(pentry->val != PRELOAD_DEF + && pentry->memop != ABST_EXACT_PEBS_PLUS1) + Tprintf (DBG_LT2, "hwctable: check_tables: INFO:" + " internal && custom val=%d @%d, %s\n", + pentry->val, cputag, pentry->name); +#if 0 + if (!pentry->timecvt && pentry->memop == ABST_NONE) + Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:" + " internal && not special! @%d, %s\n", + cputag, pentry->name); +#endif + } + if (pentry->metric) + { /* aliased */ + if (!pentry->int_name) + Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:" + " aliased && !int_name @%d, %s\n", cputag, pentry->name); +#if 0 + else if (!strcmp (pentry->name, pentry->int_name)) + Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:" + " name==int_name @%d, %s\n", + cputag, pentry->name); +#endif + if (pentry->reg_num != REGNO_ANY && pentry->reg_num != REGNO_INVALID) + Tprintf (DBG_LT1, "hwctable: check_tables: INFO:" + " aliased && custom reg_num==%d @%d, %s\n", + pentry->reg_num, cputag, pentry->name); + if (pentry->reg_num == REGNO_INVALID) + Tprintf (DBG_LT2, "hwctable: check_tables: INFO:" + " aliased && reg_num==REGNO_INVALID @%d, %s\n", + cputag, pentry->name); + } + if (pentry->int_name && !pentry->metric) + { /* convenience */ + if (!strcmp (pentry->name, pentry->int_name)) + Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:" + " convenience && name==int_name @%d, %s\n", + cputag, pentry->name); + if (pentry->reg_num == REGNO_ANY) + Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:" + " convenience && reg_num==REGNO_ANY @%d, %s\n", + cputag, pentry->name); + } + } + } +} +#endif + +static int try_a_counter (); +static void hwc_process_raw_ctrs (int forKernel, Hwcentry ***pstd_out, + Hwcentry ***praw_out, Hwcentry ***phidden_out, + Hwcentry**static_tables, + Hwcentry **raw_unfiltered_in); + +/* internal call to initialize libs, ctr tables */ +static void +setup_cpc_general (int skip_hwc_test) +{ + const cpu_list_t* cputabs_entry; + int rc = -1; + Tprintf (DBG_LT2, "hwctable: setup_cpc()... 
\n"); + if (initialized) + { + Tprintf (0, "hwctable: WARNING: setup_cpc() has already been called\n"); + return; + } + initialized = 1; + cpcx_cpuver = CPUVER_UNDEFINED; + cpcx_cciname = NULL; + cpcx_npics = 0; + cpcx_docref = NULL; + cpcx_support_bitmask = 0; + for (int kk = 0; kk < 2; kk++) + { // collect-0 and kernel-1 + cpcx_attrs[kk] = NULL; + cpcx_std[kk] = NULL; + cpcx_raw[kk] = NULL; + cpcx_hidden[kk] = NULL; + cpcx_max_concurrent[kk] = 0; + cpcx_default_hwcs[kk] = NULL; + cpcx_orig_default_hwcs[kk] = NULL; + cpcx_has_precise[kk] = 0; + } + check_tables (); + hwcdrv_api_t *hwcdrv = get_hwcdrv (); + if (hwcdrv->hwcdrv_init_status) + { + Tprintf (0, "WARNING: setup_cpc_general() failed. init_status=%d \n", + hwcdrv->hwcdrv_init_status); + goto setup_cpc_wrapup; + } + hwcdrv->hwcdrv_get_info (&cpcx_cpuver, &cpcx_cciname, &cpcx_npics, + &cpcx_docref, &cpcx_support_bitmask); + +#ifdef DISALLOW_USI_USII_6357446 + if (cpcx_cpuver == CPC_ULTRA1 || cpcx_cpuver == CPC_ULTRA2) + { + Tprintf (0, "hwctable: WARNING: setup_cpc(): cpu=%d" + " US-I/US-II cannot provide profile interrupts\n", cpcx_cpuver); + /* profiling interrupts don't work on US-I, US-II */ + hwcfuncs_int_logerr (GTXT ("UltraSPARC I and II cannot provide overflow interrupts\n")); + goto setup_cpc_wrapup; + } +#endif + +#ifdef DISALLOW_PENTIUM_PRO_MMX_7007575 + if (cpcx_cpuver == CPC_PENTIUM_PRO_MMX) + { + Tprintf (0, "hwctable: WARNING: setup_cpc(): cpu=%d" + " `Pentium Pro with MMX, Pentium II' is not supported\n", cpcx_cpuver); + hwcfuncs_int_logerr (GTXT ("libcpc cannot identify processor type\n")); + goto setup_cpc_wrapup; + } +#endif + + /* now search the known table of counters */ + cputabs_entry = cputabs_find_entry (cpcx_cpuver); + if (cputabs_entry == NULL) + { + Tprintf (0, "hwctable: WARNING: setup_cpc(): cpu=%d" + " could not be found in the tables\n", cpcx_cpuver); + /* strange, should have at least selected "unknownlist" */ + hwcfuncs_int_logerr (GTXT ("Analyzer CPU table could not be 
found\n")); + goto setup_cpc_wrapup; + } + + Hwcentry * valid_cpu_tables[2]; // [0]:static table of counters, [1]:static table of generic counters + valid_cpu_tables[0] = cputabs_entry->stdlist_table; + if (valid_cpu_tables[0] == NULL) + { + Tprintf (0, "hwctable: WARNING: setup_cpc(): " + " valid_cpu_tables was NULL??\n"); + /* strange, someone put a NULL in the lookup table? */ + hwcfuncs_int_logerr (GTXT ("Analyzer CPU table is invalid\n")); + goto setup_cpc_wrapup; + } + valid_cpu_tables[1] = papi_generic_list; + Tprintf (DBG_LT2, "hwctable: setup_cpc(): getting descriptions \n"); + // populate cpcx_raw and cpcx_attr + hwcdrv->hwcdrv_get_descriptions (hwc_cb, attrs_cb); + for (int kk = 0; kk < 2; kk++) + { // collect and er_kernel + hwc_process_raw_ctrs (kk, &cpcx_std[kk], &cpcx_raw[kk], &cpcx_hidden[kk], + valid_cpu_tables, (Hwcentry**) unfiltered_raw.array); + cpcx_has_precise[kk] = 0; + for (int rr = 0; cpcx_raw[kk] && cpcx_raw[kk][rr]; rr++) + { + int memop = cpcx_raw[kk][rr]->memop; + if (ABST_MEMSPACE_ENABLED (memop)) + { + cpcx_has_precise[kk] = 1; + break; + } + } + cpcx_attrs[kk] = (char**) unfiltered_attrs.array; + cpcx_max_concurrent[kk] = cpcx_npics; + } +#if 1 // 22897042 - DTrace cpc provider does not support profiling on multiple ctrs on some systems + if ((cpcx_support_bitmask & HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID) != HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID) + { + // kernel profiling only supports one counter if overflowing counter can't be identified + cpcx_max_concurrent[1] = cpcx_npics ? 
1 : 0; + } +#endif + + /* --- quick test of the cpc interface --- */ + if (skip_hwc_test) + rc = 0; + else + rc = try_a_counter (0); + + /* initialize the default counter string definition */ + for (int kk = 0; kk < 2; kk++) + { + char * default_exp = 0; + int jj; + for (jj = 0; (default_exp = cputabs_entry->default_exp_p[jj]); jj++) + { + int rc = hwc_lookup (kk, 0, default_exp, NULL, 0, NULL, NULL); + if (rc > 0) + break; + } + if (!default_exp) + { + char * fallback[3] = {NTXT ("insts,,cycles,,l3m"), NTXT ("insts,,cycles"), NTXT ("insts")}; + for (int ff = 0; ff < 3; ff++) + { + int rc = hwc_lookup (kk, 0, fallback[ff], NULL, 0, NULL, NULL); + if (rc > 0) + { + default_exp = strdup (fallback[ff]); + break; + } + } + } + cpcx_default_hwcs[kk] = default_exp; + cpcx_orig_default_hwcs[kk] = default_exp; + } + +setup_cpc_wrapup: + if (rc) + { + cpcx_npics = 0; + /* + ptr_list_free(&tmp_raw); // free stuff... YXXX + ptr_list_free(&unfiltered_attrs); + */ + } + return; +} + +static void +setup_cpcx () +{ + if (initialized) + return; + setup_cpc_general (0); // set up and include a hwc test run +} + +static void +setup_cpc_skip_hwctest () +{ + if (initialized) + return; + setup_cpc_general (1); // set up but skip hwc test run +} + +static int +try_a_counter (int forKernel) +{ + if (!VALID_FOR_KERNEL (forKernel)) + return -1; + int rc = -1; + const Hwcentry * testevent; + if (cpcx_std[forKernel] == NULL) + { + Tprintf (0, "hwctable: WARNING: cpcx_std not initialized"); + return 0; /* consider this an automatic PASS */ + } + /* look for a valid table entry, only try valid_cpu_tables[0] */ + { + testevent = cpcx_std[forKernel][0]; + if (!testevent || !testevent->name) + { + Tprintf (0, "hwctable: WARNING: no test metric" + " available to verify counters\n"); + return 0; /* consider this an automatic PASS */ + } + if (REG_LIST_IS_EMPTY (testevent->reg_list)) + return 0; // weird + } + Hwcentry tmp_testevent; + tmp_testevent = *testevent; /* shallow copy */ + if 
(tmp_testevent.int_name == NULL) + { + /* counter is defined in 'hidden' section of table, supply int_name */ + tmp_testevent.int_name = strdup (tmp_testevent.name); + } + Hwcentry * test_array[1] = {&tmp_testevent}; + rc = hwcfuncs_assign_regnos (test_array, 1); /* may modify test_array */ + if (rc) + return rc; + rc = test_hwcs ((const Hwcentry**) test_array, 1); + if (rc == HWCFUNCS_ERROR_UNAVAIL) + { + // consider this a pass (allow HWC table to be printed) + Tprintf (0, "hwctable: WARNING: " + "cpc_bind_event() shows counters busy; allow to continue\n"); + return 0; + } + else if (rc) + { + // failed to start for some other reason + Tprintf (0, "hwctable: WARNING: " + "test of counter '%s' failed\n", + testevent->name); + return rc; + } + return 0; +} + +void +hwc_update_val (Hwcentry *hwc) +{ + if (hwc->ref_val == 0) + hwc->ref_val = hwc->val; // save original reference + int64_t newVal; + hrtime_t min_time_nsec = hwc->min_time; + if (min_time_nsec == HWCTIME_TBD) + min_time_nsec = hwc->min_time_default; + switch (min_time_nsec) + { + case 0: // disable time-based intervals + // do not modify val + return; + case HWCTIME_ON: + case HWCTIME_TBD: + newVal = HWC_VAL_ON (hwc->ref_val); + break; + case HWCTIME_LO: + newVal = HWC_VAL_LO (hwc->ref_val); + break; + case HWCTIME_HI: + newVal = HWC_VAL_HI (hwc->ref_val); + break; + default: + newVal = HWC_VAL_CUSTOM (hwc->ref_val, min_time_nsec); + break; + } +#define MAX_INT_VAL (2*1000*1000*1000 + 1000100)// yuck, limited to signed int + if (newVal >= MAX_INT_VAL) + newVal = MAX_INT_VAL; + hwc->val = newVal; +} + +/* convert value string to value and store result in hwc->val */ +/* This function moved here from collctrl.cc */ +/* + * Keep the HWCTIME_* definitions in sync with those in + * collctrl.cc Coll_Ctrl::add_hwcstring(). 
+ */ +static int +set_hwcval (Hwcentry *hwc, hrtime_t global_min_time_nsec, const char *valptr) +{ + hwc->min_time_default = global_min_time_nsec; + if (hwc->val == 1) + { + // An interval of 1 is used for certain types of count data. + // (er_bit, er_generic, er_rock ...) + // Hi and Lo do not apply. + /* use the default */ + } + else if (valptr == NULL || valptr[0] == 0 || strcmp (valptr, "auto") == 0) + hwc->min_time = HWCTIME_TBD; + else if (strcmp (valptr, "on") == 0) + hwc->min_time = HWCTIME_ON; + else if (strcmp (valptr, "lo") == 0 || strcmp (valptr, "low") == 0) + hwc->min_time = HWCTIME_LO; + else if (strcmp (valptr, "hi") == 0 || strcmp (valptr, "high") == 0 + || strcmp (valptr, "h") == 0) + hwc->min_time = HWCTIME_HI; + else + { + /* the remaining string should be a number > 0 */ + char *endchar = NULL; + long long tmp = strtoll (valptr, &endchar, 0); + int value = (int) tmp; + if (*endchar != 0 || tmp <= 0 || value != tmp) + { + // also covers errno == ERANGE + Tprintf (0, "hwctable: set_hwcval(): ERROR: " + "Invalid counter value %s for counter `%s'\n", + valptr, hwc->name); + return -1; + } + if (tmp > UINT32_MAX / 2) + { + /* Roch B. says that we MUST do this check for er_kernel + because some platforms deliver overflow interrupts without + identifying which counter overflowed. The only way to + determine which counter overflowed is to have enough + margin on 32 bit counters to make sure they don't + wrap. 
+ */ + Tprintf (0, "hwctable: set_hwcval(): ERROR: " + "Counter value %s exceeds %lu\n", + valptr, (unsigned long) UINT32_MAX / 2); + return -1; + } + /* set the value */ + if (value != 0) + { + if (hwc->ref_val == 0) + hwc->ref_val = hwc->val; // save original reference + hwc->val = value; + hwc->min_time = 0; // turn off auto-adjust + } + } + hwc_update_val (hwc); + return 0; +} + +static char * +canonical_name (const char *counter) +{ + char *nameOnly = NULL; + char *attrs = NULL; + char tmpbuf[1024]; + tmpbuf[0] = 0; + hwcfuncs_parse_ctr (counter, NULL, &nameOnly, &attrs, NULL, NULL); + snprintf (tmpbuf + strlen (tmpbuf), sizeof (tmpbuf) - strlen (tmpbuf), + "%s", nameOnly); + if (attrs) + { + hwcfuncs_attr_t cpc2_attrs[HWCFUNCS_MAX_ATTRS]; + void * attr_mem; + unsigned nattrs; + int ii, jj; + + /* extract attributes from counter */ + attr_mem = hwcfuncs_parse_attrs (counter, cpc2_attrs, HWCFUNCS_MAX_ATTRS, + &nattrs, NULL); + if (!attr_mem) + { + snprintf (tmpbuf + strlen (tmpbuf), sizeof (tmpbuf) - strlen (tmpbuf), + "~UNKNOWN"); + goto canonical_attrs_wrapup; + } + + /* sort the attributes */ + for (ii = 0; ii < (int) nattrs - 1; ii++) + { + for (jj = ii + 1; jj < nattrs; jj++) + { + int cmp = strcmp (cpc2_attrs[ii].ca_name, + cpc2_attrs[jj].ca_name); + if (cmp > 0) + { + hwcfuncs_attr_t tmp = cpc2_attrs[jj]; + cpc2_attrs[jj] = cpc2_attrs[ii]; + cpc2_attrs[ii] = tmp; + } + } + } + + /* print attributes in canonical format */ + for (ii = 0; ii < nattrs; ii++) + snprintf (tmpbuf + strlen (tmpbuf), sizeof (tmpbuf) - strlen (tmpbuf), + "~%s=0x%llx", cpc2_attrs[ii].ca_name, (long long) cpc2_attrs[ii].ca_val); + free (attr_mem); + } +canonical_attrs_wrapup: + free (nameOnly); + free (attrs); + return strdup (tmpbuf); +} + +/* process counter and value strings - put results in <*pret_ctr> */ + +/* Print errors to UEbuf for any failure that results in nonzero return */ +static int +process_ctr_def (int forKernel, hrtime_t global_min_time_nsec, + const char *counter, 
const char *value, Hwcentry *pret_ctr, + char* UWbuf, size_t UWsz, char* UEbuf, size_t UEsz) +{ + int rc = -1; + char *nameOnly = NULL; + char *attrs = NULL; + char *regstr = NULL; + int plus; + regno_t regno; + const Hwcentry *pfound = NULL; + const char *uname = NULL; + int disable_backtrack; + UEbuf[0] = 0; + UWbuf[0] = 0; + Tprintf (DBG_LT3, "hwctable: process_ctr_def(): counter=%s value=%s \n", + counter, value ? value : "NULL"); + hwcfuncs_parse_ctr (counter, &plus, &nameOnly, &attrs, ®str, ®no); + + /* search for the counter in the std and raw lists */ + { + pfound = ptrarray_find ((const Hwcentry**) cpcx_std[forKernel], nameOnly, NULL, 1, regno); + if (pfound) + hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in stdlist:", + pfound); + } + if (!pfound) + { + pfound = ptrarray_find ((const Hwcentry**) cpcx_hidden[forKernel], nameOnly, NULL, 1, regno); + if (pfound) + hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in stdlist(hidden):", pfound); + } + if (!pfound) + { + pfound = ptrarray_find_by_name (cpcx_raw[forKernel], nameOnly); /* (regno match checked later) */ + if (pfound) + hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in rawlist:", pfound); + } + if (!pfound) + { + pfound = ptrarray_find ((const Hwcentry**) cpcx_std[forKernel], nameOnly, NULL, 1, REGNO_ANY); + if (pfound) + hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in stdlist but regno didn't match:", pfound); + } + if (!pfound) + { + pfound = ptrarray_find ((const Hwcentry**) cpcx_hidden[forKernel], nameOnly, NULL, 1, REGNO_ANY); + if (pfound) + hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in stdlist(hidden) but regno didn't match:", pfound); + } + if (!pfound) + { + uint64_t val = 0; + if (is_numeric (nameOnly, &val)) + { + Hwcentry *tmp = alloc_shallow_copy (&empty_ctr); // Leaks? 
+ if (tmp) + { + tmp->name = strdup (nameOnly); + regno_add (tmp, REGNO_ANY); + pfound = tmp; + } + } + if (pfound) + hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: counter specified by numeric value:", pfound); + } + if (!pfound) + { + snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf), + GTXT ("Invalid HW counter name: %s\n"), nameOnly); + snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf), + GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"), + (IS_KERNEL (forKernel) ? "er_kernel" : "collect")); + goto process_ctr_def_wrapup; + } + + /* counter found */ + *pret_ctr = *pfound; /* shallow copy */ + pret_ctr->int_name = NULL; /* so free doesn't try to free these pfound's ptrs */ + pret_ctr->name = NULL; /* so free doesn't try to free these pfound's ptrs */ + + /* update uname,memop */ + uname = counter; + disable_backtrack = 0; + if (plus != 0 || ABST_PLUS_BY_DEFAULT (pret_ctr->memop)) + { + // attempt to process memoryspace profiling + int message_printed = 0; + if (cpcx_cpuver == CPUVER_GENERIC) + { + // accept plus, since we don't know what this CPU is + snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf), + GTXT ("`+' may not be correctly supported on `%s' because processor is not recognized."), + cpcx_cciname); + pret_ctr->memop = ABST_LDST; // supply a backtracking data type - required for collector + } + else if (cpcx_cpuver == CPC_ULTRA1 || cpcx_cpuver == CPC_ULTRA2 + || cpcx_cpuver == CPC_ULTRA3 || cpcx_cpuver == CPC_ULTRA3_PLUS + || cpcx_cpuver == CPC_ULTRA3_I || cpcx_cpuver == CPC_ULTRA4_PLUS + || cpcx_cpuver == CPC_ULTRA4 || cpcx_cpuver == CPC_ULTRA_T1 + || cpcx_cpuver == CPC_ULTRA_T2 || cpcx_cpuver == CPC_ULTRA_T2P + || cpcx_cpuver == CPC_ULTRA_T3) + { + if (!ABST_BACKTRACK_ENABLED (pret_ctr->memop)) + disable_backtrack = 1; + } + else if (cpcx_cpuver == CPC_SPARC_T4 || cpcx_cpuver == CPC_SPARC_T5 + || cpcx_cpuver == CPC_SPARC_T6 || cpcx_cpuver == CPC_SPARC_M4 + || cpcx_cpuver == 
CPC_SPARC_M5 || cpcx_cpuver == CPC_SPARC_M6 + || cpcx_cpuver == CPC_SPARC_M7 || cpcx_cpuver == CPC_SPARC_M8) + { + if (pret_ctr->memop != ABST_EXACT) + disable_backtrack = 1; + } + else if (cpcx_cpuver == CPC_INTEL_NEHALEM || cpcx_cpuver == CPC_INTEL_WESTMERE + || cpcx_cpuver == CPC_INTEL_SANDYBRIDGE + || cpcx_cpuver == CPC_INTEL_IVYBRIDGE + || cpcx_cpuver == CPC_INTEL_HASWELL + || cpcx_cpuver == CPC_INTEL_BROADWELL + || cpcx_cpuver == CPC_INTEL_SKYLAKE) + { + if (pret_ctr->memop != ABST_EXACT_PEBS_PLUS1) + disable_backtrack = 1; + else if (plus < 0) + { + // disabling memoryspace not supported for + // remove specified - + uname++; + plus = 0; + snprintf (UWbuf + strlen (UWbuf), UWsz - strlen (UWbuf), + GTXT ("Warning: `-' is not supported on `%s' -- memory reference backtracking will remain enabled for this counter\n"), + nameOnly); + } + } + else + { + message_printed = 1; + snprintf (UWbuf + strlen (UWbuf), UWsz - strlen (UWbuf), + GTXT ("Warning: `+' is not supported on `%s' -- memory reference backtracking will not be enabled for `%s'\n"), + cpcx_cciname, nameOnly); + disable_backtrack = 1; + } + if (disable_backtrack) + { + if (plus != 0) + uname++; // remove specified + or - + if (!message_printed && plus > 0) + snprintf (UWbuf + strlen (UWbuf), UWsz - strlen (UWbuf), + GTXT ("Warning: `+' is not supported on `%s' -- memory reference backtracking will not be enabled for this counter\n"), + nameOnly); + } + } + else + disable_backtrack = 1; + if (disable_backtrack || plus < 0) + if (pret_ctr->memop != ABST_NOPC) + pret_ctr->memop = ABST_NONE; + if (pret_ctr->memop == ABST_NOPC) + snprintf (UWbuf + strlen (UWbuf), UWsz - strlen (UWbuf), + GTXT ("Warning: HW counter `%s' is not program-related -- callstacks will be not be recorded for this counter\n"), + uname); + + /* update reg_num */ + if (!regno_is_valid (pfound, regno)) + { + char buf[1024]; + snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf), + GTXT ("For counter `%s', %s is not a valid register; 
valid registers: %s\n"), + nameOnly, regstr ? regstr + 1 : "?", + get_regnolist (buf, sizeof (buf), pfound->reg_list, 1)); + goto process_ctr_def_wrapup; + } + if (pret_ctr->reg_num == REGNO_ANY) + { /* table's regno is a wildcard */ + if (REG_LIST_EOL (pfound->reg_list[1])) + { + /* valid list only contains one regno, so use it */ + pret_ctr->reg_num = pfound->reg_list[0]; + } + else + pret_ctr->reg_num = regno; /* use user's selection */ + } + + /* update name and int_name */ + { + // validate attributes + if (attrs) + { + hwcfuncs_attr_t cpc2_attrs[HWCFUNCS_MAX_ATTRS]; + void * attr_mem; + unsigned nattrs; + char *errbuf; + /* extract attributes from uname */ + attr_mem = hwcfuncs_parse_attrs (uname, cpc2_attrs, HWCFUNCS_MAX_ATTRS, + &nattrs, &errbuf); + if (!attr_mem) + { + snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf), + "%s\n", errbuf); + free (errbuf); + goto process_ctr_def_wrapup; + } + /* make sure all attributes are valid */ + for (unsigned ii = 0; ii < nattrs; ii++) + { + if (!attr_is_valid (forKernel, cpc2_attrs[ii].ca_name)) + { + snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf), + GTXT ("Invalid attribute specified for counter `%s': %s\n"), + nameOnly, cpc2_attrs[ii].ca_name); + snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf), + GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"), + (IS_KERNEL (forKernel) ? 
"er_kernel" : "collect")); + free (attr_mem); + goto process_ctr_def_wrapup; + } + for (unsigned jj = ii + 1; jj < nattrs; jj++) + { + if (strcmp (cpc2_attrs[ii].ca_name, + cpc2_attrs[jj].ca_name) == 0) + { + snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf), + GTXT ("Duplicate attribute specified for counter `%s': %s\n"), + nameOnly, cpc2_attrs[ii].ca_name); + free (attr_mem); + goto process_ctr_def_wrapup; + } + } + } + free (attr_mem); + } + pret_ctr->name = strdup (uname); + + // assign int_name + if (pfound->int_name) + { + // Counter is one of the following: + // - aliased (e.g. cycles~system=1), + // - convenience (e.g. cycles0~system=1), + if (!attrs) // convert alias to internal name + pret_ctr->int_name = strdup (pfound->int_name); + else + { + // convert alias to internal name and + // append user-supplied attributes + size_t sz = strlen (pfound->int_name) + strlen (attrs) + 1; + char *tbuf = calloc (sz, 1); + if (tbuf) + snprintf (tbuf, sz, "%s%s", pfound->int_name, attrs); + pret_ctr->int_name = tbuf; + } + } + else + pret_ctr->int_name = strdup (uname); // user-supplied name + } + + /* update val */ + if (set_hwcval (pret_ctr, global_min_time_nsec, value)) + { + snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf), + GTXT ("Invalid interval for HW counter `%s': %s\n"), + nameOnly, value); + goto process_ctr_def_wrapup; + } + hwcentry_print (DBG_LT2, "hwctable: process_ctr_def:", pret_ctr); + rc = 0; + +process_ctr_def_wrapup: + free (regstr); + free (attrs); + free (nameOnly); + return rc; +} + +/*---------------------------------------------------------------------------*/ + +/* external interfaces, see hwcentry.h for descriptions. 
*/ + +extern int +hwc_lookup (int forKernel, hrtime_t global_min_time_nsec, const char *instring, + Hwcentry *caller_entries[], unsigned maxctrs, char **emsg, char **wmsg) +{ + unsigned ii; + char *instr_copy = NULL, *ss = NULL; + unsigned numctrs = 0; + int rc = 0; + char *tokenptr[MAX_PICS * 2]; + unsigned numtokens = 0; + char UEbuf[1024 * 5]; /* error message buffer; strdup of it is passed back to user */ + char UWbuf[1024 * 5]; /* warning message buffer; strdup of it is passed back to user */ + if (emsg) + *emsg = NULL; + if (wmsg) + *wmsg = NULL; + UEbuf[0] = 0; + UWbuf[0] = 0; + + // supply temporary result buffers as needed + Hwcentry tmp_entry_table[MAX_PICS]; + Hwcentry * tmp_entries[MAX_PICS]; + Hwcentry **entries; + if (caller_entries) + entries = caller_entries; + else + { + // user doesn't care about results; provide temporary storage for results + for (ii = 0; ii < MAX_PICS; ii++) + tmp_entries[ii] = &tmp_entry_table[ii]; + entries = tmp_entries; + maxctrs = MAX_PICS; + } + Tprintf (DBG_LT1, "hwctable: hwc_lookup(%s)\n", + instring ? instring : "NULL"); + + /* clear <entries> first - prevent seg faults in hwc_lookup_wrapup */ + for (ii = 0; ii < maxctrs; ii++) + *entries[ii] = empty_ctr; + if (!instring) + { + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("No HW counters were specified.")); + rc = -1; + goto hwc_lookup_wrapup; + } + + /* make sure tables are initialized */ + setup_cpc_skip_hwctest (); + if (cpcx_npics == 0) + { + if (cpcx_cpuver < 0) + { + char buf[1024]; + *buf = 0; + char *pch = hwcfuncs_errmsg_get (buf, sizeof (buf), 0); /* get first err msg, disable capture */ + if (*pch) + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("HW counter profiling is not supported on this system: %s%s"), + pch, pch[strlen (pch) - 1] == '\n' ? 
"" : "\n"); + else + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("HW counter profiling is not supported on this system\n")); + } + else + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("HW counter profiling is not supported on '%s'\n"), + cpcx_cciname); + rc = -1; + goto hwc_lookup_wrapup; + } + ss = instr_copy = strdup (instring); + while (*ss != 0 && (*ss == ' ' || *ss == '\t')) + ss++; + tokenptr[numtokens++] = ss; + do + { + /* find end of previous token, replace w/ NULL, skip whitespace, set <tokenptr>, repeat */ + for (; *ss; ss++) + { + if (*ss == ',' || *ss == ' ' || *ss == '\t') + { + /* end of previous token found */ + *ss = 0; /* terminate the previous token */ + ss++; + while (*ss != 0 && (*ss == ' ' || *ss == '\t')) + ss++; + if (*ss) + tokenptr[numtokens++] = ss; + break; // from for loop + } + } + } + while (*ss && numtokens < (MAX_PICS * 2)); + + if (*ss) + { + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("The number of HW counters specified exceeds internal resources\n")); + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"), + (IS_KERNEL (forKernel) ? "er_kernel" : "collect")); + rc = -1; + goto hwc_lookup_wrapup; + } + Tprintf (DBG_LT3, "hwctable: hwc_lookup(): numtokens=%d\n", numtokens); + + /* look up individual counters */ + { + int fail = 0; + for (ii = 0; ii < numtokens && numctrs < maxctrs; ii += 2) + { + const char *counter; + const char *value; + Hwcentry *pret_ctr = entries[numctrs]; + + /* assign the tokens to ctrnames, timeoutValues. 
*/ + counter = tokenptr[ii]; + if (ii + 1 < numtokens) + value = tokenptr[ii + 1]; + else + value = 0; + if (process_ctr_def (forKernel, global_min_time_nsec, counter, value, pret_ctr, + UWbuf + strlen (UWbuf), + sizeof (UWbuf) - strlen (UWbuf), + UEbuf + strlen (UEbuf), + sizeof (UEbuf) - strlen (UEbuf))) + { + /* could choose to set fail=1 and continue here, + but errmsgs would be aggregated (messy) */ + rc = -1; + goto hwc_lookup_wrapup; + } + numctrs++; + } + if (fail) + { + rc = -1; + goto hwc_lookup_wrapup; + } + } + + if (!numctrs) + { + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("No HW counters were specified.\n")); + rc = -1; + goto hwc_lookup_wrapup; + } + if (numctrs > cpcx_max_concurrent[forKernel]) + { + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("The HW counter configuration could not be loaded: More than %d counters were specified\n"), cpcx_max_concurrent[forKernel]); + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"), + (IS_KERNEL (forKernel) ? "er_kernel" : "collect")); + rc = -1; + goto hwc_lookup_wrapup; + } + +hwc_lookup_wrapup: + free (instr_copy); + if (wmsg && strlen (UWbuf)) + *wmsg = strdup (UWbuf); + if (emsg && strlen (UEbuf)) + *emsg = strdup (UEbuf); + if (rc == 0) + rc = numctrs; + return rc; +} + +extern char * +hwc_validate_ctrs (int forKernel, Hwcentry *entries[], unsigned numctrs) +{ + char UEbuf[1024 * 5]; + UEbuf[0] = 0; + + /* search for obvious duplicates*/ + unsigned ii; + for (ii = 0; ii < numctrs; ii++) + { + regno_t reg_a = entries[ii]->reg_num; + if (reg_a != REGNO_ANY) + { + unsigned jj; + for (jj = ii + 1; jj < numctrs; jj++) + { + int reg_b = entries[jj]->reg_num; + if (reg_a == reg_b) + { + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("Only one HW counter is allowed per register. 
The following counters use register %d: \n"), + reg_a); + for (jj = 0; jj < numctrs; jj++) + { + char buf[256]; + int reg_b = entries[jj]->reg_num; + if (reg_a == reg_b) + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT (" %d. %s\n"), jj + 1, + hwc_hwcentry_specd_string (buf, sizeof (buf), + entries[jj])); + } + return strdup (UEbuf); + } + } + } + } + + /* test counters */ + hwcfuncs_errmsg_get (NULL, 0, 1); /* enable errmsg capture */ + int hwc_rc = hwcfuncs_assign_regnos (entries, numctrs); + if (!hwc_rc) + hwc_rc = test_hwcs ((const Hwcentry**) entries, numctrs); + if (hwc_rc) + { + if (cpcx_cpuver == CPC_PENTIUM_4_HT || cpcx_cpuver == CPC_PENTIUM_4) + { + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("HW counter profiling is disabled unless only one logical CPU per HyperThreaded processor is online (see psradm)\n")); + return strdup (UEbuf); + } + char buf[1024]; + *buf = 0; + char * pch = hwcfuncs_errmsg_get (buf, sizeof (buf), 0); /* get first err msg, disable capture */ + if (*pch) + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("The HW counter configuration could not be loaded: %s%s"), + pch, pch[strlen (pch) - 1] == '\n' ? "" : "\n"); + else + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("The HW counter configuration could not be loaded\n")); + snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf), + GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"), + (IS_KERNEL (forKernel) ? 
"er_kernel" : "collect")); + return strdup (UEbuf); + } + return NULL; +} + +extern Hwcentry * +hwc_post_lookup (Hwcentry * pret_ctr, char *counter, char * int_name, int cpuver) +{ + const Hwcentry *pfound; + regno_t regno; + char *nameOnly = NULL; + char *attrs = NULL; + + /* fields in pret_ctr (name and int_name) should already be free */ + hwcfuncs_parse_ctr (counter, NULL, &nameOnly, &attrs, NULL, ®no); + + /* look for it in the canonical list */ + pfound = static_table_find (stdlist_get_table (cpuver), + nameOnly, int_name, 0, REGNO_ANY); + if (!pfound) /* try the generic list */ + pfound = static_table_find (papi_generic_list, + nameOnly, int_name, 0, REGNO_ANY); + if (pfound) + { + /* in standard list */ + *pret_ctr = *pfound; /* shallow copy */ + if (pret_ctr->int_name) + { + // aliased counter + pret_ctr->int_name = strdup (pret_ctr->int_name); + if (pret_ctr->short_desc == NULL) + { + // look for short_desc of corresponding raw counter + const Hwcentry *praw = static_table_find (stdlist_get_table (cpuver), + pret_ctr->int_name, NULL, 0, REGNO_ANY); + if (praw && praw->short_desc) + pret_ctr->short_desc = strdup (praw->short_desc); + } + } + else + pret_ctr->int_name = strdup (counter); + if (pret_ctr->reg_num == REGNO_ANY) + pret_ctr->reg_num = regno; /* table's regno is a wildcard */ + } + else + { + /* not a standard counter */ + *pret_ctr = empty_ctr; + pret_ctr->int_name = strdup (counter); + pret_ctr->reg_num = regno; + } + + /* update the name */ + if (attrs) + { + pret_ctr->name = canonical_name (counter); + if (pret_ctr->metric) + { + // metric text is supplied from a table. 
(User supplied HWC alias) + // Append user-supplied attributes to metric name: + size_t len = strlen (pret_ctr->metric) + strlen (attrs) + 4; + char *pch = calloc (len, 1); + if (pch) + snprintf (pch, len, "%s (%s)", pret_ctr->metric, attrs); + pret_ctr->metric = pch; // leaks + } + } + else + pret_ctr->name = strdup (nameOnly); + + if (pfound) + hwcentry_print (DBG_LT2, "hwctable: hwc_post_lookup: found: ", pret_ctr); + else + hwcentry_print (DBG_LT2, "hwctable: hwc_post_lookup: default: ", pret_ctr); + free (attrs); + free (nameOnly); + return pret_ctr; +} + +static const char * +hwc_on_lo_hi (const Hwcentry *pctr) +{ + char* rate; + { + switch (pctr->min_time) + { + case (HWCTIME_LO): + rate = NTXT ("lo"); + break; + case (HWCTIME_ON): + rate = NTXT ("on"); + break; + case (HWCTIME_HI): + rate = NTXT ("hi"); + break; + case (0): + rate = NULL; // null => use interval count + break; + default: + case (HWCTIME_TBD): + rate = NTXT ("on"); + break; + } + } + return rate; //strdup( rate ); +} + +extern char * +hwc_rate_string (const Hwcentry *pctr, int force_numeric) +{ + const char * rateString = hwc_on_lo_hi (pctr); + char buf[128]; + if (!rateString || force_numeric) + { + snprintf (buf, sizeof (buf), NTXT ("%d"), pctr->val); + rateString = buf; + } + return strdup (rateString); +} + +static char metricbuf[2048]; + +extern char * +hwc_i18n_metric (const Hwcentry *pctr) +{ + if (pctr->metric != NULL) + snprintf (metricbuf, sizeof (metricbuf), NTXT ("%s"), PTXT (pctr->metric)); + else if (pctr->name != NULL) + snprintf (metricbuf, sizeof (metricbuf), GTXT ("%s Events"), pctr->name); + else if (pctr->int_name != NULL) + snprintf (metricbuf, sizeof (metricbuf), GTXT ("%s Events"), pctr->int_name); + else + snprintf (metricbuf, sizeof (metricbuf), GTXT ("Undefined Events")); + return metricbuf; +} + +/* return cpu version, should only be called when about to generate an experiment, + not when reading back an experiment */ +#if 0 /* called by ... */ +. 
/ perfan / collect / src / collect.cc : start : 245 : cpuver = hwc_get_cpc_cpuver (); +. / ccr_components / Collector_Interface / collctrl.cc : constructor : 202 : cpcx_cpuver = hwc_get_cpc_cpuver (); +. / perfan / dbe / src / Dbe.cc : 3041 : JApplication::cpuver = hwc_get_cpc_cpuver (); +. / perfan / dbe / src / Dbe.cc : 3164 : JApplication::cpuver = hwc_get_cpc_cpuver (); + +note: +cpc_getcpuver () : only papi, ostest, this and hwprofile.c call it +#endif +int +hwc_get_cpc_cpuver () +{ + setup_cpcx (); + return cpcx_cpuver; +} + +extern char* +hwc_get_cpuname (char *buf, size_t buflen) +{ + setup_cpcx (); + if (!buf || !buflen) + return buf; + buf[0] = 0; + if (cpcx_cciname) + { + strncpy (buf, cpcx_cciname, buflen - 1); + buf[buflen - 1] = 0; + } + return buf; +} + +extern char* +hwc_get_docref (char *buf, size_t buflen) +{ + setup_cpcx (); + if (!buf || !buflen) + return buf; + buf[0] = 0; + if (cpcx_docref) + { + strncpy (buf, cpcx_docref, buflen - 1); + buf[buflen - 1] = 0; + } + return buf; +} + +//TBR: + +extern char* +hwc_get_default_cntrs () +{ + setup_cpcx (); + if (cpcx_default_hwcs[0] != NULL) + return strdup (cpcx_default_hwcs[0]); // TBR deprecate this + return NULL; +} + +extern char* +hwc_get_default_cntrs2 (int forKernel, int style) +{ + setup_cpcx (); + if (!VALID_FOR_KERNEL (forKernel)) + return NULL; + char *cpcx_default = cpcx_default_hwcs[forKernel]; + if (cpcx_default == NULL || cpcx_npics == 0) + return NULL; + if (style == 1) + return strdup (cpcx_default); + + // style == 2 + // we will replace "," delimiters with " -h " (an extra 3 chars per HWC) + char *s = (char *) malloc (strlen (cpcx_default) + 3 * cpcx_npics); + if (s == NULL) return s; + char *p = s; + char *q = cpcx_default; + int i; + for (i = 0; i < cpcx_npics; i++) + { + int qlen = strlen (q); + if (qlen == 0) + { + p[0] = '\0'; + break; + } + // add " -h " if not the first HWC + if (i != 0) + { + p[0] = ' '; + p[1] = '-'; + p[2] = 'h'; + p[3] = ' '; + p += 4; + } + + // find 
second comma + char *r = strchr (q, ','); + if (r) + r = strchr (r + 1, ','); + + // we didn't find one, so the rest of the string is the last HWC + if (r == NULL) + { + // EUGENE could check i==cpcx_npicx-1, but what if it isn't??? + strcpy (p, q); + if (p[qlen - 1] == ',') + qlen--; + p[qlen] = '\0'; + break; + } + + // copy the HWC, trim trailing comma, add null char + qlen = r - q - 1; + strcpy (p, q); + if (p[qlen - 1] == ',') + qlen--; + p += qlen; + p[0] = '\0'; + q = r + 1; + } + return s; +} + +extern char* +hwc_get_orig_default_cntrs (int forKernel) +{ + setup_cpcx (); + if (!VALID_FOR_KERNEL (forKernel)) + return NULL; + if (cpcx_orig_default_hwcs[forKernel] != NULL) + return strdup (cpcx_orig_default_hwcs[forKernel]); + return NULL; +} + +extern const char * +hwc_memop_string (ABST_type memop) +{ + const char * s; + switch (memop) + { + case ABST_NONE: + s = ""; + break; + case ABST_LOAD: + s = GTXT ("load "); + break; + case ABST_STORE: + s = GTXT ("store "); + break; + case ABST_LDST: + case ABST_US_DTLBM: + case ABST_LDST_SPARC64: + s = GTXT ("load-store "); + break; + case ABST_EXACT_PEBS_PLUS1: + case ABST_EXACT: + s = GTXT ("memoryspace "); + break; + case ABST_COUNT: + s = GTXT ("count "); + break; + case ABST_NOPC: + s = GTXT ("not-program-related "); + break; + default: + s = ""; // was "ABST_UNK", but that's meaningless to users + break; + } + return s; +} + +static const char * +timecvt_string (int timecvt) +{ + if (timecvt > 0) + return GTXT ("CPU-cycles"); + if (timecvt < 0) + return GTXT ("ref-cycles"); + return GTXT ("events"); +} + +int show_regs = 0; // The register setting is available on Solaris only + +/* + * print the specified strings in aligned columns + */ +static void +format_columns (char *buf, int bufsiz, char *s1, char *s2, const char *s3, + const char *s4, char *s5, const char *s6) +{ + // NULL strings are blanks + char *blank = NTXT (""); + if (s2 == NULL) + s2 = blank; + if (s3 == NULL) + s3 = blank; + if (s6 == NULL) + s6 
= blank; + + // get the lengths and target widths + // (s6 can be as wide as it likes) + int l1 = strlen (s1), n1 = 10, l2 = strlen (s2), n2 = 13; + int l3 = strlen (s3), n3 = 20, l4 = strlen (s4), n4 = 10, n5; + char divide = ' '; + + // adjust widths, stealing from one column to help a neighbor + // There's a ragged boundary between s2 and s3. + // So push this boundary to the right. + n2 += n3 - l3; + n3 -= n3 - l3; + + // If s3 is empty, push the boundary over to s4. + if (l3 == 0) + { + n2 += n4 - l4; + n4 -= n4 - l4; + } + + // If there's enough room to fit s1 and s2, do so. + if (n1 + n2 >= l1 + l2) + { + if (n1 < l1) + { + n2 -= l1 - n1; + n1 += l1 - n1; + } + if (n2 < l2) + { + n1 -= l2 - n2; + n2 += l2 - n2; + } + } + else + { + // not enough room, so we need to divide the line + n3 += 4 // 4-blank margin + + n1 // 1st column + + 1 // space between 1st and 2nd columns + + n2 // 2nd column + + 1; // space between 2nd and 3th columns + divide = '\n'; + + // make 1st column large enough + if (n1 < l1) + n1 = l1; + + // width of 2nd column no longer matters since we divided the line + n2 = 0; + } + + if (show_regs) + { + // fifth column should be wide enough for regnolist + // see function get_regnolist() + if (cpcx_npics < 10) + n5 = cpcx_npics; // one char per regno + else + n5 = 16 + 3 * (cpcx_npics - 9); // spaces between regnos and some regnos are 2-char wide + // ... 
and be wide enough for header "regs" + if (n5 < 4) + n5 = 4; + + // print to buffer + // (don't need a space before s4 since historical precedent to have a trailing space in s3) + snprintf (buf, bufsiz, "%-*s %-*s%c%*s%*s %-*s %s", + n1, s1, n2, s2, divide, n3, s3, n4, s4, n5, s5, s6); + } + else + snprintf (buf, bufsiz, "%-*s %-*s%c%*s%*s %s", + n1, s1, n2, s2, divide, n3, s3, n4, s4, s6); + for (int i = strlen (buf); i > 0; i--) + if (buf[i] == ' ' || buf[i] == '\t') + buf[i] = 0; + else + break; +} + +/* routine to return HW counter string formatted and i18n'd */ +static char * +hwc_hwcentry_string_internal (char *buf, size_t buflen, const Hwcentry *ctr, + int show_short_desc) +{ + char stderrbuf[1024]; + char regnolist[256]; + if (!buf || !buflen) + return buf; + buf[0] = 0; + if (ctr == NULL) + { + snprintf (stderrbuf, sizeof (stderrbuf), GTXT ("HW counter not available")); + goto hwc_hwcentry_string_done; + } + char *desc = NULL; + if (show_short_desc) + desc = ctr->short_desc; + if (desc == NULL) + desc = ctr->metric ? 
hwc_i18n_metric (ctr) : NULL; + format_columns (stderrbuf, sizeof (stderrbuf), ctr->name, ctr->int_name, + hwc_memop_string (ctr->memop), timecvt_string (ctr->timecvt), + get_regnolist (regnolist, sizeof (regnolist), ctr->reg_list, 2), + desc); + +hwc_hwcentry_string_done: + strncpy (buf, stderrbuf, buflen - 1); + buf[buflen - 1] = 0; + return buf; +} + +/* routine to return HW counter string formatted and i18n'd */ +extern char * +hwc_hwcentry_string (char *buf, size_t buflen, const Hwcentry *ctr) +{ + return hwc_hwcentry_string_internal (buf, buflen, ctr, 0); +} + +/* routine to return HW counter string formatted and i18n'd */ +extern char * +hwc_hwcentry_specd_string (char *buf, size_t buflen, const Hwcentry *ctr) +{ + char stderrbuf[1024]; + const char *memop, *timecvt; + char descstr[1024]; + if (!buf || !buflen) + return buf; + buf[0] = 0; + if (ctr == NULL) + { + snprintf (stderrbuf, sizeof (stderrbuf), GTXT ("HW counter not available")); + goto hwc_hwcentry_specd_string_done; + } + timecvt = timecvt_string (ctr->timecvt); + if (ctr->memop) + memop = hwc_memop_string (ctr->memop); + else + memop = ""; + if (ctr->metric != NULL) /* a standard counter for a specific register */ + snprintf (descstr, sizeof (descstr), GTXT (" (`%s'; %s%s)"), + hwc_i18n_metric (ctr), memop, timecvt); + else /* raw counter */ + snprintf (descstr, sizeof (descstr), GTXT (" (%s%s)"), memop, timecvt); + + char *rateString = hwc_rate_string (ctr, 1); + snprintf (stderrbuf, sizeof (stderrbuf), NTXT ("%s,%s%s"), ctr->name, + rateString ? 
rateString : "", descstr); + free (rateString); + +hwc_hwcentry_specd_string_done: + strncpy (buf, stderrbuf, buflen - 1); + buf[buflen - 1] = 0; + return buf; +} + +unsigned +hwc_get_max_regs () +{ + setup_cpcx (); + return cpcx_npics; +} + +unsigned +hwc_get_max_concurrent (int forKernel) +{ + setup_cpcx (); + if (!VALID_FOR_KERNEL (forKernel)) + return 0; + return cpcx_max_concurrent[forKernel]; +} + +char** +hwc_get_attrs (int forKernel) +{ + setup_cpcx (); + if (!VALID_FOR_KERNEL (forKernel)) + return NULL; + return cpcx_attrs[forKernel]; +} + +Hwcentry ** +hwc_get_std_ctrs (int forKernel) +{ + setup_cpcx (); + if (!VALID_FOR_KERNEL (forKernel)) + return NULL; + return cpcx_std[forKernel]; +} + +Hwcentry ** +hwc_get_raw_ctrs (int forKernel) +{ + setup_cpcx (); + if (!VALID_FOR_KERNEL (forKernel)) + return NULL; + return cpcx_raw[forKernel]; +} + +/* Call an action function for each attribute supported */ +unsigned +hwc_scan_attrs (void (*action)(const char *attr, const char *desc)) +{ + setup_cpcx (); + int cnt = 0; + for (int ii = 0; cpcx_attrs[0] && cpcx_attrs[0][ii]; ii++, cnt++) + { + if (action) + action (cpcx_attrs[0][ii], NULL); + } + if (!cnt && action) + action (NULL, NULL); + return cnt; +} + +unsigned +hwc_scan_std_ctrs (void (*action)(const Hwcentry *)) +{ + setup_cpcx (); + Tprintf (DBG_LT1, "hwctable: hwc_scan_standard_ctrs()...\n"); + int cnt = 0; + for (int ii = 0; cpcx_std[0] && cpcx_std[0][ii]; ii++, cnt++) + if (action) + action (cpcx_std[0][ii]); + if (!cnt && action) + action (NULL); + return cnt; +} + +/* Call an action function for each counter supported */ +/* action is called with NULL when all counters have been seen */ +unsigned +hwc_scan_raw_ctrs (void (*action)(const Hwcentry *)) +{ + setup_cpcx (); + Tprintf (DBG_LT1, "hwctable: hwc_scan_raw_ctrs()...\n"); + int cnt = 0; + for (int ii = 0; cpcx_raw[0] && cpcx_raw[0][ii]; ii++, cnt++) + if (action) + action (cpcx_raw[0][ii]); + if (!cnt && action) + action (NULL); + return cnt; +} 
+ +static void +hwc_usage_raw_overview_sparc (FILE *f_usage, int cpuver) +{ + /* All these cpuver's use cputabs[]==sparc_t5_m6 anyhow. */ + if ((cpuver == CPC_SPARC_M5) || (cpuver == CPC_SPARC_M6) + || (cpuver == CPC_SPARC_T5) || (cpuver == CPC_SPARC_T6)) + cpuver = CPC_SPARC_M4; // M4 was renamed to M5 + + /* While there are small differences between + * cputabs[]== sparc_t4 + * cputabs[]== sparc_t5_m6 + * they are in HWCs we don't discuss in the overview anyhow. + * So just lump them in with T4. + */ + if (cpuver == CPC_SPARC_M4) + cpuver = CPC_SPARC_T4; + + /* Check for the cases we support. */ + if (cpuver != CPC_SPARC_T4 && cpuver != CPC_SPARC_M7 && cpuver != CPC_SPARC_M8) + return; + fprintf (f_usage, GTXT (" While the above aliases represent the most useful hardware counters\n" + " for this processor, a full list of raw (unaliased) counter names appears\n" + " below. First is an overview of some of these names.\n\n")); + fprintf (f_usage, GTXT (" == Cycles.\n" + " Count active cycles with\n" + " Cycles_user\n" + " Set attributes to choose user, system, and/or hyperprivileged cycles.\n\n")); + fprintf (f_usage, GTXT (" == Instructions.\n" + " Count instructions when they are committed with:\n")); + fprintf (f_usage, NTXT (" Instr_all\n")); + if (cpuver != CPC_SPARC_M8) + fprintf (f_usage, GTXT (" It is the total of these counters:\n")); + else + fprintf (f_usage, GTXT (" Some subsets of instructions can be counted separately:\n")); + fprintf (f_usage, NTXT (" Branches %s\n"), GTXT ("branches")); + fprintf (f_usage, NTXT (" Instr_FGU_crypto %s\n"), GTXT ("Floating Point and Graphics Unit")); + fprintf (f_usage, NTXT (" Instr_ld %s\n"), GTXT ("loads")); + fprintf (f_usage, NTXT (" Instr_st %s\n"), GTXT ("stores")); + fprintf (f_usage, NTXT (" %-19s %s\n"), + cpuver == CPC_SPARC_M7 ? 
NTXT ("Instr_SPR_ring_ops") + : NTXT ("SPR_ring_ops"), + GTXT ("internal use of SPR ring")); + fprintf (f_usage, NTXT (" Instr_other %s\n"), GTXT ("basic arithmetic and logical instructions")); + if (cpuver != CPC_SPARC_M8) + fprintf (f_usage, GTXT (" Some subsets of these instructions can be counted separately:\n")); + fprintf (f_usage, NTXT (" Br_taken %s\n"), GTXT ("Branches that are taken")); + fprintf (f_usage, NTXT (" %-19s %s\n"), + cpuver == CPC_SPARC_M7 ? NTXT ("Instr_block_ld_st") + : NTXT ("Block_ld_st"), + GTXT ("block load/store")); + fprintf (f_usage, NTXT (" %-19s %s\n"), + cpuver == CPC_SPARC_M7 ? NTXT ("Instr_atomic") + : NTXT ("Atomics"), + GTXT ("atomic instructions")); + fprintf (f_usage, NTXT (" %-19s %s\n"), + cpuver == CPC_SPARC_M7 ? NTXT ("Instr_SW_prefetch") + : NTXT ("SW_prefetch"), + GTXT ("prefetches")); + fprintf (f_usage, NTXT (" %-19s %s\n"), + cpuver == CPC_SPARC_M7 ? NTXT ("Instr_SW_count") + : NTXT ("Sw_count_intr"), + GTXT ("SW Count instructions (counts special no-op assembler instructions)")); + fprintf (f_usage, NTXT ("\n")); + +#ifdef TMPLEN + compilation error : we're trying to use a macro that's already defined +#endif +#define TMPLEN 32 + char s0[TMPLEN], s1[TMPLEN], s2[TMPLEN], s3[TMPLEN]; + if (cpuver == CPC_SPARC_M7) + { + snprintf (s0, TMPLEN, "Commit_0_cyc"); + snprintf (s1, TMPLEN, "Commit_1_cyc"); + snprintf (s2, TMPLEN, "Commit_2_cyc"); + snprintf (s3, TMPLEN, "Commit_1_or_2_cyc"); + } + else + { + snprintf (s0, TMPLEN, "Commit_0"); + snprintf (s1, TMPLEN, "Commit_1"); + snprintf (s2, TMPLEN, "Commit_2"); + snprintf (s3, TMPLEN, "Commit_1_or_2"); + } +#undef TMPLEN + fprintf (f_usage, GTXT (" == Commit.\n" + " Instructions may be launched speculatively, executed out of order, etc.\n")); + if (cpuver != CPC_SPARC_M8) + { + fprintf (f_usage, GTXT (" We can count the number of cycles during which 0, 1, or 2 instructions are\n" + " actually completed and their results committed:\n")); + fprintf (f_usage, GTXT (" %s\n" + 
" %s\n" + " %s\n" + " %s\n" + " %s is a useful way of identifying parts of your application with\n" + " high-latency instructions.\n\n"), + s0, s1, s2, s3, s0); + } + else + { + fprintf (f_usage, GTXT (" We can count the number of cycles during which no instructions were\n" + " able to commit results using:\n")); + fprintf (f_usage, GTXT (" %s\n" + " %s is a useful way of identifying parts of your application with\n" + " high-latency instructions.\n\n"), + s0, s0); + } + + fprintf (f_usage, GTXT (" == Cache/memory hierarchy.\n")); + if (cpuver == CPC_SPARC_M7) + { + fprintf (f_usage, GTXT (" In the cache hierarchy:\n" + " * Each socket has memory and multiple SPARC core clusters (scc).\n" + " * Each scc has an L3 cache and multiple L2 and L1 caches.\n")); + fprintf (f_usage, GTXT (" Loads can be counted by where they hit on socket:\n")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_hit"), GTXT ("hit own L1 data cache")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_L2_hit"), GTXT ("hit own L2")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_L3_hit"), GTXT ("hit own L3")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_nbr_L2_hit"), GTXT ("hit neighbor L2 (same scc)")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_nbr_scc_hit"), GTXT ("hit neighbor scc (same socket)")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_nbr_scc_miss"), GTXT ("miss all caches (same socket)")); + fprintf (f_usage, GTXT (" These loads can also be grouped:\n")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss"), GTXT ("all - DC_hit")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_L2_miss"), GTXT ("all - DC_hit - DC_miss_L2_hit")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_L3_miss"), GTXT ("DC_miss_nbr_scc_hit + DC_miss_nbr_scc_miss")); + fprintf (f_usage, GTXT (" Loads that miss all caches on this socket can be counted:\n")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT 
("DC_miss_remote_scc_hit"), GTXT ("hit cache on different socket")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_local_mem_hit"), GTXT ("hit local memory (same socket)")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_remote_mem_hit"), GTXT ("hit remote memory (off socket)")); + fprintf (f_usage, GTXT (" These events are for speculative loads, launched in anticipation\n" + " of helping performance but whose results might not be committed.\n")); +#if 0 // was: #if defined(linux). See 22236226 - sparc-Linux: Support basic Memoryspace and Dataspace profiling (capture VADDR) + /* 21869427 should not look like memoryspace profiling is supported on Linux */ + /* 21869424 desire memoryspace profiling on Linux */ + fprintf (f_usage, GTXT (" To count only data-cache misses that commit, use:\n")); + fprintf (f_usage, NTXT (" DC_miss_commit\n")); +#else + fprintf (f_usage, GTXT (" To count only data-cache misses that commit, or for memoryspace profiling,\n" + " use the 'memoryspace' counter:\n")); + fprintf (f_usage, NTXT (" DC_miss_commit\n")); +#endif + fprintf (f_usage, NTXT ("\n")); + } + else if (cpuver == CPC_SPARC_M8) + { + fprintf (f_usage, GTXT (" In the cache hierarchy:\n" + " * Each processor has 4 memory controllers and 2 quad core clusters (QCC).\n" + " * Each QCC contains 4 cache processor clusters (CPC).\n" + " * Each CPC contains 4 cores.\n" + " * Each core supports 8 hardware threads.\n" + " * The L3 consists of 2 partitions with 1 QCC per partition.\n" + )); + fprintf (f_usage, GTXT (" Loads can be counted by where they hit on socket:\n")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_L2_hit"), GTXT ("hit own L2")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_L3_hit"), GTXT ("hit own L3")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_L3_dirty_copyback"), GTXT ("hit own L3 but require copyback from L2D")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_nbr_L3_hit"), GTXT ("hit 
neighbor L3 (same socket)")); + fprintf (f_usage, GTXT (" Loads that miss all caches on this socket can be counted:\n")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_remote_L3_hit"), GTXT ("hit cache on different socket")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_local_mem_hit"), GTXT ("hit local memory (same socket)")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("DC_miss_remote_mem_hit"), GTXT ("hit remote memory (off socket)")); + fprintf (f_usage, GTXT (" These events are for speculative loads, launched in anticipation\n" + " of helping performance but whose results might not be committed.\n")); +#if 0 // was: #if defined(linux). See 22236226 - sparc-Linux: Support basic Memoryspace and Dataspace profiling (capture VADDR) + /* 21869427 should not look like memoryspace profiling is supported on Linux */ + /* 21869424 desire memoryspace profiling on Linux */ + fprintf (f_usage, GTXT (" To count only data-cache misses that commit, use:\n")); + fprintf (f_usage, NTXT (" DC_miss_commit\n")); +#else + fprintf (f_usage, GTXT (" To count only data-cache misses that commit, or for memoryspace profiling,\n" + " use the 'memoryspace' counter:\n")); + fprintf (f_usage, NTXT (" DC_miss_commit\n")); +#endif + fprintf (f_usage, NTXT ("\n")); + } + else + { + fprintf (f_usage, GTXT (" Total data-cache misses can be counted with:\n")); + fprintf (f_usage, NTXT (" DC_miss DC_miss_nospec\n")); + fprintf (f_usage, GTXT (" They are the totals of misses that hit in L2/L3 cache, local memory, or\n" + " remote memory:\n")); + fprintf (f_usage, NTXT (" DC_miss_L2_L3_hit DC_miss_L2_L3_hit_nospec\n")); + fprintf (f_usage, NTXT (" DC_miss_local_hit DC_miss_local_hit_nospec\n")); + fprintf (f_usage, NTXT (" DC_miss_remote_L3_hit DC_miss_remote_L3_hit_nospec\n")); + fprintf (f_usage, GTXT (" The events in the left column include speculative operations. 
Use the\n" + " right-hand _nospec events to count only data accesses that commit\n" + " or for memoryspace profiling.\n\n")); + } + + fprintf (f_usage, GTXT (" == TLB misses.\n" + " The Translation Lookaside Buffer (TLB) is a cache of virtual-to-physical\n" + " page translations.")); + fprintf (f_usage, GTXT (" If a virtual address (VA) is not represented in the\n" + " TLB, an expensive hardware table walk (HWTW) must be conducted.")); + fprintf (f_usage, GTXT (" If the\n" + " page is still not found, a trap results. There is a data TLB (DTLB) and\n" + " an instruction TLB (ITLB).\n\n")); + fprintf (f_usage, GTXT (" TLB misses can be counted by:\n")); + fprintf (f_usage, NTXT (" %s\n"), + cpuver == CPC_SPARC_M7 ? + NTXT ("DTLB_HWTW_search ITLB_HWTW_search") : + cpuver == CPC_SPARC_M8 ? + NTXT ("DTLB_HWTW ITLB_HWTW") : + NTXT ("DTLB_miss_asynch ITLB_miss_asynch")); + fprintf (f_usage, GTXT (" or broken down by page size:\n")); + fprintf (f_usage, NTXT (" %s"), + cpuver == CPC_SPARC_M7 ? + NTXT ("DTLB_HWTW_hit_8K ITLB_HWTW_hit_8K\n" + " DTLB_HWTW_hit_64K ITLB_HWTW_hit_64K\n" + " DTLB_HWTW_hit_4M ITLB_HWTW_hit_4M\n") : + NTXT ("DTLB_fill_8KB ITLB_fill_8KB\n" + " DTLB_fill_64KB ITLB_fill_64KB\n" + " DTLB_fill_4MB ITLB_fill_4MB\n")); + fprintf (f_usage, NTXT (" %s\n\n"), + cpuver == CPC_SPARC_M7 ? + NTXT ("DTLB_HWTW_hit_256M ITLB_HWTW_hit_256M\n" + " DTLB_HWTW_hit_2G_16G ITLB_HWTW_hit_2G_16G\n" + " DTLB_HWTW_miss_trap ITLB_HWTW_miss_trap") : + cpuver == CPC_SPARC_M8 ? 
+ NTXT ("DTLB_HWTW_hit_256M ITLB_HWTW_hit_256M\n" + " DTLB_HWTW_hit_16G ITLB_HWTW_hit_16G\n" + " DTLB_HWTW_hit_1T ITLB_HWTW_hit_1T") : + NTXT ("DTLB_fill_256MB ITLB_fill_256MB\n" + " DTLB_fill_2GB ITLB_fill_2GB\n" + " DTLB_fill_trap ITLB_fill_trap")); + if (cpuver == CPC_SPARC_M8) + { + fprintf (f_usage, GTXT (" TLB traps, which can require hundreds of cycles, can be counted with:\n")); + fprintf (f_usage, NTXT (" %s\n\n"), + NTXT ("DTLB_fill_trap ITLB_fill_trap")); + } + + fprintf (f_usage, GTXT (" == Branch misprediction.\n" + " Count branch mispredictions with:\n" + " Br_mispred\n" + " It is the total of:\n" + " Br_dir_mispred direction was mispredicted\n" + " %s target was mispredicted\n" + "\n"), cpuver == CPC_SPARC_M7 ? NTXT ("Br_tgt_mispred") : NTXT ("Br_trg_mispred")); + + fprintf (f_usage, GTXT (" == RAW hazards.\n" + " A read-after-write (RAW) delay occurs when we attempt to read a datum\n" + " before an earlier write has had time to complete:\n")); + if (cpuver == CPC_SPARC_M8) + { + fprintf (f_usage, NTXT (" RAW_hit\n")); + fprintf (f_usage, GTXT (" RAW_hit events can be broken down into:\n")); + } + else + { + fprintf (f_usage, NTXT (" RAW_hit_st_q~emask=0xf\n")); + fprintf (f_usage, GTXT (" The mask 0xf counts the total of all types such as:\n")); + } + fprintf (f_usage, NTXT (" RAW_hit_st_buf write is still in store buffer\n" + " RAW_hit_st_q write is still in store queue\n" + "\n")); + if (cpuver == CPC_SPARC_M7) + { + fprintf (f_usage, GTXT (" == Flush.\n" + " One can count the number of times the pipeline must be flushed:\n")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("Flush_L3_miss"), GTXT ("load missed L3 and >1 strand is active on the core")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("Flush_br_mispred"), GTXT ("branch misprediction")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("Flush_arch_exception"), GTXT ("SPARC exceptions and trap entry/return")); + fprintf (f_usage, NTXT (" %-22s %s\n"), + NTXT ("Flush_other"), 
GTXT ("state change to/from halted/paused")); + fprintf (f_usage, NTXT ("\n")); + } +} + +static void +hwc_usage_internal (int forKernel, FILE *f_usage, const char *cmd, const char *dataspace_msg, int show_syntax, int show_short_desc) +{ + if (!VALID_FOR_KERNEL (forKernel)) + return; + char cpuname[128]; + hwc_get_cpuname (cpuname, 128); + Hwcentry** raw_ctrs = hwc_get_raw_ctrs (forKernel); + int has_raw_ctrs = (raw_ctrs && raw_ctrs[0]); + Hwcentry** std_ctrs = hwc_get_std_ctrs (forKernel); + int has_std_ctrs = (std_ctrs && std_ctrs[0]); + unsigned hwc_maxregs = hwc_get_max_concurrent (forKernel); + int cpuver = hwc_get_cpc_cpuver (); + if (hwc_maxregs != 0) + { + if (show_syntax) + { + fprintf (f_usage, GTXT ("\nSpecifying HW counters on `%s' (cpuver=%d):\n\n"), cpuname, cpuver); + fprintf (f_usage, GTXT (" -h {auto|lo|on|hi}\n")); + fprintf (f_usage, GTXT ("\tturn on default set of HW counters at the specified rate\n")); + if (hwc_maxregs == 1) + { + fprintf (f_usage, GTXT (" -h <ctr_def>\n")); + fprintf (f_usage, GTXT ("\tspecify HW counter profiling for one HW counter only\n")); + } + else + { + fprintf (f_usage, GTXT (" -h <ctr_def> [-h <ctr_def>]...\n")); + fprintf (f_usage, GTXT (" -h <ctr_def>[,<ctr_def>]...\n")); + fprintf (f_usage, GTXT ("\tspecify HW counter profiling for up to %u HW counters\n"), hwc_maxregs); + } + fprintf (f_usage, NTXT ("\n")); + } + else + { + fprintf (f_usage, GTXT ("\nSpecifying HW counters on `%s' (cpuver=%d)\n\n"), cpuname, cpuver); + if (hwc_maxregs == 1) + fprintf (f_usage, GTXT (" Hardware counter profiling is supported for only one counter.\n")); + else + fprintf (f_usage, GTXT (" Hardware counter profiling is supported for up to %u HW counters.\n"), hwc_maxregs); + } + } + else + { + if (!IS_KERNEL (forKernel)) + { // EUGENE I don't see why we don't also use this for er_kernel + char buf[1024]; + *buf = 0; + char *pch = hwcfuncs_errmsg_get (buf, sizeof (buf), 0); + if (*pch) + fprintf (f_usage, GTXT ("HW counter profiling 
is not supported on this system: %s%s"), + pch, pch[strlen (pch) - 1] == '\n' ? "" : "\n"); + else + fprintf (f_usage, GTXT ("HW counter profiling is not supported on this system\n")); + } + return; + } + + /* At this point, we know we have counters */ + char**hwc_attrs = hwc_get_attrs (forKernel); + int has_attrs = (hwc_attrs && hwc_attrs[0]); + if (show_syntax) + { + const char *reg_s = show_regs ? "[/<reg#>]" : ""; + const char *attr_s = has_attrs ? "[[~<attr>=<val>]...]" : ""; + fprintf (f_usage, GTXT (" <ctr_def> == <ctr>%s%s,[<rate>]\n"), attr_s, reg_s); + if (dataspace_msg) + fprintf (f_usage, NTXT ("%s"), dataspace_msg); + fprintf (f_usage, GTXT (" <ctr>\n")); + fprintf (f_usage, GTXT (" counter name, ")); + } + else + fprintf (f_usage, GTXT (" Counter name ")); + fprintf (f_usage, GTXT ("must be selected from the available counters\n" + " listed below. On most systems, if a counter is not listed\n" + " below, it may still be specified by its numeric value.\n")); + if (cpcx_has_precise[forKernel]) + { + if (!forKernel) + fprintf (f_usage, GTXT (" Counters labeled as 'memoryspace' in the list below will\n" + " collect memoryspace data by default.\n")); + } + fprintf (f_usage, GTXT ("\n")); + if (has_attrs) + { + if (show_syntax) + { + fprintf (f_usage, GTXT (" ~<attr>=<val>\n")); + fprintf (f_usage, GTXT (" optional attribute where <val> can be in decimal or hex\n" + " format, and <attr> can be one of: \n")); + } + else + fprintf (f_usage, GTXT (" Optional attribute where <val> can be in decimal or hex\n" + " format, and <attr> can be one of: \n")); + for (char **pattr = hwc_attrs; *pattr; pattr++) + fprintf (f_usage, NTXT (" `%s'\n"), *pattr); + if (show_syntax) + fprintf (f_usage, GTXT (" Multiple attributes may be specified, and each must be preceded by a ~.\n\n")); + else + fprintf (f_usage, GTXT (" Multiple attributes may be specified.\n\n")); + if (IS_KERNEL (forKernel)) + fprintf (f_usage, GTXT (" Other attributes may be supported by the chip, but are 
not supported by DTrace and will be ignored by er_kernel.\n\n")); + } + + if (show_syntax) + { + if (show_regs) + fprintf (f_usage, GTXT (" /<reg#>\n" + " forces use of a specific hardware register. (Solaris only)\n" + " If not specified, %s will attempt to place the counter into the first\n" + " available register and as a result may be unable to place\n" + " subsequent counters due to register conflicts.\n" + " The / in front of the register number is required if a register is specified.\n\n"), + cmd); + + fprintf (f_usage, GTXT (" <rate> == {auto|lo|on|hi}\n")); + fprintf (f_usage, GTXT (" `auto' (default) match the rate used by clock profiling.\n")); + fprintf (f_usage, GTXT (" If clock profiling is disabled, use `on'.\n")); + fprintf (f_usage, GTXT (" `lo' per-thread maximum rate of ~10 samples/second\n")); + fprintf (f_usage, GTXT (" `on' per-thread maximum rate of ~100 samples/second\n")); + fprintf (f_usage, GTXT (" `hi' per-thread maximum rate of ~1000 samples/second\n\n")); + fprintf (f_usage, GTXT (" <rate> == <interval>\n")); + fprintf (f_usage, GTXT (" event interval; see collect (1) for details\n\n")); + + fprintf (f_usage, GTXT (" A comma ',' followed immediately by white space may be omitted.\n\n")); + } + + /* default counters */ + fprintf (f_usage, GTXT ("Default set of HW counters:\n\n")); + char * defctrs = hwc_get_default_cntrs2 (forKernel, 1); + if (defctrs == NULL) + fprintf (f_usage, GTXT (" No default HW counter set defined for this system.\n")); + else if (strlen (defctrs) == 0) + { + char *s = hwc_get_orig_default_cntrs (forKernel); + fprintf (f_usage, GTXT (" The default HW counter set (%s) defined for %s cannot be loaded on this system.\n"), + s, cpuname); + free (s); + free (defctrs); + } + else + { + char *defctrs2 = hwc_get_default_cntrs2 (forKernel, 2); + fprintf (f_usage, GTXT (" -h %s\n"), defctrs); + free (defctrs2); + free (defctrs); + } + + /* long listings */ + char tmp[1024]; + if (has_std_ctrs) + { + fprintf (f_usage, GTXT 
("\nAliases for most useful HW counters:\n\n")); + format_columns (tmp, 1024, "alias", "raw name", "type ", "units", "regs", "description"); + fprintf (f_usage, NTXT (" %s\n\n"), tmp); + for (Hwcentry **pctr = std_ctrs; *pctr; pctr++) + { + Hwcentry *ctr = *pctr; + hwc_hwcentry_string_internal (tmp, sizeof (tmp), ctr, 0); + fprintf (f_usage, NTXT (" %s\n"), tmp); + } + } + if (has_raw_ctrs) + { + fprintf (f_usage, GTXT ("\nRaw HW counters:\n\n")); + hwc_usage_raw_overview_sparc (f_usage, cpuver); + format_columns (tmp, 1024, "name", NULL, "type ", "units", "regs", "description"); + fprintf (f_usage, NTXT (" %s\n\n"), tmp); + for (Hwcentry **pctr = raw_ctrs; *pctr; pctr++) + { + Hwcentry *ctr = *pctr; + hwc_hwcentry_string_internal (tmp, sizeof (tmp), ctr, show_short_desc); + fprintf (f_usage, NTXT (" %s\n"), tmp); + } + } + + /* documentation notice */ + hwc_get_docref (tmp, 1024); + if (strlen (tmp)) + fprintf (f_usage, NTXT ("\n%s\n"), tmp); +} + +/* Print a description of "-h" usage, largely common to collect and er_kernel. 
*/ +void +hwc_usage (int forKernel, const char *cmd, const char *dataspace_msg) +{ + hwc_usage_internal (forKernel, stdout, cmd, dataspace_msg, 1, 0); +} + +void +hwc_usage_f (int forKernel, FILE *f, const char *cmd, const char *dataspace_msg, int show_syntax, int show_short_desc) +{ + hwc_usage_internal (forKernel, f, cmd, dataspace_msg, show_syntax, show_short_desc); +} + +/*---------------------------------------------------------------------------*/ +/* init functions */ + +static char* supported_pebs_counters[] = { + "mem_inst_retired.latency_above_threshold", + "mem_trans_retired.load_latency", + "mem_trans_retired.precise_store", + NULL +}; + +/* callback, (see setup_cpc()) called for each valid regno/name combo */ + +/* builds rawlist,, creates and updates reg_list[] arrays in stdlist table */ +static void +hwc_cb (uint_t cpc_regno, const char *name) +{ + regno_t regno = cpc_regno; /* convert type */ + list_add (&unfiltered_raw, regno, name); +} + +/* input: + * forKernel: 1 - generate lists for er_kernel, 0 - generate lists for collect + * + * raw_orig: HWCs as generated by hwc_cb() + * output: + * pstd_out[], praw_out[]: malloc'd array of pointers to malloc'd hwcentry, or NULL + */ +static void +hwc_process_raw_ctrs (int forKernel, Hwcentry ***pstd_out, + Hwcentry ***praw_out, Hwcentry ***phidden_out, + Hwcentry**static_tables, Hwcentry **raw_unfiltered_in) +{ + // set up output buffers + ptr_list s_outbufs[3]; + ptr_list *std_out = &s_outbufs[0]; + ptr_list_init (std_out); + ptr_list *raw_out = &s_outbufs[1]; + ptr_list_init (raw_out); + ptr_list *hidden_out = &s_outbufs[2]; + ptr_list_init (hidden_out); + +#define NUM_TABLES 3 + ptr_list table_copy[NUM_TABLES]; // copy of data from static tables. 
[0]std, [1]generic, and [2]hidden + for (int tt = 0; tt < NUM_TABLES; tt++) + ptr_list_init (&table_copy[tt]); + + // copy records from std [0] and generic [1] static input tables into table_copy[0],[1],or[2] + for (int tt = 0; tt < 2; tt++) + for (Hwcentry *pctr = static_tables[tt]; pctr && pctr->name; pctr++) + if (is_hidden_alias (pctr)) + list_append_shallow_copy (&table_copy[2], pctr); // hidden list + else + list_append_shallow_copy (&table_copy[tt], pctr); + + // copy raw_unfiltered_in to raw_out + for (int ii = 0; raw_unfiltered_in && raw_unfiltered_in[ii]; ii++) + { + Hwcentry *pctr = raw_unfiltered_in[ii]; + // filter out raw counters that don't work correctly + +#ifdef WORKAROUND_6231196_NIAGARA1_NO_CTR_0 + if (cpcx_cpuver == CPC_ULTRA_T1) + if (!regno_is_valid (pctr, 1)) + continue; /* Niagara can not profile on register zero; skip this */ +#endif + // remove specific PEBs counters when back end doesn't support sampling + const char *name = pctr->name; + if ((cpcx_support_bitmask & HWCFUNCS_SUPPORT_PEBS_SAMPLING) == 0 || forKernel) + { + int skip = 0; + for (int ii = 0; supported_pebs_counters[ii]; ii++) + if (strcmp (supported_pebs_counters[ii], name) == 0) + { + skip = 1; + break; + } + if (skip) + continue; + } + + Hwcentry *pnew = list_append_shallow_copy (raw_out, pctr); +#ifdef WORKAROUND_6231196_NIAGARA1_NO_CTR_0 + if (cpcx_cpuver == CPC_ULTRA_T1) + { + free (pnew->reg_list); + pnew->reg_list = NULL; + regno_add (pnew, 1); // only allow register 1 + } +#endif + } // raw_unfiltered_in + + // Scan raw counters to populate Hwcentry fields from matching static_tables entries + // Also populate reg_list for aliases found in table_copy[] + for (int uu = 0; uu < raw_out->sz; uu++) + { + Hwcentry *praw = (Hwcentry*) raw_out->array[uu]; + Hwcentry *pstd = NULL; // set if non-alias entry from std table matches + char *name = praw->name; + /* in the standard counter and generic lists, + update reg_list for all matching items */ + for (int tt = 0; tt < 
NUM_TABLES; tt++) + { // std, generic, and hidden + if (table_copy[tt].sz == 0) + continue; + Hwcentry **array = (Hwcentry**) table_copy[tt].array; + for (int jj = 0; array[jj]; jj++) + { // all table counters + Hwcentry *pctr = array[jj]; + char *pname; + if (pctr->int_name) + pname = pctr->int_name; + else + pname = pctr->name; + if (!is_same (name, pname, '~')) + continue; + + /* truncated pname matches <name>... */ + // check to see if table entry applies only to specific register + int specific_reg_num_only = 0; + if (pctr->reg_num != REGNO_ANY) + { + // table entry applies only to specific register + if (!regno_is_valid (praw, pctr->reg_num)) + continue; + specific_reg_num_only = 1; + } + + // Match! + // Update cpu_table_copy's supported registers + if (specific_reg_num_only) + regno_add (pctr, pctr->reg_num); + else + pctr->reg_list = praw->reg_list; + + if (!is_visible_alias (pctr) && !is_hidden_alias (pctr)) + { + // Note: we could expand criteria to also allow aliases to set default rates for raw HWCs + /* This is an 'internal' raw counter */ + if (!pstd) + pstd = pctr; /* use info as a template when adding to raw list */ + else + hwcentry_print (DBG_LT0, "hwctable: hwc_cb: Warning: " + "counter %s appears in table more than once: ", + pstd); + } + }/* for table rows */ + }/* for std and generic tables */ + + if (pstd) + { + /* the main table had an entry that matched <name> exactly */ + /* Apply the main table entry as a template */ + *praw = *pstd; + } + }/* for (raw_out) */ + + // update std_out and hidden_out + for (int tt = 0; tt < NUM_TABLES; tt++) + { + if (tt == 1 /*skip std_raw*/ || table_copy[tt].sz == 0) + continue; + Hwcentry *pctr; + for (int ii = 0; (pctr = table_copy[tt].array[ii]); ii++) + { + // prune unsupported rows from std table + if (!is_visible_alias (pctr) && !is_hidden_alias (pctr)) + continue; // only aliases + if (REG_LIST_IS_EMPTY (pctr->reg_list)) + { + if (is_numeric_alias (pctr)) + { +#if 1 //22844570 DTrace cpc provider 
does not accept numeric counter names + if (forKernel) + continue; +#endif + regno_add (pctr, REGNO_ANY); // hwcs specified by number allowed on any register + } + else + continue; + } + + ptr_list *dest = (tt == 0) ? std_out : hidden_out; + Hwcentry *isInList; + if (pctr->short_desc == NULL) + { + isInList = ptrarray_find_by_name ((Hwcentry**) raw_out->array, pctr->int_name); + if (isInList) + pctr->short_desc = isInList->short_desc; // copy the raw counter's detailed description + } + isInList = ptrarray_find_by_name ((Hwcentry**) dest->array, pctr->name); + if (isInList) + hwcentry_print (DBG_LT0, "hwctable: hwc_cb: Warning: " + "counter %s appears in alias list more than once: ", + pctr); + else + list_append_shallow_copy (dest, pctr); + } + } + for (int tt = 0; tt < NUM_TABLES; tt++) + ptr_list_free (&table_copy[tt]); + + if (forKernel) + { + // for er_kernel, use baseline value of PRELOAD_DEF_ERKERNEL instead of PRELOAD_DEF + for (int tt = 0; tt < 3; tt++) + { // std_out-0, raw_out-1, hidden_out-2 + Hwcentry** hwcs = (Hwcentry**) (s_outbufs[tt].array); + for (int ii = 0; hwcs && hwcs[ii]; ii++) + { + Hwcentry *hwc = hwcs[ii]; + if (hwc->val == PRELOAD_DEF) + hwc->val = PRELOAD_DEF_ERKERNEL; + } + } + } + *pstd_out = (Hwcentry**) std_out->array; + *praw_out = (Hwcentry**) raw_out->array; + *phidden_out = (Hwcentry**) hidden_out->array; +} + +/* callback, (see setup_cpc()) called for each valid attribute */ +/* builds attrlist */ +static void +attrs_cb (const char *attr) +{ + Tprintf (DBG_LT3, "hwctable: attrs_cb(): %s\n", attr); + if (strcmp (attr, "picnum") == 0) + return; /* don't make this attribute available to users */ + ptr_list_add (&unfiltered_attrs, (void*) strdup (attr)); +} + +/* returns true if attribute is valid for this platform */ +static int +attr_is_valid (int forKernel, const char *attr) +{ + setup_cpcx (); + if (!VALID_FOR_KERNEL (forKernel) || !cpcx_attrs[forKernel]) + return 0; + for (int ii = 0; cpcx_attrs[forKernel][ii]; ii++) + if 
(strcmp (attr, cpcx_attrs[forKernel][ii]) == 0) + return 1; + return 0; +} diff --git a/gprofng/common/opteron_pcbe.c b/gprofng/common/opteron_pcbe.c new file mode 100644 index 0000000..d479945 --- /dev/null +++ b/gprofng/common/opteron_pcbe.c @@ -0,0 +1,448 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/* + * This file contains preset event names from the Performance Application + * Programming Interface v3.5 which included the following notice: + * + * Copyright (c) 2005,6 + * Innovative Computing Labs + * Computer Science Department, + * University of Tennessee, + * Knoxville, TN. + * All Rights Reserved. + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of the University of Tennessee nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * This open source software license conforms to the BSD License template. + */ + +/* + * Performance Counter Back-End for AMD Opteron and AMD Athlon 64 processors. 
+ */ + +#include <sys/types.h> +#include "hwcdrv.h" + +#define CPU /* used by cpuid_get*() functions */ + +typedef struct _amd_event +{ + char *name; + uint16_t emask; /* Event mask setting */ + uint8_t umask_valid; /* Mask of unreserved UNIT_MASK bits */ +} amd_event_t; + +typedef struct _amd_generic_event +{ + char *name; + char *event; + uint8_t umask; +} amd_generic_event_t; + +#define EV_END { NULL, 0, 0 } +#define GEN_EV_END { NULL, NULL, 0 } + +#define AMD_cmn_events \ + { "FP_dispatched_fpu_ops", 0x00, 0x3F }, \ + { "FP_cycles_no_fpu_ops_retired", 0x01, 0x0 }, \ + { "FP_dispatched_fpu_ops_ff", 0x02, 0x0 }, \ + { "LS_seg_reg_load", 0x20, 0x7F }, \ + { "LS_uarch_resync_self_modify", 0x21, 0x0 }, \ + { "LS_uarch_resync_snoop", 0x22, 0x0 }, \ + { "LS_buffer_2_full", 0x23, 0x0 }, \ + { "LS_retired_cflush", 0x26, 0x0 }, \ + { "LS_retired_cpuid", 0x27, 0x0 }, \ + { "DC_access", 0x40, 0x0 }, \ + { "DC_miss", 0x41, 0x0 }, \ + { "DC_refill_from_L2", 0x42, 0x1F }, \ + { "DC_refill_from_system", 0x43, 0x1F }, \ + { "DC_misaligned_data_ref", 0x47, 0x0 }, \ + { "DC_uarch_late_cancel_access", 0x48, 0x0 }, \ + { "DC_uarch_early_cancel_access", 0x49, 0x0 }, \ + { "DC_dispatched_prefetch_instr", 0x4B, 0x7 }, \ + { "DC_dcache_accesses_by_locks", 0x4C, 0x2 }, \ + { "BU_memory_requests", 0x65, 0x83}, \ + { "BU_data_prefetch", 0x67, 0x3 }, \ + { "BU_cpu_clk_unhalted", 0x76, 0x0 }, \ + { "IC_fetch", 0x80, 0x0 }, \ + { "IC_miss", 0x81, 0x0 }, \ + { "IC_refill_from_L2", 0x82, 0x0 }, \ + { "IC_refill_from_system", 0x83, 0x0 }, \ + { "IC_itlb_L1_miss_L2_hit", 0x84, 0x0 }, \ + { "IC_uarch_resync_snoop", 0x86, 0x0 }, \ + { "IC_instr_fetch_stall", 0x87, 0x0 }, \ + { "IC_return_stack_hit", 0x88, 0x0 }, \ + { "IC_return_stack_overflow", 0x89, 0x0 }, \ + { "FR_retired_x86_instr_w_excp_intr", 0xC0, 0x0 }, \ + { "FR_retired_uops", 0xC1, 0x0 }, \ + { "FR_retired_branches_w_excp_intr", 0xC2, 0x0 }, \ + { "FR_retired_branches_mispred", 0xC3, 0x0 }, \ + { "FR_retired_taken_branches", 0xC4, 0x0 
}, \ + { "FR_retired_taken_branches_mispred", 0xC5, 0x0 }, \ + { "FR_retired_far_ctl_transfer", 0xC6, 0x0 }, \ + { "FR_retired_resyncs", 0xC7, 0x0 }, \ + { "FR_retired_near_rets", 0xC8, 0x0 }, \ + { "FR_retired_near_rets_mispred", 0xC9, 0x0 }, \ + { "FR_retired_taken_branches_mispred_addr_miscomp", 0xCA, 0x0 }, \ + { "FR_retired_fastpath_double_op_instr", 0xCC, 0x7 }, \ + { "FR_intr_masked_cycles", 0xCD, 0x0 }, \ + { "FR_intr_masked_while_pending_cycles", 0xCE, 0x0 }, \ + { "FR_taken_hardware_intrs", 0xCF, 0x0 }, \ + { "FR_nothing_to_dispatch", 0xD0, 0x0 }, \ + { "FR_dispatch_stalls", 0xD1, 0x0 }, \ + { "FR_dispatch_stall_branch_abort_to_retire", 0xD2, 0x0 }, \ + { "FR_dispatch_stall_serialization", 0xD3, 0x0 }, \ + { "FR_dispatch_stall_segment_load", 0xD4, 0x0 }, \ + { "FR_dispatch_stall_reorder_buffer_full", 0xD5, 0x0 }, \ + { "FR_dispatch_stall_resv_stations_full", 0xD6, 0x0 }, \ + { "FR_dispatch_stall_fpu_full", 0xD7, 0x0 }, \ + { "FR_dispatch_stall_ls_full", 0xD8, 0x0 }, \ + { "FR_dispatch_stall_waiting_all_quiet", 0xD9, 0x0 }, \ + { "FR_dispatch_stall_far_ctl_trsfr_resync_branch_pend", 0xDA, 0x0 },\ + { "FR_fpu_exception", 0xDB, 0xF }, \ + { "FR_num_brkpts_dr0", 0xDC, 0x0 }, \ + { "FR_num_brkpts_dr1", 0xDD, 0x0 }, \ + { "FR_num_brkpts_dr2", 0xDE, 0x0 }, \ + { "FR_num_brkpts_dr3", 0xDF, 0x0 }, \ + { "NB_mem_ctrlr_bypass_counter_saturation", 0xE4, 0xF } + +#define OPT_events \ + { "LS_locked_operation", 0x24, 0x7 }, \ + { "DC_copyback", 0x44, 0x1F }, \ + { "DC_dtlb_L1_miss_L2_hit", 0x45, 0x0 }, \ + { "DC_dtlb_L1_miss_L2_miss", 0x46, 0x0 }, \ + { "DC_1bit_ecc_error_found", 0x4A, 0x3 }, \ + { "BU_system_read_responses", 0x6C, 0x7 }, \ + { "BU_quadwords_written_to_system", 0x6D, 0x1 }, \ + { "BU_internal_L2_req", 0x7D, 0x1F }, \ + { "BU_fill_req_missed_L2", 0x7E, 0x7 }, \ + { "BU_fill_into_L2", 0x7F, 0x1 }, \ + { "IC_itlb_L1_miss_L2_miss", 0x85, 0x0 }, \ + { "FR_retired_fpu_instr", 0xCB, 0xF }, \ + { "NB_mem_ctrlr_page_access", 0xE0, 0x7 }, \ + { 
"NB_mem_ctrlr_page_table_overflow", 0xE1, 0x0 }, \ + { "NB_mem_ctrlr_turnaround", 0xE3, 0x7 }, \ + { "NB_ECC_errors", 0xE8, 0x80}, \ + { "NB_sized_commands", 0xEB, 0x7F }, \ + { "NB_probe_result", 0xEC, 0x7F}, \ + { "NB_gart_events", 0xEE, 0x7 }, \ + { "NB_ht_bus0_bandwidth", 0xF6, 0xF }, \ + { "NB_ht_bus1_bandwidth", 0xF7, 0xF }, \ + { "NB_ht_bus2_bandwidth", 0xF8, 0xF } + +#define OPT_RevD_events \ + { "NB_sized_blocks", 0xE5, 0x3C } + +#define OPT_RevE_events \ + { "NB_cpu_io_to_mem_io", 0xE9, 0xFF}, \ + { "NB_cache_block_commands", 0xEA, 0x3D} + +#define AMD_FAMILY_10h_cmn_events \ + { "FP_retired_sse_ops", 0x3, 0x7F}, \ + { "FP_retired_move_ops", 0x4, 0xF}, \ + { "FP_retired_serialize_ops", 0x5, 0xF}, \ + { "FP_serialize_ops_cycles", 0x6, 0x3}, \ + { "DC_copyback", 0x44, 0x7F }, \ + { "DC_dtlb_L1_miss_L2_hit", 0x45, 0x3 }, \ + { "DC_dtlb_L1_miss_L2_miss", 0x46, 0x7 }, \ + { "DC_1bit_ecc_error_found", 0x4A, 0xF }, \ + { "DC_dtlb_L1_hit", 0x4D, 0x7 }, \ + { "BU_system_read_responses", 0x6C, 0x17 }, \ + { "BU_octwords_written_to_system", 0x6D, 0x1 }, \ + { "BU_internal_L2_req", 0x7D, 0x3F }, \ + { "BU_fill_req_missed_L2", 0x7E, 0xF }, \ + { "BU_fill_into_L2", 0x7F, 0x3 }, \ + { "IC_itlb_L1_miss_L2_miss", 0x85, 0x3 }, \ + { "IC_eviction", 0x8B, 0x0 }, \ + { "IC_cache_lines_invalidate", 0x8C, 0xF }, \ + { "IC_itlb_reload", 0x99, 0x0 }, \ + { "IC_itlb_reload_aborted", 0x9A, 0x0 }, \ + { "FR_retired_mmx_sse_fp_instr", 0xCB, 0x7 }, \ + { "NB_mem_ctrlr_page_access", 0xE0, 0xFF }, \ + { "NB_mem_ctrlr_page_table_overflow", 0xE1, 0x3 }, \ + { "NB_mem_ctrlr_turnaround", 0xE3, 0x3F }, \ + { "NB_thermal_status", 0xE8, 0x7C}, \ + { "NB_sized_commands", 0xEB, 0x3F }, \ + { "NB_probe_results_upstream_req", 0xEC, 0xFF}, \ + { "NB_gart_events", 0xEE, 0xFF }, \ + { "NB_ht_bus0_bandwidth", 0xF6, 0xBF }, \ + { "NB_ht_bus1_bandwidth", 0xF7, 0xBF }, \ + { "NB_ht_bus2_bandwidth", 0xF8, 0xBF }, \ + { "NB_ht_bus3_bandwidth", 0x1F9, 0xBF }, \ + { "LS_locked_operation", 0x24, 0xF }, \ + { 
"LS_cancelled_store_to_load_fwd_ops", 0x2A, 0x7 }, \ + { "LS_smi_received", 0x2B, 0x0 }, \ + { "LS_ineffective_prefetch", 0x52, 0x9 }, \ + { "LS_global_tlb_flush", 0x54, 0x0 }, \ + { "NB_mem_ctrlr_dram_cmd_slots_missed", 0xE2, 0x3 }, \ + { "NB_mem_ctrlr_req", 0x1F0, 0xFF }, \ + { "CB_cpu_to_dram_req_to_target", 0x1E0, 0xFF }, \ + { "CB_io_to_dram_req_to_target", 0x1E1, 0xFF }, \ + { "CB_cpu_read_cmd_latency_to_target_0_to_3", 0x1E2, 0xFF }, \ + { "CB_cpu_read_cmd_req_to_target_0_to_3", 0x1E3, 0xFF }, \ + { "CB_cpu_read_cmd_latency_to_target_4_to_7", 0x1E4, 0xFF }, \ + { "CB_cpu_read_cmd_req_to_target_4_to_7", 0x1E5, 0xFF }, \ + { "CB_cpu_cmd_latency_to_target_0_to_7", 0x1E6, 0xFF }, \ + { "CB_cpu_req_to_target_0_to_7", 0x1E7, 0xFF }, \ + { "L3_read_req", 0x4E0, 0xF7 }, \ + { "L3_miss", 0x4E1, 0xF7 }, \ + { "L3_l2_eviction_l3_fill", 0x4E2, 0xFF }, \ + { "L3_eviction", 0x4E3, 0xF } + +#define AMD_cmn_generic_events \ + { "PAPI_br_ins", "FR_retired_branches_w_excp_intr", 0x0 },\ + { "PAPI_br_msp", "FR_retired_branches_mispred", 0x0 }, \ + { "PAPI_br_tkn", "FR_retired_taken_branches", 0x0 }, \ + { "PAPI_fp_ops", "FP_dispatched_fpu_ops", 0x3 }, \ + { "PAPI_fad_ins", "FP_dispatched_fpu_ops", 0x1 }, \ + { "PAPI_fml_ins", "FP_dispatched_fpu_ops", 0x2 }, \ + { "PAPI_fpu_idl", "FP_cycles_no_fpu_ops_retired", 0x0 }, \ + { "PAPI_tot_cyc", "BU_cpu_clk_unhalted", 0x0 }, \ + { "PAPI_tot_ins", "FR_retired_x86_instr_w_excp_intr", 0x0 }, \ + { "PAPI_l1_dca", "DC_access", 0x0 }, \ + { "PAPI_l1_dcm", "DC_miss", 0x0 }, \ + { "PAPI_l1_ldm", "DC_refill_from_L2", 0xe }, \ + { "PAPI_l1_stm", "DC_refill_from_L2", 0x10 }, \ + { "PAPI_l1_ica", "IC_fetch", 0x0 }, \ + { "PAPI_l1_icm", "IC_miss", 0x0 }, \ + { "PAPI_l1_icr", "IC_fetch", 0x0 }, \ + { "PAPI_l2_dch", "DC_refill_from_L2", 0x1e }, \ + { "PAPI_l2_dcm", "DC_refill_from_system", 0x1e }, \ + { "PAPI_l2_dcr", "DC_refill_from_L2", 0xe }, \ + { "PAPI_l2_dcw", "DC_refill_from_L2", 0x10 }, \ + { "PAPI_l2_ich", "IC_refill_from_L2", 0x0 }, \ + { 
"PAPI_l2_icm", "IC_refill_from_system", 0x0 }, \ + { "PAPI_l2_ldm", "DC_refill_from_system", 0xe }, \ + { "PAPI_l2_stm", "DC_refill_from_system", 0x10 }, \ + { "PAPI_res_stl", "FR_dispatch_stalls", 0x0 }, \ + { "PAPI_stl_icy", "FR_nothing_to_dispatch", 0x0 }, \ + { "PAPI_hw_int", "FR_taken_hardware_intrs", 0x0 } + +#define OPT_cmn_generic_events \ + { "PAPI_tlb_dm", "DC_dtlb_L1_miss_L2_miss", 0x0 }, \ + { "PAPI_tlb_im", "IC_itlb_L1_miss_L2_miss", 0x0 }, \ + { "PAPI_fp_ins", "FR_retired_fpu_instr", 0xd }, \ + { "PAPI_vec_ins", "FR_retired_fpu_instr", 0x4 } + +#define AMD_FAMILY_10h_generic_events \ + { "PAPI_tlb_dm", "DC_dtlb_L1_miss_L2_miss", 0x7 }, \ + { "PAPI_tlb_im", "IC_itlb_L1_miss_L2_miss", 0x3 }, \ + { "PAPI_l3_dcr", "L3_read_req", 0xf1 }, \ + { "PAPI_l3_icr", "L3_read_req", 0xf2 }, \ + { "PAPI_l3_tcr", "L3_read_req", 0xf7 }, \ + { "PAPI_l3_stm", "L3_miss", 0xf4 }, \ + { "PAPI_l3_ldm", "L3_miss", 0xf3 }, \ + { "PAPI_l3_tcm", "L3_miss", 0xf7 } + +static amd_event_t opt_events_rev_E[] = { + AMD_cmn_events, + OPT_events, + OPT_RevD_events, + OPT_RevE_events, + EV_END +}; + +static amd_event_t family_10h_events[] = { + AMD_cmn_events, + OPT_RevE_events, + AMD_FAMILY_10h_cmn_events, + EV_END +}; + +static amd_generic_event_t opt_generic_events[] = { + AMD_cmn_generic_events, + OPT_cmn_generic_events, + GEN_EV_END +}; + +static amd_generic_event_t family_10h_generic_events[] = { + AMD_cmn_generic_events, + AMD_FAMILY_10h_generic_events, + GEN_EV_END +}; + +static amd_event_t *amd_events = NULL; +static uint_t amd_family; +static amd_generic_event_t *amd_generic_events = NULL; + +#define BITS(v, u, l) (((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1)) +#define OPTERON_FAMILY 0x0f +#define AMD_FAMILY_10H 0x10 + +static int +opt_pcbe_init (void) +{ + amd_family = cpuid_getfamily (); + /* + * Make sure this really _is_ an Opteron or Athlon 64 system. The kernel + * loads this module based on its name in the module directory, but it + * could have been renamed. 
+ */ + if (cpuid_getvendor () != X86_VENDOR_AMD + || (amd_family != OPTERON_FAMILY && amd_family != AMD_FAMILY_10H)) + return (-1); + + /* + * Figure out processor revision here and assign appropriate + * event configuration. + */ + if (amd_family == OPTERON_FAMILY) + { + amd_events = opt_events_rev_E; + amd_generic_events = opt_generic_events; + } + else + { + amd_events = family_10h_events; + amd_generic_events = family_10h_generic_events; + } + return (0); +} + +static uint_t +opt_pcbe_ncounters (void) +{ + return (4); +} + +static const char * +opt_pcbe_impl_name (void) +{ + if (amd_family == OPTERON_FAMILY) + return ("AMD Opteron & Athlon64"); + else if (amd_family == AMD_FAMILY_10H) + return ("AMD Family 10h"); + else + return ("Unknown AMD processor"); +} + +static const char * +opt_pcbe_cpuref (void) +{ + if (amd_family == OPTERON_FAMILY) + return GTXT ("See Chapter 10 of the \"BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD Opteron Processors,\"\nAMD publication #26094"); + else if (amd_family == AMD_FAMILY_10H) + return GTXT ("See section 3.15 of the \"BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h Processors,\"\nAMD publication #31116"); + else + return GTXT ("Unknown AMD processor"); +} + +static int +opt_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb) +{ + int count = 0; + for (uint_t kk = 0; amd_events && amd_events[kk].name; kk++) + for (uint_t jj = 0; jj < opt_pcbe_ncounters (); jj++) + { + hwc_cb (jj, amd_events[kk].name); + count++; + } + for (uint_t kk = 0; amd_generic_events && amd_generic_events[kk].name; kk++) + for (uint_t jj = 0; jj < opt_pcbe_ncounters (); jj++) + { + hwc_cb (jj, amd_generic_events[kk].name); + count++; + } + return count; +} + +static int +opt_pcbe_get_eventnum (const char *eventname, uint_t pmc, eventsel_t *eventsel, + eventsel_t *event_valid_umask, uint_t *pmc_sel) +{ + uint_t kk; + *pmc_sel = pmc; /* for AMD, pmc doesn't need to be adjusted */ + *eventsel = (eventsel_t) - 1; + *event_valid_umask = 
0x0; + + /* search table */ + for (kk = 0; amd_events && amd_events[kk].name; kk++) + { + if (strcmp (eventname, amd_events[kk].name) == 0) + { + *eventsel = EXTENDED_EVNUM_2_EVSEL (amd_events[kk].emask); + *event_valid_umask = amd_events[kk].umask_valid; + return 0; + } + } + + /* search generic */ + int generic = 0; + eventsel_t tmp_umask = 0; + for (kk = 0; amd_generic_events && amd_generic_events[kk].name; kk++) + { + if (strcmp (eventname, amd_generic_events[kk].name) == 0) + { + generic = 1; + eventname = amd_generic_events[kk].event; + tmp_umask = amd_generic_events[kk].umask; + break; + } + } + if (!generic) + return -1; + + /* find real event # for generic event */ + for (kk = 0; amd_events && amd_events[kk].name; kk++) + { + if (strcmp (eventname, amd_events[kk].name) == 0) + { + *eventsel = EXTENDED_EVNUM_2_EVSEL (amd_events[kk].emask); + *eventsel |= (tmp_umask << PERFCTR_UMASK_SHIFT); + *event_valid_umask = 0; /* user umask not allowed w/generic events */ + return 0; + } + } + return -1; +} + +static hdrv_pcbe_api_t hdrv_pcbe_opteron_api = { + opt_pcbe_init, + opt_pcbe_ncounters, + opt_pcbe_impl_name, + opt_pcbe_cpuref, + opt_pcbe_get_events, + opt_pcbe_get_eventnum +}; |