aboutsummaryrefslogtreecommitdiff
path: root/gprofng/common
diff options
context:
space:
mode:
authorVladimir Mezentsev <vladimir.mezentsev@oracle.com>2022-03-11 08:58:31 +0000
committerNick Clifton <nickc@redhat.com>2022-03-11 08:58:31 +0000
commitbb368aad297fe3ad40cf397e6fc85aa471429a28 (patch)
tree0ab25909b8fe789d676bbdb00d501d4d485e4afe /gprofng/common
parenta655f19af95eb685ba64f48ee8fc2b3b7a3d886a (diff)
downloadfsf-binutils-gdb-bb368aad297fe3ad40cf397e6fc85aa471429a28.zip
fsf-binutils-gdb-bb368aad297fe3ad40cf397e6fc85aa471429a28.tar.gz
fsf-binutils-gdb-bb368aad297fe3ad40cf397e6fc85aa471429a28.tar.bz2
gprofng: a new GNU profiler
top-level * Makefile.def: Add gprofng module. * configure.ac: Add --enable-gprofng option. * src-release.sh: Add gprofng. * Makefile.in: Regenerate. * configure: Regenerate. * gprofng: New directory. binutils * MAINTAINERS: Add gprofng maintainer. * README-how-to-make-a-release: Add gprofng. include. * collectorAPI.h: New file. * libcollector.h: New file. * libfcollector.h: New file.
Diffstat (limited to 'gprofng/common')
-rw-r--r--gprofng/common/cc_libcollector.h44
-rw-r--r--gprofng/common/config.h.in117
-rw-r--r--gprofng/common/core_pcbe.c3023
-rw-r--r--gprofng/common/cpu_frequency.h303
-rw-r--r--gprofng/common/cpuid.c203
-rw-r--r--gprofng/common/gp-defs.h58
-rw-r--r--gprofng/common/gp-experiment.h186
-rw-r--r--gprofng/common/gp-time.h46
-rw-r--r--gprofng/common/hwc_cpus.h198
-rw-r--r--gprofng/common/hwcdrv.c1454
-rw-r--r--gprofng/common/hwcdrv.h330
-rw-r--r--gprofng/common/hwcentry.h417
-rw-r--r--gprofng/common/hwcfuncs.c704
-rw-r--r--gprofng/common/hwcfuncs.h269
-rw-r--r--gprofng/common/hwctable.c5410
-rw-r--r--gprofng/common/opteron_pcbe.c448
16 files changed, 13210 insertions, 0 deletions
diff --git a/gprofng/common/cc_libcollector.h b/gprofng/common/cc_libcollector.h
new file mode 100644
index 0000000..e078541
--- /dev/null
+++ b/gprofng/common/cc_libcollector.h
@@ -0,0 +1,44 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+/*
+ * This file describes the enums, etc. shared by the collector control
+ * class and libcollector and its modules. It is #included in collctrl.h,
+ * so any changes to it should follow the procedure described there.
+ */
+
+#ifndef _CC_LIBCOLLECTOR_H
+#define _CC_LIBCOLLECTOR_H
+
+/* Synchronization-tracing scope flags; OR them together to form a bit mask. */
+#define SYNCSCOPE_NATIVE (1 << 0)
+#define SYNCSCOPE_JAVA (1 << 1)
+
+typedef enum /* Follow_type: the low three bits are used as flags */
+{
+ FOLLOW_NONE = 0, /* no bit set */
+ FOLLOW_EXEC = 1 << 0,
+ FOLLOW_FORK = 1 << 1,
+ FOLLOW_ON = FOLLOW_EXEC | FOLLOW_FORK, /* 0x3 */
+ FOLLOW_COMBO = 1 << 2,
+ FOLLOW_ALL = FOLLOW_EXEC | FOLLOW_FORK | FOLLOW_COMBO /* 0x7 */
+} Follow_type;
+
+#endif /* !_CC_LIBCOLLECTOR_H */
diff --git a/gprofng/common/config.h.in b/gprofng/common/config.h.in
new file mode 100644
index 0000000..e46e64f
--- /dev/null
+++ b/gprofng/common/config.h.in
@@ -0,0 +1,117 @@
+/* common/config.h.in. Generated from configure.ac by autoheader. */
+
+/* Enable debugging output. */
+#undef DEBUG
+
+/* Enable java profiling */
+#undef GPROFNG_JAVA_PROFILING
+
+/* Define to 1 if you have the declaration of `basename', and to 0 if you
+ don't. */
+#undef HAVE_DECL_BASENAME
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define if you have POSIX threads libraries and header files. */
+#undef HAVE_PTHREAD
+
+/* Have PTHREAD_PRIO_INHERIT. */
+#undef HAVE_PTHREAD_PRIO_INHERIT
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the `strsignal' function. */
+#undef HAVE_STRSIGNAL
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+ */
+#undef LT_OBJDIR
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Define to necessary symbol if this constant uses a non-standard name on
+ your system. */
+#undef PTHREAD_CREATE_JOINABLE
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Enable extensions on AIX 3, Interix. */
+#ifndef _ALL_SOURCE
+# undef _ALL_SOURCE
+#endif
+/* Enable GNU extensions on systems that have them. */
+#ifndef _GNU_SOURCE
+# undef _GNU_SOURCE
+#endif
+/* Enable threading extensions on Solaris. */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# undef _POSIX_PTHREAD_SEMANTICS
+#endif
+/* Enable extensions on HP NonStop. */
+#ifndef _TANDEM_SOURCE
+# undef _TANDEM_SOURCE
+#endif
+/* Enable general extensions on Solaris. */
+#ifndef __EXTENSIONS__
+# undef __EXTENSIONS__
+#endif
+
+
+/* Version number of package */
+#undef VERSION
+
+/* Define to 1 if on MINIX. */
+#undef _MINIX
+
+/* Define to 2 if the system does not provide POSIX.1 features except with
+ this defined. */
+#undef _POSIX_1_SOURCE
+
+/* Define to 1 if you need to in order for `stat' and other things to work. */
+#undef _POSIX_SOURCE
diff --git a/gprofng/common/core_pcbe.c b/gprofng/common/core_pcbe.c
new file mode 100644
index 0000000..6f746d8
--- /dev/null
+++ b/gprofng/common/core_pcbe.c
@@ -0,0 +1,3023 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+/*
+ * Performance Counter Back-End for Intel Family 6
+ * Models 15(06_0FH) 23(06_17H) (Core 2)
+ * Models 28(06_1CH) (Atom)
+ * Models 37(06_25H) 44(06_2CH) (Westmere)
+ * Models 26(06_1AH) 30(06_1EH) 31(06_1FH) 46(06_2EH) (Nehalem)
+ * Models 42(06_2AH) 45(06_2DH) (Sandy Bridge)
+ * Models 58(06_3AH) 62(06_3EH) (Ivy Bridge)
+ * Models 60(06_3CH) 63(06_3FH) 69(06_45H) 70(06_46H) (Haswell)
+ * Models 61(06_3DH) 71(06_47H) 79(06_4FH) 86(06_56H) (Broadwell) (79 not listed in Intel SDM as of June 2015)
+ * Models 78(06_4EH) 85(06_55H) 94(06_5EH) (Skylake) (Note Skylake and later: versionID==4)
+ * To add another model number:
+ * - add appropriate table data in the form
+ * #define EVENTS_FAM6_MODXX
+ * - add appropriate table definitions in the form
+ * const struct events_table_t events_fam6_modXX[] =
+ * - set events_table to the appropriate table
+ * using the "switch ( cpuid_getmodel(CPU) )" statement
+ * in core_pcbe_init()
+ * - check the date in core_pcbe_cpuref()
+ * Table data can be derived from:
+ * - the Intel SDM
+ * also https://download.01.org/perfmon/
+ * - libcpc source code in usr/src/uts/intel/pcbe/
+ * - libpfm4
+ * but there are typically inconsistencies among these
+ * sources of data. So, judgment is required.
+ * Other things to do to add a new processor:
+ * x file hwc_cpus.h
+ * add a cpuver enumerator
+ * add lookup entry
+ * x file hwctable.c
+ * add a table (aliases, etc.)
+ * add a cputabs entry, including default metrics
+ * look for other places where the most-recently-added CPU is mentioned
+ * x file cpu_frequency.h
+ * function get_max_turbo_freq()
+ * go to "switch (model)", and add turbo boosts
+ */
+
+#include <sys/types.h>
+#include "hwcdrv.h"
+
+static uint64_t num_gpc; /* number of general-purpose counters (e.g. 2-4) */
+static uint64_t num_ffc; /* number of fixed-function counters (e.g. 3) */
+static uint_t total_pmc; /* num_gpc + num_ffc; NOTE(review): uint_t is presumably narrower than the uint64_t addends -- confirm the sum cannot truncate */
+
+/*
+ * Only the lower 32-bits can be written to in the general-purpose
+ * counters. The higher bits are extended from bit 31; all ones if
+ * bit 31 is one and all zeros otherwise.
+ *
+ * The fixed-function counters do not have this restriction.
+ */
+
+static const char *ffc_names[] = {
+/*
+ * NULL-terminated list of fixed-function counter (FFC) event names.
+ * The list is deliberately empty: although modern Intel processors
+ * have FFCs, on Linux we access HWCs through the perf_event_open()
+ * kernel interface, which gives us no direct access to the FFCs;
+ * the kernel manages the registers opaquely. At best it off-loads
+ * HWCs to FFCs as available, and often the FFCs are commandeered by
+ * other activities such as the NMI watchdog. So we omit any explicit
+ * reference to them.
+ * https://lists.eecs.utk.edu/pipermail/perfapi-devel/2015-February/006895.html
+ * See also bug 21315497.
+ */
+#if 0 /* names kept for reference only; never compiled */
+ "instr_retired.any",
+ "cpu_clk_unhalted.core",
+ "cpu_clk_unhalted.ref",
+#endif
+ NULL /* terminator; the only element actually present */
+};
+
+#define IMPL_NAME_LEN 100 /* size, in bytes, of the core_impl_name buffer */
+static char core_impl_name[IMPL_NAME_LEN]; /* presumably holds the CPU implementation name; filled in outside this view -- confirm */
+
+/*
+ * Most events require only an event code and a umask.
+ * Some also require attributes, cmasks, or MSR programming.
+ * Until Sandy Bridge, the number of these other events
+ * was small and libcpc just ignored them.
+ * With Sandy Bridge, libcpc added support for these
+ * additional events.
+ *
+ * We use an expanded events_table_t here -- patterned
+ * after snb_pcbe_events_table_t in libcpc's
+ * usr/src/uts/intel/pcbe/snb_pcbe.h -- for all processors.
+ *
+ * Correspondingly, we also define ATTR_* macros, but we
+ * define them to set bits as they will appear
+ * in bits 16-23 of the final eventsel. Definitions of those
+ * bits can be found in "struct ia32_perfevtsel" in libcpc's
+ * usr/src/uts/intel/pcbe/intel_pcbe_utils.h .
+ *
+ * For now, I don't know how to handle msr_offset.
+ * So, let's not include events that call for it.
+ *
+ * For now, don't do anything with ATTR_PEBS other than
+ * to note it in tables (starting with Haswell).
+ *
+ * Solaris tables also have ATTR_PEBS_ONLY. We cannot
+ * use these counters from "collect -h" and so do not
+ * include them.
+ */
+#define ATTR_NONE 0x00 /* no attribute bits */
+#define ATTR_EDGE 0x04 /* eventsel bit 18, expressed relative to bit 16 */
+#define ATTR_ANY 0x20 /* eventsel bit 21, expressed relative to bit 16 */
+#define ATTR_INV 0x80 /* eventsel bit 23, expressed relative to bit 16 */
+#define ATTR_PEBS ATTR_NONE /* PEBS not supported: contributes no bits */
+#define ATTR_TSX ATTR_NONE /* TSX MSRs not supported: contributes no bits */
+#undef ATTR_PEBS_ONLY /* PEBS-only events are not supported */
+#undef ATTR_PEBS_ONLY_LD_LAT /* not supported */
+
+struct events_table_t /* one row of a per-model event table */
+{
+ uint32_t eventselect; /* event code */
+ uint32_t unitmask; /* unit mask (umask) for the event */
+ uint64_t supported_counters; /* bit mask of usable counters, built from C(x) / C_ALL; CDEAD (0) means none */
+ const char *name; /* event name string */
+ uint8_t cmask; /* cmask value; 0x0 when the event needs none */
+ uint8_t attrs; /* ATTR_* bits, laid out as they appear in bits 16-23 of the eventsel */
+ uint16_t msr_offset; /* MSR programming is not handled yet, so events that need it are left out of the tables */
+};
+
+/* Bit masks naming which counters support an event (see events_table_t.supported_counters) */
+#define C(x) (1ULL << (x)) /* 64-bit unsigned: shifting a plain int overflows at x == 31 and is undefined beyond */
+#define C0 C(0)
+#define C1 C(1)
+#define C2 C(2)
+#define C3 C(3)
+#define C_ALL 0xFFFFFFFFFFFFFFFFULL /* every counter */
+#define CDEAD 0 /* Counter that is broken: no counter bits set */
+
+/* Rows named with hyphens ("unhalted-core-cycles", ...); each row has seven values, presumably in struct events_table_t field order -- confirm. Note that regular events use the original spelling like "inst_retired.any_p" */
+#define ARCH_EVENTS /* NOTE: Order specified in PRM must be maintained! */ \
+{ 0x3C, 0x00, C_ALL, "unhalted-core-cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x01, C_ALL, "unhalted-reference-cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC0, 0x00, C_ALL, "instruction-retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2E, 0x4F, C_ALL, "llc-reference" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2E, 0x41, C_ALL, "llc-misses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x00, C_ALL, "branch-instruction-retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x00, C_ALL, "branch-misses-retired" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define (this comment is the macro's final continuation line) */
+
+/*
+ * Event rows for Core 2 (see the model list at the top of the file):
+ * FAM6/MOD15: Xeon 3000, 3200, 5100, 5300, 7300;
+ * Core 2 Quad, Extreme, and Duo;
+ * Pentium dual-core processors
+ * FAM6/MOD23: Xeon 5200, 5400 series; Intel Core 2 Quad Q9650.
+ * Each row has seven values: event code, umask, counter mask, name,
+ * cmask, attrs, msr_offset (presumably events_table_t order -- confirm).
+ */
+#define EVENTS_FAM6_MOD23 \
+{ 0x03, 0x00, C0|C1, "load_block" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x03, 0x02, C0|C1, "load_block.sta" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x03, 0x04, C0|C1, "load_block.std" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x03, 0x08, C0|C1, "load_block.overlap_store" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x03, 0x10, C0|C1, "load_block.until_retire" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x03, 0x20, C0|C1, "load_block.l1d" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x04, 0x00, C0|C1, "store_block" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x04, 0x01, C0|C1, "store_block.drain_cycles" /*spell-diff*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x04, 0x02, C0|C1, "store_block.order" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x04, 0x08, C0|C1, "store_block.snoop" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x05, 0x00, C0|C1, "misalign_mem_ref" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x06, 0x00, C0|C1, "segment_reg_loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x00, C0|C1, "sse_pre_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x00, C0|C1, "sse_pre_exec.nta" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x01, C0|C1, "sse_pre_exec.l1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x02, C0|C1, "sse_pre_exec.l2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x03, C0|C1, "sse_pre_exec.stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x00, C0|C1, "dtlb_misses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x01, C0|C1, "dtlb_misses.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x02, C0|C1, "dtlb_misses.miss_ld" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x04, C0|C1, "dtlb_misses.l0_miss_ld" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x08, C0|C1, "dtlb_misses.miss_st" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x09, 0x00, C0|C1, "memory_disambiguation" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x09, 0x01, C0|C1, "memory_disambiguation.reset" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x09, 0x02, C0|C1, "memory_disambiguation.success" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0c, 0x00, C0|C1, "page_walks" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0c, 0x01, C0|C1, "page_walks.count" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0c, 0x02, C0|C1, "page_walks.cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x00, C0 , "fp_comp_ops_exe" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x11, 0x00, C1, "fp_assist" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x00, C1, "mul" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x13, 0x00, C1, "div" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x14, 0x00, C0 , "cycles_div_busy" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x18, 0x00, C0 , "idle_during_div" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x19, 0x00, C1, "delayed_bypass" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x19, 0x00, C1, "delayed_bypass.fp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x19, 0x01, C1, "delayed_bypass.simd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x19, 0x02, C1, "delayed_bypass.load" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x21, 0x00, C0|C1, "l2_ads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x23, 0x00, C0|C1, "l2_dbus_busy_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x00, C0|C1, "l2_lines_in" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x25, 0x00, C0|C1, "l2_m_lines_in" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x00, C0|C1, "l2_lines_out" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x00, C0|C1, "l2_m_lines_out" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x00, C0|C1, "l2_ifetch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x29, 0x00, C0|C1, "l2_ld" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2a, 0x00, C0|C1, "l2_st" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2b, 0x00, C0|C1, "l2_lock" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2e, 0x00, C0|C1, "l2_rqsts" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2e, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2e, 0x4f, C0|C1, "l2_rqsts.self.demand.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x30, 0x00, C0|C1, "l2_reject_busq" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x32, 0x00, C0|C1, "l2_no_req" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3a, 0x00, C0|C1, "eist_trans" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3b, 0xc0, C0|C1, "thermal_trip" /*non-zero umask*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3c, 0x00, C0|C1, "cpu_clk_unhalted" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3c, 0x00, C0|C1, "cpu_clk_unhalted.core_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3c, 0x01, C0|C1, "cpu_clk_unhalted.bus" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3c, 0x02, C0|C1, "cpu_clk_unhalted.no_other" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x40, 0x00, C0|C1, "l1d_cache_ld" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x41, 0x00, C0|C1, "l1d_cache_st" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x42, 0x00, C0|C1, "l1d_cache_lock" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x42, 0x10, C0|C1, "l1d_cache_lock.duration" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x43, 0x00, C0|C1, "l1d_all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x43, 0x00, C0|C1, "l1d_all_ref" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x43, 0x01, C0|C1, "l1d_all.ref" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x43, 0x02, C0|C1, "l1d_all.cache_ref" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x45, 0x0f, C0|C1, "l1d_repl" /*non-zero umask*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x46, 0x00, C0|C1, "l1d_m_repl" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x47, 0x00, C0|C1, "l1d_m_evict" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x00, C0|C1, "l1d_pend_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x00, C0|C1, "l1d_split" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x01, C0|C1, "l1d_split.loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x02, C0|C1, "l1d_split.stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4b, 0x00, C0|C1, "sse_pre_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4b, 0x00, C0|C1, "sse_pre_miss.nta" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4b, 0x01, C0|C1, "sse_pre_miss.l1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4b, 0x02, C0|C1, "sse_pre_miss.l2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4c, 0x00, C0|C1, "load_hit_pre" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4e, 0x00, C0|C1, "l1d_prefetch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4e, 0x10, C0|C1, "l1d_prefetch.requests" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x00, C0|C1, "bus_request_outstanding" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x61, 0x00, C0|C1, "bus_bnr_drv" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x62, 0x00, C0|C1, "bus_drdy_clocks" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x00, C0|C1, "bus_lock_clocks" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x64, 0x00, C0|C1, "bus_data_rcv" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x65, 0x00, C0|C1, "bus_trans_brd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x66, 0x00, C0|C1, "bus_trans_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x67, 0x00, C0|C1, "bus_trans_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x68, 0x00, C0|C1, "bus_trans_ifetch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x69, 0x00, C0|C1, "bus_trans_inval" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6a, 0x00, C0|C1, "bus_trans_pwr" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6b, 0x00, C0|C1, "bus_trans_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6c, 0x00, C0|C1, "bus_trans_io" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6d, 0x00, C0|C1, "bus_trans_def" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6e, 0x00, C0|C1, "bus_trans_burst" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6f, 0x00, C0|C1, "bus_trans_mem" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x70, 0x00, C0|C1, "bus_trans_any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x77, 0x00, C0|C1, "ext_snoop" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x78, 0x00, C0|C1, "cmp_snoop" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x7a, 0x00, C0|C1, "bus_hit_drv" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x7b, 0x00, C0|C1, "bus_hitm_drv" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x7d, 0x00, C0|C1, "busq_empty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x7e, 0x00, C0|C1, "snoop_stall_drv" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x7f, 0x00, C0|C1, "bus_io_wait" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x00, C0|C1, "l1i_reads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x81, 0x00, C0|C1, "l1i_misses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x82, 0x00, C0|C1, "itlb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x82, 0x02, C0|C1, "itlb.small_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x82, 0x10, C0|C1, "itlb.large_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x82, 0x12, C0|C1, "itlb.misses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x82, 0x40, C0|C1, "itlb.flush" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x83, 0x00, C0|C1, "inst_queue" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x83, 0x02, C0|C1, "inst_queue.full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x86, 0x00, C0|C1, "cycles_l1i_mem_stalled" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x00, C0|C1, "ild_stall" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x00, C0|C1, "br_inst_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x00, C0|C1, "br_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x8a, 0x00, C0|C1, "br_bac_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x8b, 0x00, C0|C1, "br_cnd_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x8c, 0x00, C0|C1, "br_cnd_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x8d, 0x00, C0|C1, "br_ind_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x8e, 0x00, C0|C1, "br_ind_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x8f, 0x00, C0|C1, "br_ret_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x90, 0x00, C0|C1, "br_ret_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x91, 0x00, C0|C1, "br_ret_bac_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x92, 0x00, C0|C1, "br_call_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x93, 0x00, C0|C1, "br_call_missp_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x94, 0x00, C0|C1, "br_ind_call_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x97, 0x00, C0|C1, "br_tkn_bubble_1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x98, 0x00, C0|C1, "br_tkn_bubble_2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xa0, 0x00, C0|C1, "rs_uops_dispatched" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xa1, 0x00, C0 , "rs_uops_dispatched_port" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xa1, 0x01, C0 , "rs_uops_dispatched_port.0" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xa1, 0x02, C0 , "rs_uops_dispatched_port.1" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xa1, 0x04, C0 , "rs_uops_dispatched_port.2" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xa1, 0x08, C0 , "rs_uops_dispatched_port.3" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xa1, 0x10, C0 , "rs_uops_dispatched_port.4" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xa1, 0x20, C0 , "rs_uops_dispatched_port.5" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xaa, 0x00, C0|C1, "macro_insts" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xaa, 0x01, C0|C1, "macro_insts.decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xaa, 0x08, C0|C1, "macro_insts.cisc_decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xab, 0x00, C0|C1, "esp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xab, 0x01, C0|C1, "esp.synch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xab, 0x02, C0|C1, "esp.additions" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb0, 0x00, C0|C1, "simd_uops_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb1, 0x00, C0|C1, "simd_sat_uop_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x00, C0|C1, "simd_uop_type_exec" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x01, C0|C1, "simd_uop_type_exec.mul" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x02, C0|C1, "simd_uop_type_exec.shift" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x04, C0|C1, "simd_uop_type_exec.pack" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x08, C0|C1, "simd_uop_type_exec.unpack" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x10, C0|C1, "simd_uop_type_exec.logical" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc0, 0x00, C0|C1, "inst_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc0, 0x00, C0|C1, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc0, 0x01, C0|C1, "inst_retired.loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc0, 0x02, C0|C1, "inst_retired.stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc0, 0x04, C0|C1, "inst_retired.other" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc0, 0x08, C0|C1, "inst_retired.vm_h" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc1, 0x00, C0|C1, "x87_ops_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc1, 0x01, C0|C1, "x87_ops_retired.fxch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc1, 0xfe, C0|C1, "x87_ops_retired.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc2, 0x00, C0|C1, "uops_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc2, 0x01, C0|C1, "uops_retired.ld_ind_br" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc2, 0x02, C0|C1, "uops_retired.std_sta" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc2, 0x04, C0|C1, "uops_retired.macro_fusion" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc2, 0x07, C0|C1, "uops_retired.fused" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc2, 0x08, C0|C1, "uops_retired.non_fused" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc2, 0x0f, C0|C1, "uops_retired.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc3, 0x00, C0|C1, "machine_nukes" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc3, 0x01, C0|C1, "machine_nukes.smc" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc3, 0x04, C0|C1, "machine_nukes.mem_order" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x00, C0|C1, "br_inst_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x00, C0|C1, "br_inst_retired.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x01, C0|C1, "br_inst_retired.pred_not_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x02, C0|C1, "br_inst_retired.mispred_not_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x04, C0|C1, "br_inst_retired.pred_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x08, C0|C1, "br_inst_retired.mispred_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x0c, C0|C1, "br_inst_retired.taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc5, 0x00, C0|C1, "br_inst_retired_mispred" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc5, 0x00, C0|C1, "br_inst_retired.mispred" /*alt-spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc6, 0x00, C0|C1, "cycles_int" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc6, 0x01, C0|C1, "cycles_int.masked" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc6, 0x02, C0|C1, "cycles_int.pending_and_masked" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x00, C0|C1, "simd_inst_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x01, C0|C1, "simd_inst_retired.packed_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x02, C0|C1, "simd_inst_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x04, C0|C1, "simd_inst_retired.packed_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x08, C0|C1, "simd_inst_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x10, C0|C1, "simd_inst_retired.vector" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x1f, C0|C1, "simd_inst_retired.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc8, 0x00, C0|C1, "hw_int_rcv" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc9, 0x00, C0|C1, "itlb_miss_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xca, 0x00, C0|C1, "simd_comp_inst_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xca, 0x01, C0|C1, "simd_comp_inst_retired.packed_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xca, 0x02, C0|C1, "simd_comp_inst_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xca, 0x04, C0|C1, "simd_comp_inst_retired.packed_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xca, 0x08, C0|C1, "simd_comp_inst_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcb, 0x00, C0 , "mem_load_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcb, 0x01, C0 , "mem_load_retired.l1d_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcb, 0x02, C0 , "mem_load_retired.l1d_line_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcb, 0x04, C0 , "mem_load_retired.l2_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcb, 0x08, C0 , "mem_load_retired.l2_line_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcb, 0x10, C0 , "mem_load_retired.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcc, 0x00, C0|C1, "fp_mmx_trans" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcc, 0x01, C0|C1, "fp_mmx_trans.to_mmx" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcc, 0x02, C0|C1, "fp_mmx_trans.to_fp" /*spelling*/ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcd, 0x00, C0|C1, "simd_assist" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xce, 0x00, C0|C1, "simd_instr_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcf, 0x00, C0|C1, "simd_sat_instr_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd2, 0x00, C0|C1, "rat_stalls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd2, 0x01, C0|C1, "rat_stalls.rob_read_port" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd2, 0x02, C0|C1, "rat_stalls.partial_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd2, 0x04, C0|C1, "rat_stalls.flags" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd2, 0x08, C0|C1, "rat_stalls.fpsw" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd2, 0x0f, C0|C1, "rat_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd2, 0x10, C0|C1, "rat_stalls.other_serialization_stalls", 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd4, 0x00, C0|C1, "seg_rename_stalls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd4, 0x01, C0|C1, "seg_rename_stalls.es" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd4, 0x02, C0|C1, "seg_rename_stalls.ds" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd4, 0x04, C0|C1, "seg_rename_stalls.fs" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd4, 0x08, C0|C1, "seg_rename_stalls.gs" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd4, 0x0f, C0|C1, "seg_rename_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd5, 0x00, C0|C1, "seg_reg_renames" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd5, 0x01, C0|C1, "seg_reg_renames.es" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd5, 0x02, C0|C1, "seg_reg_renames.ds" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd5, 0x04, C0|C1, "seg_reg_renames.fs" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd5, 0x08, C0|C1, "seg_reg_renames.gs" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xd5, 0x0f, C0|C1, "seg_reg_renames.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xdc, 0x00, C0|C1, "resource_stalls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xdc, 0x01, C0|C1, "resource_stalls.rob_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xdc, 0x02, C0|C1, "resource_stalls.rs_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xdc, 0x04, C0|C1, "resource_stalls.ld_st" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xdc, 0x08, C0|C1, "resource_stalls.fpcw" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xdc, 0x10, C0|C1, "resource_stalls.br_miss_clear" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xdc, 0x1f, C0|C1, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xe0, 0x00, C0|C1, "br_inst_decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xe4, 0x00, C0|C1, "bogus_br" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xe6, 0x00, C0|C1, "baclears" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xf0, 0x00, C0|C1, "pref_rqsts_up" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xf8, 0x00, C0|C1, "pref_rqsts_dn" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define */
+
+/* FAM6 MOD28: Intel Atom processor */
+/*
+ * EVENTS_FAM6_MOD28 expands to a run of brace-enclosed rows, one per named
+ * event, each followed by a comma (presumably for use inside an array
+ * initializer; the consumer is not visible in this part of the file).
+ * Every row has seven positional fields:
+ *   1-2  two hex codes identifying the event (presumably the event-select
+ *        code and the unit mask -- confirm against the row struct)
+ *   3    an OR of Cn flags (presumably the counters that can host the event)
+ *   4    the event name string
+ *   5-7  0x0, ATTR_NONE, 0x0 on every row of this table
+ * Every row line ends in a backslash, so the macro body only stops at the
+ * closing "end of #define" comment line, which has no continuation.
+ * NOTE(review): the name "br_inst_retired.mispred" is used by two rows
+ * (0xc4/0x0a and 0xc5/0x00); confirm that name-based lookup copes with that.
+ */
+#define EVENTS_FAM6_MOD28 \
+{ 0x02, 0x81, C0|C1, "store_forwards.good" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x06, 0x00, C0|C1, "segment_reg_loads.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x01, C0|C1, "prefetch.prefetcht0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x06, C0|C1, "prefetch.sw_l2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x08, C0|C1, "prefetch.prefetchnta" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x05, C0|C1, "data_tlb_misses.dtlb_miss_ld" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x06, C0|C1, "data_tlb_misses.dtlb_miss_st" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x07, C0|C1, "data_tlb_misses.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x09, C0|C1, "data_tlb_misses.l0_dtlb_miss_ld" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0c, 0x03, C0|C1, "page_walks.cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x01, C0|C1, "x87_comp_ops_exe.any.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x81, C0|C1, "x87_comp_ops_exe.any.ar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x11, 0x01, C0|C1, "fp_assist" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x11, 0x81, C0|C1, "fp_assist.ar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x01, C0|C1, "mul.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x81, C0|C1, "mul.ar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x13, 0x01, C0|C1, "div.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x13, 0x81, C0|C1, "div.ar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x14, 0x01, C0|C1, "cycles_div_busy" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x21, 0x00, C0|C1, "l2_ads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x22, 0x00, C0|C1, "l2_dbus_busy" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x00, C0|C1, "l2_lines_in" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x25, 0x00, C0|C1, "l2_m_lines_in" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x00, C0|C1, "l2_lines_out" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x00, C0|C1, "l2_m_lines_out" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x00, C0|C1, "l2_ifetch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x29, 0x00, C0|C1, "l2_ld" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2a, 0x00, C0|C1, "l2_st" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2b, 0x00, C0|C1, "l2_lock" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2e, 0x00, C0|C1, "l2_rqsts" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2e, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2e, 0x4f, C0|C1, "l2_rqsts.self.demand.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x30, 0x00, C0|C1, "l2_reject_busq" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x32, 0x00, C0|C1, "l2_no_req" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3a, 0x00, C0|C1, "eist_trans" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3b, 0xc0, C0|C1, "thermal_trip" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3c, 0x00, C0|C1, "cpu_clk_unhalted.core_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3c, 0x01, C0|C1, "cpu_clk_unhalted.bus" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3c, 0x02, C0|C1, "cpu_clk_unhalted.no_other" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x40, 0x21, C0|C1, "l1d_cache.ld" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x40, 0x22, C0|C1, "l1d_cache.st" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x00, C0|C1, "bus_request_outstanding" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x61, 0x00, C0|C1, "bus_bnr_drv" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x62, 0x00, C0|C1, "bus_drdy_clocks" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x00, C0|C1, "bus_lock_clocks" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x64, 0x00, C0|C1, "bus_data_rcv" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x65, 0x00, C0|C1, "bus_trans_brd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x66, 0x00, C0|C1, "bus_trans_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x67, 0x00, C0|C1, "bus_trans_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x68, 0x00, C0|C1, "bus_trans_ifetch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x69, 0x00, C0|C1, "bus_trans_inval" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6a, 0x00, C0|C1, "bus_trans_pwr" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6b, 0x00, C0|C1, "bus_trans_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6c, 0x00, C0|C1, "bus_trans_io" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6d, 0x00, C0|C1, "bus_trans_def" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6e, 0x00, C0|C1, "bus_trans_burst" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6f, 0x00, C0|C1, "bus_trans_mem" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x70, 0x00, C0|C1, "bus_trans_any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x77, 0x00, C0|C1, "ext_snoop" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x7a, 0x00, C0|C1, "bus_hit_drv" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x7b, 0x00, C0|C1, "bus_hitm_drv" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x7d, 0x00, C0|C1, "busq_empty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x7e, 0x00, C0|C1, "snoop_stall_drv" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x7f, 0x00, C0|C1, "bus_io_wait" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x02, C0|C1, "icache.misses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x03, C0|C1, "icache.accesses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x82, 0x02, C0|C1, "itlb.misses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x82, 0x04, C0|C1, "itlb.flush" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xaa, 0x02, C0|C1, "macro_insts.cisc_decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xaa, 0x03, C0|C1, "macro_insts.all_decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb0, 0x00, C0|C1, "simd_uops_exec.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb0, 0x80, C0|C1, "simd_uops_exec.ar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb1, 0x00, C0|C1, "simd_sat_uop_exec.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb1, 0x80, C0|C1, "simd_sat_uop_exec.ar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x01, C0|C1, "simd_uop_type_exec.mul.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x02, C0|C1, "simd_uop_type_exec.shift.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x04, C0|C1, "simd_uop_type_exec.pack.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x08, C0|C1, "simd_uop_type_exec.unpack.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x10, C0|C1, "simd_uop_type_exec.logical.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x81, C0|C1, "simd_uop_type_exec.mul.ar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x82, C0|C1, "simd_uop_type_exec.shift.ar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x84, C0|C1, "simd_uop_type_exec.pack.ar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x88, C0|C1, "simd_uop_type_exec.unpack.ar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0x90, C0|C1, "simd_uop_type_exec.logical.ar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xb3, 0xa0, C0|C1, "simd_uop_type_exec.arithmetic.ar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc0, 0x00, C0|C1, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc2, 0x10, C0|C1, "uops_retired.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc3, 0x01, C0|C1, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x00, C0|C1, "br_inst_retired.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x01, C0|C1, "br_inst_retired.pred_not_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x02, C0|C1, "br_inst_retired.mispred_not_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x04, C0|C1, "br_inst_retired.pred_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x08, C0|C1, "br_inst_retired.mispred_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x0a, C0|C1, "br_inst_retired.mispred" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x0c, C0|C1, "br_inst_retired.taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x0f, C0|C1, "br_inst_retired.any1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc5, 0x00, C0|C1, "br_inst_retired.mispred" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc6, 0x01, C0|C1, "cycles_int_masked.cycles_int_masked" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc6, 0x02, C0|C1, "cycles_int_masked.cycles_int_pending_and_masked" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x01, C0|C1, "simd_inst_retired.packed_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x02, C0|C1, "simd_inst_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x04, C0|C1, "simd_inst_retired.packed_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x08, C0|C1, "simd_inst_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x10, C0|C1, "simd_inst_retired.vector" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc7, 0x1f, C0|C1, "simd_inst_retired.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc8, 0x00, C0|C1, "hw_int_rcv" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xca, 0x01, C0|C1, "simd_comp_inst_retired.packed_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xca, 0x02, C0|C1, "simd_comp_inst_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xca, 0x04, C0|C1, "simd_comp_inst_retired.packed_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xca, 0x08, C0|C1, "simd_comp_inst_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcb, 0x01, C0|C1, "mem_load_retired.l2_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcb, 0x02, C0|C1, "mem_load_retired.l2_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcb, 0x04, C0|C1, "mem_load_retired.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcd, 0x00, C0|C1, "simd_assist" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xce, 0x00, C0|C1, "simd_instr_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xcf, 0x00, C0|C1, "simd_sat_instr_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xe0, 0x01, C0|C1, "br_inst_decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xe4, 0x01, C0|C1, "bogus_br" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xe6, 0x01, C0|C1, "baclears.any" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define */
+
+/* Intel Core i7 (Nehalem) Processor */
+/*
+ * The Nehalem tables are basically from Bug 16457009,
+ * "libcpc counter names should be based on public Intel documentation -- Nehalem",
+ * and those tables are in turn basically from the
+ * Intel SDM, January 2013, Section 19.5, Table 19-11.
+ * We omit the Table 19-12 uncore events.
+ *
+ * Note that the table below includes some events from
+ * the Intel SDM that require cmask or attr settings.
+ * These events are not in libcpc, which did not include
+ * events requiring cmask or attr until Sandy Bridge.
+ */
+
+/*
+ * EVENTS_FAM6_MOD26 expands to a run of brace-enclosed rows, one per named
+ * event, each followed by a comma (presumably for use inside an array
+ * initializer; the consumer is not visible in this part of the file).
+ * Every row has seven positional fields:
+ *   1-2  two hex codes identifying the event (presumably the event-select
+ *        code and the unit mask -- confirm against the row struct)
+ *   3    an OR of Cn flags (presumably the counters that can host the
+ *        event); the 0x40-0x43, 0x51 and 0x63 rows list only C0|C1,
+ *        every other row lists C0|C1|C2|C3
+ *   4    the event name string
+ *   5    0x0, or 0x1 on six rows (presumably the cmask setting)
+ *   6    ATTR_NONE, ATTR_INV or ATTR_EDGE | ATTR_INV (presumably the attr
+ *        setting)
+ *   7    0x0 on every active row (presumably an MSR offset: the only
+ *        non-zero values sit in the two disabled off_core_response rows,
+ *        which are annotated as requiring msr_offset)
+ * The six rows with 0x1 in field 5 (uops_issued.stalled_cycles,
+ * arith.fpu_div, lsd.cycles, uops_executed.port015_stall_cycles,
+ * uops_retired.active_cycles, uops_retired.stall_cycles) each reuse the
+ * two hex codes of a neighbouring row and differ from it only in the name
+ * and in fields 5-6.
+ * Every row line ends in a backslash (the disabled rows are wrapped in
+ * comments that close before the backslash), so the macro body only stops
+ * at the closing "end of #define" comment line.
+ */
+#define EVENTS_FAM6_MOD26 \
+{ 0x04, 0x07, C0|C1|C2|C3, "sb_drain.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x01, C0|C1|C2|C3, "partial_address_alias" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x10, C0|C1|C2|C3, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x80, C0|C1|C2|C3, "dtlb_load_misses.large_walk_completed", 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0B, 0x10, C0|C1|C2|C3, "mem_inst_retired.latency_above_threshold" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.stalled_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm", 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit", 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x14, 0x01, C0|C1|C2|C3, "arith.fpu_div" , 0x1, ATTR_EDGE | ATTR_INV, 0x0 }, \
+{ 0x14, 0x02, C0|C1|C2|C3, "arith.mul" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x80, C0|C1|C2|C3, "l2_write.lock.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0xE0, C0|C1|C2|C3, "l2_write.lock.hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0xF0, C0|C1|C2|C3, "l2_write.lock.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x02, C0|C1|C2|C3, "l1d_wb_l2.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x0F, C0|C1|C2|C3, "l1d_wb_l2.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2E, 0x41, C0|C1|C2|C3, "l3_lat_cache.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2E, 0x4F, C0|C1|C2|C3, "l3_lat_cache.reference" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x00, C0|C1|C2|C3, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x01, C0|C1|C2|C3, "cpu_clk_unhalted.ref_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x40, 0x01, C0|C1 , "l1d_cache_ld.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x40, 0x02, C0|C1 , "l1d_cache_ld.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x40, 0x04, C0|C1 , "l1d_cache_ld.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x40, 0x08, C0|C1 , "l1d_cache_ld.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x40, 0x0F, C0|C1 , "l1d_cache_ld.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x41, 0x02, C0|C1 , "l1d_cache_st.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x41, 0x04, C0|C1 , "l1d_cache_st.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x41, 0x08, C0|C1 , "l1d_cache_st.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x42, 0x01, C0|C1 , "l1d_cache_lock.hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x42, 0x02, C0|C1 , "l1d_cache_lock.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x42, 0x04, C0|C1 , "l1d_cache_lock.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x42, 0x08, C0|C1 , "l1d_cache_lock.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x43, 0x01, C0|C1 , "l1d_all_ref.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x43, 0x02, C0|C1 , "l1d_all_ref.cacheable" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x20, C0|C1|C2|C3, "dtlb_misses.pde_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x80, C0|C1|C2|C3, "dtlb_misses.large_walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4C, 0x01, C0|C1|C2|C3, "load_hit_pre" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4E, 0x01, C0|C1|C2|C3, "l1d_prefetch.requests" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4E, 0x02, C0|C1|C2|C3, "l1d_prefetch.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4E, 0x04, C0|C1|C2|C3, "l1d_prefetch.triggers" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x01, C0|C1 , "l1d.repl" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x02, C0|C1 , "l1d.m_repl" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x04, C0|C1 , "l1d.m_evict" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x08, C0|C1 , "l1d.m_snoop_evict" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x52, 0x01, C0|C1|C2|C3, "l1d_cache_prefetch_lock_fb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x53, 0x01, C0|C1|C2|C3, "l1d_cache_lock_fb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x01, C0|C1 , "cache_lock_cycles.l1d_l2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x02, C0|C1 , "cache_lock_cycles.l1d" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x10, C0|C1|C2|C3, "resource_stalls.rob_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x80, C0|C1|C2|C3, "resource_stalls.other" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA8, 0x01, C0|C1|C2|C3, "lsd.uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA8, 0x01, C0|C1|C2|C3, "lsd.cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0xAE, 0x01, C0|C1|C2|C3, "itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x04, C0|C1|C2|C3, "uops_executed.port2_core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x08, C0|C1|C2|C3, "uops_executed.port3_core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x10, C0|C1|C2|C3, "uops_executed.port4_core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x1F, C0|C1|C2|C3, "uops_executed.core_active_cycles_no_port5" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x20, C0|C1|C2|C3, "uops_executed.port5" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x3F, C0|C1|C2|C3, "uops_executed.core_active_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x40, C0|C1|C2|C3, "uops_executed.port015" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x40, C0|C1|C2|C3, "uops_executed.port015_stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0xB1, 0x80, C0|C1|C2|C3, "uops_executed.port234" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB2, 0x01, C0|C1|C2|C3, "offcore_requests_sq_full" , 0x0, ATTR_NONE, 0x0 }, \
+/* { 0xB7, 0x01, C0|C1|C2|C3, "off_core_response_0" , 0x0, ATTR_NONE, 0x1A6 }, ignore events that require msr_offset */ \
+{ 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" , 0x0, ATTR_NONE, 0x0 }, \
+/* { 0xBB, 0x01, C0|C1|C2|C3, "off_core_response_1" , 0x0, ATTR_NONE, 0x1A7 }, ignore events that require msr_offset */ \
+{ 0xC0, 0x00, C0|C1|C2|C3, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.active_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0xC2, 0x02, C0|C1|C2|C3, "uops_retired.retire_slots" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x01, C0|C1|C2|C3, "machine_clears.cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x02, C0|C1|C2|C3, "machine_clears.mem_order" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x00, C0|C1|C2|C3, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x00, C0|C1|C2|C3, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x08, C0|C1|C2|C3, "l2_lines_out.prefetch_dirty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF6, 0x01, C0|C1|C2|C3, "sq_full_stall_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define */
+
+/*
+ * EVENTS_FAM6_MOD46_ONLY expands to two extra mem_uncore_retired rows, each
+ * with seven positional fields: two hex event codes, an OR of Cn flags,
+ * the event name, then 0x0, ATTR_NONE, 0x0.
+ * The macro name suggests the rows apply to model 46 only; where it is
+ * expanded is not visible here -- TODO confirm how it is combined with the
+ * other event tables.
+ */
+#define EVENTS_FAM6_MOD46_ONLY \
+{ 0x0F, 0x01, C0|C1|C2|C3, "mem_uncore_retired.l3_data_miss_unknown" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0F, 0x80, C0|C1|C2|C3, "mem_uncore_retired.uncacheable" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define */
+
+/* Intel Westmere Processor */
+/*
+ * The Westmere tables are basically from Bug 16173963,
+ * "libcpc counter names should be based on public Intel documentation -- Westmere",
+ * and those tables are in turn basically from the
+ * Intel SDM, January 2013, Section 19.6, Table 19-13.
+ * We omit the Table 19-14 uncore events.
+ *
+ * Note that the table below includes some events from
+ * the Intel SDM that require cmask or attr settings.
+ * These events are not in libcpc, which did not include
+ * events requiring cmask or attr until Sandy Bridge.
+ */
+
+#define EVENTS_FAM6_MOD37 \
+{ 0x03, 0x02, C0|C1|C2|C3, "load_block.overlap_store" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x04, 0x07, C0|C1|C2|C3, "sb_drain.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x05, 0x02, C0|C1|C2|C3, "misalign_mem_ref.store" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x01, C0|C1|C2|C3, "partial_address_alias" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x04, C0|C1|C2|C3, "dtlb_load_misses.walk_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x10, C0|C1|C2|C3, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0B, 0x10, C0|C1|C2|C3, "mem_inst_retired.latency_above_threshold" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0F, 0x01, C0|C1|C2|C3, "mem_uncore_retired.unknown_source" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0F, 0x80, C0|C1|C2|C3, "mem_uncore_retired.uncacheable" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x14, 0x02, C0|C1|C2|C3, "arith.mul" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x80, C0|C1|C2|C3, "l2_write.lock.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0xE0, C0|C1|C2|C3, "l2_write.lock.hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0xF0, C0|C1|C2|C3, "l2_write.lock.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x02, C0|C1|C2|C3, "l1d_wb_l2.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x0F, C0|C1|C2|C3, "l1d_wb_l2.mesi" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2E, 0x41, C0|C1|C2|C3, "l3_lat_cache.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2E, 0x4F, C0|C1|C2|C3, "l3_lat_cache.reference" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x00, C0|C1|C2|C3, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x01, C0|C1|C2|C3, "cpu_clk_unhalted.ref_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x04, C0|C1|C2|C3, "dtlb_misses.walk_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x20, C0|C1|C2|C3, "dtlb_misses.pde_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x80, C0|C1|C2|C3, "dtlb_misses.large_walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4C, 0x01, C0|C1 , "load_hit_pre" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4E, 0x01, C0|C1 , "l1d_prefetch.requests" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4E, 0x02, C0|C1 , "l1d_prefetch.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4E, 0x04, C0|C1 , "l1d_prefetch.triggers" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4F, 0x10, C0|C1|C2|C3, "ept.walk_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x01, C0|C1 , "l1d.repl" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x02, C0|C1 , "l1d.m_repl" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x04, C0|C1 , "l1d.m_evict" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x08, C0|C1 , "l1d.m_snoop_evict" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x52, 0x01, C0|C1|C2|C3, "l1d_cache_prefetch_lock_fb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x01, C0 , "offcore_requests_outstanding.demand.read_data", 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x02, C0 , "offcore_requests_outstanding.demand.read_code", 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x04, C0 , "offcore_requests_outstanding.demand.rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x08, C0 , "offcore_requests_outstanding.any_read", 0x0, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x01, C0|C1 , "cache_lock_cycles.l1d_l2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x02, C0|C1 , "cache_lock_cycles.l1d" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x04, C0|C1|C2|C3, "itlb_misses.walk_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x10, C0|C1|C2|C3, "itlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x80, C0|C1|C2|C3, "itlb_misses.large_walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x10, C0|C1|C2|C3, "resource_stalls.rob_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x80, C0|C1|C2|C3, "resource_stalls.other" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA8, 0x01, C0|C1|C2|C3, "lsd.uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xAE, 0x01, C0|C1|C2|C3, "itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x01, C0|C1|C2|C3, "offcore_requests.demand.read_data" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x02, C0|C1|C2|C3, "offcore_requests.demand.read_code" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x04, C0|C1|C2|C3, "offcore_requests.demand.rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x08, C0|C1|C2|C3, "offcore_requests.any.read" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x80, C0|C1|C2|C3, "offcore_requests.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x04, C0|C1|C2|C3, "uops_executed.port2_core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x08, C0|C1|C2|C3, "uops_executed.port3_core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x10, C0|C1|C2|C3, "uops_executed.port4_core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x1F, C0|C1|C2|C3, "uops_executed.core_active_cycles_no_port5" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x20, C0|C1|C2|C3, "uops_executed.port5" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x3F, C0|C1|C2|C3, "uops_executed.core_active_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x40, C0|C1|C2|C3, "uops_executed.port015" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x80, C0|C1|C2|C3, "uops_executed.port234" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB2, 0x01, C0|C1|C2|C3, "offcore_requests_sq_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB3, 0x01, C0, "snoopq_requests_outstanding.data" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB3, 0x02, C0, "snoopq_requests_outstanding.invalidate" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB3, 0x04, C0, "snoopq_requests_outstanding.code" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB4, 0x01, C0|C1|C2|C3, "snoopq_requests.code" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB4, 0x02, C0|C1|C2|C3, "snoopq_requests.data" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB4, 0x04, C0|C1|C2|C3, "snoopq_requests.invalidate" , 0x0, ATTR_NONE, 0x0 }, \
+/* { 0xB7, 0x01, C0|C1|C2|C3, "off_core_response_0" , 0x0, ATTR_NONE, 0x1A6 }, ignore events that require msr_offset */ \
+{ 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" , 0x0, ATTR_NONE, 0x0 }, \
+/* { 0xBB, 0x01, C0|C1|C2|C3, "off_core_response_1" , 0x0, ATTR_NONE, 0x1A7 }, ignore events that require msr_offset */ \
+{ 0xC0, 0x00, C0|C1|C2|C3, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x02, C0|C1|C2|C3, "uops_retired.retire_slots" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x01, C0|C1|C2|C3, "machine_clears.cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x02, C0|C1|C2|C3, "machine_clears.mem_order" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x00, C0|C1|C2|C3, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x00, C0|C1|C2|C3, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x01, C0|C1|C2|C3, "br_misp_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x04, C0|C1|C2|C3, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x01, C0|C1|C2|C3, "uops_decoded.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xEC, 0x01, C0|C1|C2|C3, "thread_active" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x08, C0|C1|C2|C3, "l2_lines_out.prefetch_dirty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF4, 0x04, C0|C1|C2|C3, "sq_misc.lru_hints" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF6, 0x01, C0|C1|C2|C3, "sq_full_stall_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define */
+
+/*
+ * EVENTS_FAM6_MOD37_ALSO: five additional "mem_uncore_retired.*" rows,
+ * all with 0x0F in the first field and 0x02, 0x04, 0x08, 0x10, 0x20
+ * in the second.  Each row is a brace-enclosed initializer followed by
+ * a comma, so the macro is meant to be expanded inside an array
+ * initializer list.
+ *
+ * These rows are kept in their own macro, presumably so that they can
+ * be left out for Model 47.  This special omission of the following
+ * events from Model 47
+ * is due to usr/src/uts/intel/pcbe/wm_pcbe.h . There seems
+ * to be no substantiation for this treatment in the Intel SDM.
+ *
+ * NOTE(review): the first two fields look like the event-select code and
+ * the unit mask, and the third like the set of usable counters -- confirm
+ * against the struct these rows initialize (not visible in this part of
+ * the file).
+ */
+#define EVENTS_FAM6_MOD37_ALSO \
+{ 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0F, 0x04, C0|C1|C2|C3, "mem_uncore_retired.remote_hitm" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.local_dram_remote_cache_hit", 0x0, ATTR_NONE, 0x0 },\
+{ 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.other_llc_miss" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define EVENTS_FAM6_MOD37_ALSO */
+
+/* Intel Sandy Bridge Processor */
+/*
+ * The Sandy Bridge tables are basically from Bug 16457080,
+ * "libcpc counter names should be based on public Intel documentation -- Sandy Bridge",
+ * and those tables are in turn basically from the
+ * Intel SDM, January 2013, Section 19.4, Table 19-7.
+ * Additionally, there are
+ * Table 19-8. Model 42 only.
+ * Table 19-9. Model 45 only.
+ * We omit the Table 19-10 uncore events.
+ */
+
+#define EVENTS_FAM6_MOD42 \
+{ 0x03, 0x01, C_ALL, "ld_blocks.data_unknown" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x03, 0x02, C_ALL, "ld_blocks.store_forward" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x03, 0x08, C_ALL, "ld_blocks.no_sr" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x03, 0x10, C_ALL, "ld_blocks.all_block" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x05, 0x01, C_ALL, "misalign_mem_ref.loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x05, 0x02, C_ALL, "misalign_mem_ref.stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x01, C_ALL, "ld_blocks_partial.address_alias" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x08, C_ALL, "ld_blocks_partial.all_sta_block" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x01, C_ALL, "dtlb_load_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x02, C_ALL, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x04, C_ALL, "dtlb_load_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x10, C_ALL, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0D, 0x03, C_ALL, "int_misc.recovery_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x0D, 0x03, C_ALL, "int_misc.recovery_stalls_count" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x0D, 0x40, C_ALL, "int_misc.rat_stall_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.core_stall_cycles" , 0x1, ATTR_INV | ATTR_ANY, 0x0 }, \
+{ 0x10, 0x01, C_ALL, "fp_comp_ops_exe.x87" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x10, C_ALL, "fp_comp_ops_exe.sse_fp_packed_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x20, C_ALL, "fp_comp_ops_exe.sse_fp_scalar_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x40, C_ALL, "fp_comp_ops_exe.sse_packed_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x80, C_ALL, "fp_comp_ops_exe.sse_scalar_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x11, 0x01, C_ALL, "simd_fp_256.packed_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x11, 0x02, C_ALL, "simd_fp_256.packed_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x14, 0x01, C_ALL, "arith.fpu_div_active" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x14, 0x01, C_ALL, "arith.fpu_div" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x17, 0x01, C_ALL, "insts_written_to_iq.insts" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x01, C_ALL, "l2_rqsts.demand_data_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x03, C_ALL, "l2_rqsts.all_demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x04, C_ALL, "l2_rqsts.rfo_hits" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x08, C_ALL, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x0C, C_ALL, "l2_rqsts.all_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x10, C_ALL, "l2_rqsts.code_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x20, C_ALL, "l2_rqsts.code_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x30, C_ALL, "l2_rqsts.all_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x40, C_ALL, "l2_rqsts.pf_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x80, C_ALL, "l2_rqsts.pf_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xC0, C_ALL, "l2_rqsts.all_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x01, C_ALL, "l2_store_lock_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x04, C_ALL, "l2_store_lock_rqsts.hit_e" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x08, C_ALL, "l2_store_lock_rqsts.hit_m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x0F, C_ALL, "l2_store_lock_rqsts.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x01, C_ALL, "l2_l1d_wb_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x02, C_ALL, "l2_l1d_wb_rqsts.hit_s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x04, C_ALL, "l2_l1d_wb_rqsts.hit_e" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x08, C_ALL, "l2_l1d_wb_rqsts.hit_m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x0F, C_ALL, "l2_l1d_wb_rqsts.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2E, 0x41, C_ALL, "longest_lat_cache.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2E, 0x4F, C_ALL, "longest_lat_cache.reference" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C2 , "l1d_pend_miss.pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C2 , "l1d_pend_miss.pending_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C2 , "l1d_pend_miss.occurrences" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x49, 0x01, C_ALL, "dtlb_store_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x02, C_ALL, "dtlb_store_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x04, C_ALL, "dtlb_store_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x10, C_ALL, "dtlb_store_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4C, 0x01, C_ALL, "load_hit_pre.sw_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4C, 0x02, C_ALL, "load_hit_pre.hw_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4E, 0x02, C_ALL, "hw_pre_req.dl1_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x01, C_ALL, "l1d.replacement" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x02, C_ALL, "l1d.allocated_in_m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x04, C_ALL, "l1d.eviction" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x08, C_ALL, "l1d.all_m_replacement" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x59, 0x20, C_ALL, "partial_rat_stalls.flags_merge_uop" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x59, 0x20, C_ALL, "partial_rat_stalls.flags_merge_uop_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x59, 0x40, C_ALL, "partial_rat_stalls.slow_lea_window" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x59, 0x80, C_ALL, "partial_rat_stalls.mul_single_uop" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5B, 0x0C, C0|C1|C2|C3, "resource_stalls2.all_fl_empty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5B, 0x0F, C_ALL, "resource_stalls2.all_prf_control" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5B, 0x40, C_ALL, "resource_stalls2.bob_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5B, 0x4F, C_ALL, "resource_stalls2.ooo_rsrc" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0_transition" , 0x0, ATTR_EDGE, 0x0 }, \
+{ 0x5C, 0x02, C_ALL, "cpl_cycles.ring123" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5E, 0x01, C_ALL, "rs_events.empty_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd_cycles", 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x01, C_ALL, "lock_cycles.split_lock_uc_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x02, C_ALL, "lock_cycles.cache_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x02, C_ALL, "idq.empty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x04, C_ALL, "idq.mite_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x04, C_ALL, "idq.mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x08, C_ALL, "idq.dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x08, C_ALL, "idq.dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_activations" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x79, 0x20, C_ALL, "idq.ms_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x20, C_ALL, "idq.ms_mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x30, C_ALL, "idq.ms_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x30, C_ALL, "idq.ms_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x18, C_ALL, "idq.all_dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x24, C_ALL, "idq.all_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x3C, C_ALL, "idq.mite_all_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x3C, C_ALL, "idq.mite_all_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x02, C_ALL, "icache.misses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x01, C_ALL, "itlb_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x02, C_ALL, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x04, C_ALL, "itlb_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x10, C_ALL, "itlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x01, C_ALL, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x04, C_ALL, "ild_stall.iq_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x41, C_ALL, "br_inst_exec.nontaken_cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x81, C_ALL, "br_inst_exec.taken_cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x82, C_ALL, "br_inst_exec.taken_direct_jmp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x84, C_ALL, "br_inst_exec.taken_indirect_jmp_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x88, C_ALL, "br_inst_exec.taken_return_near" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x90, C_ALL, "br_inst_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xA0, C_ALL, "br_inst_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xC1, C_ALL, "br_inst_exec.all_cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xFF, C_ALL, "br_inst_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x41, C_ALL, "br_misp_exec.nontaken_cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x81, C_ALL, "br_misp_exec.taken_cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x84, C_ALL, "br_misp_exec.taken_indirect_jmp_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x88, C_ALL, "br_misp_exec.taken_return_near" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x90, C_ALL, "br_misp_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0xA0, C_ALL, "br_misp_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0xC1, C_ALL, "br_misp_exec.all_cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0xFF, C_ALL, "br_misp_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x01, C_ALL, "uops_dispatched_port.port_0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x02, C_ALL, "uops_dispatched_port.port_1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x04, C_ALL, "uops_dispatched_port.port_2_ld" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x08, C_ALL, "uops_dispatched_port.port_2_sta" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x0C, C_ALL, "uops_dispatched_port.port_2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x10, C_ALL, "uops_dispatched_port.port_3_ld" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x20, C_ALL, "uops_dispatched_port.port_3_sta" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x30, C_ALL, "uops_dispatched_port.port_3" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x40, C_ALL, "uops_dispatched_port.port_4" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x80, C_ALL, "uops_dispatched_port.port_5" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x01, C_ALL, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x02, C_ALL, "resource_stalls.lb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x04, C_ALL, "resource_stalls.rs" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x08, C_ALL, "resource_stalls.sb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x10, C_ALL, "resource_stalls.rob" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x20, C_ALL, "resource_stalls.fcsw" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x40, C_ALL, "resource_stalls.mxcsr" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x80, C_ALL, "resource_stalls.other" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x02, C2 , "cycle_activity.cycles_l1d_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x04, C0|C1|C2|C3, "cycle_activity.cycles_no_dispatch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xAB, 0x01, C_ALL, "dsb2mite_switches.count" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xAB, 0x02, C_ALL, "dsb2mite_switches.penalty_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xAC, 0x02, C_ALL, "dsb_fill.other_cancel" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xAC, 0x08, C_ALL, "dsb_fill.exceed_dsb_lines" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xAC, 0x0A, C_ALL, "dsb_fill.all_cancel" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xAE, 0x01, C_ALL, "itlb.itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x01, C_ALL, "offcore_requests.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x04, C_ALL, "offcore_requests.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x08, C_ALL, "offcore_requests.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C0|C1|C2|C3, "uops_dispatched.thread" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C0|C1|C2|C3, "uops_dispatched.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_dispatched.core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB2, 0x01, C_ALL, "offcore_requests_buffer.sq_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB6, 0x01, C_ALL, "agu_bypass_cancel.count" , 0x0, ATTR_NONE, 0x0 }, \
+/* { 0xB7, 0x01, C_ALL, "off_core_response_0" , 0x0, ATTR_NONE, 0x1A6 }, ignore events that require msr_offset */ \
+/* { 0xBB, 0x01, C_ALL, "off_core_response_1" , 0x0, ATTR_NONE, 0x1A7 }, ignore events that require msr_offset */ \
+{ 0xBD, 0x01, C_ALL, "tlb_flush.dtlb_thread" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBD, 0x20, C_ALL, "tlb_flush.stlb_any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBF, 0x05, C_ALL, "l1d_blocks.bank_conflict_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xC0, 0x00, C_ALL, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC0, 0x01, C1, "inst_retired.prec_dist" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC1, 0x02, C_ALL, "other_assists.itlb_miss_retired" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC1, 0x08, C_ALL, "other_assists.avx_store" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC1, 0x10, C_ALL, "other_assists.avx_to_sse" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC1, 0x20, C_ALL, "other_assists.sse_to_avx" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.active_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0xC2, 0x02, C_ALL, "uops_retired.retire_slots" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x02, C_ALL, "machine_clears.memory_ordering" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x04, C_ALL, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x20, C_ALL, "machine_clears.maskmov" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x00, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x01, C_ALL, "br_inst_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x02, C_ALL, "br_inst_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x04, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x08, C_ALL, "br_inst_retired.near_return" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x10, C_ALL, "br_inst_retired.not_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x20, C_ALL, "br_inst_retired.near_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x40, C_ALL, "br_inst_retired.far_branch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x00, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x00, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x01, C_ALL, "br_misp_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x02, C_ALL, "br_misp_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x04, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x10, C_ALL, "br_misp_retired.not_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x20, C_ALL, "br_misp_retired.taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x02, C_ALL, "fp_assist.x87_output" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x04, C_ALL, "fp_assist.x87_input" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x08, C_ALL, "fp_assist.simd_output" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x10, C_ALL, "fp_assist.simd_input" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x1E, C_ALL, "fp_assist.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCC, 0x20, C_ALL, "rob_misc_events.lbr_inserts" , 0x0, ATTR_NONE, 0x0 }, \
+/* { 0xCD, 0x01, C3, "mem_trans_retired.load_latency" , 0x0, ATTR_NONE, 0x3F6 }, ignore events that require msr_offset */ /* See Section "MSR_PEBS_LD_LAT_THRESHOLD" */ \
+{ 0xCD, 0x02, C3, "mem_trans_retired.precise_store" , 0x0, ATTR_NONE, 0x0 }, /* See Section "Precise Store Facility" */ \
+{ 0xD0, 0x11, C_ALL, "mem_uops_retired.stlb_miss_loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x12, C_ALL, "mem_uops_retired.stlb_miss_stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x21, C_ALL, "mem_uops_retired.lock_loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x22, C_ALL, "mem_uops_retired.lock_stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x41, C_ALL, "mem_uops_retired.split_loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x42, C_ALL, "mem_uops_retired.split_stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x81, C_ALL, "mem_uops_retired.all_loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x82, C_ALL, "mem_uops_retired.all_stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x01, C0|C1|C2|C3, "mem_load_uops_retired.l1_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x02, C_ALL, "mem_load_uops_retired.l2_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x04, C_ALL, "mem_load_uops_retired.llc_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x20, C_ALL, "mem_load_uops_retired.llc_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x40, C_ALL, "mem_load_uops_retired.hit_lfb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x01, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x02, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x04, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_hitm" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x08, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_none" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE6, 0x01, C_ALL, "baclears.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x01, C_ALL, "l2_trans.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x02, C_ALL, "l2_trans.rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x04, C_ALL, "l2_trans.code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x08, C_ALL, "l2_trans.all_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x10, C_ALL, "l2_trans.l1d_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x20, C_ALL, "l2_trans.l2_fill" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x40, C_ALL, "l2_trans.l2_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x80, C_ALL, "l2_trans.all_requests" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x01, C_ALL, "l2_lines_in.i" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x02, C_ALL, "l2_lines_in.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x04, C_ALL, "l2_lines_in.e" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x07, C_ALL, "l2_lines_in.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x01, C_ALL, "l2_lines_out.demand_clean" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x02, C_ALL, "l2_lines_out.demand_dirty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x04, C_ALL, "l2_lines_out.pf_clean" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x08, C_ALL, "l2_lines_out.pf_dirty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x0A, C_ALL, "l2_lines_out.dirty_all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF4, 0x10, C_ALL, "sq_misc.split_lock" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define */
+
+#define EVENTS_FAM6_MOD42_ONLY /* one extra event row; the name suggests it applies to family 6, model 42 only -- NOTE(review): confirm at the point of use */ \
+{ 0xD4, 0x02, C0|C1|C2|C3, "mem_load_uops_misc_retired.llc_miss" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define EVENTS_FAM6_MOD42_ONLY */
+
+#define EVENTS_FAM6_MOD45_ONLY /* expands to no rows: both candidate rows below are commented out because they need a non-zero msr_offset (last field) */ \
+/* { 0xD3, 0x01, C_ALL, "mem_load_uops_llc_miss_retired.local_dram" , 0x0, ATTR_NONE, 0x3C9 }, ignore events that require msr_offset */ \
+/* { 0xD3, 0x04, C_ALL, "mem_load_uops_llc_miss_retired.remote_dram" , 0x0, ATTR_NONE, 0x3C9 }, ignore events that require msr_offset */ \
+/* end of #define EVENTS_FAM6_MOD45_ONLY */
+
+/* Intel Ivy Bridge Processor */
+/*
+ * The Ivy Bridge tables originate from Bug 16457100,
+ * "libcpc counter names should be based on public Intel documentation -- Ivy Bridge",
+ * and are essentially taken from the
+ * Intel SDM, January 2013, Section 19.3, Table 19-5.
+ * Additionally, there is
+ * Table 19-6, which applies to Model 62 only.
+ */
+
+#define EVENTS_FAM6_MOD58 \
+/* Event rows for the Ivy Bridge table described in the comment above this macro. \
+ * The last field of a row is the msr_offset; rows that would need a non-zero \
+ * msr_offset are kept only as comments.  The fifth field looks like a counter \
+ * mask (0x1 on the "_cycles" variants, 0x4 on the "_4_uops" variants) -- \
+ * TODO confirm against the row type, which is declared outside this macro. \
+ * Event names are all lower case, and the plain-count and "_cycles" forms of \
+ * an event carry distinct names ("..._uops" vs "..._cycles"). */ \
+{ 0x03, 0x02, C_ALL, "ld_blocks.store_forward" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x05, 0x01, C_ALL, "misalign_mem_ref.loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x05, 0x02, C_ALL, "misalign_mem_ref.stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x01, C_ALL, "ld_blocks_partial.address_alias" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x81, C_ALL, "dtlb_load_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x82, C_ALL, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x84, C_ALL, "dtlb_load_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.core_stall_cycles" , 0x1, ATTR_INV | ATTR_ANY, 0x0 }, \
+{ 0x0E, 0x10, C_ALL, "uops_issued.flags_merge" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x20, C_ALL, "uops_issued.slow_lea" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x40, C_ALL, "uops_issued.single_mul" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x01, C_ALL, "fp_comp_ops_exe.x87" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x10, C_ALL, "fp_comp_ops_exe.sse_fp_packed_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x20, C_ALL, "fp_comp_ops_exe.sse_fp_scalar_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x40, C_ALL, "fp_comp_ops_exe.sse_packed_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x10, 0x80, C_ALL, "fp_comp_ops_exe.sse_scalar_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x11, 0x01, C_ALL, "simd_fp_256.packed_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x11, 0x02, C_ALL, "simd_fp_256.packed_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x14, 0x01, C_ALL, "arith.fpu_div_active" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x14, 0x01, C_ALL, "arith.fpu_div" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x24, 0x01, C_ALL, "l2_rqsts.demand_data_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x03, C_ALL, "l2_rqsts.all_demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x04, C_ALL, "l2_rqsts.rfo_hits" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x08, C_ALL, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x0C, C_ALL, "l2_rqsts.all_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x10, C_ALL, "l2_rqsts.code_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x20, C_ALL, "l2_rqsts.code_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x30, C_ALL, "l2_rqsts.all_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x40, C_ALL, "l2_rqsts.pf_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x80, C_ALL, "l2_rqsts.pf_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xC0, C_ALL, "l2_rqsts.all_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x01, C_ALL, "l2_store_lock_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x08, C_ALL, "l2_store_lock_rqsts.hit_m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x0F, C_ALL, "l2_store_lock_rqsts.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x01, C_ALL, "l2_l1d_wb_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x04, C_ALL, "l2_l1d_wb_rqsts.hit_e" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x08, C_ALL, "l2_l1d_wb_rqsts.hit_m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x28, 0x0F, C_ALL, "l2_l1d_wb_rqsts.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2E, 0x41, C_ALL, "longest_lat_cache.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2E, 0x4F, C_ALL, "longest_lat_cache.reference" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C(2), "l1d_pend_miss.pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C(2), "l1d_pend_miss.pending_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C(2), "l1d_pend_miss.occurrences" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x49, 0x01, C_ALL, "dtlb_store_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x02, C_ALL, "dtlb_store_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x04, C_ALL, "dtlb_store_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x10, C_ALL, "dtlb_store_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4C, 0x01, C_ALL, "load_hit_pre.sw_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4C, 0x02, C_ALL, "load_hit_pre.hw_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x01, C_ALL, "l1d.replacement" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x58, 0x04, C_ALL, "move_elimination.int_not_eliminated" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x58, 0x08, C_ALL, "move_elimination.simd_not_eliminated" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x58, 0x01, C_ALL, "move_elimination.int_eliminated" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x58, 0x02, C_ALL, "move_elimination.simd_eliminated" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0_trans" , 0x0, ATTR_EDGE, 0x0 }, \
+{ 0x5C, 0x02, C_ALL, "cpl_cycles.ring123" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5E, 0x01, C_ALL, "rs_events.empty_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5F, 0x04, C_ALL, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd_cycles", 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd_cycles", 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x01, C_ALL, "lock_cycles.split_lock_uc_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x02, C_ALL, "lock_cycles.cache_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x02, C_ALL, "idq.empty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x04, C_ALL, "idq.mite_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x04, C_ALL, "idq.mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x08, C_ALL, "idq.dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x08, C_ALL, "idq.dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_activations" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x79, 0x18, C_ALL, "idq.all_dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_any_uops" /* synonym, from Intel SDM */ , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x20, C_ALL, "idq.ms_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x20, C_ALL, "idq.ms_mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x24, C_ALL, "idq.all_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_any_uops" /* synonym, from Intel SDM */ , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x30, C_ALL, "idq.ms_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x30, C_ALL, "idq.ms_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x3C, C_ALL, "idq.mite_all_uops" /* weird name suggested by Intel docs */ , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x3C, C_ALL, "idq.mite_all_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x02, C_ALL, "icache.misses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x01, C_ALL, "itlb_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x02, C_ALL, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x04, C_ALL, "itlb_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x10, C_ALL, "itlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x01, C_ALL, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x04, C_ALL, "ild_stall.iq_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x41, C_ALL, "br_inst_exec.nontaken_cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x81, C_ALL, "br_inst_exec.taken_cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x82, C_ALL, "br_inst_exec.taken_direct_jmp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x84, C_ALL, "br_inst_exec.taken_indirect_jmp_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x88, C_ALL, "br_inst_exec.taken_return_near" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x90, C_ALL, "br_inst_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xA0, C_ALL, "br_inst_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xFF, C_ALL, "br_inst_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x41, C_ALL, "br_misp_exec.nontaken_cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x81, C_ALL, "br_misp_exec.taken_cond" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x84, C_ALL, "br_misp_exec.taken_indirect_jmp_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x88, C_ALL, "br_misp_exec.taken_return_near" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x90, C_ALL, "br_misp_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0xA0, C_ALL, "br_misp_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0xFF, C_ALL, "br_misp_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x01, C_ALL, "uops_dispatched_port.port_0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x02, C_ALL, "uops_dispatched_port.port_1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x04, C_ALL, "uops_dispatched_port.port_2_ld" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x08, C_ALL, "uops_dispatched_port.port_2_sta" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x0C, C_ALL, "uops_dispatched_port.port_2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x10, C_ALL, "uops_dispatched_port.port_3_ld" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x20, C_ALL, "uops_dispatched_port.port_3_sta" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x30, C_ALL, "uops_dispatched_port.port_3" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x40, C_ALL, "uops_dispatched_port.port_4" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x80, C_ALL, "uops_dispatched_port.port_5" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x01, C_ALL, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x04, C_ALL, "resource_stalls.rs" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x08, C_ALL, "resource_stalls.sb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x10, C_ALL, "resource_stalls.rob" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_pending_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA3, 0x02, C0|C1|C2|C3, "cycle_activity.cycles_ldm_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x02, C0|C1|C2|C3, "cycle_activity.cycles_ldm_pending_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA3, 0x08, C(2), "cycle_activity.cycles_l1d_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x08, C(2), "cycle_activity.cycles_l1d_pending_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA3, 0x04, C_ALL, "cycle_activity.cycles_no_execute" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x04, C_ALL, "cycle_activity.cycles_no_execute_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xAB, 0x01, C_ALL, "dsb2mite_switches.count" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xAB, 0x02, C_ALL, "dsb2mite_switches.penalty_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xAC, 0x08, C_ALL, "dsb_fill.exceed_dsb_lines" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xAE, 0x01, C_ALL, "itlb.itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x01, C_ALL, "offcore_requests.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x02, C_ALL, "offcore_requests.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x04, C_ALL, "offcore_requests.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x08, C_ALL, "offcore_requests.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.thread" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core" , 0x0, ATTR_NONE, 0x0 }, \
+/* { 0xB7, 0x01, C_ALL, "offcore_response_0" , 0x0, ATTR_NONE, 0x1A6 }, ignore events that require msr_offset */ \
+/* { 0xBB, 0x01, C_ALL, "offcore_response_1" , 0x0, ATTR_NONE, 0x1A7 }, ignore events that require msr_offset */ \
+{ 0xBD, 0x01, C_ALL, "tlb_flush.dtlb_thread" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBD, 0x20, C_ALL, "tlb_flush.stlb_any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC0, 0x00, C_ALL, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC0, 0x01, C(1), "inst_retired.prec_dist" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC1, 0x08, C_ALL, "other_assists.avx_store" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC1, 0x10, C_ALL, "other_assists.avx_to_sse" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC1, 0x20, C_ALL, "other_assists.sse_to_avx" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.active_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0xC2, 0x02, C_ALL, "uops_retired.retire_slots" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x02, C_ALL, "machine_clears.memory_ordering" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x04, C_ALL, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x20, C_ALL, "machine_clears.maskmov" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x00, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x01, C_ALL, "br_inst_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x02, C_ALL, "br_inst_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x04, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x08, C_ALL, "br_inst_retired.near_return" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x10, C_ALL, "br_inst_retired.not_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x20, C_ALL, "br_inst_retired.near_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x40, C_ALL, "br_inst_retired.far_branch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x00, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x01, C_ALL, "br_misp_retired.conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x02, C_ALL, "br_misp_retired.near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x04, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x10, C_ALL, "br_misp_retired.not_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x20, C_ALL, "br_misp_retired.taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x02, C_ALL, "fp_assist.x87_output" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x04, C_ALL, "fp_assist.x87_input" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x08, C_ALL, "fp_assist.simd_output" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x10, C_ALL, "fp_assist.simd_input" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x1E, C_ALL, "fp_assist.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCC, 0x20, C_ALL, "rob_misc_events.lbr_inserts" , 0x0, ATTR_NONE, 0x0 }, \
+/* { 0xCD, 0x01, C3 , "mem_trans_retired.load_latency" , 0x0, ATTR_NONE, 0x3F6 }, ignore events that require msr_offset */ /* See Section "MSR_PEBS_LD_LAT_THRESHOLD" */ \
+{ 0xCD, 0x02, C3 , "mem_trans_retired.precise_store" , 0x0, ATTR_NONE, 0x0 }, /* See Section "Precise Store Facility" */ \
+{ 0xD0, 0x11, C_ALL, "mem_uops_retired.stlb_miss_loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x12, C_ALL, "mem_uops_retired.stlb_miss_stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x21, C_ALL, "mem_uops_retired.lock_loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x22, C_ALL, "mem_uops_retired.lock_stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x41, C_ALL, "mem_uops_retired.split_loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x42, C_ALL, "mem_uops_retired.split_stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x81, C_ALL, "mem_uops_retired.all_loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD0, 0x82, C_ALL, "mem_uops_retired.all_stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x01, C_ALL, "mem_load_uops_retired.l1_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x02, C_ALL, "mem_load_uops_retired.l2_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x04, C_ALL, "mem_load_uops_retired.llc_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x08, C_ALL, "mem_load_uops_retired.l1_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x10, C_ALL, "mem_load_uops_retired.l2_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x20, C_ALL, "mem_load_uops_retired.llc_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD1, 0x40, C_ALL, "mem_load_uops_retired.hit_lfb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x01, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x02, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x04, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_hitm" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x08, C_ALL, "mem_load_uops_llc_hit_retired.xsnp_none" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD3, 0x01, C_ALL, "mem_load_uops_llc_miss_retired.local_dram" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xE6, 0x1F, C_ALL, "baclears.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x01, C_ALL, "l2_trans.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x02, C_ALL, "l2_trans.rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x04, C_ALL, "l2_trans.code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x08, C_ALL, "l2_trans.all_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x10, C_ALL, "l2_trans.l1d_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x20, C_ALL, "l2_trans.l2_fill" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x40, C_ALL, "l2_trans.l2_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x80, C_ALL, "l2_trans.all_requests" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x01, C_ALL, "l2_lines_in.i" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x02, C_ALL, "l2_lines_in.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x04, C_ALL, "l2_lines_in.e" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x07, C_ALL, "l2_lines_in.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x01, C_ALL, "l2_lines_out.demand_clean" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x02, C_ALL, "l2_lines_out.demand_dirty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x04, C_ALL, "l2_lines_out.pf_clean" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x08, C_ALL, "l2_lines_out.pf_dirty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x0A, C_ALL, "l2_lines_out.dirty_all" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define EVENTS_FAM6_MOD58 */
+
+#define EVENTS_FAM6_MOD62_ONLY /* four mem_load_uops_llc_miss_retired rows (event 0xD3); the local_dram row also appears in EVENTS_FAM6_MOD58 -- NOTE(review): check the two macros are never expanded into the same table */ \
+{ 0xD3, 0x01, C_ALL, "mem_load_uops_llc_miss_retired.local_dram" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD3, 0x04, C_ALL, "mem_load_uops_llc_miss_retired.remote_dram" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD3, 0x10, C_ALL, "mem_load_uops_llc_miss_retired.remote_hitm" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD3, 0x20, C_ALL, "mem_load_uops_llc_miss_retired.remote_fwd" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define EVENTS_FAM6_MOD62_ONLY */
+
+/* Intel Haswell Processor */
+/*
+ * The Haswell tables take into account Bug 17006019,
+ * "libcpc counter names should be based on public Intel documentation -- Haswell",
+ * and are essentially taken from the
+ * Intel SDM, June 2013, Section 19.3, Table 19-2 and Table 19-3.
+ * We omit the Table 19-4 uncore events.
+ */
+
+#define EVENTS_FAM6_MOD60 \
+{ 0x03, 0x02, C_ALL, "ld_blocks.store_forward" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x03, 0x08, C_ALL, "ld_blocks.no_sr" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x05, 0x01, C_ALL, "misalign_mem_ref.loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x05, 0x02, C_ALL, "misalign_mem_ref.stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x01, C_ALL, "ld_blocks_partial.address_alias" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x01, C_ALL, "dtlb_load_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x02, C_ALL, "dtlb_load_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x04, C_ALL, "dtlb_load_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x0E, C_ALL, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x10, C_ALL, "dtlb_load_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x20, C_ALL, "dtlb_load_misses.stlb_hit_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x40, C_ALL, "dtlb_load_misses.stlb_hit_2m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x60, C_ALL, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x80, C_ALL, "dtlb_load_misses.pde_cache_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0D, 0x03, C_ALL, "int_misc.recovery_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x0D, 0x03, C_ALL, "int_misc.recovery_cycles_occurrences" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.core_stall_cycles" , 0x1, ATTR_INV | ATTR_ANY, 0x0 }, \
+{ 0x0E, 0x10, C_ALL, "uops_issued.flags_merge" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x20, C_ALL, "uops_issued.slow_lea" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x40, C_ALL, "uops_issued.single_mul" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x21, C_ALL, "l2_rqsts.demand_data_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x22, C_ALL, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x24, C_ALL, "l2_rqsts.code_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x27, C_ALL, "l2_rqsts.all_demand_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x30, C_ALL, "l2_rqsts.l2_pf_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x3F, C_ALL, "l2_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x41, C_ALL, "l2_rqsts.demand_data_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x42, C_ALL, "l2_rqsts.rfo_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x44, C_ALL, "l2_rqsts.code_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x50, C_ALL, "l2_rqsts.l2_pf_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xE1, C_ALL, "l2_rqsts.all_demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xE2, C_ALL, "l2_rqsts.all_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xE4, C_ALL, "l2_rqsts.all_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xE7, C_ALL, "l2_rqsts.all_demand_references" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xF8, C_ALL, "l2_rqsts.all_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xFF, C_ALL, "l2_rqsts.references" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x27, 0x50, C_ALL, "l2_demand_rqsts.wb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2E, 0x4F, C_ALL, "longest_lat_cache.reference" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2E, 0x41, C_ALL, "longest_lat_cache.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C(2) , "l1d_pend_miss.pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C(2) , "l1d_pend_miss.pending_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C(2) , "l1d_pend_miss.occurences" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x49, 0x01, C_ALL, "dtlb_store_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x02, C_ALL, "dtlb_store_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x04, C_ALL, "dtlb_store_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x0E, C_ALL, "dtlb_store_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x10, C_ALL, "dtlb_store_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x20, C_ALL, "dtlb_store_misses.stlb_hit_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x40, C_ALL, "dtlb_store_misses.stlb_hit_2m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x60, C_ALL, "dtlb_store_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x80, C_ALL, "dtlb_store_misses.pde_cache_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4C, 0x01, C_ALL, "load_hit_pre.sw_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4C, 0x02, C_ALL, "load_hit_pre.hw_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x01, C_ALL, "l1d.replacement" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x54, 0x01, C_ALL, "tx_mem.abort_conflict" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x54, 0x02, C_ALL, "tx_mem.abort_capacity" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x54, 0x04, C_ALL, "tx_mem.abort_hle_store_to_elided_lock" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x54, 0x08, C_ALL, "tx_mem.abort_hle_elision_buffer_not_empty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x54, 0x10, C_ALL, "tx_mem.abort_hle_elision_buffer_mismatch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x54, 0x20, C_ALL, "tx_mem.abort_hle_elision_buffer_unsupported_alignment" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x54, 0x40, C_ALL, "tx_mem.abort_hle_elision_buffer_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x58, 0x01, C_ALL, "move_elimination.int_eliminated" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x58, 0x02, C_ALL, "move_elimination.simd_eliminated" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x58, 0x04, C_ALL, "move_elimination.int_not_eliminated" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x58, 0x08, C_ALL, "move_elimination.simd_not_eliminated" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0_trans" , 0x0, ATTR_EDGE, 0x0 }, \
+{ 0x5C, 0x02, C_ALL, "cpl_cycles.ring123" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5D, 0x01, C_ALL, "tx_exec.misc1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5D, 0x02, C_ALL, "tx_exec.misc2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5D, 0x04, C_ALL, "tx_exec.misc3" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5D, 0x08, C_ALL, "tx_exec.misc4" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5D, 0x10, C_ALL, "tx_exec.misc5" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5E, 0x01, C_ALL, "rs_events.empty_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.cycles_with_demand_data_rd", 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd_cycles", 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.cycles_with_data_rd" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x01, C_ALL, "lock_cycles.split_lock_uc_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x02, C_ALL, "lock_cycles.cache_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x02, C_ALL, "idq.empty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x04, C_ALL, "idq.mite_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x04, C_ALL, "idq.mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x20, C_ALL, "idq.ms_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x20, C_ALL, "idq.ms_mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_any_uops" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x08, C_ALL, "idq.dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x08, C_ALL, "idq.dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_occur" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_any_uops" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x30, C_ALL, "idq.ms_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x30, C_ALL, "idq.ms_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x3C, C_ALL, "idq.mite_all_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x02, C_ALL, "icache.misses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x01, C_ALL, "itlb_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x02, C_ALL, "itlb_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x04, C_ALL, "itlb_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x0E, C_ALL, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x10, C_ALL, "itlb_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x20, C_ALL, "itlb_misses.stlb_hit_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x40, C_ALL, "itlb_misses.stlb_hit_2m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x60, C_ALL, "itlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x01, C_ALL, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x04, C_ALL, "ild_stall.iq_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x41, C_ALL, "br_inst_exec.nontaken_conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x81, C_ALL, "br_inst_exec.taken_conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x82, C_ALL, "br_inst_exec.taken_direct_jump" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x84, C_ALL, "br_inst_exec.taken_indirect_jump_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x88, C_ALL, "br_inst_exec.taken_indirect_near_return" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x90, C_ALL, "br_inst_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xA0, C_ALL, "br_inst_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xFF, C_ALL, "br_inst_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x41, C_ALL, "br_misp_exec.nontaken_conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x81, C_ALL, "br_misp_exec.taken_conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x84, C_ALL, "br_misp_exec.taken_indirect_jump_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x88, C_ALL, "br_misp_exec.taken_return_near" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x90, C_ALL, "br_misp_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0xA0, C_ALL, "br_misp_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0xFF, C_ALL, "br_misp_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_0_uops_deliv.core" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_1_uop_deliv.core" , 0x3, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_2_uop_deliv.core" , 0x2, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_3_uop_deliv.core" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_fe_was_ok" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x01, C_ALL, "uops_executed_port.port_0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x02, C_ALL, "uops_executed_port.port_1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x04, C_ALL, "uops_executed_port.port_2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x08, C_ALL, "uops_executed_port.port_3" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x10, C_ALL, "uops_executed_port.port_4" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x20, C_ALL, "uops_executed_port.port_5" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x40, C_ALL, "uops_executed_port.port_6" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x80, C_ALL, "uops_executed_port.port_7" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x01, C_ALL, "uops_executed_port.port_0_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x02, C_ALL, "uops_executed_port.port_1_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x04, C_ALL, "uops_executed_port.port_2_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x08, C_ALL, "uops_executed_port.port_3_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x10, C_ALL, "uops_executed_port.port_4_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x20, C_ALL, "uops_executed_port.port_5_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x40, C_ALL, "uops_executed_port.port_6_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x80, C_ALL, "uops_executed_port.port_7_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA2, 0x01, C_ALL, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x04, C_ALL, "resource_stalls.rs" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x08, C_ALL, "resource_stalls.sb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x10, C_ALL, "resource_stalls.rob" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_pending_cycles" , 0x2, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x02, C_ALL, "cycle_activity.cycles_ldm_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x02, C_ALL, "cycle_activity.cycles_ldm_pending_cycles" , 0x2, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x05, C_ALL, "cycle_activity.stalls_l2_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x08, C(2) , "cycle_activity.cycles_l1d_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x08, C(2) , "cycle_activity.cycles_l1d_pending_cycles" , 0x8, ATTR_NONE, 0x0 }, \
+{ 0xA8, 0x01, C_ALL, "lsd.uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xAE, 0x01, C_ALL, "itlb.itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x01, C_ALL, "offcore_requests.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x02, C_ALL, "offcore_requests.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x04, C_ALL, "offcore_requests.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x08, C_ALL, "offcore_requests.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core" , 0x0, ATTR_NONE, 0x0 }, \
+/* { 0xB7, 0x01, C_ALL, "off_core_response_0" , 0x0, ATTR_NONE, 0x1A6 }, omit events requiring MSR programming */ \
+/* { 0xBB, 0x01, C_ALL, "off_core_response_1" , 0x0, ATTR_NONE, 0x1A7 }, omit events requiring MSR programming */ \
+{ 0xBC, 0x11, C_ALL, "page_walker_loads.dtlb_l1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x21, C_ALL, "page_walker_loads.itlb_l1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x12, C_ALL, "page_walker_loads.dtlb_l2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x22, C_ALL, "page_walker_loads.itlb_l2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x14, C_ALL, "page_walker_loads.dtlb_l3" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x24, C_ALL, "page_walker_loads.itlb_l3" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x18, C_ALL, "page_walker_loads.dtlb_memory" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x28, C_ALL, "page_walker_loads.itlb_memory" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBD, 0x01, C_ALL, "tlb_flush.dtlb_thread" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBD, 0x20, C_ALL, "tlb_flush.stlb_any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC0, 0x00, C_ALL, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC0, 0x01, C(1) , "inst_retired.prec_dist" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC1, 0x08, C_ALL, "other_assists.avx_to_sse" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC1, 0x10, C_ALL, "other_assists.sse_to_avx" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC1, 0x40, C_ALL, "other_assists.any_wb_assist" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.all" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0xC2, 0x02, C_ALL, "uops_retired.retire_slots" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC3, 0x02, C_ALL, "machine_clears.memory_ordering" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x04, C_ALL, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x20, C_ALL, "machine_clears.maskmov" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x00, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x01, C_ALL, "br_inst_retired.conditional" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x02, C_ALL, "br_inst_retired.near_call" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x04, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x08, C_ALL, "br_inst_retired.near_return" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x10, C_ALL, "br_inst_retired.not_taken" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x20, C_ALL, "br_inst_retired.near_taken" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x40, C_ALL, "br_inst_retired.far_branch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x00, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x01, C_ALL, "br_misp_retired.conditional" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC5, 0x04, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC5, 0x20, C_ALL, "br_misp_retired.near_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC8, 0x01, C_ALL, "hle_retired.start" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC8, 0x02, C_ALL, "hle_retired.commit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC8, 0x04, C_ALL, "hle_retired.aborted" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC8, 0x08, C_ALL, "hle_retired.aborted_misc1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC8, 0x10, C_ALL, "hle_retired.aborted_misc2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC8, 0x20, C_ALL, "hle_retired.aborted_misc3" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC8, 0x40, C_ALL, "hle_retired.aborted_misc4" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC8, 0x80, C_ALL, "hle_retired.aborted_misc5" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC9, 0x01, C_ALL, "rtm_retired.start" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC9, 0x02, C_ALL, "rtm_retired.commit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC9, 0x04, C_ALL, "rtm_retired.aborted" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC9, 0x08, C_ALL, "rtm_retired.aborted_misc1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC9, 0x10, C_ALL, "rtm_retired.aborted_misc2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC9, 0x20, C_ALL, "rtm_retired.aborted_misc3" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC9, 0x40, C_ALL, "rtm_retired.aborted_misc4" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC9, 0x80, C_ALL, "rtm_retired.aborted_misc5" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x02, C_ALL, "fp_assist.x87_output" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x04, C_ALL, "fp_assist.x87_input" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x08, C_ALL, "fp_assist.simd_output" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x10, C_ALL, "fp_assist.simd_input" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x1E, C_ALL, "fp_assist.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCC, 0x20, C_ALL, "rob_misc_events.lbr_inserts" , 0x0, ATTR_NONE, 0x0 }, \
+/* { 0xCD, 0x01, C_ALL, "mem_trans_retired.load_latency" , 0x0, ATTR_NONE, 0x3F6 }, omit events requiring MSR programming */ \
+{ 0xD0, 0x11, C_ALL, "mem_uops_retired.stlb_miss_loads" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x12, C_ALL, "mem_uops_retired.stlb_miss_stores" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x21, C_ALL, "mem_uops_retired.lock_loads" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x22, C_ALL, "mem_uops_retired.lock_stores" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x41, C_ALL, "mem_uops_retired.split_loads" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x42, C_ALL, "mem_uops_retired.split_stores" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x81, C_ALL, "mem_uops_retired.all_loads" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x82, C_ALL, "mem_uops_retired.all_stores" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x01, C_ALL, "mem_load_uops_retired.l1_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x02, C_ALL, "mem_load_uops_retired.l2_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x04, C_ALL, "mem_load_uops_retired.l3_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x08, C_ALL, "mem_load_uops_retired.l1_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x10, C_ALL, "mem_load_uops_retired.l2_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x20, C_ALL, "mem_load_uops_retired.l3_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x40, C_ALL, "mem_load_uops_retired.hit_lfb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xD2, 0x01, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD2, 0x02, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD2, 0x04, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_hitm" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD2, 0x08, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_none" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD3, 0x01, C_ALL, "mem_load_uops_l3_miss_retired.local_dram" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xE6, 0x1F, C_ALL, "baclears.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x01, C_ALL, "l2_trans.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x02, C_ALL, "l2_trans.rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x04, C_ALL, "l2_trans.code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x08, C_ALL, "l2_trans.all_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x10, C_ALL, "l2_trans.l1d_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x20, C_ALL, "l2_trans.l2_fill" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x40, C_ALL, "l2_trans.l2_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x80, C_ALL, "l2_trans.all_requests" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x01, C_ALL, "l2_lines_in.i" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x02, C_ALL, "l2_lines_in.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x04, C_ALL, "l2_lines_in.e" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x07, C_ALL, "l2_lines_in.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x05, C_ALL, "l2_lines_out.demand_clean" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x06, C_ALL, "l2_lines_out.demand_dirty" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define */
+
+/* Intel Broadwell Processor */
+/*
+ * This table is essentially taken from:
+ * https://grok.cz.oracle.com/source/xref/on12-clone/usr/src/uts/intel/pcbe/bdw_pcbe_tbl.c
+ */
+
+#define EVENTS_FAM6_MOD61 \
+{ 0x03, 0x02, C_ALL, "ld_blocks.store_forward" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x03, 0x08, C_ALL, "ld_blocks.no_sr" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x05, 0x01, C_ALL, "misalign_mem_ref.loads" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x05, 0x02, C_ALL, "misalign_mem_ref.stores" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x07, 0x01, C_ALL, "ld_blocks_partial.address_alias" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x08, 0x01, C_ALL, "dtlb_load_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x02, C_ALL, "dtlb_load_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x04, C_ALL, "dtlb_load_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x0E, C_ALL, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x10, C_ALL, "dtlb_load_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x20, C_ALL, "dtlb_load_misses.stlb_hit_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x40, C_ALL, "dtlb_load_misses.stlb_hit_2m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x60, C_ALL, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x80, C_ALL, "dtlb_load_misses.pde_cache_miss" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x0D, 0x03, C_ALL, "int_misc.recovery_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x0D, 0x03, C_ALL, "int_misc.recovery_cycles_any" , 0x1, ATTR_ANY , 0x0 }, \
+/* Private event, not publicly documented by Intel */ \
+{ 0x0D, 0x03, C_ALL, "int_misc.recovery_cycles_occurrences" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x0D, 0x08, C_ALL, "int_misc.rat_stall_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x0E, 0x01, C_ALL, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x10, C_ALL, "uops_issued.flags_merge" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x20, C_ALL, "uops_issued.slow_lea" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x40, C_ALL, "uops_issued.single_mul" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.core_stall_cycles" , 0x1,(ATTR_INV | ATTR_ANY), 0x0 }, \
+ \
+{ 0x14, 0x01, C_ALL, "arith.fpu_div_active" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x24, 0x21, C_ALL, "l2_rqsts.demand_data_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x41, C_ALL, "l2_rqsts.demand_data_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x30, C_ALL, "l2_rqsts.l2_pf_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x50, C_ALL, "l2_rqsts.l2_pf_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xE1, C_ALL, "l2_rqsts.all_demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xE2, C_ALL, "l2_rqsts.all_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xE4, C_ALL, "l2_rqsts.all_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xF8, C_ALL, "l2_rqsts.all_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x42, C_ALL, "l2_rqsts.rfo_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x22, C_ALL, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x44, C_ALL, "l2_rqsts.code_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x24, C_ALL, "l2_rqsts.code_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x27, C_ALL, "l2_rqsts.all_demand_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xE7, C_ALL, "l2_rqsts.all_demand_references" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x3F, C_ALL, "l2_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xFF, C_ALL, "l2_rqsts.references" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x27, 0x50, C_ALL, "l2_demand_rqsts.wb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.thread_p_any" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk_any" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0x3C, 0x02, C_ALL, "cpu_clk_thread_unhalted.one_thread_active" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x48, 0x01, C(2) , "l1d_pend_miss.pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C(2) , "l1d_pend_miss.pending_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C(2) , "l1d_pend_miss.pending_cycles_any" , 0x1, ATTR_ANY , 0x0 }, \
+/* Private event, not publicly documented by Intel */ \
+{ 0x48, 0x01, C(2) , "l1d_pend_miss.occurences" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x48, 0x02, C_ALL, "l1d_pend_miss.fb_full" , 0x1, ATTR_NONE, 0x0 }, \
+ \
+{ 0x49, 0x01, C_ALL, "dtlb_store_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x02, C_ALL, "dtlb_store_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x04, C_ALL, "dtlb_store_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x0E, C_ALL, "dtlb_store_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x10, C_ALL, "dtlb_store_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x20, C_ALL, "dtlb_store_misses.stlb_hit_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x40, C_ALL, "dtlb_store_misses.stlb_hit_2m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x60, C_ALL, "dtlb_store_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x80, C_ALL, "dtlb_store_misses.pde_cache_miss" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x4C, 0x01, C_ALL, "load_hit_pre.sw_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4C, 0x02, C_ALL, "load_hit_pre.hw_pf" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x4F, 0x10, C_ALL, "ept.walk_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x51, 0x01, C_ALL, "l1d.replacement" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x54, 0x01, C_ALL, "tx_mem.abort_conflict" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0x54, 0x02, C_ALL, "tx_mem.abort_capacity_write" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0x54, 0x04, C_ALL, "tx_mem.abort_hle_store_to_elided_lock" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0x54, 0x08, C_ALL, "tx_mem.abort_hle_elision_buffer_not_empty" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0x54, 0x10, C_ALL, "tx_mem.abort_hle_elision_buffer_mismatch" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0x54, 0x20, C_ALL, "tx_mem.abort_hle_elision_buffer_unsupported_alignment" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0x54, 0x40, C_ALL, "tx_mem.hle_elision_buffer_full" , 0x0, ATTR_TSX , 0x0 }, \
+ \
+{ 0x58, 0x01, C_ALL, "move_elimination.int_eliminated" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x58, 0x02, C_ALL, "move_elimination.simd_eliminated" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x58, 0x04, C_ALL, "move_elimination.int_not_eliminated" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x58, 0x08, C_ALL, "move_elimination.simd_not_eliminated" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5C, 0x01, C_ALL, "cpl_cycles.ring0_trans" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x5C, 0x02, C_ALL, "cpl_cycles.ring123" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x5D, 0x01, C_ALL, "tx_exec.misc1" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0x5D, 0x02, C_ALL, "tx_exec.misc2" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0x5D, 0x04, C_ALL, "tx_exec.misc3" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0x5D, 0x08, C_ALL, "tx_exec.misc4" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0x5D, 0x10, C_ALL, "tx_exec.misc5" , 0x0, ATTR_TSX , 0x0 }, \
+ \
+{ 0x5E, 0x01, C_ALL, "rs_events.empty_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5E, 0x01, C_ALL, "rs_events.empty_end" , 0x1, (ATTR_INV | ATTR_EDGE), 0x0 }, \
+ \
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.cycles_with_demand_data_rd", 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd_ge_6 " , 0x6, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+/* Private event, not publicly documented by Intel */ \
+{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd_cycles", 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+/* Private event, not publicly documented by Intel */ \
+{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.cycles_with_data_rd" , 0x1, ATTR_NONE, 0x0 }, \
+ \
+{ 0x63, 0x01, C_ALL, "lock_cycles.split_lock_uc_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x63, 0x02, C_ALL, "lock_cycles.cache_lock_duration" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x79, 0x02, C_ALL, "idq.empty" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x04, C_ALL, "idq.mite_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x04, C_ALL, "idq.mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x08, C_ALL, "idq.dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x08, C_ALL, "idq.dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_occur" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_any_uops" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x20, C_ALL, "idq.ms_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_any_uops" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x30, C_ALL, "idq.ms_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x30, C_ALL, "idq.ms_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x30, C_ALL, "idq.ms_switches" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x79, 0x3C, C_ALL, "idq.mite_all_uops" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x80, 0x01, C_ALL, "icache.hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x02, C_ALL, "icache.misses" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x80, 0x04, C_ALL, "icache.ifdata_stall" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x85, 0x01, C_ALL, "itlb_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x02, C_ALL, "itlb_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x04, C_ALL, "itlb_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x0E, C_ALL, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x10, C_ALL, "itlb_misses.walk_duration" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x20, C_ALL, "itlb_misses.stlb_hit_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x40, C_ALL, "itlb_misses.stlb_hit_2m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x60, C_ALL, "itlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x87, 0x01, C_ALL, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x04, C_ALL, "ild_stall.iq_full" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x88, 0x41, C_ALL, "br_inst_exec.nontaken_conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x81, C_ALL, "br_inst_exec.taken_conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x82, C_ALL, "br_inst_exec.taken_direct_jump" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x84, C_ALL, "br_inst_exec.taken_indirect_jump_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x88, C_ALL, "br_inst_exec.taken_indirect_near_return" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0x90, C_ALL, "br_inst_exec.taken_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xA0, C_ALL, "br_inst_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xC1, C_ALL, "br_inst_exec.all_conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xC2, C_ALL, "br_inst_exec.all_direct_jmp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xC4, C_ALL, "br_inst_exec.all_indirect_jump_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xC8, C_ALL, "br_inst_exec.all_indirect_near_return" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xD0, C_ALL, "br_inst_exec.all_direct_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x88, 0xFF, C_ALL, "br_inst_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0x89, 0x41, C_ALL, "br_misp_exec.nontaken_conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x81, C_ALL, "br_misp_exec.taken_conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0x84, C_ALL, "br_misp_exec.taken_indirect_jump_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \
+/* Private event, not publicly documented by Intel */ \
+{ 0x89, 0x88, C_ALL, "br_misp_exec.taken_return_near" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0xC1, C_ALL, "br_misp_exec.all_conditional" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0xC4, C_ALL, "br_misp_exec.all_indirect_jump_non_call_ret" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0xA0, C_ALL, "br_misp_exec.taken_indirect_near_call" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x89, 0xFF, C_ALL, "br_misp_exec.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+/* Use Cmask to qualify uop b/w */ \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_0_uops_deliv.core" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_1_uop_deliv.core" , 0x3, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_2_uop_deliv.core" , 0x2, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_3_uop_deliv.core" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_fe_was_ok" , 0x1, ATTR_INV , 0x0 }, \
+ \
+{ 0xA0, 0x03, C_ALL, "uop_dispatches_cancelled.simd_prf" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xA1, 0x01, C_ALL, "uops_executed_port.port_0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x02, C_ALL, "uops_executed_port.port_1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x04, C_ALL, "uops_executed_port.port_2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x08, C_ALL, "uops_executed_port.port_3" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x10, C_ALL, "uops_executed_port.port_4" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x20, C_ALL, "uops_executed_port.port_5" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x40, C_ALL, "uops_executed_port.port_6" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x80, C_ALL, "uops_executed_port.port_7" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x01, C_ALL, "uops_executed_port.port_0_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x02, C_ALL, "uops_executed_port.port_1_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x04, C_ALL, "uops_executed_port.port_2_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x08, C_ALL, "uops_executed_port.port_3_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x10, C_ALL, "uops_executed_port.port_4_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x20, C_ALL, "uops_executed_port.port_5_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x40, C_ALL, "uops_executed_port.port_6_core" , 0x0, ATTR_ANY , 0x0 }, \
+{ 0xA1, 0x80, C_ALL, "uops_executed_port.port_7_core" , 0x0, ATTR_ANY , 0x0 }, \
+ \
+{ 0xA2, 0x01, C_ALL, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x04, C_ALL, "resource_stalls.rs" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x08, C_ALL, "resource_stalls.sb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x10, C_ALL, "resource_stalls.rob" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_pending" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x02, C_ALL, "cycle_activity.cycles_ldm_pending" , 0x2, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x04, C_ALL, "cycle_activity.cycles_no_execute" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x05, C_ALL, "cycle_activity.stalls_l2_pending" , 0x5, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x06, C_ALL, "cycle_activity.stalls_ldm_pending" , 0x6, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x08, C(2) , "cycle_activity.cycles_l1d_pending" , 0x8, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x0C, C(2) , "cycle_activity.stalls_l1d_pending" , 0xC, ATTR_NONE, 0x0 }, \
+ \
+{ 0xA8, 0x01, C_ALL, "lsd.uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA8, 0x01, C_ALL, "lsd.cycles_active" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xA8, 0x01, C_ALL, "lsd.cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \
+ \
+{ 0xAB, 0x02, C_ALL, "dsb2mite_switches.penalty_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xAE, 0x01, C_ALL, "itlb.itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xB0, 0x01, C_ALL, "offcore_requests.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x02, C_ALL, "offcore_requests.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x04, C_ALL, "offcore_requests.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x08, C_ALL, "offcore_requests.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xB1, 0x01, C_ALL, "uops_executed.thread" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_1_uop_exec" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_2_uops_exec" , 0x2, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_3_uops_exec" , 0x3, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_4_uops_exec" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_none" , 0x0, ATTR_INV , 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_1" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_2" , 0x2, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_3" , 0x3, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_4" , 0x4, ATTR_NONE, 0x0 }, \
+ \
+{ 0xB2, 0x01, C_ALL, "offcore_requests_buffer.sq_full" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+/* \
+ * See Section "Off-core Response Performance Monitoring" \
+ * \
+ * Though these two off_core events support all counters, only 1 of \
+ * them can be used at any given time. This is due to the extra MSR \
+ * programming required. \
+ */ \
+/* { 0xB7, 0x01, C_ALL, "offcore_response_0" , 0x0, ATTR_NONE, OFFCORE_RSP_0 }, omit events requiring MSR programming */ \
+/* { 0xBB, 0x01, C_ALL, "offcore_response_1" , 0x0, ATTR_NONE, OFFCORE_RSP_1 }, omit events requiring MSR programming */ \
+ \
+{ 0xBC, 0x11, C_ALL, "page_walker_loads.dtlb_l1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x21, C_ALL, "page_walker_loads.itlb_l1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x12, C_ALL, "page_walker_loads.dtlb_l2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x22, C_ALL, "page_walker_loads.itlb_l2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x14, C_ALL, "page_walker_loads.dtlb_l3" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x24, C_ALL, "page_walker_loads.itlb_l3" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBC, 0x18, C_ALL, "page_walker_loads.dtlb_memory" , 0x0, ATTR_NONE, 0x0 }, \
+/* itlb_memory is not in the Intel SDM or spreadsheet for Broadwell; "cputrack -h" does have it though */ \
+{ 0xBC, 0x28, C_ALL, "page_walker_loads.itlb_memory" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xBD, 0x01, C_ALL, "tlb_flush.dtlb_thread" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBD, 0x20, C_ALL, "tlb_flush.stlb_any" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xC0, 0x00, C_ALL, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC0, 0x02, C_ALL, "inst_retired.x87" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xC1, 0x08, C_ALL, "other_assists.avx_to_sse" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC1, 0x10, C_ALL, "other_assists.sse_to_avx" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC1, 0x40, C_ALL, "other_assists.any_wb_assist" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xC2, 0x01, C_ALL, "uops_retired.all" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.stall_cycles" , 0x1, ATTR_INV , 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.total_cycles" , 0xA, ATTR_INV , 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.core_stall_cycles" , 0x1, (ATTR_INV | ATTR_ANY), 0x0 }, \
+{ 0xC2, 0x02, C_ALL, "uops_retired.retire_slots" , 0x0, ATTR_PEBS, 0x0 }, \
+ \
+{ 0xC3, 0x01, C_ALL, "machine_clears.cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x01, C_ALL, "machine_clears.count" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0xC3, 0x02, C_ALL, "machine_clears.memory_ordering" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x04, C_ALL, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x20, C_ALL, "machine_clears.maskmov" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xC4, 0x01, C_ALL, "br_inst_retired.conditional" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x02, C_ALL, "br_inst_retired.near_call" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x08, C_ALL, "br_inst_retired.near_return" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x10, C_ALL, "br_inst_retired.not_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x20, C_ALL, "br_inst_retired.near_taken" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x40, C_ALL, "br_inst_retired.far_branch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x02, C_ALL, "br_inst_retired.near_call_r3" , 0x0, ATTR_PEBS, 0x0 }, \
+ \
+{ 0xC5, 0x01, C_ALL, "br_misp_retired.conditional" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC5, 0x20, C_ALL, "br_misp_retired.near_taken" , 0x0, ATTR_PEBS, 0x0 }, \
+ \
+{ 0xC7, 0x01, C_ALL, "fp_arith_inst_retired.scalar_double" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC7, 0x02, C_ALL, "fp_arith_inst_retired.scalar_single" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC7, 0x03, C_ALL, "fp_arith_inst_retired.scalar" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC7, 0x04, C_ALL, "fp_arith_inst_retired.128b_packed_double" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC7, 0x08, C_ALL, "fp_arith_inst_retired.128b_packed_single" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC7, 0x10, C_ALL, "fp_arith_inst_retired.256b_packed_double" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC7, 0x15, C_ALL, "fp_arith_inst_retired.double" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC7, 0x20, C_ALL, "fp_arith_inst_retired.256b_packed_single" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC7, 0x2A, C_ALL, "fp_arith_inst_retired.single" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC7, 0x3C, C_ALL, "fp_arith_inst_retired.packed" , 0x0, ATTR_PEBS, 0x0 }, \
+ \
+{ 0xC8, 0x01, C_ALL, "hle_retired.start" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0xC8, 0x02, C_ALL, "hle_retired.commit" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0xC8, 0x04, C_ALL, "hle_retired.aborted" , 0x0, ATTR_PEBS | ATTR_TSX, 0x0 }, \
+{ 0xC8, 0x08, C_ALL, "hle_retired.aborted_misc1" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0xC8, 0x10, C_ALL, "hle_retired.aborted_misc2" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0xC8, 0x20, C_ALL, "hle_retired.aborted_misc3" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0xC8, 0x40, C_ALL, "hle_retired.aborted_misc4" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0xC8, 0x80, C_ALL, "hle_retired.aborted_misc5" , 0x0, ATTR_TSX , 0x0 }, \
+ \
+{ 0xC9, 0x01, C_ALL, "rtm_retired.start" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0xC9, 0x02, C_ALL, "rtm_retired.commit" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0xC9, 0x04, C_ALL, "rtm_retired.aborted" , 0x0, ATTR_PEBS | ATTR_TSX, 0x0 }, \
+{ 0xC9, 0x08, C_ALL, "rtm_retired.aborted_misc1" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0xC9, 0x10, C_ALL, "rtm_retired.aborted_misc2" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0xC9, 0x20, C_ALL, "rtm_retired.aborted_misc3" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0xC9, 0x40, C_ALL, "rtm_retired.aborted_misc4" , 0x0, ATTR_TSX , 0x0 }, \
+{ 0xC9, 0x80, C_ALL, "rtm_retired.aborted_misc5" , 0x0, ATTR_TSX , 0x0 }, \
+ \
+{ 0xCA, 0x02, C_ALL, "fp_assist.x87_output" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x04, C_ALL, "fp_assist.x87_input" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x08, C_ALL, "fp_assist.simd_output" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x10, C_ALL, "fp_assist.simd_input" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCA, 0x1E, C_ALL, "fp_assist.any" , 0x1, ATTR_NONE, 0x0 }, \
+ \
+{ 0xCC, 0x20, C_ALL, "rob_misc_events.lbr_inserts" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+/* See Section "MSR_PEBS_LD_LAT_THRESHOLD" */ \
+/* { 0xCD, 0x01, C(3) , "mem_trans_retired.load_latency" , 0x0, ATTR_PEBS_ONLY_LD_LAT, PEBS_LD_LAT_THRESHOLD }, omit events requiring MSR programming */ \
+ \
+/* \
+ * Event 0xD0 must be combined with umasks 0x1(loads) or 0x2(stores) \
+ */ \
+{ 0xD0, 0x11, C_ALL, "mem_uops_retired.stlb_miss_loads" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x12, C_ALL, "mem_uops_retired.stlb_miss_stores" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x21, C_ALL, "mem_uops_retired.lock_loads" , 0x0, ATTR_PEBS, 0x0 }, \
+/* Private event, not public by Intel */ \
+{ 0xD0, 0x22, C_ALL, "mem_uops_retired.lock_stores" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x41, C_ALL, "mem_uops_retired.split_loads" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x42, C_ALL, "mem_uops_retired.split_stores" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x81, C_ALL, "mem_uops_retired.all_loads" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x82, C_ALL, "mem_uops_retired.all_stores" , 0x0, ATTR_PEBS, 0x0 }, \
+ \
+{ 0xD1, 0x01, C_ALL, "mem_load_uops_retired.l1_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x02, C_ALL, "mem_load_uops_retired.l2_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x04, C_ALL, "mem_load_uops_retired.l3_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x08, C_ALL, "mem_load_uops_retired.l1_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x10, C_ALL, "mem_load_uops_retired.l2_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x20, C_ALL, "mem_load_uops_retired.l3_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x40, C_ALL, "mem_load_uops_retired.hit_lfb" , 0x0, ATTR_PEBS, 0x0 }, \
+ \
+{ 0xD2, 0x01, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD2, 0x02, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD2, 0x04, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_hitm" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD2, 0x08, C_ALL, "mem_load_uops_l3_hit_retired.xsnp_none" , 0x0, ATTR_PEBS, 0x0 }, \
+ \
+{ 0xD3, 0x01, C_ALL, "mem_load_uops_l3_miss_retired.local_dram" , 0x0, ATTR_PEBS, 0x0 }, \
+ \
+/* The mem_load_l4_miss_retired events are not in "cputrack -h" output nor in the Intel spreadsheet. */ \
+/* { 0xD5, 0x01, C_ALL, "mem_load_l4_miss_retired.local_hit" , 0x0, ATTR_NONE, 0x0 }, */ \
+/* { 0xD5, 0x04, C_ALL, "mem_load_l4_miss_retired.local_miss" , 0x0, ATTR_NONE, 0x0 }, */ \
+ \
+{ 0xE6, 0x1F, C_ALL, "baclears.any" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xF0, 0x01, C_ALL, "l2_trans.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x02, C_ALL, "l2_trans.rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x04, C_ALL, "l2_trans.code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x08, C_ALL, "l2_trans.all_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x10, C_ALL, "l2_trans.l1d_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x20, C_ALL, "l2_trans.l2_fill" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x40, C_ALL, "l2_trans.l2_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x80, C_ALL, "l2_trans.all_requests" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xF1, 0x01, C_ALL, "l2_lines_in.i" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x02, C_ALL, "l2_lines_in.s" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x04, C_ALL, "l2_lines_in.e" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x07, C_ALL, "l2_lines_in.all" , 0x0, ATTR_NONE, 0x0 }, \
+ \
+{ 0xF2, 0x05, C_ALL, "l2_lines_out.demand_clean" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define */
+
+
+/* Intel Skylake Processor */
+/*
+ * This table is essentially taken from:
+ *   https://grok.cz.oracle.com/source/xref/on12-clone/usr/src/uts/intel/pcbe/skl_pcbe_tbl.c
+ * Also, from:
+ *   https://grok.cz.oracle.com/source/xref/on12-clone/usr/src/uts/intel/pcbe/fam6_pcbe.h
+ *     { 0xc0, 0x00, C_ALL, "inst_retired.any_p" },
+ *     { 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" },
+ *     { 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" },
+ *     { 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" },
+ *     { 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" },
+ *     { 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }
+ * And, from:
+ *   https://grok.cz.oracle.com/source/xref/on12-clone/usr/src/uts/intel/pcbe/core_pcbe.c
+ *     { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
+ *     { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
+ */
+#define EVENTS_FAM6_MOD78 \
+{ 0x03, 0x02, C_ALL, "ld_blocks.store_forward" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x03, 0x08, C_ALL, "ld_blocks.no_sr" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x07, 0x01, C_ALL, "ld_blocks_partial.address_alias" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x01, C_ALL, "dtlb_load_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x02, C_ALL, "dtlb_load_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x04, C_ALL, "dtlb_load_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x08, C_ALL, "dtlb_load_misses.walk_completed_1g" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x0E, C_ALL, "dtlb_load_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x10, C_ALL, "dtlb_load_misses.walk_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x10, C_ALL, "dtlb_load_misses.walk_active" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x08, 0x20, C_ALL, "dtlb_load_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0D, 0x01, C_ALL, "int_misc.recovery_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0D, 0x01, C_ALL, "int_misc.recovery_cycles_any" , 0x0, ATTR_ANY, 0x0 }, \
+{ 0x0D, 0x80, C_ALL, "int_misc.clear_resteer_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x01, C_ALL, "uops_issued.stall_cycles" , 0x1, ATTR_INV, 0x0 }, \
+{ 0x0E, 0x02, C_ALL, "uops_issued.vector_width_mismatch" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x0E, 0x20, C_ALL, "uops_issued.slow_lea" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x14, 0x01, C_ALL, "arith.divider_active" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x21, C_ALL, "l2_rqsts.demand_data_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x22, C_ALL, "l2_rqsts.rfo_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x24, C_ALL, "l2_rqsts.code_rd_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x27, C_ALL, "l2_rqsts.all_demand_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x38, C_ALL, "l2_rqsts.pf_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x3F, C_ALL, "l2_rqsts.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x41, C_ALL, "l2_rqsts.demand_data_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x42, C_ALL, "l2_rqsts.rfo_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0x44, C_ALL, "l2_rqsts.code_rd_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xD8, C_ALL, "l2_rqsts.pf_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xE1, C_ALL, "l2_rqsts.all_demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xE2, C_ALL, "l2_rqsts.all_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xE4, C_ALL, "l2_rqsts.all_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xE7, C_ALL, "l2_rqsts.all_demand_references" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xF8, C_ALL, "l2_rqsts.all_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x24, 0xFF, C_ALL, "l2_rqsts.references" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.thread_p_any" , 0x0, ATTR_ANY, 0x0 }, \
+{ 0x3C, 0x00, C_ALL, "cpu_clk_unhalted.ring0_trans" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x3C, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x3C, 0x01, C_ALL, "cpu_clk_thread_unhalted.ref_xclk_any" , 0x0, ATTR_ANY, 0x0 }, \
+{ 0x3C, 0x02, C_ALL, "cpu_clk_thread_unhalted.one_thread_active" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C_ALL, "l1d_pend_miss.pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C_ALL, "l1d_pend_miss.pending_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x48, 0x01, C_ALL, "l1d_pend_miss.pending_cycles_any" , 0x1, ATTR_ANY, 0x0 }, \
+{ 0x48, 0x02, C_ALL, "l1d_pend_miss.fb_full" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x01, C_ALL, "dtlb_store_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x02, C_ALL, "dtlb_store_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x04, C_ALL, "dtlb_store_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x08, C_ALL, "dtlb_store_misses.walk_completed_1g" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x0E, C_ALL, "dtlb_store_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x10, C_ALL, "dtlb_store_misses.walk_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x10, C_ALL, "dtlb_store_misses.walk_active" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x49, 0x20, C_ALL, "dtlb_store_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4C, 0x01, C_ALL, "load_hit_pre.sw_pf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x4F, 0x10, C_ALL, "ept.walk_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x51, 0x01, C_ALL, "l1d.replacement" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x54, 0x01, C_ALL, "tx_mem.abort_conflict" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0x54, 0x02, C_ALL, "tx_mem.abort_capacity" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0x54, 0x04, C_ALL, "tx_mem.abort_hle_store_to_elided_lock" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0x54, 0x08, C_ALL, "tx_mem.abort_hle_elision_buffer_not_empty" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0x54, 0x10, C_ALL, "tx_mem.abort_hle_elision_buffer_mismatch" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0x54, 0x20, C_ALL, "tx_mem.abort_hle_elision_buffer_unsupported_alignment", 0x0, ATTR_TSX, 0x0 }, \
+{ 0x54, 0x40, C_ALL, "tx_mem.hle_elision_buffer_full" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0x5D, 0x01, C_ALL, "tx_exec.misc1" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0x5D, 0x02, C_ALL, "tx_exec.misc2" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0x5D, 0x04, C_ALL, "tx_exec.misc3" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0x5D, 0x08, C_ALL, "tx_exec.misc4" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0x5D, 0x10, C_ALL, "tx_exec.misc5" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0x5E, 0x01, C_ALL, "rs_events.empty_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x5E, 0x01, C_ALL, "rs_events.empty_end" , 0x1, (ATTR_INV | ATTR_EDGE), 0x0 }, \
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.cycles_with_demand_data_rd", 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x01, C_ALL, "offcore_requests_outstanding.demand_data_rd_ge_6" , 0x6, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x02, C_ALL, "offcore_requests_outstanding.cycles_with_demand_code_rd", 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x04, C_ALL, "offcore_requests_outstanding.cycles_with_demand_rfo",0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x08, C_ALL, "offcore_requests_outstanding.cycles_with_data_rd" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x10, C_ALL, "offcore_requests_outstanding.l3_miss_demand_data_rd",0x0, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x10, C_ALL, "offcore_requests_outstanding.cycles_with_l3_miss_demand_data_rd", 0x1, ATTR_NONE, 0x0 }, \
+{ 0x60, 0x10, C_ALL, "offcore_requests_outstanding.l3_miss_demand_data_rd_ge_6",0x6, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x04, C_ALL, "idq.mite_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x04, C_ALL, "idq.mite_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x08, C_ALL, "idq.dsb_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x08, C_ALL, "idq.dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x10, C_ALL, "idq.ms_dsb_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x18, C_ALL, "idq.all_dsb_cycles_any_uops" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x20, C_ALL, "idq.ms_mite_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x24, C_ALL, "idq.all_mite_cycles_any_uops" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x30, C_ALL, "idq.ms_uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x30, C_ALL, "idq.ms_cycles" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x79, 0x30, C_ALL, "idq.ms_switches" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0x80, 0x04, C_ALL, "icache_16b.ifdata_stall" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x83, 0x01, C_ALL, "icache_64b.iftag_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x83, 0x02, C_ALL, "icache_64b.iftag_miss" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x83, 0x04, C_ALL, "icache_64b.iftag_stall" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x01, C_ALL, "itlb_misses.miss_causes_a_walk" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x02, C_ALL, "itlb_misses.walk_completed_4k" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x04, C_ALL, "itlb_misses.walk_completed_2m_4m" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x08, C_ALL, "itlb_misses.walk_completed_1g" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x0E, C_ALL, "itlb_misses.walk_completed" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x10, C_ALL, "itlb_misses.walk_pending" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x10, C_ALL, "itlb_misses.walk_active" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x85, 0x20, C_ALL, "itlb_misses.stlb_hit" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x87, 0x01, C_ALL, "ild_stall.lcp" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_0_uops_deliv.core" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_1_uop_deliv.core" , 0x3, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_2_uop_deliv.core" , 0x2, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_le_3_uop_deliv.core" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0x9C, 0x01, C_ALL, "idq_uops_not_delivered.cycles_fe_was_ok" , 0x1, ATTR_INV, 0x0 }, \
+{ 0xA1, 0x01, C_ALL, "uops_dispatched_port.port_0" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x02, C_ALL, "uops_dispatched_port.port_1" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x04, C_ALL, "uops_dispatched_port.port_2" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x08, C_ALL, "uops_dispatched_port.port_3" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x10, C_ALL, "uops_dispatched_port.port_4" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x20, C_ALL, "uops_dispatched_port.port_5" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x40, C_ALL, "uops_dispatched_port.port_6" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA1, 0x80, C_ALL, "uops_dispatched_port.port_7" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x01, C_ALL, "resource_stalls.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA2, 0x08, C_ALL, "resource_stalls.sb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x01, C_ALL, "cycle_activity.cycles_l2_miss" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x02, C_ALL, "cycle_activity.cycles_l3_miss" , 0x2, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x04, C_ALL, "cycle_activity.stalls_total" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x05, C_ALL, "cycle_activity.stalls_l2_miss" , 0x5, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x06, C_ALL, "cycle_activity.stalls_l3_miss" , 0x6, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x08, C_ALL, "cycle_activity.cycles_l1d_miss" , 0x8, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x0C, C_ALL, "cycle_activity.stalls_l1d_miss" , 0xC, ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x10, C_ALL, "cycle_activity.cycles_mem_any" , 0x10,ATTR_NONE, 0x0 }, \
+{ 0xA3, 0x14, C_ALL, "cycle_activity.stalls_mem_any" , 0x14,ATTR_NONE, 0x0 }, \
+{ 0xA6, 0x01, C_ALL, "exe_activity.exe_bound_0_ports" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA6, 0x02, C_ALL, "exe_activity.1_ports_util" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA6, 0x04, C_ALL, "exe_activity.2_ports_util" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA6, 0x08, C_ALL, "exe_activity.3_ports_util" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA6, 0x10, C_ALL, "exe_activity.4_ports_util" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA6, 0x40, C_ALL, "exe_activity.bound_on_stores" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA8, 0x01, C_ALL, "lsd.uops" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xA8, 0x01, C_ALL, "lsd.cycles_active" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xA8, 0x01, C_ALL, "lsd.cycles_4_uops" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0xAB, 0x02, C_ALL, "dsb2mite_switches.penalty_cycles" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xAE, 0x01, C_ALL, "itlb.itlb_flush" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x01, C_ALL, "offcore_requests.demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x02, C_ALL, "offcore_requests.demand_code_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x04, C_ALL, "offcore_requests.demand_rfo" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x08, C_ALL, "offcore_requests.all_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x10, C_ALL, "offcore_requests.l3_miss_demand_data_rd" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB0, 0x80, C_ALL, "offcore_requests.all_requests" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.thread" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_1_uop_exec" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_2_uops_exec" , 0x2, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_3_uops_exec" , 0x3, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.cycles_ge_4_uops_exec" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x01, C_ALL, "uops_executed.stall_cycles" , 0x1, ATTR_INV, 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_none" , 0x1, ATTR_INV, 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_1" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_2" , 0x2, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_3" , 0x3, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x02, C_ALL, "uops_executed.core_cycles_ge_4" , 0x4, ATTR_NONE, 0x0 }, \
+{ 0xB1, 0x10, C_ALL, "uops_executed.x87" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xB2, 0x01, C_ALL, "offcore_requests_buffer.sq_full" , 0x0, ATTR_NONE, 0x0 }, \
+\
+ /* \
+ * See Section "Off-core Response Performance Monitoring" \
+ * \
+ * Though these two off_core events support all counters, only 1 of \
+ * them can be used at any given time. This is due to the extra MSR \
+ * programming required. \
+ */ \
+/* { 0xB7, 0x01, C_ALL, "offcore_response_0" , 0x0, ATTR_NONE, OFFCORE_RSP_0 }, omit events requiring MSR programming */ \
+/* { 0xBB, 0x01, C_ALL, "offcore_response_1" , 0x0, ATTR_NONE, OFFCORE_RSP_1 }, omit events requiring MSR programming */ \
+{ 0xBD, 0x01, C_ALL, "tlb_flush.dtlb_thread" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xBD, 0x20, C_ALL, "tlb_flush.stlb_any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc0, 0x00, C_ALL, "inst_retired.any_p" , 0x0, ATTR_NONE, 0x0 }, \
+/* { 0xC0, 0x1, C(1), "inst_retired.prec_dist" , 0x0, ATTR_PEBS_ONLY, 0x0 }, omit PEBS-only events */ \
+/* { 0xC0, 0x1, (C(0) | C(2) | C(3)), "inst_retired.total_cycles_ps" , 0x0A, (ATTR_PEBS_ONLY | ATTR_INV), 0x0 }, omit PEBS-only events */ \
+{ 0xC1, 0x3F, C_ALL, "other_assists.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.stall_cycles" , 0x1, ATTR_INV, 0x0 }, \
+{ 0xC2, 0x01, C_ALL, "uops_retired.total_cycles" , 0x0A, ATTR_INV, 0x0 }, \
+{ 0xC2, 0x02, C_ALL, "uops_retired.retire_slots" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x01, C_ALL, "machine_clears.count" , 0x1, ATTR_EDGE, 0x0 }, \
+{ 0xC3, 0x02, C_ALL, "machine_clears.memory_ordering" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC3, 0x04, C_ALL, "machine_clears.smc" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x01, C_ALL, "br_inst_retired.conditional" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x02, C_ALL, "br_inst_retired.near_call" , 0x0, ATTR_PEBS, 0x0 }, \
+/* { 0xC4, 0x04, C_ALL, "br_inst_retired.all_branches_pebs" , 0x0, ATTR_PEBS_ONLY, 0x0 }, omit PEBS-only events */ \
+{ 0xC4, 0x08, C_ALL, "br_inst_retired.near_return" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x10, C_ALL, "br_inst_retired.not_taken" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC4, 0x20, C_ALL, "br_inst_retired.near_taken" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC4, 0x40, C_ALL, "br_inst_retired.far_branch" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC5, 0x01, C_ALL, "br_misp_retired.conditional" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xC5, 0x02, C_ALL, "br_misp_retired.near_call" , 0x0, ATTR_PEBS, 0x0 }, \
+/* { 0xC5, 0x04, C_ALL, "br_misp_retired.all_branches_pebs" , 0x0, ATTR_PEBS_ONLY, 0x0 }, omit PEBS-only events */ \
+{ 0xC5, 0x20, C_ALL, "br_misp_retired.near_taken" , 0x0, ATTR_PEBS, 0x0 }, \
+/* { 0xC6, 0x01, C_ALL, "frontend_retired" , 0x0, ATTR_PEBS, MSR_PEBS_FRONTEND}, omit events requiring MSR programming */ \
+{ 0xC7, 0x01, C_ALL, "fp_arith_inst_retired.scalar_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x02, C_ALL, "fp_arith_inst_retired.scalar_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x04, C_ALL, "fp_arith_inst_retired.128b_packed_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x08, C_ALL, "fp_arith_inst_retired.128b_packed_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x10, C_ALL, "fp_arith_inst_retired.256b_packed_double" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC7, 0x20, C_ALL, "fp_arith_inst_retired.256b_packed_single" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xC8, 0x01, C_ALL, "hle_retired.start" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC8, 0x02, C_ALL, "hle_retired.commit" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC8, 0x04, C_ALL, "hle_retired.aborted" , 0x0, ATTR_PEBS | ATTR_TSX, 0x0 }, \
+{ 0xC8, 0x08, C_ALL, "hle_retired.aborted_mem" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC8, 0x10, C_ALL, "hle_retired.aborted_timer" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC8, 0x20, C_ALL, "hle_retired.aborted_unfriendly" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC8, 0x40, C_ALL, "hle_retired.aborted_memtype" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC8, 0x80, C_ALL, "hle_retired.aborted_events" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC9, 0x01, C_ALL, "rtm_retired.start" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC9, 0x02, C_ALL, "rtm_retired.commit" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC9, 0x04, C_ALL, "rtm_retired.aborted" , 0x0, ATTR_PEBS | ATTR_TSX, 0x0 }, \
+{ 0xC9, 0x08, C_ALL, "rtm_retired.aborted_mem" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC9, 0x10, C_ALL, "rtm_retired.aborted_timer" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC9, 0x20, C_ALL, "rtm_retired.aborted_unfriendly" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC9, 0x40, C_ALL, "rtm_retired.aborted_memtype" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xC9, 0x80, C_ALL, "rtm_retired.aborted_events" , 0x0, ATTR_TSX, 0x0 }, \
+{ 0xCA, 0x1E, C_ALL, "fp_assist.any" , 0x1, ATTR_NONE, 0x0 }, \
+{ 0xCB, 0x01, C_ALL, "hw_interrupts.received" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xCC, 0x20, C_ALL, "rob_misc_events.lbr_inserts" , 0x0, ATTR_NONE, 0x0 }, \
+/* { 0xCD, 0x01, C_ALL, "mem_trans_retired.load_latency" , 0x0, ATTR_PEBS_ONLY_LD_LAT, PEBS_LD_LAT_THRESHOLD }, omit events requiring MSR programming */ \
+{ 0xD0, 0x11, C_ALL, "mem_inst_retired.stlb_miss_loads" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x12, C_ALL, "mem_inst_retired.stlb_miss_stores" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x21, C_ALL, "mem_inst_retired.lock_loads" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x41, C_ALL, "mem_inst_retired.split_loads" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x42, C_ALL, "mem_inst_retired.split_stores" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x81, C_ALL, "mem_inst_retired.all_loads" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD0, 0x82, C_ALL, "mem_inst_retired.all_stores" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x01, C_ALL, "mem_load_retired.l1_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x02, C_ALL, "mem_load_retired.l2_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x04, C_ALL, "mem_load_retired.l3_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x08, C_ALL, "mem_load_retired.l1_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x10, C_ALL, "mem_load_retired.l2_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x20, C_ALL, "mem_load_retired.l3_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD1, 0x40, C_ALL, "mem_load_retired.fb_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD2, 0x01, C_ALL, "mem_load_l3_hit_retired.xsnp_miss" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD2, 0x02, C_ALL, "mem_load_l3_hit_retired.xsnp_hit" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD2, 0x04, C_ALL, "mem_load_l3_hit_retired.xsnp_hitm" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD2, 0x08, C_ALL, "mem_load_l3_hit_retired.xsnp_none" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xD4, 0x04, C_ALL, "mem_load_misc_retired.uc" , 0x0, ATTR_PEBS, 0x0 }, \
+{ 0xE6, 0x01, C_ALL, "baclears.any" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF0, 0x40, C_ALL, "l2_trans.l2_wb" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF1, 0x1F, C_ALL, "l2_lines_in.all" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x01, C_ALL, "l2_lines_out.silent" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x02, C_ALL, "l2_lines_out.non_silent" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF2, 0x04, C_ALL, "l2_lines_out.useless_hwpf" , 0x0, ATTR_NONE, 0x0 }, \
+{ 0xF4, 0x10, C_ALL, "sq_misc.split_lock" , 0x0, ATTR_NONE, 0x0 }, \
+/* end of #define */
+
+#define NT_END {0, 0, 0, NULL, 0x0, ATTR_NONE, 0x0 } /* end-of-table row: zero event codes and a NULL name; it is the last entry of each events_fam6_* table below */
+
+static const struct events_table_t *events_table = NULL; /* file-local pointer to an events table; starts out NULL. NOTE(review): presumably pointed at one of the tables below once the CPU model is known -- the assignment is not in this part of the file, confirm */
+
+/* Table for the "fam6_mod23" CPUs: the ARCH_EVENTS rows, then the
+   EVENTS_FAM6_MOD23 rows, closed by the NT_END end-of-table row.  */
+const struct events_table_t events_fam6_mod23[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD23
+  NT_END
+};
+
+/* Table for the "fam6_mod28" CPUs: the ARCH_EVENTS rows, then the
+   EVENTS_FAM6_MOD28 rows, closed by the NT_END end-of-table row.  */
+const struct events_table_t events_fam6_mod28[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD28
+  NT_END
+};
+
+/* Table for the "fam6_mod26" CPUs: the ARCH_EVENTS rows, then the
+   EVENTS_FAM6_MOD26 rows, closed by the NT_END end-of-table row.  */
+const struct events_table_t events_fam6_mod26[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD26
+  NT_END
+};
+
+/* Table for the "fam6_mod46" CPUs.  It reuses the EVENTS_FAM6_MOD26 rows
+   and appends the EVENTS_FAM6_MOD46_ONLY rows; ARCH_EVENTS comes first
+   and NT_END closes the table.  */
+const struct events_table_t events_fam6_mod46[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD26
+  EVENTS_FAM6_MOD46_ONLY
+  NT_END
+};
+
+/* Table for the "fam6_mod37" CPUs: ARCH_EVENTS, the EVENTS_FAM6_MOD37
+   rows and the extra EVENTS_FAM6_MOD37_ALSO rows, closed by NT_END.  */
+const struct events_table_t events_fam6_mod37[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD37
+  EVENTS_FAM6_MOD37_ALSO
+  NT_END
+};
+
+/* Table for the "fam6_mod47" CPUs.  It reuses the EVENTS_FAM6_MOD37 rows
+   (without the EVENTS_FAM6_MOD37_ALSO additions) after ARCH_EVENTS, and
+   is closed by NT_END.  */
+const struct events_table_t events_fam6_mod47[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD37
+  NT_END
+};
+
+/* Table for the "fam6_mod42" CPUs: ARCH_EVENTS, the shared
+   EVENTS_FAM6_MOD42 rows and the EVENTS_FAM6_MOD42_ONLY rows, closed by
+   NT_END.  */
+const struct events_table_t events_fam6_mod42[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD42
+  EVENTS_FAM6_MOD42_ONLY
+  NT_END
+};
+
+/* Table for the "fam6_mod45" CPUs: ARCH_EVENTS, the shared
+   EVENTS_FAM6_MOD42 rows and the EVENTS_FAM6_MOD45_ONLY rows, closed by
+   NT_END.  */
+const struct events_table_t events_fam6_mod45[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD42
+  EVENTS_FAM6_MOD45_ONLY
+  NT_END
+};
+
+/* Table for the "fam6_mod58" CPUs: the ARCH_EVENTS rows, then the
+   EVENTS_FAM6_MOD58 rows, closed by the NT_END end-of-table row.  */
+const struct events_table_t events_fam6_mod58[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD58
+  NT_END
+};
+
+/* Table for the "fam6_mod62" CPUs.  It reuses the EVENTS_FAM6_MOD58 rows
+   and appends the EVENTS_FAM6_MOD62_ONLY rows; ARCH_EVENTS comes first
+   and NT_END closes the table.  */
+const struct events_table_t events_fam6_mod62[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD58
+  EVENTS_FAM6_MOD62_ONLY
+  NT_END
+};
+
+/* Table for the "fam6_mod60" CPUs: the ARCH_EVENTS rows, then the
+   EVENTS_FAM6_MOD60 rows, closed by the NT_END end-of-table row.  */
+const struct events_table_t events_fam6_mod60[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD60
+  NT_END
+};
+
+/* Table for the "fam6_mod61" CPUs: the ARCH_EVENTS rows, then the
+   EVENTS_FAM6_MOD61 rows, closed by the NT_END end-of-table row.  */
+const struct events_table_t events_fam6_mod61[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD61
+  NT_END
+};
+
+/* Skylake table ("fam6_mod78"): the ARCH_EVENTS rows, then the
+   EVENTS_FAM6_MOD78 rows, closed by the NT_END end-of-table row.  */
+const struct events_table_t events_fam6_mod78[] =
+{
+  ARCH_EVENTS
+  EVENTS_FAM6_MOD78
+  NT_END
+};
+
+/* Table named for "unknown" family-6 models: it carries only the
+   ARCH_EVENTS rows, closed by the NT_END end-of-table row.  */
+const struct events_table_t events_fam6_unknown[] =
+{
+  ARCH_EVENTS
+  NT_END
+};
+
+/* Event table used by core_pcbe_init() for the recognised ARM implementers.
+   The entries reuse the events_table_t layout
+     eventselect, unitmask, supported_counters, name, cmask, attrs, msr_offset
+   with the perf "config" value stored in eventselect and the perf event
+   type (PERF_TYPE_*) stored in the cmask slot.  core_pcbe_get_eventnum()
+   combines them as
+     eventselect | unitmask << PERFCTR_UMASK_SHIFT | attrs << 16 | cmask << 24.  */
+const struct events_table_t events_fam_arm[] = {
+
+// Hardware event
+#define HWE(nm, id) { id, 0, C_ALL, nm, PERF_TYPE_HARDWARE, 0, 0 },
+  HWE("branch-instructions", PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
+  HWE("branch-misses", PERF_COUNT_HW_BRANCH_MISSES)
+  HWE("bus-cycles", PERF_COUNT_HW_BUS_CYCLES)
+  HWE("cache-misses", PERF_COUNT_HW_CACHE_MISSES)
+  HWE("cache-references", PERF_COUNT_HW_CACHE_REFERENCES)
+  HWE("cycles", PERF_COUNT_HW_CPU_CYCLES)
+  HWE("instructions", PERF_COUNT_HW_INSTRUCTIONS)
+  HWE("ref-cycles", PERF_COUNT_HW_REF_CPU_CYCLES)
+  HWE("stalled-cycles-backend", PERF_COUNT_HW_STALLED_CYCLES_BACKEND)
+  HWE("stalled-cycles-frontend", PERF_COUNT_HW_STALLED_CYCLES_FRONTEND)
+
+// Software event
+#define SWE(nm, id) { id, 0, C_ALL, nm, PERF_TYPE_SOFTWARE, 0, 0 },
+  SWE("alignment-faults", PERF_COUNT_SW_ALIGNMENT_FAULTS)
+  SWE("context-switches", PERF_COUNT_SW_CONTEXT_SWITCHES)
+  SWE("cpu-clock", PERF_COUNT_SW_CPU_CLOCK)
+  SWE("cpu-migrations", PERF_COUNT_SW_CPU_MIGRATIONS)
+  SWE("emulation-faults", PERF_COUNT_SW_EMULATION_FAULTS)
+  SWE("major-faults", PERF_COUNT_SW_PAGE_FAULTS_MAJ)
+  SWE("minor-faults", PERF_COUNT_SW_PAGE_FAULTS_MIN)
+  SWE("page-faults", PERF_COUNT_SW_PAGE_FAULTS)
+  SWE("task-clock", PERF_COUNT_SW_TASK_CLOCK)
+
+// Hardware cache event: config = cache id | (operation << 8) | (result << 16)
+#define HWCE(nm, id, op, res) { (id) | ((op) << 8) | ((res) << 16), 0, C_ALL, nm, PERF_TYPE_HW_CACHE, 0, 0 },
+  HWCE("L1-dcache-load-misses", PERF_COUNT_HW_CACHE_L1D, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS)
+  HWCE("L1-dcache-loads", PERF_COUNT_HW_CACHE_L1D, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS)
+  /* Store misses are the WRITE operation with the MISS result; the entry
+     must not encode the same config as "L1-dcache-stores".  */
+  HWCE("L1-dcache-store-misses",PERF_COUNT_HW_CACHE_L1D, PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_MISS)
+  HWCE("L1-dcache-stores", PERF_COUNT_HW_CACHE_L1D, PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS)
+  HWCE("L1-icache-load-misses", PERF_COUNT_HW_CACHE_L1I, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS)
+  HWCE("L1-icache-loads", PERF_COUNT_HW_CACHE_L1I, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS)
+// HWCE("branch-load-misses",)
+// HWCE("branch-loads",)
+  HWCE("dTLB-load-misses", PERF_COUNT_HW_CACHE_DTLB, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS)
+  HWCE("dTLB-loads", PERF_COUNT_HW_CACHE_DTLB, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS)
+  HWCE("iTLB-load-misses", PERF_COUNT_HW_CACHE_ITLB, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS)
+  HWCE("iTLB-loads", PERF_COUNT_HW_CACHE_ITLB, PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS)
+
+  NT_END
+};
+
+/* Probe the CPU and initialise this backend's state (core_impl_name,
+   events_table, num_gpc, num_ffc, total_pmc).
+
+   Returns 0 on success.  Returns -1 when the vendor is not one of the
+   handled ones, when CPUID has no Architectural Performance Monitoring
+   leaf (0xa) or that leaf reports version 0, or when the counter counts
+   exceed what the checks below allow (64 in total).  */
+static int
+core_pcbe_init (void)
+{
+ switch (cpuid_getvendor ())
+ {
+ /* All recognised ARM implementers share one event table and a fixed
+    counter configuration.  */
+ case ARM_CPU_IMP_ARM:
+ case ARM_CPU_IMP_BRCM:
+ case ARM_CPU_IMP_CAVIUM:
+ case ARM_CPU_IMP_APM:
+ case ARM_CPU_IMP_QCOM:
+ snprintf (core_impl_name, sizeof (core_impl_name), "%s", AARCH64_VENDORSTR_ARM);
+ events_table = events_fam_arm;
+ num_gpc = 4; // MEZ: a real implementation is needed
+ num_ffc = 0;
+ total_pmc = num_gpc + num_ffc;
+ return 0;
+ case X86_VENDOR_Intel:
+ /* Continue with the CPUID-based probing below.  */
+ break;
+ default:
+ return -1;
+ }
+
+#if defined(__i386__) || defined(__x86_64)
+ /* No Architectural Performance Monitoring Leaf returned by CPUID */
+ if (get_cpuid_info ()->cpi_maxeax < 0xa)
+ return (-1);
+
+ /* Obtain the Architectural Performance Monitoring Leaf */
+ cpuid_regs_t cp;
+ my_cpuid (0xa, &cp);
+ /* The version id is the low byte of %eax.  */
+ uint32_t versionid = cp.eax & 0xFF;
+
+ /*
+ * Fixed-Function Counters (FFC)
+ *
+ * All Family 6 Model 15 and Model 23 processors have fixed-function
+ * counters. These counters were made Architectural with
+ * Family 6 Model 15 Stepping 9.
+ */
+ switch (versionid)
+ {
+ case 0:
+ return -1;
+ case 2:
+ num_ffc = cp.edx & 0x1F;
+ /*
+ * Some processors have an errata (AW34) where
+ * versionid is reported as 2 when actually 1.
+ * In this case, fixed-function counters are
+ * model-specific as in Version 1.
+ */
+ if (num_ffc != 0)
+ break;
+ /* FALLTHROUGH */
+ case 1:
+ num_ffc = 3;
+ versionid = 1;
+ break;
+ default:
+ num_ffc = cp.edx & 0x1F;
+ break;
+ }
+ if (num_ffc >= 64)
+ return (-1);
+ uint64_t known_ffc_num = sizeof (ffc_names) / sizeof (char *) - 1; /* -1 for EOT */
+ if (num_ffc > known_ffc_num)
+ /*
+ * The system seems to have more fixed-function counters than
+ * what this PCBE is able to handle correctly. Default to the
+ * maximum number of fixed-function counters that this driver
+ * is aware of.
+ */
+ num_ffc = known_ffc_num;
+
+ /*
+ * General Purpose Counters (GPC)
+ */
+ /* The GPC count is bits 15:8 of %eax.  */
+ num_gpc = (cp.eax >> 8) & 0xFF;
+ if (num_gpc >= 64)
+ return (-1);
+ total_pmc = num_gpc + num_ffc;
+ if (total_pmc > 64) /* Too wide for the overflow bitmap */
+ return (-1);
+
+ uint_t cpuid_model = cpuid_getmodel ();
+
+ /* GPC events for Family 6 Models 15 & 23 only */
+ if ((cpuid_getfamily () == 6) &&
+ ((cpuid_model == 15) || (cpuid_model == 23)))
+ (void) snprintf (core_impl_name, IMPL_NAME_LEN, "Core Microarchitecture");
+ else
+ (void) snprintf (core_impl_name, IMPL_NAME_LEN,
+ "Intel Arch PerfMon v%d on Family %d Model %d",
+ versionid, cpuid_getfamily (), cpuid_model);
+ /*
+ * Process architectural and non-architectural events using GPC
+ */
+ /* NOTE(review): the table is chosen from the model number alone; the
+    family is not checked here.  events_table is left untouched when
+    num_gpc is 0.  */
+ if (num_gpc > 0)
+ {
+ switch (cpuid_model)
+ {
+ case 15: /* Core 2 */
+ case 23:
+ events_table = events_fam6_mod23;
+ break;
+ case 28: /* Atom */
+ events_table = events_fam6_mod28;
+ break;
+ case 37: /* Westmere */
+ case 44:
+ events_table = events_fam6_mod37;
+ break;
+ case 47:
+ events_table = events_fam6_mod47;
+ break;
+ case 26: /* Nehalem */
+ case 30:
+ case 31:
+ events_table = events_fam6_mod26;
+ break;
+ case 46:
+ events_table = events_fam6_mod46;
+ break;
+ case 42: /* Sandy Bridge */
+ events_table = events_fam6_mod42;
+ break;
+ case 45:
+ events_table = events_fam6_mod45;
+ break;
+ case 58: /* Ivy Bridge */
+ events_table = events_fam6_mod58;
+ break;
+ case 62:
+ events_table = events_fam6_mod62;
+ break;
+ case 60: /* Haswell */
+ case 63:
+ case 69:
+ case 70:
+ events_table = events_fam6_mod60;
+ break;
+ case 61: /* Broadwell */
+ case 71:
+ case 79:
+ case 86:
+ events_table = events_fam6_mod61;
+ break;
+ case 78: /* Skylake */
+ case 85:
+ case 94:
+ events_table = events_fam6_mod78;
+ break;
+ default: /* unknown */
+ events_table = events_fam6_unknown;
+ }
+ }
+ /*
+ * Fixed-function Counters (FFC) are already listed individually in
+ * ffc_names[]
+ */
+#endif
+ return 0;
+}
+
+/* Return the total number of counters (general-purpose plus
+   fixed-function) as computed by core_pcbe_init().  */
+static uint_t
+core_pcbe_ncounters (void)
+{
+  return total_pmc;
+}
+
+/* Return the implementation name string that core_pcbe_init() formatted
+   into core_impl_name.  */
+static const char *
+core_pcbe_impl_name (void)
+{
+ return core_impl_name;
+}
+
+/* Return a (translated) pointer to the vendor documentation describing the
+   events of the running CPU model.  On x86 the text depends on
+   cpuid_getmodel(); on every other architecture an empty string is
+   returned, so the function never falls off its end without a value.  */
+static const char *
+core_pcbe_cpuref (void)
+{
+#if defined(__i386__) || defined(__x86_64)
+  switch (cpuid_getmodel ())
+    {
+    case 60: /* Haswell */
+    case 63:
+    case 69:
+    case 70:
+      return GTXT ("See Chapter 19 of the \"Intel 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide, Part 2\"\nOrder Number: 253669-047US, June 2013");
+    case 61: /* Broadwell */
+    case 71:
+    case 79:
+    case 86:
+    case 78: /* Skylake */
+    case 85:
+    case 94:
+      return GTXT ("See Chapter 19 of the \"Intel 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide\"");
+    default:
+      return
+	GTXT ("See Chapter 19 of the \"Intel 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide, Part 2\"\nOrder Number: 253669-045US, January 2013");
+    }
+#else
+  /* AArch64 and any other non-x86 build: no reference text available.  */
+  return "";
+#endif
+}
+
+/* Report every available event through HWC_CB and return how many
+   callbacks were made.  Each entry of events_table is reported once per
+   general-purpose counter whose bit is set in its supported_counters
+   mask; the names in ffc_names[] are then reported on the counter numbers
+   that follow the general-purpose ones.  */
+static int
+core_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb)
+{
+  int reported = 0;
+
+  /* General-purpose counter events.  */
+  const struct events_table_t *ev = events_table;
+  while (ev && ev->name)
+    {
+      for (uint_t ctr = 0; ctr < num_gpc; ctr++)
+	{
+	  if (!(C (ctr) & ev->supported_counters))
+	    continue;
+	  hwc_cb (ctr, ev->name);
+	  reported++;
+	}
+      ev++;
+    }
+
+  /* Fixed-function counter events; the list stops at its first null name.  */
+  for (int idx = 0; idx < sizeof (ffc_names) / sizeof (*ffc_names); idx++)
+    {
+      if (!ffc_names[idx])
+	break;
+      hwc_cb (idx + num_gpc, ffc_names[idx]);
+      reported++;
+    }
+  /* add generic events here */
+  return reported;
+}
+
+/* Translate EVENTNAME into an event selector.
+
+   On a match in events_table, *EVENTNUM receives
+     eventselect | unitmask << PERFCTR_UMASK_SHIFT | attrs << 16 | cmask << 24
+   and *VALID_UMASK becomes 0xff when the entry has no unit mask of its own
+   (0 otherwise).  On a match in ffc_names[], *EVENTNUM is 0 and *PMC_SEL is
+   the name's index or'ed with PERFCTR_FIXED_MAGIC.  In all other cases
+   *PMC_SEL is the requested PMC.
+
+   Returns 0 when the name was found; otherwise stores (eventsel_t) -1 in
+   *EVENTNUM and returns -1.  */
+static int
+core_pcbe_get_eventnum (const char *eventname, uint_t pmc, eventsel_t *eventnum,
+			eventsel_t *valid_umask, uint_t *pmc_sel)
+{
+  /* Defaults: no user-supplied umask, and keep the requested pmc.  */
+  *valid_umask = 0x0;
+  *pmc_sel = pmc;
+
+  /* First the table of general-purpose counter events.  */
+  for (const struct events_table_t *ev = events_table; ev && ev->name; ev++)
+    {
+      if (strcmp (eventname, ev->name) != 0)
+	continue;
+
+      eventsel_t sel = ev->eventselect;
+      sel |= (ev->unitmask << PERFCTR_UMASK_SHIFT);
+      sel |= (ev->attrs << 16);
+      sel |= (ev->cmask << 24);
+      *eventnum = sel;
+
+      /*
+       * ev->msr_offset is not handled yet.
+       * Can check libcpc's usr/src/uts/intel/pcbe/snb_pcbe.h,
+       * function snb_gpc_configure().
+       *
+       * Actually, we should probably error out here
+       * until the appropriate support has been added.
+       * Also, we can comment out events that require
+       * msr_offset so that they aren't even listed.
+       */
+
+      if (ev->unitmask == 0)
+	*valid_umask = 0xff; /* allow umask if nothing set */
+      return 0;
+    }
+
+  /* Then the fixed-function counter names.  */
+  for (int idx = 0; idx < sizeof (ffc_names) / sizeof (*ffc_names); idx++)
+    {
+      if (!ffc_names[idx])
+	break;
+      if (strcmp (eventname, ffc_names[idx]) != 0)
+	continue;
+      *eventnum = 0;
+      *pmc_sel = idx | PERFCTR_FIXED_MAGIC;
+      return 0;
+    }
+
+  /* Unknown event name.  */
+  *eventnum = (eventsel_t) - 1;
+  return -1;
+}
+
+/* Table of this backend's entry points.  The initialisers are positional,
+   so their order has to follow the member order of hdrv_pcbe_api_t
+   (declared elsewhere).  */
+static hdrv_pcbe_api_t hdrv_pcbe_core_api = {
+ core_pcbe_init,
+ core_pcbe_ncounters,
+ core_pcbe_impl_name,
+ core_pcbe_cpuref,
+ core_pcbe_get_events,
+ core_pcbe_get_eventnum
+};
diff --git a/gprofng/common/cpu_frequency.h b/gprofng/common/cpu_frequency.h
new file mode 100644
index 0000000..b46b54d
--- /dev/null
+++ b/gprofng/common/cpu_frequency.h
@@ -0,0 +1,303 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#ifndef _CPU_FREQUENCY_H
+#define _CPU_FREQUENCY_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <alloca.h>
+#include <unistd.h> /* processor_info_t */
+#include <fcntl.h>
+
+ typedef unsigned char uint8_t;
+
+#define MAXSTRLEN 1024
+ /*
+ * This file provides the API to detect Intel CPU frequency variation features
+ */
+
+#define COL_CPUFREQ_NONE 0x0000
+#define COL_CPUFREQ_SCALING 0x0001
+#define COL_CPUFREQ_TURBO 0x0002
+
+#if defined(__i386__) || defined(__x86_64)
+ // XXXX This is a rough table to estimate frequency increment due to intel turbo boost.
+ // CPU with different stepping and different core number have different turbo increment.
+ // It is used internally here, and is not implemented on SPARC
+
+ // YLM: one can use cputrack to estimate max turbo frequency
+ // example: for a cpu-bound app that runs for > 10 seconds, count cycles for 10 seconds:
+ // cputrack -T 10 -v -c cpu_clk_unhalted.thread_p a.out
+
+  /* Rough estimate of the frequency gained through Intel turbo boost for a
+     family 6 MODEL.  The value is in the unit of the sysfs scaling_max_freq
+     reading it is added to by get_cpu_frequency(), which later divides the
+     sum by 1000 to obtain MHz.  Models without an estimate yield 0.  */
+  static int
+  get_max_turbo_freq (int model)
+  {
+    int increment;
+    switch (model)
+      {
+      /* Nehalem (30: Core i7-870, 26: Xeon L5520, 46: Xeon E7540) and
+	 Westmere (37: Core i5-520M, 44: Xeon E5620).  */
+      case 26:
+      case 30:
+      case 46:
+      case 37:
+      case 44:
+	increment = 2 * 133333;
+	break;
+      case 47: /* Westmere, Xeon E7-2820: 1/1/1/2 */
+	increment = 1 * 133333;
+	break;
+      case 42: /* Sandy Bridge, Core i5-2500: 1/2/3/4 */
+	increment = 3 * 100000;
+	break;
+      // http://ark.intel.com/products/64584/Intel-Xeon-Processor-E5-2660-20M-Cache-2_20-GHz-8_00-GTs-Intel-QPI
+      case 45: /* Sandy Bridge, Xeon E5-2660 */
+	increment = 8 * 100000;
+	break;
+      case 58: /* Ivy Bridge, Core i7-3770: 3/4/5/5 */
+	increment = 4 * 100000;
+	break;
+      case 62: /* Ivy Bridge, Xeon E5-2697: 3/3/3/3/3/3/3/4/5/6/7/8 */
+	increment = 7 * 100000;
+	break;
+      case 60: /* Haswell: empirically we see 3189 MHz - 2400 MHz */
+	increment = 789000;
+	break;
+      case 63: /* Haswell: empirically 3580 MHz - 2300 MHz single-threaded */
+	increment = 1280000;
+	break;
+      /* Broadwell: where are these values listed?
+	 maybe try https://en.wikipedia.org/wiki/Broadwell_%28microarchitecture%29#Server_processors */
+      case 61:
+      case 71:
+	increment = 400000;
+	break;
+      case 79: /* empirically (3550-2600) MHz single-threaded on x6-2a */
+	increment = 950000;
+	break;
+      case 85: /* X7: empirically ~3.7GHz single thread, baseline 2.1GHz */
+	increment = 1600000;
+	break;
+      /* 31 (Nehalem?), 28 (Atom), 69/70 (Haswell), 78/94 (Skylake) and
+	 everything else: no estimate.  */
+      default:
+	increment = 0;
+	break;
+      }
+    return increment;
+  }
+#endif
+
+ /*
+ * parameter: mode, pointer to an 8-bit mode indicator
+ * return: max cpu frequency in MHz
+ */
+ //YXXX Updating this function? Check similar cut/paste code in:
+ // collctrl.cc::Coll_Ctrl()
+ // collector.c::log_header_write()
+ // cpu_frequency.h::get_cpu_frequency()
+
+  /* Scan /proc/cpuinfo and return the highest CPU frequency found, in MHz
+     (0 when the file cannot be opened or holds no frequency line).
+
+     MODE may be NULL.  Otherwise *MODE starts as COL_CPUFREQ_NONE and gets
+     COL_CPUFREQ_SCALING and/or COL_CPUFREQ_TURBO or'ed in when the cpufreq
+     sysfs files of any listed processor indicate frequency scaling or
+     turbo mode.
+
+     For each "cpu MHz" line the value may be replaced by one derived from
+     the processor's scaling_max_freq (and, on family 6, by an estimated
+     turbo frequency).  "Cpu<N>ClkTck" lines (sparc-Linux) carry the
+     frequency in Hz as a hexadecimal number.  */
+  static int
+  get_cpu_frequency (uint8_t *mode)
+  {
+    int ret_freq = 0;
+    if (mode != NULL)
+      *mode = COL_CPUFREQ_NONE;
+    FILE *procf = fopen ("/proc/cpuinfo", "r");
+    if (procf != NULL)
+      {
+	char temp[1024];
+	int cpu = -1;
+#if defined(__i386__) || defined(__x86_64)
+	int model = -1;
+	int family = -1;
+#endif
+	while (fgets (temp, 1024, procf) != NULL)
+	  {
+	    if (strncmp (temp, "processor", strlen ("processor")) == 0)
+	      {
+		char *val = strchr (temp, ':');
+		cpu = val ? atoi (val + 1) : -1;
+	      }
+#if defined(__i386__) || defined(__x86_64)
+	    else if (strncmp (temp, "model", strlen ("model")) == 0
+		     && strstr (temp, "name") == 0)
+	      {
+		/* "model", but not the "model name" line.  */
+		char *val = strchr (temp, ':');
+		model = val ? atoi (val + 1) : -1;
+	      }
+	    else if (strncmp (temp, "cpu family", strlen ("cpu family")) == 0)
+	      {
+		char *val = strchr (temp, ':');
+		family = val ? atoi (val + 1) : -1;
+	      }
+#endif
+	    else if (strncmp (temp, "cpu MHz", strlen ("cpu MHz")) == 0)
+	      {
+		/* The value is parsed first: temp is reused below as the
+		   read buffer for the sysfs files.  */
+		char *val = strchr (temp, ':');
+		int mhz = val ? atoi (val + 1) : 0; /* reading it as int is fine */
+		char scaling_freq_file[MAXSTRLEN + 1];
+		snprintf (scaling_freq_file, sizeof (scaling_freq_file),
+			  "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", cpu);
+		int intel_pstate = 0;
+		int no_turbo = 0;
+		if (access (scaling_freq_file, R_OK) == 0)
+		  {
+		    FILE *cpufreqd = fopen (scaling_freq_file, "r");
+		    if (cpufreqd != NULL)
+		      {
+			if (fgets (temp, 1024, cpufreqd) != NULL
+			    && strncmp (temp, "intel_pstate", sizeof ("intel_pstate") - 1) == 0)
+			  intel_pstate = 1;
+			fclose (cpufreqd);
+		      }
+		  }
+		snprintf (scaling_freq_file, sizeof (scaling_freq_file),
+			  "/sys/devices/system/cpu/intel_pstate/no_turbo");
+		if (access (scaling_freq_file, R_OK) == 0)
+		  {
+		    FILE *pstatent = fopen (scaling_freq_file, "r");
+		    if (pstatent != NULL)
+		      {
+			if (fgets (temp, 1024, pstatent) != NULL)
+			  if (strncmp (temp, "1", sizeof ("1") - 1) == 0)
+			    no_turbo = 1;
+			fclose (pstatent);
+		      }
+		  }
+
+		snprintf (scaling_freq_file, sizeof (scaling_freq_file),
+			  "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", cpu);
+		int frequency_scaling = 0;
+		int turbo_mode = 0;
+		if (access (scaling_freq_file, R_OK) == 0)
+		  {
+		    FILE *cpufreqf = fopen (scaling_freq_file, "r");
+		    if (cpufreqf != NULL)
+		      {
+			if (fgets (temp, 1024, cpufreqf) != NULL)
+			  {
+			    int ondemand = 0;
+			    if (strncmp (temp, "ondemand", sizeof ("ondemand") - 1) == 0)
+			      ondemand = 1;
+			    int performance = 0;
+			    if (strncmp (temp, "performance", sizeof ("performance") - 1) == 0)
+			      performance = 1;
+			    int powersave = 0;
+			    if (strncmp (temp, "powersave", sizeof ("powersave") - 1) == 0)
+			      powersave = 1;
+			    if (intel_pstate || ondemand || performance)
+			      {
+				snprintf (scaling_freq_file, sizeof (scaling_freq_file),
+					  "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu);
+				if (access (scaling_freq_file, R_OK) == 0)
+				  {
+				    FILE * cpufreqf_max;
+				    if ((cpufreqf_max = fopen (scaling_freq_file, "r")) != NULL)
+				      {
+					if (fgets (temp, 1024, cpufreqf_max) != NULL)
+					  {
+					    int tmpmhz = atoi (temp);
+					    snprintf (scaling_freq_file, sizeof (scaling_freq_file),
+						      "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_available_frequencies", cpu);
+					    if (intel_pstate)
+					      {
+						frequency_scaling = 1;
+						turbo_mode = !no_turbo;
+						if (powersave)
+						  // the system might have been relatively cold
+						  // so we might do better with scaling_max_freq
+						  mhz = (int) (((double) tmpmhz / 1000.0) + 0.5);
+					      }
+					    else if (access (scaling_freq_file, R_OK) == 0)
+					      {
+						FILE * cpufreqf_ava;
+						if ((cpufreqf_ava = fopen (scaling_freq_file, "r")) != NULL)
+						  {
+						    if (fgets (temp, 1024, cpufreqf_ava) != NULL)
+						      {
+							/* Two different blanks mean more than
+							   two listed frequencies.  */
+							if (strchr (temp, ' ') != strrchr (temp, ' ') && ondemand)
+							  frequency_scaling = 1;
+							if (tmpmhz > 1000)
+							  {
+#if defined(__i386__) || defined(__x86_64)
+							    if (family == 6)
+							      {
+								// test turbo mode
+								char non_turbo_max_freq[1024];
+								snprintf (non_turbo_max_freq, sizeof (non_turbo_max_freq),
+									  "%d", tmpmhz - 1000);
+								if (strstr (temp, non_turbo_max_freq))
+								  {
+								    turbo_mode = 1;
+								    tmpmhz = (tmpmhz - 1000) + get_max_turbo_freq (model);
+								  }
+							      }
+#endif
+							  }
+						      }
+						    fclose (cpufreqf_ava);
+						  }
+						mhz = (int) (((double) tmpmhz / 1000.0) + 0.5);
+					      }
+					  }
+					fclose (cpufreqf_max);
+				      }
+				  }
+			      }
+			  }
+			fclose (cpufreqf);
+		      }
+		  }
+		if (mhz > ret_freq)
+		  ret_freq = mhz;
+		if (frequency_scaling && mode != NULL)
+		  *mode |= COL_CPUFREQ_SCALING;
+		if (turbo_mode && mode != NULL)
+		  *mode |= COL_CPUFREQ_TURBO;
+	      }
+	    else if (strncmp (temp, "Cpu", 3) == 0 && temp[3] != '\0' &&
+		     strncmp (strchr (temp + 1, 'C') ? strchr (temp + 1, 'C') : (temp + 4), "ClkTck", 6) == 0)
+	      { // sparc-Linux
+		char *val = strchr (temp, ':');
+		if (val)
+		  {
+		    /* Parse right after the colon: %llx skips leading white
+		       space itself, and val + 1 is always inside the string.
+		       Only use the value when the conversion succeeded.  */
+		    unsigned long long freq = 0;
+		    if (sscanf (val + 1, "%llx", &freq) == 1)
+		      {
+			int mhz = (unsigned int) (((double) freq) / 1000000.0 + 0.5);
+			if (mhz > ret_freq)
+			  ret_freq = mhz;
+		      }
+		  }
+	      }
+	  }
+	fclose (procf);
+      }
+    return ret_freq;
+  }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*_CPU_FREQUENCY_H*/
diff --git a/gprofng/common/cpuid.c b/gprofng/common/cpuid.c
new file mode 100644
index 0000000..211e09a
--- /dev/null
+++ b/gprofng/common/cpuid.c
@@ -0,0 +1,203 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#if defined(__i386__) || defined(__x86_64)
+#include <cpuid.h> /* GCC-provided */
+#elif defined(__aarch64__)
+#define ATTRIBUTE_UNUSED __attribute__((unused))
+
+/* AArch64 stand-in for the x86 __get_cpuid() provided by GCC's <cpuid.h>.
+   OP, EBX, ECX and EDX are ignored; *EAX receives the low 32 bits of
+   MIDR_EL1.  Always returns 0.
+
+   The function writes through EAX and logs, so it must not carry the
+   "const" function attribute.  */
+static inline uint_t
+__get_cpuid (unsigned int op ATTRIBUTE_UNUSED, unsigned int *eax,
+	     unsigned int *ebx ATTRIBUTE_UNUSED,
+	     unsigned int *ecx ATTRIBUTE_UNUSED, unsigned int *edx ATTRIBUTE_UNUSED)
+{
+  // CPUID bit assignments:
+  // [31:24] IMPLEMENTER (0x50 - ARM_CPU_IMP_APM)
+  // [23:20] VARIANT indicates processor revision (0x2 = Revision 2)
+  // [19:16] Constant (Reads as 0xF)
+  // [15:04] PARTNO indicates part number (0xC23 = Cortex-M3)
+  // [03:00] REVISION indicates patch release (0x0 = Patch 0)
+
+  /* MRS transfers a 64-bit register, so read into a 64-bit temporary and
+     narrow afterwards.  */
+  unsigned long midr = 0;
+  __asm volatile ("MRS %[result], MIDR_EL1" : [result] "=r" (midr));
+  *eax = (unsigned int) midr;
+  Tprintf (DBG_LT0, "cpuid.c:%d read_cpuid_id() MIDR_EL1=0x%016x\n", __LINE__, *eax);
+  return 0;
+}
+#endif
+
+/*
+ * Various routines to handle identification
+ * and classification of x86 processors.
+ */
+
+#define IS_GLOBAL /* externally visible */
+#define X86_VENDOR_Intel 0
+#define X86_VENDORSTR_Intel "GenuineIntel"
+#define X86_VENDOR_IntelClone 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDORSTR_AMD "AuthenticAMD"
+
+/* Extract bits h..l (inclusive) of u, right-justified.  */
+#define BITX(u, h, l) (((u) >> (l)) & ((1LU << ((h) - (l) + 1LU)) - 1LU))
+/* Bit fields of a CPUID register value; get_cpuid_info() applies them to
+   %eax of CPUID function 1.  */
+#define CPI_FAMILY_XTD(reg) BITX(reg, 27, 20)
+#define CPI_MODEL_XTD(reg) BITX(reg, 19, 16)
+#define CPI_TYPE(reg) BITX(reg, 13, 12)
+#define CPI_FAMILY(reg) BITX(reg, 11, 8)
+#define CPI_STEP(reg) BITX(reg, 3, 0)
+#define CPI_MODEL(reg) BITX(reg, 7, 4)
+/* True when the argument is 0x6 or at least 0xf.  Despite the parameter
+   name, get_cpuid_info() passes the family number here.  */
+#define IS_EXTENDED_MODEL_INTEL(model) ((model) == 0x6 || (model) >= 0xf)
+
+
+/* The four register values produced by one CPUID query; filled in by
+   my_cpuid().  */
+typedef struct
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+} cpuid_regs_t;
+
+/* Processor identification cached by get_cpuid_info().  On AArch64 only
+   cpi_vendor (MIDR_EL1 bits 31:24), cpi_model (MIDR_EL1 bits 15:4) and
+   cpi_vendorstr are filled in.  */
+typedef struct
+{
+ unsigned int cpi_model;
+ unsigned int cpi_family;
+ unsigned int cpi_vendor; /* enum of cpi_vendorstr */
+ unsigned int cpi_maxeax; /* fn 0: %eax */
+ char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
+} cpuid_info_t;
+
+
+#if defined(__i386__) || defined(__x86_64)
+/* Map a CPUID vendor string to its X86_VENDOR_* code.  Any string other
+   than the Intel and AMD ones maps to X86_VENDOR_IntelClone.  */
+static uint_t
+cpuid_vendorstr_to_vendorcode (char *vendorstr)
+{
+  uint_t code = X86_VENDOR_IntelClone;
+  if (!strcmp (vendorstr, X86_VENDORSTR_Intel))
+    code = X86_VENDOR_Intel;
+  else if (!strcmp (vendorstr, X86_VENDORSTR_AMD))
+    code = X86_VENDOR_AMD;
+  return code;
+}
+
+/* Run CPUID function OP through __get_cpuid(), store the four result
+   registers in *REGS, trace the outcome, and return __get_cpuid()'s result.
+   The registers are cleared first, so they hold zero if __get_cpuid()
+   does not fill them in.  */
+static int
+my_cpuid (unsigned int op, cpuid_regs_t *regs)
+{
+  regs->edx = 0;
+  regs->ecx = 0;
+  regs->ebx = 0;
+  regs->eax = 0;
+  const int status = __get_cpuid (op, &regs->eax, &regs->ebx, &regs->ecx, &regs->edx);
+  TprintfT (DBG_LT1, "my_cpuid: __get_cpuid(0x%x, 0x%x, 0x%x, 0x%x, 0x%x) returns %d\n",
+	    op, regs->eax, regs->ebx, regs->ecx, regs->edx, status);
+  return status;
+}
+#endif
+
+/* Return the processor identification, probing the CPU on the first call
+   and handing back the same cached (static) record afterwards.
+
+   AArch64: vendor and model are taken from MIDR_EL1.
+   x86: vendor string and maximum basic leaf come from CPUID function 0;
+   family and model come from %eax of function 1, including the extended
+   family/model bits where the vendor's rules call for them.  */
+static cpuid_info_t *
+get_cpuid_info (void)
+{
+  static int cpuid_inited = 0;
+  static cpuid_info_t cpuid_info;
+  cpuid_info_t *cpi = &cpuid_info;
+  if (cpuid_inited)
+    return cpi;
+  cpuid_inited = 1;
+
+#if defined(__aarch64__)
+  // CPUID bit assignments:
+  // [31:24] IMPLEMENTER (0x50 - ARM_CPU_IMP_APM)
+  // [23:20] VARIANT indicates processor revision (0x2 = Revision 2)
+  // [19:16] Constant (Reads as 0xF)
+  // [15:04] PARTNO indicates part number (0xC23 = Cortex-M3)
+  // [03:00] REVISION indicates patch release (0x0 = Patch 0)
+  uint_t reg = 0;
+  __asm volatile ("MRS %[result], MIDR_EL1" : [result] "=r" (reg));
+  cpi->cpi_vendor = reg >> 24;
+  cpi->cpi_model = (reg >> 4) & 0xfff;
+  switch (cpi->cpi_vendor)
+    {
+    case ARM_CPU_IMP_APM:
+    case ARM_CPU_IMP_ARM:
+    case ARM_CPU_IMP_CAVIUM:
+    case ARM_CPU_IMP_BRCM:
+    case ARM_CPU_IMP_QCOM:
+      strncpy (cpi->cpi_vendorstr, AARCH64_VENDORSTR_ARM, sizeof (cpi->cpi_vendorstr) - 1);
+      break;
+    default:
+      strncpy (cpi->cpi_vendorstr, "UNKNOWN ARM", sizeof (cpi->cpi_vendorstr) - 1);
+      break;
+    }
+  /* strncpy does not terminate the destination when the source fills it.  */
+  cpi->cpi_vendorstr[sizeof (cpi->cpi_vendorstr) - 1] = 0;
+  Tprintf (DBG_LT0, "cpuid.c:%d read_cpuid_id() MIDR_EL1==0x%016x cpi_vendor=%d cpi_model=%d\n",
+	   __LINE__, (unsigned int) reg, cpi->cpi_vendor, cpi->cpi_model);
+
+#elif defined(__i386__) || defined(__x86_64)
+  cpuid_regs_t regs;
+  my_cpuid (0, &regs);
+  cpi->cpi_maxeax = regs.eax;
+  /* The 12-character vendor string is laid out as %ebx, %edx, %ecx.  */
+  ((uint32_t *) cpi->cpi_vendorstr)[0] = regs.ebx;
+  ((uint32_t *) cpi->cpi_vendorstr)[1] = regs.edx;
+  ((uint32_t *) cpi->cpi_vendorstr)[2] = regs.ecx;
+  cpi->cpi_vendorstr[12] = 0;
+  cpi->cpi_vendor = cpuid_vendorstr_to_vendorcode (cpi->cpi_vendorstr);
+
+  my_cpuid (1, &regs);
+  cpi->cpi_model = CPI_MODEL (regs.eax);
+  cpi->cpi_family = CPI_FAMILY (regs.eax);
+  if (cpi->cpi_family == 0xf)
+    cpi->cpi_family += CPI_FAMILY_XTD (regs.eax);
+
+  /*
+   * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
+   * Intel, and presumably everyone else, uses model == 0xf, as
+   * one would expect (max value means possible overflow). Sigh.
+   */
+  switch (cpi->cpi_vendor)
+    {
+    case X86_VENDOR_Intel:
+      if (IS_EXTENDED_MODEL_INTEL (cpi->cpi_family))
+	cpi->cpi_model += CPI_MODEL_XTD (regs.eax) << 4;
+      break;
+    case X86_VENDOR_AMD:
+      /* Test the *base* family field of %eax.  cpi_family is already an
+	 extracted (and possibly extended) number, so running CPI_FAMILY
+	 over it would always yield 0.  */
+      if (CPI_FAMILY (regs.eax) == 0xf)
+	cpi->cpi_model += CPI_MODEL_XTD (regs.eax) << 4;
+      break;
+    default:
+      if (cpi->cpi_model == 0xf)
+	cpi->cpi_model += CPI_MODEL_XTD (regs.eax) << 4;
+      break;
+    }
+#endif
+  return cpi;
+}
+
+/* Return the cached vendor code (cpi_vendor) from get_cpuid_info().  */
+static inline uint_t
+cpuid_getvendor ()
+{
+ return get_cpuid_info ()->cpi_vendor;
+}
+
+/* Return the cached family number (cpi_family) from get_cpuid_info().  */
+static inline uint_t
+cpuid_getfamily ()
+{
+ return get_cpuid_info ()->cpi_family;
+}
+
+/* Return the cached model number (cpi_model) from get_cpuid_info().  */
+static inline uint_t
+cpuid_getmodel ()
+{
+ return get_cpuid_info ()->cpi_model;
+}
diff --git a/gprofng/common/gp-defs.h b/gprofng/common/gp-defs.h
new file mode 100644
index 0000000..440bfb1
--- /dev/null
+++ b/gprofng/common/gp-defs.h
@@ -0,0 +1,58 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#ifndef _GP_DEFS_H_
+#define _GP_DEFS_H_
+
+/* Define the ARCH and WSIZE predicates */
+/*
+ * The way we define and use predicates is similar to the
+ * standard #assert with one important exception:
+ * if an argument of a predicate is not known the result
+ * is 'false' and we want a compile time error to avoid
+ * silent results from typos like ARCH(INTEL), COMPILER(gnu),
+ * etc.
+ */
+#define ARCH(x) TOK_A_##x(ARCH)
+#define TOK_A_Aarch64(x) x##_Aarch64
+#define TOK_A_SPARC(x) x##_SPARC
+#define TOK_A_Intel(x) x##_Intel
+
+#define WSIZE(x) TOK_W_##x(WSIZE)
+#define TOK_W_32(x) x##_32
+#define TOK_W_64(x) x##_64
+
+#if defined(sparc) || defined(__sparcv9)
+#define ARCH_SPARC 1
+#elif defined(__i386__) || defined(__x86_64)
+#define ARCH_Intel 1
+#elif defined(__aarch64__)
+#define ARCH_Aarch64 1
+#else
+#error "Undefined platform"
+#endif
+
+#if defined(__sparcv9) || defined(__x86_64) || defined(__aarch64__)
+#define WSIZE_64 1
+#else
+#define WSIZE_32 1
+#endif
+
+#endif
diff --git a/gprofng/common/gp-experiment.h b/gprofng/common/gp-experiment.h
new file mode 100644
index 0000000..040c2d1
--- /dev/null
+++ b/gprofng/common/gp-experiment.h
@@ -0,0 +1,186 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#ifndef _EXPERIMENT_H
+#define _EXPERIMENT_H
+
+/* version numbers define experiment format */
+#define SUNPERF_VERNUM 12
+#define SUNPERF_VERNUM_MINOR 4
+
+/* backward compatibility down to: */
+#define SUNPERF_VERNUM_LEAST 12
+
+#include "Emsgnum.h" /* for COL_ERROR_*, etc. symbols */
+
+#define SP_REMOTE_PROTOCOL_VERSION "12.4.1"
+
+#define SP_GROUP_HEADER "#analyzer experiment group"
+
+/* Experiment name macro definitions */
+
+/* for descendant experiments */
+#define DESCENDANT_EXPT_KEY ".er/_"
+#define IS_DESC_EXPT(exptname) (strstr(exptname,DESCENDANT_EXPT_KEY) != NULL)
+#define IS_FNDR_EXPT(exptname) (strstr(exptname,DESCENDANT_EXPT_KEY) == NULL)
+
+/* File name definitions */
+#define SP_ARCHIVES_DIR "archives"
+#define SP_ARCHIVE_LOG_FILE "archive.log"
+#define SP_LOG_FILE "log.xml"
+#define SP_NOTES_FILE "notes"
+#define SP_IFREQ_FILE "ifreq"
+#define SP_MAP_FILE "map.xml"
+#define SP_LABELS_FILE "labels.xml"
+#define SP_DYNTEXT_FILE "dyntext"
+#define SP_OVERVIEW_FILE "overview"
+#define SP_PROFILE_FILE "profile"
+#define SP_SYNCTRACE_FILE "synctrace"
+#define SP_IOTRACE_FILE "iotrace"
+#define SP_OMPTRACE_FILE "omptrace"
+#define SP_MPVIEW_FILE "mpview.dat3"
+#define SP_HWCNTR_FILE "hwcounters"
+#define SP_HEAPTRACE_FILE "heaptrace"
+#define SP_JCLASSES_FILE "jclasses"
+#define SP_DYNAMIC_CLASSES "jdynclasses"
+#define SP_RACETRACE_FILE "dataraces"
+#define SP_DEADLOCK_FILE "deadlocks"
+#define SP_FRINFO_FILE "frameinfo"
+#define SP_WARN_FILE "warnings.xml"
+
+#define SP_LIBCOLLECTOR_NAME "libgp-collector.so"
+#define SP_LIBAUDIT_NAME "libcollect-ng.so"
+
+/* XML tags */
+#define SP_TAG_COLLECTOR "collector"
+#define SP_TAG_CPU "cpu"
+#define SP_TAG_DATAPTR "dataptr"
+#define SP_TAG_EVENT "event"
+#define SP_TAG_EXPERIMENT "experiment"
+#define SP_TAG_FIELD "field"
+#define SP_TAG_PROCESS "process"
+#define SP_TAG_PROFILE "profile"
+#define SP_TAG_PROFDATA "profdata"
+#define SP_TAG_PROFPCKT "profpckt"
+#define SP_TAG_SETTING "setting"
+#define SP_TAG_STATE "state"
+#define SP_TAG_SYSTEM "system"
+#define SP_TAG_POWERM "powerm"
+#define SP_TAG_FREQUENCY "frequency"
+#define SP_TAG_DTRACEFATAL "dtracefatal"
+
+/* records for log and loadobjects files */
+/* note that these are in roughly alphabetical order */
+#define SP_JCMD_ARCH "architecture"
+#define SP_JCMD_ARCHIVE "archive_run"
+#define SP_JCMD_ARGLIST "arglist"
+#define SP_JCMD_BLKSZ "blksz"
+#define SP_JCMD_CERROR "cerror"
+#define SP_JCMD_CLASS_LOAD "class_load"
+#define SP_JCMD_CLASS_UNLOAD "class_unload"
+#define SP_JCMD_COLLENV "collenv"
+#define SP_JCMD_COMMENT "comment"
+#define SP_JCMD_CPUID "cpuid"
+#define SP_JCMD_CWARN "cwarn"
+#define SP_JCMD_CWD "cwd"
+#define SP_JCMD_CVERSION "cversion"
+#define SP_JCMD_DATARACE "datarace"
+#define SP_JCMD_DEADLOCK "deadlock"
+#define SP_JCMD_DELAYSTART "delay_start"
+#define SP_JCMD_DESC_START "desc_start"
+#define SP_JCMD_DESC_STARTED "desc_started"
+#define SP_JCMD_DVERSION "dversion"
+#define SP_JCMD_EXEC_START "exec_start"
+#define SP_JCMD_EXEC_ERROR "exec_error"
+#define SP_JCMD_EXIT "exit"
+#define SP_JCMD_EXPT_DURATION "exp_duration"
+#define SP_JCMD_FAKETIME "faketime"
+#define SP_JCMD_FN_LOAD "fn_load"
+#define SP_JCMD_FN_UNLOAD "fn_unload"
+#define SP_JCMD_FUN_MAP "fun_map"
+#define SP_JCMD_FUN_UNMAP "fun_unmap"
+#define SP_JCMD_HEAPTRACE "heaptrace"
+#define SP_JCMD_HOSTNAME "hostname"
+#define SP_JCMD_HWC_DEFAULT "hwc_default"
+#define SP_JCMD_HW_COUNTER "hwcounter"
+#define SP_JCMD_HW_SIM_CTR "hwsimctr"
+#define SP_JCMD_IOTRACE "iotrace"
+#define SP_JCMD_JCM_LOAD "jcm_load"
+#define SP_JCMD_JCM_UNLOAD "jcm_unload"
+#define SP_JCMD_JCM_MAP "jcm_map"
+#define SP_JCMD_JCM_UNMAP "jcm_unmap"
+#define SP_JCMD_JTHREND "jthread_end"
+#define SP_JCMD_JTHRSTART "jthread_start"
+#define SP_JCMD_GCEND "gc_end"
+#define SP_JCMD_GCSTART "gc_start"
+#define SP_JCMD_JVERSION "jversion"
+//#define SP_JCMD_KPROFILE "kprofile" /* TBR */
+#define SP_JCMD_LIMIT "limit"
+#define SP_JCMD_LINETRACE "linetrace"
+#define SP_JCMD_LO_OPEN "lo_open"
+#define SP_JCMD_LO_CLOSE "lo_close"
+#define SP_JCMD_MOD_OPEN "mod_open"
+#define SP_JCMD_MPIEXP "MPIexperiment"
+#define SP_JCMD_MPI_NO_TRACE "MPI_no_trace"
+#define SP_JCMD_MPIOMPVER "mpi_openmpi_version"
+#define SP_JCMD_MPITRACEVER "mpi_trace_version"
+#define SP_JCMD_MPIPP "mpipp"
+#define SP_JCMD_MPIPPERR "mpipp_err"
+#define SP_JCMD_MPIPPWARN "mpipp_warn"
+#define SP_JCMD_MPISTATE "mpistate"
+#define SP_JCMD_MPITRACE "mpitrace" /* backwards compat only */
+#define SP_JCMD_MPVIEW "mpview"
+#define SP_JCMD_MSGTRACE "msgtrace"
+#define SP_JCMD_NOIDLE "noidle"
+#define SP_JCMD_OMPTRACE "omptrace"
+#define SP_JCMD_OS "os"
+#define SP_JCMD_PAGESIZE "pagesize"
+#define SP_JCMD_PAUSE "pause"
+#define SP_JCMD_PAUSE_SIG "pause_signal"
+#define SP_JCMD_PROFILE "profile"
+#define SP_JCMD_RESUME "resume"
+#define SP_JCMD_RUN "run"
+#define SP_JCMD_SAMPLE "sample"
+#define SP_JCMD_SAMPLE_PERIOD "sample_period"
+#define SP_JCMD_SAMPLE_SIG "sample_signal"
+#define SP_JCMD_SEGMENT_MAP "seg_map"
+#define SP_JCMD_SEGMENT_UNMAP "seg_unmap"
+#define SP_JCMD_SRCHPATH "search_path"
+#define SP_JCMD_STACKBASE "stackbase"
+#define SP_JCMD_SUNPERF "sunperf"
+#define SP_JCMD_SYNCTRACE "synctrace"
+#define SP_JCMD_TERMINATE "terminate"
+#define SP_JCMD_THREAD_PAUSE "thread_pause"
+#define SP_JCMD_THREAD_RESUME "thread_resume"
+#define SP_JCMD_USERNAME "username"
+#define SP_JCMD_VERSION "version"
+#define SP_JCMD_WSIZE "wsize"
+
+/* strings naming memory-segments */
+#define SP_MAP_ANON "Anon"
+#define SP_MAP_HEAP "Heap"
+#define SP_MAP_STACK "Stack"
+#define SP_MAP_SHMEM "SHMid"
+#define SP_MAP_UNRESOLVABLE "Unresolvable"
+
+#define SP_UNKNOWN_NAME "(unknown)"
+
+#define MAX_STACKDEPTH 2048
+#endif /* _EXPERIMENT_H */
diff --git a/gprofng/common/gp-time.h b/gprofng/common/gp-time.h
new file mode 100644
index 0000000..7755370
--- /dev/null
+++ b/gprofng/common/gp-time.h
@@ -0,0 +1,46 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#ifndef _GP_TIME_H_
+#define _GP_TIME_H_
+
+#include <sys/time.h>
+
+typedef long long hrtime_t;
+typedef struct timespec timestruc_t;
+
+#define ITIMER_REALPROF ITIMER_PROF
+#define NANOSEC 1000000000
+#define MICROSEC 1000000
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+ hrtime_t gethrtime (void);
+ hrtime_t gethrvtime (void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/gprofng/common/hwc_cpus.h b/gprofng/common/hwc_cpus.h
new file mode 100644
index 0000000..ff7b303
--- /dev/null
+++ b/gprofng/common/hwc_cpus.h
@@ -0,0 +1,198 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+/* Hardware counter profiling: cpu types */
+
+#ifndef __HWC_CPUS_H
+#define __HWC_CPUS_H
+
+#define MAX_PICS 20 /* Max # of HW ctrs that can be enabled simultaneously */
+
+ /* type for specifying CPU register number */
+ typedef int regno_t;
+#define REGNO_ANY ((regno_t)-1)
+#define REGNO_INVALID ((regno_t)-2)
+
+ /* --- Utilities for use with regno_t and reg_list[] --- */
+#define REG_LIST_IS_EMPTY(reg_list) (!(reg_list) || (reg_list)[0] == REGNO_ANY)
+#define REG_LIST_EOL(regno) ((regno)==REGNO_ANY)
+#define REG_LIST_SINGLE_VALID_ENTRY(reg_list) \
+ (((reg_list) && (reg_list)[1] == REGNO_ANY && \
+ (reg_list)[0] != REGNO_ANY ) ? (reg_list)[0] : REGNO_ANY)
+
+ /* enum for specifying unknown or uninitialized CPU */
+ enum
+ {
+ CPUVER_GENERIC = 0,
+ CPUVER_UNDEFINED = -1
+ };
+
+ // Note: changing any of the values below may make older HWC experiments unreadable.
+ // --- Sun/Oracle SPARC ---
+#define CPC_ULTRA1 1000
+#define CPC_ULTRA2 1001
+#define CPC_ULTRA3 1002
+#define CPC_ULTRA3_PLUS 1003
+#define CPC_ULTRA3_I 1004
+#define CPC_ULTRA4_PLUS 1005 /* Panther */
+#define CPC_ULTRA4 1017 /* Jaguar */
+#define CPC_ULTRA_T1 1100 /* Niagara1 */
+#define CPC_ULTRA_T2 1101 /* Niagara2 */
+#define CPC_ULTRA_T2P 1102
+#define CPC_ULTRA_T3 1103
+#define CPC_SPARC_T4 1104
+#define CPC_SPARC_T5 1110
+#define CPC_SPARC_T6 1120
+// #define CPC_SPARC_T7 1130 // use CPC_SPARC_M7
+#define CPC_SPARC_M4 1204 /* Obsolete */
+#define CPC_SPARC_M5 1210
+#define CPC_SPARC_M6 1220
+#define CPC_SPARC_M7 1230
+#define CPC_SPARC_M8 1240
+
+ // --- Intel ---
+ // Pentium
+#define CPC_PENTIUM 2000
+#define CPC_PENTIUM_MMX 2001
+#define CPC_PENTIUM_PRO 2002
+#define CPC_PENTIUM_PRO_MMX 2003
+#define CPC_PENTIUM_4 2017
+#define CPC_PENTIUM_4_HT 2027
+
+ // Core Microarchitecture (Merom/Penryn)
+#define CPC_INTEL_CORE2 2028
+#define CPC_INTEL_NEHALEM 2040
+#define CPC_INTEL_WESTMERE 2042
+#define CPC_INTEL_SANDYBRIDGE 2045
+#define CPC_INTEL_IVYBRIDGE 2047
+#define CPC_INTEL_ATOM 2050 /* Atom*/
+#define CPC_INTEL_HASWELL 2060
+#define CPC_INTEL_BROADWELL 2070
+#define CPC_INTEL_SKYLAKE 2080
+#define CPC_INTEL_UNKNOWN 2499
+#define CPC_AMD_K8C 2500 /* Opteron, Athlon... */
+#define CPC_AMD_FAM_10H 2501 /* Barcelona, Shanghai... */
+#define CPC_AMD_FAM_11H 2502 /* Griffin... */
+#define CPC_AMD_FAM_15H 2503
+#define CPC_KPROF 3003 // OBSOLETE (To support 12.3 and earlier)
+#define CPC_FOX 3004 /* pseudo-chip */
+
+ // --- Fujitsu ---
+#define CPC_SPARC64_III 3000
+#define CPC_SPARC64_V 3002
+#define CPC_SPARC64_VI 4003 /* OPL-C */
+#define CPC_SPARC64_VII 4004 /* Jupiter */
+#define CPC_SPARC64_X 4006 /* Athena */
+#define CPC_SPARC64_XII 4010 /* Athena++ */
+
+// aarch64. Constants from arch/arm64/include/asm/cputype.h
+enum {
+ ARM_CPU_IMP_ARM = 0x41,
+ ARM_CPU_IMP_BRCM = 0x42,
+ ARM_CPU_IMP_CAVIUM = 0x43,
+ ARM_CPU_IMP_APM = 0x50,
+ ARM_CPU_IMP_QCOM = 0x51
+};
+
+#define AARCH64_VENDORSTR_ARM "ARM"
+
+ /* strings below must match those returned by cpc_getcpuver() */
+ typedef struct
+ {
+ int cpc2_cpuver;
+ const char * cpc2_cciname;
+ } libcpc2_cpu_lookup_t;
+#define LIBCPC2_CPU_LOOKUP_LIST \
+ {CPC_AMD_K8C , "AMD Opteron & Athlon64"}, \
+ {CPC_AMD_FAM_10H , "AMD Family 10h"}, \
+ {CPC_AMD_FAM_11H , "AMD Family 11h"}, \
+ {CPC_AMD_FAM_15H , "AMD Family 15h Model 01h"}, \
+ {CPC_AMD_FAM_15H , "AMD Family 15h Model 02h"},/*future*/ \
+ {CPC_AMD_FAM_15H , "AMD Family 15h Model 03h"},/*future*/ \
+ {CPC_PENTIUM_4_HT , "Pentium 4 with HyperThreading"}, \
+ {CPC_PENTIUM_4 , "Pentium 4"}, \
+ {CPC_PENTIUM_PRO_MMX , "Pentium Pro with MMX, Pentium II"}, \
+ {CPC_PENTIUM_PRO , "Pentium Pro, Pentium II"}, \
+ {CPC_PENTIUM_MMX , "Pentium with MMX"}, \
+ {CPC_PENTIUM , "Pentium"}, \
+ {CPC_INTEL_CORE2 , "Core Microarchitecture"}, \
+ /* Merom: F6M15: Clovertown, Kentsfield, Conroe, Merom, Woodcrest */ \
+ /* Merom: F6M22: Merom Conroe */ \
+ /* Penryn: F6M23: Yorkfield, Wolfdale, Penryn, Harpertown */ \
+ /* Penryn: F6M29: Dunnington */ \
+ {CPC_INTEL_NEHALEM , "Intel Arch PerfMon v3 on Family 6 Model 26"},/*Bloomfield, Nehalem EP*/ \
+ {CPC_INTEL_NEHALEM , "Intel Arch PerfMon v3 on Family 6 Model 30"},/*Clarksfield, Lynnfield, Jasper Forest*/ \
+ {CPC_INTEL_NEHALEM , "Intel Arch PerfMon v3 on Family 6 Model 31"},/*(TBD)*/ \
+ {CPC_INTEL_NEHALEM , "Intel Arch PerfMon v3 on Family 6 Model 46"},/*Nehalem EX*/ \
+ {CPC_INTEL_WESTMERE , "Intel Arch PerfMon v3 on Family 6 Model 37"},/*Arrandale, Clarskdale*/ \
+ {CPC_INTEL_WESTMERE , "Intel Arch PerfMon v3 on Family 6 Model 44"},/*Gulftown, Westmere EP*/ \
+ {CPC_INTEL_WESTMERE , "Intel Arch PerfMon v3 on Family 6 Model 47"},/*Westmere EX*/ \
+ {CPC_INTEL_SANDYBRIDGE , "Intel Arch PerfMon v3 on Family 6 Model 42"},/*Sandy Bridge*/ \
+ {CPC_INTEL_SANDYBRIDGE , "Intel Arch PerfMon v3 on Family 6 Model 45"},/*Sandy Bridge E, SandyBridge-EN, SandyBridge EP*/ \
+ {CPC_INTEL_IVYBRIDGE , "Intel Arch PerfMon v3 on Family 6 Model 58"},/*Ivy Bridge*/ \
+ {CPC_INTEL_IVYBRIDGE , "Intel Arch PerfMon v3 on Family 6 Model 62"},/*(TBD)*/ \
+ {CPC_INTEL_ATOM , "Intel Arch PerfMon v3 on Family 6 Model 28"},/*Atom*/ \
+ {CPC_INTEL_HASWELL , "Intel Arch PerfMon v3 on Family 6 Model 60"},/*Haswell*/ \
+ {CPC_INTEL_HASWELL , "Intel Arch PerfMon v3 on Family 6 Model 63"},/*Haswell*/ \
+ {CPC_INTEL_HASWELL , "Intel Arch PerfMon v3 on Family 6 Model 69"},/*Haswell*/ \
+ {CPC_INTEL_HASWELL , "Intel Arch PerfMon v3 on Family 6 Model 70"},/*Haswell*/ \
+ {CPC_INTEL_BROADWELL , "Intel Arch PerfMon v3 on Family 6 Model 61"},/*Broadwell*/ \
+ {CPC_INTEL_BROADWELL , "Intel Arch PerfMon v3 on Family 6 Model 71"},/*Broadwell*/ \
+ {CPC_INTEL_BROADWELL , "Intel Arch PerfMon v3 on Family 6 Model 79"},/*Broadwell*/ \
+ {CPC_INTEL_BROADWELL , "Intel Arch PerfMon v3 on Family 6 Model 86"},/*Broadwell*/ \
+ {CPC_INTEL_SKYLAKE , "Intel Arch PerfMon v4 on Family 6 Model 78"},/*Skylake*/ \
+ {CPC_INTEL_SKYLAKE , "Intel Arch PerfMon v4 on Family 6 Model 85"},/*Skylake*/ \
+ {CPC_INTEL_SKYLAKE , "Intel Arch PerfMon v4 on Family 6 Model 94"},/*Skylake*/ \
+ {CPC_INTEL_UNKNOWN , "Intel Arch PerfMon"},/*Not yet in table*/ \
+ {CPC_SPARC64_III , "SPARC64 III"/*?*/}, \
+ {CPC_SPARC64_V , "SPARC64 V"/*?*/}, \
+ {CPC_SPARC64_VI , "SPARC64 VI"}, \
+ {CPC_SPARC64_VII , "SPARC64 VI & VII"}, \
+ {CPC_SPARC64_X , "SPARC64 X"}, \
+ {CPC_SPARC64_XII , "SPARC64 XII"}, \
+ {CPC_ULTRA_T1 , "UltraSPARC T1"}, \
+ {CPC_ULTRA_T2 , "UltraSPARC T2"}, \
+ {CPC_ULTRA_T2P , "UltraSPARC T2+"}, \
+ {CPC_ULTRA_T3 , "SPARC T3"}, \
+ {CPC_SPARC_T4 , "SPARC T4"}, \
+ {CPC_SPARC_M4 , "SPARC M4"}, \
+ {CPC_SPARC_T5 , "SPARC T5"}, \
+ {CPC_SPARC_M5 , "SPARC M5"}, \
+ {CPC_SPARC_T6 , "SPARC T6"}, \
+ {CPC_SPARC_M6 , "SPARC M6"}, \
+ {CPC_SPARC_M7 , "SPARC T7"}, \
+ {CPC_SPARC_M7 , "SPARC 3e40"}, \
+ {CPC_SPARC_M7 , "SPARC M7"}, \
+ {CPC_SPARC_M8 , "SPARC 3e50"}, \
+ {CPC_ULTRA4_PLUS , "UltraSPARC IV+"}, \
+ {CPC_ULTRA4 , "UltraSPARC IV"}, \
+ {CPC_ULTRA3_I , "UltraSPARC IIIi"}, \
+ {CPC_ULTRA3_I , "UltraSPARC IIIi & IIIi+"}, \
+ {CPC_ULTRA3_PLUS , "UltraSPARC III+"}, \
+ {CPC_ULTRA3_PLUS , "UltraSPARC III+ & IV"}, \
+ {CPC_ULTRA3 , "UltraSPARC III"}, \
+ {CPC_ULTRA2 , "UltraSPARC I&II"}, \
+ {CPC_ULTRA1 , "UltraSPARC I&II"}, \
+ {ARM_CPU_IMP_APM , AARCH64_VENDORSTR_ARM}, \
+ {0, NULL}
+ /* init like this:
+ static libcpc2_cpu_lookup_t cpu_table[]={LIBCPC2_CPU_LOOKUP_LIST};
+ */
+#endif
diff --git a/gprofng/common/hwcdrv.c b/gprofng/common/hwcdrv.c
new file mode 100644
index 0000000..caab983
--- /dev/null
+++ b/gprofng/common/hwcdrv.c
@@ -0,0 +1,1454 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <linux/perf_event.h>
+
+#include "hwcdrv.h"
+
+/*---------------------------------------------------------------------------*/
+/* macros */
+#define IS_GLOBAL /* Mark global symbols */
+
+#include "cpuid.c" /* ftns for identifying a chip */
+
+static hdrv_pcbe_api_t hdrv_pcbe_core_api;
+static hdrv_pcbe_api_t hdrv_pcbe_opteron_api;
+static hdrv_pcbe_api_t *hdrv_pcbe_drivers[] = {
+ &hdrv_pcbe_core_api,
+ &hdrv_pcbe_opteron_api,
+ NULL
+};
+#include "opteron_pcbe.c" /* CPU-specific code */
+#include "core_pcbe.c" /* CPU-specific code */
+
+extern hwcdrv_api_t hwcdrv_pcl_api;
+IS_GLOBAL hwcdrv_api_t *hwcdrv_drivers[] = {
+ &hwcdrv_pcl_api,
+ NULL
+};
+
+/*---------------------------------------------------------------------------*/
+
+/* utils for drivers */
+IS_GLOBAL int
+hwcdrv_assign_all_regnos (Hwcentry* entries[], unsigned numctrs)
+{
+  /* Bind every counter in entries[0..numctrs-1] to a distinct register.
+     Returns 0 on success, or HWCFUNCS_ERROR_HWCARGS when a register is
+     out of range / invalid or when a counter cannot be placed.  On
+     success each entry's reg_num holds the register chosen for it.  */
+  unsigned int in_use[MAX_PICS] = {0}; /* in_use[r] != 0: register r is taken */
+  unsigned idx;
+
+  /* Pass 1: honor counters whose register is already fixed, either
+     explicitly or because their reg_list offers exactly one choice.  */
+  for (idx = 0; idx < numctrs; idx++)
+    {
+      Hwcentry *ent = entries[idx];
+      regno_t regno = ent->reg_num;
+      if (regno == REGNO_ANY)
+        regno = REG_LIST_SINGLE_VALID_ENTRY (ent->reg_list);
+      if (regno == REGNO_ANY)
+        continue; /* still flexible; handled in pass 2 */
+
+      if (regno < 0 || regno >= MAX_PICS || !regno_is_valid (ent, regno))
+        {
+          logerr (GTXT ("For counter #%d, register %d is out of range\n"), idx + 1, regno); /*!*/
+          return HWCFUNCS_ERROR_HWCARGS;
+        }
+      TprintfT (DBG_LT2, "hwcfuncs_assign_regnos(): preselected: idx=%d, regno=%d\n", idx, regno);
+      ent->reg_num = regno; /* write the resolved register back */
+      in_use[regno] = 1;
+    }
+
+  /* Pass 2: place the remaining REGNO_ANY counters on the first free
+     register named in their reg_list.  */
+  for (idx = 0; idx < numctrs; idx++)
+    {
+      Hwcentry *ent = entries[idx];
+      if (ent->reg_num != REGNO_ANY)
+        continue;
+
+      regno_t chosen = REGNO_ANY;
+      for (regno_t *cand = ent->reg_list; cand && *cand != REGNO_ANY; cand++)
+        {
+          if (*cand < 0 || *cand >= MAX_PICS)
+            {
+              logerr (GTXT ("For counter #%d, register %d is out of range\n"), idx + 1, *cand); /*!*/
+              return HWCFUNCS_ERROR_HWCARGS;
+            }
+          if (in_use[*cand] == 0)
+            {
+              chosen = *cand;
+              break;
+            }
+        }
+      if (chosen == REGNO_ANY)
+        {
+          logerr (GTXT ("Counter '%s' could not be bound to a register\n"),
+                  ent->name ? ent->name : "<NULL>");
+          return HWCFUNCS_ERROR_HWCARGS;
+        }
+      TprintfT (DBG_LT2, "hwcfuncs_assign_regnos(): assigned: idx=%d, regno=%d\n", idx, chosen);
+      ent->reg_num = chosen; /* write the resolved register back */
+      in_use[chosen] = 1;
+    }
+  return 0;
+}
+
+IS_GLOBAL int
+hwcdrv_lookup_cpuver (const char * cpcN_cciname)
+{
+  /* Map a processor description string to one of the CPC_* / CPUVER_*
+     codes using LIBCPC2_CPU_LOOKUP_LIST.
+
+     A table entry matches when its name is a prefix of cpcN_cciname.
+     Among all matching entries the one with the LONGEST name wins (the
+     earliest entry wins ties).  Taking the first match instead would let
+     a short name shadow a longer, more specific one that appears later
+     in the table: e.g. "SPARC64 V" is listed before "SPARC64 VI" and
+     "SPARC64 X" before "SPARC64 XII".
+
+     Returns CPUVER_UNDEFINED when cpcN_cciname is NULL, CPUVER_GENERIC
+     when no entry matches, and the entry's cpc2_cpuver otherwise.  */
+  static libcpc2_cpu_lookup_t cpu_table[] = {
+    LIBCPC2_CPU_LOOKUP_LIST
+  };
+  if (cpcN_cciname == NULL)
+    return CPUVER_UNDEFINED;
+
+  /* search table for the most specific (longest) matching name */
+  const libcpc2_cpu_lookup_t *best = NULL;
+  size_t best_len = 0;
+  for (const libcpc2_cpu_lookup_t *plookup = cpu_table;
+       plookup->cpc2_cciname; plookup++)
+    {
+      size_t n = strlen (plookup->cpc2_cciname);
+      if (strncmp (plookup->cpc2_cciname, cpcN_cciname, n) != 0)
+        continue;
+      if (best == NULL || n > best_len)
+        {
+          best = plookup;
+          best_len = n;
+        }
+    }
+  if (best != NULL)
+    return best->cpc2_cpuver;
+
+  /* unknown, but does have a descriptive string */
+  TprintfT (DBG_LT0, "hwcfuncs: CPC2: WARNING: Id of processor '%s' "
+            "could not be determined\n",
+            cpcN_cciname);
+  return CPUVER_GENERIC;
+}
+
+/*---------------------------------------------------------------------------*/
+/* utils to generate x86 register definitions on Linux */
+
+/*
+ * This code is structured as though we're going to initialize the
+ * HWC by writing the Intel MSR register directly. That is, we
+ * assume the lowest 16 bits of the event number will have the event
+ * and that higher bits will set attributes.
+ *
+ * While SPARC is different, we can nonetheless use basically the
+ * same "x86"-named functions:
+ *
+ * - The event code will still be 16 bits. It will still
+ * be in the lowest 16 bits of the event number. Though
+ * perf_event_code() on SPARC will expect those bits to be
+ * shifted, hwcdrv_pcl.c can easily perform that shift.
+ *
+ * - On SPARC we support only two attributes, "user" and "system",
+ * which hwcdrv_pcl.c already converts to the "exclude_user"
+ * and "exclude_kernel" fields expected by perf_event_open().
+ * "user" and "system" are stored in event bits 16 and 17.
+ * For M8, a 4-bit mask of supported PICs is stored in bits [23:20].
+ */
+
+IS_GLOBAL hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum = 0;
+
+static const attr_info_t perfctr_sparc_attrs[] = {
+ {NTXT ("user"), 0, 0x01, 16}, //usr
+ {NTXT ("system"), 0, 0x01, 17}, //os
+ {NULL, 0, 0x00, 0},
+};
+static const attr_info_t perfctr_x64_attrs[] = {/* ok for Core2 & later */
+ {NTXT ("umask"), 0, 0xff, 8},
+ {NTXT ("user"), 0, 0x01, 16}, //usr
+ //{NTXT("nouser"), 1, 0x01, 16}, //usr (inverted)
+ {NTXT ("system"), 0, 0x01, 17}, //os
+ {NTXT ("edge"), 0, 0x01, 18},
+ {NTXT ("pc"), 0, 0x01, 19},
+ {NTXT ("inv"), 0, 0x01, 23},
+ {NTXT ("cmask"), 0, 0xff, 24},
+ {NULL, 0, 0x00, 0},
+};
+const attr_info_t *perfctr_attrs_table = perfctr_x64_attrs;
+
+static const eventsel_t perfctr_evntsel_enable_bits = (0x01 << 16) | /* usr */
+ // (0xff << 0) | /* event*/
+ // (0xff << 8) | /* umask */
+ // (0x01 << 17) | /* os */
+ // (0x01 << 18) | /* edge */
+ // (0x01 << 19) | /* pc */
+ (0x01 << 20) | /* int */
+ // (0x01 << 21) | /* reserved */
+ (0x01 << 22) | /* enable */
+ // (0x01 << 23) | /* inv */
+ // (0xff << 24) | /* cmask */
+ 0;
+
+static int
+myperfctr_get_x86_eventnum (const char *eventname, uint_t pmc,
+                            eventsel_t *eventsel, eventsel_t *valid_umask,
+                            uint_t *pmc_sel)
+{
+  /* Translate an event name into an event-select value.
+     The installed hwcdrv_get_x86_eventnum hook (if any) is consulted
+     first; if it does not resolve the name, the name is accepted only
+     when the whole string parses as a number.  Returns 0 on success and
+     -1 when the name cannot be resolved.  */
+  if (hwcdrv_get_x86_eventnum != NULL
+      && hwcdrv_get_x86_eventnum (eventname, pmc, eventsel, valid_umask, pmc_sel) == 0)
+    return 0;
+
+  /* numerically-specified counter: non-empty and fully consumed */
+  char *tail;
+  uint64_t evnum = strtoull (eventname, &tail, 0);
+  int rc;
+  if (*eventname != '\0' && *tail == '\0')
+    {
+      *eventsel = EXTENDED_EVNUM_2_EVSEL (evnum);
+      *valid_umask = 0xff; /* allow any umask (unused for SPARC?) */
+      rc = 0;
+    }
+  else
+    {
+      /* not a number either: report an invalid selector */
+      *eventsel = (eventsel_t) - 1;
+      *valid_umask = 0x0;
+      rc = -1;
+    }
+  *pmc_sel = pmc;
+  return rc;
+}
+
+static int
+mask_shift_set (eventsel_t *presult, eventsel_t invalue,
+                eventsel_t mask, eventsel_t shift)
+{
+  /* Store invalue into the bit field of *presult described by
+     (mask << shift).  Returns -1, leaving *presult untouched, when
+     invalue has bits outside of mask; returns 0 otherwise.  */
+  if ((invalue & ~mask) != 0)
+    return -1;
+
+  eventsel_t field = mask << shift;
+  eventsel_t updated = *presult & ~field; /* clear the field ... */
+  updated |= invalue << shift;            /* ... then drop in the new value */
+  *presult = updated;
+  return 0;
+}
+
+static int
+set_x86_attr_bits (eventsel_t *result_mask, eventsel_t evnt_valid_umask,
+                   hwcfuncs_attr_t attrs[], int nattrs, const char*nameOnly)
+{
+  /* Apply each of the nattrs user-supplied attributes to a copy of
+     *result_mask, using perfctr_attrs_table to find the bit field that
+     belongs to each attribute name.  *result_mask is updated only when
+     every attribute was applied; on any failure -1 is returned and
+     *result_mask is left as it was.  nameOnly is used in messages only.  */
+  eventsel_t working = *result_mask;
+
+  for (int ai = 0; ai < nattrs; ai++)
+    {
+      const char *want = attrs[ai].ca_name;
+      eventsel_t val = (eventsel_t) attrs[ai].ca_val;
+
+      /* locate the attribute in the (NULL-name terminated) table */
+      int hit = -1;
+      for (int ti = 0; perfctr_attrs_table[ti].attrname; ti++)
+        if (strcmp (want, perfctr_attrs_table[ti].attrname) == 0)
+          {
+            hit = ti;
+            break;
+          }
+      if (hit < 0)
+        {
+          logerr (GTXT ("attribute `%s' is invalid\n"), want);
+          return -1;
+        }
+
+      /* umask is additionally limited to the bits this event allows */
+      if (strcmp (want, "umask") == 0 && (val & ~evnt_valid_umask))
+        {
+          logerr (GTXT ("for `%s', allowable umask bits are: 0x%llx\n"),
+                  nameOnly, (long long) evnt_valid_umask);
+          return -1;
+        }
+
+      eventsel_t to_store = perfctr_attrs_table[hit].is_inverted ? (val ^ 1) : val;
+      if (mask_shift_set (&working, to_store,
+                          perfctr_attrs_table[hit].mask,
+                          perfctr_attrs_table[hit].shift))
+        {
+          logerr (GTXT ("`%s' attribute `%s' could not be set to 0x%llx\n"),
+                  nameOnly, want, (long long) val);
+          return -1;
+        }
+      TprintfT (DBG_LT2, "hwcfuncs: Counter %s, attribute %s set to 0x%llx\n",
+                nameOnly, want, (long long) val);
+    }
+
+  *result_mask = working;
+  return 0;
+}
+
+IS_GLOBAL int
+hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name,
+                           eventsel_t *return_event, uint_t *return_pmc_sel)
+{
+  /* Build the complete event-select word for counter spec int_name on
+     register regno.  On success returns 0 with *return_event holding
+     the event bits OR'ed with the attribute/enable bits and
+     *return_pmc_sel holding the PMC selector.  On failure returns -1
+     and both outputs are 0.  */
+  hwcfuncs_attr_t attrs[HWCFUNCS_MAX_ATTRS + 1];
+  unsigned nattrs = 0;
+  char *nameOnly = NULL;
+  eventsel_t evntsel = 0; // event number
+  eventsel_t evnt_valid_umask = 0;
+  uint_t pmc_sel = 0;
+  int rc = -1;
+
+  *return_event = 0;
+  *return_pmc_sel = 0;
+
+  void *attr_mem = hwcfuncs_parse_attrs (int_name, attrs, HWCFUNCS_MAX_ATTRS,
+                                         &nattrs, NULL);
+  if (attr_mem == NULL)
+    {
+      logerr (GTXT ("out of memory, could not parse attributes\n"));
+      return -1;
+    }
+  hwcfuncs_parse_ctr (int_name, NULL, &nameOnly, NULL, NULL, NULL);
+
+  if (regno == REGNO_ANY)
+    logerr (GTXT ("reg# could not be determined for `%s'\n"), nameOnly);
+  else if (myperfctr_get_x86_eventnum (nameOnly, regno,
+                                       &evntsel, &evnt_valid_umask, &pmc_sel))
+    logerr (GTXT ("counter `%s' is not valid\n"), nameOnly);
+  else
+    {
+      TprintfT (DBG_LT1, "hwcfuncs: event=0x%llx pmc=0x%x '%s' nattrs = %u\n",
+                (long long) evntsel, pmc_sel, nameOnly, nattrs);
+
+      /* start from the always-on bits, then layer user attributes on top */
+      eventsel_t evnt_attrs = perfctr_evntsel_enable_bits;
+      if (set_x86_attr_bits (&evnt_attrs, evnt_valid_umask, attrs, nattrs, nameOnly) == 0)
+        {
+          if (evntsel & evnt_attrs)
+            TprintfT (DBG_LT0, "hwcfuncs: ERROR - evntsel & enable bits overlap: 0x%llx 0x%llx 0x%llx\n",
+                      (long long) evntsel, (long long) evnt_attrs,
+                      (long long) (evntsel & evnt_attrs));
+          *return_event = evntsel | evnt_attrs;
+          *return_pmc_sel = pmc_sel;
+          rc = 0;
+        }
+    }
+
+  /* single exit: release what the two parse helpers handed back */
+  free (attr_mem);
+  free (nameOnly);
+  return rc;
+}
+
+#ifdef __x86_64__
+#define syscall_instr "syscall"
+#define syscall_clobber "rcx", "r11", "memory"
+#endif
+#ifdef __i386__
+#define syscall_instr "int $0x80"
+#define syscall_clobber "memory"
+#endif
+
+static inline int
+perf_event_open (struct perf_event_attr *hw_event_uptr, pid_t pid,
+                 int cpu, int group_fd, unsigned long flags)
+{
+  /* Thin wrapper around the perf_event_open system call.
+   * The call has been seen to fail spuriously and then succeed when
+   * repeated immediately, so a failing call is retried; at most five
+   * attempts are made in total.  Returns the result of the last attempt.
+   */
+  int attempts_left = 5;
+  int fd;
+  do
+    fd = syscall (__NR_perf_event_open, hw_event_uptr, pid, cpu, group_fd, flags);
+  while (fd == -1 && --attempts_left > 0);
+  return fd;
+}
+
+/*---------------------------------------------------------------------------*/
+/* macros & fwd prototypes */
+
+#define HWCDRV_API static /* Mark functions used by hwcdrv API */
+
+HWCDRV_API int hwcdrv_start (void);
+HWCDRV_API int hwcdrv_free_counters ();
+
+static pid_t
+hwcdrv_gettid (void)
+{ /* Return the result of the gettid system call (__NR_gettid).
+     The call goes through syscall() unless LIBCOLLECTOR_SRC and intel
+     are both defined, in which case it is issued with inline assembly.
+     NOTE(review): presumably the asm path exists to avoid calling the
+     libc wrapper from collector code -- confirm.  */
+#ifndef LIBCOLLECTOR_SRC
+ return syscall (__NR_gettid);
+#elif defined(intel)
+ pid_t r;
+ __asm__ __volatile__(syscall_instr // trap instruction chosen by the syscall_instr macro
+ : "=a" (r) : "0" (__NR_gettid) // "0" ties the syscall number to the same operand as the result r
+ : syscall_clobber);
+ return r;
+#else
+ return syscall (__NR_gettid); // FIXUP_XXX_SPARC_LINUX // write gettid in asm
+#endif
+}
+
+/*---------------------------------------------------------------------------*/
+/* types */
+
+#define NPAGES_PER_BUF 1 // number of pages to be used for perf_event samples
+// must be a power of 2
+
+/*---------------------------------------------------------------------------*/
+
+/* typedefs */
+
+typedef struct
+{ // definition of one hardware-counter (hwc) event
+ unsigned int reg_num; // PMC assignment, potentially for detecting conflicts
+ eventsel_t eventsel; // raw event bits (Intel/AMD)
+ uint64_t counter_preload; // number of HWC events before signal
+ struct perf_event_attr hw; // perf_event attributes used as the template when the counter is opened
+ hrtime_t min_time; // minimum time we're targeting between events
+ char *name; // counter name, used in trace messages
+} perf_event_def_t;
+
+typedef struct
+{ // runtime state of one perf_event sample buffer
+ void *buf; // pointer to mmapped buffer: a struct perf_event_mmap_page metadata page, with sample data starting pagesz bytes in
+ size_t pagesz; // size of one page in bytes; the data area is NPAGES_PER_BUF pages
+} buffer_state_t;
+
+typedef struct
+{ // values from the previous sample, kept so that read_sample() can report deltas
+ uint64_t prev_ena_ts; // previous perf_event "enabled" time
+ uint64_t prev_run_ts; // previous perf_event "running" time
+ uint64_t prev_value; // previous HWC value
+} counter_value_state_t;
+
+typedef struct
+{ // per-counter information (one element of hdrv_pcl_ctx_t.ctr_list)
+ perf_event_def_t *ev_def; // global HWC definition for one counter
+ int fd; // perf_event fd
+ buffer_state_t buf_state; // perf_event buffer's state
+ counter_value_state_t value_state; // counter state (previous sample's readings)
+ int needs_restart; // workaround for dbx failure to preserve si_fd
+ uint64_t last_overflow_period; // sample period used when the counter is opened; also the baseline for read_sample()'s skid-overrun check
+ hrtime_t last_overflow_time; // set from gethrtime() when the counter is started
+} counter_state_t;
+
+typedef struct
+{ // per-thread context
+ counter_state_t *ctr_list; // per-counter state, indexed like global_perf_event_def[]
+ int signal_fd; // fd that caused the most recent signal
+ pthread_t tid; // for debugging signal delivery problems; also passed as the pid argument of perf_event_open()
+} hdrv_pcl_ctx_t;
+
+/*---------------------------------------------------------------------------*/
+
+/* static variables */
+static struct
+{
+ int library_ok;
+ int internal_open_called;
+ hwcfuncs_tsd_get_fn_t find_vpc_ctx;
+ unsigned hwcdef_cnt; /* number of *active* hardware counters */
+ hwcdrv_get_events_fn_t *get_events;
+} hdrv_pcl_state;
+
+static hwcdrv_about_t hdrv_pcl_about = {.cpcN_cpuver = CPUVER_UNDEFINED};
+static perf_event_def_t global_perf_event_def[MAX_PICS];
+
+#define COUNTERS_ENABLED() (hdrv_pcl_state.hwcdef_cnt)
+
+
+/* perf_event buffer formatting and handling */
+static void
+reset_buf (buffer_state_t *bufstate)
+{
+  /* Discard everything pending in the sample buffer by moving the
+     consumer position (data_tail) up to the producer position
+     (data_head).  Always traced as an error; no-op if nothing is mapped.  */
+  TprintfT (0, "hwcdrv: ERROR: perf_event reset_buf() called!\n");
+  struct perf_event_mmap_page *page = bufstate->buf;
+  if (page == NULL)
+    return;
+  page->data_tail = page->data_head;
+}
+
+static int
+skip_buf (buffer_state_t *bufstate, size_t sz)
+{
+  /* Drop the next sz bytes of pending sample data without copying them.
+     Returns 0 on success.  Returns -1 if no buffer is mapped, or if sz
+     exceeds the pending data or is not smaller than the data area; in
+     the latter case the whole buffer is reset first.  */
+  TprintfT (DBG_LT1, "hwcdrv: WARNING: perf_event skip_buf called!\n");
+  struct perf_event_mmap_page *page = bufstate->buf;
+  if (!page)
+    return -1;
+
+  const size_t capacity = NPAGES_PER_BUF * bufstate->pagesz;
+  const uint64_t tail = page->data_tail;
+  const uint64_t head = page->data_head;
+
+  // the request must fit both the pending data and the data area
+  int fits = sz <= head - tail && sz < capacity;
+  if (!fits)
+    {
+      reset_buf (bufstate);
+      return -1;
+    }
+  page->data_tail = tail + sz; // advance tail
+  return 0;
+}
+
+static int
+read_buf (buffer_state_t *bufstate, void *buf, size_t sz)
+{
+  /* Copy the next sz bytes of pending sample data into buf and consume
+     them (data_tail advances by sz).  The data area is treated as a
+     ring, so the copy is split in two when it runs past the end.
+     Returns 0 on success.  Returns -1 if no buffer is mapped, or if sz
+     exceeds the pending data or is not smaller than the data area; in
+     the latter case the whole buffer is reset first.  */
+  struct perf_event_mmap_page *page = bufstate->buf;
+  if (!page)
+    return -1;
+
+  const size_t pgsz = bufstate->pagesz;
+  const size_t capacity = NPAGES_PER_BUF * pgsz;
+  const uint64_t tail = page->data_tail;
+  const uint64_t head = page->data_head;
+
+  // the request must fit both the pending data and the data area
+  if (sz > head - tail || sz >= capacity)
+    {
+      reset_buf (bufstate);
+      return -1;
+    }
+
+  char *data = (char *) page + pgsz;         // data area follows the metadata page
+  char *out = buf;
+  uint64_t offset = tail & (capacity - 1);   // byte offset into the data area
+
+  // first piece: up to the end of the data area
+  size_t first = sz;
+  if (offset + sz > capacity)
+    first = capacity - offset;
+  memcpy (out, data + offset, first);
+
+  // second piece (only when wrapping): continue from the start
+  if (first < sz)
+    memcpy (out + first, data, sz - first);
+
+  page->data_tail += sz;
+  return 0;
+}
+
+static int
+read_u64 (buffer_state_t *bufstate, uint64_t *value)
+{
+  /* Consume one 64-bit word from the sample buffer into *value.
+     Returns whatever read_buf() returns (0 on success).  */
+  const size_t width = sizeof *value;
+  return read_buf (bufstate, value, width);
+}
+
+static int
+read_sample (counter_state_t *ctr_state, int msgsz, uint64_t *rvalue,
+             uint64_t *rlost)
+{
+  /* Parse one sample record from the counter's buffer.
+     Four 64-bit words are consumed, in this order:
+       PERF_SAMPLE_IP
+       PERF_SAMPLE_READ: value
+       PERF_SAMPLE_READ: PERF_FORMAT_ENABLED
+       PERF_SAMPLE_READ: PERF_FORMAT_RUNNING
+     Returns 0 on success with *rvalue = change in counter value since
+     the previous sample (scaled up when the counter was multiplexed,
+     and with bit 63 set when the change looks anomalous) and
+     *rlost = a small diagnostic code (0, 1 or 2).
+     Returns -1..-4 when the 1st..4th word cannot be read (outputs
+     untouched), and -5 when msgsz differs from the bytes consumed
+     (outputs already written).  */
+  buffer_state_t *ring = &ctr_state->buf_state;
+  counter_value_state_t *prev = &ctr_state->value_state;
+
+  /* Bug 20806896
+   * Old Linux kernels (e.g. 2.6.32) on certain systems return enabled and
+   * running times in the sample data that correspond to the metadata times
+   *    metadata->time_enabled
+   *    metadata->time_running
+   * from the PREVIOUS (not current) sample.  Probably just ignore this bug
+   * since it's on old kernels and we only use the enabled and running times
+   * to construct loss_estimate.
+   */
+  enum { FLD_IP, FLD_VALUE, FLD_ENABLED, FLD_RUNNING, FLD_COUNT };
+  uint64_t field[FLD_COUNT] = {0};
+  int readsz = 0;
+  for (int k = 0; k < FLD_COUNT; k++)
+    {
+      if (read_u64 (ring, &field[k]))
+        return -(k + 1);
+      readsz += sizeof (uint64_t);
+    }
+  uint64_t ipc = field[FLD_IP];
+
+  // deltas against the previous sample, then remember this one
+  uint64_t value_delta = field[FLD_VALUE] - prev->prev_value;
+  uint64_t enabled_delta = field[FLD_ENABLED] - prev->prev_ena_ts;
+  uint64_t running_delta = field[FLD_RUNNING] - prev->prev_run_ts;
+  prev->prev_value = field[FLD_VALUE];
+  prev->prev_ena_ts = field[FLD_ENABLED];
+  prev->prev_run_ts = field[FLD_RUNNING];
+
+  // 24830461 need workaround for Linux anomalous HWC skid overrun
+  int set_error_flag =
+    value_delta > 2 * ctr_state->last_overflow_period + 2000 /* HWC_SKID_TOLERANCE */;
+
+  uint64_t loss_estimate = 0; // estimate loss of events caused by multiplexing
+  if (running_delta != enabled_delta)
+    {
+      // (equal deltas mean the counter ran 100% of the time: nothing to do)
+      if (running_delta == 0)
+        loss_estimate = 1; // token amount to aid in debugging perfctr oddities
+      else if ((running_delta > enabled_delta) || (enabled_delta & 0x1000000000000000ll))
+        {
+          /* running should be smaller than enabled, can't estimate
+           *
+           * 21418391 HWC can have a negative count
+           *
+           * We've also seen enabled not only be smaller than running
+           * but in fact go negative.  Guard against this.
+           */
+          loss_estimate = 2; // token amount to aid in debugging perfctr oddities
+        }
+      else
+        {
+          // counter was running less than 100% of time: scale the count up
+          // Example: ena=7772268 run=6775669 raw_value=316004 scaled_value=362483 loss_est=46479
+          // Multiplexing is not reported to the user from here; this
+          // file has no access to the collector's logging interface.
+          value_delta = (double) value_delta * enabled_delta / running_delta;
+        }
+    }
+
+  TprintfT ((loss_estimate || set_error_flag) ? DBG_LT1 : DBG_LT3,
+            "hwcdrv: '%s' ipc=0x%llx ena=%llu run=%llu "
+            "value_delta=%lld(0x%llx) loss_est=%llu %s error_flag='%s'\n",
+            ctr_state->ev_def->name, (long long) ipc,
+            (long long) enabled_delta, (long long) running_delta,
+            (long long) value_delta, (long long) value_delta,
+            (unsigned long long) loss_estimate,
+            loss_estimate ? ", WARNING - SCALED" : "",
+            set_error_flag ? ", ERRORFLAG" : "");
+
+  if (set_error_flag == 1)
+    value_delta |= (1ULL << 63) /* HWCVAL_ERR_FLAG */;
+  *rvalue = value_delta;
+  *rlost = loss_estimate;
+
+  if (readsz != msgsz)
+    {
+      TprintfT (0, "hwcdrv: ERROR: perf_event sample not fully parsed\n");
+      return -5;
+    }
+  return 0;
+}
+
+static void
+dump_perf_event_attr (struct perf_event_attr *at)
+{ /* Debug aid: trace the contents of *at through TprintfT at level
+     DBG_LT2.  The scalar fields are always printed; each flag listed
+     below is printed only when it is non-zero.  */
+ TprintfT (DBG_LT2, "dump_perf_event_attr: size=%d type=%d sample_period=%lld\n"
+ " config=0x%llx config1=0x%llx config2=0x%llx wakeup_events=%lld __reserved_1=%lld\n",
+ (int) at->size, (int) at->type, (unsigned long long) at->sample_period,
+ (unsigned long long) at->config, (unsigned long long) at->config1,
+ (unsigned long long) at->config2, (unsigned long long) at->wakeup_events,
+ (unsigned long long) at->__reserved_1);
+#define DUMP_F(fld) if (at->fld) TprintfT(DBG_LT2, " %-10s : %lld\n", #fld, (long long) at->fld) // relies on the parameter being named `at'; not #undef'd in this function
+ DUMP_F (disabled);
+ DUMP_F (inherit);
+ DUMP_F (pinned);
+ DUMP_F (exclusive);
+ DUMP_F (exclude_user);
+ DUMP_F (exclude_kernel);
+ DUMP_F (exclude_hv);
+ DUMP_F (exclude_idle);
+ // DUMP_F(xmmap);
+ DUMP_F (comm);
+ DUMP_F (freq);
+ DUMP_F (inherit_stat);
+ DUMP_F (enable_on_exec);
+ DUMP_F (task);
+ DUMP_F (watermark);
+}
+
+static void
+init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period)
+{
+  /* Fill *hw from scratch for a sampling counter.
+     event  - event number; bit 16 = count user mode, bit 17 = count
+              kernel mode, remaining bits are interpreted per
+              architecture below
+     period - initial sample period
+     The counter is created disabled, never counts the hypervisor, and
+     wakes up on every sample.  The result is traced through
+     dump_perf_event_attr().  */
+  memset (hw, 0, sizeof (*hw));
+  hw->size = sizeof (*hw); // fwd/bwd compat
+
+#if defined(__i386__) || defined(__x86_64)
+  //note: Nehalem/Westmere OFFCORE_RESPONSE in upper 32 bits
+  hw->config = event;
+  hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw...
+#elif defined(__aarch64__)
+  hw->type = (event >> 24) & 7;
+  hw->config = event & 0xff;
+#elif defined(sparc)
+  //SPARC needs to be shifted up 16 bits
+  hw->config = (event & 0xFFFF) << 16; // uint64_t event
+  uint64_t regs = (event >> 20) & 0xf; // see sparc_pcbe.c
+  hw->config |= regs << 4; // for M8, supported PICs need to be placed at bits [7:4]
+  hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw...
+#endif
+
+  hw->sample_period = period;
+
+  // Each sample carries the IP and the HWC value.  Other PERF_SAMPLE_*
+  // fields (TIME, CALLCHAIN, CPU are the possibly interesting ones) are
+  // not requested.
+  hw->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ;
+
+  // enabled/running times let us detect when the hwc was not scheduled
+  hw->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED
+                    | PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+  hw->disabled = 1; /* off by default */
+
+  // Note: the following override config.priv bits!
+  const int count_user = (event & (1 << 16)) != 0;
+  const int count_kernel = (event & (1 << 17)) != 0;
+  hw->exclude_user = !count_user;     /* don't count user */
+  hw->exclude_kernel = !count_kernel; /* ditto kernel */
+  hw->exclude_hv = 1;                 /* ditto hypervisor */
+  hw->wakeup_events = 1;              /* wakeup every n events */
+  dump_perf_event_attr (hw);
+}
+
+/* Open, map and arm (but do not start) counter II for the thread in PCTX.
+   The attributes prepared by hwcdrv_create_counters () are copied from
+   global_perf_event_def[II], with the sample period replaced by
+   pctx->ctr_list[II].last_overflow_period, which the caller must have set.
+   PGSZ sizes the mmap'ed sample buffer and ERROR_STRING prefixes every
+   diagnostic.
+   Returns 0 on success and 1 on failure.  If the descriptor cannot be
+   opened or mapped, nothing is left open and ctr_list[II].fd is set to -1.
+   If a later fcntl () step fails, the descriptor and the mapping stay
+   recorded in ctr_list[II] so that stop_one_ctr () can release them.  */
+static int
+start_one_ctr (int ii, size_t pgsz, hdrv_pcl_ctx_t * pctx, char *error_string)
+{
+  // pe_attr should have been initialized in hwcdrv_create_counters()
+  struct perf_event_attr pe_attr;
+  memcpy (&pe_attr, &global_perf_event_def[ii].hw, sizeof (pe_attr));
+
+  // but we adjust the period, so make sure that pctx->ctr_list[ii].last_overflow_period has been set
+  pe_attr.sample_period = pctx->ctr_list[ii].last_overflow_period;
+
+  int hwc_fd = perf_event_open (&pe_attr, pctx->tid, -1, -1, 0);
+  if (hwc_fd == -1)
+    {
+      TprintfT (DBG_LT1, "%s idx=%d perf_event_open failed, errno=%d\n",
+                error_string, ii, errno);
+      // do not leave a stale descriptor number behind when re-arming
+      pctx->ctr_list[ii].fd = -1;
+      return 1;
+    }
+
+  size_t buffer_area_sz = (NPAGES_PER_BUF + 1) * pgsz; // add a page for metadata
+  void * buf = mmap (NULL, buffer_area_sz, //YXXX is this a safe call?
+                     PROT_READ | PROT_WRITE, MAP_SHARED, hwc_fd, 0);
+  if (buf == MAP_FAILED)
+    {
+      TprintfT (0, "sz = %ld, pgsz = %ld\n err=%s idx=%d mmap failed: %s\n",
+                (long) buffer_area_sz, (long) pgsz, error_string, ii, strerror (errno));
+      // the descriptor is not yet recorded anywhere, so release it here
+      close (hwc_fd);
+      pctx->ctr_list[ii].fd = -1;
+      return 1;
+    }
+  pctx->ctr_list[ii].ev_def = &global_perf_event_def[ii]; // why do we set ev_def? we never seem to use it
+  pctx->ctr_list[ii].fd = hwc_fd;
+  pctx->ctr_list[ii].buf_state.buf = buf;
+  pctx->ctr_list[ii].buf_state.pagesz = pgsz;
+  pctx->ctr_list[ii].value_state.prev_ena_ts = 0;
+  pctx->ctr_list[ii].value_state.prev_run_ts = 0;
+  pctx->ctr_list[ii].value_state.prev_value = 0;
+  pctx->ctr_list[ii].last_overflow_time = gethrtime ();
+
+  /* set async mode; check F_GETFL first so that a failure (-1) is not
+     OR-ed with O_ASYNC and handed to F_SETFL as a flag word */
+  long flags = fcntl (hwc_fd, F_GETFL, 0);
+  if (flags == -1)
+    {
+      TprintfT (0, "%s idx=%d F_GETFL failed\n", error_string, ii);
+      return 1;
+    }
+  int rc = fcntl (hwc_fd, F_SETFL, flags | O_ASYNC);
+  if (rc == -1)
+    {
+      TprintfT (0, "%s idx=%d O_ASYNC failed\n", error_string, ii);
+      return 1;
+    }
+
+  /*
+   * set lwp ownership of the fd
+   * See BUGS section of "man perf_event_open":
+   * The F_SETOWN_EX option to fcntl(2) is needed to properly get
+   * overflow signals in threads. This was introduced in Linux 2.6.32.
+   * Legacy references:
+   * see http://lkml.org/lkml/2009/8/4/128
+   * google man fcntl F_SETOWN_EX -conflict
+   * "From Linux 2.6.32 onward, use F_SETOWN_EX to target
+   * SIGIO and SIGURG signals at a particular thread."
+   * http://icl.cs.utk.edu/papi/docs/da/d2a/examples__v2_8x_2self__smpl__multi_8c.html
+   * See 2010 CSCADS presentation by Eranian
+   */
+  struct f_owner_ex fowner_ex;
+  fowner_ex.type = F_OWNER_TID;
+  fowner_ex.pid = pctx->tid;
+  rc = fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex);
+  if (rc == -1)
+    {
+      TprintfT (0, "%s idx=%d F_SETOWN failed\n", error_string, ii);
+      return 1;
+    }
+
+  /* Use sigio so handler can determine FD via siginfo->si_fd. */
+  rc = fcntl (hwc_fd, F_SETSIG, SIGIO);
+  if (rc == -1)
+    {
+      TprintfT (0, "%s idx=%d F_SETSIG failed\n", error_string, ii);
+      return 1;
+    }
+  return 0;
+}
+
+/* Disable counter II of CTR_LIST, unmap its sample buffer and close its
+   descriptor.  A slot whose fd is negative has no open descriptor, so the
+   ioctl () and close () steps are skipped for it.  After closing, the fd
+   is reset to -1 so that a repeated stop (or a failed re-arm in between)
+   can never disable or close a descriptor number that has since been
+   reused for something else.
+   Returns 0 on success, or HWCFUNCS_ERROR_GENERIC if any step failed.  */
+static int
+stop_one_ctr (int ii, counter_state_t *ctr_list)
+{
+  int hwc_rc = 0;
+  int fd = ctr_list[ii].fd;
+
+  if (fd >= 0 && -1 == ioctl (fd, PERF_EVENT_IOC_DISABLE, 1))
+    {
+      TprintfT (0, "hwcdrv: ERROR: PERF_EVENT_IOC_DISABLE #%d failed: errno=%d\n", ii, errno);
+      hwc_rc = HWCFUNCS_ERROR_GENERIC;
+    }
+
+  void *buf = ctr_list[ii].buf_state.buf;
+  if (buf)
+    {
+      size_t bufsz = (NPAGES_PER_BUF + 1) * ctr_list[ii].buf_state.pagesz;
+      ctr_list[ii].buf_state.buf = NULL; // readers treat NULL as "no buffer"
+      int tmprc = munmap (buf, bufsz);
+      if (tmprc)
+        {
+          TprintfT (0, "hwcdrv: ERROR: munmap() #%d failed: errno=%d\n", ii, errno);
+          hwc_rc = HWCFUNCS_ERROR_GENERIC;
+        }
+    }
+
+  if (fd >= 0)
+    {
+      ctr_list[ii].fd = -1; // the number is invalid whether or not close() succeeds
+      if (-1 == close (fd))
+        {
+          TprintfT (0, "hwcdrv: ERROR: close(fd) #%d failed: errno=%d\n", ii, errno);
+          hwc_rc = HWCFUNCS_ERROR_GENERIC;
+        }
+    }
+  return hwc_rc;
+}
+
+/* HWCDRV_API for thread-specific actions */
+/* Per-thread initialization: simply starts the counters for the calling
+   thread and hands back the result of hwcdrv_start ().  */
+HWCDRV_API int
+hwcdrv_lwp_init (void)
+{
+  int rc = hwcdrv_start ();
+  return rc;
+}
+
+/* Per-thread cleanup: releases the calling thread's counters.  The status
+   returned by hwcdrv_free_counters () is intentionally discarded.  */
+HWCDRV_API void
+hwcdrv_lwp_fini (void)
+{
+  /* also sets pctx->ctr_list=NULL; */
+  (void) hwcdrv_free_counters ();
+}
+
+/* open */
+/* One-time probe and initialization of the perf_event ("PCL") back end.
+   Opens a throw-away PERF_COUNT_HW_INSTRUCTIONS counter on the calling
+   thread and checks that fcntl (F_SETOWN_EX) works on it, then asks each
+   entry of hdrv_pcbe_drivers[] in turn to initialize; the first one whose
+   hdrv_pcbe_init () returns 0 supplies the CPU name/version, the counter
+   count and the event lookup functions stored in hdrv_pcl_about,
+   hdrv_pcl_state and hwcdrv_get_x86_eventnum.
+   Returns 0 on success, HWCFUNCS_ERROR_ALREADY_CALLED on a repeated call,
+   or HWCFUNCS_ERROR_NOT_SUPPORTED (with hdrv_pcl_about reset) on failure.  */
+static int
+hdrv_pcl_internal_open ()
+{
+ if (hdrv_pcl_state.internal_open_called)
+ {
+ TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open: already called\n");
+ return HWCFUNCS_ERROR_ALREADY_CALLED;
+ }
+
+ // determine if PCL is available
+ perf_event_def_t tmp_event_def;
+ memset (&tmp_event_def, 0, sizeof (tmp_event_def));
+ struct perf_event_attr *pe_attr = &tmp_event_def.hw;
+ init_perf_event (pe_attr, 0, 0);
+ pe_attr->type = PERF_TYPE_HARDWARE; // specify abstracted HW event
+ pe_attr->config = PERF_COUNT_HW_INSTRUCTIONS; // specify abstracted insts
+ int hwc_fd = perf_event_open (pe_attr,
+ 0, // pid/tid, 0 is self
+ -1, // cpu, -1 is per-thread mode
+ -1, // group_fd, -1 is root
+ 0); // flags
+ if (hwc_fd == -1)
+ {
+ TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open:"
+ " perf_event_open() failed, errno=%d\n", errno);
+ goto internal_open_error;
+ }
+
+ /* see if the PCL is new enough to know about F_SETOWN_EX */
+ struct f_owner_ex fowner_ex;
+ fowner_ex.type = F_OWNER_TID;
+ fowner_ex.pid = hwcdrv_gettid (); // "pid=tid" is correct w/F_OWNER_TID
+ if (fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex) == -1)
+ {
+ TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open: "
+ "F_SETOWN failed, errno=%d\n", errno);
+ close (hwc_fd);
+ goto internal_open_error;
+ }
+ close (hwc_fd); // the probe counter is no longer needed
+
+ hdrv_pcl_state.internal_open_called = 1;
+ hdrv_pcl_state.library_ok = 1; // set to non-zero to show it's initted
+ hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED;
+ TprintfT (DBG_LT2, "hwcdrv: hdrv_pcl_internal_open()\n");
+ // pick the first CPU-specific driver that accepts this processor
+ for (int ii = 0; hdrv_pcbe_drivers[ii]; ii++)
+ {
+ hdrv_pcbe_api_t *ppcbe = hdrv_pcbe_drivers[ii];
+ if (!ppcbe->hdrv_pcbe_init ())
+ {
+ hdrv_pcl_about.cpcN_cciname = ppcbe->hdrv_pcbe_impl_name ();
+ hdrv_pcl_about.cpcN_cpuver = hwcdrv_lookup_cpuver (hdrv_pcl_about.cpcN_cciname);
+ // NOTE(review): this goto leaves internal_open_called and library_ok
+ // set to 1 even though NOT_SUPPORTED is returned -- confirm intended
+ if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED)
+ goto internal_open_error;
+ hdrv_pcl_about.cpcN_npics = ppcbe->hdrv_pcbe_ncounters ();
+ hdrv_pcl_about.cpcN_docref = ppcbe->hdrv_pcbe_cpuref ();
+ hdrv_pcl_state.get_events = ppcbe->hdrv_pcbe_get_events;
+ hwcdrv_get_x86_eventnum = ppcbe->hdrv_pcbe_get_eventnum;
+ break;
+ }
+ }
+ // NOTE(review): if no driver initialized, execution still falls through
+ // and returns 0 with cpcN_cpuver == CPUVER_UNDEFINED
+ if (hdrv_pcl_about.cpcN_npics > MAX_PICS)
+ {
+ TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open:"
+ " reducing number of HWCs from %u to %u on processor '%s'\n",
+ hdrv_pcl_about.cpcN_npics, MAX_PICS, hdrv_pcl_about.cpcN_cciname);
+ hdrv_pcl_about.cpcN_npics = MAX_PICS;
+ }
+ TprintfT (DBG_LT1, "hwcdrv: hdrv_pcl_internal_open:"
+ " perf_event cpuver=%d, name='%s'\n",
+ hdrv_pcl_about.cpcN_cpuver, hdrv_pcl_about.cpcN_cciname);
+ return 0;
+
+internal_open_error:
+ // report "nothing available" through hwcdrv_get_info ()
+ hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED;
+ hdrv_pcl_about.cpcN_npics = 0;
+ hdrv_pcl_about.cpcN_docref = NULL;
+ hdrv_pcl_about.cpcN_cciname = NULL;
+ return HWCFUNCS_ERROR_NOT_SUPPORTED;
+}
+
+/* Default context lookup used until hwcdrv_enable_mt () installs another
+   one: every call returns the address of the same static context.  */
+static void *
+single_thread_tsd_ftn ()
+{
+  static hdrv_pcl_ctx_t tsd_context;
+  void *ctx = &tsd_context;
+  return ctx;
+}
+
+/* HWCDRV_API */
+/* Initialize the driver.  Installs the single-thread context lookup,
+   reports the per-thread context size through TSD_SZ (when non-NULL) and
+   runs the one-time back-end probe.  ABORT_FTN is not used here.
+   Returns the result of hdrv_pcl_internal_open (), or
+   HWCFUNCS_ERROR_ALREADY_CALLED if the probe has already been run.  */
+HWCDRV_API int
+hwcdrv_init (hwcfuncs_abort_fn_t abort_ftn, int *tsd_sz)
+{
+  hdrv_pcl_state.find_vpc_ctx = single_thread_tsd_ftn;
+  if (tsd_sz != NULL)
+    {
+      *tsd_sz = sizeof (hdrv_pcl_ctx_t);
+    }
+
+  return hdrv_pcl_state.internal_open_called
+         ? HWCFUNCS_ERROR_ALREADY_CALLED
+         : hdrv_pcl_internal_open ();
+}
+
+/* Copy the session description gathered by hdrv_pcl_internal_open () into
+   whichever output pointers are non-NULL.  SUPPORT always receives the
+   overflow-profiling and overflow-counter-id capability bits.  */
+HWCDRV_API void
+hwcdrv_get_info (int *cpuver, const char **cciname, uint_t *npics,
+                 const char **docref, uint64_t *support)
+{
+  if (cpuver != NULL)
+    {
+      *cpuver = hdrv_pcl_about.cpcN_cpuver;
+    }
+  if (cciname != NULL)
+    {
+      *cciname = hdrv_pcl_about.cpcN_cciname;
+    }
+  if (npics != NULL)
+    {
+      *npics = hdrv_pcl_about.cpcN_npics;
+    }
+  if (docref != NULL)
+    {
+      *docref = hdrv_pcl_about.cpcN_docref;
+    }
+  if (support != NULL)
+    {
+      *support = HWCFUNCS_SUPPORT_OVERFLOW_PROFILING | HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID;
+    }
+}
+
+/* Switch to multi-threaded mode by installing TSD_FTN as the function
+   that locates the per-thread context.
+   Returns 0 on success, or HWCFUNCS_ERROR_UNAVAIL when TSD_FTN is NULL
+   (the previously installed lookup is then left in place).  */
+HWCDRV_API int
+hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn)
+{
+  if (tsd_ftn == NULL)
+    {
+      TprintfT (0, "hwcdrv: ERROR: enable_mt(): tsd_ftn==NULL\n");
+      return HWCFUNCS_ERROR_UNAVAIL;
+    }
+
+  hdrv_pcl_state.find_vpc_ctx = tsd_ftn;
+  return 0;
+}
+
+/* Enumerate the available counters and attributes through callbacks.
+   HWC_CB, when non-NULL and a CPU driver is active, is handed to the
+   driver's get_events function; ATTR_CB, when non-NULL, is called with
+   each attribute name in perfctr_attrs_table up to the first entry whose
+   attrname is NULL.
+   Returns 0 if get_events reported a non-zero count, otherwise -1 (this
+   includes the case where only ATTR_CB was supplied).  */
+HWCDRV_API int
+hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb)
+{
+  int count = 0;
+
+  if (hwc_cb && hdrv_pcl_state.get_events)
+    {
+      count = hdrv_pcl_state.get_events (hwc_cb);
+    }
+
+  if (attr_cb)
+    {
+      int ii = 0;
+      while (perfctr_attrs_table && perfctr_attrs_table[ii].attrname)
+        {
+          attr_cb (perfctr_attrs_table[ii].attrname);
+          ii++;
+        }
+    }
+
+  return count ? 0 : -1;
+}
+
+/* Assign register numbers to the NUMCTRS counters in ENTRIES by
+   delegating to the shared hwcdrv_assign_all_regnos () helper, whose
+   result is returned unchanged.  */
+HWCDRV_API int
+hwcdrv_assign_regnos (Hwcentry* entries[], unsigned numctrs)
+{
+  int rc = hwcdrv_assign_all_regnos (entries, numctrs);
+  return rc;
+}
+
+/* Arm the counter behind FD for one more overflow by issuing
+   PERF_EVENT_IOC_REFRESH with a count of 1.
+   Returns 0 on success, or HWCFUNCS_ERROR_UNAVAIL if the ioctl fails.  */
+static int
+internal_hwc_start (int fd)
+{
+  if (ioctl (fd, PERF_EVENT_IOC_REFRESH, 1) != -1)
+    {
+      TprintfT (DBG_LT3, "hwcdrv: internal_hwc_start(fd=%d)\n", fd);
+      return 0;
+    }
+
+  TprintfT (DBG_LT0, "hwcdrv: ERROR: internal_hwc_start:"
+            " PERF_EVENT_IOC_REFRESH(fd=%d) failed: errno=%d\n", fd, errno);
+  return HWCFUNCS_ERROR_UNAVAIL;
+}
+
+/* Back end of the counter-overflow signal handler.  SI is the siginfo of
+   the delivered signal.  EVENTP and LOST_EVENTS are cleared first; then
+   the mmap'ed sample buffer of every counter of the calling thread is
+   drained.  For counter idx the most recent sample value is stored in
+   eventp->ce_pic[idx]; samples it supersedes and the "lost" counts
+   reported by read_sample () are added to lost_events->ce_pic[idx], which
+   may also hold a small marker value (1..4) recorded on unexpected
+   conditions.  Each counter that produced records is re-armed here, after
+   roughly doubling its period when overflows arrive faster than its
+   min_time.
+   Returns 0 when the buffers were processed, otherwise an
+   HWCFUNCS_ERROR_* code (EVENTP then carries no counter data).  */
+HWCDRV_API int
+hwcdrv_overflow (siginfo_t *si, hwc_event_t *eventp, hwc_event_t *lost_events)
+{
+ /* set expired counters to overflow value and all others to 0 */
+ /* return 0: OK, counters should be restarted */
+ /* return non-zero: eventp not set, counters should not be restarted */
+ /* clear return values */
+ int ii;
+ for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
+ {
+ eventp->ce_pic[ii] = 0;
+ lost_events->ce_pic[ii] = 0;
+ }
+ hrtime_t sig_ts = gethrtime (); //YXXX get this from HWC event?
+ eventp->ce_hrt = sig_ts;
+ lost_events->ce_hrt = sig_ts;
+
+ /* determine source signal */
+ int signal_fd = -1;
+ switch (si->si_code)
+ {
+ case POLL_HUP: /* expected value from pcl */
+ /* According to Stephane Eranian:
+ * "expect POLL_HUP instead of POLL_IN because we are
+ * in one-shot mode (IOC_REFRESH)"
+ */
+ signal_fd = si->si_fd;
+ break;
+ case SI_TKILL: /* event forwarded by tkill */
+ /* DBX can only forward SI_TKILL when it detects POLL_HUP
+ * unfortunately, this means that si->si_fd has been lost...
+ * We need to process the buffers, but we don't know the fd!
+ */
+ TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
+ " SI_TKILL detected\n", sig_ts);
+ break;
+ default:
+ // "sometimes we see a POLL_IN (1) with very high event rates,"
+ // according to eranian(?)
+ TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
+ " unexpected si_code 0x%x\n", sig_ts, si->si_code);
+ return HWCFUNCS_ERROR_GENERIC;
+ }
+
+ hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx ();
+ if (!pctx)
+ {
+ TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
+ " tsd context is NULL\n", sig_ts);
+ return HWCFUNCS_ERROR_UNEXPECTED;
+ }
+ counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list;
+ if (!ctr_list)
+ {
+ TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
+ " ctr_list is NULL\n", sig_ts);
+ return HWCFUNCS_ERROR_UNEXPECTED;
+ }
+
+ /* clear needs_restart flag */
+ for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
+ ctr_list[ii].needs_restart = 0;
+
+ /* attempt to identify the counter to read */
+ int signal_idx = -1;
+ pctx->signal_fd = signal_fd; // save the signal provided by siginfo_t
+ if (signal_fd != -1)
+ {
+ for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
+ {
+ if (ctr_list[ii].fd == signal_fd)
+ {
+ signal_idx = ii;
+ break;
+ }
+ }
+ }
+
+ if (signal_idx < 0)
+ {
+ TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
+ " pmc not determined!\n", sig_ts);
+ lost_events->ce_pic[0] = 1; /* record a bogus value into experiment */
+ // note: bogus value may get overwritten in loop below
+ }
+
+ /* capture sample(s). In addition to signal_idx, check other counters. */
+ struct perf_event_header sheader;
+ int idx;
+ for (idx = 0; idx < hdrv_pcl_state.hwcdef_cnt; idx++)
+ {
+ int num_recs = 0; // records consumed from this counter's buffer
+ while (1)
+ {
+ /* check for samples */
+ struct perf_event_mmap_page *metadata = ctr_list[idx].buf_state.buf;
+ if (metadata == NULL)
+ break; // empty
+ if (metadata->data_tail == metadata->data_head)
+ break; // empty
+
+ /* read header */
+ if (read_buf (&ctr_list[idx].buf_state, &sheader, sizeof (sheader)))
+ break;
+ num_recs++;
+
+ /* check for PERF_RECORD_SAMPLE */
+ // NOTE(review): assumes sheader.size >= sizeof (struct perf_event_header);
+ // a smaller value would make the unsigned datasz wrap around
+ size_t datasz = sheader.size - sizeof (struct perf_event_header);
+ if (sheader.type != PERF_RECORD_SAMPLE)
+ {
+ TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
+ " unexpected recd type=%d\n",
+ sig_ts, sheader.type);
+ if (skip_buf (&ctr_list[idx].buf_state, datasz))
+ {
+ TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
+ " skip recd type=%d failed\n", sig_ts, sheader.type);
+ lost_events->ce_pic[idx] = 4; /* record a bogus value */
+ break; // failed to skip buffer??
+ }
+ lost_events->ce_pic[idx] = 2; /* record a bogus value */
+ continue; // advance to next record
+ }
+
+ /* type is PERF_RECORD_SAMPLE */
+ uint64_t value, lostv;
+ if (read_sample (&ctr_list[idx], datasz, &value, &lostv))
+ {
+ TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:"
+ " read_sample() failed\n", sig_ts);
+ lost_events->ce_pic[idx] = 3; // record a bogus value
+ break; // failed to read sample data??
+ }
+ TprintfT (DBG_LT3, "hwcdrv: sig_ts=%llu: hwcdrv_overflow:"
+ " idx=%d value=%llu lost=%llu\n", (unsigned long long) sig_ts,
+ idx, (unsigned long long) value, (unsigned long long) lostv);
+ // only the newest sample is reported; an earlier one from this same
+ // call is folded into the lost count
+ if (eventp->ce_pic[idx])
+ {
+ TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
+ " idx=%d previous sample recorded as lost_event\n", sig_ts, idx);
+ lost_events->ce_pic[idx] += eventp->ce_pic[idx];
+ }
+ eventp->ce_pic[idx] = value;
+ lost_events->ce_pic[idx] += lostv;
+ }
+
+ /* debug output for unexpected (but common) cases */
+ if (idx == signal_idx)
+ {
+ if (num_recs != 1)
+ TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
+ " %d records for signal_idx=%d\n", sig_ts, num_recs, signal_idx);
+ }
+ else if (num_recs)
+ TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:"
+ " %d unexpected record(s) for idx=%d (signal_idx=%d)\n",
+ sig_ts, num_recs, idx, signal_idx);
+
+ /* trigger counter restart whenever records were found */
+ if (num_recs)
+ {
+ /* check whether to adapt the overflow interval */
+ /* This is the Linux version.
+ * The Solaris version is in hwprofile.c collector_update_overflow_counters().
+ */
+ hrtime_t min_time = global_perf_event_def[idx].min_time;
+ if (min_time > 0 // overflow interval is adaptive
+ && sig_ts - ctr_list[idx].last_overflow_time < min_time) // last interval below min
+ {
+ /* pick a new overflow interval */
+ /* roughly doubled, but add funny numbers */
+ /* hopefully the result is prime or not a multiple of some # of ops/loop */
+ uint64_t new_period = 2 * ctr_list[idx].last_overflow_period + 37;
+#if 0
+ // On Solaris, we report the adjustment to the log file.
+ // On Linux it's hard for us to do so since hwcdrv_pcl.c doesn't know about collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_HWCADJ.
+ // For now we simply don't report.
+ collector_interface->writeLog ("<event kind=\"%s\" id=\"%d\">%s %d -> %d</event>\n",
+ SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name,
+ ctr_list[idx].last_overflow_period, new_period);
+#endif
+ /* There are a variety of ways of resetting the period on Linux.
+ * The most elegant is
+ * ioctl(fd,PERF_EVENT_IOC_PERIOD,&period)
+ * but check the perf_event_open man page for PERF_EVENT_IOC_PERIOD:
+ * > Prior to Linux 2.6.36 this ioctl always failed due to a bug in the kernel.
+ * > Prior to Linux 3.14 (or 3.7 on ARM), the new period did not take effect
+ * until after the next overflow.
+ * So we're kind of stuck shutting the fd down and restarting it with the new period.
+ */
+ // NOTE(review): both results are ignored; if either call fails,
+ // ctr_list[idx].fd may no longer name an open counter, yet it is
+ // still passed to internal_hwc_start () below
+ if (stop_one_ctr (idx, ctr_list))
+ {
+ // EUGENE figure out what to do on error
+ }
+ ctr_list[idx].last_overflow_period = new_period;
+ if (start_one_ctr (idx, ctr_list[idx].buf_state.pagesz, pctx, "hwcdrv: ERROR: hwcdrv_overflow (readjust overflow):"))
+ {
+ // EUGENE figure out what to do on error
+ }
+ }
+ ctr_list[idx].last_overflow_time = sig_ts;
+#if 0
+ ctr_list[idx].needs_restart = 1;
+#else // seems to be more reliable to restart here instead of hwcdrv_sighlr_restart()
+ internal_hwc_start (ctr_list[idx].fd);
+#endif
+ }
+ }
+ return 0; // OK to restart counters
+}
+
+/* Restart hook called from the signal handler.  In this driver the
+   counters are already re-armed inside hwcdrv_overflow (), so the compiled
+   code ignores PP and just returns 0.  The disabled (#if 0) branch keeps
+   the alternative that restarts every counter flagged needs_restart.  */
+HWCDRV_API int
+hwcdrv_sighlr_restart (const hwc_event_t *pp)
+{
+#if 0 // restarting here doesn't seem to work as well as restarting in hwcdrv_overflow()
+ hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx ();
+ if (!pctx)
+ {
+ TprintfT (DBG_LT0, "hwcdrv: ERROR: hwcdrv_sighlr_restart: find_vpc_ctx()==NULL\n");
+ return -1;
+ }
+ counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list;
+ if (!ctr_list)
+ {
+ TprintfT (DBG_LT0, "hwcdrv: WARNING: hwcdrv_sighlr_restart: ctr_list is NULL\n");
+ return -1;
+ }
+ int errors = 0;
+ for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++)
+ {
+ if (ctr_list[ii].needs_restart)
+ errors |= internal_hwc_start (ctr_list[ii].fd);
+ ctr_list[ii].needs_restart = 0;
+ }
+ return errors;
+#else
+ return 0; // nothing to do: see hwcdrv_overflow ()
+#endif
+}
+
+/* Create (but do not start) counters based on hwcdef[].
+   For each of the HWCDEF_CNT definitions the event selector is resolved
+   with hwcfuncs_get_x86_eventsel () and the matching slot of
+   global_perf_event_def[] is rebuilt, including its perf_event attributes.
+   Returns 0 on success (hdrv_pcl_state.hwcdef_cnt is then updated), or
+   HWCFUNCS_ERROR_HWCARGS if too many counters were requested, the
+   processor is not supported, or a selector cannot be resolved.  */
+HWCDRV_API int
+hwcdrv_create_counters (unsigned hwcdef_cnt, Hwcentry *hwcdef)
+{
+  if (hwcdef_cnt > hdrv_pcl_about.cpcN_npics)
+    {
+      logerr (GTXT ("More than %d counters were specified\n"), hdrv_pcl_about.cpcN_npics); /*!*/
+      return HWCFUNCS_ERROR_HWCARGS;
+    }
+  if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED)
+    {
+      logerr (GTXT ("Processor not supported\n"));
+      return HWCFUNCS_ERROR_HWCARGS;
+    }
+
+  /* add counters */
+  unsigned pic = 0;
+  while (pic < hwcdef_cnt)
+    {
+      Hwcentry *src = &hwcdef[pic];
+      perf_event_def_t *def = &global_perf_event_def[pic];
+      unsigned int pmc_sel;
+      eventsel_t evntsel;
+
+      memset (def, 0, sizeof (*def));
+      if (hwcfuncs_get_x86_eventsel (src->reg_num, src->int_name,
+                                     &evntsel, &pmc_sel))
+        {
+          TprintfT (0, "hwcdrv: ERROR: hwcfuncs_get_x86_eventsel() failed\n");
+          return HWCFUNCS_ERROR_HWCARGS;
+        }
+
+      def->reg_num = pmc_sel;
+      def->eventsel = evntsel;
+      def->counter_preload = src->val;
+      def->min_time = src->min_time;
+      def->name = strdup (src->name); // memory leak??? very minor
+      init_perf_event (&def->hw, def->eventsel, def->counter_preload);
+
+      TprintfT (DBG_LT1, "hwcdrv: create_counters: pic=%u name='%s' interval=%lld"
+                "(min_time=%lld): reg_num=0x%x eventsel=0x%llx ireset=%lld usr=%lld sys=%lld\n",
+                pic, src->int_name, (long long) def->counter_preload,
+                (long long) def->min_time, (int) def->reg_num,
+                (long long) def->eventsel,
+                (long long) HW_INTERVAL_PRESET (src->val),
+                (long long) def->hw.exclude_user,
+                (long long) def->hw.exclude_kernel);
+      pic++;
+    }
+
+  hdrv_pcl_state.hwcdef_cnt = hwcdef_cnt;
+  return 0;
+}
+
+/* Stop and release every counter of the calling thread and detach the
+   counter list from the thread context.
+   Returns 0 when there is nothing to do or everything was released, or
+   HWCFUNCS_ERROR_GENERIC if the context is missing or a counter could not
+   be stopped cleanly.  */
+HWCDRV_API int
+hwcdrv_free_counters () // note: only performs shutdown for this thread
+{
+  if (!COUNTERS_ENABLED ())
+    return 0;
+
+  hdrv_pcl_ctx_t *pctx = hdrv_pcl_state.find_vpc_ctx ();
+  if (pctx == NULL)
+    {
+      TprintfT (0, "hwcdrv: WARNING: hwcdrv_free_counters: tsd context is NULL\n");
+      return HWCFUNCS_ERROR_GENERIC;
+    }
+
+  counter_state_t *counters = pctx->ctr_list;
+  if (counters == NULL)
+    {
+      // fork child: prolog suspends hwcs, then epilog frees them
+      TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_free_counters: ctr_list is already NULL\n");
+      return 0;
+    }
+
+  int status = 0;
+  int ii = 0;
+  while (ii < hdrv_pcl_state.hwcdef_cnt)
+    {
+      if (stop_one_ctr (ii, counters) != 0)
+        status = HWCFUNCS_ERROR_GENERIC;
+      ii++;
+    }
+  TprintfT (DBG_LT1, "hwcdrv: hwcdrv_free_counters(tid=0x%lx).\n", pctx->tid);
+  pctx->ctr_list = NULL;
+  return status;
+}
+
+/* Create and start the counters of the calling thread.
+   A fresh counter list is allocated and attached to the thread context,
+   every counter is opened with the period stored by
+   hwcdrv_create_counters (), and only after all of them are bound are
+   they armed.
+   Returns 0 on success or when no counters are configured,
+   HWCFUNCS_ERROR_NOT_SUPPORTED if the library was not opened,
+   HWCFUNCS_ERROR_UNEXPECTED if the thread context is missing,
+   HWCFUNCS_ERROR_MEMORY if the list cannot be allocated, and
+   HWCFUNCS_ERROR_UNAVAIL (after releasing the counters) if binding or
+   arming fails.  */
+HWCDRV_API int
+hwcdrv_start (void) /* must be called from each thread ? */
+{
+  if (!COUNTERS_ENABLED ())
+    {
+      TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_start: no counters to start \n");
+      return 0;
+    }
+  if (!hdrv_pcl_state.library_ok)
+    {
+      TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: library is not open\n");
+      return HWCFUNCS_ERROR_NOT_SUPPORTED;
+    }
+
+  /* set up per-thread context */
+  hdrv_pcl_ctx_t *ctx = hdrv_pcl_state.find_vpc_ctx ();
+  if (ctx == NULL)
+    {
+      TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: tsd context is NULL\n");
+      return HWCFUNCS_ERROR_UNEXPECTED;
+    }
+  ctx->tid = hwcdrv_gettid ();
+  TprintfT (DBG_LT1, "hwcdrv: hwcdrv_start(tid=0x%lx)\n", ctx->tid);
+
+  /* create per-thread counter list */
+  counter_state_t *counters = (counter_state_t *) calloc (hdrv_pcl_state.hwcdef_cnt,
+                                                          sizeof (counter_state_t));
+  if (counters == NULL)
+    {
+      TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: calloc(ctr_list) failed\n");
+      return HWCFUNCS_ERROR_MEMORY;
+    }
+  int idx;
+  for (idx = 0; idx < hdrv_pcl_state.hwcdef_cnt; idx++)
+    {
+      // invalidate fds in case we have to close prematurely
+      counters[idx].fd = -1;
+    }
+  ctx->ctr_list = counters;
+
+  /* bind the counters */
+  size_t page_size = sysconf (_SC_PAGESIZE);
+  for (idx = 0; idx < hdrv_pcl_state.hwcdef_cnt; idx++)
+    {
+      counters[idx].last_overflow_period = global_perf_event_def[idx].hw.sample_period;
+      if (start_one_ctr (idx, page_size, ctx, "hwcdrv: ERROR: hwcdrv_start:") != 0)
+        goto hwcdrv_start_cleanup;
+    }
+
+  /* start the counters */
+  for (idx = 0; idx < hdrv_pcl_state.hwcdef_cnt; idx++)
+    {
+      if (internal_hwc_start (counters[idx].fd) < 0)
+        goto hwcdrv_start_cleanup;
+    }
+  return 0;
+
+hwcdrv_start_cleanup:
+  hwcdrv_free_counters (); // PERF_EVENT_IOC_DISABLE and close() for all fds
+  return HWCFUNCS_ERROR_UNAVAIL;
+}
+
+/* Suspend counting on the calling thread by releasing its counters.
+   Returns 0 when no counters are configured, otherwise the result of
+   hwcdrv_free_counters ().  */
+HWCDRV_API int
+hwcdrv_lwp_suspend (void) /* must be called from each thread */
+{
+  if (COUNTERS_ENABLED ())
+    {
+      TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_suspend()\n");
+      return hwcdrv_free_counters ();
+    }
+
+  TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_suspend: no counters\n");
+  return 0;
+}
+
+/* Resume counting on the calling thread by creating and starting its
+   counters again.
+   Returns 0 when no counters are configured, otherwise the result of
+   hwcdrv_start ().  */
+HWCDRV_API int
+hwcdrv_lwp_resume (void) /* must be called from each thread */
+{
+  if (COUNTERS_ENABLED ())
+    {
+      TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_resume()\n");
+      return hwcdrv_start ();
+    }
+
+  TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_resume: no counters\n");
+  return 0;
+}
+
+/* Reading live counter values is not implemented by this driver: the
+   timestamp and all MAX_PICS counter values in OVERFLOW_DATA are zeroed,
+   each sample slot of SAMPLED_DATA (when non-NULL) is reset, and 0 is
+   returned.  */
+HWCDRV_API int
+hwcdrv_read_events (hwc_event_t *overflow_data, hwc_event_samples_t *sampled_data)
+{
+  int pic = 0;
+
+  overflow_data->ce_hrt = 0;
+  while (pic < MAX_PICS)
+    {
+      overflow_data->ce_pic[pic] = 0;
+      if (sampled_data)
+        HWCFUNCS_SAMPLE_RESET (&sampled_data->sample[pic]);
+      pic++;
+    }
+  return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+/* HWCDRV_API */
+
+/* Entry-point table exported by the perf_event driver.  The members are
+   named explicitly, in the declaration order of hwcdrv_api_t.  */
+hwcdrv_api_t hwcdrv_pcl_api = {
+  .hwcdrv_init = hwcdrv_init,
+  .hwcdrv_get_info = hwcdrv_get_info,
+  .hwcdrv_enable_mt = hwcdrv_enable_mt,
+  .hwcdrv_get_descriptions = hwcdrv_get_descriptions,
+  .hwcdrv_assign_regnos = hwcdrv_assign_regnos,
+  .hwcdrv_create_counters = hwcdrv_create_counters,
+  .hwcdrv_start = hwcdrv_start,
+  .hwcdrv_overflow = hwcdrv_overflow,
+  .hwcdrv_read_events = hwcdrv_read_events,
+  .hwcdrv_sighlr_restart = hwcdrv_sighlr_restart,
+  .hwcdrv_lwp_suspend = hwcdrv_lwp_suspend,
+  .hwcdrv_lwp_resume = hwcdrv_lwp_resume,
+  .hwcdrv_free_counters = hwcdrv_free_counters,
+  .hwcdrv_lwp_init = hwcdrv_lwp_init,
+  .hwcdrv_lwp_fini = hwcdrv_lwp_fini,
+  .hwcdrv_init_status = -1 // hwcdrv_init_status
+};
diff --git a/gprofng/common/hwcdrv.h b/gprofng/common/hwcdrv.h
new file mode 100644
index 0000000..14c55cf
--- /dev/null
+++ b/gprofng/common/hwcdrv.h
@@ -0,0 +1,330 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+/* Hardware counter profiling driver's header */
+
+#ifndef __HWCDRV_H
+#define __HWCDRV_H
+
+#include "hwcfuncs.h"
+
+#ifdef linux
+#define HWCFUNCS_SIGNAL SIGIO
+#define HWCFUNCS_SIGNAL_STRING "SIGIO"
+#else
+#define HWCFUNCS_SIGNAL SIGEMT
+#define HWCFUNCS_SIGNAL_STRING "SIGEMT"
+#endif
+
+#ifndef LIBCOLLECTOR_SRC /* not running in libcollector */
+#include <string.h>
+
+#else /* running in libcollector */
+#include "collector_module.h"
+#include "libcol_util.h"
+
+#define get_hwcdrv __collector_get_hwcdrv
+#define hwcdrv_drivers __collector_hwcdrv_drivers
+#define hwcdrv_cpc1_api __collector_hwcdrv_cpc1_api
+#define hwcdrv_cpc2_api __collector_hwcdrv_cpc2_api
+#define hwcdrv_default __collector_hwcdrv_default
+#define hwcdrv_driver __collector_hwcdrv_driver
+#define hwcdrv_init __collector_hwcdrv_init
+#define hwcdrv_get_info __collector_hwcdrv_get_info
+#define hwcdrv_enable_mt __collector_hwcdrv_enable_mt
+#define hwcdrv_get_descriptions __collector_hwcdrv_get_descriptions
+#define hwcdrv_assign_regnos __collector_hwcdrv_assign_regnos
+#define hwcdrv_create_counters __collector_hwcdrv_create_counters
+#define hwcdrv_start __collector_hwcdrv_start
+#define hwcdrv_overflow __collector_hwcdrv_overflow
+#define hwcdrv_read_events __collector_hwcdrv_read_events
+#define hwcdrv_sighlr_restart __collector_hwcdrv_sighlr_restart
+#define hwcdrv_lwp_suspend __collector_hwcdrv_lwp_suspend
+#define hwcdrv_lwp_resume __collector_hwcdrv_lwp_resume
+#define hwcdrv_free_counters __collector_hwcdrv_free_counters
+#define hwcdrv_lwp_init __collector_hwcdrv_lwp_init
+#define hwcdrv_lwp_fini __collector_hwcdrv_lwp_fini
+#define hwcdrv_assign_all_regnos __collector_hwcdrv_assign_all_regnos
+#define hwcdrv_lookup_cpuver __collector_hwcdrv_lookup_cpuver
+#define hwcfuncs_int_capture_errmsg __collector_hwcfuncs_int_capture_errmsg
+
+#define GTXT(x) x
+
+/* Implemented by libcollector */
+#define calloc __collector_calloc
+#define close CALL_UTIL(close)
+#define fcntl CALL_UTIL(fcntl)
+#define fprintf CALL_UTIL(fprintf)
+//#define free __collector_free
+#define free(...)
+#define gethrtime __collector_gethrtime
+#define ioctl CALL_UTIL(ioctl)
+#define malloc __collector_malloc
+#define memcpy __collector_memcpy
+#define memset CALL_UTIL(memset)
+#define mmap CALL_UTIL(mmap)
+#define snprintf CALL_UTIL(snprintf)
+#define strchr CALL_UTIL(strchr)
+#define strcmp CALL_UTIL(strcmp)
+#define strncmp CALL_UTIL(strncmp)
+#define strcpy CALL_UTIL(strcpy)
+#define strdup __collector_strdup
+#define strncpy CALL_UTIL(strncpy)
+#define strerror CALL_UTIL(strerror)
+#define strlen CALL_UTIL(strlen)
+#define strstr CALL_UTIL(strstr)
+#define strtol CALL_UTIL(strtol)
+#define strtoll CALL_UTIL(strtoll)
+#define strtoul CALL_UTIL(strtoul)
+#define strtoull CALL_UTIL(strtoull)
+#define syscall CALL_UTIL(syscall)
+#define sysconf CALL_UTIL(sysconf)
+#define vsnprintf CALL_UTIL(vsnprintf)
+
+#endif /* --- LIBCOLLECTOR_SRC --- */
+
+/* TprintfT(<level>,...) definitions. Adjust per module as needed */
+#define DBG_LT0 0 // for high-level configuration, unexpected errors/warnings
+#define DBG_LT1 1 // for configuration details, warnings
+#define DBG_LT2 2
+#define DBG_LT3 3
+#define DBG_LT4 4
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+ /* hwcdrv api */
+ /* Table of entry points that every hwc driver provides; each member is
+ documented by the comment that follows it. */
+ typedef struct
+ {
+ int (*hwcdrv_init)(hwcfuncs_abort_fn_t abort_ftn, int * tsd_sz);
+ /* Initialize hwc counter library (do not call again after fork)
+ Must be called before other functions.
+ Input:
+ <abort_ftn>: NULL or callback function to be used for fatal errors
+ <tsd_sz>: If not NULL, returns size in bytes required for thread-specific storage
+ Return: 0 if successful
+ */
+
+ void (*hwcdrv_get_info)(int *cpuver, const char **cciname, uint_t *npics,
+ const char **docref, uint64_t *support);
+ /* get info about session
+ Input:
+ <cpuver>: if not NULL, returns value of CPC cpu version
+ <cciname>: if not NULL, returns name of CPU
+ <npics>: if not NULL, returns maximum # of HWCs
+ <docref>: if not NULL, returns documentation reference
+ <support>: if not NULL, returns bitmask (see hwcfuncs.h) of hwc support
+ Return: none (results are delivered through the pointers above)
+ */
+
+ int (*hwcdrv_enable_mt)(hwcfuncs_tsd_get_fn_t tsd_ftn);
+ /* Enables multi-threaded mode (do not need to call again after fork)
+ Input:
+ <tsd_ftn>: If <tsd_sz>==0, this parameter is ignored.
+ Otherwise:
+ tsd_ftn() must be able to return a pointer to thread-specific
+ memory of <tsd_sz> bytes.
+ For a given thread, tsd_ftn() must
+ always return the same pointer.
+ Return: 0 if successful, nonzero error code otherwise
+ */
+
+ int (*hwcdrv_get_descriptions)(hwcf_hwc_cb_t *hwc_find_action,
+ hwcf_attr_cb_t *attr_find_action);
+ /* Initiate callbacks with all available HWC names and HWC attributes.
+ Input:
+ <hwc_find_action>: if not NULL, will be called once for each HWC
+ <attr_find_action>: if not NULL, will be called once for each attribute
+ Return: 0 if successful
+ or a cpc return code upon error
+ */
+
+ int (*hwcdrv_assign_regnos)(Hwcentry* entries[], unsigned numctrs);
+ /* Assign entries[]->reg_num values as needed by platform
+ Input:
+ <entries>: array of counters
+ <numctrs>: number of items in <entries>
+ Return: 0 if successful
+ HWCFUNCS_ERROR_HWCINIT if resources unavailable
+ HWCFUNCS_ERROR_HWCARGS if counters were not specified correctly
+ */
+
+ int (*hwcdrv_create_counters)(unsigned hwcdef_cnt, Hwcentry *hwcdef);
+ /* Create the counters, but don't start them.
+ call this once in main thread to create counters.
+ Input:
+ <hwcdef_cnt>: number of counter definitions.
+ <hwcdef>: counter definitions.
+ Return: 0 if successful
+ or a cpc return code upon error
+ */
+
+ int (*hwcdrv_start)(void);
+ /* Start the counters.
+ call this once in main thread to start counters.
+ Return: 0 if successful
+ or a cpc return code upon error
+ */
+
+ int (*hwcdrv_overflow)(siginfo_t *si, hwc_event_t *sample,
+ hwc_event_t *lost_samples);
+ /* Linux only. Capture current counter values.
+ This is intended to be called from the HWCFUNCS_SIGNAL handler
+ (SIGIO on Linux);
+ Input:
+ <si>: signal handler context information
+ <sample>: returns non-zero values for counters that overflowed
+ <lost_samples>: returns non-zero values for counters that "lost" counts
+ Return: 0 if successful
+ or a cpc return code upon error.
+ */
+
+ int (*hwcdrv_read_events)(hwc_event_t *overflow_data,
+ hwc_event_samples_t *sampled_data);
+ /* Read current counter values and samples. Read of samples is destructive.
+ Note: hwcdrv_read_events is not supported on Linux.
+ <overflow_data>: returns snapshot of counter values
+ <sampled_data>: returns sampled data
+ Return: 0 if successful
+ HWCFUNCS_ERROR_UNAVAIL if resource unavailable(e.g. called before initted)
+ (other values may be possible)
+ */
+
+ int (*hwcdrv_sighlr_restart)(const hwc_event_t* startVals);
+ /* Restarts the counters at the given value.
+ This is intended to be called from the HWCFUNCS_SIGNAL handler
+ (SIGEMT, or SIGIO on Linux);
+ Input:
+ <startVals>: Solaris: new start values.
+ Linux: pointer may be NULL; startVals is ignored.
+ Return: 0 if successful
+ or a cpc return code upon error.
+ */
+
+ int (*hwcdrv_lwp_suspend)(void);
+ /* Attempt to stop counters on this lwp only.
+ hwcdrv_lwp_resume() should be used to restart counters.
+ Return: 0 if successful
+ or a cpc return code upon error.
+ */
+
+ int (*hwcdrv_lwp_resume)(void);
+ /* Attempt to restart counters on this lwp when counters were
+ stopped with hwcdrv_lwp_suspend().
+ Return: 0 if successful
+ or a cpc return code upon error.
+ */
+
+ int (*hwcdrv_free_counters)(void);
+ /* Stops counters on this lwp only and frees resources.
+ This will fail w/ unpredictable results if other lwps are
+ still running. After this call returns,
+ hwcdrv_create_counters() may be called with new values.
+ Return: 0 if successful
+ or a cpc return code upon error.
+ */
+
+ int (*hwcdrv_lwp_init)(void);
+ /* per-thread counter init.
+ Solaris: nop.
+ Linux: just after thread creation call this from inside thread
+ to create context and start counters.
+ Return: 0 if successful
+ or a perfctr return code upon error
+ */
+
+ void (*hwcdrv_lwp_fini)(void);
+ /* per-thread counter cleanup.
+ Solaris: nop.
+ Linux: call in each thread upon thread destruction.
+ */
+
+ int hwcdrv_init_status;
+ /* NOTE(review): not described in this header; the perf_event driver
+ table initializes it to -1 -- presumably "hwcdrv_init not yet run",
+ confirm against the code that reads it. */
+ } hwcdrv_api_t;
+
+ extern hwcdrv_api_t *get_hwcdrv ();
+ extern hwcdrv_api_t *__collector_get_hwcdrv ();
+ extern int __collector_hwcfuncs_bind_descriptor (const char *defstring);
+ extern Hwcentry **__collector_hwcfuncs_get_ctrs (unsigned *defcnt);
+ extern hwcdrv_api_t *hwcdrv_drivers[]; // array of available drivers
+
+ /* prototypes for internal use by hwcdrv drivers */
+ /* Session description kept by a driver and reported through
+ hwcdrv_get_info(). */
+ typedef struct
+ { // see hwcdrv_get_info() for field definitions
+ int cpcN_cpuver; // CPC cpu version
+ uint_t cpcN_npics; // maximum # of HWCs
+ const char *cpcN_docref; // documentation reference
+ const char *cpcN_cciname; // name of CPU
+ } hwcdrv_about_t;
+
+ extern int hwcdrv_assign_all_regnos (Hwcentry* entries[], unsigned numctrs);
+ /* assign user's counters to specific CPU registers */
+
+ extern int hwcdrv_lookup_cpuver (const char * cpcN_cciname);
+ /* returns hwc_cpus.h ID for a given string. */
+
+ extern void hwcfuncs_int_capture_errmsg (const char *fn, int subcode,
+ const char *fmt, va_list ap);
+#define logerr hwcfuncs_int_logerr
+
+ /*---------------------------------------------------------------------------*/
+ /* prototypes for internal use by linux hwcdrv drivers */
+#define PERFCTR_FIXED_MAGIC 0x40000000 /* tells perfctr to use intel fixed pmcs */
+#define PERFCTR_UMASK_SHIFT 8
+#define EXTENDED_EVNUM_2_EVSEL(evnum) \
+ ( (((eventsel_t)(evnum) & 0x0f00ULL) << 24) | ((eventsel_t)(evnum) & ~0x0f00ULL) )
+
+ typedef uint64_t eventsel_t;
+ extern int hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name,
+ eventsel_t *return_event, uint_t *return_pmc_sel);
+
+ typedef int (hwcdrv_get_events_fn_t) (hwcf_hwc_cb_t *hwc_cb);
+ typedef int (hwcdrv_get_eventnum_fn_t) (const char *eventname, uint_t pmc,
+ eventsel_t *eventnum,
+ eventsel_t *valid_umask, uint_t *pmc_sel);
+ extern hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum;
+
+ /* Describes one event attribute. perfctr_attrs_table points to an array
+ of these; the perf_event driver walks it until it reaches an entry
+ whose attrname is NULL. */
+ typedef struct
+ {
+ const char * attrname; // user-visible name of attribute
+ int is_inverted; // nonzero means boolean attribute is inverted
+ eventsel_t mask; // which attribute bits can be set?
+ eventsel_t shift; // how far to shift bits for use in x86 register
+ } attr_info_t;
+ extern const attr_info_t *perfctr_attrs_table;
+
+ /* hdrv_pcbe api: cpu-specific drivers for Linux */
+ /* Entry points of one CPU-specific back end. The perf_event driver
+ tries each registered back end in turn and uses the first one whose
+ hdrv_pcbe_init() returns 0. */
+ typedef struct
+ {
+ int (*hdrv_pcbe_init)(void); // returns 0 if this back end handles the CPU
+ uint_t (*hdrv_pcbe_ncounters)(void); // number of counters (reported as npics)
+ const char *(*hdrv_pcbe_impl_name)(void); // CPU name (reported as cciname)
+ const char *(*hdrv_pcbe_cpuref)(void); // documentation reference (docref)
+ int (*hdrv_pcbe_get_events)(hwcf_hwc_cb_t *hwc_cb); // enumerate events via hwc_cb
+ int (*hdrv_pcbe_get_eventnum)(const char * eventname, uint_t pmc,
+ eventsel_t *eventnum, eventsel_t *valid_umask,
+ uint_t *pmc_sel); // installed as hwcdrv_get_x86_eventnum
+ } hdrv_pcbe_api_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/gprofng/common/hwcentry.h b/gprofng/common/hwcentry.h
new file mode 100644
index 0000000..8611ab7
--- /dev/null
+++ b/gprofng/common/hwcentry.h
@@ -0,0 +1,417 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#ifndef _HWCENTRY_H
+#define _HWCENTRY_H
+
+#ifndef LIBCOLLECTOR_SRC /* not running in libcollector */
+#include <stdio.h> /* FILE */
+#endif /* --- LIBCOLLECTOR_SRC --- */
+#include <stdlib.h> /* size_t */
+#include "hwc_cpus.h"
+#include "gp-time.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+ /* ABS backtrack types: the kind of backtracking available for a metric. */
+ typedef enum
+ {
+ /* !! Lowest 2 bits are used to indicate load and store, respectively !! */
+ /* Example: On SPARC, backtrack.c did this: if (ABS_memop & inst_type) ... */
+ ABST_NONE = 0x0, /* "+" not given, or backtracking not applicable */
+ ABST_LOAD = 0x1, /* data read events */
+ ABST_STORE = 0x2, /* data write events */
+ ABST_LDST = 0x3, /* data reads/writes */
+ ABST_COUNT = 0x4, /* dedicated assembly instruction (SW_count_n on sparc) */
+ ABST_US_DTLBM = 0xF, /* load-store on Sparc, "unskidded DTLB_miss" */
+ ABST_NOPC = 0x100, /* non-program-related external events; no PC/stack recorded */
+ ABST_CLKDS = 0x103, // Obsolete; Clock-based Dataspace profiling
+ ABST_EXACT = 0x203, /* data reads/writes, precise trap with no skid */
+ ABST_LDST_SPARC64 = 0x303, /* Fujitsu SPARC64 load/store */
+ ABST_EXACT_PEBS_PLUS1 = 0x403 /* data reads/writes, precise sampling, 1 instr. skid */
+ /* full description below... */
+ } ABST_type;
+
+#define ABST_PLUS_BY_DEFAULT(n) ((n)==ABST_EXACT || (n)==ABST_EXACT_PEBS_PLUS1)
+#define ABST_BACKTRACK_ENABLED(n) ((n)!=ABST_NONE && (n)!=ABST_NOPC)
+#define ABST_MEMSPACE_ENABLED(n) ((n)!=ABST_NONE && (n)!=ABST_NOPC && (n)!=ABST_COUNT)
+
+ /* ABS determines the type of backtracking available for a particular metric.
+ * Backtracking is enabled with the "+" in "-h +<countername>...".
+ *
+ * When Backtracking is not possible:
+ *
+ * ABST_NONE=0: Either the user did not specify "+", or backtracking
+ * is not applicable to the metric, for example:
+ * clk cycles,
+ * instruct counts (dispatch + branch + prefetch),
+ * i$,
+ * FP ops
+ * ABST_NOPC=0x100 Used for non-program-related external events, for example:
+ * system interface events,
+ * memory controller counters
+ * Of all ABST_type options, only ABST_NOPC prevents hwprofile.c
+ * from recording PC/stack information.
+ *
+ * When backtracking is allowed:
+ *
+ * ABST_LOAD=1: data read events, used with metrics like:
+ * D$, E$, P$ read misses and hits.
+ * [DC+EC+PC]_rd*, Re_*_miss*,
+ * EC_snoop_cb(?)
+ * ABST_STORE=2: data write events, used with metrics like:
+ * D$ writes and write related misses
+ * DC_wr/wr-miss, EC_wb, WC=writecache, Rstall_storeQ
+ * [EC+PC=pcache]_snoop_inv(?), WC_snoop_cb(?),
+ * ABST_LDST=3: data reads/writes, used with metrics like:
+ * E$ references, misses.
+ * ABST_COUNT=4: dedicated assembly instruction: '%hi(0xfc000)'
+ * See SW_count_n metric on sparc.
+ * ABST_US_DTLBM=0xF: for load-store on Sparc -- seems to be used only
+ * for "unskidded DTLB_miss" with DTLB_miss metric.
+ * Checks two adjacent instructions for Data access.
+ * ABST_CLKDS=0x103: data reads/writes, used with Clock-based Dataspace
+ * profiling. Ultrasparc T2 and earlier.
+ * ABST_EXACT=0x203: data reads/writes, precise trap with no skid
+ * ABST_LDST_SPARC64=0x303: Fujitsu SPARC64 load/store
+ * ABST_EXACT_PEBS_PLUS1=0x403: data reads/writes, precise sampling with 1 instr. skid
+ */
+
+ /* Hwcentry - structure for defining a counter.
+ * Some fields have different usage when returned from
+ * hwc_lookup(), hwc_post_lookup(), or hwc_scan_*().
+ * Each function will describe its return values in more detail.
+ */
+ /* One hardware counter definition. The leading fields (name .. config)
+ * are filled positionally by the tables in hwctable.c, so their order
+ * must not change; the trailing fields may be rearranged. */
+ typedef struct
+ {
+ char *name; /* user HWC specification */
+ char *int_name; /* internal HWC specification */
+ regno_t reg_num; /* register in CPU, aka picnum, or REGNO_ANY */
+ char *metric; /* descriptive name, for well-known counters only */
+ volatile int val; /* default or actual overflow value */
+ int timecvt; /* multiplier to convert metric to time, 0 if N/A */
+ ABST_type memop; /* type of backtracking allowed */
+ char *short_desc; /* optional one-liner description, or NULL */
+ int type; /* Type of perf_event_attr */
+ long long config; /* perf_event_type -specific configuration */
+ /* the fields above this line are expected, in order, by the tables in hwctable.c */
+ /* ================================================== */
+ /* the fields below this line are more flexible */
+ int sort_order; /* "tag" to associate experiment record with HWC def */
+ regno_t *reg_list; /* if not NULL, legal values for <reg_num> field above */
+ /* Note: reg_list will be terminated by REGNO_ANY */
+ /* Max size of array is MAX_PICS */
+ hrtime_t min_time; /* target minimum time between overflow events. 0 is off. See HWCTIME_* macros */
+ /* NOTE(review): the two comments below mention HWCTIME_AUTO, which is not
+ * among the HWCTIME_* macros defined in this header; HWCTIME_TBD is
+ * presumably what is meant -- confirm. */
+ hrtime_t min_time_default; /* if min_time==HWCTIME_AUTO, use this value instead. 0 is off. */
+ int ref_val; /* if min_time==HWCTIME_AUTO, use this time. 0 is off. */
+ int lval, hval; /* temporary to allow DBX to build until dbx glue.cc fixed */
+ } Hwcentry;
+
+ // Hwcentry.min_time canned values
+#define HWCTIME_TBD ((hrtime_t)( -1LL)) /* self-adjusting enabled but nsecs not yet selected */
+#define HWCTIME_HI ( 1 * 1000 * 1000LL ) /* 1 msec represented in nsecs */
+#define HWCTIME_ON ( 10 * 1000 * 1000LL ) /* 10 msec represented in nsecs */
+#define HWCTIME_LO ( 100 * 1000 * 1000LL ) /* 100 msec represented in nsecs */
+
+#define HWC_VAL_HI(refVal) (((refVal)/10) + 1)
+#define HWC_VAL_ON(refVal) (refVal)
+#define HWC_VAL_LO(refVal) (((refVal)*10)/100*100 + 1) // zero's out lower digits, add 1
+#define HWC_VAL_CUSTOM(refVal, targetNanoSec) ((double)(refVal)*(targetNanoSec)/HWCTIME_ON)
+
+#define HWCENTRY_USES_SAMPLING(h) ((h)->memop==ABST_EXACT_PEBS_PLUS1)
+
+ extern int hwc_lookup (int forKernel, hrtime_t min_time_default,
+ const char *uname, Hwcentry *list[], unsigned listsz,
+ char **emsg, char **wmsg);
+ /* Parses counter cmdline string. Returns counter definitions.
+ * Input:
+ * <forKernel> lookup using which table: 0-collect or 1-er_kernel
+ * <min_time_default> minimum nseconds between events if Hwcentry.min_time == HWCTIME_TBD. 0 to disable.
+ * <uname> command line HWC definition of format:
+ * <ctr_def>...[{','|(whitespace)}<ctr_n_def>] where
+ * <ctr_def> == [+]<ctr>[/<reg#>][,<interval>]
+ * <list> array of pointers to store counter definitions
+ * <listsz> number of elements in <list>
+ * Returns:
+ * Success:
+ * Returns number of valid counters in <list> and <list>'s elements
+ * will be initialized as follows:
+ *
+ * <list[]->name>:
+ * Copy of the <uname> with the following modification:
+ * if backtracking is not supported, the + will be removed.
+ * <list[]->int_name>:
+ * For well-known and convenience ctrs, the internal HWC specification,
+ * e.g. BSQ_cache_reference~emask=0x0100.
+ * For raw ctrs, this will be a copy of <name>.
+ * <list[]->reg_num>:
+ * Register number if specified by user or table, REGNO_ANY otherwise.
+ * <list[]->metric>:
+ * For well-known counters, descriptive name, e.g. "D$ Read Misses".
+ * NULL otherwise.
+ * <list[]->val>:
+ * Overflow value selected by user, default value otherwise.
+ * <list[]->timecvt>:
+ * Value from tables.
+ * <list[]->memop>:
+ * If + is selected and backtracking is allowed, value from table.
+ * ABST_NONE or ABST_NOPC otherwise.
+ *
+ * It is the responsibility of the caller to free 'name' and 'int_name'.
+ * 'metric' is a static string and shouldn't be freed.
+ * 'emsg' will point to NULL
+ *
+ * Failure:
+ * Frees all allocated elements.
+ * emsg will point to a string with an error message to print
+ * returns -1
+ */
+
+ extern char *hwc_validate_ctrs (int forKernel, Hwcentry *list[], unsigned listsz);
+ /* Validates that the vector of specified HW counters can be loaded (more-or-less)
+ * Some invalid combinations, especially on Linux, will not be detected.
+ */
+
+ extern int hwc_get_cpc_cpuver ();
+ /* Return the cpc_cpuver for this system. Other possible values:
+ * CPUVER_GENERIC=0, CPU could not be determined, but HWCs are ok.
+ * CPUVER_UNDEFINED=-1, HWCs are not available.
+ */
+
+ extern char *hwc_get_docref (char *buf, size_t buflen);
+ /* Return a CPU HWC document reference, or NULL. */
+
+ // TBR
+ extern char *hwc_get_default_cntrs ();
+ /* Return a default HW counter string; may be NULL, or zero-length */
+ /* NULL means none is defined in the table; or zero-length means string defined could not be loaded */
+
+ extern char *hwc_get_default_cntrs2 (int forKernel, int style);
+ /* like hwc_get_default_cntrs() for style==1 */
+ /* but allows other styles of formatting as well */
+ /* deprecate and eventually remove hwc_get_default_cntrs() */
+
+ extern char *hwc_get_orig_default_cntrs ();
+ /* Get the default HW counter string as set in the table */
+ /* NULL means none is defined in the table */
+
+ extern void hwc_update_val (Hwcentry *ctr);
+ /* Check time-based intervals and update Hwcentry.val as needed */
+
+ extern char *hwc_get_cpuname (char *buf, size_t buflen);
+ /* Return the cpc cpu name for this system, or NULL. */
+
+ extern unsigned hwc_get_max_regs ();
+ /* Return number of counters registers for this system. */
+
+ extern unsigned hwc_get_max_concurrent (int forKernel);
+ /* Return the max number of simultaneous counters for this system. */
+
+ extern char **hwc_get_attrs (int forKernel);
+ /* Return:
+ * Array of attributes (strings) supported by this system.
+ * Last element in array is null.
+ * Array and its elements should NOT be freed by the caller.
+ */
+
+ extern unsigned hwc_scan_attrs (void (*action)(const char *attr,
+ const char *desc));
+ /* Scan the HW counter attributes, and call function for each attribute.
+ * Input:
+ * <action>:
+ * If NULL, no action is performed, but count is still returned.
+ * Otherwise called for each type of attributes, or if none exist,
+ * called once with NULL parameter.
+ * Return: count of times <action> would have been called w/ non-NULL data.
+ */
+
+ extern Hwcentry *hwc_post_lookup (Hwcentry * pret_ctr, char *uname,
+ char * int_name, int cpc_cpuver);
+ /* When post-processing a run, look up a Hwcentry for given type of system.
+ * Input:
+ * <pret_ctr>: storage for counter definition
+ * <uname>: well-known name, convenience name, or complete HWC definition.
+ * <int_name>: Hwcentry->int_name or NULL for don't care
+ * <cpc_cpuver>: version of cpu used for experiment.
+ * Return:
+ * <pret_ctr>'s elements set as follows:
+ *
+ * <pret_ctr->name>:
+ * Copy of <uname> with the following modifications:
+ * 1) + and /<regnum> will be stripped off
+ * 2) attributes will be sorted and values will be shown in hex.
+ * <pret_ctr->int_name>:
+ * For well-known/convenience counters, the internal HWC specification
+ * from the table, e.g. BSQ_cache_reference~emask=0x0100.
+ * Otherwise, a copy of <uname>.
+ * <pret_ctr->reg_num>:
+ * Register number if specified by user or table,
+ * REGNO_ANY otherwise.
+ * <pret_ctr->metric>:
+ * For well-known counters, descriptive name, e.g. "D$ Read Misses".
+ * NULL otherwise.
+ * <pret_ctr->timecvt>:
+ * For well-known/convenience/hidden counters, value from table.
+ * 0 otherwise.
+ * <pret_ctr->memop>:
+ * For well-known/convenience/hidden counters, value from table.
+ * ABST_NONE otherwise.
+ * <pret_ctr->sort_order>:
+ * Set to 0.
+ *
+ * It is the responsibility of the caller to free 'name' and 'int_name'.
+ * 'metric' is a static string and shouldn't be freed.
+ */
+
+ extern Hwcentry **hwc_get_std_ctrs (int forKernel);
+ /* Return:
+ * Array of well-known counters supported by this system.
+ * Last element in array will be NULL.
+ * Array and its elements should NOT be freed by the caller.
+ */
+
+ extern unsigned hwc_scan_std_ctrs (void (*action)(const Hwcentry *));
+ /* Call <action> for each well-known counter.
+ * Input:
+ * <action>:
+ * If NULL, no action is performed, but count is still returned.
+ * Otherwise called for each well-known counter, or if none exist,
+ * called once with NULL parameter.
+ * Return:
+ * Count of times <action> would have been called w/ non-NULL data.
+ * If <action> is not NULL, Hwcentry fields will be set as follows:
+ * <ctr->name>:
+ * HWC alias name, e.g. dcrm.
+ * <ctr->int_name>:
+ * The internal HWC specification, e.g. BSQ_cache_reference~emask=0x0100.
+ * <ctr->reg_num>:
+ * Register number if specified by the table, REGNO_ANY otherwise.
+ * <ctr->metric>:
+ * Descriptive name, e.g. "D$ Read Misses".
+ * <ctr->lval>:
+ * Low-resolution overflow value.
+ * <ctr->val>:
+ * Default overflow value.
+ * <ctr->hval>:
+ * High-resolution overflow value.
+ * <ctr->timecvt>:
+ * multiplier to convert metric to time, 0 otherwise.
+ * <ctr->memop>:
+ * ABST_* type for this counter.
+ * <ctr->reg_list>:
+ * Array of legal <reg_num> values. Terminated by REGNO_ANY.
+ *
+ * Note: All fields point to static data, none should be freed.
+ */
+
+ extern Hwcentry **hwc_get_raw_ctrs (int forKernel);
+ /* Return:
+ * Table of raw (not well-known) counters supported by this system.
+ * Last element in array will be NULL.
+ * Table and its elements should NOT be freed by the caller.
+ */
+
+ extern unsigned hwc_scan_raw_ctrs (void (*action)(const Hwcentry *));
+ /* Call <action> for each raw counter.
+ * Input:
+ * <action>:
+ * If NULL, no action is performed, but count is still returned.
+ * Otherwise called for each raw counter, or if none exist,
+ * called once with NULL parameter.
+ * Return:
+ * Count of times <action> would have been called w/ non-NULL data.
+ * If <action> is not NULL, Hwcentry fields will be set as follows:
+ * <ctr->name>:
+ * HWC raw name without attributes, e.g. BSQ_cache_reference.
+ * <ctr->int_name>:
+ * NULL.
+ * <ctr->metric>:
+ * NULL.
+ * The remainder of the fields are the same as for
+ * hwc_scan_std_ctrs().
+ *
+ * Note: All fields point to static data, none should be freed.
+ */
+
+ extern void
+ hwc_usage (int forKernel, const char *cmd, const char *dataspace_msg);
+ /* Print an i18n'd description of "-h" usage, used by collect and er_kernel.
+ */
+
+ extern void hwc_usage_f (int forKernel, FILE *f, const char *cmd,
+ const char *dataspace_msg, int show_syntax,
+ int show_short_desc);
+ /* Print an i18n'd description of "-h" usage to a FILE. Used by GUI. */
+
+ extern char *hwc_rate_string (const Hwcentry *pctr, int force_numeric_format);
+ /* Returns {"on"|"hi"|"lo"|""|<value>}. Return value must be freed by caller. */
+
+ extern char *hwc_i18n_metric (const Hwcentry *ctr);
+ /* Get a basic label for a counter, properly i18n'd.
+ * Note: NOT MT SAFE.
+ * Examples:
+ * CPU Cycles
+ * DC_rd Events
+ * Pseudocode:
+ * if(ctr->metric != NULL) {
+ * sprintf(metricbuf, PTXT(ctr->metric) );
+ * } else if (ctr->name != NULL) {
+ * sprintf(metricbuf, GTXT("%s Events"), ctr->name );
+ * } else if (ctr->int_name != NULL) {
+ * sprintf(metricbuf, GTXT("%s Events"), ctr->int_name );
+ * }
+ * Return: pointer to a buffer containing the above description.
+ */
+
+ extern char *hwc_hwcentry_string (char *buf, size_t buflen, const Hwcentry *ctr);
+ /* Get an i18n'd description of a HW counter's options.
+ * Examples of well-known counters:
+ * cycles[/{0|1}],9999991 ('CPU Cycles', alias for Cycle_cnt; CPU-cycles)
+ * dcr[/0],1000003 ('D$ Read Refs', alias for DC_rd; load events)
+ * Examples of raw counters:
+ * Cycle_cnt[/{0|1}],1000003 (CPU-cycles)
+ * DC_rd[/0],1000003 (load events)
+ * Return: <buf>, filled in.
+ */
+
+ extern char *hwc_hwcentry_specd_string (char *buf, size_t buflen, const Hwcentry *ctr);
+ /* Get an i18n'd description of a HW counter's specific configuration.
+ * Examples of well-known counters:
+ * cycles,9999991 ('CPU Cycles')
+ * +dcr/0,1000003 ('D$ Read Refs')
+ * Examples of raw counters:
+ * Cycle_cnt,1000003
+ * +DC_rd/0,1000003
+ * Return: <buf>, filled in.
+ */
+
+ extern const char *hwc_memop_string (ABST_type memop);
+ /* Get an i18n'd description of a variable of type ABST_type.
+ * Return: pointer to static string.
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/gprofng/common/hwcfuncs.c b/gprofng/common/hwcfuncs.c
new file mode 100644
index 0000000..2f9764d
--- /dev/null
+++ b/gprofng/common/hwcfuncs.c
@@ -0,0 +1,704 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+/* Hardware counter profiling */
+#include "hwcdrv.h"
+#include "hwcfuncs.h"
+
+/*---------------------------------------------------------------------------*/
+/* macros */
+
+#define IS_GLOBAL /* Mark global symbols */
+#define HWCDRV_API static /* Mark functions used by hwcdrv API */
+
+/*---------------------------------------------------------------------------*/
+/* static variables */
+static uint_t cpcN_npics; /* counter limit; 0 until get_hwcdrv() gets it from the driver's hwcdrv_get_info() */
+static char hwcfuncs_errmsg_buf[1024]; /* text of the captured error message */
+static int hwcfuncs_errmsg_enabled = 1; /* nonzero: new messages may be captured */
+static int hwcfuncs_errmsg_valid; /* nonzero: hwcfuncs_errmsg_buf holds an unread message */
+
+/* --- user counter selections and options */
+static unsigned hwcdef_cnt; /* number of *active* hardware counters */
+static Hwcentry hwcdef[MAX_PICS]; /* HWC definitions */
+static Hwcentry *hwctable[MAX_PICS]; /* hwctable[i] points at hwcdef[i]; set up by get_hwcdrv() */
+
+/* --- drivers --- */
+
+// default driver
+//
+// Stub implementation of the hwcdrv API. get_hwcdrv() falls back to it when
+// hwcdrv_pcl_api fails to initialize. The stubs do no work: those returning
+// -1 report failure, those returning 0 report success with nothing done.
+
+/* Stub: always fails; both arguments are ignored. */
+HWCDRV_API int
+hwcdrv_init (hwcfuncs_abort_fn_t abort_ftn, int* tsd_sz)
+{
+ return -1;
+}
+
+/* Stub: leaves every output parameter untouched. */
+HWCDRV_API void
+hwcdrv_get_info (
+ int * cpuver, const char ** cciname,
+ uint_t * npics, const char ** docref, uint64_t* support) { }
+
+/* Stub: always fails. */
+HWCDRV_API int
+hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn)
+{
+ return -1;
+}
+
+/* Stub: never invokes either callback; returns 0. */
+HWCDRV_API int
+hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_find_action,
+ hwcf_attr_cb_t *attr_find_action)
+{
+ return 0;
+}
+
+/* Stub: always fails; <entries> is not modified. */
+HWCDRV_API int
+hwcdrv_assign_regnos (Hwcentry *entries[], unsigned numctrs)
+{
+ return -1;
+}
+
+/* Stub: always fails. */
+HWCDRV_API int
+hwcdrv_create_counters (unsigned hwcdef_cnt, Hwcentry *hwcdef)
+{
+ return -1;
+}
+
+/* Stub: always fails; neither output is written. */
+HWCDRV_API int
+hwcdrv_read_events (hwc_event_t *events, hwc_event_samples_t*samples)
+{
+ return -1;
+}
+
+/* Stub: always fails. */
+HWCDRV_API int
+hwcdrv_start (void)
+{
+ return -1;
+}
+
+/* Stub: returns 0 without touching its arguments. */
+HWCDRV_API int
+hwcdrv_overflow (siginfo_t *si, hwc_event_t *s, hwc_event_t *t)
+{
+ return 0;
+}
+
+/* Stub: always fails. */
+HWCDRV_API int
+hwcdrv_sighlr_restart (const hwc_event_t *sample)
+{
+ return -1;
+}
+
+/* Stub: always fails. */
+HWCDRV_API int
+hwcdrv_lwp_suspend (void)
+{
+ return -1;
+}
+
+/* Stub: always fails. */
+HWCDRV_API int
+hwcdrv_lwp_resume (void)
+{
+ return -1;
+}
+
+/* Stub: nothing to release; returns 0. */
+HWCDRV_API int
+hwcdrv_free_counters (void)
+{
+ return 0;
+}
+
+/* Stub: nothing to set up; returns 0. */
+HWCDRV_API int
+hwcdrv_lwp_init (void)
+{
+ return 0;
+}
+
+/* Stub: nothing to tear down. */
+HWCDRV_API void
+hwcdrv_lwp_fini (void) { }
+
+/* The initializers are positional, so their order has to match the member
+ order of hwcdrv_api_t (declared in hwcdrv.h).
+ NOTE(review): the entries list start/overflow before read_events, unlike
+ the definition order above -- confirm against the struct declaration. */
+static hwcdrv_api_t hwcdrv_default = {
+ hwcdrv_init,
+ hwcdrv_get_info,
+ hwcdrv_enable_mt,
+ hwcdrv_get_descriptions,
+ hwcdrv_assign_regnos,
+ hwcdrv_create_counters,
+ hwcdrv_start,
+ hwcdrv_overflow,
+ hwcdrv_read_events,
+ hwcdrv_sighlr_restart,
+ hwcdrv_lwp_suspend,
+ hwcdrv_lwp_resume,
+ hwcdrv_free_counters,
+ hwcdrv_lwp_init,
+ hwcdrv_lwp_fini,
+ -1 // hwcdrv_init_status
+};
+
+/* Driver currently in use; get_hwcdrv() switches it to hwcdrv_pcl_api when
+ that driver initializes successfully. */
+static hwcdrv_api_t *hwcdrv_driver = &hwcdrv_default;
+
+
+/*---------------------------------------------------------------------------*/
+/* misc */
+
+/* print a counter definition (for debugging)
+ * <dbg_lvl>: level handed to TprintfT
+ * <hdr>: prefix printed in front of the field dump
+ * <phwcdef>: definition to dump; must not be NULL (it is dereferenced)
+ * In the output, "interval" is the <val> field and "tag" is <sort_order>.
+ */
+static void
+ctrdefprint (int dbg_lvl, const char * hdr, Hwcentry*phwcdef)
+{
+ TprintfT (dbg_lvl, "%s: name='%s', int_name='%s',"
+ " reg_num=%d, timecvt=%d, memop=%d, "
+ "interval=%d, tag=%u, reg_list=%p\n",
+ hdr, phwcdef->name, phwcdef->int_name, phwcdef->reg_num,
+ phwcdef->timecvt, phwcdef->memop, phwcdef->val,
+ phwcdef->sort_order, phwcdef->reg_list);
+}
+
+/*---------------------------------------------------------------------------*/
+/* errmsg buffering */
+
+/* errmsg buffering is needed only because the most descriptive error
+ messages from CPC are delivered using a callback mechanism.
+ hwcfuncs_errmsg_get() should only be used during initialization, and
+ ideally, only to provide feedback to an end user when his counters can't
+ be bound to HW.
+ */
+IS_GLOBAL char *
+hwcfuncs_errmsg_get (char *buf, size_t bufsize, int enable)
+{
+  /* Suspend capture while the pending message is handed over and reset.  */
+  hwcfuncs_errmsg_enabled = 0;
+
+  int have_room = buf != NULL && bufsize != 0;
+  if (have_room && !hwcfuncs_errmsg_valid)
+    buf[0] = 0;			/* nothing pending: return an empty string */
+  else if (have_room)
+    {
+      /* strncpy does not terminate on truncation, so do it by hand.  */
+      strncpy (buf, hwcfuncs_errmsg_buf, bufsize);
+      buf[bufsize - 1] = 0;
+    }
+
+  /* Discard the stored message, then apply the caller's capture setting.  */
+  hwcfuncs_errmsg_buf[0] = 0;
+  hwcfuncs_errmsg_valid = 0;
+  hwcfuncs_errmsg_enabled = enable;
+  return buf;
+}
+
+/* used by cpc to log an error */
+IS_GLOBAL void
+hwcfuncs_int_capture_errmsg (const char *fn, int subcode,
+ const char *fmt, va_list ap)
+{
+ if (hwcfuncs_errmsg_enabled &&
+ !hwcfuncs_errmsg_valid)
+ {
+ vsnprintf (hwcfuncs_errmsg_buf, sizeof (hwcfuncs_errmsg_buf), fmt, ap);
+ TprintfT (DBG_LT0, "hwcfuncs: cpcN_capture_errmsg(): %s\n",
+ hwcfuncs_errmsg_buf);
+ hwcfuncs_errmsg_valid = 1;
+ }
+ return;
+}
+
+/* Log an internal error to the CPC error buffer.
+ * Note: only call this during init functions.
+ * Note: when most cpc calls fail, they will call cpcN_capture_errmsg()
+ * directly, so only call logerr() when a non-cpc function fails.
+ */
+IS_GLOBAL void
+hwcfuncs_int_logerr (const char *format, ...)
+{
+  va_list args;
+
+  /* Hand the formatted message to the capture routine, tagged "logerr".  */
+  va_start (args, format);
+  hwcfuncs_int_capture_errmsg ("logerr", 0, format, args);
+  va_end (args);
+}
+
+/* utils to parse counter strings */
+static void
+clear_hwcdefs ()
+{
+  /* Static storage, hence all-zero: the template for an unused slot.  */
+  static Hwcentry blank;
+
+  unsigned slot = 0;
+  while (slot < MAX_PICS)
+    {
+      Hwcentry *def = hwcdef + slot;
+      /* Overwriting drops the old pointers without freeing them, so the
+         slot's strings and reg_list array are leaked.  */
+      *def = blank;
+      def->reg_num = REGNO_ANY;
+      def->val = -1;
+      def->sort_order = -1;
+      slot++;
+    }
+}
+
+/* Consume the delimiter <delim> at *pp.  Returns 1 and steps past it when
+   present; otherwise returns 0 and leaves *pp where it is, so the cursor
+   can never move beyond the string terminator.  */
+static int
+skip_delim (char **pp, char delim)
+{
+  if (**pp != delim)
+    return 0;
+  (*pp)++;
+  return 1;
+}
+
+/* initialize hwcdef[] based on user's counter definitions
+ *
+ * Resets hwcdef[] and then fills one entry per counter found in <defstring>.
+ * Returns:
+ *   0                        success; hwcdef_cnt is set to the number parsed
+ *   HWCFUNCS_ERROR_HWCARGS   <defstring> is NULL, empty or malformed
+ *   HWCFUNCS_ERROR_HWCINIT   a string could not be duplicated
+ * On failure a message is recorded with logerr() and hwcdef_cnt keeps its
+ * previous value; hwcdef[] may hold partially parsed entries.
+ */
+static int
+process_data_descriptor (const char *defstring)
+{
+  /*
+   * <defstring> format should be of format
+   *  :%s:%s:0x%x:%d:%lld:%d:%d:0x%x[,%s...repeat for each ctr]
+   * where the counter fields are:
+   *  :<userName>:<internalCtr>:<register>:<timeoutVal>[:m<min_time>]:<tag>:<timecvt>:<memop>
+   * See Coll_Ctrl::build_data_desc().
+   */
+  int err = 0;
+  char *ds = NULL;
+  char *dsp = NULL;
+  unsigned idx;
+
+  clear_hwcdefs ();
+  if (!defstring || !strlen (defstring))
+    goto syntax_error;
+  ds = strdup (defstring);
+  if (!ds)
+    {
+      err = HWCFUNCS_ERROR_HWCINIT;
+      goto ext_hw_install_end;
+    }
+  dsp = ds;
+
+  for (idx = 0; idx < MAX_PICS && *dsp; idx++)
+    {
+      Hwcentry *def = &hwcdef[idx];
+
+      /* name */
+      char *name = dsp;
+      dsp = strchr (dsp, ':');
+      if (dsp == NULL)
+        goto syntax_error;
+      *dsp++ = (char) 0;
+
+      /* int_name */
+      char *int_name = dsp;
+      dsp = strchr (dsp, ':');
+      if (dsp == NULL)
+        goto syntax_error;
+      *dsp++ = (char) 0;
+
+      /* reg_num: -1 keeps the REGNO_ANY default from clear_hwcdefs() */
+      regno_t reg = (int) strtol (dsp, &dsp, 0);
+      if (!skip_delim (&dsp, ':') || (reg < 0 && reg != -1))
+        goto syntax_error;
+      if (reg >= 0)
+        def->reg_num = reg;
+
+      /* val */
+      int interval = (int) strtol (dsp, &dsp, 0);
+      if (!skip_delim (&dsp, ':') || interval < 0)
+        goto syntax_error;
+      def->val = interval;
+
+      /* min_time */
+      /*
+       * This is a new field.
+       * An old launcher (dbx, etc.) would not include it.
+       * Detect the presence of the field by the char 'm'.
+       */
+      def->min_time = 0;
+      if (*dsp == 'm')
+        {
+          dsp++;
+          long long tmp_ll = strtoll (dsp, &dsp, 0);
+          if (!skip_delim (&dsp, ':') || tmp_ll < 0)
+            goto syntax_error;
+          def->min_time = tmp_ll;
+        }
+
+      /* sort_order */
+      unsigned sort_order = (int) strtoul (dsp, &dsp, 0);
+      if (!skip_delim (&dsp, ':'))
+        goto syntax_error;
+      def->sort_order = sort_order;
+
+      /* timecvt */
+      int timecvt = (int) strtol (dsp, &dsp, 0);
+      if (!skip_delim (&dsp, ':'))
+        goto syntax_error;
+      def->timecvt = timecvt;
+
+      /* memop: last field, so it ends the string or is followed by ',' */
+      ABST_type memop = (ABST_type) strtol (dsp, &dsp, 0);
+      if (*dsp != 0 && !skip_delim (&dsp, ','))
+        goto syntax_error;
+      def->memop = memop;
+
+      /* An empty name falls back to the other name.  */
+      def->name = strdup (*name ? name : int_name);
+      def->int_name = strdup (*int_name ? int_name : name);
+      if (def->name == NULL || def->int_name == NULL)
+        {
+          free (def->name);
+          free (def->int_name);
+          def->name = NULL;
+          def->int_name = NULL;
+          /* Not a syntax problem: drop the cursor so that no "near ..."
+             excerpt is reported, as for the strdup failure above.  */
+          dsp = NULL;
+          err = HWCFUNCS_ERROR_HWCINIT;
+          goto ext_hw_install_end;
+        }
+      ctrdefprint (DBG_LT1, "hwcfuncs: process_data_descriptor", def);
+    }
+
+  /* Text is left over only when more than MAX_PICS counters were given.  */
+  if (*dsp)
+    {
+      TprintfT (DBG_LT0, "hwcfuncs: ERROR: process_data_descriptor(): "
+                "ctr string had some trailing garbage:"
+                " '%s'\n", dsp);
+      goto syntax_error;
+    }
+  free (ds);
+  hwcdef_cnt = idx;
+  return 0;
+
+syntax_error:
+  err = HWCFUNCS_ERROR_HWCARGS;
+ext_hw_install_end:
+  /* dsp is NULL or points inside ds here: skip_delim() never steps past
+     the terminator, so the excerpt below stays within the buffer.  */
+  if (dsp && *dsp)
+    {
+      TprintfT (DBG_LT0, "hwcfuncs: ERROR: process_data_descriptor(): "
+                " syntax error just before:"
+                " '%s'\n", dsp);
+      logerr (GTXT ("Data descriptor syntax error near `%s'\n"), dsp);
+    }
+  else
+    logerr (GTXT ("Data descriptor syntax error\n"));
+  free (ds);
+  return err;
+}
+
+/* initialize hwcdef[] based on user's counter definitions
+ *
+ * Resets hwcdef[], then copies the first <numctrs> entries of <entries>
+ * into it, giving each copy its own duplicates of the name strings.
+ * Returns 0 and sets hwcdef_cnt on success.  Returns
+ * HWCFUNCS_ERROR_HWCARGS, after logging a message, when more counters are
+ * requested than cpcN_npics allows or when an entry has a negative
+ * overflow value.
+ */
+static int
+process_hwcentrylist (const Hwcentry* entries[], unsigned numctrs)
+{
+  clear_hwcdefs ();
+  if (numctrs > cpcN_npics)
+    {
+      logerr (GTXT ("More than %d counters were specified\n"), cpcN_npics); /*!*/
+      return HWCFUNCS_ERROR_HWCARGS;
+    }
+
+  unsigned idx = 0;
+  while (idx < numctrs)
+    {
+      Hwcentry *slot = hwcdef + idx;
+      *slot = *entries[idx];
+      /* Missing names are replaced by the literal "NULL" (not heap
+         memory); present ones are duplicated.  */
+      slot->name = slot->name ? strdup (slot->name) : "NULL";
+      slot->int_name = slot->int_name ? strdup (slot->int_name) : "NULL";
+      if (slot->val < 0)
+        {
+          logerr (GTXT ("Negative interval specified for HW counter `%s'\n"), /*!*/
+                  slot->name);
+          return HWCFUNCS_ERROR_HWCARGS;
+        }
+      ctrdefprint (DBG_LT1, "hwcfuncs: process_hwcentrylist", slot);
+      idx++;
+    }
+  hwcdef_cnt = numctrs;
+  return 0;
+}
+
+/* see hwcfuncs.h
+ *
+ * Works on a private copy of <countername>: the copy is cut down to the
+ * bare counter name, and each attribute following it is split into
+ * attrs[i].ca_name (a pointer into the copy) and attrs[i].ca_val (parsed
+ * with strtoull, base auto-detected).
+ *
+ * Returns the copy on success, with *errstring (if given) set to NULL.
+ * The copy backs every ca_name, so it must outlive <attrs>; it comes from
+ * strdup() -- presumably released by the caller with free().
+ * Returns NULL on failure, with *errstring (if given) set to a strdup()ed
+ * message.  The copy is freed in that case, so ca_name pointers already
+ * stored in <attrs> are dangling and must not be used.
+ * *pnum_attrs always receives the number of attributes stored.
+ */
+IS_GLOBAL void *
+hwcfuncs_parse_attrs (const char *countername, hwcfuncs_attr_t attrs[],
+                      unsigned max_attrs, uint_t *pnum_attrs, char**errstring)
+{
+  char *head = NULL;
+  char *tail = NULL;
+  uint_t nattrs = 0;
+  char *counter_copy;
+  int success = 0;
+  char errbuf[512];
+  errbuf[0] = 0;
+  counter_copy = strdup (countername);
+  if (counter_copy == NULL)
+    {
+      /* Without the copy there is nothing to parse; report it rather
+         than handing NULL to strchr() below.  */
+      snprintf (errbuf, sizeof (errbuf),
+                GTXT ("Out of memory while parsing `%s'"), countername);
+      goto mycpc2_parse_attrs_end;
+    }
+
+  /* advance pointer to first attribute */
+  tail = strchr (counter_copy, HWCFUNCS_PARSE_ATTR);
+  if (tail)
+    *tail = 0;
+
+  /* remove regno and value, if supplied */
+  {
+    char *tmp = strchr (counter_copy, HWCFUNCS_PARSE_REGNUM);
+    if (tmp)
+      *tmp = 0;
+    tmp = strchr (counter_copy, HWCFUNCS_PARSE_VALUE);
+    if (tmp)
+      *tmp = 0;
+  }
+
+  /* <tail> points at the terminator written over the delimiter in front
+     of the next attribute, or is NULL when there are none left.  */
+  while (tail)
+    {
+      char *pch;
+      if (nattrs >= max_attrs)
+        {
+          snprintf (errbuf, sizeof (errbuf),
+                    GTXT ("Too many attributes defined in `%s'"),
+                    countername);
+          goto mycpc2_parse_attrs_end;
+        }
+      /* get attribute name */
+      head = tail + 1;
+      tail = strchr (head, HWCFUNCS_PARSE_EQUAL);
+      if (!tail)
+        {
+          snprintf (errbuf, sizeof (errbuf),
+                    GTXT ("Missing value for attribute `%s' in `%s'"),
+                    head, countername);
+          goto mycpc2_parse_attrs_end;
+        }
+      *tail = 0; /* null terminate current component */
+      attrs[nattrs].ca_name = head;
+
+      /* get attribute value */
+      head = tail + 1;
+      tail = strchr (head, HWCFUNCS_PARSE_ATTR);
+      if (tail)
+        *tail = 0; /* null terminate current component */
+      attrs[nattrs].ca_val = strtoull (head, &pch, 0);
+      if (pch == head)
+        {
+          /* strtoull consumed nothing: the value is not a number */
+          snprintf (errbuf, sizeof (errbuf),
+                    GTXT ("Illegal value for attribute `%s' in `%s'"),
+                    attrs[nattrs].ca_name, countername);
+          goto mycpc2_parse_attrs_end;
+        }
+      TprintfT (DBG_LT0, "hwcfuncs: pic_: '%s', attribute[%u]"
+                " '%s' = 0x%llx\n",
+                counter_copy, nattrs, attrs[nattrs].ca_name,
+                (long long unsigned int) attrs[nattrs].ca_val);
+
+      nattrs++;
+    }
+  success = 1;
+
+mycpc2_parse_attrs_end:
+  *pnum_attrs = nattrs;
+  if (success)
+    {
+      if (errstring)
+        *errstring = NULL;
+    }
+  else
+    {
+      if (errstring)
+        *errstring = strdup (errbuf);
+      free (counter_copy);
+      counter_copy = NULL;
+    }
+  return counter_copy;
+}
+
+/* Split a counter definition into its components.  Every output pointer
+ * may be NULL when the caller does not need that component.
+ *   <pplus>:     1 if the definition starts with the backtrack marker,
+ *                -1 if it starts with the backtrack-off marker, else 0.
+ *   <pnameOnly>: strdup()ed name, without prefix, attributes or register.
+ *   <pattrs>:    strdup()ed attribute text including its leading
+ *                delimiter, or NULL if there are no attributes.
+ *   <pregstr>:   strdup()ed register text including its leading
+ *                delimiter, or NULL if no register was given.
+ *   <pregno>:    register number, REGNO_ANY if none was given, or -2 if
+ *                the register text is not a plain non-negative number.
+ * If memory for the working copy cannot be allocated, the outputs are set
+ * to 0 / NULL / REGNO_ANY and nothing is parsed.
+ */
+IS_GLOBAL void
+hwcfuncs_parse_ctr (const char *counter_def, int *pplus, char **pnameOnly,
+                    char **pattrs, char **pregstr, regno_t *pregno)
+{
+  char *nameptr, *copy, *slash, *attr_delim;
+  int plus;
+  regno_t regno;
+  nameptr = copy = strdup (counter_def);
+  if (copy == NULL)
+    {
+      /* Out of memory: report "nothing parsed" rather than dereference
+         a NULL pointer below.  */
+      if (pplus)
+        *pplus = 0;
+      if (pnameOnly)
+        *pnameOnly = NULL;
+      if (pattrs)
+        *pattrs = NULL;
+      if (pregstr)
+        *pregstr = NULL;
+      if (pregno)
+        *pregno = REGNO_ANY;
+      return;
+    }
+
+  /* plus */
+  plus = 0;
+  if (nameptr[0] == HWCFUNCS_PARSE_BACKTRACK)
+    {
+      plus = 1;
+      nameptr++;
+    }
+  else if (nameptr[0] == HWCFUNCS_PARSE_BACKTRACK_OFF)
+    {
+      plus = -1;
+      nameptr++;
+    }
+  if (pplus)
+    *pplus = plus;
+
+  /* regno */
+  regno = REGNO_ANY;
+  if (pregstr)
+    *pregstr = NULL;
+  slash = strchr (nameptr, HWCFUNCS_PARSE_REGNUM);
+  if (slash != NULL)
+    {
+      /* the remaining string should be a non-negative number */
+      if (pregstr)
+        *pregstr = strdup (slash);
+      char *endchar = NULL;
+      regno = (regno_t) strtol (slash + 1, &endchar, 0);
+      if (*endchar != 0)
+        regno = -2;		/* trailing characters after the number */
+      if (*(slash + 1) == '-')
+        regno = -2;		/* explicit negative numbers are rejected */
+      /* terminate previous element up to slash */
+      *slash = 0;
+    }
+  if (pregno)
+    *pregno = regno;
+
+  /* attrs */
+  if (pattrs)
+    *pattrs = NULL;
+  attr_delim = strchr (nameptr, HWCFUNCS_PARSE_ATTR);
+  if (attr_delim != NULL)
+    {
+      if (pattrs)
+        *pattrs = strdup (attr_delim);
+      /* terminate previous element up to attr_delim */
+      *attr_delim++ = 0;
+    }
+  if (pnameOnly)
+    *pnameOnly = strdup (nameptr);
+  free (copy);
+}
+
+/* create counters: parse <defstring> into hwcdef[], then have the active
+   driver create them.  Returns the parser's error code if parsing fails,
+   otherwise whatever the driver's hwcdrv_create_counters returns.  */
+IS_GLOBAL int
+hwcfuncs_bind_descriptor (const char *defstring)
+{
+  int rc = process_data_descriptor (defstring);
+  if (rc == 0)
+    return hwcdrv_driver->hwcdrv_create_counters (hwcdef_cnt, hwcdef);
+
+  TprintfT (DBG_LT0, "hwcfuncs: ERROR: hwcfuncs_bind_descriptor failed\n");
+  return rc;
+}
+
+/* see hwcfuncs.h
+   Copies <entries> into hwcdef[], then has the active driver create the
+   counters.  Returns the copy step's error code if it fails, otherwise
+   whatever the driver's hwcdrv_create_counters returns.  */
+IS_GLOBAL int
+hwcfuncs_bind_hwcentry (const Hwcentry* entries[], unsigned numctrs)
+{
+  int rc = process_hwcentrylist (entries, numctrs);
+  if (rc == 0)
+    return hwcdrv_driver->hwcdrv_create_counters (hwcdef_cnt, hwcdef);
+
+  TprintfT (DBG_LT0, "hwcfuncs: ERROR: hwcfuncs_bind_hwcentry\n");
+  return rc;
+}
+
+/* see hwcfuncs.h
+   Returns the table of counter-definition pointers; when <defcnt> is not
+   NULL it receives the number of active counters.  */
+IS_GLOBAL Hwcentry **
+hwcfuncs_get_ctrs (unsigned *defcnt)
+{
+  if (defcnt != NULL)
+    defcnt[0] = hwcdef_cnt;
+  return hwctable;
+}
+
+/* Return 1 if <regno> is in <pctr>'s reg_list, 0 otherwise.  An empty
+   list accepts nothing; for a non-empty list REGNO_ANY always matches.  */
+IS_GLOBAL int
+regno_is_valid (const Hwcentry * pctr, regno_t regno)
+{
+  regno_t *candidates = pctr->reg_list;
+
+  if (REG_LIST_IS_EMPTY (candidates))
+    return 0;
+  if (regno == REGNO_ANY)
+    return 1;			/* wildcard */
+
+  /* Walk the list up to its end marker, looking at no more than
+     MAX_PICS slots.  */
+  int pos = 0;
+  while (pos < MAX_PICS)
+    {
+      regno_t cur = candidates[pos];
+      if (REG_LIST_EOL (cur))
+        return 0;
+      if (cur == regno)
+        return 1;
+      pos++;
+    }
+  return 0;
+}
+
+/* Register assignment is supplied by hwcdrv_api drivers; this wrapper only
+   rejects requests for more counters than cpcN_npics before delegating.  */
+IS_GLOBAL int
+hwcfuncs_assign_regnos (Hwcentry* entries[],
+                        unsigned numctrs)
+{
+  if (numctrs <= cpcN_npics)
+    return hwcdrv_driver->hwcdrv_assign_regnos (entries, numctrs);
+
+  logerr (GTXT ("More than %d counters were specified\n"), cpcN_npics); /*!*/
+  return HWCFUNCS_ERROR_HWCARGS;
+}
+
+extern hwcdrv_api_t hwcdrv_pcl_api;
+static int hwcdrv_driver_inited = 0;
+
+/* Return the driver to use, selecting it on the first call: hwcdrv_pcl_api
+   if its init routine returns 0, otherwise the stub driver hwcdrv_default.
+   The first call also points hwctable[] at hwcdef[] and, on success, reads
+   the counter limit into cpcN_npics.  Later calls return the same choice.  */
+hwcdrv_api_t *
+get_hwcdrv ()
+{
+  if (!hwcdrv_driver_inited)
+    {
+      hwcdrv_driver_inited = 1;
+      cpcN_npics = 0;
+      for (int slot = 0; slot < MAX_PICS; slot++)
+        hwctable[slot] = hwcdef + slot;
+
+      hwcdrv_driver = &hwcdrv_pcl_api;
+      hwcdrv_driver->hwcdrv_init_status = hwcdrv_driver->hwcdrv_init (NULL, NULL);
+      if (hwcdrv_driver->hwcdrv_init_status == 0)
+        hwcdrv_driver->hwcdrv_get_info (NULL, NULL, &cpcN_npics, NULL, NULL);
+      else
+        hwcdrv_driver = &hwcdrv_default;
+    }
+  return hwcdrv_driver;
+}
diff --git a/gprofng/common/hwcfuncs.h b/gprofng/common/hwcfuncs.h
new file mode 100644
index 0000000..ef0360b
--- /dev/null
+++ b/gprofng/common/hwcfuncs.h
@@ -0,0 +1,269 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+/* Hardware counter profiling */
+
+#ifndef __HWCFUNCS_H
+#define __HWCFUNCS_H
+
+#ifdef LIBCOLLECTOR_SRC /* running in libcollector */
+#define hwcfuncs_int_logerr __collector_hwcfuncs_int_logerr
+#define hwcfuncs_parse_ctr __collector_hwcfuncs_parse_ctr
+#define hwcfuncs_parse_attrs __collector_hwcfuncs_parse_attrs
+#define hwcfuncs_bind_descriptor __collector_hwcfuncs_bind_descriptor
+#define hwcfuncs_bind_hwcentry __collector_hwcfuncs_bind_hwcentry
+#define hwcfuncs_assign_regnos __collector_hwcfuncs_assign_regnos
+#define regno_is_valid __collector_regno_is_valid
+#define hwcfuncs_get_ctrs __collector_hwcfuncs_get_ctrs
+#define hwcfuncs_errmsg_get __collector_hwcfuncs_errmsg_get
+#endif /* --- LIBCOLLECTOR_SRC --- */
+
+#include <signal.h> /* siginfo_t */
+#include <limits.h> /* UINT64_t */
+#include <sys/types.h>
+#include <stdint.h>
+
+#include "hwcentry.h" /* for Hwcentry type */
+#include "gp-time.h"
+
+typedef unsigned int uint_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*---------------------------------------------------------------------------*/
+/* compile options */
+
+#define HWC_DEBUG 0 /* 0/1 to enable extra HWC debug */
+
+/*---------------------------------------------------------------------------*/
+/* typedefs */
+/* Generic hw event: one timestamp plus one 64-bit value for each of the
+   MAX_PICS counter slots.  */
+ typedef struct _hwc_event_t
+ { /* generalized counter event */
+ hrtime_t ce_hrt; /* timestamp; gethrtime() */
+ uint64_t ce_pic[MAX_PICS]; /* per-slot counter samples or start values */
+ } hwc_event_t;
+
+ /* Supplementary data that accompanies some hw events.
+    A field equal to HWCFUNCS_INVALID_U64 is unused; HWCFUNCS_SAMPLE_RESET()
+    stores that value in all four fields and HWCFUNCS_SAMPLE_IS_RESET()
+    tests for it.  */
+ typedef struct
+ { /* supplementary data fields */
+ uint64_t smpl_pc; /* pc related to event */
+ uint64_t smpl_data_source; /* chip-specific data source encoding */
+ uint64_t smpl_latency; /* latency related to event */
+ uint64_t smpl_mem_addr; /* memory address related to event */
+ } hwc_sample_t;
+#define HWCFUNCS_INVALID_U64 0xFEEDBEEFDEADBEEFllu /* identifies fields as unused */
+
+typedef struct { /* supplementary data fields */
+ hwc_sample_t sample[MAX_PICS]; /* counter samples or start values */
+} hwc_event_samples_t;
+
+#define HWCFUNCS_SAMPLE_RESET(sample) \
+ do { \
+ (sample)->smpl_pc =HWCFUNCS_INVALID_U64; \
+ (sample)->smpl_data_source =HWCFUNCS_INVALID_U64; \
+ (sample)->smpl_latency =HWCFUNCS_INVALID_U64; \
+ (sample)->smpl_mem_addr =HWCFUNCS_INVALID_U64; \
+ } while(0)
+
+#define HWCFUNCS_SAMPLE_IS_RESET(sample) \
+ ( \
+ (sample)->smpl_pc ==HWCFUNCS_INVALID_U64 && \
+ (sample)->smpl_data_source==HWCFUNCS_INVALID_U64 && \
+ (sample)->smpl_latency ==HWCFUNCS_INVALID_U64 && \
+ (sample)->smpl_mem_addr ==HWCFUNCS_INVALID_U64 \
+ )
+
+/*---------------------------------------------------------------------------*/
+/* macros */
+
+/* Largest value representable in a 64-bit overflow interval.  */
+#define HW_INTERVAL_MAX UINT64_MAX
+/* Value lying (x - 1) below HW_INTERVAL_MAX; presumably the counter start
+   value that makes it overflow after <x> events -- TODO confirm with the
+   drivers that use it.  */
+#define HW_INTERVAL_PRESET(x) (HW_INTERVAL_MAX - ((uint64_t)(x) - 1))
+/* Convert <x> to the 64-bit interval type.  The expansion is fully
+   parenthesised so it can be used inside larger expressions; the previous
+   definition was missing its closing parenthesis and could not compile.  */
+#define HW_INTERVAL_TYPE(x) ((uint64_t) (x))
+
+/* parsing */
+#define HWCFUNCS_MAX_ATTRS 20
+#define HWCFUNCS_PARSE_ATTR '~'
+#define HWCFUNCS_PARSE_EQUAL '='
+#define HWCFUNCS_PARSE_BACKTRACK '+'
+#define HWCFUNCS_PARSE_BACKTRACK_OFF '-'
+#define HWCFUNCS_PARSE_REGNUM '/'
+#define HWCFUNCS_PARSE_VALUE ','
+
+/* error codes */
+#define HWCFUNCS_ERROR_GENERIC (-1)
+#define HWCFUNCS_ERROR_NOT_SUPPORTED (-2)
+#define HWCFUNCS_ERROR_ALREADY_CALLED (-3)
+#define HWCFUNCS_ERROR_HWCINIT (-4)
+#define HWCFUNCS_ERROR_HWCARGS (-5)
+#define HWCFUNCS_ERROR_MEMORY (-6)
+#define HWCFUNCS_ERROR_UNAVAIL (-7)
+#define HWCFUNCS_ERROR_ERRNO_ZERO (-8)
+#define HWCFUNCS_ERROR_UNEXPECTED (-99)
+
+/*---------------------------------------------------------------------------*/
+/* prototypes */
+
+typedef void (*hwcfuncs_abort_fn_t) (int errnum, const char *msg);
+
+extern void hwcfuncs_int_logerr(const char *format,...);
+/* Log an error to the internal error buffer. See hwcfuncs_errmsg_get().
+ Note: Not MT-safe; don't even enable logging in an MT environment.
+ Recommend using this call only during init.
+ Note: when a libcpc call fails, it may automatically call
+ cpcN_capture_errmsg() to log the error message in the same internal buffer.
+ Recommend using this call only for non-cpc failures.
+ */
+
+#define HWCFUNCS_SUPPORT_OVERFLOW_PROFILING 0x01llu
+#define HWCFUNCS_SUPPORT_PEBS_SAMPLING 0x02llu
+#define HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID 0x04llu // OS identifies which counter overflowed
+ /* get info about session
+ Input:
+ <cpuver>: if not NULL, returns value of CPC cpu version
+ <cciname>: if not NULL, returns name of CPU
+ <npics>: if not NULL, returns maximum # of HWCs
+ <docref>: if not NULL, returns documentation reference
+ <support>: if not NULL, returns bitmask (see above) of hwc support
+ Return: none
+ */
+
+ typedef void* (*hwcfuncs_tsd_get_fn_t) (void);
+ typedef void (hwcf_hwc_cb_t) (uint_t cpcregno, const char *name);
+ typedef void (hwcf_attr_cb_t) (const char *attr);
+
+ extern void
+ hwcfuncs_parse_ctr (const char *counter_def, int *pplus, char **pnameOnly,
+ char **pattrs, char **pregstr, regno_t *pregno);
+/* Parse a counter definition string (value must already be stripped off).
+ Input:
+ <counter_def>: input whose format is
+ [+|-]<countername>[~attrs...][/<regno>]
+ pointers to return values: Any can be NULL.
+ Return:
+ <pplus>: 1 if [+] is found, -1 if [-] is found, 0 otherwise
+ <pnameOnly>: strdup(<countername>)
+ <pattrs>: strdup([~attrs...]) if specified, NULL otherwise.
+ <pregstr>: strdup(/<regno>) if specified, NULL otherwise.
+ <pregno>: <regno> if readable, REGNO_ANY if not specified, or -2 otherwise.
+ */
+
+ typedef struct
+ {
+ char *ca_name;
+ uint64_t ca_val;
+ } hwcfuncs_attr_t; /* matches cpc_attr_t */
+
+ void * hwcfuncs_parse_attrs (const char *countername,
+ hwcfuncs_attr_t attrs[], unsigned max_attrs,
+ uint_t *pnum_attrs, char **errstring);
+ /* Extract the attribute fields from <countername>.
+ Input:
+ <countername>: string whose format is
+ [+]<ctrname>[~attributes...][/<regno>][,...]
+ <attrs>: array of attributes to be returned
+ <max_attrs>: number of elements in <attrs>
+ <pnum_attrs>: if not NULL, will return how many attrs were found.
+ <errstring>: pointer to a buffer for storing error info, or NULL.
+ Return: upon success, a pointer to an allocated copy of <countername>, or
+ NULL if there's a failure. (A copy is made in order to provide storage
+ for the ca_name fields in the <attrs> array.)
+
+ The pointer should be freed when <attrs> is no longer in use.
+ <attrs> will be filled in data from countername.
+ <pnum_attrs> will have the number of elements in <attrs>. May be
+ non-zero even if return value indicates an error.
+ <errstring> NULL if no error, otherwise, a malloc'd GTXT string.
+ */
+
+ extern int hwcfuncs_bind_descriptor (const char *defstring);
+ /* Bind counters to resources.
+ Input:
+ <defstring>: string whose format is
+ :%s:%s:0x%x:%d:%d,0x%x[:%s...repeat for each ctr]
+ where the fields are:
+ :<userName>:<internalCtr>:<register>:<timeoutVal>:<tag>:<memop>
+ Return: 0 if successful
+ HWCFUNCS_ERROR_HWCINIT if resources unavailable
+ HWCFUNCS_ERROR_HWCARGS if counters were not specified correctly
+ */
+
+ extern int hwcfuncs_bind_hwcentry (const Hwcentry *entries[],
+ unsigned numctrs);
+ /* Bind counters to resources.
+ Input:
+ <entries>: array of counters
+ <numctrs>: number of items in <entries>
+ Return: 0 if successful
+ HWCFUNCS_ERROR_HWCINIT if resources unavailable
+ HWCFUNCS_ERROR_HWCARGS if counters were not specified correctly
+ */
+
+ extern int hwcfuncs_assign_regnos (Hwcentry *entries[], unsigned numctrs);
+ /* Assign entries[]->reg_num values as needed by platform
+ Note: modifies <entries> by supplying a regno to each counter
+ Input:
+ <entries>: array of counters
+ <numctrs>: number of items in <entries>
+ Output:
+ <entries>: array of counters is modified
+ Return: 0 if successful
+ HWCFUNCS_ERROR_HWCINIT if resources unavailable
+ HWCFUNCS_ERROR_HWCARGS if counters were not specified correctly
+ */
+
+ extern int regno_is_valid (const Hwcentry *pctr, regno_t regno);
+ /* return 1 if <regno> is in Hwcentry's list
+ Input:
+ <pctr>: counter definition, reg_list[] should be initialized
+ <regno>: register to check
+ Return: 1 if <regno> is in Hwcentry's list, 0 otherwise
+ */
+
+ extern Hwcentry **hwcfuncs_get_ctrs (unsigned *defcnt);
+ /* Get descriptions of the currently bound counters.
+ Input:
+ <defcnt>: if not NULL, returns number of counter definitions.
+ Return:
+ table of counter definition pointers
+ */
+
+ extern char *hwcfuncs_errmsg_get (char * buf, size_t bufsize,
+ int enable_capture);
+ /* Gets a recent HWC error message.
+ To clear previous error messages and ensure error message capture is enabled,
+ call hwcfuncs_errmsg_get(NULL,0,1).
+ Once enabled, one error is stored in an internal buffer. A call to this
+ function will clear the buffer and allow a new message to be captured.
+ Note: Not MT-safe - don't enable this feature in an MT environment.
+ Input:
+ <buf>: pointer to buffer or NULL.
+ <bufsize>: size of <buf>
+ <enable_capture>: 0 - disable buffering, 1 - enable buffering.
+ Return: error string or an empty string.
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ! __HWCFUNCS_H */
diff --git a/gprofng/common/hwctable.c b/gprofng/common/hwctable.c
new file mode 100644
index 0000000..bc441e1
--- /dev/null
+++ b/gprofng/common/hwctable.c
@@ -0,0 +1,5410 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <limits.h>
+
+#include "hwcdrv.h"
+#include "hwcfuncs.h"
+
+/* TprintfT(<level>,...) definitions. Adjust per module as needed */
+#define DBG_LT0 0 // for high-level configuration, unexpected errors/warnings
+#define DBG_LT1 1 // for configuration details, warnings
+#define DBG_LT2 2
+#define DBG_LT3 3
+
+/*---------------------------------------------------------------------------*/
+/* compile options */
+
+#define DISALLOW_USI_USII_6357446
+/* Solaris 9/libcpc1 allows cpc_bind() to work on US-IIe processors, even
+ though this processor cannot generate profiling interrupts. */
+
+#define DISALLOW_PENTIUM_PRO_MMX_7007575
+/* Solaris/libcpc2 defaults to "Pentium Pro with MMX, Pentium II"
+ when it doesn't recognize an Intel processor. As a result,
+ when collect attempts to start Pentium Pro counters on a
+ new machine (e.g. Westmere as of 1/2011), the OS may hang. */
+
+/* Register 0 counter doesn't work on Niagara T1 version (?) */
+#define WORKAROUND_6231196_NIAGARA1_NO_CTR_0
+
+/*---------------------------------------------------------------------------*/
+/* consts, macros */
+
+/* 10^N rates */
+#define PRELOADS_9 1001000001
+#define PRELOADS_85 320100001
+#define PRELOADS_8 100100001
+#define PRELOADS_75 32010001
+#define PRELOADS_7 10010001
+#define PRELOADS_65 3201001
+#define PRELOADS_6 1001001
+#define PRELOADS_55 320101
+#define PRELOADS_5 100101
+#define PRELOADS_45 32001
+#define PRELOADS_4 10001
+#define PRELOADS_35 3201
+#define PRELOADS_3 1001
+#define PRELOADS_25 301
+
+#define ABST_TBD ABST_NONE /* to be determined */
+
+/*---------------------------------------------------------------------------*/
+/* prototypes */
+static void hwc_cb (uint_t cpc_regno, const char *name);
+static void attrs_cb (const char *attr);
+static int attr_is_valid (int forKernel, const char *attr);
+
+/*---------------------------------------------------------------------------*/
+/* HWC definition tables */
+
+/*
+ comments on hwcentry tables
+ ---------------------------
+ name: this field should not contain '~'.
+ int_name: actual name of register, may contain ~ attribute specifications.
+ regnum: assigned register.
+ metric: if non-NULL, is a 'standard' counter that will show up in help.
+ timecvt: >0: can convert to time, 'timecvt' CPU cycles per event
+ =0: counts events
+ <0: can convert to time, count reference-clock cycles at '-timecvt' MHz
+ memop: see description for ABST_type enum
+ */
+
+// PRELOAD(): generates an interval based on the cycles/event and CPU GHZ.
+// Note: the macro tweaks the interval so that it ends in decimal 001.
+#define CYC_PER_SAMPLE (1000ULL*1000*1000/100) // cycles per signal at 1ghz, 100 samples/second
+#define PRELOAD(min_cycles_per_event,ghz) (((ghz)*CYC_PER_SAMPLE/(min_cycles_per_event))/100*100+1)
+
+// PRELOAD_DEF: initial value for uncalibrated events.
+// This value should be based on a rate that will work for the slowest changing
+// HWCs, HWCs where there are many CPU cycles between events.
+//
+// The interval needs to target the slowest HWCs so that
+// automatic adjustment of HWC overflow intervals can adapt.
+#define PRELOAD_DEF PRELOAD(1000,3) // default interval targets 1000 cycles/event at 3ghz
+// For er_kernel, whose HWC intervals cannot be adjusted automatically for ON/HI/LO,
+// the interval should target some safe interval for fast events.
+#define PRELOAD_DEF_ERKERNEL PRELOAD(4,4) // default interval targets 4 cycles/event at 4ghz
+
+static const Hwcentry empty_ctr = {NULL, NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, 0};
+
+
+// --- use cycles counter to expose "system_time" on Linux ---
+#define SYSTIME_REGNOS REGNO_ANY // Linux: make sys_time/usr_time available for data collection
+// Note: For x86, Linux and Solaris use different ref-clock names
+#define USE_INTEL_REF_CYCLES(MHZ) \
+ {"usr_time","unhalted-reference-cycles", SYSTIME_REGNOS, STXT("User CPU"), PRELOAD(900,MHZ), -(MHZ), ABST_NONE}, \
+ {"usr_time","cpu_clk_unhalted.ref_p", SYSTIME_REGNOS, STXT("User CPU"), PRELOAD(900,MHZ), -(MHZ), ABST_NONE}, \
+ {"sys_time","unhalted-reference-cycles~system=1~user=0", SYSTIME_REGNOS, STXT("System CPU"), PRELOAD(900,MHZ), -(MHZ), ABST_NONE}, \
+ {"sys_time","cpu_clk_unhalted.ref_p~system=1~user=0", SYSTIME_REGNOS, STXT("System CPU"), PRELOAD( 900,MHZ), -(MHZ), ABST_NONE}, \
+ {"cycles0", "unhalted-reference-cycles", 0, NULL, PRELOAD( 900,MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \
+ {"cycles0", "cpu_clk_unhalted.ref_p", 0, NULL, PRELOAD( 900,MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \
+ {"cycles1", "unhalted-reference-cycles", 1, NULL, PRELOAD( 910,MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \
+ {"cycles1", "cpu_clk_unhalted.ref_p", 1, NULL, PRELOAD( 910,MHZ), -(MHZ), ABST_NONE}, /*hidden*/ \
+ /* end of list */
+
+#define SPARC_CYCLES \
+ {"usr_time","Cycles_user", SYSTIME_REGNOS, STXT("User CPU"), PRELOADS_75,1, ABST_NONE}, \
+ {"sys_time","Cycles_user~system=1~user=0", SYSTIME_REGNOS, STXT("System CPU"), PRELOADS_75,1, ABST_NONE}, \
+ /* end of list */
+
+
+/* --- PERF_EVENTS "software" definitions --- */
+#define PERF_EVENTS_SW_EVENT_ALIASES \
+// none supported for now
+#if 0
+ {"usr", "PERF_COUNT_SW_TASK_CLOCK", REGNO_ANY, STXT("User CPU"), PRELOADS_7, -(1000), ABST_NONE}, \
+ {"sys", "PERF_COUNT_SW_TASK_CLOCK~system=1~user=0", REGNO_ANY, STXT("System CPU"), PRELOADS_7, -(1000), ABST_NONE}, \
+ /* end of list */
+#endif
+
+#define PERF_EVENTS_SW_EVENT_DEFS \
+// none supported for now
+#if 0
+ {"PERF_COUNT_SW_TASK_CLOCK", NULL, REGNO_ANY, NULL, PRELOADS_7, -(1000),ABST_NONE}, \
+ /* end of list */
+#endif
+
+/*
+ * The PAPI descriptive strings used to be wrapped with STXT(),
+ * a macro defined in perfan/include/i18n.h. For the time being,
+ * we want to demote the PAPI counters by omitting the
+ * descriptions. So we use a new macro PAPITXT() for this purpose.
+ */
+#define PAPITXT(x) NULL
+
+/* Solaris "Generic" Counters.
+   PAPITXT() is defined to expand to NULL, so none of the descriptions
+   written below is actually stored in the table.  Initializer order appears
+   to be {name, int_name, regnum, metric, preload interval, timecvt, memop}
+   -- TODO confirm against the Hwcentry declaration.  The list ends with a
+   row whose name is NULL.
+   NOTE(review): the terminating row supplies nine initializers while the
+   data rows supply seven; confirm that this is intended.  */
+static Hwcentry papi_generic_list[] = {
+ {"PAPI_l1_dcm", NULL, REGNO_ANY, PAPITXT ("L1 D-cache misses"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l1_icm", NULL, REGNO_ANY, PAPITXT ("L1 I-cache misses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l2_dcm", NULL, REGNO_ANY, PAPITXT ("L2 D-cache misses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l2_icm", NULL, REGNO_ANY, PAPITXT ("L2 I-cache misses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l3_dcm", NULL, REGNO_ANY, PAPITXT ("L3 D-cache misses"), PRELOADS_5, 0, ABST_NONE},
+ {"PAPI_l3_icm", NULL, REGNO_ANY, PAPITXT ("L3 I-cache misses"), PRELOADS_5, 0, ABST_NONE},
+ {"PAPI_l1_tcm", NULL, REGNO_ANY, PAPITXT ("L1 misses"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l2_tcm", NULL, REGNO_ANY, PAPITXT ("L2 misses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l3_tcm", NULL, REGNO_ANY, PAPITXT ("L3 misses"), PRELOADS_5, 0, ABST_NONE},
+ {"PAPI_ca_snp", NULL, REGNO_ANY, PAPITXT ("Requests for a snoop"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_ca_shr", NULL, REGNO_ANY, PAPITXT ("Requests for exclusive access to shared cache line"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_ca_cln", NULL, REGNO_ANY, PAPITXT ("Requests for exclusive access to clean cache line"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_ca_inv", NULL, REGNO_ANY, PAPITXT ("Requests for cache line invalidation"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_ca_itv", NULL, REGNO_ANY, PAPITXT ("Requests for cache line intervention"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l3_ldm", NULL, REGNO_ANY, PAPITXT ("L3 load misses"), PRELOADS_5, 0, ABST_NONE},
+ {"PAPI_l3_stm", NULL, REGNO_ANY, PAPITXT ("L3 store misses"), PRELOADS_5, 0, ABST_NONE},
+ {"PAPI_bru_idl", NULL, REGNO_ANY, PAPITXT ("Cycles branch units are idle"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_fxu_idl", NULL, REGNO_ANY, PAPITXT ("Cycles integer units are idle"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_fpu_idl", NULL, REGNO_ANY, PAPITXT ("Cycles FP units are idle"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_lsu_idl", NULL, REGNO_ANY, PAPITXT ("Cycles load/store units are idle"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_tlb_dm", NULL, REGNO_ANY, PAPITXT ("DTLB misses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_tlb_im", NULL, REGNO_ANY, PAPITXT ("ITLB misses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_tlb_tl", NULL, REGNO_ANY, PAPITXT ("Total TLB misses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_tlb_tm", NULL, REGNO_ANY, PAPITXT ("Total TLB misses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l1_ldm", NULL, REGNO_ANY, PAPITXT ("L1 load misses"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l1_stm", NULL, REGNO_ANY, PAPITXT ("L1 store misses"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l2_ldm", NULL, REGNO_ANY, PAPITXT ("L2 load misses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l2_stm", NULL, REGNO_ANY, PAPITXT ("L2 store misses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_btac_m", NULL, REGNO_ANY, PAPITXT ("Branch target address cache misses"), PRELOADS_5, 0, ABST_NONE},
+ {"PAPI_prf_dm", NULL, REGNO_ANY, PAPITXT ("Data prefetch cache misses"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l3_dch", NULL, REGNO_ANY, PAPITXT ("L3 D-cache hits"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_tlb_sd", NULL, REGNO_ANY, PAPITXT ("TLB shootdowns"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_csr_fal", NULL, REGNO_ANY, PAPITXT ("Failed store conditional instructions"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_csr_suc", NULL, REGNO_ANY, PAPITXT ("Successful store conditional instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_csr_tot", NULL, REGNO_ANY, PAPITXT ("Total store conditional instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_mem_scy", NULL, REGNO_ANY, PAPITXT ("Cycles Stalled Waiting for memory accesses"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_mem_rcy", NULL, REGNO_ANY, PAPITXT ("Cycles Stalled Waiting for memory reads"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_mem_wcy", NULL, REGNO_ANY, PAPITXT ("Cycles Stalled Waiting for memory writes"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_stl_icy", NULL, REGNO_ANY, PAPITXT ("Cycles with no instruction issue"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_ful_icy", NULL, REGNO_ANY, PAPITXT ("Cycles with maximum instruction issue"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_stl_ccy", NULL, REGNO_ANY, PAPITXT ("Cycles with no instructions completed"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_ful_ccy", NULL, REGNO_ANY, PAPITXT ("Cycles with maximum instructions completed"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_hw_int", NULL, REGNO_ANY, PAPITXT ("Hardware interrupts"), PRELOADS_5, 0, ABST_NONE},
+ {"PAPI_br_ucn", NULL, REGNO_ANY, PAPITXT ("Unconditional branch instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_br_cn", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_br_tkn", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions taken"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_br_ntk", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions not taken"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_br_msp", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions mispredicted"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_br_prc", NULL, REGNO_ANY, PAPITXT ("Cond. branch instructions correctly predicted"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_fma_ins", NULL, REGNO_ANY, PAPITXT ("FMA instructions completed"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_tot_iis", NULL, REGNO_ANY, PAPITXT ("Instructions issued"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_tot_ins", NULL, REGNO_ANY, PAPITXT ("Instructions completed"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_int_ins", NULL, REGNO_ANY, PAPITXT ("Integer instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_fp_ins", NULL, REGNO_ANY, PAPITXT ("Floating-point instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_ld_ins", NULL, REGNO_ANY, PAPITXT ("Load instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_sr_ins", NULL, REGNO_ANY, PAPITXT ("Store instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_br_ins", NULL, REGNO_ANY, PAPITXT ("Branch instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_vec_ins", NULL, REGNO_ANY, PAPITXT ("Vector/SIMD instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_res_stl", NULL, REGNO_ANY, PAPITXT ("Cycles stalled on any resource"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_fp_stal", NULL, REGNO_ANY, PAPITXT ("Cycles the FP unit(s) are stalled"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_tot_cyc", NULL, REGNO_ANY, PAPITXT ("Total cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"PAPI_lst_ins", NULL, REGNO_ANY, PAPITXT ("Load/store instructions completed"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_syc_ins", NULL, REGNO_ANY, PAPITXT ("Sync instructions completed"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l1_dch", NULL, REGNO_ANY, PAPITXT ("L1 D-cache hits"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_l2_dch", NULL, REGNO_ANY, PAPITXT ("L2 D-cache hits"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l1_dca", NULL, REGNO_ANY, PAPITXT ("L1 D-cache accesses"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_l2_dca", NULL, REGNO_ANY, PAPITXT ("L2 D-cache accesses"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l3_dca", NULL, REGNO_ANY, PAPITXT ("L3 D-cache accesses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l1_dcr", NULL, REGNO_ANY, PAPITXT ("L1 D-cache reads"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_l2_dcr", NULL, REGNO_ANY, PAPITXT ("L2 D-cache reads"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l3_dcr", NULL, REGNO_ANY, PAPITXT ("L3 D-cache reads"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l1_dcw", NULL, REGNO_ANY, PAPITXT ("L1 D-cache writes"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_l2_dcw", NULL, REGNO_ANY, PAPITXT ("L2 D-cache writes"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l3_dcw", NULL, REGNO_ANY, PAPITXT ("L3 D-cache writes"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l1_ich", NULL, REGNO_ANY, PAPITXT ("L1 I-cache hits"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_l2_ich", NULL, REGNO_ANY, PAPITXT ("L2 I-cache hits"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l3_ich", NULL, REGNO_ANY, PAPITXT ("L3 I-cache hits"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l1_ica", NULL, REGNO_ANY, PAPITXT ("L1 I-cache accesses"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_l2_ica", NULL, REGNO_ANY, PAPITXT ("L2 I-cache accesses"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l3_ica", NULL, REGNO_ANY, PAPITXT ("L3 I-cache accesses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l1_icr", NULL, REGNO_ANY, PAPITXT ("L1 I-cache reads"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_l2_icr", NULL, REGNO_ANY, PAPITXT ("L2 I-cache reads"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l3_icr", NULL, REGNO_ANY, PAPITXT ("L3 I-cache reads"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l1_icw", NULL, REGNO_ANY, PAPITXT ("L1 I-cache writes"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_l2_icw", NULL, REGNO_ANY, PAPITXT ("L2 I-cache writes"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l3_icw", NULL, REGNO_ANY, PAPITXT ("L3 I-cache writes"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l1_tch", NULL, REGNO_ANY, PAPITXT ("L1 total hits"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_l2_tch", NULL, REGNO_ANY, PAPITXT ("L2 total hits"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l3_tch", NULL, REGNO_ANY, PAPITXT ("L3 total hits"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l1_tca", NULL, REGNO_ANY, PAPITXT ("L1 total accesses"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_l2_tca", NULL, REGNO_ANY, PAPITXT ("L2 total accesses"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l3_tca", NULL, REGNO_ANY, PAPITXT ("L3 total accesses"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l1_tcr", NULL, REGNO_ANY, PAPITXT ("L1 total reads"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_l2_tcr", NULL, REGNO_ANY, PAPITXT ("L2 total reads"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l3_tcr", NULL, REGNO_ANY, PAPITXT ("L3 total reads"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_l1_tcw", NULL, REGNO_ANY, PAPITXT ("L1 total writes"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_l2_tcw", NULL, REGNO_ANY, PAPITXT ("L2 total writes"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_l3_tcw", NULL, REGNO_ANY, PAPITXT ("L3 total writes"), PRELOADS_6, 0, ABST_NONE},
+ {"PAPI_fml_ins", NULL, REGNO_ANY, PAPITXT ("FP multiply instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_fad_ins", NULL, REGNO_ANY, PAPITXT ("FP add instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_fdv_ins", NULL, REGNO_ANY, PAPITXT ("FP divide instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_fsq_ins", NULL, REGNO_ANY, PAPITXT ("FP square root instructions"), PRELOADS_65, 0, ABST_NONE},
+ {"PAPI_fnv_ins", NULL, REGNO_ANY, PAPITXT ("FP inverse instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"PAPI_fp_ops", NULL, REGNO_ANY, PAPITXT ("FP operations"), PRELOADS_7, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+static Hwcentry usIlist[] = { /* two aliases: "cycles" -> Cycle_cnt, "insts" -> Instr_cnt */
+ {"cycles", "Cycle_cnt", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE}, /* timecvt 1: convertible to time */
+ {"insts", "Instr_cnt", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE}, /* timecvt 0: counts events */
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* end of list: NULL name */
+};
+
+static Hwcentry usIIIlist[] = /* III, IIIi, IIIp. Note that some counters are processor-specific */{
+ {"cycles", "Cycle_cnt", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"insts", "Instr_cnt", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
+ {"icm", "IC_miss", REGNO_ANY, STXT ("I$ Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"dcrm", "DC_rd_miss", REGNO_ANY, STXT ("D$ Read Misses"), PRELOADS_5, 0, ABST_LOAD},
+ {"dcwm", "DC_wr_miss", REGNO_ANY, STXT ("D$ Write Misses"), PRELOADS_5, 0, ABST_STORE},
+ {"dcr", "DC_rd", REGNO_ANY, STXT ("D$ Read Refs"), PRELOADS_6, 0, ABST_LOAD},
+ {"dcw", "DC_wr", REGNO_ANY, STXT ("D$ Write Refs"), PRELOADS_6, 0, ABST_STORE},
+ {"ecref", "EC_ref", REGNO_ANY, STXT ("E$ Refs"), PRELOADS_6, 0, ABST_LDST},
+ {"itlbm", "ITLB_miss", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"dtlbm", "DTLB_miss", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_5, 0, ABST_US_DTLBM},
+ {"ecm", "EC_misses", REGNO_ANY, STXT ("E$ Misses"), PRELOADS_5, 0, ABST_LDST},
+ {"ecrm", "EC_rd_miss", REGNO_ANY, STXT ("E$ Read Misses"), PRELOADS_5, 0, ABST_LOAD},
+ {"ecml", "EC_miss_local", REGNO_ANY, STXT ("E$ Local Misses"), PRELOADS_5, 0, ABST_LDST},
+ {"ecmr", "EC_miss_remote", REGNO_ANY, STXT ("E$ Remote Misses"), PRELOADS_5, 0, ABST_LDST},
+ {"ecim", "EC_ic_miss", REGNO_ANY, STXT ("E$ Instr. Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"icstall", "Dispatch0_IC_miss", REGNO_ANY, STXT ("I$ Stall Cycles"), PRELOADS_6, 1, ABST_NONE},
+ {"dcstall", "Re_DC_miss", REGNO_ANY, STXT ("D$ and E$ Stall Cycles"), PRELOADS_6, 1, ABST_LOAD},
+ {"ecstall", "Re_EC_miss", REGNO_ANY, STXT ("E$ Stall Cycles"), PRELOADS_6, 1, ABST_LOAD},
+ {"sqstall", "Rstall_storeQ", REGNO_ANY, STXT ("StoreQ Stall Cycles"), PRELOADS_6, 1, ABST_STORE},
+ {"rawstall", "Re_RAW_miss", REGNO_ANY, STXT ("RAW Stall Cycles"), PRELOADS_6, 1, ABST_LOAD},
+ {"dcmissov", "Re_DC_missovhd", REGNO_ANY, STXT ("DC Miss Ovhd"), PRELOADS_6, 1, ABST_LOAD},
+ {"fpustall", "Re_FPU_bypass", REGNO_ANY, STXT ("FPU Stall Cycles"), PRELOADS_6, 1, ABST_NONE},
+ {"fpusestall", "Rstall_FP_use", REGNO_ANY, STXT ("FPU Use Stall Cycles"), PRELOADS_6, 1, ABST_NONE},
+ {"iustall", "Rstall_IU_use", REGNO_ANY, STXT ("IU Stall Cycles"), PRELOADS_6, 1, ABST_NONE},
+ {"fpadd", "FA_pipe_completion", REGNO_ANY, STXT ("FP Adds"), PRELOADS_6, 0, ABST_NONE},
+ {"fpmul", "FM_pipe_completion", REGNO_ANY, STXT ("FP Muls"), PRELOADS_6, 0, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or are load-store need to be in this table */
+ {"Cycle_cnt", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
+ {"EC_miss_mtag_remote", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST},
+ {"DC_rd_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"DC_wr_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"DC_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"DC_wr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"EC_ref", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST},
+ {"EC_snoop_inv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/},
+ {"EC_wb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"EC_wb_remote", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"DTLB_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_US_DTLBM},
+ {"EC_misses", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST},
+ {"EC_rd_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"PC_port0_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"EC_miss_local", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST},
+ {"EC_miss_remote", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST},
+ {"EC_snoop_cb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/},
+ {"WC_snoop_cb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/},
+ {"WC_scrubbed", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"WC_wb_wo_read", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"PC_MS_misses", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"PC_soft_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"PC_hard_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"PC_port1_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"PC_snoop_inv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE /*?*/},
+ {"SW_count_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_COUNT},
+ {"SW_count_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_COUNT},
+ {"Dispatch0_IC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Dispatch0_mispred", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Dispatch0_br_target", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Dispatch0_2nd_br", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Dispatch_rs_mispred", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Rstall_storeQ", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_STORE},
+ {"Rstall_FP_use", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Rstall_IU_use", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"EC_write_hit_RTO", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"Re_RAW_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD},
+ {"Re_DC_missovhd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD},
+ {"Re_endian_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD},
+ {"Re_FPU_bypass", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Re_DC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD},
+ {"Re_EC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD},
+ {"Re_PC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD},
+ {"SI_snoop", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"SI_ciq_flow", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"SI_owned", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_msl_busy_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC},
+ {"MC_mdb_overflow_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC},
+ {"MC_page_close_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC},
+ {"MC_reads_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_reads_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_reads_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_reads_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_writes_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_writes_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_writes_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_writes_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_stalls_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC},
+ {"MC_stalls_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC},
+ {"MC_stalls_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC},
+ {"MC_stalls_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC},
+
+ /* additional (hidden) aliases, for convenience */
+ {"cycles0", "Cycle_cnt", 0, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"cycles1", "Cycle_cnt", 1, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"insts0", "Instr_cnt", 0, NULL, PRELOADS_75, 0, ABST_NONE},
+ {"insts1", "Instr_cnt", 1, NULL, PRELOADS_75, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+static Hwcentry usIVplist[] = { /* Counter table, presumably for UltraSPARC IV+ (inferred from the name -- TODO confirm).  Row layout as it appears here (field names are not visible in this chunk; confirm against the Hwcentry declaration): name, mapped counter or NULL, register (REGNO_ANY, 0 or 1), STXT description or NULL, PRELOAD value, time-conversion value (presumably; it is 1 on the cycle and stall rows), ABST class. */
+ {"cycles", "Cycle_cnt", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"insts", "Instr_cnt", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
+ {"icm", "IC_fill", REGNO_ANY, STXT ("I$ Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"dcrm", "DC_rd_miss", REGNO_ANY, STXT ("D$ Read Misses"), PRELOADS_5, 0, ABST_LOAD},
+ {"dcwm", "DC_wr_miss", REGNO_ANY, STXT ("D$ Write Misses"), PRELOADS_5, 0, ABST_STORE},
+ {"dcr", "DC_rd", REGNO_ANY, STXT ("D$ Read Refs"), PRELOADS_6, 0, ABST_LOAD},
+ {"dcw", "DC_wr", REGNO_ANY, STXT ("D$ Write Refs"), PRELOADS_6, 0, ABST_STORE},
+ {"itlbm", "ITLB_miss", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"dtlbm", "DTLB_miss", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_5, 0, ABST_US_DTLBM},
+ {"l2ref", "L2_ref", REGNO_ANY, STXT ("L2$ Refs"), PRELOADS_5, 0, ABST_LDST},
+ {"l2m", "L2_miss", REGNO_ANY, STXT ("L2$ Misses"), PRELOADS_5, 0, ABST_LDST},
+ {"l2rm", "L2_rd_miss", REGNO_ANY, STXT ("L2$ Read Misses"), PRELOADS_5, 0, ABST_LOAD},
+ {"l2im", "L2_IC_miss", REGNO_ANY, STXT ("L2$ Instr. Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"ecm", "L3_miss", REGNO_ANY, STXT ("E$ Misses"), PRELOADS_5, 0, ABST_LDST},
+ {"ecrm", "L3_rd_miss", REGNO_ANY, STXT ("E$ Read Misses"), PRELOADS_5, 0, ABST_LOAD},
+ {"ecml", "SSM_L3_miss_local", REGNO_ANY, STXT ("E$ Local Misses"), PRELOADS_5, 0, ABST_LDST},
+ {"ecmr", "SSM_L3_miss_remote", REGNO_ANY, STXT ("E$ Remote Misses"), PRELOADS_5, 0, ABST_LDST},
+ {"ecim", "L3_IC_miss", REGNO_ANY, STXT ("E$ Instr. Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"icstall", "Dispatch0_IC_miss", REGNO_ANY, STXT ("I$ Stall Cycles"), PRELOADS_6, 1, ABST_NONE},
+ {"dcstall", "Re_DC_miss", REGNO_ANY, STXT ("D$ and E$ Stall Cycles"), PRELOADS_6, 1, ABST_LOAD},
+ {"ecstall", "Re_L3_miss", REGNO_ANY, STXT ("E$ Stall Cycles"), PRELOADS_6, 1, ABST_LOAD},
+ {"sqstall", "Rstall_storeQ", REGNO_ANY, STXT ("StoreQ Stall Cycles"), PRELOADS_6, 1, ABST_STORE},
+ {"rawstall", "Re_RAW_miss", REGNO_ANY, STXT ("RAW Stall Cycles"), PRELOADS_6, 1, ABST_LOAD},
+ {"dcmissov", "Re_DC_missovhd", REGNO_ANY, STXT ("DC Miss Ovhd"), PRELOADS_6, 1, ABST_LOAD},
+ {"fpustall", "Re_FPU_bypass", REGNO_ANY, STXT ("FPU Stall Cycles"), PRELOADS_6, 1, ABST_NONE},
+ {"fpusestall", "Rstall_FP_use", REGNO_ANY, STXT ("FPU Use Stall Cycles"), PRELOADS_6, 1, ABST_NONE},
+ {"iustall", "Rstall_IU_use", REGNO_ANY, STXT ("IU Stall Cycles"), PRELOADS_6, 1, ABST_NONE},
+ {"fpadd", "FA_pipe_completion", REGNO_ANY, STXT ("FP Adds"), PRELOADS_6, 0, ABST_NONE},
+ {"fpmul", "FM_pipe_completion", REGNO_ANY, STXT ("FP Muls"), PRELOADS_6, 0, ABST_NONE},
+
+ /* Explicit definitions of (hidden) entries for the raw counters themselves: the second field is NULL and no description is given. */
+ /* Only counters that can be time-converted, or that are load/store counters, need to be listed in this section. */
+ {"Cycle_cnt", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
+ {"DC_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"DC_rd_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"DC_wr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"DC_wr_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"DTLB_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_US_DTLBM},
+ {"Dispatch0_2nd_br", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Dispatch0_IC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Dispatch0_other", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2L3_snoop_cb_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/},
+ {"L2L3_snoop_inv_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/},
+ {"L2_hit_I_state_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST /*?*/},
+ {"L2_hit_other_half", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST},
+ {"L2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST},
+ {"L2_rd_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"L2_ref", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST},
+ {"L2_snoop_cb_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/},
+ {"L2_snoop_inv_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC /*?*/},
+ {"L2_wb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"L2_wb_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"L2_write_hit_RTO", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"L2_write_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"L3_hit_I_state_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST},
+ {"L3_hit_other_half", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST},
+ {"L3_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST},
+ {"L3_rd_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"L3_wb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"L3_wb_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"L3_write_hit_RTO", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"L3_write_miss_RTO", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE},
+ {"MC_reads_0_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_reads_1_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_reads_2_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_reads_3_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_stalls_0_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC},
+ {"MC_stalls_1_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC},
+ {"MC_stalls_2_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC},
+ {"MC_stalls_3_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC},
+ {"MC_writes_0_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_writes_1_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_writes_2_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"MC_writes_3_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ /*? {"PC_MS_misses", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD}, */
+ {"PC_hard_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"PC_inv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE /*?*/},
+ {"PC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"PC_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"PC_soft_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LOAD},
+ {"Re_DC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD},
+ {"Re_DC_missovhd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD},
+ {"Re_FPU_bypass", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Re_L2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD},
+ {"Re_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD},
+ {"Re_PFQ_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Re_RAW_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_LOAD},
+ {"Rstall_FP_use", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Rstall_IU_use", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Rstall_storeQ", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_STORE},
+ {"SI_RTO_src_data", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"SI_RTS_src_data", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"SI_ciq_flow_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NOPC},
+ {"SI_owned_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"SI_snoop_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NOPC},
+ {"ecml", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST}, /* NOTE(review): "ecml" and "ecmr" (next row) are alias names already defined in the visible-alias section above, not raw counter names -- confirm these two rows are intentional */
+ {"ecmr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST},
+ {"SSM_L3_miss_local", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST /*?*/},
+ {"SSM_L3_miss_mtag_remote", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST /*?*/},
+ {"SSM_L3_miss_remote", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_LDST /*?*/},
+ {"SSM_L3_wb_remote", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_STORE /*?*/},
+ {"SSM_new_transaction_sh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_TBD /*?*/},
+
+ /* Additional (hidden) aliases, for convenience: the same counters with the third field set to 0 or 1 instead of REGNO_ANY (presumably a fixed register number). */
+ {"cycles0", "Cycle_cnt", 0, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"cycles1", "Cycle_cnt", 1, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"insts0", "Instr_cnt", 0, NULL, PRELOADS_75, 0, ABST_NONE},
+ {"insts1", "Instr_cnt", 1, NULL, PRELOADS_75, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* last row, NULL name: presumably the end-of-table marker.  NOTE(review): 9 initializers here versus 7 in the rows above, so ABST_NONE does not land in the same field as the other ABST values -- confirm this is harmless */
+};
+
+static Hwcentry niagara1[] =
+ /* Counter table for CPC_ULTRA_T1, "UltraSPARC T1". */{
+ {"insts", "Instr_cnt", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
+#ifndef WORKAROUND_6231196_NIAGARA1_NO_CTR_0 /* XXX: the rows down to the matching #endif are compiled out when this workaround macro is defined, since the register 0 counters don't work */
+ {"icm", "IC_miss", REGNO_ANY, STXT ("I$ Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"itlbm", "ITLB_miss", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"ecim", "L2_imiss", REGNO_ANY, STXT ("E$ Instr. Misses"), PRELOADS_4, 0, ABST_NONE},
+ {"dcm", "DC_miss", REGNO_ANY, STXT ("D$ Misses"), PRELOADS_5, 0, ABST_EXACT},
+ {"dtlbm", "DTLB_miss", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_5, 0, ABST_EXACT},
+ {"ecdm", "L2_dmiss_ld", REGNO_ANY, STXT ("E$ Data Misses"), PRELOADS_4, 0, ABST_EXACT},
+ {"flops", "FP_instr_cnt", REGNO_ANY, STXT ("Floating-point Ops"), PRELOADS_6, 0, ABST_NONE},
+
+ /* Explicit definitions of (hidden) entries for the raw counters themselves: the second field is NULL and no description is given. */
+ /* Only counters that can be time-converted, or that are load/store counters, need to be listed in this section. */
+ {"SB_full", NULL, REGNO_ANY, NULL, PRELOADS_6, 1, ABST_NONE},
+ {"DC_miss", NULL, REGNO_ANY, NULL, PRELOADS_6, 0, ABST_EXACT},
+ {"DTLB_miss", NULL, REGNO_ANY, NULL, PRELOADS_6, 0, ABST_EXACT},
+ {"L2_dmiss_ld", NULL, REGNO_ANY, NULL, PRELOADS_6, 0, ABST_EXACT},
+#endif /* !WORKAROUND_6231196_NIAGARA1_NO_CTR_0 */
+
+ /* Additional (hidden) alias, for convenience: Instr_cnt with the third field set to 1 instead of REGNO_ANY (presumably a fixed register number). */
+ {"insts1", "Instr_cnt", 1, NULL, PRELOADS_75, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* last row, NULL name: presumably the end-of-table marker.  NOTE(review): 9 initializers here versus 7 in the rows above -- confirm ABST_NONE lands in the intended field */
+};
+
+static Hwcentry niagara2[] = {
+ /* Counter table for CPC_ULTRA_T2, "UltraSPARC T2" ... */
+ /* ... and for "UltraSPARC T2+".  NOTE(review): the original comment cited CPC_ULTRA_T2 for T2+ as well; confirm whether T2+ has its own CPC id. */
+ {"insts", "Instr_cnt", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
+ {"loads", "Instr_ld", REGNO_ANY, STXT ("Load Instructions"), PRELOADS_7, 0, ABST_EXACT},
+ {"stores", "Instr_st", REGNO_ANY, STXT ("Store Instructions"), PRELOADS_6, 0, ABST_EXACT},
+ {"dcm", "DC_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_6, 0, ABST_EXACT},
+ {"dtlbm", "DTLB_miss", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"l2drm", "L2_dmiss_ld", REGNO_ANY, STXT ("L2 D-cache Read Misses (See Bug 15664448)"), PRELOADS_5, 0, ABST_EXACT},
+ {"icm", "IC_miss", REGNO_ANY, STXT ("L1 I-cache Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"itlbm", "ITLB_miss", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"l2im", "L2_imiss", REGNO_ANY, STXT ("L2 I-cache Misses"), PRELOADS_4, 0, ABST_NONE},
+
+ /* Explicit definitions of (hidden) entries for the raw counters themselves: the second field is NULL and no description is given. */
+ /* Only counters that can be time-converted, or that are load/store counters, need to be listed in this section. */
+ {"Instr_ld", NULL, REGNO_ANY, NULL, PRELOADS_7, 0, ABST_EXACT},
+ {"Instr_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT},
+ {"Atomics", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT},
+ {"DC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT},
+ {"L2_dmiss_ld", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT},
+ {"DTLB_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+ {"DES_3DES_busy_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"AES_busy_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Kasumi_busy_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"MD5_SHA-1_SHA-256_busy_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"MA_busy_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ /* Additional (hidden) alias, for convenience: Instr_cnt with the third field set to 1 instead of REGNO_ANY (presumably a fixed register number). */
+ {"insts1", "Instr_cnt", 1, NULL, PRELOADS_75, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* last row, NULL name: presumably the end-of-table marker.  NOTE(review): 9 initializers here versus 7 in the rows above -- confirm ABST_NONE lands in the intended field */
+};
+
+static Hwcentry sparc_t4[] = {
+ // Identical to sparc_t5_m6[] except for the l3m_spec row, which uses a different emask encoding.
+ // When updating this table, also update sparc_t5_m6[].
+ // Obsolete aliases are marked with REGNO_INVALID (kept so that older experiments can still be read).
+ {"l2l3dh", "DC_miss_L2_L3_hit_nospec", REGNO_INVALID, STXT ("L2 or L3 D-cache Hits"), PRELOADS_6, 0, ABST_EXACT}, // undercounts due to thread-hog issue
+ {"l3m", "DC_miss_remote_L3_hit_nospec~emask=0x6", REGNO_INVALID, STXT ("L3 D-cache Misses"), PRELOADS_5, 0, ABST_EXACT}, // undercounts due to thread-hog issue
+ {"lmh", "DC_miss_local_hit_nospec", REGNO_INVALID, STXT ("Local Mem. Hits"), PRELOADS_5, 0, ABST_EXACT}, // undercounts due to thread-hog issue
+ {"rmh", "DC_miss_remote_L3_hit_nospec", REGNO_INVALID, STXT ("Remote Mem. Hits"), PRELOADS_5, 0, ABST_EXACT}, // undercounts due to thread-hog issue
+ {"pqs", "PQ_tag_wait", REGNO_INVALID, STXT ("Pick Queue Stalls"), PRELOADS_7, 1, ABST_NONE}, // old alias name
+ {"raw_stb", "RAW_hit_st_buf", REGNO_INVALID, STXT ("RAW Hazard in Store Buffer"), PRELOADS_55, 0, ABST_NONE}, // 11@full hit, 60@partial hit (in future, combine w/st_q)
+ {"raw_stq", "RAW_hit_st_q", REGNO_INVALID, STXT ("RAW Hazard in Store Queue"), PRELOADS_55, 0, ABST_NONE}, // 11@full hit, 60@partial hit (in future, combine w/st_buf)
+ {"sel_stalls", "Sel_0_ready", REGNO_INVALID, STXT ("Stalls Another Thread Selected"), PRELOADS_7, 1, ABST_NONE},
+ {"icm", "IC_miss", REGNO_INVALID, STXT ("L1 I-Cache Misses"), PRELOADS_55, 0, ABST_NONE}, // 20@ l2/l3 hit (guess)
+ {"icm_stalls", "IC_miss", REGNO_INVALID, STXT ("L1 I-Cache Miss Est Stalls"), PRELOADS_55, 25, ABST_NONE}, // 25@ l2-20/l3-50
+
+ // Current aliases.
+ SPARC_CYCLES // macro defined outside this chunk; presumably expands to complete table row(s) including their trailing comma -- confirm
+ {"cycles", "Cycles_user", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "Instr_all", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+ {"c_stalls", "Commit_0", REGNO_ANY, STXT ("Stall Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"loads", "Instr_ld", REGNO_ANY, STXT ("Load Instructions"), PRELOADS_7, 0, ABST_EXACT},
+ {"stores", "Instr_st", REGNO_ANY, STXT ("Store Instructions"), PRELOADS_7, 0, ABST_EXACT},
+ {"dcm", "DC_miss_nospec", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_EXACT},
+ {"l3m_spec", "DC_miss_local_hit~emask=0x6", REGNO_ANY, STXT ("L3 D-cache Speculative Misses"), PRELOADS_5, 0, ABST_NONE, STXT ("Loads that speculatively missed local L3")}, // T4 encoding (430 lm, 690 rm) ~5 misses overlap on t5/pico_ile
+ // {"l3m_spec", "DC_miss_local_hit~emask=0x30", REGNO_ANY, STXT("L3 D-cache Speculative Misses"),PRELOADS_5,0, ABST_NONE, STXT("Loads that speculatively missed local L3")}, // T5/M6 encoding (430 lm, 690 rm) ~5 misses overlap on t5/pico_ile
+ {"lmh_spec", "DC_miss_local_hit", REGNO_ANY, STXT ("Local Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE},
+ {"rmh_spec", "DC_miss_remote_L3_hit", REGNO_ANY, STXT ("Remote Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE},
+ //
+ {"dtlbm", "DTLB_miss_asynch", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_55, 0, ABST_NONE}, // 10@l1 hit, 24@l2 hit, 60@l3 hit, 500@l3 miss, 5000@trap 0.001 events/cycle
+ {"dtlb_hwtw_stalls", "DTLB_HWTW_all", REGNO_ANY, STXT ("DTLB HWTW Est Stalls"), PRELOADS_55, 25, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss requiring a HW tablewalk")}, // l2-20, l3-50
+ {"dtlb_trap_stalls", "DTLB_fill_trap", REGNO_ANY, STXT ("DTLB Trap Est Stalls"), PRELOADS_35, 5000, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss with HW tablewalk unsuccessful")}, // 5000@trap
+ {"rawhaz", "RAW_hit_st_q~emask=0xf", REGNO_ANY, STXT ("Read-after-write Hazards"), PRELOADS_55, 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write hazards)")},
+ {"br_msp_stalls", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict Stalls"), PRELOADS_6, 24, ABST_NONE, STXT ("Estimated time stalled on Branch mispredictions")}, // 24@miss, %5 of branches is bad
+ {"br_msp", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict"), PRELOADS_6, 0, ABST_NONE}, // 24@miss, %5 of branches is bad
+ {"br_tkn", "Br_taken", REGNO_ANY, STXT ("Branch Taken"), PRELOADS_7, 0, ABST_NONE}, // 2 cycles minimum
+ {"br_ins", "Branches", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE}, // 24@miss, %5 of branches is bad
+ {"fgu", "Instr_FGU_crypto", REGNO_ANY, STXT ("FP/VIS/Crypto Instructions"), PRELOADS_7, 0, ABST_NONE}, // 1 cycle/event
+
+ /* Explicit definitions of (hidden) entries for the raw counters themselves: the second field is NULL and no description is given. */
+ /* Counters that can be time-converted, that support memspace, or that have a short_desc (the optional 8th field) need to be listed in this section. */
+
+ {"Sel_pipe_drain_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting with correct instructions when pipeline has to drain after branch misprediction")},
+ {"Sel_0_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting for various conditions to be resolved")},
+ {"Sel_0_ready", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread was ready to have its instructions selected but another hardware thread was selected instead")},
+ {"Sel_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles that only 1 instruction or uop was selected")},
+ {"Sel_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles that 2 instructions or uops were selected")},
+
+ {"Pick_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Pick_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Pick_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Pick_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Pick_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"Branches", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Control transfer instructions completed, excluding trap-related transfers")},
+ {"Instr_FGU_crypto", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("FP and VIS instructions completed by the Floating Point and Graphics Unit")},
+ {"Instr_ld", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Load instructions completed")},
+ {"Instr_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Store instructions completed")},
+ {"SPR_ring_ops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Specialized instructions that require internal use of SPR ring completed")},
+ {"Instr_other", NULL, REGNO_ANY, NULL, PRELOAD (2, 4), 0, ABST_NONE, STXT ("Basic arithmetic and logical instructions completed")},
+ {"Instr_all", NULL, REGNO_ANY, NULL, PRELOAD (1, 4), 0, ABST_NONE, STXT ("Total instructions completed")},
+
+ {"Br_taken", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branch instructions taken and completed")},
+ {"Sw_count_intr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("SW Count instructions completed")},
+ {"Atomics", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_EXACT, STXT ("Atomic instructions, including CASA/XA, completed")},
+ {"SW_prefetch", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("PREFETCH and PREFETCHA instructions completed")},
+ {"Block_ld_st", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_EXACT, STXT ("Block load/store instructions completed")},
+
+ {"BTC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branches delayed a few extra cycles because branch target not found in Branch Target Cache")},
+
+ {"ITLB_fill_8KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 8K page")},
+ {"ITLB_fill_64KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 64K page")},
+ {"ITLB_fill_4MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 4M page")},
+ {"ITLB_fill_256MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 256M page")},
+ {"ITLB_fill_2GB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 2G or 16G page")},
+ {"ITLB_fill_trap", NULL, REGNO_ANY, NULL, PRELOAD (1000, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk unsuccessful")},
+ {"ITLB_miss_asynch", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk search done")},
+
+ {"Fetch_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_0_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"Instr_buffer_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"PQ_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"ROB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"ROB_LB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"ROB_SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LB_SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"ROB_LB_SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"DTLB_miss_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"ITLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L2D")},
+ {"ITLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L3 or neighbor L2D")},
+ {"ITLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk missed all local caches")},
+ {"DTLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L2D")},
+ {"DTLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L3 or neighbor L2D")},
+ {"DTLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk missed all local caches")},
+ {"DTLB_HWTW_all", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss requiring HW tablewalk")},
+
+ {"DC_miss_L2_L3_hit_nospec", NULL, REGNO_ANY, NULL, PRELOAD (25, 4), 0, ABST_EXACT},
+ {"DC_miss_local_hit_nospec", NULL, REGNO_ANY, NULL, PRELOAD (500, 4), 0, ABST_EXACT},
+ {"DC_miss_remote_L3_hit_nospec", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_EXACT},
+ {"DC_miss_nospec", NULL, REGNO_ANY, NULL, PRELOAD (25, 4), 0, ABST_EXACT, STXT ("Loads that missed local L1D")},
+
+ {"DTLB_fill_8KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 8K page")},
+ {"DTLB_fill_64KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 64K page")},
+ {"DTLB_fill_4MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 4M page")},
+ {"DTLB_fill_256MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 256M page")},
+ {"DTLB_fill_2GB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 2G or 16G page")},
+ {"DTLB_fill_trap", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk unsuccessful")},
+ {"DTLB_miss_asynch", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk search done")},
+ {"RAW_hit_st_buf", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) still in store buffer not yet committed")},
+ {"RAW_hit_st_q", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) committed but in store queue not yet written to L2D")},
+
+ {"St_q_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"St_hit_L2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L2D")},
+ {"St_hit_L3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L3")},
+
+ {"DC_miss_L2_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local L2D or L3")},
+ {"DC_miss_local_hit", NULL, REGNO_ANY, NULL, PRELOAD (500, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local memory")},
+ {"DC_miss_remote_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit remote cache or remote memory")},
+ {"DC_miss", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_NONE, STXT ("Loads that speculatively missed L1D")},
+
+ {"L2_pipe_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"Br_dir_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose direction was mispredicted")},
+ {"Br_trg_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose target was mispredicted")},
+ {"Br_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose direction or target was mispredicted")},
+
+ {"Cycles_user", NULL, REGNO_ANY, NULL, PRELOAD (1, 4), 1, ABST_NONE, STXT ("Cycles hardware thread is active in specified mode(s)")},
+ //
+ {"Commit_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from this hardware thread")},
+ {"Commit_0_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from any hardware thread on this core")},
+ {"Commit_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 1 uop commits from this hardware thread")},
+ {"Commit_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 2 uops commit from this hardware thread")},
+ {"Commit_1_or_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 1 or 2 uops commit from this hardware thread")},
+
+ /* Additional (hidden) aliases, for convenience: the same counters with the third field set to 0 or 1 instead of REGNO_ANY (presumably a fixed register number). */
+ {"cycles0", "Cycles_user", 0, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"cycles1", "Cycles_user", 1, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"insts0", "Instr_all", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "Instr_all", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} // last row, NULL name: presumably the end-of-table marker.  NOTE(review): 9 initializers here versus 7-8 in the rows above -- confirm ABST_NONE lands in the intended field
+};
+
+static Hwcentry sparc_t5_m6[] = {
+ // Identical to sparc_t4 except for: l3m_spec
+ // when updating this table, also update sparc_t4[]
+ // obsolete aliases marked with REGNO_INVALID (allows reading of older experiments)
+ {"l2l3dh", "DC_miss_L2_L3_hit_nospec", REGNO_INVALID, STXT ("L2 or L3 D-cache Hits"), PRELOADS_6, 0, ABST_EXACT}, // undercounts due to thread-hog issue
+ {"l3m", "DC_miss_remote_L3_hit_nospec~emask=0x6", REGNO_INVALID, STXT ("L3 D-cache Misses"), PRELOADS_5, 0, ABST_EXACT}, // undercounts due to thread-hog issue
+ {"lmh", "DC_miss_local_hit_nospec", REGNO_INVALID, STXT ("Local Mem. Hits"), PRELOADS_5, 0, ABST_EXACT}, // undercounts due to thread-hog issue
+ {"rmh", "DC_miss_remote_L3_hit_nospec", REGNO_INVALID, STXT ("Remote Mem. Hits"), PRELOADS_5, 0, ABST_EXACT}, // undercounts due to thread-hog issue
+ {"pqs", "PQ_tag_wait", REGNO_INVALID, STXT ("Pick Queue Stalls"), PRELOADS_7, 1, ABST_NONE}, // old alias name
+ {"raw_stb", "RAW_hit_st_buf", REGNO_INVALID, STXT ("RAW Hazard in Store Buffer"), PRELOADS_55, 0, ABST_NONE}, // 11@full hit, 60@partial hit (in future, combine w/st_q)
+ {"raw_stq", "RAW_hit_st_q", REGNO_INVALID, STXT ("RAW Hazard in Store Queue"), PRELOADS_55, 0, ABST_NONE}, // 11@full hit, 60@partial hit (in future, combine w/st_buf)
+ {"sel_stalls", "Sel_0_ready", REGNO_INVALID, STXT ("Stalls Another Thread Selected"), PRELOADS_7, 1, ABST_NONE},
+ {"icm", "IC_miss", REGNO_INVALID, STXT ("L1 I-Cache Misses"), PRELOADS_55, 0, ABST_NONE}, // 20@ l2/l3 hit (guess)
+ {"icm_stalls", "IC_miss", REGNO_INVALID, STXT ("L1 I-Cache Miss Est Stalls"), PRELOADS_55, 25, ABST_NONE}, // 25@ l2-20/l3-50
+
+ // current aliases
+ SPARC_CYCLES
+ {"cycles", "Cycles_user", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "Instr_all", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+ {"c_stalls", "Commit_0", REGNO_ANY, STXT ("Stall Cycles"), PRELOADS_7, 1, ABST_NONE},
+
+ {"loads", "Instr_ld", REGNO_ANY, STXT ("Load Instructions"), PRELOADS_7, 0, ABST_EXACT},
+ {"stores", "Instr_st", REGNO_ANY, STXT ("Store Instructions"), PRELOADS_7, 0, ABST_EXACT},
+ {"dcm", "DC_miss_nospec", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_EXACT},
+ // {"l3m_spec", "DC_miss_local_hit~emask=0x6", REGNO_ANY, STXT("L3 D-cache Speculative Misses"),PRELOADS_5,0, ABST_NONE, STXT("Loads that speculatively missed local L3")}, // T4 encoding (430 lm, 690 rm) ~5 misses overlap on t5/pico_ile
+ {"l3m_spec", "DC_miss_local_hit~emask=0x30", REGNO_ANY, STXT ("L3 D-cache Speculative Misses"), PRELOADS_5, 0, ABST_NONE, STXT ("Loads that speculatively missed local L3")}, // T5/M6 encoding (430 lm, 690 rm) ~5 misses overlap on t5/pico_ile
+ {"lmh_spec", "DC_miss_local_hit", REGNO_ANY, STXT ("Local Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE},
+ {"rmh_spec", "DC_miss_remote_L3_hit", REGNO_ANY, STXT ("Remote Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE},
+ //
+ {"dtlbm", "DTLB_miss_asynch", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_55, 0, ABST_NONE}, // 10@l1 hit, 24@l2 hit, 60@l3 hit, 500@l3 miss, 5000@trap 0.001 events/cycle
+ {"dtlb_hwtw_stalls", "DTLB_HWTW_all", REGNO_ANY, STXT ("DTLB HWTW Est Stalls"), PRELOADS_55, 25, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss requiring a HW tablewalk")}, // l2-20, l3-50
+ {"dtlb_trap_stalls", "DTLB_fill_trap", REGNO_ANY, STXT ("DTLB Trap Est Stalls"), PRELOADS_35, 5000, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss with HW tablewalk unsuccessful")}, // 5000@trap
+ {"rawhaz", "RAW_hit_st_q~emask=0xf", REGNO_ANY, STXT ("Read-after-write Hazards"), PRELOADS_55, 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write hazards)")},
+ {"br_msp_stalls", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict Stalls"), PRELOADS_6, 24, ABST_NONE, STXT ("Estimated time stalled on Branch mispredictions")}, // 24@miss, %5 of branches is bad
+ {"br_msp", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict"), PRELOADS_6, 0, ABST_NONE}, // 24@miss, %5 of branches is bad
+ {"br_tkn", "Br_taken", REGNO_ANY, STXT ("Branch Taken"), PRELOADS_7, 0, ABST_NONE}, // 2 cycles minimum
+ {"br_ins", "Branches", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE}, // 24@miss, %5 of branches is bad
+ {"fgu", "Instr_FGU_crypto", REGNO_ANY, STXT ("FP/VIS/Crypto Instructions"), PRELOADS_7, 0, ABST_NONE}, // 1 cycle/event
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Counters that can be time converted, support memspace, or have a short_desc need to be in this table */
+
+ {"Sel_pipe_drain_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting with correct instructions when pipeline has to drain after branch misprediction")},
+ {"Sel_0_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting for various conditions to be resolved")},
+ {"Sel_0_ready", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread was ready to have its instructions selected but another hardware thread was selected instead")},
+ {"Sel_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles that only 1 instruction or uop was selected")},
+ {"Sel_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles that 2 instructions or uops were selected")},
+
+ {"Pick_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Pick_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Pick_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Pick_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Pick_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"Branches", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Control transfer instructions completed, excluding trap-related transfers")},
+ {"Instr_FGU_crypto", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("FP and VIS instructions completed by the Floating Point and Graphics Unit")},
+ {"Instr_ld", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Load instructions completed")},
+ {"Instr_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Store instructions completed")},
+ {"SPR_ring_ops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Specialized instructions that require internal use of SPR ring completed")},
+ {"Instr_other", NULL, REGNO_ANY, NULL, PRELOAD (2, 4), 0, ABST_NONE, STXT ("Basic arithmetic and logical instructions completed")},
+ {"Instr_all", NULL, REGNO_ANY, NULL, PRELOAD (1, 4), 0, ABST_NONE, STXT ("Total instructions completed")},
+
+ {"Br_taken", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branch instructions taken and completed")},
+ {"Sw_count_intr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("SW Count instructions completed")},
+ {"Atomics", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_EXACT, STXT ("Atomic instructions, including CASA/XA, completed")},
+ {"SW_prefetch", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("PREFETCH and PREFETCHA instructions completed")},
+ {"Block_ld_st", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_EXACT, STXT ("Block load/store instructions completed")},
+
+ {"BTC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branches delayed a few extra cycles because branch target not found in Branch Target Cache")},
+
+ {"ITLB_fill_8KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 8K page")},
+ {"ITLB_fill_64KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 64K page")},
+ {"ITLB_fill_4MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 4M page")},
+ {"ITLB_fill_256MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 256M page")},
+ {"ITLB_fill_2GB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 2G or 16G page")},
+ {"ITLB_fill_trap", NULL, REGNO_ANY, NULL, PRELOAD (1000, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk unsuccessful")},
+ {"ITLB_miss_asynch", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk search done")},
+
+ {"Fetch_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_0_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"Instr_buffer_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"PQ_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"ROB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"ROB_LB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"ROB_SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LB_SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"ROB_LB_SB_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"DTLB_miss_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"ITLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L2D")},
+ {"ITLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L3 or neighbor L2D")},
+ {"ITLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk missed all local caches")},
+ {"DTLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L2D")},
+ {"DTLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L3 or neighbor L2D")},
+ {"DTLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk missed all local caches")},
+ {"DTLB_HWTW_all", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss requiring HW tablewalk")},
+
+ {"DC_miss_L2_L3_hit_nospec", NULL, REGNO_ANY, NULL, PRELOAD (25, 4), 0, ABST_EXACT},
+ {"DC_miss_local_hit_nospec", NULL, REGNO_ANY, NULL, PRELOAD (500, 4), 0, ABST_EXACT},
+ {"DC_miss_remote_L3_hit_nospec", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_EXACT},
+ {"DC_miss_nospec", NULL, REGNO_ANY, NULL, PRELOAD (25, 4), 0, ABST_EXACT, STXT ("Loads that missed local L1D")},
+
+ {"DTLB_fill_8KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 8K page")},
+ {"DTLB_fill_64KB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 64K page")},
+ {"DTLB_fill_4MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 4M page")},
+ {"DTLB_fill_256MB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 256M page")},
+ {"DTLB_fill_2GB", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 2G or 16G page")},
+ {"DTLB_fill_trap", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk unsuccessful")},
+ {"DTLB_miss_asynch", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk search done")},
+ {"RAW_hit_st_buf", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) still in store buffer not yet committed")},
+ {"RAW_hit_st_q", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) committed but in store queue not yet written to L2D")},
+
+ {"St_q_tag_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"St_hit_L2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L2D")},
+ {"St_hit_L3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L3")},
+
+ {"DC_miss_L2_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local L2D or L3")},
+ {"DC_miss_local_hit", NULL, REGNO_ANY, NULL, PRELOAD (500, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local memory")},
+ {"DC_miss_remote_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit remote cache or remote memory")},
+ {"DC_miss", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_NONE, STXT ("Loads that speculatively missed L1D")},
+
+ {"L2_pipe_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"Br_dir_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose direction was mispredicted")},
+ {"Br_trg_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose target was mispredicted")},
+ {"Br_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose direction or target was mispredicted")},
+
+ {"Cycles_user", NULL, REGNO_ANY, NULL, PRELOAD (1, 4), 1, ABST_NONE, STXT ("Cycles hardware thread is active in specified mode(s)")},
+ //
+ {"Commit_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from this hardware thread")},
+ {"Commit_0_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from any hardware thread on this core")},
+ {"Commit_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 1 uop commits from this hardware thread")},
+ {"Commit_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 2 uops commit from this hardware thread")},
+ {"Commit_1_or_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 1 or 2 uops commit from this hardware thread")},
+
+ /* additional (hidden) aliases, for convenience */
+ {"cycles0", "Cycles_user", 0, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"cycles1", "Cycles_user", 1, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"insts0", "Instr_all", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "Instr_all", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+static Hwcentry sparc_m7[] = {
+ /* Hardware-counter table named sparc_m7: labelled aliases first, then the
+    (hidden) per-counter rows, then hidden convenience aliases, ended by an
+    all-NULL row.
+
+    NOTE(review): Hwcentry is declared outside this view, so the column
+    meanings listed here are read off the rows and the comments in this
+    table, not off the declaration -- confirm before relying on them:
+      1. name accepted for the counter or alias;
+      2. counter the alias stands for, optionally with a "~emask=" suffix
+         (NULL on rows that name a counter directly);
+      3. REGNO_ANY, REGNO_INVALID, or an explicit number (0 or 1 on the
+         cycles0/cycles1/insts0/insts1 rows);
+      4. STXT () label, NULL on the hidden rows;
+      5. a PRELOADS_*, PRELOAD (...) or PRELOAD_DEF value;
+      6. an integer that is 1 on every cycle-counting row, 0 on the
+         event-counting rows, and larger (24, 25, 80, 5000) on the four
+         stall-estimate aliases -- presumably the time-conversion factor
+         referred to further down;
+      7. ABST_EXACT or ABST_NONE;
+      8. optional STXT () one-line description.
+
+    The rows immediately below are obsolete aliases marked with REGNO_INVALID (allows reading of older experiments).  */
+ {"icm", "IC_miss_commit", REGNO_INVALID, STXT ("L1 I-Cache Misses"), PRELOADS_6, 0, ABST_EXACT},
+ {"raw_stb", "RAW_hit_st_buf", REGNO_INVALID, STXT ("RAW Hazard in Store Buffer"), PRELOADS_55, 0, ABST_NONE},
+ {"raw_stq", "RAW_hit_st_q", REGNO_INVALID, STXT ("RAW Hazard in Store Queue"), PRELOADS_55, 0, ABST_NONE},
+ {"pqs", "PQ_tag_wait_cyc", REGNO_INVALID, STXT ("Pick Queue Stalls"), PRELOADS_7, 1, ABST_NONE},
+ {"sel_stalls", "Sel_0_ready_cyc", REGNO_INVALID, STXT ("Stalls Another Thread Selected"), PRELOADS_7, 1, ABST_NONE},
+
+ // current aliases
+ SPARC_CYCLES // macro defined outside this view; there is no comma after it here, so presumably its expansion ends with one -- confirm
+ {"cycles", "Cycles_user", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "Instr_all", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+ {"c_stalls", "Commit_0_cyc", REGNO_ANY, STXT ("Stall Cycles"), PRELOADS_7, 1, ABST_NONE},
+
+ {"loads", "Instr_ld", REGNO_ANY, STXT ("Load Instructions"), PRELOADS_7, 0, ABST_EXACT},
+ {"stores", "Instr_st", REGNO_ANY, STXT ("Store Instructions"), PRELOADS_6, 0, ABST_EXACT},
+ {"dcm", "DC_miss_commit", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_6, 0, ABST_EXACT},
+
+ {"l3m_spec", "DC_miss_L3_miss", REGNO_ANY, STXT ("L3 D-cache Speculative Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"lmh_spec", "DC_miss_local_mem_hit", REGNO_ANY, STXT ("Local Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE},
+ {"rmh_spec", "DC_miss_remote_mem_hit", REGNO_ANY, STXT ("Remote Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE},
+ // DTLB, read-after-write, branch, FGU and register-window aliases
+ {"dtlbm", "DTLB_HWTW_search", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_55, 0, ABST_NONE}, // 10@l1 hit, 24@l2 hit, 60@l3 hit, 500@l3 miss, 5000@trap 0.001 events/cycle
+ {"dtlb_hwtw_stalls", "DTLB_HWTW_ref", REGNO_ANY, STXT ("DTLB HWTW Est Stalls"), PRELOADS_55, 25, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss requiring a HW tablewalk")}, // l2-20, l3-50
+ {"dtlb_trap_stalls", "DTLB_HWTW_miss_trap", REGNO_ANY, STXT ("DTLB Trap Est Stalls"), PRELOADS_35, 5000, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss with HW tablewalk unsuccessful")}, // 5000@trap
+ {"rawhaz", "RAW_hit_st_q~emask=0xf", REGNO_ANY, STXT ("Read-after-write Hazards"), PRELOADS_55, 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write hazards)")},
+ {"br_msp_stalls", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict Stalls"), PRELOADS_6, 24, ABST_NONE, STXT ("Estimated time stalled on Branch mispredictions")}, // 24@miss, %5 of branches is bad
+ {"br_msp", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict"), PRELOADS_6, 0, ABST_NONE},
+ {"br_tkn", "Br_taken", REGNO_ANY, STXT ("Branch Taken"), PRELOADS_7, 0, ABST_NONE},
+ {"br_ins", "Branches", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"fgu", "Instr_FGU_crypto", REGNO_ANY, STXT ("FP/VIS/Crypto Instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"spill_fill", "Flush_arch_exception", REGNO_ANY, STXT ("Reg Window Spill/Fill Est Stalls"), PRELOAD (100, 4), 80, ABST_NONE, STXT ("Estimated time stalled on flushing pipeline due to register window spill/fill")},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Counters that can be time converted, support memspace, or have a short_desc need to be in this table */
+ {"Sel_pipe_drain_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting with correct instructions when pipeline has to drain after branch misprediction")},
+ {"Sel_0_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting for various conditions to be resolved")},
+ {"Sel_0_ready_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread was ready to have its instructions selected but another hardware thread was selected instead")},
+ {"Sel_1_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles that only 1 instruction or uop was selected")},
+ {"Sel_2_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles that 2 instructions or uops were selected")},
+
+ {"Pick_0_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Pick_1_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Pick_2_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Pick_3_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Pick_any_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"Branches", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Control transfer instructions completed, excluding trap-related transfers")},
+ {"Instr_FGU_crypto", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("FP and VIS instructions completed by the Floating Point and Graphics Unit")},
+ {"Instr_ld", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Load instructions completed")},
+ {"Instr_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Store instructions completed")},
+ {"Instr_SPR_ring_ops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Specialized instructions that require internal use of SPR ring completed")},
+ {"Instr_other", NULL, REGNO_ANY, NULL, PRELOAD (2, 4), 0, ABST_NONE, STXT ("Basic arithmetic and logical instructions completed")},
+ {"Instr_all", NULL, REGNO_ANY, NULL, PRELOAD (1, 4), 0, ABST_NONE, STXT ("Total instructions completed")},
+
+ {"Br_taken", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branch instructions taken and completed")},
+ {"Instr_SW_count", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("SW Count instructions completed")},
+ {"Instr_atomic", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_EXACT, STXT ("Atomic instructions, including CASA/XA, completed")},
+ {"Instr_SW_prefetch", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("PREFETCH and PREFETCHA instructions completed")},
+ {"Instr_block_ld_st", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_EXACT, STXT ("Block load/store instructions completed")},
+
+ {"Br_BTC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branches delayed a few extra cycles because branch target not found in Branch Target Cache")},
+
+ {"ITLB_HWTW_hit_8K", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 8K page")},
+ {"ITLB_HWTW_hit_64K", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 64K page")},
+ {"ITLB_HWTW_hit_4M", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 4M page")},
+ {"ITLB_HWTW_hit_256M", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 256M page")},
+ {"ITLB_HWTW_hit_2G_16G", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 2G or 16G page")},
+ {"ITLB_HWTW_miss_trap", NULL, REGNO_ANY, NULL, PRELOAD (1000, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk unsuccessful")},
+ {"ITLB_HWTW_search", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk search done")},
+
+ {"Fetch_0_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_0_all_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"Instr_buffer_full_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"PQ_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"ROB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"SB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"ROB_LB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"ROB_SB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LB_SB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"ROB_LB_SB_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"DTLB_miss_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"ITLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L2D")},
+ {"ITLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L3 or neighbor L2D")},
+ {"ITLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk missed all local caches")},
+ {"DTLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L2D")},
+ {"DTLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L3 or neighbor L2D")},
+ {"DTLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk missed all local caches")},
+ {"DTLB_HWTW_ref", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss requiring HW tablewalk")},
+
+ {"DC_miss_L2_L3_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD (25, 4), 0, ABST_EXACT},
+ {"DC_miss_nbr_scc_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD (500, 4), 0, ABST_EXACT},
+ {"DC_miss_nbr_scc_miss_commit", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_EXACT},
+ {"DC_miss_commit", NULL, REGNO_ANY, NULL, PRELOAD (25, 4), 0, ABST_EXACT, STXT ("Loads that missed local L1D")},
+
+ {"DTLB_HWTW_hit_8K", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 8K page")},
+ {"DTLB_HWTW_hit_64K", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 64K page")},
+ {"DTLB_HWTW_hit_4M", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 4M page")},
+ {"DTLB_HWTW_hit_256M", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 256M page")},
+ {"DTLB_HWTW_hit_2G_16G", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 2G or 16G page")},
+ {"DTLB_HWTW_miss_trap", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk unsuccessful")},
+ {"DTLB_HWTW_search", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk search done")},
+ {"RAW_hit_st_buf", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) still in store buffer not yet committed")},
+ {"RAW_hit_st_q", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) committed but in store queue not yet written to L2D")},
+
+ {"St_q_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"St_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L2D")},
+ {"St_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L3")},
+
+ {"DC_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Loads that speculatively hit local L1D")},
+ {"DC_miss_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (20, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local L2D")},
+ {"DC_miss_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local L3")},
+ {"DC_miss_nbr_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (100, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit neighbor L2D via local L3")},
+ {"DC_miss_nbr_scc_hit", NULL, REGNO_ANY, NULL, PRELOAD (100, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit neighbor L3 on same socket")},
+ {"DC_miss_nbr_scc_miss", NULL, REGNO_ANY, NULL, PRELOAD (400, 4), 0, ABST_NONE, STXT ("Loads that speculatively missed all caches on same socket")},
+ {"DC_miss", NULL, REGNO_ANY, NULL, PRELOAD (10, 4), 0, ABST_NONE, STXT ("Loads that speculatively missed local L1D")},
+ {"DC_miss_L2_miss", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Loads that speculatively missed local L2D")},
+ {"DC_miss_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (200, 4), 0, ABST_NONE, STXT ("Loads that speculatively missed local L3")},
+
+ {"DC_miss_remote_scc_hit", NULL, REGNO_ANY, NULL, PRELOAD (800, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit remote cache on different socket")},
+ {"DC_miss_local_mem_hit", NULL, REGNO_ANY, NULL, PRELOAD (500, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit local memory")},
+ {"DC_miss_remote_mem_hit", NULL, REGNO_ANY, NULL, PRELOAD (1000, 4), 0, ABST_NONE, STXT ("Loads that speculatively hit remote memory")},
+ {"Br_dir_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose direction was mispredicted")},
+ {"Br_tgt_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose target was mispredicted")},
+ {"Br_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Branch instructions completed whose direction or target was mispredicted")},
+
+ {"Cycles_user", NULL, REGNO_ANY, NULL, PRELOAD (1, 4), 1, ABST_NONE, STXT ("Cycles hardware thread is active in specified mode(s)")},
+
+ {"Flush_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Pipeline flushes due to a load that misses L3 when more than 1 hardware thread is active on the core")},
+ {"Flush_br_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Pipeline flushes due to a branch misprediction")},
+ {"Flush_arch_exception", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Pipeline flushes due to SPARC architecture exceptions and trap entry/return")},
+ {"Flush_other", NULL, REGNO_ANY, NULL, PRELOAD (40, 4), 0, ABST_NONE, STXT ("Pipeline flushes due to hardware thread state change to/from halted/paused state")},
+ // cycle counters classified by how many uops commit
+ {"Commit_0_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from this hardware thread")},
+ {"Commit_0_all_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from any hardware thread on this core")},
+ {"Commit_1_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 1 uop commits from this hardware thread")},
+ {"Commit_2_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 2 uops commit from this hardware thread")},
+ {"Commit_1_or_2_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles 1 or 2 uops commit from this hardware thread")},
+
+
+ /* additional (hidden) aliases, for convenience */
+ /* These four rows carry an explicit 0 or 1 in the third column instead of REGNO_ANY. */
+ {"cycles0", "Cycles_user", 0, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"cycles1", "Cycles_user", 1, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"insts0", "Instr_all", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "Instr_all", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} // presumably the end-of-table sentinel (NULL name).  NOTE(review): nine initializers here versus seven or eight on the data rows -- confirm ABST_NONE lands in the intended field
+};
+
+static Hwcentry sparc_m8[] = {
+ // current aliases
+ SPARC_CYCLES
+ {"cycles", "Cycles_user", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "Instr_all", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+ {"c_stalls", "Commit_0_cyc", 3, STXT ("Stall Cycles"), PRELOADS_7, 1, ABST_NONE}, // 22825776: limit to reg 3
+ {"Sel_0_wait_cyc", "Sel_0_cyc~emask=0x3f", REGNO_ANY, STXT ("Select Stall Cycles"), PRELOADS_7, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting for various conditions to be resolved that prevent it being selected")},
+
+ {"loads", "Instr_ld", REGNO_ANY, STXT ("Load Instructions"), PRELOADS_7, 0, ABST_EXACT},
+ {"stores", "Instr_st", REGNO_ANY, STXT ("Store Instructions"), PRELOADS_6, 0, ABST_EXACT},
+ {"dcm", "DC_miss_commit", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_6, 0, ABST_EXACT},
+
+ {"lmh_spec", "DC_miss_local_mem_hit", REGNO_ANY, STXT ("Local Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE},
+ {"rmh_spec", "DC_miss_remote_mem_hit", REGNO_ANY, STXT ("Remote Mem Speculative Hits"), PRELOADS_5, 0, ABST_NONE},
+
+ {"dtlbm", "DTLB_HWTW", REGNO_ANY, STXT ("DTLB Misses"), PRELOAD (40, 5), 0, ABST_NONE}, // 10@l1 hit, 24@l2 hit, 60@l3 hit, 500@l3 miss, 5000@trap 0.001 events/cycle
+ {"dtlb_hwtw_stalls", "DTLB_HWTW", REGNO_ANY, STXT ("DTLB HWTW Est Stalls"), PRELOAD (40, 5), 25, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss requiring a HW tablewalk")}, // l2-20, l3-50
+ {"dtlb_trap_stalls", "DTLB_HWTW_miss_trap", REGNO_ANY, STXT ("DTLB Trap Est Stalls"), PRELOAD (800, 5), 5000, ABST_NONE, STXT ("Estimated time stalled on a DTLB miss with HW tablewalk unsuccessful")}, // 5000@trap
+ {"rawhaz", "RAW_hit", REGNO_ANY, STXT ("Read-after-write Hazards"), PRELOAD (40, 5), 0, ABST_NONE},
+ {"br_msp_stalls", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict Stalls"), PRELOAD (40, 5), 24, ABST_NONE, STXT ("Estimated time stalled on Branch mispredictions")}, // 24@miss, %5 of branches is bad
+ {"br_msp", "Br_mispred", REGNO_ANY, STXT ("Branch Mispredict"), PRELOAD (40, 5), 0, ABST_NONE},
+ {"br_tkn", "Br_taken", REGNO_ANY, STXT ("Branch Taken"), PRELOADS_7, 0, ABST_NONE},
+ {"br_ins", "Branches", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"fgu", "Instr_FGU_crypto", REGNO_ANY, STXT ("FP/VIS/Crypto Instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"spill_fill", "Flush_spill_fill", REGNO_ANY, STXT ("Reg Window Spill/Fill Est Stalls"), PRELOAD (100, 5), 80, ABST_NONE, STXT ("Estimated time stalled on flushing pipeline due to register window spill/fill")},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Counters that can be time converted, support memspace, or have a short_desc need to be in this table */
+ //0x01
+ {"Fetch_stall_IFU_reset_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_stall_IC_miss_MB_full_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_stall_IC_miss_MB_avail_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_stall_IC_miss_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_stall_ITLB_miss_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_stall_SEL_buf_full_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_ready_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_0_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_0_all_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ //0x02
+ {"Fetch_1_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_2_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_3_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_4_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_5_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_6_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_7_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_8_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ //0x07
+ {"ITLB_HWTW_hit_8K", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 8K page")},
+ {"ITLB_HWTW_hit_64K", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 64K page")},
+ {"ITLB_HWTW_hit_4M", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 4M page")},
+ {"ITLB_HWTW_hit_256M", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 256M page")},
+ {"ITLB_HWTW_hit_16G", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 16G page")},
+ {"ITLB_HWTW_hit_1T", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk successfully loaded translation for 1T page")},
+ // { "ITLB_HWTW_miss_RA2PAC", 0x0740, 0xf07ff },
+ // { "ITLB_HWTW_miss_not_RA2PAC", 0x0780, 0xf07ff },
+ {"ITLB_HWTW_miss_trap", NULL, REGNO_ANY, NULL, PRELOAD (1000, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk unsuccessful")},
+ {"ITLB_HWTW", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk search done")},
+ //0x08
+ {"Br_BTC_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branches delayed a few extra cycles because branch target not found in Branch Target Cache")},
+ //0x09
+ {"Sel_0_no_instr_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select because no instructions are available")},
+ {"Sel_0_pipe_drain_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting with correct instructions when pipeline has to drain after branch misprediction")},
+ {"Sel_0_postsync_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select waiting for prior instructions to commit")},
+ {"Sel_0_presync_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select with instruction that cannot decode until prior instructions have committed")},
+ {"Sel_0_thread_hog_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select to prevent strand monopolizing resources")},
+ {"Sel_0_tag_stall_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread stalls at Select because no required tags are available")},
+ {"Sel_0_ready_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread was ready to have its instructions selected but another hardware thread was selected instead")},
+ {"Sel_0_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles a hardware thread is not selected")},
+ // No direct equivalent Sel_1/2_cyc. Nearest is Decode_uop, which increments by 0-4 each cycle according to how many uops were decoded.
+ //0x13
+ {"ITLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L2D")},
+ {"ITLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk hit local L3 or neighbor L2D")},
+ {"ITLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 5), 0, ABST_NONE, STXT ("ITLB miss and HW tablewalk missed all local caches")},
+ {"DTLB_HWTW_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L2D")},
+ {"DTLB_HWTW_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (80, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk hit local L3 or neighbor L2D")},
+ {"DTLB_HWTW_L3_miss", NULL, REGNO_ANY, NULL, PRELOAD (800, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk missed all local caches")},
+ {"DTLB_HWTW_ref", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss requiring HW tablewalk")},
+ //0x0E
+ {"Instr_FGU_crypto", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("FP and VIS instructions completed by the Floating Point and Graphics Unit")},
+ {"Instr_ld", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Load instructions completed")},
+ {"Instr_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Store instructions completed")},
+ {"Instr_block_ld_st", NULL, REGNO_ANY, NULL, PRELOAD (20, 5), 0, ABST_EXACT, STXT ("Block load/store instructions completed")},
+ {"Instr_SPR_ring_ops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("Specialized instructions that require internal use of SPR ring completed")},
+ {"Instr_atomic", NULL, REGNO_ANY, NULL, PRELOAD (20, 5), 0, ABST_EXACT, STXT ("Atomic instructions, including CASA/XA, completed")},
+ {"Instr_SW_prefetch", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT, STXT ("PREFETCH and PREFETCHA instructions completed")},
+ {"Instr_other", NULL, REGNO_ANY, NULL, PRELOAD (2, 5), 0, ABST_NONE, STXT ("Basic arithmetic and logical instructions completed")},
+ {"Instr_all", NULL, REGNO_ANY, NULL, PRELOAD (1, 5), 0, ABST_NONE, STXT ("Total instructions completed")},
+ //0x0F
+ {"Branches", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Control transfer instructions completed, excluding trap-related transfers")},
+ //0x10
+ {"Br_taken", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Branch instructions taken and completed")},
+ //0x11
+ {"Rename_tag_wait_PQ_1_EXU_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Rename_tag_wait_PQ_0_LSU_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Rename_wait_crypto_diag_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Sel_0_wait_ROB_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Sel_0_wait_WRF_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Sel_0_wait_LB_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Sel_0_wait_SB_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ //0x12
+ {"Fetch_stall_BDA_tag_unavail_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_stall_BTA_tag_unavail_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_stall_misc_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"Fetch_stall_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"MMU_TTE_buffer_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"MMU_PRQ_pool_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ //0x15
+ {"L2I_request_block_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2I_thread_hog_stall_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2I_MB_full_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2I_snoop_eviction", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2I_stall_no_request_credit_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2I_stall_no_response_credit_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ //0x16
+ {"Flush_thread_hog", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes to prevent thread from monopolizing resources")},
+ {"Flush_br_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to a branch misprediction")},
+ {"Flush_arch_exception", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to SPARC architecture exceptions and trap entry/return")},
+ {"Flush_evil_twin", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to detecting floating point evil twin condition")},
+ {"Flush_LSU_trap", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes to refetch Next-PC")},
+ {"Flush_mode_change", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to strand mode change")},
+ {"Flush_misalign", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to detecting misaligned load/store requiring transition to misaligned mitigation mode")},
+ {"Flush_other", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to hardware thread state change to/from halted/paused state")},
+ {"Flush_all", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to any reason")},
+ //0x17
+ {"Flush_spill_n_normal", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to spill_n_normal exception")},
+ {"Flush_spill_n_other", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to spill_n_other exception")},
+ {"Flush_fill_n_normal", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to fill_n_normal exception")},
+ {"Flush_fill_n_other", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to fill_n_other exception")},
+ {"Flush_spill_fill", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to spill/fill exceptions")},
+ {"Flush_lost_load", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Pipeline flushes due to speculatively executed load violating memory order")},
+ //0x21
+ {"Br_dir_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Branch instructions completed whose direction was mispredicted")},
+ {"Br_tgt_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Branch instructions completed whose target was mispredicted")},
+ {"Br_mispred", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Branch instructions completed whose direction or target was mispredicted")},
+ //0x23
+ {"LSU_st_q_tag_wait_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LSU_st_q_tag_wait_all_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2D_stall_no_request_credit_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2D_stall_no_response_credit_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ //0x27
+ {"DC_miss_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (20, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit local L2D")},
+ {"DC_miss_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit local L3")},
+ {"DC_miss_L3_dirty_copyback", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit local L3 but require copyback from L2D within same CPC")},
+ {"DC_miss_nbr_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (100, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit neighbor L3 on same socket")},
+ {"DC_miss_remote_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (400, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit remote cache on different socket")},
+ {"DC_miss_local_mem_hit", NULL, REGNO_ANY, NULL, PRELOAD (500, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit local memory")},
+ {"DC_miss_remote_mem_hit", NULL, REGNO_ANY, NULL, PRELOAD (1000, 5), 0, ABST_NONE, STXT ("Loads that speculatively hit remote memory")},
+ {"DC_miss", NULL, REGNO_ANY, NULL, PRELOAD (10, 5), 0, ABST_NONE, STXT ("Loads that speculatively missed local L1D")},
+ //0x28
+ {"DC_sec_miss_L2_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT},
+ {"DC_miss_L2_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT},
+ {"DC_miss_L3_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT},
+ {"DC_miss_L3_dirty_copyback_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT},
+ {"DC_miss_nbr_L3_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT},
+ {"DC_miss_remote_L3_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT},
+ {"DC_miss_local_mem_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT},
+ {"DC_miss_remote_mem_hit_commit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_EXACT},
+ {"DC_miss_commit", NULL, REGNO_ANY, NULL, PRELOAD (25, 5), 0, ABST_EXACT, STXT ("Loads that missed local L1D")},
+ //0x29
+ // {"Store_DC_sec_miss_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT("")},
+ {"Store_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (20, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L2D")},
+ {"Store_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local L3")},
+ {"Store_nbr_L2_hit", NULL, REGNO_ANY, NULL, PRELOAD (100, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in neighbor L2 on same socket")},
+ {"Store_nbr_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (100, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in neighbor L3 on same socket")},
+ {"Store_remote_L3_hit", NULL, REGNO_ANY, NULL, PRELOAD (400, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in remote cache on different socket")},
+ {"Store_local_mem_hit", NULL, REGNO_ANY, NULL, PRELOAD (500, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in local memory")},
+ {"Store_remote_mem_hit", NULL, REGNO_ANY, NULL, PRELOAD (1000, 5), 0, ABST_NONE, STXT ("Stores whose cacheline being updated was in remote memory")},
+ {"Store_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE, STXT ("Stores whose cacheline being updated was observed to be somewhere in the memory hierarchy")},
+ //0x2d
+ {"RAW_hit_st_buf", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) still in store buffer not yet committed")},
+ {"RAW_hit_st_q", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write) committed but in store queue not yet written to L2D")},
+ {"RAW_hit", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("Loads delayed by a previous store (read-after-write hazards)")},
+ //0x2f
+ {"Cycles_user_non_MLA", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"Cycles_user_MLA", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"Cycles_user", NULL, REGNO_ANY, NULL, PRELOAD (1, 5), 1, ABST_NONE, STXT ("Cycles hardware thread is active in specified mode(s)")},
+ //0x37
+ {"DTLB_HWTW_hit_8K", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 8K page")},
+ {"DTLB_HWTW_hit_64K", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 64K page")},
+ {"DTLB_HWTW_hit_4M", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 4M page")},
+ {"DTLB_HWTW_hit_256M", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 256M page")},
+ {"DTLB_HWTW_hit_16G", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 16G page")},
+ {"DTLB_HWTW_hit_1T", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk successfully loaded translation for 1T page")},
+ {"DTLB_HWTW_miss_trap", NULL, REGNO_ANY, NULL, PRELOAD (800, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk unsuccessful")},
+ {"DTLB_HWTW", NULL, REGNO_ANY, NULL, PRELOAD (40, 5), 0, ABST_NONE, STXT ("DTLB miss and HW tablewalk search done")},
+ //0x3f
+ {"Commit_0_cyc", /*22825776*/ NULL, 3, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from this hardware thread")},
+ {"Commit_0_all_cyc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE, STXT ("Cycles no uop commits from any hardware thread on this core")},
+ // Similar situation to Sel_1_cyc etc. No direct equivalent, nearest is Commit_uop, which increments by 0-4 each cycle according to how many uops were committed.
+
+ /* additional (hidden) aliases, for convenience */
+ {"cycles0", "Cycles_user", 0, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"cycles1", "Cycles_user", 1, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"insts0", "Instr_all", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "Instr_all", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+static Hwcentry usfuji_V_list[] = { /* Hardware counter table usfuji_V_list.
+   NOTE(review): presumably for the Fujitsu SPARC64 V processor, judging
+   only by the identifier -- confirm where the table is selected.
+   Each row is a positional Hwcentry initializer.  As used here the
+   fields are: counter name, name of the counter it aliases (NULL when
+   the row defines a counter directly), register number (REGNO_ANY or an
+   explicit index), description string (NULL on the rows the comments
+   below call hidden), a PRELOADS_* or PRELOAD_DEF macro, a 0/1 flag and
+   an ABST_* constant.
+   NOTE(review): the 0/1 flag looks like the "can be time converted"
+   marker referred to below (it is 1 only on the cycle rows) -- confirm
+   against the Hwcentry declaration.  */
+ {"cycles", "cycle_counts", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"insts", "instruction_counts", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
+ {"flops", "floating_instructions", REGNO_ANY, STXT ("Floating-point Ops"), PRELOADS_6, 0, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or that are load-store, need to be in this table */
+ {"cycle_counts", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
+ {"load_store_instructions", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+
+ /* additional (hidden) aliases for convenience: the same counters as
+    "cycles"/"insts" above, but with an explicit register number (0 or 1)
+    in place of REGNO_ANY */
+ {"cycles0", "cycle_counts", 0, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"cycles1", "cycle_counts", 1, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"insts0", "instruction_counts", 0, NULL, PRELOADS_75, 0, ABST_NONE},
+ {"insts1", "instruction_counts", 1, NULL, PRELOADS_75, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* all-NULL/zero row closing the list; presumably the end marker for code walking the table -- confirm */
+};
+
+static Hwcentry usfuji_VI_VII_list[] = { /* Hardware counter table usfuji_VI_VII_list.
+   NOTE(review): presumably shared by the Fujitsu SPARC64 VI and VII
+   processors, judging only by the identifier -- confirm where the table
+   is selected.
+   Each row is a positional Hwcentry initializer.  As used here the
+   fields are: counter name, name of the counter it aliases (NULL when
+   the row defines a counter directly), register number (REGNO_ANY or an
+   explicit index), description string (NULL on the rows the comments
+   below call hidden), a PRELOADS_* or PRELOAD_DEF macro, a 0/1 flag and
+   an ABST_* constant.
+   NOTE(review): the 0/1 flag looks like the "can be time converted"
+   marker referred to below (it is 1 on the cycle, stall and wait rows)
+   -- confirm against the Hwcentry declaration.
+   The first group of rows gives short names with descriptions to
+   selected counters.  */
+ {"cycles", "cycle_counts", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "instruction_counts", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+ {"dcm", "op_r_iu_req_mi_go", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"dcstall", "op_wait_all", REGNO_ANY, STXT ("L1 D-cache Stall Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"dtlbm", "write_op_uTLB", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_5, 0, ABST_NONE},
+ // l2m: the mem_cache_load test shows an undercount of 3x; however, we don't care too much about this chip, so the alias is kept for now
+ {"l2m", "sx_miss_count_dm", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_5, 0, ABST_NONE}, /*YXXX undercounts?*/
+ {"l2wm", "dvp_count_dm", REGNO_ANY, STXT ("L2 Cache Writeback Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"l2ref", "sx_read_count_dm", REGNO_ANY, STXT ("L2 Cache Refs"), PRELOADS_6, 0, ABST_NONE},
+ {"l2stall", "sx_miss_wait_dm", REGNO_ANY, STXT ("L2 Cache Stall Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"icm", "if_r_iu_req_mi_go", REGNO_ANY, STXT ("L1 I-cache Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"icstall", "if_wait_all", REGNO_ANY, STXT ("L1 I-cache Stall Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"itlbm", "write_if_uTLB", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"flops", "floating_instructions", REGNO_ANY, STXT ("Floating-point Ops"), PRELOADS_7, 0, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or that are load-store, need to be in this table */
+ {"cycle_counts", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"op_stv_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"load_store_instructions", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+ {"active_cycle_count", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_sxmiss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"branch_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"write_op_uTLB", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+ {"sx_miss_wait_pf", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"sx_miss_wait_dm", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_nc_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_sxmiss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"eu_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"sx_miss_count_dm", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+ {"fl_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_r_iu_req_mi_go", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+ {"sx_miss_count_dm_if", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+ {"op_stv_wait_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"swpf_lbs_hit", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+ {"sx_read_count_dm", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+ {"trap_DMMU_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+ {"op_wait_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"sx_miss_count_dm_opex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+ {"if_wait_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"dvp_count_dm", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+ {"sx_miss_count_dm_opsh", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 0, ABST_NONE},
+
+ /* additional (hidden) aliases for convenience: the same counters as
+    "cycles"/"insts" above, but with an explicit register number (0 or 1)
+    in place of REGNO_ANY */
+ {"cycles0", "cycle_counts", 0, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"cycles1", "cycle_counts", 1, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"insts0", "instruction_counts", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "instruction_counts", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* all-NULL/zero row closing the list; presumably the end marker for code walking the table -- confirm */
+};
+
+
+static Hwcentry usfuji_X_list[] = { /* Hardware counter table usfuji_X_list.
+   NOTE(review): presumably for the Fujitsu SPARC64 X processor, judging
+   only by the identifier -- confirm where the table is selected.
+   Each row is a positional Hwcentry initializer.  As used here the
+   fields are: counter name, name of the counter it aliases (NULL when
+   the row defines a counter directly), register number (REGNO_ANY or an
+   explicit index), description string (NULL on the rows the comments
+   below call hidden), a PRELOADS_* or PRELOAD_DEF macro, a 0/1 flag and
+   an ABST_* constant.
+   NOTE(review): the 0/1 flag looks like the "can be time converted"
+   marker referred to below (it is 1 on the cycle, stall and wait rows)
+   -- confirm against the Hwcentry declaration.  */
+ {"cycles", "cycle_counts", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "instruction_counts", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+ {"dcm", "L1D_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE},
+ {"dcstall", "L1D_wait_all", REGNO_ANY, STXT ("L1 D-cache Stall Cycles"), PRELOADS_7, 1, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or that are load-store, need to be in this table */
+ {"cycle_counts", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"w_op_stv_wait_nc_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"eu_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_pf_bank0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_pfp_busy_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_dm_bank0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_branch_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_sxmiss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_nc_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_pf_bank2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_eu_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_sxmiss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_sxmiss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"branch_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_dm_bank2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"d_move_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_fl_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_pfp_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"fl_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_pf_bank1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_dm_bank1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_sxmiss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_swpf", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L1D_wait_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_pf_bank3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"cse_priority_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_pfp_busy_swpf", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L1I_wait_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_dm_bank3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"single_mode_cycle_counts", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"suspend_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"sleep_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ /* additional (hidden) aliases for convenience: the same counters as
+    "cycles"/"insts" above, but with an explicit register number (0 or 1)
+    in place of REGNO_ANY */
+ {"cycles0", "cycle_counts", 0, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"cycles1", "cycle_counts", 1, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"insts0", "instruction_counts", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "instruction_counts", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* all-NULL/zero row closing the list; presumably the end marker for code walking the table -- confirm */
+};
+
+static Hwcentry usfuji_XII_list[] = { /* Hardware counter table usfuji_XII_list.
+   NOTE(review): presumably for the Fujitsu SPARC64 XII processor,
+   judging only by the identifier -- confirm where the table is selected.
+   Each row is a positional Hwcentry initializer.  As used here the
+   fields are: counter name, name of the counter it aliases (NULL when
+   the row defines a counter directly), register number (REGNO_ANY or an
+   explicit index), description string (NULL on the rows the comments
+   below call hidden), a PRELOADS_* or PRELOAD_DEF macro, a 0/1 flag and
+   an ABST_* constant.
+   NOTE(review): the 0/1 flag looks like the "can be time converted"
+   marker referred to below (it is 1 on the cycle, stall and wait rows)
+   -- confirm against the Hwcentry declaration.  */
+ {"cycles", "cycle_counts", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "instruction_counts", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+ {"dcm", "L1D_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE},
+ {"dcstall", "L1D_wait_all", REGNO_ANY, STXT ("L1 D-cache Stall Cycles"), PRELOADS_7, 1, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or that are load-store, need to be in this table */
+ {"cycle_counts", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"L1D_wait_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L1I_wait_all", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_dm_bank0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_dm_bank1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_dm_bank2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_dm_bank3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_pf_bank0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_pf_bank1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_pf_bank2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"L2_miss_wait_pf_bank3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LL_miss_wait_dm_bank0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LL_miss_wait_dm_bank1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LL_miss_wait_dm_bank2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LL_miss_wait_dm_bank3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LL_miss_wait_pf_bank0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LL_miss_wait_pf_bank1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LL_miss_wait_pf_bank2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"LL_miss_wait_pf_bank3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"branch_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"cse_priority_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"d_move_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"eu_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"fl_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"l2_sy_miss_wait_dm_part1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"l2_sy_miss_wait_dm_part2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"msgr_reqp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"msgr_rtnp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"msgs_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_l1d_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_l1d_miss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_l2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_l2_miss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_ll_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_ll_miss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_nc_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_pfp_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_pfp_busy_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_pfp_busy_swpf", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_swpf", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_sxmiss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"op_stv_wait_sxmiss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_branch_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_eu_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_fl_comp_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_l1d_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_l1d_miss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_l2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_l2_miss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_ll_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_ll_miss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_nc_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_pfp_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_pfp_busy_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_pfp_busy_swpf", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_sxmiss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"w_op_stv_wait_sxmiss_ex", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {"single_mode_cycle_counts", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"suspend_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"sleep_cycle", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ /* additional (hidden) aliases for convenience: the same counters as
+    "cycles"/"insts" above, but with an explicit register number (0 or 1)
+    in place of REGNO_ANY */
+ {"cycles0", "cycle_counts", 0, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"cycles1", "cycle_counts", 1, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"insts0", "instruction_counts", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "instruction_counts", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* all-NULL/zero row closing the list; presumably the end marker for code walking the table -- confirm */
+};
+
+/* Kernel profiling pseudo-chip, OBSOLETE (To support 12.3 and earlier, TBR).
+   Each row is a positional Hwcentry initializer.  In this table every row
+   repeats the counter name in the second field, uses register number 0,
+   carries a description string, uses PRELOADS_5 and has the following
+   0/1 flag set to 1.
+   NOTE(review): "TBR" presumably means "to be removed", and the 0/1 flag
+   is presumably the time-conversion marker -- confirm both.  */
+static Hwcentry kproflist[] = {
+ {"kcycles", "kcycles", 0, STXT ("KCPU Cycles"), PRELOADS_5, 1, ABST_NONE},
+ {"kucycles", "kucycles", 0, STXT ("KUCPU Cycles"), PRELOADS_5, 1, ABST_NONE},
+ {"kthr", "kthr", 0, STXT ("KTHR Cycles"), PRELOADS_5, 1, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* all-NULL/zero row closing the list; presumably the end marker for code walking the table -- confirm */
+};
+
+static Hwcentry pentiumIIlist[] = { /* Hardware counter table pentiumIIlist.
+   NOTE(review): presumably for the Intel Pentium II, judging only by the
+   identifier -- confirm where the table is selected.
+   Each row is a positional Hwcentry initializer.  As used here the
+   fields are: counter name, name of the counter it aliases (NULL when
+   the row defines a counter directly), register number (REGNO_ANY or an
+   explicit index), description string (NULL on the rows the comments
+   below call hidden), a PRELOADS_* macro, a 0/1 flag and an ABST_*
+   constant.
+   NOTE(review): the 0/1 flag looks like the "can be time converted"
+   marker referred to below (it is 1 only on the cycle rows) -- confirm
+   against the Hwcentry declaration.  */
+ /* note -- missing entries for dtlbm, ecm */
+ {"cycles", "cpu_clk_unhalted", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"insts", "inst_retired", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
+ {"icm", "ifu_ifetch_miss", REGNO_ANY, STXT ("I$ Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"dcrm", "dcu_m_lines_in", REGNO_ANY, STXT ("D$ Read Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"dcwm", "dcu_m_lines_out", REGNO_ANY, STXT ("D$ Write Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"flops", "flops", REGNO_ANY, STXT ("Floating-point Ops"), PRELOADS_7, 0, ABST_NONE},
+ {"itlbm", "itlb_miss", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"ecim", "l2_ifetch", REGNO_ANY, STXT ("E$ Instr. Misses"), PRELOADS_5, 0, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or that are load-store, need to be in this table */
+ {"cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
+
+ /* additional (hidden) aliases for convenience: the same counters as
+    "cycles"/"insts" above, but with an explicit register number (0 or 1)
+    in place of REGNO_ANY */
+ {"cycles0", "cpu_clk_unhalted", 0, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"cycles1", "cpu_clk_unhalted", 1, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"insts0", "inst_retired", 0, NULL, PRELOADS_75, 0, ABST_NONE},
+ {"insts1", "inst_retired", 1, NULL, PRELOADS_75, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* all-NULL/zero row closing the list; presumably the end marker for code walking the table -- confirm */
+};
+
+static Hwcentry pentiumIIIlist[] = { /* Hardware counter table pentiumIIIlist.
+   NOTE(review): presumably for the Intel Pentium III, judging only by
+   the identifier -- confirm where the table is selected.
+   Each row is a positional Hwcentry initializer.  As used here the
+   fields are: counter name, name of the counter it aliases (NULL when
+   the row defines a counter directly), register number (REGNO_ANY or an
+   explicit index), description string (NULL on the rows the comments
+   below call hidden), a PRELOADS_* macro, a 0/1 flag and an ABST_*
+   constant.
+   NOTE(review): the 0/1 flag looks like the "can be time converted"
+   marker referred to below (it is 1 only on the cycle rows) -- confirm
+   against the Hwcentry declaration.  */
+ /* note -- many missing entries; no reference machine to try */
+ {"cycles", "cpu_clk_unhalted", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"insts", "inst_retired", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or that are load-store, need to be in this table */
+ {"cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
+
+ /* additional (hidden) aliases for convenience: the same counters as
+    "cycles"/"insts" above, but with an explicit register number (0 or 1)
+    in place of REGNO_ANY */
+ {"cycles0", "cpu_clk_unhalted", 0, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"cycles1", "cpu_clk_unhalted", 1, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"insts0", "inst_retired", 0, NULL, PRELOADS_75, 0, ABST_NONE},
+ {"insts1", "inst_retired", 1, NULL, PRELOADS_75, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* all-NULL/zero row closing the list; presumably the end marker for code walking the table -- confirm */
+};
+
+/* Counter table for the Pentium 4 (going by the array name).  The alias
+   rows spell the underlying counter together with '~name=value'
+   attributes in the second field.  The hidden cycles0/1 and insts0/1
+   aliases use fixed third-field values 5, 6, 15 and 16 where the other
+   rows use REGNO_ANY (presumably counter register numbers -- TODO
+   confirm).  The all-NULL row closes the table.  */
+static Hwcentry pentium4[] = {
+ {"cycles", "TC_deliver_mode~threshold=0xf~complement=1~compare=1", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"insts", "instr_retired~emask=0x3", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
+ {"l1m", "BSQ_cache_reference~emask=0x0507", REGNO_ANY, STXT ("L1 Cache Misses"), PRELOADS_7, 0, ABST_NONE},
+ {"l2h", "BSQ_cache_reference~emask=0x0007", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_7, 0, ABST_NONE},
+ {"l2m", "BSQ_cache_reference~emask=0x0500", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or are load-store need to be in this table */
+ {"TC_deliver_mode", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"machine_clear", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ /* additional (hidden) aliases, for convenience */
+ {"cycles0", "TC_deliver_mode~threshold=0xf~complement=1~compare=1", 5, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"cycles1", "TC_deliver_mode~threshold=0xf~complement=1~compare=1", 6, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"insts0", "instr_retired~emask=0x3", 15, NULL, PRELOADS_75, 0, ABST_NONE},
+ {"insts1", "instr_retired~emask=0x3", 16, NULL, PRELOADS_75, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+/* Counter table for Intel Core2 (going by the array name).  The "cycles"
+   and "insts" aliases each appear more than once with different
+   underlying counters; the third field (SOLARIS_ONLY or LINUX_ONLY)
+   selects which of them is hidden for data collection, as explained
+   below.  Note that LINUX_ONLY and SOLARIS_ONLY are #defined inside this
+   initializer and are not #undef'd anywhere within this table.  The
+   hidden-definition rows carry a leading comment tag such as 3c or dc
+   (presumably the hardware event code -- TODO confirm).  The all-NULL
+   row closes the table.  */
+static Hwcentry intelCore2list[] = {
+ // For post-processing, both Linux and Solaris definitions need to be "live".
+ // However, for data collection, OS-specific definitions may need to be hidden.
+ // Use REGNO_INVALID for definitions that should be hidden for data collection.
+#define LINUX_ONLY REGNO_ANY
+#define SOLARIS_ONLY REGNO_INVALID /* hidden for Linux data collection */
+
+ {"cycles", "cpu_clk_unhalted.core", /*6759307*/ SOLARIS_ONLY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"cycles", "cpu_clk_unhalted.thread", /*6759307*/ SOLARIS_ONLY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ /* Linux Note: 7046312 Many HWC tests fail on system Core2 system with perf_events if above alias used */
+ {"cycles", "cpu_clk_unhalted", LINUX_ONLY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+
+ {"insts", "instr_retired.any", SOLARIS_ONLY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+ /* Linux Note: 7046312 Many HWC tests fail on system Core2 system with perf_events if above alias used */
+ {"insts", "inst_retired", LINUX_ONLY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+
+ // The following counters were identified in "Cycle Accounting Analysis on Intel Core2 Processors" by David Levinthal
+ {"uops_stalled", "rs_uops_dispatched~cmask=1~inv=1", REGNO_ANY, STXT ("uOps Stalled"), PRELOADS_7, 1, ABST_NONE},
+ {"l2m", "mem_load_retired~umask=0x08", REGNO_ANY, STXT ("L2 Line Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"dtlbm", "mem_load_retired~umask=0x10", REGNO_ANY, STXT ("L1 DTLB Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"l1m", "mem_load_retired~umask=0x02", REGNO_ANY, STXT ("L1 Line Misses"), PRELOADS_6, 0, ABST_NONE},
+ // {"stalls_resources","resource_stalls~umask=0x1f", REGNO_ANY, STXT("Resource Stalls"), PRELOADS_6, 1, ABST_NONE},
+ {"rs_full", "resource_stalls~umask=0x02", REGNO_ANY, STXT ("Reservation Station Full"), PRELOADS_6, 1, ABST_NONE},
+ {"br_miss_flush", "resource_stalls~umask=0x10", REGNO_ANY, STXT ("Mispredicted Branch Flushes"), PRELOADS_6, 1, ABST_NONE},
+ {"ld_st_full", "resource_stalls~umask=0x04", REGNO_ANY, STXT ("Load/Store Buffers Full"), PRELOADS_6, 1, ABST_NONE},
+ {"rob_full", "resource_stalls~umask=0x01", REGNO_ANY, STXT ("Reorder Buffer Full"), PRELOADS_6, 1, ABST_NONE},
+ {"slow_decode", "ild_stall", REGNO_ANY, STXT ("Slow Instruction Decode"), PRELOADS_6, 1, ABST_NONE},
+ {"br_miss", "br_cnd_missp_exec", REGNO_ANY, STXT ("Mispredicted Branches"), PRELOADS_5, 0, ABST_NONE},
+ {"ret_miss", "br_call_missp_exec", REGNO_ANY, STXT ("Mispredicted Return Calls"), PRELOADS_5, 0, ABST_NONE},
+ {"div_busy", "idle_during_div", REGNO_ANY, STXT ("Divider Unit Busy"), PRELOADS_5, 1, ABST_NONE},
+ {"fp_assists", "fp_assist", REGNO_ANY, STXT ("FP Microcode Assists"), PRELOADS_5, 0, ABST_NONE},
+ {"bus_busy", "bus_drdy_clocks~umask=0x60", REGNO_ANY, STXT ("Busy Data Bus"), PRELOADS_5, 1, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or are load-store need to be in this table */
+ {/*30a*/"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*30a*/"cpu_clk_unhalted.thread", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*03*/"store_block", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*03*/"store_block.drain_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*03*/"store_block.order", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*03*/"store_block.snoop", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*09*/"memory_disambiguation.reset", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0c*/"page_walks.cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*14*/"cycles_div_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*18*/"idle_during_div", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*19*/"delayed_bypass.load", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*21*/"l2_ads", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*23*/"l2_dbus_busy_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*32*/"l2_no_req", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*3c*/"cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*3c*/"cpu_clk_unhalted.core_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*3c*/"cpu_clk_unhalted.bus", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*3c*/"cpu_clk_unhalted.no_other", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*42*/"l1d_cache_lock.duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*62*/"bus_drdy_clocks", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*63*/"bus_lock_clocks", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*64*/"bus_data_rcv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*7a*/"bus_hit_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*7b*/"bus_hitm_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*7d*/"busq_empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*7e*/"snoop_stall_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*7f*/"bus_io_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*83*/"inst_queue", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*83*/"inst_queue.full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*86*/"cycles_l1i_mem_stalled", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*87*/"ild_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1*/"rs_uops_dispatched", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1*/"rs_uops_dispatched_port", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1*/"rs_uops_dispatched_port.0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1*/"rs_uops_dispatched_port.1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1*/"rs_uops_dispatched_port.2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1*/"rs_uops_dispatched_port.3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1*/"rs_uops_dispatched_port.4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1*/"rs_uops_dispatched_port.5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*6c*/"cycles_int", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*6c*/"cycles_int.masked", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*6c*/"cycles_int.pending_and_masked", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d2*/"rat_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d2*/"rat_stalls.rob_read_port", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d2*/"rat_stalls.partial_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d2*/"rat_stalls.flags", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d2*/"rat_stalls.fpsw", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d2*/"rat_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d2*/"rat_stalls.other_serialization_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d4*/"seg_rename_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d4*/"seg_rename_stalls.es", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d4*/"seg_rename_stalls.ds", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d4*/"seg_rename_stalls.fs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d4*/"seg_rename_stalls.gs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d4*/"seg_rename_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*dc*/"resource_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*dc*/"resource_stalls.rob_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*dc*/"resource_stalls.rs_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*dc*/"resource_stalls.ld_st", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*dc*/"resource_stalls.fpcw", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*dc*/"resource_stalls.br_miss_clear", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*dc*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ /* "Architectural" events: */
+ {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ /* additional (hidden) aliases for convenience */
+ {"cycles0", "cpu_clk_unhalted", 0, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"cycles1", "cpu_clk_unhalted", 1, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"insts0", "inst_retired", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "inst_retired", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+
+/* Counter table for Intel Nehalem (going by the array name).  The table
+   opens with the PERF_EVENTS_SW_EVENT_ALIASES and
+   USE_INTEL_REF_CYCLES (133) macros and later uses
+   PERF_EVENTS_SW_EVENT_DEFS; all three are written without a trailing
+   comma, so they presumably expand to zero or more complete,
+   comma-terminated rows -- TODO confirm against their definitions.
+   For the "*_stall" aliases the sixth field equals the "x N" factor
+   quoted in the row's description (35, 74, 180, 30, 3).  The all-NULL
+   row closes the table.  */
+static Hwcentry intelNehalemList[] = {
+ /* 6832635: on Linux, we're not seeing consistent overflows on FFCs */
+ /* 15634344==6940930: HWC overflow profiling can cause system hang on Solaris/core-i7 systems */
+ /* 17578620: counter overflow for fixed-function counters hangs systems */
+ /* same issues for intelSandyBridgeList and intelHaswellList */
+ PERF_EVENTS_SW_EVENT_ALIASES
+ USE_INTEL_REF_CYCLES (133)
+ {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+ // cpu_clk_unhalted.ref: at the ref frequency of the cpu. Should not be affected by Speedstep or Turbo.
+ // cpu_clk_unhalted.thread_p: with HT & 2 threads, 2x cycles. Affected by Speedstep and Turbo.
+
+ // PEBS (Sampling)
+ {"l2m_latency", "mem_inst_retired.latency_above_threshold", REGNO_ANY, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4, 33, ABST_EXACT_PEBS_PLUS1},
+
+ // See file hwctable.README.corei7
+ {"dch", "mem_load_retired.l1d_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE},
+ {"dcm", "0xCB~umask=0x1e", REGNO_ANY, STXT ("L1 D-Cache Misses"), PRELOADS_65, 0, ABST_NONE}, /*mem_load_retired*/
+ {"lfbdh", "mem_load_retired.hit_lfb", REGNO_ANY, STXT ("LFB D-cache Hits"), PRELOADS_65, 0, ABST_NONE},
+ {"l2h", "mem_load_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE},
+ {"l2m", "0xCB~umask=0x1c", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, /*mem_load_retired*/
+ {"l3h", "mem_load_retired.llc_unshared_hit", REGNO_ANY, STXT ("L3 Cache Hit w/o Snoop"), PRELOADS_6, 0, ABST_NONE},
+ {"l3h_stall", "mem_load_retired.llc_unshared_hit", REGNO_ANY, STXT ("L3 Cache Hit w/o Snoop x 35: Est. Stalls"), PRELOADS_6, 35, ABST_NONE},
+ {"l3hsnoop", "mem_load_retired.other_core_l2_hit_hitm", REGNO_ANY, STXT ("L3 Cache Hit w/Snoop"), PRELOADS_6, 0, ABST_NONE},
+ {"l3hsnoop_stall", "mem_load_retired.other_core_l2_hit_hitm", REGNO_ANY, STXT ("L3 Cache Hit w/Snoop x 74: Est. Stalls"), PRELOADS_6, 74, ABST_NONE},
+ {"l3m", "mem_load_retired.llc_miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"l3m_stall", "mem_load_retired.llc_miss", REGNO_ANY, STXT ("L3 Cache Misses x 180: Estimated Stalls"), PRELOADS_5, 180, ABST_NONE},
+ {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"dtlbm_stall", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6, 30, ABST_NONE},
+ {"addr_alias_stall", "partial_address_alias", REGNO_ANY, STXT ("Partial Address Aliases x 3: Est. Stalls"), PRELOADS_6, 3, ABST_NONE},
+ {"uope_stall", "uops_executed.port234~cmask=1~inv=1", REGNO_ANY, STXT ("UOP Execute Stalls per Core"), PRELOADS_7, 1, ABST_NONE},
+ {"uopr_stall", "uops_retired.any~cmask=1~inv=1", REGNO_ANY, STXT ("UOP Retired Stalls"), PRELOADS_7, 1, ABST_NONE},
+ {"itlbm", "itlb_miss_retired", REGNO_ANY, STXT ("ITLB Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"l1i_stall", "l1i.cycles_stalled", REGNO_ANY, STXT ("L1 I-cache Stalls"), PRELOADS_6, 1, ABST_NONE},
+ {"br_rets", "br_inst_retired.all_branches", REGNO_ANY, STXT ("Branch Instruction Retires"), PRELOADS_7, 0, ABST_NONE},
+ {"br_misp", "br_misp_exec.any", REGNO_ANY, STXT ("Branch Mispredicts"), PRELOADS_6, 0, ABST_NONE},
+ {"mach_clear", "machine_clears.cycles", REGNO_ANY, STXT ("Machine Clear Asserted"), PRELOADS_6, 1, ABST_NONE},
+ /* NOTE(review): the description below spells "Transistions"; it looks like a typo for "Transitions" but is a runtime string, so it is left unchanged here. */
+ {"fp_mmx", "fp_mmx_trans.any", REGNO_ANY, STXT ("FP-MMX Transistions"), PRELOADS_6, 0, ABST_NONE},
+ {"div_busy", "arith.cycles_div_busy", REGNO_ANY, STXT ("Divider Busy Cycles"), PRELOADS_6, 1, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or are load-store need to be in this table */
+ {/*30a*/"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*30a*/"cpu_clk_unhalted.thread", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*04*/"sb_drain.cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*08.04*/"dtlb_load_misses.walk_cycles", /*westmere*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ //{/*0e*/"uops_issued.stalled_cycles",/*future, multibit*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*09*/"memory_disambiguation.reset", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*09*/"memory_disambiguation.watch_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0b*/"mem_inst_retired.latency_above_threshold", /*PEBS*/ NULL, REGNO_ANY, NULL, PRELOADS_4, 33, ABST_EXACT_PEBS_PLUS1}, //non-standard overflow
+ {/*14*/"arith.cycles_div_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*17*/"inst_queue_write_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*1d*/"hw_int.cycles_masked", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*1d*/"hw_int.cycles_pending_and_masked", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*3c*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*48*/"l1d_pend_miss.load_buffers_full", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*49.04*/"dtlb_misses.walk_cycles", /*westmere*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*4e*/"sfence_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*4f.10*/"ept.walk_cycles", /*westmere*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60*/"offcore_requests_outstanding.demand.read_data", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60*/"offcore_requests_outstanding.demand.read_code", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60*/"offcore_requests_outstanding.demand.rfo", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60*/"offcore_requests_outstanding.any.read", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*63*/"cache_lock_cycles.l1d", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*63*/"cache_lock_cycles.l1d_l2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*80*/"l1i.cycles_stalled", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*85*/"itlb_misses.walk_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*85*/"itlb_misses.pmh_busy_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*87*/"ild_stall.lcp", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*87*/"ild_stall.mru", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*87*/"ild_stall.iq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*87*/"ild_stall.regen", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*87*/"ild_stall.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2*/"resource_stalls.load", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2*/"resource_stalls.rs_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2*/"resource_stalls.store", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2*/"resource_stalls.rob_full", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2*/"resource_stalls.fpcw", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2*/"resource_stalls.mxcsr", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2*/"resource_stalls.other", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b0*/"offcore_requests_sq_full", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b3*/"snoopq_requests_outstanding.data", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b3*/"snoopq_requests_outstanding.invalidate", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b3*/"snoopq_requests_outstanding.code", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ //{/*c2*/"uops_retired.stalled_cycles",/*future, multibit*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c3*/"machine_clears.cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d2*/"rat_stalls.flags", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d2*/"rat_stalls.registers", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d2*/"rat_stalls.rob_read_port", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d2*/"rat_stalls.scoreboard", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d2*/"rat_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*d4*/"seg_rename_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*f6*/"sq_full_stall_cycles", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ /* "Architectural" events: */
+ {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ PERF_EVENTS_SW_EVENT_DEFS
+
+ /* additional (hidden) aliases for convenience */
+#if 0
+ USE_INTEL_REF_CYCLES (133),
+#endif
+ {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+
+/* Counter table for Intel Sandy Bridge (going by the array name).  Same
+   layout as intelNehalemList: macro-supplied rows first
+   (PERF_EVENTS_SW_EVENT_ALIASES, USE_INTEL_REF_CYCLES (100)), then the
+   named aliases, then hidden raw-counter definitions tagged with a
+   leading "xx.yy" comment (presumably event code and unit mask -- TODO
+   confirm), then PERF_EVENTS_SW_EVENT_DEFS and the hidden insts0/1
+   aliases.  The region under "#if 0" near the end is free-form notes,
+   not initializer rows, and is never compiled.  The all-NULL row closes
+   the table.  */
+static Hwcentry intelSandyBridgeList[] = {
+ /* see comments for "cycles" and "insts" for intelNehalemList */
+ PERF_EVENTS_SW_EVENT_ALIASES
+ USE_INTEL_REF_CYCLES (100)
+ {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+
+ // PEBS (sampling)
+ {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1},
+
+ // See file hwctable.README.sandybridge
+ {"dch", "mem_load_uops_retired.l1_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE},
+ {"dcm", "mem_load_uops_retired.l1_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, /*mem_load_uops_retired*/
+ {"l2h", "mem_load_uops_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE},
+ {"l2m", "mem_load_uops_retired.l2_miss", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, /*mem_load_uops_retired*/
+ // Intel errata: BT241 and BT243 says the mem_load_uops_retired.llc* counters may not be reliable on some CPU variants
+ {"l3h", "mem_load_uops_retired.llc_hit", REGNO_ANY, STXT ("L3 Cache Hit w/o Snoop"), PRELOADS_6, 0, ABST_NONE}, // may undercount
+ {"l3m", "longest_lat_cache.miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE},
+
+ /* dtlbm has not been confirmed via Intel white paper */
+ /* NOTE(review): "dtlbm" and "dtlbm_stall" are each listed twice with different underlying counters; presumably alternatives for CPU variants that name the event differently -- confirm how duplicates are resolved. */
+ {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"dtlbm_stall", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6, 30, ABST_NONE},
+ {"dtlbm", "dtlb_load_misses.demand_ld_walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"dtlbm_stall", "dtlb_load_misses.demand_ld_walk_completed", REGNO_ANY, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6, 30, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or are load-store need to be in this table */
+ {/* 30a */"cpu_clk_unhalted.thread", /*15634344==6940930*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ //{/* 30a */"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*08.04*/"dtlb_load_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*08.84*/"dtlb_load_misses.demand_ld_walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d.03*/"int_misc.recovery_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d.40*/"int_misc.rat_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0e.01*/"uops_issued.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0e.01*/"uops_issued.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*14.01*/"arith.fpu_div_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*48.01*/"l1d_pend_miss.pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*49.04*/"dtlb_store_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*59.20*/"partial_rat_stalls.flags_merge_uop", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*59.20*/"partial_rat_stalls.flags_merge_uop_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*59.40*/"partial_rat_stalls.slow_lea_window", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ //{/*59.80*/"partial_rat_stalls.mul_single_uop", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5b.0c*/"resource_stalls2.all_fl_empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5b.0f*/"resource_stalls2.all_prf_control", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5b.40*/"resource_stalls2.bob_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5b.4f*/"resource_stalls2.ooo_rsrc", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5c.01*/"cpl_cycles.ring0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5c.02*/"cpl_cycles.ring123", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5c.xx*/"cpl_cycles.ring0_trans", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5c.xx*/"cpl_cycles.ring0_transition", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5e.01*/"rs_events.empty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.01*/"offcore_requests_outstanding.demand_data_rd_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.04*/"offcore_requests_outstanding.cycles_with_demand_rfo", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.04*/"offcore_requests_outstanding.demand_rfo_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.08*/"offcore_requests_outstanding.all_data_rd_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.02*/"offcore_requests_outstanding.demand_code_rd_cycles", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*63.01*/"lock_cycles.split_lock_uc_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*63.02*/"lock_cycles.cache_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.00*/"idq.empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.04*/"idq.mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.08*/"idq.dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.10*/"idq.ms_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.20*/"idq.ms_mite_uops_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.20*/"idq.ms_mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.30*/"idq.ms_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.18*/"idq.all_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.24*/"idq.all_mite_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.24*/"idq.all_mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.24*/"idq.all_mite_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.3c*/"idq.mite_all_cycles", /* Linux, but not in docs? */ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*80.04*/"icache.ifetch_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*85.04*/"itlb_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*87.01*/"ild_stall.lcp", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*87.04*/"ild_stall.iq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.xx*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.xx*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.xx*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.xx*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.01*/"idq_uops_not_delivered.cycles_ge_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.01*/"uops_executed_port.port_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.02*/"uops_executed_port.port_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.04*/"uops_executed_port.port_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.08*/"uops_executed_port.port_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.10*/"uops_executed_port.port_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.20*/"uops_executed_port.port_5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.01*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.02*/"resource_stalls.lb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.04*/"resource_stalls.rs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.08*/"resource_stalls.sb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.0a*/"resource_stalls.lb_sb", /*sb-ep*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.0e*/"resource_stalls.mem_rs", /*sb-ep*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.10*/"resource_stalls.rob", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.20*/"resource_stalls.fcsw", /*sb*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.40*/"resource_stalls.mxcsr", /*sb*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.80*/"resource_stalls.other", /*sb*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.F0*/"resource_stalls.ooo_rsrc", /*sb-ep*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {/*a3.01*/"cycle_activity.cycles_l2_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*??.??*/"cycle_activity.stalls_l2_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.02*/"cycle_activity.cycles_ldm_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*??.??*/"cycle_activity.stalls_ldm_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.04*/"cycle_activity.cycles_no_execute", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.04*/"cycle_activity.cycles_no_dispatch", /*sandybridge*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.08*/"cycle_activity.cycles_l1d_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*??.??*/"cycle_activity.stalls_l1d_pending", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ /* NOTE(review): "uops_executed.stall_cycles" is defined twice in this group (here and two rows below) with identical fields; one of the rows looks redundant -- confirm before removing. */
+ {/*b1.??*/"uops_executed.stall_cycles", /*? not in PRM*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.01*/"uops_dispatched.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.01*/"uops_executed.stall_cycles", /*F6M62*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.01*/"uops_executed.cycles_ge_1_uop_exec", /*F6M62,not doc'd*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.01*/"uops_executed.cycles_ge_2_uops_exec", /*F6M62,not doc'd*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.01*/"uops_executed.cycles_ge_3_uops_exec", /*F6M62,not doc'd*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.01*/"uops_executed.cycles_ge_4_uops_exec", /*F6M62,not doc'd*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {/*bf.05*/"l1d_blocks.bank_conflict_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c2.01*/"uops_retired.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c2.01*/"uops_retired.total_cycles", /*cmask==0x10*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c2.01*/"uops_retired.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c2.01*/"uops_retired.active_cycles", /*cmask==0x1*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+#if 0 // need to see documentation on the following before marking them as cycles
+ uops_executed.cycles_ge_1_uop_exec[ / {0 | 1 | 2 | 3}], 1000003 (events)
+ uops_executed.cycles_ge_2_uops_exec[ /
+ {0 | 1 | 2 | 3}
+ ], 1000003 (events)
+ uops_executed.cycles_ge_3_uops_exec[ /
+ {0 | 1 | 2 | 3}
+ ], 1000003 (events)
+ uops_executed.cycles_ge_4_uops_exec[ /
+ {0 | 1 | 2 | 3}
+ ], 1000003 (events)
+#endif
+ {/*cd.01*/"mem_trans_retired.load_latency", /*PEBS*/ NULL, REGNO_ANY, NULL, PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1}, //non-standard overflow
+
+ /* "Architectural" events: */
+ {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ PERF_EVENTS_SW_EVENT_DEFS
+
+ /* additional (hidden) aliases for convenience */
+#if 0
+ USE_INTEL_REF_CYCLES (100),
+#endif
+ {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+
+static Hwcentry intelHaswellList[] = { /* Hardware-counter table for Intel Haswell.
+ * Rows that carry an STXT () description are the user-visible aliases; rows
+ * whose fourth field is NULL are the ones the section comments below label
+ * as "(hidden)".
+ * NOTE(review): the field order looks like {name, internal name, register,
+ * description, overflow preload, time-conversion factor, backtrack type} --
+ * confirm against the Hwcentry declaration.
+ * NOTE(review): several alias names ("dcm", "l2m", "l3m") appear twice with
+ * different internal names, so row order presumably matters to the lookup
+ * code -- do not reorder without checking the consumer. */
+ /* see comments for "cycles" and "insts" for intelNehalemList */
+ PERF_EVENTS_SW_EVENT_ALIASES /* macro defined elsewhere; written without a trailing comma, so it presumably expands to complete rows -- TODO confirm */
+ USE_INTEL_REF_CYCLES (100)
+ {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+
+ // PEBS (sampling)
+ {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1},
+
+ /* cache aliases; the "dcm", "l2m" and "l3m" names are each listed twice, once by event name and once as a raw 0xd1 event code with a umask */
+ {"dch", "mem_load_uops_retired.l1_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE},
+ {"dcm", "mem_load_uops_retired.l1_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, //mem_load_uops_retired
+ {"dcm", "0xd1~umask=0x08", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, //mem_load_uops_retired
+ {"l2h", "mem_load_uops_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE},
+ {"l2m", "mem_load_uops_retired.l2_miss", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, //mem_load_uops_retired
+ {"l2m", "0xd1~umask=0x10", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE}, //mem_load_uops_retired
+ {"l3h", "mem_load_uops_retired.l3_hit", REGNO_ANY, STXT ("L3 Cache Hit w/o Snoop"), PRELOADS_6, 0, ABST_NONE},
+ {"l3m", "mem_load_uops_retired.l3_miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE}, //mem_load_uops_retired
+ {"l3m", "0xd1~umask=0x20", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE}, //mem_load_uops_retired
+
+ /* dtlbm has not been confirmed via Intel white paper */
+ {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"dtlbm_stall", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses x 30: Estimated Stalls"), PRELOADS_6, 30, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or are load-store need to be in this table */
+ /* The leading comment in each row below is the author's "event.umask" annotation in hex; "xx" and "??" mark values the author left unspecified. */
+ {/* 30a */"cpu_clk_unhalted.thread", /*15634344==6940930*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ //{/* 30a */"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*08.10*/"dtlb_load_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d.03*/"int_misc.recovery_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0e.01*/"uops_issued.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0e.01*/"uops_issued.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*48.01*/"l1d_pend_miss.pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*49.04*/"dtlb_store_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5c.01*/"cpl_cycles.ring0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5c.02*/"cpl_cycles.ring123", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5c.xx*/"cpl_cycles.ring0_trans", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5e.01*/"rs_events.empty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.02*/"offcore_requests_outstanding.demand_code_rd_cycles", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.04*/"offcore_requests_outstanding.demand_rfo_cycles", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*63.01*/"lock_cycles.split_lock_uc_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*63.02*/"lock_cycles.cache_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.00*/"idq.empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.04*/"idq.mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.08*/"idq.dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.10*/"idq.ms_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.20*/"idq.ms_mite_uops_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.20*/"idq.ms_mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.30*/"idq.ms_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.24*/"idq.all_mite_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.24*/"idq.all_mite_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*80.04*/"icache.ifetch_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*85.04*/"itlb_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*87.01*/"ild_stall.lcp", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE}, // Intel SDM says these are stalls, not cycles
+ {/*87.04*/"ild_stall.iq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.xx*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.xx*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.xx*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.xx*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ // {/*9c.01*/"idq_uops_not_delivered.cycles_ge_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {/*a1.01*/"uops_executed_port.port_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.02*/"uops_executed_port.port_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.04*/"uops_executed_port.port_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.08*/"uops_executed_port.port_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.10*/"uops_executed_port.port_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.20*/"uops_executed_port.port_5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.40*/"uops_executed_port.port_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.80*/"uops_executed_port.port_7", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.01*/"uops_executed_port.port_0_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.02*/"uops_executed_port.port_1_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.04*/"uops_executed_port.port_2_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.08*/"uops_executed_port.port_3_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.10*/"uops_executed_port.port_4_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.20*/"uops_executed_port.port_5_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.40*/"uops_executed_port.port_6_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.80*/"uops_executed_port.port_7_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {/*a2.01*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.04*/"resource_stalls.rs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.08*/"resource_stalls.sb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.10*/"resource_stalls.rob", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {/*a3.01*/"cycle_activity.cycles_l2_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ // {/*a3.01*/"cycle_activity.cycles_l2_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.02*/"cycle_activity.cycles_ldm_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ // {/*a3.05*/"cycle_activity.stalls_l2_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.08*/"cycle_activity.cycles_l1d_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ // {/*a3.??*/"cycle_activity.cycles_no_execute", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ // {/*a3.??*/"cycle_activity.stalls_ldm_pending",/*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {/*b1.??*/"uops_executed.stall_cycles", /*? not in PRM*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.??*/"uops_executed.cycles_ge_1_uop_exec", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.??*/"uops_executed.cycles_ge_2_uops_exec", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.??*/"uops_executed.cycles_ge_3_uops_exec", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.??*/"uops_executed.cycles_ge_4_uops_exec", /*?*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {/*c2.01*/"uops_retired.stall_cycles", /*cmask==1 + INV*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c2.01*/"uops_retired.total_cycles", /*cmask==0x1*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c2.01*/"uops_retired.core_stall_cycles", /*PEBS Any==1*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {/*c3.01*/"machine_clears.cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {/*ca.1e*/"fp_assist.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ {/*cd.01*/"mem_trans_retired.load_latency", /*PEBS*/ NULL, REGNO_ANY, NULL, PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1}, //non-standard overflow
+
+ /* "Architectural" events: */
+ {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ PERF_EVENTS_SW_EVENT_DEFS
+
+ /* additional (hidden) aliases for convenience */
+#if 0 /* disabled; USE_INTEL_REF_CYCLES (100) is already invoked at the top of this table */
+ USE_INTEL_REF_CYCLES (100),
+#endif
+ {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* final row with a NULL name; presumably the end-of-table sentinel -- TODO confirm.  It has nine initializers where the rows above have seven. */
+};
+
+
+static Hwcentry intelBroadwellList[] = { /* Hardware-counter table for Intel Broadwell.
+ * Rows that carry an STXT () description are the user-visible aliases; rows
+ * whose fourth field is NULL are raw-counter definitions or the "(hidden)"
+ * convenience aliases at the end.
+ * NOTE(review): the field order looks like {name, internal name, register,
+ * description, overflow preload, time-conversion factor, backtrack type} --
+ * confirm against the Hwcentry declaration.
+ * NOTE(review): row order is presumably significant to the lookup code --
+ * do not reorder without checking the consumer. */
+ /* see comments for "cycles" and "insts" for intelNehalemList */
+ PERF_EVENTS_SW_EVENT_ALIASES /* macro defined elsewhere; written without a trailing comma, so it presumably expands to complete rows -- TODO confirm */
+ USE_INTEL_REF_CYCLES (100)
+ {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+
+ // PEBS (sampling)
+ // the alias row is followed directly by the raw definition of the counter it names
+ {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1},
+ {/*cd.01*/"mem_trans_retired.load_latency", NULL, REGNO_ANY, NULL, PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1},
+
+ // aliases (the first set are PEBS, but on Intel the only precise counter we support is l2m_latency)
+ {"dch", "mem_load_uops_retired.l1_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE},
+ {"dcm", "mem_load_uops_retired.l1_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE},
+ {"l2h", "mem_load_uops_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE},
+ {"l2m", "mem_load_uops_retired.l2_miss", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"l3h", "mem_load_uops_retired.l3_hit", REGNO_ANY, STXT ("L3 Cache Hits"), PRELOADS_6, 0, ABST_NONE},
+ {"l3m", "mem_load_uops_retired.l3_miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"dtlbm", "dtlb_load_misses.walk_completed", REGNO_ANY, STXT ("DTLB Misses"), PRELOADS_6, 0, ABST_NONE},
+
+ // counters that can be time converted (add FFCs if we decide to support them)
+ // counters that are load-store (did not include any... do we want to?)
+ // The leading comment in each row below is the author's "event.umask" annotation in hex; "xx" marks a value the author left unspecified.
+ {/*08.10*/"dtlb_load_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d.03*/"int_misc.recovery_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0e.01*/"uops_issued.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0e.01*/"uops_issued.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*14.01*/"arith.fpu_div_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*3c.00*/"cpu_clk_unhalted.thread_p_any", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*3c.02*/"cpu_clk_thread_unhalted.one_thread_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*48.01*/"l1d_pend_miss.pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*48.01*/"l1d_pend_miss.pending_cycles_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*49.10*/"dtlb_store_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*4f.10*/"ept.walk_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5c.01*/"cpl_cycles.ring0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5c.01*/"cpl_cycles.ring0_trans", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5c.02*/"cpl_cycles.ring123", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5e.01*/"rs_events.empty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.02*/"offcore_requests_outstanding.demand_code_rd_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.04*/"offcore_requests_outstanding.demand_rfo_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*63.01*/"lock_cycles.split_lock_uc_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*63.02*/"lock_cycles.cache_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.02*/"idq.empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.04*/"idq.mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.08*/"idq.dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.10*/"idq.ms_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.24*/"idq.all_mite_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.24*/"idq.all_mite_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.30*/"idq.ms_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*85.10*/"itlb_misses.walk_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.xx*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.xx*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.xx*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.xx*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.01*/"uops_executed_port.port_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.02*/"uops_executed_port.port_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.04*/"uops_executed_port.port_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.08*/"uops_executed_port.port_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.10*/"uops_executed_port.port_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.20*/"uops_executed_port.port_5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.40*/"uops_executed_port.port_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.80*/"uops_executed_port.port_7", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.01*/"uops_executed_port.port_0_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.02*/"uops_executed_port.port_1_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.04*/"uops_executed_port.port_2_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.08*/"uops_executed_port.port_3_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.10*/"uops_executed_port.port_4_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.20*/"uops_executed_port.port_5_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.40*/"uops_executed_port.port_6_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.80*/"uops_executed_port.port_7_core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.01*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.04*/"resource_stalls.rs", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.08*/"resource_stalls.sb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.10*/"resource_stalls.rob", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.01*/"cycle_activity.cycles_l2_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.02*/"cycle_activity.cycles_ldm_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.04*/"cycle_activity.cycles_no_execute", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.08*/"cycle_activity.cycles_l1d_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a8.01*/"lsd.cycles_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a8.01*/"lsd.cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.01*/"uops_executed.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c2.01*/"uops_retired.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c2.01*/"uops_retired.total_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c2.01*/"uops_retired.core_stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c3.01*/"machine_clears.cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*ca.1e*/"fp_assist.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ /* "Architectural" events: */
+ {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ PERF_EVENTS_SW_EVENT_DEFS
+
+ /* additional (hidden) aliases for convenience */
+#if 0 /* disabled; USE_INTEL_REF_CYCLES (100) is already invoked at the top of this table */
+ USE_INTEL_REF_CYCLES (100),
+#endif
+ {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* final row with a NULL name; presumably the end-of-table sentinel -- TODO confirm.  It has nine initializers where the rows above have seven. */
+};
+
+static Hwcentry intelSkylakeList[] = { /* Hardware-counter table for Intel Skylake.
+ * Rows that carry an STXT () description are the user-visible aliases; a few
+ * of them ("dtlbm_stall", "fp_scalar", "fp_vector") add an eighth field with
+ * a second, longer STXT () text.  Rows whose fourth field is NULL are
+ * raw-counter definitions or the "(hidden)" convenience aliases at the end.
+ * NOTE(review): the field order looks like {name, internal name, register,
+ * description, overflow preload, time-conversion factor, backtrack type,
+ * optional long description} -- confirm against the Hwcentry declaration.
+ * NOTE(review): row order is presumably significant to the lookup code --
+ * do not reorder without checking the consumer. */
+ /* see comments for "cycles" and "insts" for intelNehalemList */
+ PERF_EVENTS_SW_EVENT_ALIASES /* macro defined elsewhere; written without a trailing comma, so it presumably expands to complete rows -- TODO confirm */
+ USE_INTEL_REF_CYCLES (25) /* NOTE(review): the Haswell and Broadwell tables pass 100 here; the meaning of the argument is not visible in this file section -- confirm */
+ {"cycles", "cpu_clk_unhalted.thread_p", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "inst_retired.any_p", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+
+ // PEBS (sampling)
+ // the alias row is followed directly by the raw definition of the counter it names
+ {"l2m_latency", "mem_trans_retired.load_latency", REGNO_ANY, STXT ("L2 Cache Miss Est. Latency"), PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1},
+ {/*cd.01*/"mem_trans_retired.load_latency", NULL, REGNO_ANY, NULL, PRELOADS_4, 65, ABST_EXACT_PEBS_PLUS1},
+
+ // aliases (the first set are PEBS, but on Intel the only precise counter we support is l2m_latency)
+ {"dch", "mem_load_retired.l1_hit", REGNO_ANY, STXT ("L1 D-cache Hits"), PRELOADS_7, 0, ABST_NONE},
+ {"dcm", "mem_load_retired.l1_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE},
+ {"l2h", "mem_load_retired.l2_hit", REGNO_ANY, STXT ("L2 Cache Hits"), PRELOADS_65, 0, ABST_NONE},
+ {"l2m", "mem_load_retired.l2_miss", REGNO_ANY, STXT ("L2 Cache Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"l2m_stall", "cycle_activity.stalls_l2_miss", REGNO_ANY, STXT ("L2 Cache Miss Stall"), PRELOADS_7, 1, ABST_NONE}, // needs validation
+ {"l3h", "mem_load_retired.l3_hit", REGNO_ANY, STXT ("L3 Cache Hits"), PRELOADS_6, 0, ABST_NONE},
+ {"l3m", "mem_load_retired.l3_miss", REGNO_ANY, STXT ("L3 Cache Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"l3m_stall", "cycle_activity.stalls_l3_miss", REGNO_ANY, STXT ("L3 Cache Miss Stall"), PRELOADS_7, 1, ABST_NONE}, // needs validation
+ {"dtlbm_stall", "dtlb_load_misses.walk_active", REGNO_ANY, STXT ("DTLB Miss Est Stall"), PRELOADS_7, 1, ABST_NONE, STXT ("Estimated time stalled on DTLB misses requiring a tablewalk. Does not include time related to STLB hits.")}, // needs validation
+ // PEBS mem_inst_retired.stlb_miss_loads for finding location of DTLB issues
+ // what about: dtlb_load_misses.walk_completed, dtlb_load_misses.walk_pending, dtlb_load_misses.stlb_hit
+
+ // floating-point aliases: the internal name is an event name with an explicit "~umask=" override appended
+ {"fp_scalar", "fp_arith_inst_retired.scalar_double~umask=0x3", REGNO_ANY, STXT ("FP Scalar uOps"), PRELOADS_7, 0, ABST_NONE, STXT ("Floating-point scalar micro-ops that retired")},
+ {"fp_vector", "fp_arith_inst_retired.128b_packed_double~umask=0x3c", REGNO_ANY, STXT ("FP Vector uOps"), /*needs test*/ PRELOADS_7, 0, ABST_NONE, STXT ("Floating-point vector micro-ops that retired")},
+
+ // counters that can be time converted (add FFCs if we decide to support them)
+ // counters that are load-store (did not include any... do we want to?)
+ // The leading comment in each row below is the author's "event.umask" annotation in hex.
+ {/*08.10*/"dtlb_load_misses.walk_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*08.10*/"dtlb_load_misses.walk_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d.01*/"int_misc.recovery_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d.01*/"int_misc.recovery_cycles_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d.80*/"int_misc.clear_resteer_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0e.01*/"uops_issued.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*14.01*/"arith.divider_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*3c.00*/"cpu_clk_unhalted.ring0_trans", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*3c.00*/"cpu_clk_unhalted.thread_p_any", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*3c.00*/"cpu_clk_unhalted.thread_p", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*3c.00*/"cpu_clk_unhalted.core", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*48.01*/"l1d_pend_miss.pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*48.01*/"l1d_pend_miss.pending_cycles_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*49.10*/"dtlb_store_misses.walk_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*49.10*/"dtlb_store_misses.walk_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*4f.10*/"ept.walk_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*5e.01*/"rs_events.empty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.01*/"offcore_requests_outstanding.cycles_with_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.01*/"offcore_requests_outstanding.demand_data_rd_ge_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.02*/"offcore_requests_outstanding.cycles_with_demand_code_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.04*/"offcore_requests_outstanding.cycles_with_demand_rfo", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.08*/"offcore_requests_outstanding.cycles_with_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.10*/"offcore_requests_outstanding.cycles_with_l3_miss_demand_data_rd", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*60.10*/"offcore_requests_outstanding.l3_miss_demand_data_rd_ge_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*63.02*/"lock_cycles.cache_lock_duration", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.04*/"idq.mite_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.08*/"idq.dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.10*/"idq.ms_dsb_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.18*/"idq.all_dsb_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.18*/"idq.all_dsb_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.24*/"idq.all_mite_cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.24*/"idq.all_mite_cycles_any_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*79.30*/"idq.ms_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*80.04*/"icache_16b.ifdata_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*83.04*/"icache_64b.iftag_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*85.10*/"itlb_misses.walk_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*85.10*/"itlb_misses.walk_pending", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*87.01*/"ild_stall.lcp", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.01*/"idq_uops_not_delivered.cycles_0_uops_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.01*/"idq_uops_not_delivered.cycles_le_1_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.01*/"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.01*/"idq_uops_not_delivered.cycles_le_3_uop_deliv.core", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*9c.01*/"idq_uops_not_delivered.cycles_fe_was_ok", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.01*/"uops_dispatched_port.port_0", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.02*/"uops_dispatched_port.port_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.04*/"uops_dispatched_port.port_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.08*/"uops_dispatched_port.port_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.10*/"uops_dispatched_port.port_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.20*/"uops_dispatched_port.port_5", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.40*/"uops_dispatched_port.port_6", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a1.80*/"uops_dispatched_port.port_7", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.01*/"resource_stalls.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a2.08*/"resource_stalls.sb", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.01*/"cycle_activity.cycles_l2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.02*/"cycle_activity.cycles_l3_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.04*/"cycle_activity.stalls_total", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.05*/"cycle_activity.stalls_l2_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.06*/"cycle_activity.stalls_l3_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.08*/"cycle_activity.cycles_l1d_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.0c*/"cycle_activity.stalls_l1d_miss", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.10*/"cycle_activity.cycles_mem_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a3.14*/"cycle_activity.stalls_mem_any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a6.01*/"exe_activity.exe_bound_0_ports", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a6.02*/"exe_activity.1_ports_util", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a6.04*/"exe_activity.2_ports_util", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a6.08*/"exe_activity.3_ports_util", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a6.10*/"exe_activity.4_ports_util", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a6.40*/"exe_activity.bound_on_stores", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a8.01*/"lsd.cycles_4_uops", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*a8.01*/"lsd.cycles_active", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*ab.02*/"dsb2mite_switches.penalty_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.01*/"uops_executed.cycles_ge_1_uop_exec", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.01*/"uops_executed.cycles_ge_2_uops_exec", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.01*/"uops_executed.cycles_ge_3_uops_exec", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.01*/"uops_executed.cycles_ge_4_uops_exec", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.01*/"uops_executed.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.02*/"uops_executed.core_cycles_ge_1", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.02*/"uops_executed.core_cycles_ge_2", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.02*/"uops_executed.core_cycles_ge_3", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.02*/"uops_executed.core_cycles_ge_4", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*b1.02*/"uops_executed.core_cycles_none", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c0.1*/"inst_retired.total_cycles_ps", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c2.01*/"uops_retired.stall_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c2.01*/"uops_retired.total_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*ca.1e*/"fp_assist.any", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ /* "Architectural" events: */
+ {/* FFC */"cpu_clk_unhalted.thread", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/* FFC */"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ PERF_EVENTS_SW_EVENT_DEFS
+
+ /* additional (hidden) aliases for convenience */
+#if 0 /* disabled; USE_INTEL_REF_CYCLES (25) is already invoked at the top of this table */
+ USE_INTEL_REF_CYCLES (25),
+#endif
+ {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* final row with a NULL name; presumably the end-of-table sentinel -- TODO confirm.  It has nine initializers where the rows above have seven or eight. */
+};
+
+static Hwcentry intelLinuxUnknown[] = { /* Hardware-counter table named for an unrecognized Intel processor on Linux.
+ * Most aliases are listed twice under the same name: once with an
+ * architectural event name (e.g. "unhalted-core-cycles") and once with a
+ * PERF_COUNT_HW_* name.
+ * NOTE(review): presumably the lookup uses whichever internal name the
+ * running driver recognizes, so row order within a pair may matter -- confirm
+ * against the consumer before reordering.
+ * NOTE(review): the field order looks like {name, internal name, register,
+ * description, overflow preload, time-conversion factor, backtrack type} --
+ * confirm against the Hwcentry declaration. */
+ PERF_EVENTS_SW_EVENT_ALIASES /* macro defined elsewhere; written without a trailing comma, so it presumably expands to complete rows -- TODO confirm */
+ // USE_INTEL_REF_CYCLES(100) // freq is unknown
+ {"cycles", "unhalted-core-cycles", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"cycles", "PERF_COUNT_HW_CPU_CYCLES", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "instruction-retired", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+ {"insts", "PERF_COUNT_HW_INSTRUCTIONS", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+
+ /* cache-miss aliases; "dcm" has only a PERF_COUNT_HW_* form in this table */
+ {"dcm", "PERF_COUNT_HW_CACHE_MISSES.L1D", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE},
+ {"llm", "llc-misses", REGNO_ANY, STXT ("Last-Level Cache Misses"), PRELOADS_5, 0, ABST_NONE},
+ {"llm", "PERF_COUNT_HW_CACHE_MISSES.LL", REGNO_ANY, STXT ("Last-Level Cache Misses"), PRELOADS_5, 0, ABST_NONE},
+
+ /* branch aliases */
+ {"br_msp", "branch-misses-retired", REGNO_ANY, STXT ("Branch Mispredict"), PRELOADS_6, 0, ABST_NONE},
+ {"br_msp", "PERF_COUNT_HW_BRANCH_MISSES", REGNO_ANY, STXT ("Branch Mispredict"), PRELOADS_6, 0, ABST_NONE},
+ {"br_ins", "branch-instruction-retired", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE},
+ {"br_ins", "PERF_COUNT_HW_BRANCH_INSTRUCTIONS", REGNO_ANY, STXT ("Branch Instructions"), PRELOADS_7, 0, ABST_NONE},
+
+ // counters that can be time converted (add FFCs if we decide to support them)
+ // counters that are load-store (did not include any... do we want to?)
+ /* "Architectural" events: */
+ {/* FFC */"cpu_clk_unhalted.thread", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/* FFC */"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ PERF_EVENTS_SW_EVENT_DEFS
+
+ /* additional (hidden) aliases for convenience */
+ /* The four reference-cycle rows below pin a register number (0 or 1) and carry a negative sixth field, -(25); its meaning is not visible in this file section and the author flagged the rows with "YXXX" -- confirm before relying on them. */
+ {"cycles0", "unhalted-reference-cycles", 0, NULL, PRELOADS_6, -(25), ABST_NONE}, //YXXX -can't do with ref cycles #
+ {"cycles0", "PERF_COUNT_HW_BUS_CYCLES", 0, NULL, PRELOADS_6, -(25), ABST_NONE}, //YXXX -can't do with ref cycles #
+ {"cycles1", "unhalted-reference-cycles", 1, NULL, PRELOADS_65, -(25), ABST_NONE}, //YXXX - can't do with ref cycles #
+ {"cycles1", "PERF_COUNT_HW_BUS_CYCLES", 1, NULL, PRELOADS_65, -(25), ABST_NONE}, //YXXX - can't do with ref cycles #
+ {"insts0", "instruction-retired", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts0", "PERF_COUNT_HW_INSTRUCTIONS", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "instruction-retired", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "PERF_COUNT_HW_INSTRUCTIONS", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} /* final row with a NULL name; presumably the end-of-table sentinel -- TODO confirm.  It has nine initializers where the rows above have seven. */
+};
+
+/* Counter table that cputabs[] selects for CPC_INTEL_ATOM.
+ * Layout: visible aliases first (name + int_name + metric string), then raw
+ * event definitions (int_name and metric both NULL), then hidden aliases
+ * pinned to register 0 or 1 (metric NULL), then the all-NULL terminator.  */
+static Hwcentry intelAtomList[] = {
+ {"cycles", "cpu_clk_unhalted.core", /*6759307*/ REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"cycles", "cpu_clk_unhalted.thread", /*6759307*/ REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"insts", "instr_retired.any", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_7, 0, ABST_NONE},
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or are load-store need to be in this table */
+ /* XXXX add core2-related entries if appropriate */
+ {/*30A*/"cpu_clk_unhalted.core", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
+ {/*30A*/"cpu_clk_unhalted.thread", /*6759307*/ NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
+ {/*0c*/"page_walks.cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*14*/"cycles_div_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*21*/"l2_ads", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*22*/"l2_dbus_busy", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*32*/"l2_no_req", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*3c*/"cpu_clk_unhalted.core_p", NULL, REGNO_ANY, NULL, PRELOADS_7, 1, ABST_NONE},
+ {/*3c*/"cpu_clk_unhalted.bus", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*3c*/"cpu_clk_unhalted.no_other", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*62*/"bus_drdy_clocks", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*63*/"bus_lock_clocks", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*64*/"bus_data_rcv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*7a*/"bus_hit_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*7b*/"bus_hitm_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*7d*/"busq_empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*7e*/"snoop_stall_drv", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*7f*/"bus_io_wait", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c6*/"cycles_int_masked.cycles_int_masked", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*c6*/"cycles_int_masked.cycles_int_pending_and_masked", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ /* "Architectural" events: */
+ {/*3c*/"unhalted-core-cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ /* additional (hidden) aliases for convenience */
+ {"cycles0", "cpu_clk_unhalted.core_p", 0, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"cycles1", "cpu_clk_unhalted.core_p", 1, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"insts0", "inst_retired.any_p", 0, NULL, PRELOADS_75, 0, ABST_NONE},
+ {"insts1", "inst_retired.any_p", 1, NULL, PRELOADS_75, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+/* Counter table shared by the CPC_AMD_K8C, CPC_AMD_FAM_10H and
+ * CPC_AMD_FAM_11H rows of cputabs[].
+ * Layout: visible aliases (with metric strings), raw event definitions
+ * (int_name and metric NULL), hidden per-register aliases (metric NULL),
+ * then the all-NULL terminator.  Several int_names carry a "~umask=..."
+ * attribute suffix to select a sub-event.  */
+static Hwcentry amd_opteron_10h_11h[] = {
+ {"cycles", "BU_cpu_clk_unhalted", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "FR_retired_x86_instr_w_excp_intr", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+ {"icr", "IC_fetch", REGNO_ANY, STXT ("L1 I-cache Refs"), PRELOADS_7, 0, ABST_NONE}, /* new */
+ {"icm", "IC_miss", REGNO_ANY, STXT ("L1 I-cache Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"l2itlbh", "IC_itlb_L1_miss_L2_hit", REGNO_ANY, STXT ("L2 ITLB Hits"), PRELOADS_6, 0, ABST_NONE}, /* new */
+ {"l2itlbm", "IC_itlb_L1_miss_L2_miss", REGNO_ANY, STXT ("L2 ITLB Misses"), PRELOADS_5, 0, ABST_NONE}, /* new */
+ {"l2ir", "BU_internal_L2_req~umask=0x1", REGNO_ANY, STXT ("L2 I-cache Refs"), PRELOADS_6, 0, ABST_NONE},
+ {"l2im", "BU_fill_req_missed_L2~umask=0x1", REGNO_ANY, STXT ("L2 I-cache Misses"), PRELOADS_4, 0, ABST_NONE},
+ {"dcr", "DC_access", REGNO_ANY, STXT ("L1 D-cache Refs"), PRELOADS_7, 0, ABST_NONE}, /* new */
+ {"dcm", "DC_miss", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, /* new */
+ {"l2dtlbh", "DC_dtlb_L1_miss_L2_hit", REGNO_ANY, STXT ("L2 DTLB Hits"), PRELOADS_6, 0, ABST_NONE}, /* new */
+ {"l2dtlbm", "DC_dtlb_L1_miss_L2_miss", REGNO_ANY, STXT ("L2 DTLB Misses"), PRELOADS_5, 0, ABST_NONE}, /* new */
+ {"l2dr", "BU_internal_L2_req~umask=0x2", REGNO_ANY, STXT ("L2 D-cache Refs"), PRELOADS_65, 0, ABST_NONE}, /* hwc_cache_load: 1.6x overcount on shanghai01 */
+ {"l2dm", "BU_fill_req_missed_L2~umask=0x2", REGNO_ANY, STXT ("L2 D-cache Misses"), PRELOADS_6, 0, ABST_NONE}, /* new */
+ {"fpadd", "FP_dispatched_fpu_ops~umask=0x1", REGNO_ANY, STXT ("FP Adds"), PRELOADS_7, 0, ABST_NONE},
+ {"fpmul", "FP_dispatched_fpu_ops~umask=0x2", REGNO_ANY, STXT ("FP Muls"), PRELOADS_7, 0, ABST_NONE},
+ {"fpustall", "FR_dispatch_stall_fpu_full", REGNO_ANY, STXT ("FPU Stall Cycles"), PRELOADS_7, 1, ABST_NONE},
+ {"memstall", "FR_dispatch_stall_ls_full", REGNO_ANY, STXT ("Memory Unit Stall Cycles"), PRELOADS_7, 1, ABST_NONE},
+ // For PAPI mappings, see hwctable.README.family10h
+ // For PAPI mappings, see hwctable.README.opteron
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or are load-store need to be in this table */
+ {"BU_cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {"FP_cycles_no_fpu_ops_retired", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"FP_serialize_ops_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"FR_dispatch_stall_branch_abort_to_retire", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"FR_dispatch_stall_far_ctl_trsfr_resync_branch_pend", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"FR_dispatch_stall_fpu_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"FR_dispatch_stall_ls_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"FR_dispatch_stall_reorder_buffer_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"FR_dispatch_stall_resv_stations_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"FR_dispatch_stall_segment_load", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"FR_dispatch_stall_serialization", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"FR_dispatch_stall_waiting_all_quiet", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"FR_dispatch_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"FR_intr_masked_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"FR_intr_masked_while_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"FR_nothing_to_dispatch", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"IC_instr_fetch_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"LS_buffer_2_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+ {"NB_mem_ctrlr_dram_cmd_slots_missed", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {"NB_mem_ctrlr_turnaround", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_TBD},
+
+ /* additional (hidden) aliases, for convenience */
+ {"cycles0", "BU_cpu_clk_unhalted", 0, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"cycles1", "BU_cpu_clk_unhalted", 1, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"insts0", "FR_retired_x86_instr_w_excp_intr", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "FR_retired_x86_instr_w_excp_intr", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+/* Counter table that cputabs[] selects for CPC_AMD_FAM_15H.
+ * Layout: visible aliases (with metric strings), raw event definitions
+ * (int_name and metric NULL; the leading comment on each row appears to be
+ * the event code), hidden per-register aliases, then the all-NULL
+ * terminator.  */
+static Hwcentry amd_15h[] = {
+ {"cycles", "CU_cpu_clk_unhalted", REGNO_ANY, STXT ("CPU Cycles"), PRELOADS_75, 1, ABST_NONE},
+ {"insts", "EX_retired_instr_w_excp_intr", REGNO_ANY, STXT ("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+ {"icr", "IC_fetch", REGNO_ANY, STXT ("L1 I-cache Refs"), PRELOADS_7, 0, ABST_NONE}, /* new */
+ {"icm", "IC_miss", REGNO_ANY, STXT ("L1 I-cache Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"l2im", "IC_refill_from_system", REGNO_ANY, STXT ("L2 I-cache Misses"), PRELOADS_6, 0, ABST_NONE},
+ {"dcr", "DC_access", REGNO_ANY, STXT ("L1 D-cache Refs"), PRELOADS_7, 0, ABST_NONE}, /* new */
+ {"dcm", "DC_miss~umask=0x3", REGNO_ANY, STXT ("L1 D-cache Misses"), PRELOADS_65, 0, ABST_NONE}, /* new */
+ {"l2dm", "DC_refill_from_system", REGNO_ANY, STXT ("L2 D-cache Misses"), PRELOADS_6, 0, ABST_NONE}, /* new */
+ {"dtlbm", "DC_unified_tlb_miss~umask=0x7", REGNO_ANY, STXT ("L2 DTLB Misses"), PRELOADS_5, 0, ABST_NONE}, /* new */
+ // For PAPI mappings, see hwctable.README.family15h
+
+ /* explicit definitions of (hidden) entries for proper counters */
+ /* Only counters that can be time converted, or are load-store need to be in this table */
+ {/*001.xx*/"FP_scheduler_empty", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*006.xx*/"FP_bottom_execute_uops_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*023.xx*/"LS_ldq_stq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*024.xx*/"LS_locked_operation", /*umask!=0*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*069.xx*/"CU_mab_wait_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*076.xx*/"CU_cpu_clk_unhalted", NULL, REGNO_ANY, NULL, PRELOADS_75, 1, ABST_NONE},
+ {/*087.xx*/"IC_instr_fetch_stall", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0cd.xx*/"EX_intr_masked_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0ce.xx*/"EX_intr_masked_while_pending_cycles", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d0.xx*/"DE_nothing_to_dispatch", /*future*/ NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d1.xx*/"DE_dispatch_stalls", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d3.xx*/"DE_dispatch_stall_serialization", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d5.xx*/"DE_dispatch_stall_instr_retire_q_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d6.xx*/"DE_dispatch_stall_int_scheduler_q_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d7.xx*/"DE_dispatch_stall_fp_scheduler_q_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d8.xx*/"DE_dispatch_stall_ldq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*0d9.xx*/"DE_dispatch_stall_waiting_all_quiet", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+ {/*1d8.xx*/"EX_dispatch_stall_stq_full", NULL, REGNO_ANY, NULL, PRELOAD_DEF, 1, ABST_NONE},
+
+ /* additional (hidden) aliases, for convenience */
+ {"cycles0", "CU_cpu_clk_unhalted", 0, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"cycles1", "CU_cpu_clk_unhalted", 1, NULL, PRELOADS_8, 1, ABST_NONE},
+ {"insts0", "EX_retired_instr_w_excp_intr", 0, NULL, PRELOADS_8, 0, ABST_NONE},
+ {"insts1", "EX_retired_instr_w_excp_intr", 1, NULL, PRELOADS_8, 0, ABST_NONE},
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+/* Two alias rows spliced into the head of armlist[]: "usr_time" and
+ * "sys_time", both built on the "cycles" event; "sys_time" adds the
+ * ~system=1~user=0 attributes.  The expansion ends with a comma, so the
+ * macro is used without one.  NOTE(review): the last row also ends with a
+ * line continuation, so the line after the macro must stay blank.  */
+#define USE_ARM_REF_CYCLES \
+ {"usr_time","cycles", REGNO_ANY, STXT("User CPU"), PRELOADS_85, 1, ABST_NONE}, \
+ {"sys_time","cycles~system=1~user=0", REGNO_ANY, STXT("System CPU"), PRELOADS_85, 1, ABST_NONE}, \
+
+/* Counter table that cputabs[] selects for ARM_CPU_IMP_APM.
+ * After the usr_time/sys_time aliases from USE_ARM_REF_CYCLES, the rows use
+ * perf-style generic event names grouped as hardware, software and
+ * hardware-cache events.  Most rows give no int_name; "insts" is an alias
+ * for "instructions".  The all-NULL row terminates the table.  */
+static Hwcentry armlist[] = {
+  USE_ARM_REF_CYCLES
+// Hardware event:
+  {"branch-instructions", NULL, REGNO_ANY, STXT("Branch-instructions"), PRELOADS_35, 0, ABST_NONE},
+  {"branch-misses", NULL, REGNO_ANY, STXT("Branch-misses"), PRELOADS_35, 0, ABST_NONE},
+  {"bus-cycles", NULL, REGNO_ANY, STXT("Bus Cycles"), PRELOADS_35, 1, ABST_NONE},
+  {"cache-misses", NULL, REGNO_ANY, STXT("Cache-misses"), PRELOADS_35, 0, ABST_NONE},
+  {"cache-references", NULL, REGNO_ANY, STXT("Cache-references"), PRELOADS_35, 0, ABST_NONE},
+  {"cycles", NULL, REGNO_ANY, STXT("CPU Cycles"), PRELOADS_85, 1, ABST_NONE},
+  {"insts", "instructions", REGNO_ANY, STXT("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
+  {"ref-cycles", NULL, REGNO_ANY, STXT("Total Cycles"), PRELOADS_85, 1, ABST_NONE},
+  /* perf_event_open(2): backend stalls occur during retirement, frontend
+     stalls during issue.  */
+  {"stalled-cycles-backend", NULL, REGNO_ANY, STXT("Stalled Cycles during retirement."), PRELOADS_85, 1, ABST_NONE},
+  {"stalled-cycles-frontend", NULL, REGNO_ANY, STXT("Stalled Cycles during issue."), PRELOADS_85, 1, ABST_NONE},
+
+// Software event:
+  {"alignment-faults", NULL, REGNO_ANY, STXT("Alignment Faults"), PRELOADS_85, 0, ABST_NONE},
+  {"context-switches", NULL, REGNO_ANY, STXT("Context Switches"), PRELOADS_85, 0, ABST_NONE},
+  {"cpu-clock", NULL, REGNO_ANY, STXT("CPU Clock"), PRELOADS_85, 1, ABST_NONE},
+  {"cpu-migrations", NULL, REGNO_ANY, STXT("CPU Migrations"), PRELOADS_85, 0, ABST_NONE},
+  {"emulation-faults", NULL, REGNO_ANY, STXT("Emulation Faults"), PRELOADS_85, 0, ABST_NONE},
+  {"major-faults", NULL, REGNO_ANY, STXT("Major Page Faults"), PRELOADS_85, 0, ABST_NONE},
+  {"minor-faults", NULL, REGNO_ANY, STXT("Minor Page Faults"), PRELOADS_85, 0, ABST_NONE},
+  {"page-faults", NULL, REGNO_ANY, STXT("Page Faults"), PRELOADS_85, 0, ABST_NONE},
+  {"task-clock", NULL, REGNO_ANY, STXT("Clock Count Specific"), PRELOADS_85, 1, ABST_NONE},
+
+// Hardware cache event
+  {"L1-dcache-load-misses", NULL, REGNO_ANY, STXT("L1 D-cache Load Misses"), PRELOADS_35, 0, ABST_NONE},
+  {"L1-dcache-loads", NULL, REGNO_ANY, STXT("L1 D-cache Loads"), PRELOADS_35, 0, ABST_NONE},
+  {"L1-dcache-store-misses", NULL, REGNO_ANY, STXT("L1 D-cache Store Misses"), PRELOADS_35, 0, ABST_NONE},
+  {"L1-dcache-stores", NULL, REGNO_ANY, STXT("L1 D-cache Stores"), PRELOADS_35, 0, ABST_NONE},
+  {"L1-icache-load-misses", NULL, REGNO_ANY, STXT("L1 Instructions Load Misses"), PRELOADS_35, 0, ABST_NONE},
+  /* This row was a second "L1-icache-load-misses" entry labelled as loads.  */
+  {"L1-icache-loads", NULL, REGNO_ANY, STXT("L1 Instructions Loads"), PRELOADS_35, 0, ABST_NONE},
+  {"dTLB-load-misses", NULL, REGNO_ANY, STXT("D-TLB Load Misses"), PRELOADS_35, 0, ABST_NONE},
+  {"dTLB-loads", NULL, REGNO_ANY, STXT("D-TLB Loads"), PRELOADS_35, 0, ABST_NONE},
+  {"iTLB-load-misses", NULL, REGNO_ANY, STXT("The Instruction TLB Load Misses"), PRELOADS_35, 0, ABST_NONE},
+  {"iTLB-loads", NULL, REGNO_ANY, STXT("The Instruction TLB Loads"), PRELOADS_35, 0, ABST_NONE},
+
+  {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+/* Empty counter table: it holds only the all-NULL terminator row.  The
+ * final (cputag 0) row of cputabs[] points here, so an unrecognized CPU
+ * gets no standard counters.  */
+static Hwcentry unknownlist[] =
+ /* used for unrecognized CPU type */{
+ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
+};
+
+/* structure defining the counters for a CPU type */
+typedef struct
+{
+ int cputag; /* CPU identifier matched by cputabs_find_entry(); 0 marks the last row */
+ Hwcentry *stdlist_table; /* counter table for this CPU; its last row has a NULL name */
+#define MAX_DEFAULT_HWC_DEFS 4 // allows multiple defs to handle OS variations; extend as needed
+ char *default_exp_p[MAX_DEFAULT_HWC_DEFS + 1]; // end of list MUST be marked with NULL
+} cpu_list_t;
+
+/* IMPORTANT NOTE:
+ *
+ * Any default HWC string must consist of counter names separated by -TWO- commas,
+ * with no trailing comma/value after the last counter name.
+ *
+ * Only aliased counters should be specified; non-aliased counters will
+ * not get the right overflow values set.
+ * If the string is not formatted that way, -h hi and -h lo will fail.
+ */
+/* Map from CPU tag to its counter table and its default counter strings.
+ * cputabs_find_entry() scans the rows in order and stops at the row whose
+ * cputag is 0; that row is also what it returns for an unrecognized CPU,
+ * so it must stay last.  */
+static cpu_list_t cputabs[] = {
+ {CPC_ULTRA1, usIlist, {NULL}}, /* bind will fail */
+ {CPC_ULTRA2, usIlist, {NULL}}, /* bind will fail */
+ {CPC_ULTRA3, usIIIlist, {"insts,,ecstall", 0}},
+ {CPC_ULTRA3_PLUS, usIIIlist, {"insts,,ecstall", 0}},
+ {CPC_ULTRA3_I, usIIIlist, {"insts,,ecstall", 0}},
+ {CPC_ULTRA4_PLUS, usIVplist, {"insts,,ecstall", 0}},
+ {CPC_ULTRA_T1, niagara1, {"insts", 0}},
+ {CPC_ULTRA_T2, niagara2, {"insts,,+l2drm", 0}},
+ {CPC_ULTRA_T2P, niagara2, {"insts,,+l2drm", 0}},
+ {CPC_ULTRA_T3, niagara2, {"insts,,+l2drm", 0}},
+ {CPC_SPARC_T4, sparc_t4, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}},
+ {CPC_SPARC_M4, sparc_t5_m6, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}}, // renamed to m5
+ {CPC_SPARC_T5, sparc_t5_m6, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}},
+ {CPC_SPARC_M5, sparc_t5_m6, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}},
+ {CPC_SPARC_T6, sparc_t5_m6, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}}, // no such processor
+ {CPC_SPARC_M6, sparc_t5_m6, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}},
+ {CPC_SPARC_M7, sparc_m7, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}}, // includes T7
+ {CPC_SPARC_M8, sparc_m8, {"insts,,cycles,,c_stalls,,dcm", "c_stalls", 0}},
+ {CPC_PENTIUM_PRO_MMX, pentiumIIlist, {"insts", 0}},
+ {CPC_PENTIUM_PRO, pentiumIIIlist, {"insts", 0}},
+ {CPC_PENTIUM_4, pentium4, {"insts", 0}},
+ {CPC_PENTIUM_4_HT, pentium4, {"insts", 0}},
+ {CPC_INTEL_CORE2, intelCore2list, {"insts,,cycles", 0}},
+ {CPC_INTEL_NEHALEM, intelNehalemList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
+ "insts,,cycles,,l3m_stall,,dtlbm_stall", 0}},
+ {CPC_INTEL_WESTMERE, intelNehalemList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
+ "insts,,cycles,,l3m_stall,,dtlbm_stall", 0}},
+ {CPC_INTEL_SANDYBRIDGE, intelSandyBridgeList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
+ "insts,,cycles,,l3m,,dtlbm", 0}},
+ {CPC_INTEL_IVYBRIDGE, intelSandyBridgeList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
+ "insts,,cycles,,l3m,,dtlbm", 0}},
+ {CPC_INTEL_HASWELL, intelHaswellList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
+ "insts,,cycles,,l3m,,dtlbm", 0}},
+ {CPC_INTEL_BROADWELL, intelBroadwellList, {"insts,,cycles,,+l2m_latency,,dtlbm",
+ "insts,,cycles,,l3m,,dtlbm", 0}},
+ {CPC_INTEL_SKYLAKE, intelSkylakeList, {"insts,,cycles,,+l2m_latency,,dtlbm_stall",
+ "insts,,cycles,,l2m_stall,,dtlbm_stall", 0}},
+ {CPC_INTEL_UNKNOWN, intelLinuxUnknown, {"cycles,,insts,,llm",
+ "user_time,,system_time,,cycles,,insts,,llm", 0}},
+ {CPC_INTEL_ATOM, intelAtomList, {"insts", 0}},
+ {CPC_AMD_K8C, amd_opteron_10h_11h, {"insts,,cycles,,l2dm,,l2dtlbm", 0}},
+ {CPC_AMD_FAM_10H, amd_opteron_10h_11h, {"insts,,cycles,,l2dm,,l2dtlbm", 0}},
+ {CPC_AMD_FAM_11H, amd_opteron_10h_11h, {"insts,,cycles,,l2dm,,l2dtlbm", 0}},
+ {CPC_AMD_FAM_15H, amd_15h, {"insts,,cycles", 0}},
+ {CPC_SPARC64_V, usfuji_V_list, {"insts,,cycles", 0}},
+ {CPC_SPARC64_VI, usfuji_VI_VII_list, {"insts,,cycles,,dcstall", 0}},
+ {CPC_SPARC64_VII, usfuji_VI_VII_list, {"insts,,cycles,,dcstall", 0}},
+ {CPC_SPARC64_X, usfuji_X_list, {"insts,,cycles,,dcstall", 0}},
+ {CPC_SPARC64_XII, usfuji_XII_list, {"insts,,cycles,,dcstall", 0}},
+ {CPC_KPROF, kproflist, {NULL}}, // OBSOLETE (To support 12.3 and earlier, TBR)
+ {ARM_CPU_IMP_APM, armlist, {"insts,,cycles", 0}},
+ {0, unknownlist, {NULL}} /* processor is unknown, but experiment is allowed */
+};
+
+/*---------------------------------------------------------------------------*/
+/* state variables */
+static int initialized; /* presumably a one-time-setup flag -- TODO confirm against its users */
+static int signals_disabled; /* set to 1 once test_hwcs() has set HWCFUNCS_SIGNAL to SIG_IGN */
+
+// Simple array list
+// Growable, NULL-terminated array of heap pointers.  ptr_list_add() always
+// keeps one slot free past the last element so that array[sz] can hold the
+// terminating NULL; ptr_list_free() relies on that terminator.
+typedef struct
+{
+ void** array; // array of ptrs, last item set to null
+ int sz; // num live elements in array
+ int max; // array allocation size
+} ptr_list;
+
+/* Put LST into the empty state: no storage and no elements.  Nothing is
+   freed here; whatever LST pointed at before is simply forgotten.  */
+static void
+ptr_list_init (ptr_list *lst)
+{
+  lst->array = NULL;
+  lst->max = 0;
+  lst->sz = 0;
+}
+
+/* Append PTR to LST and re-terminate the array with NULL.
+   PTR must be freeable, since ptr_list_free() calls free() on every element.
+   If the array cannot be grown, the element is dropped and LST is left
+   unchanged; the caller gets no indication of that.  */
+static void
+ptr_list_add (ptr_list *lst, char* ptr)
+{
+  /* One slot beyond the last element is reserved for the NULL terminator,
+     so grow as soon as fewer than two free slots remain.  */
+  if (lst->sz + 1 >= lst->max)
+    {
+      int grown_max = (lst->max != 0) ? lst->max * 2 : 16;
+      void **grown = (void **) realloc (lst->array,
+                                        grown_max * sizeof (void *));
+      if (grown == NULL)
+        return; // failed, discard add
+      lst->array = grown;
+      lst->max = grown_max;
+    }
+  lst->array[lst->sz] = ptr;
+  lst->sz++;
+  lst->array[lst->sz] = NULL; // mark new end-of-list
+}
+
+/* Release every element of LST (a shallow free: only the element pointers
+   themselves), then the array, and leave LST empty.  The walk stops at the
+   NULL terminator rather than at LST->sz.  */
+static void
+ptr_list_free (ptr_list *lst)
+{
+  void **slot = lst->array;
+  if (slot != NULL)
+    {
+      while (*slot != NULL)
+        {
+          free (*slot);
+          slot++;
+        }
+      free (lst->array);
+    }
+  lst->array = NULL;
+  lst->max = 0;
+  lst->sz = 0;
+}
+
+// Capabilities of this machine (initialized by setup_cpc())
+static int cpcx_cpuver = CPUVER_UNDEFINED;
+// number of counter registers; regno_add() and get_regnolist() use it as
+// the upper bound on register numbers
+static uint_t cpcx_npics;
+static const char *cpcx_cciname;
+static const char *cpcx_docref;
+static uint64_t cpcx_support_bitmask;
+
+// cpcx_*[0]: collect lists
+// cpcx_*[1]: er_kernel lists
+// Each cpcx_*[] list is an array of ptrs with null ptr marking end of list
+static char **cpcx_attrs[2];
+
+static Hwcentry **cpcx_std[2];
+static Hwcentry **cpcx_raw[2];
+static Hwcentry **cpcx_hidden[2];
+
+static uint_t cpcx_max_concurrent[2];
+static char *cpcx_default_hwcs[2];
+static char *cpcx_orig_default_hwcs[2];
+static int cpcx_has_precise[2];
+
+// forKernel is the index into the two-element cpcx_*[] arrays above:
+// 0 selects the collect lists, 1 the er_kernel lists
+#define VALID_FOR_KERNEL(forKernel) ((forKernel)>=0 && (forKernel)<=1)
+#define IS_KERNEL(forKernel) ((forKernel)==1)
+
+// used to build lists:
+static ptr_list unfiltered_attrs;
+static ptr_list unfiltered_raw;
+
+/*---------------------------------------------------------------------------*/
+/* misc internal utilities */
+
+/* True when CURRCHAR ends a name: it is either TERMCHAR or the NUL byte.  */
+#define IS_EOL(currchar, termchar) ((currchar)==(termchar) || (currchar)==0)
+
+/* Compare REGNAME and INT_NAME, treating TERMCHAR as an end-of-string
+   marker in addition to NUL.  Returns 1 when both strings reach their end
+   at the same position with every earlier character equal, 0 otherwise.
+   Text after TERMCHAR is ignored.  */
+static int
+is_same (const char * regname, const char * int_name, char termchar)
+{
+  for (;; regname++, int_name++)
+    {
+      int left_done = IS_EOL (*regname, termchar);
+      int right_done = IS_EOL (*int_name, termchar);
+
+      /* Once either side has ended, the names match only if both have.  */
+      if (left_done || right_done)
+        return left_done && right_done;
+      if (*regname != *int_name)
+        return 0; /* strings differ */
+    }
+}
+
+/* Decide whether NAME consists entirely of a number that strtoull()
+   accepts with base 0 (decimal, 0x-prefixed hex, or 0-prefixed octal).
+   Returns 1 and, when PVAL is non-NULL, stores the value in *PVAL.
+   Returns 0 for NULL, for an empty string, and for any string with
+   characters left over after the number; *PVAL is then left untouched.  */
+static int
+is_numeric (const char *name, uint64_t *pval)
+{
+  char *endptr;
+  uint64_t val;
+
+  /* Check before calling strtoull(): a NULL name must not be dereferenced.  */
+  if (name == NULL || *name == '\0')
+    return 0; /* name does not specify a numeric value */
+  val = strtoull (name, &endptr, 0);
+  if (*endptr != '\0')
+    return 0; /* trailing text, or nothing was converted */
+  if (pval)
+    *pval = val;
+  return 1;
+}
+
+/* A visible alias is an entry that has a name, an int_name and a metric
+   string.  Returns 1 for such an entry, 0 otherwise (including NULL).  */
+static int
+is_visible_alias (Hwcentry* pctr)
+{
+  return (pctr != NULL
+          && pctr->name != NULL
+          && pctr->int_name != NULL
+          && pctr->metric != NULL) ? 1 : 0;
+}
+
+/* A hidden alias is an entry that has a name and an int_name but no metric
+   string.  Returns 1 for such an entry, 0 otherwise (including NULL).  */
+static int
+is_hidden_alias (Hwcentry* pctr)
+{
+  return (pctr != NULL
+          && pctr->name != NULL
+          && pctr->int_name != NULL
+          && pctr->metric == NULL) ? 1 : 0;
+}
+
+/* Report whether the event name inside PCTR's int_name is purely numeric.
+   hwcfuncs_parse_ctr() extracts the name part into a heap string, which is
+   freed here.  Returns 1 if that name is numeric, 0 otherwise; a NULL PCTR,
+   or a parse that yields no name, also gives 0.  */
+static int
+is_numeric_alias (Hwcentry* pctr)
+{
+  int result = 0;
+  regno_t regno;
+  char *nameOnly = NULL;
+
+  /* Same NULL guard as is_visible_alias() and is_hidden_alias().  */
+  if (!pctr)
+    return 0;
+  hwcfuncs_parse_ctr (pctr->int_name, NULL, &nameOnly, NULL, NULL, &regno);
+  /* nameOnly stays NULL if the parser did not produce a name.  */
+  if (nameOnly && is_numeric (nameOnly, NULL))
+    result = 1;
+  free (nameOnly);
+  return result;
+}
+
+/* Format a register list into BUF (capacity SZ bytes) and return BUF.
+ *
+ * REG_LIST holds up to MAX_PICS register numbers; REG_LIST_EOL() marks the
+ * end of a shorter list.
+ *
+ *   style 0:  "NONE"   "2"   "{0|1|2|3}"
+ *   style 1:  "NONE"   "2"   "0 or 1"   "0, 1, 2, or 3"
+ *   style 2:  fixed-width map with one slot per register up to cpcx_npics;
+ *             a listed register shows its number, every other position
+ *             is '_' (e.g. "0_2_" for registers 0 and 2 out of 4).
+ *
+ * Returns the literal "INTERNAL ERROR" instead of BUF when BUF is unusable,
+ * when SZ is too small for style 2, or when a style-2 register number is
+ * outside 0..cpcx_npics-1.  On the style-2 errors BUF is left as "".
+ */
+static char *
+get_regnolist (char *buf, size_t sz, const regno_t *reg_list, int style)
+{
+  if (!buf || !sz)
+    return "INTERNAL ERROR";
+  buf[0] = 0;
+  if (style == 2)
+    {
+      int ii;
+      // width should be consistent with that in format_columns()
+      // the format will accommodate cpcx_npics regs
+      if (cpcx_npics < 1)
+        return "INTERNAL ERROR";
+      if (cpcx_npics <= 9)
+        {
+          // one char per reg, plus terminating null char
+          if (cpcx_npics + 1 > sz)
+            return "INTERNAL ERROR";
+          // placeholder in every slot; listed registers overwrite theirs
+          memset (buf, '_', sz);
+          buf[cpcx_npics] = '\0';
+
+          // fill buf with regnos
+          for (ii = 0; ii < MAX_PICS; ii++)
+            {
+              regno_t regno = reg_list[ii];
+              if (REG_LIST_EOL (regno))
+                break;
+              if (regno < 0 || regno >= cpcx_npics)
+                {
+                  buf[0] = 0;
+                  return "INTERNAL ERROR";
+                }
+              buf[regno] = '0' + regno;
+            }
+        }
+      else
+        {
+          /* space between regs, which may be 1 or 2 digits each
+           *   1 char  for reg 0
+           *   2 chars for regs 1-9 each
+           *   3 chars for regs 10- each
+           *   1 char  for terminating null char
+           */
+          size_t nchars = 17 + 3 * ((size_t) cpcx_npics - 9);
+          if (nchars > sz)
+            return "INTERNAL ERROR";
+          memset (buf, '_', sz);
+          buf[nchars - 1] = '\0';
+
+          // fill buf with regnos
+          for (ii = 0; ii < MAX_PICS; ii++)
+            {
+              regno_t regno = reg_list[ii];
+              if (REG_LIST_EOL (regno))
+                break;
+              /* Same range check as the narrow layout: an out-of-range
+                 register would be written outside the formatted area.  */
+              if (regno < 0 || regno >= cpcx_npics)
+                {
+                  buf[0] = 0;
+                  return "INTERNAL ERROR";
+                }
+              if (regno <= 9)
+                buf[2 * regno] = '0' + regno;
+              else
+                {
+                  buf[3 * (regno - 9) + 17] = '0' + (regno / 10);
+                  buf[3 * (regno - 9) + 18] = '0' + (regno % 10);
+                }
+            }
+        }
+      return buf;
+    }
+  if (REG_LIST_IS_EMPTY (reg_list))
+    {
+      snprintf (buf, sz, GTXT ("NONE"));
+      return buf;
+    }
+  else if (REG_LIST_EOL (reg_list[1]))
+    {
+      /* 1 item in list */
+      snprintf (buf, sz, "%d", reg_list[0]);
+      return buf;
+    }
+  else
+    {
+      /* 2 or more items in list */
+      int ii, num_regs;
+      for (ii = 0; ii < MAX_PICS; ii++)
+        {
+          regno_t regno = reg_list[ii];
+          if (REG_LIST_EOL (regno))
+            break;
+        }
+      num_regs = ii;
+      buf[0] = 0;
+      for (ii = 0; ii < num_regs; ii++)
+        {
+          regno_t regno = reg_list[ii];
+          /* Each piece is appended at the current end of BUF.  */
+          if (style == 0)
+            snprintf (buf + strlen (buf), sz - strlen (buf),
+                      "%c%d", ii ? '|' : '{', regno);
+          else
+            {
+              if (num_regs == 2)
+                snprintf (buf + strlen (buf), sz - strlen (buf),
+                          "%d%s", regno, !ii ? " or " : "");
+              else
+                {
+                  /* 3 or more items in list */
+                  if (ii < num_regs - 2)
+                    snprintf (buf + strlen (buf), sz - strlen (buf),
+                              "%d, ", regno);
+                  else if (ii == num_regs - 2)
+                    snprintf (buf + strlen (buf), sz - strlen (buf),
+                              "%d, or ", regno);
+                  else
+                    snprintf (buf + strlen (buf), sz - strlen (buf),
+                              "%d", regno);
+                }
+            }
+        }
+      if (style == 0)
+        snprintf (buf + strlen (buf), sz - strlen (buf), "}");
+    }
+  return buf;
+}
+
+/* Without HWC_DEBUG, hwcentry_print() expands to nothing, so its arguments
+   are not evaluated and a call leaves only an empty statement.  */
+#if !HWC_DEBUG
+#define hwcentry_print(lvl,x1,x2)
+#else
+
+/* print a Hwcentry */
+/* Emits one trace line at level LVL: HEADER, then the fields of PENTRY
+   (NULL strings are shown as the text NULL), then the register list
+   formatted by get_regnolist() in style 0.  PENTRY must not be NULL.  */
+static void
+hwcentry_print (int lvl, const char * header, const Hwcentry *pentry)
+{
+ char buf[1024];
+ Tprintf (lvl, "%s '%s', '%s', %d, '%s', %d, %d, %d, %d, %d, %d, /",
+ header,
+ pentry->name ? pentry->name : "NULL",
+ pentry->int_name ? pentry->int_name : "NULL",
+ pentry->reg_num,
+ pentry->metric ? pentry->metric : "NULL",
+ pentry->lval, /* low-resolution/long run */
+ pentry->val, /* normal */
+ pentry->hval, /* high-resolution/short run */
+ pentry->timecvt,
+ pentry->memop, /* type of instruction that can trigger */
+ pentry->sort_order);
+ get_regnolist (buf, sizeof (buf), pentry->reg_list, 0);
+ Tprintf (lvl, "%s\n", buf);
+}
+#endif
+
+/* Add register REGNO to PCTR's register list, creating the list on first
+   use.  The list has MAX_PICS slots and unused slots hold REGNO_ANY.
+   Passing REGNO_ANY fills the list with registers 0..cpcx_npics-1 (capped
+   at MAX_PICS).  A duplicate register, or a list that is already full, only
+   produces a debug message.  Nothing happens if PCTR is NULL or the list
+   cannot be allocated.  */
+static void
+regno_add (Hwcentry * pctr, regno_t regno)
+{
+  int jj;
+  regno_t *reg_list;
+  if (!pctr)
+    {
+      Tprintf (0, "hwctable: regno_add(): ERROR: pctr==NULL\n");
+      return;
+    }
+  reg_list = pctr->reg_list;
+  if (!reg_list)
+    {
+      /* create list; size it by the element type, not a pointer to it */
+      reg_list = (regno_t*) malloc (sizeof (*reg_list) * MAX_PICS);
+      if (!reg_list)
+        {
+          hwcentry_print (DBG_LT0, "hwctable: regno_add: ERROR:"
+                          " Out of memory: ", pctr);
+          return;
+        }
+      /* initialize list */
+      for (jj = 0; jj < MAX_PICS; jj++)
+        reg_list[jj] = REGNO_ANY;
+      pctr->reg_list = reg_list;
+    }
+  if (regno == REGNO_ANY)
+    {
+      /* add all counters up to cpcx_npics */
+      for (jj = 0; jj < MAX_PICS && jj < cpcx_npics; jj++)
+        reg_list[jj] = jj;
+      /* Filling every slot is the intended result here, so return before
+         the "list is full" check below.  */
+      return;
+    }
+
+  /* add <regno> to list of registers */
+  for (jj = 0; jj < MAX_PICS; jj++)
+    {
+      if (reg_list[jj] == regno)
+        {
+          hwcentry_print (DBG_LT0, "hwctable: regno_add: WARNING: "
+                          "Duplicate regno: ", pctr);
+          break;
+        }
+      if (reg_list[jj] == REGNO_ANY)
+        {
+          reg_list[jj] = regno;
+          break;
+        }
+    }
+  /* The loop ran off the end: no free slot, so REGNO was not stored.  */
+  if (jj == MAX_PICS)
+    hwcentry_print (DBG_LT0, "hwctable: regno_add: WARNING:"
+                    " regno list is full:", pctr);
+}
+
+/*---------------------------------------------------------------------------*/
+/* utilities for rawlist (list of raw counters with reglist[] filled in) */
+
+/* Look up NAME in ARRAY, a NULL-terminated array of Hwcentry pointers.
+   Returns the first entry whose name compares equal, or NULL when NAME is
+   NULL, ARRAY is NULL, or there is no match.  */
+static Hwcentry *
+ptrarray_find_by_name (Hwcentry** array, const char * name)
+{
+  if (name == NULL)
+    return NULL;
+  Tprintf (DBG_LT3, "hwctable: array_find_by_name(%s):\n", name);
+  if (array == NULL)
+    return NULL; /* not found */
+  for (Hwcentry **cursor = array; *cursor != NULL; cursor++)
+    {
+      if (strcmp ((*cursor)->name, name) == 0)
+        return *cursor;
+    }
+  return NULL; /* not found */
+}
+
+/* Allocate a new Hwcentry that is a shallow copy of *PCTR.
+   Only the name is duplicated; every other pointer member still refers to
+   the storage PCTR refers to.  Returns the new node, or NULL if either the
+   node or the name copy cannot be allocated.  */
+static Hwcentry *
+alloc_shallow_copy (const Hwcentry *pctr)
+{
+  Hwcentry *node = (Hwcentry *) malloc (sizeof (Hwcentry));
+  if (!node)
+    return NULL; // fail
+  *node = *pctr; /* shallow copy! */
+  if (pctr->name)
+    {
+      node->name = strdup (pctr->name);
+      if (!node->name)
+        {
+          /* A node that silently lost its name would later be handed to
+             strcmp() by ptrarray_find_by_name(); fail instead.  */
+          free (node);
+          return NULL;
+        }
+    }
+  return node;
+}
+
+/* Make a shallow copy of *PCTR (see alloc_shallow_copy()) and append it to
+   LIST.  Returns the copy, or NULL if the copy could not be allocated.  */
+static Hwcentry *
+list_append_shallow_copy (ptr_list *list, const Hwcentry *pctr)
+{
+  Hwcentry *copy = alloc_shallow_copy (pctr);
+  if (copy != NULL)
+    ptr_list_add (list, (void*) copy);
+  return copy;
+}
+
+/* Record that counter NAME is available on register REGNO.
+   If LIST has no entry called NAME yet, one is created from empty_ctr and
+   appended; REGNO is then added to the entry's register list.  Returns the
+   entry, or NULL if a new entry could not be created.  */
+static Hwcentry *
+list_add (ptr_list *list, uint_t regno, const char *name)
+{
+  Hwcentry *entry = ptrarray_find_by_name ((Hwcentry**) list->array, name);
+  if (entry == NULL)
+    {
+      Hwcentry seed = empty_ctr;
+      seed.name = (char *) name;
+      entry = list_append_shallow_copy (list, &seed);
+      if (entry == NULL)
+        return NULL;
+    }
+  regno_add (entry, regno);
+  return entry;
+}
+
+/*---------------------------------------------------------------------------*/
+/* utilities for stdlist (table of aliased, hidden, & convenience, ctrs) */
+
+/* Return the cputabs[] row whose tag equals CPUID.  When no row matches,
+   log a warning and return the final row (tag 0), which describes the
+   unknown-CPU case; the result is therefore never NULL.  */
+static cpu_list_t*
+cputabs_find_entry (int cpuid)
+{
+  cpu_list_t *row = cputabs;
+  /* the row tagged 0 ends the table */
+  while (row->cputag != 0)
+    {
+      if (row->cputag == cpuid)
+        return row;
+      row++;
+    }
+  Tprintf (0, "hwctable: cputabs_find_entry: WARNING: "
+           "cpu_id = %d not defined. No 'standard' counters are available\n",
+           cpuid);
+  return row;
+}
+
+/* Return the Hwcentry table registered in cputabs[] for CPUID, or NULL if
+   the lookup yields no row.  */
+static Hwcentry*
+stdlist_get_table (int cpuid)
+{
+  cpu_list_t *row = cputabs_find_entry (cpuid);
+  return row ? row->stdlist_table : NULL;
+}
+
+/* Search ARRAY, a NULL-terminated array of Hwcentry pointers, for an entry
+   called NAME.
+
+   INT_NAME narrows the match: when it is non-empty and the candidate has
+   an int_name of its own, the candidate's int_name must occur somewhere
+   inside INT_NAME.  A NULL or empty INT_NAME matches everything.
+
+   With CHECK_REGNO set, a candidate is also skipped if its register list
+   is empty or if regno_is_valid() rejects REGNO for it.  The original
+   notes here say that REGNO_ANY acts as a wildcard.
+
+   Returns the first acceptable entry, or NULL.  */
+static const Hwcentry *
+ptrarray_find (const Hwcentry **array, const char *name, const char *int_name,
+               int check_regno, regno_t regno)
+{
+  if (array == NULL)
+    return NULL;
+  int filter_int_name = (int_name != NULL && int_name[0] != 0);
+  for (const Hwcentry **slot = array; *slot != NULL; slot++)
+    {
+      const Hwcentry *cand = *slot;
+      if (strcmp (cand->name, name) != 0)
+        continue;
+      if (filter_int_name && cand->int_name
+          && strstr (int_name, cand->int_name) == NULL)
+        continue;
+      if (!check_regno)
+        return cand;
+
+      /* duplicates aliases are allowed in table because of 6759307 */
+      if (REG_LIST_IS_EMPTY (cand->reg_list))
+        {
+          /* skip aliases that don't have a valid list of registers */
+          hwcentry_print (1, "hwctable: stdlist_find_by_name:"
+                          " WARNING: alias found, but event not supported by HW:",
+                          cand);
+          continue;
+        }
+      if (!regno_is_valid (cand, regno))
+        {
+          hwcentry_print (1, "hwctable: stdlist_find_by_name():"
+                          " WARNING: alias found, but regno doesn't match:",
+                          cand);
+          continue;
+        }
+      return cand;
+    }
+  return NULL;
+}
+
+/* Search a static Hwcentry table (terminated by a row whose name is NULL)
+   with the same matching rules as ptrarray_find().
+
+   A temporary NULL-terminated array of pointers to the rows is built so
+   that ptrarray_find() can do the search; it is freed before returning.
+   Returns the matching row, or NULL if TABLE is NULL or empty, the
+   temporary array cannot be allocated, or nothing matches.  */
+static const Hwcentry *
+static_table_find (const Hwcentry *table, const char *name, const char *int_name,
+                   int check_regno, regno_t regno)
+{
+  int count = 0;
+  if (table != NULL)
+    while (table[count].name != NULL)
+      count++;
+  if (count == 0)
+    return NULL;
+
+  const Hwcentry **view = calloc (count + 1, sizeof (void*));
+  if (view == NULL)
+    return NULL;
+  for (int k = 0; k < count; k++)
+    view[k] = table + k;
+  view[count] = NULL;
+
+  const Hwcentry *hit = ptrarray_find (view, name, int_name, check_regno, regno);
+  free (view);
+  return hit;
+}
+
+/* Without HWC_DEBUG, stdlist_print() expands to nothing.  */
+#if !HWC_DEBUG
+#define stdlist_print(dbg_lvl,table)
+#else
+
+/* print all Hwcentries in standard table. Check for weird stuff */
+/* Walks TABLE up to the row whose name is NULL, printing each row at
+   DBG_LVL and reporting three kinds of inconsistency:
+     - a row with an empty register list that has an int_name or no metric,
+     - a reg_num that regno_is_valid() rejects for the row,
+     - a standard or raw row that names one specific register although
+       several registers are valid for it.  */
+static void
+stdlist_print (int dbg_lvl, const Hwcentry* table)
+{
+ const Hwcentry *pctr;
+ if (!table)
+ {
+ Tprintf (0, "hwctable: stdlist_print: ERROR: "
+ "table is invalid.\n");
+ return;
+ }
+ for (pctr = table; pctr->name; pctr++)
+ {
+ int ii;
+ hwcentry_print (dbg_lvl, "hwctable: stdlist: ", pctr);
+ if (REG_LIST_IS_EMPTY (pctr->reg_list))
+ {
+ if (pctr->int_name || !pctr->metric)
+ hwcentry_print (DBG_LT1, "hwctable: stdlist_print: WARNING: "
+ "no hardware event found for table entry", pctr);
+ continue;
+ }
+ /* check if incorrect reg_num used in table */
+ if (!regno_is_valid (pctr, pctr->reg_num))
+ {
+ hwcentry_print (DBG_LT0, "hwctable: stdlist_print: ERROR: "
+ "reg_num is not in table. ", pctr);
+ continue;
+ }
+ /* count the registers in the list; ii ends up as that count */
+ for (ii = 0; ii < MAX_PICS; ii++)
+ {
+ regno_t regno = pctr->reg_list[ii];
+ if (REG_LIST_EOL (regno))
+ break;
+ }
+ if (ii > 1 && pctr->reg_num != REGNO_ANY)
+ {
+ /* several regnos were valid, but only one can be specified */
+ if (pctr->metric || !pctr->int_name)
+ {
+ /* pctr is standard or a raw definition */
+ /* (pctr is not an alias like cycles0) */
+ hwcentry_print (DBG_LT0, "hwctable: stdlist_print: ERROR: "
+ "regno in table should have been REGNO_ANY. ",
+ pctr);
+ }
+ }
+ }
+}
+#endif
+
+/*---------------------------------------------------------------------------*/
+/* utilities for init */
+
+/* Try to bind counters to hw.  Return 0 on success, nonzero otherwise.
+
+   <entries> holds <numctrs> counter definitions.  They are bound with
+   hwcfuncs_bind_hwcentry(), started through the driver returned by
+   get_hwcdrv(), sampled once, and finally released again.  A failure of
+   the bind or start step is returned to the caller; a failure of the
+   sample read is only logged and does not make the test fail.  */
+static int
+test_hwcs (const Hwcentry* entries[], unsigned numctrs)
+{
+ int rc = -1;
+ hwc_event_t sample;
+ int created = 0;
+ hwcdrv_api_t *hwcdrv = get_hwcdrv ();
+ Tprintf (DBG_LT2, "hwctable: test_hwcs()...\n");
+ rc = hwcfuncs_bind_hwcentry (entries, numctrs);
+ if (rc)
+ {
+ Tprintf (0, "hwctable: WARNING: test "
+ "counters could not be created\n");
+ goto end_test_hwcs;
+ }
+ /* from here on the counters must be released in the wrapup code */
+ created = 1;
+ /* ignore HWCFUNCS_SIGNAL (done once per process) before starting */
+ if (!signals_disabled)
+ {
+ (void) signal (HWCFUNCS_SIGNAL, SIG_IGN);
+ signals_disabled = 1;
+ }
+ rc = hwcdrv->hwcdrv_start ();
+ if (rc)
+ {
+ Tprintf (0, "hwctable: WARNING: test "
+ "counters could not be started\n");
+ goto end_test_hwcs;
+ }
+ rc = hwcdrv->hwcdrv_read_events (&sample, NULL);
+ if (rc)
+ Tprintf (0, "hwctable: WARNING: test sample failed\n");
+ /* a failed sample is reported above but still counts as a pass */
+ rc = 0;
+#if HWC_DEBUG
+ {
+ unsigned ii;
+ Tprintf (DBG_LT1, "hwctable: test_hwcs(");
+ for (ii = 0; ii < numctrs; ii++)
+ Tprintf (DBG_LT1, "%s%s", ii ? "," : "", entries[ii]->name);
+ Tprintf (DBG_LT1, ") PASS\n");
+ }
+#endif
+
+end_test_hwcs:
+ /* free the counters only if binding succeeded; a failure to free is
+    logged but does not change the returned status */
+ if (created && hwcdrv->hwcdrv_free_counters ())
+ Tprintf (0, "hwctable: WARNING: test counters could not be freed\n");
+ return rc;
+}
+
+#if !HWC_DEBUG
+#define check_tables()
+#else
+
+/* Check for typos in tables.
+
+   Walks every cputabs[] entry up to the one whose cputag is 0 (CPC_KPROF
+   entries are skipped) and inspects each Hwcentry of its stdlist_table up
+   to the entry with a NULL name.  Entries are classified by which of
+   int_name / metric are set, and suspicious combinations are reported
+   with Tprintf().  Nothing is modified.  Only compiled when HWC_DEBUG is
+   set; otherwise check_tables() expands to nothing.  */
+static void
+check_tables ()
+{
+ int i;
+ /* now search the known table of counters */
+ for (i = 0;; i++)
+ {
+ Hwcentry * pentry;
+ int cputag = cputabs[i].cputag;
+ /* a cputag of 0 marks the end of cputabs[] */
+ if (cputag == 0)
+ break;
+ if (cputag == CPC_KPROF)
+ continue;
+ pentry = cputabs[i].stdlist_table;
+ /* a NULL stdlist_table is skipped; a NULL name ends the table */
+ for (; pentry; pentry++)
+ {
+ if (!pentry->name)
+ break;
+ if (!pentry->int_name)
+ {/* internal, only to supply ABST and timecvt */
+ if (pentry->metric)
+ Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:"
+ " internal && metric @%d, %s\n", cputag, pentry->name);
+ if (pentry->reg_num != REGNO_ANY)
+ Tprintf (DBG_LT1, "hwctable: check_tables: WARNING:"
+ " internal && reg_num!=REGNO_ANY @%d, %s\n",
+ cputag, pentry->name);
+ if (pentry->val != PRELOAD_DEF
+ && pentry->memop != ABST_EXACT_PEBS_PLUS1)
+ Tprintf (DBG_LT2, "hwctable: check_tables: INFO:"
+ " internal && custom val=%d @%d, %s\n",
+ pentry->val, cputag, pentry->name);
+#if 0
+ if (!pentry->timecvt && pentry->memop == ABST_NONE)
+ Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:"
+ " internal && not special! @%d, %s\n",
+ cputag, pentry->name);
+#endif
+ }
+ if (pentry->metric)
+ { /* aliased */
+ /* an entry with a metric is expected to name the event it aliases */
+ if (!pentry->int_name)
+ Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:"
+ " aliased && !int_name @%d, %s\n", cputag, pentry->name);
+#if 0
+ else if (!strcmp (pentry->name, pentry->int_name))
+ Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:"
+ " name==int_name @%d, %s\n",
+ cputag, pentry->name);
+#endif
+ if (pentry->reg_num != REGNO_ANY && pentry->reg_num != REGNO_INVALID)
+ Tprintf (DBG_LT1, "hwctable: check_tables: INFO:"
+ " aliased && custom reg_num==%d @%d, %s\n",
+ pentry->reg_num, cputag, pentry->name);
+ if (pentry->reg_num == REGNO_INVALID)
+ Tprintf (DBG_LT2, "hwctable: check_tables: INFO:"
+ " aliased && reg_num==REGNO_INVALID @%d, %s\n",
+ cputag, pentry->name);
+ }
+ if (pentry->int_name && !pentry->metric)
+ { /* convenience */
+ /* a convenience entry must differ from its int_name and must
+    name a specific register */
+ if (!strcmp (pentry->name, pentry->int_name))
+ Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:"
+ " convenience && name==int_name @%d, %s\n",
+ cputag, pentry->name);
+ if (pentry->reg_num == REGNO_ANY)
+ Tprintf (DBG_LT0, "hwctable: check_tables: ERROR:"
+ " convenience && reg_num==REGNO_ANY @%d, %s\n",
+ cputag, pentry->name);
+ }
+ }
+ }
+}
+#endif
+
+static int try_a_counter ();
+static void hwc_process_raw_ctrs (int forKernel, Hwcentry ***pstd_out,
+ Hwcentry ***praw_out, Hwcentry ***phidden_out,
+ Hwcentry**static_tables,
+ Hwcentry **raw_unfiltered_in);
+
+/* Internal call to initialize libs, ctr tables.
+
+   Runs at most once per process (guarded by <initialized>).  It resets
+   the cpcx_* globals, queries the counter driver for the CPU version,
+   name, number of counters, documentation reference and support bitmask,
+   selects the matching static counter table, builds the std/raw/hidden
+   counter lists for both collect (index 0) and kernel (index 1)
+   profiling, optionally test-runs one counter, and finally chooses the
+   default counter string for each mode.
+
+   <skip_hwc_test>: when nonzero, the try_a_counter() test run is skipped.
+
+   On any failure cpcx_npics is left at / reset to 0, which is what
+   callers see as "HW counter profiling not available".  */
+static void
+setup_cpc_general (int skip_hwc_test)
+{
+ const cpu_list_t* cputabs_entry;
+ /* stays nonzero until the quick counter test passes (or is skipped) */
+ int rc = -1;
+ Tprintf (DBG_LT2, "hwctable: setup_cpc()... \n");
+ if (initialized)
+ {
+ Tprintf (0, "hwctable: WARNING: setup_cpc() has already been called\n");
+ return;
+ }
+ /* set before doing any work: hwc_lookup(), called further down, runs
+    setup_cpc_skip_hwctest(), which must then return immediately */
+ initialized = 1;
+ cpcx_cpuver = CPUVER_UNDEFINED;
+ cpcx_cciname = NULL;
+ cpcx_npics = 0;
+ cpcx_docref = NULL;
+ cpcx_support_bitmask = 0;
+ for (int kk = 0; kk < 2; kk++)
+ { // collect-0 and kernel-1
+ cpcx_attrs[kk] = NULL;
+ cpcx_std[kk] = NULL;
+ cpcx_raw[kk] = NULL;
+ cpcx_hidden[kk] = NULL;
+ cpcx_max_concurrent[kk] = 0;
+ cpcx_default_hwcs[kk] = NULL;
+ cpcx_orig_default_hwcs[kk] = NULL;
+ cpcx_has_precise[kk] = 0;
+ }
+ check_tables ();
+ hwcdrv_api_t *hwcdrv = get_hwcdrv ();
+ /* a nonzero init_status means the driver is unusable; give up */
+ if (hwcdrv->hwcdrv_init_status)
+ {
+ Tprintf (0, "WARNING: setup_cpc_general() failed. init_status=%d \n",
+ hwcdrv->hwcdrv_init_status);
+ goto setup_cpc_wrapup;
+ }
+ hwcdrv->hwcdrv_get_info (&cpcx_cpuver, &cpcx_cciname, &cpcx_npics,
+ &cpcx_docref, &cpcx_support_bitmask);
+
+#ifdef DISALLOW_USI_USII_6357446
+ if (cpcx_cpuver == CPC_ULTRA1 || cpcx_cpuver == CPC_ULTRA2)
+ {
+ Tprintf (0, "hwctable: WARNING: setup_cpc(): cpu=%d"
+ " US-I/US-II cannot provide profile interrupts\n", cpcx_cpuver);
+ /* profiling interrupts don't work on US-I, US-II */
+ hwcfuncs_int_logerr (GTXT ("UltraSPARC I and II cannot provide overflow interrupts\n"));
+ goto setup_cpc_wrapup;
+ }
+#endif
+
+#ifdef DISALLOW_PENTIUM_PRO_MMX_7007575
+ if (cpcx_cpuver == CPC_PENTIUM_PRO_MMX)
+ {
+ Tprintf (0, "hwctable: WARNING: setup_cpc(): cpu=%d"
+ " `Pentium Pro with MMX, Pentium II' is not supported\n", cpcx_cpuver);
+ hwcfuncs_int_logerr (GTXT ("libcpc cannot identify processor type\n"));
+ goto setup_cpc_wrapup;
+ }
+#endif
+
+ /* now search the known table of counters */
+ cputabs_entry = cputabs_find_entry (cpcx_cpuver);
+ if (cputabs_entry == NULL)
+ {
+ Tprintf (0, "hwctable: WARNING: setup_cpc(): cpu=%d"
+ " could not be found in the tables\n", cpcx_cpuver);
+ /* strange, should have at least selected "unknownlist" */
+ hwcfuncs_int_logerr (GTXT ("Analyzer CPU table could not be found\n"));
+ goto setup_cpc_wrapup;
+ }
+
+ Hwcentry * valid_cpu_tables[2]; // [0]:static table of counters, [1]:static table of generic counters
+ valid_cpu_tables[0] = cputabs_entry->stdlist_table;
+ if (valid_cpu_tables[0] == NULL)
+ {
+ Tprintf (0, "hwctable: WARNING: setup_cpc(): "
+ " valid_cpu_tables was NULL??\n");
+ /* strange, someone put a NULL in the lookup table? */
+ hwcfuncs_int_logerr (GTXT ("Analyzer CPU table is invalid\n"));
+ goto setup_cpc_wrapup;
+ }
+ valid_cpu_tables[1] = papi_generic_list;
+ Tprintf (DBG_LT2, "hwctable: setup_cpc(): getting descriptions \n");
+ // populate cpcx_raw and cpcx_attr
+ hwcdrv->hwcdrv_get_descriptions (hwc_cb, attrs_cb);
+ for (int kk = 0; kk < 2; kk++)
+ { // collect and er_kernel
+ hwc_process_raw_ctrs (kk, &cpcx_std[kk], &cpcx_raw[kk], &cpcx_hidden[kk],
+ valid_cpu_tables, (Hwcentry**) unfiltered_raw.array);
+ /* the mode "has precise" counters if any raw counter's memop
+    satisfies ABST_MEMSPACE_ENABLED */
+ cpcx_has_precise[kk] = 0;
+ for (int rr = 0; cpcx_raw[kk] && cpcx_raw[kk][rr]; rr++)
+ {
+ int memop = cpcx_raw[kk][rr]->memop;
+ if (ABST_MEMSPACE_ENABLED (memop))
+ {
+ cpcx_has_precise[kk] = 1;
+ break;
+ }
+ }
+ /* both modes share the same attribute array */
+ cpcx_attrs[kk] = (char**) unfiltered_attrs.array;
+ cpcx_max_concurrent[kk] = cpcx_npics;
+ }
+#if 1 // 22897042 - DTrace cpc provider does not support profiling on multiple ctrs on some systems
+ if ((cpcx_support_bitmask & HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID) != HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID)
+ {
+ // kernel profiling only supports one counter if overflowing counter can't be identified
+ cpcx_max_concurrent[1] = cpcx_npics ? 1 : 0;
+ }
+#endif
+
+ /* --- quick test of the cpc interface --- */
+ if (skip_hwc_test)
+ rc = 0;
+ else
+ rc = try_a_counter (0);
+
+ /* initialize the default counter string definition */
+ for (int kk = 0; kk < 2; kk++)
+ {
+ char * default_exp = 0;
+ int jj;
+ /* take the first table-supplied default that hwc_lookup() accepts;
+    default_exp is NULL after the loop if none was accepted */
+ for (jj = 0; (default_exp = cputabs_entry->default_exp_p[jj]); jj++)
+ {
+ /* note: this rc shadows the function-level rc on purpose-or-not;
+    the outer rc is not affected by these lookups */
+ int rc = hwc_lookup (kk, 0, default_exp, NULL, 0, NULL, NULL);
+ if (rc > 0)
+ break;
+ }
+ if (!default_exp)
+ {
+ /* no table default worked: try progressively simpler built-in sets */
+ char * fallback[3] = {NTXT ("insts,,cycles,,l3m"), NTXT ("insts,,cycles"), NTXT ("insts")};
+ for (int ff = 0; ff < 3; ff++)
+ {
+ int rc = hwc_lookup (kk, 0, fallback[ff], NULL, 0, NULL, NULL);
+ if (rc > 0)
+ {
+ /* unlike a table default, a fallback is stored as a heap copy */
+ default_exp = strdup (fallback[ff]);
+ break;
+ }
+ }
+ }
+ cpcx_default_hwcs[kk] = default_exp;
+ cpcx_orig_default_hwcs[kk] = default_exp;
+ }
+
+setup_cpc_wrapup:
+ /* rc is still nonzero if setup bailed out early or the counter test
+    failed; report "no counters" in that case */
+ if (rc)
+ {
+ cpcx_npics = 0;
+ /*
+ ptr_list_free(&tmp_raw); // free stuff... YXXX
+ ptr_list_free(&unfiltered_attrs);
+ */
+ }
+ return;
+}
+
+/* Make sure the counter tables are set up, including a test run of one
+   hardware counter.  Does nothing once <initialized> is set.  */
+static void
+setup_cpcx ()
+{
+  if (!initialized)
+    setup_cpc_general (0);
+}
+
+/* Make sure the counter tables are set up, but skip the test run of a
+   hardware counter.  Does nothing once <initialized> is set.  */
+static void
+setup_cpc_skip_hwctest ()
+{
+  if (!initialized)
+    setup_cpc_general (1);
+}
+
+/* Quick sanity test of the counter interface: bind, start and sample the
+   first counter of the standard list for the given mode.
+
+   <forKernel> selects the list (must satisfy VALID_FOR_KERNEL).
+
+   Returns 0 if the test passed or could not be performed (no standard
+   list, no usable first entry, or the counters are reported busy with
+   HWCFUNCS_ERROR_UNAVAIL), -1 for an invalid <forKernel>, otherwise the
+   nonzero status of hwcfuncs_assign_regnos() or test_hwcs().  */
+static int
+try_a_counter (int forKernel)
+{
+  if (!VALID_FOR_KERNEL (forKernel))
+    return -1;
+  int rc = -1;
+  const Hwcentry * testevent;
+  if (cpcx_std[forKernel] == NULL)
+    {
+      Tprintf (0, "hwctable: WARNING: cpcx_std not initialized\n");
+      return 0; /* consider this an automatic PASS */
+    }
+  /* look for a valid table entry, only try valid_cpu_tables[0] */
+  testevent = cpcx_std[forKernel][0];
+  if (!testevent || !testevent->name)
+    {
+      Tprintf (0, "hwctable: WARNING: no test metric"
+               " available to verify counters\n");
+      return 0; /* consider this an automatic PASS */
+    }
+  if (REG_LIST_IS_EMPTY (testevent->reg_list))
+    return 0; // weird
+
+  /* Work on a private shallow copy so the table entry is never modified.  */
+  Hwcentry tmp_testevent = *testevent;
+  /* Only a name we allocate here may be freed below; an int_name taken
+     over from the table entry is not ours.  */
+  char *owned_int_name = NULL;
+  if (tmp_testevent.int_name == NULL)
+    {
+      /* counter is defined in 'hidden' section of table, supply int_name */
+      owned_int_name = strdup (tmp_testevent.name);
+      tmp_testevent.int_name = owned_int_name;
+    }
+  Hwcentry * test_array[1] = {&tmp_testevent};
+  rc = hwcfuncs_assign_regnos (test_array, 1); /* may modify test_array */
+  if (rc == 0)
+    {
+      rc = test_hwcs ((const Hwcentry**) test_array, 1);
+      if (rc == HWCFUNCS_ERROR_UNAVAIL)
+        {
+          // consider this a pass (allow HWC table to be printed)
+          Tprintf (0, "hwctable: WARNING: "
+                   "cpc_bind_event() shows counters busy; allow to continue\n");
+          rc = 0;
+        }
+      else if (rc)
+        {
+          // failed to start for some other reason
+          Tprintf (0, "hwctable: WARNING: "
+                   "test of counter '%s' failed\n",
+                   testevent->name);
+        }
+    }
+  /* The temporary event lives on this stack frame, so nothing may refer to
+     it after we return; release the name allocated for it as well.
+     NOTE(review): assumes the bind path keeps its own copy of int_name
+     rather than the pointer -- confirm in hwcfuncs_bind_hwcentry().  */
+  free (owned_int_name);
+  return rc;
+}
+
+/* Recompute hwc->val (the overflow interval) from hwc->ref_val and the
+   requested minimum time between overflows.
+
+   The first call on an entry (ref_val == 0) records the current val as
+   the reference.  The time used is hwc->min_time, or
+   hwc->min_time_default when min_time is HWCTIME_TBD.  A time of 0 means
+   time-based intervals are disabled and val is left alone.  The result
+   is capped at MAX_INT_VAL.  */
+void
+hwc_update_val (Hwcentry *hwc)
+{
+  /* save original reference */
+  if (hwc->ref_val == 0)
+    hwc->ref_val = hwc->val;
+
+  hrtime_t interval = hwc->min_time;
+  if (interval == HWCTIME_TBD)
+    interval = hwc->min_time_default;
+
+  /* disable time-based intervals: do not modify val */
+  if (interval == 0)
+    return;
+
+  int64_t scaled;
+  if (interval == HWCTIME_ON || interval == HWCTIME_TBD)
+    scaled = HWC_VAL_ON (hwc->ref_val);
+  else if (interval == HWCTIME_LO)
+    scaled = HWC_VAL_LO (hwc->ref_val);
+  else if (interval == HWCTIME_HI)
+    scaled = HWC_VAL_HI (hwc->ref_val);
+  else
+    scaled = HWC_VAL_CUSTOM (hwc->ref_val, interval);
+
+#define MAX_INT_VAL (2*1000*1000*1000 + 1000100)// yuck, limited to signed int
+  hwc->val = (scaled >= MAX_INT_VAL) ? MAX_INT_VAL : scaled;
+}
+
+/* Convert value string to value and store result in hwc->val.
+   This function moved here from collctrl.cc.
+
+   Keep the HWCTIME_* definitions in sync with those in
+   collctrl.cc Coll_Ctrl::add_hwcstring().
+
+   <hwc>                   entry to update (min_time_default, min_time,
+                           ref_val and val may be written)
+   <global_min_time_nsec>  stored as the entry's min_time_default
+   <valptr>                NULL, "", "auto", "on", "lo"/"low",
+                           "hi"/"high"/"h", or a number > 0 (any base
+                           accepted by strtoll with base 0)
+
+   An entry whose val is 1 keeps its default and ignores <valptr>.
+   A numeric <valptr> sets val directly and turns off auto-adjustment.
+
+   Returns 0 on success, -1 if <valptr> is not a valid interval.  */
+static int
+set_hwcval (Hwcentry *hwc, hrtime_t global_min_time_nsec, const char *valptr)
+{
+  hwc->min_time_default = global_min_time_nsec;
+  if (hwc->val == 1)
+    {
+      // An interval of 1 is used for certain types of count data.
+      // (er_bit, er_generic, er_rock ...)
+      // Hi and Lo do not apply.
+      /* use the default */
+    }
+  else if (valptr == NULL || valptr[0] == 0 || strcmp (valptr, "auto") == 0)
+    hwc->min_time = HWCTIME_TBD;
+  else if (strcmp (valptr, "on") == 0)
+    hwc->min_time = HWCTIME_ON;
+  else if (strcmp (valptr, "lo") == 0 || strcmp (valptr, "low") == 0)
+    hwc->min_time = HWCTIME_LO;
+  else if (strcmp (valptr, "hi") == 0 || strcmp (valptr, "high") == 0
+           || strcmp (valptr, "h") == 0)
+    hwc->min_time = HWCTIME_HI;
+  else
+    {
+      /* the remaining string should be a number > 0 */
+      char *endchar = NULL;
+      long long tmp = strtoll (valptr, &endchar, 0);
+      if (*endchar != 0 || tmp <= 0)
+        {
+          Tprintf (0, "hwctable: set_hwcval(): ERROR: "
+                   "Invalid counter value %s for counter `%s'\n",
+                   valptr, hwc->name);
+          return -1;
+        }
+      /* Range check before narrowing to int, so that an oversized value
+         gets the specific diagnostic below.  (When the narrowing test came
+         first, it rejected every such value and this branch could never
+         run.)  The second operand also covers strtoll saturating on
+         ERANGE and any platform where int is narrower than 32 bits.  */
+      if (tmp > UINT32_MAX / 2 || (long long) (int) tmp != tmp)
+        {
+          /* Roch B. says that we MUST do this check for er_kernel
+             because some platforms deliver overflow interrupts without
+             identifying which counter overflowed.  The only way to
+             determine which counter overflowed is to have enough
+             margin on 32 bit counters to make sure they don't
+             wrap.
+           */
+          Tprintf (0, "hwctable: set_hwcval(): ERROR: "
+                   "Counter value %s exceeds %lu\n",
+                   valptr, (unsigned long) UINT32_MAX / 2);
+          return -1;
+        }
+      /* set the value (known to be > 0 and to fit in an int here) */
+      if (hwc->ref_val == 0)
+        hwc->ref_val = hwc->val; // save original reference
+      hwc->val = (int) tmp;
+      hwc->min_time = 0; // turn off auto-adjust
+    }
+  hwc_update_val (hwc);
+  return 0;
+}
+
+/* Build a canonical spelling of a counter specification.
+
+   The result is the counter's bare name as split off by
+   hwcfuncs_parse_ctr(), followed -- if the specification carries
+   attributes -- by every attribute as "~<name>=0x<hex value>", ordered
+   by attribute name.  If the attributes cannot be parsed, "~UNKNOWN" is
+   appended instead.  The text is assembled in a 1024 byte buffer and
+   silently truncated to fit.
+
+   Returns a strdup'd string owned by the caller.  */
+static char *
+canonical_name (const char *counter)
+{
+  char *nameOnly = NULL;
+  char *attrs = NULL;
+  char tmpbuf[1024];
+  size_t used;
+
+  tmpbuf[0] = 0;
+  hwcfuncs_parse_ctr (counter, NULL, &nameOnly, &attrs, NULL, NULL);
+  used = strlen (tmpbuf);
+  snprintf (tmpbuf + used, sizeof (tmpbuf) - used, "%s", nameOnly);
+
+  if (attrs)
+    {
+      hwcfuncs_attr_t cpc2_attrs[HWCFUNCS_MAX_ATTRS];
+      unsigned nattrs;
+
+      /* extract attributes from counter */
+      void *attr_mem = hwcfuncs_parse_attrs (counter, cpc2_attrs,
+                                             HWCFUNCS_MAX_ATTRS, &nattrs, NULL);
+      if (attr_mem == NULL)
+        {
+          used = strlen (tmpbuf);
+          snprintf (tmpbuf + used, sizeof (tmpbuf) - used, "~UNKNOWN");
+        }
+      else
+        {
+          /* sort the attributes by name (simple exchange sort) */
+          for (unsigned lo = 0; lo + 1 < nattrs; lo++)
+            for (unsigned hi = lo + 1; hi < nattrs; hi++)
+              if (strcmp (cpc2_attrs[lo].ca_name, cpc2_attrs[hi].ca_name) > 0)
+                {
+                  hwcfuncs_attr_t swap = cpc2_attrs[hi];
+                  cpc2_attrs[hi] = cpc2_attrs[lo];
+                  cpc2_attrs[lo] = swap;
+                }
+
+          /* print attributes in canonical format */
+          for (unsigned idx = 0; idx < nattrs; idx++)
+            {
+              used = strlen (tmpbuf);
+              snprintf (tmpbuf + used, sizeof (tmpbuf) - used,
+                        "~%s=0x%llx", cpc2_attrs[idx].ca_name,
+                        (long long) cpc2_attrs[idx].ca_val);
+            }
+          free (attr_mem);
+        }
+    }
+
+  free (nameOnly);
+  free (attrs);
+  return strdup (tmpbuf);
+}
+
+/* Process counter and value strings - put results in <*pret_ctr>.
+
+   <forKernel>             selects which of the cpcx_std/cpcx_hidden/cpcx_raw
+                           lists are searched, and the tool name used in
+                           messages
+   <global_min_time_nsec>  passed on to set_hwcval()
+   <counter>               one counter token as typed by the user (may
+                           carry a leading +/-, attributes and a register)
+   <value>                 the matching interval token, or NULL
+   <pret_ctr>              receives a shallow copy of the table entry with
+                           freshly allocated name and int_name
+   <UWbuf>,<UWsz>          buffer for warnings; cleared on entry
+   <UEbuf>,<UEsz>          buffer for errors; cleared on entry
+
+   Returns 0 on success, -1 otherwise.
+   Print errors to UEbuf for any failure that results in nonzero return.  */
+static int
+process_ctr_def (int forKernel, hrtime_t global_min_time_nsec,
+ const char *counter, const char *value, Hwcentry *pret_ctr,
+ char* UWbuf, size_t UWsz, char* UEbuf, size_t UEsz)
+{
+ int rc = -1;
+ char *nameOnly = NULL;
+ char *attrs = NULL;
+ char *regstr = NULL;
+ int plus;
+ regno_t regno;
+ const Hwcentry *pfound = NULL;
+ const char *uname = NULL;
+ int disable_backtrack;
+ UEbuf[0] = 0;
+ UWbuf[0] = 0;
+ Tprintf (DBG_LT3, "hwctable: process_ctr_def(): counter=%s value=%s \n",
+ counter, value ? value : "NULL");
+ /* split the token; nameOnly, attrs and regstr are freed in the wrapup */
+ hwcfuncs_parse_ctr (counter, &plus, &nameOnly, &attrs, &regstr, &regno);
+
+ /* search for the counter in the std and raw lists */
+ /* Search order: std list and hidden list with the requested regno, the
+    raw list by name only, then std and hidden again with REGNO_ANY, and
+    finally a purely numeric name.  */
+ {
+ pfound = ptrarray_find ((const Hwcentry**) cpcx_std[forKernel], nameOnly, NULL, 1, regno);
+ if (pfound)
+ hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in stdlist:",
+ pfound);
+ }
+ if (!pfound)
+ {
+ pfound = ptrarray_find ((const Hwcentry**) cpcx_hidden[forKernel], nameOnly, NULL, 1, regno);
+ if (pfound)
+ hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in stdlist(hidden):", pfound);
+ }
+ if (!pfound)
+ {
+ pfound = ptrarray_find_by_name (cpcx_raw[forKernel], nameOnly); /* (regno match checked later) */
+ if (pfound)
+ hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in rawlist:", pfound);
+ }
+ if (!pfound)
+ {
+ /* retried with a wildcard so that the regno error further down can
+    list the registers that would have been valid */
+ pfound = ptrarray_find ((const Hwcentry**) cpcx_std[forKernel], nameOnly, NULL, 1, REGNO_ANY);
+ if (pfound)
+ hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in stdlist but regno didn't match:", pfound);
+ }
+ if (!pfound)
+ {
+ pfound = ptrarray_find ((const Hwcentry**) cpcx_hidden[forKernel], nameOnly, NULL, 1, REGNO_ANY);
+ if (pfound)
+ hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: found in stdlist(hidden) but regno didn't match:", pfound);
+ }
+ if (!pfound)
+ {
+ /* last resort: accept a name that is_numeric() recognizes as a number */
+ uint64_t val = 0;
+ if (is_numeric (nameOnly, &val))
+ {
+ /* NOTE(review): nothing in this function releases tmp or the name
+    duplicated into it -- see the original "Leaks?" remark */
+ Hwcentry *tmp = alloc_shallow_copy (&empty_ctr); // Leaks?
+ if (tmp)
+ {
+ tmp->name = strdup (nameOnly);
+ regno_add (tmp, REGNO_ANY);
+ pfound = tmp;
+ }
+ }
+ if (pfound)
+ hwcentry_print (DBG_LT1, "hwctable: process_ctr_def: counter specified by numeric value:", pfound);
+ }
+ if (!pfound)
+ {
+ snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
+ GTXT ("Invalid HW counter name: %s\n"), nameOnly);
+ snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
+ GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
+ (IS_KERNEL (forKernel) ? "er_kernel" : "collect"));
+ goto process_ctr_def_wrapup;
+ }
+
+ /* counter found */
+ *pret_ctr = *pfound; /* shallow copy */
+ pret_ctr->int_name = NULL; /* so free doesn't try to free these pfound's ptrs */
+ pret_ctr->name = NULL; /* so free doesn't try to free these pfound's ptrs */
+
+ /* update uname,memop */
+ /* uname is the user's token, advanced past the leading +/- whenever that
+    prefix is not honored */
+ uname = counter;
+ disable_backtrack = 0;
+ if (plus != 0 || ABST_PLUS_BY_DEFAULT (pret_ctr->memop))
+ {
+ // attempt to process memoryspace profiling
+ int message_printed = 0;
+ if (cpcx_cpuver == CPUVER_GENERIC)
+ {
+ // accept plus, since we don't know what this CPU is
+ /* NOTE(review): this text goes to UEbuf, not UWbuf, although
+    processing continues -- confirm that this is intended */
+ snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
+ GTXT ("`+' may not be correctly supported on `%s' because processor is not recognized."),
+ cpcx_cciname);
+ pret_ctr->memop = ABST_LDST; // supply a backtracking data type - required for collector
+ }
+ else if (cpcx_cpuver == CPC_ULTRA1 || cpcx_cpuver == CPC_ULTRA2
+ || cpcx_cpuver == CPC_ULTRA3 || cpcx_cpuver == CPC_ULTRA3_PLUS
+ || cpcx_cpuver == CPC_ULTRA3_I || cpcx_cpuver == CPC_ULTRA4_PLUS
+ || cpcx_cpuver == CPC_ULTRA4 || cpcx_cpuver == CPC_ULTRA_T1
+ || cpcx_cpuver == CPC_ULTRA_T2 || cpcx_cpuver == CPC_ULTRA_T2P
+ || cpcx_cpuver == CPC_ULTRA_T3)
+ {
+ if (!ABST_BACKTRACK_ENABLED (pret_ctr->memop))
+ disable_backtrack = 1;
+ }
+ else if (cpcx_cpuver == CPC_SPARC_T4 || cpcx_cpuver == CPC_SPARC_T5
+ || cpcx_cpuver == CPC_SPARC_T6 || cpcx_cpuver == CPC_SPARC_M4
+ || cpcx_cpuver == CPC_SPARC_M5 || cpcx_cpuver == CPC_SPARC_M6
+ || cpcx_cpuver == CPC_SPARC_M7 || cpcx_cpuver == CPC_SPARC_M8)
+ {
+ if (pret_ctr->memop != ABST_EXACT)
+ disable_backtrack = 1;
+ }
+ else if (cpcx_cpuver == CPC_INTEL_NEHALEM || cpcx_cpuver == CPC_INTEL_WESTMERE
+ || cpcx_cpuver == CPC_INTEL_SANDYBRIDGE
+ || cpcx_cpuver == CPC_INTEL_IVYBRIDGE
+ || cpcx_cpuver == CPC_INTEL_HASWELL
+ || cpcx_cpuver == CPC_INTEL_BROADWELL
+ || cpcx_cpuver == CPC_INTEL_SKYLAKE)
+ {
+ if (pret_ctr->memop != ABST_EXACT_PEBS_PLUS1)
+ disable_backtrack = 1;
+ else if (plus < 0)
+ {
+ // disabling memoryspace not supported for
+ // remove specified -
+ uname++;
+ plus = 0;
+ snprintf (UWbuf + strlen (UWbuf), UWsz - strlen (UWbuf),
+ GTXT ("Warning: `-' is not supported on `%s' -- memory reference backtracking will remain enabled for this counter\n"),
+ nameOnly);
+ }
+ }
+ else
+ {
+ /* any other CPU: warn here and suppress the generic warning below */
+ message_printed = 1;
+ snprintf (UWbuf + strlen (UWbuf), UWsz - strlen (UWbuf),
+ GTXT ("Warning: `+' is not supported on `%s' -- memory reference backtracking will not be enabled for `%s'\n"),
+ cpcx_cciname, nameOnly);
+ disable_backtrack = 1;
+ }
+ if (disable_backtrack)
+ {
+ if (plus != 0)
+ uname++; // remove specified + or -
+ if (!message_printed && plus > 0)
+ snprintf (UWbuf + strlen (UWbuf), UWsz - strlen (UWbuf),
+ GTXT ("Warning: `+' is not supported on `%s' -- memory reference backtracking will not be enabled for this counter\n"),
+ nameOnly);
+ }
+ }
+ else
+ disable_backtrack = 1;
+ /* without backtracking (or with an explicit '-') the memop is reset to
+    ABST_NONE, except that ABST_NOPC is always kept */
+ if (disable_backtrack || plus < 0)
+ if (pret_ctr->memop != ABST_NOPC)
+ pret_ctr->memop = ABST_NONE;
+ if (pret_ctr->memop == ABST_NOPC)
+ snprintf (UWbuf + strlen (UWbuf), UWsz - strlen (UWbuf),
+ GTXT ("Warning: HW counter `%s' is not program-related -- callstacks will be not be recorded for this counter\n"),
+ uname);
+
+ /* update reg_num */
+ if (!regno_is_valid (pfound, regno))
+ {
+ char buf[1024];
+ /* regstr + 1 skips the first character of the register token */
+ snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
+ GTXT ("For counter `%s', %s is not a valid register; valid registers: %s\n"),
+ nameOnly, regstr ? regstr + 1 : "?",
+ get_regnolist (buf, sizeof (buf), pfound->reg_list, 1));
+ goto process_ctr_def_wrapup;
+ }
+ if (pret_ctr->reg_num == REGNO_ANY)
+ { /* table's regno is a wildcard */
+ if (REG_LIST_EOL (pfound->reg_list[1]))
+ {
+ /* valid list only contains one regno, so use it */
+ pret_ctr->reg_num = pfound->reg_list[0];
+ }
+ else
+ pret_ctr->reg_num = regno; /* use user's selection */
+ }
+
+ /* update name and int_name */
+ {
+ // validate attributes
+ if (attrs)
+ {
+ hwcfuncs_attr_t cpc2_attrs[HWCFUNCS_MAX_ATTRS];
+ void * attr_mem;
+ unsigned nattrs;
+ char *errbuf;
+ /* extract attributes from uname */
+ attr_mem = hwcfuncs_parse_attrs (uname, cpc2_attrs, HWCFUNCS_MAX_ATTRS,
+ &nattrs, &errbuf);
+ if (!attr_mem)
+ {
+ /* presumably hwcfuncs_parse_attrs() sets errbuf to an allocated
+    message on failure -- TODO confirm, errbuf is not initialized here */
+ snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
+ "%s\n", errbuf);
+ free (errbuf);
+ goto process_ctr_def_wrapup;
+ }
+ /* make sure all attributes are valid */
+ for (unsigned ii = 0; ii < nattrs; ii++)
+ {
+ if (!attr_is_valid (forKernel, cpc2_attrs[ii].ca_name))
+ {
+ snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
+ GTXT ("Invalid attribute specified for counter `%s': %s\n"),
+ nameOnly, cpc2_attrs[ii].ca_name);
+ snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
+ GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
+ (IS_KERNEL (forKernel) ? "er_kernel" : "collect"));
+ free (attr_mem);
+ goto process_ctr_def_wrapup;
+ }
+ /* reject an attribute name that appears more than once */
+ for (unsigned jj = ii + 1; jj < nattrs; jj++)
+ {
+ if (strcmp (cpc2_attrs[ii].ca_name,
+ cpc2_attrs[jj].ca_name) == 0)
+ {
+ snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
+ GTXT ("Duplicate attribute specified for counter `%s': %s\n"),
+ nameOnly, cpc2_attrs[ii].ca_name);
+ free (attr_mem);
+ goto process_ctr_def_wrapup;
+ }
+ }
+ }
+ free (attr_mem);
+ }
+ /* the result's name is the user's spelling, minus a dropped +/- */
+ pret_ctr->name = strdup (uname);
+
+ // assign int_name
+ if (pfound->int_name)
+ {
+ // Counter is one of the following:
+ // - aliased (e.g. cycles~system=1),
+ // - convenience (e.g. cycles0~system=1),
+ if (!attrs) // convert alias to internal name
+ pret_ctr->int_name = strdup (pfound->int_name);
+ else
+ {
+ // convert alias to internal name and
+ // append user-supplied attributes
+ size_t sz = strlen (pfound->int_name) + strlen (attrs) + 1;
+ char *tbuf = calloc (sz, 1);
+ /* on allocation failure int_name is simply left NULL */
+ if (tbuf)
+ snprintf (tbuf, sz, "%s%s", pfound->int_name, attrs);
+ pret_ctr->int_name = tbuf;
+ }
+ }
+ else
+ pret_ctr->int_name = strdup (uname); // user-supplied name
+ }
+
+ /* update val */
+ if (set_hwcval (pret_ctr, global_min_time_nsec, value))
+ {
+ snprintf (UEbuf + strlen (UEbuf), UEsz - strlen (UEbuf),
+ GTXT ("Invalid interval for HW counter `%s': %s\n"),
+ nameOnly, value);
+ goto process_ctr_def_wrapup;
+ }
+ hwcentry_print (DBG_LT2, "hwctable: process_ctr_def:", pret_ctr);
+ rc = 0;
+
+process_ctr_def_wrapup:
+ /* release the pieces produced by hwcfuncs_parse_ctr(); the name and
+    int_name stored in *pret_ctr stay with the caller */
+ free (regstr);
+ free (attrs);
+ free (nameOnly);
+ return rc;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* external interfaces, see hwcentry.h for descriptions. */
+
+/* Parse a counter specification string and look up each counter in it.
+
+   <forKernel>             selects the counter lists and limits to use
+   <global_min_time_nsec>  default minimum overflow time for the counters
+   <instring>              list of tokens separated by ',' or whitespace,
+                           alternating counter name and interval
+                           ("ctr1,val1,ctr2,val2..."); a missing or empty
+                           interval selects the default
+   <caller_entries>        array of <maxctrs> pointers to entries that
+                           receive the results, or NULL if the caller only
+                           wants to know whether <instring> is acceptable
+                           (<maxctrs> is then ignored)
+   <emsg>, <wmsg>          if non-NULL, receive a strdup'd error / warning
+                           text (caller frees), or NULL if there is none
+
+   Returns the number of counters found (> 0), or -1 on failure.  */
+extern int
+hwc_lookup (int forKernel, hrtime_t global_min_time_nsec, const char *instring,
+            Hwcentry *caller_entries[], unsigned maxctrs, char **emsg, char **wmsg)
+{
+  unsigned ii;
+  char *instr_copy = NULL, *ss = NULL;
+  unsigned numctrs = 0;
+  int rc = 0;
+  char *tokenptr[MAX_PICS * 2];
+  unsigned numtokens = 0;
+  char UEbuf[1024 * 5]; /* error message buffer; strdup of it is passed back to user */
+  char UWbuf[1024 * 5]; /* warning message buffer; strdup of it is passed back to user */
+  if (emsg)
+    *emsg = NULL;
+  if (wmsg)
+    *wmsg = NULL;
+  UEbuf[0] = 0;
+  UWbuf[0] = 0;
+
+  // supply temporary result buffers as needed
+  Hwcentry tmp_entry_table[MAX_PICS];
+  Hwcentry * tmp_entries[MAX_PICS];
+  Hwcentry **entries;
+  if (caller_entries)
+    entries = caller_entries;
+  else
+    {
+      // user doesn't care about results; provide temporary storage for results
+      for (ii = 0; ii < MAX_PICS; ii++)
+        tmp_entries[ii] = &tmp_entry_table[ii];
+      entries = tmp_entries;
+      maxctrs = MAX_PICS;
+    }
+  Tprintf (DBG_LT1, "hwctable: hwc_lookup(%s)\n",
+           instring ? instring : "NULL");
+
+  /* clear <entries> first - prevent seg faults in hwc_lookup_wrapup */
+  for (ii = 0; ii < maxctrs; ii++)
+    *entries[ii] = empty_ctr;
+  if (!instring)
+    {
+      snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
+                GTXT ("No HW counters were specified."));
+      rc = -1;
+      goto hwc_lookup_wrapup;
+    }
+
+  /* make sure tables are initialized */
+  setup_cpc_skip_hwctest ();
+  if (cpcx_npics == 0)
+    {
+      if (cpcx_cpuver < 0)
+        {
+          char buf[1024];
+          *buf = 0;
+          char *pch = hwcfuncs_errmsg_get (buf, sizeof (buf), 0); /* get first err msg, disable capture */
+          if (*pch)
+            snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
+                      GTXT ("HW counter profiling is not supported on this system: %s%s"),
+                      pch, pch[strlen (pch) - 1] == '\n' ? "" : "\n");
+          else
+            snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
+                      GTXT ("HW counter profiling is not supported on this system\n"));
+        }
+      else
+        snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
+                  GTXT ("HW counter profiling is not supported on '%s'\n"),
+                  cpcx_cciname);
+      rc = -1;
+      goto hwc_lookup_wrapup;
+    }
+
+  /* tokenize a private copy; the tokens point into instr_copy */
+  ss = instr_copy = strdup (instring);
+  if (!instr_copy)
+    {
+      /* without the copy there is nothing to tokenize; fail cleanly
+         instead of dereferencing NULL below */
+      snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
+                GTXT ("Out of memory\n"));
+      rc = -1;
+      goto hwc_lookup_wrapup;
+    }
+  while (*ss != 0 && (*ss == ' ' || *ss == '\t'))
+    ss++;
+  tokenptr[numtokens++] = ss;
+  do
+    {
+      /* find end of previous token, replace w/ NULL, skip whitespace, set <tokenptr>, repeat */
+      for (; *ss; ss++)
+        {
+          if (*ss == ',' || *ss == ' ' || *ss == '\t')
+            {
+              /* end of previous token found */
+              *ss = 0; /* terminate the previous token */
+              ss++;
+              while (*ss != 0 && (*ss == ' ' || *ss == '\t'))
+                ss++;
+              if (*ss)
+                tokenptr[numtokens++] = ss;
+              break; // from for loop
+            }
+        }
+    }
+  while (*ss && numtokens < (MAX_PICS * 2));
+
+  /* input left over after the token array filled up: too many tokens */
+  if (*ss)
+    {
+      snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
+                GTXT ("The number of HW counters specified exceeds internal resources\n"));
+      snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
+                GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
+                (IS_KERNEL (forKernel) ? "er_kernel" : "collect"));
+      rc = -1;
+      goto hwc_lookup_wrapup;
+    }
+  Tprintf (DBG_LT3, "hwctable: hwc_lookup(): numtokens=%d\n", numtokens);
+
+  /* look up individual counters; tokens come in (counter, value) pairs */
+  for (ii = 0; ii < numtokens && numctrs < maxctrs; ii += 2)
+    {
+      const char *counter;
+      const char *value;
+      Hwcentry *pret_ctr = entries[numctrs];
+
+      /* assign the tokens to ctrnames, timeoutValues. */
+      counter = tokenptr[ii];
+      if (ii + 1 < numtokens)
+        value = tokenptr[ii + 1];
+      else
+        value = 0;
+      if (process_ctr_def (forKernel, global_min_time_nsec, counter, value, pret_ctr,
+                           UWbuf + strlen (UWbuf),
+                           sizeof (UWbuf) - strlen (UWbuf),
+                           UEbuf + strlen (UEbuf),
+                           sizeof (UEbuf) - strlen (UEbuf)))
+        {
+          /* stop at the first bad counter; continuing would aggregate
+             the error messages (messy) */
+          rc = -1;
+          goto hwc_lookup_wrapup;
+        }
+      numctrs++;
+    }
+
+  if (!numctrs)
+    {
+      snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
+                GTXT ("No HW counters were specified.\n"));
+      rc = -1;
+      goto hwc_lookup_wrapup;
+    }
+  if (numctrs > cpcx_max_concurrent[forKernel])
+    {
+      snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
+                GTXT ("The HW counter configuration could not be loaded: More than %d counters were specified\n"), cpcx_max_concurrent[forKernel]);
+      snprintf (UEbuf + strlen (UEbuf), sizeof (UEbuf) - strlen (UEbuf),
+                GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
+                (IS_KERNEL (forKernel) ? "er_kernel" : "collect"));
+      rc = -1;
+      goto hwc_lookup_wrapup;
+    }
+
+hwc_lookup_wrapup:
+  free (instr_copy);
+  if (wmsg && strlen (UWbuf))
+    *wmsg = strdup (UWbuf);
+  if (emsg && strlen (UEbuf))
+    *emsg = strdup (UEbuf);
+  /* success: report how many counters were found */
+  if (rc == 0)
+    rc = numctrs;
+  return rc;
+}
+
+/* Check that a set of counters can be used together.
+
+   First looks for two entries that name the same register (REGNO_ANY is
+   ignored); if found, the message lists every entry using that register.
+   Otherwise registers are assigned with hwcfuncs_assign_regnos() and the
+   counters are test-run with test_hwcs().
+
+   <forKernel> only selects the tool name shown in the message.
+
+   Returns NULL if the counters are usable, otherwise a strdup'd error
+   message that the caller must free.  */
+extern char *
+hwc_validate_ctrs (int forKernel, Hwcentry *entries[], unsigned numctrs)
+{
+  char UEbuf[1024 * 5];
+  size_t used;
+  UEbuf[0] = 0;
+
+  /* search for obvious duplicates*/
+  for (unsigned first = 0; first < numctrs; first++)
+    {
+      regno_t reg_a = entries[first]->reg_num;
+      if (reg_a == REGNO_ANY)
+        continue;
+      for (unsigned second = first + 1; second < numctrs; second++)
+        {
+          int reg_b = entries[second]->reg_num;
+          if (reg_a != reg_b)
+            continue;
+
+          /* conflict found: name the register, then list all its users */
+          used = strlen (UEbuf);
+          snprintf (UEbuf + used, sizeof (UEbuf) - used,
+                    GTXT ("Only one HW counter is allowed per register.  The following counters use register %d: \n"),
+                    reg_a);
+          for (unsigned kk = 0; kk < numctrs; kk++)
+            {
+              char buf[256];
+              int reg_k = entries[kk]->reg_num;
+              if (reg_a != reg_k)
+                continue;
+              used = strlen (UEbuf);
+              snprintf (UEbuf + used, sizeof (UEbuf) - used,
+                        GTXT ("  %d. %s\n"), kk + 1,
+                        hwc_hwcentry_specd_string (buf, sizeof (buf),
+                                                   entries[kk]));
+            }
+          return strdup (UEbuf);
+        }
+    }
+
+  /* test counters */
+  hwcfuncs_errmsg_get (NULL, 0, 1); /* enable errmsg capture */
+  int hwc_rc = hwcfuncs_assign_regnos (entries, numctrs);
+  if (!hwc_rc)
+    hwc_rc = test_hwcs ((const Hwcentry**) entries, numctrs);
+  if (!hwc_rc)
+    return NULL;
+
+  if (cpcx_cpuver == CPC_PENTIUM_4_HT || cpcx_cpuver == CPC_PENTIUM_4)
+    {
+      used = strlen (UEbuf);
+      snprintf (UEbuf + used, sizeof (UEbuf) - used,
+                GTXT ("HW counter profiling is disabled unless only one logical CPU per HyperThreaded processor is online (see psradm)\n"));
+      return strdup (UEbuf);
+    }
+
+  char buf[1024];
+  *buf = 0;
+  char * pch = hwcfuncs_errmsg_get (buf, sizeof (buf), 0); /* get first err msg, disable capture */
+  used = strlen (UEbuf);
+  if (*pch)
+    /* add a newline only if the captured message does not end in one */
+    snprintf (UEbuf + used, sizeof (UEbuf) - used,
+              GTXT ("The HW counter configuration could not be loaded: %s%s"),
+              pch, pch[strlen (pch) - 1] == '\n' ? "" : "\n");
+  else
+    snprintf (UEbuf + used, sizeof (UEbuf) - used,
+              GTXT ("The HW counter configuration could not be loaded\n"));
+  used = strlen (UEbuf);
+  snprintf (UEbuf + used, sizeof (UEbuf) - used,
+            GTXT ("Run \"%s -h\" with no other arguments for more information on HW counters on this system.\n"),
+            (IS_KERNEL (forKernel) ? "er_kernel" : "collect"));
+  return strdup (UEbuf);
+}
+
+/* Fill in *pret_ctr for the counter specification COUNTER.
+ *
+ * COUNTER is split with hwcfuncs_parse_ctr() into a bare name, an optional
+ * attribute string and a register number.  The bare name is looked up first
+ * in the table for CPUVER and then in papi_generic_list.  On a hit the table
+ * entry is shallow-copied and its int_name duplicated; on a miss the entry
+ * starts from empty_ctr.  The name, int_name (and, when attributes are
+ * present, metric) fields written here are heap allocated.
+ *
+ * Returns pret_ctr.  */
+extern Hwcentry *
+hwc_post_lookup (Hwcentry * pret_ctr, char *counter, char * int_name, int cpuver)
+{
+  regno_t regno;
+  char *base_name = NULL;
+  char *attr_str = NULL;
+
+  /* name and int_name in pret_ctr are expected to be free already */
+  hwcfuncs_parse_ctr (counter, NULL, &base_name, &attr_str, NULL, &regno);
+
+  /* canonical list first, generic list as the fallback */
+  const Hwcentry *match = static_table_find (stdlist_get_table (cpuver),
+					     base_name, int_name, 0, REGNO_ANY);
+  if (match == NULL)
+    match = static_table_find (papi_generic_list,
+			       base_name, int_name, 0, REGNO_ANY);
+
+  if (match == NULL)
+    {
+      /* not a standard counter */
+      *pret_ctr = empty_ctr;
+      pret_ctr->int_name = strdup (counter);
+      pret_ctr->reg_num = regno;
+    }
+  else
+    {
+      /* standard counter: start from a shallow copy of the table entry */
+      *pret_ctr = *match;
+      if (pret_ctr->int_name == NULL)
+	pret_ctr->int_name = strdup (counter);
+      else
+	{
+	  /* aliased counter */
+	  pret_ctr->int_name = strdup (pret_ctr->int_name);
+	  if (pret_ctr->short_desc == NULL)
+	    {
+	      /* borrow the short description of the underlying raw counter */
+	      const Hwcentry *raw = static_table_find (stdlist_get_table (cpuver),
+						       pret_ctr->int_name, NULL, 0, REGNO_ANY);
+	      if (raw != NULL && raw->short_desc != NULL)
+		pret_ctr->short_desc = strdup (raw->short_desc);
+	    }
+	}
+      /* the table's register number is a wildcard: take the parsed one */
+      if (pret_ctr->reg_num == REGNO_ANY)
+	pret_ctr->reg_num = regno;
+    }
+
+  /* update the name */
+  if (attr_str == NULL)
+    pret_ctr->name = strdup (base_name);
+  else
+    {
+      pret_ctr->name = canonical_name (counter);
+      if (pret_ctr->metric)
+	{
+	  /* The metric text came from a table (user supplied an alias);
+	     append the user-supplied attributes to it.  The new string is
+	     never released by this function.  */
+	  size_t need = strlen (pret_ctr->metric) + strlen (attr_str) + 4;
+	  char *decorated = calloc (need, 1);
+	  if (decorated)
+	    snprintf (decorated, need, "%s (%s)", pret_ctr->metric, attr_str);
+	  pret_ctr->metric = decorated;
+	}
+    }
+
+  if (match)
+    hwcentry_print (DBG_LT2, "hwctable: hwc_post_lookup: found: ", pret_ctr);
+  else
+    hwcentry_print (DBG_LT2, "hwctable: hwc_post_lookup: default: ", pret_ctr);
+  free (attr_str);
+  free (base_name);
+  return pret_ctr;
+}
+
+/* Map pctr->min_time to its symbolic rate keyword ("lo", "on" or "hi").
+   Returns NULL when min_time is 0, meaning the numeric interval should be
+   used instead.  Any other value, including HWCTIME_TBD, maps to "on".
+   The returned string is not heap allocated.  */
+static const char *
+hwc_on_lo_hi (const Hwcentry *pctr)
+{
+  switch (pctr->min_time)
+    {
+    case (0):
+      return NULL; // null => use interval count
+    case (HWCTIME_LO):
+      return NTXT ("lo");
+    case (HWCTIME_HI):
+      return NTXT ("hi");
+    case (HWCTIME_ON):
+    case (HWCTIME_TBD):
+    default:
+      return NTXT ("on");
+    }
+}
+
+/* Return a newly allocated string describing the sampling rate of PCTR:
+   the symbolic keyword when one applies and FORCE_NUMERIC is zero,
+   otherwise pctr->val printed in decimal.  The caller frees the result.  */
+extern char *
+hwc_rate_string (const Hwcentry *pctr, int force_numeric)
+{
+  const char *keyword = hwc_on_lo_hi (pctr);
+  if (keyword != NULL && !force_numeric)
+    return strdup (keyword);
+
+  char numeric[128];
+  snprintf (numeric, sizeof (numeric), NTXT ("%d"), pctr->val);
+  return strdup (numeric);
+}
+
+/* Shared result buffer for hwc_i18n_metric(); overwritten by every call.  */
+static char metricbuf[2048];
+
+/* Return the metric text for PCTR.  Preference order: the metric field
+   (passed through PTXT), then "<name> Events", then "<int_name> Events",
+   and finally "Undefined Events".  The result points at the static
+   metricbuf and must not be freed.  */
+extern char *
+hwc_i18n_metric (const Hwcentry *pctr)
+{
+  const size_t cap = sizeof (metricbuf);
+
+  if (pctr->metric != NULL)
+    {
+      snprintf (metricbuf, cap, NTXT ("%s"), PTXT (pctr->metric));
+      return metricbuf;
+    }
+
+  const char *label = pctr->name != NULL ? pctr->name : pctr->int_name;
+  if (label != NULL)
+    snprintf (metricbuf, cap, GTXT ("%s Events"), label);
+  else
+    snprintf (metricbuf, cap, GTXT ("Undefined Events"));
+  return metricbuf;
+}
+
+/* return cpu version, should only be called when about to generate an experiment,
+ not when reading back an experiment */
+#if 0 /* called by ... */
+. / perfan / collect / src / collect.cc : start : 245 : cpuver = hwc_get_cpc_cpuver ();
+. / ccr_components / Collector_Interface / collctrl.cc : constructor : 202 : cpcx_cpuver = hwc_get_cpc_cpuver ();
+. / perfan / dbe / src / Dbe.cc : 3041 : JApplication::cpuver = hwc_get_cpc_cpuver ();
+. / perfan / dbe / src / Dbe.cc : 3164 : JApplication::cpuver = hwc_get_cpc_cpuver ();
+
+note:
+cpc_getcpuver () : only papi, ostest, this and hwprofile.c call it
+#endif
+/* Run setup_cpcx() and then report the value of cpcx_cpuver.  */
+int
+hwc_get_cpc_cpuver ()
+{
+  setup_cpcx ();
+  const int version = cpcx_cpuver;
+  return version;
+}
+
+/* Copy cpcx_cciname into BUF (at most BUFLEN - 1 characters, always
+   NUL-terminated).  BUF becomes the empty string when no name is set.
+   A NULL BUF or zero BUFLEN is returned untouched.  Returns BUF.  */
+extern char*
+hwc_get_cpuname (char *buf, size_t buflen)
+{
+  setup_cpcx ();
+  if (buf != NULL && buflen != 0)
+    {
+      buf[0] = 0;
+      if (cpcx_cciname)
+	{
+	  strncpy (buf, cpcx_cciname, buflen - 1);
+	  buf[buflen - 1] = 0;
+	}
+    }
+  return buf;
+}
+
+/* Copy cpcx_docref into BUF (at most BUFLEN - 1 characters, always
+   NUL-terminated).  BUF becomes the empty string when no reference is
+   set.  A NULL BUF or zero BUFLEN is returned untouched.  Returns BUF.  */
+extern char*
+hwc_get_docref (char *buf, size_t buflen)
+{
+  setup_cpcx ();
+  if (buf != NULL && buflen != 0)
+    {
+      buf[0] = 0;
+      if (cpcx_docref)
+	{
+	  strncpy (buf, cpcx_docref, buflen - 1);
+	  buf[buflen - 1] = 0;
+	}
+    }
+  return buf;
+}
+
+//TBR:
+
+/* Return a newly allocated copy of cpcx_default_hwcs[0], or NULL when it is
+   not set.  The caller frees the result.  (Marked "TBR"/deprecated in the
+   original source.)  */
+extern char*
+hwc_get_default_cntrs ()
+{
+  setup_cpcx ();
+  char *dflt = cpcx_default_hwcs[0];
+  return dflt != NULL ? strdup (dflt) : NULL; // TBR deprecate this
+}
+
+/* Return a newly allocated copy of the default counter string for
+ * FORKERNEL, or NULL when FORKERNEL is invalid, no default is defined,
+ * cpcx_npics is zero, or allocation fails.  The caller frees the result.
+ *
+ * STYLE == 1: the string exactly as stored in cpcx_default_hwcs[].
+ * any other STYLE (documented as 2): the stored string is treated as a
+ *   sequence of "<name>,<rate>" fields separated by commas; the separators
+ *   are replaced by " -h " and one trailing comma of each field (left behind
+ *   by an empty rate) is dropped.  At most cpcx_npics fields are emitted.
+ */
+extern char*
+hwc_get_default_cntrs2 (int forKernel, int style)
+{
+  setup_cpcx ();
+  if (!VALID_FOR_KERNEL (forKernel))
+    return NULL;
+  char *cpcx_default = cpcx_default_hwcs[forKernel];
+  if (cpcx_default == NULL || cpcx_npics == 0)
+    return NULL;
+  if (style == 1)
+    return strdup (cpcx_default);
+
+  // style == 2
+  // we will replace "," delimiters with " -h " (an extra 3 chars per HWC);
+  // the extra byte makes room for the terminating null explicit
+  char *s = (char *) malloc (strlen (cpcx_default) + 3 * cpcx_npics + 1);
+  if (s == NULL)
+    return NULL;
+  char *p = s;
+  const char *q = cpcx_default;
+  p[0] = '\0';
+  for (int i = 0; i < cpcx_npics; i++)
+    {
+      // nothing left to copy
+      if (q[0] == '\0')
+	break;
+
+      // add " -h " if not the first HWC
+      if (i != 0)
+	{
+	  memcpy (p, " -h ", 4);
+	  p += 4;
+	}
+
+      // find second comma: it separates this HWC from the next one
+      const char *r = strchr (q, ',');
+      if (r)
+	r = strchr (r + 1, ',');
+
+      // Length of this "<name>,<rate>" field.  Without a second comma the
+      // rest of the string is the last HWC.  The whole field is measured
+      // (r - q, not r - q - 1) so that a non-empty rate keeps its final
+      // character.
+      size_t qlen = r ? (size_t) (r - q) : strlen (q);
+
+      // trim the trailing comma left behind by an empty rate
+      if (qlen > 0 && q[qlen - 1] == ',')
+	qlen--;
+
+      // copy exactly this field and terminate
+      memcpy (p, q, qlen);
+      p += qlen;
+      p[0] = '\0';
+
+      if (r == NULL)
+	break;
+      q = r + 1;
+    }
+  return s;
+}
+
+/* Return a newly allocated copy of cpcx_orig_default_hwcs[forKernel], or
+   NULL when FORKERNEL is invalid or no string is stored.  The caller frees
+   the result.  */
+extern char*
+hwc_get_orig_default_cntrs (int forKernel)
+{
+  setup_cpcx ();
+  if (VALID_FOR_KERNEL (forKernel)
+      && cpcx_orig_default_hwcs[forKernel] != NULL)
+    return strdup (cpcx_orig_default_hwcs[forKernel]);
+  return NULL;
+}
+
+/* Return the label (with a trailing blank) that describes the memory
+   operation type MEMOP.  ABST_NONE and unrecognized values yield the empty
+   string.  The result is not heap allocated.  */
+extern const char *
+hwc_memop_string (ABST_type memop)
+{
+  switch (memop)
+    {
+    case ABST_LOAD:
+      return GTXT ("load ");
+    case ABST_STORE:
+      return GTXT ("store ");
+    case ABST_LDST:
+    case ABST_US_DTLBM:
+    case ABST_LDST_SPARC64:
+      return GTXT ("load-store ");
+    case ABST_EXACT_PEBS_PLUS1:
+    case ABST_EXACT:
+      return GTXT ("memoryspace ");
+    case ABST_COUNT:
+      return GTXT ("count ");
+    case ABST_NOPC:
+      return GTXT ("not-program-related ");
+    case ABST_NONE:
+      return "";
+    default:
+      return ""; // was "ABST_UNK", but that's meaningless to users
+    }
+}
+
+/* Return the units label selected by the sign of TIMECVT: positive gives
+   "CPU-cycles", negative gives "ref-cycles", zero gives "events".  */
+static const char *
+timecvt_string (int timecvt)
+{
+  if (timecvt == 0)
+    return GTXT ("events");
+  return timecvt > 0 ? GTXT ("CPU-cycles") : GTXT ("ref-cycles");
+}
+
+int show_regs = 0; // The register setting is available on Solaris only
+
+/*
+ * Print the specified strings in aligned columns into BUF (BUFSIZ bytes).
+ *
+ * s1..s4 are laid out with nominal widths 10, 13, 20 and 10; width is moved
+ * between neighboring columns when one string is short and another long.
+ * When s1 and s2 together do not fit, the line is divided with a newline
+ * after s2 and s3 is pushed right so the remaining columns still line up.
+ * s5 (the register list) is printed only when show_regs is set.  s6 may be
+ * as wide as it likes.  NULL strings are treated as blanks, and trailing
+ * blanks and tabs are removed from the result.
+ */
+static void
+format_columns (char *buf, int bufsiz, char *s1, char *s2, const char *s3,
+		const char *s4, char *s5, const char *s6)
+{
+  // nothing can be written into a missing or empty buffer
+  if (buf == NULL || bufsiz <= 0)
+    return;
+
+  // NULL strings are blanks
+  char *blank = NTXT ("");
+  if (s1 == NULL)
+    s1 = blank;
+  if (s2 == NULL)
+    s2 = blank;
+  if (s3 == NULL)
+    s3 = blank;
+  if (s4 == NULL)
+    s4 = blank;
+  if (s5 == NULL)
+    s5 = blank;
+  if (s6 == NULL)
+    s6 = blank;
+
+  // get the lengths and target widths
+  // (s6 can be as wide as it likes)
+  int l1 = strlen (s1), n1 = 10, l2 = strlen (s2), n2 = 13;
+  int l3 = strlen (s3), n3 = 20, l4 = strlen (s4), n4 = 10, n5;
+  char divide = ' ';
+
+  // adjust widths, stealing from one column to help a neighbor
+  // There's a ragged boundary between s2 and s3.
+  // So push this boundary to the right.
+  n2 += n3 - l3;
+  n3 -= n3 - l3;
+
+  // If s3 is empty, push the boundary over to s4.
+  if (l3 == 0)
+    {
+      n2 += n4 - l4;
+      n4 -= n4 - l4;
+    }
+
+  // If there's enough room to fit s1 and s2, do so.
+  if (n1 + n2 >= l1 + l2)
+    {
+      if (n1 < l1)
+	{
+	  n2 -= l1 - n1;
+	  n1 += l1 - n1;
+	}
+      if (n2 < l2)
+	{
+	  n1 -= l2 - n2;
+	  n2 += l2 - n2;
+	}
+    }
+  else
+    {
+      // not enough room, so we need to divide the line
+      n3 += 4 // 4-blank margin
+	      + n1 // 1st column
+	      + 1 // space between 1st and 2nd columns
+	      + n2 // 2nd column
+	      + 1; // space between 2nd and 3th columns
+      divide = '\n';
+
+      // make 1st column large enough
+      if (n1 < l1)
+	n1 = l1;
+
+      // width of 2nd column no longer matters since we divided the line
+      n2 = 0;
+    }
+
+  if (show_regs)
+    {
+      // fifth column should be wide enough for regnolist
+      // see function get_regnolist()
+      if (cpcx_npics < 10)
+	n5 = cpcx_npics; // one char per regno
+      else
+	n5 = 16 + 3 * (cpcx_npics - 9); // spaces between regnos and some regnos are 2-char wide
+      // ... and be wide enough for header "regs"
+      if (n5 < 4)
+	n5 = 4;
+
+      // print to buffer
+      // (don't need a space before s4 since historical precedent to have a trailing space in s3)
+      snprintf (buf, bufsiz, "%-*s %-*s%c%*s%*s %-*s %s",
+		n1, s1, n2, s2, divide, n3, s3, n4, s4, n5, s5, s6);
+    }
+  else
+    snprintf (buf, bufsiz, "%-*s %-*s%c%*s%*s %s",
+	      n1, s1, n2, s2, divide, n3, s3, n4, s4, s6);
+
+  // Strip trailing blanks and tabs.  Start at the last character: the byte
+  // at index strlen (buf) is the terminator, so starting there would never
+  // remove anything.
+  for (size_t len = strlen (buf);
+       len > 0 && (buf[len - 1] == ' ' || buf[len - 1] == '\t');
+       len--)
+    buf[len - 1] = 0;
+}
+
+/* Format one table row describing CTR into BUF (at most BUFLEN - 1
+   characters, always NUL-terminated) and return BUF.  A NULL BUF or zero
+   BUFLEN is returned untouched.  A NULL CTR yields the text
+   "HW counter not available".  The description column uses
+   ctr->short_desc when SHOW_SHORT_DESC is set and one exists, otherwise
+   the i18n'd metric text when ctr->metric is set.  */
+static char *
+hwc_hwcentry_string_internal (char *buf, size_t buflen, const Hwcentry *ctr,
+			      int show_short_desc)
+{
+  if (buf == NULL || buflen == 0)
+    return buf;
+  buf[0] = 0;
+
+  char line[1024];
+  if (ctr != NULL)
+    {
+      char regs[256];
+      char *descr = show_short_desc ? ctr->short_desc : NULL;
+      if (descr == NULL && ctr->metric)
+	descr = hwc_i18n_metric (ctr);
+      format_columns (line, sizeof (line), ctr->name, ctr->int_name,
+		      hwc_memop_string (ctr->memop),
+		      timecvt_string (ctr->timecvt),
+		      get_regnolist (regs, sizeof (regs), ctr->reg_list, 2),
+		      descr);
+    }
+  else
+    snprintf (line, sizeof (line), GTXT ("HW counter not available"));
+
+  strncpy (buf, line, buflen - 1);
+  buf[buflen - 1] = 0;
+  return buf;
+}
+
+/* Format CTR as a table row into BUF without requesting the short
+   description; see hwc_hwcentry_string_internal().  Returns BUF.  */
+extern char *
+hwc_hwcentry_string (char *buf, size_t buflen, const Hwcentry *ctr)
+{
+  const int show_short_desc = 0;
+  return hwc_hwcentry_string_internal (buf, buflen, ctr, show_short_desc);
+}
+
+/* Describe a counter as it was specified: "<name>,<numeric rate>" followed
+   by a parenthesized note holding the metric text (when ctr->metric is
+   set), the memory-operation label and the units.  The text is copied into
+   BUF (at most BUFLEN - 1 characters, always NUL-terminated).  A NULL BUF
+   or zero BUFLEN is returned untouched; a NULL CTR yields
+   "HW counter not available".  Returns BUF.  */
+extern char *
+hwc_hwcentry_specd_string (char *buf, size_t buflen, const Hwcentry *ctr)
+{
+  if (buf == NULL || buflen == 0)
+    return buf;
+  buf[0] = 0;
+
+  char line[1024];
+  if (ctr == NULL)
+    snprintf (line, sizeof (line), GTXT ("HW counter not available"));
+  else
+    {
+      char note[1024];
+      const char *units = timecvt_string (ctr->timecvt);
+      const char *memop = ctr->memop ? hwc_memop_string (ctr->memop) : "";
+
+      if (ctr->metric == NULL) /* raw counter */
+	snprintf (note, sizeof (note), GTXT (" (%s%s)"), memop, units);
+      else /* a standard counter for a specific register */
+	snprintf (note, sizeof (note), GTXT (" (`%s'; %s%s)"),
+		  hwc_i18n_metric (ctr), memop, units);
+
+      /* always show the rate numerically */
+      char *rate = hwc_rate_string (ctr, 1);
+      snprintf (line, sizeof (line), NTXT ("%s,%s%s"), ctr->name,
+		rate ? rate : "", note);
+      free (rate);
+    }
+
+  strncpy (buf, line, buflen - 1);
+  buf[buflen - 1] = 0;
+  return buf;
+}
+
+/* Run setup_cpcx() and then report the value of cpcx_npics.  */
+unsigned
+hwc_get_max_regs ()
+{
+  setup_cpcx ();
+  const unsigned nregs = cpcx_npics;
+  return nregs;
+}
+
+/* Return cpcx_max_concurrent[forKernel], or 0 when FORKERNEL is not a
+   valid index.  */
+unsigned
+hwc_get_max_concurrent (int forKernel)
+{
+  setup_cpcx ();
+  return VALID_FOR_KERNEL (forKernel) ? cpcx_max_concurrent[forKernel] : 0;
+}
+
+/* Return cpcx_attrs[forKernel], or NULL when FORKERNEL is not a valid
+   index.  The result is the stored pointer, not a copy.  */
+char**
+hwc_get_attrs (int forKernel)
+{
+  setup_cpcx ();
+  return VALID_FOR_KERNEL (forKernel) ? cpcx_attrs[forKernel] : NULL;
+}
+
+/* Return cpcx_std[forKernel], or NULL when FORKERNEL is not a valid
+   index.  The result is the stored pointer, not a copy.  */
+Hwcentry **
+hwc_get_std_ctrs (int forKernel)
+{
+  setup_cpcx ();
+  return VALID_FOR_KERNEL (forKernel) ? cpcx_std[forKernel] : NULL;
+}
+
+/* Return cpcx_raw[forKernel], or NULL when FORKERNEL is not a valid
+   index.  The result is the stored pointer, not a copy.  */
+Hwcentry **
+hwc_get_raw_ctrs (int forKernel)
+{
+  setup_cpcx ();
+  return VALID_FOR_KERNEL (forKernel) ? cpcx_raw[forKernel] : NULL;
+}
+
+/* Call ACTION (attr, NULL) for each entry of the NULL-terminated list
+   cpcx_attrs[0].  When the list is missing or empty, ACTION is called once
+   with (NULL, NULL).  ACTION may be NULL, in which case the entries are
+   only counted.  Returns the number of entries.  */
+unsigned
+hwc_scan_attrs (void (*action)(const char *attr, const char *desc))
+{
+  setup_cpcx ();
+  unsigned seen = 0;
+  while (cpcx_attrs[0] != NULL && cpcx_attrs[0][seen] != NULL)
+    {
+      if (action != NULL)
+	action (cpcx_attrs[0][seen], NULL);
+      seen++;
+    }
+  if (seen == 0 && action != NULL)
+    action (NULL, NULL);
+  return seen;
+}
+
+/* Call ACTION for each entry of the NULL-terminated list cpcx_std[0].
+   When the list is missing or empty, ACTION is called once with NULL.
+   ACTION may be NULL, in which case the entries are only counted.
+   Returns the number of entries.  */
+unsigned
+hwc_scan_std_ctrs (void (*action)(const Hwcentry *))
+{
+  setup_cpcx ();
+  Tprintf (DBG_LT1, "hwctable: hwc_scan_standard_ctrs()...\n");
+  unsigned seen = 0;
+  while (cpcx_std[0] != NULL && cpcx_std[0][seen] != NULL)
+    {
+      if (action != NULL)
+	action (cpcx_std[0][seen]);
+      seen++;
+    }
+  if (seen == 0 && action != NULL)
+    action (NULL);
+  return seen;
+}
+
+/* Call ACTION for each entry of the NULL-terminated list cpcx_raw[0].
+   ACTION is called with NULL only when the list is missing or empty (it is
+   not called with NULL after a non-empty list).  ACTION may be NULL, in
+   which case the entries are only counted.  Returns the number of
+   entries.  */
+unsigned
+hwc_scan_raw_ctrs (void (*action)(const Hwcentry *))
+{
+  setup_cpcx ();
+  Tprintf (DBG_LT1, "hwctable: hwc_scan_raw_ctrs()...\n");
+  unsigned seen = 0;
+  while (cpcx_raw[0] != NULL && cpcx_raw[0][seen] != NULL)
+    {
+      if (action != NULL)
+	action (cpcx_raw[0][seen]);
+      seen++;
+    }
+  if (seen == 0 && action != NULL)
+    action (NULL);
+  return seen;
+}
+
+/* Write a prose overview of selected raw SPARC counter names to F_USAGE.
+ *
+ * CPUVER is first folded onto one of three variants: M5, M6, T5 and T6 are
+ * treated as M4, and M4 is in turn treated as T4.  Only T4, M7 and M8 are
+ * handled; for any other CPUVER the function returns without printing
+ * anything.  The sections printed are Cycles, Instructions, Commit,
+ * Cache/memory hierarchy, TLB misses, Branch misprediction, RAW hazards
+ * and (M7 only) Flush; the counter names shown depend on the variant.
+ */
+static void
+hwc_usage_raw_overview_sparc (FILE *f_usage, int cpuver)
+{
+ /* All these cpuver's use cputabs[]==sparc_t5_m6 anyhow. */
+ if ((cpuver == CPC_SPARC_M5) || (cpuver == CPC_SPARC_M6)
+ || (cpuver == CPC_SPARC_T5) || (cpuver == CPC_SPARC_T6))
+ cpuver = CPC_SPARC_M4; // M4 was renamed to M5
+
+ /* While there are small differences between
+ * cputabs[]== sparc_t4
+ * cputabs[]== sparc_t5_m6
+ * they are in HWCs we don't discuss in the overview anyhow.
+ * So just lump them in with T4.
+ */
+ if (cpuver == CPC_SPARC_M4)
+ cpuver = CPC_SPARC_T4;
+
+ /* Check for the cases we support. */
+ if (cpuver != CPC_SPARC_T4 && cpuver != CPC_SPARC_M7 && cpuver != CPC_SPARC_M8)
+ return;
+ fprintf (f_usage, GTXT (" While the above aliases represent the most useful hardware counters\n"
+ " for this processor, a full list of raw (unaliased) counter names appears\n"
+ " below. First is an overview of some of these names.\n\n"));
+ fprintf (f_usage, GTXT (" == Cycles.\n"
+ " Count active cycles with\n"
+ " Cycles_user\n"
+ " Set attributes to choose user, system, and/or hyperprivileged cycles.\n\n"));
+ fprintf (f_usage, GTXT (" == Instructions.\n"
+ " Count instructions when they are committed with:\n"));
+ fprintf (f_usage, NTXT (" Instr_all\n"));
+ if (cpuver != CPC_SPARC_M8)
+ fprintf (f_usage, GTXT (" It is the total of these counters:\n"));
+ else
+ fprintf (f_usage, GTXT (" Some subsets of instructions can be counted separately:\n"));
+ fprintf (f_usage, NTXT (" Branches %s\n"), GTXT ("branches"));
+ fprintf (f_usage, NTXT (" Instr_FGU_crypto %s\n"), GTXT ("Floating Point and Graphics Unit"));
+ fprintf (f_usage, NTXT (" Instr_ld %s\n"), GTXT ("loads"));
+ fprintf (f_usage, NTXT (" Instr_st %s\n"), GTXT ("stores"));
+ fprintf (f_usage, NTXT (" %-19s %s\n"),
+ cpuver == CPC_SPARC_M7 ? NTXT ("Instr_SPR_ring_ops")
+ : NTXT ("SPR_ring_ops"),
+ GTXT ("internal use of SPR ring"));
+ fprintf (f_usage, NTXT (" Instr_other %s\n"), GTXT ("basic arithmetic and logical instructions"));
+ if (cpuver != CPC_SPARC_M8)
+ fprintf (f_usage, GTXT (" Some subsets of these instructions can be counted separately:\n"));
+ fprintf (f_usage, NTXT (" Br_taken %s\n"), GTXT ("Branches that are taken"));
+ fprintf (f_usage, NTXT (" %-19s %s\n"),
+ cpuver == CPC_SPARC_M7 ? NTXT ("Instr_block_ld_st")
+ : NTXT ("Block_ld_st"),
+ GTXT ("block load/store"));
+ fprintf (f_usage, NTXT (" %-19s %s\n"),
+ cpuver == CPC_SPARC_M7 ? NTXT ("Instr_atomic")
+ : NTXT ("Atomics"),
+ GTXT ("atomic instructions"));
+ fprintf (f_usage, NTXT (" %-19s %s\n"),
+ cpuver == CPC_SPARC_M7 ? NTXT ("Instr_SW_prefetch")
+ : NTXT ("SW_prefetch"),
+ GTXT ("prefetches"));
+ fprintf (f_usage, NTXT (" %-19s %s\n"),
+ cpuver == CPC_SPARC_M7 ? NTXT ("Instr_SW_count")
+ : NTXT ("Sw_count_intr"),
+ GTXT ("SW Count instructions (counts special no-op assembler instructions)"));
+ fprintf (f_usage, NTXT ("\n"));
+
+ /* TMPLEN is a scratch macro local to this function.  If it is already
+    defined, the non-C text inside the #ifdef forces a compile error.  */
+#ifdef TMPLEN
+ compilation error : we're trying to use a macro that's already defined
+#endif
+#define TMPLEN 32
+ /* Names of the commit counters: M7 uses a "_cyc" suffix, the others do not. */
+ char s0[TMPLEN], s1[TMPLEN], s2[TMPLEN], s3[TMPLEN];
+ if (cpuver == CPC_SPARC_M7)
+ {
+ snprintf (s0, TMPLEN, "Commit_0_cyc");
+ snprintf (s1, TMPLEN, "Commit_1_cyc");
+ snprintf (s2, TMPLEN, "Commit_2_cyc");
+ snprintf (s3, TMPLEN, "Commit_1_or_2_cyc");
+ }
+ else
+ {
+ snprintf (s0, TMPLEN, "Commit_0");
+ snprintf (s1, TMPLEN, "Commit_1");
+ snprintf (s2, TMPLEN, "Commit_2");
+ snprintf (s3, TMPLEN, "Commit_1_or_2");
+ }
+#undef TMPLEN
+ fprintf (f_usage, GTXT (" == Commit.\n"
+ " Instructions may be launched speculatively, executed out of order, etc.\n"));
+ /* M8 lists only the zero-commit counter (s0); T4 and M7 list all four. */
+ if (cpuver != CPC_SPARC_M8)
+ {
+ fprintf (f_usage, GTXT (" We can count the number of cycles during which 0, 1, or 2 instructions are\n"
+ " actually completed and their results committed:\n"));
+ fprintf (f_usage, GTXT (" %s\n"
+ " %s\n"
+ " %s\n"
+ " %s\n"
+ " %s is a useful way of identifying parts of your application with\n"
+ " high-latency instructions.\n\n"),
+ s0, s1, s2, s3, s0);
+ }
+ else
+ {
+ fprintf (f_usage, GTXT (" We can count the number of cycles during which no instructions were\n"
+ " able to commit results using:\n"));
+ fprintf (f_usage, GTXT (" %s\n"
+ " %s is a useful way of identifying parts of your application with\n"
+ " high-latency instructions.\n\n"),
+ s0, s0);
+ }
+
+ /* The cache/memory section has three variants: M7, M8, and T4 (the else branch). */
+ fprintf (f_usage, GTXT (" == Cache/memory hierarchy.\n"));
+ if (cpuver == CPC_SPARC_M7)
+ {
+ fprintf (f_usage, GTXT (" In the cache hierarchy:\n"
+ " * Each socket has memory and multiple SPARC core clusters (scc).\n"
+ " * Each scc has an L3 cache and multiple L2 and L1 caches.\n"));
+ fprintf (f_usage, GTXT (" Loads can be counted by where they hit on socket:\n"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_hit"), GTXT ("hit own L1 data cache"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_L2_hit"), GTXT ("hit own L2"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_L3_hit"), GTXT ("hit own L3"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_nbr_L2_hit"), GTXT ("hit neighbor L2 (same scc)"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_nbr_scc_hit"), GTXT ("hit neighbor scc (same socket)"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_nbr_scc_miss"), GTXT ("miss all caches (same socket)"));
+ fprintf (f_usage, GTXT (" These loads can also be grouped:\n"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss"), GTXT ("all - DC_hit"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_L2_miss"), GTXT ("all - DC_hit - DC_miss_L2_hit"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_L3_miss"), GTXT ("DC_miss_nbr_scc_hit + DC_miss_nbr_scc_miss"));
+ fprintf (f_usage, GTXT (" Loads that miss all caches on this socket can be counted:\n"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_remote_scc_hit"), GTXT ("hit cache on different socket"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_local_mem_hit"), GTXT ("hit local memory (same socket)"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_remote_mem_hit"), GTXT ("hit remote memory (off socket)"));
+ fprintf (f_usage, GTXT (" These events are for speculative loads, launched in anticipation\n"
+ " of helping performance but whose results might not be committed.\n"));
+#if 0 // was: #if defined(linux). See 22236226 - sparc-Linux: Support basic Memoryspace and Dataspace profiling (capture VADDR)
+ /* 21869427 should not look like memoryspace profiling is supported on Linux */
+ /* 21869424 desire memoryspace profiling on Linux */
+ fprintf (f_usage, GTXT (" To count only data-cache misses that commit, use:\n"));
+ fprintf (f_usage, NTXT (" DC_miss_commit\n"));
+#else
+ fprintf (f_usage, GTXT (" To count only data-cache misses that commit, or for memoryspace profiling,\n"
+ " use the 'memoryspace' counter:\n"));
+ fprintf (f_usage, NTXT (" DC_miss_commit\n"));
+#endif
+ fprintf (f_usage, NTXT ("\n"));
+ }
+ else if (cpuver == CPC_SPARC_M8)
+ {
+ fprintf (f_usage, GTXT (" In the cache hierarchy:\n"
+ " * Each processor has 4 memory controllers and 2 quad core clusters (QCC).\n"
+ " * Each QCC contains 4 cache processor clusters (CPC).\n"
+ " * Each CPC contains 4 cores.\n"
+ " * Each core supports 8 hardware threads.\n"
+ " * The L3 consists of 2 partitions with 1 QCC per partition.\n"
+ ));
+ fprintf (f_usage, GTXT (" Loads can be counted by where they hit on socket:\n"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_L2_hit"), GTXT ("hit own L2"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_L3_hit"), GTXT ("hit own L3"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_L3_dirty_copyback"), GTXT ("hit own L3 but require copyback from L2D"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_nbr_L3_hit"), GTXT ("hit neighbor L3 (same socket)"));
+ fprintf (f_usage, GTXT (" Loads that miss all caches on this socket can be counted:\n"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_remote_L3_hit"), GTXT ("hit cache on different socket"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_local_mem_hit"), GTXT ("hit local memory (same socket)"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("DC_miss_remote_mem_hit"), GTXT ("hit remote memory (off socket)"));
+ fprintf (f_usage, GTXT (" These events are for speculative loads, launched in anticipation\n"
+ " of helping performance but whose results might not be committed.\n"));
+#if 0 // was: #if defined(linux). See 22236226 - sparc-Linux: Support basic Memoryspace and Dataspace profiling (capture VADDR)
+ /* 21869427 should not look like memoryspace profiling is supported on Linux */
+ /* 21869424 desire memoryspace profiling on Linux */
+ fprintf (f_usage, GTXT (" To count only data-cache misses that commit, use:\n"));
+ fprintf (f_usage, NTXT (" DC_miss_commit\n"));
+#else
+ fprintf (f_usage, GTXT (" To count only data-cache misses that commit, or for memoryspace profiling,\n"
+ " use the 'memoryspace' counter:\n"));
+ fprintf (f_usage, NTXT (" DC_miss_commit\n"));
+#endif
+ fprintf (f_usage, NTXT ("\n"));
+ }
+ else
+ {
+ fprintf (f_usage, GTXT (" Total data-cache misses can be counted with:\n"));
+ fprintf (f_usage, NTXT (" DC_miss DC_miss_nospec\n"));
+ fprintf (f_usage, GTXT (" They are the totals of misses that hit in L2/L3 cache, local memory, or\n"
+ " remote memory:\n"));
+ fprintf (f_usage, NTXT (" DC_miss_L2_L3_hit DC_miss_L2_L3_hit_nospec\n"));
+ fprintf (f_usage, NTXT (" DC_miss_local_hit DC_miss_local_hit_nospec\n"));
+ fprintf (f_usage, NTXT (" DC_miss_remote_L3_hit DC_miss_remote_L3_hit_nospec\n"));
+ fprintf (f_usage, GTXT (" The events in the left column include speculative operations. Use the\n"
+ " right-hand _nospec events to count only data accesses that commit\n"
+ " or for memoryspace profiling.\n\n"));
+ }
+
+ fprintf (f_usage, GTXT (" == TLB misses.\n"
+ " The Translation Lookaside Buffer (TLB) is a cache of virtual-to-physical\n"
+ " page translations."));
+ fprintf (f_usage, GTXT (" If a virtual address (VA) is not represented in the\n"
+ " TLB, an expensive hardware table walk (HWTW) must be conducted."));
+ fprintf (f_usage, GTXT (" If the\n"
+ " page is still not found, a trap results. There is a data TLB (DTLB) and\n"
+ " an instruction TLB (ITLB).\n\n"));
+ fprintf (f_usage, GTXT (" TLB misses can be counted by:\n"));
+ fprintf (f_usage, NTXT (" %s\n"),
+ cpuver == CPC_SPARC_M7 ?
+ NTXT ("DTLB_HWTW_search ITLB_HWTW_search") :
+ cpuver == CPC_SPARC_M8 ?
+ NTXT ("DTLB_HWTW ITLB_HWTW") :
+ NTXT ("DTLB_miss_asynch ITLB_miss_asynch"));
+ fprintf (f_usage, GTXT (" or broken down by page size:\n"));
+ /* The page-size list is printed in two parts: this first part has an M7 form and a shared T4/M8 form. */
+ fprintf (f_usage, NTXT (" %s"),
+ cpuver == CPC_SPARC_M7 ?
+ NTXT ("DTLB_HWTW_hit_8K ITLB_HWTW_hit_8K\n"
+ " DTLB_HWTW_hit_64K ITLB_HWTW_hit_64K\n"
+ " DTLB_HWTW_hit_4M ITLB_HWTW_hit_4M\n") :
+ NTXT ("DTLB_fill_8KB ITLB_fill_8KB\n"
+ " DTLB_fill_64KB ITLB_fill_64KB\n"
+ " DTLB_fill_4MB ITLB_fill_4MB\n"));
+ fprintf (f_usage, NTXT (" %s\n\n"),
+ cpuver == CPC_SPARC_M7 ?
+ NTXT ("DTLB_HWTW_hit_256M ITLB_HWTW_hit_256M\n"
+ " DTLB_HWTW_hit_2G_16G ITLB_HWTW_hit_2G_16G\n"
+ " DTLB_HWTW_miss_trap ITLB_HWTW_miss_trap") :
+ cpuver == CPC_SPARC_M8 ?
+ NTXT ("DTLB_HWTW_hit_256M ITLB_HWTW_hit_256M\n"
+ " DTLB_HWTW_hit_16G ITLB_HWTW_hit_16G\n"
+ " DTLB_HWTW_hit_1T ITLB_HWTW_hit_1T") :
+ NTXT ("DTLB_fill_256MB ITLB_fill_256MB\n"
+ " DTLB_fill_2GB ITLB_fill_2GB\n"
+ " DTLB_fill_trap ITLB_fill_trap"));
+ if (cpuver == CPC_SPARC_M8)
+ {
+ fprintf (f_usage, GTXT (" TLB traps, which can require hundreds of cycles, can be counted with:\n"));
+ fprintf (f_usage, NTXT (" %s\n\n"),
+ NTXT ("DTLB_fill_trap ITLB_fill_trap"));
+ }
+
+ fprintf (f_usage, GTXT (" == Branch misprediction.\n"
+ " Count branch mispredictions with:\n"
+ " Br_mispred\n"
+ " It is the total of:\n"
+ " Br_dir_mispred direction was mispredicted\n"
+ " %s target was mispredicted\n"
+ "\n"), cpuver == CPC_SPARC_M7 ? NTXT ("Br_tgt_mispred") : NTXT ("Br_trg_mispred"));
+
+ fprintf (f_usage, GTXT (" == RAW hazards.\n"
+ " A read-after-write (RAW) delay occurs when we attempt to read a datum\n"
+ " before an earlier write has had time to complete:\n"));
+ if (cpuver == CPC_SPARC_M8)
+ {
+ fprintf (f_usage, NTXT (" RAW_hit\n"));
+ fprintf (f_usage, GTXT (" RAW_hit events can be broken down into:\n"));
+ }
+ else
+ {
+ fprintf (f_usage, NTXT (" RAW_hit_st_q~emask=0xf\n"));
+ fprintf (f_usage, GTXT (" The mask 0xf counts the total of all types such as:\n"));
+ }
+ fprintf (f_usage, NTXT (" RAW_hit_st_buf write is still in store buffer\n"
+ " RAW_hit_st_q write is still in store queue\n"
+ "\n"));
+ /* The Flush section is printed for M7 only. */
+ if (cpuver == CPC_SPARC_M7)
+ {
+ fprintf (f_usage, GTXT (" == Flush.\n"
+ " One can count the number of times the pipeline must be flushed:\n"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("Flush_L3_miss"), GTXT ("load missed L3 and >1 strand is active on the core"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("Flush_br_mispred"), GTXT ("branch misprediction"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("Flush_arch_exception"), GTXT ("SPARC exceptions and trap entry/return"));
+ fprintf (f_usage, NTXT (" %-22s %s\n"),
+ NTXT ("Flush_other"), GTXT ("state change to/from halted/paused"));
+ fprintf (f_usage, NTXT ("\n"));
+ }
+}
+
+/* Print the hardware-counter help text to F_USAGE.
+ *
+ * forKernel       index into the per-mode tables; nothing is printed when
+ *                 it is not valid.
+ * f_usage         output stream.
+ * cmd             command name substituted into the register help text.
+ * dataspace_msg   optional extra text printed after the <ctr_def> syntax
+ *                 line (only when show_syntax is set); may be NULL.
+ * show_syntax     non-zero prints the "-h" option syntax and rate help.
+ * show_short_desc forwarded to the raw-counter listing to choose the short
+ *                 description column.
+ *
+ * When no counters can be used (hwc_get_max_concurrent() returns 0), only
+ * a "not supported" message is printed, and only for the non-kernel case.
+ */
+static void
+hwc_usage_internal (int forKernel, FILE *f_usage, const char *cmd, const char *dataspace_msg, int show_syntax, int show_short_desc)
+{
+  if (!VALID_FOR_KERNEL (forKernel))
+    return;
+  char cpuname[128];
+  hwc_get_cpuname (cpuname, 128);
+  Hwcentry** raw_ctrs = hwc_get_raw_ctrs (forKernel);
+  int has_raw_ctrs = (raw_ctrs && raw_ctrs[0]);
+  Hwcentry** std_ctrs = hwc_get_std_ctrs (forKernel);
+  int has_std_ctrs = (std_ctrs && std_ctrs[0]);
+  unsigned hwc_maxregs = hwc_get_max_concurrent (forKernel);
+  int cpuver = hwc_get_cpc_cpuver ();
+  if (hwc_maxregs != 0)
+    {
+      if (show_syntax)
+	{
+	  fprintf (f_usage, GTXT ("\nSpecifying HW counters on `%s' (cpuver=%d):\n\n"), cpuname, cpuver);
+	  fprintf (f_usage, GTXT (" -h {auto|lo|on|hi}\n"));
+	  fprintf (f_usage, GTXT ("\tturn on default set of HW counters at the specified rate\n"));
+	  if (hwc_maxregs == 1)
+	    {
+	      fprintf (f_usage, GTXT (" -h <ctr_def>\n"));
+	      fprintf (f_usage, GTXT ("\tspecify HW counter profiling for one HW counter only\n"));
+	    }
+	  else
+	    {
+	      fprintf (f_usage, GTXT (" -h <ctr_def> [-h <ctr_def>]...\n"));
+	      fprintf (f_usage, GTXT (" -h <ctr_def>[,<ctr_def>]...\n"));
+	      fprintf (f_usage, GTXT ("\tspecify HW counter profiling for up to %u HW counters\n"), hwc_maxregs);
+	    }
+	  fprintf (f_usage, NTXT ("\n"));
+	}
+      else
+	{
+	  fprintf (f_usage, GTXT ("\nSpecifying HW counters on `%s' (cpuver=%d)\n\n"), cpuname, cpuver);
+	  if (hwc_maxregs == 1)
+	    fprintf (f_usage, GTXT (" Hardware counter profiling is supported for only one counter.\n"));
+	  else
+	    fprintf (f_usage, GTXT (" Hardware counter profiling is supported for up to %u HW counters.\n"), hwc_maxregs);
+	}
+    }
+  else
+    {
+      if (!IS_KERNEL (forKernel))
+	{ // EUGENE I don't see why we don't also use this for er_kernel
+	  char buf[1024];
+	  *buf = 0;
+	  char *pch = hwcfuncs_errmsg_get (buf, sizeof (buf), 0);
+	  if (*pch)
+	    fprintf (f_usage, GTXT ("HW counter profiling is not supported on this system: %s%s"),
+		     pch, pch[strlen (pch) - 1] == '\n' ? "" : "\n");
+	  else
+	    fprintf (f_usage, GTXT ("HW counter profiling is not supported on this system\n"));
+	}
+      return;
+    }
+
+  /* At this point, we know we have counters */
+  char**hwc_attrs = hwc_get_attrs (forKernel);
+  int has_attrs = (hwc_attrs && hwc_attrs[0]);
+  if (show_syntax)
+    {
+      const char *reg_s = show_regs ? "[/<reg#>]" : "";
+      const char *attr_s = has_attrs ? "[[~<attr>=<val>]...]" : "";
+      fprintf (f_usage, GTXT (" <ctr_def> == <ctr>%s%s,[<rate>]\n"), attr_s, reg_s);
+      if (dataspace_msg)
+	fprintf (f_usage, NTXT ("%s"), dataspace_msg);
+      fprintf (f_usage, GTXT (" <ctr>\n"));
+      fprintf (f_usage, GTXT (" counter name, "));
+    }
+  else
+    fprintf (f_usage, GTXT (" Counter name "));
+  fprintf (f_usage, GTXT ("must be selected from the available counters\n"
+			  " listed below. On most systems, if a counter is not listed\n"
+			  " below, it may still be specified by its numeric value.\n"));
+  if (cpcx_has_precise[forKernel])
+    {
+      if (!forKernel)
+	fprintf (f_usage, GTXT (" Counters labeled as 'memoryspace' in the list below will\n"
+				" collect memoryspace data by default.\n"));
+    }
+  fprintf (f_usage, GTXT ("\n"));
+  if (has_attrs)
+    {
+      if (show_syntax)
+	{
+	  fprintf (f_usage, GTXT (" ~<attr>=<val>\n"));
+	  fprintf (f_usage, GTXT (" optional attribute where <val> can be in decimal or hex\n"
+				  " format, and <attr> can be one of: \n"));
+	}
+      else
+	fprintf (f_usage, GTXT (" Optional attribute where <val> can be in decimal or hex\n"
+				" format, and <attr> can be one of: \n"));
+      for (char **pattr = hwc_attrs; *pattr; pattr++)
+	fprintf (f_usage, NTXT (" `%s'\n"), *pattr);
+      if (show_syntax)
+	fprintf (f_usage, GTXT (" Multiple attributes may be specified, and each must be preceded by a ~.\n\n"));
+      else
+	fprintf (f_usage, GTXT (" Multiple attributes may be specified.\n\n"));
+      if (IS_KERNEL (forKernel))
+	fprintf (f_usage, GTXT (" Other attributes may be supported by the chip, but are not supported by DTrace and will be ignored by er_kernel.\n\n"));
+    }
+
+  if (show_syntax)
+    {
+      if (show_regs)
+	fprintf (f_usage, GTXT (" /<reg#>\n"
+				" forces use of a specific hardware register. (Solaris only)\n"
+				" If not specified, %s will attempt to place the counter into the first\n"
+				" available register and as a result may be unable to place\n"
+				" subsequent counters due to register conflicts.\n"
+				" The / in front of the register number is required if a register is specified.\n\n"),
+		 cmd);
+
+      fprintf (f_usage, GTXT (" <rate> == {auto|lo|on|hi}\n"));
+      fprintf (f_usage, GTXT (" `auto' (default) match the rate used by clock profiling.\n"));
+      fprintf (f_usage, GTXT (" If clock profiling is disabled, use `on'.\n"));
+      fprintf (f_usage, GTXT (" `lo' per-thread maximum rate of ~10 samples/second\n"));
+      fprintf (f_usage, GTXT (" `on' per-thread maximum rate of ~100 samples/second\n"));
+      fprintf (f_usage, GTXT (" `hi' per-thread maximum rate of ~1000 samples/second\n\n"));
+      fprintf (f_usage, GTXT (" <rate> == <interval>\n"));
+      fprintf (f_usage, GTXT (" event interval; see collect (1) for details\n\n"));
+
+      fprintf (f_usage, GTXT (" A comma ',' followed immediately by white space may be omitted.\n\n"));
+    }
+
+  /* default counters */
+  fprintf (f_usage, GTXT ("Default set of HW counters:\n\n"));
+  char * defctrs = hwc_get_default_cntrs2 (forKernel, 1);
+  if (defctrs == NULL)
+    fprintf (f_usage, GTXT (" No default HW counter set defined for this system.\n"));
+  else if (strlen (defctrs) == 0)
+    {
+      char *s = hwc_get_orig_default_cntrs (forKernel);
+      /* s is NULL when no original default string is stored; never hand a
+	 NULL pointer to %s.  */
+      fprintf (f_usage, GTXT (" The default HW counter set (%s) defined for %s cannot be loaded on this system.\n"),
+	       s ? s : "", cpuname);
+      free (s);
+      free (defctrs);
+    }
+  else
+    {
+      /* Only the style-1 string is printed, so no style-2 copy is built.  */
+      fprintf (f_usage, GTXT (" -h %s\n"), defctrs);
+      free (defctrs);
+    }
+
+  /* long listings */
+  char tmp[1024];
+  if (has_std_ctrs)
+    {
+      fprintf (f_usage, GTXT ("\nAliases for most useful HW counters:\n\n"));
+      format_columns (tmp, 1024, "alias", "raw name", "type ", "units", "regs", "description");
+      fprintf (f_usage, NTXT (" %s\n\n"), tmp);
+      for (Hwcentry **pctr = std_ctrs; *pctr; pctr++)
+	{
+	  Hwcentry *ctr = *pctr;
+	  hwc_hwcentry_string_internal (tmp, sizeof (tmp), ctr, 0);
+	  fprintf (f_usage, NTXT (" %s\n"), tmp);
+	}
+    }
+  if (has_raw_ctrs)
+    {
+      fprintf (f_usage, GTXT ("\nRaw HW counters:\n\n"));
+      hwc_usage_raw_overview_sparc (f_usage, cpuver);
+      format_columns (tmp, 1024, "name", NULL, "type ", "units", "regs", "description");
+      fprintf (f_usage, NTXT (" %s\n\n"), tmp);
+      for (Hwcentry **pctr = raw_ctrs; *pctr; pctr++)
+	{
+	  Hwcentry *ctr = *pctr;
+	  hwc_hwcentry_string_internal (tmp, sizeof (tmp), ctr, show_short_desc);
+	  fprintf (f_usage, NTXT (" %s\n"), tmp);
+	}
+    }
+
+  /* documentation notice */
+  hwc_get_docref (tmp, 1024);
+  if (strlen (tmp))
+    fprintf (f_usage, NTXT ("\n%s\n"), tmp);
+}
+
+/* Print a description of "-h" usage, largely common to collect and er_kernel.
+   Output goes to stdout.  The two flags below sit in the argument positions
+   that hwc_usage_f() uses for show_syntax and show_short_desc.  */
+void
+hwc_usage (int forKernel, const char *cmd, const char *dataspace_msg)
+{
+  const int show_syntax = 1;
+  const int show_short_desc = 0;
+  hwc_usage_internal (forKernel, stdout, cmd, dataspace_msg,
+                      show_syntax, show_short_desc);
+}
+
+/* Variant of hwc_usage() in which the caller picks the output stream and
+   the show_syntax / show_short_desc flags.  Every argument is forwarded
+   unchanged to hwc_usage_internal().  */
+void
+hwc_usage_f (int forKernel, FILE *f, const char *cmd,
+             const char *dataspace_msg, int show_syntax,
+             int show_short_desc)
+{
+  hwc_usage_internal (forKernel, f, cmd, dataspace_msg,
+                      show_syntax, show_short_desc);
+}
+
+/*---------------------------------------------------------------------------*/
+/* init functions */
+/* NULL-terminated list of raw counter names that hwc_process_raw_ctrs()
+   drops when the back end lacks HWCFUNCS_SUPPORT_PEBS_SAMPLING or when
+   the lists are being built for the kernel (forKernel != 0). */
+static char* supported_pebs_counters[] = {
+ "mem_inst_retired.latency_above_threshold",
+ "mem_trans_retired.load_latency",
+ "mem_trans_retired.precise_store",
+ NULL
+};
+
+/* Callback (see setup_cpc()) called for each valid regno/name combo.
+   Converts the register number to regno_t and hands the pair to
+   list_add() for the unfiltered_raw list.  */
+static void
+hwc_cb (uint_t cpc_regno, const char *name)
+{
+  list_add (&unfiltered_raw, (regno_t) cpc_regno, name);
+}
+
+/* Build the std (alias), raw and hidden counter lists.
+ *
+ * input:
+ * forKernel: 1 - generate lists for er_kernel, 0 - generate lists for collect
+ * static_tables: [0] is the std table and [1] the generic table; each is
+ *   scanned until an entry whose name is NULL
+ * raw_unfiltered_in: NULL-terminated array of HWCs as generated by hwc_cb();
+ *   may itself be NULL
+ * output:
+ * pstd_out[], praw_out[], phidden_out[]: malloc'd array of pointers to
+ *   malloc'd hwcentry, or NULL
+ *
+ * steps:
+ * 1. shallow-copy the static tables; hidden aliases from either table go
+ *    into a third (hidden) copy
+ * 2. copy the raw counters, skipping the names in supported_pebs_counters[]
+ *    when PEBS sampling is unsupported or forKernel is set
+ * 3. for every raw counter, update reg_list of each matching table entry;
+ *    a matching non-alias entry is then copied over the raw entry
+ * 4. append aliases that ended up with a register (or numeric aliases,
+ *    unless forKernel) to the std or hidden output list
+ * 5. for forKernel, replace val == PRELOAD_DEF with PRELOAD_DEF_ERKERNEL
+ */
+static void
+hwc_process_raw_ctrs (int forKernel, Hwcentry ***pstd_out,
+ Hwcentry ***praw_out, Hwcentry ***phidden_out,
+ Hwcentry**static_tables, Hwcentry **raw_unfiltered_in)
+{
+ // set up output buffers
+ ptr_list s_outbufs[3];
+ ptr_list *std_out = &s_outbufs[0];
+ ptr_list_init (std_out);
+ ptr_list *raw_out = &s_outbufs[1];
+ ptr_list_init (raw_out);
+ ptr_list *hidden_out = &s_outbufs[2];
+ ptr_list_init (hidden_out);
+
+#define NUM_TABLES 3
+ ptr_list table_copy[NUM_TABLES]; // copy of data from static tables. [0]std, [1]generic, and [2]hidden
+ for (int tt = 0; tt < NUM_TABLES; tt++)
+ ptr_list_init (&table_copy[tt]);
+
+ // copy records from std [0] and generic [1] static input tables into table_copy[0],[1],or[2]
+ for (int tt = 0; tt < 2; tt++)
+ for (Hwcentry *pctr = static_tables[tt]; pctr && pctr->name; pctr++)
+ if (is_hidden_alias (pctr))
+ list_append_shallow_copy (&table_copy[2], pctr); // hidden list
+ else
+ list_append_shallow_copy (&table_copy[tt], pctr);
+
+ // copy raw_unfiltered_in to raw_out
+ for (int ii = 0; raw_unfiltered_in && raw_unfiltered_in[ii]; ii++)
+ {
+ Hwcentry *pctr = raw_unfiltered_in[ii];
+ // filter out raw counters that don't work correctly
+
+#ifdef WORKAROUND_6231196_NIAGARA1_NO_CTR_0
+ if (cpcx_cpuver == CPC_ULTRA_T1)
+ if (!regno_is_valid (pctr, 1))
+ continue; /* Niagara can not profile on register zero; skip this */
+#endif
+ // remove specific PEBs counters when back end doesn't support sampling
+ const char *name = pctr->name;
+ if ((cpcx_support_bitmask & HWCFUNCS_SUPPORT_PEBS_SAMPLING) == 0 || forKernel)
+ {
+ int skip = 0;
+ for (int ii = 0; supported_pebs_counters[ii]; ii++) // note: this ii shadows the outer loop index
+ if (strcmp (supported_pebs_counters[ii], name) == 0)
+ {
+ skip = 1;
+ break;
+ }
+ if (skip)
+ continue;
+ }
+
+ Hwcentry *pnew = list_append_shallow_copy (raw_out, pctr);
+#ifdef WORKAROUND_6231196_NIAGARA1_NO_CTR_0
+ if (cpcx_cpuver == CPC_ULTRA_T1)
+ {
+ free (pnew->reg_list);
+ pnew->reg_list = NULL;
+ regno_add (pnew, 1); // only allow register 1
+ }
+#endif
+ } // raw_unfiltered_in
+
+ // Scan raw counters to populate Hwcentry fields from matching static_tables entries
+ // Also populate reg_list for aliases found in table_copy[]
+ for (int uu = 0; uu < raw_out->sz; uu++)
+ {
+ Hwcentry *praw = (Hwcentry*) raw_out->array[uu];
+ Hwcentry *pstd = NULL; // set if non-alias entry from std table matches
+ char *name = praw->name;
+ /* in the standard counter and generic lists,
+ update reg_list for all matching items */
+ for (int tt = 0; tt < NUM_TABLES; tt++)
+ { // std, generic, and hidden
+ if (table_copy[tt].sz == 0)
+ continue;
+ Hwcentry **array = (Hwcentry**) table_copy[tt].array;
+ for (int jj = 0; array[jj]; jj++)
+ { // all table counters
+ Hwcentry *pctr = array[jj];
+ char *pname;
+ if (pctr->int_name)
+ pname = pctr->int_name;
+ else
+ pname = pctr->name;
+ if (!is_same (name, pname, '~'))
+ continue;
+
+ /* truncated pname matches <name>... */
+ // check to see if table entry applies only to specific register
+ int specific_reg_num_only = 0;
+ if (pctr->reg_num != REGNO_ANY)
+ {
+ // table entry applies only to specific register
+ if (!regno_is_valid (praw, pctr->reg_num))
+ continue;
+ specific_reg_num_only = 1;
+ }
+
+ // Match!
+ // Update the table_copy entry's supported registers
+ if (specific_reg_num_only)
+ regno_add (pctr, pctr->reg_num);
+ else
+ pctr->reg_list = praw->reg_list; // pointer copy: the table entry now shares the raw entry's list
+
+ if (!is_visible_alias (pctr) && !is_hidden_alias (pctr))
+ {
+ // Note: we could expand criteria to also allow aliases to set default rates for raw HWCs
+ /* This is an 'internal' raw counter */
+ if (!pstd)
+ pstd = pctr; /* use info as a template when adding to raw list */
+ else
+ hwcentry_print (DBG_LT0, "hwctable: hwc_cb: Warning: "
+ "counter %s appears in table more than once: ",
+ pstd);
+ }
+ }/* for table rows */
+ }/* for std and generic tables */
+
+ if (pstd)
+ {
+ /* the main table had an entry that matched <name> exactly */
+ /* Apply the main table entry as a template */
+ *praw = *pstd; // struct copy: replaces every field of the raw entry, reg_list included
+ }
+ }/* for (raw_out) */
+
+ // update std_out and hidden_out
+ for (int tt = 0; tt < NUM_TABLES; tt++)
+ {
+ if (tt == 1 /*skip std_raw*/ || table_copy[tt].sz == 0)
+ continue;
+ Hwcentry *pctr;
+ for (int ii = 0; (pctr = table_copy[tt].array[ii]); ii++)
+ {
+ // prune unsupported rows from std table
+ if (!is_visible_alias (pctr) && !is_hidden_alias (pctr))
+ continue; // only aliases
+ if (REG_LIST_IS_EMPTY (pctr->reg_list))
+ {
+ if (is_numeric_alias (pctr))
+ {
+#if 1 //22844570 DTrace cpc provider does not accept numeric counter names
+ if (forKernel)
+ continue;
+#endif
+ regno_add (pctr, REGNO_ANY); // hwcs specified by number allowed on any register
+ }
+ else
+ continue;
+ }
+
+ ptr_list *dest = (tt == 0) ? std_out : hidden_out;
+ Hwcentry *isInList;
+ if (pctr->short_desc == NULL)
+ {
+ isInList = ptrarray_find_by_name ((Hwcentry**) raw_out->array, pctr->int_name);
+ if (isInList)
+ pctr->short_desc = isInList->short_desc; // copy the raw counter's detailed description
+ }
+ isInList = ptrarray_find_by_name ((Hwcentry**) dest->array, pctr->name);
+ if (isInList)
+ hwcentry_print (DBG_LT0, "hwctable: hwc_cb: Warning: "
+ "counter %s appears in alias list more than once: ",
+ pctr);
+ else
+ list_append_shallow_copy (dest, pctr);
+ }
+ }
+ for (int tt = 0; tt < NUM_TABLES; tt++)
+ ptr_list_free (&table_copy[tt]);
+
+ if (forKernel)
+ {
+ // for er_kernel, use baseline value of PRELOAD_DEF_ERKERNEL instead of PRELOAD_DEF
+ for (int tt = 0; tt < 3; tt++)
+ { // std_out-0, raw_out-1, hidden_out-2
+ Hwcentry** hwcs = (Hwcentry**) (s_outbufs[tt].array);
+ for (int ii = 0; hwcs && hwcs[ii]; ii++)
+ {
+ Hwcentry *hwc = hwcs[ii];
+ if (hwc->val == PRELOAD_DEF)
+ hwc->val = PRELOAD_DEF_ERKERNEL;
+ }
+ }
+ }
+ *pstd_out = (Hwcentry**) std_out->array;
+ *praw_out = (Hwcentry**) raw_out->array;
+ *phidden_out = (Hwcentry**) hidden_out->array;
+}
+
+/* Callback (see setup_cpc()) called for each valid attribute.  Logs the
+   attribute, then appends a strdup'd copy to unfiltered_attrs.  The
+   "picnum" attribute is logged but not stored, so it is never offered
+   to users.  */
+static void
+attrs_cb (const char *attr)
+{
+  Tprintf (DBG_LT3, "hwctable: attrs_cb(): %s\n", attr);
+  if (strcmp (attr, "picnum") != 0)
+    ptr_list_add (&unfiltered_attrs, (void*) strdup (attr));
+}
+
+/* Returns 1 if ATTR is listed in cpcx_attrs[forKernel], otherwise 0.
+   Also returns 0 when forKernel fails VALID_FOR_KERNEL or when no
+   attribute list exists for it.  setup_cpcx() is called first.  */
+static int
+attr_is_valid (int forKernel, const char *attr)
+{
+  setup_cpcx ();
+  if (!VALID_FOR_KERNEL (forKernel))
+    return 0;
+  if (!cpcx_attrs[forKernel])
+    return 0;
+
+  /* Walk the NULL-terminated list until a match is seen.  */
+  int found = 0;
+  for (int idx = 0; !found && cpcx_attrs[forKernel][idx]; idx++)
+    found = strcmp (attr, cpcx_attrs[forKernel][idx]) == 0;
+  return found;
+}
diff --git a/gprofng/common/opteron_pcbe.c b/gprofng/common/opteron_pcbe.c
new file mode 100644
index 0000000..d479945
--- /dev/null
+++ b/gprofng/common/opteron_pcbe.c
@@ -0,0 +1,448 @@
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ Contributed by Oracle.
+
+ This file is part of GNU Binutils.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street - Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+/*
+ * This file contains preset event names from the Performance Application
+ * Programming Interface v3.5 which included the following notice:
+ *
+ * Copyright (c) 2005,6
+ * Innovative Computing Labs
+ * Computer Science Department,
+ * University of Tennessee,
+ * Knoxville, TN.
+ * All Rights Reserved.
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University of Tennessee nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * This open source software license conforms to the BSD License template.
+ */
+
+/*
+ * Performance Counter Back-End for AMD Opteron, AMD Athlon 64 and
+ * AMD Family 10h processors.
+ */
+
+#include <sys/types.h>
+#include "hwcdrv.h"
+
+#define CPU /* used by cpuid_get*() functions */
+/*
+ * One native event.  Tables of these end with EV_END (name == NULL).
+ */
+typedef struct _amd_event
+{
+ char *name; /* event name; compared with strcmp() in opt_pcbe_get_eventnum() */
+ uint16_t emask; /* Event mask setting; converted with EXTENDED_EVNUM_2_EVSEL() */
+ uint8_t umask_valid; /* Mask of unreserved UNIT_MASK bits; returned through *event_valid_umask */
+} amd_event_t;
+/*
+ * One generic (PAPI_*) event, expressed as a native event plus a fixed
+ * unit mask.  Tables of these end with GEN_EV_END (name == NULL).
+ */
+typedef struct _amd_generic_event
+{
+ char *name; /* generic name the caller asks for */
+ char *event; /* name of the amd_event_t entry it maps to */
+ uint8_t umask; /* unit mask OR'ed into the event select, shifted by PERFCTR_UMASK_SHIFT */
+} amd_generic_event_t;
+/* Table terminators: the lookup loops stop at the first entry whose name is NULL. */
+#define EV_END { NULL, 0, 0 }
+#define GEN_EV_END { NULL, NULL, 0 }
+/*
+ * Native events placed in both opt_events_rev_E[] and family_10h_events[].
+ * Each row is an amd_event_t initializer: { name, emask, umask_valid }.
+ */
+#define AMD_cmn_events \
+ { "FP_dispatched_fpu_ops", 0x00, 0x3F }, \
+ { "FP_cycles_no_fpu_ops_retired", 0x01, 0x0 }, \
+ { "FP_dispatched_fpu_ops_ff", 0x02, 0x0 }, \
+ { "LS_seg_reg_load", 0x20, 0x7F }, \
+ { "LS_uarch_resync_self_modify", 0x21, 0x0 }, \
+ { "LS_uarch_resync_snoop", 0x22, 0x0 }, \
+ { "LS_buffer_2_full", 0x23, 0x0 }, \
+ { "LS_retired_cflush", 0x26, 0x0 }, \
+ { "LS_retired_cpuid", 0x27, 0x0 }, \
+ { "DC_access", 0x40, 0x0 }, \
+ { "DC_miss", 0x41, 0x0 }, \
+ { "DC_refill_from_L2", 0x42, 0x1F }, \
+ { "DC_refill_from_system", 0x43, 0x1F }, \
+ { "DC_misaligned_data_ref", 0x47, 0x0 }, \
+ { "DC_uarch_late_cancel_access", 0x48, 0x0 }, \
+ { "DC_uarch_early_cancel_access", 0x49, 0x0 }, \
+ { "DC_dispatched_prefetch_instr", 0x4B, 0x7 }, \
+ { "DC_dcache_accesses_by_locks", 0x4C, 0x2 }, \
+ { "BU_memory_requests", 0x65, 0x83}, \
+ { "BU_data_prefetch", 0x67, 0x3 }, \
+ { "BU_cpu_clk_unhalted", 0x76, 0x0 }, \
+ { "IC_fetch", 0x80, 0x0 }, \
+ { "IC_miss", 0x81, 0x0 }, \
+ { "IC_refill_from_L2", 0x82, 0x0 }, \
+ { "IC_refill_from_system", 0x83, 0x0 }, \
+ { "IC_itlb_L1_miss_L2_hit", 0x84, 0x0 }, \
+ { "IC_uarch_resync_snoop", 0x86, 0x0 }, \
+ { "IC_instr_fetch_stall", 0x87, 0x0 }, \
+ { "IC_return_stack_hit", 0x88, 0x0 }, \
+ { "IC_return_stack_overflow", 0x89, 0x0 }, \
+ { "FR_retired_x86_instr_w_excp_intr", 0xC0, 0x0 }, \
+ { "FR_retired_uops", 0xC1, 0x0 }, \
+ { "FR_retired_branches_w_excp_intr", 0xC2, 0x0 }, \
+ { "FR_retired_branches_mispred", 0xC3, 0x0 }, \
+ { "FR_retired_taken_branches", 0xC4, 0x0 }, \
+ { "FR_retired_taken_branches_mispred", 0xC5, 0x0 }, \
+ { "FR_retired_far_ctl_transfer", 0xC6, 0x0 }, \
+ { "FR_retired_resyncs", 0xC7, 0x0 }, \
+ { "FR_retired_near_rets", 0xC8, 0x0 }, \
+ { "FR_retired_near_rets_mispred", 0xC9, 0x0 }, \
+ { "FR_retired_taken_branches_mispred_addr_miscomp", 0xCA, 0x0 }, \
+ { "FR_retired_fastpath_double_op_instr", 0xCC, 0x7 }, \
+ { "FR_intr_masked_cycles", 0xCD, 0x0 }, \
+ { "FR_intr_masked_while_pending_cycles", 0xCE, 0x0 }, \
+ { "FR_taken_hardware_intrs", 0xCF, 0x0 }, \
+ { "FR_nothing_to_dispatch", 0xD0, 0x0 }, \
+ { "FR_dispatch_stalls", 0xD1, 0x0 }, \
+ { "FR_dispatch_stall_branch_abort_to_retire", 0xD2, 0x0 }, \
+ { "FR_dispatch_stall_serialization", 0xD3, 0x0 }, \
+ { "FR_dispatch_stall_segment_load", 0xD4, 0x0 }, \
+ { "FR_dispatch_stall_reorder_buffer_full", 0xD5, 0x0 }, \
+ { "FR_dispatch_stall_resv_stations_full", 0xD6, 0x0 }, \
+ { "FR_dispatch_stall_fpu_full", 0xD7, 0x0 }, \
+ { "FR_dispatch_stall_ls_full", 0xD8, 0x0 }, \
+ { "FR_dispatch_stall_waiting_all_quiet", 0xD9, 0x0 }, \
+ { "FR_dispatch_stall_far_ctl_trsfr_resync_branch_pend", 0xDA, 0x0 },\
+ { "FR_fpu_exception", 0xDB, 0xF }, \
+ { "FR_num_brkpts_dr0", 0xDC, 0x0 }, \
+ { "FR_num_brkpts_dr1", 0xDD, 0x0 }, \
+ { "FR_num_brkpts_dr2", 0xDE, 0x0 }, \
+ { "FR_num_brkpts_dr3", 0xDF, 0x0 }, \
+ { "NB_mem_ctrlr_bypass_counter_saturation", 0xE4, 0xF }
+/*
+ * Native events placed only in opt_events_rev_E[].
+ * Rows are { name, emask, umask_valid }.
+ */
+#define OPT_events \
+ { "LS_locked_operation", 0x24, 0x7 }, \
+ { "DC_copyback", 0x44, 0x1F }, \
+ { "DC_dtlb_L1_miss_L2_hit", 0x45, 0x0 }, \
+ { "DC_dtlb_L1_miss_L2_miss", 0x46, 0x0 }, \
+ { "DC_1bit_ecc_error_found", 0x4A, 0x3 }, \
+ { "BU_system_read_responses", 0x6C, 0x7 }, \
+ { "BU_quadwords_written_to_system", 0x6D, 0x1 }, \
+ { "BU_internal_L2_req", 0x7D, 0x1F }, \
+ { "BU_fill_req_missed_L2", 0x7E, 0x7 }, \
+ { "BU_fill_into_L2", 0x7F, 0x1 }, \
+ { "IC_itlb_L1_miss_L2_miss", 0x85, 0x0 }, \
+ { "FR_retired_fpu_instr", 0xCB, 0xF }, \
+ { "NB_mem_ctrlr_page_access", 0xE0, 0x7 }, \
+ { "NB_mem_ctrlr_page_table_overflow", 0xE1, 0x0 }, \
+ { "NB_mem_ctrlr_turnaround", 0xE3, 0x7 }, \
+ { "NB_ECC_errors", 0xE8, 0x80}, \
+ { "NB_sized_commands", 0xEB, 0x7F }, \
+ { "NB_probe_result", 0xEC, 0x7F}, \
+ { "NB_gart_events", 0xEE, 0x7 }, \
+ { "NB_ht_bus0_bandwidth", 0xF6, 0xF }, \
+ { "NB_ht_bus1_bandwidth", 0xF7, 0xF }, \
+ { "NB_ht_bus2_bandwidth", 0xF8, 0xF }
+/* Additional native event placed only in opt_events_rev_E[]. */
+#define OPT_RevD_events \
+ { "NB_sized_blocks", 0xE5, 0x3C }
+/* Native events placed in both opt_events_rev_E[] and family_10h_events[]. */
+#define OPT_RevE_events \
+ { "NB_cpu_io_to_mem_io", 0xE9, 0xFF}, \
+ { "NB_cache_block_commands", 0xEA, 0x3D}
+/*
+ * Native events placed only in family_10h_events[].
+ * Rows are { name, emask, umask_valid }; emask is a uint16_t, and some
+ * values here (e.g. 0x1F9, 0x4E0) do not fit in 8 bits.
+ */
+#define AMD_FAMILY_10h_cmn_events \
+ { "FP_retired_sse_ops", 0x3, 0x7F}, \
+ { "FP_retired_move_ops", 0x4, 0xF}, \
+ { "FP_retired_serialize_ops", 0x5, 0xF}, \
+ { "FP_serialize_ops_cycles", 0x6, 0x3}, \
+ { "DC_copyback", 0x44, 0x7F }, \
+ { "DC_dtlb_L1_miss_L2_hit", 0x45, 0x3 }, \
+ { "DC_dtlb_L1_miss_L2_miss", 0x46, 0x7 }, \
+ { "DC_1bit_ecc_error_found", 0x4A, 0xF }, \
+ { "DC_dtlb_L1_hit", 0x4D, 0x7 }, \
+ { "BU_system_read_responses", 0x6C, 0x17 }, \
+ { "BU_octwords_written_to_system", 0x6D, 0x1 }, \
+ { "BU_internal_L2_req", 0x7D, 0x3F }, \
+ { "BU_fill_req_missed_L2", 0x7E, 0xF }, \
+ { "BU_fill_into_L2", 0x7F, 0x3 }, \
+ { "IC_itlb_L1_miss_L2_miss", 0x85, 0x3 }, \
+ { "IC_eviction", 0x8B, 0x0 }, \
+ { "IC_cache_lines_invalidate", 0x8C, 0xF }, \
+ { "IC_itlb_reload", 0x99, 0x0 }, \
+ { "IC_itlb_reload_aborted", 0x9A, 0x0 }, \
+ { "FR_retired_mmx_sse_fp_instr", 0xCB, 0x7 }, \
+ { "NB_mem_ctrlr_page_access", 0xE0, 0xFF }, \
+ { "NB_mem_ctrlr_page_table_overflow", 0xE1, 0x3 }, \
+ { "NB_mem_ctrlr_turnaround", 0xE3, 0x3F }, \
+ { "NB_thermal_status", 0xE8, 0x7C}, \
+ { "NB_sized_commands", 0xEB, 0x3F }, \
+ { "NB_probe_results_upstream_req", 0xEC, 0xFF}, \
+ { "NB_gart_events", 0xEE, 0xFF }, \
+ { "NB_ht_bus0_bandwidth", 0xF6, 0xBF }, \
+ { "NB_ht_bus1_bandwidth", 0xF7, 0xBF }, \
+ { "NB_ht_bus2_bandwidth", 0xF8, 0xBF }, \
+ { "NB_ht_bus3_bandwidth", 0x1F9, 0xBF }, \
+ { "LS_locked_operation", 0x24, 0xF }, \
+ { "LS_cancelled_store_to_load_fwd_ops", 0x2A, 0x7 }, \
+ { "LS_smi_received", 0x2B, 0x0 }, \
+ { "LS_ineffective_prefetch", 0x52, 0x9 }, \
+ { "LS_global_tlb_flush", 0x54, 0x0 }, \
+ { "NB_mem_ctrlr_dram_cmd_slots_missed", 0xE2, 0x3 }, \
+ { "NB_mem_ctrlr_req", 0x1F0, 0xFF }, \
+ { "CB_cpu_to_dram_req_to_target", 0x1E0, 0xFF }, \
+ { "CB_io_to_dram_req_to_target", 0x1E1, 0xFF }, \
+ { "CB_cpu_read_cmd_latency_to_target_0_to_3", 0x1E2, 0xFF }, \
+ { "CB_cpu_read_cmd_req_to_target_0_to_3", 0x1E3, 0xFF }, \
+ { "CB_cpu_read_cmd_latency_to_target_4_to_7", 0x1E4, 0xFF }, \
+ { "CB_cpu_read_cmd_req_to_target_4_to_7", 0x1E5, 0xFF }, \
+ { "CB_cpu_cmd_latency_to_target_0_to_7", 0x1E6, 0xFF }, \
+ { "CB_cpu_req_to_target_0_to_7", 0x1E7, 0xFF }, \
+ { "L3_read_req", 0x4E0, 0xF7 }, \
+ { "L3_miss", 0x4E1, 0xF7 }, \
+ { "L3_l2_eviction_l3_fill", 0x4E2, 0xFF }, \
+ { "L3_eviction", 0x4E3, 0xF }
+/*
+ * Generic events placed in both opt_generic_events[] and
+ * family_10h_generic_events[].  Each row is an amd_generic_event_t
+ * initializer: { generic name, native event name, umask }.
+ */
+#define AMD_cmn_generic_events \
+ { "PAPI_br_ins", "FR_retired_branches_w_excp_intr", 0x0 },\
+ { "PAPI_br_msp", "FR_retired_branches_mispred", 0x0 }, \
+ { "PAPI_br_tkn", "FR_retired_taken_branches", 0x0 }, \
+ { "PAPI_fp_ops", "FP_dispatched_fpu_ops", 0x3 }, \
+ { "PAPI_fad_ins", "FP_dispatched_fpu_ops", 0x1 }, \
+ { "PAPI_fml_ins", "FP_dispatched_fpu_ops", 0x2 }, \
+ { "PAPI_fpu_idl", "FP_cycles_no_fpu_ops_retired", 0x0 }, \
+ { "PAPI_tot_cyc", "BU_cpu_clk_unhalted", 0x0 }, \
+ { "PAPI_tot_ins", "FR_retired_x86_instr_w_excp_intr", 0x0 }, \
+ { "PAPI_l1_dca", "DC_access", 0x0 }, \
+ { "PAPI_l1_dcm", "DC_miss", 0x0 }, \
+ { "PAPI_l1_ldm", "DC_refill_from_L2", 0xe }, \
+ { "PAPI_l1_stm", "DC_refill_from_L2", 0x10 }, \
+ { "PAPI_l1_ica", "IC_fetch", 0x0 }, \
+ { "PAPI_l1_icm", "IC_miss", 0x0 }, \
+ { "PAPI_l1_icr", "IC_fetch", 0x0 }, \
+ { "PAPI_l2_dch", "DC_refill_from_L2", 0x1e }, \
+ { "PAPI_l2_dcm", "DC_refill_from_system", 0x1e }, \
+ { "PAPI_l2_dcr", "DC_refill_from_L2", 0xe }, \
+ { "PAPI_l2_dcw", "DC_refill_from_L2", 0x10 }, \
+ { "PAPI_l2_ich", "IC_refill_from_L2", 0x0 }, \
+ { "PAPI_l2_icm", "IC_refill_from_system", 0x0 }, \
+ { "PAPI_l2_ldm", "DC_refill_from_system", 0xe }, \
+ { "PAPI_l2_stm", "DC_refill_from_system", 0x10 }, \
+ { "PAPI_res_stl", "FR_dispatch_stalls", 0x0 }, \
+ { "PAPI_stl_icy", "FR_nothing_to_dispatch", 0x0 }, \
+ { "PAPI_hw_int", "FR_taken_hardware_intrs", 0x0 }
+/*
+ * Generic events placed only in opt_generic_events[].
+ * Rows are { generic name, native event name, umask }.
+ */
+#define OPT_cmn_generic_events \
+ { "PAPI_tlb_dm", "DC_dtlb_L1_miss_L2_miss", 0x0 }, \
+ { "PAPI_tlb_im", "IC_itlb_L1_miss_L2_miss", 0x0 }, \
+ { "PAPI_fp_ins", "FR_retired_fpu_instr", 0xd }, \
+ { "PAPI_vec_ins", "FR_retired_fpu_instr", 0x4 }
+/*
+ * Generic events placed only in family_10h_generic_events[].
+ * Rows are { generic name, native event name, umask }.
+ */
+#define AMD_FAMILY_10h_generic_events \
+ { "PAPI_tlb_dm", "DC_dtlb_L1_miss_L2_miss", 0x7 }, \
+ { "PAPI_tlb_im", "IC_itlb_L1_miss_L2_miss", 0x3 }, \
+ { "PAPI_l3_dcr", "L3_read_req", 0xf1 }, \
+ { "PAPI_l3_icr", "L3_read_req", 0xf2 }, \
+ { "PAPI_l3_tcr", "L3_read_req", 0xf7 }, \
+ { "PAPI_l3_stm", "L3_miss", 0xf4 }, \
+ { "PAPI_l3_ldm", "L3_miss", 0xf3 }, \
+ { "PAPI_l3_tcm", "L3_miss", 0xf7 }
+/* Native events selected by opt_pcbe_init() when amd_family == OPTERON_FAMILY. */
+static amd_event_t opt_events_rev_E[] = {
+ AMD_cmn_events,
+ OPT_events,
+ OPT_RevD_events,
+ OPT_RevE_events,
+ EV_END
+};
+/* Native events selected by opt_pcbe_init() for the other accepted family (AMD_FAMILY_10H). */
+static amd_event_t family_10h_events[] = {
+ AMD_cmn_events,
+ OPT_RevE_events,
+ AMD_FAMILY_10h_cmn_events,
+ EV_END
+};
+/* Generic events selected together with opt_events_rev_E[]. */
+static amd_generic_event_t opt_generic_events[] = {
+ AMD_cmn_generic_events,
+ OPT_cmn_generic_events,
+ GEN_EV_END
+};
+/* Generic events selected together with family_10h_events[]. */
+static amd_generic_event_t family_10h_generic_events[] = {
+ AMD_cmn_generic_events,
+ AMD_FAMILY_10h_generic_events,
+ GEN_EV_END
+};
+/*
+ * Active tables, assigned by opt_pcbe_init() on its success path; until
+ * then they are NULL, which the lookup loops in this file treat as empty.
+ * amd_family holds the value returned by cpuid_getfamily().
+ */
+static amd_event_t *amd_events = NULL;
+static uint_t amd_family;
+static amd_generic_event_t *amd_generic_events = NULL;
+/*
+ * BITS(v, u, l) yields bits l..u (inclusive) of v, shifted down to bit 0.
+ * NOTE(review): the mask is built from an int-typed 1, so a field of 31 or
+ * more bits would overflow the shift -- confirm no user needs that width.
+ */
+#define BITS(v, u, l) (((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1))
+#define OPTERON_FAMILY 0x0f /* family value that selects the Opteron/Athlon 64 tables */
+#define AMD_FAMILY_10H 0x10 /* family value that selects the Family 10h tables */
+
+/*
+ * Record the CPU family and select the event tables that go with it.
+ * Returns 0 on success, or -1 (leaving the tables untouched) when the
+ * vendor is not AMD or the family is neither OPTERON_FAMILY nor
+ * AMD_FAMILY_10H.
+ */
+static int
+opt_pcbe_init (void)
+{
+  amd_family = cpuid_getfamily ();
+
+  /*
+   * Make sure this really _is_ an AMD system before looking at the
+   * family; the module could have been loaded under another name.
+   */
+  if (cpuid_getvendor () != X86_VENDOR_AMD)
+    return -1;
+
+  /* Pick the event configuration that matches the processor family.  */
+  switch (amd_family)
+    {
+    case OPTERON_FAMILY:
+      amd_events = opt_events_rev_E;
+      amd_generic_events = opt_generic_events;
+      return 0;
+
+    case AMD_FAMILY_10H:
+      amd_events = family_10h_events;
+      amd_generic_events = family_10h_generic_events;
+      return 0;
+
+    default:
+      return -1;
+    }
+}
+
+/* Number of counter registers this back end reports; always 4.  */
+static uint_t
+opt_pcbe_ncounters (void)
+{
+  enum { OPT_PCBE_NCOUNTERS = 4 };
+  return OPT_PCBE_NCOUNTERS;
+}
+
+/*
+ * Return a fixed, untranslated name for the processor family recorded in
+ * amd_family, or a generic string when the family is not recognized.
+ */
+static const char *
+opt_pcbe_impl_name (void)
+{
+  switch (amd_family)
+    {
+    case OPTERON_FAMILY:
+      return "AMD Opteron & Athlon64";
+    case AMD_FAMILY_10H:
+      return "AMD Family 10h";
+    default:
+      return "Unknown AMD processor";
+    }
+}
+
+/*
+ * Return the GTXT-wrapped documentation reference for the processor
+ * family recorded in amd_family.
+ */
+static const char *
+opt_pcbe_cpuref (void)
+{
+  switch (amd_family)
+    {
+    case OPTERON_FAMILY:
+      return GTXT ("See Chapter 10 of the \"BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD Opteron Processors,\"\nAMD publication #26094");
+    case AMD_FAMILY_10H:
+      return GTXT ("See section 3.15 of the \"BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h Processors,\"\nAMD publication #31116");
+    default:
+      return GTXT ("Unknown AMD processor");
+    }
+}
+
+/*
+ * Invoke hwc_cb once per (register, event name) pair: every native event
+ * first, then every generic event, each for registers
+ * 0 .. opt_pcbe_ncounters () - 1.  Returns the number of callbacks made.
+ * A NULL table contributes nothing.
+ */
+static int
+opt_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb)
+{
+  const uint_t nregs = opt_pcbe_ncounters ();
+  int count = 0;
+
+  if (amd_events != NULL)
+    for (const amd_event_t *ev = amd_events; ev->name != NULL; ev++)
+      for (uint_t reg = 0; reg < nregs; reg++)
+        {
+          hwc_cb (reg, ev->name);
+          count++;
+        }
+
+  if (amd_generic_events != NULL)
+    for (const amd_generic_event_t *gen = amd_generic_events;
+         gen->name != NULL; gen++)
+      for (uint_t reg = 0; reg < nregs; reg++)
+        {
+          hwc_cb (reg, gen->name);
+          count++;
+        }
+
+  return count;
+}
+
+/* Find EVENTNAME in the active native table; NULL if absent or no table.  */
+static const amd_event_t *
+opt_lookup_event (const char *eventname)
+{
+  if (amd_events == NULL)
+    return NULL;
+  for (const amd_event_t *ev = amd_events; ev->name != NULL; ev++)
+    if (strcmp (eventname, ev->name) == 0)
+      return ev;
+  return NULL;
+}
+
+/*
+ * Translate EVENTNAME into an event-select value.
+ *
+ * Outputs are always initialized: *pmc_sel = pmc, *eventsel = all ones,
+ * *event_valid_umask = 0.  A native event then sets *eventsel from its
+ * emask and *event_valid_umask from its umask_valid.  A generic event is
+ * mapped to its native event, gets its fixed umask OR'ed in, and leaves
+ * *event_valid_umask at 0.
+ *
+ * Returns 0 on success, -1 when the name is unknown or a generic event
+ * refers to a native event missing from the active table.
+ */
+static int
+opt_pcbe_get_eventnum (const char *eventname, uint_t pmc, eventsel_t *eventsel,
+                       eventsel_t *event_valid_umask, uint_t *pmc_sel)
+{
+  *pmc_sel = pmc; /* for AMD, pmc doesn't need to be adjusted */
+  *eventsel = (eventsel_t) - 1;
+  *event_valid_umask = 0x0;
+
+  /* native table first */
+  const amd_event_t *ev = opt_lookup_event (eventname);
+  if (ev != NULL)
+    {
+      *eventsel = EXTENDED_EVNUM_2_EVSEL (ev->emask);
+      *event_valid_umask = ev->umask_valid;
+      return 0;
+    }
+
+  /* otherwise it has to be a generic name */
+  const amd_generic_event_t *gen = NULL;
+  if (amd_generic_events != NULL)
+    for (const amd_generic_event_t *cand = amd_generic_events;
+         cand->name != NULL; cand++)
+      if (strcmp (eventname, cand->name) == 0)
+        {
+          gen = cand;
+          break;
+        }
+  if (gen == NULL)
+    return -1;
+
+  /* find real event # for generic event */
+  ev = opt_lookup_event (gen->event);
+  if (ev == NULL)
+    return -1;
+
+  eventsel_t tmp_umask = gen->umask;
+  *eventsel = EXTENDED_EVNUM_2_EVSEL (ev->emask);
+  *eventsel |= (tmp_umask << PERFCTR_UMASK_SHIFT);
+  *event_valid_umask = 0; /* user umask not allowed w/generic events */
+  return 0;
+}
+/*
+ * Entry points of this back end, listed in the order init, ncounters,
+ * impl_name, cpuref, get_events, get_eventnum.  The member names of
+ * hdrv_pcbe_api_t are not visible in this file (presumably declared in
+ * hwcdrv.h -- confirm the order there before reordering).
+ */
+static hdrv_pcbe_api_t hdrv_pcbe_opteron_api = {
+ opt_pcbe_init,
+ opt_pcbe_ncounters,
+ opt_pcbe_impl_name,
+ opt_pcbe_cpuref,
+ opt_pcbe_get_events,
+ opt_pcbe_get_eventnum
+};