aboutsummaryrefslogtreecommitdiff
path: root/gprofng/common
diff options
context:
space:
mode:
author: Vladimir Mezentsev <vladimir.mezentsev@oracle.com> 2024-05-16 21:00:51 -0700
committer: Vladimir Mezentsev <vladimir.mezentsev@oracle.com> 2024-05-17 18:28:05 -0700
commit: ee7af0e7107e918d37bd2686fea1db8f88d2242a (patch)
tree: 12897d98828474dcde1099595921b4e344305bea /gprofng/common
parent: 663741df74f975ca00de84ba17d2cd8417bb1d03 (diff)
downloadbinutils-ee7af0e7107e918d37bd2686fea1db8f88d2242a.zip
binutils-ee7af0e7107e918d37bd2686fea1db8f88d2242a.tar.gz
binutils-ee7af0e7107e918d37bd2686fea1db8f88d2242a.tar.bz2
gprofng: add hardware counters for AMD Zen3
Historically, we have used several APIs (perfctr, libcpc, perf_event_open) for profiling.
For each hardware platform we have several tables of hardware counters.
Some information is duplicated across these tables, and some of it is no longer used.
I did not touch the existing hwc tables; I added a new hwc table for an AMD Zen3 machine.

ChangeLog
2024-05-16  Vladimir Mezentsev  <vladimir.mezentsev@oracle.com>

	PR gprofng/31123
	* common/core_pcbe.c (core_pcbe_get_events): Add new argument.
	* common/hwc_cpus.h: New constants for AMD hardware.
	* common/hwcdrv.c: Add new argument to hwcdrv_get_descriptions.
	Clean up the code.
	* common/hwcdrv.h: Likewise.
	* common/hwcfuncs.c (hwcdrv_get_descriptions): Add new argument.
	* common/hwctable.c: Add the hwc table for AMD Zen3.
	* src/hwc_amd_zen3.h: New file.
	* common/opteron_pcbe.c: Add new argument to opt_pcbe_get_events.
	* src/collctrl.cc: Remove unused variable.
	* src/collctrl.h: Likewise.
Diffstat (limited to 'gprofng/common')
-rw-r--r-- gprofng/common/core_pcbe.c 17
-rw-r--r-- gprofng/common/hwc_cpus.h 39
-rw-r--r-- gprofng/common/hwcdrv.c 22
-rw-r--r-- gprofng/common/hwcdrv.h 17
-rw-r--r-- gprofng/common/hwcfuncs.c 2
-rw-r--r-- gprofng/common/hwctable.c 149
-rw-r--r-- gprofng/common/opteron_pcbe.c 60
7 files changed, 184 insertions, 122 deletions
diff --git a/gprofng/common/core_pcbe.c b/gprofng/common/core_pcbe.c
index 30977f0..805bd14 100644
--- a/gprofng/common/core_pcbe.c
+++ b/gprofng/common/core_pcbe.c
@@ -2734,13 +2734,6 @@ core_pcbe_init (void)
{
switch (cpuid_getvendor ())
{
- case X86_VENDOR_AMD:
- snprintf (core_impl_name, sizeof (core_impl_name), "%s", X86_VENDORSTR_AMD);
- events_table = events_generic;
- num_gpc = 4;
- num_ffc = 0;
- total_pmc = num_gpc + num_ffc;
- return 0;
case ARM_CPU_IMP_ARM:
case ARM_CPU_IMP_BRCM:
case ARM_CPU_IMP_CAVIUM:
@@ -2948,7 +2941,7 @@ core_pcbe_cpuref (void)
}
static int
-core_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb)
+core_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb, Hwcentry *raw_hwc_tbl)
{
int count = 0;
const struct events_table_t *pevent;
@@ -2966,6 +2959,14 @@ core_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb)
count++;
}
/* add generic events here */
+ if (raw_hwc_tbl)
+ for (Hwcentry *h = raw_hwc_tbl; h->name; h++)
+ if (h->use_perf_event_type)
+ for (int jj = 0; jj < num_gpc; jj++)
+ {
+ hwc_cb (jj, h->name);
+ count++;
+ }
return count;
}
diff --git a/gprofng/common/hwc_cpus.h b/gprofng/common/hwc_cpus.h
index 4b77083..59052a0 100644
--- a/gprofng/common/hwc_cpus.h
+++ b/gprofng/common/hwc_cpus.h
@@ -34,8 +34,16 @@ typedef struct
char *cpu_modelstr;
} cpu_info_t;
+#ifdef __cplusplus
+extern "C"
+{
+#endif
extern cpu_info_t *read_cpuinfo();
+#ifdef __cplusplus
+}
+#endif
+
#define MAX_PICS 20 /* Max # of HW ctrs that can be enabled simultaneously */
/* type for specifying CPU register number */
@@ -105,6 +113,8 @@ extern cpu_info_t *read_cpuinfo();
#define CPC_AMD_FAM_11H 2502 /* Griffin... */
#define CPC_AMD_FAM_15H 2503
#define CPC_AMD_Authentic 2504
+#define CPC_AMD_FAM_19H_ZEN3 2505
+#define CPC_AMD_FAM_19H_ZEN4 2506
#define CPC_KPROF 3003 // OBSOLETE (To support 12.3 and earlier)
#define CPC_FOX 3004 /* pseudo-chip */
@@ -117,7 +127,32 @@ extern cpu_info_t *read_cpuinfo();
#define CPC_SPARC64_X 4006 /* Athena */
#define CPC_SPARC64_XII 4010 /* Athena++ */
-// aarch64. Constants from tools/arch/arm64/include/asm/cputype.h
+#define AMD_FAM_19H_ZEN3_NAME "AMD Family 19h (Zen3)"
+#define AMD_FAM_19H_ZEN4_NAME "AMD Family 19h (Zen4)"
+
+enum Amd_famaly
+{
+ AMD_ZEN_FAMILY = 0x17,
+ AMD_ZEN3_FAMILY = 0x19
+};
+
+enum Amd_model
+{
+ AMD_ZEN_RYZEN = 0x1,
+ AMD_ZENPLUS_RYZEN = 0x8,
+ AMD_ZENPLUS_RYZEN2 = 0x18,
+ AMD_ZEN2_RYZEN = 0x31,
+ AMD_ZEN2_RYZEN2 = 0x71,
+ AMD_ZEN2_RYZEN3 = 0x60,
+ AMD_ZEN3_RYZEN = 0x1,
+ AMD_ZEN3_RYZEN2 = 0x21,
+ AMD_ZEN3_RYZEN3 = 0x50,
+ AMD_ZEN3_EPYC_TRENTO = 0x30,
+ AMD_ZEN4_RYZEN = 0x61,
+ AMD_ZEN4_EPYC = 0x11
+};
+
+ // aarch64. Constants from tools/arch/arm64/include/asm/cputype.h
// in https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
enum {
ARM_CPU_IMP_ARM = 0x41,
@@ -147,6 +182,8 @@ enum {
{CPC_AMD_FAM_15H , "AMD Family 15h Model 01h"}, \
{CPC_AMD_FAM_15H , "AMD Family 15h Model 02h"},/*future*/ \
{CPC_AMD_FAM_15H , "AMD Family 15h Model 03h"},/*future*/ \
+ {CPC_AMD_FAM_19H_ZEN3 , AMD_FAM_19H_ZEN3_NAME}, \
+ {CPC_AMD_FAM_19H_ZEN4 , AMD_FAM_19H_ZEN4_NAME}, \
{CPC_PENTIUM_4_HT , "Pentium 4 with HyperThreading"}, \
{CPC_PENTIUM_4 , "Pentium 4"}, \
{CPC_PENTIUM_PRO_MMX , "Pentium Pro with MMX, Pentium II"}, \
diff --git a/gprofng/common/hwcdrv.c b/gprofng/common/hwcdrv.c
index 0ada09d..0b4cfc3 100644
--- a/gprofng/common/hwcdrv.c
+++ b/gprofng/common/hwcdrv.c
@@ -34,6 +34,7 @@
#include "cpuid.c" /* ftns for identifying a chip */
+static hdrv_pcbe_api_t *pcbe_driver = NULL;
static hdrv_pcbe_api_t hdrv_pcbe_core_api;
static hdrv_pcbe_api_t hdrv_pcbe_opteron_api;
static hdrv_pcbe_api_t *hdrv_pcbe_drivers[] = {
@@ -94,8 +95,6 @@ hwcdrv_lookup_cpuver (const char * cpcN_cciname)
* For M8, a 4-bit mask of supported PICs is stored in bits [23:20].
*/
-IS_GLOBAL hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum = 0;
-
static const attr_info_t perfctr_sparc_attrs[] = {
{NTXT ("user"), 0, 0x01, 16}, //usr
{NTXT ("system"), 0, 0x01, 17}, //os
@@ -132,8 +131,9 @@ myperfctr_get_x86_eventnum (const char *eventname, uint_t pmc,
eventsel_t *eventsel, eventsel_t *valid_umask,
uint_t *pmc_sel)
{
- if (hwcdrv_get_x86_eventnum &&
- !hwcdrv_get_x86_eventnum (eventname, pmc, eventsel, valid_umask, pmc_sel))
+ if (pcbe_driver && pcbe_driver->hdrv_pcbe_get_eventnum &&
+ !pcbe_driver->hdrv_pcbe_get_eventnum (eventname, pmc, eventsel,
+ valid_umask, pmc_sel))
return 0;
/* check for numerically-specified counters */
@@ -214,7 +214,7 @@ set_x86_attr_bits (eventsel_t *result_mask, eventsel_t evnt_valid_umask,
return 0;
}
-IS_GLOBAL int
+static int
hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name,
eventsel_t *return_event, uint_t *return_pmc_sel)
{
@@ -287,6 +287,7 @@ perf_event_open (struct perf_event_attr *hw_event_uptr, pid_t pid,
rc = syscall (__NR_perf_event_open, hw_event_uptr, pid, cpu, group_fd, flags);
if (rc != -1)
return rc;
+ TprintfT (0, "perf_event_open %d: errno=%d %s\n", retry, errno, strerror(errno));
}
return rc;
}
@@ -375,7 +376,6 @@ static struct
int internal_open_called;
hwcfuncs_tsd_get_fn_t find_vpc_ctx;
unsigned hwcdef_cnt; /* number of *active* hardware counters */
- hwcdrv_get_events_fn_t *get_events;
} hdrv_pcl_state;
static hwcdrv_about_t hdrv_pcl_about = {.cpcN_cpuver = CPUVER_UNDEFINED};
@@ -813,14 +813,13 @@ hdrv_pcl_internal_open ()
hdrv_pcbe_api_t *ppcbe = hdrv_pcbe_drivers[ii];
if (!ppcbe->hdrv_pcbe_init ())
{
+ pcbe_driver = ppcbe;
hdrv_pcl_about.cpcN_cciname = ppcbe->hdrv_pcbe_impl_name ();
hdrv_pcl_about.cpcN_cpuver = hwcdrv_lookup_cpuver (hdrv_pcl_about.cpcN_cciname);
if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED)
goto internal_open_error;
hdrv_pcl_about.cpcN_npics = ppcbe->hdrv_pcbe_ncounters ();
hdrv_pcl_about.cpcN_docref = ppcbe->hdrv_pcbe_cpuref ();
- hdrv_pcl_state.get_events = ppcbe->hdrv_pcbe_get_events;
- hwcdrv_get_x86_eventnum = ppcbe->hdrv_pcbe_get_eventnum;
break;
}
}
@@ -894,11 +893,12 @@ hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn)
}
HWCDRV_API int
-hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb)
+hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb,
+ Hwcentry *raw_hwc_tbl)
{
int count = 0;
- if (hwc_cb && hdrv_pcl_state.get_events)
- count = hdrv_pcl_state.get_events (hwc_cb);
+ if (hwc_cb && pcbe_driver && pcbe_driver->hdrv_pcbe_get_events)
+ count = pcbe_driver->hdrv_pcbe_get_events (hwc_cb, raw_hwc_tbl);
if (attr_cb)
for (int ii = 0; perfctr_attrs_table && perfctr_attrs_table[ii].attrname; ii++)
attr_cb (perfctr_attrs_table[ii].attrname);
diff --git a/gprofng/common/hwcdrv.h b/gprofng/common/hwcdrv.h
index 0a5eb33..fb97c8a 100644
--- a/gprofng/common/hwcdrv.h
+++ b/gprofng/common/hwcdrv.h
@@ -126,11 +126,13 @@ extern "C"
*/
int (*hwcdrv_get_descriptions)(hwcf_hwc_cb_t *hwc_find_action,
- hwcf_attr_cb_t *attr_find_action);
- /* Initiate callbacks with all available HWC names and and HWC attributes.
+ hwcf_attr_cb_t *attr_find_action,
+ Hwcentry *raw_hwc_tbl);
+ /* Initiate callbacks with all available HWC names and HWC attributes.
Input:
<hwc_find_action>: if not NULL, will be called once for each HWC
<attr_find_action>: if not NULL, will be called once for each attribute
+ <raw_hwc_tbl>: counter definitions.
Return: 0 if successful
or a cpc return code upon error
*/
@@ -260,15 +262,6 @@ extern "C"
( (((eventsel_t)(evnum) & 0x0f00ULL) << 24) | ((eventsel_t)(evnum) & ~0x0f00ULL) )
typedef uint64_t eventsel_t;
- extern int hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name,
- eventsel_t *return_event, uint_t *return_pmc_sel);
-
- typedef int (hwcdrv_get_events_fn_t) (hwcf_hwc_cb_t *hwc_cb);
- typedef int (hwcdrv_get_eventnum_fn_t) (const char *eventname, uint_t pmc,
- eventsel_t *eventnum,
- eventsel_t *valid_umask, uint_t *pmc_sel);
- extern hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum;
-
typedef struct
{
const char * attrname; // user-visible name of attribute
@@ -285,7 +278,7 @@ extern "C"
uint_t (*hdrv_pcbe_ncounters)(void);
const char *(*hdrv_pcbe_impl_name)(void);
const char *(*hdrv_pcbe_cpuref)(void);
- int (*hdrv_pcbe_get_events)(hwcf_hwc_cb_t *hwc_cb);
+ int (*hdrv_pcbe_get_events)(hwcf_hwc_cb_t *hwc_cb, Hwcentry *raw_hwc_tbl);
int (*hdrv_pcbe_get_eventnum)(const char * eventname, uint_t pmc,
eventsel_t *eventnum, eventsel_t *valid_umask,
uint_t *pmc_sel);
diff --git a/gprofng/common/hwcfuncs.c b/gprofng/common/hwcfuncs.c
index fce711d..e6448a9 100644
--- a/gprofng/common/hwcfuncs.c
+++ b/gprofng/common/hwcfuncs.c
@@ -63,7 +63,7 @@ hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn)
HWCDRV_API int
hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_find_action,
- hwcf_attr_cb_t *attr_find_action)
+ hwcf_attr_cb_t *attr_find_action, Hwcentry *hwcdef)
{
return 0;
}
diff --git a/gprofng/common/hwctable.c b/gprofng/common/hwctable.c
index 40b4cd8..5dc8dde 100644
--- a/gprofng/common/hwctable.c
+++ b/gprofng/common/hwctable.c
@@ -2369,82 +2369,86 @@ static Hwcentry amd_15h[] = {
#define HWCE(nm, mtr, id, op, res) \
INIT_HWC(nm, mtr, (id) | ((op) << 8) | ((res) << 16), PERF_TYPE_HW_CACHE)
-static Hwcentry generic_list[] = {
-// Hardware event:
- { HWE("usr_time", STXT("User CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,
- .int_name = "cycles" },
- { HWE("sys_time", STXT("System CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,
- .int_name = "cycles~system=1~user=0" },
- { HWE("branch-instructions", STXT("Branch-instructions"),
- PERF_COUNT_HW_BRANCH_INSTRUCTIONS) },
- { HWE("branch-misses", STXT("Branch-misses"), PERF_COUNT_HW_BRANCH_MISSES) },
- { HWE("bus-cycles", STXT("Bus Cycles"), PERF_COUNT_HW_BUS_CYCLES),
- .timecvt = 1 },
- { HWE("cache-misses", STXT("Cache-misses"), PERF_COUNT_HW_CACHE_MISSES) },
- { HWE("cache-references", STXT("Cache-references"),
- PERF_COUNT_HW_CACHE_REFERENCES) },
- { HWE("cycles", STXT("CPU Cycles"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1 },
- { HWE("insts", STXT("Instructions Executed"), PERF_COUNT_HW_INSTRUCTIONS),
- .int_name = "instructions" },
- { HWE("ref-cycles", STXT("Total Cycles"), PERF_COUNT_HW_REF_CPU_CYCLES),
- .timecvt = 1 },
- { HWE("stalled-cycles-backend", STXT("Stalled Cycles during issue."),
- PERF_COUNT_HW_STALLED_CYCLES_BACKEND), .timecvt = 1 },
- { HWE("stalled-cycles-frontend", STXT("Stalled Cycles during retirement."),
- PERF_COUNT_HW_STALLED_CYCLES_FRONTEND), .timecvt = 1 },
-// Software event:
- { SWE("alignment-faults", STXT("Alignment Faults"),
- PERF_COUNT_SW_ALIGNMENT_FAULTS) },
- { SWE("context-switches", STXT("Context Switches"),
- PERF_COUNT_SW_CONTEXT_SWITCHES) },
- { SWE("cpu-clock", STXT("CPU Clock"), PERF_COUNT_SW_CPU_CLOCK),
- .timecvt = 1 },
- { SWE("cpu-migrations", STXT("CPU Migrations"),
- PERF_COUNT_SW_CPU_MIGRATIONS) },
- { SWE("emulation-faults", STXT("Emulation Faults"),
- PERF_COUNT_SW_EMULATION_FAULTS) },
- { SWE("major-faults", STXT("Major Page Faults"),
- PERF_COUNT_SW_PAGE_FAULTS_MAJ) },
- { SWE("minor-faults", STXT("Minor Page Faults"),
- PERF_COUNT_SW_PAGE_FAULTS_MIN) },
- { SWE("page-faults", STXT("Page Faults"), PERF_COUNT_SW_PAGE_FAULTS) },
- { SWE("task-clock", STXT("Clock Count Specific"), PERF_COUNT_SW_TASK_CLOCK),
- .timecvt = 1 },
-// Hardware cache event
- { HWCE("L1-dcache-load-misses", STXT("L1 D-cache Load Misses"),
- PERF_COUNT_HW_CACHE_L1D,
- PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
- { HWCE("L1-dcache-loads", STXT("L1 D-cache Loads"),
- PERF_COUNT_HW_CACHE_L1D,
- PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
- { HWCE("L1-dcache-store-misses", STXT("L1 D-cache Store Misses"),
- PERF_COUNT_HW_CACHE_L1D,
- PERF_COUNT_HW_CACHE_RESULT_MISS, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
- { HWCE("L1-dcache-stores", STXT("L1 D-cache Store Stores"),
- PERF_COUNT_HW_CACHE_L1D,
- PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
- { HWCE("L1-icache-load-misses", STXT("L1 Instructions Load Misses"),
- PERF_COUNT_HW_CACHE_L1I,
- PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
- { HWCE("L1-icache-load-misses", STXT("L1 Instructions Loads"),
- PERF_COUNT_HW_CACHE_L1I,
- PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
- { HWCE("dTLB-load-misses", STXT("D-TLB Load Misses"),
- PERF_COUNT_HW_CACHE_DTLB,
- PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
- { HWCE("dTLB-loads", STXT("D-TLB Loads"),
- PERF_COUNT_HW_CACHE_DTLB,
- PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
- { HWCE("iTLB-load-misses", STXT("The Instruction TLB Load Misses"),
- PERF_COUNT_HW_CACHE_ITLB,
- PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
- { HWCE("iTLB-loads", STXT("The Instruction TLB Loads"),
- PERF_COUNT_HW_CACHE_ITLB,
+#define HWC_GENERIC \
+ /* Hardware event: */\
+ { HWE("usr_time", STXT("User CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,\
+ .int_name = "cycles" },\
+ { HWE("sys_time", STXT("System CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,\
+ .int_name = "cycles~system=1~user=0" },\
+ { HWE("branch-instructions", STXT("Branch-instructions"),\
+ PERF_COUNT_HW_BRANCH_INSTRUCTIONS) },\
+ { HWE("branch-misses", STXT("Branch-misses"), PERF_COUNT_HW_BRANCH_MISSES) },\
+ { HWE("bus-cycles", STXT("Bus Cycles"), PERF_COUNT_HW_BUS_CYCLES),\
+ .timecvt = 1 },\
+ { HWE("cache-misses", STXT("Cache-misses"), PERF_COUNT_HW_CACHE_MISSES) },\
+ { HWE("cache-references", STXT("Cache-references"),\
+ PERF_COUNT_HW_CACHE_REFERENCES) },\
+ { HWE("cycles", STXT("CPU Cycles"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1 },\
+ { HWE("insts", STXT("Instructions Executed"), PERF_COUNT_HW_INSTRUCTIONS),\
+ .int_name = "instructions" },\
+ { HWE("ref-cycles", STXT("Total Cycles"), PERF_COUNT_HW_REF_CPU_CYCLES),\
+ .timecvt = 1 },\
+ { HWE("stalled-cycles-backend", STXT("Stalled Cycles during issue."),\
+ PERF_COUNT_HW_STALLED_CYCLES_BACKEND), .timecvt = 1 },\
+ { HWE("stalled-cycles-frontend", STXT("Stalled Cycles during retirement."),\
+ PERF_COUNT_HW_STALLED_CYCLES_FRONTEND), .timecvt = 1 },\
+ /* Software event: */\
+ { SWE("alignment-faults", STXT("Alignment Faults"),\
+ PERF_COUNT_SW_ALIGNMENT_FAULTS) },\
+ { SWE("context-switches", STXT("Context Switches"),\
+ PERF_COUNT_SW_CONTEXT_SWITCHES) },\
+ { SWE("cpu-clock", STXT("CPU Clock"), PERF_COUNT_SW_CPU_CLOCK),\
+ .timecvt = 1 },\
+ { SWE("cpu-migrations", STXT("CPU Migrations"),\
+ PERF_COUNT_SW_CPU_MIGRATIONS) },\
+ { SWE("emulation-faults", STXT("Emulation Faults"),\
+ PERF_COUNT_SW_EMULATION_FAULTS) },\
+ { SWE("major-faults", STXT("Major Page Faults"),\
+ PERF_COUNT_SW_PAGE_FAULTS_MAJ) },\
+ { SWE("minor-faults", STXT("Minor Page Faults"),\
+ PERF_COUNT_SW_PAGE_FAULTS_MIN) },\
+ { SWE("page-faults", STXT("Page Faults"), PERF_COUNT_SW_PAGE_FAULTS) },\
+ { SWE("task-clock", STXT("Clock Count Specific"), PERF_COUNT_SW_TASK_CLOCK),\
+ .timecvt = 1 },\
+ /* Hardware cache event: */\
+ { HWCE("L1-dcache-load-misses", STXT("L1 D-cache Load Misses"),\
+ PERF_COUNT_HW_CACHE_L1D,\
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
+ { HWCE("L1-dcache-loads", STXT("L1 D-cache Loads"),\
+ PERF_COUNT_HW_CACHE_L1D,\
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
+ { HWCE("L1-dcache-store-misses", STXT("L1 D-cache Store Misses"),\
+ PERF_COUNT_HW_CACHE_L1D,\
+ PERF_COUNT_HW_CACHE_RESULT_MISS, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
+ { HWCE("L1-dcache-stores", STXT("L1 D-cache Store Stores"),\
+ PERF_COUNT_HW_CACHE_L1D,\
+ PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
+ { HWCE("L1-icache-load-misses", STXT("L1 Instructions Load Misses"),\
+ PERF_COUNT_HW_CACHE_L1I,\
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
+ { HWCE("L1-icache-load-misses", STXT("L1 Instructions Loads"),\
+ PERF_COUNT_HW_CACHE_L1I,\
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
+ { HWCE("dTLB-load-misses", STXT("D-TLB Load Misses"),\
+ PERF_COUNT_HW_CACHE_DTLB,\
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
+ { HWCE("dTLB-loads", STXT("D-TLB Loads"),\
+ PERF_COUNT_HW_CACHE_DTLB,\
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
+ { HWCE("iTLB-load-misses", STXT("The Instruction TLB Load Misses"),\
+ PERF_COUNT_HW_CACHE_ITLB,\
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
+ { HWCE("iTLB-loads", STXT("The Instruction TLB Loads"),\
+ PERF_COUNT_HW_CACHE_ITLB,\
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
+static Hwcentry generic_list[] = {
+ HWC_GENERIC
{NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
};
+#include "hwc_amd_zen3.h"
+
/* structure defining the counters for a CPU type */
typedef struct
{
@@ -2516,6 +2520,7 @@ static cpu_list_t cputabs[] = {
{CPC_KPROF, kproflist, {NULL}}, // OBSOLETE (To support 12.3 and earlier, TBR)
{ARM_CPU_IMP_APM, generic_list, {"insts,,cycles", 0}},
{CPC_AMD_Authentic, generic_list, {"insts,,cycles", 0}},
+ {CPC_AMD_FAM_19H_ZEN3, amd_zen3_list, {"insts,,cycles", 0}},
{0, generic_list, {"insts,,cycles", 0}},
};
@@ -3033,7 +3038,7 @@ setup_cpc_general (int skip_hwc_test)
valid_cpu_tables[1] = papi_generic_list;
Tprintf (DBG_LT2, "hwctable: setup_cpc(): getting descriptions \n");
// populate cpcx_raw and cpcx_attr
- hwcdrv->hwcdrv_get_descriptions (hwc_cb, attrs_cb);
+ hwcdrv->hwcdrv_get_descriptions (hwc_cb, attrs_cb, cputabs_entry->stdlist_table);
for (int kk = 0; kk < 2; kk++)
{ // collect and er_kernel
hwc_process_raw_ctrs (kk, &cpcx_std[kk], &cpcx_raw[kk], &cpcx_hidden[kk],
diff --git a/gprofng/common/opteron_pcbe.c b/gprofng/common/opteron_pcbe.c
index 0f1815d..a8d7e76 100644
--- a/gprofng/common/opteron_pcbe.c
+++ b/gprofng/common/opteron_pcbe.c
@@ -304,6 +304,8 @@ static amd_generic_event_t family_10h_generic_events[] = {
};
static amd_event_t *amd_events = NULL;
+static const char *amd_impl_name = "";
+static const char *amd_cpuref = "";
static uint_t amd_family;
static amd_generic_event_t *amd_generic_events = NULL;
@@ -318,19 +320,39 @@ opt_pcbe_init (void)
if (cpuid_getvendor () != X86_VENDOR_AMD)
return -1;
- /*
- * Figure out processor revision here and assign appropriate
- * event configuration.
- */
+ amd_impl_name = GTXT ("Unknown AMD processor");
switch (amd_family)
{
case OPTERON_FAMILY:
amd_events = opt_events_rev_E;
amd_generic_events = opt_generic_events;
+ amd_impl_name = "AMD Opteron & Athlon64";
+ amd_cpuref = GTXT ("See Chapter 10 of the \"BIOS and Kernel Developer's"
+ " Guide for the AMD Athlon 64 and AMD Opteron Processors,\"\n"
+ "AMD publication #26094");
break;
case AMD_FAMILY_10H:
amd_events = family_10h_events;
amd_generic_events = family_10h_generic_events;
+ amd_impl_name = "AMD Family 10h";
+ amd_cpuref = GTXT ("See section 3.15 of the \"BIOS and Kernel Developer's"
+ " Guide (BKDG) For AMD Family 10h Processors,\"\n"
+ "AMD publication #31116");
+ break;
+ case AMD_ZEN3_FAMILY:
+ switch (cpuid_getmodel ())
+ {
+ case AMD_ZEN3_RYZEN:
+ case AMD_ZEN3_RYZEN2:
+ case AMD_ZEN3_RYZEN3:
+ case AMD_ZEN3_EPYC_TRENTO:
+ amd_impl_name = AMD_FAM_19H_ZEN3_NAME;
+ break;
+ case AMD_ZEN4_RYZEN:
+ case AMD_ZEN4_EPYC:
+ amd_impl_name = AMD_FAM_19H_ZEN4_NAME;
+ break;
+ }
break;
}
return 0;
@@ -345,27 +367,17 @@ opt_pcbe_ncounters (void)
static const char *
opt_pcbe_impl_name (void)
{
- if (amd_family == OPTERON_FAMILY)
- return ("AMD Opteron & Athlon64");
- else if (amd_family == AMD_FAMILY_10H)
- return ("AMD Family 10h");
- else
- return ("Unknown AMD processor");
+ return amd_impl_name;
}
static const char *
opt_pcbe_cpuref (void)
{
- if (amd_family == OPTERON_FAMILY)
- return GTXT ("See Chapter 10 of the \"BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD Opteron Processors,\"\nAMD publication #26094");
- else if (amd_family == AMD_FAMILY_10H)
- return GTXT ("See section 3.15 of the \"BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h Processors,\"\nAMD publication #31116");
- else
- return GTXT ("Unknown AMD processor");
+ return amd_cpuref;
}
static int
-opt_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb)
+opt_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb, Hwcentry *raw_hwc_tbl)
{
int count = 0;
for (uint_t kk = 0; amd_events && amd_events[kk].name; kk++)
@@ -380,6 +392,14 @@ opt_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb)
hwc_cb (jj, amd_generic_events[kk].name);
count++;
}
+ if (raw_hwc_tbl)
+ for (Hwcentry *h = raw_hwc_tbl; h->name; h++)
+ if (h->use_perf_event_type)
+ for (uint_t jj = 0; jj < opt_pcbe_ncounters (); jj++)
+ {
+ hwc_cb (jj, h->name);
+ count++;
+ }
return count;
}
@@ -392,6 +412,12 @@ opt_pcbe_get_eventnum (const char *eventname, uint_t pmc, eventsel_t *eventsel,
*eventsel = (eventsel_t) - 1;
*event_valid_umask = 0x0;
+ if (amd_events == NULL && amd_generic_events == NULL)
+ { // These tables are created only for old hardware.
+ *eventsel = 0;
+ return 0;
+ }
+
/* search table */
for (kk = 0; amd_events && amd_events[kk].name; kk++)
{