diff options
author | Vladimir Mezentsev <vladimir.mezentsev@oracle.com> | 2024-05-16 21:00:51 -0700 |
---|---|---|
committer | Vladimir Mezentsev <vladimir.mezentsev@oracle.com> | 2024-05-17 18:28:05 -0700 |
commit | ee7af0e7107e918d37bd2686fea1db8f88d2242a (patch) | |
tree | 12897d98828474dcde1099595921b4e344305bea /gprofng/common | |
parent | 663741df74f975ca00de84ba17d2cd8417bb1d03 (diff) | |
download | binutils-ee7af0e7107e918d37bd2686fea1db8f88d2242a.zip binutils-ee7af0e7107e918d37bd2686fea1db8f88d2242a.tar.gz binutils-ee7af0e7107e918d37bd2686fea1db8f88d2242a.tar.bz2 |
gprofng: add hardware counters for AMD Zen3
Historically, we have used several APIs (perfctr, libcpc, perf_event_open) for profiling.
For each hardware platform, we have several tables of hardware counters.
Some information is duplicated in these tables.
Some of the information is no longer used.
I did not touch the existing hwc tables.
I added a new hwc table for an AMD Zen3 machine.
ChangeLog
2024-05-16 Vladimir Mezentsev <vladimir.mezentsev@oracle.com>
PR gprofng/31123
* common/core_pcbe.c (core_pcbe_get_events): Add new argument.
* common/hwc_cpus.h: New constants for AMD hardware.
* common/hwcdrv.c: Add new argument to hwcdrv_get_descriptions.
Clean up the code.
* common/hwcdrv.h: Likewise.
* common/hwcfuncs.c (hwcdrv_get_descriptions): Add new argument.
* common/hwctable.c: Add the hwc table for AMD Zen3.
* src/hwc_amd_zen3.h: New file.
* common/opteron_pcbe.c: Add new argument to opt_pcbe_get_events.
* src/collctrl.cc: Remove unused variable.
* src/collctrl.h: Likewise.
Diffstat (limited to 'gprofng/common')
-rw-r--r-- | gprofng/common/core_pcbe.c | 17 | ||||
-rw-r--r-- | gprofng/common/hwc_cpus.h | 39 | ||||
-rw-r--r-- | gprofng/common/hwcdrv.c | 22 | ||||
-rw-r--r-- | gprofng/common/hwcdrv.h | 17 | ||||
-rw-r--r-- | gprofng/common/hwcfuncs.c | 2 | ||||
-rw-r--r-- | gprofng/common/hwctable.c | 149 | ||||
-rw-r--r-- | gprofng/common/opteron_pcbe.c | 60 |
7 files changed, 184 insertions, 122 deletions
diff --git a/gprofng/common/core_pcbe.c b/gprofng/common/core_pcbe.c index 30977f0..805bd14 100644 --- a/gprofng/common/core_pcbe.c +++ b/gprofng/common/core_pcbe.c @@ -2734,13 +2734,6 @@ core_pcbe_init (void) { switch (cpuid_getvendor ()) { - case X86_VENDOR_AMD: - snprintf (core_impl_name, sizeof (core_impl_name), "%s", X86_VENDORSTR_AMD); - events_table = events_generic; - num_gpc = 4; - num_ffc = 0; - total_pmc = num_gpc + num_ffc; - return 0; case ARM_CPU_IMP_ARM: case ARM_CPU_IMP_BRCM: case ARM_CPU_IMP_CAVIUM: @@ -2948,7 +2941,7 @@ core_pcbe_cpuref (void) } static int -core_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb) +core_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb, Hwcentry *raw_hwc_tbl) { int count = 0; const struct events_table_t *pevent; @@ -2966,6 +2959,14 @@ core_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb) count++; } /* add generic events here */ + if (raw_hwc_tbl) + for (Hwcentry *h = raw_hwc_tbl; h->name; h++) + if (h->use_perf_event_type) + for (int jj = 0; jj < num_gpc; jj++) + { + hwc_cb (jj, h->name); + count++; + } return count; } diff --git a/gprofng/common/hwc_cpus.h b/gprofng/common/hwc_cpus.h index 4b77083..59052a0 100644 --- a/gprofng/common/hwc_cpus.h +++ b/gprofng/common/hwc_cpus.h @@ -34,8 +34,16 @@ typedef struct char *cpu_modelstr; } cpu_info_t; +#ifdef __cplusplus +extern "C" +{ +#endif extern cpu_info_t *read_cpuinfo(); +#ifdef __cplusplus +} +#endif + #define MAX_PICS 20 /* Max # of HW ctrs that can be enabled simultaneously */ /* type for specifying CPU register number */ @@ -105,6 +113,8 @@ extern cpu_info_t *read_cpuinfo(); #define CPC_AMD_FAM_11H 2502 /* Griffin... 
*/ #define CPC_AMD_FAM_15H 2503 #define CPC_AMD_Authentic 2504 +#define CPC_AMD_FAM_19H_ZEN3 2505 +#define CPC_AMD_FAM_19H_ZEN4 2506 #define CPC_KPROF 3003 // OBSOLETE (To support 12.3 and earlier) #define CPC_FOX 3004 /* pseudo-chip */ @@ -117,7 +127,32 @@ extern cpu_info_t *read_cpuinfo(); #define CPC_SPARC64_X 4006 /* Athena */ #define CPC_SPARC64_XII 4010 /* Athena++ */ -// aarch64. Constants from tools/arch/arm64/include/asm/cputype.h +#define AMD_FAM_19H_ZEN3_NAME "AMD Family 19h (Zen3)" +#define AMD_FAM_19H_ZEN4_NAME "AMD Family 19h (Zen4)" + +enum Amd_famaly +{ + AMD_ZEN_FAMILY = 0x17, + AMD_ZEN3_FAMILY = 0x19 +}; + +enum Amd_model +{ + AMD_ZEN_RYZEN = 0x1, + AMD_ZENPLUS_RYZEN = 0x8, + AMD_ZENPLUS_RYZEN2 = 0x18, + AMD_ZEN2_RYZEN = 0x31, + AMD_ZEN2_RYZEN2 = 0x71, + AMD_ZEN2_RYZEN3 = 0x60, + AMD_ZEN3_RYZEN = 0x1, + AMD_ZEN3_RYZEN2 = 0x21, + AMD_ZEN3_RYZEN3 = 0x50, + AMD_ZEN3_EPYC_TRENTO = 0x30, + AMD_ZEN4_RYZEN = 0x61, + AMD_ZEN4_EPYC = 0x11 +}; + + // aarch64. Constants from tools/arch/arm64/include/asm/cputype.h // in https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git enum { ARM_CPU_IMP_ARM = 0x41, @@ -147,6 +182,8 @@ enum { {CPC_AMD_FAM_15H , "AMD Family 15h Model 01h"}, \ {CPC_AMD_FAM_15H , "AMD Family 15h Model 02h"},/*future*/ \ {CPC_AMD_FAM_15H , "AMD Family 15h Model 03h"},/*future*/ \ + {CPC_AMD_FAM_19H_ZEN3 , AMD_FAM_19H_ZEN3_NAME}, \ + {CPC_AMD_FAM_19H_ZEN4 , AMD_FAM_19H_ZEN4_NAME}, \ {CPC_PENTIUM_4_HT , "Pentium 4 with HyperThreading"}, \ {CPC_PENTIUM_4 , "Pentium 4"}, \ {CPC_PENTIUM_PRO_MMX , "Pentium Pro with MMX, Pentium II"}, \ diff --git a/gprofng/common/hwcdrv.c b/gprofng/common/hwcdrv.c index 0ada09d..0b4cfc3 100644 --- a/gprofng/common/hwcdrv.c +++ b/gprofng/common/hwcdrv.c @@ -34,6 +34,7 @@ #include "cpuid.c" /* ftns for identifying a chip */ +static hdrv_pcbe_api_t *pcbe_driver = NULL; static hdrv_pcbe_api_t hdrv_pcbe_core_api; static hdrv_pcbe_api_t hdrv_pcbe_opteron_api; static hdrv_pcbe_api_t *hdrv_pcbe_drivers[] = { 
@@ -94,8 +95,6 @@ hwcdrv_lookup_cpuver (const char * cpcN_cciname) * For M8, a 4-bit mask of supported PICs is stored in bits [23:20]. */ -IS_GLOBAL hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum = 0; - static const attr_info_t perfctr_sparc_attrs[] = { {NTXT ("user"), 0, 0x01, 16}, //usr {NTXT ("system"), 0, 0x01, 17}, //os @@ -132,8 +131,9 @@ myperfctr_get_x86_eventnum (const char *eventname, uint_t pmc, eventsel_t *eventsel, eventsel_t *valid_umask, uint_t *pmc_sel) { - if (hwcdrv_get_x86_eventnum && - !hwcdrv_get_x86_eventnum (eventname, pmc, eventsel, valid_umask, pmc_sel)) + if (pcbe_driver && pcbe_driver->hdrv_pcbe_get_eventnum && + !pcbe_driver->hdrv_pcbe_get_eventnum (eventname, pmc, eventsel, + valid_umask, pmc_sel)) return 0; /* check for numerically-specified counters */ @@ -214,7 +214,7 @@ set_x86_attr_bits (eventsel_t *result_mask, eventsel_t evnt_valid_umask, return 0; } -IS_GLOBAL int +static int hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name, eventsel_t *return_event, uint_t *return_pmc_sel) { @@ -287,6 +287,7 @@ perf_event_open (struct perf_event_attr *hw_event_uptr, pid_t pid, rc = syscall (__NR_perf_event_open, hw_event_uptr, pid, cpu, group_fd, flags); if (rc != -1) return rc; + TprintfT (0, "perf_event_open %d: errno=%d %s\n", retry, errno, strerror(errno)); } return rc; } @@ -375,7 +376,6 @@ static struct int internal_open_called; hwcfuncs_tsd_get_fn_t find_vpc_ctx; unsigned hwcdef_cnt; /* number of *active* hardware counters */ - hwcdrv_get_events_fn_t *get_events; } hdrv_pcl_state; static hwcdrv_about_t hdrv_pcl_about = {.cpcN_cpuver = CPUVER_UNDEFINED}; @@ -813,14 +813,13 @@ hdrv_pcl_internal_open () hdrv_pcbe_api_t *ppcbe = hdrv_pcbe_drivers[ii]; if (!ppcbe->hdrv_pcbe_init ()) { + pcbe_driver = ppcbe; hdrv_pcl_about.cpcN_cciname = ppcbe->hdrv_pcbe_impl_name (); hdrv_pcl_about.cpcN_cpuver = hwcdrv_lookup_cpuver (hdrv_pcl_about.cpcN_cciname); if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED) goto 
internal_open_error; hdrv_pcl_about.cpcN_npics = ppcbe->hdrv_pcbe_ncounters (); hdrv_pcl_about.cpcN_docref = ppcbe->hdrv_pcbe_cpuref (); - hdrv_pcl_state.get_events = ppcbe->hdrv_pcbe_get_events; - hwcdrv_get_x86_eventnum = ppcbe->hdrv_pcbe_get_eventnum; break; } } @@ -894,11 +893,12 @@ hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn) } HWCDRV_API int -hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb) +hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb, + Hwcentry *raw_hwc_tbl) { int count = 0; - if (hwc_cb && hdrv_pcl_state.get_events) - count = hdrv_pcl_state.get_events (hwc_cb); + if (hwc_cb && pcbe_driver && pcbe_driver->hdrv_pcbe_get_events) + count = pcbe_driver->hdrv_pcbe_get_events (hwc_cb, raw_hwc_tbl); if (attr_cb) for (int ii = 0; perfctr_attrs_table && perfctr_attrs_table[ii].attrname; ii++) attr_cb (perfctr_attrs_table[ii].attrname); diff --git a/gprofng/common/hwcdrv.h b/gprofng/common/hwcdrv.h index 0a5eb33..fb97c8a 100644 --- a/gprofng/common/hwcdrv.h +++ b/gprofng/common/hwcdrv.h @@ -126,11 +126,13 @@ extern "C" */ int (*hwcdrv_get_descriptions)(hwcf_hwc_cb_t *hwc_find_action, - hwcf_attr_cb_t *attr_find_action); - /* Initiate callbacks with all available HWC names and and HWC attributes. + hwcf_attr_cb_t *attr_find_action, + Hwcentry *raw_hwc_tbl); + /* Initiate callbacks with all available HWC names and HWC attributes. Input: <hwc_find_action>: if not NULL, will be called once for each HWC <attr_find_action>: if not NULL, will be called once for each attribute + <raw_hwc_tbl>: counter definitions. 
Return: 0 if successful or a cpc return code upon error */ @@ -260,15 +262,6 @@ extern "C" ( (((eventsel_t)(evnum) & 0x0f00ULL) << 24) | ((eventsel_t)(evnum) & ~0x0f00ULL) ) typedef uint64_t eventsel_t; - extern int hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name, - eventsel_t *return_event, uint_t *return_pmc_sel); - - typedef int (hwcdrv_get_events_fn_t) (hwcf_hwc_cb_t *hwc_cb); - typedef int (hwcdrv_get_eventnum_fn_t) (const char *eventname, uint_t pmc, - eventsel_t *eventnum, - eventsel_t *valid_umask, uint_t *pmc_sel); - extern hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum; - typedef struct { const char * attrname; // user-visible name of attribute @@ -285,7 +278,7 @@ extern "C" uint_t (*hdrv_pcbe_ncounters)(void); const char *(*hdrv_pcbe_impl_name)(void); const char *(*hdrv_pcbe_cpuref)(void); - int (*hdrv_pcbe_get_events)(hwcf_hwc_cb_t *hwc_cb); + int (*hdrv_pcbe_get_events)(hwcf_hwc_cb_t *hwc_cb, Hwcentry *raw_hwc_tbl); int (*hdrv_pcbe_get_eventnum)(const char * eventname, uint_t pmc, eventsel_t *eventnum, eventsel_t *valid_umask, uint_t *pmc_sel); diff --git a/gprofng/common/hwcfuncs.c b/gprofng/common/hwcfuncs.c index fce711d..e6448a9 100644 --- a/gprofng/common/hwcfuncs.c +++ b/gprofng/common/hwcfuncs.c @@ -63,7 +63,7 @@ hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn) HWCDRV_API int hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_find_action, - hwcf_attr_cb_t *attr_find_action) + hwcf_attr_cb_t *attr_find_action, Hwcentry *hwcdef) { return 0; } diff --git a/gprofng/common/hwctable.c b/gprofng/common/hwctable.c index 40b4cd8..5dc8dde 100644 --- a/gprofng/common/hwctable.c +++ b/gprofng/common/hwctable.c @@ -2369,82 +2369,86 @@ static Hwcentry amd_15h[] = { #define HWCE(nm, mtr, id, op, res) \ INIT_HWC(nm, mtr, (id) | ((op) << 8) | ((res) << 16), PERF_TYPE_HW_CACHE) -static Hwcentry generic_list[] = { -// Hardware event: - { HWE("usr_time", STXT("User CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1, - .int_name = "cycles" }, - { 
HWE("sys_time", STXT("System CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1, - .int_name = "cycles~system=1~user=0" }, - { HWE("branch-instructions", STXT("Branch-instructions"), - PERF_COUNT_HW_BRANCH_INSTRUCTIONS) }, - { HWE("branch-misses", STXT("Branch-misses"), PERF_COUNT_HW_BRANCH_MISSES) }, - { HWE("bus-cycles", STXT("Bus Cycles"), PERF_COUNT_HW_BUS_CYCLES), - .timecvt = 1 }, - { HWE("cache-misses", STXT("Cache-misses"), PERF_COUNT_HW_CACHE_MISSES) }, - { HWE("cache-references", STXT("Cache-references"), - PERF_COUNT_HW_CACHE_REFERENCES) }, - { HWE("cycles", STXT("CPU Cycles"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1 }, - { HWE("insts", STXT("Instructions Executed"), PERF_COUNT_HW_INSTRUCTIONS), - .int_name = "instructions" }, - { HWE("ref-cycles", STXT("Total Cycles"), PERF_COUNT_HW_REF_CPU_CYCLES), - .timecvt = 1 }, - { HWE("stalled-cycles-backend", STXT("Stalled Cycles during issue."), - PERF_COUNT_HW_STALLED_CYCLES_BACKEND), .timecvt = 1 }, - { HWE("stalled-cycles-frontend", STXT("Stalled Cycles during retirement."), - PERF_COUNT_HW_STALLED_CYCLES_FRONTEND), .timecvt = 1 }, -// Software event: - { SWE("alignment-faults", STXT("Alignment Faults"), - PERF_COUNT_SW_ALIGNMENT_FAULTS) }, - { SWE("context-switches", STXT("Context Switches"), - PERF_COUNT_SW_CONTEXT_SWITCHES) }, - { SWE("cpu-clock", STXT("CPU Clock"), PERF_COUNT_SW_CPU_CLOCK), - .timecvt = 1 }, - { SWE("cpu-migrations", STXT("CPU Migrations"), - PERF_COUNT_SW_CPU_MIGRATIONS) }, - { SWE("emulation-faults", STXT("Emulation Faults"), - PERF_COUNT_SW_EMULATION_FAULTS) }, - { SWE("major-faults", STXT("Major Page Faults"), - PERF_COUNT_SW_PAGE_FAULTS_MAJ) }, - { SWE("minor-faults", STXT("Minor Page Faults"), - PERF_COUNT_SW_PAGE_FAULTS_MIN) }, - { SWE("page-faults", STXT("Page Faults"), PERF_COUNT_SW_PAGE_FAULTS) }, - { SWE("task-clock", STXT("Clock Count Specific"), PERF_COUNT_SW_TASK_CLOCK), - .timecvt = 1 }, -// Hardware cache event - { HWCE("L1-dcache-load-misses", STXT("L1 D-cache Load Misses"), 
- PERF_COUNT_HW_CACHE_L1D, - PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) }, - { HWCE("L1-dcache-loads", STXT("L1 D-cache Loads"), - PERF_COUNT_HW_CACHE_L1D, - PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, - { HWCE("L1-dcache-store-misses", STXT("L1 D-cache Store Misses"), - PERF_COUNT_HW_CACHE_L1D, - PERF_COUNT_HW_CACHE_RESULT_MISS, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, - { HWCE("L1-dcache-stores", STXT("L1 D-cache Store Stores"), - PERF_COUNT_HW_CACHE_L1D, - PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, - { HWCE("L1-icache-load-misses", STXT("L1 Instructions Load Misses"), - PERF_COUNT_HW_CACHE_L1I, - PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) }, - { HWCE("L1-icache-load-misses", STXT("L1 Instructions Loads"), - PERF_COUNT_HW_CACHE_L1I, - PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, - { HWCE("dTLB-load-misses", STXT("D-TLB Load Misses"), - PERF_COUNT_HW_CACHE_DTLB, - PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) }, - { HWCE("dTLB-loads", STXT("D-TLB Loads"), - PERF_COUNT_HW_CACHE_DTLB, - PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, - { HWCE("iTLB-load-misses", STXT("The Instruction TLB Load Misses"), - PERF_COUNT_HW_CACHE_ITLB, - PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) }, - { HWCE("iTLB-loads", STXT("The Instruction TLB Loads"), - PERF_COUNT_HW_CACHE_ITLB, +#define HWC_GENERIC \ + /* Hardware event: */\ + { HWE("usr_time", STXT("User CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,\ + .int_name = "cycles" },\ + { HWE("sys_time", STXT("System CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,\ + .int_name = "cycles~system=1~user=0" },\ + { HWE("branch-instructions", STXT("Branch-instructions"),\ + PERF_COUNT_HW_BRANCH_INSTRUCTIONS) },\ + { HWE("branch-misses", STXT("Branch-misses"), PERF_COUNT_HW_BRANCH_MISSES) },\ + { HWE("bus-cycles", STXT("Bus Cycles"), PERF_COUNT_HW_BUS_CYCLES),\ + .timecvt = 1 },\ + { 
HWE("cache-misses", STXT("Cache-misses"), PERF_COUNT_HW_CACHE_MISSES) },\ + { HWE("cache-references", STXT("Cache-references"),\ + PERF_COUNT_HW_CACHE_REFERENCES) },\ + { HWE("cycles", STXT("CPU Cycles"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1 },\ + { HWE("insts", STXT("Instructions Executed"), PERF_COUNT_HW_INSTRUCTIONS),\ + .int_name = "instructions" },\ + { HWE("ref-cycles", STXT("Total Cycles"), PERF_COUNT_HW_REF_CPU_CYCLES),\ + .timecvt = 1 },\ + { HWE("stalled-cycles-backend", STXT("Stalled Cycles during issue."),\ + PERF_COUNT_HW_STALLED_CYCLES_BACKEND), .timecvt = 1 },\ + { HWE("stalled-cycles-frontend", STXT("Stalled Cycles during retirement."),\ + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND), .timecvt = 1 },\ + /* Software event: */\ + { SWE("alignment-faults", STXT("Alignment Faults"),\ + PERF_COUNT_SW_ALIGNMENT_FAULTS) },\ + { SWE("context-switches", STXT("Context Switches"),\ + PERF_COUNT_SW_CONTEXT_SWITCHES) },\ + { SWE("cpu-clock", STXT("CPU Clock"), PERF_COUNT_SW_CPU_CLOCK),\ + .timecvt = 1 },\ + { SWE("cpu-migrations", STXT("CPU Migrations"),\ + PERF_COUNT_SW_CPU_MIGRATIONS) },\ + { SWE("emulation-faults", STXT("Emulation Faults"),\ + PERF_COUNT_SW_EMULATION_FAULTS) },\ + { SWE("major-faults", STXT("Major Page Faults"),\ + PERF_COUNT_SW_PAGE_FAULTS_MAJ) },\ + { SWE("minor-faults", STXT("Minor Page Faults"),\ + PERF_COUNT_SW_PAGE_FAULTS_MIN) },\ + { SWE("page-faults", STXT("Page Faults"), PERF_COUNT_SW_PAGE_FAULTS) },\ + { SWE("task-clock", STXT("Clock Count Specific"), PERF_COUNT_SW_TASK_CLOCK),\ + .timecvt = 1 },\ + /* Hardware cache event: */\ + { HWCE("L1-dcache-load-misses", STXT("L1 D-cache Load Misses"),\ + PERF_COUNT_HW_CACHE_L1D,\ + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\ + { HWCE("L1-dcache-loads", STXT("L1 D-cache Loads"),\ + PERF_COUNT_HW_CACHE_L1D,\ + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\ + { HWCE("L1-dcache-store-misses", STXT("L1 D-cache Store Misses"),\ + PERF_COUNT_HW_CACHE_L1D,\ + 
PERF_COUNT_HW_CACHE_RESULT_MISS, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\ + { HWCE("L1-dcache-stores", STXT("L1 D-cache Store Stores"),\ + PERF_COUNT_HW_CACHE_L1D,\ + PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\ + { HWCE("L1-icache-load-misses", STXT("L1 Instructions Load Misses"),\ + PERF_COUNT_HW_CACHE_L1I,\ + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\ + { HWCE("L1-icache-load-misses", STXT("L1 Instructions Loads"),\ + PERF_COUNT_HW_CACHE_L1I,\ + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\ + { HWCE("dTLB-load-misses", STXT("D-TLB Load Misses"),\ + PERF_COUNT_HW_CACHE_DTLB,\ + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\ + { HWCE("dTLB-loads", STXT("D-TLB Loads"),\ + PERF_COUNT_HW_CACHE_DTLB,\ + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\ + { HWCE("iTLB-load-misses", STXT("The Instruction TLB Load Misses"),\ + PERF_COUNT_HW_CACHE_ITLB,\ + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\ + { HWCE("iTLB-loads", STXT("The Instruction TLB Loads"),\ + PERF_COUNT_HW_CACHE_ITLB,\ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, +static Hwcentry generic_list[] = { + HWC_GENERIC {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} }; +#include "hwc_amd_zen3.h" + /* structure defining the counters for a CPU type */ typedef struct { @@ -2516,6 +2520,7 @@ static cpu_list_t cputabs[] = { {CPC_KPROF, kproflist, {NULL}}, // OBSOLETE (To support 12.3 and earlier, TBR) {ARM_CPU_IMP_APM, generic_list, {"insts,,cycles", 0}}, {CPC_AMD_Authentic, generic_list, {"insts,,cycles", 0}}, + {CPC_AMD_FAM_19H_ZEN3, amd_zen3_list, {"insts,,cycles", 0}}, {0, generic_list, {"insts,,cycles", 0}}, }; @@ -3033,7 +3038,7 @@ setup_cpc_general (int skip_hwc_test) valid_cpu_tables[1] = papi_generic_list; Tprintf (DBG_LT2, "hwctable: setup_cpc(): getting descriptions \n"); // populate cpcx_raw and cpcx_attr - hwcdrv->hwcdrv_get_descriptions (hwc_cb, attrs_cb); + 
hwcdrv->hwcdrv_get_descriptions (hwc_cb, attrs_cb, cputabs_entry->stdlist_table); for (int kk = 0; kk < 2; kk++) { // collect and er_kernel hwc_process_raw_ctrs (kk, &cpcx_std[kk], &cpcx_raw[kk], &cpcx_hidden[kk], diff --git a/gprofng/common/opteron_pcbe.c b/gprofng/common/opteron_pcbe.c index 0f1815d..a8d7e76 100644 --- a/gprofng/common/opteron_pcbe.c +++ b/gprofng/common/opteron_pcbe.c @@ -304,6 +304,8 @@ static amd_generic_event_t family_10h_generic_events[] = { }; static amd_event_t *amd_events = NULL; +static const char *amd_impl_name = ""; +static const char *amd_cpuref = ""; static uint_t amd_family; static amd_generic_event_t *amd_generic_events = NULL; @@ -318,19 +320,39 @@ opt_pcbe_init (void) if (cpuid_getvendor () != X86_VENDOR_AMD) return -1; - /* - * Figure out processor revision here and assign appropriate - * event configuration. - */ + amd_impl_name = GTXT ("Unknown AMD processor"); switch (amd_family) { case OPTERON_FAMILY: amd_events = opt_events_rev_E; amd_generic_events = opt_generic_events; + amd_impl_name = "AMD Opteron & Athlon64"; + amd_cpuref = GTXT ("See Chapter 10 of the \"BIOS and Kernel Developer's" + " Guide for the AMD Athlon 64 and AMD Opteron Processors,\"\n" + "AMD publication #26094"); break; case AMD_FAMILY_10H: amd_events = family_10h_events; amd_generic_events = family_10h_generic_events; + amd_impl_name = "AMD Family 10h"; + amd_cpuref = GTXT ("See section 3.15 of the \"BIOS and Kernel Developer's" + " Guide (BKDG) For AMD Family 10h Processors,\"\n" + "AMD publication #31116"); + break; + case AMD_ZEN3_FAMILY: + switch (cpuid_getmodel ()) + { + case AMD_ZEN3_RYZEN: + case AMD_ZEN3_RYZEN2: + case AMD_ZEN3_RYZEN3: + case AMD_ZEN3_EPYC_TRENTO: + amd_impl_name = AMD_FAM_19H_ZEN3_NAME; + break; + case AMD_ZEN4_RYZEN: + case AMD_ZEN4_EPYC: + amd_impl_name = AMD_FAM_19H_ZEN4_NAME; + break; + } break; } return 0; @@ -345,27 +367,17 @@ opt_pcbe_ncounters (void) static const char * opt_pcbe_impl_name (void) { - if (amd_family == 
OPTERON_FAMILY) - return ("AMD Opteron & Athlon64"); - else if (amd_family == AMD_FAMILY_10H) - return ("AMD Family 10h"); - else - return ("Unknown AMD processor"); + return amd_impl_name; } static const char * opt_pcbe_cpuref (void) { - if (amd_family == OPTERON_FAMILY) - return GTXT ("See Chapter 10 of the \"BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD Opteron Processors,\"\nAMD publication #26094"); - else if (amd_family == AMD_FAMILY_10H) - return GTXT ("See section 3.15 of the \"BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h Processors,\"\nAMD publication #31116"); - else - return GTXT ("Unknown AMD processor"); + return amd_cpuref; } static int -opt_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb) +opt_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb, Hwcentry *raw_hwc_tbl) { int count = 0; for (uint_t kk = 0; amd_events && amd_events[kk].name; kk++) @@ -380,6 +392,14 @@ opt_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb) hwc_cb (jj, amd_generic_events[kk].name); count++; } + if (raw_hwc_tbl) + for (Hwcentry *h = raw_hwc_tbl; h->name; h++) + if (h->use_perf_event_type) + for (uint_t jj = 0; jj < opt_pcbe_ncounters (); jj++) + { + hwc_cb (jj, h->name); + count++; + } return count; } @@ -392,6 +412,12 @@ opt_pcbe_get_eventnum (const char *eventname, uint_t pmc, eventsel_t *eventsel, *eventsel = (eventsel_t) - 1; *event_valid_umask = 0x0; + if (amd_events == NULL && amd_generic_events == NULL) + { // These tables are created only for old hardware. + *eventsel = 0; + return 0; + } + /* search table */ for (kk = 0; amd_events && amd_events[kk].name; kk++) { |