From 8fe04eeb2cbb8c4cf7b6e8d9183fe09a8b2e8d51 Mon Sep 17 00:00:00 2001 From: Vladimir Mezentsev Date: Mon, 8 Jan 2024 22:00:24 -0800 Subject: gprofng: 31123 improvements to hardware event implementation Our hardware counter profiling is based on perf_event_open(). Our HWC tables are missing for newer machines. I have added HWC tables for the following event types: PERF_TYPE_HARDWARE, PERF_TYPE_SOFTWARE, PERF_TYPE_HW_CACHE. Other event types require additional fixes. Also did a little cleanup: marked the symbols as static, used StringBuilder, created a function to read /proc/cpuinfo. gprofng/ChangeLog 2024-01-08 Vladimir Mezentsev PR gprofng/31123 * common/core_pcbe.c: Mark the symbols as static. Add events_generic[]. * common/hwc_cpus.h: Declare a new function read_cpuinfo. * common/hwcdrv.c: Add a new parameter in init_perf_event(). * common/hwcentry.h: Add use_perf_event_type in Hwcentry. * common/hwcfuncs.c (process_data_descriptor): Read use_perf_event_type, type, config. * common/hwctable.c: Add a new HWC table generic_list[]. * common/opteron_pcbe.c (opt_pcbe_init): Accept AMD machines. * src/collctrl.cc: Use StringBuilder in Coll_Ctrl::build_data_desc(). Add a new function read_cpuinfo. 
--- gprofng/common/core_pcbe.c | 46 ++++----- gprofng/common/hwc_cpus.h | 16 ++++ gprofng/common/hwcdrv.c | 35 +++---- gprofng/common/hwcentry.h | 5 +- gprofng/common/hwcfuncs.c | 82 ++++++++-------- gprofng/common/hwctable.c | 124 ++++++++++++++++--------- gprofng/common/opteron_pcbe.c | 21 ++--- gprofng/src/collctrl.cc | 211 ++++++++++++++++++++++-------------------- 8 files changed, 293 insertions(+), 247 deletions(-) (limited to 'gprofng') diff --git a/gprofng/common/core_pcbe.c b/gprofng/common/core_pcbe.c index 14c4268..25bf484 100644 --- a/gprofng/common/core_pcbe.c +++ b/gprofng/common/core_pcbe.c @@ -2597,102 +2597,95 @@ struct events_table_t static const struct events_table_t *events_table = NULL; -const struct events_table_t events_fam6_mod23[] = { +static const struct events_table_t events_fam6_mod23[] = { ARCH_EVENTS EVENTS_FAM6_MOD23 NT_END }; -const struct events_table_t events_fam6_mod28[] = { +static const struct events_table_t events_fam6_mod28[] = { ARCH_EVENTS EVENTS_FAM6_MOD28 NT_END }; -const struct events_table_t events_fam6_mod26[] = { +static const struct events_table_t events_fam6_mod26[] = { ARCH_EVENTS EVENTS_FAM6_MOD26 NT_END }; -const struct events_table_t events_fam6_mod46[] = { +static const struct events_table_t events_fam6_mod46[] = { ARCH_EVENTS EVENTS_FAM6_MOD26 EVENTS_FAM6_MOD46_ONLY NT_END }; -const struct events_table_t events_fam6_mod37[] = { +static const struct events_table_t events_fam6_mod37[] = { ARCH_EVENTS EVENTS_FAM6_MOD37 EVENTS_FAM6_MOD37_ALSO NT_END }; -const struct events_table_t events_fam6_mod47[] = { +static const struct events_table_t events_fam6_mod47[] = { ARCH_EVENTS EVENTS_FAM6_MOD37 NT_END }; -const struct events_table_t events_fam6_mod42[] = { +static const struct events_table_t events_fam6_mod42[] = { ARCH_EVENTS EVENTS_FAM6_MOD42 EVENTS_FAM6_MOD42_ONLY NT_END }; -const struct events_table_t events_fam6_mod45[] = { +static const struct events_table_t events_fam6_mod45[] = { ARCH_EVENTS EVENTS_FAM6_MOD42 
EVENTS_FAM6_MOD45_ONLY NT_END }; -const struct events_table_t events_fam6_mod58[] = { +static const struct events_table_t events_fam6_mod58[] = { ARCH_EVENTS EVENTS_FAM6_MOD58 NT_END }; -const struct events_table_t events_fam6_mod62[] = { +static const struct events_table_t events_fam6_mod62[] = { ARCH_EVENTS EVENTS_FAM6_MOD58 EVENTS_FAM6_MOD62_ONLY NT_END }; -const struct events_table_t events_fam6_mod60[] = { +static const struct events_table_t events_fam6_mod60[] = { ARCH_EVENTS EVENTS_FAM6_MOD60 NT_END }; -const struct events_table_t events_fam6_mod61[] = { +static const struct events_table_t events_fam6_mod61[] = { ARCH_EVENTS EVENTS_FAM6_MOD61 NT_END }; -const struct events_table_t events_fam6_mod78[] = { +static const struct events_table_t events_fam6_mod78[] = { ARCH_EVENTS EVENTS_FAM6_MOD78 NT_END }; -const struct events_table_t events_fam6_unknown[] = { +static const struct events_table_t events_fam6_unknown[] = { ARCH_EVENTS NT_END }; -const struct events_table_t events_fam_arm[] = { -// ARCH_EVENTS -// *eventnum = pevent->eventselect; -// *eventnum |= (pevent->unitmask << PERFCTR_UMASK_SHIFT); -// *eventnum |= (pevent->attrs << 16); -// *eventnum |= (pevent->cmask << 24); -// eventselect, unitmask, supported_counters, name, cmask, attrs, msr_offset - +const struct events_table_t events_generic[] = { // Hardware event #define HWE(nm, id) { id, 0, C_ALL, nm, PERF_TYPE_HARDWARE, 0, 0 }, HWE("branch-instructions", PERF_COUNT_HW_BRANCH_INSTRUCTIONS) @@ -2741,13 +2734,20 @@ core_pcbe_init (void) { switch (cpuid_getvendor ()) { + case X86_VENDOR_AMD: + snprintf (core_impl_name, sizeof (core_impl_name), "%s", X86_VENDORSTR_AMD); + events_table = events_generic; + num_gpc = 4; + num_ffc = 0; + total_pmc = num_gpc + num_ffc; + return 0; case ARM_CPU_IMP_ARM: case ARM_CPU_IMP_BRCM: case ARM_CPU_IMP_CAVIUM: case ARM_CPU_IMP_APM: case ARM_CPU_IMP_QCOM: snprintf (core_impl_name, sizeof (core_impl_name), "%s", AARCH64_VENDORSTR_ARM); - events_table = events_fam_arm; + 
events_table = events_generic; num_gpc = 4; // MEZ: a real implementation is needed num_ffc = 0; total_pmc = num_gpc + num_ffc; diff --git a/gprofng/common/hwc_cpus.h b/gprofng/common/hwc_cpus.h index 634aa4f..34896d6 100644 --- a/gprofng/common/hwc_cpus.h +++ b/gprofng/common/hwc_cpus.h @@ -23,6 +23,19 @@ #ifndef __HWC_CPUS_H #define __HWC_CPUS_H +typedef struct +{ + int cpu_cnt; + int cpu_clk_freq; + int cpu_model; + int cpu_family; + int cpu_vendor; + char *cpu_vendorstr; + char *cpu_modelstr; +} cpu_info_t; + +extern cpu_info_t *read_cpuinfo(); + #define MAX_PICS 20 /* Max # of HW ctrs that can be enabled simultaneously */ /* type for specifying CPU register number */ @@ -91,6 +104,8 @@ #define CPC_AMD_FAM_10H 2501 /* Barcelona, Shanghai... */ #define CPC_AMD_FAM_11H 2502 /* Griffin... */ #define CPC_AMD_FAM_15H 2503 +#define CPC_AMD_Authentic 2504 + #define CPC_KPROF 3003 // OBSOLETE (To support 12.3 and earlier) #define CPC_FOX 3004 /* pseudo-chip */ @@ -191,6 +206,7 @@ enum { {CPC_ULTRA2 , "UltraSPARC I&II"}, \ {CPC_ULTRA1 , "UltraSPARC I&II"}, \ {ARM_CPU_IMP_APM , AARCH64_VENDORSTR_ARM}, \ + {CPC_AMD_Authentic , "AuthenticAMD"}, \ {0, NULL} /* init like this: static libcpc2_cpu_lookup_t cpu_table[]={LIBCPC2_CPU_LOOKUP_LIST}; diff --git a/gprofng/common/hwcdrv.c b/gprofng/common/hwcdrv.c index 29a0e17..2d549b0 100644 --- a/gprofng/common/hwcdrv.c +++ b/gprofng/common/hwcdrv.c @@ -675,26 +675,21 @@ dump_perf_event_attr (struct perf_event_attr *at) } static void -init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period) +init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period, + Hwcentry *hwce) { memset (hw, 0, sizeof (struct perf_event_attr)); - hw->size = sizeof (struct perf_event_attr); // fwd/bwd compat - -#if defined(__i386__) || defined(__x86_64) - //note: Nehalem/Westmere OFFCORE_RESPONSE in upper 32 bits - hw->config = event; - hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw... 
-#elif defined(__aarch64__) - hw->type = (event >> 24) & 7; - hw->config = event & 0xff; -#elif defined(sparc) - //SPARC needs to be shifted up 16 bits - hw->config = (event & 0xFFFF) << 16; // uint64_t event - uint64_t regs = (event >> 20) & 0xf; // see sparc_pcbe.c - hw->config |= regs << 4; // for M8, supported PICs need to be placed at bits [7:4] - hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw... -#endif - + hw->size = sizeof (struct perf_event_attr); + if (hwce && hwce->use_perf_event_type) + { + hw->config = hwce->config; + hw->type = hwce->type; + } + else + { // backward compatibility. The old interface had no 'hwce' argument. + hw->config = event; + hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw... + } hw->sample_period = period; hw->sample_type = PERF_SAMPLE_IP | // PERF_SAMPLE_TID | @@ -858,7 +853,7 @@ hdrv_pcl_internal_open () perf_event_def_t tmp_event_def; memset (&tmp_event_def, 0, sizeof (tmp_event_def)); struct perf_event_attr *pe_attr = &tmp_event_def.hw; - init_perf_event (pe_attr, 0, 0); + init_perf_event (pe_attr, 0, 0, NULL); pe_attr->type = PERF_TYPE_HARDWARE; // specify abstracted HW event pe_attr->config = PERF_COUNT_HW_INSTRUCTIONS; // specify abstracted insts int hwc_fd = perf_event_open (pe_attr, @@ -1283,7 +1278,7 @@ hwcdrv_create_counters (unsigned hwcdef_cnt, Hwcentry *hwcdef) glb_event_def->min_time = hwcdef[idx].min_time; glb_event_def->name = strdup (hwcdef[idx].name); // memory leak??? 
very minor init_perf_event (&glb_event_def->hw, glb_event_def->eventsel, - glb_event_def->counter_preload); + glb_event_def->counter_preload, hwcdef + idx); TprintfT (DBG_LT1, "hwcdrv: create_counters: pic=%u name='%s' interval=%lld" "(min_time=%lld): reg_num=0x%x eventsel=0x%llx ireset=%lld usr=%lld sys=%lld\n", idx, hwcdef[idx].int_name, (long long) glb_event_def->counter_preload, diff --git a/gprofng/common/hwcentry.h b/gprofng/common/hwcentry.h index 739bc4e..a35a363 100644 --- a/gprofng/common/hwcentry.h +++ b/gprofng/common/hwcentry.h @@ -112,11 +112,12 @@ extern "C" int timecvt; /* multiplier to convert metric to time, 0 if N/A */ ABST_type memop; /* type of backtracking allowed */ char *short_desc; /* optional one-liner description, or NULL */ - int type; /* Type of perf_event_attr */ - long long config; /* perf_event_type -specific configuration */ /* the fields above this line are expected, in order, by the tables in hwctable.c */ /* ================================================== */ /* the fields below this line are more flexible */ + unsigned int use_perf_event_type : 16; /* Set 1 to use two fields below */ + unsigned int type : 16; /* Type of perf_event_attr */ + long long config; /* perf_event_type -specific configuration */ int sort_order; /* "tag" to associate experiment record with HWC def */ regno_t *reg_list; /* if not NULL, legal values for field above */ /* Note: reg_list will be terminated by REGNO_ANY */ diff --git a/gprofng/common/hwcfuncs.c b/gprofng/common/hwcfuncs.c index 3c44ab6..86d6935 100644 --- a/gprofng/common/hwcfuncs.c +++ b/gprofng/common/hwcfuncs.c @@ -259,18 +259,11 @@ process_data_descriptor (const char *defstring) clear_hwcdefs (); if (!defstring || !strlen (defstring)) - { - err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; - } + return HWCFUNCS_ERROR_HWCARGS; ds = strdup (defstring); if (!ds) - { - err = HWCFUNCS_ERROR_HWCINIT; - goto ext_hw_install_end; - } + return HWCFUNCS_ERROR_HWCINIT; dsp = ds; - for (idx = 
0; idx < MAX_PICS && *dsp; idx++) { char *name = NULL; @@ -281,13 +274,33 @@ process_data_descriptor (const char *defstring) int timecvt = 0; unsigned sort_order = (unsigned) - 1; + // Read use_perf_event_type, type, config + hwcdef[idx].use_perf_event_type = (int) strtol (dsp, &dsp, 0); + if (*dsp++ != ':') + { + err = HWCFUNCS_ERROR_HWCARGS; + break; + } + hwcdef[idx].type = (int) strtol (dsp, &dsp, 0); + if (*dsp++ != ':') + { + err = HWCFUNCS_ERROR_HWCARGS; + break; + } + hwcdef[idx].config = strtol (dsp, &dsp, 0); + if (*dsp++ != ':') + { + err = HWCFUNCS_ERROR_HWCARGS; + break; + } + /* name */ name = dsp; dsp = strchr (dsp, ':'); if (dsp == NULL) { err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; + break; } *dsp++ = (char) 0; @@ -297,7 +310,7 @@ process_data_descriptor (const char *defstring) if (dsp == NULL) { err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; + break; } *dsp++ = (char) 0; @@ -306,12 +319,12 @@ process_data_descriptor (const char *defstring) if (*dsp++ != ':') { err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; + break; } if (reg < 0 && reg != -1) { err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; + break; } if (reg >= 0) hwcdef[idx].reg_num = reg; @@ -321,21 +334,16 @@ process_data_descriptor (const char *defstring) if (*dsp++ != ':') { err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; + break; } if (interval < 0) { err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; + break; } hwcdef[idx].val = interval; /* min_time */ - /* - * This is a new field. - * An old launcher (dbx, etc.) would not include it. - * Detect the presence of the field by the char 'm'. 
- */ if (*dsp == 'm') { long long tmp_ll = 0; @@ -344,12 +352,12 @@ process_data_descriptor (const char *defstring) if (*dsp++ != ':') { err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; + break; } if (tmp_ll < 0) { err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; + break; } hwcdef[idx].min_time = tmp_ll; } @@ -361,7 +369,7 @@ process_data_descriptor (const char *defstring) if (*dsp++ != ':') { err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; + break; } hwcdef[idx].sort_order = sort_order; @@ -370,7 +378,7 @@ process_data_descriptor (const char *defstring) if (*dsp++ != ':') { err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; + break; } hwcdef[idx].timecvt = timecvt; @@ -379,7 +387,7 @@ process_data_descriptor (const char *defstring) if (*dsp != 0 && *dsp++ != ',') { err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; + break; } hwcdef[idx].memop = memop; if (*name) @@ -394,27 +402,11 @@ process_data_descriptor (const char *defstring) } if (*dsp) - { - TprintfT (DBG_LT0, "hwcfuncs: ERROR: process_data_descriptor(): " - "ctr string had some trailing garbage:" - " '%s'\n", dsp); - err = HWCFUNCS_ERROR_HWCARGS; - goto ext_hw_install_end; - } - free (ds); - hwcdef_cnt = idx; - return 0; - -ext_hw_install_end: - if (dsp && *dsp) - { - TprintfT (DBG_LT0, "hwcfuncs: ERROR: process_data_descriptor(): " - " syntax error just before:" - " '%s;\n", dsp); - logerr (GTXT ("Data descriptor syntax error near `%s'\n"), dsp); - } + err = HWCFUNCS_ERROR_HWCARGS; + if (err != 0) + logerr (GTXT ("Data descriptor syntax error near `%s'\n"), dsp); else - logerr (GTXT ("Data descriptor syntax error\n")); + hwcdef_cnt = idx; free (ds); return err; } diff --git a/gprofng/common/hwctable.c b/gprofng/common/hwctable.c index 2a3b59a..d073513 100644 --- a/gprofng/common/hwctable.c +++ b/gprofng/common/hwctable.c @@ -23,9 +23,9 @@ #include #include #include +#include #include "hwcdrv.h" -#include "hwcfuncs.h" /* TprintfT(,...) definitions. 
Adjust per module as needed */ #define DBG_LT0 0 // for high-level configuration, unexpected errors/warnings @@ -2367,52 +2367,87 @@ static Hwcentry amd_15h[] = { {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} }; -#define USE_ARM_REF_CYCLES \ - {"usr_time","cycles", REGNO_ANY, STXT("User CPU"), PRELOADS_85, 1, ABST_NONE}, \ - {"sys_time","cycles~system=1~user=0", REGNO_ANY, STXT("System CPU"), PRELOADS_85, 1, ABST_NONE}, \ +#define INIT_HWC(nm, mtr, cfg, ty) .name = (nm), .metric = (mtr), \ + .config = (cfg), .type = ty, .use_perf_event_type = 1, \ + .val = PRELOAD_DEF, .reg_num = REGNO_ANY +#define HWE(nm, mtr, cfg) INIT_HWC(nm, mtr, cfg, PERF_TYPE_HARDWARE) +#define SWE(nm, mtr, cfg) INIT_HWC(nm, mtr, cfg, PERF_TYPE_SOFTWARE) +#define HWCE(nm, mtr, id, op, res) \ + INIT_HWC(nm, mtr, (id) | ((op) << 8) | ((res) << 16), PERF_TYPE_HW_CACHE) -static Hwcentry armlist[] = { - USE_ARM_REF_CYCLES +static Hwcentry generic_list[] = { // Hardware event: - {"branch-instructions", NULL, REGNO_ANY, STXT("Branch-instructions"), PRELOADS_35, 0, ABST_NONE}, - {"branch-misses", NULL, REGNO_ANY, STXT("Branch-misses"), PRELOADS_35, 0, ABST_NONE}, - {"bus-cycles", NULL, REGNO_ANY, STXT("Bus Cycles"), PRELOADS_35, 1, ABST_NONE}, - {"cache-misses", NULL, REGNO_ANY, STXT("Cache-misses"), PRELOADS_35, 0, ABST_NONE}, - {"cache-references", NULL, REGNO_ANY, STXT("Cache-references"), PRELOADS_35, 0, ABST_NONE}, - {"cycles", NULL, REGNO_ANY, STXT("CPU Cycles"), PRELOADS_85, 1, ABST_NONE}, - {"insts", "instructions", REGNO_ANY, STXT("Instructions Executed"), PRELOADS_75, 0, ABST_NONE}, - {"ref-cycles", NULL, REGNO_ANY, STXT("Total Cycles"), PRELOADS_85, 1, ABST_NONE}, - {"stalled-cycles-backend", NULL, REGNO_ANY, STXT("Stalled Cycles during issue."), PRELOADS_85, 1, ABST_NONE}, - {"stalled-cycles-frontend", NULL, REGNO_ANY, STXT("Stalled Cycles during retirement."), PRELOADS_85, 1, ABST_NONE}, - + { HWE("usr_time", STXT("User CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1, + .int_name = 
"cycles" }, + { HWE("sys_time", STXT("System CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1, + .int_name = "cycles~system=1~user=0" }, + { HWE("branch-instructions", STXT("Branch-instructions"), + PERF_COUNT_HW_BRANCH_INSTRUCTIONS) }, + { HWE("branch-misses", STXT("Branch-misses"), PERF_COUNT_HW_BRANCH_MISSES) }, + { HWE("bus-cycles", STXT("Bus Cycles"), PERF_COUNT_HW_BUS_CYCLES), + .timecvt = 1 }, + { HWE("cache-misses", STXT("Cache-misses"), PERF_COUNT_HW_CACHE_MISSES) }, + { HWE("cache-references", STXT("Cache-references"), + PERF_COUNT_HW_CACHE_REFERENCES) }, + { HWE("cycles", STXT("CPU Cycles"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1 }, + { HWE("insts", STXT("Instructions Executed"), PERF_COUNT_HW_INSTRUCTIONS), + .int_name = "instructions" }, + { HWE("ref-cycles", STXT("Total Cycles"), PERF_COUNT_HW_REF_CPU_CYCLES), + .timecvt = 1 }, + { HWE("stalled-cycles-backend", STXT("Stalled Cycles during issue."), + PERF_COUNT_HW_STALLED_CYCLES_BACKEND), .timecvt = 1 }, + { HWE("stalled-cycles-frontend", STXT("Stalled Cycles during retirement."), + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND), .timecvt = 1 }, // Software event: - {"alignment-faults", NULL, REGNO_ANY, STXT("Alignment Faults"), PRELOADS_85, 0, ABST_NONE}, - {"context-switches", NULL, REGNO_ANY, STXT("Context Switches"), PRELOADS_85, 0, ABST_NONE}, - {"cpu-clock", NULL, REGNO_ANY, STXT("CPU Clock"), PRELOADS_85, 1, ABST_NONE}, - {"cpu-migrations", NULL, REGNO_ANY, STXT("CPU Migrations"), PRELOADS_85, 0, ABST_NONE}, - {"emulation-faults", NULL, REGNO_ANY, STXT("Emulation Faults"), PRELOADS_85, 0, ABST_NONE}, - {"major-faults", NULL, REGNO_ANY, STXT("Major Page Faults"), PRELOADS_85, 0, ABST_NONE}, - {"minor-faults", NULL, REGNO_ANY, STXT("Minor Page Faults"), PRELOADS_85, 0, ABST_NONE}, - {"page-faults", NULL, REGNO_ANY, STXT("Page Faults"), PRELOADS_85, 0, ABST_NONE}, - {"task-clock", NULL, REGNO_ANY, STXT("Clock Count Specific"), PRELOADS_85, 1, ABST_NONE}, - + { SWE("alignment-faults", STXT("Alignment 
Faults"), + PERF_COUNT_SW_ALIGNMENT_FAULTS) }, + { SWE("context-switches", STXT("Context Switches"), + PERF_COUNT_SW_CONTEXT_SWITCHES) }, + { SWE("cpu-clock", STXT("CPU Clock"), PERF_COUNT_SW_CPU_CLOCK), + .timecvt = 1 }, + { SWE("cpu-migrations", STXT("CPU Migrations"), + PERF_COUNT_SW_CPU_MIGRATIONS) }, + { SWE("emulation-faults", STXT("Emulation Faults"), + PERF_COUNT_SW_EMULATION_FAULTS) }, + { SWE("major-faults", STXT("Major Page Faults"), + PERF_COUNT_SW_PAGE_FAULTS_MAJ) }, + { SWE("minor-faults", STXT("Minor Page Faults"), + PERF_COUNT_SW_PAGE_FAULTS_MIN) }, + { SWE("page-faults", STXT("Page Faults"), PERF_COUNT_SW_PAGE_FAULTS) }, + { SWE("task-clock", STXT("Clock Count Specific"), PERF_COUNT_SW_TASK_CLOCK), + .timecvt = 1 }, // Hardware cache event - {"L1-dcache-load-misses", NULL, REGNO_ANY, STXT("L1 D-cache Load Misses"), PRELOADS_35, 0, ABST_NONE}, - {"L1-dcache-loads", NULL, REGNO_ANY, STXT("L1 D-cache Loads"), PRELOADS_35, 0, ABST_NONE}, - {"L1-dcache-store-misses", NULL, REGNO_ANY, STXT("L1 D-cache Store Misses"), PRELOADS_35, 0, ABST_NONE}, - {"L1-dcache-stores", NULL, REGNO_ANY, STXT("L1 D-cache Store Stores"), PRELOADS_35, 0, ABST_NONE}, - {"L1-icache-load-misses", NULL, REGNO_ANY, STXT("L1 Instructions Load Misses"), PRELOADS_35, 0, ABST_NONE}, - {"L1-icache-load-misses", NULL, REGNO_ANY, STXT("L1 Instructions Loads"), PRELOADS_35, 0, ABST_NONE}, - {"dTLB-load-misses", NULL, REGNO_ANY, STXT("D-TLB Load Misses"), PRELOADS_35, 0, ABST_NONE}, - {"dTLB-loads", NULL, REGNO_ANY, STXT("D-TLB Loads"), PRELOADS_35, 0, ABST_NONE}, - {"iTLB-load-misses", NULL, REGNO_ANY, STXT("The Instruction TLB Load Misses"), PRELOADS_35, 0, ABST_NONE}, - {"iTLB-loads", NULL, REGNO_ANY, STXT("The Instruction TLB Loads"), PRELOADS_35, 0, ABST_NONE}, - - {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} -}; + { HWCE("L1-dcache-load-misses", STXT("L1 D-cache Load Misses"), + PERF_COUNT_HW_CACHE_L1D, + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) }, + { 
HWCE("L1-dcache-loads", STXT("L1 D-cache Loads"), + PERF_COUNT_HW_CACHE_L1D, + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, + { HWCE("L1-dcache-store-misses", STXT("L1 D-cache Store Misses"), + PERF_COUNT_HW_CACHE_L1D, + PERF_COUNT_HW_CACHE_RESULT_MISS, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, + { HWCE("L1-dcache-stores", STXT("L1 D-cache Store Stores"), + PERF_COUNT_HW_CACHE_L1D, + PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, + { HWCE("L1-icache-load-misses", STXT("L1 Instructions Load Misses"), + PERF_COUNT_HW_CACHE_L1I, + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) }, + { HWCE("L1-icache-load-misses", STXT("L1 Instructions Loads"), + PERF_COUNT_HW_CACHE_L1I, + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, + { HWCE("dTLB-load-misses", STXT("D-TLB Load Misses"), + PERF_COUNT_HW_CACHE_DTLB, + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) }, + { HWCE("dTLB-loads", STXT("D-TLB Loads"), + PERF_COUNT_HW_CACHE_DTLB, + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, + { HWCE("iTLB-load-misses", STXT("The Instruction TLB Load Misses"), + PERF_COUNT_HW_CACHE_ITLB, + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) }, + { HWCE("iTLB-loads", STXT("The Instruction TLB Loads"), + PERF_COUNT_HW_CACHE_ITLB, + PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, -static Hwcentry unknownlist[] = - /* used for unrecognized CPU type */{ {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} }; @@ -2485,8 +2520,9 @@ static cpu_list_t cputabs[] = { {CPC_SPARC64_X, usfuji_X_list, {"insts,,cycles,,dcstall", 0}}, {CPC_SPARC64_XII, usfuji_XII_list, {"insts,,cycles,,dcstall", 0}}, {CPC_KPROF, kproflist, {NULL}}, // OBSOLETE (To support 12.3 and earlier, TBR) - {ARM_CPU_IMP_APM, armlist, {"insts,,cycles", 0}}, - {0, unknownlist, {NULL}} /* processor is unknown, but experiment is allowed */ + {ARM_CPU_IMP_APM, generic_list, {"insts,,cycles", 0}}, + 
{CPC_AMD_Authentic, generic_list, {"insts,,cycles", 0}}, + {0, generic_list, {"insts,,cycles", 0}}, }; /*---------------------------------------------------------------------------*/ diff --git a/gprofng/common/opteron_pcbe.c b/gprofng/common/opteron_pcbe.c index 4104a13..0f1815d 100644 --- a/gprofng/common/opteron_pcbe.c +++ b/gprofng/common/opteron_pcbe.c @@ -315,30 +315,25 @@ static int opt_pcbe_init (void) { amd_family = cpuid_getfamily (); - /* - * Make sure this really _is_ an Opteron or Athlon 64 system. The kernel - * loads this module based on its name in the module directory, but it - * could have been renamed. - */ - if (cpuid_getvendor () != X86_VENDOR_AMD - || (amd_family != OPTERON_FAMILY && amd_family != AMD_FAMILY_10H)) - return (-1); + if (cpuid_getvendor () != X86_VENDOR_AMD) + return -1; /* * Figure out processor revision here and assign appropriate * event configuration. */ - if (amd_family == OPTERON_FAMILY) + switch (amd_family) { + case OPTERON_FAMILY: amd_events = opt_events_rev_E; amd_generic_events = opt_generic_events; - } - else - { + break; + case AMD_FAMILY_10H: amd_events = family_10h_events; amd_generic_events = family_10h_generic_events; + break; } - return (0); + return 0; } static uint_t diff --git a/gprofng/src/collctrl.cc b/gprofng/src/collctrl.cc index 703344c..ebf888c 100644 --- a/gprofng/src/collctrl.cc +++ b/gprofng/src/collctrl.cc @@ -39,7 +39,7 @@ #include "libiberty.h" #include "collctrl.h" #include "hwcdrv.h" -//#include "hwcfuncs.h" +#include "StringBuilder.h" #define SP_GROUP_HEADER "#analyzer experiment group" #define DD_MAXPATHLEN (MAXPATHLEN * 4) /* large, to build up data descriptor */ @@ -55,7 +55,84 @@ extern const char *strsignal (int); #define _SC_CPUID_MAX 517 #endif -const char *get_fstype (char *); +static const char *get_fstype (char *); +static cpu_info_t cpu_info; + +static void +read_str (char *from, char **to) +{ + if (*to != NULL) + return; + for (char *s = from; *s; s++) + if (*s != ':' && *s != '\t' 
&& *s != ' ') + { + for (int i = ((int) strlen (s)) - 1; i >= 0; i--) + { + if (s[i] != '\n' && s[i] != ' ' && s[i] != '\t') + { + *to = strndup(s, i + 1); + return; + } + } + return; // string is empty + } +} + +static int +read_int (char *from) +{ + char *val = strchr (from, ':'); + if (val) + return atoi (val + 1); + return 0; +} + +cpu_info_t * +read_cpuinfo() +{ + static int inited = 0; + if (inited) + return &cpu_info; + inited = 1; + +#if defined(__aarch64__) + asm volatile("mrs %0, cntfrq_el0" : "=r" (cpu_info.cpu_clk_freq)); +#endif + + // Read /proc/cpuinfo to get CPU info and clock rate + FILE *procf = fopen ("/proc/cpuinfo", "r"); + if (procf != NULL) + { + char temp[1024]; + while (fgets (temp, (int) sizeof (temp), procf) != NULL) + { + if (strncmp (temp, "processor", 9) == 0) + cpu_info.cpu_cnt++; + else if (strncmp (temp, "cpu MHz", 7) == 0) + cpu_info.cpu_clk_freq = read_int (temp + 9); + else if (strncmp (temp, "cpu family", 10) == 0) + cpu_info.cpu_family = read_int (temp + 10); + else if (strncmp (temp, "vendor_id", 9) == 0) + { + if (cpu_info.cpu_vendorstr == NULL) + read_str (temp + 9, &cpu_info.cpu_vendorstr); + } + else if (strncmp (temp, "model name", 10) == 0) + { + if (cpu_info.cpu_modelstr == NULL) + read_str (temp + 10, &cpu_info.cpu_modelstr); + } + else if (strncmp (temp, "model", 5) == 0) + cpu_info.cpu_model = read_int (temp + 5); + else if (strncmp (temp, "CPU implementer", 15) == 0) + cpu_info.cpu_family = read_int (temp + 15); + else if (strncmp (temp, "CPU architecture", 16) == 0) + cpu_info.cpu_model = read_int (temp + 16); + } + fclose (procf); + } + return &cpu_info; +} Coll_Ctrl::Coll_Ctrl (int _interactive, bool _defHWC, bool _kernelHWC) { @@ -81,59 +158,9 @@ Coll_Ctrl::Coll_Ctrl (int _interactive, bool _defHWC, bool _kernelHWC) /* add 2048 to count, since on some systems CPUID does not start at zero */ ncpumax = ncpus + 2048; } - ncpus = 0; - cpu_clk_freq = 0; - - // On Linux, read /proc/cpuinfo to get CPU count and clock 
rate - // Note that parsing is different on SPARC and x86 -#if defined(sparc) - FILE *procf = fopen ("/proc/cpuinfo", "r"); - if (procf != NULL) - { - char temp[1024]; - while (fgets (temp, (int) sizeof (temp), procf) != NULL) - { - if (strncmp (temp, "Cpu", 3) == 0 && temp[3] != '\0' - && strncmp ((strchr (temp + 1, 'C')) ? strchr (temp + 1, 'C') - : (temp + 4), "ClkTck", 6) == 0) - { - ncpus++; - char *val = strchr (temp, ':'); - if (val) - { - unsigned long long freq; - sscanf (val + 2, "%llx", &freq); - cpu_clk_freq = (unsigned int) (((double) freq) / 1000000.0 + 0.5); - } - else - cpu_clk_freq = 0; - } - } - fclose (procf); - } - -#elif defined(__aarch64__) - asm volatile("mrs %0, cntfrq_el0" : "=r" (cpu_clk_freq)); - -#else - FILE *procf = fopen ("/proc/cpuinfo", "r"); - if (procf != NULL) - { - char temp[1024]; - while (fgets (temp, (int) sizeof (temp), procf) != NULL) - { - // x86 Linux - if (strncmp (temp, "processor", 9) == 0) - ncpus++; - else if (strncmp (temp, "cpu MHz", 7) == 0) - { - char *val = strchr (temp, ':'); - cpu_clk_freq = val ? atoi (val + 1) : 0; - } - } - fclose (procf); - } -#endif + cpu_info_t *cpu_p = read_cpuinfo(); + ncpus = cpu_p->cpu_cnt; + cpu_clk_freq = cpu_p->cpu_clk_freq; /* check resolution of system clock */ sys_resolution = sysconf (_SC_CLK_TCK); @@ -1720,78 +1747,62 @@ Coll_Ctrl::set_size_limit (const char *string) void Coll_Ctrl::build_data_desc () { - char spec[DD_MAXPATHLEN]; - spec[0] = 0; + StringBuilder sb; // Put sample sig before clock profiling. Dbx uses PROF // for that purpose and we want it to be processed first. 
if (project_home) - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "P:%s;", project_home); + sb.appendf ("P:%s;", project_home); if (sample_sig != 0) - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "g:%d;", sample_sig); + sb.appendf ("g:%d;", sample_sig); if (pauseresume_sig != 0) - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "d:%d%s;", pauseresume_sig, - (pauseresume_pause == 1 ? "p" : "")); + sb.appendf ("d:%d%s;", pauseresume_sig, pauseresume_pause == 1 ? "p" : ""); if (clkprof_enabled == 1) - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "p:%d;", clkprof_timer); + sb.appendf ("p:%d;", clkprof_timer); if (synctrace_enabled == 1) - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "s:%d,%d;", synctrace_thresh, synctrace_scope); + sb.appendf ("s:%d,%d;", synctrace_thresh, synctrace_scope); if (heaptrace_enabled == 1) - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "H:%d;", heaptrace_checkenabled); + sb.appendf ("H:%d;", heaptrace_checkenabled); if (iotrace_enabled == 1) - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "i:;"); + sb.append ("i:;"); if (hwcprof_enabled_cnt > 0) { - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "h:%s", - (hwcprof_default == true) ? "*" : ""); + sb.appendf ("h:%s", (hwcprof_default == true) ? "*" : ""); for (int ii = 0; ii < hwcprof_enabled_cnt; ii++) { - /* min_time is a "new" field. - * - * To help process_data_descriptor() in hwcfuncs.c parse - * the HWC portion of this string -- specifically, to - * recognize min_time when it's present and skip over - * when it's not -- we prepend 'm' to the min_time value. - * - * When we no longer worry about, say, an old dbx - * writing this string and a new libcollector looking for - * the min_time field, the 'm' character can be - * removed and process_data_descriptor() simplified. 
- */ - hrtime_t min_time = hwctr[ii].min_time; + Hwcentry *h = hwctr + ii; + hrtime_t min_time = h->min_time; if (min_time == HWCTIME_TBD) // user did not specify any value for overflow rate - min_time = hwctr[ii].min_time_default; - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), - "%s%s:%s:%d:%d:m%lld:%d:%d:0x%x", ii ? "," : "", - strcmp (hwctr[ii].name, hwctr[ii].int_name) ? hwctr[ii].name : "", - hwctr[ii].int_name, hwctr[ii].reg_num, hwctr[ii].val, - min_time, ii, /*tag*/ hwctr[ii].timecvt, hwctr[ii].memop); + min_time = h->min_time_default; + if (ii > 0) + sb.append (','); + sb.appendf ("%d:%d:%lld:%s:%s:%lld:%d:m%lld:%d:%d:0x%x", + h->use_perf_event_type, h->type, (long long) h->config, + strcmp (h->name, h->int_name) ? h->name : "", + h->int_name, (long long) h->reg_num, h->val, + (long long) min_time, ii, /*tag*/ h->timecvt, h->memop); } - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), ";"); + sb.append (";"); } - if ((time_run != 0) || (start_delay != 0)) + if (time_run != 0 || start_delay != 0) { if (start_delay != 0) - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "t:%d:%d;", start_delay, time_run); + sb.appendf ("t:%d:%d;", start_delay, time_run); else - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "t:%d;", time_run); + sb.appendf ("t:%d;", time_run); } if (sample_period != 0) - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "S:%d;", - sample_period); + sb.appendf ("S:%d;", sample_period); if (size_limit != 0) - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "L:%d;", - size_limit); + sb.appendf ("L:%d;", size_limit); if (java_mode != 0) - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "j:%d;", (int) java_mode); + sb.appendf ("j:%d;", (int) java_mode); if (follow_mode != FOLLOW_NONE) - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "F:%d;", (int) follow_mode); - snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), 
"a:%s;", archive_mode); - if (strlen (spec) + 1 >= sizeof (spec)) - abort (); + sb.appendf ("F:%d;", (int) follow_mode); + sb.appendf ("a:%s;", archive_mode); free (data_desc); - data_desc = strdup (spec); + data_desc = sb.toString (); } char * -- cgit v1.1