aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Vladimir Mezentsev <vladimir.mezentsev@oracle.com> 2024-01-08 22:00:24 -0800
committer: Vladimir Mezentsev <vladimir.mezentsev@oracle.com> 2024-01-10 09:31:08 -0800
commit: 8fe04eeb2cbb8c4cf7b6e8d9183fe09a8b2e8d51 (patch)
tree: c3c29dfecf57ea78d8de663665a8c4ab53706b6a
parent: e1cab50d2dd3d416662b5399bd80e6311a6ade0e (diff)
download: binutils-8fe04eeb2cbb8c4cf7b6e8d9183fe09a8b2e8d51.zip
binutils-8fe04eeb2cbb8c4cf7b6e8d9183fe09a8b2e8d51.tar.gz
binutils-8fe04eeb2cbb8c4cf7b6e8d9183fe09a8b2e8d51.tar.bz2
gprofng: 31123 improvements to hardware event implementation
Our hardware counter profiling is based on perf_event_open().
Our HWC tables are absent for new machines.
I have added HWC tables for the following events: PERF_TYPE_HARDWARE,
PERF_TYPE_SOFTWARE, PERF_TYPE_HW_CACHE. Other events require additional fixes.

Did a little cleaning: marked the symbols as static, used StringBuilder,
created a function to read /proc/cpuinfo.

gprofng/ChangeLog
2024-01-08 Vladimir Mezentsev <vladimir.mezentsev@oracle.com>

PR gprofng/31123
* common/core_pcbe.c: Mark the symbols as static. Add events_generic[].
* common/hwc_cpus.h: Declare a new function read_cpuinfo.
* common/hwcdrv.c: Add a new parameter in init_perf_event().
* common/hwcentry.h: Add use_perf_event_type in Hwcentry.
* common/hwcfuncs.c (process_data_descriptor): Read use_perf_event_type, type, config.
* common/hwctable.c: Add a new HWC table generic_list[].
* common/opteron_pcbe.c (opt_pcbe_init): Accept AMD machines.
* src/collctrl.cc: Use StringBuilder in Coll_Ctrl::build_data_desc(). Add a new function read_cpuinfo.
-rw-r--r-- gprofng/common/core_pcbe.c 46
-rw-r--r-- gprofng/common/hwc_cpus.h 16
-rw-r--r-- gprofng/common/hwcdrv.c 35
-rw-r--r-- gprofng/common/hwcentry.h 5
-rw-r--r-- gprofng/common/hwcfuncs.c 82
-rw-r--r-- gprofng/common/hwctable.c 124
-rw-r--r-- gprofng/common/opteron_pcbe.c 21
-rw-r--r-- gprofng/src/collctrl.cc 211
8 files changed, 293 insertions, 247 deletions
diff --git a/gprofng/common/core_pcbe.c b/gprofng/common/core_pcbe.c
index 14c4268..25bf484 100644
--- a/gprofng/common/core_pcbe.c
+++ b/gprofng/common/core_pcbe.c
@@ -2597,102 +2597,95 @@ struct events_table_t
static const struct events_table_t *events_table = NULL;
-const struct events_table_t events_fam6_mod23[] = {
+static const struct events_table_t events_fam6_mod23[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD23
NT_END
};
-const struct events_table_t events_fam6_mod28[] = {
+static const struct events_table_t events_fam6_mod28[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD28
NT_END
};
-const struct events_table_t events_fam6_mod26[] = {
+static const struct events_table_t events_fam6_mod26[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD26
NT_END
};
-const struct events_table_t events_fam6_mod46[] = {
+static const struct events_table_t events_fam6_mod46[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD26
EVENTS_FAM6_MOD46_ONLY
NT_END
};
-const struct events_table_t events_fam6_mod37[] = {
+static const struct events_table_t events_fam6_mod37[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD37
EVENTS_FAM6_MOD37_ALSO
NT_END
};
-const struct events_table_t events_fam6_mod47[] = {
+static const struct events_table_t events_fam6_mod47[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD37
NT_END
};
-const struct events_table_t events_fam6_mod42[] = {
+static const struct events_table_t events_fam6_mod42[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD42
EVENTS_FAM6_MOD42_ONLY
NT_END
};
-const struct events_table_t events_fam6_mod45[] = {
+static const struct events_table_t events_fam6_mod45[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD42
EVENTS_FAM6_MOD45_ONLY
NT_END
};
-const struct events_table_t events_fam6_mod58[] = {
+static const struct events_table_t events_fam6_mod58[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD58
NT_END
};
-const struct events_table_t events_fam6_mod62[] = {
+static const struct events_table_t events_fam6_mod62[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD58
EVENTS_FAM6_MOD62_ONLY
NT_END
};
-const struct events_table_t events_fam6_mod60[] = {
+static const struct events_table_t events_fam6_mod60[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD60
NT_END
};
-const struct events_table_t events_fam6_mod61[] = {
+static const struct events_table_t events_fam6_mod61[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD61
NT_END
};
-const struct events_table_t events_fam6_mod78[] = {
+static const struct events_table_t events_fam6_mod78[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD78
NT_END
};
-const struct events_table_t events_fam6_unknown[] = {
+static const struct events_table_t events_fam6_unknown[] = {
ARCH_EVENTS
NT_END
};
-const struct events_table_t events_fam_arm[] = {
-// ARCH_EVENTS
-// *eventnum = pevent->eventselect;
-// *eventnum |= (pevent->unitmask << PERFCTR_UMASK_SHIFT);
-// *eventnum |= (pevent->attrs << 16);
-// *eventnum |= (pevent->cmask << 24);
-// eventselect, unitmask, supported_counters, name, cmask, attrs, msr_offset
-
+const struct events_table_t events_generic[] = {
// Hardware event
#define HWE(nm, id) { id, 0, C_ALL, nm, PERF_TYPE_HARDWARE, 0, 0 },
HWE("branch-instructions", PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
@@ -2741,13 +2734,20 @@ core_pcbe_init (void)
{
switch (cpuid_getvendor ())
{
+ case X86_VENDOR_AMD:
+ snprintf (core_impl_name, sizeof (core_impl_name), "%s", X86_VENDORSTR_AMD);
+ events_table = events_generic;
+ num_gpc = 4;
+ num_ffc = 0;
+ total_pmc = num_gpc + num_ffc;
+ return 0;
case ARM_CPU_IMP_ARM:
case ARM_CPU_IMP_BRCM:
case ARM_CPU_IMP_CAVIUM:
case ARM_CPU_IMP_APM:
case ARM_CPU_IMP_QCOM:
snprintf (core_impl_name, sizeof (core_impl_name), "%s", AARCH64_VENDORSTR_ARM);
- events_table = events_fam_arm;
+ events_table = events_generic;
num_gpc = 4; // MEZ: a real implementation is needed
num_ffc = 0;
total_pmc = num_gpc + num_ffc;
diff --git a/gprofng/common/hwc_cpus.h b/gprofng/common/hwc_cpus.h
index 634aa4f..34896d6 100644
--- a/gprofng/common/hwc_cpus.h
+++ b/gprofng/common/hwc_cpus.h
@@ -23,6 +23,19 @@
#ifndef __HWC_CPUS_H
#define __HWC_CPUS_H
+typedef struct
+{
+ int cpu_cnt;
+ int cpu_clk_freq;
+ int cpu_model;
+ int cpu_family;
+ int cpu_vendor;
+ char *cpu_vendorstr;
+ char *cpu_modelstr;
+} cpu_info_t;
+
+extern cpu_info_t *read_cpuinfo();
+
#define MAX_PICS 20 /* Max # of HW ctrs that can be enabled simultaneously */
/* type for specifying CPU register number */
@@ -91,6 +104,8 @@
#define CPC_AMD_FAM_10H 2501 /* Barcelona, Shanghai... */
#define CPC_AMD_FAM_11H 2502 /* Griffin... */
#define CPC_AMD_FAM_15H 2503
+#define CPC_AMD_Authentic 2504
+
#define CPC_KPROF 3003 // OBSOLETE (To support 12.3 and earlier)
#define CPC_FOX 3004 /* pseudo-chip */
@@ -191,6 +206,7 @@ enum {
{CPC_ULTRA2 , "UltraSPARC I&II"}, \
{CPC_ULTRA1 , "UltraSPARC I&II"}, \
{ARM_CPU_IMP_APM , AARCH64_VENDORSTR_ARM}, \
+ {CPC_AMD_Authentic , "AuthenticAMD"}, \
{0, NULL}
/* init like this:
static libcpc2_cpu_lookup_t cpu_table[]={LIBCPC2_CPU_LOOKUP_LIST};
diff --git a/gprofng/common/hwcdrv.c b/gprofng/common/hwcdrv.c
index 29a0e17..2d549b0 100644
--- a/gprofng/common/hwcdrv.c
+++ b/gprofng/common/hwcdrv.c
@@ -675,26 +675,21 @@ dump_perf_event_attr (struct perf_event_attr *at)
}
static void
-init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period)
+init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period,
+ Hwcentry *hwce)
{
memset (hw, 0, sizeof (struct perf_event_attr));
- hw->size = sizeof (struct perf_event_attr); // fwd/bwd compat
-
-#if defined(__i386__) || defined(__x86_64)
- //note: Nehalem/Westmere OFFCORE_RESPONSE in upper 32 bits
- hw->config = event;
- hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw...
-#elif defined(__aarch64__)
- hw->type = (event >> 24) & 7;
- hw->config = event & 0xff;
-#elif defined(sparc)
- //SPARC needs to be shifted up 16 bits
- hw->config = (event & 0xFFFF) << 16; // uint64_t event
- uint64_t regs = (event >> 20) & 0xf; // see sparc_pcbe.c
- hw->config |= regs << 4; // for M8, supported PICs need to be placed at bits [7:4]
- hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw...
-#endif
-
+ hw->size = sizeof (struct perf_event_attr);
+ if (hwce && hwce->use_perf_event_type)
+ {
+ hw->config = hwce->config;
+ hw->type = hwce->type;
+ }
+ else
+ { // backward compatibility. The old interface had no 'hwce' argument.
+ hw->config = event;
+ hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw...
+ }
hw->sample_period = period;
hw->sample_type = PERF_SAMPLE_IP |
// PERF_SAMPLE_TID |
@@ -858,7 +853,7 @@ hdrv_pcl_internal_open ()
perf_event_def_t tmp_event_def;
memset (&tmp_event_def, 0, sizeof (tmp_event_def));
struct perf_event_attr *pe_attr = &tmp_event_def.hw;
- init_perf_event (pe_attr, 0, 0);
+ init_perf_event (pe_attr, 0, 0, NULL);
pe_attr->type = PERF_TYPE_HARDWARE; // specify abstracted HW event
pe_attr->config = PERF_COUNT_HW_INSTRUCTIONS; // specify abstracted insts
int hwc_fd = perf_event_open (pe_attr,
@@ -1283,7 +1278,7 @@ hwcdrv_create_counters (unsigned hwcdef_cnt, Hwcentry *hwcdef)
glb_event_def->min_time = hwcdef[idx].min_time;
glb_event_def->name = strdup (hwcdef[idx].name); // memory leak??? very minor
init_perf_event (&glb_event_def->hw, glb_event_def->eventsel,
- glb_event_def->counter_preload);
+ glb_event_def->counter_preload, hwcdef + idx);
TprintfT (DBG_LT1, "hwcdrv: create_counters: pic=%u name='%s' interval=%lld"
"(min_time=%lld): reg_num=0x%x eventsel=0x%llx ireset=%lld usr=%lld sys=%lld\n",
idx, hwcdef[idx].int_name, (long long) glb_event_def->counter_preload,
diff --git a/gprofng/common/hwcentry.h b/gprofng/common/hwcentry.h
index 739bc4e..a35a363 100644
--- a/gprofng/common/hwcentry.h
+++ b/gprofng/common/hwcentry.h
@@ -112,11 +112,12 @@ extern "C"
int timecvt; /* multiplier to convert metric to time, 0 if N/A */
ABST_type memop; /* type of backtracking allowed */
char *short_desc; /* optional one-liner description, or NULL */
- int type; /* Type of perf_event_attr */
- long long config; /* perf_event_type -specific configuration */
/* the fields above this line are expected, in order, by the tables in hwctable.c */
/* ================================================== */
/* the fields below this line are more flexible */
+ unsigned int use_perf_event_type : 16; /* Set 1 to use two fields below */
+ unsigned int type : 16; /* Type of perf_event_attr */
+ long long config; /* perf_event_type -specific configuration */
int sort_order; /* "tag" to associate experiment record with HWC def */
regno_t *reg_list; /* if not NULL, legal values for <reg_num> field above */
/* Note: reg_list will be terminated by REGNO_ANY */
diff --git a/gprofng/common/hwcfuncs.c b/gprofng/common/hwcfuncs.c
index 3c44ab6..86d6935 100644
--- a/gprofng/common/hwcfuncs.c
+++ b/gprofng/common/hwcfuncs.c
@@ -259,18 +259,11 @@ process_data_descriptor (const char *defstring)
clear_hwcdefs ();
if (!defstring || !strlen (defstring))
- {
- err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
- }
+ return HWCFUNCS_ERROR_HWCARGS;
ds = strdup (defstring);
if (!ds)
- {
- err = HWCFUNCS_ERROR_HWCINIT;
- goto ext_hw_install_end;
- }
+ return HWCFUNCS_ERROR_HWCINIT;
dsp = ds;
-
for (idx = 0; idx < MAX_PICS && *dsp; idx++)
{
char *name = NULL;
@@ -281,13 +274,33 @@ process_data_descriptor (const char *defstring)
int timecvt = 0;
unsigned sort_order = (unsigned) - 1;
+ // Read use_perf_event_type, type, config
+ hwcdef[idx].use_perf_event_type = (int) strtol (dsp, &dsp, 0);
+ if (*dsp++ != ':')
+ {
+ err = HWCFUNCS_ERROR_HWCARGS;
+ break;
+ }
+ hwcdef[idx].type = (int) strtol (dsp, &dsp, 0);
+ if (*dsp++ != ':')
+ {
+ err = HWCFUNCS_ERROR_HWCARGS;
+ break;
+ }
+ hwcdef[idx].config = strtol (dsp, &dsp, 0);
+ if (*dsp++ != ':')
+ {
+ err = HWCFUNCS_ERROR_HWCARGS;
+ break;
+ }
+
/* name */
name = dsp;
dsp = strchr (dsp, ':');
if (dsp == NULL)
{
err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
+ break;
}
*dsp++ = (char) 0;
@@ -297,7 +310,7 @@ process_data_descriptor (const char *defstring)
if (dsp == NULL)
{
err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
+ break;
}
*dsp++ = (char) 0;
@@ -306,12 +319,12 @@ process_data_descriptor (const char *defstring)
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
+ break;
}
if (reg < 0 && reg != -1)
{
err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
+ break;
}
if (reg >= 0)
hwcdef[idx].reg_num = reg;
@@ -321,21 +334,16 @@ process_data_descriptor (const char *defstring)
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
+ break;
}
if (interval < 0)
{
err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
+ break;
}
hwcdef[idx].val = interval;
/* min_time */
- /*
- * This is a new field.
- * An old launcher (dbx, etc.) would not include it.
- * Detect the presence of the field by the char 'm'.
- */
if (*dsp == 'm')
{
long long tmp_ll = 0;
@@ -344,12 +352,12 @@ process_data_descriptor (const char *defstring)
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
+ break;
}
if (tmp_ll < 0)
{
err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
+ break;
}
hwcdef[idx].min_time = tmp_ll;
}
@@ -361,7 +369,7 @@ process_data_descriptor (const char *defstring)
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
+ break;
}
hwcdef[idx].sort_order = sort_order;
@@ -370,7 +378,7 @@ process_data_descriptor (const char *defstring)
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
+ break;
}
hwcdef[idx].timecvt = timecvt;
@@ -379,7 +387,7 @@ process_data_descriptor (const char *defstring)
if (*dsp != 0 && *dsp++ != ',')
{
err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
+ break;
}
hwcdef[idx].memop = memop;
if (*name)
@@ -394,27 +402,11 @@ process_data_descriptor (const char *defstring)
}
if (*dsp)
- {
- TprintfT (DBG_LT0, "hwcfuncs: ERROR: process_data_descriptor(): "
- "ctr string had some trailing garbage:"
- " '%s'\n", dsp);
- err = HWCFUNCS_ERROR_HWCARGS;
- goto ext_hw_install_end;
- }
- free (ds);
- hwcdef_cnt = idx;
- return 0;
-
-ext_hw_install_end:
- if (dsp && *dsp)
- {
- TprintfT (DBG_LT0, "hwcfuncs: ERROR: process_data_descriptor(): "
- " syntax error just before:"
- " '%s;\n", dsp);
- logerr (GTXT ("Data descriptor syntax error near `%s'\n"), dsp);
- }
+ err = HWCFUNCS_ERROR_HWCARGS;
+ if (err != 0)
+ logerr (GTXT ("Data descriptor syntax error near `%s'\n"), dsp);
else
- logerr (GTXT ("Data descriptor syntax error\n"));
+ hwcdef_cnt = idx;
free (ds);
return err;
}
diff --git a/gprofng/common/hwctable.c b/gprofng/common/hwctable.c
index 2a3b59a..d073513 100644
--- a/gprofng/common/hwctable.c
+++ b/gprofng/common/hwctable.c
@@ -23,9 +23,9 @@
#include <errno.h>
#include <string.h>
#include <limits.h>
+#include <linux/perf_event.h>
#include "hwcdrv.h"
-#include "hwcfuncs.h"
/* TprintfT(<level>,...) definitions. Adjust per module as needed */
#define DBG_LT0 0 // for high-level configuration, unexpected errors/warnings
@@ -2367,52 +2367,87 @@ static Hwcentry amd_15h[] = {
{NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
};
-#define USE_ARM_REF_CYCLES \
- {"usr_time","cycles", REGNO_ANY, STXT("User CPU"), PRELOADS_85, 1, ABST_NONE}, \
- {"sys_time","cycles~system=1~user=0", REGNO_ANY, STXT("System CPU"), PRELOADS_85, 1, ABST_NONE}, \
+#define INIT_HWC(nm, mtr, cfg, ty) .name = (nm), .metric = (mtr), \
+ .config = (cfg), .type = ty, .use_perf_event_type = 1, \
+ .val = PRELOAD_DEF, .reg_num = REGNO_ANY
+#define HWE(nm, mtr, cfg) INIT_HWC(nm, mtr, cfg, PERF_TYPE_HARDWARE)
+#define SWE(nm, mtr, cfg) INIT_HWC(nm, mtr, cfg, PERF_TYPE_SOFTWARE)
+#define HWCE(nm, mtr, id, op, res) \
+ INIT_HWC(nm, mtr, (id) | ((op) << 8) | ((res) << 16), PERF_TYPE_HW_CACHE)
-static Hwcentry armlist[] = {
- USE_ARM_REF_CYCLES
+static Hwcentry generic_list[] = {
// Hardware event:
- {"branch-instructions", NULL, REGNO_ANY, STXT("Branch-instructions"), PRELOADS_35, 0, ABST_NONE},
- {"branch-misses", NULL, REGNO_ANY, STXT("Branch-misses"), PRELOADS_35, 0, ABST_NONE},
- {"bus-cycles", NULL, REGNO_ANY, STXT("Bus Cycles"), PRELOADS_35, 1, ABST_NONE},
- {"cache-misses", NULL, REGNO_ANY, STXT("Cache-misses"), PRELOADS_35, 0, ABST_NONE},
- {"cache-references", NULL, REGNO_ANY, STXT("Cache-references"), PRELOADS_35, 0, ABST_NONE},
- {"cycles", NULL, REGNO_ANY, STXT("CPU Cycles"), PRELOADS_85, 1, ABST_NONE},
- {"insts", "instructions", REGNO_ANY, STXT("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
- {"ref-cycles", NULL, REGNO_ANY, STXT("Total Cycles"), PRELOADS_85, 1, ABST_NONE},
- {"stalled-cycles-backend", NULL, REGNO_ANY, STXT("Stalled Cycles during issue."), PRELOADS_85, 1, ABST_NONE},
- {"stalled-cycles-frontend", NULL, REGNO_ANY, STXT("Stalled Cycles during retirement."), PRELOADS_85, 1, ABST_NONE},
-
+ { HWE("usr_time", STXT("User CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,
+ .int_name = "cycles" },
+ { HWE("sys_time", STXT("System CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,
+ .int_name = "cycles~system=1~user=0" },
+ { HWE("branch-instructions", STXT("Branch-instructions"),
+ PERF_COUNT_HW_BRANCH_INSTRUCTIONS) },
+ { HWE("branch-misses", STXT("Branch-misses"), PERF_COUNT_HW_BRANCH_MISSES) },
+ { HWE("bus-cycles", STXT("Bus Cycles"), PERF_COUNT_HW_BUS_CYCLES),
+ .timecvt = 1 },
+ { HWE("cache-misses", STXT("Cache-misses"), PERF_COUNT_HW_CACHE_MISSES) },
+ { HWE("cache-references", STXT("Cache-references"),
+ PERF_COUNT_HW_CACHE_REFERENCES) },
+ { HWE("cycles", STXT("CPU Cycles"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1 },
+ { HWE("insts", STXT("Instructions Executed"), PERF_COUNT_HW_INSTRUCTIONS),
+ .int_name = "instructions" },
+ { HWE("ref-cycles", STXT("Total Cycles"), PERF_COUNT_HW_REF_CPU_CYCLES),
+ .timecvt = 1 },
+ { HWE("stalled-cycles-backend", STXT("Stalled Cycles during issue."),
+ PERF_COUNT_HW_STALLED_CYCLES_BACKEND), .timecvt = 1 },
+ { HWE("stalled-cycles-frontend", STXT("Stalled Cycles during retirement."),
+ PERF_COUNT_HW_STALLED_CYCLES_FRONTEND), .timecvt = 1 },
// Software event:
- {"alignment-faults", NULL, REGNO_ANY, STXT("Alignment Faults"), PRELOADS_85, 0, ABST_NONE},
- {"context-switches", NULL, REGNO_ANY, STXT("Context Switches"), PRELOADS_85, 0, ABST_NONE},
- {"cpu-clock", NULL, REGNO_ANY, STXT("CPU Clock"), PRELOADS_85, 1, ABST_NONE},
- {"cpu-migrations", NULL, REGNO_ANY, STXT("CPU Migrations"), PRELOADS_85, 0, ABST_NONE},
- {"emulation-faults", NULL, REGNO_ANY, STXT("Emulation Faults"), PRELOADS_85, 0, ABST_NONE},
- {"major-faults", NULL, REGNO_ANY, STXT("Major Page Faults"), PRELOADS_85, 0, ABST_NONE},
- {"minor-faults", NULL, REGNO_ANY, STXT("Minor Page Faults"), PRELOADS_85, 0, ABST_NONE},
- {"page-faults", NULL, REGNO_ANY, STXT("Page Faults"), PRELOADS_85, 0, ABST_NONE},
- {"task-clock", NULL, REGNO_ANY, STXT("Clock Count Specific"), PRELOADS_85, 1, ABST_NONE},
-
+ { SWE("alignment-faults", STXT("Alignment Faults"),
+ PERF_COUNT_SW_ALIGNMENT_FAULTS) },
+ { SWE("context-switches", STXT("Context Switches"),
+ PERF_COUNT_SW_CONTEXT_SWITCHES) },
+ { SWE("cpu-clock", STXT("CPU Clock"), PERF_COUNT_SW_CPU_CLOCK),
+ .timecvt = 1 },
+ { SWE("cpu-migrations", STXT("CPU Migrations"),
+ PERF_COUNT_SW_CPU_MIGRATIONS) },
+ { SWE("emulation-faults", STXT("Emulation Faults"),
+ PERF_COUNT_SW_EMULATION_FAULTS) },
+ { SWE("major-faults", STXT("Major Page Faults"),
+ PERF_COUNT_SW_PAGE_FAULTS_MAJ) },
+ { SWE("minor-faults", STXT("Minor Page Faults"),
+ PERF_COUNT_SW_PAGE_FAULTS_MIN) },
+ { SWE("page-faults", STXT("Page Faults"), PERF_COUNT_SW_PAGE_FAULTS) },
+ { SWE("task-clock", STXT("Clock Count Specific"), PERF_COUNT_SW_TASK_CLOCK),
+ .timecvt = 1 },
// Hardware cache event
- {"L1-dcache-load-misses", NULL, REGNO_ANY, STXT("L1 D-cache Load Misses"), PRELOADS_35, 0, ABST_NONE},
- {"L1-dcache-loads", NULL, REGNO_ANY, STXT("L1 D-cache Loads"), PRELOADS_35, 0, ABST_NONE},
- {"L1-dcache-store-misses", NULL, REGNO_ANY, STXT("L1 D-cache Store Misses"), PRELOADS_35, 0, ABST_NONE},
- {"L1-dcache-stores", NULL, REGNO_ANY, STXT("L1 D-cache Store Stores"), PRELOADS_35, 0, ABST_NONE},
- {"L1-icache-load-misses", NULL, REGNO_ANY, STXT("L1 Instructions Load Misses"), PRELOADS_35, 0, ABST_NONE},
- {"L1-icache-load-misses", NULL, REGNO_ANY, STXT("L1 Instructions Loads"), PRELOADS_35, 0, ABST_NONE},
- {"dTLB-load-misses", NULL, REGNO_ANY, STXT("D-TLB Load Misses"), PRELOADS_35, 0, ABST_NONE},
- {"dTLB-loads", NULL, REGNO_ANY, STXT("D-TLB Loads"), PRELOADS_35, 0, ABST_NONE},
- {"iTLB-load-misses", NULL, REGNO_ANY, STXT("The Instruction TLB Load Misses"), PRELOADS_35, 0, ABST_NONE},
- {"iTLB-loads", NULL, REGNO_ANY, STXT("The Instruction TLB Loads"), PRELOADS_35, 0, ABST_NONE},
-
- {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
-};
+ { HWCE("L1-dcache-load-misses", STXT("L1 D-cache Load Misses"),
+ PERF_COUNT_HW_CACHE_L1D,
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
+ { HWCE("L1-dcache-loads", STXT("L1 D-cache Loads"),
+ PERF_COUNT_HW_CACHE_L1D,
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
+ { HWCE("L1-dcache-store-misses", STXT("L1 D-cache Store Misses"),
+ PERF_COUNT_HW_CACHE_L1D,
+ PERF_COUNT_HW_CACHE_RESULT_MISS, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
+ { HWCE("L1-dcache-stores", STXT("L1 D-cache Store Stores"),
+ PERF_COUNT_HW_CACHE_L1D,
+ PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
+ { HWCE("L1-icache-load-misses", STXT("L1 Instructions Load Misses"),
+ PERF_COUNT_HW_CACHE_L1I,
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
+ { HWCE("L1-icache-load-misses", STXT("L1 Instructions Loads"),
+ PERF_COUNT_HW_CACHE_L1I,
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
+ { HWCE("dTLB-load-misses", STXT("D-TLB Load Misses"),
+ PERF_COUNT_HW_CACHE_DTLB,
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
+ { HWCE("dTLB-loads", STXT("D-TLB Loads"),
+ PERF_COUNT_HW_CACHE_DTLB,
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
+ { HWCE("iTLB-load-misses", STXT("The Instruction TLB Load Misses"),
+ PERF_COUNT_HW_CACHE_ITLB,
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
+ { HWCE("iTLB-loads", STXT("The Instruction TLB Loads"),
+ PERF_COUNT_HW_CACHE_ITLB,
+ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
-static Hwcentry unknownlist[] =
- /* used for unrecognized CPU type */{
{NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
};
@@ -2485,8 +2520,9 @@ static cpu_list_t cputabs[] = {
{CPC_SPARC64_X, usfuji_X_list, {"insts,,cycles,,dcstall", 0}},
{CPC_SPARC64_XII, usfuji_XII_list, {"insts,,cycles,,dcstall", 0}},
{CPC_KPROF, kproflist, {NULL}}, // OBSOLETE (To support 12.3 and earlier, TBR)
- {ARM_CPU_IMP_APM, armlist, {"insts,,cycles", 0}},
- {0, unknownlist, {NULL}} /* processor is unknown, but experiment is allowed */
+ {ARM_CPU_IMP_APM, generic_list, {"insts,,cycles", 0}},
+ {CPC_AMD_Authentic, generic_list, {"insts,,cycles", 0}},
+ {0, generic_list, {"insts,,cycles", 0}},
};
/*---------------------------------------------------------------------------*/
diff --git a/gprofng/common/opteron_pcbe.c b/gprofng/common/opteron_pcbe.c
index 4104a13..0f1815d 100644
--- a/gprofng/common/opteron_pcbe.c
+++ b/gprofng/common/opteron_pcbe.c
@@ -315,30 +315,25 @@ static int
opt_pcbe_init (void)
{
amd_family = cpuid_getfamily ();
- /*
- * Make sure this really _is_ an Opteron or Athlon 64 system. The kernel
- * loads this module based on its name in the module directory, but it
- * could have been renamed.
- */
- if (cpuid_getvendor () != X86_VENDOR_AMD
- || (amd_family != OPTERON_FAMILY && amd_family != AMD_FAMILY_10H))
- return (-1);
+ if (cpuid_getvendor () != X86_VENDOR_AMD)
+ return -1;
/*
* Figure out processor revision here and assign appropriate
* event configuration.
*/
- if (amd_family == OPTERON_FAMILY)
+ switch (amd_family)
{
+ case OPTERON_FAMILY:
amd_events = opt_events_rev_E;
amd_generic_events = opt_generic_events;
- }
- else
- {
+ break;
+ case AMD_FAMILY_10H:
amd_events = family_10h_events;
amd_generic_events = family_10h_generic_events;
+ break;
}
- return (0);
+ return 0;
}
static uint_t
diff --git a/gprofng/src/collctrl.cc b/gprofng/src/collctrl.cc
index 703344c..ebf888c 100644
--- a/gprofng/src/collctrl.cc
+++ b/gprofng/src/collctrl.cc
@@ -39,7 +39,7 @@
#include "libiberty.h"
#include "collctrl.h"
#include "hwcdrv.h"
-//#include "hwcfuncs.h"
+#include "StringBuilder.h"
#define SP_GROUP_HEADER "#analyzer experiment group"
#define DD_MAXPATHLEN (MAXPATHLEN * 4) /* large, to build up data descriptor */
@@ -55,7 +55,84 @@ extern const char *strsignal (int);
#define _SC_CPUID_MAX 517
#endif
-const char *get_fstype (char *);
+static const char *get_fstype (char *);
+static cpu_info_t cpu_info;
+
+static void
+read_str (char *from, char **to)
+{
+ if (*to != NULL)
+ return;
+ for (char *s = from; *s; s++)
+ if (*s != ':' && *s != '\t' && *s != ' ')
+ {
+ for (int i = ((int) strlen (s)) - 1; i >= 0; i--)
+ {
+ if (s[i] != '\n' && s[i] != ' ' && s[i] != '\t')
+ {
+ *to = strndup(s, i + 1);
+ return;
+ }
+ }
+ return; // string is empty
+ }
+}
+
+static int
+read_int (char *from)
+{
+ char *val = strchr (from, ':');
+ if (val)
+ return atoi (val + 1);
+ return 0;
+}
+
+cpu_info_t *
+read_cpuinfo()
+{
+ static int inited = 0;
+ if (inited)
+ return &cpu_info;
+ inited = 1;
+
+#if defined(__aarch64__)
+ asm volatile("mrs %0, cntfrq_el0" : "=r" (cpu_info.cpu_clk_freq));
+#endif
+
+ // Read /proc/cpuinfo to get CPU info and clock rate
+ FILE *procf = fopen ("/proc/cpuinfo", "r");
+ if (procf != NULL)
+ {
+ char temp[1024];
+ while (fgets (temp, (int) sizeof (temp), procf) != NULL)
+ {
+ if (strncmp (temp, "processor", 9) == 0)
+ cpu_info.cpu_cnt++;
+ else if (strncmp (temp, "cpu MHz", 7) == 0)
+ cpu_info.cpu_clk_freq = read_int (temp + 9);
+ else if (strncmp (temp, "cpu family", 10) == 0)
+ cpu_info.cpu_family = read_int (temp + 10);
+ else if (strncmp (temp, "vendor_id", 9) == 0)
+ {
+ if (cpu_info.cpu_vendorstr == NULL)
+ read_str (temp + 9, &cpu_info.cpu_vendorstr);
+ }
+ else if (strncmp (temp, "model name", 10) == 0)
+ {
+ if (cpu_info.cpu_modelstr == NULL)
+ read_str (temp + 10, &cpu_info.cpu_modelstr);
+ }
+ else if (strncmp (temp, "model", 5) == 0)
+ cpu_info.cpu_model = read_int (temp + 5);
+ else if (strncmp (temp, "CPU implementer", 15) == 0)
+ cpu_info.cpu_family = read_int (temp + 15);
+ else if (strncmp (temp, "CPU architecture", 16) == 0)
+ cpu_info.cpu_model = read_int (temp + 16);
+ }
+ fclose (procf);
+ }
+ return &cpu_info;
+}
Coll_Ctrl::Coll_Ctrl (int _interactive, bool _defHWC, bool _kernelHWC)
{
@@ -81,59 +158,9 @@ Coll_Ctrl::Coll_Ctrl (int _interactive, bool _defHWC, bool _kernelHWC)
/* add 2048 to count, since on some systems CPUID does not start at zero */
ncpumax = ncpus + 2048;
}
- ncpus = 0;
- cpu_clk_freq = 0;
-
- // On Linux, read /proc/cpuinfo to get CPU count and clock rate
- // Note that parsing is different on SPARC and x86
-#if defined(sparc)
- FILE *procf = fopen ("/proc/cpuinfo", "r");
- if (procf != NULL)
- {
- char temp[1024];
- while (fgets (temp, (int) sizeof (temp), procf) != NULL)
- {
- if (strncmp (temp, "Cpu", 3) == 0 && temp[3] != '\0'
- && strncmp ((strchr (temp + 1, 'C')) ? strchr (temp + 1, 'C')
- : (temp + 4), "ClkTck", 6) == 0)
- {
- ncpus++;
- char *val = strchr (temp, ':');
- if (val)
- {
- unsigned long long freq;
- sscanf (val + 2, "%llx", &freq);
- cpu_clk_freq = (unsigned int) (((double) freq) / 1000000.0 + 0.5);
- }
- else
- cpu_clk_freq = 0;
- }
- }
- fclose (procf);
- }
-
-#elif defined(__aarch64__)
- asm volatile("mrs %0, cntfrq_el0" : "=r" (cpu_clk_freq));
-
-#else
- FILE *procf = fopen ("/proc/cpuinfo", "r");
- if (procf != NULL)
- {
- char temp[1024];
- while (fgets (temp, (int) sizeof (temp), procf) != NULL)
- {
- // x86 Linux
- if (strncmp (temp, "processor", 9) == 0)
- ncpus++;
- else if (strncmp (temp, "cpu MHz", 7) == 0)
- {
- char *val = strchr (temp, ':');
- cpu_clk_freq = val ? atoi (val + 1) : 0;
- }
- }
- fclose (procf);
- }
-#endif
+ cpu_info_t *cpu_p = read_cpuinfo();
+ ncpus = cpu_p->cpu_cnt;
+ cpu_clk_freq = cpu_p->cpu_clk_freq;
/* check resolution of system clock */
sys_resolution = sysconf (_SC_CLK_TCK);
@@ -1720,78 +1747,62 @@ Coll_Ctrl::set_size_limit (const char *string)
void
Coll_Ctrl::build_data_desc ()
{
- char spec[DD_MAXPATHLEN];
- spec[0] = 0;
+ StringBuilder sb;
// Put sample sig before clock profiling. Dbx uses PROF
// for that purpose and we want it to be processed first.
if (project_home)
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "P:%s;", project_home);
+ sb.appendf ("P:%s;", project_home);
if (sample_sig != 0)
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "g:%d;", sample_sig);
+ sb.appendf ("g:%d;", sample_sig);
if (pauseresume_sig != 0)
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "d:%d%s;", pauseresume_sig,
- (pauseresume_pause == 1 ? "p" : ""));
+ sb.appendf ("d:%d%s;", pauseresume_sig, pauseresume_pause == 1 ? "p" : "");
if (clkprof_enabled == 1)
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "p:%d;", clkprof_timer);
+ sb.appendf ("p:%d;", clkprof_timer);
if (synctrace_enabled == 1)
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "s:%d,%d;", synctrace_thresh, synctrace_scope);
+ sb.appendf ("s:%d,%d;", synctrace_thresh, synctrace_scope);
if (heaptrace_enabled == 1)
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "H:%d;", heaptrace_checkenabled);
+ sb.appendf ("H:%d;", heaptrace_checkenabled);
if (iotrace_enabled == 1)
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "i:;");
+ sb.append ("i:;");
if (hwcprof_enabled_cnt > 0)
{
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "h:%s",
- (hwcprof_default == true) ? "*" : "");
+ sb.appendf ("h:%s", (hwcprof_default == true) ? "*" : "");
for (int ii = 0; ii < hwcprof_enabled_cnt; ii++)
{
- /* min_time is a "new" field.
- *
- * To help process_data_descriptor() in hwcfuncs.c parse
- * the HWC portion of this string -- specifically, to
- * recognize min_time when it's present and skip over
- * when it's not -- we prepend 'm' to the min_time value.
- *
- * When we no longer worry about, say, an old dbx
- * writing this string and a new libcollector looking for
- * the min_time field, the 'm' character can be
- * removed and process_data_descriptor() simplified.
- */
- hrtime_t min_time = hwctr[ii].min_time;
+ Hwcentry *h = hwctr + ii;
+ hrtime_t min_time = h->min_time;
if (min_time == HWCTIME_TBD)
// user did not specify any value for overflow rate
- min_time = hwctr[ii].min_time_default;
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec),
- "%s%s:%s:%d:%d:m%lld:%d:%d:0x%x", ii ? "," : "",
- strcmp (hwctr[ii].name, hwctr[ii].int_name) ? hwctr[ii].name : "",
- hwctr[ii].int_name, hwctr[ii].reg_num, hwctr[ii].val,
- min_time, ii, /*tag*/ hwctr[ii].timecvt, hwctr[ii].memop);
+ min_time = h->min_time_default;
+ if (ii > 0)
+ sb.append (',');
+ sb.appendf ("%d:%d:%lld:%s:%s:%lld:%d:m%lld:%d:%d:0x%x",
+ h->use_perf_event_type, h->type, (long long) h->config,
+ strcmp (h->name, h->int_name) ? h->name : "",
+ h->int_name, (long long) h->reg_num, h->val,
+ (long long) min_time, ii, /*tag*/ h->timecvt, h->memop);
}
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), ";");
+ sb.append (";");
}
- if ((time_run != 0) || (start_delay != 0))
+ if (time_run != 0 || start_delay != 0)
{
if (start_delay != 0)
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "t:%d:%d;", start_delay, time_run);
+ sb.appendf ("t:%d:%d;", start_delay, time_run);
else
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "t:%d;", time_run);
+ sb.appendf ("t:%d;", time_run);
}
if (sample_period != 0)
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "S:%d;",
- sample_period);
+ sb.appendf ("S:%d;", sample_period);
if (size_limit != 0)
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "L:%d;",
- size_limit);
+ sb.appendf ("L:%d;", size_limit);
if (java_mode != 0)
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "j:%d;", (int) java_mode);
+ sb.appendf ("j:%d;", (int) java_mode);
if (follow_mode != FOLLOW_NONE)
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "F:%d;", (int) follow_mode);
- snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "a:%s;", archive_mode);
- if (strlen (spec) + 1 >= sizeof (spec))
- abort ();
+ sb.appendf ("F:%d;", (int) follow_mode);
+ sb.appendf ("a:%s;", archive_mode);
free (data_desc);
- data_desc = strdup (spec);
+ data_desc = sb.toString ();
}
char *